diff --git a/.github/workflows/ci-tests.yml b/.github/workflows/ci-tests.yml
index a893f710..0ace6595 100644
--- a/.github/workflows/ci-tests.yml
+++ b/.github/workflows/ci-tests.yml
@@ -350,7 +350,23 @@ jobs:
 
       - name: Run Python tests
         run: |
-          uv run pytest tests/python/ -v --tb=short
+          uv run python - <<'PY'
+          import os
+          import sys
+
+          import pytest
+
+          exit_code = pytest.main(["tests/python/", "-v", "--tb=short"])
+          if "tensorcast._C" in sys.modules:  # only tear down if the extension was loaded
+              try:
+                  from tensorcast._c_ext import get_c_ext
+                  get_c_ext().shutdown_native_runtime()
+              except Exception as exc:  # best-effort: never mask the pytest exit code
+                  print(f"warning: native runtime shutdown failed: {exc!r}", file=sys.stderr)
+          sys.stdout.flush()
+          sys.stderr.flush()
+          os._exit(int(exit_code))  # skips interpreter finalization; streams flushed above
+          PY
         env:
           LD_LIBRARY_PATH: ${{ github.workspace }}/tensorcast/lib:${{ env.LD_LIBRARY_PATH }}
           TENSORCAST_CUDA_BACKEND: fake
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 404c3d64..6b84e31a 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -1,6 +1,7 @@
 default_install_hook_types:
   - pre-commit
   - commit-msg
+  - pre-push
 exclude: ^.github/actions/assigner/dist
 repos:
     - repo: https://github.com/pre-commit/pre-commit-hooks
@@ -90,6 +91,20 @@ repos:
             - --use-current-year
     - repo: local
       hooks:
+        - id: pyright
+          name: pyright (tensorcast)
+          language: system
+          entry: env UV_NO_SYNC=1 uv run pyright ./tensorcast  # UV_NO_SYNC=1: uv run does not sync the env first
+          pass_filenames: false  # entry already targets ./tensorcast; matched paths are not appended
+          files: ^(tensorcast/.*\.py|pyproject\.toml)$  # trigger filter only (see pass_filenames)
+          stages: [pre-push]  # not run at commit time
+        - id: mypy
+          name: mypy (tensorcast)
+          language: system
+          entry: env UV_NO_SYNC=1 uv run mypy ./tensorcast  # UV_NO_SYNC=1: uv run does not sync the env first
+          pass_filenames: false  # entry already targets ./tensorcast; matched paths are not appended
+          files: ^(tensorcast/.*\.py|pyproject\.toml)$  # trigger filter only (see pass_filenames)
+          stages: [pre-push]  # not run at commit time
         - id: webui-prettier
           name: webui-prettier-check
           language: system
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index dc751391..dae468a5 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -34,6 +34,14 @@ ruff check .
 ruff format . --check
 ```
 
+The pre-push hook also runs the CI-matching package type checks through the
+project `uv` environment. To run the same checks manually from that environment:
+
+```bash
+pyright ./tensorcast
+mypy ./tensorcast
+```
+
 If you modify protocol buffers, regenerate Python stubs and C++ headers:
 
 ```bash
diff --git a/daemon/BUILD b/daemon/BUILD
index 9c8222b6..cb56bdc8 100644
--- a/daemon/BUILD
+++ b/daemon/BUILD
@@ -1683,6 +1683,7 @@ sc_cc_library(
         "//proto/tensorcast/global_store/v1:global_store_grpc_cc",
         "@abseil-cpp//absl/strings",
         "@abseil-cpp//absl/types:span",
+        "@protobuf",
     ],
 )
 
@@ -1965,7 +1966,9 @@ cc_test(
     name = "pid_monitor_unwatch_integration_test",
     srcs = ["state/pid_monitor_unwatch_integration_test.cc"],
     deps = [
+        ":handle_lease_registry_lib",
         ":ipc_region_registry_lib",
+        ":lifecycle_kernel_lib",
         ":lip_manager_lib",
         ":pid_monitor_lib",
         ":ref_tracker_hdr",
@@ -1973,6 +1976,7 @@ cc_test(
         ":session_lifecycle_lib",
         ":session_manager_hdr",
         "//core/store:device_registry",
+        "//core/store:store_engine",
         "@catch2//:catch2_main",
     ],
 )
diff --git a/daemon/service/controllers/materialization_controller.cc b/daemon/service/controllers/materialization_controller.cc
index 484f5953..8bef4c58 100644
--- a/daemon/service/controllers/materialization_controller.cc
+++ b/daemon/service/controllers/materialization_controller.cc
@@ -625,6 +625,7 @@ grpc::Status MaterializationController::prefetch_serving_binding(
     status->set_message(failed ? "serving binding set materialization failed" : "serving binding set is local-ready");
     status->set_progress(1.0);
     status->mutable_result()->PackFrom(set_result);
+    attach_controller_realization_plan_span_attrs(rctx, realization_plan);
     rctx.mark_success();
     return grpc::Status::OK;
   }
diff --git a/daemon/service/controllers/materialization_policy_utils.cc b/daemon/service/controllers/materialization_policy_utils.cc
index 79786675..c6b187c6 100644
--- a/daemon/service/controllers/materialization_policy_utils.cc
+++ b/daemon/service/controllers/materialization_policy_utils.cc
@@ -5,6 +5,7 @@
 #include <algorithm>
 #include <cstdint>
 #include <format>
+#include <limits>
 #include <optional>
 #include <string>
 #include <string_view>
@@ -18,6 +19,9 @@
 #include "core/common/artifact_hash.h"
 #include "core/store/materialization/dataplane/view/view_identity.h"
 #include "daemon/service/rpc_context.h"
+#include "google/protobuf/io/coded_stream.h"
+#include "google/protobuf/io/zero_copy_stream_impl_lite.h"
+#include "google/protobuf/message_lite.h"
 
 namespace tensorcast::daemon::materialization_policy {
 
@@ -30,6 +34,7 @@ using store::loader::ViewOp;
 constexpr std::string_view kGroupRealizationTransportKind = "group_realization_transport";
 constexpr std::string_view kGroupRealizationChildTransportRequestProfile =
     "tensorcast.group_realization.child_transport_request.v1";
+constexpr std::string_view kControllerSourceSelectionDigestProfile = "tensorcast.controller.source_selection_digest.v1";
 
 store::loading::SourceLocalityHint to_source_locality(v2::SourceLocality locality) {
   switch (locality) {
@@ -375,20 +380,124 @@ std::vector<std::string> acquire_group_barriers_for(const v2::GroupRealizationAc
   return barriers;
 }
 
+std::string serialize_deterministic(const google::protobuf::MessageLite& message) {
+  std::string output;  // receives the bytes written through the streams below
+  {  // inner scope: both streams are destroyed before `output` is returned
+    google::protobuf::io::StringOutputStream string_stream(&output);
+    google::protobuf::io::CodedOutputStream coded_stream(&string_stream);
+    coded_stream.SetSerializationDeterministic(true);  // request deterministic output from this stream
+    if (!message.SerializeToCodedStream(&coded_stream) || coded_stream.HadError()) {
+      return message.SerializeAsString();  // NOTE(review): fallback path does not request deterministic output
+    }
+  }
+  return output;
+}
+
+void append_big_endian_u64(std::vector<uint8_t>* out, uint64_t value) {
+  for (int byte_index = 7; byte_index >= 0; --byte_index) {  // most significant byte first
+    out->push_back(static_cast<uint8_t>((value >> (8 * byte_index)) & 0xffU));
+  }
+}
+
+void append_digest_part(std::vector<uint8_t>* out, std::string_view part) {
+  append_big_endian_u64(out, static_cast<uint64_t>(part.size()));  // 8-byte length prefix first
+  out->insert(out->end(), part.cbegin(), part.cend());  // then the part's raw bytes
+}
+
+std::string sha256_hex_for_parts(const std::vector<std::string_view>& parts) {
+  uint64_t framed_size = 0;  // every part is framed as an 8-byte length prefix plus its bytes
+  for (const std::string_view part : parts) {
+    framed_size += 8U + static_cast<uint64_t>(part.size());
+  }
+  std::vector<uint8_t> payload;  // all framed parts, hashed as one contiguous buffer
+  if (framed_size <= static_cast<uint64_t>(std::numeric_limits<size_t>::max())) {
+    payload.reserve(static_cast<size_t>(framed_size));  // optimization only; skipped if it cannot fit size_t
+  }
+  for (const std::string_view part : parts) {
+    append_digest_part(&payload, part);
+  }
+  const std::vector<uint8_t> digest = common::sha256_digest_bytes(absl::MakeConstSpan(payload));
+  return absl::BytesToHexString(absl::string_view(reinterpret_cast<const char*>(digest.data()), digest.size()));
+}
+
+std::string artifact_profile_for(std::string_view artifact_id) {
+  const bool is_mounted_source = artifact_id.starts_with("msa1:");
+  const bool is_byte_artifact = artifact_id.starts_with("cgid:");
+  if (is_mounted_source) {
+    return "mounted_source";
+  }
+  // Ids with neither prefix fall through to the durable profile.
+  return is_byte_artifact ? "byte_artifact" : "durable_artifact";
+}
+
+std::string authority_scope_for(std::string_view artifact_id) {
+  // "msa1:" ids map to the mounted-source scope string; every other id maps to the durable one.
+  const bool is_mounted_source = artifact_id.starts_with("msa1:");
+  const std::string_view scope = is_mounted_source ? "daemon_local_mounted_source" : "daemon_mediated_durable";
+  return std::string(scope);
+}
+
+std::optional<std::string> requested_generation_hint_for(const v2::GroupRealizationOptions* group_realization) {
+  // A hint exists only for an enabled request whose version is a key reference
+  // that carries an expected generation.
+  const bool enabled = group_realization != nullptr && group_realization->enabled();
+  if (!enabled || group_realization->version().value_case() != v2::VersionReference::kKeyReference) {
+    return std::nullopt;
+  }
+  const v2::KeyVersionReference& key_reference = group_realization->version().key_reference();
+  if (key_reference.has_expected_generation()) {
+    return std::to_string(key_reference.expected_generation());
+  }
+  return std::nullopt;
+}
+
+std::optional<std::string> requested_version_set_id_for(const v2::GroupRealizationOptions* group_realization) {
+  // Only an enabled request that names an explicit, non-empty version set
+  // yields an id; every other shape yields nullopt.
+  const bool enabled = group_realization != nullptr && group_realization->enabled();
+  if (!enabled || group_realization->version().value_case() != v2::VersionReference::kExplicitVersionSet) {
+    return std::nullopt;
+  }
+  const std::string& requested_id = group_realization->version().explicit_version_set().version_set_id();
+  if (!requested_id.empty()) {
+    return requested_id;
+  }
+  return std::nullopt;
+}
+
 std::optional<std::string> selection_digest_for(
     const v2::GroupRealizationOptions* group_realization,
     const GroupRealizationBeginContext* begin_context,
     const tensorcast::common::v1::ArtifactSelection& selection) {
-  if (begin_context != nullptr && !begin_context->selection_hash.empty()) {
-    return absl::BytesToHexString(begin_context->selection_hash);
-  }
-  if (!selection.selection_hash().empty()) {
-    return absl::BytesToHexString(selection.selection_hash());
-  }
-  if (group_realization != nullptr && group_realization->enabled()) {
+  const tensorcast::common::v1::ArtifactSelection& effective_selection =
+      begin_context != nullptr && !begin_context->part_selection.artifact_id().empty() ? begin_context->part_selection
+                                                                                       : selection;
+  if (effective_selection.artifact_id().empty()) {  // no artifact id on either source: no digest
     return std::nullopt;
   }
-  return std::nullopt;
+
+  const std::string serialized_selection = serialize_deterministic(effective_selection);
+  const std::string selection_identity = begin_context != nullptr && !begin_context->selection_hash.empty()
+      ? begin_context->selection_hash
+      : effective_selection.selection_hash();  // begin-context hash wins when it is non-empty
+  std::string generation_hint;  // stays empty when neither source supplies a generation
+  if (begin_context != nullptr && begin_context->key_generation != 0) {  // begin-context generation wins
+    generation_hint = std::to_string(begin_context->key_generation);
+  } else if (std::optional<std::string> requested_generation = requested_generation_hint_for(group_realization);
+             requested_generation.has_value()) {
+    generation_hint = *requested_generation;
+  }
+  const std::string profile = artifact_profile_for(effective_selection.artifact_id());
+  const std::string scope = authority_scope_for(effective_selection.artifact_id());
+  return sha256_hex_for_parts({  // parts are hashed in this order, so reordering changes the digest
+      kControllerSourceSelectionDigestProfile,
+      serialized_selection,
+      effective_selection.logical_layout_hash(),
+      selection_identity,
+      profile,
+      scope,
+      generation_hint,
+  });
 }
 
 std::optional<std::string> operation_id_for(const v2::MaterializeIntoTargetRequest& request) {
@@ -896,7 +1005,7 @@ absl::StatusOr<ControllerRealizationPlan> build_controller_realization_plan_impl
       .group_barriers = group_barriers_for(group_realization),
       .version_set_id = group_begin_context != nullptr && !group_begin_context->version_set.version_set_id().empty()
           ? std::optional<std::string>(group_begin_context->version_set.version_set_id())
-          : std::nullopt,
+          : requested_version_set_id_for(group_realization),
       .transaction_id = group_begin_context != nullptr && !group_begin_context->transaction_id.empty()
           ? std::optional<std::string>(group_begin_context->transaction_id)
           : std::nullopt,
@@ -956,11 +1065,15 @@ absl::StatusOr<ControllerRealizationPlan> build_prefetch_target_set_realization_
           : "same_daemon_session",
       .collective_policy = prefetch_collective_policy_for(request, member_count),
       .group_barriers = group_barriers_for(request.has_group_realization() ? &request.group_realization() : nullptr),
-      .version_set_id = std::nullopt,
+      .version_set_id =
+          requested_version_set_id_for(request.has_group_realization() ? &request.group_realization() : nullptr),
       .transaction_id = std::nullopt,
       .source_selection_digest = !request.source().artifact_selection_digest().empty()
           ? std::optional<std::string>(request.source().artifact_selection_digest())
-          : selection_digest_for(nullptr, nullptr, request.source_selection()),
+          : selection_digest_for(
+                request.has_group_realization() ? &request.group_realization() : nullptr,
+                nullptr,
+                request.source_selection()),
   };
   plan.lifecycle = ControllerRealizationLifecyclePlan{
       .capability = "target_set",
@@ -1020,11 +1133,15 @@ absl::StatusOr<ControllerRealizationPlan> build_prefetch_member_realization_plan
           : "same_daemon_session",
       .collective_policy = prefetch_collective_policy_for(request, member_count),
       .group_barriers = group_barriers_for(request.has_group_realization() ? &request.group_realization() : nullptr),
-      .version_set_id = std::nullopt,
+      .version_set_id =
+          requested_version_set_id_for(request.has_group_realization() ? &request.group_realization() : nullptr),
       .transaction_id = std::nullopt,
       .source_selection_digest = !request.source().artifact_selection_digest().empty()
           ? std::optional<std::string>(request.source().artifact_selection_digest())
-          : selection_digest_for(nullptr, nullptr, request.source_selection()),
+          : selection_digest_for(
+                request.has_group_realization() ? &request.group_realization() : nullptr,
+                nullptr,
+                request.source_selection()),
   };
   plan.lifecycle = ControllerRealizationLifecyclePlan{
       .capability = "retained_binding",
@@ -1259,7 +1376,7 @@ absl::StatusOr<v2::CollectivePolicy> resolve_collective_policy(
     const ExecutionTopologyContext& execution_topology) {
   const bool has_collective_group = execution_topology.collective_load_group.has_value();
   if (requested == v2::CollectivePolicy::COLLECTIVE_POLICY_UNSPECIFIED) {
-    return has_collective_group ? v2::CollectivePolicy::COLLECTIVE_POLICY_REQUIRE_COLLECTIVE
+    return has_collective_group ? v2::CollectivePolicy::COLLECTIVE_POLICY_COLLECTIVE_FIRST
                                 : v2::CollectivePolicy::COLLECTIVE_POLICY_DISABLE_COLLECTIVE;
   }
   if (requested == v2::CollectivePolicy::COLLECTIVE_POLICY_DISABLE_COLLECTIVE && has_collective_group) {
@@ -1367,13 +1484,11 @@ absl::StatusOr<ControllerRealizationPlan> build_controller_realization_plan(
       .group_barriers = group_barriers_for(group_realization),
       .version_set_id = group_begin_context != nullptr && !group_begin_context->version_set.version_set_id().empty()
           ? std::optional<std::string>(group_begin_context->version_set.version_set_id())
-          : std::nullopt,
+          : requested_version_set_id_for(group_realization),
       .transaction_id = group_begin_context != nullptr && !group_begin_context->transaction_id.empty()
           ? std::optional<std::string>(group_begin_context->transaction_id)
           : std::nullopt,
-      .source_selection_digest = !resolved_selection.selection_hash().empty()
-          ? std::optional<std::string>(absl::BytesToHexString(resolved_selection.selection_hash()))
-          : std::nullopt,
+      .source_selection_digest = selection_digest_for(group_realization, group_begin_context, resolved_selection),
   };
   plan.lifecycle = ControllerRealizationLifecyclePlan{
       .capability = target_kind,
@@ -1439,9 +1554,8 @@ absl::StatusOr<ControllerRealizationPlan> build_controller_realization_plan(cons
       .group_barriers = {},
       .version_set_id = std::nullopt,
       .transaction_id = std::nullopt,
-      .source_selection_digest =
-          request.has_initial_selection() && !request.initial_selection().selection_hash().empty()
-          ? std::optional<std::string>(absl::BytesToHexString(request.initial_selection().selection_hash()))
+      .source_selection_digest = request.has_initial_selection()
+          ? selection_digest_for(nullptr, nullptr, request.initial_selection())
           : std::nullopt,
   };
   const bool daemon_owned = request.ownership() == v2::BindingOwnership::BINDING_OWNERSHIP_DAEMON;
@@ -1967,9 +2081,7 @@ absl::StatusOr<ControllerRealizationPlan> build_controller_realization_plan(
       .group_barriers = group_barriers_for(group_realization),
       .version_set_id = std::nullopt,
       .transaction_id = std::nullopt,
-      .source_selection_digest = !scope.selection().selection_hash().empty()
-          ? std::optional<std::string>(absl::BytesToHexString(scope.selection().selection_hash()))
-          : std::nullopt,
+      .source_selection_digest = selection_digest_for(group_realization, nullptr, scope.selection()),
   };
   plan.lifecycle = ControllerRealizationLifecyclePlan{
       .capability = "publication",
diff --git a/daemon/service/controllers/owned_binding_service.cc b/daemon/service/controllers/owned_binding_service.cc
index 6ff39041..bb68ca74 100644
--- a/daemon/service/controllers/owned_binding_service.cc
+++ b/daemon/service/controllers/owned_binding_service.cc
@@ -207,6 +207,7 @@ std::string serialize_proto_for_cache_key(const google::protobuf::MessageLite& p
 
 std::string compute_target_layout_geometry_hash(const v2::TargetLayout& layout) {
   std::string payload;
+  absl::flat_hash_map<std::string, uint64_t> storage_ordinals;
   absl::StrAppend(
       &payload,
       "layout_kind=",
@@ -218,15 +219,26 @@ std::string compute_target_layout_geometry_hash(const v2::TargetLayout& layout)
       "|view_id=");
   append_cache_field(&payload, layout.view_id());
   append_cache_field(&payload, layout.logical_layout_hash());
+  uint64_t storage_ordinal = 0;
   for (const auto& storage : layout.storages()) {
-    append_cache_field(&payload, storage.storage_id());
+    if (!storage.storage_id().empty()) {
+      storage_ordinals.emplace(storage.storage_id(), storage_ordinal);
+    }
+    append_cache_uint64(&payload, storage_ordinal);
     append_cache_uint64(&payload, static_cast<uint64_t>(storage.device_id()));
     append_cache_uint64(&payload, storage.storage_length());
     append_cache_uint64(&payload, storage.mapping_base_offset());
+    storage_ordinal++;
   }
   for (const auto& entry : layout.offsets()) {
     append_cache_field(&payload, entry.name());
-    append_cache_field(&payload, entry.storage_id());
+    auto storage_it = storage_ordinals.find(entry.storage_id());
+    if (storage_it == storage_ordinals.end()) {
+      append_cache_field(&payload, "unknown-storage");
+      append_cache_field(&payload, entry.storage_id());
+    } else {
+      append_cache_uint64(&payload, storage_it->second);
+    }
     append_cache_uint64(&payload, entry.storage_offset());
     append_cache_uint64(&payload, entry.logical_length());
   }
@@ -242,7 +254,7 @@ std::string binding_realization_plan_cache_key(
     std::string_view canonical_index_json,
     v2::TransformPlacement placement) {
   std::string payload;
-  append_cache_field(&payload, "binding-realization-plan-v1");
+  append_cache_field(&payload, "binding-realization-plan-v2");
   append_cache_field(&payload, resolved_artifact_id);
   append_cache_field(&payload, serialize_proto_for_cache_key(selection));
   append_cache_field(&payload, serialize_proto_for_cache_key(realization_plan));
@@ -306,7 +318,7 @@ std::string mapped_execution_template_cache_key(
     const store::loading::ExecutionTopologyContext& topology,
     bool disk_source_available) {
   std::string payload;
-  append_cache_field(&payload, "mapped-execution-template-v1");
+  append_cache_field(&payload, "mapped-execution-template-v2");
   append_cache_field(&payload, plan_key);
   append_cache_field(
       &payload,
diff --git a/daemon/service/materialization_policy_utils_test.cc b/daemon/service/materialization_policy_utils_test.cc
index c630b810..f5f406e0 100644
--- a/daemon/service/materialization_policy_utils_test.cc
+++ b/daemon/service/materialization_policy_utils_test.cc
@@ -3,6 +3,7 @@
 #include "daemon/service/controllers/materialization_policy_utils.h"
 
 #include <algorithm>
+#include <cctype>
 #include <format>
 #include <memory>
 #include <string>
@@ -24,6 +25,7 @@ using tensorcast::daemon::materialization_policy::GroupRealizationPreparedMember
 using tensorcast::daemon::materialization_policy::report_group_realization_prepared_if_enabled;
 using tensorcast::daemon::materialization_policy::require_controller_export_kind;
 using tensorcast::daemon::materialization_policy::require_controller_resource_authority;
+using tensorcast::daemon::materialization_policy::resolve_collective_policy;
 using tensorcast::daemon::materialization_policy::resolve_group_realization_transport_context;
 using tensorcast::daemon::materialization_policy::resolve_materialization_request_context;
 using tensorcast::daemon::materialization_policy::resolve_operation_transport_context;
@@ -84,6 +86,18 @@ bool has_resource_authority(const ControllerRealizationPlan& plan, std::string_v
       [authority](const std::string& current) { return std::string_view(current) == authority; });
 }
 
+bool is_sha256_hex(std::string_view value) {
+  constexpr std::size_t kSha256HexLength = 64;  // 32 digest bytes, two hex characters per byte
+  const auto is_hex_digit = [](char ch) { return std::isxdigit(static_cast<unsigned char>(ch)) != 0; };
+  return value.size() == kSha256HexLength && std::all_of(value.begin(), value.end(), is_hex_digit);
+}
+
+void check_controller_source_selection_digest(const std::optional<std::string>& digest) {
+  REQUIRE(digest.has_value());  // must hold before the dereferences below
+  CHECK(is_sha256_hex(*digest));
+  CHECK(*digest != "73656c656374696f6e2d68617368");  // hex encoding of the ASCII string "selection-hash"
+}
+
 v2::GroupRealizationOptions build_group_realization_options() {
   v2::GroupRealizationOptions options;
   options.set_enabled(true);
@@ -180,7 +194,7 @@ TEST_CASE("Controller realization plan mirrors caller target materialization", "
   CHECK(plan_or->target.member_count == 1);
   CHECK(plan_or->strategy.source_selection_mode == "single_selection");
   CHECK(plan_or->strategy.source_coordination == "single_request");
-  CHECK(plan_or->strategy.source_selection_digest == "73656c656374696f6e2d68617368");
+  check_controller_source_selection_digest(plan_or->strategy.source_selection_digest);
   CHECK(plan_or->lifecycle.capability == "caller_tensors");
   CHECK((plan_or->lifecycle.release_policy == std::vector<std::string>{"release_external_target_storage_lease"}));
   CHECK(plan_or->resource_envelope.projection_kind == "completion");
@@ -191,6 +205,44 @@ TEST_CASE("Controller realization plan mirrors caller target materialization", "
   REQUIRE(require_controller_resource_authority(*plan_or, "caller_allocation", "MaterializeIntoTarget").ok());
 }
 
+TEST_CASE(
+    "Controller source selection digest is independent from target layout digest",
+    "[daemon][materialization][policy]") {
+  v2::MaterializeIntoTargetRequest base_request;
+  base_request.mutable_selection()->set_artifact_id("mi2:test:artifact");
+  base_request.mutable_selection()->set_logical_layout_hash("logical-layout");
+  base_request.mutable_selection()->set_selection_hash("selection-hash");
+  base_request.set_pid(1234);
+  base_request.set_device_uuid("GPU-0");
+  fill_target_layout(base_request.mutable_target_layout());
+  auto request_context_or = resolve_materialization_request_context(nullptr);
+  REQUIRE(request_context_or.ok());
+
+  const auto first_transport = resolve_operation_transport_context("op-identity-1");
+  auto first_or = build_controller_realization_plan(
+      base_request, *request_context_or, first_transport, nullptr, "mi2:test:artifact");
+  REQUIRE(first_or.ok());
+
+  v2::MaterializeIntoTargetRequest target_changed_request = base_request;  // same selection, new target layout
+  target_changed_request.mutable_target_layout()->mutable_storages(0)->set_storage_length(2048);
+  const auto target_changed_transport = resolve_operation_transport_context("op-identity-2");
+  auto target_changed_or = build_controller_realization_plan(
+      target_changed_request, *request_context_or, target_changed_transport, nullptr, "mi2:test:artifact");
+  REQUIRE(target_changed_or.ok());
+
+  v2::MaterializeIntoTargetRequest source_changed_request = base_request;  // same target layout, new selection
+  source_changed_request.mutable_selection()->set_logical_layout_hash("other-logical-layout");
+  const auto source_changed_transport = resolve_operation_transport_context("op-identity-3");
+  auto source_changed_or = build_controller_realization_plan(
+      source_changed_request, *request_context_or, source_changed_transport, nullptr, "mi2:test:artifact");
+  REQUIRE(source_changed_or.ok());
+
+  check_controller_source_selection_digest(first_or->strategy.source_selection_digest);
+  CHECK(first_or->strategy.source_selection_digest == target_changed_or->strategy.source_selection_digest);
+  CHECK(first_or->target.target_layout_digest != target_changed_or->target.target_layout_digest);
+  CHECK(first_or->strategy.source_selection_digest != source_changed_or->strategy.source_selection_digest);
+}
+
 TEST_CASE("Controller realization plan mirrors binding creation ownership", "[daemon][materialization][policy]") {
   v2::CreateBindingRequest daemon_request;
   daemon_request.set_ownership(v2::BindingOwnership::BINDING_OWNERSHIP_DAEMON);
@@ -243,8 +295,7 @@ TEST_CASE("Controller realization plan mirrors binding creation ownership", "[da
   CHECK(adopted_plan_or->target.resolved_artifact_id == "mi2:source-override");
   CHECK(adopted_plan_or->strategy.source_selection_mode == "single_selection");
   CHECK(adopted_plan_or->strategy.source_coordination == "binding_initial_value");
-  REQUIRE(adopted_plan_or->strategy.source_selection_digest.has_value());
-  CHECK(*adopted_plan_or->strategy.source_selection_digest == "73656c656374696f6e2d68617368");
+  check_controller_source_selection_digest(adopted_plan_or->strategy.source_selection_digest);
   CHECK(adopted_plan_or->lifecycle.export_lifetime_kind == "binding_registry");
   CHECK(adopted_plan_or->lifecycle.mutability_contract == "caller_region_borrowed");
   CHECK(adopted_plan_or->resource_envelope.backing_kind == "caller_region");
@@ -450,8 +501,7 @@ TEST_CASE("Controller realization plan mirrors owned binding refill", "[daemon][
   CHECK(*plan_or->target.operation_id == "refill-op");
   CHECK(plan_or->strategy.source_selection_mode == "single_selection");
   CHECK(plan_or->strategy.collective_policy == v2::CollectivePolicy::COLLECTIVE_POLICY_REQUIRE_COLLECTIVE);
-  REQUIRE(plan_or->strategy.source_selection_digest.has_value());
-  CHECK(*plan_or->strategy.source_selection_digest == "73656c656374696f6e2d68617368");
+  check_controller_source_selection_digest(plan_or->strategy.source_selection_digest);
   CHECK(plan_or->lifecycle.capability == "binding_owned");
   CHECK(plan_or->lifecycle.export_lifetime_kind == "binding_current_value");
   CHECK(plan_or->lifecycle.mutability_contract == "binding_controlled_read_only");
@@ -688,8 +738,7 @@ TEST_CASE(
   CHECK(plan_or->strategy.source_selection_mode == "single_selection");
   CHECK(plan_or->strategy.source_coordination == "single_request");
   CHECK(plan_or->strategy.collective_policy == v2::CollectivePolicy::COLLECTIVE_POLICY_COLLECTIVE_FIRST);
-  REQUIRE(plan_or->strategy.source_selection_digest.has_value());
-  CHECK(*plan_or->strategy.source_selection_digest == "73656c656374696f6e2d68617368");
+  check_controller_source_selection_digest(plan_or->strategy.source_selection_digest);
   CHECK(plan_or->lifecycle.capability == "tensor_dict");
   CHECK(plan_or->lifecycle.export_lifetime_kind == "handle_lease");
   CHECK(
@@ -827,6 +876,8 @@ TEST_CASE(
   fill_serving_target(target_set->add_members(), "member-1", 1, "GPU-1");
   target_set->mutable_source()->CopyFrom(request.source());
   request.mutable_group_realization()->CopyFrom(build_group_realization_options());
+  request.mutable_group_realization()->mutable_version()->mutable_explicit_version_set()->set_version_set_id(
+      "vs-requested");
   request.mutable_group_realization()->set_require_staged_publish(true);
 
   auto plan_or = build_controller_realization_plan(request);
@@ -840,6 +891,10 @@ TEST_CASE(
   CHECK(plan_or->strategy.source_selection_mode == "same_selection");
   CHECK(plan_or->strategy.source_coordination == "group_realization_transport");
   CHECK(plan_or->strategy.collective_policy == v2::CollectivePolicy::COLLECTIVE_POLICY_COLLECTIVE_FIRST);
+  REQUIRE(plan_or->strategy.version_set_id.has_value());
+  CHECK(*plan_or->strategy.version_set_id == "vs-requested");
+  REQUIRE(plan_or->strategy.source_selection_digest.has_value());
+  CHECK(*plan_or->strategy.source_selection_digest == "source-selection");
   CHECK(
       (plan_or->strategy.group_barriers ==
        std::vector<std::string>{"member_readiness", "group_acquire", "staged_values", "publish_barrier"}));
@@ -977,8 +1032,7 @@ TEST_CASE("Controller realization plan mirrors target publication lifecycle", "[
   CHECK(plan_or->strategy.source_selection_mode == "single_selection");
   CHECK(plan_or->strategy.source_coordination == "publication_lifecycle");
   CHECK(plan_or->strategy.collective_policy == v2::CollectivePolicy::COLLECTIVE_POLICY_DISABLE_COLLECTIVE);
-  REQUIRE(plan_or->strategy.source_selection_digest.has_value());
-  CHECK(*plan_or->strategy.source_selection_digest == "73656c656374696f6e2d68617368");
+  check_controller_source_selection_digest(plan_or->strategy.source_selection_digest);
   CHECK(plan_or->lifecycle.capability == "publication");
   CHECK(plan_or->lifecycle.export_lifetime_kind == "publication_lease");
   CHECK(plan_or->lifecycle.mutability_contract == "published_read_only");
@@ -1157,6 +1211,19 @@ TEST_CASE(
       v2::CollectivePolicy::COLLECTIVE_POLICY_COLLECTIVE_FIRST);
 }
 
+TEST_CASE(
+    "Unspecified collective policy defaults to collective-first when topology is present",
+    "[daemon][materialization][policy]") {
+  ExecutionTopologyContext execution_topology;  // only the collective load group is populated below
+  execution_topology.collective_load_group =
+      CollectiveLoadGroupHint{.group_id = "same-host-tp-load", .world_size = 8, .rank = 3};
+
+  auto policy_or = resolve_collective_policy(v2::CollectivePolicy::COLLECTIVE_POLICY_UNSPECIFIED, execution_topology);
+
+  REQUIRE(policy_or.ok());  // must hold before dereferencing policy_or
+  CHECK(*policy_or == v2::CollectivePolicy::COLLECTIVE_POLICY_COLLECTIVE_FIRST);
+}
+
 TEST_CASE(
     "Mapped target defaults to disable-collective without collective topology",
     "[daemon][materialization][policy]") {
diff --git a/daemon/state/handle_lease_registry.cc b/daemon/state/handle_lease_registry.cc
index dcf7623f..88115a48 100644
--- a/daemon/state/handle_lease_registry.cc
+++ b/daemon/state/handle_lease_registry.cc
@@ -858,6 +858,9 @@ absl::StatusOr<std::string> HandleLeaseRegistry::mint_external_cuda_lease(pid_t
   if (!cleanup) {
     return absl::InvalidArgumentError("cleanup is required");
   }
+  if (lifecycle_ == nullptr) {
+    return absl::FailedPreconditionError("lifecycle manager is unavailable");
+  }
 
   std::string token;
   {
@@ -880,11 +883,13 @@ absl::StatusOr<std::string> HandleLeaseRegistry::mint_external_cuda_lease(pid_t
         .external_cleanup = std::move(cleanup),
     };
   }
+  lifecycle_->watch_pid(pid);
   return token;
 }
 
 absl::Status HandleLeaseRegistry::release(const std::string& lease_token) {
   SessionLifecycleManager::LeaseId id = 0;
+  pid_t external_owner_pid = 0;
   std::function<void()> external_cleanup;
   {
     absl::MutexLock lock(&mu_);
@@ -893,6 +898,7 @@ absl::Status HandleLeaseRegistry::release(const std::string& lease_token) {
       return absl::NotFoundError("lease_token not found");
     }
     if (it->second.kind == HandleKind::kExternal) {
+      external_owner_pid = it->second.external_owner_pid;
       external_cleanup = std::move(it->second.external_cleanup);
       leases_.erase(it);
     } else {
@@ -901,6 +907,9 @@ absl::Status HandleLeaseRegistry::release(const std::string& lease_token) {
   }
   if (external_cleanup) {
     external_cleanup();
+    if (lifecycle_ != nullptr && external_owner_pid > 0) {
+      lifecycle_->unwatch_pid(external_owner_pid);
+    }
     return absl::OkStatus();
   }
   lifecycle_->release_lease(id);
diff --git a/daemon/state/pid_monitor_unwatch_integration_test.cc b/daemon/state/pid_monitor_unwatch_integration_test.cc
index 51c95c62..cd66cca5 100644
--- a/daemon/state/pid_monitor_unwatch_integration_test.cc
+++ b/daemon/state/pid_monitor_unwatch_integration_test.cc
@@ -2,8 +2,16 @@
 
 #include <catch2/catch_test_macros.hpp>
 
+#include <cstdlib>
+#include <filesystem>
+
+#include "absl/status/status.h"
 #include "core/store/device_registry.h"
+#include "core/store/store_engine.h"
+#include "core/store/store_engine_options.h"
+#include "daemon/state/handle_lease_registry.h"
 #include "daemon/state/ipc_region_registry.h"
+#include "daemon/state/lifecycle_kernel.h"
 #include "daemon/state/lip_manager.h"
 #include "daemon/state/pid_monitor.h"
 #include "daemon/state/ref_tracker.h"
@@ -17,6 +25,30 @@ using tensorcast::daemon::SessionLifecycleManager;
 using tensorcast::store::DeviceRegistry;
 using tensorcast::store::loading::ReplicaKey;
 
+namespace {
+
+std::filesystem::path test_tmpdir() {  // Returns the scratch directory root used by this test file.
+  const char* env = std::getenv("TEST_TMPDIR");  // nullptr when the variable is unset.
+  if (env != nullptr && *env != '\0') {  // Honor TEST_TMPDIR only when it is set and non-empty.
+    return std::filesystem::path(env);
+  }
+  return std::filesystem::temp_directory_path() / "tensorcast_pid_monitor_unwatch_integration_test";  // Fallback.
+}
+
+tensorcast::store::StoreEngineOptions make_engine_opts() {  // Options for the StoreEngine built by these tests.
+  tensorcast::store::StoreEngineOptions opts;
+  opts.storage_path = (test_tmpdir() / "engine").string();  // The "engine" subdirectory of the test temp dir.
+  std::filesystem::create_directories(opts.storage_path);  // Create it (and any missing parents) up front.
+  opts.p2p_port = 0;  // NOTE(review): presumably lets the engine choose a port - confirm.
+  opts.memory_pool_size = 32ULL << 20;  // 32 * 2^20; presumably bytes (32 MiB) - confirm the unit.
+  opts.tx_slice_bytes = 1ULL << 20;  // 1 MiB.
+  opts.num_thread = 2;
+  opts.global_store_address.clear();  // Leave the global store address empty.
+  return opts;
+}
+
+} // namespace
+
 TEST_CASE("PidMonitor unwatch called on last guard retire", "[daemon][lifecycle][pid]") {
   ReplicaSessionManager sessions(std::chrono::seconds(60));
   RefTracker refs;
@@ -71,3 +103,55 @@ TEST_CASE("PidMonitor unwatch is suppressed by external watches", "[daemon][life
   mgr.unwatch_pid(pid);
   REQUIRE_FALSE(mon.is_watching_for_test(pid));
 }
+
+TEST_CASE("external CUDA handle lease watches owner pid until release", "[daemon][lifecycle][pid][handle]") {
+  ReplicaSessionManager sessions(std::chrono::seconds(60));  // Fixtures for the SessionLifecycleManager.
+  RefTracker refs;
+  tensorcast::daemon::IpcRegionRegistry regions(tensorcast::daemon::IpcRegionRegistry::Options{});
+  auto lip =
+      std::make_unique<tensorcast::daemon::LipManager>(std::shared_ptr<tensorcast::store::StoreEngine>(), &regions);
+  SessionLifecycleManager mgr(sessions, refs, *lip);  // Note: lip was given an empty StoreEngine pointer.
+  // Attach a PidMonitor (constructed with a no-op lambda) so the test can query its watch state.
+  PidMonitor mon([&](pid_t) {});
+  mgr.attach_pid_monitor(&mon);
+  auto engine = std::make_shared<tensorcast::store::StoreEngine>(make_engine_opts());
+  tensorcast::daemon::LifecycleKernel lifecycle_kernel("daemon-test");
+  tensorcast::daemon::HandleLeaseRegistry leases(
+      tensorcast::daemon::HandleLeaseRegistry::Options{}, *engine, mgr, lifecycle_kernel);
+  // Minting an external lease must start watching the owner pid; the cleanup lambda only counts calls.
+  const int32_t pid = 884422;  // NOTE(review): presumably an arbitrary pid not tied to a live process - confirm.
+  int cleanup_calls = 0;
+  auto token_or = leases.mint_external_cuda_lease(pid, [&]() { ++cleanup_calls; });
+  REQUIRE(token_or.ok());
+  REQUIRE(mon.is_watching_for_test(pid));
+  // Releasing the lease must run the cleanup exactly once and stop watching the pid.
+  REQUIRE(leases.release(*token_or).ok());
+
+  CHECK(cleanup_calls == 1);
+  REQUIRE_FALSE(mon.is_watching_for_test(pid));
+}
+
+TEST_CASE("external CUDA handle lease cleanup runs on owner pid exit", "[daemon][lifecycle][pid][handle]") {
+  ReplicaSessionManager sessions(std::chrono::seconds(60));  // Fixtures for the SessionLifecycleManager.
+  RefTracker refs;
+  tensorcast::daemon::IpcRegionRegistry regions(tensorcast::daemon::IpcRegionRegistry::Options{});
+  auto lip =
+      std::make_unique<tensorcast::daemon::LipManager>(std::shared_ptr<tensorcast::store::StoreEngine>(), &regions);
+  SessionLifecycleManager mgr(sessions, refs, *lip);  // Note: lip was given an empty StoreEngine pointer.
+  // No PidMonitor is attached in this test; the exit handlers are invoked directly further down.
+  auto engine = std::make_shared<tensorcast::store::StoreEngine>(make_engine_opts());
+  tensorcast::daemon::LifecycleKernel lifecycle_kernel("daemon-test");
+  tensorcast::daemon::HandleLeaseRegistry leases(
+      tensorcast::daemon::HandleLeaseRegistry::Options{}, *engine, mgr, lifecycle_kernel);
+  // Mint an external lease whose cleanup lambda only counts how often it runs.
+  const int32_t pid = 884423;  // NOTE(review): presumably an arbitrary pid not tied to a live process - confirm.
+  int cleanup_calls = 0;
+  auto token_or = leases.mint_external_cuda_lease(pid, [&]() { ++cleanup_calls; });
+  REQUIRE(token_or.ok());
+  // Simulate the owner process exiting by calling both pid-exit handlers by hand.
+  mgr.handle_pid_exit(pid);
+  leases.handle_pid_exit(pid);
+  // The cleanup must have run exactly once, and the lease token must no longer be found.
+  CHECK(cleanup_calls == 1);
+  CHECK(absl::IsNotFound(leases.release(*token_or)));
+}
diff --git a/docs/designs/0111-source-to-serving-builder-and-representation-publication.md b/docs/designs/0111-source-to-serving-builder-and-representation-publication.md
index d45e6a3d..9c8da26c 100644
--- a/docs/designs/0111-source-to-serving-builder-and-representation-publication.md
+++ b/docs/designs/0111-source-to-serving-builder-and-representation-publication.md
@@ -15,7 +15,7 @@ related_code:
   - docs/internals/model-loading.md
   - tensorcast/types.py
   - tensorcast/api/store/__init__.py
-  - tensorcast/api/store/serving_builder.py
+  - tensorcast/api/store/publication_builder.py
   - tensorcast/api/store/binding.py
   - tensorcast/api/store/owned_binding_slot.py
   - proto/tensorcast/daemon/v2/store_daemon.proto
diff --git a/docs/designs/0112-binding-native-serving-realization-and-publication.md b/docs/designs/0112-binding-native-serving-realization-and-publication.md
index 724c0ea3..dfa94c4e 100644
--- a/docs/designs/0112-binding-native-serving-realization-and-publication.md
+++ b/docs/designs/0112-binding-native-serving-realization-and-publication.md
@@ -18,7 +18,7 @@ related_code:
   - tensorcast/api/store/binding.py
   - tensorcast/api/store/owned_binding_slot.py
   - tensorcast/api/store/realization_plan.py
-  - tensorcast/api/store/serving_builder.py
+  - tensorcast/api/store/publication_builder.py
   - tensorcast/api/store/mapped_binding.py
   - tensorcast/types.py
   - proto/tensorcast/daemon/v2/store_daemon.proto
diff --git a/docs/designs/0120-artifact-centered-model-runtime-realization.md b/docs/designs/0120-artifact-centered-model-runtime-realization.md
index 033e1b8d..9563974f 100644
--- a/docs/designs/0120-artifact-centered-model-runtime-realization.md
+++ b/docs/designs/0120-artifact-centered-model-runtime-realization.md
@@ -4,7 +4,7 @@ title: Artifact-Centered Model Runtime Realization
 status: draft
 areas: ["sdk", "serving", "daemon", "core", "integrations", "docs", "tests"]
 created: 2026-05-23
-last_updated: 2026-05-25
+last_updated: 2026-05-26
 related_code:
   - docs/designs/0039-artifact-first-sdk.md
   - docs/designs/0078-selection-first-artifact-retrieval.md
@@ -24,15 +24,14 @@ related_code:
   - tensorcast/api/store/artifact.py
   - tensorcast/api/store/binding.py
   - tensorcast/types.py
-  - tensorcast/serving/runtime.py
-  - tensorcast/serving/config.py
-  - tensorcast/serving/policy.py
-  - tensorcast/serving/hosts.py
-  - tensorcast/serving/binding_plan.py
-  - tensorcast/serving/retained_binding.py
-  - tensorcast/serving/runtime_attachment.py
-  - tensorcast/serving/replica_publication.py
-  - tensorcast/serving/_runtime_impl/lifecycle.py
+  - tensorcast/artifact_runtime/lifecycle.py
+  - tensorcast/artifact_runtime/config.py
+  - tensorcast/artifact_runtime/policy.py
+  - tensorcast/artifact_runtime/host.py
+  - tensorcast/artifact_runtime/attachment.py
+  - tensorcast/artifact_runtime/binding/retained.py
+  - tensorcast/artifact_runtime/publication/replica.py
+  - tensorcast/artifact_runtime/recipe/
 links:
   plan: ../plans/0120-artifact-centered-model-runtime-realization.md
   dependencies:
@@ -94,7 +93,7 @@ The decision is:
   model-runtime stacks;
 - preserve the current vLLM scenario semantics, fastest compatible data path,
   retained memory-credit timing, and zero-extra-weight-residency behavior even
-  when TensorCast and vllm APIs are changed incompatibly.
+  when TensorCast and vLLM APIs are changed incompatibly.
 
 ```mermaid
 flowchart LR
@@ -156,7 +155,7 @@ The plan paired with this design owns current code status, phase tracking,
 implementation gaps, and rollout order. This design should not be read as an
 implementation snapshot.
 
-The vllm baseline remains important only as a regression contract:
+The serving-runtime baseline remains important only as a regression contract:
 
 - vLLM behavior is a regression baseline;
 - runtime attachment, retained acquire, reload, runtime view, publication, and
@@ -231,7 +230,7 @@ serialized handoff is a retained binding claim/capability for a realization
 target. It is not user-facing preload vocabulary and should not remain a
 separate serving materialization family.
 
-## vllm cleanup baseline
+## Serving runtime cleanup baseline
 
 Kept as current behavior baseline, not as an API compatibility boundary. Its
 `ServingRuntimeSession`, `RuntimeAttachment`, retained acquire, runtime view,
@@ -284,7 +283,7 @@ publication, or diagnostics.
 | `tensorcast.serving` | internal serving ABI helpers, optional private lowerings, builder/publication implementation details while they remain serving-ABI-specific | public runtime session root, public locator authority, independent retained acquire model, independent diagnostics/report model |
 | Store Daemon | binding values, leases, mounted-source attestation, local realization ownership, PID/session safety, device-local movement | framework construction/finalize hooks, durable metadata authority |
 | Global Store | durable artifact metadata, replica metadata, coordination records, publication visibility | SDK direct control path, process-local attachment state |
-| vllm `vllm.tensorcast.*` | runtime host capability construction, vLLM placement/source/collective facts, model construction/finalize hooks, reload/publication calls | TensorCast artifact selection authority, daemon lease authority, duplicate vllm session model |
+| vLLM `vllm.tensorcast.*` | runtime host capability construction, vLLM placement/source/collective facts, model construction/finalize hooks, reload/publication calls | TensorCast artifact selection authority, daemon lease authority, duplicate serving runtime session model |
 
 The intended end state is one public root and one professional framework
 boundary. If a serving-named object remains after migration, it must satisfy one
@@ -443,7 +442,7 @@ state.
 
 - acceptable as package namespace during migration;
 - acceptable in profile names such as `serving_abi_version` when the payload is
-  specifically the model-vllm ABI;
+  specifically the model-serving runtime ABI;
 - not acceptable as a second root for artifact identity, source discovery,
   P2P routing, or publication.
 
@@ -629,9 +628,9 @@ parallel TensorDict or source subsystem:
    current publication state, swap the active binding value, and project the
    reload response from the new attachment.
 
-## vllm target integration flow
+## vLLM target integration flow
 
-vllm should become a client of the artifact-runtime professional API,
+vLLM should become a client of the artifact-runtime professional API,
 not a client of a public TensorCast serving session. The expected final flow is:
 
 1. `model_loader_extra_config` is normalized into an artifact/runtime request:
@@ -659,7 +658,7 @@ not a client of a public TensorCast serving session. The expected final flow is:
    local-ready durable promotion call artifact-runtime publication actions. They
    do not call a separate serving publication authority.
 
-| vllm concern | Target TensorCast interaction | Decision logic |
+| vLLM concern | Target TensorCast interaction | Decision logic |
 | --- | --- | --- |
 | loader startup | `Artifact.realize(... model_runtime ..., runtime_host=...)` | model loading is a realization of an artifact selection |
 | placement/topology facts | `ArtifactRealizationSpec` plus runtime host admission facts | topology affects realization/admission, not artifact identity |
@@ -671,7 +670,7 @@ not a client of a public TensorCast serving session. The expected final flow is:
 | publication/shutdown | handle or attachment publication actions with active-generation CAS | publication is artifact replica lifecycle |
 | main/draft models | target-set transaction or documented sequential semantics | partial reload behavior must be explicit |
 
-At the end of migration, vllm should not import public
+At the end of migration, vLLM should not import public
 `tensorcast.serving.*` session/config/retained/publication APIs for normal
 startup, reload, memory accounting, runtime view, or shutdown. Any remaining
 serving import must be either an internal implementation dependency with no
@@ -786,7 +785,7 @@ These objects should not become the everyday user API.
 
 The following names are the preferred long-term conceptual direction. They do
 not require compatibility aliases; the implementation may rename or reshape the
-current vllm interfaces directly once the vLLM scenario matrix is
+current serving-runtime interfaces directly once the vLLM scenario matrix is
 covered. When a replacement is wired and tested, the old public name should be
 deleted or narrowed to an internal implementation name; it should not remain as a
 parallel compatibility path.
@@ -1042,7 +1041,7 @@ artifact-realization stack:
 - The final `tensorcast.serving` module is shallow: normal startup, reload,
   retained memory credit, runtime view, and shutdown/publication do not require
   public serving-session/config/retained/publication APIs.
-- vllm normal paths use the direct artifact-runtime API and runtime
+- vLLM normal paths use the direct artifact-runtime API and runtime
   host capabilities; remaining serving imports are private implementation or
   serving-ABI-specific builder paths with owners.
 - Retained pre-admission credit, mounted-source bootstrap, active-generation
@@ -1091,10 +1090,10 @@ the same migration window instead of kept as permanent compatibility aliases.
 - `docs/designs/0114-collective-first-binding-realization-for-tp-serving-startup.md`
 - `docs/designs/0116-prefetch-serving-binding-target.md`
 - `docs/architecture/p2p-transfer-strategies.md`
-- `/opt/vllm/vllm/tensorcast/loader.py`
-- `/opt/vllm/vllm/tensorcast/placement.py`
-- `/opt/vllm/vllm/tensorcast/source.py`
-- `/opt/vllm/vllm/tensorcast/collective.py`
-- `/opt/vllm/vllm/tensorcast/adapter.py`
-- `/opt/vllm/vllm/model_executor/model_loader/memory_accounting.py`
-- `/opt/vllm/vllm/v1/worker/gpu_model_runner.py`
+- `vllm/tensorcast/loader.py`
+- `vllm/tensorcast/placement.py`
+- `vllm/tensorcast/source.py`
+- `vllm/tensorcast/collective.py`
+- `vllm/tensorcast/adapter.py`
+- `vllm/model_executor/model_loader/memory_accounting.py`
+- `vllm/v1/worker/gpu_model_runner.py`
diff --git a/docs/guides/steptron-vllm-binding-integration.md b/docs/guides/steptron-vllm-binding-integration.md
index 397ad284..82159362 100644
--- a/docs/guides/steptron-vllm-binding-integration.md
+++ b/docs/guides/steptron-vllm-binding-integration.md
@@ -241,8 +241,8 @@ TensorCast must own:
 - contributor liveness through the existing lease/guard/finalizer runtime
 - final source `seal_assembly(...)`
 - source immutable version-key publication in the current dependency-ready wave
-- optional source -> serving builder or publisher only in the successor wave
-  after typed child closeout contracts exist
+- optional source -> runtime-artifact representation builder or publisher only
+  in the successor wave after typed child closeout contracts exist
 - final serving-key or serving-manifest publication only in that successor wave
 
 `steptron` should not:
diff --git a/docs/internals/model-loading.md b/docs/internals/model-loading.md
index 35ea377f..706e0bad 100644
--- a/docs/internals/model-loading.md
+++ b/docs/internals/model-loading.md
@@ -166,13 +166,14 @@ region-backed data plane:
   current bound layout once the local overwrite succeeds.
 
 This publish path is the ordinary artifact-backed replica path from `0084`. It
-is not the serving-artifact publication or `representation_publish` closeout
-path used by source-to-serving builder work.
+is not the runtime-artifact publication or `representation_publish` closeout
+path used by representation-publication builder work.
 
-## Serving-Artifact Runtime Preflight
+## Runtime-Artifact Preflight
 
-When runtime consumes a serving artifact, TensorCast now performs a serving
-artifact preflight before accepting it into the steady-state loading path.
+When runtime consumes an artifact with serving-manifest ABI metadata,
+TensorCast performs a runtime-artifact preflight before accepting it into the
+steady-state loading path.
 
 Phase-1 rules:
 
@@ -180,12 +181,12 @@ Phase-1 rules:
   `tensor:__tensorcast_meta__.manifest_json`
 - artifacts without that reserved manifest tensor continue to load as ordinary
   non-serving artifacts
-- strict serving runtime is now explicit rather than inferred from every
+- runtime-artifact policy is explicit rather than inferred from every
   generic materialization request:
-  `PublishedModelVersion.require_serving_runtime_policy()`,
+  `PublishedModelVersion.require_runtime_artifact_policy()`,
   `RepresentationPublishContract.to_runtime_policy()`, and
-  `ServingArtifactManifest.to_runtime_policy()` produce a
-  `ServingRuntimePolicy` that callers can pass into
+  `RuntimeArtifactManifest.to_runtime_policy()` produce a
+  `RuntimeArtifactPolicy` that callers can pass as `runtime_artifact_policy` to
   `artifact.bind(...)`, `artifact.bind_into(...)`, and `binding.swap(...)`
 - artifacts with that reserved manifest tensor must pass:
   - manifest JSON parseability
@@ -196,7 +197,7 @@ Phase-1 rules:
     `serving_build_digest`, `tensor_schema_hash`, `builder_mode`, and
     `build_pipeline_version`
   - `serving_manifest_ref` agreement between the manifest and the runtime
-    policy when strict serving runtime is requested
+    policy when runtime-artifact policy is requested
   - canonical tensor count equality between manifest and canonical index
   - tensor schema hash equality between manifest and the canonical index with
     the reserved manifest tensor excluded
@@ -207,22 +208,22 @@ Current daemon coverage:
 - `MaterializeIntoTarget`
 - source-bound owned-binding create/refill paths
 
-This keeps serving-artifact publication-time validation and runtime acceptance
+This keeps runtime-artifact publication-time validation and runtime acceptance
 validation on the same contract, so runtime no longer silently accepts a
-manifest-bearing serving artifact whose self-description is inconsistent with
-its canonical tensor layout.
+manifest-bearing artifact whose self-description is inconsistent with its
+canonical tensor layout.
 
 Important distinction:
 
 - generic artifact load remains fail-open for ordinary non-serving artifacts
-- strict serving runtime is opt-in through `ServingRuntimePolicy`
-- this lets serving startup and reload fail closed without turning the whole
-  artifact runtime into a serving-only surface
+- runtime-artifact preflight is opt-in through `RuntimeArtifactPolicy`
+- this lets model-runtime startup and reload fail closed without turning the
+  whole artifact runtime into a serving-only surface
 
-### Serving-Builder Guardrails
+### Runtime Recipe Builder Guardrails
 
-The Python serving builder keeps artifact identity as the source authority for
-compiled serving recipes:
+The Python runtime recipe builder keeps artifact identity as the source
+authority for compiled runtime recipes:
 
 - `SourceCatalog.source_artifact_ref` must be a real artifact identity. The
   builder accepts `mi2` content identities and daemon-attested `msa1` mounted
diff --git a/docs/plans/0113-example-tp-model-closure-and-sot-convergence.md b/docs/plans/0113-example-tp-model-closure-and-sot-convergence.md
index da383398..4ca45c2b 100644
--- a/docs/plans/0113-example-tp-model-closure-and-sot-convergence.md
+++ b/docs/plans/0113-example-tp-model-closure-and-sot-convergence.md
@@ -73,9 +73,9 @@ Execution policy for this plan:
   evidence, mixed-residual policy, and prototype deletion remain open:
   - `core/store/replica/collective_disk_loader.cc`
 - `0111` repo-owned builder/publication bridge is landed at base scope:
-  - `tensorcast/api/store/serving_builder.py`
+  - `tensorcast/api/store/publication_builder.py`
   - `daemon/service/controllers/assembly_operation_service.cc`
-- `ServingAdmissionFacts.same_binding_fast_path_validated` is already a correctness and
+- `RuntimeAdmissionFacts.same_binding_fast_path_validated` is already a correctness and
   admission gate for same-binding publication:
   - `tensorcast/types.py`
   - `docs/designs/0111-source-to-serving-builder-and-representation-publication.md`
diff --git a/docs/plans/0116-prefetch-serving-binding-target.md b/docs/plans/0116-prefetch-serving-binding-target.md
index ae1335cf..4cf0f87c 100644
--- a/docs/plans/0116-prefetch-serving-binding-target.md
+++ b/docs/plans/0116-prefetch-serving-binding-target.md
@@ -14,7 +14,7 @@ related_code:
   - proto/tensorcast/config/v1/daemon_config.proto
   - tensorcast/api/store/artifact.py
   - tensorcast/api/store/owned_binding_slot.py
-  - tensorcast/api/store/serving_binding_reference_consumer.py
+  - tensorcast/api/store/runtime_realization_reference_consumer.py
   - tensorcast/api/store/serving_binding_spec_cache.py
   - tensorcast/types.py
   - daemon/service/controllers/materialization_controller.cc
@@ -71,7 +71,7 @@ Updated 2026-05-11.
 
 ## P0: Stabilize The Public Example
 
-- [ ] Convert `examples/serving_binding_consumer/` into the canonical
+- [ ] Convert `examples/runtime_realization_reference_consumer/` into the canonical
       TensorCast-side serving binding example.
 - [ ] Document the parent-to-worker handoff payload:
       `ServingBindingTarget`, `PrefetchedServingBinding`, expected digests, and
@@ -157,9 +157,9 @@ bazel test //daemon:grpc_service_impl_operation_rpc_test \
   --ui_event_filters=warning,error
 
 source .venv/bin/activate
-pytest tests/python/api/test_serving_binding_reference_consumer.py \
+pytest tests/python/api/test_runtime_realization_reference_consumer.py \
   tests/python/api/test_serving_binding_spec_cache.py \
-  tests/python/api/test_prefetch_serving_binding_target.py \
+  tests/python/api/test_runtime_realization_target.py \
   tests/python/api/test_operation_semantics.py -q
 
 pytest tests/python/daemon/test_prefetch_serving_binding_real_cuda_e2e.py -q
diff --git a/docs/plans/0120-artifact-centered-model-runtime-realization.md b/docs/plans/0120-artifact-centered-model-runtime-realization.md
deleted file mode 100644
index 5cf459c1..00000000
--- a/docs/plans/0120-artifact-centered-model-runtime-realization.md
+++ /dev/null
@@ -1,537 +0,0 @@
----
-slug: artifact-centered-model-runtime-realization
-title: Artifact-Centered Model Runtime Realization Plan
-status: draft
-areas: ["sdk", "serving", "daemon", "core", "integrations", "docs", "tests"]
-created: 2026-05-23
-last_updated: 2026-05-25
-related_code:
-  - docs/designs/0120-artifact-centered-model-runtime-realization.md
-  - docs/designs/0121-unified-artifact-realization-kernel.md
-  - docs/plans/0121-unified-artifact-realization-kernel.md
-  - docs/designs/0116-prefetch-serving-binding-target.md
-  - docs/plans/0116-prefetch-serving-binding-target.md
-  - tensorcast/api/store/artifact.py
-  - tensorcast/types.py
-  - tensorcast/serving/runtime.py
-  - tensorcast/serving/config.py
-  - tensorcast/serving/policy.py
-  - tensorcast/serving/hosts.py
-  - tensorcast/serving/binding_plan.py
-  - tensorcast/serving/retained_binding.py
-  - tensorcast/serving/runtime_attachment.py
-  - tensorcast/serving/replica_publication.py
-  - tensorcast/serving/_runtime_impl/lifecycle.py
-links:
-  design: ../designs/0120-artifact-centered-model-runtime-realization.md
-  dependencies:
-    - ../designs/0121-unified-artifact-realization-kernel.md
----
-
-# Objective
-
-Plan the successor work from the current vllm baseline: move
-TensorCast TensorDict retrieval, binding, prefetch, and model-runtime loading
-from parallel surfaces toward one artifact-centered realization model while
-preserving all vLLM behavior and performance-sensitive semantics.
-
-No source compatibility guarantee is required. This plan is delete-forward:
-replacement artifact-runtime paths should absorb behavior, prove equivalence
-with focused tests, and then remove or internalize old serving-rooted public
-entrypoints, compatibility adapters, duplicate diagnostics, and redundant tests.
-Semantic regression is not allowed; maintaining two long-term stacks is also not
-allowed.
-
-# Current State & Grounding
-
-The current vllm baseline is implemented and folded into `0120` as
-context. It is not a separate long-term public model.
-- `tensorcast.serving.runtime` is the narrow framework-facing runtime API.
-- `tensorcast.serving.config` selects exactly one startup plan.
-- `tensorcast.serving.policy` owns serving locator and policy normalization.
-- `tensorcast.serving.binding_plan` centralizes trace/recipe/spec/layout/schema
-  identity.
-- `tensorcast.serving.retained_binding` owns retained acquire validation,
-  reservation bytes, lease restore, and runtime ownership transfer.
-- `tensorcast.serving.runtime_attachment` owns process-local attachment state.
-- `tensorcast.serving.replica_publication` owns artifact-backed runtime replica
-  publication and retirement.
-- `tensorcast.serving._runtime_impl.lifecycle` still performs most orchestration.
-
-The Store SDK baseline has moved from target model to implemented kernel
-baseline:
-
-- `Artifact.tensor_dict(...)`, `tensor_dict_with_diagnostics(...)`,
-  `tensor_dict_into(...)`, `tensor_into(...)`, `bind(...)`, and `bind_into(...)`
-  lower through `Artifact.realize(...)`.
-- `Artifact.prefetch(device=...)` and `Artifact.prefetch(target=...)` lower
-  through `Artifact.realize_async(...)` for retained replica, retained binding,
-  and target-set operation semantics.
-- `ArtifactRealizationSpec`, `ArtifactRealizationHandle`,
-  `ArtifactRealizationReport`, `RealizationResourceEnvelope`,
-  `RealizationReleaseContract`, target-set reports, mounted-source reports,
-  runtime-attachment reports, model-runtime report wrappers, and publication
-  reports are implemented and exported from the Store SDK.
-- Direct `Artifact.realize(ArtifactRealizationSpec.model_runtime(...))` still
-  fails closed. Serving lifecycle code creates runtime-attachment and
-  model-runtime handles internally while vllm still enters through
-  `ServingRuntimeSession`.
-- Daemon materialization already has the key performance primitives the target
-  API must preserve: `MaterializeReplica` tries the artifact LIP/local-replica
-  fast path before engine-backed materialization, binds CUDA IPC or CPU memfd
-  leases into the response, and reports the selected materialization source.
-- Binding materialization already attempts direct byte-space planning for
-  compatible mapped source artifacts and exposes source-bound plan diagnostics;
-  direct model-runtime realization should reuse this behavior rather than
-  routing through Python TensorDict materialization.
-- Python materialization reconstructs tensor views from daemon CUDA IPC or memfd
-  handles and records IPC/restore timings. Those views are acceptable
-  projections but must not become a required intermediate for vLLM weight
-  loading.
-
-Current gaps against the `0120` target state:
-
-- Top-level `tensorcast` does not yet expose `ArtifactRealizationSpec`,
-  `ArtifactRealizationHandle`, or `ArtifactRealizationReport`, even though the
-  target design treats them as public SDK peers of `Artifact`.
-- Direct model-runtime realization still returns `UNIMPLEMENTED` from
-  `Artifact.realize(...)`; runtime attachment lowering is only reachable through
-  serving lifecycle code.
-- `ArtifactRealizationHandle.attach(...)` currently exists as a delegation hook,
-  but the target design needs completed-handle runtime attachment projection
-  semantics. The implementation must either add `attachment()` or make
-  `attach(...)` explicitly projection-only.
-- Runtime host capabilities exist under serving host/integration names
-  (`IntegrationHost`, framework/tensor-surface/placement/source/collective
-  protocols). They need to become the artifact-runtime professional boundary
-  instead of a serving-session dependency.
-- `ArtifactRealizationSpec` still carries serving-rooted fields such as
-  `serving_runtime_policy`; those fields need a neutral target/profile policy or
-  a binding-specific options object.
-- Retained pre-admission memory credit is still exposed through
-  retained-serving-binding helpers. A neutral retained realization claim wrapper
-  must absorb that behavior before vLLM migration.
-- Publication generation, active-generation checks, replay, reload rejection,
-  and shutdown retirement already exist in serving publication code but are not
-  yet formalized as the shared artifact-runtime publication/CAS contract.
-- A fake second framework already proves the vllm is not purely
-  vLLM-shaped, but the proof still enters through `ServingRuntimeSession`; it
-  must be repeated on the direct artifact-runtime API.
-- Serving-rooted public DTOs and helpers remain broadly exported. They must be
-  removed or internalized after replacement, not kept as compatibility aliases.
-
-The plan no longer needs to converge the main SDK paths before broad serving
-cleanup; `0121` did that. This plan now tracks the remaining naming/API boundary
-work after the kernel convergence.
-
-The concrete kernel work is tracked in
-[`0121-unified-artifact-realization-kernel`](../designs/0121-unified-artifact-realization-kernel.md).
-This `0120` plan remains the umbrella migration plan; `0121` owns the
-anti-split-brain implementation sequence for selection, target, strategy,
-representation, lifecycle, execution lowering, reports, and TP target sets.
-
-Execution order:
-
-1. Treat the implemented `0121` kernel as the baseline.
-2. Expose the public realization symbols at the intended package level and make
-   the docs/examples use the real target API shape.
-3. Finish direct public/professional model-runtime realization instead of routing
-   framework integrations through serving-named session APIs.
-4. Port vllm in the same execution window as TensorCast API changes.
-5. Delete or internalize serving-centered public names, compatibility wrappers,
-   redundant diagnostics, and duplicate tests once replacements are wired.
-6. Prove the boundary with SGLang or a minimal second runtime adapter through the
-   direct artifact-runtime API.
-
-The vllm baseline depends on these surfaces:
-
-- `vllm/tensorcast/loader.py`: session start, attachment storage, in-place
-  reload, replica publication, local-ready durable promotion.
-- `vllm/tensorcast/placement.py`: TP/PP/DP member identity, EP/EPLB digests,
-  materialization execution facts.
-- `vllm/tensorcast/source.py`: local source catalog and cache policy.
-- `vllm/tensorcast/collective.py`: same-node source coordination and local-ready
-  TP barrier.
-- `vllm/tensorcast/adapter.py`: meta/runtime model construction, trace capture,
-  tensor attach/finalize, runtime-only tensors, semantic probes.
-- `vllm/model_executor/model_loader/memory_accounting.py`: trusted retained
-  reservation bytes before vLLM memory admission.
-- `vllm/v1/worker/gpu_model_runner.py`: reload endpoint, runtime view,
-  shutdown retirement, EP/EPLB reload safety.
-
-Feasibility result for the current scenario:
-
-- vllm can adapt cleanly to the artifact-centered model because its
-  TensorCast loader is already concentrated behind model loader, placement,
-  source, adapter, and worker reload/publication surfaces;
-- vLLM does not need a direct TensorCast TensorDict model-loading API, and
-  TensorDict is now a first-class projection of the shared realization kernel;
-- retained reservation credit before vLLM memory admission is the main timing
-  constraint and must be represented before any runtime attachment exists;
-- weight loading must keep the current best data path: retained acquire,
-  local-replica/LIP, compatible P2P, local mounted-source/disk streaming, or
-  explicit transform. The direct artifact-runtime API must not introduce a
-  TensorDict or Python state-dict intermediate;
-- local HF/safetensors bootstrap is feasible through daemon-attested mounted
-  source subjects such as `msa1:...`, not as a vLLM-owned source authority;
-- reload/publication semantics are implemented in serving lifecycle and remain a
-  migration constraint for the public artifact-runtime API;
-- EP/EPLB reload safety must combine static semantic digests with live framework
-  checks from vLLM before reload;
-- main and draft TensorCast model reload must be made explicit as either a
-  target-set transaction or the current sequential main-then-draft behavior with
-  unhealthy marking on partial failure.
-
-# Migration Decision Logic
-
-Every migration change should classify the touched concept before renaming or
-deleting it. Use this order:
-
-1. If it is durable identity, discovery, routing, replica visibility, or
-   lifecycle, move it to artifact selection or artifact replica metadata.
-2. If it is target/device/member/layout/strategy/admission intent, move it to
-   `ArtifactRealizationSpec`, target plans, strategy plans, representation
-   admission, or target-set realization.
-3. If it is framework construction, trace capture, tensor surface,
-   runtime-only tensor handling, finalize hooks, placement facts, source
-   catalog, collective behavior, or live EP/EPLB checks, move it to the runtime
-   host capability surface or `RuntimeAttachment`.
-4. If it exists to credit memory or acquire a prepared value later, move it to
-   retained realization claim or prefetch handoff naming.
-5. If it creates a reusable source for later loads/P2P, move it to
-   artifact-runtime publication/promote actions.
-6. If the name is serving-rooted only for source compatibility, delete or
-   internalize it once the replacement behavior and tests exist.
-7. If a serving name truly describes a model-serving ABI payload, it may remain,
-   but only as a profile/ABI field or private implementation detail.
-
-The default decision is not "rename everything first." The default is:
-classify behavior, wire the artifact-runtime replacement, prove behavior with
-tests, then delete or internalize the old public serving surface in the same
-cleanup window. A phase is incomplete if old and new public paths both remain as
-supported peer entrypoints.
-
-# vllm Migration Slice
-
-TensorCast and vllm changes should land in one coordinated window
-because both sides are under our control and no source compatibility guarantee is
-required.
-
-| vllm owner | Current TensorCast dependency | Target interaction | Completion signal |
-| --- | --- | --- | --- |
-| `vllm/tensorcast/loader.py` | `ServingConfig`, `IntegrationHost`, `ServingRuntimeSession`, `RuntimeAttachment` | build artifact/runtime request, call direct `Artifact.realize(... model_runtime ..., runtime_host=...)`, store `handle.attachment()` | startup, reload, required-publication, and local-ready promotion smoke tests no longer instantiate `ServingRuntimeSession` |
-| `vllm/tensorcast/host.py` | `tensorcast.serving.hosts.IntegrationHost` | construct `RuntimeHostCapabilities` or transitional alias with deletion trigger | host construction has no public serving-session dependency |
-| `vllm/tensorcast/adapter.py` | serving host/tensor-surface DTOs | framework capability implementation for construction, trace, runtime-only tensors, finalize hooks, semantic probes | adapter tests pass through direct artifact-runtime handle |
-| `vllm/tensorcast/placement.py` | serving placement/local-ready DTOs | target/member/admission facts plus publication context for artifact-runtime actions | placement no longer creates public serving targets for normal runtime startup |
-| `vllm/tensorcast/source.py` | `ServingConfig` and serving source catalog | mounted-source or durable artifact selection input | local HF/safetensors cold start admits an `msa1:` subject before planning |
-| `vllm/tensorcast/collective.py` | serving-local collective coordination | realization strategy and target-set coordination facts | TP same-node startup uses shared target-set strategy reports |
-| `vllm/tensorcast/retained_binding.py` | retained-serving-binding helpers | retained realization claim helpers | retained startup validates claim through neutral naming |
-| `vllm/model_executor/model_loader/memory_accounting.py` | `tensorcast.serving.retained_binding` trusted bytes | retained realization claim trusted reservation bytes | memory credit still occurs before vLLM admission without public serving imports |
-| `vllm/tensorcast/runtime_view.py` and `gpu_model_runner.py` | vllm view, session shutdown retirement, serving policy helpers | runtime attachment/view projection and artifact-runtime retirement actions | runtime view, reload, and shutdown tests use artifact-runtime actions |
-| `vllm/tensorcast/builder/*` | serving builder/publication helpers | keep only if the payload is serving-ABI-specific; otherwise move to artifact publication actions | remaining builder imports are documented as internal/offline ABI-specific paths |
-
-# Performance Migration Gates
-
-The migration is not complete until the direct artifact-runtime path proves that
-the user-facing API change did not move model loading onto a slower or larger
-path.
-
-| Gate | What to prove | Concrete check |
-| --- | --- | --- |
-| No TensorDict intermediate | `Artifact.realize(... model_runtime ...)` attaches a binding/retained value directly instead of first calling TensorDict materialization. | Direct API tests and vllm startup tests fail if normal model-runtime startup calls TensorDict projection helpers, Python builder materializers, or full state-dict loaders. |
-| Fast source selection preserved | Retained, local replica/LIP, P2P, disk, mounted-source/direct-write, and explicit-transform cases report the expected selected source and fallback status. | Artifact-realization reports assert source kind, fallback reason bucket, copy bytes, temporary bytes, retained bytes, and direct-write bytes for each representative path. |
-| No extra GPU weight residency | Steady-state runtime attach owns one TensorCast weight residency plus framework runtime-only tensors; direct API migration does not keep both serving and artifact-runtime owners. | vLLM smoke/profile captures CUDA allocated/reserved deltas around startup and reload, checks `_vllm_external_weight_bytes`/retained credit, and verifies old attachment/binding handles are retired. |
-| No full host-memory staging | Normal durable, retained, and mounted-source startup do not build a full Python `dict[str, torch.Tensor]`, full safetensors state dict, or full CPU copy of weights. | RSS/profile events and call-site audit keep full host materialization limited to explicit offline builder workflows. |
-| Admission timing preserved | Retained reservation bytes are credited before vLLM calculates requested KV/cache memory. | `memory_accounting.py` tests use the neutral claim helper and assert credit before `gpu_worker` startup admission. |
-| Latency remains stage-local | Direct artifact-runtime start adds no extra data-plane RPC, session start, retained acquire, or IPC restore beyond the chosen source strategy. | Profile events compare current serving baseline and direct API for startup, IPC open, attach/finalize, source-bound plan, reload, and publication stages; any added stage needs an explicit reason in the report. |
-| Reload overlap bounded | In-place reload may temporarily overlap old and new weights only under declared swap semantics. | Reload tests assert active-generation CAS, stale publication retirement, and resource-envelope overlap accounting. |
-
-These gates are intentionally behavior-based. A rename can pass only when the
-resolved source, memory ownership, and timing shape match the current optimal
-path for the same compatibility class.
-
-# Phases & Milestones
-
-- [x] Phase 1: Freeze The Current vllm Baseline
-  - [x] Remove the standalone serving-centered design and fold baseline context
-        into `0120`.
-  - [x] Record the current vllm code/module state and mark behavior
-        contracts versus temporary names.
-  - [x] Capture the vLLM scenario matrix with owner files and expected behavior
-        in the design.
-  - [x] Record that current vLLM does not use TensorCast TensorDict as its steady
-        model-loading path; TensorDict is the equivalence proof for shared
-        realization semantics.
-  - [x] Verify no Python SDK artifact metadata or realization path added direct
-        Global Store access; `0121` guardrails now cover this.
-
-- [x] Phase 2: Land The Shared Realization Kernel Through `0121`
-  - [x] Define and export `ArtifactRealizationSpec`,
-        `ArtifactRealizationHandle`, `ArtifactRealizationReport`, selection,
-        target, strategy, representation, lifecycle, resource-envelope,
-        release-contract, and report DTOs.
-  - [x] Lower `Artifact.tensor_dict(...)`,
-        `tensor_dict_with_diagnostics(...)`, `tensor_dict_into(...)`,
-        `tensor_into(...)`, `bind(...)`, and `bind_into(...)` through
-        `Artifact.realize(...)`.
-  - [x] Lower retained replica, retained binding, and target-set prefetch through
-        `Artifact.realize_async(...)` while preserving `Operation[T]`.
-  - [x] Add TensorDict projection ownership and release-contract lifecycle
-        coverage.
-  - [x] Add retained binding/target-set reports, mounted-source realization,
-        runtime-attachment reports, model-runtime report wrappers, and
-        publication reports.
-  - [x] Add direct Global Store guardrails for SDK artifact metadata and
-        realization paths.
-
-- [x] Phase 3: Preserve TP, P2P, Publication, And Runtime-Attachment Correctness
-  - [x] Represent TP as target-set realization with member-local layouts and
-        source-selection modes.
-  - [x] Keep same-node collective-first as strategy-plane state.
-  - [x] Keep P2P direct reuse gated by compatible
-        representation/topology/member/layout/schema.
-  - [x] Route runtime attachment, retained acquire close, and publication
-        projection close through realization release contracts.
-  - [x] Preserve source coordination, local-ready barrier, active-generation
-        publication, stale-publication retirement, and shutdown retirement
-        semantics.
-
-- [ ] Phase 4: Expose Direct Model-Runtime Realization
-  - [ ] Export `ArtifactRealizationSpec`, `ArtifactRealizationHandle`, and
-        `ArtifactRealizationReport` from the intended public SDK package level
-        and add import smoke tests.
-  - [ ] Adopt direct
-        `Artifact.realize(ArtifactRealizationSpec.model_runtime(...), runtime_host=...)`
-        or an equivalent artifact-rooted signature as the professional framework
-        API. Do not introduce a new public artifact-runtime session facade.
-  - [ ] Lower model-runtime realization through the existing runtime attachment
-        implementation instead of returning `UNIMPLEMENTED`.
-  - [ ] Ensure that lowering calls the daemon binding/retained/source-bound
-        paths directly. Direct model-runtime realization must not first call
-        TensorDict projection helpers, materialize a Python state dict, or start
-        a second public serving session.
-  - [ ] Define the runtime host capability surface for construction, trace
-        capture, tensor surface, runtime-only tensors, finalize hooks, placement
-        facts, source catalog, collective behavior, semantic probes, reload
-        admission, and live EP/EPLB checks.
-  - [ ] Define completed-handle projection semantics: add
-        `ArtifactRealizationHandle.attachment()` or make `attach(...)`
-        explicitly projection-only, not a second execution step.
-  - [ ] Preserve `RuntimeAttachment` as the process-local framework boundary and
-        keep model object state out of `Artifact`.
-  - [ ] Add tests that direct model-runtime handle reports match
-        serving-lifecycle model-runtime reports.
-  - [ ] Add report assertions for selected source kind, fallback reason,
-        copy bytes, temporary bytes, retained bytes, direct-write bytes,
-        IPC-open timing, and attach/finalize timing.
-  - [ ] Update public examples to use the actual target API shape and current
-        binding arguments (`mapping` / target-plan DTOs), not stale `layout=...`
-        placeholders.
-
-- [ ] Phase 5: Migrate vllm to the successor boundary
-  - [ ] Port `TensorcastModelLoader` startup from serving session naming to the
-        successor artifact-runtime API.
-  - [ ] Port `vllm/tensorcast/host.py` from public `IntegrationHost`
-        construction to `RuntimeHostCapabilities` construction or a transitional
-        alias with an explicit deletion trigger.
-  - [ ] Preserve retained reservation byte credit before vLLM admission through
-        retained realization claim validation.
-  - [ ] Introduce neutral retained realization claim helpers and migrate vLLM
-        memory accounting off retained-serving-binding public helpers.
-  - [ ] Add vLLM memory-admission tests proving retained credit is applied
-        before startup admission and is not double-counted after acquire.
-  - [ ] Port vLLM source bootstrap to the mounted-source artifact contract and
-        keep `msa1:`/durable artifact admission explicit.
-  - [ ] Keep durable, retained, and local-source startup off TensorDict and full
-        Python state-dict paths; direct API startup should attach daemon-owned
-        tensors through the selected binding/retained/source-bound path.
-  - [ ] Keep `VLLMTensorcastAdapter` as the owner for model construction,
-        trace capture, runtime-only tensor rehydration, finalize hooks, and
-        semantic probes.
-  - [ ] Add vllm profile/smoke coverage for CUDA allocated/reserved
-        deltas, host RSS deltas, selected source kind, attach/finalize timing,
-        reload overlap, and old-handle retirement.
-  - [ ] Preserve in-place reload response projection, stale/duplicate reload
-        handling, after-ready publication, required-publication failure state,
-        stale publication retirement, shutdown retirement, EP/EPLB reload
-        safety, and drafter sequential failure/unhealthy behavior.
-  - [ ] Remove normal vllm startup, reload, memory-accounting,
-        runtime-view, and shutdown imports of public `tensorcast.serving.*`
-        APIs after replacement paths pass.
-
-- [ ] Phase 6: Narrow Serving-Centered Names
-  - [ ] Classify `serving.artifact_locator`, `serving.policy`, `bootstrap.*`,
-        `materialization.collective`, `retained_binding_acquire.*`,
-        `replica_publication.*`, and `diagnostics.*` into artifact selection,
-        representation preflight, source artifact bootstrap, realization
-        strategy, retained claim, publication policy, and diagnostics fields.
-  - [ ] Decide which names remain because they describe serving ABI semantics
-        and which move to artifact/runtime vocabulary.
-  - [ ] Rename or absorb serving-rooted DTOs such as `ServingBindingTarget`,
-        `PrefetchedServingBinding`, and `ServingArtifactManifest` only after
-        replacements exist and tests cover the replacement behavior.
-  - [ ] Move public docs toward artifact/runtime terminology.
-  - [ ] Remove broad facades that expose private lifecycle helpers as public API.
-  - [ ] Delete compatibility aliases and duplicate helper functions in the same
-        cleanup window; do not leave old and new public surfaces as peers.
-  - [ ] Rewrite or delete tests that primarily assert the old serving surface
-        instead of the artifact-runtime contract.
-  - [ ] Close every applicable entry in the deletion ledger below; Phase 6 is
-        not complete while any old public serving path remains as a supported
-        peer of the artifact-runtime path.
-
-- [ ] Phase 7: Extend To A Second Runtime
-  - [ ] Use SGLang or a minimal mock runtime adapter to prove the direct
-        artifact-runtime API is not vLLM-shaped.
-  - [ ] Confirm source catalog, target layout, runtime-only tensors, and
-        publication can be represented without vLLM-specific public names.
-
-# Deletion Ledger
-
-The migration is delete-forward. Each old surface below must either be removed
-from the public API or narrowed to an explicitly internal implementation detail
-after its replacement is wired.
-
-| Current surface | Replacement owner | Delete/internalize after | Guardrail |
-| --- | --- | --- | --- |
-| `ServingRuntimeSession` public runtime root | `Artifact.realize(... model_runtime ..., runtime_host=...)` plus completed `ArtifactRealizationHandle.attachment()` | vllm startup/reload/shutdown and second-runtime fixture use the direct API | import/call-site search shows no normal public startup path instantiates `ServingRuntimeSession`; smoke tests show no extra binding owner or TensorDict intermediate |
-| `ServingConfig` as public runtime request | artifact/runtime request DTOs and profile policy fields | loader/source/reload paths parse the new request and preserve behavior | semantic field-map tests cover durable, source, retained, diagnostics, publication, and reload inputs |
-| `serving_runtime_policy` on generic realization specs | neutral runtime profile/preflight policy or binding-specific options object | binding/runtime preflight no longer needs serving-rooted field names | spec construction tests use neutral field names; old field rejected or private |
-| `ServingArtifactLocator` | artifact locator or artifact selection locator | durable startup and reload resolve through artifact selection | reload/startup tests assert artifact selection digest and no serving locator authority |
-| `ServingBindingTarget` and `ServingBindingSetTarget` | `RealizationTarget` and `RealizationTargetSet` | retained prefetch, TP target-set, and direct runtime startup accept replacement targets | target-set tests cover member layout, source reuse, and collective strategy |
-| `PrefetchedServingBinding` and `PrefetchedServingBindingSet` | `PrefetchHandoff` or `RetainedRealizationClaim` | vLLM retained acquire and memory accounting use neutral claim helpers | trusted reservation byte tests pass before admission; old public names absent from normal API |
-| `RetainedServingBindingAuthority` and retained-serving helpers | retained realization claim parser/validator | acquire validation, lease restore, and reservation credit are represented neutrally | retained startup tests validate member/device/layout at credit and acquire time |
-| serving publication helpers used by normal runtime startup | handle/attachment artifact-runtime publication actions | after-ready publication, reload retirement, shutdown retirement, and local-ready promotion use shared actions | generation/CAS tests cover stale result, duplicate reload, required-publication failure, and shutdown |
-| `ServingArtifactManifest` for non-ABI metadata | runtime representation manifest or artifact representation metadata | manifest fields are reclassified into representation/runtime profile terms | preflight tests assert schema/build/topology/contract admission without public serving manifest authority |
-| `serving_build_digest` when not serving-ABI-specific | `runtime_build_digest` or `representation_build_digest` | build identity is owned by representation/runtime profile | manifest/build tests explain any remaining serving ABI field |
-| `ServingRealizationReport` and serving diagnostics aliases | `ArtifactRealizationReport` and target-specific report payloads | direct model-runtime reports match current serving lifecycle reports | diagnostics tests assert one report model and no duplicate path-specific assertions |
-| old tests that assert public serving-session behavior | direct artifact-runtime tests plus private lowering tests where needed | replacement tests pass and internal lowerings are covered directly | test inventory has no compatibility-only public serving tests |
-
-# Tasks
-
-- Keep this plan as the post-`0121` migration ledger; do not duplicate the
-  completed `0121` kernel checklist.
-- Add public SDK exports for the realization spec/handle/report symbols and
-  verify `import tensorcast as tc; tc.ArtifactRealizationSpec` works.
-- Implement the direct model-runtime API path from `Artifact.realize(...)` to the
-  existing runtime attachment lowerings, preserving the current report and
-  release-contract behavior.
-- Preserve the current optimal weight-loading data paths while changing the API:
-  retained acquire, local-replica/LIP, compatible P2P, local mounted-source/disk
-  streaming, and explicit transforms must remain distinguishable in reports.
-- Define the completed-handle runtime attachment projection API and update tests
-  so `attach(...)`, if retained, is not treated as a separate execution step.
-- Rename or wrap `IntegrationHost`-style serving host capabilities into the
-  artifact-runtime professional API without changing framework behavior.
-- Move `serving_runtime_policy` out of the generic realization spec or fence it
-  behind a transitional binding/runtime-profile options object with a deletion
-  trigger.
-- Build a current vllm import/call-site table showing every remaining
-  `ServingRuntimeSession`, `ServingConfig`, retained-binding, publication, and
-  runtime-view dependency.
-- Create the semantic field map from current `model_loader_extra_config` to
-  artifact/runtime terminology.
-- Define the retained realization claim public/professional naming path while
-  preserving current trusted reservation validation inputs.
-- Formalize the existing publication generation/CAS contract shared by
-  after-ready publication, reload retirement, and shutdown retirement as the
-  vLLM migration acceptance rule.
-- Add direct model-runtime realization tests once the public lowering exists.
-- Add or update vllm smoke/integration tests for startup, retained
-  memory credit, local source cold start, durable artifact startup, in-place
-  reload, after-ready publication, shutdown retirement, EP/EPLB rejection, and
-  draft partial-failure/unhealthy behavior.
-- Add vllm profile checks for CUDA allocated/reserved deltas, host RSS,
-  selected source kind, copy/temporary/direct-write bytes, attach/finalize
-  timing, reload overlap, and old-handle retirement.
-- Add direct artifact-runtime second-framework tests or fixtures before retiring
-  serving-centered public vocabulary broadly.
-- Delete old serving-session public tests, compatibility wrappers, and redundant
-  diagnostics assertions after the replacement tests pass; keep only tests that
-  exercise internal lowerings still intentionally owned by serving modules.
-- Maintain the deletion ledger above as implementation work proceeds; every
-  temporary serving compatibility object must have a replacement, owner,
-  guardrail, and removal trigger.
-- Add a vllm call-site search check before cleanup completion so
-  normal startup, reload, memory accounting, runtime view, and shutdown do not
-  import public `tensorcast.serving.*` APIs.
-
-# Test / Rollout / Recovery
-
-Validation now splits completed `0121` kernel guardrails from remaining `0120`
-migration checks.
-
-Completed kernel guardrails:
-
-- `source .venv/bin/activate && pytest tests/python/api/test_realization_kernel.py`
-- `source .venv/bin/activate && pytest tests/python/api/test_artifact_handle.py`
-- `source .venv/bin/activate && pytest tests/python/api/test_prefetch_operation.py`
-- `source .venv/bin/activate && pytest tests/python/test_serving_integration.py`
-- `source .venv/bin/activate && pytest tests/python/test_serving_replica_publication.py`
-
-Remaining migration checks:
-
-- public SDK import/export smoke tests for `tc.ArtifactRealizationSpec`,
-  `tc.ArtifactRealizationHandle`, and `tc.ArtifactRealizationReport`;
-- direct model-runtime realization tests once `Artifact.realize(model_runtime)`
-  no longer fails closed;
-- completed-handle runtime attachment projection tests proving no second attach
-  execution path exists;
-- vllm smoke/integration tests for startup, reload, publication, and
-  retained credit;
-- performance migration gates for no TensorDict intermediate, source selection,
-  no extra GPU/host full-weight residency, pre-admission retained credit, stage
-  timing, and bounded reload overlap;
-- direct artifact-runtime second-framework adapter proof before broad serving
-  vocabulary retirement;
-- cleanup guardrails proving old public serving-session entrypoints,
-  compatibility aliases, duplicate diagnostics paths, and redundant tests are
-  deleted or internalized after replacement;
-- deletion-ledger audit showing each old public serving surface is removed,
-  private, or serving-ABI-specific with an owner;
-- vllm import/call-site audit proving normal startup, reload,
-  memory-accounting, runtime-view, and shutdown paths use artifact-runtime
-  APIs;
-- C++ daemon/core tests only when proto, materialization, binding, or P2P
-  behavior changes.
-
-- Execute TensorCast and vllm changes together, because both codebases
-  are under our control.
-- Recovery is behavior-based and delete-forward: if the new API shape is wrong,
-  revise the refactor before landing rather than preserving a parallel
-  compatibility layer.
-
-# Risks & Tracking
-
-- Artifact API overgrowth: track whether proposed methods are durable artifact
-  lifecycle operations or realization-handle projection/actions.
-- TensorDict split-brain: track whether TensorDict tests still exercise a
-  separate materialization path that bypasses realization specs, strategy
-  selection, diagnostics, or P2P compatibility checks.
-- vLLM timing regression: specifically track retained reservation memory credit
-  before admission.
-- Weight-loading fast-path regression: track whether direct model-runtime
-  startup keeps retained acquire, local-replica/LIP, P2P, disk, mounted-source,
-  and explicit-transform cases on their intended paths.
-- Hidden memory duplication: track GPU allocated/reserved deltas, host RSS,
-  live binding owners, reload overlap, and compatibility wrappers that keep old
-  handles alive after artifact-runtime attachment.
-- Source authority split-brain: track whether local source bootstrap can run
-  without an admitted `msa1:` or durable artifact subject.
-- Reload/publication race: track whether publication, reload, and shutdown
-  retirement compare the active attachment or binding-value generation before
-  mutating state.
-- Drafter partial reload: track whether main/draft behavior is target-set atomic
-  or explicitly sequential with worker-unhealthy marking.
-- TP/P2P semantic confusion: require explicit compatibility validation before
-  direct P2P member reuse.
-- Hidden framework leakage: keep vLLM model attributes and finalize hooks inside
-  runtime adapters and attachments.
-- Naming churn without semantic gain: do not rename a class until ownership is
-  clear and the vLLM migration path is documented.
-- Dual-stack drift: every compatibility adapter or old public entrypoint must
-  have an owner, replacement, test coverage, and deletion trigger. A migration
-  phase is not complete while old and new public paths both remain supported.
-- Redundant test drift: tests that assert old serving-session behavior must be
-  rewritten to assert artifact-runtime behavior or deleted after internal
-  lowering coverage exists.
diff --git a/examples/runtime_realization_reference_consumer/README.md b/examples/runtime_realization_reference_consumer/README.md
new file mode 100644
index 00000000..0536d79f
--- /dev/null
+++ b/examples/runtime_realization_reference_consumer/README.md
@@ -0,0 +1,18 @@
+# Runtime Realization Reference Consumer
+
+This example is a minimal TensorCast-side consumer for the runtime realization
+prefetch/acquire flow. It is intentionally independent of internal-vLLM so the
+daemon API can be exercised as a public reference path.
+
+```bash
+source .venv/bin/activate
+python examples/runtime_realization_reference_consumer/reference_consumer.py \
+  --daemon-address 127.0.0.1:8073 \
+  --source-artifact-id mi2:<source-artifact> \
+  --device-uuid <daemon-device-uuid>
+```
+
+The parent process writes a resolved realization target cache entry, calls the
+daemon `PrefetchServingBinding` wire RPC, and launches a worker subprocess. The
+worker reconstructs a `RealizationTarget` and a `PrefetchHandoff`, calls
+`AcquireBindingValue`, and releases the returned lease.
diff --git a/examples/serving_binding_consumer/reference_consumer.py b/examples/runtime_realization_reference_consumer/reference_consumer.py
similarity index 91%
rename from examples/serving_binding_consumer/reference_consumer.py
rename to examples/runtime_realization_reference_consumer/reference_consumer.py
index a5abd881..48721a37 100644
--- a/examples/serving_binding_consumer/reference_consumer.py
+++ b/examples/runtime_realization_reference_consumer/reference_consumer.py
@@ -1,7 +1,7 @@
 #!/usr/bin/env python3
 #  Copyright (c) 2026, TensorCast Team.
 
-"""Minimal serving-binding consumer for TensorCast prefetch/acquire flows."""
+"""Minimal runtime realization consumer for TensorCast prefetch/acquire flows."""
 
 from __future__ import annotations
 
@@ -14,7 +14,7 @@
 from pathlib import Path
 
 from tensorcast.api.store import (
-    ReferenceServingTensorSpec,
+    ReferenceRuntimeTensorSpec,
     acquire_reference_binding,
     build_reference_resolved_spec,
     prefetch_reference_binding,
@@ -24,12 +24,12 @@
 )
 from tensorcast.daemon_ctl import DaemonCtl
 from tensorcast.proto.operation.v1 import operation_pb2
-from tensorcast.types import PrefetchedServingBinding, ServingBindingTarget
+from tensorcast.types import PrefetchHandoff, RealizationTarget
 
 
 def _parse_args() -> argparse.Namespace:
     parser = argparse.ArgumentParser(
-        description="Run a minimal TensorCast serving-binding prefetch/acquire flow."
+        description="Run a minimal TensorCast runtime realization prefetch/acquire flow."
     )
     parser.add_argument("--daemon-address", default="127.0.0.1:8073")
     parser.add_argument("--source-artifact-id", required=True)
@@ -48,10 +48,10 @@ def _worker_main(args: argparse.Namespace) -> None:
         raise SystemExit("--target-path and --prefetched-path are required for worker")
     target_proto = operation_pb2.ServingBindingTarget()
     target_proto.ParseFromString(Path(args.target_path).read_bytes())
-    target = ServingBindingTarget.from_proto(target_proto)
+    target = RealizationTarget.from_proto(target_proto)
     prefetched_proto = operation_pb2.PrefetchServingBindingResult()
     prefetched_proto.ParseFromString(Path(args.prefetched_path).read_bytes())
-    prefetched = PrefetchedServingBinding.from_proto(prefetched_proto)
+    prefetched = PrefetchHandoff.from_proto(prefetched_proto)
     client = DaemonCtl(args.daemon_address)
     acquired = acquire_reference_binding(
         client,
@@ -80,7 +80,7 @@ def _worker_main(args: argparse.Namespace) -> None:
 def _parent_main(args: argparse.Namespace) -> None:
     cache_root = Path(args.cache_root) if args.cache_root else Path(tempfile.mkdtemp())
     client = DaemonCtl(args.daemon_address)
-    tensor = ReferenceServingTensorSpec(
+    tensor = ReferenceRuntimeTensorSpec(
         name=args.tensor_name,
         size_bytes=args.tensor_size_bytes,
         shape=(args.tensor_size_bytes,),
diff --git a/examples/runtime_reference_framework/README.md b/examples/runtime_reference_framework/README.md
new file mode 100644
index 00000000..7fe34935
--- /dev/null
+++ b/examples/runtime_reference_framework/README.md
@@ -0,0 +1,22 @@
+# Runtime Reference Framework
+
+This example shows the smallest TensorCast Level 1 framework integration.
+It consumes an existing durable artifact through
+`Artifact.realize(... model_runtime ...)` and validates the integration with the
+artifact-runtime conformance kit.
+
+The runtime path intentionally uses only:
+
+- `tensorcast`
+- `tensorcast.artifact_runtime.host`
+- `tensorcast.artifact_runtime.testing`
+
+It does not import `tensorcast.serving`, vLLM, source catalog helpers, retained
+preload helpers, `ArtifactRuntimeSession`, or low-level bind/swap/restore
+functions.
+
+Run:
+
+```bash
+python examples/runtime_reference_framework/reference_framework.py
+```
diff --git a/examples/serving_runtime_reference_framework/reference_framework.py b/examples/runtime_reference_framework/reference_framework.py
similarity index 78%
rename from examples/serving_runtime_reference_framework/reference_framework.py
rename to examples/runtime_reference_framework/reference_framework.py
index f0ae58c1..a9459c35 100644
--- a/examples/serving_runtime_reference_framework/reference_framework.py
+++ b/examples/runtime_reference_framework/reference_framework.py
@@ -1,5 +1,5 @@
 #  Copyright (c) 2026, TensorCast Team.
-"""Minimal Level 1 TensorCast serving runtime framework integration."""
+"""Minimal Level 1 TensorCast artifact-runtime framework integration."""
 
 from __future__ import annotations
 
@@ -8,9 +8,9 @@
 
 import torch
 
-import tensorcast.serving.hosts as tc_hosts
-import tensorcast.serving.runtime as tc_runtime
-import tensorcast.serving.testing as tc_testing
+import tensorcast as tc
+import tensorcast.artifact_runtime.host as tc_runtime_host
+import tensorcast.artifact_runtime.testing as tc_testing
 
 
 class ReferenceRuntimeModel:
@@ -25,9 +25,9 @@ def __init__(self) -> None:
 class ReferenceFrameworkHost:
     """Framework-owned model construction and semantic facts."""
 
-    def identity(self, model_config: object) -> tc_hosts.FrameworkIdentity:
+    def identity(self, model_config: object) -> tc_runtime_host.FrameworkIdentity:
         del model_config
-        return tc_hosts.FrameworkIdentity(
+        return tc_runtime_host.FrameworkIdentity(
             framework_name="referencefw",
             framework_version="0",
             adapter_version="level1-example",
@@ -83,9 +83,9 @@ class ReferencePlacementHost:
     def identity_facts(
         self,
         framework_config: object | None,
-    ) -> tc_hosts.PlacementIdentityFacts:
+    ) -> tc_runtime_host.PlacementIdentityFacts:
         del framework_config
-        return tc_hosts.PlacementIdentityFacts(
+        return tc_runtime_host.PlacementIdentityFacts(
             tensor_parallel_rank=0,
             tensor_parallel_size=1,
             pipeline_parallel_rank=0,
@@ -97,16 +97,16 @@ def identity_facts(
     def admission_facts(
         self,
         framework_config: object | None,
-    ) -> tc_hosts.PlacementAdmissionFacts:
+    ) -> tc_runtime_host.PlacementAdmissionFacts:
         del framework_config
-        return tc_hosts.PlacementAdmissionFacts()
+        return tc_runtime_host.PlacementAdmissionFacts()
 
     def member_facts(
         self,
         framework_config: object | None,
-    ) -> tc_hosts.PlacementMemberFacts:
+    ) -> tc_runtime_host.PlacementMemberFacts:
         del framework_config
-        return tc_hosts.PlacementMemberFacts(
+        return tc_runtime_host.PlacementMemberFacts(
             runtime_rank=0,
             runtime_world_size=1,
             member_id="member-0",
@@ -118,9 +118,9 @@ def member_facts(
     def execution_facts(
         self,
         framework_config: object | None,
-    ) -> tc_hosts.MaterializationExecutionFacts:
+    ) -> tc_runtime_host.MaterializationExecutionFacts:
         del framework_config
-        return tc_hosts.MaterializationExecutionFacts(
+        return tc_runtime_host.MaterializationExecutionFacts(
             collective_rank=0,
             collective_world_size=1,
             tensor_parallel_ranks=(0,),
@@ -205,41 +205,21 @@ def validate_tensor_invariants(
             raise AssertionError("reference tensor invariants changed")
 
 
-def build_reference_host() -> tc_hosts.IntegrationHost:
-    """Build the minimal host object a framework passes to TensorCast."""
+def build_reference_host() -> tc.RuntimeHostCapabilities:
+    """Build the minimal runtime host object a framework passes to TensorCast."""
 
-    return tc_hosts.IntegrationHost(
+    return tc.RuntimeHostCapabilities(
         framework=ReferenceFrameworkHost(),
         placement=ReferencePlacementHost(),
         tensor_surface=ReferenceTensorSurface(),
     )
 
 
-def create_session() -> tc_runtime.ServingRuntimeSession:
-    """Create a Level 1 session shape for a durable serving artifact selector."""
-
-    return tc_runtime.ServingRuntimeSession.from_config(
-        {
-            "bootstrap": {
-                "mode": "disabled",
-            },
-            "serving": {
-                "selector": {
-                    "kind": "artifact_ref",
-                    "value": "mi2:serving",
-                },
-            },
-        },
-        host=build_reference_host(),
-    )
-
-
 def run_level1_conformance() -> tc_testing.ConformanceResult:
     """Run the TensorCast Level 1 conformance kit against this host."""
 
-    return tc_testing.assert_level1_runtime_conformance(
-        tc_runtime,
-        tc_hosts,
+    return tc_testing.assert_level1_artifact_runtime_conformance(
+        tc,
         host=build_reference_host(),
     )
 
diff --git a/examples/serving_binding_consumer/README.md b/examples/serving_binding_consumer/README.md
deleted file mode 100644
index 12b27b58..00000000
--- a/examples/serving_binding_consumer/README.md
+++ /dev/null
@@ -1,17 +0,0 @@
-# Serving Binding Reference Consumer
-
-This example is a minimal TensorCast-side consumer for the serving binding
-prefetch/acquire flow. It is intentionally independent of vllm so the
-daemon API can be exercised as a public reference path.
-
-```bash
-source .venv/bin/activate
-python examples/serving_binding_consumer/reference_consumer.py \
-  --daemon-address 127.0.0.1:8073 \
-  --source-artifact-id mi2:<source-artifact> \
-  --device-uuid <daemon-device-uuid>
-```
-
-The parent process writes a resolved serving binding spec cache entry, calls
-`PrefetchServingBinding`, and launches a worker subprocess that calls
-`AcquireBindingValue` and releases the returned lease.
diff --git a/examples/serving_runtime_reference_framework/README.md b/examples/serving_runtime_reference_framework/README.md
deleted file mode 100644
index 054ed7ab..00000000
--- a/examples/serving_runtime_reference_framework/README.md
+++ /dev/null
@@ -1,21 +0,0 @@
-# Serving Runtime Reference Framework
-
-This example is the smallest TensorCast Level 1 framework integration shape.
-It consumes an existing durable serving artifact through
-`ServingRuntimeSession` and validates the integration with the conformance kit.
-
-The runtime path intentionally uses only:
-
-- `tensorcast.serving.runtime`
-- `tensorcast.serving.hosts`
-- `tensorcast.serving.testing`
-
-It does not import `tensorcast.serving.integration`, builder/admin modules,
-vLLM, source catalog helpers, retained preload helpers, or low-level
-bind/swap/restore functions.
-
-Run:
-
-```bash
-python examples/serving_runtime_reference_framework/reference_framework.py
-```
diff --git a/pyproject.toml b/pyproject.toml
index c9d30661..c8764374 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -60,9 +60,9 @@ dev = [
     "hypothesis>=6.135.26",
     "ipdb>=0.13.13",
     "ipynb>=0.5.1",
-    "mypy>=1.15.0",
+    "mypy==2.1.0",
     "pre-commit>=4.2.0",
-    "pyright>=1.1.403",
+    "pyright==1.1.409",
     "pytest>=8.3.5",
     "pytest-asyncio>=0.26.0",
     "pytest-xdist>=3.8.0",
diff --git a/tensorcast/__init__.py b/tensorcast/__init__.py
index 234b4ed4..01d29974 100644
--- a/tensorcast/__init__.py
+++ b/tensorcast/__init__.py
@@ -200,6 +200,9 @@ def _install_c_extension_bootstrap() -> None:
     "ArtifactDescriptor": ("tensorcast.api", "ArtifactDescriptor"),
     "ArtifactError": ("tensorcast.api", "ArtifactError"),
     "ArtifactFuture": ("tensorcast.api", "ArtifactFuture"),
+    "ArtifactRealizationHandle": ("tensorcast.api", "ArtifactRealizationHandle"),
+    "ArtifactRealizationReport": ("tensorcast.api", "ArtifactRealizationReport"),
+    "ArtifactRealizationSpec": ("tensorcast.api", "ArtifactRealizationSpec"),
     "AssemblyAttemptRef": ("tensorcast.api", "AssemblyAttemptRef"),
     "BindingReservationCapability": (
         "tensorcast.api",
@@ -219,44 +222,42 @@ def _install_c_extension_bootstrap() -> None:
     "PlanType": ("tensorcast.api", "PlanType"),
     "RegisterArtifactOptions": ("tensorcast.api", "RegisterArtifactOptions"),
     "RegisteredArtifact": ("tensorcast.api", "RegisteredArtifact"),
-    "RegisteredServingPublication": (
-        "tensorcast.api",
-        "RegisteredServingPublication",
-    ),
     "PrefetchRetentionPolicy": ("tensorcast.api", "PrefetchRetentionPolicy"),
-    "PrefetchedServingBinding": ("tensorcast.api", "PrefetchedServingBinding"),
-    "PrefetchedServingBindingSet": (
+    "PrefetchHandoff": ("tensorcast.api", "PrefetchHandoff"),
+    "PrefetchHandoffMemberFailure": (
         "tensorcast.api",
-        "PrefetchedServingBindingSet",
+        "PrefetchHandoffMemberFailure",
     ),
+    "PrefetchHandoffSet": ("tensorcast.api", "PrefetchHandoffSet"),
     "RegisteredLease": ("tensorcast.api", "RegisteredLease"),
     "RegistrationResult": ("tensorcast.api", "RegistrationResult"),
-    "ServingBindingMemberRef": ("tensorcast.api", "ServingBindingMemberRef"),
-    "ServingBindingReadiness": ("tensorcast.api", "ServingBindingReadiness"),
-    "ServingBindingResolvedLayout": (
+    "RuntimeBindingMemberRef": ("tensorcast.api", "RuntimeBindingMemberRef"),
+    "RuntimeBindingReadiness": ("tensorcast.api", "RuntimeBindingReadiness"),
+    "RuntimeBindingResolvedLayout": (
         "tensorcast.api",
-        "ServingBindingResolvedLayout",
+        "RuntimeBindingResolvedLayout",
     ),
-    "ServingBindingResolvedSpecCacheEntry": (
+    "RuntimeRealizationSpecCacheEntry": (
         "tensorcast.api",
-        "ServingBindingResolvedSpecCacheEntry",
+        "RuntimeRealizationSpecCacheEntry",
     ),
-    "ServingBindingSetTarget": ("tensorcast.api", "ServingBindingSetTarget"),
-    "ServingBindingSourceKind": ("tensorcast.api", "ServingBindingSourceKind"),
-    "ServingBindingSourceMemberRef": (
+    "RuntimeBindingSourceKind": ("tensorcast.api", "RuntimeBindingSourceKind"),
+    "RuntimeBindingSourceMemberRef": (
         "tensorcast.api",
-        "ServingBindingSourceMemberRef",
+        "RuntimeBindingSourceMemberRef",
     ),
-    "ServingBindingSourceRef": ("tensorcast.api", "ServingBindingSourceRef"),
-    "ServingBindingSourceReuseDecision": (
+    "RuntimeBindingSourceRef": ("tensorcast.api", "RuntimeBindingSourceRef"),
+    "RuntimeBindingSourceReuseDecision": (
         "tensorcast.api",
-        "ServingBindingSourceReuseDecision",
+        "RuntimeBindingSourceReuseDecision",
     ),
-    "ServingBindingSourceReuseMode": (
+    "RuntimeBindingSourceReuseMode": (
         "tensorcast.api",
-        "ServingBindingSourceReuseMode",
+        "RuntimeBindingSourceReuseMode",
     ),
-    "ServingBindingTarget": ("tensorcast.api", "ServingBindingTarget"),
+    "RuntimeTopologyRef": ("tensorcast.api", "RuntimeTopologyRef"),
+    "RealizationTarget": ("tensorcast.api", "RealizationTarget"),
+    "RealizationTargetSet": ("tensorcast.api", "RealizationTargetSet"),
     "SealAssemblyResult": ("tensorcast.api", "SealAssemblyResult"),
     "SERVING_MANIFEST_TENSOR_NAME": ("tensorcast.api", "SERVING_MANIFEST_TENSOR_NAME"),
     "ViewRegistrationKind": ("tensorcast.api", "ViewRegistrationKind"),
@@ -276,14 +277,6 @@ def _install_c_extension_bootstrap() -> None:
     "GroupVersionSetRef": ("tensorcast.api", "GroupVersionSetRef"),
     "GovernanceContext": ("tensorcast.api", "GovernanceContext"),
     "DirectorySnapshot": ("tensorcast.api", "DirectorySnapshot"),
-    "CapabilityDirectoryClient": (
-        "tensorcast.capability_directory",
-        "CapabilityDirectoryClient",
-    ),
-    "CapabilityDirectoryOptions": (
-        "tensorcast.capability_directory",
-        "CapabilityDirectoryOptions",
-    ),
     "Operation": ("tensorcast.api", "Operation"),
     "OperationError": ("tensorcast.api", "OperationError"),
     "OperationStatus": ("tensorcast.api", "OperationStatus"),
@@ -295,10 +288,6 @@ def _install_c_extension_bootstrap() -> None:
     "PlanStepResult": ("tensorcast.api", "PlanStepResult"),
     "PartialSealResult": ("tensorcast.api", "PartialSealResult"),
     "PublicDiskSourceHandle": ("tensorcast.api", "PublicDiskSourceHandle"),
-    "PreparedServingRegistration": (
-        "tensorcast.api",
-        "PreparedServingRegistration",
-    ),
     "PublishedModelVersion": ("tensorcast.api", "PublishedModelVersion"),
     "ExecutionDiagnostics": ("tensorcast.api", "ExecutionDiagnostics"),
     "BindingUpdateEpoch": ("tensorcast.api", "BindingUpdateEpoch"),
@@ -321,23 +310,138 @@ def _install_c_extension_bootstrap() -> None:
         "tensorcast.api",
         "RepresentationPublishSpec",
     ),
+    "RuntimeArtifactBuildIntent": ("tensorcast.api", "RuntimeArtifactBuildIntent"),
+    "RuntimeArtifactManifest": ("tensorcast.api", "RuntimeArtifactManifest"),
+    "RuntimeArtifactPolicy": ("tensorcast.api", "RuntimeArtifactPolicy"),
+    "RuntimeArtifactPolicyInput": ("tensorcast.api", "RuntimeArtifactPolicyInput"),
     "SourceBoundCapability": ("tensorcast.api", "SourceBoundCapability"),
-    "ServingPublicationSubject": ("tensorcast.api", "ServingPublicationSubject"),
-    "ServingAdmissionFacts": ("tensorcast.api", "ServingAdmissionFacts"),
-    "ServingArtifactManifest": ("tensorcast.api", "ServingArtifactManifest"),
-    "SERVING_BUILD_DIGEST_VERSION": (
+    "coerce_runtime_artifact_policy": (
         "tensorcast.api",
-        "SERVING_BUILD_DIGEST_VERSION",
+        "coerce_runtime_artifact_policy",
     ),
-    "ServingBuildIntent": ("tensorcast.api", "ServingBuildIntent"),
-    "ServingRuntimePolicy": ("tensorcast.api", "ServingRuntimePolicy"),
-    "ServingSupportLevel": ("tensorcast.api", "ServingSupportLevel"),
     "Instance": ("tensorcast.api", "Instance"),
     "InstanceExecutionRoute": ("tensorcast.api", "InstanceExecutionRoute"),
     "Worker": ("tensorcast.api", "Worker"),
     "TargetSpec": ("tensorcast.api", "TargetSpec"),
     "TransformSpec": ("tensorcast.api", "TransformSpec"),
     "Runtime": ("tensorcast.api", "Runtime"),
+    "RuntimeAttachment": (
+        "tensorcast.artifact_runtime.attachment",
+        "RuntimeAttachment",
+    ),
+    "RuntimeBindingState": (
+        "tensorcast.artifact_runtime.attachment",
+        "RuntimeBindingState",
+    ),
+    "RuntimeAdmissionDecision": (
+        "tensorcast.artifact_runtime.host",
+        "RuntimeAdmissionDecision",
+    ),
+    "RuntimeAdmissionPolicy": (
+        "tensorcast.artifact_runtime.host",
+        "RuntimeAdmissionPolicy",
+    ),
+    "RuntimeAdmissionRequest": (
+        "tensorcast.artifact_runtime.host",
+        "RuntimeAdmissionRequest",
+    ),
+    "RuntimeHostCapabilities": (
+        "tensorcast.artifact_runtime.host",
+        "RuntimeHostCapabilities",
+    ),
+    "RuntimePlacement": ("tensorcast.artifact_runtime.host", "RuntimePlacement"),
+    "RuntimeProfile": ("tensorcast.artifact_runtime.host", "RuntimeProfile"),
+    "RuntimeTensorView": ("tensorcast.artifact_runtime.host", "RuntimeTensorView"),
+    "ArtifactLocator": ("tensorcast.artifact_runtime.locator", "ArtifactLocator"),
+    "RuntimeArtifactLocator": (
+        "tensorcast.artifact_runtime.config",
+        "RuntimeArtifactLocator",
+    ),
+    "RuntimeStartPlanError": (
+        "tensorcast.artifact_runtime.config",
+        "RuntimeStartPlanError",
+    ),
+    "RuntimePolicy": ("tensorcast.artifact_runtime.policy", "RuntimePolicy"),
+    "RuntimeRealizationReport": (
+        "tensorcast.artifact_runtime.diagnostics",
+        "RuntimeRealizationReport",
+    ),
+    "TensorCastRuntimeConfig": (
+        "tensorcast.artifact_runtime.config",
+        "TensorCastRuntimeConfig",
+    ),
+    "plan_runtime_start": ("tensorcast.artifact_runtime.config", "plan_runtime_start"),
+    "RuntimeRequestContext": (
+        "tensorcast.artifact_runtime.intent",
+        "RuntimeRequestContext",
+    ),
+    "ModelAttributeNames": ("tensorcast.artifact_runtime.state", "ModelAttributeNames"),
+    "ModelAttributeRuntimeState": (
+        "tensorcast.artifact_runtime.state",
+        "ModelAttributeRuntimeState",
+    ),
+    "OneShotRuntimeHook": ("tensorcast.artifact_runtime.state", "OneShotRuntimeHook"),
+    "BindingValueRefProjection": (
+        "tensorcast.artifact_runtime.view",
+        "BindingValueRefProjection",
+    ),
+    "RuntimeEndpointProjection": (
+        "tensorcast.artifact_runtime.view",
+        "RuntimeEndpointProjection",
+    ),
+    "RuntimeWorkerView": ("tensorcast.artifact_runtime.view", "RuntimeWorkerView"),
+    "SourceSelectionProjection": (
+        "tensorcast.artifact_runtime.view",
+        "SourceSelectionProjection",
+    ),
+    "WeightVersionProjection": (
+        "tensorcast.artifact_runtime.view",
+        "WeightVersionProjection",
+    ),
+    "aggregate_runtime_view_outputs": (
+        "tensorcast.artifact_runtime.view",
+        "aggregate_runtime_view_outputs",
+    ),
+    "RuntimeReplicaPublicationSettings": (
+        "tensorcast.artifact_runtime.publication.actions",
+        "RuntimeReplicaPublicationSettings",
+    ),
+    "RetainedRealizationClaim": (
+        "tensorcast.retained_realization",
+        "RetainedRealizationClaim",
+    ),
+    "RetainedRealizationExpectedDigests": (
+        "tensorcast.retained_realization",
+        "RetainedRealizationExpectedDigests",
+    ),
+    "project_runtime_replica_publication_state": (
+        "tensorcast.artifact_runtime.publication.actions",
+        "project_runtime_replica_publication_state",
+    ),
+    "publish_runtime_replica": (
+        "tensorcast.artifact_runtime.publication.actions",
+        "publish_runtime_replica",
+    ),
+    "reload_runtime_attachment": (
+        "tensorcast.artifact_runtime.reload",
+        "reload_runtime_attachment",
+    ),
+    "merge_runtime_reload_extra_config": (
+        "tensorcast.artifact_runtime.reload",
+        "merge_runtime_reload_extra_config",
+    ),
+    "normalize_runtime_reload_request_payload": (
+        "tensorcast.artifact_runtime.reload",
+        "normalize_runtime_reload_request_payload",
+    ),
+    "retire_runtime_replica": (
+        "tensorcast.artifact_runtime.publication.actions",
+        "retire_runtime_replica",
+    ),
+    "runtime_replica_publication_settings": (
+        "tensorcast.artifact_runtime.publication.actions",
+        "runtime_replica_publication_settings",
+    ),
     "SignalSnapshot": ("tensorcast.api", "SignalSnapshot"),
     "TensorCastDirectory": ("tensorcast.api", "TensorCastDirectory"),
     "TensorCastSignals": ("tensorcast.api", "TensorCastSignals"),
@@ -346,10 +450,29 @@ def _install_c_extension_bootstrap() -> None:
     "connect": ("tensorcast.api", "connect"),
     "context": ("tensorcast.api", "context"),
     "plan": ("tensorcast.api", "plan"),
-    "runtime": ("tensorcast.api", "runtime"),
     "RetentionHandle": ("tensorcast.retention", "RetentionHandle"),
     "acquire_retention_handle": ("tensorcast.retention", "acquire_retention_handle"),
+    "parse_retained_realization_claim": (
+        "tensorcast.retained_realization",
+        "parse_retained_realization_claim",
+    ),
     "renew_retention_handle": ("tensorcast.retention", "renew_retention_handle"),
+    "retained_realization_claim_extra_from_handoff": (
+        "tensorcast.retained_realization",
+        "retained_realization_claim_extra_from_handoff",
+    ),
+    "retained_realization_claim_extra_json_from_handoff": (
+        "tensorcast.retained_realization",
+        "retained_realization_claim_extra_json_from_handoff",
+    ),
+    "retained_realization_claim_mode": (
+        "tensorcast.retained_realization",
+        "retained_realization_claim_mode",
+    ),
+    "retained_realization_trusted_reservation_bytes": (
+        "tensorcast.retained_realization",
+        "retained_realization_trusted_reservation_bytes",
+    ),
     "release_retention_handle": ("tensorcast.retention", "release_retention_handle"),
     "artifact": ("tensorcast.api.store", "artifact"),
     "artifact_async": ("tensorcast.api.store", "artifact_async"),
@@ -422,18 +545,10 @@ def _install_c_extension_bootstrap() -> None:
         "tensorcast.api.store",
         "build_binding_finalize_publication_bundle",
     ),
-    "build_serving_publication_bundle": (
-        "tensorcast.api.store",
-        "build_serving_publication_bundle",
-    ),
     "build_pure_transform_publication_bundle_from_registered_artifact": (
         "tensorcast.api.store",
         "build_pure_transform_publication_bundle_from_registered_artifact",
     ),
-    "build_serving_publication_bundle_from_registered_artifact": (
-        "tensorcast.api.store",
-        "build_serving_publication_bundle_from_registered_artifact",
-    ),
     "build_pure_transform_publication_spec": (
         "tensorcast.api.store",
         "build_pure_transform_publication_spec",
@@ -478,42 +593,10 @@ def _install_c_extension_bootstrap() -> None:
         "tensorcast.api.store",
         "complete_structural_representation_publish_attempt",
     ),
-    "build_serving_manifest_ref": (
-        "tensorcast.api.store",
-        "build_serving_manifest_ref",
-    ),
-    "coerce_serving_runtime_policy": (
-        "tensorcast.api.store",
-        "coerce_serving_runtime_policy",
-    ),
     "compute_pure_transform_representation_contract_hash": (
         "tensorcast.api.store",
         "compute_pure_transform_representation_contract_hash",
     ),
-    "compute_serving_tensor_schema_hash": (
-        "tensorcast.api.store",
-        "compute_serving_tensor_schema_hash",
-    ),
-    "count_canonical_serving_tensors": (
-        "tensorcast.api.store",
-        "count_canonical_serving_tensors",
-    ),
-    "prepare_pure_transform_serving_registration": (
-        "tensorcast.api.store",
-        "prepare_pure_transform_serving_registration",
-    ),
-    "prepare_binding_finalize_serving_registration": (
-        "tensorcast.api.store",
-        "prepare_binding_finalize_serving_registration",
-    ),
-    "prepare_serving_registration": (
-        "tensorcast.api.store",
-        "prepare_serving_registration",
-    ),
-    "parse_serving_manifest_ref": (
-        "tensorcast.api.store",
-        "parse_serving_manifest_ref",
-    ),
     "init": ("tensorcast.startup", "init"),
     "PortConfig": ("tensorcast.startup", "PortConfig"),
     "is_initialized": ("tensorcast.startup", "is_initialized"),
@@ -522,11 +605,19 @@ def _install_c_extension_bootstrap() -> None:
 
 context: Any
 plan: Any
+runtime: Any
+RuntimeAdmissionDecision: Any
+RuntimeAdmissionPolicy: Any
+RuntimeAdmissionRequest: Any
+RuntimePlacement: Any
+RuntimeProfile: Any
+RuntimeTensorView: Any
+coerce_runtime_artifact_policy: Any
 
 
 def __getattr__(name: str) -> Any:
-    if name == "serving":
-        module = importlib.import_module("tensorcast.serving")
+    if name == "runtime":
+        module = importlib.import_module("tensorcast.runtime")
         globals()[name] = module
         return module
     if name not in _LAZY_ATTRS:
@@ -539,16 +630,18 @@ def __getattr__(name: str) -> Any:
 
 
 def __dir__() -> list[str]:
-    return sorted(set(globals()).union(_LAZY_ATTRS).union({"serving"}))
+    return sorted(set(globals()).union(_LAZY_ATTRS).union({"runtime"}))
 
 
 if TYPE_CHECKING:
-    import tensorcast.serving as serving  # noqa: F401
     from tensorcast.api import (  # noqa: F401
         Artifact,
         ArtifactDescriptor,
         ArtifactError,
         ArtifactFuture,
+        ArtifactRealizationHandle,
+        ArtifactRealizationReport,
+        ArtifactRealizationSpec,
         BindingRealizationEntry,
         BindingRealizationPlan,
         BindingReservationCapability,
@@ -558,8 +651,6 @@ def __dir__() -> list[str]:
         CallContext,
         CanonicalIndex,
         CanonicalIndexEntry,
-        CapabilityDirectoryClient,
-        CapabilityDirectoryOptions,
         CollectiveLoadGroup,
         DirectorySnapshot,
         ExecutionDiagnostics,
@@ -584,30 +675,33 @@ def __dir__() -> list[str]:
         PlanStepRef,
         PlanStepResult,
         PlanType,
-        PrefetchedServingBinding,
-        PrefetchedServingBindingSet,
+        PrefetchHandoff,
+        PrefetchHandoffMemberFailure,
+        PrefetchHandoffSet,
         PrefetchRetentionPolicy,
-        PreparedServingRegistration,
         PublicDiskSourceHandle,
+        RealizationTarget,
+        RealizationTargetSet,
         RegisterArtifactOptions,
         RegisteredArtifact,
         RegisteredLease,
-        RegisteredServingPublication,
         RegistrationResult,
         RetentionHandle,
         Runtime,
-        ServingBindingMemberRef,
-        ServingBindingReadiness,
-        ServingBindingResolvedLayout,
-        ServingBindingResolvedSpecCacheEntry,
-        ServingBindingSetTarget,
-        ServingBindingSourceKind,
-        ServingBindingSourceMemberRef,
-        ServingBindingSourceRef,
-        ServingBindingSourceReuseDecision,
-        ServingBindingSourceReuseMode,
-        ServingBindingTarget,
-        ServingPublicationSubject,
+        RuntimeArtifactBuildIntent,
+        RuntimeArtifactManifest,
+        RuntimeArtifactPolicy,
+        RuntimeArtifactPolicyInput,
+        RuntimeBindingMemberRef,
+        RuntimeBindingReadiness,
+        RuntimeBindingResolvedLayout,
+        RuntimeBindingSourceKind,
+        RuntimeBindingSourceMemberRef,
+        RuntimeBindingSourceRef,
+        RuntimeBindingSourceReuseDecision,
+        RuntimeBindingSourceReuseMode,
+        RuntimeRealizationSpecCacheEntry,
+        RuntimeTopologyRef,
         SignalSnapshot,
         SourceBoundCapability,
         Store,
@@ -630,8 +724,6 @@ def __dir__() -> list[str]:
         artifact,
         artifact_async,
         binding_realization_plan_to_proto,
-        build_serving_publication_bundle,
-        build_serving_publication_bundle_from_registered_artifact,
         complete_pure_transform_publication,
         deregister_artifact,
         from_disk,
@@ -639,7 +731,6 @@ def __dir__() -> list[str]:
         normalize_binding_realization_plan,
         persist_artifact,
         persistence_operation,
-        prepare_serving_registration,
         promote_mounted_source,
         put,
         put_async,
@@ -654,6 +745,57 @@ def __dir__() -> list[str]:
         store,
         unregister_vram_region,
     )
+    from tensorcast.artifact_runtime.attachment import (  # noqa: F401
+        RuntimeAttachment,
+        RuntimeBindingState,
+    )
+    from tensorcast.artifact_runtime.config import (  # noqa: F401
+        RuntimeArtifactLocator,
+        RuntimeStartPlanError,
+        TensorCastRuntimeConfig,
+        plan_runtime_start,
+    )
+    from tensorcast.artifact_runtime.diagnostics import (
+        RuntimeRealizationReport,  # noqa: F401
+    )
+    from tensorcast.artifact_runtime.host import RuntimeHostCapabilities  # noqa: F401
+    from tensorcast.artifact_runtime.intent import RuntimeRequestContext  # noqa: F401
+    from tensorcast.artifact_runtime.locator import ArtifactLocator  # noqa: F401
+    from tensorcast.artifact_runtime.policy import RuntimePolicy  # noqa: F401
+    from tensorcast.artifact_runtime.publication.actions import (  # noqa: F401
+        RuntimeReplicaPublicationSettings,
+        project_runtime_replica_publication_state,
+        publish_runtime_replica,
+        retire_runtime_replica,
+        runtime_replica_publication_settings,
+    )
+    from tensorcast.artifact_runtime.reload import (  # noqa: F401
+        merge_runtime_reload_extra_config,
+        normalize_runtime_reload_request_payload,
+        reload_runtime_attachment,
+    )
+    from tensorcast.artifact_runtime.state import (  # noqa: F401
+        ModelAttributeNames,
+        ModelAttributeRuntimeState,
+        OneShotRuntimeHook,
+    )
+    from tensorcast.artifact_runtime.view import (  # noqa: F401
+        BindingValueRefProjection,
+        RuntimeEndpointProjection,
+        RuntimeWorkerView,
+        SourceSelectionProjection,
+        WeightVersionProjection,
+        aggregate_runtime_view_outputs,
+    )
+    from tensorcast.retained_realization import (  # noqa: F401
+        RetainedRealizationClaim,
+        RetainedRealizationExpectedDigests,
+        parse_retained_realization_claim,
+        retained_realization_claim_extra_from_handoff,
+        retained_realization_claim_extra_json_from_handoff,
+        retained_realization_claim_mode,
+        retained_realization_trusted_reservation_bytes,
+    )
     from tensorcast.startup import (  # noqa: F401
         PortConfig,
         init,
@@ -664,14 +806,13 @@ def __dir__() -> list[str]:
 
 __all__ = [
     "__version__",
-    "serving",
+    "runtime",
     "init",
     "is_initialized",
     "shutdown",
     "Store",
     "StoreOptions",
     "RegisteredArtifact",
-    "RegisteredServingPublication",
     "ArtifactError",
     "ArtifactFuture",
     "BindingReservationCapability",
@@ -689,8 +830,9 @@ def __dir__() -> list[str]:
     "RegisterArtifactOptions",
     "GetArtifactOptions",
     "PrefetchRetentionPolicy",
-    "PrefetchedServingBinding",
-    "PrefetchedServingBindingSet",
+    "PrefetchHandoff",
+    "PrefetchHandoffMemberFailure",
+    "PrefetchHandoffSet",
     "calculate_tensor_device_offsets",
     "build_indices_from_safetensors",
     "binding_realization_plan_to_proto",
@@ -698,6 +840,8 @@ def __dir__() -> list[str]:
     "CollectiveLoadGroup",
     "GroupRealization",
     "GroupVersionSetRef",
+    "RealizationTarget",
+    "RealizationTargetSet",
     "ExecutionDiagnostics",
     "BindingUpdateEpoch",
     "HashBackend",
@@ -726,20 +870,21 @@ def __dir__() -> list[str]:
     "resolve_public_disk_source",
     "Artifact",
     "ArtifactDescriptor",
+    "ArtifactRealizationHandle",
+    "ArtifactRealizationReport",
+    "ArtifactRealizationSpec",
     "PublicDiskSourceHandle",
     "SourceBoundCapability",
-    "ServingBindingMemberRef",
-    "ServingBindingReadiness",
-    "ServingBindingResolvedLayout",
-    "ServingBindingResolvedSpecCacheEntry",
-    "ServingBindingSetTarget",
-    "ServingBindingSourceKind",
-    "ServingBindingSourceMemberRef",
-    "ServingBindingSourceRef",
-    "ServingBindingSourceReuseDecision",
-    "ServingBindingSourceReuseMode",
-    "ServingBindingTarget",
-    "ServingPublicationSubject",
+    "RuntimeBindingMemberRef",
+    "RuntimeBindingReadiness",
+    "RuntimeBindingResolvedLayout",
+    "RuntimeRealizationSpecCacheEntry",
+    "RuntimeBindingSourceKind",
+    "RuntimeBindingSourceMemberRef",
+    "RuntimeBindingSourceRef",
+    "RuntimeBindingSourceReuseDecision",
+    "RuntimeBindingSourceReuseMode",
+    "RuntimeTopologyRef",
     "store",
     "register",
     "register_async",
@@ -755,4 +900,50 @@ def __dir__() -> list[str]:
     "unregister_vram_region",
     "deregister_artifact",
     "BuildConfigMismatchError",
+    "RuntimeAttachment",
+    "RuntimeAdmissionDecision",
+    "RuntimeAdmissionPolicy",
+    "RuntimeAdmissionRequest",
+    "RuntimeArtifactBuildIntent",
+    "RuntimeArtifactManifest",
+    "RuntimeArtifactPolicy",
+    "RuntimeArtifactPolicyInput",
+    "RuntimeBindingState",
+    "RuntimeHostCapabilities",
+    "RuntimePlacement",
+    "RuntimeProfile",
+    "RuntimeRequestContext",
+    "ArtifactLocator",
+    "RuntimeArtifactLocator",
+    "RuntimePolicy",
+    "RuntimeRealizationReport",
+    "RuntimeStartPlanError",
+    "RuntimeTensorView",
+    "TensorCastRuntimeConfig",
+    "ModelAttributeNames",
+    "ModelAttributeRuntimeState",
+    "OneShotRuntimeHook",
+    "BindingValueRefProjection",
+    "RuntimeEndpointProjection",
+    "RuntimeWorkerView",
+    "SourceSelectionProjection",
+    "WeightVersionProjection",
+    "aggregate_runtime_view_outputs",
+    "RuntimeReplicaPublicationSettings",
+    "coerce_runtime_artifact_policy",
+    "RetainedRealizationClaim",
+    "RetainedRealizationExpectedDigests",
+    "parse_retained_realization_claim",
+    "project_runtime_replica_publication_state",
+    "publish_runtime_replica",
+    "reload_runtime_attachment",
+    "merge_runtime_reload_extra_config",
+    "normalize_runtime_reload_request_payload",
+    "plan_runtime_start",
+    "retained_realization_claim_extra_from_handoff",
+    "retained_realization_claim_extra_json_from_handoff",
+    "retained_realization_claim_mode",
+    "retained_realization_trusted_reservation_bytes",
+    "retire_runtime_replica",
+    "runtime_replica_publication_settings",
 ]
diff --git a/tensorcast/api/__init__.py b/tensorcast/api/__init__.py
index 852fe0f6..052f585f 100644
--- a/tensorcast/api/__init__.py
+++ b/tensorcast/api/__init__.py
@@ -69,6 +69,9 @@
     Artifact,
     ArtifactError,
     ArtifactFuture,
+    ArtifactRealizationHandle,
+    ArtifactRealizationReport,
+    ArtifactRealizationSpec,
     AssemblyAttemptRef,
     AssemblyCloseoutContract,
     AssemblyReadinessPolicy,
@@ -91,33 +94,36 @@
     HashLocation,
     IdentityMintStrategy,
     PartialSealResult,
-    PrefetchedServingBinding,
-    PrefetchedServingBindingSet,
+    PrefetchHandoff,
+    PrefetchHandoffMemberFailure,
+    PrefetchHandoffSet,
     PrefetchRetentionPolicy,
-    PreparedServingRegistration,
+    PreparedRuntimeArtifactRegistration,
     PublicDiskSourceHandle,
     PublishedModelVersion,
+    RealizationTarget,
+    RealizationTargetSet,
     RegisteredArtifact,
-    RegisteredServingPublication,
+    RegisteredRuntimeArtifactPublication,
     RepresentationPublishContract,
     RepresentationPublishSpec,
-    ServingAdmissionFacts,
-    ServingArtifactManifest,
-    ServingBindingMemberRef,
-    ServingBindingReadiness,
-    ServingBindingResolvedLayout,
-    ServingBindingResolvedSpecCacheEntry,
-    ServingBindingSetTarget,
-    ServingBindingSourceKind,
-    ServingBindingSourceMemberRef,
-    ServingBindingSourceRef,
-    ServingBindingSourceReuseDecision,
-    ServingBindingSourceReuseMode,
-    ServingBindingTarget,
-    ServingBuildIntent,
-    ServingPublicationSubject,
-    ServingRuntimePolicy,
-    ServingSupportLevel,
+    RuntimeAdmissionFacts,
+    RuntimeArtifactBuildIntent,
+    RuntimeArtifactManifest,
+    RuntimeArtifactPolicy,
+    RuntimeArtifactPolicyInput,
+    RuntimeBindingMemberRef,
+    RuntimeBindingReadiness,
+    RuntimeBindingResolvedLayout,
+    RuntimeBindingSourceKind,
+    RuntimeBindingSourceMemberRef,
+    RuntimeBindingSourceRef,
+    RuntimeBindingSourceReuseDecision,
+    RuntimeBindingSourceReuseMode,
+    RuntimePublicationSubject,
+    RuntimeRealizationSpecCacheEntry,
+    RuntimeSupportLevel,
+    RuntimeTopologyRef,
     SourceBoundCapability,
     Store,
     StoreOptions,
@@ -129,10 +135,10 @@
     build_pure_transform_publication_spec,
     build_pure_transform_transform_spec,
     build_representation_publish_requirements,
+    build_runtime_artifact_publication_bundle,
+    build_runtime_artifact_publication_bundle_from_registered_artifact,
     build_serving_manifest_ref,
-    build_serving_publication_bundle,
-    build_serving_publication_bundle_from_registered_artifact,
-    coerce_serving_runtime_policy,
+    coerce_runtime_artifact_policy,
     complete_binding_finalize_publication_from_binding,
     complete_canonical_representation_publish_attempt,
     complete_plan_repo_owned_representation_publish_attempt,
@@ -142,17 +148,17 @@
     complete_representation_publish_attempt,
     complete_structural_representation_publish_attempt,
     compute_pure_transform_representation_contract_hash,
-    compute_serving_tensor_schema_hash,
-    count_canonical_serving_tensors,
+    compute_runtime_artifact_tensor_schema_hash,
+    count_canonical_runtime_tensors,
     from_disk,
     import_from_disk,
     list_artifact_layouts,
     normalize_binding_realization_plan,
     parse_serving_manifest_ref,
     persist_artifact,
-    prepare_binding_finalize_serving_registration,
-    prepare_pure_transform_serving_registration,
-    prepare_serving_registration,
+    prepare_binding_finalize_runtime_registration,
+    prepare_pure_transform_runtime_registration,
+    prepare_runtime_artifact_registration,
     promote_mounted_source,
     realize_into_binding,
     register_pure_transform_publication,
@@ -218,6 +224,9 @@
     "BindingRealizationEntry",
     "BindingRealizationPlan",
     "Artifact",
+    "ArtifactRealizationHandle",
+    "ArtifactRealizationReport",
+    "ArtifactRealizationSpec",
     "CallContext",
     "CollectiveLoadGroup",
     "GroupRealization",
@@ -234,38 +243,42 @@
     "QosClass",
     "PartialSealResult",
     "PrefetchRetentionPolicy",
-    "PrefetchedServingBinding",
-    "PrefetchedServingBindingSet",
+    "PrefetchHandoff",
+    "PrefetchHandoffMemberFailure",
+    "PrefetchHandoffSet",
     "PublicDiskSourceHandle",
-    "PreparedServingRegistration",
+    "PreparedRuntimeArtifactRegistration",
     "PublishedModelVersion",
+    "RealizationTarget",
+    "RealizationTargetSet",
     "BindingUpdateEpoch",
-    "ServingBindingMemberRef",
-    "ServingBindingReadiness",
-    "ServingBindingResolvedLayout",
-    "ServingBindingResolvedSpecCacheEntry",
-    "ServingBindingSetTarget",
-    "ServingBindingSourceKind",
-    "ServingBindingSourceMemberRef",
-    "ServingBindingSourceRef",
-    "ServingBindingSourceReuseDecision",
-    "ServingBindingSourceReuseMode",
-    "ServingBindingTarget",
+    "RuntimeBindingMemberRef",
+    "RuntimeBindingReadiness",
+    "RuntimeBindingResolvedLayout",
+    "RuntimeRealizationSpecCacheEntry",
+    "RuntimeBindingSourceKind",
+    "RuntimeBindingSourceMemberRef",
+    "RuntimeBindingSourceRef",
+    "RuntimeBindingSourceReuseDecision",
+    "RuntimeBindingSourceReuseMode",
+    "RuntimeTopologyRef",
     "ExecutionDiagnostics",
     "HashBackend",
     "HashLocation",
     "IdentityMintStrategy",
     "RepresentationPublishSpec",
-    "RegisteredServingPublication",
+    "RegisteredRuntimeArtifactPublication",
     "RepresentationPublishContract",
+    "RuntimeArtifactBuildIntent",
+    "RuntimeArtifactManifest",
+    "RuntimeArtifactPolicy",
+    "RuntimeArtifactPolicyInput",
     "SourceBoundCapability",
-    "ServingPublicationSubject",
-    "ServingAdmissionFacts",
-    "ServingArtifactManifest",
+    "RuntimePublicationSubject",
+    "RuntimeAdmissionFacts",
     "SERVING_BUILD_DIGEST_VERSION",
-    "ServingBuildIntent",
-    "ServingRuntimePolicy",
-    "ServingSupportLevel",
+    "RuntimeSupportLevel",
+    "coerce_runtime_artifact_policy",
     "SpanAttributeValue",
     "TimeoutErrorDetails",
     "context",
@@ -291,23 +304,22 @@
     "build_binding_finalize_admission_facts",
     "binding_realization_plan_to_proto",
     "build_binding_finalize_publication_bundle",
-    "build_serving_publication_bundle",
-    "build_serving_publication_bundle_from_registered_artifact",
+    "build_runtime_artifact_publication_bundle",
+    "build_runtime_artifact_publication_bundle_from_registered_artifact",
     "build_pure_transform_publication_bundle",
     "build_pure_transform_publication_bundle_from_registered_artifact",
     "build_pure_transform_publication_spec",
     "build_representation_publish_requirements",
     "build_pure_transform_transform_spec",
     "build_serving_manifest_ref",
-    "coerce_serving_runtime_policy",
     "complete_binding_finalize_publication_from_binding",
     "complete_pure_transform_publication",
     "complete_pure_transform_publication_from_binding",
     "complete_canonical_representation_publish_attempt",
     "complete_plan_repo_owned_representation_publish_attempt",
     "compute_pure_transform_representation_contract_hash",
-    "compute_serving_tensor_schema_hash",
-    "count_canonical_serving_tensors",
+    "compute_runtime_artifact_tensor_schema_hash",
+    "count_canonical_runtime_tensors",
     "from_disk",
     "import_from_disk",
     "promote_mounted_source",
@@ -315,9 +327,9 @@
     "list_artifact_layouts",
     "parse_serving_manifest_ref",
     "realize_into_binding",
-    "prepare_binding_finalize_serving_registration",
-    "prepare_serving_registration",
-    "prepare_pure_transform_serving_registration",
+    "prepare_binding_finalize_runtime_registration",
+    "prepare_runtime_artifact_registration",
+    "prepare_pure_transform_runtime_registration",
     "persist_artifact",
     "register_pure_transform_publication",
     "resolve_public_disk_source",
diff --git a/tensorcast/api/_config.py b/tensorcast/api/_config.py
index 08098479..b13e4cb1 100644
--- a/tensorcast/api/_config.py
+++ b/tensorcast/api/_config.py
@@ -2,7 +2,6 @@
 
 from __future__ import annotations
 
-import os
 import threading
 from collections.abc import Mapping
 from enum import Enum
@@ -26,7 +25,6 @@
 # Global daemon address configuration
 _daemon_address_lock = threading.RLock()
 _global_daemon_address: str | None = None
-_global_store_address = os.environ.get("TENSORCAST_GLOBAL_STORE", "127.0.0.1:8085")
 
 
 def set_daemon_address(address: str) -> None:
@@ -56,15 +54,6 @@ def has_daemon_address() -> bool:
         return _global_daemon_address is not None
 
 
-def set_global_store_address(address: str) -> None:
-    global _global_store_address
-    _global_store_address = address
-
-
-def get_global_store_address() -> str:
-    return _global_store_address
-
-
 class PlanType(Enum):
     DRAM_STABLE = "dram_stable"
     VRAM_COALESCED = "vram_coalesced"
@@ -729,9 +718,12 @@ class CollectivePolicyMode(str, Enum):
     def parse(value: object) -> "CollectivePolicyMode":
         if isinstance(value, CollectivePolicyMode):
             return value
-        normalized = (
-            "require_collective" if value is None else str(value).strip().lower()
-        )
+        if value is None or value == "":
+            raise ValueError(
+                "collective_policy must be explicit when parsed directly; "
+                "leave it unset for request normalization to choose the default."
+            )
+        normalized = str(value).strip().lower()
         if normalized == "require_collective":
             return CollectivePolicyMode.REQUIRE_COLLECTIVE
         if normalized == "collective_first":
@@ -893,8 +885,6 @@ def _normalize_wait_for_shared_disk_ms(cls, value: object) -> int:
     "policy_requires_persistence",
     "clear_daemon_address",
     "get_daemon_address",
-    "get_global_store_address",
     "has_daemon_address",
     "set_daemon_address",
-    "set_global_store_address",
 ]
diff --git a/tensorcast/api/plan/__init__.py b/tensorcast/api/plan/__init__.py
index e6203b04..180be953 100644
--- a/tensorcast/api/plan/__init__.py
+++ b/tensorcast/api/plan/__init__.py
@@ -23,7 +23,7 @@
 )
 from tensorcast.api.plan.targets import TargetSpec
 from tensorcast.api.plan.transforms import TransformSpec
-from tensorcast.api.store.serving_builder import build_pure_transform_transform_spec
+from tensorcast.api.store.publication_builder import build_pure_transform_transform_spec
 
 __all__ = [
     "ARTIFACT_SET_CARRIER_INLINE",
diff --git a/tensorcast/api/plan/plan.py b/tensorcast/api/plan/plan.py
index 372261a7..46699fba 100644
--- a/tensorcast/api/plan/plan.py
+++ b/tensorcast/api/plan/plan.py
@@ -39,7 +39,7 @@
     PrefetchedReplica,
     _decode_capability_token,
 )
-from tensorcast.api.store.serving_builder import build_pure_transform_transform_spec
+from tensorcast.api.store.publication_builder import build_pure_transform_transform_spec
 from tensorcast.api.store.view_composer import compute_view_id
 from tensorcast.engine_adapter.artifact_api import (
     BatchOutcome,
@@ -59,20 +59,20 @@
     AssemblyContractFamily,
     AssemblyReadinessPolicy,
     AssemblyRequirementSetRef,
-    PrefetchedServingBinding,
-    PrefetchedServingBindingSet,
+    PrefetchHandoff,
+    PrefetchHandoffSet,
     PrefetchRetentionPolicy,
+    RealizationTarget,
+    RealizationTargetSet,
     RepresentationPublishContract,
     RepresentationPublishSpec,
-    ServingArtifactManifest,
-    ServingBindingReadiness,
-    ServingBindingSetTarget,
-    ServingBindingTarget,
+    RuntimeArtifactManifest,
+    RuntimeBindingReadiness,
 )
 
 if TYPE_CHECKING:
     from tensorcast.api.store import Store
-    from tensorcast.types import ServingBuildIntent
+    from tensorcast.types import RuntimeArtifactBuildIntent
 
 T = TypeVar("T")
 ArtifactActionResult = (
@@ -223,8 +223,8 @@ class _PrefetchAction:
     artifact: Artifact
     device: str | int | None
     device_id: int
-    target: ServingBindingTarget | ServingBindingSetTarget | None = None
-    readiness: ServingBindingReadiness = "serving_local_ready"
+    target: RealizationTarget | RealizationTargetSet | None = None
+    readiness: RuntimeBindingReadiness = "runtime_local_ready"
     retention: PrefetchRetentionPolicy | None = None
 
 
@@ -426,7 +426,7 @@ def _artifact_result_from_proto(
             serving_manifest_ref=str(
                 result.pure_transform_publication.serving_manifest_ref
             ),
-            serving_manifest=ServingArtifactManifest.from_bytes(
+            serving_manifest=RuntimeArtifactManifest.from_bytes(
                 bytes(result.pure_transform_publication.serving_manifest_bytes)
             ),
             serving_manifest_bytes=bytes(
@@ -761,13 +761,11 @@ def prefetch(
         art: Artifact,
         *,
         device: str | int | None = None,
-        target: ServingBindingTarget | ServingBindingSetTarget | None = None,
-        readiness: ServingBindingReadiness = "serving_local_ready",
+        target: RealizationTarget | RealizationTargetSet | None = None,
+        readiness: RuntimeBindingReadiness = "runtime_local_ready",
         retention: PrefetchRetentionPolicy | None = None,
         depends_on: Sequence[PlanStepRef[Any]] | None = None,
-    ) -> PlanStepRef[
-        PrefetchedReplica | PrefetchedServingBinding | PrefetchedServingBindingSet
-    ]:
+    ) -> PlanStepRef[PrefetchedReplica | PrefetchHandoff | PrefetchHandoffSet]:
         if target is not None and device is not None:
             raise ArtifactError(
                 "prefetch target and device are mutually exclusive",
@@ -1012,7 +1010,7 @@ def transform_register_pure_transform(
         self,
         art: Artifact,
         *,
-        build_intent: "ServingBuildIntent",
+        build_intent: "RuntimeArtifactBuildIntent",
         contract_family: str | None = None,
         out_key: str,
         transform_name: str = "identity.v1",
@@ -1329,7 +1327,7 @@ def to_spec(self) -> plan_pb2.PlanSpec:
                 _fill_selection_proto(selection, prefetch_action.selection)
                 prefetch_action.device_id = int(step.action.device_id)
                 if step.action.target is not None:
-                    if isinstance(step.action.target, ServingBindingTarget):
+                    if isinstance(step.action.target, RealizationTarget):
                         prefetch_action.serving_binding_target.CopyFrom(
                             step.action.target.to_proto()
                         )
diff --git a/tensorcast/api/store/README.md b/tensorcast/api/store/README.md
index f40ca8d9..bdea7c53 100644
--- a/tensorcast/api/store/README.md
+++ b/tensorcast/api/store/README.md
@@ -45,8 +45,12 @@ managing clients manually.
   metadata-first mounted-source path for same-daemon loading. Successful calls
   return a lazy `Artifact` seeded from `ResolvePublicDiskSource` metadata,
   usually with primary `artifact_id = msa1:...`, without hashing payload bytes
-  during metadata resolution. Use `show_progress=True` or call
-  `import_from_disk(...)` explicitly when you need streamed daemon import.
+  during metadata resolution. The returned mounted-source artifact keeps the
+  daemon-attested source handle, so runtime startup can call
+  `Artifact.realize(ArtifactRealizationSpec.model_runtime(...),
+  runtime_host=...)` directly through framework runtime host capabilities. Use
+  `show_progress=True` or call `import_from_disk(...)` explicitly when you need
+  streamed daemon import.
 - `tensorcast.import_from_disk(path)` / `Store.import_from_disk(path)` keep the
   explicit daemon import contract via `ImportArtifactFromPath` /
   `ImportArtifactFromPathStream`. This path returns `mi2:` and remains the
@@ -162,15 +166,15 @@ Design and execution details: `../../../docs/designs/0077-unified-reference-only
     `serving_manifest_ref`.
   - Phase 1 currently supports the reserved manifest-tensor carrier
     `tensor:__tensorcast_meta__.manifest_json`.
-  - `ServingArtifactManifest` now self-describes its phase-1 carrier through
+  - `RuntimeArtifactManifest` now self-describes its phase-1 carrier through
     `serving_manifest_ref`, and the typed serving-lineage models can derive a
     strict runtime gate:
     `RepresentationPublishContract.to_runtime_policy()`,
-    `ServingArtifactManifest.to_runtime_policy()`, and
-    `PublishedModelVersion.require_serving_runtime_policy()`.
+    `RuntimeArtifactManifest.to_runtime_policy()`, and
+    `PublishedModelVersion.require_runtime_artifact_policy()`.
   - The repo-owned serving-lineage carriers now also expose explicit phase-1
     build identity fields:
-    `ServingArtifactManifest.serving_build_digest_version` and
+    `RuntimeArtifactManifest.serving_build_digest_version` and
     `RepresentationPublishContract.serving_build_digest_version`.
     Runtime policy gates on `serving_manifest_ref`,
     `representation_contract_hash`, and `serving_build_digest`.
@@ -185,12 +189,12 @@ Design and execution details: `../../../docs/designs/0077-unified-reference-only
     spec directly and forward it into `start_assembly_attempt(...)` through the
     typed `representation_publish_spec` daemon ingress instead of re-authoring
     the generic closeout shell at each call site.
-    When the spec carries optional `ServingAdmissionFacts`, TensorCast validates
+    When the spec carries optional `RuntimeAdmissionFacts`, TensorCast validates
     the supplied finalize classification, same-binding proof, and support level
     for consistency without inferring missing integration-private rollout state.
   - `BINDING_FINALIZE` publication is same-binding-only. Use
     `Store.complete_binding_finalize_publication_from_binding(...)` after the
-    serving binding current value has been realized, finalized, and sealed.
+    runtime binding current value has been realized, finalized, and sealed.
     The resulting spec must carry a binding-value publication subject and
     `same_binding_fast_path_validated=True`.
   - Tensor-entry `BINDING_FINALIZE` publication helpers have been removed.
@@ -257,8 +261,8 @@ Design and execution details: `../../../docs/designs/0077-unified-reference-only
     `transform_register` path now also prepares the reserved manifest tensor
     before registration, so the resulting serving artifact can already carry
     `tensor:__tensorcast_meta__.manifest_json`.
-  - For steady-state serving bind or swap, pass
-    `serving_runtime_policy=...` to `artifact.bind(...)`,
+  - For steady-state runtime bind or swap, pass
+    `runtime_artifact_policy=...` to `artifact.bind(...)`,
     `artifact.bind_into(...)`, or `binding.swap(...)`.
     This keeps generic artifact load permissive while giving serving runtime an
     explicit strict gate. When the policy is present, the daemon requires a
@@ -267,12 +271,12 @@ Design and execution details: `../../../docs/designs/0077-unified-reference-only
     artifact is accepted into the serving path.
     If you pass a full `RepresentationPublishSpec` instead of a plain runtime
     policy, TensorCast also requires
-    `ServingSupportLevel.RUNTIME_BIND_SWAP_READY` when caller-supplied
+    `RuntimeSupportLevel.RUNTIME_BIND_SWAP_READY` when caller-supplied
     admission facts are present.
   - The same runtime-ready gate now also applies to serving-key activation on
     typed `representation_publish` specs: a spec carrying
     `serving_version_key` must be admitted at
-    `ServingSupportLevel.RUNTIME_BIND_SWAP_READY`.
+    `RuntimeSupportLevel.RUNTIME_BIND_SWAP_READY`.
 - `Store.seal_assembly(assembly_id, publish_canonical=True)` seals an assembly
   into a stable MI2 identity and returns the bound descriptor.
 
@@ -399,19 +403,19 @@ binding.swap("model:v2")
   `lease_mode=NO_LEASE` so it does not create PID-bound UseLeases and does not mint IPC handle leases. Prefetch is
   supported for both GPU VRAM (`"cuda:0"`/`0`) and daemon-owned host DRAM (`"cpu"`/`"dram"`/`-1`). Handle-exporting APIs
   remain PID/lease-bound and are separate from daemon-owned warm replicas.
-- Retained serving prefetch lowers through the unified realization facade:
+- Retained realization prefetch lowers through the unified realization facade:
   `ArtifactRealizationSpec.retained_binding(...)` for one retained binding and
   `ArtifactRealizationSpec.target_set(...)` for TP/group target sets.
-  `artifact.prefetch(target=ServingBindingTarget(...))` and
-  `artifact.prefetch(target=ServingBindingSetTarget(...))` remain ergonomic
+  `artifact.prefetch(target=RealizationTarget(...))` and
+  `artifact.prefetch(target=RealizationTargetSet(...))` remain ergonomic
   wrappers, but target sets must use the target-set realization path so group
   admission, strategy, lifecycle, resource-envelope, and report state all carry
   `target_kind="target_set"`. Ordinary `device=` prefetch behavior is
-  unchanged. Serving targets require runtime-provided resolved layout/index
+  unchanged. Runtime targets require runtime-provided resolved layout/index
   metadata before daemon allocation; unresolved layouts fail closed before GPU
   memory is reserved. The daemon keeps serving prefetch behind
   `daemon_config.serving_prefetch.enabled` and returns a typed
-  `PrefetchedServingBinding` / `PrefetchedServingBindingSet` result once the
+  `PrefetchHandoff` / `PrefetchHandoffSet` result once the
   retained binding materialization path is enabled.
 - Prefetch idempotency derives a stable action fingerprint from selection identity (`artifact_id`,
   `logical_layout_hash`, `selection_hash`) and target placement (daemon + device/tier). `selection_hash` is computed via
diff --git a/tensorcast/api/store/__init__.py b/tensorcast/api/store/__init__.py
index db7d3f48..f32013f7 100644
--- a/tensorcast/api/store/__init__.py
+++ b/tensorcast/api/store/__init__.py
@@ -86,6 +86,24 @@
     OwnedBindingSlot,
     restore_owned_binding_tensors,
 )
+from tensorcast.api.store.publication_builder import (
+    PreparedRuntimeArtifactRegistration,
+    RegisteredRuntimeArtifactPublication,
+    build_binding_finalize_admission_facts,
+    build_binding_finalize_publication_bundle,
+    build_pure_transform_publication_bundle,
+    build_pure_transform_publication_bundle_from_registered_artifact,
+    build_pure_transform_publication_spec,
+    build_pure_transform_transform_spec,
+    build_runtime_artifact_publication_bundle,
+    build_runtime_artifact_publication_bundle_from_registered_artifact,
+    compute_pure_transform_representation_contract_hash,
+    compute_runtime_artifact_tensor_schema_hash,
+    count_canonical_runtime_tensors,
+    prepare_binding_finalize_runtime_registration,
+    prepare_pure_transform_runtime_registration,
+    prepare_runtime_artifact_registration,
+)
 from tensorcast.api.store.realization_kernel import (
     ArtifactRealizationHandle,
     ArtifactRealizationReport,
@@ -156,11 +174,11 @@
 from tensorcast.api.store.runtime import (
     get_context as get_runtime_context,
 )
-from tensorcast.api.store.serving_binding_reference_consumer import (
+from tensorcast.api.store.runtime_realization_reference_consumer import (
     REFERENCE_RUNTIME,
-    ReferenceServingAcquireResult,
-    ReferenceServingResolvedSpec,
-    ReferenceServingTensorSpec,
+    ReferenceRuntimeAcquireResult,
+    ReferenceRuntimeResolvedSpec,
+    ReferenceRuntimeTensorSpec,
     acquire_reference_binding,
     build_reference_resolved_spec,
     build_reference_target_layout,
@@ -169,39 +187,21 @@
     prefetch_reference_binding_set,
     release_reference_acquire,
     target_from_reference_cache_record,
-    unpack_prefetched_serving_binding,
-    unpack_prefetched_serving_binding_set,
+    unpack_prefetch_handoff,
+    unpack_prefetch_handoff_set,
     write_reference_resolved_spec_cache_entry,
 )
-from tensorcast.api.store.serving_binding_spec_cache import (
-    ServingBindingSpecCacheGroupIndex,
-    ServingBindingSpecCacheRecord,
+from tensorcast.api.store.runtime_realization_spec_cache import (
+    RuntimeRealizationSpecCacheGroupIndex,
+    RuntimeRealizationSpecCacheRecord,
     canonical_json_bytes,
     read_matching_resolved_spec_cache_entry,
     read_resolved_spec_cache_entry,
     read_resolved_spec_cache_group_index,
-    serving_binding_spec_cache_root,
+    runtime_realization_spec_cache_root,
     write_resolved_spec_cache_entry,
     write_resolved_spec_cache_group_index,
 )
-from tensorcast.api.store.serving_builder import (
-    PreparedServingRegistration,
-    RegisteredServingPublication,
-    build_binding_finalize_admission_facts,
-    build_binding_finalize_publication_bundle,
-    build_pure_transform_publication_bundle,
-    build_pure_transform_publication_bundle_from_registered_artifact,
-    build_pure_transform_publication_spec,
-    build_pure_transform_transform_spec,
-    build_serving_publication_bundle,
-    build_serving_publication_bundle_from_registered_artifact,
-    compute_pure_transform_representation_contract_hash,
-    compute_serving_tensor_schema_hash,
-    count_canonical_serving_tensors,
-    prepare_binding_finalize_serving_registration,
-    prepare_pure_transform_serving_registration,
-    prepare_serving_registration,
-)
 from tensorcast.api.store.target_region_lifecycle import (
     register_store_target_regions_for_realization,
 )
@@ -255,38 +255,40 @@
     IdentityMintStrategy,
     LocalRegionHandle,
     PartialSealResult,
-    PrefetchedServingBinding,
-    PrefetchedServingBindingSet,
+    PrefetchHandoff,
+    PrefetchHandoffMemberFailure,
+    PrefetchHandoffSet,
     PrefetchRetentionPolicy,
     PublicDiskSourceHandle,
     PublishedModelVersion,
+    RealizationTarget,
+    RealizationTargetSet,
     RegionMemoryKind,
     RepresentationPublishContract,
     RepresentationPublishSpec,
+    RuntimeAdmissionFacts,
+    RuntimeArtifactBuildIntent,
+    RuntimeArtifactManifest,
+    RuntimeArtifactPolicy,
+    RuntimeArtifactPolicyInput,
+    RuntimeBindingMemberRef,
+    RuntimeBindingReadiness,
+    RuntimeBindingResolvedLayout,
+    RuntimeBindingSourceKind,
+    RuntimeBindingSourceMemberRef,
+    RuntimeBindingSourceRef,
+    RuntimeBindingSourceReuseDecision,
+    RuntimeBindingSourceReuseMode,
+    RuntimePublicationSubject,
+    RuntimeRealizationSpecCacheEntry,
+    RuntimeSupportLevel,
+    RuntimeTopologyRef,
     SealAssemblyResult,
-    ServingAdmissionFacts,
-    ServingArtifactManifest,
-    ServingBindingMemberRef,
-    ServingBindingReadiness,
-    ServingBindingResolvedLayout,
-    ServingBindingResolvedSpecCacheEntry,
-    ServingBindingSetTarget,
-    ServingBindingSourceKind,
-    ServingBindingSourceMemberRef,
-    ServingBindingSourceRef,
-    ServingBindingSourceReuseDecision,
-    ServingBindingSourceReuseMode,
-    ServingBindingTarget,
-    ServingBuildIntent,
-    ServingPublicationSubject,
-    ServingRuntimePolicy,
-    ServingRuntimePolicyInput,
-    ServingSupportLevel,
     SourceBoundCapability,
     SourceBoundPlanDiagnostics,
     VramRegionHandle,
     build_serving_manifest_ref,
-    coerce_serving_runtime_policy,
+    coerce_runtime_artifact_policy,
     parse_serving_manifest_ref,
 )
 from tensorcast.types import (
@@ -875,36 +877,29 @@ def _decode_published_model_version_from_response(
         total_size=int(artifact.total_size),
         id_kind=_artifact_id_kind_from_proto(artifact.id_kind, artifact.artifact_id),
     )
+    serving_descriptor = None
+    if payload.HasField("serving_artifact") and payload.serving_artifact.artifact_id:
+        serving_artifact = payload.serving_artifact
+        serving_descriptor = TypedArtifactDescriptor(
+            artifact_id=str(serving_artifact.artifact_id),
+            index_multihash=str(serving_artifact.index_multihash or "") or None,
+            data_multihash=str(serving_artifact.data_multihash or "") or None,
+            schema_version=str(serving_artifact.schema_version or "") or None,
+            encoding=str(serving_artifact.encoding or "") or None,
+            total_size=int(serving_artifact.total_size),
+            id_kind=_artifact_id_kind_from_proto(
+                serving_artifact.id_kind,
+                serving_artifact.artifact_id,
+            ),
+        )
     return PublishedModelVersion(
         assembly_id=assembly_id,
         source_artifact_id=descriptor.artifact_id,
         source_descriptor=descriptor,
         serving_artifact_id=(
-            str(payload.serving_artifact.artifact_id)
-            if payload.HasField("serving_artifact")
-            and payload.serving_artifact.artifact_id
-            else None
-        ),
-        serving_descriptor=(
-            TypedArtifactDescriptor(
-                artifact_id=str(payload.serving_artifact.artifact_id),
-                index_multihash=str(payload.serving_artifact.index_multihash or "")
-                or None,
-                data_multihash=str(payload.serving_artifact.data_multihash or "")
-                or None,
-                schema_version=str(payload.serving_artifact.schema_version or "")
-                or None,
-                encoding=str(payload.serving_artifact.encoding or "") or None,
-                total_size=int(payload.serving_artifact.total_size),
-                id_kind=_artifact_id_kind_from_proto(
-                    payload.serving_artifact.id_kind,
-                    payload.serving_artifact.artifact_id,
-                ),
-            )
-            if payload.HasField("serving_artifact")
-            and payload.serving_artifact.artifact_id
-            else None
+            serving_descriptor.artifact_id if serving_descriptor is not None else None
         ),
+        serving_descriptor=serving_descriptor,
         source_version_key=str(payload.source_version_key or "") or None,
         serving_version_key=str(payload.serving_version_key or "") or None,
         representation_contract_hash=(
@@ -1789,7 +1784,7 @@ def register_pure_transform_publication(
         self,
         tensors: TensorDict,
         *,
-        build_intent: ServingBuildIntent,
+        build_intent: RuntimeArtifactBuildIntent,
         source_artifact: Artifact
         | RegisteredArtifact
         | CanonicalIndex
@@ -1804,8 +1799,8 @@ def register_pure_transform_publication(
         serving_version_key: str | None = None,
         logical_topology_json: str | None = None,
         serving_manifest_ref: str | None = None,
-    ) -> RegisteredServingPublication:
-        prepared = prepare_pure_transform_serving_registration(
+    ) -> RegisteredRuntimeArtifactPublication:
+        prepared = prepare_pure_transform_runtime_registration(
             build_intent=build_intent,
             source_artifact=source_artifact,
             tensors=tensors,
@@ -1829,7 +1824,7 @@ def register_pure_transform_publication(
             logical_topology_json=logical_topology_json,
             serving_manifest_ref=prepared.serving_manifest_ref,
         )
-        return RegisteredServingPublication(
+        return RegisteredRuntimeArtifactPublication(
             registered_artifact=registered_artifact,
             prepared_registration=prepared,
             publication=publication,
@@ -1839,7 +1834,7 @@ def complete_pure_transform_publication(
         self,
         tensors: TensorDict,
         *,
-        build_intent: ServingBuildIntent,
+        build_intent: RuntimeArtifactBuildIntent,
         source_artifact: Artifact
         | RegisteredArtifact
         | CanonicalIndex
@@ -1909,7 +1904,7 @@ def complete_pure_transform_publication_from_binding(
         self,
         binding: Binding | SealedBindingValue,
         *,
-        build_intent: ServingBuildIntent,
+        build_intent: RuntimeArtifactBuildIntent,
         source_artifact: Artifact
         | RegisteredArtifact
         | CanonicalIndex
@@ -1933,7 +1928,7 @@ def complete_pure_transform_publication_from_binding(
         authoritative_canonical_index = _build_bound_publication_canonical_index(
             resolved_binding.layout
         )
-        prepared = prepare_pure_transform_serving_registration(
+        prepared = prepare_pure_transform_runtime_registration(
             build_intent=build_intent,
             source_artifact=source_artifact,
             tensors=dict(resolved_binding.tensors),
@@ -1969,7 +1964,7 @@ def complete_pure_transform_publication_from_binding(
     def _complete_registered_representation_publication(
         self,
         *,
-        publication: RegisteredServingPublication,
+        publication: RegisteredRuntimeArtifactPublication,
         contract_family: AssemblyContractFamily | str | None = None,
         source_artifact: Artifact
         | RegisteredArtifact
@@ -2183,8 +2178,8 @@ def complete_binding_finalize_publication_from_binding(
         self,
         binding: Binding | SealedBindingValue,
         *,
-        build_intent: ServingBuildIntent,
-        admission_facts: ServingAdmissionFacts,
+        build_intent: RuntimeArtifactBuildIntent,
+        admission_facts: RuntimeAdmissionFacts,
         source_artifact: Artifact
         | RegisteredArtifact
         | CanonicalIndex
@@ -2209,7 +2204,7 @@ def complete_binding_finalize_publication_from_binding(
         authoritative_canonical_index = _build_bound_publication_canonical_index(
             resolved_binding.layout
         )
-        prepared = prepare_binding_finalize_serving_registration(
+        prepared = prepare_binding_finalize_runtime_registration(
             build_intent=build_intent,
             tensors=dict(resolved_binding.tensors),
             representation_contract_hash=representation_contract_hash,
@@ -3078,6 +3073,7 @@ def _artifact_from_disk_metadata(
         event_name: str,
         resolution_mode: str,
         trusted_content_artifact_id: str | None = None,
+        source_subject: object | None = None,
     ) -> Artifact:
         if not artifact_id:
             raise ArtifactError(
@@ -3133,6 +3129,7 @@ def _artifact_from_disk_metadata(
             canonical_index_bytes=canonical_index_bytes or None,
             canonical_index=canonical_index,
             generation=generation,
+            source_subject=source_subject,
         )
 
     def import_from_disk(
@@ -3365,6 +3362,7 @@ def from_disk(
             event_name="store.from_disk.summary",
             resolution_mode="attested_mounted_source",
             trusted_content_artifact_id=source.trusted_content_artifact_id,
+            source_subject=source,
         )
 
     def resolve_public_disk_source(
@@ -3896,7 +3894,7 @@ def register_piece(
 def register_pure_transform_publication(
     tensors: TensorDict,
     *,
-    build_intent: ServingBuildIntent,
+    build_intent: RuntimeArtifactBuildIntent,
     source_artifact: Artifact
     | RegisteredArtifact
     | CanonicalIndex
@@ -3911,7 +3909,7 @@ def register_pure_transform_publication(
     serving_version_key: str | None = None,
     logical_topology_json: str | None = None,
     serving_manifest_ref: str | None = None,
-) -> RegisteredServingPublication:
+) -> RegisteredRuntimeArtifactPublication:
     return _coerce_store().register_pure_transform_publication(
         tensors,
         build_intent=build_intent,
@@ -3931,7 +3929,7 @@ def register_pure_transform_publication(
 def complete_pure_transform_publication(
     tensors: TensorDict,
     *,
-    build_intent: ServingBuildIntent,
+    build_intent: RuntimeArtifactBuildIntent,
     source_artifact: Artifact
     | RegisteredArtifact
     | CanonicalIndex
@@ -3982,7 +3980,7 @@ def complete_pure_transform_publication(
 def complete_pure_transform_publication_from_binding(
     binding: Binding | SealedBindingValue,
     *,
-    build_intent: ServingBuildIntent,
+    build_intent: RuntimeArtifactBuildIntent,
     source_artifact: Artifact
     | RegisteredArtifact
     | CanonicalIndex
@@ -4019,8 +4017,8 @@ def complete_pure_transform_publication_from_binding(
 def complete_binding_finalize_publication_from_binding(
     binding: Binding | SealedBindingValue,
     *,
-    build_intent: ServingBuildIntent,
-    admission_facts: ServingAdmissionFacts,
+    build_intent: RuntimeArtifactBuildIntent,
+    admission_facts: RuntimeAdmissionFacts,
     source_artifact: Artifact
     | RegisteredArtifact
     | CanonicalIndex
@@ -4645,12 +4643,13 @@ def realize_into_binding(
     "RealizationTargetSetReport",
     "RealizationTargetPlan",
     "PrefetchRetentionPolicy",
+    "PrefetchHandoff",
+    "PrefetchHandoffMemberFailure",
+    "PrefetchHandoffSet",
     "PrefetchedReplica",
-    "PrefetchedServingBinding",
-    "PrefetchedServingBindingSet",
     "PartialSealResult",
     "PublicDiskSourceHandle",
-    "PreparedServingRegistration",
+    "PreparedRuntimeArtifactRegistration",
     "PublishedModelVersion",
     "RegionMemoryKind",
     "ExecutionDiagnostics",
@@ -4658,37 +4657,41 @@ def realize_into_binding(
     "HashBackend",
     "HashLocation",
     "IdentityMintStrategy",
-    "RegisteredServingPublication",
+    "RegisteredRuntimeArtifactPublication",
     "RegisteredArtifact",
+    "RealizationTarget",
+    "RealizationTargetSet",
     "RepresentationAdmissionPlan",
     "RepresentationPublishContract",
     "RepresentationPublishSpec",
+    "RuntimeArtifactBuildIntent",
+    "RuntimeArtifactManifest",
+    "RuntimeArtifactPolicy",
+    "RuntimeArtifactPolicyInput",
     "SourceBoundCapability",
-    "ServingPublicationSubject",
+    "RuntimePublicationSubject",
     "ReplicaInfo",
     "RetryPolicy",
     "ResolvedArtifactSelection",
     "SERVING_MANIFEST_TENSOR_NAME",
     "SealedBindingValue",
     "StagedBindingValue",
-    "ServingArtifactManifest",
-    "ServingAdmissionFacts",
-    "ServingBindingMemberRef",
-    "ServingBindingReadiness",
-    "ServingBindingResolvedLayout",
-    "ServingBindingResolvedSpecCacheEntry",
-    "ServingBindingSetTarget",
-    "ServingBindingSourceKind",
-    "ServingBindingSourceMemberRef",
-    "ServingBindingSourceRef",
-    "ServingBindingSourceReuseDecision",
-    "ServingBindingSourceReuseMode",
-    "ServingBindingTarget",
-    "ServingBindingSpecCacheRecord",
+    "RuntimeAdmissionFacts",
+    "RuntimeBindingMemberRef",
+    "RuntimeBindingReadiness",
+    "RuntimeBindingResolvedLayout",
+    "RuntimeRealizationSpecCacheEntry",
+    "RuntimeBindingSourceKind",
+    "RuntimeBindingSourceMemberRef",
+    "RuntimeBindingSourceRef",
+    "RuntimeBindingSourceReuseDecision",
+    "RuntimeBindingSourceReuseMode",
+    "RuntimeTopologyRef",
+    "RuntimeRealizationSpecCacheRecord",
     "REFERENCE_RUNTIME",
-    "ReferenceServingAcquireResult",
-    "ReferenceServingResolvedSpec",
-    "ReferenceServingTensorSpec",
+    "ReferenceRuntimeAcquireResult",
+    "ReferenceRuntimeResolvedSpec",
+    "ReferenceRuntimeTensorSpec",
     "artifact_realization_profile_payload",
     "artifact_realization_report_to_dict",
     "acquire_reference_binding",
@@ -4721,19 +4724,17 @@ def realize_into_binding(
     "read_matching_resolved_spec_cache_entry",
     "read_resolved_spec_cache_entry",
     "release_reference_acquire",
-    "serving_binding_spec_cache_root",
+    "runtime_realization_spec_cache_root",
     "target_set_report_for_retained_bindings",
     "target_set_strategy_plan_for",
     "target_from_reference_cache_record",
-    "unpack_prefetched_serving_binding",
-    "unpack_prefetched_serving_binding_set",
+    "unpack_prefetch_handoff",
+    "unpack_prefetch_handoff_set",
     "write_resolved_spec_cache_entry",
     "write_reference_resolved_spec_cache_entry",
-    "ServingBuildIntent",
     "SERVING_BUILD_DIGEST_VERSION",
-    "ServingRuntimePolicy",
-    "ServingRuntimePolicyInput",
-    "ServingSupportLevel",
+    "RuntimeSupportLevel",
+    "coerce_runtime_artifact_policy",
     "StoreCapabilities",
     "Store",
     "StoreOptions",
@@ -4751,8 +4752,8 @@ def realize_into_binding(
     "build_binding_finalize_admission_facts",
     "build_binding_finalize_publication_bundle",
     "build_owned_layout",
-    "build_serving_publication_bundle",
-    "build_serving_publication_bundle_from_registered_artifact",
+    "build_runtime_artifact_publication_bundle",
+    "build_runtime_artifact_publication_bundle_from_registered_artifact",
     "build_pure_transform_publication_bundle",
     "build_pure_transform_publication_bundle_from_registered_artifact",
     "build_pure_transform_publication_spec",
@@ -4768,22 +4769,21 @@ def realize_into_binding(
     "complete_structural_representation_publish_attempt",
     "compute_pure_transform_representation_contract_hash",
     "build_serving_manifest_ref",
-    "coerce_serving_runtime_policy",
-    "compute_serving_tensor_schema_hash",
-    "count_canonical_serving_tensors",
-    "prepare_binding_finalize_serving_registration",
-    "prepare_serving_registration",
-    "prepare_pure_transform_serving_registration",
+    "compute_runtime_artifact_tensor_schema_hash",
+    "count_canonical_runtime_tensors",
+    "prepare_binding_finalize_runtime_registration",
+    "prepare_runtime_artifact_registration",
+    "prepare_pure_transform_runtime_registration",
     "parse_serving_manifest_ref",
     "TargetTensors",
     "PersistenceStatusResult",
     "PersistenceShardStatus",
-    "ServingBindingSpecCacheGroupIndex",
-    "ServingBindingSpecCacheRecord",
+    "RuntimeRealizationSpecCacheGroupIndex",
+    "RuntimeRealizationSpecCacheRecord",
     "REFERENCE_RUNTIME",
-    "ReferenceServingAcquireResult",
-    "ReferenceServingResolvedSpec",
-    "ReferenceServingTensorSpec",
+    "ReferenceRuntimeAcquireResult",
+    "ReferenceRuntimeResolvedSpec",
+    "ReferenceRuntimeTensorSpec",
     "acquire_reference_binding",
     "build_reference_resolved_spec",
     "build_reference_target_layout",
@@ -4795,10 +4795,10 @@ def realize_into_binding(
     "read_resolved_spec_cache_entry",
     "read_resolved_spec_cache_group_index",
     "release_reference_acquire",
-    "serving_binding_spec_cache_root",
+    "runtime_realization_spec_cache_root",
     "target_from_reference_cache_record",
-    "unpack_prefetched_serving_binding",
-    "unpack_prefetched_serving_binding_set",
+    "unpack_prefetch_handoff",
+    "unpack_prefetch_handoff_set",
     "write_resolved_spec_cache_entry",
     "write_resolved_spec_cache_group_index",
     "write_reference_resolved_spec_cache_entry",
diff --git a/tensorcast/api/store/artifact.py b/tensorcast/api/store/artifact.py
index 4e418496..9b69127f 100644
--- a/tensorcast/api/store/artifact.py
+++ b/tensorcast/api/store/artifact.py
@@ -103,6 +103,7 @@
     retained_binding_lifecycle_plan_for,
     retained_binding_reports_for,
     risk_labels_for_target,
+    selection_report_fields,
     strategy_plan_for_execution,
 )
 from tensorcast.api.store.retry import (
@@ -134,21 +135,27 @@
 from tensorcast.proto.operation.v1 import operation_pb2
 from tensorcast.types import (
     GroupRealizationAcquireRef,
-    PrefetchedServingBinding,
-    PrefetchedServingBindingSet,
+    PrefetchHandoff,
+    PrefetchHandoffSet,
     PrefetchRetentionPolicy,
-    ServingBindingReadiness,
-    ServingBindingSetTarget,
-    ServingBindingSourceReuseDecision,
-    ServingBindingTarget,
-    ServingRuntimePolicy,
-    ServingRuntimePolicyInput,
-    coerce_serving_runtime_policy,
+    RealizationTarget,
+    RealizationTargetSet,
+    RuntimeArtifactPolicy,
+    RuntimeArtifactPolicyInput,
+    RuntimeBindingReadiness,
+    RuntimeBindingSourceReuseDecision,
+    coerce_runtime_artifact_policy,
 )
 
 logger = logging.getLogger(__name__)
 
 
+def _resolve_runtime_artifact_policy(
+    runtime_artifact_policy: RuntimeArtifactPolicyInput | None,
+) -> RuntimeArtifactPolicy | None:
+    return coerce_runtime_artifact_policy(runtime_artifact_policy)
+
+
 def _has_validated_byte_artifact_profile(artifact_id: str) -> bool:
     if not is_byte_artifact_id(artifact_id):
         return False
@@ -249,22 +256,22 @@ class PrefetchedReplica:
     report: ArtifactRealizationReport | None = None
 
 
-ServingPrefetchResult = PrefetchedServingBinding | PrefetchedServingBindingSet
+RuntimePrefetchResult = PrefetchHandoff | PrefetchHandoffSet
 
 
 def _parse_serving_prefetch_result_any(
     result: Any,
-) -> ServingPrefetchResult:
+) -> RuntimePrefetchResult:
     binding_result = operation_pb2.PrefetchServingBindingResult()
     if result.Is(binding_result.DESCRIPTOR):
         result.Unpack(binding_result)
-        return PrefetchedServingBinding.from_proto(binding_result)
+        return PrefetchHandoff.from_proto(binding_result)
     set_result = operation_pb2.PrefetchServingBindingSetResult()
     if result.Is(set_result.DESCRIPTOR):
         result.Unpack(set_result)
-        return PrefetchedServingBindingSet.from_proto(set_result)
+        return PrefetchHandoffSet.from_proto(set_result)
     raise ArtifactError(
-        "Serving prefetch operation did not return a typed serving binding result",
+        "Runtime prefetch operation did not return a typed prefetch handoff result",
         status_code="DATA_LOSS",
         retryable=False,
     )
@@ -272,13 +279,13 @@ def _parse_serving_prefetch_result_any(
 
 def _serving_prefetch_result_from_operation_response(
     response: operation_pb2.GetOperationResponse,
-) -> ServingPrefetchResult:
+) -> RuntimePrefetchResult:
     if response.status.HasField("result"):
         return _parse_serving_prefetch_result_any(response.status.result)
     if response.HasField("snapshot"):
         return _parse_serving_prefetch_result_any(response.snapshot)
     raise ArtifactError(
-        "Serving prefetch operation completed without result metadata",
+        "Runtime prefetch operation completed without result metadata",
         status_code="DATA_LOSS",
         retryable=False,
     )
@@ -293,11 +300,11 @@ def _digest_hex(label: str, payload: bytes) -> str:
 
 
 def _serving_target_layout_digest(
-    target: ServingBindingTarget | ServingBindingSetTarget,
+    target: RealizationTarget | RealizationTargetSet,
     *,
     target_bytes: bytes,
 ) -> str:
-    if isinstance(target, ServingBindingTarget):
+    if isinstance(target, RealizationTarget):
         return str(target.resolved_layout.target_layout_hash or "") or _digest_hex(
             "serving-target-layout",
             target_bytes,
@@ -306,11 +313,11 @@ def _serving_target_layout_digest(
 
 
 def _serving_target_copy_plan_digest(
-    target: ServingBindingTarget | ServingBindingSetTarget,
+    target: RealizationTarget | RealizationTargetSet,
     *,
     target_bytes: bytes,
 ) -> str:
-    if isinstance(target, ServingBindingTarget):
+    if isinstance(target, RealizationTarget):
         digest = str(target.resolved_layout.spec_digest or "")
         if digest:
             return digest
@@ -320,16 +327,43 @@ def _serving_target_copy_plan_digest(
     return _digest_hex("serving-target-copy-plan", target_bytes)
 
 
+def _binding_layout_target_digest(binding_layout_id: str) -> str | None:
+    # Falsy or whitespace-only ids carry no layout identity, so they map to
+    # None instead of a bare "binding-layout:" prefix.
+    layout_id = str(binding_layout_id or "").strip()
+    return f"binding-layout:{layout_id}" if layout_id else None
+
+
+def _mapped_target_specs_from_layout(
+    layout: object | None,
+) -> tuple[dict[str, object], ...]:
+    # A missing layout, or one without dst_specs, yields no target specs.
+    specs = getattr(layout, "dst_specs", None)
+    if specs is None:
+        return ()
+    # None-valued shape/stride normalize to () just as logical_length does to 0.
+    return tuple(
+        {
+            "name": str(getattr(spec, "name", "")),
+            "dtype": str(getattr(spec, "dtype", "")),
+            "shape": tuple(int(v) for v in getattr(spec, "shape", None) or ()),
+            "stride": tuple(int(v) for v in getattr(spec, "stride", None) or ()),
+            "logical_length": int(getattr(spec, "logical_length", 0) or 0),
+        }
+        for spec in specs
+    )
+
+
 def _with_retained_binding_report(
-    result: ServingPrefetchResult,
+    result: RuntimePrefetchResult,
     *,
     selection: ResolvedArtifactSelection,
-    target: ServingBindingTarget | ServingBindingSetTarget,
+    target: RealizationTarget | RealizationTargetSet,
     target_bytes: bytes,
     operation_id: str,
-) -> ServingPrefetchResult:
+) -> RuntimePrefetchResult:
     retained_bindings = retained_binding_reports_for(result)
-    is_target_set = isinstance(result, PrefetchedServingBindingSet)
+    is_target_set = isinstance(result, PrefetchHandoffSet)
     target_plan = RealizationTargetPlan(
         kind="target_set" if is_target_set else "retained_binding",
         target_layout_digest=_serving_target_layout_digest(
@@ -373,6 +407,7 @@ def _with_retained_binding_report(
         artifact_profile=selection.artifact_profile,
         authority_scope=selection.authority_scope,
         generation_hint=selection.generation_hint,
+        **selection_report_fields(selection),
         envelope=envelope,
         target_plan=target_plan,
         strategy_plan=strategy_plan_for_execution(envelope=envelope),
@@ -425,13 +460,13 @@ def _operation_status_from_proto(
 
 
 def _serving_target_source_reuse(
-    target: ServingBindingTarget | ServingBindingSetTarget,
-) -> ServingBindingSourceReuseDecision:
-    if isinstance(target, ServingBindingTarget):
+    target: RealizationTarget | RealizationTargetSet,
+) -> RuntimeBindingSourceReuseDecision:
+    if isinstance(target, RealizationTarget):
         return target.resolved_layout.source_reuse
     if not target.members:
         raise ArtifactError(
-            "Serving binding set members must use one source reuse decision",
+            "Realization target set members must use one source reuse decision",
             status_code="FAILED_PRECONDITION",
             retryable=False,
         )
@@ -441,7 +476,7 @@ def _serving_target_source_reuse(
         for member in target.members[1:]
     ):
         raise ArtifactError(
-            "Serving binding set members must use one source reuse decision",
+            "Realization target set members must use one source reuse decision",
             status_code="FAILED_PRECONDITION",
             retryable=False,
         )
@@ -756,9 +791,11 @@ def __init__(
         canonical_index_bytes: bytes | None = None,
         canonical_index: CanonicalIndex | None = None,
         generation: int | None = None,
+        key_generation: int | None = None,
         view_spec: ViewSpecBuildResult | None = None,
         view_metadata: ViewMetadataCache | None = None,
         view_depth: int = 0,
+        source_subject: Any | None = None,
     ) -> None:
         identifiers = [bool(artifact_id), bool(key)]
         if sum(identifiers) == 0:
@@ -775,6 +812,7 @@ def __init__(
         self._view_spec = view_spec
         self._view_metadata = view_metadata
         self._view_depth = max(0, int(view_depth))
+        self._source_subject = source_subject
         effective_index = (
             view_metadata.selected_index
             if view_metadata is not None
@@ -785,6 +823,7 @@ def __init__(
                 entry.name: _meta_from_entry(entry) for entry in effective_index.entries
             }
         self._generation = generation
+        self._key_generation = key_generation
         self._store_ref = store_ref
         self._lock = threading.RLock()
         self._released = False
@@ -861,11 +900,174 @@ def _realization_handle(
         emit_artifact_realization_profile_event(handle.report)
         return handle
 
+    def _model_runtime_request_facts(
+        self,
+        spec: ArtifactRealizationSpec,
+        runtime_context: Any | None,
+    ) -> tuple[ArtifactRealizationSpec, Any]:
+        from tensorcast.artifact_runtime.request_facts import (
+            ModelRuntimeRequestFactsError,
+            resolve_model_runtime_request_facts,
+        )
+        # Resolution failures are re-raised as non-retryable INVALID_ARGUMENT errors.
+        try:
+            facts = resolve_model_runtime_request_facts(
+                spec=spec,
+                runtime_context=runtime_context,
+            )
+        except ModelRuntimeRequestFactsError as exc:
+            raise ArtifactError(
+                str(exc),
+                status_code="INVALID_ARGUMENT",
+                retryable=False,
+            ) from exc
+        return cast(ArtifactRealizationSpec, facts.spec), facts.context
+
+    def _store_bound_runtime_artifact_resolver(self) -> Any:
+        from tensorcast.artifact_runtime.artifact.resolver import (
+            RuntimeArtifactResolver,
+        )
+        from tensorcast.types import (
+            SERVING_MANIFEST_TENSOR_NAME,
+            RuntimeArtifactManifest,
+        )
+        # Only the store is used; the resolver opens artifacts through it by ref.
+        store, _runtime, _pipeline = self._require_components()
+        return RuntimeArtifactResolver(
+            manifest_tensor_name=SERVING_MANIFEST_TENSOR_NAME,
+            schema_version=int(
+                RuntimeArtifactManifest.model_fields["schema_version"].default
+            ),
+            open_artifact_fn=lambda ref: store.artifact(ref=ref),
+        )
+
+    def _execute_model_runtime_realization(
+        self,
+        spec: ArtifactRealizationSpec,
+        *,
+        runtime_host: Any | None,
+        runtime_context: Any | None,
+        runtime_resolver: Any | None,
+        profile_sink: Any | None,
+    ) -> ArtifactRealizationHandle:
+        if runtime_host is None:
+            raise ArtifactError(
+                "model_runtime realization requires runtime_host",
+                status_code="INVALID_ARGUMENT",
+                retryable=False,
+            )
+        # Realizes this artifact via ArtifactRuntimeIntegration and returns its handle.
+        from tensorcast.artifact_runtime.lifecycle import ArtifactRuntimeIntegration
+        # "msa1:" ids take the mounted-source path; others may get a default resolver.
+        artifact_id = self._ensure_identified()
+        resolved_spec, context = self._model_runtime_request_facts(
+            spec,
+            runtime_context,
+        )
+        resolved_runtime_resolver = runtime_resolver
+        if resolved_runtime_resolver is None and not artifact_id.startswith("msa1:"):
+            resolved_runtime_resolver = self._store_bound_runtime_artifact_resolver()
+        integration = ArtifactRuntimeIntegration(
+            resolver=resolved_runtime_resolver,
+            profile_sink=profile_sink,
+            host=runtime_host,
+        )
+        from tensorcast.artifact_runtime.request_facts import (
+            ModelRuntimeRequestFactsError,
+        )
+        # Only ModelRuntimeRequestFactsError is remapped below; other errors propagate.
+        try:
+            if artifact_id.startswith("msa1:"):
+                if self._source_subject is None:
+                    raise ArtifactError(
+                        "mounted-source model_runtime realization requires a "
+                        "daemon-attested source handle; create the artifact with "
+                        "tensorcast.from_disk(...)",
+                        status_code="FAILED_PRECONDITION",
+                        retryable=False,
+                    )
+                source_selection = self._resolve_model_runtime_source_selection(
+                    artifact_id
+                )
+                attachment = integration.realize_mounted_source_model_runtime(
+                    artifact_ref=artifact_id,
+                    source_subject=self._source_subject,
+                    spec=resolved_spec,
+                    context=context,
+                    source_selection=source_selection,
+                    materialization=resolved_spec.options,
+                )
+            else:
+                source_selection = self._resolve_model_runtime_source_selection(
+                    artifact_id
+                )
+                attachment = integration.realize_model_runtime(
+                    artifact_ref=artifact_id,
+                    spec=resolved_spec,
+                    context=context,
+                    source_selection=source_selection,
+                    runtime_artifact_policy=resolved_spec.runtime_artifact_policy,
+                    materialization=resolved_spec.options,
+                )
+        except ModelRuntimeRequestFactsError as exc:
+            raise ArtifactError(
+                str(exc),
+                status_code="INVALID_ARGUMENT",
+                retryable=False,
+            ) from exc
+        handle = getattr(attachment.state, "model_runtime_handle", None)
+        if not isinstance(handle, ArtifactRealizationHandle):
+            raise ArtifactError(
+                "model_runtime realization completed without a realization handle",
+                status_code="INTERNAL",
+                retryable=False,
+            )
+        return handle
+
+    def _resolve_model_runtime_source_selection(
+        self,
+        artifact_id: str,
+    ) -> ResolvedArtifactSelection | None:
+        # Non-"msa1:" ids yield None when no index bytes, client, or view is present.
+        if not artifact_id.startswith("msa1:"):
+            has_selection_input = (
+                self._canonical_index_bytes is not None
+                or self._model_runtime_can_resolve_artifact_index()
+                or self._view_spec is not None
+                or self._view_metadata is not None
+            )
+            if not has_selection_input:
+                return None
+            return self._resolve_realization_selection()
+        index_bytes = self._canonical_index_bytes
+        subject = self._source_subject
+        if index_bytes is None and subject is not None:
+            index_bytes = bytes(getattr(subject, "canonical_index_bytes", None) or b"")
+        hint = self._key_generation
+        if hint is None:
+            hint = self._generation
+        return resolve_artifact_selection(
+            artifact_id=artifact_id,
+            canonical_index_bytes=index_bytes,
+            generation_hint=hint,
+        )
+
+    def _model_runtime_can_resolve_artifact_index(self) -> bool:
+        # True only for a reachable, unclosed store whose runtime has ensure_client().
+        store = None if self._store_ref is None else self._store_ref()
+        usable = store is not None and not getattr(store, "closed", False)
+        runtime = getattr(store, "_runtime", None) if usable else None
+        return callable(getattr(runtime, "ensure_client", None))
+
     def realize(
         self,
         spec: ArtifactRealizationSpec,
         *,
         ctx: CallContext | None = None,
+        runtime_host: Any | None = None,
+        runtime_context: Any | None = None,
+        runtime_resolver: Any | None = None,
+        profile_sink: Any | None = None,
     ) -> ArtifactRealizationHandle:
         if spec.target_kind == "tensor_dict":
             if spec.device is None:
@@ -900,6 +1102,7 @@ def realize(
                 artifact_profile=selection.artifact_profile,
                 authority_scope=selection.authority_scope,
                 generation_hint=selection.generation_hint,
+                **selection_report_fields(selection),
                 envelope=envelope,
                 target_plan=target_plan,
                 strategy_plan=strategy_plan_for_execution(
@@ -975,6 +1178,7 @@ def realize(
                 artifact_profile=selection.artifact_profile,
                 authority_scope=selection.authority_scope,
                 generation_hint=selection.generation_hint,
+                **selection_report_fields(selection),
                 envelope=envelope,
                 target_plan=target_plan,
                 strategy_plan=strategy_plan_for_execution(
@@ -1014,17 +1218,48 @@ def realize(
                 options=cast("GetArtifactOptions | None", spec.options),
                 capacity_bytes=spec.capacity_bytes,
                 publish=spec.publish,
-                serving_runtime_policy=cast(
-                    ServingRuntimePolicyInput | None,
-                    spec.serving_runtime_policy,
+                runtime_artifact_policy=cast(
+                    RuntimeArtifactPolicyInput | None,
+                    spec.runtime_artifact_policy,
                 ),
                 ctx=ctx,
             )
-            binding_layout_id = str(getattr(binding, "binding_layout_id", "") or "")
+            binding_layout = getattr(binding, "layout", None)
+            layout_binding_id = str(
+                getattr(binding_layout, "binding_layout_id", "") or ""
+            )
+            binding_layout_id = str(
+                getattr(binding, "binding_layout_id", "") or layout_binding_id
+            )
+            mapped_view_id = None
+            copy_plan_digest = None
+            if spec.mapping is not None:
+                target_layout = getattr(binding_layout, "target_layout", None)
+                mapped_view_id = str(getattr(target_layout, "view_id", "") or "")
+                if not mapped_view_id:
+                    target_specs = _mapped_target_specs_from_layout(binding_layout)
+                    if target_specs:
+                        mapped_view_id = compute_mapped_view_id_from_specs(
+                            canonical_index_bytes=selection.canonical_index_bytes,
+                            source_view_id=selection.view_id,
+                            plan=normalize_copy_plan(cast(CopyPlan, spec.mapping)),
+                            target_specs=target_specs,
+                        )
+                if not mapped_view_id:
+                    raise ArtifactError(
+                        "mapped owned binding realization requires mapped target "
+                        "layout identity",
+                        status_code="FAILED_PRECONDITION",
+                        retryable=False,
+                    )
+                copy_plan_digest = mapped_view_id
             target_plan = RealizationTargetPlan(
                 kind="binding_owned",
                 device=spec.device,
+                target_layout_digest=_binding_layout_target_digest(binding_layout_id),
                 binding_layout_id=binding_layout_id,
+                mapped_view_id=mapped_view_id,
+                copy_plan_digest=copy_plan_digest,
             )
             envelope = envelope_for_binding(
                 binding,
@@ -1065,9 +1300,9 @@ def realize(
                 packing=spec.packing,
                 options=cast("GetArtifactOptions | None", spec.options),
                 publish=spec.publish,
-                serving_runtime_policy=cast(
-                    ServingRuntimePolicyInput | None,
-                    spec.serving_runtime_policy,
+                runtime_artifact_policy=cast(
+                    RuntimeArtifactPolicyInput | None,
+                    spec.runtime_artifact_policy,
                 ),
                 ctx=ctx,
             )
@@ -1153,7 +1388,11 @@ def realize(
             selection = resolve_artifact_selection(
                 artifact_id=source_artifact_id,
                 canonical_index_bytes=canonical_index_bytes,
-                generation_hint=self._generation,
+                generation_hint=(
+                    self._key_generation
+                    if self._key_generation is not None
+                    else self._generation
+                ),
             )
             target_plan = RealizationTargetPlan(
                 kind="mounted_source",
@@ -1182,10 +1421,12 @@ def realize(
                 promote_fn=lambda: promoted,
             )
         if spec.target_kind == "model_runtime":
-            raise ArtifactError(
-                "model_runtime realization is not lowered through Artifact.realize yet",
-                status_code="UNIMPLEMENTED",
-                retryable=False,
+            return self._execute_model_runtime_realization(
+                spec,
+                runtime_host=runtime_host,
+                runtime_context=runtime_context,
+                runtime_resolver=runtime_resolver,
+                profile_sink=profile_sink,
             )
         raise ArtifactError(
             f"Unsupported realization target kind: {spec.target_kind}",
@@ -1198,7 +1439,7 @@ def realize_async(
         spec: ArtifactRealizationSpec,
         *,
         ctx: CallContext | None = None,
-    ) -> Operation[PrefetchedReplica] | Operation[ServingPrefetchResult]:
+    ) -> Operation[PrefetchedReplica] | Operation[RuntimePrefetchResult]:
         if spec.target_kind == "retained_replica":
             if spec.device is None:
                 raise ArtifactError(
@@ -1218,21 +1459,19 @@ def realize_async(
                     status_code="INVALID_ARGUMENT",
                     retryable=False,
                 )
-            if isinstance(spec.target, ServingBindingSetTarget):
+            if isinstance(spec.target, RealizationTargetSet):
                 raise ArtifactError(
-                    "ServingBindingSetTarget requires target_set realization",
+                    "RealizationTargetSet requires target_set realization",
                     status_code="INVALID_ARGUMENT",
                     retryable=False,
                 )
             readiness = (
-                cast(ServingBindingReadiness, spec.readiness)
+                cast(RuntimeBindingReadiness, spec.readiness)
                 if spec.readiness is not None
-                else "serving_local_ready"
+                else "runtime_local_ready"
             )
             return self._execute_prefetch(
-                target=cast(
-                    ServingBindingTarget | ServingBindingSetTarget, spec.target
-                ),
+                target=cast(RealizationTarget | RealizationTargetSet, spec.target),
                 readiness=readiness,
                 retention=cast(PrefetchRetentionPolicy | None, spec.retention),
                 ctx=ctx,
@@ -1244,16 +1483,16 @@ def realize_async(
                     status_code="INVALID_ARGUMENT",
                     retryable=False,
                 )
-            if not isinstance(spec.target, ServingBindingSetTarget):
+            if not isinstance(spec.target, RealizationTargetSet):
                 raise ArtifactError(
-                    "target_set realization requires ServingBindingSetTarget",
+                    "target_set realization requires RealizationTargetSet",
                     status_code="INVALID_ARGUMENT",
                     retryable=False,
                 )
             readiness = (
-                cast(ServingBindingReadiness, spec.readiness)
+                cast(RuntimeBindingReadiness, spec.readiness)
                 if spec.readiness is not None
-                else "serving_local_ready"
+                else "runtime_local_ready"
             )
             return self._execute_prefetch(
                 target=spec.target,
@@ -1560,7 +1799,7 @@ def bind(
         options: GetArtifactOptions | None = None,
         capacity_bytes: int | None = None,
         publish: bool = False,
-        serving_runtime_policy: ServingRuntimePolicyInput | None = None,
+        runtime_artifact_policy: RuntimeArtifactPolicyInput | None = None,
         ctx: CallContext | None = None,
     ) -> Binding:
         handle = self.realize(
@@ -1571,7 +1810,7 @@ def bind(
                 options=options,
                 capacity_bytes=capacity_bytes,
                 publish=publish,
-                serving_runtime_policy=serving_runtime_policy,
+                runtime_artifact_policy=runtime_artifact_policy,
             ),
             ctx=ctx,
         )
@@ -1586,7 +1825,7 @@ def _execute_bind_owned(
         options: GetArtifactOptions | None = None,
         capacity_bytes: int | None = None,
         publish: bool = False,
-        serving_runtime_policy: ServingRuntimePolicyInput | None = None,
+        runtime_artifact_policy: RuntimeArtifactPolicyInput | None = None,
         ctx: CallContext | None = None,
     ) -> Binding:
         """Allocate daemon-owned target tensors, fill from this artifact, and return a Binding."""
@@ -1641,8 +1880,8 @@ def _execute_bind_owned(
             packing=packing,
             options=options,
             publish=publish,
-            serving_runtime_policy=coerce_serving_runtime_policy(
-                serving_runtime_policy
+            runtime_artifact_policy=_resolve_runtime_artifact_policy(
+                runtime_artifact_policy
             ),
             ctx=ctx,
         )
@@ -1655,7 +1894,7 @@ def bind_into(
         packing: str = "byte_space",
         options: GetArtifactOptions | None = None,
         publish: bool = False,
-        serving_runtime_policy: ServingRuntimePolicyInput | None = None,
+        runtime_artifact_policy: RuntimeArtifactPolicyInput | None = None,
         ctx: CallContext | None = None,
     ) -> Binding:
         handle = self.realize(
@@ -1665,7 +1904,7 @@ def bind_into(
                 packing=packing,
                 options=options,
                 publish=publish,
-                serving_runtime_policy=serving_runtime_policy,
+                runtime_artifact_policy=runtime_artifact_policy,
             ),
             ctx=ctx,
         )
@@ -1679,10 +1918,13 @@ def _execute_bind_into(
         packing: str = "byte_space",
         options: GetArtifactOptions | None = None,
         publish: bool = False,
-        serving_runtime_policy: ServingRuntimePolicyInput | None = None,
+        runtime_artifact_policy: RuntimeArtifactPolicyInput | None = None,
         ctx: CallContext | None = None,
     ) -> Binding:
         """Adopt user-owned CUDA tensors, fill once, and return a Binding."""
+        resolved_runtime_artifact_policy = _resolve_runtime_artifact_policy(
+            runtime_artifact_policy
+        )
         if mapping is not None:
             return self._bind_into_mapped(
                 target_tensors=target_tensors,
@@ -1690,9 +1932,7 @@ def _execute_bind_into(
                 packing=packing,
                 options=options,
                 publish=publish,
-                serving_runtime_policy=coerce_serving_runtime_policy(
-                    serving_runtime_policy
-                ),
+                runtime_artifact_policy=resolved_runtime_artifact_policy,
                 ctx=ctx,
             )
         store, runtime, pipeline = self._require_components()
@@ -1823,9 +2063,7 @@ def _execute_bind_into(
                         target_layout=region_layout.layout,
                         device_uuid=device_uuid_for(device_id),
                         source_policy=source_policy,
-                        serving_runtime_policy=coerce_serving_runtime_policy(
-                            serving_runtime_policy
-                        ),
+                        runtime_artifact_policy=resolved_runtime_artifact_policy,
                         operation_id=operation_id,
                         group_realization=ctx.group_realization
                         if ctx is not None
@@ -1930,7 +2168,7 @@ def _bind_into_mapped(
         packing: str,
         options: GetArtifactOptions | None,
         publish: bool,
-        serving_runtime_policy: ServingRuntimePolicy | None,
+        runtime_artifact_policy: RuntimeArtifactPolicy | None,
         ctx: CallContext | None,
     ) -> Binding:
         store, runtime, pipeline = self._require_components()
@@ -2076,7 +2314,7 @@ def _bind_into_mapped(
                         target_layout=region_layout.layout,
                         device_uuid=device_uuid_for(device_id),
                         source_policy=source_policy,
-                        serving_runtime_policy=serving_runtime_policy,
+                        runtime_artifact_policy=runtime_artifact_policy,
                         copy_plan=copy_plan,
                         dst_tensors=target_tensors,
                         operation_id=operation_id,
@@ -2216,7 +2454,7 @@ def _bind_owned(
         packing: str,
         options: GetArtifactOptions | None,
         publish: bool,
-        serving_runtime_policy: ServingRuntimePolicy | None,
+        runtime_artifact_policy: RuntimeArtifactPolicy | None,
         ctx: CallContext | None,
     ) -> Binding:
         store, runtime, _ = self._require_components()
@@ -2374,7 +2612,7 @@ def _bind_owned(
                     device_uuid=device_uuid_for(device_id),
                     binding_layout_id=owner_layout.binding_layout_id,
                     source_policy=source_policy,
-                    serving_runtime_policy=serving_runtime_policy,
+                    runtime_artifact_policy=runtime_artifact_policy,
                     copy_plan=copy_plan_proto,
                     dst_specs=dst_specs,
                     operation_id=operation_id,
@@ -2577,24 +2815,24 @@ async def tensor_dict_async(
     def _prefetch_serving_binding(
         self,
         *,
-        target: ServingBindingTarget | ServingBindingSetTarget,
-        readiness: ServingBindingReadiness,
+        target: RealizationTarget | RealizationTargetSet,
+        readiness: RuntimeBindingReadiness,
         retention: PrefetchRetentionPolicy | None,
         ctx: CallContext | None,
-    ) -> Operation[ServingPrefetchResult]:
+    ) -> Operation[RuntimePrefetchResult]:
         artifact_id = self._ensure_identified()
         _, runtime, _ = self._require_components()
         resolved_selection = self._resolve_realization_selection()
         selection = resolved_selection.proto
         source_reuse = _serving_target_source_reuse(target)
-        if source_reuse.mode in {"serving_transform_required", "unsupported"}:
+        if source_reuse.mode in {"runtime_transform_required", "unsupported"}:
             reason = source_reuse.reason or (
-                "source-to-target serving transform requires a topology-scoped executor"
-                if source_reuse.mode == "serving_transform_required"
-                else "serving binding source is unsupported"
+                "source-to-target runtime transform requires a topology-scoped executor"
+                if source_reuse.mode == "runtime_transform_required"
+                else "runtime binding source is unsupported"
             )
             raise ArtifactError(
-                f"serving binding prefetch rejected before allocation: {reason}",
+                f"runtime binding prefetch rejected before allocation: {reason}",
                 status_code="FAILED_PRECONDITION",
                 retryable=False,
             )
@@ -2663,7 +2901,7 @@ def _prefetch_serving_binding(
 
         def _result_factory(
             operation_response: operation_pb2.GetOperationResponse,
-        ) -> ServingPrefetchResult:
+        ) -> RuntimePrefetchResult:
             return _with_retained_binding_report(
                 _serving_prefetch_result_from_operation_response(operation_response),
                 selection=resolved_selection,
@@ -2689,12 +2927,12 @@ def prefetch(
         self,
         *,
         device: torch.device | str | int | None = None,
-        target: ServingBindingTarget | ServingBindingSetTarget | None = None,
-        readiness: ServingBindingReadiness = "serving_local_ready",
+        target: RealizationTarget | RealizationTargetSet | None = None,
+        readiness: RuntimeBindingReadiness = "runtime_local_ready",
         retention: PrefetchRetentionPolicy | None = None,
         ctx: CallContext | None = None,
         options: GetArtifactOptions | None = None,
-    ) -> Operation[PrefetchedReplica] | Operation[ServingPrefetchResult]:
+    ) -> Operation[PrefetchedReplica] | Operation[RuntimePrefetchResult]:
         if target is not None:
             if device is not None:
                 raise ArtifactError(
@@ -2708,7 +2946,7 @@ def prefetch(
                     readiness=readiness,
                     retention=retention,
                 )
-                if isinstance(target, ServingBindingSetTarget)
+                if isinstance(target, RealizationTargetSet)
                 else ArtifactRealizationSpec.retained_binding(
                     target=target,
                     readiness=readiness,
@@ -2735,12 +2973,12 @@ def _execute_prefetch(
         self,
         *,
         device: torch.device | str | int | None = None,
-        target: ServingBindingTarget | ServingBindingSetTarget | None = None,
-        readiness: ServingBindingReadiness = "serving_local_ready",
+        target: RealizationTarget | RealizationTargetSet | None = None,
+        readiness: RuntimeBindingReadiness = "runtime_local_ready",
         retention: PrefetchRetentionPolicy | None = None,
         ctx: CallContext | None = None,
         options: GetArtifactOptions | None = None,
-    ) -> Operation[PrefetchedReplica] | Operation[ServingPrefetchResult]:
+    ) -> Operation[PrefetchedReplica] | Operation[RuntimePrefetchResult]:
         from tensorcast.api._config import GetArtifactOptions
 
         artifact_id = self._ensure_identified()
@@ -2868,13 +3106,7 @@ def _execute_prefetch(
                 retryable=False,
             )
 
-        source: str | None = None
-        if payload.source == store_daemon_pb2.MATERIALIZATION_SOURCE_P2P:
-            source = "p2p"
-        elif payload.source == store_daemon_pb2.MATERIALIZATION_SOURCE_DISK:
-            source = "disk"
-        elif payload.source == store_daemon_pb2.MATERIALIZATION_SOURCE_LOCAL_REPLICA:
-            source = "local"
+        source = materialization_source_label(payload.source)
 
         target_plan = RealizationTargetPlan(
             kind="retained_replica",
@@ -2896,6 +3128,7 @@ def _execute_prefetch(
             artifact_profile=resolved_selection.artifact_profile,
             authority_scope=resolved_selection.authority_scope,
             generation_hint=resolved_selection.generation_hint,
+            **selection_report_fields(resolved_selection),
             envelope=envelope,
             target_plan=target_plan,
             strategy_plan=strategy_plan_for_execution(
@@ -3203,7 +3436,11 @@ def _resolve_realization_selection(
                 tensor_names=requested_names,
                 view_subset_hash=inputs.view_subset_hash,
                 view_index_hint=view_index_hint,
-                generation_hint=self._generation,
+                generation_hint=(
+                    self._key_generation
+                    if self._key_generation is not None
+                    else self._generation
+                ),
                 allow_view_id_without_spec=bool(
                     inputs.view_id_hint
                     and not (view_spec_proto is not None and view_spec_proto.tensors)
@@ -3419,9 +3656,19 @@ def _ensure_identified(self) -> str:
             if self._artifact_id:
                 return self._artifact_id
             if self._key_hint:
-                artifact_id, _disk_path = runtime.resolve_key_mapping_cached(
+                resolved_mapping = runtime.resolve_key_mapping_cached(
                     key=self._key_hint
                 )
+                if isinstance(resolved_mapping, tuple):
+                    artifact_id = resolved_mapping[0]
+                    generation = (
+                        int(resolved_mapping[2])
+                        if len(resolved_mapping) > 2 and resolved_mapping[2] is not None
+                        else None
+                    )
+                else:
+                    artifact_id = getattr(resolved_mapping, "artifact_id", None)
+                    generation = getattr(resolved_mapping, "generation", None)
                 if not artifact_id:
                     raise ArtifactError(
                         f"Artifact key '{self._key_hint}' is not mapped",
@@ -3429,6 +3676,8 @@ def _ensure_identified(self) -> str:
                         retryable=False,
                     )
                 self._artifact_id = artifact_id
+                if self._key_generation is None and generation is not None:
+                    self._key_generation = int(generation)
                 return artifact_id
             raise ArtifactError(
                 "Artifact handle missing identity",
@@ -3737,6 +3986,7 @@ def _derive_view(
                 view_spec=self._view_spec,
                 view_metadata=self._view_metadata,
                 view_depth=self._view_depth,
+                source_subject=self._source_subject,
             )
         base_index = self._effective_index()
         entry_shapes = {entry.name: tuple(entry.shape) for entry in base_index.entries}
@@ -3762,9 +4012,11 @@ def _derive_view(
             canonical_index_bytes=self._canonical_index_bytes,
             canonical_index=self._canonical_index,
             generation=self._generation,
+            key_generation=self._key_generation,
             view_spec=composed_spec,
             view_metadata=view_cache,
             view_depth=depth,
+            source_subject=self._source_subject,
         )
 
     def _hydrate_from_cache_entry(self, entry: ArtifactCacheEntry) -> None:
diff --git a/tensorcast/api/store/binding.py b/tensorcast/api/store/binding.py
index 52e5bfc9..d53a0b52 100644
--- a/tensorcast/api/store/binding.py
+++ b/tensorcast/api/store/binding.py
@@ -33,9 +33,10 @@
     GroupRealizationAcquireRef,
     PartialSealResult,
     PublicDiskSourceHandle,
-    ServingRuntimePolicyInput,
+    RuntimeArtifactPolicy,
+    RuntimeArtifactPolicyInput,
     SourceBoundPlanDiagnostics,
-    coerce_serving_runtime_policy,
+    coerce_runtime_artifact_policy,
 )
 
 if TYPE_CHECKING:
@@ -124,6 +125,12 @@ def _reject_live_swap_group_realization(ctx: CallContext | None) -> None:
     )
 
 
+def _resolve_runtime_artifact_policy(
+    runtime_artifact_policy: RuntimeArtifactPolicyInput | None,
+) -> RuntimeArtifactPolicy | None:
+    return coerce_runtime_artifact_policy(runtime_artifact_policy)  # thin pass-through
+
+
 def _clone_view_spec(
     view_spec: common_pb2.ViewSpec | None,
 ) -> common_pb2.ViewSpec | None:
@@ -682,7 +689,7 @@ def swap(
         *,
         options: "GetArtifactOptions | None" = None,
         publish: bool = False,
-        serving_runtime_policy: ServingRuntimePolicyInput | None = None,
+        runtime_artifact_policy: RuntimeArtifactPolicyInput | None = None,
         activate_key: str | None = None,
         expected_active_artifact_id: str | None = None,
         expected_active_generation: int | None = None,
@@ -709,8 +716,8 @@ def swap(
                 artifact,
                 options=options,
                 publish=publish,
-                serving_runtime_policy=coerce_serving_runtime_policy(
-                    serving_runtime_policy
+                runtime_artifact_policy=_resolve_runtime_artifact_policy(
+                    runtime_artifact_policy
                 ),
                 wait=wait,
                 drain_timeout_s=drain_timeout_s,
diff --git a/tensorcast/api/store/inplace_slot.py b/tensorcast/api/store/inplace_slot.py
index bef86a23..e97679ed 100644
--- a/tensorcast/api/store/inplace_slot.py
+++ b/tensorcast/api/store/inplace_slot.py
@@ -47,7 +47,7 @@
 )
 from tensorcast.proto.common.v1 import common_pb2
 from tensorcast.proto.daemon.v2 import store_daemon_pb2
-from tensorcast.types import ServingRuntimePolicy
+from tensorcast.types import RuntimeArtifactPolicy
 
 logger = logging.getLogger(__name__)
 
@@ -589,7 +589,7 @@ def swap(
         *,
         options: GetArtifactOptions | None = None,
         publish: bool = False,
-        serving_runtime_policy: ServingRuntimePolicy | None = None,
+        runtime_artifact_policy: RuntimeArtifactPolicy | None = None,
         wait: bool = True,
         drain_timeout_s: float | None = None,
         ctx: CallContext | None = None,
@@ -690,7 +690,7 @@ def swap(
                         target_layout=region_layout.layout,
                         device_uuid=device_uuid_for(self._device_id),
                         source_policy=source_policy,
-                        serving_runtime_policy=serving_runtime_policy,
+                        runtime_artifact_policy=runtime_artifact_policy,
                         copy_plan=self._copy_plan,
                         dst_tensors=self._tensors,
                         operation_id=operation_id,
@@ -822,7 +822,7 @@ def swap(
                     target_layout=region_layout.layout,
                     device_uuid=device_uuid_for(self._device_id),
                     source_policy=source_policy,
-                    serving_runtime_policy=serving_runtime_policy,
+                    runtime_artifact_policy=runtime_artifact_policy,
                     operation_id=operation_id,
                     timeout_s=rpc_timeout_s if rpc_timeout_s is not None else 600.0,
                 )
diff --git a/tensorcast/api/store/materialization.py b/tensorcast/api/store/materialization.py
index 8a42084b..013dcddd 100644
--- a/tensorcast/api/store/materialization.py
+++ b/tensorcast/api/store/materialization.py
@@ -1993,7 +1993,7 @@ def _materialize_payload(
                 if isinstance(resolved_mapping, tuple):
                     resolved_artifact_id = resolved_mapping[0]
                 else:
-                    resolved_artifact_id = resolved_mapping
+                    resolved_artifact_id = resolved_mapping.artifact_id
             except Exception:  # noqa: BLE001
                 logger.exception(
                     "store.materialize.key_mapping_prefetch_failed",
diff --git a/tensorcast/api/store/owned_binding_slot.py b/tensorcast/api/store/owned_binding_slot.py
index a019138c..052fb7e6 100644
--- a/tensorcast/api/store/owned_binding_slot.py
+++ b/tensorcast/api/store/owned_binding_slot.py
@@ -52,8 +52,8 @@
     ExecutionDiagnostics,
     GroupRealizationAcquireRef,
     PublicDiskSourceHandle,
+    RuntimeArtifactPolicy,
     ServerConfig,
-    ServingRuntimePolicy,
     SourceBoundCapability,
     SourceBoundPlanDiagnostics,
 )
@@ -342,7 +342,7 @@ def _build_source_execution_contract(
     if policy_mode is None and explicit_collective_group is not None:
         from tensorcast.api._config import CollectivePolicyMode
 
-        policy_mode = CollectivePolicyMode.REQUIRE_COLLECTIVE
+        policy_mode = CollectivePolicyMode.COLLECTIVE_FIRST
 
     if str(getattr(policy_mode, "value", policy_mode) or "") == "disable_collective":
         if explicit_collective_group is not None:
@@ -1262,7 +1262,7 @@ def swap(
         *,
         options: GetArtifactOptions | None = None,
         publish: bool = False,
-        serving_runtime_policy: ServingRuntimePolicy | None = None,
+        runtime_artifact_policy: RuntimeArtifactPolicy | None = None,
         wait: bool = True,
         drain_timeout_s: float | None = None,
         ctx: CallContext | None = None,
@@ -1304,7 +1304,7 @@ def swap(
                 source_policy=source_policy,
                 execution_topology=execution_topology,
                 collective_policy=collective_policy,
-                serving_runtime_policy=serving_runtime_policy,
+                runtime_artifact_policy=runtime_artifact_policy,
                 operation_id=operation_id,
                 timeout_s=rpc_timeout_s if rpc_timeout_s is not None else 600.0,
             )
diff --git a/tensorcast/api/store/serving_builder.py b/tensorcast/api/store/publication_builder.py
similarity index 83%
rename from tensorcast/api/store/serving_builder.py
rename to tensorcast/api/store/publication_builder.py
index 8404b020..3d09cca2 100644
--- a/tensorcast/api/store/serving_builder.py
+++ b/tensorcast/api/store/publication_builder.py
@@ -1,4 +1,9 @@
 #  Copyright (c) 2026, TensorCast Team.
+"""Build serving-manifest publication payloads for runtime artifacts.
+
+The serving names in this module are intentional publication/manifest ABI
+terms. Do not add runtime session, resolver, or source-authority behavior here.
+"""
 
 from __future__ import annotations
 
@@ -32,11 +37,11 @@
     PureTransformPublicationSpec,
     RepresentationPublishContract,
     RepresentationPublishSpec,
-    ServingAdmissionFacts,
-    ServingArtifactManifest,
-    ServingBuildIntent,
-    ServingPublicationSubject,
-    ServingSupportLevel,
+    RuntimeAdmissionFacts,
+    RuntimeArtifactBuildIntent,
+    RuntimeArtifactManifest,
+    RuntimePublicationSubject,
+    RuntimeSupportLevel,
     build_serving_manifest_ref,
 )
 
@@ -47,28 +52,28 @@
 
 
 @dataclass(frozen=True, slots=True)
-class PreparedServingRegistration:
+class PreparedRuntimeArtifactRegistration:
     tensors: dict[str, torch.Tensor]
     serving_manifest_ref: str
     manifest_tensor_name: str
-    serving_manifest: ServingArtifactManifest
+    serving_manifest: RuntimeArtifactManifest  # "serving" name: manifest ABI term
     serving_manifest_bytes: bytes
     representation_contract_hash: str
     canonical_index: CanonicalIndex
 
 
 @dataclass(frozen=True, slots=True)
-class ServingManifestCarrier:
+class RuntimeArtifactManifestCarrier:
     serving_manifest_ref: str
     manifest_tensor_name: str
-    serving_manifest: ServingArtifactManifest
+    serving_manifest: RuntimeArtifactManifest  # "serving" name: manifest ABI term
     serving_manifest_bytes: bytes
 
 
 @dataclass(frozen=True, slots=True)
-class RegisteredServingPublication:
+class RegisteredRuntimeArtifactPublication:
     registered_artifact: RegisteredArtifact
-    prepared_registration: PreparedServingRegistration
+    prepared_registration: PreparedRuntimeArtifactRegistration
     publication: RepresentationPublishSpec
 
 
@@ -77,10 +82,6 @@ class RegisteredServingPublication:
 _PURE_TRANSFORM_TARGET_REALIZATION_KIND = "artifact_publishable"
 
 
-def _canonical_json_bytes(payload: object) -> bytes:
-    return json.dumps(payload, sort_keys=True, separators=(",", ":")).encode("utf-8")
-
-
 def _pad_manifest_carrier_bytes(payload: bytes) -> bytes:
     align = int(DEFAULT_ALIGN)
     if align <= 1:
@@ -97,7 +98,7 @@ def _manifest_byte_mismatch_message(
     *,
     existing_bytes: bytes,
     expected_bytes: bytes,
-    expected_manifest: ServingArtifactManifest,
+    expected_manifest: RuntimeArtifactManifest,
 ) -> str:
     def _sha(data: bytes) -> str:
         return hashlib.sha256(data).hexdigest()
@@ -127,7 +128,7 @@ def _summarize_value(value: object) -> object:
         "first_diff_offset": _first_diff_offset(existing_bytes, expected_bytes),
     }
     try:
-        existing_manifest = ServingArtifactManifest.from_bytes(existing_bytes)
+        existing_manifest = RuntimeArtifactManifest.from_bytes(existing_bytes)
     except Exception as exc:  # pragma: no cover - diagnostic path
         details["existing_manifest_error"] = type(exc).__name__
         details["existing_manifest_error_message"] = str(exc)
@@ -154,15 +155,15 @@ def _summarize_value(value: object) -> object:
     )
 
 
-def prepare_serving_manifest_carrier(
+def prepare_runtime_artifact_manifest_carrier(
     *,
-    build_intent: ServingBuildIntent,
+    build_intent: RuntimeArtifactBuildIntent,
     canonical_index: CanonicalIndex,
     representation_contract_hash: str | None = None,
     logical_topology_json: str | None = None,
     serving_manifest_ref: str | None = None,
     topology_admission_digest: str | None = None,
-) -> ServingManifestCarrier:
+) -> RuntimeArtifactManifestCarrier:
     normalized_logical_topology = _normalize_logical_topology_payload(
         logical_topology_json
     )
@@ -177,14 +178,14 @@ def prepare_serving_manifest_carrier(
             helper_name="serving manifest carrier",
         )
     )
-    manifest = ServingArtifactManifest.from_build_intent(
+    manifest = RuntimeArtifactManifest.from_build_intent(
         intent=build_intent,
         representation_contract_hash=resolved_representation_contract_hash,
-        tensor_schema_hash=compute_serving_tensor_schema_hash(
+        tensor_schema_hash=compute_runtime_artifact_tensor_schema_hash(
             canonical_index,
             manifest_tensor_name=manifest_tensor_name,
         ),
-        canonical_tensor_count=count_canonical_serving_tensors(
+        canonical_tensor_count=count_canonical_runtime_tensors(
             canonical_index,
             manifest_tensor_name=manifest_tensor_name,
         ),
@@ -198,7 +199,7 @@ def prepare_serving_manifest_carrier(
         ),
         topology_admission_digest=topology_admission_digest,
     )
-    return ServingManifestCarrier(
+    return RuntimeArtifactManifestCarrier(
         serving_manifest_ref=resolved_manifest_ref,
         manifest_tensor_name=manifest_tensor_name,
         serving_manifest=manifest,
@@ -206,32 +207,6 @@ def prepare_serving_manifest_carrier(
     )
 
 
-def _multibase_multihash_sha256(digest: bytes) -> str:
-    if len(digest) != 32:
-        raise ValueError("SHA256 digest must be 32 bytes")
-    import base64
-
-    multihash = b"\x12\x20" + digest
-    encoded = base64.b32encode(multihash).decode("ascii").lower().rstrip("=")
-    return f"b{encoded}"
-
-
-def _hash_payload_to_multihash(payload: object) -> str:
-    import hashlib
-
-    return _multibase_multihash_sha256(
-        hashlib.sha256(_canonical_json_bytes(payload)).digest()
-    )
-
-
-def _hash_versioned_payload_to_multihash(version: str, payload: object) -> str:
-    import hashlib
-
-    serialized = _canonical_json_bytes(payload)
-    versioned_payload = version.encode("utf-8") + b"\n" + serialized
-    return _multibase_multihash_sha256(hashlib.sha256(versioned_payload).digest())
-
-
 def _dtype_to_string(dtype: torch.dtype) -> str:
     return str(dtype)
 
@@ -257,12 +232,12 @@ def _normalize_contract_family(
     return cast(AssemblyContractFamily, normalized)
 
 
-def _coerce_serving_support_level(
-    value: ServingSupportLevel | str,
-) -> ServingSupportLevel:
-    if isinstance(value, ServingSupportLevel):
+def _coerce_runtime_support_level(
+    value: RuntimeSupportLevel | str,
+) -> RuntimeSupportLevel:
+    if isinstance(value, RuntimeSupportLevel):  # already coerced: returned unchanged
         return value
-    return ServingSupportLevel(str(value).strip())
+    return RuntimeSupportLevel(str(value).strip())  # stringify, trim, then construct
 
 
 def _resolve_manifest_tensor_name(
@@ -293,7 +268,7 @@ def _resolve_manifest_tensor_name(
 
 def _resolve_explicit_representation_contract_hash(
     *,
-    build_intent: ServingBuildIntent,
+    build_intent: RuntimeArtifactBuildIntent,
     representation_contract_hash: str | None,
     helper_name: str,
 ) -> str:
@@ -303,14 +278,14 @@ def _resolve_explicit_representation_contract_hash(
     if not resolved_representation_contract_hash:
         raise ArtifactError(
             f"{helper_name} requires an explicit representation_contract_hash in "
-            "the argument or ServingBuildIntent",
+            "the argument or RuntimeArtifactBuildIntent",
             status_code="FAILED_PRECONDITION",
             retryable=False,
         )
     return resolved_representation_contract_hash
 
 
-def _canonical_serving_entries(
+def _canonical_runtime_entries(
     canonical_index: CanonicalIndex,
     *,
     manifest_tensor_name: str,
@@ -330,7 +305,7 @@ def _repack_canonical_index(
     repacked_entries: list[CanonicalIndexEntry] = []
     offset = 0
     for entry in sorted(
-        _canonical_serving_entries(
+        _canonical_runtime_entries(
             canonical_index,
             manifest_tensor_name=manifest_tensor_name,
         ),
@@ -355,40 +330,28 @@ def _repack_canonical_index(
     )
 
 
-def compute_serving_tensor_schema_hash(
+def compute_runtime_artifact_tensor_schema_hash(
     canonical_index: CanonicalIndex,
     *,
     manifest_tensor_name: str = SERVING_MANIFEST_TENSOR_NAME,
 ) -> str:
-    tensors = [
-        {
-            "name": str(entry.name),
-            "dtype": _dtype_to_string(entry.dtype),
-            "shape": [int(dim) for dim in entry.shape],
-            "stride": [int(dim) for dim in entry.stride],
-            "element_size": int(entry.dtype.itemsize),
-        }
-        for entry in sorted(
-            _canonical_serving_entries(
-                canonical_index,
-                manifest_tensor_name=manifest_tensor_name,
-            ),
-            key=lambda entry: str(entry.name),
-        )
-    ]
-    return _hash_versioned_payload_to_multihash(
-        "tensorcast.representation.tensor_schema.v1",
-        {"tensors": tensors},
+    from tensorcast.artifact_runtime.contract import (
+        compute_canonical_runtime_tensor_schema_hash,
+    )
+
+    return compute_canonical_runtime_tensor_schema_hash(
+        canonical_index,
+        manifest_tensor_name=manifest_tensor_name,
     )
 
 
-def count_canonical_serving_tensors(
+def count_canonical_runtime_tensors(
     canonical_index: CanonicalIndex,
     *,
     manifest_tensor_name: str = SERVING_MANIFEST_TENSOR_NAME,
 ) -> int:
     return len(
-        _canonical_serving_entries(
+        _canonical_runtime_entries(
             canonical_index,
             manifest_tensor_name=manifest_tensor_name,
         )
@@ -424,8 +387,8 @@ def _resolve_publication_subject(
     | "StoreArtifactDescriptor"
     | str
     | None = None,
-    publication_subject: ServingPublicationSubject | BindingValueRef | None = None,
-) -> ServingPublicationSubject:
+    publication_subject: RuntimePublicationSubject | BindingValueRef | None = None,
+) -> RuntimePublicationSubject:
     if (serving_artifact is None) == (publication_subject is None):
         raise ArtifactError(
             "publication bundle requires exactly one of serving_artifact or publication_subject",
@@ -434,10 +397,10 @@ def _resolve_publication_subject(
         )
     if publication_subject is not None:
         if isinstance(publication_subject, BindingValueRef):
-            return ServingPublicationSubject(binding_value_ref=publication_subject)
+            return RuntimePublicationSubject(binding_value_ref=publication_subject)
         return publication_subject
     assert serving_artifact is not None
-    return ServingPublicationSubject(
+    return RuntimePublicationSubject(
         serving_artifact_id=_artifact_id_from_input(serving_artifact)
     )
 
@@ -544,72 +507,16 @@ def _canonical_index_from_tensors(
 def _normalize_logical_topology_payload(
     logical_topology_json: str | None,
 ) -> dict[str, object] | None:
-    if logical_topology_json is None:
-        return None
+    from tensorcast.artifact_runtime.contract import normalize_logical_topology_payload
+
     try:
-        payload = json.loads(logical_topology_json)
-    except Exception as exc:  # noqa: BLE001
+        return normalize_logical_topology_payload(logical_topology_json)
+    except ValueError as exc:
         raise ArtifactError(
-            "logical_topology_json must be valid JSON",
+            str(exc),
             status_code="INVALID_ARGUMENT",
             retryable=False,
         ) from exc
-    if not isinstance(payload, dict):
-        raise ArtifactError(
-            "logical_topology_json must encode an object",
-            status_code="INVALID_ARGUMENT",
-            retryable=False,
-        )
-    family = str(payload.get("family", "")).strip()
-    version = str(payload.get("version", "")).strip()
-    raw_dimensions = payload.get("dimensions", [])
-    if not family:
-        raise ArtifactError(
-            "logical_topology_json.family must not be empty",
-            status_code="INVALID_ARGUMENT",
-            retryable=False,
-        )
-    if not version:
-        raise ArtifactError(
-            "logical_topology_json.version must not be empty",
-            status_code="INVALID_ARGUMENT",
-            retryable=False,
-        )
-    if not isinstance(raw_dimensions, list):
-        raise ArtifactError(
-            "logical_topology_json.dimensions must be a list",
-            status_code="INVALID_ARGUMENT",
-            retryable=False,
-        )
-    dimensions: list[dict[str, int | str]] = []
-    for raw_dimension in raw_dimensions:
-        if not isinstance(raw_dimension, dict):
-            raise ArtifactError(
-                "logical_topology_json.dimensions items must be objects",
-                status_code="INVALID_ARGUMENT",
-                retryable=False,
-            )
-        name = str(raw_dimension.get("name", "")).strip()
-        if not name:
-            raise ArtifactError(
-                "logical_topology_json dimensions require non-empty name",
-                status_code="INVALID_ARGUMENT",
-                retryable=False,
-            )
-        size = raw_dimension.get("size", None)
-        if not isinstance(size, int) or size <= 0:
-            raise ArtifactError(
-                "logical_topology_json dimensions require positive integer size",
-                status_code="INVALID_ARGUMENT",
-                retryable=False,
-            )
-        dimensions.append({"name": name, "size": int(size)})
-    dimensions.sort(key=lambda item: (str(item["name"]), int(item["size"])))
-    return {
-        "family": family,
-        "version": version,
-        "dimensions": dimensions,
-    }
 
 
 def _tensor_spec_payload(
@@ -674,14 +581,14 @@ def compute_pure_transform_representation_contract_hash(
     )
     source_entries = {
         str(entry.name): entry
-        for entry in _canonical_serving_entries(
+        for entry in _canonical_runtime_entries(
             source_canonical_index,
             manifest_tensor_name=manifest_tensor_name,
         )
     }
     target_entries = {
         str(entry.name): entry
-        for entry in _canonical_serving_entries(
+        for entry in _canonical_runtime_entries(
             target_canonical_index,
             manifest_tensor_name=manifest_tensor_name,
         )
@@ -696,7 +603,7 @@ def compute_pure_transform_representation_contract_hash(
             retryable=False,
         )
 
-    tensor_schema_hash = compute_serving_tensor_schema_hash(
+    tensor_schema_hash = compute_runtime_artifact_tensor_schema_hash(
         target_canonical_index,
         manifest_tensor_name=manifest_tensor_name,
     )
@@ -760,31 +667,33 @@ def compute_pure_transform_representation_contract_hash(
             "segments": [],
         },
     }
-    return _hash_versioned_payload_to_multihash(
+    from tensorcast.artifact_runtime.contract import hash_versioned_payload_to_multihash
+
+    return hash_versioned_payload_to_multihash(
         "tensorcast.representation.contract.v1",
         payload,
     )
 
 
-def prepare_pure_transform_serving_registration(
+def prepare_pure_transform_runtime_registration(
     *,
-    build_intent: ServingBuildIntent,
+    build_intent: RuntimeArtifactBuildIntent,
     source_artifact: RegisteredArtifact | CanonicalIndex | object | None = None,
     tensors: Mapping[str, torch.Tensor],
     logical_topology_json: str | None = None,
     serving_manifest_ref: str | None = None,
     topology_admission_digest: str | None = None,
-) -> PreparedServingRegistration:
+) -> PreparedRuntimeArtifactRegistration:
     if build_intent.builder_mode is not BuilderMode.PURE_TRANSFORM:
         raise ArtifactError(
-            "prepare_pure_transform_serving_registration requires ServingBuildIntent.builder_mode=PURE_TRANSFORM",
+            "prepare_pure_transform_runtime_registration requires RuntimeArtifactBuildIntent.builder_mode=PURE_TRANSFORM",
             status_code="FAILED_PRECONDITION",
             retryable=False,
         )
     prepared_tensors = {str(name): tensor for name, tensor in dict(tensors).items()}
     resolved_manifest_ref, manifest_tensor_name = _resolve_manifest_tensor_name(
         serving_manifest_ref,
-        helper_name="PURE_TRANSFORM serving registration",
+        helper_name="PURE_TRANSFORM runtime artifact registration",
     )
     manifest_tensor = prepared_tensors.pop(manifest_tensor_name, None)
     base_canonical_index = _canonical_index_from_tensors(prepared_tensors)
@@ -797,7 +706,7 @@ def prepare_pure_transform_serving_registration(
             manifest_tensor_name=manifest_tensor_name,
         )
     )
-    carrier = prepare_serving_manifest_carrier(
+    carrier = prepare_runtime_artifact_manifest_carrier(
         build_intent=build_intent,
         canonical_index=base_canonical_index,
         representation_contract_hash=resolved_representation_contract_hash,
@@ -832,7 +741,7 @@ def prepare_pure_transform_serving_registration(
             )
         prepared_tensors[manifest_tensor_name] = manifest_tensor
     final_canonical_index = _canonical_index_from_tensors(prepared_tensors)
-    return PreparedServingRegistration(
+    return PreparedRuntimeArtifactRegistration(
         tensors=prepared_tensors,
         serving_manifest_ref=carrier.serving_manifest_ref,
         manifest_tensor_name=carrier.manifest_tensor_name,
@@ -843,23 +752,23 @@ def prepare_pure_transform_serving_registration(
     )
 
 
-def prepare_serving_registration(
+def prepare_runtime_artifact_registration(
     *,
-    build_intent: ServingBuildIntent,
+    build_intent: RuntimeArtifactBuildIntent,
     tensors: Mapping[str, torch.Tensor],
     representation_contract_hash: str | None = None,
     logical_topology_json: str | None = None,
     serving_manifest_ref: str | None = None,
     topology_admission_digest: str | None = None,
-) -> PreparedServingRegistration:
+) -> PreparedRuntimeArtifactRegistration:
     prepared_tensors = {str(name): tensor for name, tensor in dict(tensors).items()}
     resolved_manifest_ref, manifest_tensor_name = _resolve_manifest_tensor_name(
         serving_manifest_ref,
-        helper_name="serving registration",
+        helper_name="runtime artifact registration",
     )
     manifest_tensor = prepared_tensors.pop(manifest_tensor_name, None)
     base_canonical_index = _canonical_index_from_tensors(prepared_tensors)
-    carrier = prepare_serving_manifest_carrier(
+    carrier = prepare_runtime_artifact_manifest_carrier(
         build_intent=build_intent,
         canonical_index=base_canonical_index,
         representation_contract_hash=representation_contract_hash,
@@ -871,7 +780,7 @@ def prepare_serving_registration(
         _resolve_explicit_representation_contract_hash(
             build_intent=build_intent,
             representation_contract_hash=representation_contract_hash,
-            helper_name="serving registration",
+            helper_name="runtime artifact registration",
         )
     )
     manifest_device = (
@@ -901,7 +810,7 @@ def prepare_serving_registration(
             )
         prepared_tensors[manifest_tensor_name] = manifest_tensor
     final_canonical_index = _canonical_index_from_tensors(prepared_tensors)
-    return PreparedServingRegistration(
+    return PreparedRuntimeArtifactRegistration(
         tensors=prepared_tensors,
         serving_manifest_ref=carrier.serving_manifest_ref,
         manifest_tensor_name=carrier.manifest_tensor_name,
@@ -912,23 +821,23 @@ def prepare_serving_registration(
     )
 
 
-def prepare_binding_finalize_serving_registration(
+def prepare_binding_finalize_runtime_registration(
     *,
-    build_intent: ServingBuildIntent,
+    build_intent: RuntimeArtifactBuildIntent,
     tensors: Mapping[str, torch.Tensor],
     representation_contract_hash: str | None = None,
     logical_topology_json: str | None = None,
     serving_manifest_ref: str | None = None,
     topology_admission_digest: str | None = None,
-) -> PreparedServingRegistration:
+) -> PreparedRuntimeArtifactRegistration:
     if build_intent.builder_mode is not BuilderMode.BINDING_FINALIZE:
         raise ArtifactError(
-            "prepare_binding_finalize_serving_registration requires "
-            "ServingBuildIntent.builder_mode=BINDING_FINALIZE",
+            "prepare_binding_finalize_runtime_registration requires "
+            "RuntimeArtifactBuildIntent.builder_mode=BINDING_FINALIZE",
             status_code="FAILED_PRECONDITION",
             retryable=False,
         )
-    return prepare_serving_registration(
+    return prepare_runtime_artifact_registration(
         build_intent=build_intent,
         tensors=tensors,
         representation_contract_hash=representation_contract_hash,
@@ -940,13 +849,13 @@ def prepare_binding_finalize_serving_registration(
 
 def build_binding_finalize_admission_facts(
     *,
-    support_level: ServingSupportLevel | str,
+    support_level: RuntimeSupportLevel | str,
     topology_admission_digest: str | None = None,
     same_binding_fast_path_validated: bool,
-) -> ServingAdmissionFacts:
-    return ServingAdmissionFacts(
+) -> RuntimeAdmissionFacts:
+    return RuntimeAdmissionFacts(
         finalize_class=FinalizeClass.REPRESENTATION_CHANGING,
-        support_level=_coerce_serving_support_level(support_level),
+        support_level=_coerce_runtime_support_level(support_level),
         topology_admission_digest=topology_admission_digest,
         same_binding_fast_path_validated=bool(same_binding_fast_path_validated),
     )
@@ -954,7 +863,7 @@ def build_binding_finalize_admission_facts(
 
 def build_pure_transform_publication_spec(
     *,
-    build_intent: ServingBuildIntent,
+    build_intent: RuntimeArtifactBuildIntent,
     contract_family: AssemblyContractFamily | str | None = None,
     source_version_key: str | None = None,
     serving_version_key: str | None = None,
@@ -964,7 +873,7 @@ def build_pure_transform_publication_spec(
     requirements: AssemblyRequirementSetRef | None = None,
     readiness_policy: AssemblyReadinessPolicy | None = None,
     structural_view_ids: tuple[str, ...] = (),
-    admission_facts: ServingAdmissionFacts | None = None,
+    admission_facts: RuntimeAdmissionFacts | None = None,
 ) -> PureTransformPublicationSpec:
     return PureTransformPublicationSpec(
         build_intent=build_intent,
@@ -990,7 +899,7 @@ def build_pure_transform_publication_spec(
 def build_pure_transform_transform_spec(
     *,
     transform_name: str,
-    build_intent: ServingBuildIntent,
+    build_intent: RuntimeArtifactBuildIntent,
     contract_family: AssemblyContractFamily | str | None = None,
     source_version_key: str | None = None,
     serving_version_key: str | None = None,
@@ -1000,7 +909,7 @@ def build_pure_transform_transform_spec(
     requirements: AssemblyRequirementSetRef | None = None,
     readiness_policy: AssemblyReadinessPolicy | None = None,
     structural_view_ids: tuple[str, ...] = (),
-    admission_facts: ServingAdmissionFacts | None = None,
+    admission_facts: RuntimeAdmissionFacts | None = None,
     transform_args: dict[str, str | int] | None = None,
     layout_hash: str | None = None,
 ) -> TransformSpec:
@@ -1032,7 +941,7 @@ def build_pure_transform_transform_spec(
 
 def build_pure_transform_publication_bundle(
     *,
-    build_intent: ServingBuildIntent,
+    build_intent: RuntimeArtifactBuildIntent,
     source_artifact: RegisteredArtifact | CanonicalIndex | object | None = None,
     contract_family: AssemblyContractFamily | str | None = None,
     serving_artifact: RegisteredArtifact
@@ -1040,7 +949,7 @@ def build_pure_transform_publication_bundle(
     | "StoreArtifactDescriptor"
     | str
     | None = None,
-    publication_subject: ServingPublicationSubject | BindingValueRef | None = None,
+    publication_subject: RuntimePublicationSubject | BindingValueRef | None = None,
     canonical_index: CanonicalIndex,
     source_version_key: str | None = None,
     serving_version_key: str | None = None,
@@ -1050,11 +959,11 @@ def build_pure_transform_publication_bundle(
     requirements: AssemblyRequirementSetRef | None = None,
     readiness_policy: AssemblyReadinessPolicy | None = None,
     structural_view_ids: tuple[str, ...] = (),
-    admission_facts: ServingAdmissionFacts | None = None,
+    admission_facts: RuntimeAdmissionFacts | None = None,
 ) -> RepresentationPublishSpec:
     if build_intent.builder_mode is not BuilderMode.PURE_TRANSFORM:
         raise ArtifactError(
-            "build_pure_transform_publication_bundle requires ServingBuildIntent.builder_mode=PURE_TRANSFORM",
+            "build_pure_transform_publication_bundle requires RuntimeArtifactBuildIntent.builder_mode=PURE_TRANSFORM",
             status_code="FAILED_PRECONDITION",
             retryable=False,
         )
@@ -1066,11 +975,11 @@ def build_pure_transform_publication_bundle(
         helper_name="PURE_TRANSFORM publication bundle",
     )
 
-    tensor_schema_hash = compute_serving_tensor_schema_hash(
+    tensor_schema_hash = compute_runtime_artifact_tensor_schema_hash(
         canonical_index,
         manifest_tensor_name=manifest_tensor_name,
     )
-    canonical_tensor_count = count_canonical_serving_tensors(
+    canonical_tensor_count = count_canonical_runtime_tensors(
         canonical_index,
         manifest_tensor_name=manifest_tensor_name,
     )
@@ -1093,7 +1002,7 @@ def build_pure_transform_publication_bundle(
             status_code="FAILED_PRECONDITION",
             retryable=False,
         )
-    manifest = ServingArtifactManifest.from_build_intent(
+    manifest = RuntimeArtifactManifest.from_build_intent(
         intent=build_intent,
         representation_contract_hash=resolved_representation_contract_hash,
         tensor_schema_hash=tensor_schema_hash,
@@ -1153,9 +1062,9 @@ def build_pure_transform_publication_bundle(
     )
 
 
-def build_serving_publication_bundle(
+def build_runtime_artifact_publication_bundle(
     *,
-    build_intent: ServingBuildIntent,
+    build_intent: RuntimeArtifactBuildIntent,
     source_artifact: RegisteredArtifact | CanonicalIndex | object | None = None,
     contract_family: AssemblyContractFamily | str | None = None,
     serving_artifact: RegisteredArtifact
@@ -1163,7 +1072,7 @@ def build_serving_publication_bundle(
     | "StoreArtifactDescriptor"
     | str
     | None = None,
-    publication_subject: ServingPublicationSubject | BindingValueRef | None = None,
+    publication_subject: RuntimePublicationSubject | BindingValueRef | None = None,
     canonical_index: CanonicalIndex,
     representation_contract_hash: str | None = None,
     source_version_key: str | None = None,
@@ -1174,21 +1083,21 @@ def build_serving_publication_bundle(
     requirements: AssemblyRequirementSetRef | None = None,
     readiness_policy: AssemblyReadinessPolicy | None = None,
     structural_view_ids: tuple[str, ...] = (),
-    admission_facts: ServingAdmissionFacts | None = None,
+    admission_facts: RuntimeAdmissionFacts | None = None,
 ) -> RepresentationPublishSpec:
     normalized_logical_topology = _normalize_logical_topology_payload(
         logical_topology_json
     )
     resolved_manifest_ref, manifest_tensor_name = _resolve_manifest_tensor_name(
         serving_manifest_ref,
-        helper_name="serving publication bundle",
+        helper_name="runtime artifact publication bundle",
     )
 
-    tensor_schema_hash = compute_serving_tensor_schema_hash(
+    tensor_schema_hash = compute_runtime_artifact_tensor_schema_hash(
         canonical_index,
         manifest_tensor_name=manifest_tensor_name,
     )
-    canonical_tensor_count = count_canonical_serving_tensors(
+    canonical_tensor_count = count_canonical_runtime_tensors(
         canonical_index,
         manifest_tensor_name=manifest_tensor_name,
     )
@@ -1196,10 +1105,10 @@ def build_serving_publication_bundle(
         _resolve_explicit_representation_contract_hash(
             build_intent=build_intent,
             representation_contract_hash=representation_contract_hash,
-            helper_name="serving publication bundle",
+            helper_name="runtime artifact publication bundle",
         )
     )
-    manifest = ServingArtifactManifest.from_build_intent(
+    manifest = RuntimeArtifactManifest.from_build_intent(
         intent=build_intent,
         representation_contract_hash=resolved_representation_contract_hash,
         tensor_schema_hash=tensor_schema_hash,
@@ -1261,10 +1170,10 @@ def build_serving_publication_bundle(
 
 def build_binding_finalize_publication_bundle(
     *,
-    build_intent: ServingBuildIntent,
+    build_intent: RuntimeArtifactBuildIntent,
     source_artifact: RegisteredArtifact | CanonicalIndex | object | None = None,
     contract_family: AssemblyContractFamily | str | None = None,
-    publication_subject: ServingPublicationSubject | BindingValueRef | None = None,
+    publication_subject: RuntimePublicationSubject | BindingValueRef | None = None,
     canonical_index: CanonicalIndex,
     representation_contract_hash: str | None = None,
     source_version_key: str | None = None,
@@ -1275,12 +1184,12 @@ def build_binding_finalize_publication_bundle(
     requirements: AssemblyRequirementSetRef | None = None,
     readiness_policy: AssemblyReadinessPolicy | None = None,
     structural_view_ids: tuple[str, ...] = (),
-    admission_facts: ServingAdmissionFacts | None = None,
+    admission_facts: RuntimeAdmissionFacts | None = None,
 ) -> RepresentationPublishSpec:
     if build_intent.builder_mode is not BuilderMode.BINDING_FINALIZE:
         raise ArtifactError(
             "build_binding_finalize_publication_bundle requires "
-            "ServingBuildIntent.builder_mode=BINDING_FINALIZE",
+            "RuntimeArtifactBuildIntent.builder_mode=BINDING_FINALIZE",
             status_code="FAILED_PRECONDITION",
             retryable=False,
         )
@@ -1297,7 +1206,7 @@ def build_binding_finalize_publication_bundle(
             retryable=False,
         )
     if (
-        isinstance(publication_subject, ServingPublicationSubject)
+        isinstance(publication_subject, RuntimePublicationSubject)
         and publication_subject.binding_value_ref is None
     ):
         raise ArtifactError(
@@ -1305,7 +1214,7 @@ def build_binding_finalize_publication_bundle(
             status_code="FAILED_PRECONDITION",
             retryable=False,
         )
-    return build_serving_publication_bundle(
+    return build_runtime_artifact_publication_bundle(
         build_intent=build_intent,
         source_artifact=source_artifact,
         contract_family=contract_family,
@@ -1327,7 +1236,7 @@ def build_binding_finalize_publication_bundle(
 
 def build_pure_transform_publication_bundle_from_registered_artifact(
     *,
-    build_intent: ServingBuildIntent,
+    build_intent: RuntimeArtifactBuildIntent,
     source_artifact: RegisteredArtifact | CanonicalIndex | object | None = None,
     contract_family: AssemblyContractFamily | str | None = None,
     serving_artifact: RegisteredArtifact,
@@ -1339,7 +1248,7 @@ def build_pure_transform_publication_bundle_from_registered_artifact(
     requirements: AssemblyRequirementSetRef | None = None,
     readiness_policy: AssemblyReadinessPolicy | None = None,
     structural_view_ids: tuple[str, ...] = (),
-    admission_facts: ServingAdmissionFacts | None = None,
+    admission_facts: RuntimeAdmissionFacts | None = None,
 ) -> RepresentationPublishSpec:
     return build_pure_transform_publication_bundle(
         build_intent=build_intent,
@@ -1359,9 +1268,9 @@ def build_pure_transform_publication_bundle_from_registered_artifact(
     )
 
 
-def build_serving_publication_bundle_from_registered_artifact(
+def build_runtime_artifact_publication_bundle_from_registered_artifact(
     *,
-    build_intent: ServingBuildIntent,
+    build_intent: RuntimeArtifactBuildIntent,
     source_artifact: RegisteredArtifact | CanonicalIndex | object | None = None,
     contract_family: AssemblyContractFamily | str | None = None,
     serving_artifact: RegisteredArtifact,
@@ -1374,9 +1283,9 @@ def build_serving_publication_bundle_from_registered_artifact(
     requirements: AssemblyRequirementSetRef | None = None,
     readiness_policy: AssemblyReadinessPolicy | None = None,
     structural_view_ids: tuple[str, ...] = (),
-    admission_facts: ServingAdmissionFacts | None = None,
+    admission_facts: RuntimeAdmissionFacts | None = None,
 ) -> RepresentationPublishSpec:
-    return build_serving_publication_bundle(
+    return build_runtime_artifact_publication_bundle(
         build_intent=build_intent,
         source_artifact=source_artifact,
         contract_family=contract_family,
@@ -1396,22 +1305,22 @@ def build_serving_publication_bundle_from_registered_artifact(
 
 
 __all__ = [
-    "PreparedServingRegistration",
+    "PreparedRuntimeArtifactRegistration",
     "PureTransformPublicationSpec",
     "RepresentationPublishSpec",
-    "RegisteredServingPublication",
-    "build_serving_publication_bundle",
-    "build_serving_publication_bundle_from_registered_artifact",
+    "RegisteredRuntimeArtifactPublication",
+    "build_runtime_artifact_publication_bundle",
+    "build_runtime_artifact_publication_bundle_from_registered_artifact",
     "build_binding_finalize_admission_facts",
     "build_binding_finalize_publication_bundle",
     "build_pure_transform_publication_spec",
     "build_pure_transform_transform_spec",
     "build_pure_transform_publication_bundle",
     "build_pure_transform_publication_bundle_from_registered_artifact",
-    "compute_serving_tensor_schema_hash",
+    "compute_runtime_artifact_tensor_schema_hash",
     "compute_pure_transform_representation_contract_hash",
-    "count_canonical_serving_tensors",
-    "prepare_binding_finalize_serving_registration",
-    "prepare_serving_registration",
-    "prepare_pure_transform_serving_registration",
+    "count_canonical_runtime_tensors",
+    "prepare_binding_finalize_runtime_registration",
+    "prepare_runtime_artifact_registration",
+    "prepare_pure_transform_runtime_registration",
 ]
diff --git a/tensorcast/api/store/realization_kernel.py b/tensorcast/api/store/realization_kernel.py
index 3bfba8be..2c7afb69 100644
--- a/tensorcast/api/store/realization_kernel.py
+++ b/tensorcast/api/store/realization_kernel.py
@@ -9,7 +9,7 @@
 import time
 from collections.abc import Callable, Mapping, Sequence
 from dataclasses import asdict, dataclass, field, replace
-from typing import Any, Literal, NoReturn
+from typing import Any, Literal, NoReturn, TypedDict
 
 from tensorcast.api.store.common import canonical_index_from_bytes
 from tensorcast.api.store.types import ArtifactError
@@ -189,6 +189,22 @@ class ResolvedArtifactSelection:
     diagnostics: Mapping[str, object] = field(default_factory=dict)
 
 
+class SelectionReportFields(TypedDict):
+    view_subset_hash: str
+    logical_layout_hash: str
+    selection_hash: str
+
+
+def selection_report_fields(
+    selection: ResolvedArtifactSelection,
+) -> SelectionReportFields:
+    return {
+        "view_subset_hash": selection.view_subset_hash.hex(),
+        "logical_layout_hash": selection.logical_layout_hash.hex(),
+        "selection_hash": selection.selection_hash.hex(),
+    }
+
+
 def resolve_artifact_selection(
     *,
     artifact_id: str | None,
@@ -456,7 +472,7 @@ def validate_for_target(self, target: RealizationTargetPlan) -> None:
                     retryable=False,
                 )
         if (
-            target.kind in {"binding_adopted", "caller_tensors"}
+            target.kind in {"binding_owned", "binding_adopted", "caller_tensors"}
             and not target.target_layout_digest
         ):
             raise ArtifactError(
@@ -727,6 +743,8 @@ class ArtifactRealizationReport:
     materialize_sec: float | None = None
     tensor_bind_sec: float | None = None
     total_sec: float | None = None
+    runtime_attach_sec: float | None = None
+    runtime_finalize_sec: float | None = None
     source: str | None = None
     operation_id: str | None = None
     operation_backend: str | None = None
@@ -742,6 +760,9 @@ class ArtifactRealizationReport:
     execution_commit: RealizationExecutionCommitReport | None = None
     execution_diagnostics: object | None = None
     source_bound_plan_diagnostics: object | None = None
+    view_subset_hash: str = ""
+    logical_layout_hash: str = ""
+    selection_hash: str = ""
 
     def validate_for_handle(self, target_kind: RealizationTargetKind) -> None:
         if self.target_kind != target_kind:
@@ -793,6 +814,9 @@ def artifact_realization_profile_payload(
         "artifact_profile": report.artifact_profile,
         "authority_scope": report.authority_scope,
         "source_selection_digest": report.source_selection_digest,
+        "view_subset_hash": report.view_subset_hash,
+        "logical_layout_hash": report.logical_layout_hash,
+        "selection_hash": report.selection_hash,
         "target_layout_digest": report.target_layout_digest,
         "copy_plan_digest": report.copy_plan_digest,
         "operation_backend": report.operation_backend,
@@ -802,6 +826,8 @@ def artifact_realization_profile_payload(
         "materialize_sec": report.materialize_sec,
         "tensor_bind_sec": report.tensor_bind_sec,
         "total_sec": report.total_sec,
+        "runtime_attach_sec": report.runtime_attach_sec,
+        "runtime_finalize_sec": report.runtime_finalize_sec,
         "envelope_backing_kind": envelope.backing_kind,
         "envelope_export_kind": envelope.export_kind,
         "envelope_projection_kind": envelope.projection_kind,
@@ -1771,8 +1797,8 @@ def _target_member_runtime_profile_digest(
         "load_config_digest": _optional_str(
             _safe_attr(target_member, "load_config_digest")
         ),
-        "serving_build_digest": _optional_str(
-            _safe_attr(target_member, "serving_build_digest")
+        "runtime_build_digest": _optional_str(
+            _safe_attr(target_member, "runtime_build_digest")
         ),
     }
     if not any(payload.values()):
@@ -1833,7 +1859,7 @@ def _source_selection_mode(
     source: object | None,
 ) -> str:
     source_kind = _literal_value(_safe_attr(source, "source_kind"))
-    if source_kind == "serving_artifact_set":
+    if source_kind == "runtime_artifact_set":
         source_members = _source_members_by_member_id(source)
         artifact_refs = {
             artifact_ref
@@ -2029,7 +2055,7 @@ def target_set_report_for_retained_bindings(
         ready_member_count=sum(
             1
             for report in retained
-            if report.readiness in {"serving_local_ready", "serving_published_ready"}
+            if report.readiness in {"runtime_local_ready", "runtime_published_ready"}
         ),
         staged_member_count=sum(1 for report in retained if report.staged_value),
         total_reservation_bytes=sum(report.reservation_bytes for report in retained),
@@ -2518,6 +2544,7 @@ def report_for_binding_realization(
         artifact_profile=selection.artifact_profile,
         authority_scope=selection.authority_scope,
         generation_hint=selection.generation_hint,
+        **selection_report_fields(selection),
         envelope=envelope,
         target_plan=target_plan,
         strategy_plan=strategy_plan_for_execution(
@@ -2597,6 +2624,7 @@ def report_for_runtime_attachment(
         artifact_profile=selection.artifact_profile,
         authority_scope=selection.authority_scope,
         generation_hint=selection.generation_hint,
+        **selection_report_fields(selection),
         envelope=envelope,
         target_plan=target_plan,
         strategy_plan=strategy_plan_for_execution(
@@ -2711,7 +2739,7 @@ def report_for_publication(
         copy_plan_digest=target_plan.copy_plan_digest,
         artifact_id=str(artifact_id),
         view_id="",
-        artifact_profile="serving_artifact",
+        artifact_profile="runtime_artifact",
         authority_scope="daemon_publication",
         generation_hint=None,
         envelope=envelope,
@@ -2765,6 +2793,7 @@ def report_for_mounted_source(
         artifact_profile=selection.artifact_profile,
         authority_scope=selection.authority_scope,
         generation_hint=selection.generation_hint,
+        **selection_report_fields(selection),
         envelope=envelope,
         target_plan=target_plan,
         representation_admission=representation_admission_for_target(target_plan),
@@ -2808,6 +2837,7 @@ def report_for_target_set(
         artifact_profile=selection.artifact_profile,
         authority_scope=selection.authority_scope,
         generation_hint=selection.generation_hint,
+        **selection_report_fields(selection),
         envelope=envelope,
         target_plan=target_plan,
         strategy_plan=target_set_strategy_plan_for(
@@ -2950,6 +2980,7 @@ def __init__(
         tensor_dict_value: Mapping[str, Any] | None = None,
         binding_value: Any | None = None,
         prefetch_value: Any | None = None,
+        attachment_value: Any | None = None,
         promote_fn: Callable[..., Any] | None = None,
         attach_fn: Callable[..., Any] | None = None,
         release_contract: RealizationReleaseContract | None = None,
@@ -2962,6 +2993,7 @@ def __init__(
         self._tensor_dict_projection: TensorDictProjection | None = None
         self._binding_value = binding_value
         self._prefetch_value = prefetch_value
+        self._attachment_value = attachment_value
         self._promote_fn = promote_fn
         self._attach_fn = attach_fn
         self._release_contract = release_contract or release_contract_for(
@@ -3007,9 +3039,20 @@ def complete(self) -> None:
 
     def attach(self, *args: object, **kwargs: object) -> Any:
         if self._attach_fn is None:
-            self._unsupported("attach")
+            if args or kwargs or self._attachment_value is None:
+                self._unsupported("attach")  # args given, or no stored value
+            return self._attachment_value  # zero-arg call: hand back stored value
+        if self._attachment_value is not None and not args and not kwargs:
+            return self._attachment_value  # zero-arg: prefer stored value
         return self._attach_fn(*args, **kwargs)
 
+    def attachment(self) -> Any:
+        """Return the stored attachment value, falling back to ``attach_fn()``."""
+        value = self._attachment_value
+        if value is None and self._attach_fn is None:
+            self._unsupported("attach")
+        return self._attach_fn() if value is None else value
+
     def publish_replica(self, *args: object, **kwargs: object) -> Any:
         binding_value = self._binding_value
         publish = getattr(binding_value, "publish_replica", None)
@@ -3050,7 +3093,7 @@ class ArtifactRealizationSpec:
     packing: str = "byte_space"
     capacity_bytes: int | None = None
     publish: bool = False
-    serving_runtime_policy: object | None = None
+    runtime_artifact_policy: object | None = None
     readiness: object | None = None
     retention: object | None = None
     verify_checksums: bool = True
@@ -3095,7 +3138,7 @@ def binding(
         options: object | None = None,
         capacity_bytes: int | None = None,
         publish: bool = False,
-        serving_runtime_policy: object | None = None,
+        runtime_artifact_policy: object | None = None,
     ) -> "ArtifactRealizationSpec":
         return cls(
             target_kind="binding_owned",
@@ -3105,7 +3148,7 @@ def binding(
             options=options,
             capacity_bytes=capacity_bytes,
             publish=publish,
-            serving_runtime_policy=serving_runtime_policy,
+            runtime_artifact_policy=runtime_artifact_policy,
         )
 
     @classmethod
@@ -3117,7 +3160,7 @@ def adopted_binding(
         packing: str = "byte_space",
         options: object | None = None,
         publish: bool = False,
-        serving_runtime_policy: object | None = None,
+        runtime_artifact_policy: object | None = None,
     ) -> "ArtifactRealizationSpec":
         return cls(
             target_kind="binding_adopted",
@@ -3126,7 +3169,7 @@ def adopted_binding(
             packing=packing,
             options=options,
             publish=publish,
-            serving_runtime_policy=serving_runtime_policy,
+            runtime_artifact_policy=runtime_artifact_policy,
         )
 
     @classmethod
@@ -3198,6 +3241,7 @@ def model_runtime(
         adapter_version: str | None = None,
         runtime_abi_version: str | None = None,
         options: object | None = None,
+        runtime_artifact_policy: object | None = None,
     ) -> "ArtifactRealizationSpec":
         if not str(framework or "").strip():
             raise ArtifactError(
@@ -3214,10 +3258,11 @@ def model_runtime(
             member=member,
             adapter_version=adapter_version,
             runtime_abi_version=runtime_abi_version,
+            runtime_artifact_policy=runtime_artifact_policy,
         )
 
     @classmethod
-    def publication(
+    def _publication(
         cls,
         *,
         target: object,
diff --git a/tensorcast/api/store/runtime.py b/tensorcast/api/store/runtime.py
index 0c9c5a29..31e4cd9e 100644
--- a/tensorcast/api/store/runtime.py
+++ b/tensorcast/api/store/runtime.py
@@ -13,6 +13,7 @@
 import weakref
 from collections.abc import Callable
 from contextlib import contextmanager
+from dataclasses import dataclass
 from typing import Iterator, Mapping
 
 from opentelemetry import trace
@@ -63,17 +64,19 @@ def close(self) -> None:
             _log_best_effort_cleanup_failure("store_runtime.fork_handle_cleanup")
 
 
+@dataclass(frozen=True, slots=True)
+class ResolvedKeyMapping:  # value returned by resolve_key_mapping_cached
+    artifact_id: str | None  # None when the mapping has no artifact id
+    disk_path: str | None  # None when the mapping has no used_disk_path
+    generation: int | None  # None unless a generation > 0 was reported
+
+
+@dataclass(frozen=True, slots=True)
 class _KeyCacheEntry:
-    def __init__(
-        self,
-        *,
-        artifact_id: str | None,
-        disk_path: str | None,
-        expires_at: float,
-    ) -> None:
-        self.artifact_id = artifact_id
-        self.disk_path = disk_path
-        self.expires_at = expires_at
+    artifact_id: str | None
+    disk_path: str | None
+    generation: int | None
+    expires_at: float  # compared against time.monotonic() on lookup
 
 
 class StoreRuntimeContext:
@@ -292,6 +295,7 @@ def cache_key_mapping(
         *,
         artifact_id: str | None,
         disk_path: str | None = None,
+        generation: int | None = None,
         ttl_override: float | None = None,
     ) -> None:
         if not key:
@@ -304,28 +308,40 @@ def cache_key_mapping(
             self._key_cache[key] = _KeyCacheEntry(
                 artifact_id=artifact_id,
                 disk_path=disk_path,
+                generation=generation,
                 expires_at=expires_at,
             )
 
-    def resolve_key_mapping_cached(self, *, key: str) -> tuple[str | None, str | None]:
+    def resolve_key_mapping_cached(self, *, key: str) -> ResolvedKeyMapping:
         now = time.monotonic()
         with self._key_cache_lock:
             cached = self._key_cache.get(key)
             if cached and cached.expires_at > now:
-                return cached.artifact_id, cached.disk_path
+                return ResolvedKeyMapping(  # unexpired cache hit; no client call
+                    artifact_id=cached.artifact_id,
+                    disk_path=cached.disk_path,
+                    generation=cached.generation,
+                )
             if cached is not None:
                 del self._key_cache[key]
         mapping = self.ensure_client().resolve_key_mapping(key)
         resolved_id = mapping.artifact_id or None
         resolved_path = getattr(mapping, "used_disk_path", "") or None
+        raw_generation = int(getattr(mapping, "generation", 0) or 0)  # absent -> 0
+        generation = raw_generation if raw_generation > 0 else None  # <= 0 -> None
         ttl_override = float(mapping.cache_ttl_seconds)
         self.cache_key_mapping(
             key,
             artifact_id=resolved_id,
             disk_path=resolved_path,
+            generation=generation,
             ttl_override=ttl_override,
         )
-        return resolved_id, resolved_path
+        return ResolvedKeyMapping(  # NOTE(review): not tuple-unpackable; check callers
+            artifact_id=resolved_id,
+            disk_path=resolved_path,
+            generation=generation,
+        )
 
     def get_artifact_index_cached(self, artifact_id: str) -> ArtifactCacheEntry | None:
         return self._artifact_cache.get_artifact_index_cached(artifact_id)
diff --git a/tensorcast/api/store/serving_binding_reference_consumer.py b/tensorcast/api/store/runtime_realization_reference_consumer.py
similarity index 77%
rename from tensorcast/api/store/serving_binding_reference_consumer.py
rename to tensorcast/api/store/runtime_realization_reference_consumer.py
index 7fa4900e..0847bbe1 100644
--- a/tensorcast/api/store/serving_binding_reference_consumer.py
+++ b/tensorcast/api/store/runtime_realization_reference_consumer.py
@@ -1,6 +1,6 @@
 #  Copyright (c) 2026, TensorCast Team.
 
-"""Reference serving-binding consumer helpers for examples and E2E tests."""
+"""Reference runtime-realization consumer helpers for examples and E2E tests."""
 
 from __future__ import annotations
 
@@ -12,8 +12,9 @@
 from pydantic import BaseModel, ConfigDict, model_validator
 
 from tensorcast.api.context import GroupRealization
-from tensorcast.api.store.serving_binding_spec_cache import (
-    ServingBindingSpecCacheRecord,
+from tensorcast.api.store.realization_kernel import resolve_artifact_selection
+from tensorcast.api.store.runtime_realization_spec_cache import (
+    RuntimeRealizationSpecCacheRecord,
     read_matching_resolved_spec_cache_entry,
     write_resolved_spec_cache_entry,
 )
@@ -24,17 +25,17 @@
 from tensorcast.types import (
     BindingValueRef,
     BlobRef,
-    PrefetchedServingBinding,
-    PrefetchedServingBindingSet,
+    PrefetchHandoff,
+    PrefetchHandoffSet,
     PrefetchRetentionPolicy,
-    ServingBindingMemberRef,
-    ServingBindingResolvedLayout,
-    ServingBindingResolvedSpecCacheEntry,
-    ServingBindingSetTarget,
-    ServingBindingSourceRef,
-    ServingBindingSourceReuseDecision,
-    ServingBindingTarget,
-    ServingTopologyRef,
+    RealizationTarget,
+    RealizationTargetSet,
+    RuntimeBindingMemberRef,
+    RuntimeBindingResolvedLayout,
+    RuntimeBindingSourceRef,
+    RuntimeBindingSourceReuseDecision,
+    RuntimeRealizationSpecCacheEntry,
+    RuntimeTopologyRef,
 )
 
 REFERENCE_RUNTIME = "tensorcast-reference"
@@ -42,7 +43,7 @@
 _TARGET_INDEX_BLOB = "target_index"
 
 
-class ReferenceServingTensorSpec(BaseModel):
+class ReferenceRuntimeTensorSpec(BaseModel):
     model_config = ConfigDict(frozen=True)
 
     name: str = "alpha"
@@ -52,7 +53,7 @@ class ReferenceServingTensorSpec(BaseModel):
     stride: tuple[int, ...] = (1,)
 
     @model_validator(mode="after")
-    def _validate_spec(self) -> "ReferenceServingTensorSpec":
+    def _validate_spec(self) -> "ReferenceRuntimeTensorSpec":
         if not self.name:
             raise ValueError("name must not be empty")
         if int(self.size_bytes) <= 0:
@@ -66,15 +67,15 @@ def _validate_spec(self) -> "ReferenceServingTensorSpec":
         return self
 
 
-class ReferenceServingResolvedSpec(BaseModel):
+class ReferenceRuntimeResolvedSpec(BaseModel):  # see build_reference_resolved_spec
     model_config = ConfigDict(frozen=True)
 
-    cache_entry: ServingBindingResolvedSpecCacheEntry
-    target: ServingBindingTarget
+    cache_entry: RuntimeRealizationSpecCacheEntry  # entry with computed spec_digest
+    target: RealizationTarget
     blobs: dict[str, bytes]
 
 
-class ReferenceServingAcquireResult(BaseModel):
+class ReferenceRuntimeAcquireResult(BaseModel):
     model_config = ConfigDict(frozen=True)
 
     binding_value_ref: BindingValueRef
@@ -92,7 +93,7 @@ def _canonical_json_bytes(payload: object) -> bytes:
 
 
 def build_reference_tensor_index_bytes(
-    tensor: ReferenceServingTensorSpec,
+    tensor: ReferenceRuntimeTensorSpec,
 ) -> bytes:
     payload = {
         tensor.name: [
@@ -108,7 +109,7 @@ def build_reference_tensor_index_bytes(
 
 
 def build_reference_target_layout(
-    tensor: ReferenceServingTensorSpec,
+    tensor: ReferenceRuntimeTensorSpec,
     *,
     device_id: int = 0,
 ) -> store_daemon_pb2.TargetLayout:
@@ -145,40 +146,40 @@ def build_reference_resolved_spec(
     source_artifact_id: str,
     artifact_selection_digest: str,
     device_uuid: str,
-    tensor: ReferenceServingTensorSpec | None = None,
+    tensor: ReferenceRuntimeTensorSpec | None = None,
     runtime: str = REFERENCE_RUNTIME,
-    topology: ServingTopologyRef | None = None,
-    member: ServingBindingMemberRef | None = None,
+    topology: RuntimeTopologyRef | None = None,
+    member: RuntimeBindingMemberRef | None = None,
     source_schema_hash: str = "reference-source-schema",
     model_config_digest: str = "reference-model-config",
-    serving_build_digest: str = "reference-serving-build",
+    runtime_build_digest: str = "reference-runtime-build",
     representation_contract_hash: str = "reference-representation-contract",
     binding_layout_id: str = "reference-layout-0",
-) -> ReferenceServingResolvedSpec:
+) -> ReferenceRuntimeResolvedSpec:
     if not source_artifact_id:
         raise ValueError("source_artifact_id is required")
     if not artifact_selection_digest:
         raise ValueError("artifact_selection_digest is required")
     if not device_uuid:
         raise ValueError("device_uuid is required")
-    resolved_tensor = tensor or ReferenceServingTensorSpec()
-    resolved_topology = topology or ServingTopologyRef(
+    resolved_tensor = tensor or ReferenceRuntimeTensorSpec()
+    resolved_topology = topology or RuntimeTopologyRef(
         schema_topology_digest="reference-topology"
     )
-    resolved_member = member or ServingBindingMemberRef(
+    resolved_member = member or RuntimeBindingMemberRef(
         member_id="member-0",
         member_index=0,
         member_count=1,
         group_id="reference-group",
     )
-    source = ServingBindingSourceRef(
+    source = RuntimeBindingSourceRef(
         source_kind="checkpoint_artifact",
         artifact_selection_digest=artifact_selection_digest,
         source_artifact_ref=source_artifact_id,
         source_schema_hash=source_schema_hash,
     )
-    source_reuse = ServingBindingSourceReuseDecision(
-        mode="checkpoint_to_serving",
+    source_reuse = RuntimeBindingSourceReuseDecision(
+        mode="checkpoint_to_runtime",
         representation_contract_hash=representation_contract_hash,
     )
     target_layout = build_reference_target_layout(resolved_tensor)
@@ -186,7 +187,7 @@ def build_reference_resolved_spec(
     target_index_bytes = build_reference_tensor_index_bytes(resolved_tensor)
     layout_hash = _sha256_bytes(target_layout_bytes)
     tensor_schema_hash = _sha256_bytes(target_index_bytes)
-    draft_entry = ServingBindingResolvedSpecCacheEntry(
+    draft_entry = RuntimeRealizationSpecCacheEntry(
         schema_version=1,
         cache_key_digest="placeholder",
         spec_digest="placeholder",
@@ -197,7 +198,7 @@ def build_reference_resolved_spec(
         member=resolved_member,
         source_schema_hash=source_schema_hash,
         model_config_digest=model_config_digest,
-        serving_build_digest=serving_build_digest,
+        runtime_build_digest=runtime_build_digest,
         binding_layout_id=binding_layout_id,
         target_layout_hash=layout_hash,
         tensor_schema_hash=tensor_schema_hash,
@@ -212,7 +213,7 @@ def build_reference_resolved_spec(
     entry = entry_with_key.model_copy(
         update={"spec_digest": entry_with_key.computed_spec_digest()}
     )
-    resolved_layout = ServingBindingResolvedLayout(
+    resolved_layout = RuntimeBindingResolvedLayout(
         binding_layout_id=binding_layout_id,
         source=source,
         source_reuse=source_reuse,
@@ -225,7 +226,7 @@ def build_reference_resolved_spec(
         spec_digest=entry.spec_digest,
         source_schema_hash=source_schema_hash,
     )
-    target = ServingBindingTarget(
+    target = RealizationTarget(
         runtime=runtime,
         device="cuda:0",
         device_uuid=device_uuid,
@@ -233,10 +234,10 @@ def build_reference_resolved_spec(
         topology=resolved_topology,
         member=resolved_member,
         model_config_digest=model_config_digest,
-        serving_build_digest=serving_build_digest,
+        runtime_build_digest=runtime_build_digest,
         resolved_layout=resolved_layout,
     )
-    return ReferenceServingResolvedSpec(
+    return ReferenceRuntimeResolvedSpec(
         cache_entry=entry,
         target=target,
         blobs={
@@ -249,8 +250,8 @@ def build_reference_resolved_spec(
 def write_reference_resolved_spec_cache_entry(
     cache_root: str | os.PathLike[str],
     *,
-    resolved_spec: ReferenceServingResolvedSpec,
-) -> ServingBindingSpecCacheRecord:
+    resolved_spec: ReferenceRuntimeResolvedSpec,
+) -> RuntimeRealizationSpecCacheRecord:
     write_resolved_spec_cache_entry(
         cache_root,
         entry=resolved_spec.cache_entry,
@@ -263,11 +264,11 @@ def write_reference_resolved_spec_cache_entry(
 
 
 def target_from_reference_cache_record(
-    record: ServingBindingSpecCacheRecord,
+    record: RuntimeRealizationSpecCacheRecord,
     *,
     device_uuid: str,
     device: str = "cuda:0",
-) -> ServingBindingTarget:
+) -> RealizationTarget:
     target_layout = record.blobs.get(_TARGET_LAYOUT_BLOB)
     target_index = record.blobs.get(_TARGET_INDEX_BLOB)
     if target_layout is None:
@@ -275,7 +276,7 @@ def target_from_reference_cache_record(
     if target_index is None:
         raise ValueError("reference cache record is missing target_index blob")
     entry = record.entry
-    resolved_layout = ServingBindingResolvedLayout(
+    resolved_layout = RuntimeBindingResolvedLayout(
         binding_layout_id=entry.binding_layout_id,
         source=entry.source,
         source_reuse=entry.source_reuse,
@@ -288,7 +289,7 @@ def target_from_reference_cache_record(
         spec_digest=entry.spec_digest,
         source_schema_hash=entry.source_schema_hash,
     )
-    return ServingBindingTarget(
+    return RealizationTarget(
         runtime=entry.runtime,
         device=device,
         device_uuid=device_uuid,
@@ -297,44 +298,62 @@ def target_from_reference_cache_record(
         member=entry.member,
         model_config_digest=entry.model_config_digest,
         load_config_digest=entry.load_config_digest,
-        serving_build_digest=entry.serving_build_digest,
+        runtime_build_digest=entry.runtime_build_digest,
         resolved_layout=resolved_layout,
     )
 
 
-def unpack_prefetched_serving_binding(
+def unpack_prefetch_handoff(
     result_any: Any,
-) -> PrefetchedServingBinding:
+) -> PrefetchHandoff:  # raises ValueError if Unpack() fails
     proto = operation_pb2.PrefetchServingBindingResult()
     if not result_any.Unpack(proto):
         raise ValueError("operation result is not PrefetchServingBindingResult")
-    return PrefetchedServingBinding.from_proto(proto)
+    return PrefetchHandoff.from_proto(proto)  # proto keeps its ServingBinding name
 
 
-def unpack_prefetched_serving_binding_set(
+def unpack_prefetch_handoff_set(
     result_any: Any,
-) -> PrefetchedServingBindingSet:
+) -> PrefetchHandoffSet:  # raises ValueError if Unpack() fails
     proto = operation_pb2.PrefetchServingBindingSetResult()
     if not result_any.Unpack(proto):
         raise ValueError("operation result is not PrefetchServingBindingSetResult")
-    return PrefetchedServingBindingSet.from_proto(proto)
+    return PrefetchHandoffSet.from_proto(proto)  # proto keeps its ServingBinding name
+
+
+def _reference_source_selection(
+    *,
+    source_artifact_id: str,
+    target: RealizationTarget | RealizationTargetSet,
+) -> common_pb2.ArtifactSelection:
+    """Resolve the source selection from the first target's index bytes."""
+    members = (target,) if isinstance(target, RealizationTarget) else target.members
+    if not members:  # fail with a clear error instead of an IndexError below
+        raise ValueError("target set must contain at least one member")
+    return resolve_artifact_selection(
+        artifact_id=source_artifact_id,
+        canonical_index_bytes=bytes(members[0].resolved_layout.target_index_bytes),
+    ).proto
 
 
 def prefetch_reference_binding(
     client: DaemonCtl,
     *,
     source_artifact_id: str,
-    target: ServingBindingTarget,
+    target: RealizationTarget,
     retention_policy: PrefetchRetentionPolicy | None = None,
     operation_id: str | None = None,
     group_realization: GroupRealization | None = None,
     timeout_s: float = 30.0,
-) -> PrefetchedServingBinding:
-    selection = common_pb2.ArtifactSelection(artifact_id=source_artifact_id)
+) -> PrefetchHandoff:
+    selection = _reference_source_selection(
+        source_artifact_id=source_artifact_id,
+        target=target,
+    )
     response = client.prefetch_serving_binding(
         source_selection=selection,
         target=target,
-        requested_readiness="serving_local_ready",
+        requested_readiness="runtime_local_ready",
         retention_policy=retention_policy,
         operation_id=operation_id,
         group_realization=group_realization,
@@ -345,24 +364,27 @@ def prefetch_reference_binding(
         if response.status.HasField("error"):
             message = response.status.error.message or message
         raise RuntimeError(message)
-    return unpack_prefetched_serving_binding(response.status.result)
+    return unpack_prefetch_handoff(response.status.result)
 
 
 def prefetch_reference_binding_set(
     client: DaemonCtl,
     *,
     source_artifact_id: str,
-    target: ServingBindingSetTarget,
+    target: RealizationTargetSet,
     retention_policy: PrefetchRetentionPolicy | None = None,
     operation_id: str | None = None,
     group_realization: GroupRealization | None = None,
     timeout_s: float = 30.0,
-) -> PrefetchedServingBindingSet:
-    selection = common_pb2.ArtifactSelection(artifact_id=source_artifact_id)
+) -> PrefetchHandoffSet:
+    selection = _reference_source_selection(
+        source_artifact_id=source_artifact_id,
+        target=target,
+    )
     response = client.prefetch_serving_binding(
         source_selection=selection,
         target=target,
-        requested_readiness="serving_local_ready",
+        requested_readiness="runtime_local_ready",
         retention_policy=retention_policy,
         operation_id=operation_id,
         group_realization=group_realization,
@@ -373,17 +395,17 @@ def prefetch_reference_binding_set(
         if response.status.HasField("error"):
             message = response.status.error.message or message
         raise RuntimeError(message)
-    return unpack_prefetched_serving_binding_set(response.status.result)
+    return unpack_prefetch_handoff_set(response.status.result)
 
 
 def acquire_reference_binding(
     client: DaemonCtl,
     *,
-    prefetched: PrefetchedServingBinding,
-    target: ServingBindingTarget,
+    prefetched: PrefetchHandoff,
+    target: RealizationTarget,
     caller_pid: int | None = None,
     timeout_s: float = 30.0,
-) -> ReferenceServingAcquireResult:
+) -> ReferenceRuntimeAcquireResult:
     response = acquire_reference_binding_response(
         client,
         prefetched=prefetched,
@@ -399,7 +421,7 @@ def acquire_reference_binding(
             binding_value_id=str(response.current_value.binding_value_id),
             seal_generation=int(response.current_value.seal_generation),
         )
-    return ReferenceServingAcquireResult(
+    return ReferenceRuntimeAcquireResult(
         binding_value_ref=binding_value_ref,
         lease_token=bytes(response.mem_handle.lease_token),
         has_cuda_ipc_handle=response.mem_handle.HasField("cuda_ipc_handle"),
@@ -410,8 +432,8 @@ def acquire_reference_binding(
 def acquire_reference_binding_response(
     client: DaemonCtl,
     *,
-    prefetched: PrefetchedServingBinding,
-    target: ServingBindingTarget,
+    prefetched: PrefetchHandoff,
+    target: RealizationTarget,
     caller_pid: int | None = None,
     timeout_s: float = 30.0,
 ) -> store_daemon_pb2.AcquireBindingValueResponse:
@@ -421,7 +443,7 @@ def acquire_reference_binding_response(
         expected_device_uuid=prefetched.device_uuid,
         expected_target_layout_hash=target.resolved_layout.target_layout_hash,
         expected_tensor_schema_hash=target.resolved_layout.tensor_schema_hash,
-        expected_serving_build_digest=target.serving_build_digest,
+        expected_serving_build_digest=target.runtime_build_digest,
         expected_daemon_id=prefetched.daemon_id,
         expected_daemon_session_id=prefetched.daemon_session_id,
         expected_member=prefetched.member,
@@ -435,7 +457,7 @@ def acquire_reference_binding_response(
 def release_reference_acquire(
     client: DaemonCtl,
     *,
-    acquire_result: ReferenceServingAcquireResult,
+    acquire_result: ReferenceRuntimeAcquireResult,
     timeout_s: float = 5.0,
 ) -> None:
     if acquire_result.lease_token:
@@ -447,9 +469,9 @@ def release_reference_acquire(
 
 __all__ = [
     "REFERENCE_RUNTIME",
-    "ReferenceServingAcquireResult",
-    "ReferenceServingResolvedSpec",
-    "ReferenceServingTensorSpec",
+    "ReferenceRuntimeAcquireResult",
+    "ReferenceRuntimeResolvedSpec",
+    "ReferenceRuntimeTensorSpec",
     "acquire_reference_binding",
     "acquire_reference_binding_response",
     "build_reference_resolved_spec",
@@ -459,7 +481,7 @@ def release_reference_acquire(
     "prefetch_reference_binding_set",
     "release_reference_acquire",
     "target_from_reference_cache_record",
-    "unpack_prefetched_serving_binding",
-    "unpack_prefetched_serving_binding_set",
+    "unpack_prefetch_handoff",
+    "unpack_prefetch_handoff_set",
     "write_reference_resolved_spec_cache_entry",
 ]
diff --git a/tensorcast/api/store/serving_binding_spec_cache.py b/tensorcast/api/store/runtime_realization_spec_cache.py
similarity index 86%
rename from tensorcast/api/store/serving_binding_spec_cache.py
rename to tensorcast/api/store/runtime_realization_spec_cache.py
index e3320921..9507aa7f 100644
--- a/tensorcast/api/store/serving_binding_spec_cache.py
+++ b/tensorcast/api/store/runtime_realization_spec_cache.py
@@ -1,5 +1,7 @@
 #  Copyright (c) 2026, TensorCast Team.
 
+"""Runtime realization resolved-spec cache helpers."""
+
 from __future__ import annotations
 
 import contextlib
@@ -16,33 +18,33 @@
 
 from tensorcast.types import (
     BlobRef,
-    ServingBindingResolvedSpecCacheEntry,
-    ServingTopologyRef,
+    RuntimeRealizationSpecCacheEntry,
+    RuntimeTopologyRef,
 )
 
-_MANIFEST_PRODUCER = "tensorcast.serving_binding_spec_cache"
+_MANIFEST_PRODUCER = "tensorcast.runtime_realization_spec_cache"
 _MANIFEST_PRODUCER_VERSION = 1
 
 
 def _validate_runtime(runtime: str) -> None:
     if not str(runtime).strip():
-        raise ValueError("serving runtime must not be empty")
+        raise ValueError("runtime must not be empty")  # also rejects whitespace-only
 
 
-class ServingBindingSpecCacheRecord(BaseModel):
+class RuntimeRealizationSpecCacheRecord(BaseModel):  # entry plus blob bytes by name
     model_config = ConfigDict(frozen=True)
 
-    entry: ServingBindingResolvedSpecCacheEntry
+    entry: RuntimeRealizationSpecCacheEntry
     blobs: Mapping[str, bytes]
 
 
-class ServingBindingSpecCacheGroupIndex(BaseModel):
+class RuntimeRealizationSpecCacheGroupIndex(BaseModel):
     model_config = ConfigDict(frozen=True)
 
     schema_version: int = 1
     group_cache_key_digest: str
     runtime: str
-    topology: ServingTopologyRef
+    topology: RuntimeTopologyRef
     group_id: str
     member_cache_key_digests: Mapping[str, str]
 
@@ -68,8 +70,8 @@ def canonical_json_bytes(payload: object) -> bytes:
     return json.dumps(payload, sort_keys=True, separators=(",", ":")).encode("utf-8")
 
 
-def serving_binding_spec_cache_root(cache_root: str | os.PathLike[str]) -> Path:
-    return Path(cache_root) / "serving_binding_specs" / "v1"
+def runtime_realization_spec_cache_root(cache_root: str | os.PathLike[str]) -> Path:
+    return Path(cache_root) / "runtime_realization_specs" / "v1"  # versioned subdir
 
 
 def _sha256_bytes(data: bytes) -> str:
@@ -94,7 +96,7 @@ def _validate_blob_ref(name: str, blob_ref: BlobRef, data: bytes) -> None:
 
 
 def _validate_entry(
-    entry: ServingBindingResolvedSpecCacheEntry, blobs: Mapping[str, bytes]
+    entry: RuntimeRealizationSpecCacheEntry, blobs: Mapping[str, bytes]
 ) -> None:
     _validate_runtime(entry.runtime)
     if entry.cache_key_digest != entry.computed_cache_key_digest():
@@ -134,7 +136,7 @@ def _group_path(root: Path, group_cache_key_digest: str) -> Path:
     return root / "groups" / "sha256" / f"{group_cache_key_digest}.json"
 
 
-def _validate_group_index(index: ServingBindingSpecCacheGroupIndex) -> None:
+def _validate_group_index(index: RuntimeRealizationSpecCacheGroupIndex) -> None:
     if int(index.schema_version) != 1:
         raise ValueError("unsupported group index schema_version")
     if index.group_cache_key_digest != index.computed_group_cache_key_digest():
@@ -150,7 +152,7 @@ def _validate_group_index(index: ServingBindingSpecCacheGroupIndex) -> None:
         raise ValueError("member cache key digests must be distinct")
 
 
-def _manifest_payload(entry: ServingBindingResolvedSpecCacheEntry) -> dict[str, object]:
+def _manifest_payload(entry: RuntimeRealizationSpecCacheEntry) -> dict[str, object]:
     return {
         "schema_version": 1,
         "producer": _MANIFEST_PRODUCER,
@@ -162,10 +164,10 @@ def _manifest_payload(entry: ServingBindingResolvedSpecCacheEntry) -> dict[str,
 def write_resolved_spec_cache_entry(
     cache_root: str | os.PathLike[str],
     *,
-    entry: ServingBindingResolvedSpecCacheEntry,
+    entry: RuntimeRealizationSpecCacheEntry,
     blobs: Mapping[str, bytes],
 ) -> None:
-    root = serving_binding_spec_cache_root(cache_root)
+    root = runtime_realization_spec_cache_root(cache_root)
     _validate_entry(entry, blobs)
     root.mkdir(parents=True, exist_ok=True)
 
@@ -225,9 +227,9 @@ def write_resolved_spec_cache_entry(
 def write_resolved_spec_cache_group_index(
     cache_root: str | os.PathLike[str],
     *,
-    index: ServingBindingSpecCacheGroupIndex,
+    index: RuntimeRealizationSpecCacheGroupIndex,
 ) -> None:
-    root = serving_binding_spec_cache_root(cache_root)
+    root = runtime_realization_spec_cache_root(cache_root)
     _validate_group_index(index)
     root.mkdir(parents=True, exist_ok=True)
 
@@ -263,7 +265,7 @@ def _read_json(path: Path) -> dict[str, object]:
     return payload
 
 
-def _read_record_from_spec_dir(*, spec_dir: Path) -> ServingBindingSpecCacheRecord:
+def _read_record_from_spec_dir(*, spec_dir: Path) -> RuntimeRealizationSpecCacheRecord:
     manifest = _read_json(spec_dir / "manifest.json")
     if manifest.get("schema_version") != 1:
         raise ValueError("unsupported spec manifest schema_version")
@@ -274,20 +276,20 @@ def _read_record_from_spec_dir(*, spec_dir: Path) -> ServingBindingSpecCacheReco
     entry_payload = manifest.get("entry")
     if not isinstance(entry_payload, dict):
         raise ValueError("spec manifest missing entry")
-    entry = ServingBindingResolvedSpecCacheEntry.model_validate(entry_payload)
+    entry = RuntimeRealizationSpecCacheEntry.model_validate(entry_payload)
     blobs: dict[str, bytes] = {}
     for name, blob_ref in entry.blob_refs.items():
         blob_path = spec_dir / _safe_relative_blob_path(blob_ref.path)
         blobs[name] = blob_path.read_bytes()
     _validate_entry(entry, blobs)
-    return ServingBindingSpecCacheRecord(entry=entry, blobs=blobs)
+    return RuntimeRealizationSpecCacheRecord(entry=entry, blobs=blobs)
 
 
 def read_resolved_spec_cache_entry(
     cache_root: str | os.PathLike[str],
     cache_key_digest: str,
-) -> ServingBindingSpecCacheRecord:
-    root = serving_binding_spec_cache_root(cache_root)
+) -> RuntimeRealizationSpecCacheRecord:
+    root = runtime_realization_spec_cache_root(cache_root)
     key_payload = _read_json(_key_path(root, cache_key_digest))
     if key_payload.get("schema_version") != 1:
         raise ValueError("unsupported cache key schema_version")
@@ -299,7 +301,7 @@ def read_resolved_spec_cache_entry(
     entry_payload = key_payload.get("entry")
     if not isinstance(entry_payload, dict):
         raise ValueError("cache key missing entry")
-    key_entry = ServingBindingResolvedSpecCacheEntry.model_validate(entry_payload)
+    key_entry = RuntimeRealizationSpecCacheEntry.model_validate(entry_payload)
     if key_entry.cache_key_digest != cache_key_digest:
         raise ValueError("cache key entry digest mismatch")
     if key_entry.spec_digest != spec_digest:
@@ -313,8 +315,8 @@ def read_resolved_spec_cache_entry(
 def read_matching_resolved_spec_cache_entry(
     cache_root: str | os.PathLike[str],
     *,
-    expected_entry: ServingBindingResolvedSpecCacheEntry,
-) -> ServingBindingSpecCacheRecord:
+    expected_entry: RuntimeRealizationSpecCacheEntry,
+) -> RuntimeRealizationSpecCacheRecord:
     if expected_entry.cache_key_digest != expected_entry.computed_cache_key_digest():
         raise ValueError("expected cache_key_digest does not match canonical key")
     if expected_entry.spec_digest != expected_entry.computed_spec_digest():
@@ -329,8 +331,8 @@ def read_matching_resolved_spec_cache_entry(
 def read_resolved_spec_cache_group_index(
     cache_root: str | os.PathLike[str],
     group_cache_key_digest: str,
-) -> ServingBindingSpecCacheGroupIndex:
-    root = serving_binding_spec_cache_root(cache_root)
+) -> RuntimeRealizationSpecCacheGroupIndex:
+    root = runtime_realization_spec_cache_root(cache_root)
     payload = _read_json(_group_path(root, group_cache_key_digest))
     if payload.get("schema_version") != 1:
         raise ValueError("unsupported group index schema_version")
@@ -341,7 +343,7 @@ def read_resolved_spec_cache_group_index(
     index_payload = payload.get("index")
     if not isinstance(index_payload, dict):
         raise ValueError("group index missing index")
-    index = ServingBindingSpecCacheGroupIndex.model_validate(index_payload)
+    index = RuntimeRealizationSpecCacheGroupIndex.model_validate(index_payload)
     if index.group_cache_key_digest != group_cache_key_digest:
         raise ValueError("group cache key digest mismatch")
     _validate_group_index(index)
@@ -354,3 +356,16 @@ def read_resolved_spec_cache_group_index(
         if record.entry.topology != index.topology:
             raise ValueError("group index member topology mismatch")
     return index
+
+
+__all__ = [
+    "RuntimeRealizationSpecCacheGroupIndex",
+    "RuntimeRealizationSpecCacheRecord",
+    "canonical_json_bytes",
+    "read_matching_resolved_spec_cache_entry",
+    "read_resolved_spec_cache_entry",
+    "read_resolved_spec_cache_group_index",
+    "runtime_realization_spec_cache_root",
+    "write_resolved_spec_cache_entry",
+    "write_resolved_spec_cache_group_index",
+]
diff --git a/tensorcast/api/store/views.py b/tensorcast/api/store/views.py
index 9708dd58..d875e3f2 100644
--- a/tensorcast/api/store/views.py
+++ b/tensorcast/api/store/views.py
@@ -116,7 +116,7 @@ def resolve_view_inputs(
             if isinstance(resolved, tuple):
                 resolved_artifact_id = resolved[0]
             else:
-                resolved_artifact_id = resolved
+                resolved_artifact_id = resolved.artifact_id
             if not resolved_artifact_id:
                 raise ArtifactError(
                     f"Artifact key '{resolved_key}' is not mapped",
diff --git a/tensorcast/artifact_runtime/__init__.py b/tensorcast/artifact_runtime/__init__.py
new file mode 100644
index 00000000..86cf9b9f
--- /dev/null
+++ b/tensorcast/artifact_runtime/__init__.py
@@ -0,0 +1,2 @@
+#  Copyright (c) 2026, TensorCast Team.
+"""Artifact-centered model runtime implementation package."""
diff --git a/tensorcast/serving/admin.py b/tensorcast/artifact_runtime/admin.py
similarity index 70%
rename from tensorcast/serving/admin.py
rename to tensorcast/artifact_runtime/admin.py
index 72125cb9..bd4315fa 100644
--- a/tensorcast/serving/admin.py
+++ b/tensorcast/artifact_runtime/admin.py
@@ -1,23 +1,23 @@
 #  Copyright (c) 2026, TensorCast Team.
-"""Admin/offline serving helpers that are not runtime integration APIs."""
+"""Admin/offline runtime helpers that are not runtime integration APIs."""
 
 from dataclasses import dataclass
 
-from tensorcast.serving._runtime_impl.lifecycle import (
+from tensorcast.artifact_runtime.host import SourceSubjectCoordinator
+from tensorcast.artifact_runtime.intent import LocalSourceBootstrap
+from tensorcast.artifact_runtime.lifecycle import (
     build_local_ready_prepared_artifact,
 )
-from tensorcast.serving.builder.publication import (
+from tensorcast.artifact_runtime.publication.context import (
     RecipePublicationContext,
     build_binding_finalize_build_intent,
     build_pure_transform_build_intent,
 )
-from tensorcast.serving.hosts import SourceSubjectCoordinator
-from tensorcast.serving.local_ready import (
+from tensorcast.artifact_runtime.recipe.local_ready import (
     freeze_local_ready_binding,
-    prepare_local_ready_serving,
     prepare_same_binding_manifest_carrier,
+    realize_local_ready_binding_from_source,
 )
-from tensorcast.serving.runtime import LocalSourceBootstrap
 
 
 @dataclass(frozen=True)
@@ -41,6 +41,6 @@ class AdminLocalSourceBootstrap(LocalSourceBootstrap):
     "build_local_ready_prepared_artifact",
     "build_pure_transform_build_intent",
     "freeze_local_ready_binding",
-    "prepare_local_ready_serving",
     "prepare_same_binding_manifest_carrier",
+    "realize_local_ready_binding_from_source",
 ]
diff --git a/tensorcast/artifact_runtime/artifact/__init__.py b/tensorcast/artifact_runtime/artifact/__init__.py
new file mode 100644
index 00000000..655cf0ca
--- /dev/null
+++ b/tensorcast/artifact_runtime/artifact/__init__.py
@@ -0,0 +1,2 @@
+#  Copyright (c) 2026, TensorCast Team.
+"""Runtime artifact manifest and resolver helpers."""
diff --git a/tensorcast/artifact_runtime/artifact/manifest.py b/tensorcast/artifact_runtime/artifact/manifest.py
new file mode 100644
index 00000000..fe2fa58b
--- /dev/null
+++ b/tensorcast/artifact_runtime/artifact/manifest.py
@@ -0,0 +1,160 @@
+#  Copyright (c) 2026, TensorCast Team.
+"""Runtime artifact manifest parse and validation helpers."""
+
+from __future__ import annotations
+
+from typing import Any
+
+import torch
+
+import tensorcast as tc
+
+RUNTIME_ARTIFACT_SCHEMA_VERSION = int(
+    tc.RuntimeArtifactManifest.model_fields["schema_version"].default
+)  # default of the manifest model's ``schema_version`` field
+SERVING_MANIFEST_TENSOR_NAME = tc.SERVING_MANIFEST_TENSOR_NAME  # re-exported alias
+
+
+class _InvalidRuntimeManifestTensor(RuntimeError):
+    """Raised when the runtime manifest tensor is not a 1D ``torch.uint8`` tensor."""
+
+
+def runtime_manifest_from_tensor_bytes(
+    data: bytes | bytearray,  # serialized manifest payload
+) -> tc.RuntimeArtifactManifest:
+    return tc.RuntimeArtifactManifest.from_bytes(bytes(data))  # normalize to bytes
+
+
+def _runtime_manifest_bytes_from_device(
+    subset: Any,
+    *,
+    device: torch.device | str,
+    manifest_tensor_name: str,
+) -> bytes:
+    """Materialize ``subset`` on ``device`` and return the manifest tensor bytes."""
+    materialized = subset.tensor_dict_with_diagnostics(device=device)
+    try:
+        tensor = materialized.tensors[manifest_tensor_name]
+        if not (tensor.dtype == torch.uint8 and tensor.dim() == 1):
+            message = "TensorCast runtime manifest tensor must be 1D torch.uint8"
+            raise _InvalidRuntimeManifestTensor(message)
+        return bytes(tensor.detach().cpu().tolist())
+    finally:
+        materialized.release()  # released on success and on every failure path
+
+
+def read_runtime_artifact_manifest_tensor(
+    artifact: Any,
+    *,
+    artifact_ref: str,
+    manifest_tensor_name: str = SERVING_MANIFEST_TENSOR_NAME,
+) -> tc.RuntimeArtifactManifest:
+    """Read and parse the manifest tensor of ``artifact`` (CPU first, then CUDA).
+
+    Dtype/rank errors propagate unchanged; RuntimeError is raised if both reads fail.
+    """
+    selected = artifact.subset([manifest_tensor_name])
+    try:
+        payload = _runtime_manifest_bytes_from_device(
+            selected, device="cpu", manifest_tensor_name=manifest_tensor_name
+        )
+    except _InvalidRuntimeManifestTensor:
+        raise
+    except Exception as cpu_exc:
+        try:
+            fallback = torch.device("cuda", torch.cuda.current_device())
+            payload = _runtime_manifest_bytes_from_device(
+                selected, device=fallback, manifest_tensor_name=manifest_tensor_name
+            )
+        except _InvalidRuntimeManifestTensor:
+            raise
+        except Exception as cuda_exc:
+            raise RuntimeError(
+                f"Failed to materialize runtime manifest from '{artifact_ref}' "
+                f"(cpu_error={cpu_exc!r}; cuda_error={cuda_exc!r})"
+            ) from cuda_exc
+    return runtime_manifest_from_tensor_bytes(payload)
+
+
+def cross_check_runtime_artifact_manifest(
+    *,
+    manifest: Any | None,
+    descriptor_tensor_schema_hash: str,
+    tensor_names: tuple[str, ...],
+    expected_tensor_schema_hash: str,
+    runtime_artifact_policy: tc.RuntimeArtifactPolicy | None = None,
+    expected_schema_version: int = RUNTIME_ARTIFACT_SCHEMA_VERSION,
+) -> Any:
+    """Validate ``manifest`` against the expected schema and optional policy.
+
+    Checks run in a fixed order and the first failure raises ``RuntimeError``:
+    presence, schema version, artifact kind, policy pins (manifest ref,
+    representation contract, build digest, topology admission digest), the
+    manifest and descriptor tensor schema hashes, and the tensor count. A policy
+    pin that is ``None`` is skipped. Returns ``manifest`` unchanged on success.
+    """
+    if manifest is None:
+        raise RuntimeError("TensorCast runtime artifact manifest is missing")
+    if manifest.schema_version != expected_schema_version:
+        raise RuntimeError(
+            "TensorCast runtime artifact schema version mismatch: "
+            f"{manifest.schema_version} != {expected_schema_version}"
+        )
+    if manifest.artifact_kind != "serving":
+        raise RuntimeError(
+            "TensorCast runtime artifact has unsupported artifact_kind: "
+            f"{manifest.artifact_kind}"
+        )
+
+    # Policy pins are optional: no policy, or a pin set to None, skips the check.
+    policy = runtime_artifact_policy
+    if policy is not None:
+        wanted_ref = policy.serving_manifest_ref
+        if wanted_ref is not None and manifest.serving_manifest_ref != wanted_ref:
+            raise RuntimeError("TensorCast runtime artifact manifest ref mismatch")
+        wanted_contract = policy.expected_representation_contract_hash
+        if (
+            wanted_contract is not None
+            and manifest.representation_contract_hash != wanted_contract
+        ):
+            raise RuntimeError(
+                "TensorCast runtime artifact representation contract mismatch"
+            )
+        wanted_build = policy.expected_serving_build_digest
+        if wanted_build is not None and manifest.serving_build_digest != wanted_build:
+            raise RuntimeError("TensorCast runtime artifact build digest mismatch")
+        # Both sides are read with getattr, so a policy or manifest lacking the
+        # attribute is treated as carrying ``None`` for it.
+        wanted_topology = getattr(policy, "expected_topology_admission_digest", None)
+        if (
+            wanted_topology is not None
+            and getattr(manifest, "topology_admission_digest", None) != wanted_topology
+        ):
+            raise RuntimeError(
+                "TensorCast runtime artifact topology admission digest mismatch"
+            )
+
+    if manifest.tensor_schema_hash != expected_tensor_schema_hash:
+        raise RuntimeError(
+            "TensorCast runtime artifact tensor schema hash mismatch: "
+            f"manifest={manifest.tensor_schema_hash}, "
+            f"expected={expected_tensor_schema_hash}"
+        )
+    if descriptor_tensor_schema_hash != expected_tensor_schema_hash:
+        raise RuntimeError(
+            "TensorCast runtime artifact descriptor schema hash mismatch: "
+            f"descriptor={descriptor_tensor_schema_hash}, "
+            f"expected={expected_tensor_schema_hash}"
+        )
+    if manifest.canonical_tensor_count != len(tensor_names):
+        raise RuntimeError("TensorCast runtime artifact tensor count mismatch")
+    return manifest
+
+
+__all__ = [
+    "RUNTIME_ARTIFACT_SCHEMA_VERSION",
+    "SERVING_MANIFEST_TENSOR_NAME",
+    "cross_check_runtime_artifact_manifest",
+    "read_runtime_artifact_manifest_tensor",
+    "runtime_manifest_from_tensor_bytes",
+]
diff --git a/tensorcast/serving/resolver.py b/tensorcast/artifact_runtime/artifact/resolver.py
similarity index 76%
rename from tensorcast/serving/resolver.py
rename to tensorcast/artifact_runtime/artifact/resolver.py
index d3bb0005..e45566fc 100644
--- a/tensorcast/serving/resolver.py
+++ b/tensorcast/artifact_runtime/artifact/resolver.py
@@ -1,25 +1,33 @@
 #  Copyright (c) 2026, TensorCast Team.
 
-"""Serving artifact resolution facade for framework integrations."""
+"""Runtime artifact resolution facade for framework integrations."""
 
 from __future__ import annotations
 
+import importlib
+from collections.abc import Callable
 from dataclasses import dataclass
-from typing import Any
+from typing import Any, cast
 
 import torch
 
 import tensorcast as tc
-import tensorcast.serving.artifact_manifest as tc_artifact_manifest
-import tensorcast.serving.builder.materialization as tc_core_materialization
-from tensorcast.api.store import artifact as open_artifact
+import tensorcast.artifact_runtime.artifact.manifest as tc_artifact_manifest
+import tensorcast.artifact_runtime.contract as tc_contract
+import tensorcast.artifact_runtime.recipe.materialization as tc_core_materialization
 from tensorcast.api.store.types import CanonicalIndexEntry
 
-ServingArtifactManifest = tc.ServingArtifactManifest
+RuntimeArtifactManifest = tc.RuntimeArtifactManifest
+
+
+def _default_open_artifact(artifact_ref: str) -> Any:
+    """Open ``artifact_ref`` with ``tensorcast.api.store.artifact`` (lazy import)."""
+    store_artifact = importlib.import_module("tensorcast.api.store").artifact
+    return cast(Callable[[str], Any], store_artifact)(artifact_ref)
 
 
 @dataclass(frozen=True)
-class ResolvedServingArtifact:
+class ResolvedRuntimeArtifact:
     artifact: Any
     artifact_ref: str
     descriptor: Any
@@ -28,7 +36,7 @@ class ResolvedServingArtifact:
     tensor_schema_hash: str
 
 
-def is_reserved_serving_tensor_name(name: str) -> bool:
+def is_reserved_runtime_tensor_name(name: str) -> bool:
     return name.startswith("__tensorcast_meta__.")
 
 
@@ -45,7 +53,7 @@ def model_tensor_names_from_descriptor(descriptor: Any) -> tuple[str, ...]:
     return tuple(
         str(name)
         for name in getattr(descriptor, "tensor_names", ())
-        if not is_reserved_serving_tensor_name(str(name))
+        if not is_reserved_runtime_tensor_name(str(name))
     )
 
 
@@ -92,7 +100,7 @@ def compute_descriptor_tensor_schema_hash(
     *,
     manifest_tensor_name: str,
 ) -> str:
-    return tc.compute_serving_tensor_schema_hash(
+    return tc_contract.compute_canonical_runtime_tensor_schema_hash(
         canonical_index_from_descriptor(descriptor),
         manifest_tensor_name=manifest_tensor_name,
     )
@@ -109,7 +117,7 @@ def _prepared_summary_value(summary: Any, field_name: str) -> str | None:
 def _cross_check_prepared_manifest_summary(
     *,
     summary: Any,
-    manifest: tc.ServingArtifactManifest,
+    manifest: tc.RuntimeArtifactManifest,
 ) -> None:
     fields = (
         "serving_manifest_ref",
@@ -122,26 +130,29 @@ def _cross_check_prepared_manifest_summary(
         actual = _prepared_summary_value(manifest, field_name)
         if expected is not None and actual != expected:
             raise RuntimeError(
-                "TensorCast prepared serving artifact manifest does not match "
+                "TensorCast prepared runtime artifact manifest does not match "
                 f"summary field {field_name}: manifest={actual!r}, "
                 f"summary={expected!r}"
             )
 
 
-class ServingArtifactResolver:
-    """Resolve serving artifacts and enforce manifest/schema/policy checks."""
+class RuntimeArtifactResolver:
+    """Resolve runtime artifacts and enforce manifest/schema/policy checks."""
 
     def __init__(
         self,
         *,
         manifest_tensor_name: str,
         schema_version: int,
+        open_artifact_fn: Callable[[str], Any] | None = None,
     ) -> None:
         self._manifest_tensor_name = manifest_tensor_name
         self._schema_version = schema_version
+        self._open_artifact_fn = open_artifact_fn
 
     def open(self, artifact_ref: str) -> Any:
-        artifact = open_artifact(ref=str(artifact_ref))
+        open_fn = self._open_artifact_fn or _default_open_artifact
+        artifact = open_fn(str(artifact_ref))
         artifact.describe()
         return artifact
 
@@ -156,21 +167,21 @@ def read_manifest(
         artifact: Any,
         *,
         artifact_ref: str,
-    ) -> ResolvedServingArtifact:
+    ) -> ResolvedRuntimeArtifact:
         descriptor = artifact.describe()
         tensor_names = model_tensor_names_from_descriptor(descriptor)
         tensor_schema_hash = self.compute_descriptor_tensor_schema_hash(descriptor)
         if self._manifest_tensor_name not in getattr(descriptor, "tensor_names", ()):
             raise RuntimeError(
-                f"TensorCast artifact '{artifact_ref}' is missing serving "
+                f"TensorCast artifact '{artifact_ref}' is missing runtime "
                 "manifest tensor"
             )
-        manifest = tc_artifact_manifest.read_serving_artifact_manifest_tensor(
+        manifest = tc_artifact_manifest.read_runtime_artifact_manifest_tensor(
             artifact,
             artifact_ref=artifact_ref,
             manifest_tensor_name=self._manifest_tensor_name,
         )
-        return ResolvedServingArtifact(
+        return ResolvedRuntimeArtifact(
             artifact=artifact,
             artifact_ref=str(artifact_ref),
             descriptor=descriptor,
@@ -179,7 +190,7 @@ def read_manifest(
             tensor_schema_hash=tensor_schema_hash,
         )
 
-    def resolve(self, artifact_ref: str) -> ResolvedServingArtifact:
+    def resolve(self, artifact_ref: str) -> ResolvedRuntimeArtifact:
         return self.read_manifest(
             self.open(artifact_ref),
             artifact_ref=artifact_ref,
@@ -188,12 +199,12 @@ def resolve(self, artifact_ref: str) -> ResolvedServingArtifact:
     def resolve_prepared(
         self,
         summary: Any,
-    ) -> ResolvedServingArtifact:
+    ) -> ResolvedRuntimeArtifact:
         artifact_ref = getattr(summary, "serving_artifact_ref", None)
         if artifact_ref is None:
             raise RuntimeError(
                 "TensorCast local-ready summary does not reference a durable "
-                "serving artifact"
+                "runtime artifact"
             )
         artifact_ref = str(artifact_ref)
         artifact = self.open(artifact_ref)
@@ -202,23 +213,23 @@ def resolve_prepared(
         tensor_schema_hash = self.compute_descriptor_tensor_schema_hash(descriptor)
         if self._manifest_tensor_name not in getattr(descriptor, "tensor_names", ()):
             raise RuntimeError(
-                f"TensorCast artifact '{artifact_ref}' is missing serving "
+                f"TensorCast artifact '{artifact_ref}' is missing runtime "
                 "manifest tensor"
             )
-        manifest = tc_artifact_manifest.read_serving_artifact_manifest_tensor(
+        manifest = tc_artifact_manifest.read_runtime_artifact_manifest_tensor(
             artifact,
             artifact_ref=artifact_ref,
             manifest_tensor_name=self._manifest_tensor_name,
         )
         _cross_check_prepared_manifest_summary(summary=summary, manifest=manifest)
-        tc_artifact_manifest.cross_check_serving_artifact_manifest(
+        tc_artifact_manifest.cross_check_runtime_artifact_manifest(
             manifest=manifest,
             descriptor_tensor_schema_hash=tensor_schema_hash,
             tensor_names=tensor_names,
             expected_tensor_schema_hash=str(summary.tensor_schema_hash),
             expected_schema_version=self._schema_version,
         )
-        return ResolvedServingArtifact(
+        return ResolvedRuntimeArtifact(
             artifact=artifact,
             artifact_ref=artifact_ref,
             descriptor=descriptor,
@@ -229,36 +240,36 @@ def resolve_prepared(
 
     def cross_check(
         self,
-        resolved: ResolvedServingArtifact,
+        resolved: ResolvedRuntimeArtifact,
         *,
         expected_tensor_schema_hash: str,
-        serving_runtime_policy: Any | None = None,
-    ) -> ResolvedServingArtifact:
-        tc_artifact_manifest.cross_check_serving_artifact_manifest(
+        runtime_artifact_policy: Any | None = None,
+    ) -> ResolvedRuntimeArtifact:
+        tc_artifact_manifest.cross_check_runtime_artifact_manifest(
             manifest=resolved.manifest,
             descriptor_tensor_schema_hash=resolved.tensor_schema_hash,
             tensor_names=resolved.tensor_names,
             expected_tensor_schema_hash=expected_tensor_schema_hash,
-            serving_runtime_policy=serving_runtime_policy,
+            runtime_artifact_policy=runtime_artifact_policy,
             expected_schema_version=self._schema_version,
         )
         return resolved
 
 
-def resolve_serving_artifact(
+def resolve_runtime_artifact(
     artifact_ref: str,
     *,
     manifest_tensor_name: str | None = None,
     schema_version: int | None = None,
     expected_tensor_schema_hash: str | None = None,
-    serving_runtime_policy: Any | None = None,
-) -> ResolvedServingArtifact:
-    resolver = ServingArtifactResolver(
+    runtime_artifact_policy: Any | None = None,
+) -> ResolvedRuntimeArtifact:
+    resolver = RuntimeArtifactResolver(
         manifest_tensor_name=manifest_tensor_name or tc.SERVING_MANIFEST_TENSOR_NAME,
         schema_version=(
             schema_version
             if schema_version is not None
-            else int(tc.ServingArtifactManifest.model_fields["schema_version"].default)
+            else int(tc.RuntimeArtifactManifest.model_fields["schema_version"].default)
         ),
     )
     resolved = resolver.resolve(artifact_ref)
@@ -266,19 +277,19 @@ def resolve_serving_artifact(
         resolver.cross_check(
             resolved,
             expected_tensor_schema_hash=expected_tensor_schema_hash,
-            serving_runtime_policy=serving_runtime_policy,
+            runtime_artifact_policy=runtime_artifact_policy,
         )
     return resolved
 
 
 __all__ = [
-    "ResolvedServingArtifact",
-    "ServingArtifactManifest",
-    "ServingArtifactResolver",
+    "ResolvedRuntimeArtifact",
+    "RuntimeArtifactManifest",
+    "RuntimeArtifactResolver",
     "canonical_index_from_descriptor",
     "compute_descriptor_tensor_schema_hash",
     "contiguous_stride",
-    "is_reserved_serving_tensor_name",
+    "is_reserved_runtime_tensor_name",
     "model_tensor_names_from_descriptor",
-    "resolve_serving_artifact",
+    "resolve_runtime_artifact",
 ]
diff --git a/tensorcast/serving/runtime_attachment.py b/tensorcast/artifact_runtime/attachment.py
similarity index 98%
rename from tensorcast/serving/runtime_attachment.py
rename to tensorcast/artifact_runtime/attachment.py
index 2cfca228..af562d12 100644
--- a/tensorcast/serving/runtime_attachment.py
+++ b/tensorcast/artifact_runtime/attachment.py
@@ -8,7 +8,7 @@
 from dataclasses import dataclass
 from typing import Any
 
-from tensorcast.serving.runtime_view import RuntimeWorkerView
+from tensorcast.artifact_runtime.view import RuntimeWorkerView
 
 
 def _optional_text(value: Any) -> str | None:
diff --git a/tensorcast/artifact_runtime/binding/__init__.py b/tensorcast/artifact_runtime/binding/__init__.py
new file mode 100644
index 00000000..00a8f7a8
--- /dev/null
+++ b/tensorcast/artifact_runtime/binding/__init__.py
@@ -0,0 +1,2 @@
+#  Copyright (c) 2026, TensorCast Team.
+"""Runtime binding execution helpers."""
diff --git a/tensorcast/serving/binding_runtime.py b/tensorcast/artifact_runtime/binding/execution.py
similarity index 93%
rename from tensorcast/serving/binding_runtime.py
rename to tensorcast/artifact_runtime/binding/execution.py
index 7163a321..96af1673 100644
--- a/tensorcast/serving/binding_runtime.py
+++ b/tensorcast/artifact_runtime/binding/execution.py
@@ -1,6 +1,6 @@
 #  Copyright (c) 2026, TensorCast Team.
 
-"""Serving artifact bind/swap runtime facades."""
+"""Runtime artifact bind/swap facades."""
 
 from __future__ import annotations
 
@@ -90,17 +90,17 @@ def build_materialization_execution_context(
     return options, profile_fields
 
 
-def bind_serving_artifact(
+def bind_runtime_artifact(
     *,
     resolved_artifact: Any,
     tensor_names: Sequence[str],
     device: Any,
-    serving_runtime_policy: Any | None,
+    runtime_artifact_policy: Any | None,
     options: Any | None,
 ) -> Any:
     return resolved_artifact.artifact.subset(list(tensor_names)).bind(
         device=device,
-        serving_runtime_policy=serving_runtime_policy,
+        runtime_artifact_policy=runtime_artifact_policy,
         options=options,
     )
 
@@ -123,12 +123,12 @@ def _binding_tensor_names(binding: Any) -> tuple[str, ...]:
     return tuple(str(name) for name in binding_tensors)
 
 
-def swap_serving_artifact(
+def swap_runtime_artifact(
     *,
     binding: Any,
     resolved_artifact: Any,
     tensor_names: Sequence[str] | None = None,
-    serving_runtime_policy: Any | None,
+    runtime_artifact_policy: Any | None,
     options: Any | None,
 ) -> Any:
     binding_layout_tensor_names = _binding_layout_tensor_names(binding)
@@ -148,13 +148,13 @@ def swap_serving_artifact(
     )
     return binding.swap(
         artifact,
-        serving_runtime_policy=serving_runtime_policy,
+        runtime_artifact_policy=runtime_artifact_policy,
         options=options,
     )
 
 
 __all__ = [
-    "bind_serving_artifact",
+    "bind_runtime_artifact",
     "build_materialization_execution_context",
-    "swap_serving_artifact",
+    "swap_runtime_artifact",
 ]
diff --git a/tensorcast/serving/builder/binding_plan.py b/tensorcast/artifact_runtime/binding/plan.py
similarity index 97%
rename from tensorcast/serving/builder/binding_plan.py
rename to tensorcast/artifact_runtime/binding/plan.py
index e32465f9..28da9c0b 100644
--- a/tensorcast/serving/builder/binding_plan.py
+++ b/tensorcast/artifact_runtime/binding/plan.py
@@ -1,6 +1,6 @@
 #  Copyright (c) 2026, TensorCast Team.
 
-"""Framework-neutral TracePlan lowering for TensorCast serving bindings."""
+"""Framework-neutral TracePlan lowering for TensorCast runtime bindings."""
 
 from __future__ import annotations
 
@@ -10,7 +10,7 @@
 from tensorcast.api.store import BindingRealizationEntry
 from tensorcast.api.store import CopyPlanEntry as StoreCopyPlanEntry
 from tensorcast.api.store import Range as StoreRange
-from tensorcast.serving.builder.trace_ir import (
+from tensorcast.artifact_runtime.recipe.trace_ir import (
     CopyPlanEntry,
     MultiRange,
     Range,
diff --git a/tensorcast/serving/retained_binding.py b/tensorcast/artifact_runtime/binding/retained.py
similarity index 58%
rename from tensorcast/serving/retained_binding.py
rename to tensorcast/artifact_runtime/binding/retained.py
index 22f7bb35..f503e035 100644
--- a/tensorcast/serving/retained_binding.py
+++ b/tensorcast/artifact_runtime/binding/retained.py
@@ -1,10 +1,9 @@
 #  Copyright (c) 2026, TensorCast Team.
-"""Retained serving binding authority and acquire helpers."""
+"""Retained runtime binding authority and acquire helpers."""
 
 from __future__ import annotations
 
 import inspect
-import json
 import logging
 import os
 import time
@@ -14,7 +13,6 @@
 from typing import Any, Callable, ContextManager, Iterator
 
 import torch
-from pydantic import BaseModel, ConfigDict, Field, field_validator, model_validator
 
 import tensorcast as tc
 from tensorcast.api.store.realization_kernel import (
@@ -22,132 +20,19 @@
     envelope_for_runtime_attachment,
     release_contract_for,
 )
+from tensorcast.artifact_runtime.config import (
+    RetainedBindingAcquireSettings as _RetainedBindingAcquireSettings,
+)
+from tensorcast.retained_realization import (
+    retained_realization_claim_mode,
+)
+from tensorcast.retained_realization_authority import (
+    ParsedRetainedRealizationAuthority,
+    RetainedRealizationExpectedDigests,
+)
 
+RetainedBindingAcquireSettings = _RetainedBindingAcquireSettings
 _LOGGER = logging.getLogger(__name__)
-_RETAINED_BINDING_ACQUIRE_MODES = {"disabled", "external"}
-_READINESS_STATES = {
-    "serving_reserved",
-    "serving_local_ready",
-    "serving_published_ready",
-}
-
-
-def _normalize_optional_text(value: Any) -> str | None:
-    if value is None:
-        return None
-    normalized = str(value).strip()
-    return normalized or None
-
-
-def _normalize_enum(value: Any, *, allowed: set[str], field_name: str) -> str:
-    normalized = str(value).strip().lower()
-    if normalized not in allowed:
-        raise ValueError(
-            f"{field_name} must be one of {sorted(allowed)}, got: {value!r}"
-        )
-    return normalized
-
-
-class RetainedServingBindingExpectedDigests(BaseModel):
-    model_config = ConfigDict(frozen=True, extra="forbid")
-
-    target_layout_hash: str
-    tensor_schema_hash: str
-    serving_build_digest: str
-    resolved_spec_digest: str
-
-    @field_validator(
-        "target_layout_hash",
-        "tensor_schema_hash",
-        "serving_build_digest",
-        "resolved_spec_digest",
-        mode="before",
-    )
-    @classmethod
-    def _normalize_required_text(cls, value: Any) -> str:
-        normalized = _normalize_optional_text(value)
-        if normalized is None:
-            raise ValueError("expected digest fields must be non-empty")
-        return normalized
-
-
-class RetainedServingBindingAuthority(BaseModel):
-    model_config = ConfigDict(frozen=True, extra="forbid")
-
-    group_id: str
-    member_ref: dict[str, Any]
-    daemon_id: str
-    daemon_session_id: str
-    device_uuid: str
-    binding_value_ref: dict[str, Any]
-    reservation_capability: dict[str, Any]
-    group_realization_acquire: dict[str, Any] | None = None
-    local_serving_ref: str | None = None
-    readiness: str
-    verification_state: str = "local_only"
-    serving_artifact_id: str | None = None
-    trusted_reservation_bytes: int = Field(ge=0)
-    expected: RetainedServingBindingExpectedDigests
-
-    @field_validator(
-        "group_id",
-        "daemon_id",
-        "daemon_session_id",
-        "device_uuid",
-        mode="before",
-    )
-    @classmethod
-    def _normalize_required_text(cls, value: Any) -> str:
-        normalized = _normalize_optional_text(value)
-        if normalized is None:
-            raise ValueError("retained binding authority text fields required")
-        return normalized
-
-    @field_validator(
-        "local_serving_ref",
-        "verification_state",
-        "serving_artifact_id",
-        mode="before",
-    )
-    @classmethod
-    def _normalize_optional_fields(cls, value: Any) -> Any:
-        return _normalize_optional_text(value)
-
-    @field_validator("readiness", mode="before")
-    @classmethod
-    def _normalize_readiness(cls, value: Any) -> str:
-        return _normalize_enum(
-            value,
-            allowed=_READINESS_STATES,
-            field_name="retained_binding_acquire.authority.readiness",
-        )
-
-    @model_validator(mode="after")
-    def _validate_published_ready(self) -> RetainedServingBindingAuthority:
-        if self.readiness == "serving_published_ready" and not self.serving_artifact_id:
-            raise ValueError(
-                "retained_binding_acquire.authority.serving_artifact_id is required when "
-                "readiness='serving_published_ready'"
-            )
-        return self
-
-
-@dataclass(frozen=True)
-class ParsedRetainedServingBindingAuthority:
-    group_id: str
-    local_serving_ref: str | None
-    binding_value_ref: tc.BindingValueRef
-    reservation_capability: tc.BindingReservationCapability
-    daemon_id: str
-    daemon_session_id: str
-    device_uuid: str
-    member: tc.ServingBindingMemberRef
-    reservation_bytes: int
-    expected: RetainedServingBindingExpectedDigests
-    readiness: str
-    verification_state: str
-    serving_artifact_id: str | None = None
-    group_realization_acquire: tc.GroupRealizationAcquireRef | None = None
 
 
 @dataclass(frozen=True)
@@ -163,10 +48,10 @@ def __init__(
         client: Any,
         response: Any,
         runtime: Any,
-        authority: ParsedRetainedServingBindingAuthority,
+        authority: ParsedRetainedRealizationAuthority,
         binding_value_ref: tc.BindingValueRef,
         binding_layout_id: str,
-        member_ref: tc.ServingBindingMemberRef,
+        member_ref: tc.RuntimeBindingMemberRef,
         reservation_bytes: int,
         lease_token: bytes,
     ) -> None:
@@ -221,12 +106,12 @@ class RuntimeRetainedBindingAttachmentHandle:
     tensors: Mapping[str, torch.Tensor]
     binding_layout_id: str
     binding_value_ref: tc.BindingValueRef
-    member_ref: tc.ServingBindingMemberRef
+    member_ref: tc.RuntimeBindingMemberRef
     reservation_bytes: int
     _state: _RetainedBindingLifecycleState
 
     @property
-    def authority(self) -> ParsedRetainedServingBindingAuthority:
+    def authority(self) -> ParsedRetainedRealizationAuthority:
         return self._state.authority
 
     @property
@@ -261,12 +146,12 @@ class AttachedRetainedBinding:
     tensors: Mapping[str, torch.Tensor]
     binding_layout_id: str
     binding_value_ref: tc.BindingValueRef
-    member_ref: tc.ServingBindingMemberRef
+    member_ref: tc.RuntimeBindingMemberRef
     reservation_bytes: int
     _state: _RetainedBindingLifecycleState
 
     @property
-    def authority(self) -> ParsedRetainedServingBindingAuthority:
+    def authority(self) -> ParsedRetainedRealizationAuthority:
         return self._state.authority
 
     @property
@@ -311,13 +196,58 @@ def __exit__(self, *_exc: object) -> None:
         self.close()
 
 
+@dataclass
+class RestoredRetainedBinding:
+    """Restored retained binding tensors before runtime ownership transfer."""
+
+    _attached: AttachedRetainedBinding  # source of the delegated properties below
+    _runtime_handle: RuntimeRetainedBindingAttachmentHandle | None = None
+
+    @property
+    def tensors(self) -> Mapping[str, torch.Tensor]:
+        return self._attached.tensors
+
+    @property
+    def binding_layout_id(self) -> str:
+        return self._attached.binding_layout_id
+
+    @property
+    def binding_value_ref(self) -> tc.BindingValueRef:
+        return self._attached.binding_value_ref
+
+    @property
+    def member_ref(self) -> tc.RuntimeBindingMemberRef:
+        return self._attached.member_ref
+
+    @property
+    def reservation_bytes(self) -> int:
+        return self._attached.reservation_bytes
+
+    @property
+    def authority(self) -> ParsedRetainedRealizationAuthority:
+        return self._attached.authority
+
+    @property
+    def runtime_handle(self) -> RuntimeRetainedBindingAttachmentHandle | None:
+        return self._runtime_handle  # None until transfer_to_runtime() has been called
+
+    def transfer_to_runtime(self) -> RuntimeRetainedBindingAttachmentHandle:
+        if self._runtime_handle is None:  # transfer at most once; later calls reuse it
+            self._runtime_handle = self._attached.transfer_to_runtime()
+        return self._runtime_handle
+
+    def close(self) -> None:
+        if self._runtime_handle is None:  # no-op once a runtime handle exists
+            self._attached.close()
+
+
 @dataclass(frozen=True)
 class BorrowedRetainedBindingLease:
     """Single-owner acquire lease for a retained binding value."""
 
-    authority: ParsedRetainedServingBindingAuthority
+    authority: ParsedRetainedRealizationAuthority
     binding_value_ref: tc.BindingValueRef
-    member_ref: tc.ServingBindingMemberRef
+    member_ref: tc.RuntimeBindingMemberRef
     reservation_bytes: int
     _state: _RetainedBindingLifecycleState
 
@@ -384,91 +314,12 @@ def close(self) -> None:
         self._state.release()
 
 
-def _payload_to_dict(value: Any, *, field_name: str) -> dict[str, Any]:
-    if hasattr(value, "model_dump"):
-        return dict(value.model_dump(mode="python"))
-    if isinstance(value, Mapping):
-        return dict(value)
-    if isinstance(value, str):
-        try:
-            parsed = json.loads(value)
-        except json.JSONDecodeError as exc:
-            raise ValueError(f"{field_name} must be a JSON object") from exc
-        if not isinstance(parsed, Mapping):
-            raise ValueError(f"{field_name} must be a JSON object")
-        return dict(parsed)
-    raise ValueError(f"{field_name} must be a dict or JSON object")
-
-
-def _model_validate(model_type: Any, value: Any, *, field_name: str) -> Any:
-    payload = _payload_to_dict(value, field_name=field_name)
-    try:
-        return model_type.model_validate(payload)
-    except Exception as exc:
-        raise ValueError(
-            f"{field_name} is invalid for TensorCast retained binding acquire: {exc}"
-        ) from exc
-
-
-def _validate_authority_consistency(
-    authority: ParsedRetainedServingBindingAuthority,
-) -> None:
-    capability = authority.reservation_capability
-    if capability.binding_value_ref != authority.binding_value_ref:
-        raise ValueError(
-            "retained_binding_acquire.authority.reservation_capability."
-            "binding_value_ref must match retained_binding_acquire.authority."
-            "binding_value_ref"
-        )
-    if capability.daemon_id != authority.daemon_id:
-        raise ValueError(
-            "retained_binding_acquire.authority.reservation_capability."
-            "daemon_id mismatch"
-        )
-    if capability.daemon_session_id != authority.daemon_session_id:
-        raise ValueError(
-            "retained_binding_acquire.authority.reservation_capability."
-            "daemon_session_id mismatch"
-        )
-    if capability.device_uuid != authority.device_uuid:
-        raise ValueError(
-            "retained_binding_acquire.authority.reservation_capability."
-            "device_uuid mismatch"
-        )
-    if capability.member != authority.member:
-        raise ValueError(
-            "retained_binding_acquire.authority.reservation_capability.member mismatch"
-        )
-    if capability.reservation_bytes != authority.reservation_bytes:
-        raise ValueError(
-            "retained_binding_acquire.authority.reservation_capability."
-            "reservation_bytes must match retained_binding_acquire.authority."
-            "trusted_reservation_bytes"
-        )
-    if authority.member.group_id is not None and authority.member.group_id != (
-        authority.group_id
-    ):
-        raise ValueError(
-            "retained_binding_acquire.authority.member_ref.group_id must match "
-            "retained_binding_acquire.authority.group_id"
-        )
-    if (
-        authority.readiness == "serving_published_ready"
-        and not authority.serving_artifact_id
-    ):
-        raise ValueError(
-            "retained_binding_acquire.authority.serving_artifact_id is required "
-            "when retained_binding_acquire.authority.readiness="
-            "'serving_published_ready'"
-        )
-
-
 def _validate_authority_is_attachable(
-    authority: ParsedRetainedServingBindingAuthority,
+    authority: ParsedRetainedRealizationAuthority,
 ) -> None:
-    if authority.readiness == "serving_reserved":
+    if authority.readiness == "runtime_reserved":
         raise ValueError(
-            "retained_binding_acquire.authority.readiness='serving_reserved' "
+            "retained_binding_acquire.authority.readiness='runtime_reserved' "
             "is not attachable"
         )
     group_acquire = authority.group_realization_acquire
@@ -558,7 +409,7 @@ def _lease_token_from_response(response: Any) -> bytes:
 
 def _validate_acquire_response(
     response: Any,
-    authority: ParsedRetainedServingBindingAuthority,
+    authority: ParsedRetainedRealizationAuthority,
 ) -> tc.BindingValueRef:
     acquired_ref = _binding_value_ref_from_response(
         response,
@@ -581,7 +432,7 @@ def _validate_acquire_response(
 
 def _acquire_retained_binding_response(
     client: Any,
-    authority: ParsedRetainedServingBindingAuthority,
+    authority: ParsedRetainedRealizationAuthority,
     *,
     caller_pid: int,
     timeout_s: float | None,
@@ -592,7 +443,7 @@ def _acquire_retained_binding_response(
         "expected_device_uuid": authority.device_uuid,
         "expected_target_layout_hash": authority.expected.target_layout_hash,
         "expected_tensor_schema_hash": authority.expected.tensor_schema_hash,
-        "expected_serving_build_digest": authority.expected.serving_build_digest,
+        "expected_serving_build_digest": authority.expected.runtime_build_digest,
         "expected_daemon_id": authority.daemon_id,
         "expected_daemon_session_id": authority.daemon_session_id,
         "expected_member": authority.member,
@@ -632,7 +483,7 @@ def _release_lease_token_after_acquire_failure(
         _release_lease_token(client, lease_token=lease_token)
     except Exception:
         _LOGGER.exception(
-            "Failed to release retained serving binding lease after acquire failure",
+            "Failed to release retained runtime binding lease after acquire failure",
         )
 
 
@@ -641,7 +492,7 @@ def acquire_local_ready_retained_binding_lease(
     *,
     local_serving_ref: str,
     expected_device_uuid: str,
-    expected_member: tc.ServingBindingMemberRef,
+    expected_member: tc.RuntimeBindingMemberRef,
     expected_tensor_schema_hash: str,
     expected_serving_build_digest: str,
     expected_target_layout_hash: str | None = None,
@@ -653,7 +504,7 @@ def acquire_local_ready_retained_binding_lease(
     client: Any | None = None,
     timeout_s: float | None = None,
 ) -> Iterator[BorrowedRetainedBindingLease]:
-    """Acquire an already-retained local-ready serving binding by local ref."""
+    """Acquire an already-retained local-ready runtime binding by local ref."""
 
     if runtime is None:
         from tensorcast.api.store import get_runtime_context
@@ -703,10 +554,10 @@ def acquire_local_ready_retained_binding_lease(
         or getattr(runtime, "session_id", "")
         or "local-session"
     )
-    expected = RetainedServingBindingExpectedDigests(
+    expected = RetainedRealizationExpectedDigests(
         target_layout_hash=expected_target_layout_hash or "local-ready-direct",
         tensor_schema_hash=expected_tensor_schema_hash,
-        serving_build_digest=expected_serving_build_digest,
+        runtime_build_digest=expected_serving_build_digest,
         resolved_spec_digest="local-ready-direct",
     )
     reservation_capability = tc.BindingReservationCapability(
@@ -723,10 +574,10 @@ def acquire_local_ready_retained_binding_lease(
         scope_digest=(
             f"{expected.target_layout_hash}:"
             f"{expected.tensor_schema_hash}:"
-            f"{expected.serving_build_digest}"
+            f"{expected.runtime_build_digest}"
         ),
     )
-    authority = ParsedRetainedServingBindingAuthority(
+    authority = ParsedRetainedRealizationAuthority(
         group_id=expected_member.group_id or "",
         local_serving_ref=local_serving_ref,
         binding_value_ref=binding_value_ref,
@@ -737,7 +588,7 @@ def acquire_local_ready_retained_binding_lease(
         member=expected_member,
         reservation_bytes=reservation_bytes,
         expected=expected,
-        readiness="serving_local_ready",
+        readiness="runtime_local_ready",
         verification_state="local_only",
         serving_artifact_id=serving_artifact_id,
     )
@@ -766,8 +617,8 @@ def acquire_local_ready_retained_binding_lease(
 
 
 @contextmanager
-def acquire_retained_serving_binding_lease(
-    authority: ParsedRetainedServingBindingAuthority,
+def acquire_retained_binding_lease(
+    authority: ParsedRetainedRealizationAuthority,
     *,
     caller_pid: int | None = None,
     runtime: Any | None = None,
@@ -824,139 +675,41 @@ def acquire_retained_serving_binding_lease(
         lease.close()
 
 
-def _select_retained_serving_binding_authority_config(
-    config: Any,
-    *,
-    expected_member: tc.ServingBindingMemberRef | None = None,
-) -> RetainedServingBindingAuthority:
-    acquire_config = config.retained_binding_acquire
-    authority_config = acquire_config.authority
-    if authority_config is not None:
-        return authority_config
-
-    authority_configs = tuple(acquire_config.authorities)
-    if not authority_configs:
-        raise ValueError(
-            "TensorCast retained binding authority requires "
-            "retained_binding_acquire.mode='external' and "
-            "retained_binding_acquire.authority or "
-            "retained_binding_acquire.authorities"
-        )
-    if expected_member is None:
-        if len(authority_configs) == 1:
-            return authority_configs[0]
-        raise ValueError(
-            "TensorCast retained binding authority set requires an expected "
-            "serving member to select the worker authority"
-        )
-
-    for index, candidate in enumerate(authority_configs):
-        member = _model_validate(
-            tc.ServingBindingMemberRef,
-            candidate.member_ref,
-            field_name=(f"retained_binding_acquire.authorities[{index}].member_ref"),
-        )
-        if member == expected_member:
-            return candidate
-    raise ValueError(
-        "TensorCast retained binding authority set has no authority for "
-        f"expected member {expected_member!r}"
-    )
-
+def retained_binding_acquire_mode(extra: Mapping[str, Any] | None) -> str:
+    return retained_realization_claim_mode(extra)
 
-def parse_retained_serving_binding_authority(
-    extra: Mapping[str, Any] | Any,
-    *,
-    expected_member: tc.ServingBindingMemberRef | None = None,
-) -> ParsedRetainedServingBindingAuthority:
-    from tensorcast.serving.config import ServingConfig
 
-    config = (
-        extra if isinstance(extra, ServingConfig) else ServingConfig.from_mapping(extra)
-    )
-    if config.retained_binding_acquire.mode != "external":
-        raise ValueError(
-            "TensorCast retained binding authority requires "
-            "retained_binding_acquire.mode='external' and "
-            "retained_binding_acquire.authority"
+def runtime_restore_rejection_reason(
+    authority: ParsedRetainedRealizationAuthority,
+) -> str | None:
+    readiness = getattr(authority, "readiness", None)
+    if readiness == "runtime_reserved":
+        return (
+            "TensorCast retained acquire readiness='runtime_reserved' is not attachable"
         )
-    authority_config = _select_retained_serving_binding_authority_config(
-        config,
-        expected_member=expected_member,
-    )
-
-    binding_value_ref = _model_validate(
-        tc.BindingValueRef,
-        authority_config.binding_value_ref,
-        field_name="retained_binding_acquire.authority.binding_value_ref",
-    )
-    member = _model_validate(
-        tc.ServingBindingMemberRef,
-        authority_config.member_ref,
-        field_name="retained_binding_acquire.authority.member_ref",
-    )
-    capability_payload = _payload_to_dict(
-        authority_config.reservation_capability,
-        field_name="retained_binding_acquire.authority.reservation_capability",
-    )
-    capability_payload.setdefault(
-        "binding_value_ref", binding_value_ref.model_dump(mode="python")
-    )
-    capability_payload.setdefault("member", member.model_dump(mode="python"))
-    reservation_capability = _model_validate(
-        tc.BindingReservationCapability,
-        capability_payload,
-        field_name="retained_binding_acquire.authority.reservation_capability",
-    )
-    group_realization_acquire = None
-    if authority_config.group_realization_acquire is not None:
-        group_realization_acquire = _model_validate(
-            tc.GroupRealizationAcquireRef,
-            authority_config.group_realization_acquire,
-            field_name="retained_binding_acquire.authority.group_realization_acquire",
+    if readiness in {
+        "serving_group_prepared",
+        "serving_group_published_ready",
+    }:
+        return (
+            "TensorCast retained acquire group readiness requires a "
+            "published group-realization transaction authority"
         )
-
-    authority = ParsedRetainedServingBindingAuthority(
-        group_id=authority_config.group_id,
-        local_serving_ref=authority_config.local_serving_ref,
-        binding_value_ref=binding_value_ref,
-        reservation_capability=reservation_capability,
-        daemon_id=authority_config.daemon_id,
-        daemon_session_id=authority_config.daemon_session_id,
-        device_uuid=authority_config.device_uuid,
-        member=member,
-        reservation_bytes=int(authority_config.trusted_reservation_bytes),
-        expected=authority_config.expected,
-        readiness=authority_config.readiness,
-        verification_state=authority_config.verification_state or "local_only",
-        serving_artifact_id=authority_config.serving_artifact_id,
-        group_realization_acquire=group_realization_acquire,
-    )
-    _validate_authority_consistency(authority)
-    if expected_member is not None and authority.member != expected_member:
-        raise ValueError(
-            "TensorCast retained binding authority member does not match "
-            f"expected member: authority={authority.member!r}, "
-            f"expected={expected_member!r}"
+    if readiness == "runtime_published_ready":
+        return (
+            "TensorCast retained acquire readiness='runtime_published_ready' "
+            "requires a swap-capable runtime binding handle"
         )
-    return authority
-
-
-def retained_binding_acquire_mode(extra: Mapping[str, Any] | None) -> str:
-    if extra is None or not isinstance(extra, Mapping):
-        return "disabled"
-    from tensorcast.serving.config import ServingConfig
-
-    return ServingConfig.from_mapping(extra).retained_binding_acquire.mode
+    return None
 
 
 @contextmanager
-def acquire_retained_serving_binding(
+def acquire_retained_binding(
     *,
-    authority: ParsedRetainedServingBindingAuthority | None = None,
+    authority: ParsedRetainedRealizationAuthority | None = None,
     local_serving_ref: str | None = None,
     target_device: torch.device | str | None = None,
-    expected_member: tc.ServingBindingMemberRef | None = None,
+    expected_member: tc.RuntimeBindingMemberRef | None = None,
     expected_tensor_schema_hash: str | None = None,
     expected_serving_build_digest: str | None = None,
     expected_target_layout_hash: str | None = None,
@@ -971,7 +724,7 @@ def acquire_retained_serving_binding(
     if authority is not None:
         if local_serving_ref is not None:
             raise ValueError(
-                "acquire_retained_serving_binding accepts either authority "
+                "acquire_retained_binding accepts either authority "
                 "or local_serving_ref, not both"
             )
         if expected_member is not None and authority.member != expected_member:
@@ -980,7 +733,7 @@ def acquire_retained_serving_binding(
                 "the expected runtime placement: "
                 f"authority={authority.member}, expected={expected_member}"
             )
-        with acquire_retained_serving_binding_lease(
+        with acquire_retained_binding_lease(
             authority,
             caller_pid=caller_pid,
             runtime=runtime,
@@ -992,7 +745,7 @@ def acquire_retained_serving_binding(
 
     if local_serving_ref is None:
         raise ValueError(
-            "acquire_retained_serving_binding requires authority or local_serving_ref"
+            "acquire_retained_binding requires authority or local_serving_ref"
         )
     if target_device is None or expected_member is None:
         raise ValueError(
@@ -1035,114 +788,105 @@ def acquire_retained_serving_binding(
         yield lease
 
 
-def retained_serving_binding_trusted_reservation_bytes(
-    load_config_or_extra: Any,
+@contextmanager
+def restore_retained_binding(
     *,
-    expected_member: tc.ServingBindingMemberRef | None = None,
-) -> int:
-    extra = getattr(
-        load_config_or_extra, "model_loader_extra_config", load_config_or_extra
-    )
-    if extra is None or not isinstance(extra, Mapping):
-        return 0
-    if retained_binding_acquire_mode(extra) != "external":
-        return 0
-    return parse_retained_serving_binding_authority(
-        extra,
-        expected_member=expected_member,
-    ).reservation_bytes
+    authority: ParsedRetainedRealizationAuthority | None = None,
+    local_serving_ref: str | None = None,
+    target_device: torch.device | str,
+    expected_member: tc.RuntimeBindingMemberRef | None = None,
+    expected_tensor_schema_hash: str | None = None,
+    expected_serving_build_digest: str | None = None,
+    expected_target_layout_hash: str | None = None,
+    expected_daemon_id: str | None = None,
+    expected_daemon_session_id: str | None = None,
+    serving_artifact_id: str | None = None,
+    caller_pid: int | None = None,
+    runtime: Any | None = None,
+    client: Any | None = None,
+    restore_fn: Any | None = None,
+    timeout_s: float | None = None,
+) -> Iterator[RestoredRetainedBinding]:
+    """Acquire and restore a retained binding value for framework attach.
 
+    If the framework does not call ``transfer_to_runtime()``, the restored owner
+    is released automatically when the context exits. After transfer, close
+    ownership belongs to the returned runtime handle.
+    """
 
-def retained_serving_binding_extra_from_prefetched_binding(
-    *,
-    prefetched: tc.PrefetchedServingBinding,
-    target: tc.ServingBindingTarget,
-    expected_member: tc.ServingBindingMemberRef | None = None,
-) -> dict[str, Any]:
-    authority = _retained_serving_binding_authority_from_prefetched_binding(
-        prefetched=prefetched,
-        target=target,
-        expected_member=expected_member,
-    )
-    return _retained_serving_binding_extra(
+    with acquire_retained_binding(
         authority=authority,
-        config_key="retained_binding_acquire",
-    )
-
-
-def _retained_serving_binding_authority_from_prefetched_binding(
-    *,
-    prefetched: tc.PrefetchedServingBinding,
-    target: tc.ServingBindingTarget,
-    expected_member: tc.ServingBindingMemberRef | None = None,
-) -> dict[str, Any]:
-    member = prefetched.member
-    if expected_member is not None and member != expected_member:
-        raise ValueError(
-            "Prefetched serving binding member does not match expected "
-            f"placement: prefetched={member}, expected={expected_member}"
-        )
-    authority: dict[str, Any] = {
-        "group_id": member.group_id or "",
-        "member_ref": _model_dump(member),
-        "daemon_id": prefetched.daemon_id,
-        "daemon_session_id": prefetched.daemon_session_id,
-        "device_uuid": prefetched.device_uuid,
-        "binding_value_ref": _model_dump(prefetched.binding_value_ref),
-        "reservation_capability": _model_dump(prefetched.reservation_capability),
-        "local_serving_ref": prefetched.local_serving_ref,
-        "readiness": str(getattr(prefetched.readiness, "value", prefetched.readiness)),
-        "verification_state": str(
-            getattr(
-                prefetched.verification_state,
-                "value",
-                prefetched.verification_state,
-            )
-        ),
-        "serving_artifact_id": prefetched.serving_artifact_id,
-        "trusted_reservation_bytes": prefetched.reservation_bytes,
-        "expected": {
-            "target_layout_hash": target.resolved_layout.target_layout_hash,
-            "tensor_schema_hash": target.resolved_layout.tensor_schema_hash,
-            "serving_build_digest": target.serving_build_digest,
-            "resolved_spec_digest": target.resolved_layout.spec_digest,
-        },
-    }
-    if prefetched.group_realization_acquire is not None:
-        authority["group_realization_acquire"] = _model_dump(
-            prefetched.group_realization_acquire
+        local_serving_ref=local_serving_ref,
+        target_device=target_device,
+        expected_member=expected_member,
+        expected_tensor_schema_hash=expected_tensor_schema_hash,
+        expected_serving_build_digest=expected_serving_build_digest,
+        expected_target_layout_hash=expected_target_layout_hash,
+        expected_daemon_id=expected_daemon_id,
+        expected_daemon_session_id=expected_daemon_session_id,
+        serving_artifact_id=serving_artifact_id,
+        caller_pid=caller_pid if caller_pid is not None else os.getpid(),
+        runtime=runtime,
+        client=client,
+        timeout_s=timeout_s,
+    ) as lease:
+        attached = lease.restore(
+            target_device=torch.device(target_device),
+            restore_fn=restore_fn,
         )
-    return authority
+        restored = RestoredRetainedBinding(attached)
+        try:
+            yield restored
+        finally:
+            restored.close()
 
 
-def _retained_serving_binding_extra(
+@contextmanager
+def restore_prepared_local_ready_binding(
     *,
-    authority: dict[str, Any],
-    config_key: str,
-) -> dict[str, Any]:
-    return {
-        config_key: {
-            "mode": "external",
-            "authority": authority,
-        },
-    }
-
+    resolved_artifact: Any,
+    target_device: torch.device | str,
+    expected_member: tc.RuntimeBindingMemberRef,
+    expected_tensor_schema_hash: str,
+    expected_serving_build_digest: str | None = None,
+    caller_pid: int | None = None,
+    timeout_s: float | None = None,
+    runtime: Any | None = None,
+    client: Any | None = None,
+    restore_fn: Any | None = None,
+) -> Iterator[RestoredRetainedBinding]:
+    """Restore a local-ready retained value referenced by a runtime manifest."""
 
-def retained_serving_binding_extra_json(
-    *,
-    prefetched: tc.PrefetchedServingBinding,
-    target: tc.ServingBindingTarget,
-    expected_member: tc.ServingBindingMemberRef | None = None,
-) -> str:
-    return json.dumps(
-        retained_serving_binding_extra_from_prefetched_binding(
-            prefetched=prefetched,
-            target=target,
-            expected_member=expected_member,
-        ),
-        sort_keys=True,
-        separators=(",", ":"),
+    manifest = getattr(resolved_artifact, "manifest", None)
+    local_serving_ref = getattr(manifest, "local_serving_ref", None)
+    if manifest is None or not local_serving_ref:
+        raise RuntimeError(
+            "TensorCast prepared local-ready startup requires local_serving_ref "
+            "in the runtime artifact manifest"
+        )
+    serving_build_digest = (
+        expected_serving_build_digest
+        if expected_serving_build_digest is not None
+        else getattr(manifest, "serving_build_digest", None)
     )
+    if not serving_build_digest:
+        raise RuntimeError(
+            "TensorCast prepared local-ready startup requires serving_build_digest"
+        )
+    with restore_retained_binding(
+        local_serving_ref=str(local_serving_ref),
+        target_device=target_device,
+        expected_member=expected_member,
+        expected_tensor_schema_hash=expected_tensor_schema_hash,
+        expected_serving_build_digest=str(serving_build_digest),
+        serving_artifact_id=str(getattr(resolved_artifact, "artifact_ref", None) or ""),
+        caller_pid=caller_pid,
+        timeout_s=timeout_s,
+        runtime=runtime,
+        client=client,
+        restore_fn=restore_fn,
+    ) as restored:
+        yield restored
 
 
 def promote_current_value_and_wait(
@@ -1250,53 +994,3 @@ def _promotion_state_name(
         if mapped is not None:
             return str(mapped.value).strip().lower()
     return str(value).strip().lower()
-
-
-def _model_dump(value: Any) -> dict[str, Any]:
-    if hasattr(value, "model_dump"):
-        return dict(value.model_dump(mode="python"))
-    if isinstance(value, Mapping):
-        return dict(value)
-    raise TypeError(f"Cannot serialize {type(value)!r}")
-
-
-class RetainedBindingAcquireSettings(BaseModel):
-    model_config = ConfigDict(frozen=True, extra="forbid")
-
-    mode: str = "disabled"
-    authority: RetainedServingBindingAuthority | None = None
-    authorities: tuple[RetainedServingBindingAuthority, ...] = ()
-
-    @field_validator("mode", mode="before")
-    @classmethod
-    def _normalize_mode(cls, value: Any) -> str:
-        if value is None:
-            return "disabled"
-        return _normalize_enum(
-            value,
-            allowed=_RETAINED_BINDING_ACQUIRE_MODES,
-            field_name="retained_binding_acquire.mode",
-        )
-
-    @model_validator(mode="after")
-    def _validate_authority(self) -> RetainedBindingAcquireSettings:
-        has_authority = self.authority is not None
-        has_authorities = bool(self.authorities)
-        if self.mode == "external" and not (has_authority or has_authorities):
-            raise ValueError(
-                "retained_binding_acquire.authority or "
-                "retained_binding_acquire.authorities is required when "
-                "retained_binding_acquire.mode='external'"
-            )
-        if self.mode == "external" and has_authority and has_authorities:
-            raise ValueError(
-                "retained_binding_acquire.authority and "
-                "retained_binding_acquire.authorities are mutually exclusive"
-            )
-        if self.mode != "external" and (has_authority or has_authorities):
-            raise ValueError(
-                "retained_binding_acquire.authority and "
-                "retained_binding_acquire.authorities are only valid when "
-                "retained_binding_acquire.mode='external'"
-            )
-        return self
diff --git a/tensorcast/artifact_runtime/config.py b/tensorcast/artifact_runtime/config.py
new file mode 100644
index 00000000..23e40244
--- /dev/null
+++ b/tensorcast/artifact_runtime/config.py
@@ -0,0 +1,727 @@
+#  Copyright (c) 2026, TensorCast Team.
+
+"""Artifact runtime configuration schema and startup planning."""
+
+from __future__ import annotations
+
+import importlib.resources
+import re
+from dataclasses import dataclass, field
+from pathlib import Path
+from threading import Lock
+from typing import Any, Mapping
+
+from pydantic import BaseModel, ConfigDict, Field, field_validator, model_validator
+
+from tensorcast.artifact_runtime.locator import ArtifactLocator
+from tensorcast.artifact_runtime.policy import RuntimePolicy
+from tensorcast.retained_realization_authority import (
+    ParsedRetainedRealizationAuthority,
+    RetainedRealizationAuthority,
+)
+
+_INIT_LOCK = Lock()  # guards the init check in RuntimeSettings.ensure_initialized
+_INIT_KWARGS: dict[str, Any] | None = None  # kwargs passed to tc.init(); None before
+_DEFAULT_GLOBAL_STORE_ADDRESS = "127.0.0.1:50051"  # fallback for connect mode
+DEFAULT_RUNTIME_PROFILE = "serving_single_node"  # RuntimeSettings.profile default
+
+_RUNTIME_MODES = {"auto", "connect", "create"}
+_GLOBAL_STORE_MODES = {"auto", "connect", "start", "none"}
+_BOOTSTRAP_MODES = {"disabled", "auto", "required"}
+_COLLECTIVE_MODES = {"auto", "required", "disabled"}
+_RETAINED_BINDING_ACQUIRE_MODES = {"disabled", "external"}
+_REPLICA_PUBLICATION_MODES = {"disabled", "optional", "required"}
+_REPLICA_PUBLICATION_TRIGGERS = {"after_vllm_ready"}
+_PROFILE_NAME_PATTERN = re.compile(r"^[A-Za-z0-9_.-]+$")  # runtime.profile charset
+_TOP_LEVEL_KEYS = {  # sections accepted by TensorCastRuntimeConfig.from_mapping
+    "runtime",
+    "runtime_artifact",
+    "bootstrap",
+    "materialization",
+    "retained_binding_acquire",
+    "diagnostics",
+    "replica_publication",
+}
+
+
+def _normalize_optional_text(value: Any) -> str | None:
+    """Return ``str(value)`` stripped, or ``None`` for ``None``/blank input."""
+    if value is None:
+        return None
+    return str(value).strip() or None
+
+
+def _normalize_enum(value: Any, *, allowed: set[str], field_name: str) -> str:
+    """Lower-case and strip ``value``; raise ``ValueError`` if not in ``allowed``."""
+    candidate = str(value).strip().lower()
+    if candidate in allowed:
+        return candidate
+    expected = sorted(allowed)
+    raise ValueError(f"{field_name} must be one of {expected}, got: {value!r}")
+
+
+def _validate_existing_file(path: str, *, field_name: str) -> str:
+    """Return ``path`` with ``~`` expanded; raise unless it is an existing file."""
+    if (expanded := Path(path).expanduser()).is_file():
+        return str(expanded)
+    raise ValueError(f"{field_name} must point to an existing file, got: {path!r}")
+
+
+def _default_resource_path(package: str, name: str) -> str | None:
+    """Locate ``name`` inside ``package``; ``None`` if it is not an on-disk file."""
+    try:
+        resource = importlib.resources.files(package).joinpath(name)
+    except (FileNotFoundError, ModuleNotFoundError):
+        return None
+    return str(p) if (p := Path(str(resource))).is_file() else None
+
+
+def _normalize_profile_name(value: Any) -> str | None:
+    """Normalize an optional profile name and enforce the allowed character set."""
+    name = _normalize_optional_text(value)
+    if name is not None and not _PROFILE_NAME_PATTERN.fullmatch(name):
+        raise ValueError(
+            "runtime.profile must contain only letters, digits, '.', '_', or '-'"
+        )
+    # ``None`` (missing or blank input) passes through unchanged.
+    return name
+
+
+def _profile_resource_path(profile: str, filename: str) -> str:
+    """Return the on-disk path of ``filename`` in a packaged runtime profile.
+
+    The file is looked up under ``tensorcast/config/profiles/<profile>/``.
+    Raises ``ValueError`` for a blank/invalid profile name or a missing file.
+    """
+    profile_name = _normalize_profile_name(profile)
+    if profile_name is None:
+        raise ValueError("runtime.profile must be non-empty")
+    unknown = f"Unknown TensorCast runtime config profile: {profile_name!r}"
+    try:
+        resource = importlib.resources.files("tensorcast")
+        for part in ("config", "profiles", profile_name, filename):
+            resource = resource.joinpath(part)
+    except (FileNotFoundError, ModuleNotFoundError) as exc:
+        raise ValueError(unknown) from exc
+    path = Path(str(resource))
+    if path.is_file():
+        return str(path)
+    raise ValueError(unknown)
+
+
+@dataclass(frozen=True)
+class RuntimeConfigProfile:
+    name: str  # normalized profile name
+    daemon_config_path: str  # the profile's store_daemon_config.yaml
+    global_store_config_path: str  # the profile's global_store_config.yaml
+
+
+def resolve_runtime_config_profile(profile: str) -> RuntimeConfigProfile:
+    """Resolve a profile name to its packaged daemon and Global Store configs.
+
+    Raises ``ValueError`` when the name is blank/invalid or a file is missing.
+    """
+    name = _normalize_profile_name(profile)
+    if name is None:
+        raise ValueError("runtime.profile must be non-empty")
+    daemon_path = _profile_resource_path(name, "store_daemon_config.yaml")
+    store_path = _profile_resource_path(name, "global_store_config.yaml")
+    return RuntimeConfigProfile(
+        name=name, daemon_config_path=daemon_path, global_store_config_path=store_path
+    )
+
+
+class RuntimeDaemonSettings(BaseModel):
+    model_config = ConfigDict(frozen=True, extra="forbid")
+
+    address: str | None = None  # forwarded as the "address" init kwarg when set
+    config_path: str | None = None  # explicit daemon config file, if any
+    show_logs: bool = False  # forwarded as the "show_daemon_logs" init kwarg
+
+    @field_validator("address", "config_path", mode="before")
+    @classmethod
+    def _normalize_optional_fields(cls, value: Any) -> Any:
+        return _normalize_optional_text(value)  # blank text becomes None
+
+
+class RuntimeGlobalStoreSettings(BaseModel):
+    """Global Store options; ``mode='auto'`` is resolved by ``resolved_mode``."""
+
+    model_config = ConfigDict(frozen=True, extra="forbid")
+
+    mode: str = "auto"
+    address: str | None = None
+    config_path: str | None = None
+
+    @field_validator("mode", mode="before")
+    @classmethod
+    def _normalize_mode(cls, value: Any) -> str:
+        # A missing/None mode falls back to "auto" before enum validation.
+        if value is None:
+            return "auto"
+        return _normalize_enum(
+            value, allowed=_GLOBAL_STORE_MODES, field_name="runtime.global_store.mode"
+        )
+
+    @field_validator("address", "config_path", mode="before")
+    @classmethod
+    def _normalize_optional_fields(cls, value: Any) -> Any:
+        return _normalize_optional_text(value)
+
+    def resolved_mode(self, runtime_mode: str) -> str:
+        """Return the concrete mode: an explicit mode wins over inference."""
+        if self.mode != "auto":
+            return self.mode
+        if self.address is not None:
+            return "connect"
+        # No address: "start" if a config is given or runtime_mode is create/auto.
+        starts = self.config_path is not None or runtime_mode in {"create", "auto"}
+        return "start" if starts else "none"
+
+
+class RuntimeSettings(BaseModel):
+    """Runtime section: profile, mode, daemon and Global Store settings."""
+
+    model_config = ConfigDict(frozen=True, extra="forbid")
+
+    profile: str | None = DEFAULT_RUNTIME_PROFILE
+    mode: str = "auto"
+    daemon: RuntimeDaemonSettings = RuntimeDaemonSettings()
+    global_store: RuntimeGlobalStoreSettings = RuntimeGlobalStoreSettings()
+
+    @field_validator("profile", mode="before")
+    @classmethod
+    def _normalize_profile(cls, value: Any) -> str | None:
+        return _normalize_profile_name(value)
+
+    @field_validator("mode", mode="before")
+    @classmethod
+    def _normalize_mode(cls, value: Any) -> str:
+        if value is None:
+            return "auto"
+        return _normalize_enum(value, allowed=_RUNTIME_MODES, field_name="runtime.mode")
+
+    @staticmethod
+    def _default_daemon_config_path() -> str | None:
+        return _default_resource_path("tensorcast", "daemon_config.yaml")
+
+    @staticmethod
+    def _default_global_store_config_path() -> str | None:
+        return _default_resource_path("tensorcast", "global_store_config.yaml")
+
+    def to_init_kwargs(
+        self,
+        *,
+        default_daemon_config_path: str | None = None,
+        default_global_store_config_path: str | None = None,
+    ) -> dict[str, Any]:
+        """Build the keyword arguments ``ensure_initialized`` passes to ``tc.init``.
+
+        An unset config path falls back to the profile's file, or without a profile
+        to the ``default_*`` argument and then the packaged default resource.
+        Raises ``ValueError`` when a required config file is missing.
+        """
+        profile = None
+        if self.profile is not None:
+            profile = resolve_runtime_config_profile(self.profile)
+        needs_daemon_config = self.mode in {"create", "auto"}
+
+        kwargs: dict[str, Any] = {"mode": self.mode}
+        kwargs["show_daemon_logs"] = self.daemon.show_logs
+        if self.daemon.address is not None:
+            kwargs["address"] = self.daemon.address
+
+        daemon_config_path = self.daemon.config_path
+        if daemon_config_path is None and needs_daemon_config:
+            if profile is not None:
+                daemon_config_path = profile.daemon_config_path
+            else:
+                daemon_config_path = (
+                    default_daemon_config_path or self._default_daemon_config_path()
+                )
+        if daemon_config_path is not None:
+            kwargs["daemon_config_path"] = _validate_existing_file(
+                daemon_config_path, field_name="runtime.daemon.config_path"
+            )
+        elif needs_daemon_config:
+            raise ValueError(
+                "runtime.mode requires a daemon config file for create/auto; "
+                "set runtime.profile or runtime.daemon.config_path"
+            )
+
+        global_store_mode = self.global_store.resolved_mode(self.mode)
+        if global_store_mode != "none":
+            kwargs["global_store_mode"] = global_store_mode
+        if global_store_mode == "connect":
+            address = self.global_store.address or _DEFAULT_GLOBAL_STORE_ADDRESS
+            kwargs["global_store_address"] = address
+        elif global_store_mode == "start":
+            store_config_path = self.global_store.config_path
+            if store_config_path is None and profile is not None:
+                store_config_path = profile.global_store_config_path
+            elif store_config_path is None:
+                store_config_path = (
+                    default_global_store_config_path
+                    or self._default_global_store_config_path()
+                )
+            if store_config_path is None:
+                raise ValueError(
+                    "runtime.global_store.mode='start' requires a Global "
+                    "Store config file; set runtime.profile or "
+                    "runtime.global_store.config_path"
+                )
+            kwargs["global_store_config_path"] = _validate_existing_file(
+                store_config_path, field_name="runtime.global_store.config_path"
+            )
+
+        return kwargs
+
+    def ensure_initialized(
+        self,
+        *,
+        default_daemon_config_path: str | None = None,
+        default_global_store_config_path: str | None = None,
+    ) -> None:
+        """Initialize TensorCast once; raise ``RuntimeError`` on a conflicting init."""
+        init_kwargs = self.to_init_kwargs(
+            default_daemon_config_path=default_daemon_config_path,
+            default_global_store_config_path=default_global_store_config_path,
+        )
+        import tensorcast as tc
+
+        with _INIT_LOCK:
+            global _INIT_KWARGS
+            if not tc.is_initialized():
+                tc.init(**init_kwargs)
+                _INIT_KWARGS = dict(init_kwargs)
+                return
+            if _INIT_KWARGS is None:
+                raise RuntimeError(
+                    "TensorCast runtime was already initialized outside "
+                    "tensorcast.artifact_runtime.config.RuntimeSettings."
+                )
+            if init_kwargs != _INIT_KWARGS:
+                raise RuntimeError(
+                    "TensorCast runtime already initialized with different "
+                    "settings. Existing="
+                    f"{_INIT_KWARGS}, requested={init_kwargs}"
+                )
+
+
+class RuntimeArtifactSettings(BaseModel):
+    model_config = ConfigDict(frozen=True, extra="forbid")
+
+    artifact_locator: ArtifactLocator | None = None  # when set, startup binds it
+    policy: RuntimePolicy = RuntimePolicy()  # passed to the artifact bind start plan
+
+
+class BootstrapSettings(BaseModel):
+    """Source bootstrap policy: ``mode``, cache directory, checksum verification."""
+
+    model_config = ConfigDict(frozen=True, extra="forbid")
+
+    mode: str = "auto"
+    cache_dir: str | None = None
+    verify_source_checksums: bool = True
+
+    @field_validator("mode", mode="before")
+    @classmethod
+    def _normalize_mode(cls, value: Any) -> str:
+        if value is None:
+            return "auto"
+        return _normalize_enum(
+            value, allowed=_BOOTSTRAP_MODES, field_name="bootstrap.mode"
+        )
+
+    @field_validator("cache_dir", mode="before")
+    @classmethod
+    def _normalize_optional_fields(cls, value: Any) -> Any:
+        return _normalize_optional_text(value)
+
+
+class MaterializationSettings(BaseModel):
+    model_config = ConfigDict(frozen=True, extra="forbid")
+
+    collective: str = "auto"
+
+    @field_validator("collective", mode="before")
+    @classmethod
+    def _normalize_collective(cls, value: Any) -> str:
+        if value is None:
+            return "auto"
+        return _normalize_enum(
+            value, allowed=_COLLECTIVE_MODES, field_name="materialization.collective"
+        )
+
+    def collective_policy_value(self) -> str:
+        """Map the ``collective`` setting to its policy identifier."""
+        policy_by_mode = {
+            "auto": "collective_first",
+            "required": "require_collective",
+            "disabled": "disable_collective",
+        }
+        return policy_by_mode[self.collective]
+
+
+class DiagnosticsSettings(BaseModel):
+    model_config = ConfigDict(frozen=True, extra="forbid")
+
+    debug_path: str | None = None  # blank/whitespace-only input becomes None
+    verify_tensors: bool = False
+
+    @field_validator("debug_path", mode="before")
+    @classmethod
+    def _normalize_debug_path(cls, value: Any) -> Any:
+        return _normalize_optional_text(value)
+
+
+class ReplicaPublicationPolicy(BaseModel):
+    """Replica publication policy; unsupported option values are rejected."""
+
+    model_config = ConfigDict(frozen=True, extra="forbid")
+
+    mode: str = "disabled"
+    trigger: str = "after_vllm_ready"
+    async_publish: bool = True
+    timeout_s: float = 30.0
+    ttl_ms: int | None = None
+    drain_timeout_s: float = 30.0
+
+    @field_validator("mode", mode="before")
+    @classmethod
+    def _normalize_mode(cls, value: Any) -> str:
+        return _normalize_enum(
+            "disabled" if value is None else value,
+            allowed=_REPLICA_PUBLICATION_MODES,
+            field_name="replica_publication.mode",
+        )
+
+    @field_validator("trigger", mode="before")
+    @classmethod
+    def _normalize_trigger(cls, value: Any) -> str:
+        return _normalize_enum(
+            "after_vllm_ready" if value is None else value,
+            allowed=_REPLICA_PUBLICATION_TRIGGERS,
+            field_name="replica_publication.trigger",
+        )
+
+    @field_validator("async_publish")
+    @classmethod
+    def _validate_async_publish(cls, value: bool) -> bool:
+        if not value:
+            raise ValueError("replica_publication.async_publish=false is not supported")
+        return value
+
+    @field_validator("timeout_s", "drain_timeout_s")
+    @classmethod
+    def _validate_positive_timeout(cls, value: float) -> float:
+        """Require a timeout greater than zero; NaN is rejected as well."""
+        normalized = float(value)
+        # ``not x > 0`` also rejects NaN, which ``x <= 0`` would let through.
+        if not normalized > 0:
+            raise ValueError("replica_publication timeouts must be positive")
+        return normalized
+
+    @field_validator("ttl_ms")
+    @classmethod
+    def _reject_ttl(cls, value: int | None) -> int | None:
+        if value is not None:
+            raise ValueError("replica_publication.ttl_ms is not supported yet")
+        return value
+
+
+def _retained_authority(value: Any) -> RetainedRealizationAuthority | None:
+    """Coerce ``value`` into a retained authority model; ``None`` stays ``None``."""
+    if value is None or isinstance(value, RetainedRealizationAuthority):
+        # Nothing to convert: absent, or already the target model type.
+        return value
+    # Everything else is delegated to the model's own validation.
+    return RetainedRealizationAuthority.model_validate(value)
+
+
+class RetainedBindingAcquireSettings(BaseModel):
+    """Retained binding acquire options: ``mode`` plus one or many authorities."""
+
+    model_config = ConfigDict(frozen=True, extra="forbid")
+
+    mode: str = "disabled"
+    authority: RetainedRealizationAuthority | None = None
+    authorities: tuple[RetainedRealizationAuthority, ...] = ()
+
+    @field_validator("mode", mode="before")
+    @classmethod
+    def _normalize_mode(cls, value: Any) -> str:
+        return _normalize_enum(
+            "disabled" if value is None else value,
+            allowed=_RETAINED_BINDING_ACQUIRE_MODES,
+            field_name="retained_binding_acquire.mode",
+        )
+
+    @field_validator("authority", mode="before")
+    @classmethod
+    def _validate_authority_value(
+        cls,
+        value: Any,
+    ) -> RetainedRealizationAuthority | None:
+        return _retained_authority(value)
+
+    @field_validator("authorities", mode="before")
+    @classmethod
+    def _validate_authorities_value(
+        cls,
+        value: Any,
+    ) -> tuple[RetainedRealizationAuthority, ...]:
+        if value is None:
+            return ()
+        parsed = (_retained_authority(item) for item in value)
+        return tuple(item for item in parsed if item is not None)  # drops None items
+
+    @field_validator("authorities")
+    @classmethod
+    def _validate_authorities(
+        cls,
+        value: tuple[RetainedRealizationAuthority, ...],
+    ) -> tuple[RetainedRealizationAuthority, ...]:
+        return value  # pass-through: no additional checks are applied here
+
+    @model_validator(mode="after")
+    def _validate_authority(self) -> RetainedBindingAcquireSettings:
+        """Require exactly one authority source in external mode, none otherwise."""
+        has_single = self.authority is not None
+        has_many = bool(self.authorities)
+        if self.mode != "external":
+            if has_single or has_many:
+                raise ValueError(
+                    "retained_binding_acquire.authority and "
+                    "retained_binding_acquire.authorities are only valid when "
+                    "retained_binding_acquire.mode='external'"
+                )
+            return self
+        if not (has_single or has_many):
+            raise ValueError(
+                "retained_binding_acquire.authority or "
+                "retained_binding_acquire.authorities is required when "
+                "retained_binding_acquire.mode='external'"
+            )
+        if has_single and has_many:
+            raise ValueError(
+                "retained_binding_acquire.authority and "
+                "retained_binding_acquire.authorities are mutually exclusive"
+            )
+        return self
+
+
+class TensorCastRuntimeConfig(BaseModel):
+    """Top-level TensorCast runtime config; one field per accepted section."""
+
+    model_config = ConfigDict(frozen=True, extra="forbid")
+
+    runtime: RuntimeSettings = RuntimeSettings()
+    runtime_artifact: RuntimeArtifactSettings = RuntimeArtifactSettings()
+    bootstrap: BootstrapSettings = BootstrapSettings()
+    materialization: MaterializationSettings = MaterializationSettings()
+    retained_binding_acquire: RetainedBindingAcquireSettings = Field(
+        default_factory=RetainedBindingAcquireSettings,
+    )
+    diagnostics: DiagnosticsSettings = DiagnosticsSettings()
+    replica_publication: ReplicaPublicationPolicy = ReplicaPublicationPolicy()
+
+    @classmethod
+    def from_mapping(cls, data: Mapping[str, Any] | None) -> TensorCastRuntimeConfig:
+        """Validate ``data`` (``None`` means defaults) and build the config."""
+        payload: Mapping[str, Any] = {} if data is None else data
+        if not isinstance(payload, Mapping):
+            raise ValueError("model_loader_extra_config must be a mapping")
+        if "serving" in payload:
+            raise ValueError(
+                "TensorCast runtime config section 'serving' was removed; "
+                "use 'runtime_artifact'"
+            )
+        unknown = sorted({str(key) for key in payload} - _TOP_LEVEL_KEYS)
+        if unknown:
+            raise ValueError(
+                "Unexpected TensorCast runtime config keys in "
+                f"model_loader_extra_config: {unknown}"
+            )
+        artifact_section = payload.get("runtime_artifact")
+        if isinstance(artifact_section, Mapping) and "selector" in artifact_section:
+            raise ValueError(
+                "runtime_artifact.selector is not supported; "
+                "use runtime_artifact.artifact_locator"
+            )
+        return cls.model_validate(dict(payload))
+
+    def to_mapping(self) -> dict[str, Any]:
+        """Dump the config with ``model_dump(mode="python")``."""
+        return self.model_dump(mode="python")
+
+
+class RuntimeStartPlanError(ValueError):
+    """Raised when startup configuration cannot be lowered into one runtime plan."""
+
+
+@dataclass(frozen=True)
+class RuntimeStartPlan:
+    """Typed artifact runtime startup intent selected before allocation."""
+
+    kind: str = field(init=False)  # no default here; each subclass sets its own
+
+
+@dataclass(frozen=True)
+class RuntimeArtifactBindStartPlan(RuntimeStartPlan):
+    """Bind a durable runtime artifact selected by an artifact locator."""
+
+    artifact_locator: ArtifactLocator  # config.runtime_artifact.artifact_locator
+    policy: RuntimePolicy  # config.runtime_artifact.policy
+    kind: str = field(default="artifact_bind", init=False)
+
+
+@dataclass(frozen=True)
+class RuntimeSourceBootstrapStartPlan(RuntimeStartPlan):
+    """Bootstrap a source artifact into a daemon-owned binding value."""
+
+    source_selector: Any  # the source_selector given to plan_runtime_start
+    bootstrap_policy: BootstrapSettings  # config.bootstrap
+    kind: str = field(default="source_bootstrap_to_binding", init=False)
+
+
+@dataclass(frozen=True)
+class RuntimeRetainedRealizationStartPlan(RuntimeStartPlan):
+    """Acquire a retained binding authority prepared by artifact prefetch."""
+
+    authority: ParsedRetainedRealizationAuthority  # parsed in plan_runtime_start
+    kind: str = field(default="retained_binding_acquire", init=False)
+
+
+def _candidate_rejection_reasons(
+    *,
+    has_retained_authority: bool,
+    has_artifact_locator: bool,
+    has_source_selector: bool,
+    bootstrap_mode: str,
+) -> dict[str, str]:
+    """Describe, per start-plan kind, whether it was selected or why not.
+
+    Keys are the plan ``kind`` strings; values are ``"selected"`` or a short
+    human-readable rejection reason.
+    """
+    reasons = dict.fromkeys(
+        ("retained_binding_acquire", "artifact_bind", "source_bootstrap_to_binding"),
+        "selected",
+    )
+    if not has_retained_authority:
+        reasons["retained_binding_acquire"] = (
+            "retained_binding_acquire.mode is not external"
+        )
+    if not has_artifact_locator:
+        reasons["artifact_bind"] = "runtime artifact locator missing"
+    if bootstrap_mode == "disabled":
+        reasons["source_bootstrap_to_binding"] = "bootstrap.mode is disabled"
+    elif not (has_source_selector and bootstrap_mode in {"auto", "required"}):
+        reasons["source_bootstrap_to_binding"] = "source selector is unavailable"
+    return reasons
+
+
+def _format_rejection_reasons(reasons: Mapping[str, str]) -> str:
+    return "; ".join(f"{kind}: {why}" for kind, why in reasons.items())
+
+
+def plan_runtime_start(
+    *,
+    config: TensorCastRuntimeConfig,
+    source_selector: Any | None,
+    expected_member: Any | None = None,
+) -> RuntimeStartPlan:
+    """Classify artifact runtime startup into exactly one canonical start plan.
+
+    Candidates are tried in a fixed order: retained binding acquire, durable
+    artifact bind, then source bootstrap. Conflicting settings, or no viable
+    candidate, raise ``RuntimeStartPlanError``.
+    """
+    retained_requested = config.retained_binding_acquire.mode == "external"
+    artifact_locator = config.runtime_artifact.artifact_locator
+    bootstrap_mode = config.bootstrap.mode
+    has_binding_source = retained_requested or artifact_locator is not None
+
+    # Reject contradictory combinations before choosing a plan.
+    if retained_requested and artifact_locator is not None:
+        raise RuntimeStartPlanError(
+            "TensorCast runtime config cannot request both retained binding "
+            "acquire and durable runtime artifact bind"
+        )
+    if bootstrap_mode == "required" and has_binding_source:
+        raise RuntimeStartPlanError(
+            "TensorCast bootstrap.mode='required' is mutually exclusive with "
+            "retained binding acquire and durable runtime artifact bind"
+        )
+    if bootstrap_mode == "disabled" and not has_binding_source:
+        raise RuntimeStartPlanError(
+            "TensorCast bootstrap.mode='disabled' requires retained binding "
+            "authority or durable runtime artifact locator"
+        )
+
+    # Pick the first viable candidate.
+    if retained_requested:
+        from tensorcast.retained_realization import parse_retained_realization_authority
+
+        authority = parse_retained_realization_authority(
+            config, expected_member=expected_member
+        )
+        return RuntimeRetainedRealizationStartPlan(authority=authority)
+    if artifact_locator is not None:
+        return RuntimeArtifactBindStartPlan(
+            artifact_locator=artifact_locator, policy=config.runtime_artifact.policy
+        )
+    if source_selector is not None and bootstrap_mode in {"auto", "required"}:
+        return RuntimeSourceBootstrapStartPlan(
+            source_selector=source_selector, bootstrap_policy=config.bootstrap
+        )
+
+    # Nothing matched: report why each candidate was rejected.
+    reasons = _candidate_rejection_reasons(
+        has_retained_authority=retained_requested,
+        has_artifact_locator=artifact_locator is not None,
+        has_source_selector=source_selector is not None,
+        bootstrap_mode=bootstrap_mode,
+    )
+    raise RuntimeStartPlanError(
+        "TensorCast runtime config did not resolve to one startup plan; "
+        f"rejected candidates: {_format_rejection_reasons(reasons)}"
+    )
+
+
+RuntimeArtifactLocator = ArtifactLocator  # Runtime*-prefixed aliases (same objects)
+RuntimeBootstrapSettings = BootstrapSettings
+RuntimeDiagnosticsSettings = DiagnosticsSettings
+RuntimeMaterializationSettings = MaterializationSettings
+RuntimeReplicaPublicationPolicy = ReplicaPublicationPolicy
+
+
+__all__ = [  # public names, including re-exported ArtifactLocator/RuntimePolicy
+    "DEFAULT_RUNTIME_PROFILE",
+    "ArtifactLocator",
+    "BootstrapSettings",
+    "DiagnosticsSettings",
+    "MaterializationSettings",
+    "ReplicaPublicationPolicy",
+    "RetainedBindingAcquireSettings",
+    "RuntimeArtifactSettings",
+    "RuntimeArtifactBindStartPlan",
+    "RuntimeArtifactLocator",
+    "RuntimeBootstrapSettings",
+    "RuntimeConfigProfile",
+    "RuntimeDaemonSettings",
+    "RuntimeDiagnosticsSettings",
+    "RuntimeGlobalStoreSettings",
+    "RuntimeMaterializationSettings",
+    "RuntimePolicy",
+    "RuntimeReplicaPublicationPolicy",
+    "RuntimeRetainedRealizationStartPlan",
+    "RuntimeSettings",
+    "RuntimeSourceBootstrapStartPlan",
+    "RuntimeStartPlan",
+    "RuntimeStartPlanError",
+    "TensorCastRuntimeConfig",
+    "plan_runtime_start",
+    "resolve_runtime_config_profile",
+]
diff --git a/tensorcast/artifact_runtime/contract.py b/tensorcast/artifact_runtime/contract.py
new file mode 100644
index 00000000..0ba45e4e
--- /dev/null
+++ b/tensorcast/artifact_runtime/contract.py
@@ -0,0 +1,357 @@
+#  Copyright (c) 2026, TensorCast Team.
+
+"""Runtime identity, topology, and source-bound contract helpers."""
+
+from __future__ import annotations
+
+import base64
+import hashlib
+import json
+from collections.abc import Callable, Mapping, Sequence
+from dataclasses import dataclass
+from typing import Any
+
+import torch
+
+from tensorcast.api.store.types import CanonicalIndex, CanonicalIndexEntry
+from tensorcast.types import (
+    SERVING_MANIFEST_TENSOR_NAME,
+    RuntimeBindingMemberRef,
+    RuntimeTopologyRef,
+    SourceBoundCapability,
+)
+
+MIN_SOURCE_BOUND_CONTRACT_VERSION = 4  # lowest version treated as contract-ready
+SOURCE_BOUND_CONTRACT_PATH_COLLECTIVE_FIRST_V4 = "collective_first_v4"
+REQUIRED_SOURCE_BOUND_CAPABILITIES = (  # every flag must be set for readiness
+    SourceBoundCapability.FIRST_CLASS_COLLECTIVE_INGRESS,
+    SourceBoundCapability.TYPED_EXECUTION_DIAGNOSTICS,
+    SourceBoundCapability.SINGLE_MINT_BINDING_CLOSEOUT,
+)
+
+
+def _canonical_json_bytes(payload: object) -> bytes:
+    return bytes(json.dumps(payload, separators=(",", ":"), sort_keys=True), "utf-8")
+
+
+def _multibase_multihash_sha256(digest: bytes) -> str:
+    """Encode a SHA-256 digest as ``b`` + lower-case unpadded base32 multihash."""
+    if len(digest) != 32:
+        raise ValueError("SHA256 digest must be 32 bytes")
+    body = base64.b32encode(b"\x12\x20" + digest).decode("ascii")
+    return "b" + body.lower().rstrip("=")
+
+
+def hash_versioned_payload_to_multihash(version: str, payload: object) -> str:
+    """SHA-256 multihash of ``version`` + newline + canonical JSON of ``payload``."""
+    preimage = b"\n".join((version.encode("utf-8"), _canonical_json_bytes(payload)))
+    return _multibase_multihash_sha256(hashlib.sha256(preimage).digest())
+
+
+def normalize_logical_topology_payload(
+    logical_topology_json: str | None,
+) -> dict[str, object] | None:
+    """Parse logical-topology JSON into a canonical dict (``None`` passes through).
+
+    Raises ``ValueError`` for malformed input, including boolean dimension sizes.
+    """
+    if logical_topology_json is None:
+        return None
+    try:
+        payload = json.loads(logical_topology_json)
+    except Exception as exc:  # noqa: BLE001
+        raise ValueError("logical_topology_json must be valid JSON") from exc
+    if not isinstance(payload, dict):
+        raise ValueError("logical_topology_json must encode an object")
+    family = str(payload.get("family", "")).strip()
+    version = str(payload.get("version", "")).strip()
+    raw_dimensions = payload.get("dimensions", [])
+    if not family:
+        raise ValueError("logical_topology_json.family must not be empty")
+    if not version:
+        raise ValueError("logical_topology_json.version must not be empty")
+    if not isinstance(raw_dimensions, list):
+        raise ValueError("logical_topology_json.dimensions must be a list")
+    dimensions: list[dict[str, int | str]] = []
+    for raw_dimension in raw_dimensions:
+        if not isinstance(raw_dimension, dict):
+            raise ValueError("logical_topology_json.dimensions items must be objects")
+        name = str(raw_dimension.get("name", "")).strip()
+        if not name:
+            raise ValueError("logical_topology_json dimensions require non-empty name")
+        size = raw_dimension.get("size", None)
+        if isinstance(size, bool) or not isinstance(size, int) or size <= 0:
+            raise ValueError(
+                "logical_topology_json dimensions require positive integer size"
+            )
+        dimensions.append({"name": name, "size": int(size)})
+    dimensions.sort(key=lambda item: (str(item["name"]), int(item["size"])))
+    return {"family": family, "version": version, "dimensions": dimensions}
+
+
+@dataclass(frozen=True)
+class RuntimeTensorSchemaEntry:
+    name: str
+    dtype: str  # str(tensor.dtype) when built by collect_runtime_tensor_schema
+    shape: tuple[int, ...]
+    stride: tuple[int, ...]
+    element_size: int  # tensor.element_size()
+    storage_offset: int  # the schema hash only accepts 0 here
+
+
+@dataclass(frozen=True)
+class SourceBoundContractState:
+    """Source-bound contract version/capabilities read from a server config."""
+
+    server_config_present: bool
+    source_bound_contract_version: int
+    source_bound_capability_flags: int
+    source_bound_capability_names: tuple[str, ...]
+    source_bound_contract_ready: bool
+
+    @classmethod
+    def unavailable(cls) -> SourceBoundContractState:
+        """State used when no server config is available: nothing advertised."""
+        return cls(
+            server_config_present=False,
+            source_bound_contract_version=0,
+            source_bound_capability_flags=0,
+            source_bound_capability_names=(),
+            source_bound_contract_ready=False,
+        )
+
+    @classmethod
+    def from_server_config(cls, server_config: Any | None) -> SourceBoundContractState:
+        """Build the state from ``server_config``; ``None`` gives ``unavailable()``."""
+        if server_config is None:
+            return cls.unavailable()
+        flags = int(getattr(server_config, "source_bound_capability_flags", 0) or 0)
+        version = int(getattr(server_config, "source_bound_contract_version", 0) or 0)
+        names = tuple(
+            str(cap.name) for cap in SourceBoundCapability if flags & int(cap)
+        )
+        version_ok = version >= MIN_SOURCE_BOUND_CONTRACT_VERSION
+        has_required = all(
+            flags & int(cap) for cap in REQUIRED_SOURCE_BOUND_CAPABILITIES
+        )
+        return cls(
+            server_config_present=True,
+            source_bound_contract_version=version,
+            source_bound_capability_flags=flags,
+            source_bound_capability_names=names,
+            source_bound_contract_ready=version_ok and has_required,
+        )
+
+
+def collect_runtime_tensor_schema(
+    tensors: Mapping[str, torch.Tensor],
+    *,
+    remove_duplicate: bool,
+) -> tuple[RuntimeTensorSchemaEntry, ...]:
+    """Build one schema entry per tensor, visiting names in sorted order."""
+    entries: list[RuntimeTensorSchemaEntry] = []
+    visited_ptrs: set[int] = set()
+    for name, tensor in sorted(tensors.items()):
+        ptr = int(tensor.data_ptr())
+        if remove_duplicate and ptr in visited_ptrs:
+            continue  # same data_ptr() already recorded under an earlier name
+        visited_ptrs.add(ptr)
+        offset = int(tensor.storage_offset())
+        if offset != 0:
+            raise ValueError(
+                "runtime tensor schema hash requires storage_offset == 0: "
+                f"{name} has storage_offset={offset}"
+            )
+        entry = RuntimeTensorSchemaEntry(
+            name=str(name),
+            dtype=str(tensor.dtype),
+            shape=tuple(map(int, tensor.shape)),
+            stride=tuple(map(int, tensor.stride())),
+            element_size=int(tensor.element_size()),
+            storage_offset=offset,
+        )
+        entries.append(entry)
+    return tuple(entries)
+
+
+def compute_runtime_tensor_schema_hash(
+    schema: Sequence[RuntimeTensorSchemaEntry],
+) -> str:
+    """Hash ``schema`` through a synthetic, back-to-back canonical index.
+
+    Entries are packed in name order with no gaps. Raises ``ValueError`` if
+    any entry carries a non-zero storage offset.
+    """
+    packed: list[CanonicalIndexEntry] = []
+    cursor = 0  # byte offset at which the next entry is placed
+    for item in sorted(schema, key=lambda candidate: candidate.name):
+        if int(item.storage_offset) != 0:
+            raise ValueError(
+                "runtime tensor schema hash requires storage_offset == 0: "
+                f"{item.name} has storage_offset={item.storage_offset}"
+            )
+        nbytes = _schema_entry_size_bytes(item)
+        packed.append(
+            CanonicalIndexEntry(
+                name=item.name,
+                dtype=_torch_dtype_from_name(item.dtype),
+                shape=item.shape,
+                stride=item.stride,
+                storage_offset=0,
+                segment_offset=cursor,
+                size_bytes=nbytes,
+            )
+        )
+        cursor += nbytes
+    index = CanonicalIndex(entries=tuple(packed), total_size_bytes=cursor, avbs_hash="")
+    return compute_canonical_runtime_tensor_schema_hash(index)
+
+
+def compute_canonical_runtime_tensor_schema_hash(
+    canonical_index: CanonicalIndex,
+    *,
+    manifest_tensor_name: str = SERVING_MANIFEST_TENSOR_NAME,
+) -> str:
+    """Hash the tensor schema described by ``canonical_index``.
+
+    The entry named ``manifest_tensor_name`` is left out; the remaining
+    entries are ordered by name before hashing.
+    """
+    excluded = str(manifest_tensor_name)
+    kept = [item for item in canonical_index.entries if str(item.name) != excluded]
+    kept.sort(key=lambda item: str(item.name))
+    described = [
+        {
+            "name": str(item.name),
+            "dtype": str(item.dtype),
+            "shape": list(map(int, item.shape)),
+            "stride": list(map(int, item.stride)),
+            "element_size": int(item.dtype.itemsize),
+        }
+        for item in kept
+    ]
+    return hash_versioned_payload_to_multihash(
+        "tensorcast.representation.tensor_schema.v1", {"tensors": described}
+    )
+
+
+def logical_topology_json(
+    topology_ref: RuntimeTopologyRef,
+    *,
+    framework_payload: Mapping[str, object],
+) -> str:
+    """Return compact, key-sorted JSON of the normalized ``framework_payload``.
+
+    ``topology_ref`` is accepted but unused. Raises ``ValueError`` when
+    normalization yields ``None``.
+    """
+    del topology_ref
+    encoded = json.dumps(dict(framework_payload), sort_keys=True, separators=(",", ":"))
+    topology = normalize_logical_topology_payload(encoded)
+    if topology is None:
+        raise ValueError("framework_payload must define a logical topology")
+    return json.dumps(topology, sort_keys=True, separators=(",", ":"))
+
+
+def compute_runtime_representation_contract_hash(
+    *,
+    tensor_schema_hash: str,
+    topology_ref: RuntimeTopologyRef,
+    member_ref: RuntimeBindingMemberRef,
+    framework_name: str,
+    framework_version: str,
+    adapter_version: str,
+    serving_abi_version: str,
+    source_identity: Mapping[str, object],
+) -> str:
+    """Hash the runtime contract facts; an empty ``tensor_schema_hash`` raises."""
+    if not tensor_schema_hash:
+        raise ValueError("tensor_schema_hash must not be empty")
+    framework = {
+        "name": str(framework_name),
+        "version": str(framework_version),
+        "adapter_version": str(adapter_version),
+        "serving_abi_version": str(serving_abi_version),
+    }
+    # Refs and source identity go through _stable_payload before hashing.
+    topology = _stable_payload(topology_ref.model_dump(mode="python"))
+    member = _stable_payload(member_ref.model_dump(mode="python"))
+    source = _stable_payload(dict(source_identity))
+    contract = dict(framework=framework, topology_ref=topology, member_ref=member)
+    contract.update(source_identity=source, tensor_schema_hash=str(tensor_schema_hash))
+    return hash_versioned_payload_to_multihash(
+        "tensorcast.representation.runtime_contract.v1", contract
+    )
+
+
+def read_source_bound_contract_state(
+    *,
+    store_fn: Callable[[], Any] | None = None,
+) -> SourceBoundContractState:
+    """Read the contract state; any lookup failure yields ``unavailable()``."""
+    try:
+        if store_fn is None:
+            import tensorcast as tc
+
+            store_fn = tc.store
+        # One chained lookup, so an exception at any hop lands in the handler.
+        server_config = getattr(store_fn().capabilities, "server_config", None)
+    except Exception:
+        return SourceBoundContractState.unavailable()
+    return SourceBoundContractState.from_server_config(server_config)
+
+
+def source_bound_contract_profile_fields(
+    state: SourceBoundContractState,
+    path: str,
+) -> dict[str, object]:
+    """Flatten ``state`` and ``path`` into ``source_bound_*`` profile fields."""
+    version = int(state.source_bound_contract_version)
+    fields: dict[str, object] = {"source_bound_contract_version": version}
+    fields["source_bound_capability_flags"] = list(state.source_bound_capability_names)
+    fields["source_bound_contract_ready"] = bool(state.source_bound_contract_ready)
+    return {**fields, "source_bound_contract_path": path}
+
+
+def _schema_entry_size_bytes(entry: RuntimeTensorSchemaEntry) -> int:
+    count = 1  # product of the dims; stays 1 for an empty shape
+    for extent in map(int, entry.shape):
+        count *= extent
+    return int(count * entry.element_size)
+
+
+def _torch_dtype_from_name(dtype_name: str) -> torch.dtype:
+    """Map a dtype name, with or without a ``torch.`` prefix, to ``torch.dtype``."""
+    resolved = getattr(torch, dtype_name.removeprefix("torch."), None)
+    if isinstance(resolved, torch.dtype):
+        return resolved
+    raise ValueError(f"unsupported runtime tensor dtype: {dtype_name}")
+
+
+def _stable_payload(value: object) -> object:
+    """Recursively normalize ``value`` into plain dicts, lists and scalars."""
+    if isinstance(value, Mapping):
+        # Keys are ordered by their str() form; None-valued keys are dropped.
+        kept = [key for key in sorted(value, key=str) if value[key] is not None]
+        return {str(key): _stable_payload(value[key]) for key in kept}
+    if isinstance(value, (list, tuple)):
+        return list(map(_stable_payload, value))
+    if value is None or isinstance(value, (str, int, float, bool)):
+        return value
+    # Anything else is represented by its str() form.
+    return str(value)
+
+
+__all__ = [
+    "MIN_SOURCE_BOUND_CONTRACT_VERSION",
+    "REQUIRED_SOURCE_BOUND_CAPABILITIES",
+    "RuntimeTensorSchemaEntry",
+    "SOURCE_BOUND_CONTRACT_PATH_COLLECTIVE_FIRST_V4",
+    "SourceBoundContractState",
+    "collect_runtime_tensor_schema",
+    "compute_runtime_representation_contract_hash",
+    "compute_runtime_tensor_schema_hash",
+    "logical_topology_json",
+    "read_source_bound_contract_state",
+    "source_bound_contract_profile_fields",
+]
diff --git a/tensorcast/serving/diagnostics.py b/tensorcast/artifact_runtime/diagnostics.py
similarity index 94%
rename from tensorcast/serving/diagnostics.py
rename to tensorcast/artifact_runtime/diagnostics.py
index ca7f69b2..a5f8b493 100644
--- a/tensorcast/serving/diagnostics.py
+++ b/tensorcast/artifact_runtime/diagnostics.py
@@ -1,6 +1,6 @@
 #  Copyright (c) 2026, TensorCast Team.
 
-"""Serving diagnostic helpers with no lifecycle authority."""
+"""Artifact runtime diagnostic helpers with no lifecycle authority."""
 
 from __future__ import annotations
 
@@ -59,7 +59,7 @@ def to_dict(self) -> dict[str, Any]:
 
 
 @dataclass(frozen=True)
-class ServingRealizationReport:
+class RuntimeRealizationReport:
     source_artifact_ref: str
     serving_manifest_ref: str
     representation_contract_hash: str
@@ -88,7 +88,11 @@ def to_dict(self) -> dict[str, Any]:
         }
 
     def to_runtime_diagnostics(self) -> dict[str, Any]:
-        return {"serving_realization_report": self.to_dict()}
+        payload = self.to_dict()  # one dict object, referenced by both keys below
+        return {
+            "runtime_realization_report": payload,
+            "serving_realization_report": payload,
+        }
 
 
 def binding_layout_tensor_count(layout: Any) -> int:
@@ -165,7 +169,7 @@ def binding_layout_debug_payload(
 __all__ = [
     "BindingValueReport",
     "RealizationReport",
-    "ServingRealizationReport",
+    "RuntimeRealizationReport",
     "SourceContractReport",
     "binding_layout_debug_payload",
     "binding_layout_profile_fields",
diff --git a/tensorcast/serving/dto.py b/tensorcast/artifact_runtime/dto.py
similarity index 83%
rename from tensorcast/serving/dto.py
rename to tensorcast/artifact_runtime/dto.py
index 972ad121..e9e5b9e1 100644
--- a/tensorcast/serving/dto.py
+++ b/tensorcast/artifact_runtime/dto.py
@@ -1,6 +1,6 @@
 #  Copyright (c) 2026, TensorCast Team.
 
-"""Serving artifact runtime DTOs shared by framework integrations."""
+"""Runtime artifact DTOs shared by framework integrations."""
 
 from __future__ import annotations
 
@@ -9,13 +9,20 @@
 
 from pydantic import BaseModel, ConfigDict, Field, model_validator
 
-from tensorcast.serving.policy import ServingArtifactLocator
+from tensorcast.artifact_runtime.host import (
+    RuntimePlacement as _RuntimePlacement,
+)
+from tensorcast.artifact_runtime.host import (
+    RuntimeTensorView as _RuntimeTensorView,
+)
+from tensorcast.artifact_runtime.locator import ArtifactLocator
 from tensorcast.types import (
     BindingValueRef,
-    ServingBindingMemberRef,
-    ServingTopologyRef,
 )
 
+RuntimeTensorView = _RuntimeTensorView
+RuntimePlacement = _RuntimePlacement
+
 
 def _normalize_manifest_ref_payload(data: Any) -> Any:
     if not isinstance(data, Mapping):
@@ -44,7 +51,7 @@ def _model_dump_or_none(value: Any) -> dict[str, Any] | None:
     raise TypeError(f"Cannot serialize {type(value)!r} as a mapping")
 
 
-class ServingBindingValue(BaseModel):
+class RuntimeBindingValue(BaseModel):
     model_config = ConfigDict(frozen=True, extra="forbid")
 
     source_artifact_ref: str
@@ -79,7 +86,7 @@ def to_dict(self) -> dict[str, Any]:
         }
 
 
-class PreparedServingArtifact(BaseModel):
+class PreparedRuntimeArtifact(BaseModel):
     model_config = ConfigDict(frozen=True, extra="forbid")
 
     source_artifact_ref: str
@@ -88,7 +95,7 @@ class PreparedServingArtifact(BaseModel):
     representation_contract_hash: str
     serving_build_digest: str
     binding_value_ref: BindingValueRef | None = None
-    readiness: str = "serving_published_ready"
+    readiness: str = "runtime_published_ready"
     family: str
     tensor_schema_hash: str
     serving_version_key: str | None = None
@@ -98,7 +105,7 @@ class PreparedServingArtifact(BaseModel):
     verification_job_id: str | None = None
     tp_rank: int = 0
     tp_world_size: int = 1
-    artifact_locator: ServingArtifactLocator | None = None
+    artifact_locator: ArtifactLocator | None = None
 
     @model_validator(mode="before")
     @classmethod
@@ -109,8 +116,8 @@ def _normalize_input(cls, data: Any) -> Any:
     def manifest_ref(self) -> str:
         return self.serving_manifest_ref
 
-    def to_binding_value(self) -> ServingBindingValue:
-        return ServingBindingValue(
+    def to_binding_value(self) -> RuntimeBindingValue:
+        return RuntimeBindingValue(
             source_artifact_ref=self.source_artifact_ref,
             binding_value_ref=self.binding_value_ref,
             readiness=self.readiness,
@@ -142,7 +149,7 @@ def to_reload_request(self) -> dict[str, Any]:
         else:
             raise RuntimeError(
                 "TensorCast local-ready serving result does not reference a "
-                "durable serving artifact and cannot be used as a reload "
+                "durable runtime artifact and cannot be used as a reload "
                 "request"
             )
         return {
@@ -201,38 +208,6 @@ class FamilyReadiness(BaseModel):
     notes: str = ""
 
 
-class RuntimeTensorView(BaseModel):
-    """Framework-neutral tensor identity view without live tensor payload."""
-
-    model_config = ConfigDict(frozen=True, extra="forbid")
-
-    name: str
-    dtype: str
-    shape: tuple[int, ...]
-    stride: tuple[int, ...]
-    storage_offset: int = 0
-    element_size: int | None = None
-
-
-class ServingPlacement(BaseModel):
-    """Stable runtime placement identity shared with framework integrations."""
-
-    model_config = ConfigDict(frozen=True, extra="forbid")
-
-    topology: ServingTopologyRef
-    member: ServingBindingMemberRef
-    framework_payload: dict[str, Any]
-    identity_payload: dict[str, Any]
-
-    def stable_identity_payload(self) -> dict[str, Any]:
-        return {
-            "topology": self.topology.model_dump(mode="python"),
-            "member": self.member.model_dump(mode="python"),
-            "framework_payload": self.framework_payload,
-            "identity_payload": self.identity_payload,
-        }
-
-
 class FrameworkIntegrationContext(BaseModel):
     """Serializable framework identity facts used by core-owned facades."""
 
@@ -242,7 +217,7 @@ class FrameworkIntegrationContext(BaseModel):
     framework_version: str
     adapter_version: str
     serving_abi_version: str
-    placement: ServingPlacement | None = None
+    placement: RuntimePlacement | None = None
     source_identity: dict[str, Any] = Field(default_factory=dict)
 
     def stable_identity_payload(self) -> dict[str, Any]:
diff --git a/tensorcast/serving/errors.py b/tensorcast/artifact_runtime/errors.py
similarity index 67%
rename from tensorcast/serving/errors.py
rename to tensorcast/artifact_runtime/errors.py
index c2492d99..3c0ee15a 100644
--- a/tensorcast/serving/errors.py
+++ b/tensorcast/artifact_runtime/errors.py
@@ -1,17 +1,17 @@
 #  Copyright (c) 2026, TensorCast Team.
 
-"""Structured serving runtime errors."""
+"""Structured artifact runtime errors."""
 
 from __future__ import annotations
 
 from collections.abc import Mapping, Sequence
 
 
-class TensorCastServingRuntimeError(RuntimeError):
-    """Base class for machine-readable serving runtime failures."""
+class TensorCastRuntimeError(RuntimeError):
+    """Base class for machine-readable artifact runtime failures."""
 
-    code = "tensorcast_serving_runtime_error"
-    operation = "serving_runtime"
+    code = "tensorcast_runtime_error"
+    operation = "artifact_runtime"
     retryable = False
     worker_suspect = False
 
@@ -33,25 +33,25 @@ def __init__(
         self.details = dict(details or {})
 
 
-class ServingIntegrationError(TensorCastServingRuntimeError):
-    """Base class for structured serving integration failures."""
+class ArtifactRuntimeIntegrationError(TensorCastRuntimeError):
+    """Base class for structured runtime integration failures."""
 
 
-class ServingIntegrationNotImplementedError(ServingIntegrationError):
+class ArtifactRuntimeNotImplementedError(ArtifactRuntimeIntegrationError):
     """Raised when a deep core-owned lifecycle method is not implemented yet."""
 
     code = "not_implemented"
-    operation = "serving_runtime"
+    operation = "artifact_runtime"
 
 
-class ConfigConflictError(ServingIntegrationError):
-    """Serving config requests mutually exclusive lifecycle execution modes."""
+class ConfigConflictError(ArtifactRuntimeIntegrationError):
+    """Runtime config requests mutually exclusive lifecycle execution modes."""
 
     code = "config_conflict"
     operation = "config_planning"
 
 
-class CapabilityMissingError(ServingIntegrationError):
+class CapabilityMissingError(ArtifactRuntimeIntegrationError):
     """Required host capability is absent for a requested lifecycle path."""
 
     code = "capability_missing"
@@ -80,57 +80,57 @@ def capability_missing(
     )
 
 
-class AdmissionRejectedError(ServingIntegrationError):
-    """Core admission rejected a serving lifecycle request."""
+class AdmissionRejectedError(ArtifactRuntimeIntegrationError):
+    """Core admission rejected a runtime lifecycle request."""
 
     code = "admission_rejected"
     operation = "admission"
 
 
-class PlacementAdmissionError(ServingIntegrationError):
+class PlacementAdmissionError(ArtifactRuntimeIntegrationError):
     """Placement identity or semantic placement proof is invalid."""
 
     code = "placement_admission"
     operation = "placement_admission"
 
 
-class ArtifactLocatorResolutionError(ServingIntegrationError):
-    """Durable serving artifact locator could not resolve to an artifact."""
+class ArtifactLocatorResolutionError(ArtifactRuntimeIntegrationError):
+    """Durable runtime artifact locator could not resolve to an artifact."""
 
     code = "artifact_locator_resolution"
     operation = "artifact_locator_resolution"
 
 
-class ManifestMismatchError(ServingIntegrationError):
-    """Serving manifest content does not match requested runtime facts."""
+class ManifestMismatchError(ArtifactRuntimeIntegrationError):
+    """Runtime artifact manifest content does not match requested facts."""
 
     code = "manifest_mismatch"
     operation = "manifest_validation"
 
 
-class PolicyMismatchError(ServingIntegrationError):
-    """Serving runtime policy does not match the artifact manifest."""
+class PolicyMismatchError(ArtifactRuntimeIntegrationError):
+    """Runtime artifact policy does not match the artifact manifest."""
 
     code = "policy_mismatch"
     operation = "policy_validation"
 
 
-class AuthorityValidationError(ServingIntegrationError):
-    """Retained binding authority failed validation."""
+class AuthorityValidationError(ArtifactRuntimeIntegrationError):
+    """Retained realization authority failed validation."""
 
     code = "authority_validation"
     operation = "retained_acquire"
 
 
-class SchemaMismatchError(ServingIntegrationError):
-    """Runtime tensor schema does not match the serving artifact schema."""
+class SchemaMismatchError(ArtifactRuntimeIntegrationError):
+    """Runtime tensor schema does not match the artifact schema."""
 
     code = "schema_mismatch"
     operation = "schema_validation"
     worker_suspect = True
 
 
-class AttachFinalizeError(ServingIntegrationError):
+class AttachFinalizeError(ArtifactRuntimeIntegrationError):
     """Framework attach, process-after-load, or finalize failed."""
 
     code = "attach_finalize"
@@ -138,14 +138,14 @@ class AttachFinalizeError(ServingIntegrationError):
     worker_suspect = True
 
 
-class RestoreBindingError(ServingIntegrationError):
+class RestoreBindingError(ArtifactRuntimeIntegrationError):
     """Retained binding restore failed before runtime ownership transfer."""
 
     code = "restore_binding"
     operation = "retained_acquire"
 
 
-class OwnershipTransferError(ServingIntegrationError):
+class OwnershipTransferError(ArtifactRuntimeIntegrationError):
     """Binding ownership transfer to runtime state failed."""
 
     code = "ownership_transfer"
@@ -153,36 +153,36 @@ class OwnershipTransferError(ServingIntegrationError):
     worker_suspect = True
 
 
-class RuntimeSwapError(ServingIntegrationError):
-    """Serving binding swap failed after execution started."""
+class RuntimeSwapError(ArtifactRuntimeIntegrationError):
+    """Runtime binding swap failed after execution started."""
 
     code = "runtime_swap"
     operation = "reload"
     worker_suspect = True
 
 
-class SourceSubjectError(ServingIntegrationError):
+class SourceSubjectError(ArtifactRuntimeIntegrationError):
     """Source selector resolution or broadcast payload handling failed."""
 
     code = "source_subject"
     operation = "source_provider"
 
 
-class SourceProviderError(ServingIntegrationError):
+class SourceProviderError(ArtifactRuntimeIntegrationError):
     """Source provider, catalog, or cache policy failed."""
 
     code = "source_provider"
     operation = "source_provider"
 
 
-class PublicationRequiredError(ServingIntegrationError):
+class PublicationRequiredError(ArtifactRuntimeIntegrationError):
     """A local-ready identity was used where durable publication is required."""
 
     code = "publication_required"
     operation = "artifact_locator_validation"
 
 
-class ReplicaPublicationError(ServingIntegrationError):
+class ReplicaPublicationError(ArtifactRuntimeIntegrationError):
     """Runtime-owned ephemeral replica publication failed."""
 
     code = "replica_publication"
@@ -225,10 +225,10 @@ def __init__(
     "RestoreBindingError",
     "RuntimeSwapError",
     "SchemaMismatchError",
-    "ServingIntegrationError",
-    "ServingIntegrationNotImplementedError",
+    "ArtifactRuntimeIntegrationError",
+    "ArtifactRuntimeNotImplementedError",
     "SourceProviderError",
     "SourceSubjectError",
-    "TensorCastServingRuntimeError",
+    "TensorCastRuntimeError",
     "capability_missing",
 ]
diff --git a/tensorcast/serving/hosts.py b/tensorcast/artifact_runtime/host.py
similarity index 90%
rename from tensorcast/serving/hosts.py
rename to tensorcast/artifact_runtime/host.py
index a39103cc..e0bf1d7c 100644
--- a/tensorcast/serving/hosts.py
+++ b/tensorcast/artifact_runtime/host.py
@@ -1,8 +1,8 @@
 #  Copyright (c) 2026, TensorCast Team.
-"""Public host capability protocols for serving runtime integrations.
+"""Artifact-runtime host capability protocols for framework integrations.
 
 This module is intentionally lightweight: importing it must not import the
-serving lifecycle implementation, builder stack, binding runtime, or store API.
+runtime lifecycle implementation, builder stack, binding runtime, or store API.
 Framework integrations should use these DTOs and protocols to describe facts
 and capabilities; TensorCast core owns the lifecycle that consumes them.
 """
@@ -16,11 +16,12 @@
 from dataclasses import dataclass, field
 from typing import Any, Protocol, cast
 
-from tensorcast.serving.dto import RuntimeTensorView, ServingPlacement
+from pydantic import BaseModel, ConfigDict
+
 from tensorcast.types import (
     SERVING_MANIFEST_TENSOR_NAME,
-    ServingBindingMemberRef,
-    ServingTopologyRef,
+    RuntimeBindingMemberRef,
+    RuntimeTopologyRef,
 )
 
 PLACEMENT_IDENTITY_FACTS_SCHEMA_VERSION = 1
@@ -31,6 +32,38 @@
 SOURCE_CATALOG_SCHEMA_VERSION = 1
 
 
+class RuntimeTensorView(BaseModel):
+    """Framework-neutral tensor identity view without live tensor payload."""
+
+    model_config = ConfigDict(frozen=True, extra="forbid")  # immutable, no extras
+
+    name: str
+    dtype: str  # dtype carried as text, not as a live dtype object
+    shape: tuple[int, ...]
+    stride: tuple[int, ...]
+    storage_offset: int = 0
+    element_size: int | None = None  # optional; None when not supplied
+
+
+class RuntimePlacement(BaseModel):
+    """Stable runtime placement identity shared with framework integrations."""
+
+    model_config = ConfigDict(frozen=True, extra="forbid")
+
+    topology: RuntimeTopologyRef
+    member: RuntimeBindingMemberRef
+    framework_payload: dict[str, Any]
+    identity_payload: dict[str, Any]
+
+    def stable_identity_payload(self) -> dict[str, Any]:
+        """Return dumped refs plus both payload dicts (payloads are not copied)."""
+        refs = {"topology": self.topology, "member": self.member}
+        dumped = {key: ref.model_dump(mode="python") for key, ref in refs.items()}
+        dumped["framework_payload"] = self.framework_payload
+        dumped["identity_payload"] = self.identity_payload
+        return dumped
+
+
 @dataclass(frozen=True)
 class FrameworkIdentity:
     """Stable framework identity facts owned by a framework host."""
@@ -247,7 +280,7 @@ def collect_runtime_tensor_view(
         self,
         tensors: Mapping[str, object],
     ) -> tuple[RuntimeTensorView, ...]:
-        from tensorcast.serving import contract as tc_contract
+        import tensorcast.artifact_runtime.contract as tc_contract
 
         schema = tc_contract.collect_runtime_tensor_schema(
             cast(Any, tensors), remove_duplicate=False
@@ -435,15 +468,15 @@ def execution_facts(
     ) -> MaterializationExecutionFacts: ...
 
 
-def serving_placement_from_framework_facts(
+def runtime_placement_from_framework_facts(
     *,
     identity_facts: PlacementIdentityFacts,
     admission_facts: PlacementAdmissionFacts | None = None,
     member_facts: PlacementMemberFacts,
     framework_payload: Mapping[str, object] | None = None,
     identity_payload: Mapping[str, object] | None = None,
-) -> ServingPlacement:
-    """Build core-owned serving placement identity from host facts."""
+) -> RuntimePlacement:
+    """Build core-owned runtime placement identity from host facts."""
 
     admission_facts = admission_facts or PlacementAdmissionFacts()
     placement_identity_payload = _stable_payload(
@@ -512,12 +545,12 @@ def serving_placement_from_framework_facts(
     resolved_identity_payload = dict(
         identity_payload or cast(Mapping[str, object], placement_identity_payload)
     )
-    return ServingPlacement(
-        topology=ServingTopologyRef(
+    return RuntimePlacement(
+        topology=RuntimeTopologyRef(
             schema_topology_digest=topology_digest,
             logical_topology_ref=(f"tensorcast://placement/{topology_digest[:16]}"),
         ),
-        member=ServingBindingMemberRef(
+        member=RuntimeBindingMemberRef(
             member_id=str(member_id),
             member_index=int(member_index),
             member_count=int(member_count),
@@ -567,26 +600,26 @@ class RuntimeProfile:
     source_catalog_policy: SourceCatalogPolicy | None = None
 
     @classmethod
-    def from_config(cls, serving_config: object) -> "RuntimeProfile":
-        return cls.from_serving_config(serving_config)
+    def from_config(cls, runtime_config: object) -> "RuntimeProfile":
+        return cls.from_runtime_config(runtime_config)
 
     @classmethod
-    def from_serving_config(cls, serving_config: object) -> "RuntimeProfile":
+    def from_runtime_config(cls, runtime_config: object) -> "RuntimeProfile":
         return cls(
             runtime_config=RuntimeConfig(
-                _mapping_from_object(getattr(serving_config, "runtime", None))
+                _mapping_from_object(getattr(runtime_config, "runtime", None))
             ),
             materialization_policy=MaterializationPolicy(
-                _mapping_from_object(getattr(serving_config, "materialization", None))
+                _mapping_from_object(getattr(runtime_config, "materialization", None))
             ),
             source_bound_contract=SourceBoundContractProfile(
                 _mapping_from_object(
-                    getattr(serving_config, "source_bound_contract", None)
+                    getattr(runtime_config, "source_bound_contract", None)
                 )
             ),
             manifest_policy=ManifestPolicy(),
             source_catalog_policy=SourceCatalogPolicy(
-                _mapping_from_object(getattr(serving_config, "source_catalog", None))
+                _mapping_from_object(getattr(runtime_config, "source_catalog", None))
             ),
         )
 
@@ -724,7 +757,7 @@ def admit(self, request: AdmissionRequest) -> AdmissionDecision:
 
 
 @dataclass(frozen=True)
-class IntegrationHost:
+class RuntimeHostCapabilities:
     framework: FrameworkHost
     placement: PlacementHost
     source_catalog: SourceCatalogProvider | None = None
@@ -736,6 +769,12 @@ class IntegrationHost:
     admission: AdmissionPolicy | None = None
 
 
+IntegrationHost = RuntimeHostCapabilities
+RuntimeAdmissionDecision = AdmissionDecision
+RuntimeAdmissionPolicy = AdmissionPolicy
+RuntimeAdmissionRequest = AdmissionRequest
+
+
 def semantic_placement_digest(
     *,
     kind: str,
@@ -811,7 +850,9 @@ def _stable_digest(value: object) -> str:
     "FrameworkHost",
     "FrameworkIdentity",
     "IntegrationHost",
+    "ManifestPolicy",
     "MaterializationExecutionFacts",
+    "MaterializationPolicy",
     "NativeLoadHost",
     "ObservabilitySink",
     "PLACEMENT_ADMISSION_FACTS_SCHEMA_VERSION",
@@ -823,6 +864,15 @@ def _stable_digest(value: object) -> str:
     "RecipeCachePolicy",
     "RecipeTraceHost",
     "RECIPE_CACHE_POLICY_SCHEMA_VERSION",
+    "RuntimeConfig",
+    "RuntimeAdmissionDecision",
+    "RuntimeAdmissionPolicy",
+    "RuntimeAdmissionRequest",
+    "RuntimeHostCapabilities",
+    "RuntimePlacement",
+    "RuntimeProfile",
+    "SourceBoundContractProfile",
+    "SourceCatalogPolicy",
     "SourceCatalogProvider",
     "SourceCatalogRequest",
     "SOURCE_CATALOG_REQUEST_SCHEMA_VERSION",
@@ -835,6 +885,6 @@ def _stable_digest(value: object) -> str:
     "TensorCastEvent",
     "TensorSurfaceHost",
     "TorchTensorHost",
+    "runtime_placement_from_framework_facts",
     "semantic_placement_digest",
-    "serving_placement_from_framework_facts",
 ]
diff --git a/tensorcast/artifact_runtime/intent.py b/tensorcast/artifact_runtime/intent.py
new file mode 100644
index 00000000..7b016af5
--- /dev/null
+++ b/tensorcast/artifact_runtime/intent.py
@@ -0,0 +1,75 @@
+#  Copyright (c) 2026, TensorCast Team.
+
+"""Artifact runtime intent DTOs."""
+
+from __future__ import annotations
+
+from collections.abc import Mapping
+from dataclasses import dataclass, field
+from typing import TYPE_CHECKING, Any
+
+from tensorcast.artifact_runtime.errors import AuthorityValidationError
+from tensorcast.artifact_runtime.locator import ArtifactLocator
+from tensorcast.artifact_runtime.policy import RuntimePolicy
+from tensorcast.retained_realization_authority import (
+    ParsedRetainedRealizationAuthority,
+)
+
+if TYPE_CHECKING:
+    from tensorcast.artifact_runtime.host import RecipeCachePolicy, SourceSelector
+
+
+@dataclass(frozen=True)
+class BootstrapPolicy:  # frozen holder for a string-keyed mapping of policy fields
+    fields: Mapping[str, object] = field(default_factory=dict)  # fresh {} per instance
+
+
+class RuntimeIntent:
+    """Marker base class for artifact runtime lifecycle intent DTOs."""
+
+
+@dataclass(frozen=True)
+class ExistingRuntimeArtifact(RuntimeIntent):
+    artifact_locator: ArtifactLocator | object  # NOTE(review): any object type-checks
+    policy: RuntimePolicy | object | None = None  # likewise; defaults to None
+
+
+@dataclass(frozen=True)
+class LocalSourceBootstrap(RuntimeIntent):
+    source_selector: SourceSelector  # name imported under TYPE_CHECKING only
+    bootstrap_policy: Any  # NOTE(review): not typed as BootstrapPolicy — confirm
+    cache_policy: RecipeCachePolicy | None = None  # TYPE_CHECKING-only name as well
+
+
+@dataclass(frozen=True)
+class RetainedBindingAcquire(RuntimeIntent):
+    authority: ParsedRetainedRealizationAuthority
+
+    def __post_init__(self) -> None:  # runtime type check of ``authority``
+        if not isinstance(self.authority, ParsedRetainedRealizationAuthority):
+            raise AuthorityValidationError(
+                "RetainedBindingAcquire.authority must be "
+                "ParsedRetainedRealizationAuthority"
+            )
+
+
+@dataclass(frozen=True)
+class RequestContext:  # frozen per-request options; every field has a default
+    framework_config: object | None = None
+    model_config: object | None = None
+    target_device: object | None = None
+    timeout_s: float | None = 30.0  # presumably seconds (``_s`` suffix) — TODO confirm
+
+
+RuntimeRequestContext = RequestContext
+
+
+__all__ = [
+    "BootstrapPolicy",
+    "ExistingRuntimeArtifact",
+    "LocalSourceBootstrap",
+    "RequestContext",
+    "RetainedBindingAcquire",
+    "RuntimeIntent",
+    "RuntimeRequestContext",
+]
diff --git a/tensorcast/serving/_runtime_impl/lifecycle.py b/tensorcast/artifact_runtime/lifecycle.py
similarity index 73%
rename from tensorcast/serving/_runtime_impl/lifecycle.py
rename to tensorcast/artifact_runtime/lifecycle.py
index 94a22d3d..e0ad19bb 100644
--- a/tensorcast/serving/_runtime_impl/lifecycle.py
+++ b/tensorcast/artifact_runtime/lifecycle.py
@@ -1,10 +1,9 @@
 #  Copyright (c) 2026, TensorCast Team.
-"""Serving lifecycle implementation for TensorCast framework integrations.
+"""Artifact-runtime lifecycle implementation for framework integrations.
 
-New framework integrations should prefer the narrow public modules
-``tensorcast.serving.runtime``, ``tensorcast.serving.hosts`` and
-``tensorcast.serving.testing``.  This module owns lifecycle orchestration and
-keeps low-level helpers out of the framework-facing host/runtime modules.
+New framework integrations should prefer the artifact-runtime public modules
+and runtime testing fixtures. This module owns lifecycle orchestration and keeps
+low-level helpers out of the framework-facing host/runtime modules.
 """
 
 from __future__ import annotations
@@ -13,22 +12,34 @@
 import json
 import logging
 import os
-from collections.abc import Callable, Iterator, Mapping, Sequence
-from contextlib import contextmanager
+import time
+from collections.abc import Callable, Mapping, Sequence
 from dataclasses import dataclass, replace
-from pathlib import Path
 from types import SimpleNamespace
-from typing import Any, cast
+from typing import Any, NoReturn, cast
 
 import torch
 
 import tensorcast as tc
+import tensorcast.artifact_runtime.binding.execution as tc_binding_runtime
+import tensorcast.artifact_runtime.config as tc_runtime_config
+import tensorcast.artifact_runtime.contract as tc_contract
+import tensorcast.artifact_runtime.diagnostics as tc_diagnostics
+import tensorcast.artifact_runtime.intent as tc_runtime_intent
+import tensorcast.artifact_runtime.publication.replica as tc_replica_publication
+import tensorcast.artifact_runtime.readiness as tc_readiness
+import tensorcast.artifact_runtime.recipe.local_ready as tc_local_ready
+import tensorcast.artifact_runtime.recipe.semantic_validation as tc_semantic_validation
+import tensorcast.artifact_runtime.recipe.tensor_schema as tc_tensor_schema
+import tensorcast.artifact_runtime.request_facts as tc_request_facts
+import tensorcast.artifact_runtime.source as tc_source_catalog
 from tensorcast.api.store.common import canonical_index_to_bytes
 from tensorcast.api.store.realization_kernel import (
     ArtifactRealizationHandle,
     ArtifactRealizationReport,
     ArtifactRealizationSpec,
     RealizationTargetPlan,
+    ResolvedArtifactSelection,
     artifact_realization_report_to_dict,
     emit_artifact_realization_profile_event,
     envelope_for_runtime_attachment,
@@ -37,45 +48,114 @@
     resolve_artifact_selection,
 )
 from tensorcast.api.store.types import CanonicalIndex, CanonicalIndexEntry
-from tensorcast.serving import binding_runtime as tc_binding_runtime
-from tensorcast.serving import config as tc_config
-from tensorcast.serving import contract as tc_contract
-from tensorcast.serving import diagnostics as tc_diagnostics
-from tensorcast.serving import dto as tc_dto
-from tensorcast.serving import errors as tc_errors
-from tensorcast.serving import hosts as tc_hosts
-from tensorcast.serving import local_ready as tc_local_ready
-from tensorcast.serving import policy as tc_policy
-from tensorcast.serving import readiness as tc_readiness
-from tensorcast.serving import recipe_build as tc_recipe_build
-from tensorcast.serving import replica_publication as tc_replica_publication
-from tensorcast.serving import retained_binding as tc_retained_binding
-from tensorcast.serving import runtime_attachment as tc_runtime_attachment
-from tensorcast.serving import runtime_config as tc_runtime_config
-from tensorcast.serving import runtime_contract as tc_runtime_contract
-from tensorcast.serving import runtime_intent as tc_runtime_intent
-from tensorcast.serving import runtime_view as tc_runtime_view
-from tensorcast.serving import session as tc_session
-from tensorcast.serving import source_catalog as tc_source_catalog
-from tensorcast.serving.builder import compiler as tc_compiler
-from tensorcast.serving.builder import materialization as tc_materialization
-from tensorcast.serving.builder import publication as tc_publication
-from tensorcast.serving.builder import recipe_cache as tc_recipe_cache
-from tensorcast.serving.builder import recipe_validation as tc_recipe_validation
-from tensorcast.serving.builder import semantic_validation as tc_semantic_validation
-from tensorcast.serving.builder import tensor_schema as tc_tensor_schema
-from tensorcast.serving.builder import trace_cache as tc_trace_cache
-from tensorcast.serving.builder.compiler import TracePlan
-from tensorcast.serving.resolver import (
-    ResolvedServingArtifact,
-    ServingArtifactResolver,
+from tensorcast.artifact_runtime.artifact.resolver import (
+    ResolvedRuntimeArtifact,
+    RuntimeArtifactResolver,
     canonical_index_from_descriptor,
-    is_reserved_serving_tensor_name,
+    is_reserved_runtime_tensor_name,
+)
+from tensorcast.artifact_runtime.attachment import (
+    RuntimeAttachment,
+    RuntimeBindingState,
+    RuntimeBindingView,
+    RuntimeStateSeed,
+)
+from tensorcast.artifact_runtime.binding.retained import (
+    RestoredRetainedBinding,
+    restore_prepared_local_ready_binding,
+    restore_retained_binding,
+    runtime_restore_rejection_reason,
+)
+from tensorcast.artifact_runtime.dto import (
+    FrameworkIntegrationContext,
+    PreparedRuntimeArtifact,
+    RuntimeBindingValue,
+    RuntimePlacement,
+)
+from tensorcast.artifact_runtime.errors import (
+    AdmissionRejectedError,
+    ArtifactLocatorResolutionError,
+    ArtifactRuntimeIntegrationError,
+    ArtifactRuntimeNotImplementedError,
+    AttachFinalizeError,
+    AuthorityValidationError,
+    CapabilityMissingError,
+    ConfigConflictError,
+    ManifestMismatchError,
+    OwnershipTransferError,
+    PlacementAdmissionError,
+    RestoreBindingError,
+    SchemaMismatchError,
+    SourceProviderError,
+    SourceSubjectError,
+)
+from tensorcast.artifact_runtime.errors import (
+    capability_missing as _capability_missing,
+)
+from tensorcast.artifact_runtime.host import (
+    AdmissionDecision,
+    AdmissionRequest,
+    DefaultAdmissionPolicy,
+    FrameworkHost,
+    FrameworkIdentity,
+    IntegrationHost,
+    MaterializationExecutionFacts,
+    MaterializationPolicy,
+    PlacementAdmissionFacts,
+    PlacementIdentityFacts,
+    PlacementMemberFacts,
+    RecipeCachePolicy,
+    RuntimeProfile,
+    SourceCatalogRequest,
+    SourceDownloadPolicy,
+    SourceHost,
+    SourceSelector,
+    SourceSubjectCoordinator,
+    TensorSurfaceHost,
+    TorchTensorHost,
+    runtime_placement_from_framework_facts,
+)
+from tensorcast.artifact_runtime.locator import (
+    ArtifactLocator,
+)
+from tensorcast.artifact_runtime.policy import (
+    RuntimePolicy,
+)
+from tensorcast.artifact_runtime.recipe.build import (
+    RecipeBuildCacheConfig,
+    RecipeBuildSession,
+    RecipeBuildSessionRequest,
+    RuntimeBindingPlan,
+    recipe_build_cache_config_from_policy,
+)
+from tensorcast.artifact_runtime.recipe.build import (
+    build_recipe_session as build_recipe_session_from_request,
+)
+from tensorcast.artifact_runtime.recipe.compiler import (
+    TensorcastSemanticValidationSpec,
+    TensorSchemaEntry,
+)
+from tensorcast.artifact_runtime.recipe.trace_ir import TracePlan
+from tensorcast.artifact_runtime.source import (
+    SourceSubject,
+    is_public_disk_source_subject,
+    resolve_source_subject,
+    source_subject_broadcast_payload,
+    source_subject_from_broadcast_payload,
+)
+from tensorcast.artifact_runtime.view import (
+    RuntimeWorkerView,
+    source_selection_projection_from_artifact_realization_report,
+    source_selection_projection_from_execution_diagnostics,
+    source_selection_projection_from_materialization_diagnostics,
+)
+from tensorcast.retained_realization_authority import (
+    ParsedRetainedRealizationAuthority,
 )
 from tensorcast.types import (
     CollectivePolicy,
     FinalizeClass,
-    ServingSupportLevel,
+    RuntimeSupportLevel,
 )
 
 ArtifactError = tc.ArtifactError
@@ -83,208 +163,136 @@
 BindingReservationCapability = tc.BindingReservationCapability
 BindingValueRef = tc.BindingValueRef
 BuilderMode = tc.BuilderMode
-CompiledServingRecipe = tc_compiler.CompiledServingRecipe
-BindingFinalizeMaterializationResult = (
-    tc_materialization.BindingFinalizeMaterializationResult
-)
 DEFAULT_RUNTIME_PROFILE = tc_runtime_config.DEFAULT_RUNTIME_PROFILE
 LOCAL_READY_BOOTSTRAP_BUILD_PIPELINE_VERSION = (
     tc_local_ready.LOCAL_READY_BOOTSTRAP_BUILD_PIPELINE_VERSION
 )
 
 _LOGGER = logging.getLogger(__name__)
-FamilyReadiness = tc_dto.FamilyReadiness
-FrameworkIntegrationContext = tc_dto.FrameworkIntegrationContext
-PreparedServingArtifact = tc_dto.PreparedServingArtifact
-ServingBindingValue = tc_dto.ServingBindingValue
 PublishedModelVersion = tc.PublishedModelVersion
-ServingBindingPlan = tc_recipe_build.ServingBindingPlan
-RecipeBuildCacheConfig = tc_recipe_build.RecipeBuildCacheConfig
-RecipeBuildRunResult = tc_recipe_build.RecipeBuildRunResult
-RecipeCacheLookupResult = tc_recipe_build.RecipeCacheLookupResult
-RecipeCacheWriteResult = tc_recipe_build.RecipeCacheWriteResult
-RecipeBuildSession = tc_recipe_build.RecipeBuildSession
-COMPILED_RECIPE_MEMORY_CACHE = tc_recipe_build.COMPILED_RECIPE_MEMORY_CACHE
-TRACE_PLAN_MEMORY_CACHE = tc_recipe_build.TRACE_PLAN_MEMORY_CACHE
-RecipeCompileInputs = tc_compiler.RecipeCompileInputs
-RecipePublicationContext = tc_publication.RecipePublicationContext
-ParsedRetainedServingBindingAuthority = (
-    tc_retained_binding.ParsedRetainedServingBindingAuthority
-)
 GroupRealizationAcquireRef = tc.GroupRealizationAcquireRef
-RuntimeTensorView = tc_dto.RuntimeTensorView
 SOURCE_BOUND_CONTRACT_PATH_COLLECTIVE_FIRST_V4 = (
-    tc_runtime_contract.SOURCE_BOUND_CONTRACT_PATH_COLLECTIVE_FIRST_V4
+    tc_contract.SOURCE_BOUND_CONTRACT_PATH_COLLECTIVE_FIRST_V4
 )
 SERVING_MANIFEST_TENSOR_NAME = tc.SERVING_MANIFEST_TENSOR_NAME
-ServingBindingState = tc_session.ServingBindingState
-ServingArtifactManifest = tc.ServingArtifactManifest
-ServingConfig = tc_config.ServingConfig
-ReplicaPublicationPolicy = tc_config.ReplicaPublicationPolicy
-ServingBindingMemberRef = tc.ServingBindingMemberRef
-ServingPlacement = tc_dto.ServingPlacement
-ServingRuntimePolicy = tc.ServingRuntimePolicy
-SourceBoundContractState = tc_runtime_contract.SourceBoundContractState
-source_bound_contract_profile_fields = (
-    tc_runtime_contract.source_bound_contract_profile_fields
-)
+RuntimeArtifactManifest = tc.RuntimeArtifactManifest
+TensorCastRuntimeConfig = tc_runtime_config.TensorCastRuntimeConfig
+ReplicaPublicationPolicy = tc_runtime_config.ReplicaPublicationPolicy
+RuntimeBindingMemberRef = tc.RuntimeBindingMemberRef
+RuntimeArtifactPolicy = tc.RuntimeArtifactPolicy
+SourceBoundContractState = tc_contract.SourceBoundContractState
+source_bound_contract_profile_fields = tc_contract.source_bound_contract_profile_fields
 SourceCatalog = tc_source_catalog.SourceCatalog
 SOURCE_CATALOG_SCHEMA_VERSION = tc_source_catalog.SOURCE_CATALOG_SCHEMA_VERSION
 
-AdmissionRejectedError = tc_errors.AdmissionRejectedError
-ArtifactLocatorResolutionError = tc_errors.ArtifactLocatorResolutionError
-AttachFinalizeError = tc_errors.AttachFinalizeError
-AuthorityValidationError = tc_errors.AuthorityValidationError
-CapabilityMissingError = tc_errors.CapabilityMissingError
-ConfigConflictError = tc_errors.ConfigConflictError
-ManifestMismatchError = tc_errors.ManifestMismatchError
-OwnershipTransferError = tc_errors.OwnershipTransferError
-PlacementAdmissionError = tc_errors.PlacementAdmissionError
-PolicyMismatchError = tc_errors.PolicyMismatchError
-PublicationRequiredError = tc_errors.PublicationRequiredError
-ReplicaPublicationError = tc_errors.ReplicaPublicationError
-RestoreBindingError = tc_errors.RestoreBindingError
-RuntimeSwapError = tc_errors.RuntimeSwapError
-SchemaMismatchError = tc_errors.SchemaMismatchError
-ServingIntegrationError = tc_errors.ServingIntegrationError
-ServingIntegrationNotImplementedError = tc_errors.ServingIntegrationNotImplementedError
-SourceProviderError = tc_errors.SourceProviderError
-SourceSubjectError = tc_errors.SourceSubjectError
-TensorCastServingRuntimeError = tc_errors.TensorCastServingRuntimeError
-_capability_missing = tc_errors.capability_missing
-
-RuntimeAttachment = tc_runtime_attachment.RuntimeAttachment
-RuntimeBindingState = tc_runtime_attachment.RuntimeBindingState
-RuntimeBindingView = tc_runtime_attachment.RuntimeBindingView
-RuntimeStateSeed = tc_runtime_attachment.RuntimeStateSeed
-
-BindingValueRefProjection = tc_runtime_view.BindingValueRefProjection
-MaterializationDiagnosticsProjection = (
-    tc_runtime_view.MaterializationDiagnosticsProjection
-)
-PublishedReplicaProjection = tc_runtime_view.PublishedReplicaProjection
-ReloadRequestProjection = tc_runtime_view.ReloadRequestProjection
-ReloadResponseProjection = tc_runtime_view.ReloadResponseProjection
-RuntimeEndpointProjection = tc_runtime_view.RuntimeEndpointProjection
-RuntimeWorkerView = tc_runtime_view.RuntimeWorkerView
-SourceBoundContractProjection = tc_runtime_view.SourceBoundContractProjection
-SourceSelectionProjection = tc_runtime_view.SourceSelectionProjection
-WeightVersionProjection = tc_runtime_view.WeightVersionProjection
-source_selection_projection_from_artifact_realization_report = (
-    tc_runtime_view.source_selection_projection_from_artifact_realization_report
-)
-source_selection_projection_from_execution_diagnostics = (
-    tc_runtime_view.source_selection_projection_from_execution_diagnostics
-)
-source_selection_projection_from_materialization_diagnostics = (
-    tc_runtime_view.source_selection_projection_from_materialization_diagnostics
+
+ModelRuntimeRequestFactsError = tc_request_facts.ModelRuntimeRequestFactsError
+resolve_model_runtime_request_facts = (
+    tc_request_facts.resolve_model_runtime_request_facts
 )
 
-# Host capability contracts live in hosts.py. Lifecycle uses module-local
-# aliases only to keep the orchestration code readable.
-AdmissionDecision = tc_hosts.AdmissionDecision
-AdmissionPolicy = tc_hosts.AdmissionPolicy
-AdmissionRequest = tc_hosts.AdmissionRequest
-CollectiveHost = tc_hosts.CollectiveHost
-DefaultAdmissionPolicy = tc_hosts.DefaultAdmissionPolicy
-FinalizeHookHost = tc_hosts.FinalizeHookHost
-FinalizePhase = tc_hosts.FinalizePhase
-FinalizePolicy = tc_hosts.FinalizePolicy
-FrameworkHost = tc_hosts.FrameworkHost
-FrameworkIdentity = tc_hosts.FrameworkIdentity
-IntegrationHost = tc_hosts.IntegrationHost
-ManifestPolicy = tc_hosts.ManifestPolicy
-MaterializationExecutionFacts = tc_hosts.MaterializationExecutionFacts
-MaterializationPolicy = tc_hosts.MaterializationPolicy
-NativeLoadHost = tc_hosts.NativeLoadHost
-ObservabilitySink = tc_hosts.ObservabilitySink
-PlacementAdmissionFacts = tc_hosts.PlacementAdmissionFacts
-PlacementHost = tc_hosts.PlacementHost
-PlacementIdentityFacts = tc_hosts.PlacementIdentityFacts
-PlacementMemberFacts = tc_hosts.PlacementMemberFacts
-RecipeCachePolicy = tc_hosts.RecipeCachePolicy
-RecipeTraceHost = tc_hosts.RecipeTraceHost
-RuntimeConfig = tc_hosts.RuntimeConfig
-RuntimeProfile = tc_hosts.RuntimeProfile
-SourceBoundContractProfile = tc_hosts.SourceBoundContractProfile
-SourceCatalogPolicy = tc_hosts.SourceCatalogPolicy
-SourceCatalogProvider = tc_hosts.SourceCatalogProvider
-SourceCatalogRequest = tc_hosts.SourceCatalogRequest
-SourceDownloadPolicy = tc_hosts.SourceDownloadPolicy
-SourceHost = tc_hosts.SourceHost
-SourceSelector = tc_hosts.SourceSelector
-SourceSubjectCoordinator = tc_hosts.SourceSubjectCoordinator
-TensorCastEvent = tc_hosts.TensorCastEvent
-TensorSurfaceHost = tc_hosts.TensorSurfaceHost
-TorchTensorHost = tc_hosts.TorchTensorHost
-semantic_placement_digest = tc_hosts.semantic_placement_digest
-serving_placement_from_framework_facts = tc_hosts.serving_placement_from_framework_facts
-TensorcastSemanticValidationSpec = tc_compiler.TensorcastSemanticValidationSpec
-TensorcastServingFacts = tc_compiler.TensorcastServingFacts
-TensorSchemaEntry = tc_compiler.TensorSchemaEntry
-read_source_bound_contract_state = tc_runtime_contract.read_source_bound_contract_state
+read_source_bound_contract_state = tc_contract.read_source_bound_contract_state
 resolve_runtime_config_profile = tc_runtime_config.resolve_runtime_config_profile
 
-RUNTIME_ENDPOINT_PROJECTION_SCHEMA_VERSION = (
-    tc_runtime_view.RUNTIME_ENDPOINT_PROJECTION_SCHEMA_VERSION
-)
-WEIGHT_VERSION_PROJECTION_SCHEMA_VERSION = (
-    tc_runtime_view.WEIGHT_VERSION_PROJECTION_SCHEMA_VERSION
-)
-RELOAD_RESPONSE_PROJECTION_SCHEMA_VERSION = (
-    tc_runtime_view.RELOAD_RESPONSE_PROJECTION_SCHEMA_VERSION
-)
-PUBLISHED_REPLICA_PROJECTION_SCHEMA_VERSION = (
-    tc_runtime_view.PUBLISHED_REPLICA_PROJECTION_SCHEMA_VERSION
-)
-SOURCE_SELECTION_PROJECTION_SCHEMA_VERSION = (
-    tc_runtime_view.SOURCE_SELECTION_PROJECTION_SCHEMA_VERSION
-)
-SERVING_ARTIFACT_LOCATOR_SCHEMA_VERSION = (
-    tc_policy.SERVING_ARTIFACT_LOCATOR_SCHEMA_VERSION
-)
 binding_layout_debug_payload = tc_diagnostics.binding_layout_debug_payload
 binding_layout_profile_fields = tc_diagnostics.binding_layout_profile_fields
 binding_layout_tensor_count = tc_diagnostics.binding_layout_tensor_count
-SERVING_POLICY_SCHEMA_VERSION = tc_policy.SERVING_POLICY_SCHEMA_VERSION
-ServingArtifactLocator = tc_policy.ServingArtifactLocator
-ServingPolicy = tc_policy.ServingPolicy
-normalize_serving_reload_request_payload = (
-    tc_policy.normalize_serving_reload_request_payload
-)
-merge_serving_reload_extra_config = tc_policy.merge_serving_reload_extra_config
-load_source_tensors_for_recipe = tc_materialization.load_source_tensors_for_recipe
-materialize_recipe_copy_plan_tensors = (
-    tc_materialization.materialize_recipe_copy_plan_tensors
-)
-materialize_pure_transform_serving_tensors = (
-    tc_materialization.materialize_pure_transform_serving_tensors
-)
-materialize_binding_finalize_serving_tensors = (
-    tc_materialization.materialize_binding_finalize_serving_tensors
-)
-collect_serving_tensors_from_model = (
-    tc_materialization.collect_serving_tensors_from_model
-)
-run_binding_finalize_semantic_validation = (
-    tc_materialization.run_binding_finalize_semantic_validation
-)
-validate_binding_finalize_tensor_schema = (
-    tc_materialization.validate_binding_finalize_tensor_schema
-)
-complete_pure_transform_recipe_publication_from_recipe = (
-    tc_publication.complete_pure_transform_recipe_publication
-)
 PLACEMENT_IDENTITY_FACTS_SCHEMA_VERSION = 1
 PLACEMENT_ADMISSION_FACTS_SCHEMA_VERSION = 1
 SOURCE_DOWNLOAD_POLICY_SCHEMA_VERSION = 1
 RECIPE_CACHE_POLICY_SCHEMA_VERSION = 1
 SOURCE_CATALOG_REQUEST_SCHEMA_VERSION = 1
 
+__all__ = [
+    "AdmissionDecision",
+    "AdmissionRejectedError",
+    "AdmissionRequest",
+    "ArtifactLocatorResolutionError",
+    "ArtifactRuntimeIntegration",
+    "ArtifactRuntimeIntegrationError",
+    "ArtifactRuntimeNotImplementedError",
+    "ArtifactRuntimeSession",
+    "AttachFinalizeError",
+    "AuthorityValidationError",
+    "BootstrapPolicy",
+    "CapabilityMissingError",
+    "ConfigConflictError",
+    "DefaultAdmissionPolicy",
+    "ExistingRuntimeArtifact",
+    "FinalizeClass",
+    "FrameworkIdentity",
+    "IntegrationHost",
+    "LocalReadyBindingContract",
+    "LocalReadyManifestCarrierResult",
+    "LocalReadyMaterializationIdentity",
+    "LocalSourceBootstrap",
+    "ManifestMismatchError",
+    "MaterializationExecutionFacts",
+    "OwnershipTransferError",
+    "PLACEMENT_ADMISSION_FACTS_SCHEMA_VERSION",
+    "PLACEMENT_IDENTITY_FACTS_SCHEMA_VERSION",
+    "PlacementAdmissionError",
+    "PlacementAdmissionFacts",
+    "PlacementIdentityFacts",
+    "PlacementMemberFacts",
+    "RECIPE_CACHE_POLICY_SCHEMA_VERSION",
+    "SERVING_MANIFEST_TENSOR_NAME",
+    "SOURCE_CATALOG_REQUEST_SCHEMA_VERSION",
+    "SOURCE_CATALOG_SCHEMA_VERSION",
+    "SOURCE_DOWNLOAD_POLICY_SCHEMA_VERSION",
+    "RecipeBuildSessionRequest",
+    "RecipeCachePolicy",
+    "RequestContext",
+    "RestoreBindingError",
+    "RetainedBindingAcquire",
+    "RuntimeAttachment",
+    "RuntimeBindingMaterialization",
+    "RuntimeBindingPlan",
+    "RuntimeBindingResult",
+    "RuntimeBindingState",
+    "RuntimeBindingView",
+    "RuntimeLoadResult",
+    "RuntimePlacement",
+    "RuntimeProfile",
+    "RuntimeReloadResult",
+    "RuntimeStateSeed",
+    "RuntimeSupportLevel",
+    "RuntimeWorkerView",
+    "SchemaMismatchError",
+    "SourceCatalogRequest",
+    "SourceDownloadPolicy",
+    "SourceHost",
+    "SourceProviderError",
+    "SourceSelector",
+    "SourceSubject",
+    "TensorSchemaEntry",
+    "TorchTensorHost",
+    "TensorcastSemanticValidationSpec",
+    "_DirectRuntimeLoad",
+    "_LocalReadyBootstrap",
+    "_LocalReadyFinalize",
+    "_RetainedBindingAcquire",
+    "_RuntimeReload",
+    "bind_runtime_artifact",
+    "build_local_ready_prepared_artifact",
+    "is_runtime_binding_swap_capable",
+    "local_ready_current_value_summary_fields",
+    "restore_prepared_local_ready_binding",
+    "restore_retained_binding",
+    "runtime_binding_state_from_runtime_view",
+    "runtime_placement_from_framework_facts",
+    "source_selection_projection_from_artifact_realization_report",
+    "source_selection_projection_from_execution_diagnostics",
+    "source_selection_projection_from_materialization_diagnostics",
+    "source_subject_broadcast_payload",
+    "source_subject_from_broadcast_payload",
+    "swap_runtime_artifact",
+]
+
 
 BootstrapPolicy = tc_runtime_intent.BootstrapPolicy
-ServingIntent = tc_runtime_intent.ServingIntent
-ExistingServingArtifact = tc_runtime_intent.ExistingServingArtifact
+RuntimeIntent = tc_runtime_intent.RuntimeIntent
+ExistingRuntimeArtifact = tc_runtime_intent.ExistingRuntimeArtifact
 LocalSourceBootstrap = tc_runtime_intent.LocalSourceBootstrap
 RetainedBindingAcquire = tc_runtime_intent.RetainedBindingAcquire
 RequestContext = tc_runtime_intent.RequestContext
@@ -313,16 +321,19 @@ def attach_and_finalize(
         run_post_bind_finalize: bool = True,
         expected_tensor_schema_hash: str | None = None,
         semantic_validation_spec: Any | None = None,
+        model_runtime_spec: ArtifactRealizationSpec | None = None,
     ) -> RuntimeBindingState:
         owner: Any = binding_handle
         transferred = False
         try:
+            attach_start = time.perf_counter()
             self._emit("runtime_materialization.attach.start", state_seed)
             self._attach_bound_tensors(
                 model,
                 tensors,
                 replace_meta_params=replace_meta_params,
             )
+            attach_done = time.perf_counter()
             canonical = self._collect_runtime_tensors(
                 model,
                 remove_duplicate=False,
@@ -372,16 +383,35 @@ def attach_and_finalize(
             if callable(transfer_to_runtime):
                 owner = transfer_to_runtime()
                 transferred = True
+            finalize_done = time.perf_counter()
             view = state_seed.runtime_view()
+            realization_report = state_seed.realization_report
+            if realization_report is not None:
+                realization_report = replace(
+                    realization_report,
+                    runtime_attach_sec=max(0.0, attach_done - attach_start),
+                    runtime_finalize_sec=max(0.0, finalize_done - attach_done),
+                    total_sec=max(0.0, finalize_done - attach_start),
+                )
             realization_handle = _runtime_attachment_realization_handle(
-                report=state_seed.realization_report,
+                report=realization_report,
                 binding_handle=binding_handle,
                 owner=owner,
             )
             model_runtime_ref: dict[str, RuntimeBindingState] = {}
-            model_runtime_handle = _model_runtime_realization_handle(
-                context=context,
-                target_device=target_device,
+            model_runtime_handle = _model_runtime_realization_handle_for_spec(
+                spec=(
+                    _model_runtime_spec_with_context_defaults(
+                        spec=model_runtime_spec,
+                        context=context,
+                        target_device=target_device,
+                    )
+                    if model_runtime_spec is not None
+                    else _model_runtime_spec_for_context(
+                        context=context,
+                        target_device=target_device,
+                    )
+                ),
                 runtime_attachment_handle=realization_handle,
                 attach_fn=lambda **_kwargs: model_runtime_ref["state"],
             )
@@ -407,6 +437,9 @@ def attach_and_finalize(
             return state
         except OwnershipTransferError:
             raise
+        except ModelRuntimeRequestFactsError:
+            self._close_quietly(owner)
+            raise
         except SchemaMismatchError:
             self._close_quietly(owner)
             raise
@@ -449,13 +482,15 @@ def _run_semantic_validation(
         model_config: object | None,
     ) -> Any:
         if getattr(spec, "kind", None) == "none":
-            return evaluate_semantic_validation_spec(spec, None)
+            return tc_semantic_validation.evaluate_semantic_validation_spec(spec, None)
         hook_host = self.host.framework
         semantic_probes = getattr(hook_host, "semantic_probes", None)
         actual_payload = (
             semantic_probes(model, model_config) if callable(semantic_probes) else None
         )
-        return evaluate_semantic_validation_spec(spec, actual_payload)
+        return tc_semantic_validation.evaluate_semantic_validation_spec(
+            spec, actual_payload
+        )
 
     def _surface(self) -> TensorSurfaceHost:
         if self.host.tensor_surface is None:
@@ -558,7 +593,7 @@ class _HostMaterializationRequest:
 
 
 @dataclass(frozen=True)
-class _DirectServingLoad:
+class _DirectRuntimeLoad:
     artifact_locator: Any | None = None
     policy: Any | None = None
     materialization: Any | None = None
@@ -566,7 +601,7 @@ class _DirectServingLoad:
     source_bound_contract_state: Any | None = None
     source_bound_contract_path: str | None = None
     execution_facts: Mapping[str, Any] | None = None
-    operation_scope: str = "startup.direct_serving_artifact.bind"
+    operation_scope: str = "startup.direct_runtime_artifact.bind"
     require_materialization_options: bool = False
     framework_config: Any | None = None
     model_config: Any | None = None
@@ -574,21 +609,23 @@ class _DirectServingLoad:
     expected_member: Any | None = None
     timeout_s: float | None = 30.0
     artifact_ref: str | None = None
-    resolved_artifact: ResolvedServingArtifact | None = None
+    source_selection: ResolvedArtifactSelection | None = None
+    resolved_artifact: ResolvedRuntimeArtifact | None = None
     model: Any | None = None
+    model_runtime_spec: ArtifactRealizationSpec | None = None
 
 
 @dataclass(frozen=True)
-class ServingLoadResult:
+class RuntimeLoadResult:
     model: Any | None = None
     runtime_state: RuntimeBindingState | None = None
     runtime_view: RuntimeBindingView | None = None
-    resolved_artifact: ResolvedServingArtifact | None = None
+    resolved_artifact: ResolvedRuntimeArtifact | None = None
     binding_result: RuntimeBindingResult | None = None
 
 
 @dataclass(frozen=True)
-class _ServingReload:
+class _RuntimeReload:
     current_state: RuntimeBindingState | Any
     artifact_locator: Any | None = None
     policy: Any | None = None
@@ -604,27 +641,27 @@ class _ServingReload:
     model_config: Any | None = None
     target_device: Any | None = None
     artifact_ref: str | None = None
-    resolved_artifact: ResolvedServingArtifact | None = None
+    resolved_artifact: ResolvedRuntimeArtifact | None = None
     model: Any | None = None
 
 
 @dataclass(frozen=True)
-class ServingReloadResult:
+class RuntimeReloadResult:
     runtime_state: RuntimeBindingState | None = None
     runtime_view: RuntimeBindingView | None = None
-    resolved_artifact: ResolvedServingArtifact | None = None
+    resolved_artifact: ResolvedRuntimeArtifact | None = None
     binding_result: RuntimeBindingResult | None = None
 
 
 @dataclass(frozen=True)
-class _ServingArtifactPreflight:
-    resolved_artifact: ResolvedServingArtifact
-    serving_runtime_policy: Any | None
+class _RuntimeArtifactPreflight:
+    resolved_artifact: ResolvedRuntimeArtifact
+    runtime_artifact_policy: Any | None
 
 
 @dataclass(frozen=True)
 class _RetainedBindingAcquire:
-    authority: Any | None = None
+    authority: ParsedRetainedRealizationAuthority | None = None
     framework_config: Any | None = None
     model_config: Any | None = None
     target_device: Any | None = None
@@ -633,6 +670,7 @@ class _RetainedBindingAcquire:
     client: Any | None = None
     restore_fn: Any | None = None
     timeout_s: float | None = 30.0
+    model_runtime_spec: ArtifactRealizationSpec | None = None
 
 
 @dataclass(frozen=True)
@@ -648,7 +686,7 @@ class _LocalReadyBootstrap:
     """Internal lowering payload for ``LocalSourceBootstrap``.
 
     This is deliberately private: framework integrations enter through
-    ``ServingIntegration.start(LocalSourceBootstrap, context)`` and host facts.
+    ``ArtifactRuntimeIntegration.start(LocalSourceBootstrap, context)`` and host facts.
     """
 
     source_selector: SourceSelector | Any | None = None
@@ -673,6 +711,7 @@ class _LocalReadyBootstrap:
     source_subject: Any | None = None
     placement: Any | None = None
     source_artifact_ref: str | None = None
+    source_selection: ResolvedArtifactSelection | None = None
     serving_manifest_ref: str | None = None
     representation_contract_hash: str | None = None
     serving_build_digest: str | None = None
@@ -700,6 +739,7 @@ class _LocalReadyBootstrap:
     framework_version: str | None = None
     adapter_version: str | None = None
     serving_abi_version: str | None = None
+    model_runtime_spec: ArtifactRealizationSpec | None = None
 
 
 @dataclass(frozen=True)
@@ -718,6 +758,7 @@ class _LocalReadyFinalize:
     source_bound_contract_state: Any
     source_bound_contract_path: str
     target_device: Any
+    source_selection: ResolvedArtifactSelection | None = None
     manifest_bytes: bytes | None = None
     framework_config: Any | None = None
     model_config: Any | None = None
@@ -737,15 +778,16 @@ class _LocalReadyFinalize:
     framework_version: str | None = None
     adapter_version: str | None = None
     serving_abi_version: str | None = None
+    model_runtime_spec: ArtifactRealizationSpec | None = None
 
 
 @dataclass(frozen=True)
-class LocalReadyServingResult:
+class LocalReadyRuntimeResult:
     model: Any | None = None
     runtime_state: RuntimeBindingState | None = None
     runtime_view: RuntimeBindingView | None = None
-    prepared: PreparedServingArtifact | None = None
-    binding_value: ServingBindingValue | None = None
+    prepared: PreparedRuntimeArtifact | None = None
+    binding_value: RuntimeBindingValue | None = None
     recipe: Any | None = None
     current_value: Any | None = None
     binding: Any | None = None
@@ -756,26 +798,6 @@ class LocalReadyServingResult:
     realization_report: ArtifactRealizationReport | None = None
 
 
-@dataclass(frozen=True)
-class RecipeBuildSessionRequest:
-    source_subject: SourceSubject | Any | None = None
-    framework_config: Any | None = None
-    model_config: Any | None = None
-    placement: ServingPlacement | None = None
-    cache_config: Any | None = None
-    identity: ServingBindingPlan | None = None
-    trace_cache_schema_version: int | None = None
-    tp_rank: int | None = None
-    tp_world_size: int | None = None
-
-
-@dataclass(frozen=True)
-class RecipeBuildResult:
-    session: RecipeBuildSession
-    recipe: Any | None = None
-    diagnostics: Mapping[str, Any] | None = None
-
-
 @dataclass(frozen=True)
 class LocalReadyBindingContract:
     excluded_names: tuple[str, ...]
@@ -836,7 +858,7 @@ def _canonical_index_bytes_from_tensors(
 
 def _canonical_index_bytes_for_runtime_selection(
     *,
-    resolved: ResolvedServingArtifact | Any | None,
+    resolved: ResolvedRuntimeArtifact | Any | None,
     tensors: Mapping[str, torch.Tensor],
 ) -> bytes:
     descriptor = getattr(resolved, "descriptor", None)
@@ -863,11 +885,12 @@ def _target_layout_digest_for_runtime_attachment(
 
 def _runtime_attachment_report_for_resolved(
     *,
-    resolved: ResolvedServingArtifact | Any,
+    resolved: ResolvedRuntimeArtifact | Any,
     tensors: Mapping[str, torch.Tensor],
     binding_handle: Any | None,
     target_device: Any,
     tensor_schema_hash: str,
+    source_selection: ResolvedArtifactSelection | None = None,
     execution_diagnostics: Any | None = None,
     materialization_diagnostics: Any | None = None,
 ) -> ArtifactRealizationReport:
@@ -885,14 +908,14 @@ def _runtime_attachment_report_for_resolved(
     )
     envelope = envelope_for_runtime_attachment(tensors, retained=False)
     envelope.validate_for_target(target_plan)
-    selection = resolve_artifact_selection(
+    selection = source_selection or resolve_artifact_selection(
         artifact_id=str(getattr(resolved, "artifact_ref", "") or ""),
         canonical_index_bytes=_canonical_index_bytes_for_runtime_selection(
             resolved=resolved,
             tensors=tensors,
         ),
         tensor_names=tuple(str(name) for name in tensors),
-        artifact_profile="serving_artifact",
+        artifact_profile="runtime_artifact",
         authority_scope="daemon_mediated_runtime_attachment",
     )
     return report_for_runtime_attachment(
@@ -908,12 +931,13 @@ def _runtime_attachment_report_for_resolved(
 
 def _runtime_attachment_report_for_retained(
     *,
-    authority: tc_retained_binding.ParsedRetainedServingBindingAuthority,
+    authority: ParsedRetainedRealizationAuthority,
     tensors: Mapping[str, torch.Tensor],
     binding_handle: Any | None,
     target_device: Any,
     tensor_schema_hash: str,
     reservation_bytes: int,
+    source_selection: ResolvedArtifactSelection | None = None,
 ) -> ArtifactRealizationReport:
     binding_layout_id = _optional_text(
         getattr(binding_handle, "binding_layout_id", None)
@@ -939,7 +963,7 @@ def _runtime_attachment_report_for_retained(
         or authority.local_serving_ref
         or authority.binding_value_ref.binding_value_id
     )
-    selection = resolve_artifact_selection(
+    selection = source_selection or resolve_artifact_selection(
         artifact_id=str(artifact_id),
         canonical_index_bytes=_canonical_index_bytes_from_tensors(tensors),
         tensor_names=tuple(str(name) for name in tensors),
@@ -965,6 +989,7 @@ def _runtime_attachment_report_for_artifact_id(
     tensor_schema_hash: str,
     artifact_profile: str,
     authority_scope: str,
+    source_selection: ResolvedArtifactSelection | None = None,
     retained: bool = False,
     reservation_bytes: int = 0,
 ) -> ArtifactRealizationReport:
@@ -986,7 +1011,7 @@ def _runtime_attachment_report_for_artifact_id(
         reservation_bytes=reservation_bytes,
     )
     envelope.validate_for_target(target_plan)
-    selection = resolve_artifact_selection(
+    selection = source_selection or resolve_artifact_selection(
         artifact_id=str(artifact_id),
         canonical_index_bytes=_canonical_index_bytes_from_tensors(tensors),
         tensor_names=tuple(str(name) for name in tensors),
@@ -1057,6 +1082,21 @@ def _model_runtime_spec_for_context(
     )
 
 
+def _model_runtime_spec_with_context_defaults(
+    *,
+    spec: ArtifactRealizationSpec,
+    context: FrameworkIntegrationContext,  # forwarded as host_context
+    target_device: Any,  # used for both the runtime context and the host device
+) -> ArtifactRealizationSpec:
+    facts = resolve_model_runtime_request_facts(  # only facts.spec is used below
+        spec=spec,
+        runtime_context=RequestContext(target_device=target_device),
+        host_context=context,
+        host_target_device=target_device,
+    )
+    return cast(ArtifactRealizationSpec, facts.spec)  # presumably typing.cast
+
+
 def _model_runtime_realization_handle(
     *,
     context: FrameworkIntegrationContext,
@@ -1098,9 +1138,25 @@ def _model_runtime_realization_handle_for_spec(
     return handle
 
 
+def _project_model_runtime_attachment(
+    state: RuntimeBindingState,  # state.model_runtime_handle may be reassigned
+    attachment: RuntimeAttachment,
+) -> RuntimeAttachment:  # every path returns `attachment` itself
+    handle = state.model_runtime_handle
+    if not isinstance(handle, ArtifactRealizationHandle):
+        return attachment  # no realization handle on state: leave state untouched
+    state.model_runtime_handle = ArtifactRealizationHandle(  # replaces the old handle
+        target_kind="model_runtime",
+        report=handle.report,  # carried over from the previous handle
+        attachment_value=attachment,  # the new handle points at this attachment
+        release_contract=handle.release_contract,  # carried over as well
+    )
+    return attachment
+
+
 @dataclass(frozen=True)
 class RuntimeBindingResult:
-    """Attach-ready result from a serving bind or swap operation."""
+    """Attach-ready result from a runtime bind or swap operation."""
 
     binding: Any
     tensors: Mapping[str, torch.Tensor]
@@ -1130,125 +1186,6 @@ def from_binding(
         )
 
 
-@dataclass
-class RestoredRetainedBinding:
-    """Restored retained binding tensors before runtime ownership transfer."""
-
-    _attached: tc_retained_binding.AttachedRetainedBinding
-    _runtime_handle: (
-        tc_retained_binding.RuntimeRetainedBindingAttachmentHandle | None
-    ) = None
-
-    @property
-    def tensors(self) -> Mapping[str, torch.Tensor]:
-        return self._attached.tensors
-
-    @property
-    def binding_layout_id(self) -> str:
-        return self._attached.binding_layout_id
-
-    @property
-    def binding_value_ref(self) -> tc.BindingValueRef:
-        return self._attached.binding_value_ref
-
-    @property
-    def member_ref(self) -> tc.ServingBindingMemberRef:
-        return self._attached.member_ref
-
-    @property
-    def reservation_bytes(self) -> int:
-        return self._attached.reservation_bytes
-
-    @property
-    def authority(self) -> tc_retained_binding.ParsedRetainedServingBindingAuthority:
-        return self._attached.authority
-
-    @property
-    def runtime_handle(
-        self,
-    ) -> tc_retained_binding.RuntimeRetainedBindingAttachmentHandle | None:
-        return self._runtime_handle
-
-    def transfer_to_runtime(
-        self,
-    ) -> tc_retained_binding.RuntimeRetainedBindingAttachmentHandle:
-        if self._runtime_handle is None:
-            self._runtime_handle = self._attached.transfer_to_runtime()
-        return self._runtime_handle
-
-    def close(self) -> None:
-        if self._runtime_handle is None:
-            self._attached.close()
-
-
-@dataclass(frozen=True)
-class SourceSubject:
-    """Opaque framework-facing source subject wrapper."""
-
-    artifact_ref: str
-    subject: Any
-    source_kind: str = "opaque"
-    metadata_fingerprint: str | None = None
-
-    def broadcast_payload(self) -> dict[str, Any]:
-        if self.source_kind == "public_disk":
-            subject_payload = _public_disk_source_payload(self.subject)
-        else:
-            subject_payload = self.subject
-        return {
-            "kind": self.source_kind,
-            "artifact_ref": self.artifact_ref,
-            "subject": subject_payload,
-            "metadata_fingerprint": self.metadata_fingerprint,
-        }
-
-    def profile_fields(self) -> dict[str, Any]:
-        source = self.subject
-        fields: dict[str, Any] = {
-            "artifact_ref": self.artifact_ref,
-            "source_kind": self.source_kind,
-        }
-        if self.metadata_fingerprint is not None:
-            fields["metadata_fingerprint"] = self.metadata_fingerprint
-        canonical_index = getattr(source, "canonical_index_bytes", None)
-        if canonical_index is not None:
-            fields["canonical_index_bytes"] = len(canonical_index)
-        source_index = getattr(source, "source_index_bytes", None)
-        if source_index is not None:
-            fields["source_index_bytes"] = len(bytes(source_index or b""))
-        for name in ("format_kind", "metadata_capability"):
-            value = getattr(source, name, None)
-            if value is not None:
-                fields[name] = str(value or "")
-        return fields
-
-
-def _public_disk_source_payload(source: Any) -> dict[str, Any]:
-    return {
-        "path": str(getattr(source, "path", "") or ""),
-        "canonical_index_bytes": bytes(source.canonical_index_bytes),
-        "artifact_id": str(getattr(source, "artifact_id", "") or ""),
-        "generation": int(getattr(source, "generation", 0) or 0),
-        "verify_checksums": bool(getattr(source, "verify_checksums", True)),
-        "trusted_content_artifact_id": _optional_str(
-            getattr(source, "trusted_content_artifact_id", None)
-        ),
-        "source_index_bytes": _optional_bytes(
-            getattr(source, "source_index_bytes", None)
-        ),
-        "format_kind": _enum_wire_value(getattr(source, "format_kind", None)),
-        "metadata_capability": _enum_wire_value(
-            getattr(source, "metadata_capability", None)
-        ),
-        "resolution_strategy": _enum_wire_value(
-            getattr(source, "resolution_strategy", None)
-        ),
-        "validation_mode": _enum_wire_value(getattr(source, "validation_mode", None)),
-        "policy_id": _optional_str(getattr(source, "policy_id", None)),
-        "exact_size_bytes": int(getattr(source, "exact_size_bytes", 0) or 0),
-    }
-
-
 def _optional_str(value: Any) -> str | None:
     if value is None:
         return None
@@ -1308,110 +1245,6 @@ def _artifact_locator_kind(artifact_locator: object) -> str:
     return str(getattr(artifact_locator, "kind", "") or "")
 
 
-def _optional_bool(fields: Mapping[str, object], name: str, default: bool) -> bool:
-    value = fields.get(name)
-    if value is None:
-        return default
-    return bool(value)
-
-
-def _optional_path(value: object | None) -> Path | None:
-    if value is None:
-        return None
-    text = str(value).strip()
-    if not text:
-        return None
-    return Path(text).expanduser()
-
-
-def _unique_paths(paths: Sequence[Path]) -> tuple[Path, ...]:
-    unique: list[Path] = []
-    seen: set[str] = set()
-    for path in paths:
-        key = str(path)
-        if key in seen:
-            continue
-        seen.add(key)
-        unique.append(path)
-    return tuple(unique)
-
-
-def _model_adjacent_cache_root(source_catalog: object) -> Path | None:
-    raw_selected_files = getattr(source_catalog, "selected_files", ()) or ()
-    selected_files = tuple(cast(Sequence[Any], raw_selected_files))
-    if not selected_files:
-        return None
-    parent_paths: list[str] = []
-    for entry in selected_files:
-        path = getattr(entry, "path", None)
-        if path is None:
-            continue
-        parent_paths.append(str(Path(path).expanduser().resolve().parent))
-    if not parent_paths:
-        return None
-    return Path(os.path.commonpath(parent_paths)) / ".tensorcast" / "bootstrap_cache"
-
-
-def _is_writable_or_creatable(path: Path) -> bool:
-    if path.exists():
-        return os.access(path, os.W_OK)
-    parent = path.parent
-    while not parent.exists() and parent != parent.parent:
-        parent = parent.parent
-    return parent.exists() and os.access(parent, os.W_OK)
-
-
-def _recipe_build_cache_config_from_policy(
-    policy: RecipeCachePolicy,
-    *,
-    source_catalog: object,
-) -> RecipeBuildCacheConfig:
-    fields = dict(policy.fields or {})
-    explicit_cache_root = _optional_bool(fields, "explicit_cache_root", False)
-    prefer_model_adjacent = _optional_bool(fields, "prefer_model_adjacent", True)
-    cache_root = _optional_path(fields.get("cache_root"))
-
-    roots: list[Path] = []
-    if prefer_model_adjacent:
-        model_adjacent = _model_adjacent_cache_root(source_catalog)
-        if model_adjacent is not None:
-            roots.append(model_adjacent)
-    if cache_root is not None and (explicit_cache_root or not roots):
-        roots.append(cache_root)
-    roots = list(_unique_paths(roots))
-
-    write_roots: list[Path] = []
-    if prefer_model_adjacent:
-        model_adjacent = _model_adjacent_cache_root(source_catalog)
-        if model_adjacent is not None and _is_writable_or_creatable(model_adjacent):
-            write_roots.append(model_adjacent)
-    if cache_root is not None and (explicit_cache_root or not write_roots):
-        write_roots.append(cache_root)
-    write_roots = list(_unique_paths(write_roots))
-
-    debug_output_dir = _optional_path(fields.get("debug_output_dir"))
-    return RecipeBuildCacheConfig(
-        cache_dirs=tuple(str(root / "trace_plans") for root in roots),
-        trace_write_dirs=tuple(str(root / "trace_plans") for root in write_roots),
-        recipe_cache_dirs=tuple(str(root / "compiled_recipes") for root in roots),
-        recipe_cache_write_dirs=tuple(
-            str(root / "compiled_recipes") for root in write_roots
-        ),
-        debug_output_dir=debug_output_dir,
-        allow_cache=_optional_bool(fields, "allow_cache", True),
-        allow_recipe_cache=_optional_bool(fields, "allow_recipe_cache", True),
-        allow_trace=_optional_bool(fields, "allow_trace", True),
-        trace_tp_slices=_optional_bool(fields, "trace_tp_slices", True),
-        debug_dump_trace=_optional_bool(fields, "debug_dump_trace", False),
-        synchronous_cache_write=_optional_bool(
-            fields, "synchronous_cache_write", False
-        ),
-        synchronous_recipe_cache_write=_optional_bool(
-            fields, "synchronous_recipe_cache_write", False
-        ),
-    )
-
-
 def _collective_policy_value(policy: MaterializationPolicy) -> str:
     collective = str(policy.fields.get("collective", "auto") or "auto")
     return {
@@ -1444,128 +1277,10 @@ def _framework_payload_mapping(payload: object | None) -> dict[str, object] | No
     return {str(key): value for key, value in payload.items()}
 
 
-def _optional_bytes(value: Any) -> bytes | None:
-    if value is None:
-        return None
-    data = bytes(value)
-    return data or None
-
-
-def _enum_wire_value(value: Any) -> str | int | None:
-    if value is None:
-        return None
-    enum_value = getattr(value, "value", value)
-    if isinstance(enum_value, (str, int)):
-        return enum_value
-    return str(enum_value)
-
-
-def _source_subject_from_handle(source: Any) -> SourceSubject:
-    artifact_ref = str(getattr(source, "artifact_id", "") or "")
-    if not artifact_ref:
-        raise RuntimeError("TensorCast source subject is missing a source artifact_id")
-    return SourceSubject(
-        artifact_ref=artifact_ref,
-        subject=source,
-        source_kind="public_disk",
-    )
-
-
-def resolve_source_subject(
-    path: str,
-    *,
-    verify_checksums: bool,
-) -> SourceSubject:
-    return _source_subject_from_handle(
-        tc.resolve_public_disk_source(
-            path,
-            verify_checksums=verify_checksums,
-        )
-    )
-
-
-def source_subject_from_broadcast_payload(payload: Mapping[str, Any]) -> SourceSubject:
-    payload_dict = dict(payload)
-    if "kind" not in payload_dict:
-        raise SourceSubjectError(
-            "TensorCast source subject broadcast payload is missing kind"
-        )
-    kind = str(payload_dict.get("kind") or "")
-    artifact_ref = str(payload_dict.get("artifact_ref") or "")
-    if not artifact_ref:
-        raise SourceSubjectError(
-            "TensorCast source subject broadcast payload is missing artifact_ref"
-        )
-    source: Any
-    if kind == "public_disk":
-        subject_payload = payload_dict.get("subject")
-        if not isinstance(subject_payload, Mapping):
-            raise SourceSubjectError(
-                "TensorCast public_disk source subject payload must be a mapping"
-            )
-        source = tc.PublicDiskSourceHandle(**dict(subject_payload))
-    else:
-        source = payload_dict.get("subject")
-    return SourceSubject(
-        artifact_ref=artifact_ref,
-        subject=source,
-        source_kind=kind,
-        metadata_fingerprint=_optional_text(payload_dict.get("metadata_fingerprint")),
-    )
-
-
-def source_subject_broadcast_payload(subject: SourceSubject) -> dict[str, Any]:
-    return subject.broadcast_payload()
-
-
-def is_public_disk_source_subject(subject: Any) -> bool:
-    return isinstance(subject, tc.PublicDiskSourceHandle)
-
-
 def source_subject_slice_count(recipe: Any, subject: Any) -> int:
     if is_public_disk_source_subject(subject):
         return 0
-    return tensorcast_view_slice_count(recipe)
-
-
-def serving_binding_state_from_runtime_view(
-    *,
-    runtime_view: RuntimeBindingView,
-    artifact_locator: Any,
-    policy: Any,
-    readiness: str | None = None,
-) -> ServingBindingState:
-    binding_value_ref = runtime_view.binding_value_ref
-    to_ref = getattr(binding_value_ref, "to_binding_value_ref", None)
-    if callable(to_ref):
-        binding_value_ref = to_ref()
-    if binding_value_ref is not None and not isinstance(
-        binding_value_ref,
-        BindingValueRef,
-    ):
-        if isinstance(binding_value_ref, Mapping):
-            binding_value_ref = BindingValueRef.model_validate(dict(binding_value_ref))
-        else:
-            raise ServingIntegrationError(
-                "RuntimeBindingView.binding_value_ref must be BindingValueRef or a mapping"
-            )
-    typed_binding_value_ref = cast(BindingValueRef | None, binding_value_ref)
-    resolved_readiness = readiness or runtime_view.readiness or "loaded"
-    state = "loaded" if resolved_readiness == "serving" else resolved_readiness
-    return ServingBindingState(
-        state=state,
-        artifact_locator=artifact_locator,
-        serving_artifact_ref=runtime_view.serving_artifact_ref,
-        manifest_ref=getattr(policy, "manifest_ref", None),
-        representation_contract_hash=(
-            runtime_view.representation_contract_hash
-            or getattr(policy, "representation_contract_hash", "")
-        ),
-        serving_build_digest=getattr(policy, "serving_build_digest", None),
-        binding_value_ref=typed_binding_value_ref,
-        local_serving_ref=runtime_view.local_serving_ref,
-        readiness=resolved_readiness,
-    )
+    return tc_local_ready.tensorcast_view_slice_count(recipe)
 
 
 def runtime_binding_state_from_runtime_view(
@@ -1718,12 +1433,14 @@ def local_ready_current_value_summary_fields(
 ) -> dict[str, Any]:
     local_serving_ref = getattr(current_value, "local_serving_ref", None)
     if require_local_serving_ref and not local_serving_ref:
-        raise ServingIntegrationError(
+        raise ArtifactRuntimeIntegrationError(
             "TensorCast local-ready current value did not include local_serving_ref"
         )
     return {
         "binding_value_id": getattr(current_value, "binding_value_id", None),
-        "verification_state": binding_value_verification_state_name(current_value),
+        "verification_state": tc_local_ready.binding_value_verification_state_name(
+            current_value
+        ),
         "local_serving_ref": local_serving_ref,
     }
 
@@ -1761,7 +1478,7 @@ def build_local_ready_prepared_artifact(
     source_bound_contract_path: str,
     artifact_realization_report: ArtifactRealizationReport | None = None,
     model_runtime_spec: ArtifactRealizationSpec | None = None,
-) -> LocalReadyServingResult:
+) -> LocalReadyRuntimeResult:
     current_value_fields = local_ready_current_value_summary_fields(
         current_value,
         require_local_serving_ref=True,
@@ -1787,7 +1504,7 @@ def build_local_ready_prepared_artifact(
         ),
         prefix="realize",
     )
-    realization_report = tc_diagnostics.ServingRealizationReport(
+    realization_report = tc_diagnostics.RuntimeRealizationReport(
         source_artifact_ref=source_artifact_ref,
         serving_manifest_ref=serving_manifest_ref,
         representation_contract_hash=representation_contract_hash,
@@ -1828,7 +1545,7 @@ def build_local_ready_prepared_artifact(
         tensor_schema_hash=tensor_schema_hash,
         binding_value_ref=binding_value_ref,
         local_serving_ref=local_serving_ref,
-        readiness="serving_local_ready",
+        readiness="runtime_local_ready",
         diagnostics=diagnostics,
     )
     runtime_state = runtime_binding_state_from_runtime_view(
@@ -1838,14 +1555,14 @@ def build_local_ready_prepared_artifact(
         artifact_realization_report=artifact_realization_report,
         model_runtime_spec=model_runtime_spec,
     )
-    prepared = PreparedServingArtifact(
+    prepared = PreparedRuntimeArtifact(
         source_artifact_ref=source_artifact_ref,
         serving_artifact_ref=None,
         serving_manifest_ref=serving_manifest_ref,
         representation_contract_hash=representation_contract_hash,
         serving_build_digest=serving_build_digest,
         binding_value_ref=binding_value_ref,
-        readiness="serving_local_ready",
+        readiness="runtime_local_ready",
         family=family,
         tensor_schema_hash=tensor_schema_hash,
         binding_layout_id=binding_layout_id,
@@ -1855,7 +1572,7 @@ def build_local_ready_prepared_artifact(
         tp_rank=int(tp_rank),
         tp_world_size=int(tp_world_size),
     )
-    return LocalReadyServingResult(
+    return LocalReadyRuntimeResult(
         runtime_state=runtime_state,
         runtime_view=runtime_view,
         prepared=prepared,
@@ -1893,22 +1610,22 @@ def build_collective_group_id(
 
 
 @dataclass(frozen=True)
-class ServingIntegration:
-    """Small service object for framework-facing serving lifecycle calls."""
+class ArtifactRuntimeIntegration:
+    """Small service object for framework-facing runtime lifecycle calls."""
 
-    resolver: ServingArtifactResolver | None = None
+    resolver: RuntimeArtifactResolver | None = None
     profile_sink: Any | None = None
     host: IntegrationHost | None = None
 
     @staticmethod
-    def _lifecycle_not_implemented(method: str, phase: str) -> None:
-        raise ServingIntegrationNotImplementedError(
-            f"ServingIntegration.{method} request DTO is available, but the "
+    def _lifecycle_not_implemented(method: str, phase: str) -> NoReturn:
+        raise ArtifactRuntimeNotImplementedError(
+            f"ArtifactRuntimeIntegration.{method} request DTO is available, but the "
             f"deep core-owned lifecycle is scheduled for {phase}"
         )
 
-    def resolve(self, artifact_ref: str, **kwargs: Any) -> ResolvedServingArtifact:
-        return resolve_serving_artifact(
+    def resolve(self, artifact_ref: str, **kwargs: Any) -> ResolvedRuntimeArtifact:
+        return resolve_runtime_artifact(
             artifact_ref,
             resolver=self.resolver,
             **kwargs,
@@ -1919,10 +1636,12 @@ def read_manifest(
         artifact: Any,
         *,
         artifact_ref: str,
-    ) -> ResolvedServingArtifact:
+    ) -> ResolvedRuntimeArtifact:
         if self.resolver is None:
-            raise ValueError("ServingIntegration.read_manifest requires resolver")
-        return read_serving_artifact_manifest(
+            raise ValueError(
+                "ArtifactRuntimeIntegration.read_manifest requires resolver"
+            )
+        return read_runtime_artifact_manifest(
             artifact,
             artifact_ref=artifact_ref,
             resolver=self.resolver,
@@ -1930,12 +1649,12 @@ def read_manifest(
 
     def cross_check(
         self,
-        resolved_artifact: ResolvedServingArtifact,
+        resolved_artifact: ResolvedRuntimeArtifact,
         **kwargs: Any,
-    ) -> ResolvedServingArtifact:
+    ) -> ResolvedRuntimeArtifact:
         if self.resolver is None:
-            raise ValueError("ServingIntegration.cross_check requires resolver")
-        return cross_check_serving_artifact(
+            raise ValueError("ArtifactRuntimeIntegration.cross_check requires resolver")
+        return cross_check_runtime_artifact(
             resolved_artifact,
             resolver=self.resolver,
             **kwargs,
@@ -1943,20 +1662,20 @@ def cross_check(
 
     def start(
         self,
-        intent: ServingIntent,
+        intent: RuntimeIntent,
         context: RequestContext,
     ) -> RuntimeAttachment:
-        """Start serving from a public intent DTO."""
+        """Start runtime materialization from a public intent DTO."""
 
         decision = self._admit_intent(intent, context)
-        if isinstance(intent, ExistingServingArtifact):
+        if isinstance(intent, ExistingRuntimeArtifact):
             self._reject_source_selector_for_existing_artifact(intent.artifact_locator)
             materialization_request = self._host_materialization_request(
                 context,
-                operation_scope="startup.direct_serving_artifact.bind",
+                operation_scope="startup.direct_runtime_artifact.bind",
             )
-            load_result = self._load_existing_serving_artifact(
-                _DirectServingLoad(
+            load_result = self._load_existing_runtime_artifact(
+                _DirectRuntimeLoad(
                     artifact_locator=intent.artifact_locator,
                     policy=intent.policy,
                     framework_config=context.framework_config,
@@ -1980,9 +1699,9 @@ def start(
                 )
             )
             if load_result.model is None or load_result.runtime_state is None:
-                raise ServingIntegrationError(
-                    "ServingIntegration.start returned no model/state for "
-                    "ExistingServingArtifact"
+                raise ArtifactRuntimeIntegrationError(
+                    "ArtifactRuntimeIntegration.start returned no model/state for "
+                    "ExistingRuntimeArtifact"
                 )
             return self._attachment_from_load_result(load_result, decision)
         if isinstance(intent, RetainedBindingAcquire):
@@ -1999,7 +1718,7 @@ def start(
                     and placement.member != authority.member
                 ):
                     raise AuthorityValidationError(
-                        "ParsedRetainedServingBindingAuthority.member does not match "
+                        "ParsedRetainedRealizationAuthority.member does not match "
                         "runtime placement",
                         details={
                             "authority_member": repr(authority.member),
@@ -2019,8 +1738,8 @@ def start(
                 )
             )
             if retained_result.model is None or retained_result.runtime_state is None:
-                raise ServingIntegrationError(
-                    "ServingIntegration.start returned no model/state for "
+                raise ArtifactRuntimeIntegrationError(
+                    "ArtifactRuntimeIntegration.start returned no model/state for "
                     "RetainedBindingAcquire"
                 )
             return self._attachment_from_retained_result(retained_result, decision)
@@ -2036,8 +1755,8 @@ def start(
                 local_ready_result.model is None
                 or local_ready_result.runtime_state is None
             ):
-                raise ServingIntegrationError(
-                    "ServingIntegration.start returned no model/state for "
+                raise ArtifactRuntimeIntegrationError(
+                    "ArtifactRuntimeIntegration.start returned no model/state for "
                     "LocalSourceBootstrap"
                 )
             self._run_local_ready_barrier(context)
@@ -2045,25 +1764,241 @@ def start(
                 local_ready_result,
                 decision,
             )
-        raise ServingIntegrationError(
-            f"Unsupported TensorCast serving intent: {type(intent).__name__}"
+        raise ArtifactRuntimeIntegrationError(
+            f"Unsupported TensorCast runtime intent: {type(intent).__name__}"
+        )
+
+    def _retained_expected_member(
+        self,
+        authority: ParsedRetainedRealizationAuthority,
+        context: RequestContext,
+    ) -> Any:  # authority.member, or the host placement member when one is set
+        expected_member = authority.member
+        if self.host is None:
+            return expected_member  # self.host unset: skip the placement check
+        placement = self._framework_context(
+            context.framework_config,
+            context.model_config,
+        ).placement
+        if (  # reject a placement member that differs from the authority's
+            placement is not None
+            and placement.member is not None
+            and placement.member != authority.member
+        ):
+            raise AuthorityValidationError(
+                "ParsedRetainedRealizationAuthority.member does not match "
+                "runtime placement",
+                details={  # repr() of both members is attached to the error
+                    "authority_member": repr(authority.member),
+                    "placement_member": repr(placement.member),
+                },
+            )
+        if placement is not None and placement.member is not None:
+            return placement.member  # already checked against authority.member
+        return expected_member  # placement gave no member: use the authority's
+
+    def realize_model_runtime(
+        self,
+        *,
+        artifact_ref: str,
+        spec: ArtifactRealizationSpec,  # must have target_kind "model_runtime"
+        context: RequestContext,
+        source_selection: ResolvedArtifactSelection | None = None,
+        runtime_artifact_policy: Any | None = None,
+        materialization: Any | None = None,
+    ) -> RuntimeAttachment:
+        """Realize an artifact-rooted model runtime without a session."""
+
+        if spec.target_kind != "model_runtime":
+            raise ArtifactRuntimeIntegrationError(
+                "ArtifactRuntimeIntegration.realize_model_runtime requires a model_runtime spec"
+            )
+        framework_context = self._framework_context(
+            context.framework_config,
+            context.model_config,
+        )
+        facts = resolve_model_runtime_request_facts(  # spec and context are rebound below
+            spec=spec,
+            runtime_context=context,
+            host_context=framework_context,
+        )
+        spec = cast(ArtifactRealizationSpec, facts.spec)
+        context = cast(RequestContext, facts.context)
+        intent = ExistingRuntimeArtifact(  # the intent type start() uses for direct loads
+            artifact_locator=str(artifact_ref), policy=runtime_artifact_policy
+        )
+        decision = self._admit_intent(intent, context)
+        materialization_request = self._host_materialization_request(
+            context,  # NOTE(review): start() spells the scope direct_runtime_artifact
+            operation_scope="startup.direct_artifact_runtime.bind",
+        )
+        result = self._load_existing_runtime_artifact(
+            _DirectRuntimeLoad(
+                artifact_ref=str(artifact_ref),  # NOTE(review): cf. artifact_locator= in start()
+                policy=runtime_artifact_policy,
+                materialization=materialization,
+                framework_config=context.framework_config,
+                model_config=context.model_config,
+                target_device=context.target_device,
+                timeout_s=context.timeout_s,
+                configured_collective_policy=(
+                    materialization_request.configured_collective_policy
+                ),
+                source_selection=source_selection,  # caller-supplied; may be None
+                source_bound_contract_state=(
+                    materialization_request.source_bound_contract_state
+                ),
+                source_bound_contract_path=(
+                    materialization_request.source_bound_contract_path
+                ),
+                execution_facts=materialization_request.execution_facts,
+                operation_scope=materialization_request.operation_scope,
+                require_materialization_options=(
+                    materialization_request.require_materialization_options
+                ),
+                model_runtime_spec=spec,  # the spec taken from facts above
+            )
+        )
+        if result.model is None or result.runtime_state is None:
+            raise ArtifactRuntimeIntegrationError(
+                "ArtifactRuntimeIntegration.realize_model_runtime returned no model/state"
+            )
+        return self._attachment_from_load_result(result, decision)  # same tail as start()
+
+    def realize_retained_model_runtime(
+        self,
+        *,
+        authority: ParsedRetainedRealizationAuthority,
+        spec: ArtifactRealizationSpec,  # must have target_kind "model_runtime"
+        context: RequestContext,
+    ) -> RuntimeAttachment:
+        """Realize a retained handoff for a model runtime without a session."""
+
+        if spec.target_kind != "model_runtime":
+            raise ArtifactRuntimeIntegrationError(
+                "ArtifactRuntimeIntegration.realize_retained_model_runtime requires "
+                "a model_runtime spec"
+            )
+        framework_context = self._framework_context(
+            context.framework_config,
+            context.model_config,
+        )
+        facts = resolve_model_runtime_request_facts(  # spec and context are rebound below
+            spec=spec,
+            runtime_context=context,
+            host_context=framework_context,
+        )
+        spec = cast(ArtifactRealizationSpec, facts.spec)
+        context = cast(RequestContext, facts.context)
+        intent = RetainedBindingAcquire(authority)  # an intent type start() also handles
+        decision = self._admit_intent(intent, context)
+        retained_result = self._restore_retained_for_intent(
+            _RetainedBindingAcquire(
+                authority=authority,
+                framework_config=context.framework_config,
+                model_config=context.model_config,
+                target_device=context.target_device,
+                expected_member=self._retained_expected_member(authority, context),
+                timeout_s=context.timeout_s,
+                model_runtime_spec=spec,  # the spec taken from facts above
+            )
+        )
+        if retained_result.model is None or retained_result.runtime_state is None:
+            raise ArtifactRuntimeIntegrationError(
+                "ArtifactRuntimeIntegration.realize_retained_model_runtime returned no "
+                "model/state"
+            )
+        return self._attachment_from_retained_result(retained_result, decision)
+
+    def realize_mounted_source_model_runtime(
+        self,
+        *,
+        artifact_ref: str,
+        source_subject: Any,
+        spec: ArtifactRealizationSpec,
+        context: RequestContext,
+        source_selection: ResolvedArtifactSelection | None = None,
+        source_selector: SourceSelector | None = None,
+        bootstrap_policy: Any | None = None,
+        materialization: Any | None = None,
+    ) -> RuntimeAttachment:
+        """Bootstrap a model runtime from a mounted-source (``msa1:``) artifact.
+
+        ``spec`` and ``context`` are first replaced by the values returned from
+        ``resolve_model_runtime_request_facts``. A falsy ``source_selector`` is
+        derived from the subject's ``path``, a falsy ``bootstrap_policy`` becomes
+        ``BootstrapPolicy()``, and a non-``None`` ``materialization`` replaces
+        the bootstrap request's ``options``.
+
+        Raises ``ArtifactRuntimeIntegrationError`` for a non-``model_runtime``
+        spec, a non-``msa1:`` ref, or a bootstrap without model/runtime state.
+        """
+
+        if spec.target_kind != "model_runtime":
+            raise ArtifactRuntimeIntegrationError(
+                "ArtifactRuntimeIntegration.realize_mounted_source_model_runtime "
+                "requires a model_runtime spec"
+            )
+        facts = resolve_model_runtime_request_facts(
+            spec=spec,
+            runtime_context=context,
+            host_context=self._framework_context(
+                context.framework_config,
+                context.model_config,
+            ),
+        )
+        spec = cast(ArtifactRealizationSpec, facts.spec)
+        context = cast(RequestContext, facts.context)
+        mounted_ref = tc_source_catalog.resolve_source_artifact_ref(str(artifact_ref))
+        if not mounted_ref.startswith("msa1:"):
+            raise ArtifactRuntimeIntegrationError(
+                "mounted-source model_runtime realization requires an msa1 "
+                "mounted-source artifact"
+            )
+        subject = self._source_subject_for_mounted_source(
+            source_artifact_ref=mounted_ref, source_subject=source_subject
+        )
+        selector = source_selector or self._source_selector_for_subject(subject)
+        intent = LocalSourceBootstrap(
+            source_selector=selector,
+            bootstrap_policy=bootstrap_policy or BootstrapPolicy(),
+        )
+        decision = self._admit_intent(intent, context)
+        request = self._local_source_bootstrap_request(
+            intent, context, decision=decision, model_runtime_spec=spec
         )
+        if materialization is not None:
+            request = replace(request, options=materialization)
+        request = replace(
+            request,
+            source_subject=subject,
+            source_artifact_ref=mounted_ref,
+            source_selection=source_selection,
+        )
+        bootstrapped = self._prepare_local_source_bootstrap(request)
+        if bootstrapped.model is None or bootstrapped.runtime_state is None:
+            raise ArtifactRuntimeIntegrationError(
+                "ArtifactRuntimeIntegration.realize_mounted_source_model_runtime "
+                "returned no model/state"
+            )
+        self._run_local_ready_barrier(context)
+        return self._attachment_from_local_ready_result(bootstrapped, decision)
 
     def reload(
         self,
         current_state: RuntimeBindingState | Any,
-        intent: ExistingServingArtifact,
+        intent: ExistingRuntimeArtifact,
         context: RequestContext,
         *,
         model: object | None = None,
         contract_identity: str | None = None,
     ) -> RuntimeAttachment:
-        """Reload an existing runtime binding from a public serving intent."""
+        """Reload an existing runtime binding from a public runtime intent."""
 
-        if not isinstance(intent, ExistingServingArtifact):
-            raise ServingIntegrationError(
-                "ServingIntegration.reload currently accepts "
-                "ExistingServingArtifact intent only"
+        if not isinstance(intent, ExistingRuntimeArtifact):
+            raise ArtifactRuntimeIntegrationError(
+                "ArtifactRuntimeIntegration.reload currently accepts "
+                "ExistingRuntimeArtifact intent only"
             )
         self._reject_source_selector_for_existing_artifact(intent.artifact_locator)
         decision = self._admit_intent(intent, context, reload=True)
@@ -2071,8 +2006,8 @@ def reload(
             context,
             operation_scope="runtime_binding.swap",
         )
-        result = self._reload_existing_serving_artifact(
-            _ServingReload(
+        result = self._reload_existing_runtime_artifact(
+            _RuntimeReload(
                 current_state=current_state,
                 artifact_locator=intent.artifact_locator,
                 policy=intent.policy,
@@ -2098,8 +2033,8 @@ def reload(
             )
         )
         if result.runtime_state is None:
-            raise ServingIntegrationError(
-                "ServingIntegration.reload returned no runtime state"
+            raise ArtifactRuntimeIntegrationError(
+                "ArtifactRuntimeIntegration.reload returned no runtime state"
             )
         runtime_model = (
             model if model is not None else getattr(current_state, "model", None)
@@ -2109,11 +2044,12 @@ def reload(
             decision=decision,
             include_reload_response=True,
         )
-        return RuntimeAttachment(
+        attachment = RuntimeAttachment(
             model=runtime_model,
             state=result.runtime_state,
             view=view,
         )
+        return _project_model_runtime_attachment(result.runtime_state, attachment)
 
     def describe(self, state: RuntimeBindingState | Any) -> RuntimeWorkerView:
         """Return the typed endpoint/worker projection for core runtime state."""
@@ -2124,7 +2060,7 @@ def describe(self, state: RuntimeBindingState | Any) -> RuntimeWorkerView:
 
     def _admit_intent(
         self,
-        intent: ServingIntent,
+        intent: RuntimeIntent,
         context: RequestContext,
         *,
         reload: bool = False,
@@ -2132,8 +2068,8 @@ def _admit_intent(
         if self.host is None:
             return None
         if context.model_config is None:
-            raise ServingIntegrationError(
-                "ServingIntegration host admission requires model_config"
+            raise ArtifactRuntimeIntegrationError(
+                "ArtifactRuntimeIntegration host admission requires model_config"
             )
         framework_identity = self.host.framework.identity(context.model_config)
         placement_identity = self.host.placement.identity_facts(
@@ -2169,7 +2105,7 @@ def _admit_intent(
             allowed = decision.startup_allowed
             action = "startup"
         if not allowed:
-            raise ServingIntegrationError(
+            raise ArtifactRuntimeIntegrationError(
                 "TensorCast admission rejected "
                 f"{action}: family={decision.family!r}, "
                 f"support_level={decision.support_level!r}"
@@ -2181,31 +2117,32 @@ def _reject_source_selector_for_existing_artifact(
         artifact_locator: object,
     ) -> None:
         if isinstance(artifact_locator, SourceSelector):
-            raise ServingIntegrationError(
-                "ExistingServingArtifact requires a durable serving artifact "
+            raise ArtifactRuntimeIntegrationError(
+                "ExistingRuntimeArtifact requires a durable runtime artifact "
                 "locator; local source selectors must use LocalSourceBootstrap"
             )
         if _artifact_locator_kind(artifact_locator) == "local_path":
-            raise ServingIntegrationError(
-                "ExistingServingArtifact rejects local_path artifact locators; use "
+            raise ArtifactRuntimeIntegrationError(
+                "ExistingRuntimeArtifact rejects local_path artifact locators; use "
                 "LocalSourceBootstrap for local source acquisition"
             )
 
     def _attachment_from_load_result(
         self,
-        result: ServingLoadResult,
+        result: RuntimeLoadResult,
         decision: AdmissionDecision | None,
     ) -> RuntimeAttachment:
         state = result.runtime_state
         if state is None or result.model is None:
-            raise ServingIntegrationError(
-                "ServingLoadResult is missing model or runtime_state"
+            raise ArtifactRuntimeIntegrationError(
+                "RuntimeLoadResult is missing model or runtime_state"
             )
-        return RuntimeAttachment(
+        attachment = RuntimeAttachment(
             model=result.model,
             state=state,
             view=self._worker_view_from_state(state, decision=decision),
         )
+        return _project_model_runtime_attachment(state, attachment)
 
     def _attachment_from_retained_result(
         self,
@@ -2214,32 +2151,34 @@ def _attachment_from_retained_result(
     ) -> RuntimeAttachment:
         state = result.runtime_state
         if state is None or result.model is None:
-            raise ServingIntegrationError(
+            raise ArtifactRuntimeIntegrationError(
                 "RetainedBindingResult is missing model or runtime_state"
             )
-        return RuntimeAttachment(
+        attachment = RuntimeAttachment(
             model=result.model,
             state=state,
             view=self._worker_view_from_state(state, decision=decision),
         )
+        return _project_model_runtime_attachment(state, attachment)
 
     def _attachment_from_local_ready_result(
         self,
-        result: LocalReadyServingResult,
+        result: LocalReadyRuntimeResult,
         decision: AdmissionDecision | None,
     ) -> RuntimeAttachment:
         state = result.runtime_state
         if state is None or result.model is None:
-            raise ServingIntegrationError(
-                "LocalReadyServingResult is missing model or runtime_state"
+            raise ArtifactRuntimeIntegrationError(
+                "LocalReadyRuntimeResult is missing model or runtime_state"
             )
-        return RuntimeAttachment(
+        attachment = RuntimeAttachment(
             model=result.model,
             state=state,
             view=self._worker_view_from_state(state, decision=decision),
             prepared=result.prepared,
             recipe=result.recipe,
         )
+        return _project_model_runtime_attachment(state, attachment)
 
     def _local_source_bootstrap_request(
         self,
@@ -2247,22 +2186,23 @@ def _local_source_bootstrap_request(
         context: RequestContext,
         *,
         decision: AdmissionDecision | None,
+        model_runtime_spec: ArtifactRealizationSpec | None = None,
     ) -> _LocalReadyBootstrap:
         if self.host is None:
-            raise ServingIntegrationError(
-                "ServingIntegration.start(LocalSourceBootstrap) requires "
+            raise ArtifactRuntimeIntegrationError(
+                "ArtifactRuntimeIntegration.start(LocalSourceBootstrap) requires "
                 "IntegrationHost"
             )
         if context.model_config is None:
-            raise ServingIntegrationError(
-                "ServingIntegration.start(LocalSourceBootstrap) requires model_config"
+            raise ArtifactRuntimeIntegrationError(
+                "ArtifactRuntimeIntegration.start(LocalSourceBootstrap) requires model_config"
             )
         profile = self.host.runtime_profile or RuntimeProfile()
         identity = self.host.framework.identity(context.model_config)
         placement_identity = self.host.placement.identity_facts(
             context.framework_config
         )
-        placement = self._host_serving_placement(context.framework_config)
+        placement = self._host_runtime_placement(context.framework_config)
         recipe = getattr(intent, "recipe", None)
         model = getattr(intent, "model", None)
         coordinator = getattr(intent, "coordinator", None)
@@ -2336,7 +2276,49 @@ def _local_source_bootstrap_request(
             require_materialization_options=(
                 materialization_request.require_materialization_options
             ),
+            model_runtime_spec=model_runtime_spec,
+        )
+
+    @staticmethod
+    def _source_subject_for_mounted_source(
+        *,
+        source_artifact_ref: str,
+        source_subject: Any,
+    ) -> SourceSubject:
+        """Coerce ``source_subject`` to ``SourceSubject``, rejecting ref mismatches."""
+        if isinstance(source_subject, SourceSubject):
+            canonical_ref = tc_source_catalog.resolve_source_artifact_ref(
+                source_subject.artifact_ref
+            )
+            if canonical_ref != source_artifact_ref:
+                raise ArtifactRuntimeIntegrationError(
+                    "mounted-source subject artifact_ref does not match "
+                    "realization artifact_ref"
+                )
+            return source_subject
+        # A missing or falsy ``artifact_id`` skips the handle mismatch check.
+        handle_ref = str(getattr(source_subject, "artifact_id", "") or "")
+        if handle_ref and handle_ref != source_artifact_ref:
+            raise ArtifactRuntimeIntegrationError(
+                "mounted-source handle artifact_id does not match realization "
+                "artifact_ref"
+            )
+        is_public_disk = is_public_disk_source_subject(source_subject)
+        return SourceSubject(
+            artifact_ref=source_artifact_ref,
+            subject=source_subject,
+            source_kind="public_disk" if is_public_disk else "opaque",
         )
+
+    @staticmethod
+    def _source_selector_for_subject(subject: SourceSubject) -> SourceSelector:
+        source_path = getattr(subject.subject, "path", None)  # None when absent
+        if source_path is None or not str(source_path).strip():  # None or blank
+            raise ArtifactRuntimeIntegrationError(
+                "mounted-source model_runtime realization requires a source "
+                "selector or a source subject with a path"
+            )
+        return SourceSelector.local_path(str(source_path))  # stringified, untrimmed
 
     def _host_source_subject_coordinator(
         self,
@@ -2370,7 +2352,7 @@ def _host_recipe_cache_policy(
             model_config,
         )
         if policy is not None and not isinstance(policy, RecipeCachePolicy):
-            raise ServingIntegrationError(
+            raise ArtifactRuntimeIntegrationError(
                 "IntegrationHost.source.recipe_cache_policy must return "
                 "RecipeCachePolicy or None"
             )
@@ -2393,8 +2375,8 @@ def _worker_view_from_state(
     ) -> RuntimeWorkerView:
         runtime_view = getattr(state, "runtime_view", None)
         if runtime_view is None:
-            raise ServingIntegrationError(
-                "ServingIntegration.describe requires state.runtime_view"
+            raise ArtifactRuntimeIntegrationError(
+                "ArtifactRuntimeIntegration.describe requires state.runtime_view"
             )
         endpoint_fields = dict(decision.endpoint_fields) if decision else {}
         return RuntimeWorkerView.from_runtime_view(
@@ -2432,13 +2414,13 @@ def _host_materialization_request(
             require_materialization_options=True,
         )
 
-    def _host_serving_placement(
+    def _host_runtime_placement(
         self,
         framework_config: object | None,
-    ) -> ServingPlacement:
+    ) -> RuntimePlacement:
         if self.host is None:
-            raise ServingIntegrationError(
-                "ServingIntegration host placement requires IntegrationHost"
+            raise ArtifactRuntimeIntegrationError(
+                "ArtifactRuntimeIntegration host placement requires IntegrationHost"
             )
         framework_payload = None
         framework_payload_fn = getattr(self.host.placement, "framework_payload", None)
@@ -2452,7 +2434,7 @@ def _host_serving_placement(
             identity_payload = _framework_payload_mapping(
                 identity_payload_fn(framework_config)
             )
-        return serving_placement_from_framework_facts(
+        return runtime_placement_from_framework_facts(
             identity_facts=self.host.placement.identity_facts(framework_config),
             admission_facts=self.host.placement.admission_facts(framework_config),
             member_facts=self.host.placement.member_facts(framework_config),
@@ -2471,20 +2453,20 @@ def _serving_artifact_schema_version(profile: RuntimeProfile) -> int:
             "serving_artifact_schema_version", None
         )
         if value is None:
-            model_fields = getattr(ServingArtifactManifest, "model_fields", {})
+            model_fields = getattr(RuntimeArtifactManifest, "model_fields", {})
             schema_field = model_fields.get("schema_version")
             value = getattr(schema_field, "default", 1)
         return _optional_int(value) or 1
 
-    def _load_existing_serving_artifact(
-        self, request: _DirectServingLoad
-    ) -> ServingLoadResult:
+    def _load_existing_runtime_artifact(
+        self, request: _DirectRuntimeLoad
+    ) -> RuntimeLoadResult:
         target_device = self._require_target_device(request.target_device)
         context = self._framework_context(
             request.framework_config,
             request.model_config,
         )
-        preflight = self._preflight_serving_artifact(
+        preflight = self._preflight_runtime_artifact(
             resolved_artifact=request.resolved_artifact,
             artifact_ref=request.artifact_ref,
             artifact_locator=request.artifact_locator,
@@ -2493,7 +2475,7 @@ def _load_existing_serving_artifact(
             placement=context.placement,
         )
         resolved = preflight.resolved_artifact
-        policy = preflight.serving_runtime_policy
+        policy = preflight.runtime_artifact_policy
         model = request.model
         if model is None:
             self._prepare_model_construction(
@@ -2506,7 +2488,7 @@ def _load_existing_serving_artifact(
             )
         self._assert_model_ready_for_runtime_binding(
             model,
-            context="TensorCast direct serving artifact startup",
+            context="TensorCast direct runtime artifact startup",
         )
         self._align_runtime_tensor_names(
             model,
@@ -2524,7 +2506,7 @@ def _load_existing_serving_artifact(
             current_tensors,
             remove_duplicate=False,
         )
-        preflight = self._preflight_serving_artifact(
+        preflight = self._preflight_runtime_artifact(
             resolved_artifact=resolved,
             artifact_ref=request.artifact_ref,
             artifact_locator=request.artifact_locator,
@@ -2533,7 +2515,7 @@ def _load_existing_serving_artifact(
             placement=context.placement,
         )
         resolved = preflight.resolved_artifact
-        policy = preflight.serving_runtime_policy
+        policy = preflight.runtime_artifact_policy
         manifest = getattr(resolved, "manifest", None)
         local_serving_ref = getattr(manifest, "local_serving_ref", None)
         if local_serving_ref:
@@ -2542,7 +2524,7 @@ def _load_existing_serving_artifact(
                 expected_member = context.placement.member
             if expected_member is None:
                 raise RestoreBindingError(
-                    "ServingIntegration._load_existing_serving_artifact prepared "
+                    "ArtifactRuntimeIntegration._load_existing_runtime_artifact prepared "
                     "local-ready restore requires expected_member"
                 )
             with restore_prepared_local_ready_binding(
@@ -2567,6 +2549,7 @@ def _load_existing_serving_artifact(
                         tensor_schema_hash=tensor_schema_hash,
                         artifact_profile="retained_binding",
                         authority_scope="daemon_retained_runtime_attachment",
+                        source_selection=request.source_selection,
                         retained=True,
                         reservation_bytes=int(restored.reservation_bytes),
                     )
@@ -2578,6 +2561,7 @@ def _load_existing_serving_artifact(
                         target_device=target_device,
                         tensor_schema_hash=tensor_schema_hash,
                         reservation_bytes=restored.reservation_bytes,
+                        source_selection=request.source_selection,
                     )
                 state_seed = self._state_seed(
                     resolved,
@@ -2588,7 +2572,7 @@ def _load_existing_serving_artifact(
                     ),
                     binding_handle=restored,
                     artifact_realization_report=artifact_report,
-                    readiness="serving_local_ready",
+                    readiness="runtime_local_ready",
                 )
                 runtime_state = self._materializer().attach_and_finalize(
                     model=model,
@@ -2599,17 +2583,18 @@ def _load_existing_serving_artifact(
                     replace_meta_params=True,
                     target_device=target_device,
                     model_config=request.model_config,
+                    model_runtime_spec=request.model_runtime_spec,
                 )
         else:
             materialization = self._load_materialization_options(
                 request,
                 resolved,
             )
-            binding_result = bind_serving_artifact(
+            binding_result = bind_runtime_artifact(
                 resolved_artifact=resolved,
                 tensor_names=tuple(current_tensors.keys()),
                 device=target_device,
-                serving_runtime_policy=policy,
+                runtime_artifact_policy=policy,
                 options=materialization,
             )
             artifact_report = _runtime_attachment_report_for_resolved(
@@ -2618,6 +2603,7 @@ def _load_existing_serving_artifact(
                 binding_handle=binding_result.binding,
                 target_device=target_device,
                 tensor_schema_hash=tensor_schema_hash,
+                source_selection=request.source_selection,
                 execution_diagnostics=binding_result.execution_diagnostics,
                 materialization_diagnostics=binding_result.materialization_diagnostics,
             )
@@ -2638,8 +2624,9 @@ def _load_existing_serving_artifact(
                 replace_meta_params=True,
                 target_device=target_device,
                 model_config=request.model_config,
+                model_runtime_spec=request.model_runtime_spec,
             )
-        return ServingLoadResult(
+        return RuntimeLoadResult(
             model=model,
             runtime_state=runtime_state,
             runtime_view=runtime_state.runtime_view,
@@ -2647,9 +2634,9 @@ def _load_existing_serving_artifact(
             binding_result=binding_result,
         )
 
-    def _reload_existing_serving_artifact(
-        self, request: _ServingReload
-    ) -> ServingReloadResult:
+    def _reload_existing_runtime_artifact(
+        self, request: _RuntimeReload
+    ) -> RuntimeReloadResult:
         target_device = (
             torch.device(request.target_device)
             if request.target_device is not None
@@ -2657,13 +2644,13 @@ def _reload_existing_serving_artifact(
         )
         binding = getattr(request.current_state, "binding", None)
         if binding is None:
-            raise ServingIntegrationError(
-                "ServingIntegration._reload_existing_serving_artifact requires current_state.binding"
+            raise ArtifactRuntimeIntegrationError(
+                "ArtifactRuntimeIntegration._reload_existing_runtime_artifact requires current_state.binding"
             )
         if not is_runtime_binding_swap_capable(binding):
-            raise ServingIntegrationError(
-                "ServingIntegration._reload_existing_serving_artifact requires a "
-                "swap-capable serving binding"
+            raise ArtifactRuntimeIntegrationError(
+                "ArtifactRuntimeIntegration._reload_existing_runtime_artifact requires a "
+                "swap-capable runtime binding"
             )
         current_view = getattr(request.current_state, "runtime_view", None)
         expected_tensor_schema_hash = getattr(current_view, "tensor_schema_hash", None)
@@ -2692,7 +2679,7 @@ def _reload_existing_serving_artifact(
                 request.model_config,
             )
         placement = None if context is None else context.placement
-        preflight = self._preflight_serving_artifact(
+        preflight = self._preflight_runtime_artifact(
             resolved_artifact=request.resolved_artifact,
             artifact_ref=request.artifact_ref,
             artifact_locator=request.artifact_locator,
@@ -2701,18 +2688,18 @@ def _reload_existing_serving_artifact(
             placement=placement,
         )
         resolved = preflight.resolved_artifact
-        policy = preflight.serving_runtime_policy
+        policy = preflight.runtime_artifact_policy
         materialization = self._reload_materialization_options(
             request,
             resolved,
         )
-        binding_result = swap_serving_artifact(
+        binding_result = swap_runtime_artifact(
             binding=binding,
             resolved_artifact=resolved,
             tensor_names=(
                 None if runtime_tensors is None else tuple(runtime_tensors.keys())
             ),
-            serving_runtime_policy=policy,
+            runtime_artifact_policy=policy,
             options=materialization,
         )
         artifact_report = _runtime_attachment_report_for_resolved(
@@ -2769,7 +2756,7 @@ def _reload_existing_serving_artifact(
                 else realization_handle.release_contract,
                 realization_handle=realization_handle,
             )
-        return ServingReloadResult(
+        return RuntimeReloadResult(
             runtime_state=runtime_state,
             runtime_view=runtime_state.runtime_view,
             resolved_artifact=resolved,
@@ -2783,27 +2770,12 @@ def _restore_retained_for_intent(
         authority = request.authority
         if authority is None:
             raise RestoreBindingError(
-                "ServingIntegration._restore_retained_for_intent requires authority"
-            )
-        readiness = getattr(authority, "readiness", None)
-        if readiness == "serving_reserved":
-            raise RestoreBindingError(
-                "TensorCast retained acquire readiness='serving_reserved' "
-                "is not attachable"
-            )
-        if readiness in {
-            "serving_group_prepared",
-            "serving_group_published_ready",
-        }:
-            raise RestoreBindingError(
-                "TensorCast retained acquire group readiness requires a "
-                "published group-realization transaction authority"
-            )
-        if readiness == "serving_published_ready":
-            raise RestoreBindingError(
-                "TensorCast retained acquire readiness='serving_published_ready' "
-                "requires a swap-capable serving binding handle"
+                "ArtifactRuntimeIntegration._restore_retained_for_intent requires "
+                "authority"
             )
+        rejection_reason = runtime_restore_rejection_reason(authority)
+        if rejection_reason is not None:
+            raise RestoreBindingError(rejection_reason)
         model = self._build_meta_model(
             request.framework_config,
             request.model_config,
@@ -2844,7 +2816,7 @@ def _restore_retained_for_intent(
                     binding_value_ref=restored.binding_value_ref,
                     local_serving_ref=getattr(authority, "local_serving_ref", None),
                     readiness=str(
-                        getattr(authority, "readiness", "") or "serving_local_ready"
+                        getattr(authority, "readiness", "") or "runtime_local_ready"
                     ),
                     diagnostics={
                         "reservation_bytes": int(restored.reservation_bytes),
@@ -2871,6 +2843,7 @@ def _restore_retained_for_intent(
                     model_config=request.model_config,
                     run_process_after_load=False,
                     expected_tensor_schema_hash=expected_tensor_schema_hash,
+                    model_runtime_spec=request.model_runtime_spec,
                 )
                 return RetainedBindingResult(
                     model=model,
@@ -2887,7 +2860,7 @@ def _restore_retained_for_intent(
 
     def _prepare_local_source_bootstrap(
         self, request: _LocalReadyBootstrap
-    ) -> LocalReadyServingResult:
+    ) -> LocalReadyRuntimeResult:
         if (
             request.recipe is None or request.source_subject is None
         ) and request.build_recipe_from_framework_context:
@@ -2895,18 +2868,18 @@ def _prepare_local_source_bootstrap(
         if request.recipe is None or request.source_subject is None:
             self._lifecycle_not_implemented("_prepare_local_source_bootstrap", "P5")
         if request.target_device is None:
-            raise ServingIntegrationError(
-                "ServingIntegration.start(LocalSourceBootstrap) requires target_device"
+            raise ArtifactRuntimeIntegrationError(
+                "ArtifactRuntimeIntegration.start(LocalSourceBootstrap) requires target_device"
             )
         if not request.manifest_tensor_name:
-            raise ServingIntegrationError(
-                "ServingIntegration.start(LocalSourceBootstrap) requires manifest_tensor_name"
+            raise ArtifactRuntimeIntegrationError(
+                "ArtifactRuntimeIntegration.start(LocalSourceBootstrap) requires manifest_tensor_name"
             )
         model = request.model
         if request.build_model_from_framework_context and model is None:
             if request.model_config is None:
-                raise ServingIntegrationError(
-                    "ServingIntegration.start(LocalSourceBootstrap) requires "
+                raise ArtifactRuntimeIntegrationError(
+                    "ArtifactRuntimeIntegration.start(LocalSourceBootstrap) requires "
                     "model_config to build a framework model"
                 )
             model = self._build_meta_model(
@@ -2923,24 +2896,24 @@ def _prepare_local_source_bootstrap(
             or not serving_build_digest
         ):
             if request.model_config is None:
-                raise ServingIntegrationError(
-                    "ServingIntegration.start(LocalSourceBootstrap) requires "
+                raise ArtifactRuntimeIntegrationError(
+                    "ArtifactRuntimeIntegration.start(LocalSourceBootstrap) requires "
                     "model_config to build a local-ready manifest carrier"
                 )
             if request.placement is None:
-                raise ServingIntegrationError(
-                    "ServingIntegration.start(LocalSourceBootstrap) requires "
+                raise ArtifactRuntimeIntegrationError(
+                    "ArtifactRuntimeIntegration.start(LocalSourceBootstrap) requires "
                     "placement to build a local-ready manifest carrier"
                 )
             if request.runtime_binding_schema_version is None:
-                raise ServingIntegrationError(
-                    "ServingIntegration.start(LocalSourceBootstrap) requires "
+                raise ArtifactRuntimeIntegrationError(
+                    "ArtifactRuntimeIntegration.start(LocalSourceBootstrap) requires "
                     "runtime_binding_schema_version to build a local-ready "
                     "manifest carrier"
                 )
             if request.serving_artifact_schema_version is None:
-                raise ServingIntegrationError(
-                    "ServingIntegration.start(LocalSourceBootstrap) requires "
+                raise ArtifactRuntimeIntegrationError(
+                    "ArtifactRuntimeIntegration.start(LocalSourceBootstrap) requires "
                     "serving_artifact_schema_version to build a local-ready "
                     "manifest carrier"
                 )
@@ -2993,7 +2966,7 @@ def _prepare_local_source_bootstrap(
             )
             if options is None:
                 options = self._local_ready_materialization_options(request)
-        realization = prepare_local_ready_serving(
+        realization = tc_local_ready.realize_local_ready_binding_from_source(
             recipe=request.recipe,
             source_subject=request.source_subject,
             target_device=torch.device(request.target_device),
@@ -3002,7 +2975,7 @@ def _prepare_local_source_bootstrap(
             options=options,
             binding_factory=request.binding_factory,
         )
-        realized = LocalReadyServingResult(
+        realized = LocalReadyRuntimeResult(
             recipe=request.recipe,
             binding=realization.binding,
             update_epoch=realization.update_epoch,
@@ -3032,6 +3005,7 @@ def _prepare_local_source_bootstrap(
                     binding=realization.binding,
                     update_epoch=realization.update_epoch,
                     source_artifact_ref=str(request.source_artifact_ref),
+                    source_selection=request.source_selection,
                     serving_manifest_ref=str(serving_manifest_ref),
                     representation_contract_hash=str(representation_contract_hash),
                     serving_build_digest=str(serving_build_digest),
@@ -3058,9 +3032,10 @@ def _prepare_local_source_bootstrap(
                     framework_version=request.framework_version,
                     adapter_version=request.adapter_version,
                     serving_abi_version=request.serving_abi_version,
+                    model_runtime_spec=request.model_runtime_spec,
                 )
             )
-            return LocalReadyServingResult(
+            return LocalReadyRuntimeResult(
                 model=finalized.model,
                 runtime_state=finalized.runtime_state,
                 runtime_view=finalized.runtime_view,
@@ -3088,8 +3063,8 @@ def _local_ready_prepare_with_built_recipe(
             source_subject_record, "artifact_ref", None
         )
         if not source_artifact_ref:
-            raise ServingIntegrationError(
-                "ServingIntegration.start(LocalSourceBootstrap) could not "
+            raise ArtifactRuntimeIntegrationError(
+                "ArtifactRuntimeIntegration.start(LocalSourceBootstrap) could not "
                 "derive source_artifact_ref from source subject"
             )
         try:
@@ -3097,8 +3072,8 @@ def _local_ready_prepare_with_built_recipe(
                 source_artifact_ref
             )
         except ValueError as exc:
-            raise ServingIntegrationError(
-                "ServingIntegration.start(LocalSourceBootstrap) requires "
+            raise ArtifactRuntimeIntegrationError(
+                "ArtifactRuntimeIntegration.start(LocalSourceBootstrap) requires "
                 "a real source artifact identity"
             ) from exc
         source_realization_subject = getattr(
@@ -3142,8 +3117,8 @@ def _resolve_local_ready_source_subject(
         request: _LocalReadyBootstrap,
     ) -> SourceSubject:
         if request.source_selector is None:
-            raise ServingIntegrationError(
-                "ServingIntegration.start(LocalSourceBootstrap) requires "
+            raise ArtifactRuntimeIntegrationError(
+                "ArtifactRuntimeIntegration.start(LocalSourceBootstrap) requires "
                 "source_selector when source_subject is not supplied"
             )
         verify_checksums = bool(
@@ -3167,8 +3142,8 @@ def _local_ready_source_catalog(
                 source_artifact_ref
             )
         except ValueError as exc:
-            raise ServingIntegrationError(
-                "ServingIntegration.start(LocalSourceBootstrap) requires "
+            raise ArtifactRuntimeIntegrationError(
+                "ArtifactRuntimeIntegration.start(LocalSourceBootstrap) requires "
                 "a real source artifact identity"
             ) from exc
         if request.source_catalog is not None:
@@ -3179,11 +3154,11 @@ def _local_ready_source_catalog(
             return request.source_catalog
         if self.host is not None and self.host.source_catalog is not None:
             if not isinstance(request.source_selector, SourceSelector):
-                raise ServingIntegrationError(
+                raise ArtifactRuntimeIntegrationError(
                     "IntegrationHost.source_catalog requires a core SourceSelector"
                 )
             if request.model_config is None:
-                raise ServingIntegrationError(
+                raise ArtifactRuntimeIntegrationError(
                     "IntegrationHost.source_catalog requires model_config"
                 )
             source_catalog = self.host.source_catalog.build_catalog(
@@ -3217,7 +3192,7 @@ def _local_ready_source_catalog(
             )
             return source_catalog
         raise _capability_missing(
-            "ServingIntegration.start(LocalSourceBootstrap) requires "
+            "ArtifactRuntimeIntegration.start(LocalSourceBootstrap) requires "
             "IntegrationHost.source_catalog when recipe is not supplied",
             level="level2-local-bootstrap",
             capability="source_catalog",
@@ -3237,7 +3212,7 @@ def _validate_source_catalog_artifact_ref(
     ) -> None:
         catalog_artifact_ref = getattr(source_catalog, "source_artifact_ref", None)
         if catalog_artifact_ref is None:
-            raise ServingIntegrationError(
+            raise ArtifactRuntimeIntegrationError(
                 "SourceCatalogProvider returned a catalog without a real "
                 "source_artifact_ref"
             )
@@ -3246,12 +3221,12 @@ def _validate_source_catalog_artifact_ref(
                 str(catalog_artifact_ref)
             )
         except ValueError as exc:
-            raise ServingIntegrationError(
+            raise ArtifactRuntimeIntegrationError(
                 "SourceCatalogProvider returned a catalog without a real "
                 "source_artifact_ref"
             ) from exc
         if catalog_source_ref != expected_source_artifact_ref:
-            raise ServingIntegrationError(
+            raise ArtifactRuntimeIntegrationError(
                 "SourceCatalogProvider returned source_artifact_ref "
                 f"{catalog_source_ref!r}, expected {expected_source_artifact_ref!r}"
             )
@@ -3266,7 +3241,7 @@ def _local_ready_recipe_cache_config(
         if callable(cache_config_factory):
             return cache_config_factory(source_catalog=source_catalog)
         if isinstance(request.cache_config, RecipeCachePolicy):
-            return _recipe_build_cache_config_from_policy(
+            return recipe_build_cache_config_from_policy(
                 request.cache_config,
                 source_catalog=source_catalog,
             )
@@ -3285,8 +3260,8 @@ def _build_local_ready_recipe_from_framework_context(
         placement: Any | None,
     ) -> Any:
         if request.model_config is None:
-            raise ServingIntegrationError(
-                "ServingIntegration.start(LocalSourceBootstrap) requires "
+            raise ArtifactRuntimeIntegrationError(
+                "ArtifactRuntimeIntegration.start(LocalSourceBootstrap) requires "
                 "model_config when recipe is not supplied"
             )
         adapter = self._recipe_framework_adapter(request.model_config)
@@ -3309,7 +3284,7 @@ def _build_local_ready_recipe_from_framework_context(
                 request.model_config,
             ),
             cache_config=cache_config,
-            is_reserved_serving_tensor_name=is_reserved_serving_tensor_name,
+            is_reserved_runtime_tensor_name=is_reserved_runtime_tensor_name,
             semantic_validation_spec=request.semantic_validation_spec,
             placement=placement,
             debug_extra={
@@ -3352,8 +3327,8 @@ def _local_ready_materialization_options(
             or execution_facts is None
         ):
             if request.require_materialization_options:
-                raise ServingIntegrationError(
-                    "ServingIntegration.start(LocalSourceBootstrap) requires "
+                raise ArtifactRuntimeIntegrationError(
+                    "ArtifactRuntimeIntegration.start(LocalSourceBootstrap) requires "
                     "materialization execution context"
                 )
             return None
@@ -3362,8 +3337,8 @@ def _local_ready_materialization_options(
             "source_bound_contract_ready",
             False,
         ):
-            raise ServingIntegrationError(
-                "ServingIntegration.start(LocalSourceBootstrap) requires "
+            raise ArtifactRuntimeIntegrationError(
+                "ArtifactRuntimeIntegration.start(LocalSourceBootstrap) requires "
                 "ready source-bound contract state"
             )
         identity = self.local_ready_materialization_identity(request.recipe)
@@ -3399,7 +3374,7 @@ def _assert_local_ready_contract_realizable(
         context: str,
     ) -> None:
         if contract.realization_entry_count <= 0:
-            raise ServingIntegrationError(
+            raise ArtifactRuntimeIntegrationError(
                 f"{context} requires a non-empty BindingRealizationPlan"
             )
         if not contract.fallback_copy_plan:
@@ -3410,7 +3385,7 @@ def _assert_local_ready_contract_realizable(
         )
         if len(contract.fallback_copy_plan) > 8:
             unsupported = f"{unsupported}, ..." if unsupported else "..."
-        raise ServingIntegrationError(
+        raise ArtifactRuntimeIntegrationError(
             f"{context} requires a fully representable BindingRealizationPlan; "
             f"unsupported_entries={len(contract.fallback_copy_plan)} "
             f"[{unsupported}]"
@@ -3418,27 +3393,27 @@ def _assert_local_ready_contract_realizable(
 
     def _finalize_local_ready_runtime(
         self, request: _LocalReadyFinalize
-    ) -> LocalReadyServingResult:
+    ) -> LocalReadyRuntimeResult:
         target_device = self._require_target_device(request.target_device)
         if request.recipe is None:
-            raise ServingIntegrationError(
-                "ServingIntegration._finalize_local_ready_runtime requires recipe"
+            raise ArtifactRuntimeIntegrationError(
+                "ArtifactRuntimeIntegration._finalize_local_ready_runtime requires recipe"
             )
         if request.model is None:
-            raise ServingIntegrationError(
-                "ServingIntegration._finalize_local_ready_runtime requires model"
+            raise ArtifactRuntimeIntegrationError(
+                "ArtifactRuntimeIntegration._finalize_local_ready_runtime requires model"
             )
         if request.binding is None:
-            raise ServingIntegrationError(
-                "ServingIntegration._finalize_local_ready_runtime requires binding"
+            raise ArtifactRuntimeIntegrationError(
+                "ArtifactRuntimeIntegration._finalize_local_ready_runtime requires binding"
             )
         if request.update_epoch is None:
-            raise ServingIntegrationError(
-                "ServingIntegration._finalize_local_ready_runtime requires update_epoch"
+            raise ArtifactRuntimeIntegrationError(
+                "ArtifactRuntimeIntegration._finalize_local_ready_runtime requires update_epoch"
             )
         if not request.manifest_tensor_name:
-            raise ServingIntegrationError(
-                "ServingIntegration._finalize_local_ready_runtime requires manifest_tensor_name"
+            raise ArtifactRuntimeIntegrationError(
+                "ArtifactRuntimeIntegration._finalize_local_ready_runtime requires manifest_tensor_name"
             )
         try:
             framework_context = self._framework_context(
@@ -3478,7 +3453,7 @@ def _finalize_local_ready_runtime(
                         request.representation_contract_hash
                     ),
                     tensor_schema_hash=tensor_schema_hash,
-                    readiness="serving_local_ready",
+                    readiness="runtime_local_ready",
                 ),
                 replace_meta_params=bool(request.replace_meta_params),
                 target_device=target_device,
@@ -3500,14 +3475,26 @@ def _finalize_local_ready_runtime(
                 update_epoch=request.update_epoch,
                 source_artifact_ref=str(request.source_artifact_ref),
             )
+            source_ref = str(request.source_artifact_ref)
+            artifact_profile = (
+                "mounted_source"
+                if source_ref.startswith("msa1:")
+                else "local_ready_source_artifact"
+            )
+            authority_scope = (
+                "daemon_local_mounted_source"
+                if source_ref.startswith("msa1:")
+                else "daemon_mediated_local_ready_runtime_attachment"
+            )
             artifact_report = _runtime_attachment_report_for_artifact_id(
-                artifact_id=str(request.source_artifact_ref),
+                artifact_id=source_ref,
                 tensors=_binding_tensors(request.binding),
                 binding_handle=request.binding,
                 target_device=target_device,
                 tensor_schema_hash=tensor_schema_hash,
-                artifact_profile="local_ready_source_artifact",
-                authority_scope="daemon_mediated_local_ready_runtime_attachment",
+                artifact_profile=artifact_profile,
+                authority_scope=authority_scope,
+                source_selection=request.source_selection,
             )
             prepared = build_local_ready_prepared_artifact(
                 source_artifact_ref=str(request.source_artifact_ref),
@@ -3523,12 +3510,20 @@ def _finalize_local_ready_runtime(
                 source_bound_contract_state=request.source_bound_contract_state,
                 source_bound_contract_path=str(request.source_bound_contract_path),
                 artifact_realization_report=artifact_report,
-                model_runtime_spec=_model_runtime_spec_for_context(
-                    context=framework_context,
-                    target_device=target_device,
+                model_runtime_spec=(
+                    _model_runtime_spec_with_context_defaults(
+                        spec=request.model_runtime_spec,
+                        context=framework_context,
+                        target_device=target_device,
+                    )
+                    if request.model_runtime_spec is not None
+                    else _model_runtime_spec_for_context(
+                        context=framework_context,
+                        target_device=target_device,
+                    )
                 ),
             )
-            return LocalReadyServingResult(
+            return LocalReadyRuntimeResult(
                 model=request.model,
                 runtime_state=prepared.runtime_state,
                 runtime_view=prepared.runtime_view,
@@ -3553,12 +3548,12 @@ def _assert_local_ready_finalize_admitted(
         if not self.local_ready_requires_binding_finalize(request.recipe):
             return
         if not request.run_process_after_load:
-            raise ServingIntegrationError(
+            raise ArtifactRuntimeIntegrationError(
                 "TensorCast representation-changing local-ready finalize "
                 "requires process_after_load execution"
             )
         if not request.run_semantic_validation:
-            raise ServingIntegrationError(
+            raise ArtifactRuntimeIntegrationError(
                 "TensorCast representation-changing local-ready finalize "
                 "requires explicit semantic validation"
             )
@@ -3566,12 +3561,12 @@ def _assert_local_ready_finalize_admitted(
             semantic_validation_spec is None
             or getattr(semantic_validation_spec, "kind", "none") == "none"
         ):
-            raise ServingIntegrationError(
+            raise ArtifactRuntimeIntegrationError(
                 "TensorCast representation-changing local-ready finalize "
                 "requires an explicit semantic validation spec"
             )
         if not request.validate_representation_contract_hash:
-            raise ServingIntegrationError(
+            raise ArtifactRuntimeIntegrationError(
                 "TensorCast representation-changing local-ready finalize "
                 "requires representation contract validation"
             )
@@ -3579,7 +3574,7 @@ def _assert_local_ready_finalize_admitted(
             request.source_bound_contract_state is None
             or not request.source_bound_contract_path
         ):
-            raise ServingIntegrationError(
+            raise ArtifactRuntimeIntegrationError(
                 "TensorCast representation-changing local-ready finalize "
                 "requires same-binding contract proof"
             )
@@ -3588,7 +3583,7 @@ def _assert_local_ready_finalize_admitted(
             "source_bound_contract_ready",
             False,
         ):
-            raise ServingIntegrationError(
+            raise ArtifactRuntimeIntegrationError(
                 "TensorCast representation-changing local-ready finalize "
                 "requires ready same-binding contract proof"
             )
@@ -3612,23 +3607,23 @@ def _validate_local_ready_representation_contract_hash(
         if not request.validate_representation_contract_hash:
             return
         if request.model_config is None:
-            raise ServingIntegrationError(
-                "ServingIntegration local-ready representation validation "
+            raise ArtifactRuntimeIntegrationError(
+                "ArtifactRuntimeIntegration local-ready representation validation "
                 "requires model_config"
             )
         if request.placement is None:
-            raise ServingIntegrationError(
-                "ServingIntegration local-ready representation validation "
+            raise ArtifactRuntimeIntegrationError(
+                "ArtifactRuntimeIntegration local-ready representation validation "
                 "requires placement"
             )
         if request.runtime_binding_schema_version is None:
-            raise ServingIntegrationError(
-                "ServingIntegration local-ready representation validation "
+            raise ArtifactRuntimeIntegrationError(
+                "ArtifactRuntimeIntegration local-ready representation validation "
                 "requires runtime_binding_schema_version"
             )
         if request.serving_artifact_schema_version is None:
-            raise ServingIntegrationError(
-                "ServingIntegration local-ready representation validation "
+            raise ArtifactRuntimeIntegrationError(
+                "ArtifactRuntimeIntegration local-ready representation validation "
                 "requires serving_artifact_schema_version"
             )
         actual = self.local_ready_representation_contract_hash(
@@ -3661,7 +3656,7 @@ def build_local_ready_manifest_carrier(
         logical_topology_json_payload: str | None = None,
         topology_admission_digest: str | None = None,
     ) -> tuple[str, bytes]:
-        return prepare_same_binding_manifest_carrier(
+        return tc_local_ready.prepare_same_binding_manifest_carrier(
             recipe,
             manifest_tensor_name=manifest_tensor_name,
             representation_contract_hash=representation_contract_hash,
@@ -3678,18 +3673,20 @@ def build_local_ready_manifest_carrier_from_contract(
         topology: Any | None = None,
         framework_payload: Mapping[str, Any] | None = None,
     ) -> tuple[str, bytes]:
-        base_canonical_index = canonical_index_from_recipe(recipe)
-        tensor_schema_hash = compute_serving_tensor_schema_hash(
+        base_canonical_index = tc_local_ready.canonical_index_from_recipe(recipe)
+        tensor_schema_hash = tc_contract.compute_canonical_runtime_tensor_schema_hash(
             base_canonical_index,
             manifest_tensor_name=manifest_tensor_name,
         )
         representation_contract_hash = representation_contract_hash_factory(
             tensor_schema_hash
         )
-        logical_topology_json_payload = logical_topology_json_from_recipe(
-            recipe,
-            topology=topology,
-            framework_payload=dict(framework_payload or {}),
+        logical_topology_json_payload = (
+            tc_local_ready.logical_topology_json_from_recipe(
+                recipe,
+                topology=topology,
+                framework_payload=dict(framework_payload or {}),
+            )
         )
         topology_admission_digest = _optional_text(
             getattr(topology, "schema_topology_digest", None)
@@ -3738,10 +3735,17 @@ def local_ready_representation_contract_hash(
             "serving_artifact_schema_version": int(serving_artifact_schema_version),
             "placement": placement_identity,
         }
-        return compute_runtime_representation_contract_hash(
+        topology_ref = getattr(placement, "topology", None)
+        member_ref = getattr(placement, "member", None)
+        if topology_ref is None or member_ref is None:
+            raise ArtifactRuntimeIntegrationError(
+                "TensorCast local-ready manifest carrier requires placement "
+                "topology and member identity"
+            )
+        return tc_contract.compute_runtime_representation_contract_hash(
             tensor_schema_hash=str(tensor_schema_hash or ""),
-            topology_ref=getattr(placement, "topology", None),
-            member_ref=getattr(placement, "member", None),
+            topology_ref=topology_ref,
+            member_ref=member_ref,
             framework_name=framework_name
             or self._framework_identity(model_config).framework_name,
             framework_version=framework_version
@@ -3815,7 +3819,7 @@ def prepare_local_ready_manifest_carrier_from_framework_context(
                 serving_abi_version=serving_abi_version,
             )
         )
-        manifest = ServingArtifactManifest.from_bytes(manifest_bytes)
+        manifest = RuntimeArtifactManifest.from_bytes(manifest_bytes)
         return LocalReadyManifestCarrierResult(
             representation_contract_hash=representation_contract_hash,
             manifest_bytes=manifest_bytes,
@@ -3830,7 +3834,7 @@ def local_ready_tensor_schema_hash(
         manifest_tensor_name: str,
         manifest_bytes: bytes | None = None,
     ) -> str:
-        return compute_serving_binding_tensor_schema_hash(
+        return tc_local_ready.compute_runtime_binding_tensor_schema_hash(
             recipe,
             manifest_tensor_name=manifest_tensor_name,
             manifest_bytes=manifest_bytes,
@@ -3840,7 +3844,10 @@ def local_ready_materialized_tensor_names(
         self,
         recipe: Any,
     ) -> tuple[str, ...]:
-        return tuple(str(entry.name) for entry in materialized_tensor_schema(recipe))
+        return tuple(
+            str(entry.name)
+            for entry in tc_local_ready.materialized_tensor_schema(recipe)
+        )
 
     def _assert_local_ready_binding_tensor_set(
         self,
@@ -3880,18 +3887,20 @@ def build_local_ready_binding_contract(
         realization_plan_proto = bytes(
             getattr(recipe, "realization_plan_proto", b"") or b""
         )
-        realization_entry_count = compiled_recipe_realization_plan_count(recipe)
+        realization_entry_count = tc_local_ready.compiled_recipe_realization_plan_count(
+            recipe
+        )
         if realization_entry_count <= 0:
-            raise ServingIntegrationError(
+            raise ArtifactRuntimeIntegrationError(
                 "TensorCast local-ready binding contract requires a compiled "
                 "recipe with a pre-lowered BindingRealizationPlan"
             )
         if not realization_plan_proto:
-            raise ServingIntegrationError(
+            raise ArtifactRuntimeIntegrationError(
                 "TensorCast local-ready binding contract requires compiled "
                 "recipe realization_plan_proto; regenerate the compiled recipe cache"
             )
-        validate_tensor_schema_against_tensors(
+        tc_tensor_schema.validate_tensor_schema_against_tensors(
             recipe.tensor_schema,
             canonical_tensors,
         )
@@ -3930,9 +3939,9 @@ def local_ready_materialization_identity(
         )
 
     def local_ready_requires_binding_finalize(self, recipe: Any) -> bool:
-        serving_facts = getattr(recipe, "serving_facts", None)
+        runtime_facts = getattr(recipe, "runtime_facts", None)
         process_after_load_class = tc_readiness.coerce_finalize_class(
-            getattr(serving_facts, "process_after_load_class", None),
+            getattr(runtime_facts, "process_after_load_class", None),
             default=FinalizeClass.RUNTIME_ONLY,
         )
         return process_after_load_class == FinalizeClass.REPRESENTATION_CHANGING
@@ -3943,7 +3952,9 @@ def validate_local_ready_tensor_schema(
         recipe: Any,
         tensors: Mapping[str, Any],
     ) -> None:
-        validate_tensor_schema_against_tensors(recipe.tensor_schema, tensors)
+        tc_tensor_schema.validate_tensor_schema_against_tensors(
+            recipe.tensor_schema, tensors
+        )
 
     def freeze_local_ready(
         self,
@@ -3952,7 +3963,7 @@ def freeze_local_ready(
         update_epoch: Any,
         source_artifact_ref: str,
     ) -> Any:
-        return freeze_local_ready_binding(
+        return tc_local_ready.freeze_local_ready_binding(
             binding=binding,
             update_epoch=update_epoch,
             source_artifact_ref=source_artifact_ref,
@@ -3998,93 +4009,24 @@ def build_materialization_options(
     def build_recipe_session(
         self, request: RecipeBuildSessionRequest
     ) -> RecipeBuildSession:
-        identity = request.identity
-        if identity is None:
-            identity = self._recipe_build_identity(request)
-        return RecipeBuildSession(identity)
-
-    def _recipe_build_identity(
-        self,
-        request: RecipeBuildSessionRequest,
-    ) -> ServingBindingPlan:
-        model_config = request.model_config
-        if model_config is None:
-            self._lifecycle_not_implemented("build_recipe_session", "P2")
-        adapter = self._recipe_framework_adapter(model_config)
+        adapter = None
         placement = request.placement
-        if placement is None and self.host is not None:
-            placement = self._framework_context(
-                request.framework_config,
-                model_config,
-            ).placement
-        serving_placement = getattr(placement, "serving_placement", placement)
-        member = getattr(serving_placement, "member", None)
-        stable_identity_payload = getattr(
-            serving_placement, "stable_identity_payload", None
-        )
-        if callable(stable_identity_payload):
-            placement_payload = stable_identity_payload()
-        else:
-            placement_payload = getattr(placement, "identity_payload", None)
-            if placement_payload is None:
-                placement_payload = getattr(serving_placement, "identity_payload", None)
-        trace_cache_schema_version = request.trace_cache_schema_version
-        if trace_cache_schema_version is None:
-            trace_cache_schema_version = getattr(
-                request.cache_config,
-                "trace_cache_schema_version",
-                1,
-            )
-        tp_rank = request.tp_rank
-        if tp_rank is None:
-            tp_rank = getattr(placement, "tp_rank", None)
-        if tp_rank is None and member is not None:
-            tp_rank = getattr(member, "member_index", None)
-        tp_world_size = request.tp_world_size
-        if tp_world_size is None:
-            tp_world_size = getattr(placement, "tp_world_size", None)
-        if tp_world_size is None and member is not None:
-            tp_world_size = getattr(member, "member_count", None)
-        compute_hash = getattr(model_config, "compute_hash", None)
-        model_id = str(getattr(model_config, "model", "unknown"))
-        framework_version = self._adapter_text(adapter, "framework_version")
-        return ServingBindingPlan(
-            model_hash=str(
-                compute_hash()
-                if callable(compute_hash)
-                else getattr(model_config, "model", "unknown")
-            ),
-            model_id=model_id,
-            model_revision=getattr(model_config, "revision", None),
-            dtype=str(getattr(model_config, "dtype", "unknown")),
-            runtime_version=framework_version,
-            framework_name=self._adapter_text(adapter, "framework_name"),
-            framework_version=framework_version,
-            adapter_version=self._adapter_text(adapter, "adapter_version"),
-            serving_abi_version=self._adapter_text(
-                adapter,
-                "serving_abi_version",
-                model_config,
-            ),
-            trace_cache_schema_version=int(trace_cache_schema_version),
-            tp_rank=int(tp_rank or 0),
-            tp_world_size=int(tp_world_size or 1),
-            topology_ref=getattr(serving_placement, "topology", None),
-            member_ref=member,
-            placement=placement_payload,
+        if request.identity is None:
+            model_config = request.model_config
+            if model_config is None:
+                self._lifecycle_not_implemented("build_recipe_session", "P2")
+            adapter = self._recipe_framework_adapter(model_config)
+            if placement is None and self.host is not None:
+                placement = self._framework_context(
+                    request.framework_config,
+                    model_config,
+                ).placement
+        return build_recipe_session_from_request(
+            request,
+            adapter=adapter,
+            placement=placement,
         )
 
-    @staticmethod
-    def _adapter_text(
-        adapter: Any | None,
-        method_name: str,
-        *args: Any,
-    ) -> str:
-        method = getattr(adapter, method_name, None)
-        if callable(method):
-            return str(method(*args))
-        return ""
-
     def resolve_source_subject(
         self,
         path: str | SourceSelector,
@@ -4155,7 +4097,7 @@ def _framework_host(self) -> FrameworkHost:
         if self.host is not None:
             return self.host.framework
         raise _capability_missing(
-            "ServingIntegration requires IntegrationHost.framework",
+            "ArtifactRuntimeIntegration requires IntegrationHost.framework",
             level="level1-runtime",
             capability="framework",
             operation="framework_host",
@@ -4165,7 +4107,7 @@ def _framework_host(self) -> FrameworkHost:
                 "assert_model_ready_for_runtime_binding",
             ),
             next_action=(
-                "Construct ServingRuntimeSession with IntegrationHost.framework."
+                "Construct ArtifactRuntimeSession with IntegrationHost.framework."
             ),
         )
 
@@ -4206,7 +4148,7 @@ def _surface(self) -> TensorSurfaceHost:
                 )
             return self.host.tensor_surface
         raise _capability_missing(
-            "ServingIntegration requires IntegrationHost.tensor_surface",
+            "ArtifactRuntimeIntegration requires IntegrationHost.tensor_surface",
             level="level1-runtime",
             capability="tensor_surface",
             operation="runtime_tensor_surface",
@@ -4216,15 +4158,15 @@ def _surface(self) -> TensorSurfaceHost:
                 "compute_runtime_tensor_schema_hash",
             ),
             next_action=(
-                "Construct ServingRuntimeSession with IntegrationHost.tensor_surface."
+                "Construct ArtifactRuntimeSession with IntegrationHost.tensor_surface."
             ),
         )
 
     @staticmethod
     def _require_target_device(target_device: Any | None) -> torch.device:
         if target_device is None:
-            raise ServingIntegrationError(
-                "ServingIntegration request requires target_device"
+            raise ArtifactRuntimeIntegrationError(
+                "ArtifactRuntimeIntegration request requires target_device"
             )
         return torch.device(target_device)
 
@@ -4247,7 +4189,7 @@ def _runtime_policy_with_placement(
         if digest is None:
             return policy
         if policy is None:
-            return ServingRuntimePolicy(
+            return RuntimeArtifactPolicy(
                 require_manifest=True,
                 expected_topology_admission_digest=digest,
             )
@@ -4279,11 +4221,11 @@ def _json_object_payload(value: Any, *, field_name: str) -> Any:
             payload = json.loads(str(value))
         except Exception as exc:
             raise ManifestMismatchError(
-                f"TensorCast serving artifact {field_name} is invalid JSON"
+                f"TensorCast runtime artifact {field_name} is invalid JSON"
             ) from exc
         if not isinstance(payload, dict):
             raise ManifestMismatchError(
-                f"TensorCast serving artifact {field_name} must be a JSON object"
+                f"TensorCast runtime artifact {field_name} must be a JSON object"
             )
         return payload
 
@@ -4308,12 +4250,12 @@ def _validate_resolved_artifact_placement(
         if manifest_topology_digest is not None:
             if placement_topology_digest is None:
                 raise ManifestMismatchError(
-                    "TensorCast serving artifact topology admission digest "
+                    "TensorCast runtime artifact topology admission digest "
                     "requires current framework placement"
                 )
             if manifest_topology_digest != placement_topology_digest:
                 raise ManifestMismatchError(
-                    "TensorCast serving artifact topology admission digest "
+                    "TensorCast runtime artifact topology admission digest "
                     "mismatch: "
                     f"manifest={manifest_topology_digest}, "
                     f"current={placement_topology_digest}"
@@ -4326,7 +4268,7 @@ def _validate_resolved_artifact_placement(
             return
         if placement is None:
             raise ManifestMismatchError(
-                "TensorCast serving artifact logical topology requires current "
+                "TensorCast runtime artifact logical topology requires current "
                 "framework placement"
             )
         try:
@@ -4336,7 +4278,7 @@ def _validate_resolved_artifact_placement(
             )
         except Exception as exc:
             raise ManifestMismatchError(
-                "TensorCast serving artifact logical topology could not be "
+                "TensorCast runtime artifact logical topology could not be "
                 "computed from current framework placement"
             ) from exc
         if cls._json_object_payload(
@@ -4345,7 +4287,7 @@ def _validate_resolved_artifact_placement(
             current_logical_topology, field_name="current logical topology"
         ):
             raise ManifestMismatchError(
-                "TensorCast serving artifact logical topology mismatch"
+                "TensorCast runtime artifact logical topology mismatch"
             )
 
     def _prepare_model_construction(
@@ -4407,15 +4349,15 @@ def support_level(
         self,
         model: object,
         model_config: object,
-    ) -> ServingSupportLevel:
+    ) -> RuntimeSupportLevel:
         host = self._framework_host()
         support_level = getattr(host, "support_level", None)
         if callable(support_level):
-            return tc_readiness.coerce_serving_support_level(
+            return tc_readiness.coerce_runtime_support_level(
                 support_level(model, model_config),
-                default=ServingSupportLevel.BLOCKED,
+                default=RuntimeSupportLevel.BLOCKED,
             )
-        return ServingSupportLevel.BLOCKED
+        return RuntimeSupportLevel.BLOCKED
 
     def process_after_load_class(
         self,
@@ -4465,7 +4407,7 @@ def trace_model_load(
         trace = getattr(host, "trace_model_load", None)
         if not callable(trace):
             raise _capability_missing(
-                "ServingIntegration host requires RecipeTraceHost."
+                "ArtifactRuntimeIntegration host requires RecipeTraceHost."
                 "trace_model_load on recipe cache miss",
                 level="level2-local-bootstrap",
                 capability="recipe_trace",
@@ -4514,7 +4456,7 @@ def native_load_weights(self, model: object, weights: object) -> None:
         native_load = getattr(host, "native_load_weights", None)
         if not callable(native_load):
             raise _capability_missing(
-                "ServingIntegration host requires NativeLoadHost for native "
+                "ArtifactRuntimeIntegration host requires NativeLoadHost for native "
                 "checkpoint/source loading",
                 level="level2-local-bootstrap",
                 capability="native_load",
@@ -4560,13 +4502,13 @@ def _assert_tensor_names_match_expected(
         if not missing and not unexpected:
             return
         raise SchemaMismatchError(
-            "TensorCast runtime tensor set does not match serving artifact: "
+            "TensorCast runtime tensor set does not match runtime artifact: "
             f"missing_count={len(missing)}, unexpected_count={len(unexpected)}"
         )
 
     def _load_materialization_options(
         self,
-        request: _DirectServingLoad,
+        request: _DirectRuntimeLoad,
         resolved: Any,
     ) -> Any | None:
         if request.materialization is not None:
@@ -4579,8 +4521,8 @@ def _load_materialization_options(
             or execution_facts is None
         ):
             if request.require_materialization_options:
-                raise ServingIntegrationError(
-                    "ServingIntegration._load_existing_serving_artifact requires "
+                raise ArtifactRuntimeIntegrationError(
+                    "ArtifactRuntimeIntegration._load_existing_runtime_artifact requires "
                     "materialization execution context for direct bind"
                 )
             return None
@@ -4589,8 +4531,8 @@ def _load_materialization_options(
             "source_bound_contract_ready",
             False,
         ):
-            raise ServingIntegrationError(
-                "ServingIntegration._load_existing_serving_artifact requires ready "
+            raise ArtifactRuntimeIntegrationError(
+                "ArtifactRuntimeIntegration._load_existing_runtime_artifact requires ready "
                 "source-bound contract state for direct bind"
             )
         manifest = getattr(resolved, "manifest", None)
@@ -4607,7 +4549,7 @@ def _load_materialization_options(
 
     def _reload_materialization_options(
         self,
-        request: _ServingReload,
+        request: _RuntimeReload,
         resolved: Any,
     ) -> Any | None:
         if request.materialization is not None:
@@ -4620,8 +4562,8 @@ def _reload_materialization_options(
             or execution_facts is None
         ):
             if request.require_materialization_options:
-                raise ServingIntegrationError(
-                    "ServingIntegration._reload_existing_serving_artifact requires "
+                raise ArtifactRuntimeIntegrationError(
+                    "ArtifactRuntimeIntegration._reload_existing_runtime_artifact requires "
                     "materialization execution context for swap"
                 )
             return None
@@ -4630,8 +4572,8 @@ def _reload_materialization_options(
             "source_bound_contract_ready",
             False,
         ):
-            raise ServingIntegrationError(
-                "ServingIntegration._reload_existing_serving_artifact requires ready "
+            raise ArtifactRuntimeIntegrationError(
+                "ArtifactRuntimeIntegration._reload_existing_runtime_artifact requires ready "
                 "source-bound contract state for swap"
             )
         manifest = getattr(resolved, "manifest", None)
@@ -4656,19 +4598,19 @@ def _reload_materialization_options(
     def _resolved_artifact(
         self,
         *,
-        resolved_artifact: ResolvedServingArtifact | None,
+        resolved_artifact: ResolvedRuntimeArtifact | None,
         artifact_ref: str | None,
         artifact_locator: Any | None,
         expected_tensor_schema_hash: str | None,
-        serving_runtime_policy: Any | None,
-        placement: ServingPlacement | None = None,
-    ) -> ResolvedServingArtifact:
+        runtime_artifact_policy: Any | None,
+        placement: RuntimePlacement | None = None,
+    ) -> ResolvedRuntimeArtifact:
         if resolved_artifact is not None:
             if artifact_ref is not None and str(resolved_artifact.artifact_ref) != str(
                 artifact_ref
             ):
                 raise ManifestMismatchError(
-                    "TensorCast resolved serving artifact ref mismatch: "
+                    "TensorCast resolved runtime artifact ref mismatch: "
                     f"resolved={resolved_artifact.artifact_ref}, "
                     f"requested={artifact_ref}"
                 )
@@ -4677,11 +4619,11 @@ def _resolved_artifact(
                 placement=placement,
             )
             if self.resolver is not None and expected_tensor_schema_hash:
-                return cross_check_serving_artifact(
+                return cross_check_runtime_artifact(
                     resolved_artifact,
                     resolver=self.resolver,
                     expected_tensor_schema_hash=expected_tensor_schema_hash,
-                    serving_runtime_policy=serving_runtime_policy,
+                    runtime_artifact_policy=runtime_artifact_policy,
                 )
             return resolved_artifact
         resolved_ref = artifact_ref
@@ -4697,15 +4639,15 @@ def _resolved_artifact(
             else:
                 resolved_ref = str(artifact_locator)
         if not resolved_ref:
-            raise ServingIntegrationError(
-                "ServingIntegration request requires resolved_artifact, "
+            raise ArtifactRuntimeIntegrationError(
+                "ArtifactRuntimeIntegration request requires resolved_artifact, "
                 "artifact_ref, or artifact_locator"
             )
-        resolved = resolve_serving_artifact(
+        resolved = resolve_runtime_artifact(
             str(resolved_ref),
             resolver=self.resolver,
             expected_tensor_schema_hash=expected_tensor_schema_hash,
-            serving_runtime_policy=serving_runtime_policy,
+            runtime_artifact_policy=runtime_artifact_policy,
         )
         self._validate_resolved_artifact_placement(
             resolved,
@@ -4713,26 +4655,26 @@ def _resolved_artifact(
         )
         return resolved
 
-    def _preflight_serving_artifact(
+    def _preflight_runtime_artifact(
         self,
         *,
-        resolved_artifact: ResolvedServingArtifact | None,
+        resolved_artifact: ResolvedRuntimeArtifact | None,
         artifact_ref: str | None,
         artifact_locator: Any | None,
         expected_tensor_schema_hash: str | None,
         policy: Any | None,
-        placement: ServingPlacement | None = None,
-    ) -> _ServingArtifactPreflight:
+        placement: RuntimePlacement | None = None,
+    ) -> _RuntimeArtifactPreflight:
         base_policy = self._runtime_policy(policy)
         resolved = self._resolved_artifact(
             resolved_artifact=resolved_artifact,
             artifact_ref=artifact_ref,
             artifact_locator=artifact_locator,
             expected_tensor_schema_hash=None,
-            serving_runtime_policy=None,
+            runtime_artifact_policy=None,
             placement=placement,
         )
-        serving_runtime_policy = self._runtime_policy_from_manifest(
+        runtime_artifact_policy = self._runtime_policy_from_manifest(
             base_policy,
             resolved,
             placement=placement,
@@ -4743,12 +4685,12 @@ def _preflight_serving_artifact(
                 artifact_ref=artifact_ref,
                 artifact_locator=artifact_locator,
                 expected_tensor_schema_hash=expected_tensor_schema_hash,
-                serving_runtime_policy=serving_runtime_policy,
+                runtime_artifact_policy=runtime_artifact_policy,
                 placement=placement,
             )
-        return _ServingArtifactPreflight(
+        return _RuntimeArtifactPreflight(
             resolved_artifact=resolved,
-            serving_runtime_policy=serving_runtime_policy,
+            runtime_artifact_policy=runtime_artifact_policy,
         )
 
     def _framework_context(
@@ -4760,7 +4702,7 @@ def _framework_context(
         placement = None
         if self.host is not None:
             try:
-                placement = self._host_serving_placement(framework_config)
+                placement = self._host_runtime_placement(framework_config)
             except Exception:
                 placement = None
         return FrameworkIntegrationContext(
@@ -4774,13 +4716,13 @@ def _framework_context(
     def _materializer(self) -> RuntimeBindingMaterialization:
         if self.host is None:
             raise _capability_missing(
-                "ServingIntegration runtime materialization requires IntegrationHost",
+                "ArtifactRuntimeIntegration runtime materialization requires IntegrationHost",
                 level="level1-runtime",
                 capability="integration_host",
                 operation="runtime_materialization",
                 required_methods=("framework", "placement", "tensor_surface"),
                 next_action=(
-                    "Construct ServingRuntimeSession with an IntegrationHost "
+                    "Construct ArtifactRuntimeSession with an IntegrationHost "
                     "instead of calling lifecycle helpers without host facts."
                 ),
             )
@@ -4791,14 +4733,14 @@ def _materializer(self) -> RuntimeBindingMaterialization:
 
     @staticmethod
     def _state_seed(
-        resolved: ResolvedServingArtifact,
+        resolved: ResolvedRuntimeArtifact,
         *,
         tensor_schema_hash: str,
         execution_diagnostics: Any | None,
         materialization_diagnostics: Any | None = None,
         binding_handle: Any | None = None,
         artifact_realization_report: ArtifactRealizationReport | None = None,
-        readiness: str = "serving",
+        readiness: str = "runtime_ready",
     ) -> RuntimeStateSeed:
         artifact_ref = str(getattr(resolved, "artifact_ref", "") or "")
         manifest = getattr(resolved, "manifest", None)
@@ -4837,23 +4779,23 @@ def _state_seed(
         )
 
 
-def resolve_serving_artifact(
+def resolve_runtime_artifact(
     artifact_ref: str,
     *,
-    resolver: ServingArtifactResolver | None = None,
+    resolver: RuntimeArtifactResolver | None = None,
     manifest_tensor_name: str | None = None,
     schema_version: int | None = None,
     expected_tensor_schema_hash: str | None = None,
-    serving_runtime_policy: Any | None = None,
-) -> ResolvedServingArtifact:
-    """Resolve a serving artifact and optionally cross-check runtime schema."""
+    runtime_artifact_policy: Any | None = None,
+) -> ResolvedRuntimeArtifact:
+    """Resolve a runtime artifact and optionally cross-check runtime schema."""
 
-    resolved_resolver = resolver or ServingArtifactResolver(
+    resolved_resolver = resolver or RuntimeArtifactResolver(
         manifest_tensor_name=manifest_tensor_name or tc.SERVING_MANIFEST_TENSOR_NAME,
         schema_version=(
             schema_version
             if schema_version is not None
-            else int(tc.ServingArtifactManifest.model_fields["schema_version"].default)
+            else int(tc.RuntimeArtifactManifest.model_fields["schema_version"].default)
         ),
     )
     resolved = resolved_resolver.resolve(str(artifact_ref))
@@ -4861,65 +4803,65 @@ def resolve_serving_artifact(
         resolved_resolver.cross_check(
             resolved,
             expected_tensor_schema_hash=expected_tensor_schema_hash,
-            serving_runtime_policy=serving_runtime_policy,
+            runtime_artifact_policy=runtime_artifact_policy,
         )
     return resolved
 
 
-def read_serving_artifact_manifest(
+def read_runtime_artifact_manifest(
     artifact: Any,
     *,
     artifact_ref: str,
-    resolver: ServingArtifactResolver,
-) -> ResolvedServingArtifact:
-    """Read a serving manifest from an already opened artifact handle."""
+    resolver: RuntimeArtifactResolver,
+) -> ResolvedRuntimeArtifact:
+    """Read a runtime manifest from an already opened artifact handle."""
 
     return resolver.read_manifest(artifact, artifact_ref=str(artifact_ref))
 
 
-def cross_check_serving_artifact(
-    resolved_artifact: ResolvedServingArtifact,
+def cross_check_runtime_artifact(
+    resolved_artifact: ResolvedRuntimeArtifact,
     *,
-    resolver: ServingArtifactResolver,
+    resolver: RuntimeArtifactResolver,
     expected_tensor_schema_hash: str,
-    serving_runtime_policy: Any | None = None,
-) -> ResolvedServingArtifact:
+    runtime_artifact_policy: Any | None = None,
+) -> ResolvedRuntimeArtifact:
     """Validate manifest, descriptor schema, and runtime policy agreement."""
 
     return resolver.cross_check(
         resolved_artifact,
         expected_tensor_schema_hash=expected_tensor_schema_hash,
-        serving_runtime_policy=serving_runtime_policy,
+        runtime_artifact_policy=runtime_artifact_policy,
     )
 
 
 @dataclass(frozen=True)
-class ServingRuntimeSession:
-    """Config-planned serving runtime lifecycle entrypoint."""
+class ArtifactRuntimeSession:
+    """Config-planned artifact runtime lifecycle entrypoint."""
 
-    serving_config: ServingConfig
+    runtime_config: TensorCastRuntimeConfig
     host: IntegrationHost
-    integration: ServingIntegration
+    integration: ArtifactRuntimeIntegration
     profile_sink: Any | None = None
 
     @classmethod
     def from_config(
         cls,
-        serving_config: ServingConfig | Mapping[str, Any],
+        runtime_config: TensorCastRuntimeConfig | Mapping[str, Any],
         *,
         host: IntegrationHost,
-        resolver: ServingArtifactResolver | None = None,
+        resolver: RuntimeArtifactResolver | None = None,
         profile_sink: Any | None = None,
-    ) -> "ServingRuntimeSession":
+    ) -> "ArtifactRuntimeSession":
         config = (
-            serving_config
-            if isinstance(serving_config, ServingConfig)
-            else ServingConfig.from_mapping(serving_config)
+            runtime_config
+            if isinstance(runtime_config, TensorCastRuntimeConfig)
+            else TensorCastRuntimeConfig.from_mapping(runtime_config)
         )
         return cls(
-            serving_config=config,
+            runtime_config=config,
             host=host,
-            integration=ServingIntegration(
+            integration=ArtifactRuntimeIntegration(
                 resolver=resolver,
                 profile_sink=profile_sink,
                 host=host,
@@ -4943,7 +4885,10 @@ def publish_current_replica(
         del context
         return tc_replica_publication.publish_current_replica(
             current_attachment=current_attachment,
-            policy=self._replica_publication_policy(policy),
+            policy=tc_replica_publication.replica_publication_policy(
+                policy,
+                default_policy=self.runtime_config.replica_publication,
+            ),
             ensure_runtime_initialized=self._ensure_runtime_initialized,
             profile_sink=self.profile_sink,
         )
@@ -4986,7 +4931,7 @@ def retire_current_replica(
             reason=reason,
             drain_timeout_s=drain_timeout_s,
             default_drain_timeout_s=(
-                self.serving_config.replica_publication.drain_timeout_s
+                self.runtime_config.replica_publication.drain_timeout_s
             ),
             ensure_runtime_initialized=self._ensure_runtime_initialized,
             profile_sink=self.profile_sink,
@@ -4994,10 +4939,10 @@ def retire_current_replica(
 
     def _start_intent(
         self,
-        intent: ServingIntent,
+        intent: RuntimeIntent,
         context: RequestContext,
     ) -> RuntimeAttachment:
-        """Private/admin entrypoint for already lowered serving intents."""
+        """Private/admin entrypoint for already lowered runtime intents."""
 
         self._ensure_runtime_initialized()
         return self.integration.start(intent, context)
@@ -5006,20 +4951,20 @@ def reload(
         self,
         *,
         current_attachment: RuntimeAttachment | RuntimeBindingState | Any,
-        artifact_locator: ServingArtifactLocator,
-        policy: ServingPolicy | None,
+        artifact_locator: ArtifactLocator,
+        policy: RuntimePolicy | None,
         context: RequestContext,
         model: object | None = None,
         contract_identity: str | None = None,
     ) -> RuntimeAttachment:
         self._reject_local_reload_artifact_locator(artifact_locator)
-        if not isinstance(artifact_locator, ServingArtifactLocator):
+        if not isinstance(artifact_locator, ArtifactLocator):
             raise ConfigConflictError(
-                "TensorCast serving reload requires a ServingArtifactLocator"
+                "TensorCast runtime artifact reload requires an ArtifactLocator"
             )
-        if policy is not None and not isinstance(policy, ServingPolicy):
+        if policy is not None and not isinstance(policy, RuntimePolicy):
             raise ConfigConflictError(
-                "TensorCast serving reload requires a ServingPolicy or None"
+                "TensorCast runtime artifact reload requires a RuntimePolicy or None"
             )
         if isinstance(current_attachment, RuntimeAttachment):
             self._reject_reload_with_active_publication(current_attachment)
@@ -5034,7 +4979,7 @@ def reload(
         )
         return self.integration.reload(
             current_state,
-            ExistingServingArtifact(artifact_locator=artifact_locator, policy=policy),
+            ExistingRuntimeArtifact(artifact_locator=artifact_locator, policy=policy),
             context,
             model=runtime_model,
             contract_identity=contract_identity,
@@ -5049,17 +4994,7 @@ def describe(
         return self.integration.describe(attachment_or_state)
 
     def _ensure_runtime_initialized(self) -> None:
-        self.serving_config.runtime.ensure_initialized()
-
-    def _replica_publication_policy(
-        self,
-        policy: ReplicaPublicationPolicy | Mapping[str, Any] | None,
-    ) -> ReplicaPublicationPolicy:
-        if policy is None:
-            return self.serving_config.replica_publication
-        if isinstance(policy, ReplicaPublicationPolicy):
-            return policy
-        return ReplicaPublicationPolicy.model_validate(dict(policy))
+        self.runtime_config.runtime.ensure_initialized()
 
     @staticmethod
     def _reject_reload_with_active_publication(
@@ -5067,11 +5002,11 @@ def _reject_reload_with_active_publication(
     ) -> None:
         tc_replica_publication.reject_reload_with_active_publication(current_attachment)
 
-    def _plan_start_intent(self, context: RequestContext) -> ServingIntent:
+    def _plan_start_intent(self, context: RequestContext) -> RuntimeIntent:
         source_selector = self._source_selector_from_context(context)
         expected_member = None
         if (
-            self.serving_config.retained_binding_acquire.mode == "external"
+            self.runtime_config.retained_binding_acquire.mode == "external"
             and self.host is not None
         ):
             placement = self.integration._framework_context(
@@ -5081,28 +5016,28 @@ def _plan_start_intent(self, context: RequestContext) -> ServingIntent:
             if placement is not None:
                 expected_member = placement.member
         try:
-            plan = tc_config.plan_serving_start(
-                config=self.serving_config,
+            plan = tc_runtime_config.plan_runtime_start(
+                config=self.runtime_config,
                 source_selector=source_selector,
                 expected_member=expected_member,
             )
-        except tc_config.ServingStartPlanError as exc:
+        except tc_runtime_config.RuntimeStartPlanError as exc:
             raise ConfigConflictError(str(exc)) from exc
 
-        if isinstance(plan, tc_config.RetainedBindingAcquireStartPlan):
+        if isinstance(plan, tc_runtime_config.RuntimeRetainedRealizationStartPlan):
             return RetainedBindingAcquire(plan.authority)
-        if isinstance(plan, tc_config.ArtifactBindStartPlan):
-            return ExistingServingArtifact(
+        if isinstance(plan, tc_runtime_config.RuntimeArtifactBindStartPlan):
+            return ExistingRuntimeArtifact(
                 artifact_locator=plan.artifact_locator,
                 policy=plan.policy,
             )
-        if isinstance(plan, tc_config.SourceBootstrapToBindingStartPlan):
+        if isinstance(plan, tc_runtime_config.RuntimeSourceBootstrapStartPlan):
             return LocalSourceBootstrap(
                 source_selector=plan.source_selector,
                 bootstrap_policy=plan.bootstrap_policy,
             )
         raise ConfigConflictError(
-            f"TensorCast serving planner returned unsupported plan: {plan!r}"
+            f"TensorCast runtime planner returned unsupported plan: {plan!r}"
         )
 
     def _source_selector_from_context(
@@ -5130,46 +5065,46 @@ def _reject_local_reload_artifact_locator(artifact_locator: object) -> None:
             or _artifact_locator_kind(artifact_locator) == "local_path"
         ):
             raise ConfigConflictError(
-                "TensorCast serving reload requires a durable serving "
+                "TensorCast runtime artifact reload requires a durable runtime "
                 "artifact locator, not a local source selector"
             )
 
 
-def bind_serving_artifact(
+def bind_runtime_artifact(
     *,
-    resolved_artifact: ResolvedServingArtifact,
+    resolved_artifact: ResolvedRuntimeArtifact,
     tensor_names: Sequence[str],
     device: Any,
-    serving_runtime_policy: Any | None,
+    runtime_artifact_policy: Any | None,
     options: Any | None,
 ) -> RuntimeBindingResult:
-    """Bind a durable serving artifact and return an attach-ready result."""
+    """Bind a durable runtime artifact and return an attach-ready result."""
 
-    binding = tc_binding_runtime.bind_serving_artifact(
+    binding = tc_binding_runtime.bind_runtime_artifact(
         resolved_artifact=resolved_artifact,
         tensor_names=tuple(tensor_names),
         device=device,
-        serving_runtime_policy=serving_runtime_policy,
+        runtime_artifact_policy=runtime_artifact_policy,
         options=options,
     )
     return RuntimeBindingResult.from_binding(binding)
 
 
-def swap_serving_artifact(
+def swap_runtime_artifact(
     *,
     binding: Any,
-    resolved_artifact: ResolvedServingArtifact,
+    resolved_artifact: ResolvedRuntimeArtifact,
     tensor_names: Sequence[str] | None = None,
-    serving_runtime_policy: Any | None,
+    runtime_artifact_policy: Any | None,
     options: Any | None,
 ) -> RuntimeBindingResult:
-    """Swap an existing runtime binding to another serving artifact."""
+    """Swap an existing runtime binding to another runtime artifact."""
 
-    operation_result = tc_binding_runtime.swap_serving_artifact(
+    operation_result = tc_binding_runtime.swap_runtime_artifact(
         binding=binding,
         resolved_artifact=resolved_artifact,
         tensor_names=tensor_names,
-        serving_runtime_policy=serving_runtime_policy,
+        runtime_artifact_policy=runtime_artifact_policy,
         options=options,
     )
     result_binding = operation_result if operation_result is not None else binding
@@ -5179,304 +5114,3 @@ def swap_serving_artifact(
         result_binding,
         operation_result=operation_result,
     )
-
-
-@contextmanager
-def restore_retained_binding(
-    *,
-    authority: tc_retained_binding.ParsedRetainedServingBindingAuthority | None = None,
-    local_serving_ref: str | None = None,
-    target_device: torch.device | str,
-    expected_member: tc.ServingBindingMemberRef | None = None,
-    expected_tensor_schema_hash: str | None = None,
-    expected_serving_build_digest: str | None = None,
-    expected_target_layout_hash: str | None = None,
-    expected_daemon_id: str | None = None,
-    expected_daemon_session_id: str | None = None,
-    serving_artifact_id: str | None = None,
-    caller_pid: int | None = None,
-    runtime: Any | None = None,
-    client: Any | None = None,
-    restore_fn: Any | None = None,
-    timeout_s: float | None = None,
-) -> Iterator[RestoredRetainedBinding]:
-    """Acquire and restore a retained binding value for framework attach.
-
-    If the framework does not call ``transfer_to_runtime()``, the restored owner
-    is released automatically when the context exits. After transfer, close
-    ownership belongs to the returned runtime handle.
-    """
-
-    with tc_retained_binding.acquire_retained_serving_binding(
-        authority=authority,
-        local_serving_ref=local_serving_ref,
-        target_device=target_device,
-        expected_member=expected_member,
-        expected_tensor_schema_hash=expected_tensor_schema_hash,
-        expected_serving_build_digest=expected_serving_build_digest,
-        expected_target_layout_hash=expected_target_layout_hash,
-        expected_daemon_id=expected_daemon_id,
-        expected_daemon_session_id=expected_daemon_session_id,
-        serving_artifact_id=serving_artifact_id,
-        caller_pid=caller_pid if caller_pid is not None else os.getpid(),
-        runtime=runtime,
-        client=client,
-        timeout_s=timeout_s,
-    ) as lease:
-        attached = lease.restore(
-            target_device=torch.device(target_device),
-            restore_fn=restore_fn,
-        )
-        restored = RestoredRetainedBinding(attached)
-        try:
-            yield restored
-        finally:
-            restored.close()
-
-
-@contextmanager
-def restore_prepared_local_ready_binding(
-    *,
-    resolved_artifact: ResolvedServingArtifact,
-    target_device: torch.device | str,
-    expected_member: tc.ServingBindingMemberRef,
-    expected_tensor_schema_hash: str,
-    expected_serving_build_digest: str | None = None,
-    caller_pid: int | None = None,
-    timeout_s: float | None = None,
-    runtime: Any | None = None,
-    client: Any | None = None,
-    restore_fn: Any | None = None,
-) -> Iterator[RestoredRetainedBinding]:
-    """Restore a local-ready retained value referenced by a serving manifest."""
-
-    manifest = resolved_artifact.manifest
-    local_serving_ref = getattr(manifest, "local_serving_ref", None)
-    if manifest is None or not local_serving_ref:
-        raise RuntimeError(
-            "TensorCast prepared local-ready startup requires local_serving_ref "
-            "in the serving artifact manifest"
-        )
-    serving_build_digest = (
-        expected_serving_build_digest
-        if expected_serving_build_digest is not None
-        else getattr(manifest, "serving_build_digest", None)
-    )
-    if not serving_build_digest:
-        raise RuntimeError(
-            "TensorCast prepared local-ready startup requires serving_build_digest"
-        )
-    with restore_retained_binding(
-        local_serving_ref=str(local_serving_ref),
-        target_device=target_device,
-        expected_member=expected_member,
-        expected_tensor_schema_hash=expected_tensor_schema_hash,
-        expected_serving_build_digest=str(serving_build_digest),
-        serving_artifact_id=str(resolved_artifact.artifact_ref),
-        caller_pid=caller_pid,
-        timeout_s=timeout_s,
-        runtime=runtime,
-        client=client,
-        restore_fn=restore_fn,
-    ) as restored:
-        yield restored
-
-
-def evaluate_semantic_validation_spec(spec: Any, actual_payload: Any) -> Any:
-    return tc_semantic_validation.evaluate_semantic_validation_spec(
-        spec, actual_payload
-    )
-
-
-def validate_tensor_schema_against_tensors(
-    tensor_schema: Any,
-    tensors: Mapping[str, torch.Tensor],
-) -> None:
-    tc_tensor_schema.validate_tensor_schema_against_tensors(tensor_schema, tensors)
-
-
-def collect_runtime_tensor_schema(
-    tensors: Mapping[str, torch.Tensor],
-    *,
-    remove_duplicate: bool,
-) -> Any:
-    return tc_contract.collect_runtime_tensor_schema(
-        tensors,
-        remove_duplicate=remove_duplicate,
-    )
-
-
-def compute_runtime_tensor_schema_hash(schema: Any) -> str:
-    return tc_contract.compute_runtime_tensor_schema_hash(schema)
-
-
-def compute_runtime_representation_contract_hash(**kwargs: Any) -> str:
-    return tc_contract.compute_runtime_representation_contract_hash(**kwargs)
-
-
-def compute_serving_tensor_schema_hash(*args: Any, **kwargs: Any) -> str:
-    return tc.compute_serving_tensor_schema_hash(*args, **kwargs)
-
-
-def canonical_index_from_recipe(recipe: Any) -> Any:
-    return tc_local_ready.canonical_index_from_recipe(recipe)
-
-
-def materialized_tensor_schema(recipe: Any) -> Any:
-    return tc_local_ready.materialized_tensor_schema(recipe)
-
-
-def prepare_same_binding_manifest_carrier(*args: Any, **kwargs: Any) -> Any:
-    return tc_local_ready.prepare_same_binding_manifest_carrier(*args, **kwargs)
-
-
-def compute_serving_binding_tensor_schema_hash(*args: Any, **kwargs: Any) -> str:
-    return tc_local_ready.compute_serving_binding_tensor_schema_hash(*args, **kwargs)
-
-
-def prepare_local_ready_serving(*args: Any, **kwargs: Any) -> Any:
-    return tc_local_ready.prepare_local_ready_serving(*args, **kwargs)
-
-
-def freeze_local_ready_binding(*args: Any, **kwargs: Any) -> Any:
-    return tc_local_ready.freeze_local_ready_binding(*args, **kwargs)
-
-
-def tensorcast_view_slice_count(recipe: Any) -> int:
-    return tc_local_ready.tensorcast_view_slice_count(recipe)
-
-
-def compiled_recipe_realization_plan_count(recipe: Any) -> int:
-    return tc_local_ready.compiled_recipe_realization_plan_count(recipe)
-
-
-def binding_value_verification_state_name(value: Any) -> str:
-    return tc_local_ready.binding_value_verification_state_name(value)
-
-
-def logical_topology_json_from_recipe(*args: Any, **kwargs: Any) -> Any:
-    return tc_local_ready.logical_topology_json_from_recipe(*args, **kwargs)
-
-
-def publication_context_from_recipe(*args: Any, **kwargs: Any) -> Any:
-    return tc_local_ready.publication_context_from_recipe(*args, **kwargs)
-
-
-def resolve_source_artifact_ref(*args: Any, **kwargs: Any) -> Any:
-    return tc_source_catalog.resolve_source_artifact_ref(*args, **kwargs)
-
-
-def source_catalog_from_selected_safetensors(*args: Any, **kwargs: Any) -> Any:
-    return tc_source_catalog.source_catalog_from_selected_safetensors(*args, **kwargs)
-
-
-def compute_trace_build_cache_key(*args: Any, **kwargs: Any) -> str:
-    return tc_recipe_build.compute_trace_cache_key(*args, **kwargs)
-
-
-def compute_recipe_build_cache_key(*args: Any, **kwargs: Any) -> str:
-    return tc_recipe_build.compute_recipe_cache_key(*args, **kwargs)
-
-
-def trace_build_cache_path(*args: Any, **kwargs: Any) -> str:
-    return tc_recipe_build.trace_cache_path(*args, **kwargs)
-
-
-def recipe_build_cache_path(*args: Any, **kwargs: Any) -> str:
-    return tc_recipe_build.recipe_cache_path(*args, **kwargs)
-
-
-def stable_recipe_build_hash(*args: Any, **kwargs: Any) -> str:
-    return tc_recipe_build.stable_recipe_build_hash(*args, **kwargs)
-
-
-def load_trace_plan_cache(*args: Any, **kwargs: Any) -> Any:
-    return tc_trace_cache.load_trace_plan_cache(*args, **kwargs)
-
-
-def write_trace_plan_cache(*args: Any, **kwargs: Any) -> None:
-    tc_trace_cache.write_trace_plan_cache(*args, **kwargs)
-
-
-def dump_trace_plan_debug(*args: Any, **kwargs: Any) -> None:
-    tc_trace_cache.dump_trace_plan_debug(*args, **kwargs)
-
-
-def load_compiled_recipe_cache(*args: Any, **kwargs: Any) -> Any:
-    return tc_recipe_cache.load_compiled_recipe_cache(*args, **kwargs)
-
-
-def write_compiled_recipe_cache(*args: Any, **kwargs: Any) -> None:
-    tc_recipe_cache.write_compiled_recipe_cache(*args, **kwargs)
-
-
-def compute_recipe_compile_key(*args: Any, **kwargs: Any) -> str:
-    return tc_compiler.compute_recipe_compile_key(*args, **kwargs)
-
-
-def compute_recipe_compile_key_from_inputs(*args: Any, **kwargs: Any) -> str:
-    return tc_compiler.compute_recipe_compile_key(*args, **kwargs)
-
-
-def compile_recipe_from_inputs(*args: Any, **kwargs: Any) -> Any:
-    return tc_compiler.compile_serving_recipe(*args, **kwargs)
-
-
-def allocate_tensors_from_schema(*args: Any, **kwargs: Any) -> Any:
-    return tc_materialization.allocate_tensors_from_schema(*args, **kwargs)
-
-
-def apply_copy_plan(*args: Any, **kwargs: Any) -> Any:
-    return tc_materialization.apply_copy_plan(*args, **kwargs)
-
-
-def tensorcast_view_slices_from_trace_plan(*args: Any, **kwargs: Any) -> Any:
-    return tc_materialization.tensorcast_view_slices_from_trace_plan(*args, **kwargs)
-
-
-def validate_dst_coverage(*args: Any, **kwargs: Any) -> None:
-    tc_materialization.validate_dst_coverage(*args, **kwargs)
-
-
-def validate_source_tensor_names(*args: Any, **kwargs: Any) -> None:
-    tc_materialization.validate_source_tensor_names(*args, **kwargs)
-
-
-def validate_recipe_for_builder_mode(*args: Any, **kwargs: Any) -> None:
-    tc_recipe_validation.validate_recipe_for_builder_mode(*args, **kwargs)
-
-
-def build_pure_transform_build_intent(*args: Any, **kwargs: Any) -> Any:
-    return tc_publication.build_pure_transform_build_intent(*args, **kwargs)
-
-
-def complete_pure_transform_publication(*args: Any, **kwargs: Any) -> Any:
-    return tc.complete_pure_transform_publication(*args, **kwargs)
-
-
-def build_materialization_execution_context(*args: Any, **kwargs: Any) -> Any:
-    return tc_binding_runtime.build_materialization_execution_context(*args, **kwargs)
-
-
-def retained_binding_acquire_mode(*args: Any, **kwargs: Any) -> str:
-    return tc_retained_binding.retained_binding_acquire_mode(*args, **kwargs)
-
-
-def retained_serving_binding_trusted_reservation_bytes(
-    *args: Any, **kwargs: Any
-) -> int:
-    return tc_retained_binding.retained_serving_binding_trusted_reservation_bytes(
-        *args, **kwargs
-    )
-
-
-def retained_serving_binding_extra_from_prefetched_binding(
-    *args: Any, **kwargs: Any
-) -> Any:
-    return tc_retained_binding.retained_serving_binding_extra_from_prefetched_binding(
-        *args, **kwargs
-    )
-
-
-def parse_retained_serving_binding_authority(*args: Any, **kwargs: Any) -> Any:
-    return tc_retained_binding.parse_retained_serving_binding_authority(*args, **kwargs)
diff --git a/tensorcast/artifact_runtime/locator.py b/tensorcast/artifact_runtime/locator.py
new file mode 100644
index 00000000..5e0c04c4
--- /dev/null
+++ b/tensorcast/artifact_runtime/locator.py
@@ -0,0 +1,149 @@
+#  Copyright (c) 2026, TensorCast Team.
+
+"""Artifact runtime locator schema and resolution helpers."""
+
+from __future__ import annotations
+
+from collections.abc import Mapping
+from typing import Any
+from urllib.parse import quote
+
+from pydantic import BaseModel, ConfigDict, field_validator
+
+_ARTIFACT_LOCATOR_KINDS = {"version_key", "artifact_ref", "ranked_version_key"}
+ARTIFACT_LOCATOR_SCHEMA_VERSION = 1
+RANKED_VERSION_KEY_MEMBER_SEGMENT = "members"
+
+
+def _normalize_optional_text(value: Any) -> str | None:
+    if value is None:
+        return None
+    normalized = str(value).strip()
+    return normalized or None
+
+
+def _normalize_enum(value: Any, *, allowed: set[str], field_name: str) -> str:
+    normalized = str(value).strip().lower()
+    if normalized not in allowed:
+        raise ValueError(
+            f"{field_name} must be one of {sorted(allowed)}, got: {value!r}"
+        )
+    return normalized
+
+
+def _member_id_from_ref(member: Any) -> str:
+    if member is None:
+        raise ValueError(
+            "ranked_version_key artifact locator resolution requires a member"
+        )
+    if isinstance(member, Mapping):
+        member_id = member.get("member_id")
+    else:
+        member_id = getattr(member, "member_id", None)
+    normalized = _normalize_optional_text(member_id)
+    if normalized is None:
+        raise ValueError(
+            "ranked_version_key artifact locator resolution requires member.member_id"
+        )
+    return normalized
+
+
+def _member_from_placement(placement: Any | None) -> Any | None:
+    if placement is None:
+        return None
+    if isinstance(placement, Mapping):
+        return placement.get("member")
+    return getattr(placement, "member", None)
+
+
+def ranked_version_key_for_member(version_key: str, member: Any) -> str:
+    base_key = _normalize_optional_text(version_key)
+    if base_key is None:
+        raise ValueError("ranked_version_key base value is required")
+    member_id = quote(_member_id_from_ref(member), safe=":._-")
+    return f"{base_key.rstrip('/')}/{RANKED_VERSION_KEY_MEMBER_SEGMENT}/{member_id}"
+
+
+class ArtifactLocator(BaseModel):
+    model_config = ConfigDict(frozen=True, extra="forbid")
+
+    kind: str
+    value: str
+    schema_version: int = ARTIFACT_LOCATOR_SCHEMA_VERSION
+
+    @field_validator("kind", mode="before")
+    @classmethod
+    def _normalize_kind(cls, value: Any) -> str:
+        return _normalize_enum(
+            value,
+            allowed=_ARTIFACT_LOCATOR_KINDS,
+            field_name="artifact_locator.kind",
+        )
+
+    @field_validator("value", mode="before")
+    @classmethod
+    def _normalize_value(cls, value: Any) -> str:
+        normalized = _normalize_optional_text(value)
+        if normalized is None:
+            raise ValueError("artifact_locator.value is required")
+        return normalized
+
+    @classmethod
+    def artifact_ref(cls, artifact_ref: str) -> ArtifactLocator:
+        return cls(kind="artifact_ref", value=str(artifact_ref))
+
+    @classmethod
+    def version_key(cls, version_key: str) -> ArtifactLocator:
+        return cls(kind="version_key", value=str(version_key))
+
+    @classmethod
+    def ranked_version_key(cls, version_key: str) -> ArtifactLocator:
+        return cls(kind="ranked_version_key", value=str(version_key))
+
+    def resolve_version_key(
+        self,
+        *,
+        member: Any | None = None,
+        placement: Any | None = None,
+    ) -> str:
+        if self.kind == "artifact_ref":
+            return self.value
+        if self.kind == "ranked_version_key":
+            if member is None:
+                member = _member_from_placement(placement)
+            return ranked_version_key_for_member(self.value, member)
+        return self.value
+
+    def resolve_artifact_ref(
+        self,
+        *,
+        member: Any | None = None,
+        placement: Any | None = None,
+    ) -> str:
+        """Return the artifact id, consulting the runtime key mapping if needed."""
+        if self.kind == "artifact_ref":
+            return self.value
+
+        from tensorcast.api.store import get_runtime_context
+
+        # Keep the member-qualified key so a failed lookup reports what was queried.
+        version_key = self.resolve_version_key(member=member, placement=placement)
+        mapping = get_runtime_context().resolve_key_mapping_cached(key=version_key)
+        if isinstance(mapping, tuple):
+            artifact_id = mapping[0]
+        else:
+            artifact_id = getattr(mapping, "artifact_id", None)
+        if not artifact_id:
+            raise ValueError(
+                "artifact locator version key did not resolve to an artifact: "
+                f"{version_key!r}"
+            )
+        return artifact_id
+
+
+__all__ = [
+    "ARTIFACT_LOCATOR_SCHEMA_VERSION",
+    "ArtifactLocator",
+    "RANKED_VERSION_KEY_MEMBER_SEGMENT",
+    "ranked_version_key_for_member",
+]
diff --git a/tensorcast/artifact_runtime/policy.py b/tensorcast/artifact_runtime/policy.py
new file mode 100644
index 00000000..afafc88b
--- /dev/null
+++ b/tensorcast/artifact_runtime/policy.py
@@ -0,0 +1,161 @@
+#  Copyright (c) 2026, TensorCast Team.
+
+"""Artifact runtime policy schema and reload request helpers."""
+
+from __future__ import annotations
+
+from collections.abc import Mapping
+from typing import Any
+
+from pydantic import BaseModel, ConfigDict, field_validator, model_validator
+
+from tensorcast.artifact_runtime.locator import ArtifactLocator
+from tensorcast.types import RuntimeArtifactPolicy
+
+_POLICY_MODES = {"from_manifest", "pinned"}
+RUNTIME_POLICY_SCHEMA_VERSION = 1
+
+
+def _normalize_optional_text(value: Any) -> str | None:
+    if value is None:
+        return None
+    normalized = str(value).strip()
+    return normalized or None
+
+
+def _normalize_enum(value: Any, *, allowed: set[str], field_name: str) -> str:
+    normalized = str(value).strip().lower()
+    if normalized not in allowed:
+        raise ValueError(
+            f"{field_name} must be one of {sorted(allowed)}, got: {value!r}"
+        )
+    return normalized
+
+
+class RuntimePolicy(BaseModel):
+    model_config = ConfigDict(frozen=True, extra="forbid")
+
+    mode: str = "from_manifest"
+    manifest_ref: str | None = None
+    representation_contract_hash: str | None = None
+    serving_build_digest: str | None = None
+    schema_version: int = RUNTIME_POLICY_SCHEMA_VERSION
+
+    @field_validator("mode", mode="before")
+    @classmethod
+    def _normalize_mode(cls, value: Any) -> str:
+        if value is None:
+            return "from_manifest"
+        return _normalize_enum(
+            value,
+            allowed=_POLICY_MODES,
+            field_name="runtime.policy.mode",
+        )
+
+    @field_validator(
+        "manifest_ref",
+        "representation_contract_hash",
+        "serving_build_digest",
+        mode="before",
+    )
+    @classmethod
+    def _normalize_optional_fields(cls, value: Any) -> Any:
+        return _normalize_optional_text(value)
+
+    @model_validator(mode="after")
+    def _validate_pinned_policy(self) -> RuntimePolicy:
+        if self.mode != "pinned":
+            return self
+        missing = [
+            name
+            for name, value in (
+                ("manifest_ref", self.manifest_ref),
+                (
+                    "representation_contract_hash",
+                    self.representation_contract_hash,
+                ),
+                ("serving_build_digest", self.serving_build_digest),
+            )
+            if value is None
+        ]
+        if missing:
+            raise ValueError(
+                f"runtime.policy.mode='pinned' requires {', '.join(missing)}"
+            )
+        return self
+
+    def to_runtime_policy(self) -> RuntimeArtifactPolicy | None:
+        if self.mode == "from_manifest":
+            return None
+        return RuntimeArtifactPolicy(
+            require_manifest=True,
+            serving_manifest_ref=self.manifest_ref,
+            expected_representation_contract_hash=(self.representation_contract_hash),
+            expected_serving_build_digest=self.serving_build_digest,
+        )
+
+
+def normalize_runtime_reload_request_payload(
+    *,
+    artifact_locator: ArtifactLocator | Mapping[str, Any],
+    policy: RuntimePolicy | Mapping[str, Any] | None = None,
+) -> tuple[dict[str, Any], dict[str, Any]]:
+    """Normalize runtime reload locator/policy data to the stable wire shape."""
+
+    parsed_locator = (
+        artifact_locator
+        if isinstance(artifact_locator, ArtifactLocator)
+        else ArtifactLocator.model_validate(artifact_locator)
+    )
+    parsed_policy = (
+        policy
+        if isinstance(policy, RuntimePolicy)
+        else RuntimePolicy.model_validate(policy or {"mode": "from_manifest"})
+    )
+    locator_payload = {
+        "kind": parsed_locator.kind,
+        "value": parsed_locator.value,
+    }
+    policy_payload: dict[str, Any] = {"mode": parsed_policy.mode}
+    if parsed_policy.manifest_ref is not None:
+        policy_payload["manifest_ref"] = parsed_policy.manifest_ref
+    if parsed_policy.representation_contract_hash is not None:
+        policy_payload["representation_contract_hash"] = (
+            parsed_policy.representation_contract_hash
+        )
+    if parsed_policy.serving_build_digest is not None:
+        policy_payload["serving_build_digest"] = parsed_policy.serving_build_digest
+    return locator_payload, policy_payload
+
+
+def merge_runtime_reload_extra_config(
+    extra: Mapping[str, Any] | None,
+    *,
+    artifact_locator: ArtifactLocator | Mapping[str, Any],
+    policy: RuntimePolicy | Mapping[str, Any] | None = None,
+) -> dict[str, Any]:
+    """Return model-loader config with a normalized runtime reload request."""
+
+    normalized_locator, normalized_policy = normalize_runtime_reload_request_payload(
+        artifact_locator=artifact_locator,
+        policy=policy,
+    )
+    merged_extra = dict(extra or {})
+    if "serving" in merged_extra:
+        raise ValueError(
+            "TensorCast runtime reload config section 'serving' was removed; "
+            "use 'runtime_artifact'"
+        )
+    runtime_artifact = dict(merged_extra.get("runtime_artifact") or {})  # None-safe
+    runtime_artifact["artifact_locator"] = normalized_locator
+    runtime_artifact["policy"] = normalized_policy
+    merged_extra["runtime_artifact"] = runtime_artifact
+    return merged_extra
+
+
+__all__ = [
+    "RUNTIME_POLICY_SCHEMA_VERSION",
+    "RuntimePolicy",
+    "merge_runtime_reload_extra_config",
+    "normalize_runtime_reload_request_payload",
+]
diff --git a/tensorcast/artifact_runtime/publication/__init__.py b/tensorcast/artifact_runtime/publication/__init__.py
new file mode 100644
index 00000000..0d654d7a
--- /dev/null
+++ b/tensorcast/artifact_runtime/publication/__init__.py
@@ -0,0 +1,2 @@
+#  Copyright (c) 2026, TensorCast Team.
+"""Runtime publication and replica lifecycle helpers."""
diff --git a/tensorcast/artifact_runtime/publication/actions.py b/tensorcast/artifact_runtime/publication/actions.py
new file mode 100644
index 00000000..fa5e460c
--- /dev/null
+++ b/tensorcast/artifact_runtime/publication/actions.py
@@ -0,0 +1,110 @@
+#  Copyright (c) 2026, TensorCast Team.
+"""Artifact runtime replica publication actions.
+
+These helpers are the public artifact-runtime actions for publishing or
+retiring the replica represented by a realized runtime attachment. The current
+implementation delegates to ``tensorcast.artifact_runtime.publication.replica``
+while keeping callers away from ``ArtifactRuntimeSession``.
+"""
+
+from __future__ import annotations
+
+from collections.abc import Callable, Mapping
+from dataclasses import dataclass
+from typing import Any
+
+from tensorcast.artifact_runtime.attachment import RuntimeAttachment
+from tensorcast.artifact_runtime.config import TensorCastRuntimeConfig
+from tensorcast.artifact_runtime.publication import replica as replica_publication
+
+
+@dataclass(frozen=True)
+class RuntimeReplicaPublicationSettings:
+    """Runtime replica publication settings parsed from loader configuration."""
+
+    policy: object
+    ensure_runtime_initialized: Callable[[], None]
+
+    @property
+    def drain_timeout_s(self) -> float:
+        return float(getattr(self.policy, "drain_timeout_s", 30.0))
+
+
+def runtime_replica_publication_settings(
+    config: TensorCastRuntimeConfig | Mapping[str, Any] | None = None,
+) -> RuntimeReplicaPublicationSettings:
+    """Parse publication settings from runtime loader configuration."""
+
+    parsed = (
+        config
+        if isinstance(config, TensorCastRuntimeConfig)
+        else TensorCastRuntimeConfig.from_mapping(config or {})
+    )
+    return RuntimeReplicaPublicationSettings(
+        policy=parsed.replica_publication,
+        ensure_runtime_initialized=parsed.runtime.ensure_initialized,
+    )
+
+
+def publish_runtime_replica(
+    *,
+    current_attachment: RuntimeAttachment,
+    policy: object,
+    ensure_runtime_initialized: Callable[[], None],
+    profile_sink: Callable[[Mapping[str, object]], object] | None = None,
+) -> RuntimeAttachment:
+    """Publish the current artifact-backed runtime attachment as a replica."""
+
+    return replica_publication.publish_current_replica(
+        current_attachment=current_attachment,
+        policy=policy,
+        ensure_runtime_initialized=ensure_runtime_initialized,
+        profile_sink=profile_sink,
+    )
+
+
+def project_runtime_replica_publication_state(
+    *,
+    current_attachment: RuntimeAttachment,
+    state: str,
+    reason: str | None = None,
+    operation_id: str | None = None,
+) -> RuntimeAttachment:
+    """Return an attachment with an observational publication projection."""
+
+    return replica_publication.project_current_replica_publication_state(
+        current_attachment=current_attachment,
+        state=state,
+        reason=reason,
+        operation_id=operation_id,
+    )
+
+
+def retire_runtime_replica(
+    *,
+    current_attachment: RuntimeAttachment,
+    reason: str = "retire",
+    drain_timeout_s: float | None = None,
+    default_drain_timeout_s: float | None = None,
+    ensure_runtime_initialized: Callable[[], None],
+    profile_sink: Callable[[Mapping[str, object]], object] | None = None,
+) -> RuntimeAttachment:
+    """Retire the published replica tied to a runtime attachment."""
+
+    return replica_publication.retire_current_replica(
+        current_attachment=current_attachment,
+        reason=reason,
+        drain_timeout_s=drain_timeout_s,
+        default_drain_timeout_s=default_drain_timeout_s,
+        ensure_runtime_initialized=ensure_runtime_initialized,
+        profile_sink=profile_sink,
+    )
+
+
+__all__ = [
+    "RuntimeReplicaPublicationSettings",
+    "project_runtime_replica_publication_state",
+    "publish_runtime_replica",
+    "retire_runtime_replica",
+    "runtime_replica_publication_settings",
+]
diff --git a/tensorcast/artifact_runtime/publication/context.py b/tensorcast/artifact_runtime/publication/context.py
new file mode 100644
index 00000000..bef0ff2e
--- /dev/null
+++ b/tensorcast/artifact_runtime/publication/context.py
@@ -0,0 +1,113 @@
+#  Copyright (c) 2026, TensorCast Team.
+
+"""Artifact runtime publication context helpers for recipe-backed artifacts."""
+
+from __future__ import annotations
+
+from dataclasses import dataclass
+from typing import Any
+
+from tensorcast.artifact_runtime.contract import logical_topology_json
+from tensorcast.types import BuilderMode, RuntimeArtifactBuildIntent, RuntimeTopologyRef
+
+
+@dataclass(frozen=True)
+class RecipePublicationContext:
+    source_artifact_ref: str
+    framework_name: str
+    adapter_version: str
+    serving_abi_version: str
+    logical_topology_json: str | None = None
+
+
+def logical_topology_json_from_recipe(
+    recipe: Any,
+    *,
+    topology: RuntimeTopologyRef | None = None,
+    framework_payload: dict[str, Any] | None = None,
+) -> str | None:
+    if topology is None:
+        if (
+            getattr(recipe, "topology_ref", None) is None
+            and getattr(recipe, "member_ref", None) is None
+        ):
+            return None
+        raise ValueError(
+            "TensorCast publication manifest requires RuntimeTopologyRef for "
+            "a topology-sensitive recipe"
+        )
+    return logical_topology_json(
+        topology,
+        framework_payload=framework_payload or {},
+    )
+
+
+def publication_context_from_recipe(
+    recipe: Any,
+    *,
+    logical_topology_json_payload: str | None = None,
+) -> RecipePublicationContext:
+    return RecipePublicationContext(
+        source_artifact_ref=recipe.source_artifact_ref,
+        framework_name=recipe.runtime_facts.framework_name,
+        adapter_version=recipe.runtime_facts.adapter_version,
+        serving_abi_version=recipe.runtime_facts.serving_abi_version,
+        logical_topology_json=logical_topology_json_payload,
+    )
+
+
+def build_recipe_runtime_build_intent(
+    context: RecipePublicationContext,
+    *,
+    builder_mode: BuilderMode,
+    build_pipeline_version: str,
+    representation_contract_hash: str | None = None,
+) -> RuntimeArtifactBuildIntent:
+    return RuntimeArtifactBuildIntent(
+        representation_contract_hash=representation_contract_hash,
+        builder_mode=builder_mode,
+        framework_name=context.framework_name,
+        adapter_version=context.adapter_version,
+        serving_abi_version=context.serving_abi_version,
+        build_pipeline_version=str(build_pipeline_version),
+        source_artifact_ref=context.source_artifact_ref,
+    )
+
+
+def build_pure_transform_build_intent(
+    context: RecipePublicationContext,
+    *,
+    build_pipeline_version: str,
+    representation_contract_hash: str | None = None,
+) -> RuntimeArtifactBuildIntent:
+    return build_recipe_runtime_build_intent(
+        context,
+        builder_mode=BuilderMode.PURE_TRANSFORM,
+        build_pipeline_version=build_pipeline_version,
+        representation_contract_hash=representation_contract_hash,
+    )
+
+
+def build_binding_finalize_build_intent(
+    context: RecipePublicationContext,
+    *,
+    build_pipeline_version: str,
+    representation_contract_hash: str,
+) -> RuntimeArtifactBuildIntent:
+    return build_recipe_runtime_build_intent(
+        context,
+        builder_mode=BuilderMode.BINDING_FINALIZE,
+        build_pipeline_version=build_pipeline_version,
+        representation_contract_hash=str(representation_contract_hash),
+    )
+
+
+__all__ = [
+    "RecipePublicationContext",
+    "build_binding_finalize_build_intent",
+    "build_pure_transform_build_intent",
+    "build_recipe_runtime_build_intent",
+    "logical_topology_json",
+    "logical_topology_json_from_recipe",
+    "publication_context_from_recipe",
+]
diff --git a/tensorcast/serving/replica_publication.py b/tensorcast/artifact_runtime/publication/replica.py
similarity index 96%
rename from tensorcast/serving/replica_publication.py
rename to tensorcast/artifact_runtime/publication/replica.py
index f4d2a1ef..7334d577 100644
--- a/tensorcast/serving/replica_publication.py
+++ b/tensorcast/artifact_runtime/publication/replica.py
@@ -25,12 +25,12 @@
     release_contract_for,
     report_for_publication,
 )
-from tensorcast.serving.errors import ReplicaPublicationError
-from tensorcast.serving.runtime_attachment import (
+from tensorcast.artifact_runtime.attachment import (
     RuntimeAttachment,
     RuntimeBindingState,
 )
-from tensorcast.serving.runtime_view import (
+from tensorcast.artifact_runtime.errors import ReplicaPublicationError
+from tensorcast.artifact_runtime.view import (
     BindingValueRefProjection,
     PublishedReplicaProjection,
 )
@@ -306,7 +306,7 @@ def _attachment_with_published_replica(
     projection: PublishedReplicaProjection,
 ) -> RuntimeAttachment:
     binding = state_publication_binding(attachment.state)
-    spec = ArtifactRealizationSpec.publication(target=projection)
+    spec = ArtifactRealizationSpec._publication(target=projection)
     target_layout_digest = (
         projection.binding_layout_id
         or attachment.view.endpoint.weight_version.binding_layout_id
@@ -784,7 +784,7 @@ def reject_reload_with_active_publication(
     published = current_attachment.view.endpoint.weight_version.published_replica
     if published is not None and published.state in _ACTIVE_PUBLICATION_STATES:
         raise ReplicaPublicationError(
-            "TensorCast serving reload requires retiring the active published "
+            "TensorCast runtime reload requires retiring the active published "
             "replica before swap",
             operation="reload",
             details={
@@ -798,7 +798,7 @@ def reject_reload_with_active_publication(
     if not binding_has_active_published_replica(binding):
         return
     raise ReplicaPublicationError(
-        "TensorCast serving reload found an active published replica on the "
+        "TensorCast runtime reload found an active published replica on the "
         "runtime binding but no active attachment projection; retire the "
         "current replica before swap",
         operation="reload",
@@ -810,12 +810,37 @@ def reject_reload_with_active_publication(
     )
 
 
+def _policy_validation_payload(policy: object) -> object:
+    """Turn ``policy`` into a plain payload: dict copy, ``model_dump``, or as-is."""
+    if isinstance(policy, Mapping):
+        return dict(policy)
+    dumper = getattr(policy, "model_dump", None)
+    # Objects without a callable ``model_dump`` are passed through untouched.
+    return dumper(mode="python") if callable(dumper) else policy
+
+
+def replica_publication_policy(
+    policy: object | None,
+    *,
+    default_policy: object,
+) -> object:
+    """Resolve ``policy`` against the type of ``default_policy``."""
+    if policy is None:
+        return default_policy
+    policy_type = type(default_policy)
+    if isinstance(policy, policy_type):
+        return policy
+    coerce = getattr(policy_type, "model_validate", None)
+    return coerce(_policy_validation_payload(policy)) if callable(coerce) else policy
+
+
 __all__ = [
     "binding_has_active_published_replica",
     "project_current_replica_publication_state",
     "publication_generation",
     "publish_current_replica",
     "reject_reload_with_active_publication",
+    "replica_publication_policy",
     "retire_current_replica",
     "state_publication_binding",
 ]
diff --git a/tensorcast/serving/readiness.py b/tensorcast/artifact_runtime/readiness.py
similarity index 71%
rename from tensorcast/serving/readiness.py
rename to tensorcast/artifact_runtime/readiness.py
index 62c5c033..b2bb951e 100644
--- a/tensorcast/serving/readiness.py
+++ b/tensorcast/artifact_runtime/readiness.py
@@ -1,19 +1,19 @@
 #  Copyright (c) 2026, TensorCast Team.
 
-"""Framework-neutral serving readiness and admission helpers."""
+"""Artifact runtime readiness and admission helpers."""
 
 from __future__ import annotations
 
 from collections.abc import Callable
 from typing import Any
 
-from tensorcast.types import BuilderMode, FinalizeClass, ServingSupportLevel
+from tensorcast.types import BuilderMode, FinalizeClass, RuntimeSupportLevel
 
-_SUPPORT_LEVEL_ORDER: dict[ServingSupportLevel, int] = {
-    ServingSupportLevel.BLOCKED: -1,
-    ServingSupportLevel.SOURCE_BIND_BOOTSTRAP_ONLY: 0,
-    ServingSupportLevel.BUILDER_PUBLICATION_READY: 1,
-    ServingSupportLevel.RUNTIME_BIND_SWAP_READY: 2,
+_SUPPORT_LEVEL_ORDER: dict[RuntimeSupportLevel, int] = {
+    RuntimeSupportLevel.BLOCKED: -1,
+    RuntimeSupportLevel.SOURCE_BIND_BOOTSTRAP_ONLY: 0,
+    RuntimeSupportLevel.BUILDER_PUBLICATION_READY: 1,
+    RuntimeSupportLevel.RUNTIME_BIND_SWAP_READY: 2,
 }
 
 
@@ -29,32 +29,32 @@ def coerce_finalize_class(
     return FinalizeClass(str(value).strip())
 
 
-def coerce_serving_support_level(
+def coerce_runtime_support_level(
     value: Any,
     *,
-    default: ServingSupportLevel = ServingSupportLevel.SOURCE_BIND_BOOTSTRAP_ONLY,
-) -> ServingSupportLevel:
+    default: RuntimeSupportLevel = RuntimeSupportLevel.SOURCE_BIND_BOOTSTRAP_ONLY,
+) -> RuntimeSupportLevel:
     if value is None:
         return default
-    if isinstance(value, ServingSupportLevel):
+    if isinstance(value, RuntimeSupportLevel):
         return value
     normalized = str(value).strip().lower()
-    return ServingSupportLevel(normalized)
+    return RuntimeSupportLevel(normalized)
 
 
-def serving_support_level_at_least(
-    value: ServingSupportLevel | str,
-    minimum: ServingSupportLevel | str,
+def runtime_support_level_at_least(
+    value: RuntimeSupportLevel | str,
+    minimum: RuntimeSupportLevel | str,
 ) -> bool:
-    resolved_value = coerce_serving_support_level(value)
-    resolved_minimum = coerce_serving_support_level(minimum)
+    resolved_value = coerce_runtime_support_level(value)
+    resolved_minimum = coerce_runtime_support_level(minimum)
     return (
         _SUPPORT_LEVEL_ORDER[resolved_value] >= _SUPPORT_LEVEL_ORDER[resolved_minimum]
     )
 
 
-def serving_support_level_display_name(value: ServingSupportLevel | str) -> str:
-    return coerce_serving_support_level(value).value
+def runtime_support_level_display_name(value: RuntimeSupportLevel | str) -> str:
+    return coerce_runtime_support_level(value).value
 
 
 def readiness_family(row: Any) -> str:
@@ -75,8 +75,8 @@ def readiness_post_bind_finalize_class(row: Any) -> FinalizeClass:
     )
 
 
-def readiness_support_level(row: Any) -> ServingSupportLevel:
-    return coerce_serving_support_level(getattr(row, "support_level", None))
+def readiness_support_level(row: Any) -> RuntimeSupportLevel:
+    return coerce_runtime_support_level(getattr(row, "support_level", None))
 
 
 def readiness_publication_modes(row: Any) -> tuple[str, ...]:
@@ -94,9 +94,9 @@ def is_pure_transform_publication_allowlisted(row: Any) -> bool:
         pure_transform_candidate
         and readiness_process_after_load_class(row) == FinalizeClass.RUNTIME_ONLY
         and readiness_post_bind_finalize_class(row) == FinalizeClass.RUNTIME_ONLY
-        and serving_support_level_at_least(
+        and runtime_support_level_at_least(
             readiness_support_level(row),
-            ServingSupportLevel.BUILDER_PUBLICATION_READY,
+            RuntimeSupportLevel.BUILDER_PUBLICATION_READY,
         )
     )
 
@@ -110,9 +110,9 @@ def is_binding_finalize_publication_allowlisted(row: Any) -> bool:
     )
     return (
         binding_finalize_candidate
-        and serving_support_level_at_least(
+        and runtime_support_level_at_least(
             readiness_support_level(row),
-            ServingSupportLevel.BUILDER_PUBLICATION_READY,
+            RuntimeSupportLevel.BUILDER_PUBLICATION_READY,
         )
         and readiness_process_after_load_class(row)
         == FinalizeClass.REPRESENTATION_CHANGING
@@ -121,22 +121,19 @@ def is_binding_finalize_publication_allowlisted(row: Any) -> bool:
 
 
 def is_runtime_bind_swap_allowlisted(row: Any) -> bool:
-    allowed = bool(
-        getattr(row, "runtime_bind_swap_allowed", False)
-        or getattr(row, "serving_only_runtime_allowed", False)
-    )
+    allowed = bool(getattr(row, "runtime_bind_swap_allowed", False))
     return (
         allowed
         and readiness_post_bind_finalize_class(row) == FinalizeClass.RUNTIME_ONLY
-        and serving_support_level_at_least(
+        and runtime_support_level_at_least(
             readiness_support_level(row),
-            ServingSupportLevel.RUNTIME_BIND_SWAP_READY,
+            RuntimeSupportLevel.RUNTIME_BIND_SWAP_READY,
         )
     )
 
 
 class ReadinessInventoryAdmissionPolicy:
-    """AdmissionPolicy implementation backed by a framework readiness resolver."""
+    """Admission policy backed by a framework readiness resolver."""
 
     def __init__(
         self,
@@ -148,14 +145,14 @@ def __init__(
         self._endpoint_fields = endpoint_fields
 
     def admit(self, request: Any) -> Any:
-        from tensorcast.serving.hosts import AdmissionDecision
+        from tensorcast.artifact_runtime.host import RuntimeAdmissionDecision
 
         row = self._resolve_readiness(request.model_config)
         missing_semantic_proofs = (
             request.placement_admission.missing_framework_semantic_proofs()
         )
         allowed = is_runtime_bind_swap_allowlisted(row) and not missing_semantic_proofs
-        support_level = serving_support_level_display_name(readiness_support_level(row))
+        support_level = runtime_support_level_display_name(readiness_support_level(row))
         if missing_semantic_proofs:
             support_level = (
                 f"{support_level}:placement_missing_semantic_proof:"
@@ -170,7 +167,7 @@ def admit(self, request: Any) -> Any:
         endpoint_fields: dict[str, object] = {"family": family}
         if self._endpoint_fields is not None:
             endpoint_fields.update(self._endpoint_fields(row))
-        return AdmissionDecision(
+        return RuntimeAdmissionDecision(
             family=family,
             support_level=support_level,
             startup_allowed=allowed,
@@ -183,7 +180,7 @@ def admit(self, request: Any) -> Any:
 __all__ = [
     "ReadinessInventoryAdmissionPolicy",
     "coerce_finalize_class",
-    "coerce_serving_support_level",
+    "coerce_runtime_support_level",
     "is_binding_finalize_publication_allowlisted",
     "is_pure_transform_publication_allowlisted",
     "is_runtime_bind_swap_allowlisted",
@@ -192,6 +189,6 @@ def admit(self, request: Any) -> Any:
     "readiness_process_after_load_class",
     "readiness_publication_modes",
     "readiness_support_level",
-    "serving_support_level_at_least",
-    "serving_support_level_display_name",
+    "runtime_support_level_at_least",
+    "runtime_support_level_display_name",
 ]
diff --git a/tensorcast/artifact_runtime/recipe/__init__.py b/tensorcast/artifact_runtime/recipe/__init__.py
new file mode 100644
index 00000000..330d7af8
--- /dev/null
+++ b/tensorcast/artifact_runtime/recipe/__init__.py
@@ -0,0 +1,2 @@
+#  Copyright (c) 2026, TensorCast Team.
+"""Runtime recipe build, compilation, trace, and materialization helpers."""
diff --git a/tensorcast/serving/recipe_build.py b/tensorcast/artifact_runtime/recipe/build.py
similarity index 79%
rename from tensorcast/serving/recipe_build.py
rename to tensorcast/artifact_runtime/recipe/build.py
index 7420bdd1..5a0a3e7d 100644
--- a/tensorcast/serving/recipe_build.py
+++ b/tensorcast/artifact_runtime/recipe/build.py
@@ -1,5 +1,5 @@
 #  Copyright (c) 2026, TensorCast Team.
-"""Framework-neutral recipe build identity and cache helpers."""
+"""Framework-neutral runtime recipe build and cache helpers."""
 
 from __future__ import annotations
 
@@ -11,12 +11,12 @@
 import threading
 import time
 from collections import OrderedDict
-from collections.abc import Callable, Iterator, MutableMapping, Sequence
+from collections.abc import Callable, Iterator, Mapping, MutableMapping, Sequence
 from dataclasses import dataclass
 from pathlib import Path
-from typing import Any
+from typing import Any, cast
 
-from tensorcast.serving.binding_plan import ServingBindingPlan
+from tensorcast.artifact_runtime.recipe.identity import RuntimeBindingPlan
 
 _LOGGER = logging.getLogger(__name__)
 
@@ -40,7 +40,7 @@ def stable_recipe_build_hash(payload: dict[str, Any]) -> str:
 
 
 def compute_trace_cache_key(
-    identity: ServingBindingPlan,
+    identity: RuntimeBindingPlan,
     *,
     metadata_fingerprint: str,
 ) -> str:
@@ -50,7 +50,7 @@ def compute_trace_cache_key(
 
 
 def compute_recipe_cache_key(
-    identity: ServingBindingPlan,
+    identity: RuntimeBindingPlan,
     *,
     metadata_fingerprint: str,
 ) -> str:
@@ -84,6 +84,115 @@ class RecipeBuildCacheConfig:
     synchronous_recipe_cache_write: bool = False
 
 
+def _optional_bool(fields: Mapping[str, object], name: str, default: bool) -> bool:
+    """Return ``bool(fields[name])``, or ``default`` when missing or ``None``."""
+    raw = fields.get(name)
+    # A missing key and an explicit ``None`` both select the default.
+    return default if raw is None else bool(raw)
+
+
+def _optional_path(value: object | None) -> Path | None:
+    """Convert ``value`` to a user-expanded ``Path``; ``None``/blank give ``None``."""
+    if value is None:
+        return None
+    stripped = str(value).strip()
+    # Whitespace-only input is treated the same as no path at all.
+    return Path(stripped).expanduser() if stripped else None
+
+
+def _unique_paths(paths: Sequence[Path]) -> tuple[Path, ...]:
+    """Drop duplicate paths, keeping the first occurrence of each in order.
+
+    Two paths are considered duplicates when their ``str()`` forms are equal.
+    """
+    first_by_text: dict[str, Path] = {}
+    for candidate in paths:
+        # ``setdefault`` keeps the earliest path stored for a given string.
+        first_by_text.setdefault(str(candidate), candidate)
+    return tuple(first_by_text.values())
+
+
+def _selected_file_parent_paths(source_catalog: object) -> tuple[str, ...]:
+    """Return the resolved parent directory of every selected file with a path.
+
+    A missing or ``None`` ``selected_files`` attribute yields an empty tuple.
+    """
+    raw_entries = getattr(source_catalog, "selected_files", None)
+    if raw_entries is None:
+        return ()
+    locations = (getattr(e, "path", None) for e in cast(Sequence[object], raw_entries))
+    return tuple(
+        str(Path(p).expanduser().resolve().parent) for p in locations if p is not None
+    )
+
+
+def _model_adjacent_cache_root(source_catalog: object) -> Path | None:
+    """Cache root under the selected files' common parent; ``None`` if no paths."""
+    parents = _selected_file_parent_paths(source_catalog)
+    shared = Path(os.path.commonpath(parents)) if parents else None
+    return None if shared is None else shared / ".tensorcast" / "bootstrap_cache"
+
+
+def _is_writable_or_creatable(path: Path) -> bool:
+    """True if ``path`` is writable, or its nearest existing ancestor is."""
+    if path.exists():
+        return os.access(path, os.W_OK)
+    # Walk upward to the first ancestor that exists (or the filesystem root).
+    ancestor = next((p for p in path.parents if p.exists()), None)
+    return ancestor is not None and os.access(ancestor, os.W_OK)
+
+
+def recipe_build_cache_config_from_policy(
+    policy: object,
+    *,
+    source_catalog: object,
+) -> RecipeBuildCacheConfig:
+    """Build the recipe cache config (read/write roots and flags) from ``policy``."""
+    fields = dict(getattr(policy, "fields", None) or {})
+    explicit_cache_root = _optional_bool(fields, "explicit_cache_root", False)
+    prefer_model_adjacent = _optional_bool(fields, "prefer_model_adjacent", True)
+    cache_root = _optional_path(fields.get("cache_root"))
+    # Resolve the model-adjacent root once: it resolves every selected file path.
+    model_adjacent = (
+        _model_adjacent_cache_root(source_catalog) if prefer_model_adjacent else None
+    )
+    roots: list[Path] = []
+    write_roots: list[Path] = []
+    if model_adjacent is not None:
+        roots.append(model_adjacent)
+        if _is_writable_or_creatable(model_adjacent):
+            write_roots.append(model_adjacent)
+    # cache_root is added when explicit, or as the fallback for an empty list.
+    if cache_root is not None and (explicit_cache_root or not roots):
+        roots.append(cache_root)
+    if cache_root is not None and (explicit_cache_root or not write_roots):
+        write_roots.append(cache_root)
+    roots = list(_unique_paths(roots))
+    write_roots = list(_unique_paths(write_roots))
+
+    debug_output_dir = _optional_path(fields.get("debug_output_dir"))
+    return RecipeBuildCacheConfig(
+        cache_dirs=tuple(str(root / "trace_plans") for root in roots),
+        trace_write_dirs=tuple(str(root / "trace_plans") for root in write_roots),
+        recipe_cache_dirs=tuple(str(root / "compiled_recipes") for root in roots),
+        recipe_cache_write_dirs=tuple(
+            str(root / "compiled_recipes") for root in write_roots
+        ),
+        debug_output_dir=debug_output_dir,
+        allow_cache=_optional_bool(fields, "allow_cache", True),
+        allow_recipe_cache=_optional_bool(fields, "allow_recipe_cache", True),
+        allow_trace=_optional_bool(fields, "allow_trace", True),
+        trace_tp_slices=_optional_bool(fields, "trace_tp_slices", True),
+        debug_dump_trace=_optional_bool(fields, "debug_dump_trace", False),
+        synchronous_cache_write=_optional_bool(
+            fields, "synchronous_cache_write", False
+        ),
+        synchronous_recipe_cache_write=_optional_bool(
+            fields, "synchronous_recipe_cache_write", False
+        ),
+    )
+
+
 DEFAULT_RECIPE_BUILD_MEMORY_CACHE_ENTRIES = 128
 
 
@@ -176,6 +285,119 @@ class RecipeBuildRunResult:
     diagnostics: dict[str, Any]
 
 
+@dataclass(frozen=True)
+class RecipeBuildSessionRequest:
+    source_subject: Any | None = None
+    framework_config: Any | None = None
+    model_config: Any | None = None  # recipe_build_identity raises if None
+    placement: Any | None = None
+    cache_config: Any | None = None  # may supply trace_cache_schema_version
+    identity: RuntimeBindingPlan | None = None  # used as-is when provided
+    trace_cache_schema_version: int | None = None  # None: defer to cache_config
+    tp_rank: int | None = None  # None: recipe_build_identity derives it
+    tp_world_size: int | None = None  # None: recipe_build_identity derives it
+
+
+@dataclass(frozen=True)
+class RecipeBuildResult:
+    session: "RecipeBuildSession"  # forward reference; class is defined below
+    recipe: Any | None = None
+    diagnostics: Mapping[str, Any] | None = None
+
+
+def _adapter_text(
+    adapter: Any | None,
+    method_name: str,
+    *args: Any,
+) -> str:
+    """Stringify ``adapter.<method_name>(*args)``; empty string if not callable."""
+    bound = getattr(adapter, method_name, None)
+    # A missing adapter or a non-callable attribute both produce an empty string.
+    return str(bound(*args)) if callable(bound) else ""
+
+
+def recipe_build_identity(
+    request: RecipeBuildSessionRequest,
+    *,
+    adapter: Any | None,
+    placement: Any | None,
+) -> RuntimeBindingPlan:
+    """Assemble the ``RuntimeBindingPlan`` identity for a recipe build.
+
+    Explicit ``request`` values win. Otherwise the trace-cache schema version
+    is read from ``request.cache_config`` (default 1), and the TP rank and
+    world size come from the ``placement`` argument, then from the runtime
+    placement's member, then default to 0 and 1. Only the ``placement``
+    argument is consulted here, not ``request.placement``. Raises
+    ``ValueError`` when ``request.model_config`` is ``None``.
+    """
+    config = request.model_config
+    if config is None:
+        raise ValueError("RecipeBuildSessionRequest requires model_config")
+    runtime_placement = getattr(placement, "runtime_placement", placement)
+    member = getattr(runtime_placement, "member", None)
+    payload_fn = getattr(runtime_placement, "stable_identity_payload", None)
+    if callable(payload_fn):
+        placement_payload = payload_fn()
+    else:
+        placement_payload = getattr(placement, "identity_payload", None)
+        if placement_payload is None:
+            placement_payload = getattr(runtime_placement, "identity_payload", None)
+    schema_version = request.trace_cache_schema_version
+    if schema_version is None:
+        schema_version = getattr(request.cache_config, "trace_cache_schema_version", 1)
+    rank = request.tp_rank
+    if rank is None:
+        rank = getattr(placement, "tp_rank", None)
+    if rank is None and member is not None:
+        rank = getattr(member, "member_index", None)
+    world_size = request.tp_world_size
+    if world_size is None:
+        world_size = getattr(placement, "tp_world_size", None)
+    if world_size is None and member is not None:
+        world_size = getattr(member, "member_count", None)
+    compute_hash = getattr(config, "compute_hash", None)
+    # Queried once; the value fills both runtime_version and framework_version.
+    framework_version = _adapter_text(adapter, "framework_version")
+    return RuntimeBindingPlan(
+        model_hash=str(
+            compute_hash()
+            if callable(compute_hash)
+            else getattr(config, "model", "unknown")
+        ),
+        model_id=str(getattr(config, "model", "unknown")),
+        model_revision=getattr(config, "revision", None),
+        dtype=str(getattr(config, "dtype", "unknown")),
+        runtime_version=framework_version,
+        framework_name=_adapter_text(adapter, "framework_name"),
+        framework_version=framework_version,
+        adapter_version=_adapter_text(adapter, "adapter_version"),
+        serving_abi_version=_adapter_text(adapter, "serving_abi_version", config),
+        trace_cache_schema_version=int(schema_version),
+        tp_rank=int(rank or 0),
+        tp_world_size=int(world_size or 1),
+        topology_ref=getattr(runtime_placement, "topology", None),
+        member_ref=member,
+        placement=placement_payload,
+    )
+
+
+def build_recipe_session(
+    request: RecipeBuildSessionRequest,
+    *,
+    adapter: Any | None = None,
+    placement: Any | None = None,
+) -> "RecipeBuildSession":
+    """Create a ``RecipeBuildSession`` for ``request``.
+
+    Uses ``request.identity`` when set, else ``recipe_build_identity(...)``.
+    """
+    plan = request.identity
+    if plan is None:
+        plan = recipe_build_identity(request, adapter=adapter, placement=placement)
+    return RecipeBuildSession(plan)
+
+
 def _metadata_fingerprint(source_catalog: Any) -> str:
     return str(getattr(source_catalog, "metadata_fingerprint", ""))
 
@@ -187,7 +409,7 @@ def _cache_config_attr(cache_config: Any, name: str, default: Any) -> Any:
 class RecipeBuildSession:
     """Small core-owned shell for stable recipe build cache identity."""
 
-    def __init__(self, identity: ServingBindingPlan) -> None:
+    def __init__(self, identity: RuntimeBindingPlan) -> None:
         self.identity = identity
 
     def trace_cache_key(self, *, metadata_fingerprint: str) -> str:
@@ -226,26 +448,26 @@ def recipe_cache_path(
             tp_rank=self.identity.tp_rank,
         )
 
-    def compile_identity(self, *, serving_facts: Any) -> Any:
-        return ServingBindingPlan(
+    def compile_identity(self, *, runtime_facts: Any) -> Any:
+        return RuntimeBindingPlan(
             model_id=self.identity.model_id,
             model_revision=self.identity.model_revision,
             dtype=self.identity.dtype,
             model_hash=self.identity.model_hash,
             runtime_version=self.identity.runtime_version,
             framework_name=getattr(
-                serving_facts, "framework_name", self.identity.framework_name
+                runtime_facts, "framework_name", self.identity.framework_name
             ),
             adapter_version=getattr(
-                serving_facts, "adapter_version", self.identity.adapter_version
+                runtime_facts, "adapter_version", self.identity.adapter_version
             ),
             serving_abi_version=getattr(
-                serving_facts,
+                runtime_facts,
                 "serving_abi_version",
                 self.identity.serving_abi_version,
             ),
             framework_version=getattr(
-                serving_facts, "framework_version", self.identity.framework_version
+                runtime_facts, "framework_version", self.identity.framework_version
             ),
             trace_cache_schema_version=self.identity.trace_cache_schema_version,
             tp_rank=self.identity.tp_rank,
@@ -257,7 +479,7 @@ def compile_identity(self, *, serving_facts: Any) -> Any:
 
     def compile_recipe(self, *, inputs: Any) -> Any:
         return self.compile_recipe_from_inputs(
-            identity=self.compile_identity(serving_facts=inputs.serving_facts),
+            identity=self.compile_identity(runtime_facts=inputs.runtime_facts),
             inputs=inputs,
         )
 
@@ -270,7 +492,7 @@ def build_recipe(
         framework_adapter: Any,
         build_meta_model: Callable[[], Any],
         cache_config: Any,
-        is_reserved_serving_tensor_name: Callable[[str], bool],
+        is_reserved_runtime_tensor_name: Callable[[str], bool],
         semantic_validation_spec: object | None = None,
         trace_capture_fn: Callable[[Any, list[str], dict[str, Any]], Any] | None = None,
         trace_plan_memory_cache: MutableMapping[str, Any] | None = None,
@@ -392,15 +614,15 @@ def build_recipe(
                 "meta_model_class": type(meta_model).__name__,
             },
         )
-        serving_facts = self.collect_serving_facts(
+        runtime_facts = self.collect_runtime_facts(
             meta_model,
             model_config,
             framework_adapter,
         )
         tensor_schema = self.collect_tensor_schema(
             meta_model,
-            runtime_only_tensor_names=serving_facts.runtime_only_tensor_names,
-            is_reserved_serving_tensor_name=is_reserved_serving_tensor_name,
+            runtime_only_tensor_names=runtime_facts.runtime_only_tensor_names,
+            is_reserved_runtime_tensor_name=is_reserved_runtime_tensor_name,
         )
         resolved_semantic_validation_spec = self.resolve_semantic_validation_spec(
             meta_model,
@@ -413,20 +635,20 @@ def build_recipe(
             "recipe.collect_model_metadata",
             {
                 "support_level": getattr(
-                    serving_facts.support_level, "value", serving_facts.support_level
+                    runtime_facts.support_level, "value", runtime_facts.support_level
                 ),
                 "process_after_load_class": getattr(
-                    serving_facts.process_after_load_class,
+                    runtime_facts.process_after_load_class,
                     "value",
-                    serving_facts.process_after_load_class,
+                    runtime_facts.process_after_load_class,
                 ),
                 "post_bind_finalize_class": getattr(
-                    serving_facts.post_bind_finalize_class,
+                    runtime_facts.post_bind_finalize_class,
                     "value",
-                    serving_facts.post_bind_finalize_class,
+                    runtime_facts.post_bind_finalize_class,
                 ),
                 "runtime_only_tensor_count": len(
-                    serving_facts.runtime_only_tensor_names
+                    runtime_facts.runtime_only_tensor_names
                 ),
                 "tensor_schema_count": len(tensor_schema),
             },
@@ -556,7 +778,7 @@ def build_recipe(
             inputs=self._recipe_compile_inputs(
                 source_catalog=source_catalog,
                 trace_plan=trace_plan,
-                serving_facts=serving_facts,
+                runtime_facts=runtime_facts,
                 tensor_schema=tensor_schema,
                 semantic_validation_spec=resolved_semantic_validation_spec,
             )
@@ -672,16 +894,16 @@ def _recipe_compile_inputs(
         *,
         source_catalog: Any,
         trace_plan: Any,
-        serving_facts: Any,
+        runtime_facts: Any,
         tensor_schema: Any,
         semantic_validation_spec: Any,
     ) -> Any:
-        from tensorcast.serving.builder import compiler as tc_compiler
+        import tensorcast.artifact_runtime.recipe.compiler as tc_compiler
 
         return tc_compiler.RecipeCompileInputs(
             source_catalog=source_catalog,
             trace_plan=trace_plan,
-            serving_facts=serving_facts,
+            runtime_facts=runtime_facts,
             tensor_schema=tensor_schema,
             semantic_validation_spec=semantic_validation_spec,
         )
@@ -773,8 +995,8 @@ def rebind_cached_recipe_template(
     ) -> Any:
         from dataclasses import replace
 
-        from tensorcast.serving.builder import compiler as tc_compiler
-        from tensorcast.serving.source_catalog import (
+        import tensorcast.artifact_runtime.recipe.compiler as tc_compiler
+        from tensorcast.artifact_runtime.source import (
             resolve_source_artifact_ref,
         )
 
@@ -782,12 +1004,12 @@ def rebind_cached_recipe_template(
             source_catalog.source_artifact_ref
         )
         source_metadata_fingerprint = str(source_catalog.metadata_fingerprint)
-        identity = self.compile_identity(serving_facts=cached_recipe.serving_facts)
+        identity = self.compile_identity(runtime_facts=cached_recipe.runtime_facts)
         realization_plan_proto = bytes(cached_recipe.realization_plan_proto or b"")
         binding_plan = identity.with_compiled_artifacts(
             source_artifact_ref=source_artifact_ref,
             source_metadata_fingerprint=source_metadata_fingerprint,
-            serving_facts=cached_recipe.serving_facts,
+            runtime_facts=cached_recipe.runtime_facts,
             trace_plan=cached_recipe.trace_plan,
             tensor_schema=tuple(cached_recipe.tensor_schema),
             source_hull=tuple(cached_recipe.source_hull),
@@ -813,7 +1035,7 @@ def rebind_cached_recipe_template(
             identity=binding_plan,
             source_artifact_ref=source_artifact_ref,
             source_metadata_fingerprint=source_metadata_fingerprint,
-            serving_facts=cached_recipe.serving_facts,
+            runtime_facts=cached_recipe.runtime_facts,
             tensor_schema=cached_recipe.tensor_schema,
             semantic_validation_spec=cached_recipe.semantic_validation_spec,
         )
@@ -839,9 +1061,9 @@ def cached_recipe_matches_context(
         ):
             return False
         if placement is not None:
-            serving_placement = getattr(placement, "serving_placement", placement)
-            placement_topology = getattr(serving_placement, "topology", None)
-            placement_member = getattr(serving_placement, "member", None)
+            runtime_placement = getattr(placement, "runtime_placement", placement)
+            placement_topology = getattr(runtime_placement, "topology", None)
+            placement_member = getattr(runtime_placement, "member", None)
             recipe_topology = getattr(recipe, "topology_ref", None)
             recipe_member = getattr(recipe, "member_ref", None)
             if recipe_topology is not None and recipe_topology != placement_topology:
@@ -1076,14 +1298,14 @@ def _worker() -> None:
         ).start()
 
     @staticmethod
-    def collect_serving_facts(
+    def collect_runtime_facts(
         model: Any,
         model_config: Any,
         framework_adapter: Any,
     ) -> Any:
-        from tensorcast.serving.builder import compiler as tc_compiler
+        import tensorcast.artifact_runtime.recipe.compiler as tc_compiler
 
-        return tc_compiler.TensorcastServingFacts(
+        return tc_compiler.TensorcastRuntimeFacts(
             framework_name=framework_adapter.framework_name(),
             framework_version=framework_adapter.framework_version(),
             adapter_version=framework_adapter.adapter_version(),
@@ -1105,9 +1327,9 @@ def collect_tensor_schema(
         model: Any,
         *,
         runtime_only_tensor_names: tuple[str, ...],
-        is_reserved_serving_tensor_name: Any,
+        is_reserved_runtime_tensor_name: Any,
     ) -> tuple[Any, ...]:
-        from tensorcast.serving.builder import compiler as tc_compiler
+        import tensorcast.artifact_runtime.recipe.compiler as tc_compiler
 
         excluded = set(runtime_only_tensor_names)
         entries: list[Any] = []
@@ -1116,7 +1338,7 @@ def collect_tensor_schema(
         for name, param in model.named_parameters(remove_duplicate=True):
             if name in excluded:
                 continue
-            if is_reserved_serving_tensor_name(name):
+            if is_reserved_runtime_tensor_name(name):
                 raise RuntimeError(
                     f"Model tensor name '{name}' collides with Tensorcast reserved names"
                 )
@@ -1132,7 +1354,7 @@ def collect_tensor_schema(
         for name, buf in model.named_buffers(remove_duplicate=True):
             if name in excluded:
                 continue
-            if is_reserved_serving_tensor_name(name):
+            if is_reserved_runtime_tensor_name(name):
                 raise RuntimeError(
                     f"Model tensor name '{name}' collides with Tensorcast reserved names"
                 )
@@ -1157,7 +1379,7 @@ def resolve_semantic_validation_spec(
         framework_adapter: Any,
         explicit_spec: object | None,
     ) -> Any:
-        from tensorcast.serving.builder import compiler as tc_compiler
+        import tensorcast.artifact_runtime.recipe.compiler as tc_compiler
 
         if explicit_spec is not None:
             if isinstance(explicit_spec, tc_compiler.TensorcastSemanticValidationSpec):
@@ -1185,7 +1407,7 @@ def trace_plan_summary_fields(trace_plan: Any) -> dict[str, int]:
 
     @staticmethod
     def recipe_summary_fields(recipe: Any) -> dict[str, int]:
-        from tensorcast.serving.builder import compiler as tc_compiler
+        import tensorcast.artifact_runtime.recipe.compiler as tc_compiler
 
         return {
             "tensor_schema_count": len(recipe.tensor_schema),
@@ -1198,61 +1420,66 @@ def recipe_summary_fields(recipe: Any) -> dict[str, int]:
 
     @staticmethod
     def load_trace_plan_cache(cache_path: str | None) -> Any:
-        from tensorcast.serving.builder import trace_cache as tc_trace_cache
+        import tensorcast.artifact_runtime.recipe.trace_cache as tc_trace_cache
 
         return tc_trace_cache.load_trace_plan_cache(cache_path)
 
     @staticmethod
     def write_trace_plan_cache(cache_path: str, trace_plan: Any) -> None:
-        from tensorcast.serving.builder import trace_cache as tc_trace_cache
+        import tensorcast.artifact_runtime.recipe.trace_cache as tc_trace_cache
 
         tc_trace_cache.write_trace_plan_cache(cache_path, trace_plan)
 
     @staticmethod
     def dump_trace_plan_debug(*args: Any, **kwargs: Any) -> Any:
-        from tensorcast.serving.builder import trace_cache as tc_trace_cache
+        import tensorcast.artifact_runtime.recipe.trace_cache as tc_trace_cache
 
         return tc_trace_cache.dump_trace_plan_debug(*args, **kwargs)
 
     @staticmethod
     def load_compiled_recipe_cache(cache_path: str | None) -> Any:
-        from tensorcast.serving.builder import recipe_cache as tc_recipe_cache
+        import tensorcast.artifact_runtime.recipe.cache as tc_recipe_cache
 
         return tc_recipe_cache.load_compiled_recipe_cache(cache_path)
 
     @staticmethod
     def write_compiled_recipe_cache(cache_path: str, recipe: Any) -> None:
-        from tensorcast.serving.builder import recipe_cache as tc_recipe_cache
+        import tensorcast.artifact_runtime.recipe.cache as tc_recipe_cache
 
         tc_recipe_cache.write_compiled_recipe_cache(cache_path, recipe)
 
     @staticmethod
     def compute_recipe_compile_key(*args: Any, **kwargs: Any) -> str:
-        from tensorcast.serving.builder import compiler as tc_compiler
+        import tensorcast.artifact_runtime.recipe.compiler as tc_compiler
 
         return tc_compiler.compute_recipe_compile_key(*args, **kwargs)
 
     @staticmethod
     def compile_recipe_from_inputs(*args: Any, **kwargs: Any) -> Any:
-        from tensorcast.serving.builder import compiler as tc_compiler
+        import tensorcast.artifact_runtime.recipe.compiler as tc_compiler
 
-        return tc_compiler.compile_serving_recipe(*args, **kwargs)
+        return tc_compiler.compile_runtime_recipe(*args, **kwargs)
 
 
 __all__ = [
-    "ServingBindingPlan",
+    "RuntimeBindingPlan",
     "RecipeBuildMemoryCache",
     "RecipeBuildCacheConfig",
+    "RecipeBuildResult",
     "RecipeBuildRunResult",
+    "RecipeBuildSessionRequest",
     "RecipeCacheLookupResult",
     "RecipeCacheWriteResult",
     "RecipeBuildSession",
     "COMPILED_RECIPE_MEMORY_CACHE",
     "DEFAULT_RECIPE_BUILD_MEMORY_CACHE_ENTRIES",
     "TRACE_PLAN_MEMORY_CACHE",
+    "build_recipe_session",
     "compute_recipe_cache_key",
     "compute_trace_cache_key",
     "recipe_cache_path",
+    "recipe_build_cache_config_from_policy",
+    "recipe_build_identity",
     "stable_recipe_build_hash",
     "trace_cache_path",
 ]
diff --git a/tensorcast/artifact_runtime/recipe/builder.py b/tensorcast/artifact_runtime/recipe/builder.py
new file mode 100644
index 00000000..05f7db47
--- /dev/null
+++ b/tensorcast/artifact_runtime/recipe/builder.py
@@ -0,0 +1,47 @@
+#  Copyright (c) 2026, TensorCast Team.
+
+"""Artifact runtime builder primitives for offline publication workflows."""
+
+from __future__ import annotations
+
+from tensorcast.artifact_runtime.locator import ranked_version_key_for_member
+from tensorcast.artifact_runtime.recipe.compiler import (
+    CompiledRuntimeRecipe,
+    TensorSchemaEntry,
+)
+from tensorcast.artifact_runtime.recipe.materialization import (
+    BindingFinalizeMaterializationResult,
+    collect_runtime_tensors_from_model,
+    load_source_tensors_for_recipe,
+    materialize_binding_finalize_runtime_tensors,
+    materialize_pure_transform_runtime_tensors,
+    run_binding_finalize_semantic_validation,
+    tensorcast_view_slices_from_trace_plan,
+    validate_binding_finalize_tensor_schema,
+)
+from tensorcast.artifact_runtime.recipe.publication import (
+    complete_pure_transform_recipe_publication,
+)
+from tensorcast.artifact_runtime.recipe.validation import (
+    validate_recipe_for_builder_mode,
+)
+
+LOCAL_READY_BOOTSTRAP_BUILD_PIPELINE_VERSION = "tensorcast-bootstrap-v1"
+
+
+__all__ = [
+    "BindingFinalizeMaterializationResult",
+    "CompiledRuntimeRecipe",
+    "LOCAL_READY_BOOTSTRAP_BUILD_PIPELINE_VERSION",
+    "TensorSchemaEntry",
+    "collect_runtime_tensors_from_model",
+    "complete_pure_transform_recipe_publication",
+    "load_source_tensors_for_recipe",
+    "materialize_binding_finalize_runtime_tensors",
+    "materialize_pure_transform_runtime_tensors",
+    "ranked_version_key_for_member",
+    "run_binding_finalize_semantic_validation",
+    "tensorcast_view_slices_from_trace_plan",
+    "validate_binding_finalize_tensor_schema",
+    "validate_recipe_for_builder_mode",
+]
diff --git a/tensorcast/serving/builder/recipe_cache.py b/tensorcast/artifact_runtime/recipe/cache.py
similarity index 89%
rename from tensorcast/serving/builder/recipe_cache.py
rename to tensorcast/artifact_runtime/recipe/cache.py
index e7c03487..7010b0db 100644
--- a/tensorcast/serving/builder/recipe_cache.py
+++ b/tensorcast/artifact_runtime/recipe/cache.py
@@ -1,5 +1,5 @@
 #  Copyright (c) 2026, TensorCast Team.
-"""CompiledServingRecipe cache helpers."""
+"""CompiledRuntimeRecipe cache helpers."""
 
 from __future__ import annotations
 
@@ -10,16 +10,16 @@
 from pathlib import Path
 from typing import Any
 
-from tensorcast.serving.builder.compiler import (
-    CompiledServingRecipe,
+from tensorcast.artifact_runtime.recipe.compiler import (
+    CompiledRuntimeRecipe,
     SourceHullEntry,
+    TensorcastRuntimeFacts,
     TensorcastSemanticValidationSpec,
-    TensorcastServingFacts,
     TensorSchemaEntry,
     binding_realization_plan_proto_bytes,
     compiled_recipe_realization_plan_count,
 )
-from tensorcast.serving.builder.trace_ir import (
+from tensorcast.artifact_runtime.recipe.trace_ir import (
     TracePlan,
     copy_plan_from_dict,
     copy_plan_to_dict,
@@ -28,15 +28,15 @@
 )
 from tensorcast.types import (
     FinalizeClass,
-    ServingBindingMemberRef,
-    ServingSupportLevel,
-    ServingTopologyRef,
+    RuntimeBindingMemberRef,
+    RuntimeSupportLevel,
+    RuntimeTopologyRef,
 )
 
-RECIPE_CACHE_PAYLOAD_VERSION = 5
+RECIPE_CACHE_PAYLOAD_VERSION = 6
 
 
-def _serving_facts_to_dict(facts: TensorcastServingFacts) -> dict[str, Any]:
+def _runtime_facts_to_dict(facts: TensorcastRuntimeFacts) -> dict[str, Any]:
     return {
         "framework_name": facts.framework_name,
         "framework_version": facts.framework_version,
@@ -49,8 +49,8 @@ def _serving_facts_to_dict(facts: TensorcastServingFacts) -> dict[str, Any]:
     }
 
 
-def _serving_facts_from_dict(data: Mapping[str, Any]) -> TensorcastServingFacts:
-    return TensorcastServingFacts(
+def _runtime_facts_from_dict(data: Mapping[str, Any]) -> TensorcastRuntimeFacts:
+    return TensorcastRuntimeFacts(
         framework_name=str(data["framework_name"]),
         framework_version=(
             None
@@ -59,7 +59,7 @@ def _serving_facts_from_dict(data: Mapping[str, Any]) -> TensorcastServingFacts:
         ),
         adapter_version=str(data["adapter_version"]),
         serving_abi_version=str(data["serving_abi_version"]),
-        support_level=ServingSupportLevel(str(data["support_level"])),
+        support_level=RuntimeSupportLevel(str(data["support_level"])),
         runtime_only_tensor_names=tuple(
             str(name) for name in data.get("runtime_only_tensor_names", ())
         ),
@@ -167,18 +167,18 @@ def _pydantic_model_to_dict(value: Any | None) -> dict[str, Any] | None:
 
 def _topology_ref_from_dict(
     data: Mapping[str, Any] | None,
-) -> ServingTopologyRef | None:
+) -> RuntimeTopologyRef | None:
     if data is None:
         return None
-    return ServingTopologyRef.model_validate(dict(data))
+    return RuntimeTopologyRef.model_validate(dict(data))
 
 
 def _member_ref_from_dict(
     data: Mapping[str, Any] | None,
-) -> ServingBindingMemberRef | None:
+) -> RuntimeBindingMemberRef | None:
     if data is None:
         return None
-    return ServingBindingMemberRef.model_validate(dict(data))
+    return RuntimeBindingMemberRef.model_validate(dict(data))
 
 
 def _semantic_validation_spec_to_dict(
@@ -199,7 +199,7 @@ def _semantic_validation_spec_from_dict(
     )
 
 
-def compiled_recipe_to_dict(recipe: CompiledServingRecipe) -> dict[str, Any]:
+def compiled_recipe_to_dict(recipe: CompiledRuntimeRecipe) -> dict[str, Any]:
     realization_plan_proto = bytes(recipe.realization_plan_proto or b"")
     if not realization_plan_proto and recipe.realization_plan:
         realization_plan_proto = binding_realization_plan_proto_bytes(
@@ -210,7 +210,7 @@ def compiled_recipe_to_dict(recipe: CompiledServingRecipe) -> dict[str, Any]:
         "compile_key": recipe.compile_key,
         "source_artifact_ref": recipe.source_artifact_ref,
         "source_metadata_fingerprint": recipe.source_metadata_fingerprint,
-        "serving_facts": _serving_facts_to_dict(recipe.serving_facts),
+        "runtime_facts": _runtime_facts_to_dict(recipe.runtime_facts),
         "trace_plan_summary": _trace_plan_summary_to_dict(recipe.trace_plan),
         "tensor_schema": [
             _tensor_schema_to_dict(entry) for entry in recipe.tensor_schema
@@ -229,16 +229,16 @@ def compiled_recipe_to_dict(recipe: CompiledServingRecipe) -> dict[str, Any]:
     }
 
 
-def compiled_recipe_from_dict(data: Mapping[str, Any]) -> CompiledServingRecipe:
+def compiled_recipe_from_dict(data: Mapping[str, Any]) -> CompiledRuntimeRecipe:
     realization_plan_proto = _bytes_from_base64_payload(
         data["realization_plan_proto"],
         field="realization_plan_proto",
     )
-    return CompiledServingRecipe(
+    return CompiledRuntimeRecipe(
         compile_key=str(data["compile_key"]),
         source_artifact_ref=str(data["source_artifact_ref"]),
         source_metadata_fingerprint=str(data["source_metadata_fingerprint"]),
-        serving_facts=_serving_facts_from_dict(data["serving_facts"]),
+        runtime_facts=_runtime_facts_from_dict(data["runtime_facts"]),
         trace_plan=_trace_plan_summary_from_dict(data["trace_plan_summary"]),
         tensor_schema=tuple(
             _tensor_schema_from_dict(entry) for entry in data["tensor_schema"]
@@ -263,7 +263,7 @@ def compiled_recipe_from_dict(data: Mapping[str, Any]) -> CompiledServingRecipe:
 
 def load_compiled_recipe_cache(
     cache_path: str | os.PathLike[str] | None,
-) -> CompiledServingRecipe | None:
+) -> CompiledRuntimeRecipe | None:
     if not cache_path:
         return None
     path = Path(cache_path)
@@ -284,7 +284,7 @@ def load_compiled_recipe_cache(
 
 def write_compiled_recipe_cache(
     cache_path: str | os.PathLike[str],
-    recipe: CompiledServingRecipe,
+    recipe: CompiledRuntimeRecipe,
 ) -> None:
     path = Path(cache_path)
     path.parent.mkdir(parents=True, exist_ok=True)
diff --git a/tensorcast/serving/builder/compiler.py b/tensorcast/artifact_runtime/recipe/compiler.py
similarity index 83%
rename from tensorcast/serving/builder/compiler.py
rename to tensorcast/artifact_runtime/recipe/compiler.py
index 471d1b92..dd8727f3 100644
--- a/tensorcast/serving/builder/compiler.py
+++ b/tensorcast/artifact_runtime/recipe/compiler.py
@@ -1,5 +1,5 @@
 #  Copyright (c) 2026, TensorCast Team.
-"""Framework-neutral serving recipe compiler primitives."""
+"""Framework-neutral runtime recipe compiler primitives."""
 
 from __future__ import annotations
 
@@ -14,31 +14,31 @@
 from tensorcast.api.store.common import dtype_from_string as store_dtype_from_string
 from tensorcast.api.store.realization_plan import binding_realization_plan_to_proto
 from tensorcast.api.store.types import CanonicalIndex, CanonicalIndexEntry
-from tensorcast.serving.binding_plan import ServingBindingPlan
-from tensorcast.serving.builder.binding_plan import lower_trace_plan_for_realization
-from tensorcast.serving.builder.trace_ir import CopyPlanEntry, Range, TracePlan
-from tensorcast.serving.source_catalog import resolve_source_artifact_ref
+from tensorcast.artifact_runtime.binding.plan import lower_trace_plan_for_realization
+from tensorcast.artifact_runtime.recipe.identity import RuntimeBindingPlan
+from tensorcast.artifact_runtime.recipe.trace_ir import CopyPlanEntry, Range, TracePlan
+from tensorcast.artifact_runtime.source import resolve_source_artifact_ref
 from tensorcast.types import (
     FinalizeClass,
-    ServingBindingMemberRef,
-    ServingSupportLevel,
-    ServingTopologyRef,
+    RuntimeBindingMemberRef,
+    RuntimeSupportLevel,
+    RuntimeTopologyRef,
 )
 
 
 @dataclass(frozen=True)
-class TensorcastServingFacts:
+class TensorcastRuntimeFacts:
     framework_name: str
     adapter_version: str
     serving_abi_version: str
-    support_level: ServingSupportLevel
+    support_level: RuntimeSupportLevel
     runtime_only_tensor_names: tuple[str, ...]
     process_after_load_class: FinalizeClass
     post_bind_finalize_class: FinalizeClass
     framework_version: str | None = None
 
 
-ServingFacts = TensorcastServingFacts
+RuntimeFacts = TensorcastRuntimeFacts
 
 
 @dataclass(frozen=True)
@@ -69,20 +69,20 @@ def empty(cls) -> TensorcastSemanticValidationSpec:
 
 
 @dataclass(frozen=True)
-class CompiledServingRecipe:
+class CompiledRuntimeRecipe:
     compile_key: str
     source_artifact_ref: str
     source_metadata_fingerprint: str
-    serving_facts: TensorcastServingFacts
+    runtime_facts: TensorcastRuntimeFacts
     trace_plan: TracePlan
     tensor_schema: tuple[TensorSchemaEntry, ...]
     source_hull: tuple[SourceHullEntry, ...]
     realization_plan: tuple[BindingRealizationEntry, ...]
     realization_fallback_plan: tuple[CopyPlanEntry, ...]
-    topology_ref: ServingTopologyRef | None
-    member_ref: ServingBindingMemberRef | None
+    topology_ref: RuntimeTopologyRef | None
+    member_ref: RuntimeBindingMemberRef | None
     semantic_validation_spec: TensorcastSemanticValidationSpec
-    binding_plan: ServingBindingPlan | None = None
+    binding_plan: RuntimeBindingPlan | None = None
     realization_plan_proto: bytes = b""
     realization_plan_count: int = 0
 
@@ -91,26 +91,26 @@ class CompiledServingRecipe:
 class RecipeCompileInputs:
     source_catalog: Any
     trace_plan: TracePlan
-    serving_facts: TensorcastServingFacts
+    runtime_facts: TensorcastRuntimeFacts
     tensor_schema: tuple[TensorSchemaEntry, ...]
     semantic_validation_spec: TensorcastSemanticValidationSpec = field(
         default_factory=TensorcastSemanticValidationSpec.empty
     )
 
 
-class ServingBuildObserver(Protocol):
+class RuntimeBuildObserver(Protocol):
     def event(self, name: str, payload: Mapping[str, object]) -> None: ...
 
 
-def compile_serving_recipe(
+def compile_runtime_recipe(
     *,
-    identity: ServingBindingPlan,
+    identity: RuntimeBindingPlan,
     inputs: RecipeCompileInputs,
-    observer: ServingBuildObserver | None = None,
-) -> CompiledServingRecipe:
-    """Assemble a serving recipe from framework-collected pure inputs."""
+    observer: RuntimeBuildObserver | None = None,
+) -> CompiledRuntimeRecipe:
+    """Assemble a runtime recipe from framework-collected pure inputs."""
 
-    _validate_compile_identity_matches_facts(identity, inputs.serving_facts)
+    _validate_compile_identity_matches_facts(identity, inputs.runtime_facts)
     source_artifact_ref = resolve_source_artifact_ref(
         inputs.source_catalog.source_artifact_ref
     )
@@ -144,7 +144,7 @@ def compile_serving_recipe(
     resolved_plan = identity.with_compiled_artifacts(
         source_artifact_ref=source_artifact_ref,
         source_metadata_fingerprint=source_metadata_fingerprint,
-        serving_facts=inputs.serving_facts,
+        runtime_facts=inputs.runtime_facts,
         trace_plan=inputs.trace_plan,
         tensor_schema=tuple(tensor_schema),
         source_hull=source_hull,
@@ -161,15 +161,15 @@ def compile_serving_recipe(
         identity=resolved_plan,
         source_artifact_ref=source_artifact_ref,
         source_metadata_fingerprint=source_metadata_fingerprint,
-        serving_facts=inputs.serving_facts,
+        runtime_facts=inputs.runtime_facts,
         tensor_schema=tensor_schema,
         semantic_validation_spec=inputs.semantic_validation_spec,
     )
-    recipe = CompiledServingRecipe(
+    recipe = CompiledRuntimeRecipe(
         compile_key=compile_key,
         source_artifact_ref=source_artifact_ref,
         source_metadata_fingerprint=source_metadata_fingerprint,
-        serving_facts=inputs.serving_facts,
+        runtime_facts=inputs.runtime_facts,
         trace_plan=inputs.trace_plan,
         tensor_schema=tensor_schema,
         source_hull=source_hull,
@@ -255,7 +255,7 @@ def realization_plan_digest(realization_plan_proto: bytes) -> str:
 
 
 def compiled_recipe_realization_plan_count(
-    recipe: CompiledServingRecipe,
+    recipe: CompiledRuntimeRecipe,
 ) -> int:
     return int(recipe.realization_plan_count or len(recipe.realization_plan))
 
@@ -269,34 +269,34 @@ def filter_tensor_schema_for_trace_plan(
     missing = expected - set(schema_by_name)
     if missing:
         raise ValueError(
-            "TensorCast serving recipe tensor_schema is missing destination "
+            "TensorCast runtime recipe tensor_schema is missing destination "
             f"entries: {sorted(missing)}"
         )
     return tuple(entry for entry in tensor_schema if entry.name in expected)
 
 
 def _validate_compile_identity_matches_facts(
-    identity: ServingBindingPlan,
-    serving_facts: TensorcastServingFacts,
+    identity: RuntimeBindingPlan,
+    runtime_facts: TensorcastRuntimeFacts,
 ) -> None:
     mismatches = [
         field_name
         for field_name, identity_value, facts_value in (
-            ("framework_name", identity.framework_name, serving_facts.framework_name),
+            ("framework_name", identity.framework_name, runtime_facts.framework_name),
             (
                 "framework_version",
                 identity.framework_version,
-                serving_facts.framework_version,
+                runtime_facts.framework_version,
             ),
             (
                 "adapter_version",
                 identity.adapter_version,
-                serving_facts.adapter_version,
+                runtime_facts.adapter_version,
             ),
             (
                 "serving_abi_version",
                 identity.serving_abi_version,
-                serving_facts.serving_abi_version,
+                runtime_facts.serving_abi_version,
             ),
         )
         if identity_value is not None
@@ -305,24 +305,24 @@ def _validate_compile_identity_matches_facts(
     ]
     if mismatches:
         raise ValueError(
-            "ServingBindingPlan must match TensorcastServingFacts for "
+            "RuntimeBindingPlan must match TensorcastRuntimeFacts for "
             f"{', '.join(mismatches)}"
         )
 
 
 def compute_recipe_compile_key(
     *,
-    identity: ServingBindingPlan,
+    identity: RuntimeBindingPlan,
     source_artifact_ref: str,
     source_metadata_fingerprint: str,
-    serving_facts: TensorcastServingFacts,
+    runtime_facts: TensorcastRuntimeFacts,
     tensor_schema: Sequence[TensorSchemaEntry],
     semantic_validation_spec: TensorcastSemanticValidationSpec,
 ) -> str:
     payload = identity.compile_payload(
         source_artifact_ref=source_artifact_ref,
         source_metadata_fingerprint=source_metadata_fingerprint,
-        serving_facts=serving_facts,
+        runtime_facts=runtime_facts,
         tensor_schema=tensor_schema,
         semantic_validation_spec=semantic_validation_spec,
     )
@@ -332,18 +332,18 @@ def compute_recipe_compile_key(
 
 
 __all__ = [
-    "CompiledServingRecipe",
+    "CompiledRuntimeRecipe",
     "RecipeCompileInputs",
     "SemanticValidationSpec",
-    "ServingBuildObserver",
-    "ServingBindingPlan",
-    "ServingFacts",
+    "RuntimeBuildObserver",
+    "RuntimeBindingPlan",
+    "RuntimeFacts",
     "SourceHullEntry",
     "TensorSchemaEntry",
     "TensorcastSemanticValidationSpec",
-    "TensorcastServingFacts",
+    "TensorcastRuntimeFacts",
     "binding_realization_plan_proto_bytes",
-    "compile_serving_recipe",
+    "compile_runtime_recipe",
     "compiled_recipe_realization_plan_count",
     "compute_recipe_compile_key",
     "filter_tensor_schema_for_trace_plan",
diff --git a/tensorcast/serving/binding_plan.py b/tensorcast/artifact_runtime/recipe/identity.py
similarity index 90%
rename from tensorcast/serving/binding_plan.py
rename to tensorcast/artifact_runtime/recipe/identity.py
index 20d7d321..c811d30d 100644
--- a/tensorcast/serving/binding_plan.py
+++ b/tensorcast/artifact_runtime/recipe/identity.py
@@ -1,5 +1,5 @@
 #  Copyright (c) 2026, TensorCast Team.
-"""Serving binding plan identity shared by trace and recipe compilation."""
+"""Runtime recipe identity shared by trace and recipe compilation."""
 
 from __future__ import annotations
 
@@ -7,12 +7,12 @@
 from dataclasses import asdict, dataclass, field, is_dataclass, replace
 from typing import Any
 
-from tensorcast.types import ServingBindingMemberRef, ServingTopologyRef
+from tensorcast.types import RuntimeBindingMemberRef, RuntimeTopologyRef
 
 
 @dataclass(frozen=True)
-class ServingBindingPlan:
-    """Cache and correctness identity for serving source bootstrap."""
+class RuntimeBindingPlan:
+    """Cache and correctness identity for runtime source bootstrap."""
 
     model_id: str
     model_revision: str | None
@@ -26,8 +26,8 @@ class ServingBindingPlan:
     framework_version: str | None = None
     tp_rank: int = 0
     tp_world_size: int = 1
-    topology_ref: ServingTopologyRef | None = None
-    member_ref: ServingBindingMemberRef | None = None
+    topology_ref: RuntimeTopologyRef | None = None
+    member_ref: RuntimeBindingMemberRef | None = None
     placement: Any | None = None
     source_artifact_ref: str | None = None
     source_metadata_fingerprint: str | None = None
@@ -42,7 +42,7 @@ class ServingBindingPlan:
     target_layout_hash: str | None = None
     tensor_schema_hash: str | None = None
     resolved_spec_digest: str | None = None
-    serving_facts: Any | None = None
+    runtime_facts: Any | None = None
     trace_plan: Any | None = None
     tensor_schema: tuple[Any, ...] = ()
     source_hull: tuple[Any, ...] = ()
@@ -77,7 +77,7 @@ def base_payload(self) -> dict[str, Any]:
     def with_resolved_spec_cache_entry(
         self,
         resolved_spec_cache_entry: Any,
-    ) -> "ServingBindingPlan":
+    ) -> "RuntimeBindingPlan":
         return replace(
             self,
             resolved_spec_cache_entry=resolved_spec_cache_entry,
@@ -152,7 +152,7 @@ def compile_payload(
         *,
         source_artifact_ref: str,
         source_metadata_fingerprint: str,
-        serving_facts: Any,
+        runtime_facts: Any,
         tensor_schema: Any,
         semantic_validation_spec: Any,
     ) -> dict[str, Any]:
@@ -163,20 +163,20 @@ def compile_payload(
         payload.update(
             {
                 "runtime_version": self.runtime_version,
-                "framework_name": serving_facts.framework_name,
-                "framework_version": serving_facts.framework_version,
-                "adapter_version": serving_facts.adapter_version,
-                "serving_abi_version": serving_facts.serving_abi_version,
+                "framework_name": runtime_facts.framework_name,
+                "framework_version": runtime_facts.framework_version,
+                "adapter_version": runtime_facts.adapter_version,
+                "serving_abi_version": runtime_facts.serving_abi_version,
                 "identity_framework_name": self.framework_name,
                 "identity_framework_version": self.framework_version,
                 "identity_adapter_version": self.adapter_version,
                 "identity_serving_abi_version": self.serving_abi_version,
-                "support_level": str(serving_facts.support_level),
+                "support_level": str(runtime_facts.support_level),
                 "runtime_only_tensor_names": list(
-                    serving_facts.runtime_only_tensor_names
+                    runtime_facts.runtime_only_tensor_names
                 ),
-                "process_after_load_class": str(serving_facts.process_after_load_class),
-                "post_bind_finalize_class": str(serving_facts.post_bind_finalize_class),
+                "process_after_load_class": str(runtime_facts.process_after_load_class),
+                "post_bind_finalize_class": str(runtime_facts.post_bind_finalize_class),
                 "tensor_schema": [
                     {
                         "name": item.name,
@@ -200,7 +200,7 @@ def with_compiled_artifacts(
         *,
         source_artifact_ref: str,
         source_metadata_fingerprint: str,
-        serving_facts: Any,
+        runtime_facts: Any,
         trace_plan: Any,
         tensor_schema: tuple[Any, ...],
         source_hull: tuple[Any, ...],
@@ -213,13 +213,13 @@ def with_compiled_artifacts(
         tensor_schema_hash: str | None = None,
         realization_plan_digest: str | None = None,
         resolved_spec_cache_entry: Any | None = None,
-    ) -> "ServingBindingPlan":
+    ) -> "RuntimeBindingPlan":
         return replace(
             self,
             source_artifact_ref=str(source_artifact_ref),
             source_metadata_fingerprint=str(source_metadata_fingerprint),
             source_schema_hash=_optional_str(source_schema_hash),
-            serving_facts=serving_facts,
+            runtime_facts=runtime_facts,
             trace_plan=trace_plan,
             tensor_schema=tuple(tensor_schema),
             tensor_schema_hash=_optional_str(tensor_schema_hash),
@@ -248,7 +248,7 @@ def compiled_artifact_payload(self) -> dict[str, Any]:
             "target_layout_hash": self.target_layout_hash,
             "tensor_schema_hash": self.tensor_schema_hash,
             "resolved_spec_digest": self.resolved_spec_digest,
-            "serving_facts": _jsonable(self.serving_facts),
+            "runtime_facts": _jsonable(self.runtime_facts),
             "trace_plan": _jsonable(self.trace_plan),
             "tensor_schema": _jsonable(self.tensor_schema),
             "source_hull": _jsonable(self.source_hull),
@@ -288,7 +288,7 @@ def _optional_str(value: Any | None) -> str | None:
     return text or None
 
 
-def _optional_identity_payload(plan: ServingBindingPlan) -> dict[str, Any]:
+def _optional_identity_payload(plan: RuntimeBindingPlan) -> dict[str, Any]:
     payload: dict[str, Any] = {}
     for field_name in (
         "source_schema_hash",
@@ -313,4 +313,4 @@ def _optional_identity_payload(plan: ServingBindingPlan) -> dict[str, Any]:
     return payload
 
 
-__all__ = ["ServingBindingPlan"]
+__all__ = ["RuntimeBindingPlan"]
diff --git a/tensorcast/serving/local_ready.py b/tensorcast/artifact_runtime/recipe/local_ready.py
similarity index 78%
rename from tensorcast/serving/local_ready.py
rename to tensorcast/artifact_runtime/recipe/local_ready.py
index cc33eaee..14c5ec2d 100644
--- a/tensorcast/serving/local_ready.py
+++ b/tensorcast/artifact_runtime/recipe/local_ready.py
@@ -1,6 +1,6 @@
 #  Copyright (c) 2026, TensorCast Team.
 
-"""Core local-ready serving helpers used by framework integrations."""
+"""Core local-ready runtime binding helpers used by framework integrations."""
 
 from __future__ import annotations
 
@@ -12,25 +12,30 @@
 import torch
 
 import tensorcast as tc
+import tensorcast.artifact_runtime.contract as tc_contract
+import tensorcast.artifact_runtime.recipe.materialization as tc_materialization
+import tensorcast.artifact_runtime.recipe.publication as tc_publication
 from tensorcast.api.store import create_binding as create_tensorcast_binding
 from tensorcast.api.store.owned_binding_layout import (
     build_mapped_tensor_spec,
     build_owned_layout,
 )
-from tensorcast.api.store.serving_builder import prepare_serving_manifest_carrier
+from tensorcast.api.store.publication_builder import (
+    prepare_runtime_artifact_manifest_carrier,
+)
 from tensorcast.api.store.types import CanonicalIndexEntry
-from tensorcast.proto.daemon.v2 import store_daemon_pb2
-from tensorcast.serving.builder import materialization as tc_materialization
-from tensorcast.serving.builder import publication as tc_publication
-from tensorcast.serving.builder.compiler import (
-    CompiledServingRecipe,
+from tensorcast.artifact_runtime.publication.context import (
+    logical_topology_json_from_recipe,
+    publication_context_from_recipe,
+)
+from tensorcast.artifact_runtime.recipe.compiler import (
+    CompiledRuntimeRecipe,
     TensorSchemaEntry,
 )
-from tensorcast.serving.builder.compiler import (
+from tensorcast.artifact_runtime.recipe.compiler import (
     compiled_recipe_realization_plan_count as _compiled_recipe_realization_plan_count,
 )
-from tensorcast.serving.contract import logical_topology_json
-from tensorcast.types import ServingTopologyRef
+from tensorcast.proto.daemon.v2 import store_daemon_pb2
 
 LOCAL_READY_BOOTSTRAP_BUILD_PIPELINE_VERSION = "tensorcast-bootstrap-v1"
 _LOGGER = logging.getLogger(__name__)
@@ -46,7 +51,7 @@ class LocalReadyBindingRealizationResult:
 
 
 def materialized_tensor_schema(
-    recipe: CompiledServingRecipe,
+    recipe: CompiledRuntimeRecipe,
 ) -> tuple[TensorSchemaEntry, ...]:
     expected_names = set(recipe.trace_plan.expected_dst_names)
     return tuple(
@@ -54,8 +59,8 @@ def materialized_tensor_schema(
     )
 
 
-def serving_binding_tensor_schema(
-    recipe: CompiledServingRecipe,
+def runtime_binding_tensor_schema(
+    recipe: CompiledRuntimeRecipe,
     *,
     manifest_tensor_name: str,
     manifest_bytes: bytes | None = None,
@@ -99,7 +104,7 @@ def canonical_index_entries_from_tensor_schema(
     return tuple(entries)
 
 
-def canonical_index_from_recipe(recipe: CompiledServingRecipe) -> tc.CanonicalIndex:
+def canonical_index_from_recipe(recipe: CompiledRuntimeRecipe) -> tc.CanonicalIndex:
     entries = canonical_index_entries_from_tensor_schema(
         materialized_tensor_schema(recipe)
     )
@@ -111,44 +116,8 @@ def canonical_index_from_recipe(recipe: CompiledServingRecipe) -> tc.CanonicalIn
     )
 
 
-def logical_topology_json_from_recipe(
-    recipe: CompiledServingRecipe,
-    *,
-    topology: ServingTopologyRef | None = None,
-    framework_payload: dict[str, Any] | None = None,
-) -> str | None:
-    if topology is None:
-        if (
-            getattr(recipe, "topology_ref", None) is None
-            and getattr(recipe, "member_ref", None) is None
-        ):
-            return None
-        raise ValueError(
-            "TensorCast local-ready manifest requires ServingTopologyRef for "
-            "a topology-sensitive recipe"
-        )
-    return logical_topology_json(
-        topology,
-        framework_payload=framework_payload or {},
-    )
-
-
-def publication_context_from_recipe(
-    recipe: CompiledServingRecipe,
-    *,
-    logical_topology_json_payload: str | None = None,
-) -> tc_publication.RecipePublicationContext:
-    return tc_publication.RecipePublicationContext(
-        source_artifact_ref=recipe.source_artifact_ref,
-        framework_name=recipe.serving_facts.framework_name,
-        adapter_version=recipe.serving_facts.adapter_version,
-        serving_abi_version=recipe.serving_facts.serving_abi_version,
-        logical_topology_json=logical_topology_json_payload,
-    )
-
-
 def prepare_same_binding_manifest_carrier(
-    recipe: CompiledServingRecipe,
+    recipe: CompiledRuntimeRecipe,
     *,
     manifest_tensor_name: str,
     representation_contract_hash: str,
@@ -162,7 +131,7 @@ def prepare_same_binding_manifest_carrier(
         logical_topology_json_payload=logical_topology_json_payload,
     )
     if (
-        recipe.serving_facts.process_after_load_class
+        recipe.runtime_facts.process_after_load_class
         == tc.FinalizeClass.REPRESENTATION_CHANGING
     ):
         build_intent = tc_publication.build_binding_finalize_build_intent(
@@ -176,7 +145,7 @@ def prepare_same_binding_manifest_carrier(
             build_pipeline_version=build_pipeline_version,
             representation_contract_hash=representation_contract_hash,
         )
-    carrier = prepare_serving_manifest_carrier(
+    carrier = prepare_runtime_artifact_manifest_carrier(
         build_intent=build_intent,
         canonical_index=base_canonical_index,
         representation_contract_hash=representation_contract_hash,
@@ -187,14 +156,14 @@ def prepare_same_binding_manifest_carrier(
     return representation_contract_hash, carrier.serving_manifest_bytes
 
 
-def compute_serving_binding_tensor_schema_hash(
-    recipe: CompiledServingRecipe,
+def compute_runtime_binding_tensor_schema_hash(
+    recipe: CompiledRuntimeRecipe,
     *,
     manifest_tensor_name: str,
     manifest_bytes: bytes | None = None,
 ) -> str:
     entries = canonical_index_entries_from_tensor_schema(
-        serving_binding_tensor_schema(
+        runtime_binding_tensor_schema(
             recipe,
             manifest_tensor_name=manifest_tensor_name,
             manifest_bytes=manifest_bytes,
@@ -205,7 +174,7 @@ def compute_serving_binding_tensor_schema_hash(
         total_size_bytes=sum(int(entry.size_bytes) for entry in entries),
         avbs_hash="",
     )
-    return tc.compute_serving_tensor_schema_hash(
+    return tc_contract.compute_canonical_runtime_tensor_schema_hash(
         canonical_index,
         manifest_tensor_name=manifest_tensor_name,
     )
@@ -227,7 +196,7 @@ def realization_plan_proto_with_manifest(
 
 
 def build_binding_layout_for_recipe(
-    recipe: CompiledServingRecipe,
+    recipe: CompiledRuntimeRecipe,
     *,
     target_device: torch.device,
     manifest_tensor_name: str,
@@ -238,7 +207,7 @@ def build_binding_layout_for_recipe(
         raise RuntimeError(
             "Tensorcast bootstrap requires an explicit CUDA device index"
         )
-    tensor_schema = serving_binding_tensor_schema(
+    tensor_schema = runtime_binding_tensor_schema(
         recipe,
         manifest_tensor_name=manifest_tensor_name,
         manifest_bytes=manifest_bytes,
@@ -295,7 +264,7 @@ def _close_binding_after_failure(binding: Any, *, phase: str) -> None:
 
 def realize_local_ready_binding_from_source(
     *,
-    recipe: CompiledServingRecipe,
+    recipe: CompiledRuntimeRecipe,
     source_subject: Any,
     target_device: torch.device,
     manifest_tensor_name: str,
@@ -338,27 +307,6 @@ def realize_local_ready_binding_from_source(
     )
 
 
-def prepare_local_ready_serving(
-    *,
-    recipe: CompiledServingRecipe,
-    source_subject: Any,
-    target_device: torch.device,
-    manifest_tensor_name: str,
-    manifest_bytes: bytes | None,
-    options: Any | None,
-    binding_factory: Callable[..., Any] | None = None,
-) -> LocalReadyBindingRealizationResult:
-    return realize_local_ready_binding_from_source(
-        recipe=recipe,
-        source_subject=source_subject,
-        target_device=target_device,
-        manifest_tensor_name=manifest_tensor_name,
-        manifest_bytes=manifest_bytes,
-        options=options,
-        binding_factory=binding_factory,
-    )
-
-
 def freeze_local_ready_binding(
     *,
     binding: Any,
@@ -375,7 +323,7 @@ def freeze_local_ready_binding(
         raise
 
 
-def source_view_for_recipe(recipe: CompiledServingRecipe, source_subject: Any) -> Any:
+def source_view_for_recipe(recipe: CompiledRuntimeRecipe, source_subject: Any) -> Any:
     source_view = source_subject
     if not isinstance(source_subject, tc.PublicDiskSourceHandle):
         subset_fn = getattr(source_subject, "subset", None)
@@ -401,13 +349,13 @@ def source_view_for_recipe(recipe: CompiledServingRecipe, source_subject: Any) -
     return source_view
 
 
-def tensorcast_view_slice_count(recipe: CompiledServingRecipe) -> int:
+def tensorcast_view_slice_count(recipe: CompiledRuntimeRecipe) -> int:
     return len(
         tc_materialization.tensorcast_view_slices_from_trace_plan(recipe.trace_plan)
     )
 
 
-def compiled_recipe_realization_plan_count(recipe: CompiledServingRecipe) -> int:
+def compiled_recipe_realization_plan_count(recipe: CompiledRuntimeRecipe) -> int:
     return _compiled_recipe_realization_plan_count(recipe)
 
 
@@ -434,7 +382,7 @@ def binding_value_verification_state_name(value: Any) -> str:
     "binding_value_verification_state_name",
     "canonical_index_entries_from_tensor_schema",
     "canonical_index_from_recipe",
-    "compute_serving_binding_tensor_schema_hash",
+    "compute_runtime_binding_tensor_schema_hash",
     "create_local_ready_binding",
     "compiled_recipe_realization_plan_count",
     "freeze_local_ready_binding",
@@ -442,12 +390,11 @@ def binding_value_verification_state_name(value: Any) -> str:
     "LOCAL_READY_BOOTSTRAP_BUILD_PIPELINE_VERSION",
     "LocalReadyBindingRealizationResult",
     "materialized_tensor_schema",
-    "prepare_local_ready_serving",
     "prepare_same_binding_manifest_carrier",
     "publication_context_from_recipe",
     "realization_plan_proto_with_manifest",
     "realize_local_ready_binding_from_source",
-    "serving_binding_tensor_schema",
+    "runtime_binding_tensor_schema",
     "source_view_for_recipe",
     "tensorcast_view_slice_count",
 ]
diff --git a/tensorcast/serving/builder/materialization.py b/tensorcast/artifact_runtime/recipe/materialization.py
similarity index 90%
rename from tensorcast/serving/builder/materialization.py
rename to tensorcast/artifact_runtime/recipe/materialization.py
index 435fcf4b..bbce2882 100644
--- a/tensorcast/serving/builder/materialization.py
+++ b/tensorcast/artifact_runtime/recipe/materialization.py
@@ -1,6 +1,6 @@
 #  Copyright (c) 2026, TensorCast Team.
 
-"""Framework-neutral tensor-dict materialization helpers."""
+"""Framework-neutral runtime tensor-dict materialization helpers."""
 
 from __future__ import annotations
 
@@ -11,33 +11,33 @@
 import torch
 from torch import nn
 
-from tensorcast.pytorch.module_binding import (
-    attach_tensors_to_module,
-    collect_module_tensors,
-)
-from tensorcast.serving.builder.recipe_validation import (
-    validate_recipe_for_builder_mode,
-)
-from tensorcast.serving.builder.semantic_validation import (
+from tensorcast.artifact_runtime.recipe.semantic_validation import (
     evaluate_semantic_validation_spec,
 )
-from tensorcast.serving.builder.tensor_schema import (
+from tensorcast.artifact_runtime.recipe.tensor_schema import (
     validate_tensor_schema_against_tensors,
 )
-from tensorcast.serving.builder.trace_ir import (
+from tensorcast.artifact_runtime.recipe.trace_ir import (
     CopyPlanEntry,
     MultiRange,
     Range,
     RangeSpec,
     TracePlan,
 )
+from tensorcast.artifact_runtime.recipe.validation import (
+    validate_recipe_for_builder_mode,
+)
+from tensorcast.pytorch.module_binding import (
+    attach_tensors_to_module,
+    collect_module_tensors,
+)
 from tensorcast.types import BuilderMode
 
 
 @dataclass(frozen=True)
 class BindingFinalizeMaterializationResult:
     model: nn.Module
-    serving_tensors: dict[str, torch.Tensor]
+    runtime_tensors: dict[str, torch.Tensor]
     semantic_probe_result: Any = None
 
 
@@ -132,12 +132,12 @@ def narrow_source_view(
 def apply_copy_plan(
     trace_plan: TracePlan,
     source_tensors: Mapping[str, torch.Tensor],
-    serving_tensors: Mapping[str, torch.Tensor],
+    runtime_tensors: Mapping[str, torch.Tensor],
     *,
     entries: Iterable[CopyPlanEntry] | None = None,
 ) -> None:
     for entry in trace_plan.copy_plan if entries is None else entries:
-        dst_base = serving_tensors.get(entry.dst_name)
+        dst_base = runtime_tensors.get(entry.dst_name)
         if dst_base is None:
             raise RuntimeError(f"Missing destination tensor {entry.dst_name}")
         dst_view = (
@@ -239,10 +239,10 @@ def update_dst_coverage(
 
 def validate_dst_coverage(
     trace_plan: TracePlan,
-    serving_tensors: Mapping[str, torch.Tensor],
+    runtime_tensors: Mapping[str, torch.Tensor],
 ) -> None:
     expected = set(trace_plan.expected_dst_names)
-    provided = set(serving_tensors)
+    provided = set(runtime_tensors)
     missing = expected - provided
     unexpected = provided - expected
     if missing or unexpected:
@@ -253,7 +253,7 @@ def validate_dst_coverage(
 
     coverage: dict[str, dict[str, Any]] = {}
     for entry in trace_plan.copy_plan:
-        dst_base = serving_tensors.get(entry.dst_name)
+        dst_base = runtime_tensors.get(entry.dst_name)
         if dst_base is None:
             continue
         update_dst_coverage(coverage, entry, dst_base)
@@ -297,7 +297,7 @@ def load_source_tensors_for_recipe(
     }
 
 
-def materialize_pure_transform_serving_tensors(
+def materialize_pure_transform_runtime_tensors(
     recipe: Any,
     source_tensors: Mapping[str, torch.Tensor],
     *,
@@ -321,7 +321,7 @@ def materialize_recipe_copy_plan_tensors(
         str(name): tensor for name, tensor in dict(source_tensors).items()
     }
     validate_source_tensor_names(recipe.trace_plan, resolved_source_tensors)
-    serving_tensors = allocate_tensors_from_schema(
+    runtime_tensors = allocate_tensors_from_schema(
         recipe.tensor_schema,
         target_device=torch.device(target_device),
     )
@@ -329,13 +329,13 @@ def materialize_recipe_copy_plan_tensors(
         apply_copy_plan(
             recipe.trace_plan,
             resolved_source_tensors,
-            serving_tensors,
+            runtime_tensors,
         )
-    validate_dst_coverage(recipe.trace_plan, serving_tensors)
-    return serving_tensors
+    validate_dst_coverage(recipe.trace_plan, runtime_tensors)
+    return runtime_tensors
 
 
-def materialize_binding_finalize_serving_tensors(
+def materialize_binding_finalize_runtime_tensors(
     recipe: Any,
     source_tensors: Mapping[str, torch.Tensor],
     *,
@@ -346,7 +346,7 @@ def materialize_binding_finalize_serving_tensors(
 ) -> BindingFinalizeMaterializationResult:
     validate_recipe_for_builder_mode(recipe, BuilderMode.BINDING_FINALIZE)
     resolved_target_device = torch.device(target_device)
-    serving_tensors = materialize_recipe_copy_plan_tensors(
+    runtime_tensors = materialize_recipe_copy_plan_tensors(
         recipe,
         source_tensors,
         target_device=resolved_target_device,
@@ -354,7 +354,7 @@ def materialize_binding_finalize_serving_tensors(
     model = build_runtime_model(resolved_target_device)
     attach_tensors_to_module(
         model,
-        serving_tensors,
+        runtime_tensors,
         replace_meta_params=True,
         skip_reserved_tensor_names=True,
         preserve_aliases=True,
@@ -372,14 +372,14 @@ def materialize_binding_finalize_serving_tensors(
         model_config=model_config,
         framework_adapter=framework_adapter,
     )
-    finalized_tensors = collect_serving_tensors_from_model(
+    finalized_tensors = collect_runtime_tensors_from_model(
         model,
-        runtime_only_tensor_names=recipe.serving_facts.runtime_only_tensor_names,
+        runtime_only_tensor_names=recipe.runtime_facts.runtime_only_tensor_names,
     )
     validate_tensor_schema_against_tensors(recipe.tensor_schema, finalized_tensors)
     return BindingFinalizeMaterializationResult(
         model=model,
-        serving_tensors=finalized_tensors,
+        runtime_tensors=finalized_tensors,
         semantic_probe_result=semantic_probe_result,
     )
 
@@ -400,7 +400,7 @@ def run_binding_finalize_semantic_validation(
     )
 
 
-def collect_serving_tensors_from_model(
+def collect_runtime_tensors_from_model(
     model: nn.Module,
     *,
     runtime_only_tensor_names: Sequence[str],
@@ -424,12 +424,12 @@ def validate_binding_finalize_tensor_schema(
     "BindingFinalizeMaterializationResult",
     "allocate_tensors_from_schema",
     "apply_copy_plan",
-    "collect_serving_tensors_from_model",
+    "collect_runtime_tensors_from_model",
     "dtype_from_string",
     "iter_ranges",
     "load_source_tensors_for_recipe",
-    "materialize_binding_finalize_serving_tensors",
-    "materialize_pure_transform_serving_tensors",
+    "materialize_binding_finalize_runtime_tensors",
+    "materialize_pure_transform_runtime_tensors",
     "materialize_recipe_copy_plan_tensors",
     "narrow_by_range_spec",
     "narrow_source_view",
diff --git a/tensorcast/serving/builder/publication.py b/tensorcast/artifact_runtime/recipe/publication.py
similarity index 77%
rename from tensorcast/serving/builder/publication.py
rename to tensorcast/artifact_runtime/recipe/publication.py
index 125d1f06..da67f096 100644
--- a/tensorcast/serving/builder/publication.py
+++ b/tensorcast/artifact_runtime/recipe/publication.py
@@ -1,100 +1,48 @@
 #  Copyright (c) 2026, TensorCast Team.
 
-"""Recipe-oriented serving publication helpers."""
+"""Recipe-oriented runtime publication helpers."""
 
 from __future__ import annotations
 
 from collections.abc import Mapping
-from dataclasses import dataclass
 from typing import Any
 
 import torch
 
 import tensorcast as tc
 from tensorcast.api.store.handles import RegisteredArtifact
-from tensorcast.api.store.serving_builder import (
+from tensorcast.api.store.publication_builder import (
     build_binding_finalize_admission_facts,
     build_binding_finalize_publication_bundle,
     build_pure_transform_publication_bundle_from_registered_artifact,
     build_pure_transform_publication_spec,
-    prepare_binding_finalize_serving_registration,
-    prepare_pure_transform_serving_registration,
+    prepare_binding_finalize_runtime_registration,
+    prepare_pure_transform_runtime_registration,
 )
 from tensorcast.api.store.types import CanonicalIndex
-from tensorcast.serving.builder.materialization import (
+from tensorcast.artifact_runtime.publication.context import (
+    RecipePublicationContext,
+    build_binding_finalize_build_intent,
+    build_pure_transform_build_intent,
+    build_recipe_runtime_build_intent,
+)
+from tensorcast.artifact_runtime.recipe.materialization import (
     load_source_tensors_for_recipe,
-    materialize_pure_transform_serving_tensors,
+    materialize_pure_transform_runtime_tensors,
 )
 from tensorcast.types import (
     AssemblyReadinessPolicy,
     AssemblyRequirementSetRef,
     BindingValueRef,
-    BuilderMode,
     PureTransformPublicationSpec,
     RepresentationPublishSpec,
-    ServingAdmissionFacts,
-    ServingBuildIntent,
-    ServingPublicationSubject,
-    ServingSupportLevel,
+    RuntimeAdmissionFacts,
+    RuntimePublicationSubject,
+    RuntimeSupportLevel,
 )
 
 
-@dataclass(frozen=True)
-class RecipePublicationContext:
-    source_artifact_ref: str
-    framework_name: str
-    adapter_version: str
-    serving_abi_version: str
-    logical_topology_json: str | None = None
-
-
-def build_recipe_serving_build_intent(
-    context: RecipePublicationContext,
-    *,
-    builder_mode: BuilderMode,
-    build_pipeline_version: str,
-    representation_contract_hash: str | None = None,
-) -> ServingBuildIntent:
-    return ServingBuildIntent(
-        representation_contract_hash=representation_contract_hash,
-        builder_mode=builder_mode,
-        framework_name=context.framework_name,
-        adapter_version=context.adapter_version,
-        serving_abi_version=context.serving_abi_version,
-        build_pipeline_version=str(build_pipeline_version),
-        source_artifact_ref=context.source_artifact_ref,
-    )
-
-
-def build_pure_transform_build_intent(
-    context: RecipePublicationContext,
-    *,
-    build_pipeline_version: str,
-    representation_contract_hash: str | None = None,
-) -> ServingBuildIntent:
-    return build_recipe_serving_build_intent(
-        context,
-        builder_mode=BuilderMode.PURE_TRANSFORM,
-        build_pipeline_version=build_pipeline_version,
-        representation_contract_hash=representation_contract_hash,
-    )
-
-
-def build_binding_finalize_build_intent(
-    context: RecipePublicationContext,
-    *,
-    build_pipeline_version: str,
-    representation_contract_hash: str,
-) -> ServingBuildIntent:
-    return build_recipe_serving_build_intent(
-        context,
-        builder_mode=BuilderMode.BINDING_FINALIZE,
-        build_pipeline_version=build_pipeline_version,
-        representation_contract_hash=str(representation_contract_hash),
-    )
-
-
-def prepare_pure_transform_serving_registration_from_context(
+def prepare_pure_transform_runtime_registration_from_context(
     context: RecipePublicationContext,
     *,
     tensors: Mapping[str, torch.Tensor],
@@ -104,7 +52,7 @@ def prepare_pure_transform_serving_registration_from_context(
     serving_manifest_ref: str | None = None,
     topology_admission_digest: str | None = None,
 ) -> Any:
-    return prepare_pure_transform_serving_registration(
+    return prepare_pure_transform_runtime_registration(
         build_intent=build_pure_transform_build_intent(
             context,
             build_pipeline_version=build_pipeline_version,
@@ -186,7 +134,7 @@ def build_pure_transform_publication_bundle_from_context(
     )
 
 
-def prepare_binding_finalize_serving_registration_from_context(
+def prepare_binding_finalize_runtime_registration_from_context(
     context: RecipePublicationContext,
     *,
     tensors: dict[str, torch.Tensor],
@@ -195,7 +143,7 @@ def prepare_binding_finalize_serving_registration_from_context(
     serving_manifest_ref: str | None = None,
     topology_admission_digest: str | None = None,
 ) -> Any:
-    return prepare_binding_finalize_serving_registration(
+    return prepare_binding_finalize_runtime_registration(
         build_intent=build_binding_finalize_build_intent(
             context,
             build_pipeline_version=build_pipeline_version,
@@ -211,10 +159,10 @@ def prepare_binding_finalize_serving_registration_from_context(
 
 def build_binding_finalize_admission_facts_from_context(
     *,
-    support_level: ServingSupportLevel,
+    support_level: RuntimeSupportLevel,
     topology_admission_digest: str | None = None,
     same_binding_fast_path_validated: bool = True,
-) -> ServingAdmissionFacts:
+) -> RuntimeAdmissionFacts:
     return build_binding_finalize_admission_facts(
         support_level=support_level,
         topology_admission_digest=topology_admission_digest,
@@ -225,7 +173,7 @@ def build_binding_finalize_admission_facts_from_context(
 def build_binding_finalize_publication_bundle_from_context(
     context: RecipePublicationContext,
     *,
-    publication_subject: ServingPublicationSubject | BindingValueRef,
+    publication_subject: RuntimePublicationSubject | BindingValueRef,
     canonical_index: CanonicalIndex,
     build_pipeline_version: str,
     representation_contract_hash: str,
@@ -237,7 +185,7 @@ def build_binding_finalize_publication_bundle_from_context(
     requirements: AssemblyRequirementSetRef | None = None,
     readiness_policy: AssemblyReadinessPolicy | None = None,
     structural_view_ids: tuple[str, ...] = (),
-    admission_facts: ServingAdmissionFacts | None = None,
+    admission_facts: RuntimeAdmissionFacts | None = None,
 ) -> RepresentationPublishSpec:
     if admission_facts is None:
         raise ValueError(
@@ -298,13 +246,13 @@ def complete_pure_transform_recipe_publication(
         if source_tensors is None
         else {str(name): tensor for name, tensor in dict(source_tensors).items()}
     )
-    serving_tensors = materialize_pure_transform_serving_tensors(
+    runtime_tensors = materialize_pure_transform_runtime_tensors(
         recipe,
         resolved_source_tensors,
         target_device=materialization_device,
     )
     return tc.complete_pure_transform_publication(
-        serving_tensors,
+        runtime_tensors,
         build_intent=build_pure_transform_build_intent(
             publication_context,
             build_pipeline_version=build_pipeline_version,
@@ -342,7 +290,7 @@ def complete_pure_transform_recipe_publication(
     "build_pure_transform_build_intent",
     "build_pure_transform_publication_bundle_from_context",
     "build_pure_transform_publication_spec_from_context",
-    "build_recipe_serving_build_intent",
-    "prepare_binding_finalize_serving_registration_from_context",
-    "prepare_pure_transform_serving_registration_from_context",
+    "build_recipe_runtime_build_intent",
+    "prepare_binding_finalize_runtime_registration_from_context",
+    "prepare_pure_transform_runtime_registration_from_context",
 ]
diff --git a/tensorcast/serving/builder/semantic_validation.py b/tensorcast/artifact_runtime/recipe/semantic_validation.py
similarity index 64%
rename from tensorcast/serving/builder/semantic_validation.py
rename to tensorcast/artifact_runtime/recipe/semantic_validation.py
index 6ef6736d..c44a7a58 100644
--- a/tensorcast/serving/builder/semantic_validation.py
+++ b/tensorcast/artifact_runtime/recipe/semantic_validation.py
@@ -1,6 +1,6 @@
 #  Copyright (c) 2026, TensorCast Team.
 
-"""Framework-neutral semantic validation helpers for serving recipes."""
+"""Framework-neutral semantic validation helpers for runtime recipes."""
 
 from __future__ import annotations
 
@@ -14,20 +14,36 @@ def evaluate_semantic_validation_spec(spec: Any, actual_payload: Any) -> Any:
         return None
     actual = _jsonable(actual_payload)
     if spec.kind == "framework_semantic_probes":
-        return actual
+        return _compare_semantic_payload(
+            label="framework probe",
+            expected=_jsonable(spec.payload),
+            actual=actual,
+        )
     if spec.kind == "explicit":
-        expected = _jsonable(spec.payload)
-        if actual != expected:
-            raise RuntimeError(
-                "TensorCast semantic validation failed for explicit probe "
-                f"spec: expected={expected!r}, actual={actual!r}"
-            )
-        return actual
+        return _compare_semantic_payload(
+            label="explicit probe",
+            expected=_jsonable(spec.payload),
+            actual=actual,
+        )
     raise RuntimeError(
         f"Unsupported TensorCast semantic validation spec kind: {spec.kind!r}"
     )
 
 
+def _compare_semantic_payload(*, label: str, expected: Any, actual: Any) -> Any:
+    """Return ``actual`` unchanged unless it differs from ``expected``.
+
+    ``label`` only names the probe kind inside the ``RuntimeError`` message.
+    """
+    mismatch = actual != expected
+    if not mismatch:
+        return actual
+    raise RuntimeError(
+        f"TensorCast semantic validation failed for {label} "
+        f"spec: expected={expected!r}, actual={actual!r}"
+    )
+
+
 def _jsonable(value: Any) -> Any:
     if value is None or isinstance(value, (str, int, float, bool)):
         return value
diff --git a/tensorcast/serving/builder/tensor_parity.py b/tensorcast/artifact_runtime/recipe/tensor_parity.py
similarity index 99%
rename from tensorcast/serving/builder/tensor_parity.py
rename to tensorcast/artifact_runtime/recipe/tensor_parity.py
index c2001c51..6c0cfb7c 100644
--- a/tensorcast/serving/builder/tensor_parity.py
+++ b/tensorcast/artifact_runtime/recipe/tensor_parity.py
@@ -1,5 +1,5 @@
 #  Copyright (c) 2026, TensorCast Team.
-"""Tensor parity diagnostics for TensorCast serving recipes."""
+"""Tensor parity diagnostics for TensorCast runtime recipes."""
 
 from __future__ import annotations
 
@@ -10,18 +10,18 @@
 import torch
 
 from tensorcast.api.store import BindingRealizationEntry
-from tensorcast.proto.daemon.v2 import store_daemon_pb2
-from tensorcast.serving.builder.materialization import (
+from tensorcast.artifact_runtime.recipe.materialization import (
     narrow_by_range_spec,
     narrow_source_view,
 )
-from tensorcast.serving.builder.trace_ir import (
+from tensorcast.artifact_runtime.recipe.trace_ir import (
     CopyPlanEntry,
     MultiRange,
     Range,
     RangeSpec,
     TracePlan,
 )
+from tensorcast.proto.daemon.v2 import store_daemon_pb2
 
 
 class _RangeLike(Protocol):
diff --git a/tensorcast/serving/builder/tensor_schema.py b/tensorcast/artifact_runtime/recipe/tensor_schema.py
similarity index 92%
rename from tensorcast/serving/builder/tensor_schema.py
rename to tensorcast/artifact_runtime/recipe/tensor_schema.py
index 494a38e3..c58cdbf9 100644
--- a/tensorcast/serving/builder/tensor_schema.py
+++ b/tensorcast/artifact_runtime/recipe/tensor_schema.py
@@ -5,14 +5,13 @@
 from __future__ import annotations
 
 from collections.abc import Mapping, Sequence
+from typing import Any
 
 import torch
 
-from tensorcast.serving.builder.compiler import TensorSchemaEntry
-
 
 def validate_tensor_schema_against_tensors(
-    tensor_schema: Sequence[TensorSchemaEntry],
+    tensor_schema: Sequence[Any],
     tensors: Mapping[str, torch.Tensor],
 ) -> None:
     expected = {entry.name: entry for entry in tensor_schema}
@@ -31,7 +30,7 @@ def validate_tensor_schema_against_tensors(
 
 def _validate_tensor_schema_entry(
     name: str,
-    entry: TensorSchemaEntry,
+    entry: Any,
     tensor: torch.Tensor,
 ) -> None:
     shape = tuple(int(dim) for dim in tensor.shape)
diff --git a/tensorcast/serving/builder/trace_cache.py b/tensorcast/artifact_runtime/recipe/trace_cache.py
similarity index 98%
rename from tensorcast/serving/builder/trace_cache.py
rename to tensorcast/artifact_runtime/recipe/trace_cache.py
index d86fb3d4..772cfaaf 100644
--- a/tensorcast/serving/builder/trace_cache.py
+++ b/tensorcast/artifact_runtime/recipe/trace_cache.py
@@ -10,7 +10,7 @@
 from pathlib import Path
 from typing import Any
 
-from tensorcast.serving.builder.trace_ir import (
+from tensorcast.artifact_runtime.recipe.trace_ir import (
     TracePlan,
     trace_plan_from_dict,
     trace_plan_to_dict,
diff --git a/tensorcast/serving/builder/trace_ir.py b/tensorcast/artifact_runtime/recipe/trace_ir.py
similarity index 98%
rename from tensorcast/serving/builder/trace_ir.py
rename to tensorcast/artifact_runtime/recipe/trace_ir.py
index 6e2b133e..dddd0088 100644
--- a/tensorcast/serving/builder/trace_ir.py
+++ b/tensorcast/artifact_runtime/recipe/trace_ir.py
@@ -1,6 +1,6 @@
 #  Copyright (c) 2026, TensorCast Team.
 
-"""Framework-neutral TensorCast serving trace IR."""
+"""Framework-neutral TensorCast runtime trace IR."""
 
 from __future__ import annotations
 
diff --git a/tensorcast/serving/builder/recipe_validation.py b/tensorcast/artifact_runtime/recipe/validation.py
similarity index 62%
rename from tensorcast/serving/builder/recipe_validation.py
rename to tensorcast/artifact_runtime/recipe/validation.py
index 4d0f49f9..74bb8b9b 100644
--- a/tensorcast/serving/builder/recipe_validation.py
+++ b/tensorcast/artifact_runtime/recipe/validation.py
@@ -1,46 +1,28 @@
 #  Copyright (c) 2026, TensorCast Team.
 
-"""Framework-neutral serving recipe fact validation."""
+"""Framework-neutral runtime recipe fact validation."""
 
 from __future__ import annotations
 
 from typing import Any
 
-from tensorcast.types import BuilderMode, FinalizeClass, ServingSupportLevel
-
-_SUPPORT_LEVEL_ORDER = {
-    ServingSupportLevel.BLOCKED: 0,
-    ServingSupportLevel.SOURCE_BIND_BOOTSTRAP_ONLY: 1,
-    ServingSupportLevel.BUILDER_PUBLICATION_READY: 2,
-    ServingSupportLevel.RUNTIME_BIND_SWAP_READY: 3,
-}
-
-
-def serving_support_level_at_least(
-    value: ServingSupportLevel | str,
-    minimum: ServingSupportLevel | str,
-) -> bool:
-    resolved_value = _coerce_support_level(value)
-    resolved_minimum = _coerce_support_level(minimum)
-    return (
-        _SUPPORT_LEVEL_ORDER[resolved_value] >= _SUPPORT_LEVEL_ORDER[resolved_minimum]
-    )
-
-
-def serving_support_level_display_name(value: ServingSupportLevel | str) -> str:
-    return str(_coerce_support_level(value).value)
+from tensorcast.artifact_runtime.readiness import (
+    runtime_support_level_at_least,
+    runtime_support_level_display_name,
+)
+from tensorcast.types import BuilderMode, FinalizeClass, RuntimeSupportLevel
 
 
 def validate_recipe_for_builder_mode(recipe: Any, mode: BuilderMode | str) -> Any:
-    facts = recipe.serving_facts
+    facts = recipe.runtime_facts
     builder_mode = _coerce_builder_mode(mode)
     failures: list[str] = []
-    if not serving_support_level_at_least(
-        facts.support_level, ServingSupportLevel.BUILDER_PUBLICATION_READY
+    if not runtime_support_level_at_least(
+        facts.support_level, RuntimeSupportLevel.BUILDER_PUBLICATION_READY
     ):
         failures.append(
             "support_level="
-            f"{serving_support_level_display_name(facts.support_level)} "
+            f"{runtime_support_level_display_name(facts.support_level)} "
             "is below builder_publication_ready"
         )
     if builder_mode == BuilderMode.PURE_TRANSFORM:
@@ -77,12 +59,6 @@ def validate_recipe_for_builder_mode(recipe: Any, mode: BuilderMode | str) -> An
     return recipe
 
 
-def _coerce_support_level(value: ServingSupportLevel | str) -> ServingSupportLevel:
-    if isinstance(value, ServingSupportLevel):
-        return value
-    return ServingSupportLevel(str(value).strip())
-
-
 def _coerce_builder_mode(value: BuilderMode | str) -> BuilderMode:
     if isinstance(value, BuilderMode):
         return value
@@ -90,7 +66,7 @@ def _coerce_builder_mode(value: BuilderMode | str) -> BuilderMode:
 
 
 __all__ = [
-    "serving_support_level_at_least",
-    "serving_support_level_display_name",
+    "runtime_support_level_at_least",
+    "runtime_support_level_display_name",
     "validate_recipe_for_builder_mode",
 ]
diff --git a/tensorcast/artifact_runtime/reload.py b/tensorcast/artifact_runtime/reload.py
new file mode 100644
index 00000000..ada49b5e
--- /dev/null
+++ b/tensorcast/artifact_runtime/reload.py
@@ -0,0 +1,89 @@
+#  Copyright (c) 2026, TensorCast Team.
+"""Artifact runtime reload actions."""
+
+from __future__ import annotations
+
+from collections.abc import Callable
+
+from tensorcast.artifact_runtime.artifact.resolver import RuntimeArtifactResolver
+from tensorcast.artifact_runtime.attachment import (
+    RuntimeAttachment,
+    RuntimeBindingState,
+)
+from tensorcast.artifact_runtime.errors import ConfigConflictError
+from tensorcast.artifact_runtime.host import RuntimeHostCapabilities
+from tensorcast.artifact_runtime.intent import ExistingRuntimeArtifact, RequestContext
+from tensorcast.artifact_runtime.lifecycle import ArtifactRuntimeIntegration
+from tensorcast.artifact_runtime.locator import ArtifactLocator
+from tensorcast.artifact_runtime.policy import (
+    RuntimePolicy,
+    merge_runtime_reload_extra_config,
+    normalize_runtime_reload_request_payload,
+)
+from tensorcast.artifact_runtime.publication import replica as replica_publication
+
+
+def _reject_local_reload_artifact_locator(artifact_locator: object) -> None:
+    """Raise ``ConfigConflictError`` if the locator's ``kind`` is ``local_path``."""
+    is_local = getattr(artifact_locator, "kind", None) == "local_path"
+    if is_local:
+        prefix = "TensorCast runtime reload requires a durable artifact locator, "
+        raise ConfigConflictError(prefix + "not a local source selector")
+
+
+def reload_runtime_attachment(
+    *,
+    current_attachment: RuntimeAttachment | RuntimeBindingState,
+    artifact_locator: object,
+    policy: object | None,
+    runtime_host: RuntimeHostCapabilities,
+    runtime_context: RequestContext,
+    ensure_runtime_initialized: Callable[[], None],
+    model: object | None = None,
+    contract_identity: str | None = None,
+    runtime_resolver: RuntimeArtifactResolver | None = None,
+    profile_sink: object | None = None,
+) -> RuntimeAttachment:
+    """Reload an existing artifact-backed runtime binding.
+
+    The locator and policy type checks raise ``ConfigConflictError`` and run
+    before ``ensure_runtime_initialized`` is called.
+    """
+    _reject_local_reload_artifact_locator(artifact_locator)
+    if not isinstance(artifact_locator, ArtifactLocator):
+        raise ConfigConflictError(
+            "TensorCast runtime reload requires an ArtifactLocator"
+        )
+    if policy is not None and not isinstance(policy, RuntimePolicy):
+        raise ConfigConflictError(
+            "TensorCast runtime reload requires a RuntimePolicy or None"
+        )
+    if isinstance(current_attachment, RuntimeAttachment):
+        replica_publication.reject_reload_with_active_publication(current_attachment)
+    ensure_runtime_initialized()
+    if isinstance(current_attachment, RuntimeAttachment):
+        current_state = current_attachment.state
+    else:
+        current_state = current_attachment
+    runtime_model = model
+    if runtime_model is None:
+        runtime_model = getattr(current_attachment, "model", None)
+    integration = ArtifactRuntimeIntegration(
+        resolver=runtime_resolver,
+        profile_sink=profile_sink,
+        host=runtime_host,
+    )
+    return integration.reload(
+        current_state,
+        ExistingRuntimeArtifact(artifact_locator=artifact_locator, policy=policy),
+        runtime_context,
+        model=runtime_model,
+        contract_identity=contract_identity,
+    )
+
+
+__all__ = [
+    "merge_runtime_reload_extra_config",  # re-exported from policy
+    "normalize_runtime_reload_request_payload",  # re-exported from policy
+    "reload_runtime_attachment",
+]
diff --git a/tensorcast/artifact_runtime/request_facts.py b/tensorcast/artifact_runtime/request_facts.py
new file mode 100644
index 00000000..64da7463
--- /dev/null
+++ b/tensorcast/artifact_runtime/request_facts.py
@@ -0,0 +1,273 @@
+#  Copyright (c) 2026, TensorCast Team.
+
+"""Fail-closed request fact resolution for model-runtime realization."""
+
+from __future__ import annotations
+
+import json
+from collections.abc import Mapping
+from dataclasses import dataclass, replace
+from typing import Any
+
+import torch
+
+from tensorcast.artifact_runtime.errors import ArtifactRuntimeIntegrationError
+from tensorcast.artifact_runtime.intent import RequestContext
+
+
+class ModelRuntimeRequestFactsError(ArtifactRuntimeIntegrationError):
+    """Raised when model-runtime spec, request, and host facts disagree."""
+
+    code = "invalid_argument"
+    operation = "model_runtime_request"
+
+
+@dataclass(frozen=True)
+class ResolvedModelRuntimeRequestFacts:
+    spec: Any
+    context: Any
+
+
+def resolve_model_runtime_request_facts(
+    *,
+    spec: Any,
+    runtime_context: Any | None,
+    host_context: Any | None = None,
+    host_target_device: Any | None = None,
+) -> ResolvedModelRuntimeRequestFacts:
+    """Resolve request facts without silently preferring one authority.
+
+    Device, topology, member, adapter version and runtime ABI version are
+    cross-checked across spec, runtime context and host; any disagreement
+    raises ``ModelRuntimeRequestFactsError``. A ``None`` runtime context is
+    replaced by a ``RequestContext`` seeded from ``spec.device``.
+    """
+
+    context = runtime_context
+    if context is None:
+        context = RequestContext(target_device=getattr(spec, "device", None))
+    spec, context = _resolve_device_fact(
+        spec=spec,
+        context=context,
+        host_target_device=host_target_device,
+    )
+    for placement_field in ("topology", "member"):
+        spec = _resolve_runtime_fact(
+            spec=spec,
+            context=context,
+            host_context=host_context,
+            field_name=placement_field,
+            host_value=_placement_value(host_context, placement_field),
+        )
+    spec = _resolve_runtime_fact(
+        spec=spec,
+        context=context,
+        host_context=host_context,
+        field_name="adapter_version",
+        host_value=_optional_text(getattr(host_context, "adapter_version", None)),
+    )
+    spec = _resolve_runtime_fact(
+        spec=spec,
+        context=context,
+        host_context=host_context,
+        field_name="runtime_abi_version",
+        context_field_names=("runtime_abi_version", "serving_abi_version"),
+        host_value=_optional_text(getattr(host_context, "serving_abi_version", None)),
+    )
+    return ResolvedModelRuntimeRequestFacts(spec=spec, context=context)
+
+
+def _resolve_device_fact(
+    *,
+    spec: Any,
+    context: Any,
+    host_target_device: Any | None,
+) -> tuple[Any, Any]:
+    """Agree on one target device and back-fill it where it is missing."""
+    spec_device = getattr(spec, "device", None)
+    context_device = getattr(context, "target_device", None)
+    device = _single_resolved_value(
+        (
+            ("spec.device", spec_device),
+            ("runtime_context.target_device", context_device),
+            ("host.target_device", host_target_device),
+        ),
+        normalize=_normalized_device,
+        field_name="target_device",
+    )
+    if device is not None and spec_device is None:
+        spec = _replace_field(
+            spec,
+            field_name="device",
+            new_value=device,
+            subject="model_runtime spec",
+        )
+    if device is not None and context_device is None:
+        context = _replace_field(
+            context,
+            field_name="target_device",
+            new_value=device,
+            subject="model_runtime runtime_context",
+        )
+    return spec, context
+
+
+def _resolve_runtime_fact(
+    *,
+    spec: Any,
+    context: Any,
+    host_context: Any | None,
+    field_name: str,
+    host_value: Any | None,
+    context_field_names: tuple[str, ...] | None = None,
+) -> Any:
+    """Cross-check one fact and copy it onto ``spec`` when ``spec`` lacks it."""
+    del host_context  # accepted by the signature but not consulted here
+    spec_value = getattr(spec, field_name, None)
+    context_value = _first_present_attr(context, context_field_names or (field_name,))
+    agreed = _single_resolved_value(
+        (
+            (f"spec.{field_name}", spec_value),
+            (f"runtime_context.{field_name}", context_value),
+            (f"host.{field_name}", host_value),
+        ),
+        normalize=lambda value: _normalized_fact(field_name, value),
+        field_name=field_name,
+    )
+    if agreed is None or spec_value is not None:
+        return spec
+    return _replace_field(
+        spec,
+        field_name=field_name,
+        new_value=agreed,
+        subject="model_runtime spec",
+    )
+
+
+def _single_resolved_value(
+    facts: tuple[tuple[str, Any | None], ...],
+    *,
+    normalize: Any,
+    field_name: str,
+) -> Any | None:
+    """Return the first present fact, raising when present facts disagree.
+
+    Facts whose raw or normalized value is ``None`` are ignored. Agreement is
+    judged on normalized values; the raw value of the first present fact is
+    returned, or ``None`` when no fact is present.
+    """
+    seen: list[tuple[str, Any, Any]] = []
+    # Keep only facts that are present both before and after normalization.
+    for label, raw in facts:
+        canonical = None if raw is None else normalize(raw)
+        if canonical is not None:
+            seen.append((label, raw, canonical))
+    if not seen:
+        return None
+    # The first present fact is the reference every later fact must match.
+    _, first_raw, first_canonical = seen[0]
+    if any(canonical != first_canonical for _l, _r, canonical in seen[1:]):
+        raise ModelRuntimeRequestFactsError(
+            f"model_runtime {field_name} facts disagree",
+            details={label: canonical for label, _r, canonical in seen},
+        )
+    # Hand back the raw (un-normalized) value of the reference fact.
+    return first_raw
+
+
+def _replace_field(
+    obj: Any,
+    *,
+    field_name: str,
+    new_value: Any,
+    subject: str,
+) -> Any:
+    """Return a copy of ``obj`` with ``field_name`` set to ``new_value``."""
+    if callable(copier := getattr(obj, "model_copy", None)):
+        return copier(update={field_name: new_value})
+    try:
+        return replace(obj, **{field_name: new_value})
+    except TypeError as exc:
+        raise ModelRuntimeRequestFactsError(
+            f"{subject} must be dataclass-compatible when {field_name} is omitted",
+            details={"field": field_name},
+        ) from exc
+
+
+def _normalized_device(value: Any) -> str:
+    try:
+        return str(torch.device(value))
+    except Exception as exc:  # noqa: BLE001
+        raise ModelRuntimeRequestFactsError(
+            f"model_runtime target_device is invalid: {value!r}",
+            details={"target_device": repr(value)},
+        ) from exc
+
+
+def _normalized_fact(field_name: str, value: Any) -> Any | None:
+    """Normalize ``value`` with the identity rule registered for ``field_name``."""
+    # Structured facts get dedicated identity functions; every other fact is
+    # compared as stripped text.
+    normalizers = {"topology": _topology_identity, "member": _member_identity}
+    return normalizers.get(field_name, _optional_text)(value)
+
+
+def _topology_identity(value: Any) -> Any | None:
+    """Identify a topology by its schema digest, else by its stable value."""
+    digest = _optional_text(getattr(value, "schema_topology_digest", None))
+    tagged = ("schema_topology_digest", digest)
+    return _stable_value(value) if digest is None else tagged
+
+
+def _member_identity(value: Any) -> Any | None:
+    """Identify a member by id, index, count and group, else by stable value."""
+    member_id = _optional_text(getattr(value, "member_id", None))
+    if member_id is None:
+        return _stable_value(value)
+    # A missing index/count attribute defaults to 0 and 1 respectively.
+    index = int(getattr(value, "member_index", 0))
+    count = int(getattr(value, "member_count", 1))
+    group_id = _optional_text(getattr(value, "group_id", None))
+    return (member_id, index, count, group_id)
+
+
+def _stable_value(value: Any) -> Any | None:
+    """Reduce ``value`` to a comparable form (JSON text where possible)."""
+    if value is None:
+        return None
+    # Objects exposing ``model_dump`` are serialized from their dumped form.
+    if callable(dump := getattr(value, "model_dump", None)):
+        return _stable_json(dump(mode="python"))
+    # Mappings are serialized directly; anything else is compared as-is.
+    return _stable_json(value) if isinstance(value, Mapping) else value
+
+
+def _stable_json(value: Any) -> str:
+    return json.dumps(value, sort_keys=True, separators=(",", ":"), default=str)
+
+
+def _placement_value(host_context: Any | None, field_name: str) -> Any | None:
+    """Read ``host_context.placement.<field_name>``; ``None`` if a link is absent."""
+    return getattr(getattr(host_context, "placement", None), field_name, None)
+
+
+def _first_present_attr(value: Any, names: tuple[str, ...]) -> Any | None:
+    """Return the first non-``None`` attribute of ``value`` among ``names``."""
+    # Lazy on purpose: attributes listed after the first hit are never read.
+    candidates = (getattr(value, name, None) for name in names)
+    found = next((attr for attr in candidates if attr is not None), None)
+    return found
+
+
+def _optional_text(value: Any) -> str | None:
+    """Return ``str(value)`` stripped, or ``None`` when absent or blank."""
+    if value is None:
+        return None
+    return str(value).strip() or None
+
+
+__all__ = [
+    "ModelRuntimeRequestFactsError",
+    "ResolvedModelRuntimeRequestFacts",
+    "resolve_model_runtime_request_facts",
+]
diff --git a/tensorcast/serving/source_catalog.py b/tensorcast/artifact_runtime/source.py
similarity index 62%
rename from tensorcast/serving/source_catalog.py
rename to tensorcast/artifact_runtime/source.py
index e0eaef84..79e4883c 100644
--- a/tensorcast/serving/source_catalog.py
+++ b/tensorcast/artifact_runtime/source.py
@@ -1,6 +1,6 @@
 #  Copyright (c) 2026, TensorCast Team.
 
-"""Source catalog primitives for serving local bootstrap and builders."""
+"""Artifact runtime source catalog primitives."""
 
 from __future__ import annotations
 
@@ -12,6 +12,7 @@
 from dataclasses import dataclass
 from pathlib import Path
 from types import MappingProxyType
+from typing import Any
 
 import torch
 
@@ -21,7 +22,9 @@
     canonical_index_to_bytes,
 )
 from tensorcast.api.store.types import CanonicalIndex
+from tensorcast.artifact_runtime.errors import SourceSubjectError
 from tensorcast.common.identity import ArtifactIdKind, validate_artifact_id
+from tensorcast.types import PublicDiskSourceHandle
 
 _SOURCE_CATALOG_FINGERPRINT_VERSION = "tensorcast-source-catalog-v1"
 SOURCE_CATALOG_SCHEMA_VERSION = 1
@@ -69,6 +72,165 @@ def __post_init__(self) -> None:
         )
 
 
+@dataclass(frozen=True)
+class SourceSubject:
+    """Framework-facing source subject with a durable source artifact root."""
+
+    artifact_ref: str
+    subject: Any
+    source_kind: str = "opaque"
+    metadata_fingerprint: str | None = None
+
+    def broadcast_payload(self) -> dict[str, Any]:
+        """Return the mapping form of this subject for broadcast."""
+        subject_payload = self.subject
+        if self.source_kind == "public_disk":
+            subject_payload = _public_disk_source_payload(subject_payload)
+        return {
+            "kind": self.source_kind,
+            "artifact_ref": self.artifact_ref,
+            "subject": subject_payload,
+            "metadata_fingerprint": self.metadata_fingerprint,
+        }
+
+    def profile_fields(self) -> dict[str, Any]:
+        """Return flat profile fields, leaving out values that are ``None``."""
+        subject = self.subject
+        profile: dict[str, Any] = {
+            "artifact_ref": self.artifact_ref,
+            "source_kind": self.source_kind,
+        }
+        if self.metadata_fingerprint is not None:
+            profile["metadata_fingerprint"] = self.metadata_fingerprint
+        # Index blobs are reported by size only, never by content.
+        if (canonical := getattr(subject, "canonical_index_bytes", None)) is not None:
+            profile["canonical_index_bytes"] = len(canonical)
+        if (raw_index := getattr(subject, "source_index_bytes", None)) is not None:
+            profile["source_index_bytes"] = len(bytes(raw_index or b""))
+        for attr_name in ("format_kind", "metadata_capability"):
+            attr_value = getattr(subject, attr_name, None)
+            if attr_value is not None:
+                profile[attr_name] = str(attr_value or "")
+        return profile
+
+
+def _optional_str(value: Any) -> str | None:
+    """Return ``str(value)``, or ``None`` when absent or empty (no stripping)."""
+    if value is None:
+        return None
+    return str(value) or None
+
+
+def _optional_text(value: Any) -> str | None:
+    return _optional_str(value)
+
+
+def _optional_bytes(value: Any) -> bytes | None:
+    """Return ``bytes(value)``, or ``None`` when absent or empty."""
+    if value is None:
+        return None
+    return bytes(value) or None
+
+
+def _enum_wire_value(value: Any) -> str | int | None:
+    """Return an enum-like value's ``.value`` as ``str``/``int`` wire data."""
+    if value is None:
+        return None
+    wire = getattr(value, "value", value)
+    # Anything that is not already ``str``/``int`` is stringified.
+    return wire if isinstance(wire, (str, int)) else str(wire)
+
+
+def _public_disk_source_payload(source: Any) -> dict[str, Any]:
+    """Flatten a public-disk source handle into plain wire values."""
+
+    def attr(name: str, default: Any = None) -> Any:
+        return getattr(source, name, default)
+
+    # ``canonical_index_bytes`` is read directly; other fields have fallbacks.
+    return {
+        "path": str(attr("path", "") or ""),
+        "canonical_index_bytes": bytes(source.canonical_index_bytes),
+        "artifact_id": str(attr("artifact_id", "") or ""),
+        "generation": int(attr("generation", 0) or 0),
+        "verify_checksums": bool(attr("verify_checksums", True)),
+        "trusted_content_artifact_id": _optional_str(
+            attr("trusted_content_artifact_id")
+        ),
+        "source_index_bytes": _optional_bytes(attr("source_index_bytes")),
+        "format_kind": _enum_wire_value(attr("format_kind")),
+        "metadata_capability": _enum_wire_value(attr("metadata_capability")),
+        "resolution_strategy": _enum_wire_value(attr("resolution_strategy")),
+        "validation_mode": _enum_wire_value(attr("validation_mode")),
+        "policy_id": _optional_str(attr("policy_id")),
+        "exact_size_bytes": int(attr("exact_size_bytes", 0) or 0),
+    }
+
+
+def _source_subject_from_handle(source: Any) -> SourceSubject:
+    """Wrap a resolved handle as a ``public_disk`` subject keyed by its id."""
+    artifact_ref = str(getattr(source, "artifact_id", "") or "")
+    if artifact_ref:
+        return SourceSubject(
+            artifact_ref=artifact_ref, subject=source, source_kind="public_disk"
+        )
+    # NOTE(review): raises RuntimeError, unlike the SourceSubjectError used below.
+    raise RuntimeError("TensorCast source subject is missing a source artifact_id")
+
+
+def resolve_source_subject(
+    path: str,
+    *,
+    verify_checksums: bool,
+) -> SourceSubject:
+    """Resolve ``path`` as a public-disk source and wrap it as a subject.
+
+    ``verify_checksums`` is forwarded unchanged to the store resolver.
+    """
+    from tensorcast.api.store import resolve_public_disk_source
+
+    handle = resolve_public_disk_source(path, verify_checksums=verify_checksums)
+    return _source_subject_from_handle(handle)
+
+
+def source_subject_from_broadcast_payload(payload: Mapping[str, Any]) -> SourceSubject:
+    """Rebuild a ``SourceSubject`` from its broadcast mapping form."""
+    fields = dict(payload)
+    if "kind" not in fields:
+        raise SourceSubjectError(
+            "TensorCast source subject broadcast payload is missing kind"
+        )
+    source_kind = str(fields.get("kind") or "")
+    artifact_ref = str(fields.get("artifact_ref") or "")
+    if not artifact_ref:
+        raise SourceSubjectError(
+            "TensorCast source subject broadcast payload is missing artifact_ref"
+        )
+    subject: Any = fields.get("subject")
+    # Only ``public_disk`` subjects are rehydrated into a handle; any other
+    # kind passes its subject through untouched.
+    if source_kind == "public_disk":
+        if not isinstance(subject, Mapping):
+            raise SourceSubjectError(
+                "TensorCast public_disk source subject payload must be a mapping"
+            )
+        subject = PublicDiskSourceHandle(**dict(subject))
+    return SourceSubject(
+        artifact_ref=artifact_ref,
+        subject=subject,
+        source_kind=source_kind,
+        metadata_fingerprint=_optional_text(fields.get("metadata_fingerprint")),
+    )
+
+
+def source_subject_broadcast_payload(subject: SourceSubject) -> dict[str, Any]:
+    return subject.broadcast_payload()
+
+
+def is_public_disk_source_subject(subject: Any) -> bool:
+    return isinstance(subject, PublicDiskSourceHandle)
+
+
 def source_catalog_from_selected_safetensors(
     directory: Path | str,
     *,
@@ -275,15 +437,20 @@ def resolve_source_artifact_ref(source_artifact_ref: str) -> str:
 
 
 __all__ = [
+    "SOURCE_CATALOG_SCHEMA_VERSION",
     "SourceCatalog",
     "SourceFileEntry",
     "SourceManifest",
+    "SourceSubject",
     "SourceTensorMeta",
-    "SOURCE_CATALOG_SCHEMA_VERSION",
     "compute_source_metadata_fingerprint",
+    "is_public_disk_source_subject",
     "resolve_source_artifact_ref",
+    "resolve_source_subject",
     "source_catalog_from_all_safetensors_dir",
     "source_catalog_from_canonical_index",
     "source_catalog_from_manifest",
     "source_catalog_from_selected_safetensors",
+    "source_subject_broadcast_payload",
+    "source_subject_from_broadcast_payload",
 ]
diff --git a/tensorcast/serving/state.py b/tensorcast/artifact_runtime/state.py
similarity index 99%
rename from tensorcast/serving/state.py
rename to tensorcast/artifact_runtime/state.py
index b6a3d1ff..aca5bad6 100644
--- a/tensorcast/serving/state.py
+++ b/tensorcast/artifact_runtime/state.py
@@ -10,7 +10,7 @@
 from typing import TYPE_CHECKING, Any
 
 if TYPE_CHECKING:
-    from tensorcast.serving.runtime_attachment import (
+    from tensorcast.artifact_runtime.attachment import (
         RuntimeAttachment,
         RuntimeBindingState,
         RuntimeBindingView,
diff --git a/tensorcast/artifact_runtime/testing.py b/tensorcast/artifact_runtime/testing.py
new file mode 100644
index 00000000..d8a5c56a
--- /dev/null
+++ b/tensorcast/artifact_runtime/testing.py
@@ -0,0 +1,910 @@
+#  Copyright (c) 2026, TensorCast Team.
+"""Reusable conformance checks for framework artifact-runtime integrations."""
+
+from __future__ import annotations
+
+import weakref
+from collections.abc import Iterable, Mapping
+from contextlib import contextmanager
+from dataclasses import dataclass, field
+from types import ModuleType, SimpleNamespace
+from typing import Any, cast
+
+import torch
+
+import tensorcast as tc
+import tensorcast.artifact_runtime.lifecycle as _integration
+
+
+@dataclass(frozen=True)
+class ConformanceResult:
+    """Result from a lightweight artifact runtime conformance check."""
+
+    checks: Mapping[str, bool] = field(default_factory=dict)
+    messages: Mapping[str, str] = field(default_factory=dict)
+    level: str | None = None
+
+    @property
+    def failed_checks(self) -> tuple[str, ...]:
+        """Names of the checks that did not pass, in ``checks`` order."""
+        return tuple(name for name, ok in self.checks.items() if not ok)
+
+    def failure_summary(self) -> str:
+        """Describe every failed check together with its remediation hint."""
+        failed = self.failed_checks
+        if not failed:
+            return "TensorCast artifact-runtime conformance checks passed"
+        scope = f" for {self.level}" if self.level else ""
+        header = f"TensorCast artifact-runtime conformance checks failed{scope}:"
+        default_hint = "No remediation hint available"
+        hints = (self.messages.get(name, default_hint) for name in failed)
+        bullets = [f"- {name}: {hint}" for name, hint in zip(failed, hints)]
+        return "\n".join([header, *bullets])
+
+    def assert_passed(self) -> None:
+        """Raise ``AssertionError`` carrying the summary if any check failed."""
+        if self.failed_checks:
+            raise AssertionError(self.failure_summary())
+
+
+def _result(
+    *,
+    level: str,
+    checks: Mapping[str, bool],
+    messages: Mapping[str, str],
+) -> ConformanceResult:
+    result = ConformanceResult(checks=checks, messages=messages, level=level)
+    result.assert_passed()  # raises AssertionError if any check is falsy
+    return result  # only reached when every check passed
+
+
+_PUBLIC_BOUNDARY_MESSAGES = {
+    "hides_runtime_session": (
+        "Do not expose ArtifactRuntimeSession from the public runtime API; "
+        "frameworks should use Artifact.realize(... model_runtime ...) and "
+        "artifact-runtime actions."
+    ),
+    "has_attachment": (
+        "Expose RuntimeAttachment as the framework-held lifecycle token."
+    ),
+    "has_request_context": (
+        "Expose RequestContext so framework facts enter lifecycle calls through "
+        "one typed context object."
+    ),
+    "hides_admin_local_bootstrap": (
+        "Keep admin/local-bootstrap override DTOs out of the framework runtime "
+        "module; route them through admin/offline surfaces."
+    ),
+    "hides_low_level_bind": (
+        "Do not expose bind/swap/restore helpers from the runtime module; "
+        "frameworks should use artifact-runtime start/reload/publication actions."
+    ),
+    "hides_serving_locator_policy": (
+        "Keep serving-rooted locator and policy aliases out of "
+        "the public runtime API; use ArtifactLocator, RuntimePolicy, and "
+        "runtime reload helpers."
+    ),
+    "hides_legacy_config": (
+        "Keep serving-rooted config and start-plan names out of "
+        "the public runtime API; use TensorCastRuntimeConfig and "
+        "plan_runtime_start."
+    ),
+    "hides_projection_dtos": (
+        "Runtime endpoint projection DTOs live in tensorcast.artifact_runtime.view."
+    ),
+    "hides_state_helpers": (
+        "Model attribute helpers live in tensorcast.artifact_runtime.state."
+    ),
+}
+
+_ARTIFACT_RUNTIME_BOUNDARY_MESSAGES = {
+    "has_artifact_realization_spec": (
+        "Expose ArtifactRealizationSpec so frameworks can request model_runtime "
+        "realization through the artifact API."
+    ),
+    "has_runtime_host": (
+        "Expose RuntimeHostCapabilities as the framework-provided host surface."
+    ),
+    "has_runtime_context": (
+        "Expose RuntimeRequestContext so framework facts enter runtime actions "
+        "through one typed context object."
+    ),
+    "has_artifact_locator": (
+        "Expose ArtifactLocator for durable artifact runtime reload requests."
+    ),
+    "has_runtime_policy": ("Expose RuntimePolicy for typed runtime reload admission."),
+    "has_reload_action": (
+        "Expose reload_runtime_attachment for runtime reload without a serving "
+        "session object."
+    ),
+    "has_publication_actions": (
+        "Expose runtime replica publish/retire actions without requiring a "
+        "runtime session object."
+    ),
+    "hides_runtime_session": (
+        "The tensorcast root runtime path must not expose ArtifactRuntimeSession; "
+        "frameworks should use Artifact.realize(... model_runtime ...) instead."
+    ),
+    "hides_legacy_serving_dtos": (
+        "Keep legacy serving-rooted DTO aliases off the tensorcast root runtime "
+        "surface."
+    ),
+}
+
+_FRAMEWORK_ISOLATION_MESSAGES = {
+    "no_vllm_imports": (
+        "Reference and conformance frameworks must not import vLLM. Move any "
+        "needed generic fact extraction into TensorCast hosts or testing helpers."
+    ),
+    "no_internal_runtime_imports": (
+        "Framework examples should not import TensorCast private/internal "
+        "runtime modules."
+    ),
+    "no_serving_imports": (
+        "Framework examples should not import the removed tensorcast.serving "
+        "package; use tensorcast.artifact_runtime host/testing surfaces instead."
+    ),
+}
+
+
+def assert_public_artifact_runtime_boundary(
+    tc_module: ModuleType = tc,
+) -> ConformanceResult:
+    """Check that the root API exposes artifact-runtime, not serving-session, APIs."""
+
+    exported = frozenset(getattr(tc_module, "__all__", ()))
+    # Both publication actions must be exported together.
+    publication_actions = {"publish_runtime_replica", "retire_runtime_replica"}
+    # None of these serving-rooted names may appear in ``__all__``.
+    legacy_serving_dtos = {
+        "ServingBuildIntent",
+        "ServingArtifactManifest",
+        "ServingRuntimePolicy",
+        "ServingBindingTarget",
+        "ServingBindingSetTarget",
+        "PrefetchedServingBinding",
+        "PrefetchedServingBindingSet",
+    }
+    return _result(
+        level="public-artifact-runtime-boundary",
+        checks={
+            "has_artifact_realization_spec": "ArtifactRealizationSpec" in exported,
+            "has_runtime_host": "RuntimeHostCapabilities" in exported,
+            "has_runtime_context": "RuntimeRequestContext" in exported,
+            "has_artifact_locator": "ArtifactLocator" in exported,
+            "has_runtime_policy": "RuntimePolicy" in exported,
+            "has_reload_action": "reload_runtime_attachment" in exported,
+            "has_publication_actions": publication_actions <= exported,
+            "hides_runtime_session": "ArtifactRuntimeSession" not in exported,
+            "hides_legacy_serving_dtos": legacy_serving_dtos.isdisjoint(exported),
+        },
+        messages=_ARTIFACT_RUNTIME_BOUNDARY_MESSAGES,
+    )
+
+
+def assert_public_runtime_boundary(runtime_module: ModuleType) -> ConformanceResult:
+    """Check that runtime imports expose framework APIs, not admin helpers."""
+
+    exported = frozenset(getattr(runtime_module, "__all__", ()))
+    # Insertion order below is the order in which failures are reported.
+    checks = {
+        "hides_runtime_session": "ArtifactRuntimeSession" not in exported,
+        "has_attachment": "RuntimeAttachment" in exported,
+        "has_request_context": "RequestContext" in exported,
+        "hides_admin_local_bootstrap": "AdminLocalSourceBootstrap" not in exported
+        and "_AdminLocalSourceBootstrap" not in exported,
+        "hides_low_level_bind": "bind_runtime_artifact" not in exported
+        and "swap_runtime_artifact" not in exported
+        and "restore_retained_binding" not in exported,
+        "hides_serving_locator_policy": {
+            "ServingArtifactLocator",
+            "ServingPolicy",
+            "merge_serving_reload_extra_config",
+            "normalize_serving_reload_request_payload",
+        }.isdisjoint(exported),
+        "hides_legacy_config": {
+            "ServingConfig",
+            "ServingStartPlan",
+            "ServingStartPlanError",
+            "plan_serving_start",
+        }.isdisjoint(exported)
+        and {"TensorCastRuntimeConfig", "plan_runtime_start"} <= exported,
+        "hides_projection_dtos": {
+            "PublishedReplicaProjection",
+            "ReloadResponseProjection",
+            "RuntimeEndpointProjection",
+            "SourceSelectionProjection",
+            "WeightVersionProjection",
+        }.isdisjoint(exported),
+        "hides_state_helpers": {
+            "ModelAttributeRuntimeState",
+            "RuntimeAttachmentRecord",
+            "RuntimeAttachmentStore",
+        }.isdisjoint(exported),
+    }
+    return _result(
+        level="public-runtime-boundary",
+        checks=checks,
+        messages=_PUBLIC_BOUNDARY_MESSAGES,
+    )
+
+
+def assert_framework_isolation(module_names: Iterable[str]) -> ConformanceResult:
+    """Check that a framework avoids vLLM and ``tensorcast.serving`` imports."""
+
+    names = tuple(str(name) for name in module_names)
+
+    def imports_package(package: str) -> bool:
+        return any(n == package or n.startswith(package + ".") for n in names)
+
+    checks = {
+        "no_vllm_imports": not imports_package("vllm"),
+        # NOTE(review): every name matched here also fails ``no_serving_imports``.
+        "no_internal_runtime_imports": not any(
+            name.startswith("tensorcast.serving.internal") for name in names
+        ),
+        "no_serving_imports": not imports_package("tensorcast.serving"),
+    }
+    return _result(
+        level="framework-isolation",
+        checks=checks,
+        messages=_FRAMEWORK_ISOLATION_MESSAGES,
+    )
+
+
+class FakeArtifactView:
+    def __init__(self, names: Iterable[str] = ()) -> None:
+        self.names = tuple(names)  # materialized once into an immutable tuple
+
+    def bind(self, **kwargs: Any) -> "FakeBinding":
+        binding = FakeBinding()
+        binding.names = self.names
+        binding.bind_kwargs = kwargs  # keeps the keyword arguments bind() received
+        return binding
+
+
+class FakeArtifact:
+    def subset(self, names: Iterable[str]) -> FakeArtifactView:
+        return FakeArtifactView(names)
+
+
+class FakeBinding:
+    def __init__(self) -> None:
+        self.tensors = {"w": torch.ones((1,), dtype=torch.float16)}
+        self.binding_layout_id = "layout-1"
+        self.current_value = SimpleNamespace(
+            binding_id="binding-1",
+            binding_layout_id="layout-1",
+            binding_value_id="value-1",
+            seal_generation=1,
+        )
+        self.names: tuple[str, ...] = ()
+        self.bind_kwargs: dict[str, Any] = {}
+        self.swapped: tuple[object, dict[str, Any]] | None = None  # set by swap()
+        self.published_lease_id: str | None = None
+        self.published_replica_id: str | None = None
+        self.publish_calls = 0  # incremented by publish_replica()
+        self.retire_calls: list[float | None] = []  # one entry per retire() call
+        self.closed = False  # flipped to True by close()
+
+    def swap(self, artifact: object, **kwargs: Any) -> "FakeBinding":
+        self.swapped = (artifact, kwargs)  # remembers the most recent swap call
+        self.tensors = {"w": torch.full((1,), 2.0, dtype=torch.float16)}
+        return self
+
+    def publish_replica(self) -> object:
+        self.publish_calls += 1
+        self.published_lease_id = "lease-1"
+        self.published_replica_id = "replica-1"
+        return SimpleNamespace(
+            binding_id=self.current_value.binding_id,
+            binding_layout_id=self.current_value.binding_layout_id,
+            binding_value_id=self.current_value.binding_value_id,
+            seal_generation=self.current_value.seal_generation,
+            replica_id=self.published_replica_id,
+            lease_id=self.published_lease_id,
+            serving_artifact_id="mi2:serving",
+            device_uuid="gpu-0",
+        )
+
+    def retire(self, *, drain_timeout_s: float | None = None) -> None:
+        self.retire_calls.append(drain_timeout_s)
+        self.published_lease_id = None  # clears what publish_replica() set
+        self.published_replica_id = None
+
+    def close(self) -> None:
+        self.closed = True
+
+
+class FakeRuntimeModel:
+    def __init__(self) -> None:
+        self.tensors = {"w": torch.empty((1,), dtype=torch.float16, device="meta")}
+
+
+class FakeFrameworkHost:
+    def identity(self, model_config: object) -> _integration.FrameworkIdentity:
+        del model_config
+        return _integration.FrameworkIdentity(
+            framework_name="fakefw",
+            framework_version="fakefw-v1",
+            adapter_version="adapter-v1",
+            serving_abi_version="abi-v1",
+        )
+
+    def prepare_model_construction(
+        self,
+        framework_config: object | None,
+        model_config: object | None,
+    ) -> None:
+        del framework_config, model_config
+
+    def build_meta_model(
+        self,
+        framework_config: object | None,
+        model_config: object | None,
+    ) -> FakeRuntimeModel:
+        del framework_config, model_config
+        return FakeRuntimeModel()
+
+    def build_runtime_model(
+        self,
+        framework_config: object | None,
+        model_config: object | None,
+        target_device: object | None,
+    ) -> FakeRuntimeModel:
+        del framework_config, model_config, target_device
+        return FakeRuntimeModel()
+
+    def assert_model_ready_for_runtime_binding(
+        self,
+        model: FakeRuntimeModel,
+        *,
+        context: object,
+    ) -> None:
+        del context
+        if "w" not in model.tensors:
+            raise AssertionError("fake model missing runtime tensor 'w'")
+
+    def semantic_probes(
+        self,
+        model: FakeRuntimeModel,
+        model_config: object | None,
+    ) -> dict[str, object]:
+        del model, model_config
+        return {}
+
+
+class FakePlacementHost:
+    def identity_facts(
+        self,
+        framework_config: object | None,
+    ) -> _integration.PlacementIdentityFacts:
+        del framework_config
+        return _integration.PlacementIdentityFacts(
+            tensor_parallel_rank=0,
+            tensor_parallel_size=1,
+            pipeline_parallel_rank=0,
+            pipeline_parallel_size=1,
+            data_parallel_rank=0,
+            data_parallel_size=1,
+        )
+
+    def admission_facts(
+        self,
+        framework_config: object | None,
+    ) -> _integration.PlacementAdmissionFacts:
+        del framework_config
+        return _integration.PlacementAdmissionFacts()
+
+    def member_facts(
+        self,
+        framework_config: object | None,
+    ) -> _integration.PlacementMemberFacts:
+        del framework_config
+        return _integration.PlacementMemberFacts(
+            runtime_rank=0,
+            runtime_world_size=1,
+            member_id="member-0",
+            member_index=0,
+            member_count=1,
+            group_id_hint="group-1",
+        )
+
+    def execution_facts(
+        self,
+        framework_config: object | None,
+    ) -> _integration.MaterializationExecutionFacts:
+        del framework_config
+        return _integration.MaterializationExecutionFacts(
+            collective_rank=0,
+            collective_world_size=1,
+            tensor_parallel_ranks=(0,),
+        )
+
+
+class FakeTensorSurface:
+    def runtime_only_tensor_names(self, model: FakeRuntimeModel) -> tuple[str, ...]:
+        del model
+        return ()
+
+    def align_runtime_tensor_names(
+        self,
+        model: FakeRuntimeModel,
+        expected_names: Iterable[str],
+    ) -> int:
+        if set(expected_names) != set(model.tensors):
+            raise AssertionError("fake runtime tensor names do not match")
+        return 0
+
+    def collect_runtime_tensors(
+        self,
+        model: FakeRuntimeModel,
+        *,
+        remove_duplicate: bool = False,
+    ) -> dict[str, object]:
+        del remove_duplicate
+        return dict(model.tensors)
+
+    def collect_runtime_tensor_view(
+        self,
+        tensors: Mapping[str, object],
+    ) -> tuple[object, ...]:
+        del tensors
+        return ()
+
+    def compute_runtime_tensor_schema_hash(
+        self,
+        tensors: Mapping[str, object],
+        *,
+        remove_duplicate: bool = False,
+    ) -> str:
+        del tensors, remove_duplicate
+        return "fake-schema"
+
+    def attach_bound_tensors(
+        self,
+        model: FakeRuntimeModel,
+        tensors: Mapping[str, object],
+        *,
+        replace_meta_params: bool,
+    ) -> FakeRuntimeModel:
+        del replace_meta_params
+        model.tensors.update(cast(Mapping[str, torch.Tensor], tensors))
+        return model
+
+    def allocate_runtime_only_tensors(
+        self,
+        model: FakeRuntimeModel,
+        target_device: torch.device,
+    ) -> dict[str, object]:
+        del model, target_device
+        return {}
+
+    def snapshot_tensor_invariants(
+        self,
+        tensors: Mapping[str, object],
+    ) -> tuple[str, ...]:
+        return tuple(sorted(tensors))
+
+    def validate_tensor_invariants(
+        self,
+        before: tuple[str, ...],
+        after: Mapping[str, object],
+    ) -> None:
+        if before != tuple(sorted(after)):
+            raise AssertionError("fake tensor invariants changed")
+
+
+class FakeRuntimeOnlyTensorSurface(FakeTensorSurface):
+    def __init__(self) -> None:
+        self.allocated: list[tuple[str, torch.device]] = []
+
+    def runtime_only_tensor_names(self, model: FakeRuntimeModel) -> tuple[str, ...]:
+        del model
+        return ("cache",)
+
+    def collect_runtime_tensors(
+        self,
+        model: FakeRuntimeModel,
+        *,
+        remove_duplicate: bool = False,
+    ) -> dict[str, object]:
+        del remove_duplicate
+        return {
+            name: tensor for name, tensor in model.tensors.items() if name != "cache"
+        }
+
+    def allocate_runtime_only_tensors(
+        self,
+        model: FakeRuntimeModel,
+        target_device: torch.device,
+    ) -> dict[str, object]:
+        self.allocated.append(("cache", target_device))
+        tensor = torch.zeros((1,), dtype=torch.float16)
+        model.tensors["cache"] = tensor
+        return {"cache": tensor}
+
+
+class FakeRuntimeArtifactResolver:
+    def resolve(self, artifact_ref: str) -> SimpleNamespace:
+        return SimpleNamespace(
+            artifact=FakeArtifact(),
+            artifact_ref=artifact_ref,
+            tensor_names=("w",),
+            manifest=SimpleNamespace(
+                representation_contract_hash=f"repr:{artifact_ref}",
+                source_artifact_ref="mi2:source",
+                serving_build_digest=f"build:{artifact_ref}",
+            ),
+        )
+
+    def cross_check(
+        self,
+        resolved_artifact: SimpleNamespace,
+        **kwargs: object,
+    ) -> SimpleNamespace:
+        del kwargs
+        return resolved_artifact
+
+
+class RecordingRuntimeArtifactResolver(FakeRuntimeArtifactResolver):
+    def __init__(self) -> None:
+        self.calls: list[tuple[str, object]] = []
+
+    def resolve(self, artifact_ref: str) -> SimpleNamespace:
+        self.calls.append(("resolve", artifact_ref))
+        return super().resolve(artifact_ref)
+
+    def cross_check(
+        self,
+        resolved_artifact: SimpleNamespace,
+        **kwargs: object,
+    ) -> SimpleNamespace:
+        self.calls.append(("cross_check", dict(kwargs)))
+        return super().cross_check(resolved_artifact, **kwargs)
+
+
+class _LocalPathLocator:
+    kind = "local_path"
+    value = "/tmp/fakefw-model"
+
+
+def build_fake_artifact_runtime_host(
+    tc_module: ModuleType = tc,
+    *,
+    tensor_surface: object | None = None,
+) -> object:
+    """Build a minimal non-vLLM host through the root artifact-runtime API."""
+
+    return tc_module.RuntimeHostCapabilities(
+        framework=FakeFrameworkHost(),
+        placement=FakePlacementHost(),
+        tensor_surface=tensor_surface or FakeTensorSurface(),
+    )
+
+
+_ARTIFACT_LEVEL1_MESSAGES = {
+    "direct_start": (
+        "Artifact model_runtime startup failed. Verify framework model "
+        "construction, tensor surface attach/schema behavior, placement facts, "
+        "and artifact resolver output."
+    ),
+    "artifact_realization_report": (
+        "Artifact.realize(... model_runtime ...) must return a model_runtime "
+        "realization report for the requested framework."
+    ),
+    "runtime_session_not_required": (
+        "Level 1 artifact-runtime start/reload must not instantiate or call "
+        "ArtifactRuntimeSession."
+    ),
+    "target_layout_from_runtime_binding": (
+        "Model-runtime reports must carry target layout identity from the "
+        "runtime attachment binding."
+    ),
+    "runtime_only_tensors_allocated": (
+        "Runtime-only tensor allocation must be expressible through the neutral "
+        "RuntimeHostCapabilities tensor surface."
+    ),
+    "runtime_publication_actions": (
+        "Runtime publication must be represented by artifact-runtime "
+        "publish/retire actions, not by a runtime session."
+    ),
+    "describe": (
+        "RuntimeAttachment.view must expose the typed RuntimeWorkerView for the "
+        "current attachment."
+    ),
+    "reload": (
+        "Artifact runtime reload failed. Level 1 reload must use a typed "
+        "ArtifactLocator and RuntimePolicy."
+    ),
+    "reload_identity_from_runtime_view": (
+        "Reload response identity must come from the runtime view, not from the "
+        "request payload."
+    ),
+    "source_capability_not_required": (
+        "Level 1 direct artifact runtime start/reload must not require SourceHost."
+    ),
+    "source_catalog_not_required": (
+        "Level 1 direct artifact runtime start/reload must not require "
+        "SourceCatalogProvider."
+    ),
+    "resolver_uses_artifact_refs": (
+        "Artifact runtime start/reload must resolve durable artifact refs through "
+        "the supplied runtime resolver."
+    ),
+    "rejects_local_reload_artifact_locator": (
+        "Reload must reject local source selectors; local paths belong to "
+        "source bootstrap, not durable artifact runtime reload."
+    ),
+    "rejects_untyped_reload_artifact_locator": (
+        "Reload must reject untyped artifact locator dictionaries on the public "
+        "runtime path. Use ArtifactLocator."
+    ),
+    "rejects_untyped_reload_policy": (
+        "Reload must reject untyped policy dictionaries on the public runtime "
+        "path. Use RuntimePolicy."
+    ),
+}
+
+
+@contextmanager
+def _patched_direct_artifact_runtime():
+    integration_module = cast(Any, _integration)
+    original_contract_reader = integration_module.read_source_bound_contract_state
+    original_materialization_options = (
+        integration_module.ArtifactRuntimeIntegration.build_materialization_options
+    )
+    integration_module.read_source_bound_contract_state = lambda: SimpleNamespace(
+        source_bound_contract_ready=True,
+        source_bound_contract_version=4,
+        source_bound_capability_names=("collective",),
+    )
+    integration_module.ArtifactRuntimeIntegration.build_materialization_options = (
+        lambda self, **kwargs: ("fake-materialization-options", kwargs)
+    )
+    try:
+        yield
+    finally:
+        integration_module.read_source_bound_contract_state = original_contract_reader
+        integration_module.ArtifactRuntimeIntegration.build_materialization_options = (
+            original_materialization_options
+        )
+
+
+@contextmanager
+def _reject_artifact_runtime_session():
+    session_cls = cast(Any, _integration.ArtifactRuntimeSession)
+    original_from_config = session_cls.__dict__["from_config"]
+    original_start = session_cls.__dict__["start"]
+    original_reload = session_cls.__dict__["reload"]
+
+    def reject_runtime_session(*_args: object, **_kwargs: object) -> None:
+        raise AssertionError("artifact-runtime conformance used ArtifactRuntimeSession")
+
+    session_cls.from_config = classmethod(reject_runtime_session)
+    session_cls.start = reject_runtime_session
+    session_cls.reload = reject_runtime_session
+    try:
+        yield
+    finally:
+        session_cls.from_config = original_from_config
+        session_cls.start = original_start
+        session_cls.reload = original_reload
+
+
+def assert_level1_artifact_runtime_conformance(
+    tc_module: ModuleType = tc,
+    *,
+    host: object | None = None,
+) -> ConformanceResult:
+    """Run Level 1 durable model-runtime conformance through Artifact.realize."""
+
+    from tensorcast.api.store.artifact import Artifact
+
+    checks: dict[str, bool] = {}
+    assert_public_artifact_runtime_boundary(tc_module)
+    assert_framework_isolation((tc_module.__name__, __name__))
+
+    class _Store:
+        pass
+
+    with _patched_direct_artifact_runtime(), _reject_artifact_runtime_session():
+        store = _Store()
+        tensor_surface = None if host is not None else FakeRuntimeOnlyTensorSurface()
+        runtime_host = host or build_fake_artifact_runtime_host(
+            tc_module,
+            tensor_surface=tensor_surface,
+        )
+        model_config = SimpleNamespace(model="fake-model")
+        identity = runtime_host.framework.identity(model_config)
+        resolver = RecordingRuntimeArtifactResolver()
+        store_ref: weakref.ReferenceType[Any] = weakref.ref(store)
+        artifact = Artifact(
+            store_ref=store_ref,
+            artifact_id="mi2:serving",
+        )
+        handle = artifact.realize(
+            tc_module.ArtifactRealizationSpec.model_runtime(
+                framework=str(identity.framework_name),
+                device=torch.device("cuda:0"),
+                adapter_version=str(identity.adapter_version),
+                runtime_abi_version=str(identity.serving_abi_version),
+            ),
+            runtime_host=runtime_host,
+            runtime_context=tc_module.RuntimeRequestContext(
+                framework_config=SimpleNamespace(),
+                model_config=model_config,
+            ),
+            runtime_resolver=resolver,
+        )
+        attachment = handle.attachment()
+        model_runtime_report = handle.report.model_runtime
+        target_plan = handle.report.target_plan
+        direct_payload = attachment.view.endpoint.to_weight_version_payload()
+        checks["direct_start"] = (
+            direct_payload.get("serving_artifact_ref") == "mi2:serving"
+            and direct_payload.get("source_artifact_ref") == "mi2:source"
+        )
+        checks["artifact_realization_report"] = (
+            handle.report.target_kind == "model_runtime"
+            and model_runtime_report is not None
+            and model_runtime_report.framework == str(identity.framework_name)
+        )
+        checks["target_layout_from_runtime_binding"] = (
+            handle.report.target_layout_digest == "binding-layout:layout-1"
+            and target_plan is not None
+            and target_plan.target_layout_digest == "binding-layout:layout-1"
+        )
+        checks["runtime_only_tensors_allocated"] = (
+            True
+            if tensor_surface is None
+            else (
+                "cache" in attachment.model.tensors
+                and ("cache", torch.device("cuda:0")) in tensor_surface.allocated
+            )
+        )
+        publication_events: list[Mapping[str, object]] = []
+        published = tc_module.publish_runtime_replica(
+            current_attachment=attachment,
+            policy=SimpleNamespace(
+                mode="required",
+                timeout_s=0.0,
+                drain_timeout_s=0.0,
+            ),
+            ensure_runtime_initialized=lambda: None,
+            profile_sink=publication_events.append,
+        )
+        published_replica = published.view.endpoint.weight_version.published_replica
+        retired = tc_module.retire_runtime_replica(
+            current_attachment=published,
+            reason="conformance",
+            drain_timeout_s=0.0,
+            ensure_runtime_initialized=lambda: None,
+            profile_sink=publication_events.append,
+        )
+        retired_replica = retired.view.endpoint.weight_version.published_replica
+        published_binding = published.state.binding
+        checks["runtime_publication_actions"] = (
+            published_replica is not None
+            and published_replica.state == "published"
+            and published_replica.replica_id == "replica-1"
+            and retired_replica is not None
+            and retired_replica.state == "retired"
+            and getattr(published_binding, "publish_calls", 0) == 1
+            and getattr(published_binding, "retire_calls", ()) == [0.0]
+            and [event["event"] for event in publication_events]
+            == [
+                "runtime_publication.publish.done",
+                "runtime_publication.retire.done",
+            ]
+        )
+        checks["describe"] = (
+            attachment.view.endpoint.to_weight_version_payload().get(
+                "serving_artifact_ref"
+            )
+            == "mi2:serving"
+        )
+
+        reloaded = tc_module.reload_runtime_attachment(
+            current_attachment=retired,
+            artifact_locator=tc_module.ArtifactLocator.artifact_ref("mi2:serving-next"),
+            policy=tc_module.RuntimePolicy(),
+            runtime_host=runtime_host,
+            runtime_context=tc_module.RuntimeRequestContext(
+                framework_config=SimpleNamespace(),
+                model_config=SimpleNamespace(model="fake-model"),
+            ),
+            ensure_runtime_initialized=lambda: None,
+            model=attachment.model,
+            runtime_resolver=resolver,
+        )
+        reload_response = reloaded.view.endpoint.to_reload_response_payload()
+        checks["reload"] = (
+            reload_response is not None
+            and reload_response.get("serving_artifact_ref") == "mi2:serving-next"
+        )
+        checks["reload_identity_from_runtime_view"] = (
+            reload_response is not None
+            and reloaded.state.runtime_view.serving_artifact_ref
+            == reload_response.get("serving_artifact_ref")
+        )
+        checks["source_capability_not_required"] = True
+        checks["source_catalog_not_required"] = True
+        checks["resolver_uses_artifact_refs"] = (
+            "resolve",
+            "mi2:serving",
+        ) in resolver.calls and ("resolve", "mi2:serving-next") in resolver.calls
+
+        try:
+            tc_module.reload_runtime_attachment(
+                current_attachment=reloaded,
+                artifact_locator=_LocalPathLocator(),
+                policy=tc_module.RuntimePolicy(),
+                runtime_host=runtime_host,
+                runtime_context=tc_module.RuntimeRequestContext(),
+                ensure_runtime_initialized=lambda: None,
+            )
+        except _integration.ConfigConflictError:
+            checks["rejects_local_reload_artifact_locator"] = True
+        else:
+            checks["rejects_local_reload_artifact_locator"] = False
+
+        try:
+            tc_module.reload_runtime_attachment(
+                current_attachment=reloaded,
+                artifact_locator={
+                    "kind": "artifact_ref",
+                    "value": "mi2:serving-next",
+                },
+                policy=tc_module.RuntimePolicy(),
+                runtime_host=runtime_host,
+                runtime_context=tc_module.RuntimeRequestContext(),
+                ensure_runtime_initialized=lambda: None,
+            )
+        except _integration.ConfigConflictError:
+            checks["rejects_untyped_reload_artifact_locator"] = True
+        else:
+            checks["rejects_untyped_reload_artifact_locator"] = False
+
+        try:
+            tc_module.reload_runtime_attachment(
+                current_attachment=reloaded,
+                artifact_locator=tc_module.ArtifactLocator.artifact_ref(
+                    "mi2:serving-next"
+                ),
+                policy={"mode": "from_manifest"},
+                runtime_host=runtime_host,
+                runtime_context=tc_module.RuntimeRequestContext(),
+                ensure_runtime_initialized=lambda: None,
+            )
+        except _integration.ConfigConflictError:
+            checks["rejects_untyped_reload_policy"] = True
+        else:
+            checks["rejects_untyped_reload_policy"] = False
+        checks["runtime_session_not_required"] = True
+
+    return _result(
+        level="level1-artifact-runtime",
+        checks=checks,
+        messages=_ARTIFACT_LEVEL1_MESSAGES,
+    )
+
+
+__all__ = [
+    "ConformanceResult",
+    "FakeArtifact",
+    "FakeArtifactView",
+    "FakeBinding",
+    "FakeFrameworkHost",
+    "FakePlacementHost",
+    "FakeRuntimeOnlyTensorSurface",
+    "FakeRuntimeModel",
+    "FakeRuntimeArtifactResolver",
+    "FakeTensorSurface",
+    "RecordingRuntimeArtifactResolver",
+    "assert_framework_isolation",
+    "assert_level1_artifact_runtime_conformance",
+    "assert_public_artifact_runtime_boundary",
+    "assert_public_runtime_boundary",
+    "build_fake_artifact_runtime_host",
+]
diff --git a/tensorcast/serving/runtime_view.py b/tensorcast/artifact_runtime/view.py
similarity index 94%
rename from tensorcast/serving/runtime_view.py
rename to tensorcast/artifact_runtime/view.py
index 5ae3665b..8e83ae69 100644
--- a/tensorcast/serving/runtime_view.py
+++ b/tensorcast/artifact_runtime/view.py
@@ -56,10 +56,12 @@ def _diagnostic_value(
     return getattr(diagnostics, name, default)
 
 
-def _serving_realization_report(
+def _runtime_realization_report(
     diagnostics: Mapping[str, object],
 ) -> Mapping[str, object] | None:
-    value = diagnostics.get("serving_realization_report")
+    value = diagnostics.get("runtime_realization_report")
+    if not isinstance(value, Mapping):
+        value = diagnostics.get("serving_realization_report")
     if isinstance(value, Mapping):
         return value
     return None
@@ -246,10 +248,39 @@ def to_dict(self) -> dict[str, object]:
         return payload
 
 
+def _source_selection_projection_from_value(
+    value: object | None,
+) -> SourceSelectionProjection | None:
+    if value is None:
+        return None
+    if isinstance(value, SourceSelectionProjection):
+        return value
+    if not isinstance(value, Mapping):
+        return None
+    selected_source_kind = _optional_text(value.get("selected_source_kind"))
+    if selected_source_kind is None:
+        return None
+    return SourceSelectionProjection(
+        selected_source_kind=selected_source_kind,
+        selected_replica_id=_optional_text(value.get("selected_replica_id")),
+        selected_producer_worker_id=_optional_text(
+            value.get("selected_producer_worker_id")
+        ),
+        selected_byte_space_kind=_optional_text(value.get("selected_byte_space_kind")),
+        selected_byte_space_id=_optional_text(value.get("selected_byte_space_id")),
+        p2p_bytes=_optional_int(value.get("p2p_bytes")) or 0,
+        fallback_bytes=_optional_int(value.get("fallback_bytes")) or 0,
+        disk_bytes=_optional_int(value.get("disk_bytes")) or 0,
+        reselection_attempts=_optional_int(value.get("reselection_attempts")) or 0,
+        reject_reason_bucket=_optional_text(value.get("reject_reason_bucket")),
+        fallback_reason_bucket=_optional_text(value.get("fallback_reason_bucket")),
+    )
+
+
 def _source_bound_projection_from_diagnostics(
     diagnostics: Mapping[str, object],
 ) -> SourceBoundContractProjection | None:
-    report = _serving_realization_report(diagnostics)
+    report = _runtime_realization_report(diagnostics)
     source_contract = _nested_mapping(report, "source_bound_contract")
     if source_contract is not None:
         return SourceBoundContractProjection(dict(source_contract))
@@ -275,7 +306,7 @@ def _materialization_projection_from_fields(
 ) -> MaterializationDiagnosticsProjection | None:
     fields: dict[str, object] = {}
     if prefix == "realize":
-        report = _serving_realization_report(diagnostics)
+        report = _runtime_realization_report(diagnostics)
         realization = _nested_mapping(report, "realization")
         execution = _nested_mapping(realization, "execution")
         plan = _nested_mapping(realization, "plan")
@@ -553,6 +584,11 @@ def source_selection_projection_from_runtime_diagnostics(
 
     if diagnostics is None:
         return None
+    explicit = _source_selection_projection_from_value(
+        diagnostics.get("source_selection")
+    )
+    if explicit is not None:
+        return explicit
     materialization = source_selection_projection_from_materialization_diagnostics(
         diagnostics.get("materialization")
     )
@@ -563,7 +599,7 @@ def source_selection_projection_from_runtime_diagnostics(
     )
     if execution is not None:
         return execution
-    report = _serving_realization_report(diagnostics)
+    report = _runtime_realization_report(diagnostics)
     realization = _nested_mapping(report, "realization")
     report_execution = _nested_mapping(realization, "execution")
     serving_projection = source_selection_projection_from_execution_diagnostics(
@@ -699,7 +735,7 @@ def from_runtime_view(
         include_reload_response: bool = False,
     ) -> "RuntimeWorkerView":
         diagnostics = dict(getattr(view, "diagnostics", None) or {})
-        report = _serving_realization_report(diagnostics)
+        report = _runtime_realization_report(diagnostics)
         binding_value_ref = BindingValueRefProjection.from_value(
             getattr(view, "binding_value_ref", None)
         )
diff --git a/tensorcast/daemon_ctl.py b/tensorcast/daemon_ctl.py
index ab8e4352..c053d64f 100644
--- a/tensorcast/daemon_ctl.py
+++ b/tensorcast/daemon_ctl.py
@@ -75,17 +75,17 @@
     LocalStableTierResult,
     Plan,
     PrefetchRetentionPolicy,
+    RealizationTarget,
+    RealizationTargetSet,
     RegionMemoryKind,
     RegisterStorage,
     RegisterTensorAlias,
     RepresentationPublishSpec,
+    RuntimeArtifactPolicy,
+    RuntimeBindingMemberRef,
+    RuntimeBindingReadiness,
     SealAssemblyResult,
     ServerConfig,
-    ServingBindingMemberRef,
-    ServingBindingReadiness,
-    ServingBindingSetTarget,
-    ServingBindingTarget,
-    ServingRuntimePolicy,
     StableDramHandshake,
     VramRegionHandle,
 )
@@ -943,7 +943,7 @@ def materialize_into_target(
         target_layout: store_daemon_pb2.TargetLayout,
         device_uuid: str,
         source_policy: store_daemon_pb2.SourcePolicy | None = None,
-        serving_runtime_policy: "ServingRuntimePolicy | None" = None,
+        runtime_artifact_policy: "RuntimeArtifactPolicy | None" = None,
         placement: store_daemon_pb2.TransformPlacement | None = None,
         pid: int | None = None,
         operation_id: str | None = None,
@@ -967,9 +967,9 @@ def materialize_into_target(
                 pid=pid_value,
             )
             request.source_policy.CopyFrom(resolved_source_policy)
-            if serving_runtime_policy is not None:
+            if runtime_artifact_policy is not None:
                 request.serving_artifact_policy.CopyFrom(
-                    serving_runtime_policy.to_proto()
+                    runtime_artifact_policy.to_proto()
                 )
             if placement is not None:
                 request.placement = placement
@@ -1017,7 +1017,7 @@ def materialize_into_mapped_target(
         copy_plan,
         dst_tensors: Mapping[str, torch.Tensor],
         source_policy: store_daemon_pb2.SourcePolicy | None = None,
-        serving_runtime_policy: "ServingRuntimePolicy | None" = None,
+        runtime_artifact_policy: "RuntimeArtifactPolicy | None" = None,
         placement: store_daemon_pb2.TransformPlacement | None = None,
         pid: int | None = None,
         operation_id: str | None = None,
@@ -1072,9 +1072,9 @@ def materialize_into_mapped_target(
                 spec.stride.extend(int(v) for v in tensor.stride())
                 request.dst_tensors.append(spec)
             request.source_policy.CopyFrom(resolved_source_policy)
-            if serving_runtime_policy is not None:
+            if runtime_artifact_policy is not None:
                 request.serving_artifact_policy.CopyFrom(
-                    serving_runtime_policy.to_proto()
+                    runtime_artifact_policy.to_proto()
                 )
             if placement is not None:
                 request.placement = placement
@@ -1128,7 +1128,7 @@ def create_owned_binding(
         device_uuid: str,
         binding_layout_id: str,
         source_policy: store_daemon_pb2.SourcePolicy | None = None,
-        serving_runtime_policy: "ServingRuntimePolicy | None" = None,
+        runtime_artifact_policy: "RuntimeArtifactPolicy | None" = None,
         placement: store_daemon_pb2.TransformPlacement | None = None,
         copy_plan: store_daemon_pb2.CopyPlan | None = None,
         dst_specs: Iterable[store_daemon_pb2.MappedTensorSpec] | None = None,
@@ -1159,9 +1159,9 @@ def create_owned_binding(
                 pid=pid_value,
             )
             request.source_policy.CopyFrom(resolved_source_policy)
-            if serving_runtime_policy is not None:
+            if runtime_artifact_policy is not None:
                 request.serving_artifact_policy.CopyFrom(
-                    serving_runtime_policy.to_proto()
+                    runtime_artifact_policy.to_proto()
                 )
             if placement is not None:
                 request.placement = placement
@@ -1203,8 +1203,8 @@ def prefetch_serving_binding(
         self,
         *,
         source_selection: common_pb2.ArtifactSelection,
-        target: ServingBindingTarget | ServingBindingSetTarget,
-        requested_readiness: ServingBindingReadiness,
+        target: RealizationTarget | RealizationTargetSet,
+        requested_readiness: RuntimeBindingReadiness,
         retention_policy: PrefetchRetentionPolicy | None = None,
         operation_id: str | None = None,
         group_realization: Any | None = None,
@@ -1220,15 +1220,15 @@ def prefetch_serving_binding(
             requested_readiness=_SERVING_READINESS_TO_PROTO[requested_readiness],
         )
         request.source_selection.CopyFrom(source_selection)
-        if isinstance(target, ServingBindingTarget):
+        if isinstance(target, RealizationTarget):
             request.source.CopyFrom(target.source.to_proto())
             request.serving_binding_target.CopyFrom(target.to_proto())
-        elif isinstance(target, ServingBindingSetTarget):
+        elif isinstance(target, RealizationTargetSet):
             request.source.CopyFrom(target.source.to_proto())
             request.serving_binding_set_target.CopyFrom(target.to_proto())
         else:
             raise ValueError(
-                "target must be a ServingBindingTarget or ServingBindingSetTarget"
+                "target must be a RealizationTarget or RealizationTargetSet"
             )
         if retention_policy is not None:
             request.retention_policy.CopyFrom(retention_policy.to_proto())
@@ -1322,7 +1322,7 @@ def acquire_binding_value(
         expected_serving_build_digest: str,
         expected_daemon_id: str | None = None,
         expected_daemon_session_id: str | None = None,
-        expected_member: ServingBindingMemberRef | None = None,
+        expected_member: RuntimeBindingMemberRef | None = None,
         local_serving_ref: str | None = None,
         group_realization_acquire: GroupRealizationAcquireRef | None = None,
         caller_pid: int | None = None,
@@ -1391,7 +1391,7 @@ def acquire_binding_value_by_local_ref(
         expected_device_uuid: str,
         expected_tensor_schema_hash: str,
         expected_serving_build_digest: str,
-        expected_member: ServingBindingMemberRef,
+        expected_member: RuntimeBindingMemberRef,
         expected_target_layout_hash: str | None = None,
         expected_daemon_id: str | None = None,
         expected_daemon_session_id: str | None = None,
@@ -1816,7 +1816,7 @@ def refill_owned_binding(
         source_policy: store_daemon_pb2.SourcePolicy | None = None,
         execution_topology: store_daemon_pb2.SourceExecutionTopology | None = None,
         collective_policy: store_daemon_pb2.CollectivePolicy | None = None,
-        serving_runtime_policy: "ServingRuntimePolicy | None" = None,
+        runtime_artifact_policy: "RuntimeArtifactPolicy | None" = None,
         placement: store_daemon_pb2.TransformPlacement | None = None,
         operation_id: str | None = None,
         timeout_s: float = 600.0,
@@ -1842,9 +1842,9 @@ def refill_owned_binding(
                 request.execution_topology.CopyFrom(execution_topology)
             if collective_policy is not None:
                 request.collective_policy = collective_policy
-            if serving_runtime_policy is not None:
+            if runtime_artifact_policy is not None:
                 request.serving_artifact_policy.CopyFrom(
-                    serving_runtime_policy.to_proto()
+                    runtime_artifact_policy.to_proto()
                 )
             if placement is not None:
                 request.placement = placement
@@ -2407,7 +2407,7 @@ def materialize_by_artifact_id(
         placement: store_daemon_pb2.TransformPlacement | None = None,
         return_response: Literal[True],
         source_policy: store_daemon_pb2.SourcePolicy | None = None,
-        serving_runtime_policy: "ServingRuntimePolicy | None" = None,
+        runtime_artifact_policy: "RuntimeArtifactPolicy | None" = None,
         export_policy: store_daemon_pb2.ExportPolicy | None = None,
         need_view_data_hash: bool = True,
         target_device_type: store_daemon_pb2.DeviceType = store_daemon_pb2.DeviceType.DEVICE_TYPE_GPU,
@@ -2431,7 +2431,7 @@ def materialize_by_artifact_id(
         placement: store_daemon_pb2.TransformPlacement | None = None,
         return_response: Literal[False] = False,
         source_policy: store_daemon_pb2.SourcePolicy | None = None,
-        serving_runtime_policy: "ServingRuntimePolicy | None" = None,
+        runtime_artifact_policy: "RuntimeArtifactPolicy | None" = None,
         export_policy: store_daemon_pb2.ExportPolicy | None = None,
         need_view_data_hash: bool = True,
         target_device_type: store_daemon_pb2.DeviceType = store_daemon_pb2.DeviceType.DEVICE_TYPE_GPU,
@@ -2476,7 +2476,7 @@ def materialize_by_artifact_id(
         placement: store_daemon_pb2.TransformPlacement | None = None,
         return_response: bool = False,
         source_policy: store_daemon_pb2.SourcePolicy | None = None,
-        serving_runtime_policy: "ServingRuntimePolicy | None" = None,
+        runtime_artifact_policy: "RuntimeArtifactPolicy | None" = None,
         export_policy: store_daemon_pb2.ExportPolicy | None = None,
         need_view_data_hash: bool = True,
         target_device_type: store_daemon_pb2.DeviceType = store_daemon_pb2.DeviceType.DEVICE_TYPE_GPU,
@@ -2527,9 +2527,9 @@ def materialize_by_artifact_id(
             if wait_for_shared_disk_ms:
                 request.wait_for_shared_disk_ms = int(wait_for_shared_disk_ms)
             request.source_policy.CopyFrom(resolved_source_policy)
-            if serving_runtime_policy is not None:
+            if runtime_artifact_policy is not None:
                 request.serving_artifact_policy.CopyFrom(
-                    serving_runtime_policy.to_proto()
+                    runtime_artifact_policy.to_proto()
                 )
             if export_policy is not None:
                 request.export_policy = export_policy
diff --git a/tensorcast/engine_adapter/adapter.py b/tensorcast/engine_adapter/adapter.py
index 75d657c0..14ce9db6 100644
--- a/tensorcast/engine_adapter/adapter.py
+++ b/tensorcast/engine_adapter/adapter.py
@@ -20,10 +20,9 @@
 from tensorcast.api.plan.transforms import TransformSpec
 from tensorcast.api.store import Artifact, Store
 from tensorcast.api.store.handles import RegisteredArtifact
-from tensorcast.api.store.serving_builder import (
-    RepresentationPublishSpec,
+from tensorcast.api.store.publication_builder import (
     build_pure_transform_publication_bundle_from_registered_artifact,
-    prepare_pure_transform_serving_registration,
+    prepare_pure_transform_runtime_registration,
 )
 from tensorcast.engine_adapter.artifact_api import (
     BatchResult,
@@ -33,7 +32,7 @@
     PublishResult,
     SealedByteArtifact,
 )
-from tensorcast.types import ServingBuildIntent
+from tensorcast.types import RepresentationPublishSpec, RuntimeArtifactBuildIntent
 
 
 def _encode_token(token: bytes) -> str:
@@ -65,7 +64,7 @@ def _pure_transform_build_intent(
     ctx: "TransformContext",
     *,
     source_artifact: Artifact,
-) -> ServingBuildIntent | None:
+) -> RuntimeArtifactBuildIntent | None:
     publication_spec = ctx.spec.publication_spec
     if publication_spec is None:
         return None
@@ -79,7 +78,7 @@ def _maybe_build_pure_transform_publication_bundle(
     registered_artifact: object,
     *,
     source_artifact: Artifact,
-    build_intent: ServingBuildIntent | None,
+    build_intent: RuntimeArtifactBuildIntent | None,
 ) -> RepresentationPublishSpec | None:
     if build_intent is None:
         return None
@@ -620,7 +619,7 @@ def _register(ctx: TransformContext) -> object | None:
                         status_code="INVALID_ARGUMENT",
                         retryable=False,
                     )
-                prepared = prepare_pure_transform_serving_registration(
+                prepared = prepare_pure_transform_runtime_registration(
                     build_intent=build_intent,
                     source_artifact=selected_source,
                     tensors=registration_tensors,
diff --git a/tensorcast/node_agent/executor.py b/tensorcast/node_agent/executor.py
index 73cc44c0..26fac6a8 100644
--- a/tensorcast/node_agent/executor.py
+++ b/tensorcast/node_agent/executor.py
@@ -27,7 +27,6 @@
 from tensorcast.api.plan.transforms import TransformSpec
 from tensorcast.api.store import Artifact, Store
 from tensorcast.api.store.runtime import StoreRuntimeContext
-from tensorcast.api.store.serving_builder import RepresentationPublishSpec
 from tensorcast.daemon_ctl import DaemonCtl, get_daemon_client
 from tensorcast.engine_adapter import (
     BatchResult,
@@ -44,8 +43,9 @@
 from tensorcast.types import (
     _SERVING_READINESS_FROM_PROTO,
     PrefetchRetentionPolicy,
-    ServingBindingSetTarget,
-    ServingBindingTarget,
+    RealizationTarget,
+    RealizationTargetSet,
+    RepresentationPublishSpec,
 )
 
 ArtifactActionResult = (
@@ -1040,7 +1040,7 @@ def _prefetch(
                 artifact, _ = self._artifact_from_selection(selection)
                 readiness = _SERVING_READINESS_FROM_PROTO.get(
                     int(action.requested_readiness),
-                    "serving_local_ready",
+                    "runtime_local_ready",
                 )
                 retention = (
                     PrefetchRetentionPolicy.from_proto(action.retention_policy)
@@ -1048,11 +1048,9 @@ def _prefetch(
                     else None
                 )
                 if serving_target_kind == "serving_binding_target":
-                    target = ServingBindingTarget.from_proto(
-                        action.serving_binding_target
-                    )
+                    target = RealizationTarget.from_proto(action.serving_binding_target)
                 else:
-                    target = ServingBindingSetTarget.from_proto(
+                    target = RealizationTargetSet.from_proto(
                         action.serving_binding_set_target
                     )
                 op = artifact.prefetch(
diff --git a/tensorcast/node_agent/server.py b/tensorcast/node_agent/server.py
index b57f0652..6a01eaaa 100644
--- a/tensorcast/node_agent/server.py
+++ b/tensorcast/node_agent/server.py
@@ -10,7 +10,6 @@
     ArtifactSetResult,
     selection_identity_to_proto,
 )
-from tensorcast.api.store.serving_builder import RepresentationPublishSpec
 from tensorcast.engine_adapter import (
     BatchOutcome,
     BatchResult,
@@ -21,6 +20,7 @@
 from tensorcast.node_agent.executor import NodeAgentExecutor
 from tensorcast.proto.node_agent.v1 import node_agent_pb2, node_agent_pb2_grpc
 from tensorcast.proto.plan.v1 import plan_pb2
+from tensorcast.types import RepresentationPublishSpec
 
 _STATE_MAP = {
     "pending": node_agent_pb2.OPERATION_STATE_PENDING,
diff --git a/tensorcast/pytorch/module_binding.py b/tensorcast/pytorch/module_binding.py
index cb75bd8c..c7b6cef7 100644
--- a/tensorcast/pytorch/module_binding.py
+++ b/tensorcast/pytorch/module_binding.py
@@ -10,7 +10,7 @@
 import torch
 from torch import nn
 
-import tensorcast.serving.contract as tc_contract
+import tensorcast.artifact_runtime.contract as tc_contract
 
 _RESERVED_TENSORCAST_PREFIX = "__tensorcast_meta__."
 
@@ -185,7 +185,7 @@ def align_runtime_binding_exclude_names(
         if len(missing) > 8:
             sample = f"{sample}, ..."
         raise RuntimeError(
-            "TensorCast serving artifact tensor names are missing from "
+            "TensorCast runtime artifact tensor names are missing from "
             f"the model: missing_count={len(missing)} [{sample}]"
         )
     extra_excluded = sorted(all_names - canonical)
@@ -211,7 +211,7 @@ def assert_runtime_tensors_match_expected_names(
     if len(unexpected) > 8:
         unexpected_sample = f"{unexpected_sample}, ..."
     raise RuntimeError(
-        "TensorCast serving artifact tensor set mismatch: "
+        "TensorCast runtime artifact tensor set mismatch: "
         f"missing_count={len(missing)} [{missing_sample}], "
         f"unexpected_count={len(unexpected)} [{unexpected_sample}]"
     )
diff --git a/tensorcast/pytorch/trace_capture.py b/tensorcast/pytorch/trace_capture.py
index c6791493..40654549 100644
--- a/tensorcast/pytorch/trace_capture.py
+++ b/tensorcast/pytorch/trace_capture.py
@@ -14,7 +14,7 @@
 from torch import nn
 from torch.utils._python_dispatch import TorchDispatchMode
 
-from tensorcast.serving.builder.trace_ir import (
+from tensorcast.artifact_runtime.recipe.trace_ir import (
     CopyPlanEntry,
     MultiRange,
     Range,
diff --git a/tensorcast/retained_realization.py b/tensorcast/retained_realization.py
new file mode 100644
index 00000000..71c8099b
--- /dev/null
+++ b/tensorcast/retained_realization.py
@@ -0,0 +1,552 @@
+#  Copyright (c) 2026, TensorCast Team.
+"""Neutral retained realization claim helpers.
+
+Retained realization claims are serialized handoffs produced by artifact
+prefetch. They expose the trusted reservation credit needed before framework
+admission while keeping the existing retained binding authority validation as
+the source of truth during the migration away from serving-rooted public names.
+"""
+
+from __future__ import annotations
+
+import json
+from collections.abc import Mapping
+from dataclasses import dataclass
+from typing import Any
+
+from tensorcast.api.errors import ArtifactError
+from tensorcast.api.store.realization_kernel import (
+    ArtifactRealizationHandle,
+    ArtifactRealizationSpec,
+)
+from tensorcast.retained_realization_authority import (
+    ParsedRetainedRealizationAuthority,
+)
+from tensorcast.retained_realization_authority import (
+    RetainedRealizationAuthority as RetainedRealizationAuthorityConfig,
+)
+from tensorcast.types import (
+    BindingReservationCapability,
+    BindingValueRef,
+    GroupRealizationAcquireRef,
+    PrefetchHandoff,
+    RealizationTarget,
+    RuntimeBindingMemberRef,
+)
+
+
+@dataclass(frozen=True)
+class RetainedRealizationExpectedDigests:
+    """Expected identity digests embedded in a retained realization claim.
+
+    Immutable plain-dataclass view handed out by
+    ``RetainedRealizationClaim.expected``; each field is copied from the
+    same-named attribute of the parsed authority's ``expected`` value.
+    """
+
+    # Digest of the target layout. When a claim is serialized from a prefetch
+    # handoff this is ``target.resolved_layout.target_layout_hash``.
+    target_layout_hash: str
+    # Digest of the tensor schema
+    # (``target.resolved_layout.tensor_schema_hash`` on the handoff path).
+    tensor_schema_hash: str
+    # Digest of the runtime build (``target.runtime_build_digest`` on the
+    # handoff path).
+    runtime_build_digest: str
+    # Digest of the resolved spec (``target.resolved_layout.spec_digest`` on
+    # the handoff path).
+    resolved_spec_digest: str
+
+
+@dataclass(frozen=True)
+class RetainedRealizationClaim:
+    """Validated retained realization handoff for admission and acquire.
+
+    Thin, immutable wrapper around a ``ParsedRetainedRealizationAuthority``.
+    Every property below simply forwards to the wrapped authority; the only
+    behaviour added by the class is ``realize_model_runtime``.
+    """
+
+    # The parsed authority every accessor delegates to.
+    _authority: ParsedRetainedRealizationAuthority
+
+    @property
+    def group_id(self) -> str:
+        """Group identifier recorded on the authority."""
+        return self._authority.group_id
+
+    @property
+    def local_ref(self) -> str | None:
+        """The authority's ``local_serving_ref`` under a neutral name."""
+        return self._authority.local_serving_ref
+
+    @property
+    def binding_value_ref(self) -> BindingValueRef:
+        """Binding value reference recorded on the authority."""
+        return self._authority.binding_value_ref
+
+    @property
+    def reservation_capability(self) -> BindingReservationCapability:
+        """Reservation capability recorded on the authority."""
+        return self._authority.reservation_capability
+
+    @property
+    def daemon_id(self) -> str:
+        """Daemon identifier recorded on the authority."""
+        return self._authority.daemon_id
+
+    @property
+    def daemon_session_id(self) -> str:
+        """Daemon session identifier recorded on the authority."""
+        return self._authority.daemon_session_id
+
+    @property
+    def device_uuid(self) -> str:
+        """Device UUID recorded on the authority."""
+        return self._authority.device_uuid
+
+    @property
+    def member(self) -> RuntimeBindingMemberRef:
+        """Runtime binding member this claim was issued for."""
+        return self._authority.member
+
+    @property
+    def reservation_bytes(self) -> int:
+        """Reservation size in bytes recorded on the authority."""
+        return self._authority.reservation_bytes
+
+    @property
+    def expected(self) -> RetainedRealizationExpectedDigests:
+        """Expected digests, copied into this module's plain dataclass.
+
+        A new ``RetainedRealizationExpectedDigests`` is built on every access
+        from the four digest attributes of ``self._authority.expected``.
+        """
+        expected = self._authority.expected
+        return RetainedRealizationExpectedDigests(
+            target_layout_hash=expected.target_layout_hash,
+            tensor_schema_hash=expected.tensor_schema_hash,
+            runtime_build_digest=expected.runtime_build_digest,
+            resolved_spec_digest=expected.resolved_spec_digest,
+        )
+
+    @property
+    def readiness(self) -> str:
+        """Readiness string recorded on the authority."""
+        return self._authority.readiness
+
+    @property
+    def verification_state(self) -> str:
+        """Verification state string recorded on the authority."""
+        return self._authority.verification_state
+
+    @property
+    def serving_artifact_id(self) -> str | None:
+        """Serving artifact id recorded on the authority, if any."""
+        return self._authority.serving_artifact_id
+
+    @property
+    def group_realization_acquire(self) -> GroupRealizationAcquireRef | None:
+        """Group realization acquire reference, if the authority has one."""
+        return self._authority.group_realization_acquire
+
+    @property
+    def authority(self) -> ParsedRetainedRealizationAuthority:
+        """The wrapped parsed authority (same object as ``as_authority()``)."""
+        return self._authority
+
+    def as_authority(self) -> ParsedRetainedRealizationAuthority:
+        """Return the wrapped parsed authority; method form of ``authority``."""
+        return self._authority
+
+    @staticmethod
+    def _request_facts(
+        spec: ArtifactRealizationSpec,
+        runtime_context: Any | None,
+    ) -> tuple[ArtifactRealizationSpec, Any]:
+        """Resolve the request facts for a model runtime realization.
+
+        Delegates to ``resolve_model_runtime_request_facts`` and returns the
+        ``(spec, context)`` pair carried by its result.
+
+        Raises:
+            ArtifactError: ``INVALID_ARGUMENT`` and non-retryable, wrapping a
+                ``ModelRuntimeRequestFactsError`` raised by the resolver.
+        """
+        # Imported at call time rather than at module import.
+        # NOTE(review): presumably to avoid an import cycle — confirm.
+        from tensorcast.artifact_runtime.request_facts import (
+            ModelRuntimeRequestFactsError,
+            resolve_model_runtime_request_facts,
+        )
+
+        try:
+            facts = resolve_model_runtime_request_facts(
+                spec=spec,
+                runtime_context=runtime_context,
+            )
+        except ModelRuntimeRequestFactsError as exc:
+            raise ArtifactError(
+                str(exc),
+                status_code="INVALID_ARGUMENT",
+                retryable=False,
+            ) from exc
+        return facts.spec, facts.context
+
+    def realize_model_runtime(
+        self,
+        spec: ArtifactRealizationSpec,
+        *,
+        runtime_host: Any,
+        runtime_context: Any | None = None,
+        profile_sink: Any | None = None,
+    ) -> ArtifactRealizationHandle:
+        """Realize this retained claim as a model runtime attachment.
+
+        Args:
+            spec: Realization spec; its ``target_kind`` must be
+                ``"model_runtime"``.
+            runtime_host: Host object passed to ``ArtifactRuntimeIntegration``
+                as ``host``; must not be ``None``.
+            runtime_context: Optional context forwarded to the request-facts
+                resolver.
+            profile_sink: Optional sink passed to
+                ``ArtifactRuntimeIntegration``.
+
+        Returns:
+            The ``ArtifactRealizationHandle`` found on the attachment state's
+            ``model_runtime_handle`` attribute.
+
+        Raises:
+            ArtifactError: ``INVALID_ARGUMENT`` when ``runtime_host`` is
+                ``None``, ``spec`` is not a model runtime spec, or the request
+                facts cannot be resolved; ``INTERNAL`` when the attachment
+                state exposes no ``ArtifactRealizationHandle``. All are
+                non-retryable.
+        """
+
+        if runtime_host is None:
+            raise ArtifactError(
+                "retained model_runtime realization requires runtime_host",
+                status_code="INVALID_ARGUMENT",
+                retryable=False,
+            )
+        if spec.target_kind != "model_runtime":
+            raise ArtifactError(
+                "retained realization claim requires a model_runtime spec",
+                status_code="INVALID_ARGUMENT",
+                retryable=False,
+            )
+
+        # Imported only after the cheap argument checks have passed.
+        from tensorcast.artifact_runtime.lifecycle import ArtifactRuntimeIntegration
+
+        resolved_spec, context = self._request_facts(spec, runtime_context)
+        attachment = ArtifactRuntimeIntegration(
+            profile_sink=profile_sink,
+            host=runtime_host,
+        ).realize_retained_model_runtime(
+            authority=self._authority,
+            spec=resolved_spec,
+            context=context,
+        )
+        # The handle is looked up defensively: a missing attribute or a value
+        # of another type is reported as an INTERNAL error rather than leaked.
+        handle = getattr(attachment.state, "model_runtime_handle", None)
+        if not isinstance(handle, ArtifactRealizationHandle):
+            raise ArtifactError(
+                "retained model_runtime realization completed without a "
+                "realization handle",
+                status_code="INTERNAL",
+                retryable=False,
+            )
+        return handle
+
+
+def parse_retained_realization_claim(
+    extra: Mapping[str, Any] | Any,
+    *,
+    expected_member: RuntimeBindingMemberRef | None = None,
+) -> RetainedRealizationClaim:
+    """Turn loader extra config into a validated ``RetainedRealizationClaim``.
+
+    All parsing and validation is performed by
+    ``parse_retained_realization_authority``; this function only wraps the
+    resulting authority. Errors raised by that parser propagate unchanged.
+    """
+
+    validated_authority = parse_retained_realization_authority(
+        extra,
+        expected_member=expected_member,
+    )
+    return RetainedRealizationClaim(validated_authority)
+
+
+def parse_retained_realization_authority(
+    extra: Mapping[str, Any] | Any,
+    *,
+    expected_member: RuntimeBindingMemberRef | None = None,
+) -> ParsedRetainedRealizationAuthority:
+    """Parse and validate retained realization authority from runtime config.
+
+    Args:
+        extra: Either an already-built ``TensorCastRuntimeConfig`` (used as
+            is) or a value handed to ``TensorCastRuntimeConfig.from_mapping``.
+        expected_member: When given, selects the matching entry from an
+            ``authorities`` set and must equal the parsed authority's member.
+
+    Returns:
+        A ``ParsedRetainedRealizationAuthority`` whose nested references have
+        been validated into their typed models and cross-checked for
+        consistency.
+
+    Raises:
+        ValueError: the acquire mode is not ``"external"``, no authority can
+            be selected, a nested payload is invalid, the authority and its
+            reservation capability disagree, or the member does not match
+            ``expected_member``.
+    """
+
+    from tensorcast.artifact_runtime.config import TensorCastRuntimeConfig
+
+    config = (
+        extra
+        if isinstance(extra, TensorCastRuntimeConfig)
+        else TensorCastRuntimeConfig.from_mapping(extra)
+    )
+    if config.retained_binding_acquire.mode != "external":
+        raise ValueError(
+            "TensorCast retained realization authority requires "
+            "retained_binding_acquire.mode='external' and "
+            "retained_binding_acquire.authority"
+        )
+    authority_config = _select_retained_realization_authority_config(
+        config,
+        expected_member=expected_member,
+    )
+
+    # The authority config stores its nested references as raw dicts; turn
+    # each one into its typed model here.
+    binding_value_ref = _model_validate(
+        BindingValueRef,
+        authority_config.binding_value_ref,
+        field_name="retained_binding_acquire.authority.binding_value_ref",
+    )
+    member = _model_validate(
+        RuntimeBindingMemberRef,
+        authority_config.member_ref,
+        field_name="retained_binding_acquire.authority.member_ref",
+    )
+    # _payload_to_dict returns a fresh dict, so the setdefault calls below do
+    # not write into the authority config's own payload.
+    capability_payload = _payload_to_dict(
+        authority_config.reservation_capability,
+        field_name="retained_binding_acquire.authority.reservation_capability",
+    )
+    # A capability payload that omits binding_value_ref / member inherits the
+    # authority's values; values it does carry are kept and compared later by
+    # the consistency check.
+    capability_payload.setdefault(
+        "binding_value_ref", binding_value_ref.model_dump(mode="python")
+    )
+    capability_payload.setdefault("member", member.model_dump(mode="python"))
+    reservation_capability = _model_validate(
+        BindingReservationCapability,
+        capability_payload,
+        field_name="retained_binding_acquire.authority.reservation_capability",
+    )
+    # group_realization_acquire is optional and stays None when absent.
+    group_realization_acquire = None
+    if authority_config.group_realization_acquire is not None:
+        group_realization_acquire = _model_validate(
+            GroupRealizationAcquireRef,
+            authority_config.group_realization_acquire,
+            field_name="retained_binding_acquire.authority.group_realization_acquire",
+        )
+
+    authority = ParsedRetainedRealizationAuthority(
+        group_id=authority_config.group_id,
+        local_serving_ref=authority_config.local_serving_ref,
+        binding_value_ref=binding_value_ref,
+        reservation_capability=reservation_capability,
+        daemon_id=authority_config.daemon_id,
+        daemon_session_id=authority_config.daemon_session_id,
+        device_uuid=authority_config.device_uuid,
+        member=member,
+        reservation_bytes=int(authority_config.trusted_reservation_bytes),
+        expected=authority_config.expected,
+        readiness=authority_config.readiness,
+        # Falls back to "local_only" if the config value is falsy.
+        verification_state=authority_config.verification_state or "local_only",
+        serving_artifact_id=authority_config.serving_artifact_id,
+        group_realization_acquire=group_realization_acquire,
+    )
+    _validate_retained_realization_authority_consistency(authority)
+    # A singular ``authority`` entry is selected without looking at its
+    # member, so the expected member still has to be enforced here.
+    if expected_member is not None and authority.member != expected_member:
+        raise ValueError(
+            "TensorCast retained realization authority member does not match "
+            f"expected member: authority={authority.member!r}, "
+            f"expected={expected_member!r}"
+        )
+    return authority
+
+
+def retained_realization_claim_mode(extra: Mapping[str, Any] | None) -> str:
+    """Report the ``retained_binding_acquire.mode`` carried by ``extra``.
+
+    Any value that is not a mapping (``None`` included) yields ``"disabled"``
+    without invoking the runtime config parser.
+    """
+
+    # ``None`` is not a Mapping, so a single isinstance test covers both cases.
+    if not isinstance(extra, Mapping):
+        return "disabled"
+
+    from tensorcast.artifact_runtime.config import TensorCastRuntimeConfig
+
+    runtime_config = TensorCastRuntimeConfig.from_mapping(extra)
+    return runtime_config.retained_binding_acquire.mode
+
+
+def retained_realization_trusted_reservation_bytes(
+    load_config_or_extra: Any,
+    *,
+    expected_member: RuntimeBindingMemberRef | None = None,
+) -> int:
+    """Report the reservation bytes of a fully validated external claim.
+
+    ``load_config_or_extra`` may be an object exposing
+    ``model_loader_extra_config`` or the extra config itself. ``0`` is
+    returned when the extra config is not a mapping or its acquire mode is not
+    ``"external"``; otherwise the claim is parsed (and thereby validated) and
+    its ``reservation_bytes`` returned.
+    """
+
+    extra = getattr(
+        load_config_or_extra,
+        "model_loader_extra_config",
+        load_config_or_extra,
+    )
+    # Short-circuits: the mode is only inspected for mapping-shaped config.
+    uses_external_claim = (
+        isinstance(extra, Mapping)
+        and retained_realization_claim_mode(extra) == "external"
+    )
+    if not uses_external_claim:
+        return 0
+
+    claim = parse_retained_realization_claim(
+        extra,
+        expected_member=expected_member,
+    )
+    return claim.reservation_bytes
+
+
+def retained_realization_claim_extra_from_handoff(
+    *,
+    handoff: PrefetchHandoff,
+    target: RealizationTarget,
+    expected_member: RuntimeBindingMemberRef | None = None,
+) -> dict[str, Any]:
+    """Produce the retained claim config mapping for a prefetch handoff.
+
+    The authority payload is derived from ``handoff`` and ``target`` and then
+    placed under the ``retained_binding_acquire`` key in external mode.
+    """
+
+    authority_payload = _retained_realization_authority_from_handoff(
+        handoff=handoff,
+        target=target,
+        expected_member=expected_member,
+    )
+    return _retained_realization_claim_extra(
+        authority=authority_payload,
+        config_key="retained_binding_acquire",
+    )
+
+
+def retained_realization_claim_extra_json_from_handoff(
+    *,
+    handoff: PrefetchHandoff,
+    target: RealizationTarget,
+    expected_member: RuntimeBindingMemberRef | None = None,
+) -> str:
+    """Render the retained claim config for a handoff as compact JSON.
+
+    Keys are sorted and no whitespace is emitted after separators, so equal
+    configs always serialize to the same string.
+    """
+
+    claim_extra = retained_realization_claim_extra_from_handoff(
+        handoff=handoff,
+        target=target,
+        expected_member=expected_member,
+    )
+    return json.dumps(claim_extra, sort_keys=True, separators=(",", ":"))
+
+
+def _payload_to_dict(value: Any, *, field_name: str) -> dict[str, Any]:
+    """Coerce a model, mapping, or JSON object string into a new ``dict``.
+
+    Objects exposing ``model_dump`` are dumped in ``"python"`` mode, mappings
+    are shallow-copied, and strings are decoded as JSON and must decode to an
+    object. ``field_name`` is only used to label the ``ValueError`` raised for
+    anything else.
+    """
+    if hasattr(value, "model_dump"):
+        return dict(value.model_dump(mode="python"))
+    if isinstance(value, Mapping):
+        return dict(value)
+    if not isinstance(value, str):
+        raise ValueError(f"{field_name} must be a dict or JSON object")
+
+    # Only strings remain: they must hold a JSON object.
+    try:
+        decoded = json.loads(value)
+    except json.JSONDecodeError as exc:
+        raise ValueError(f"{field_name} must be a JSON object") from exc
+    if isinstance(decoded, Mapping):
+        return dict(decoded)
+    raise ValueError(f"{field_name} must be a JSON object")
+
+
+def _model_validate(model_type: Any, value: Any, *, field_name: str) -> Any:
+    """Validate ``value`` against ``model_type`` after coercing it to a dict.
+
+    Any exception raised by ``model_type.model_validate`` is re-raised as a
+    ``ValueError`` labelled with ``field_name``; coercion errors from
+    ``_payload_to_dict`` propagate as they are.
+    """
+    candidate = _payload_to_dict(value, field_name=field_name)
+    try:
+        validated = model_type.model_validate(candidate)
+    except Exception as exc:
+        raise ValueError(
+            f"{field_name} is invalid for TensorCast retained realization "
+            f"acquire: {exc}"
+        ) from exc
+    return validated
+
+
+def _select_retained_realization_authority_config(
+    config: Any,
+    *,
+    expected_member: RuntimeBindingMemberRef | None = None,
+) -> RetainedRealizationAuthorityConfig:
+    """Pick the authority entry that applies to this worker.
+
+    A singular ``retained_binding_acquire.authority`` always wins. Otherwise
+    the ``authorities`` collection is consulted: without ``expected_member``
+    it must hold exactly one entry; with one, the first entry whose
+    ``member_ref`` validates to an equal ``RuntimeBindingMemberRef`` is
+    returned.
+
+    Raises:
+        ValueError: nothing is configured, the set is ambiguous without an
+            expected member, a ``member_ref`` fails validation, or no entry
+            matches ``expected_member``.
+    """
+    acquire = config.retained_binding_acquire
+    singular = acquire.authority
+    if singular is not None:
+        return singular
+
+    candidates = tuple(acquire.authorities)
+    if len(candidates) == 0:
+        raise ValueError(
+            "TensorCast retained realization authority requires "
+            "retained_binding_acquire.mode='external' and "
+            "retained_binding_acquire.authority or "
+            "retained_binding_acquire.authorities"
+        )
+
+    if expected_member is None:
+        # Without a member to match on, only an unambiguous set is usable.
+        if len(candidates) != 1:
+            raise ValueError(
+                "TensorCast retained realization authority set requires an expected "
+                "serving member to select the worker authority"
+            )
+        return candidates[0]
+
+    for idx, entry in enumerate(candidates):
+        entry_member = _model_validate(
+            RuntimeBindingMemberRef,
+            entry.member_ref,
+            field_name=f"retained_binding_acquire.authorities[{idx}].member_ref",
+        )
+        if entry_member == expected_member:
+            return entry
+    raise ValueError(
+        "TensorCast retained realization authority set has no authority for "
+        f"expected member {expected_member!r}"
+    )
+
+
+def _validate_retained_realization_authority_consistency(
+    authority: ParsedRetainedRealizationAuthority,
+) -> None:
+    """Cross-check an authority against its own reservation capability.
+
+    The checks run in a fixed order and the first failure raises
+    ``ValueError``: binding value ref, daemon id, daemon session id, device
+    UUID, member, reservation bytes, member group id versus authority group
+    id, and finally the requirement that a ``runtime_published_ready``
+    authority carries a serving artifact id.
+    """
+    granted = authority.reservation_capability
+    if granted.binding_value_ref != authority.binding_value_ref:
+        raise ValueError(
+            "retained_binding_acquire.authority.reservation_capability."
+            "binding_value_ref must match retained_binding_acquire.authority."
+            "binding_value_ref"
+        )
+
+    # These four fields share one message shape; the tuple order is the order
+    # in which mismatches are reported.
+    for attr in ("daemon_id", "daemon_session_id", "device_uuid", "member"):
+        if getattr(granted, attr) != getattr(authority, attr):
+            raise ValueError(
+                "retained_binding_acquire.authority.reservation_capability."
+                f"{attr} mismatch"
+            )
+
+    if granted.reservation_bytes != authority.reservation_bytes:
+        raise ValueError(
+            "retained_binding_acquire.authority.reservation_capability."
+            "reservation_bytes must match retained_binding_acquire.authority."
+            "trusted_reservation_bytes"
+        )
+
+    # A member without a group id is accepted; one with a group id must agree
+    # with the authority.
+    member_group = authority.member.group_id
+    if member_group is not None and member_group != authority.group_id:
+        raise ValueError(
+            "retained_binding_acquire.authority.member_ref.group_id must match "
+            "retained_binding_acquire.authority.group_id"
+        )
+
+    if authority.readiness == "runtime_published_ready":
+        if not authority.serving_artifact_id:
+            raise ValueError(
+                "retained_binding_acquire.authority.serving_artifact_id is required "
+                "when retained_binding_acquire.authority.readiness="
+                "'runtime_published_ready'"
+            )
+
+
+def _retained_realization_authority_from_handoff(
+    *,
+    handoff: PrefetchHandoff,
+    target: RealizationTarget,
+    expected_member: RuntimeBindingMemberRef | None = None,
+) -> dict[str, Any]:
+    """Build the serializable authority payload for a prefetch handoff.
+
+    Identity, capability and readiness values come from ``handoff``; the
+    ``expected`` digests come from ``target``. The optional
+    ``group_realization_acquire`` entry is only present when the handoff has
+    one.
+
+    Raises:
+        ValueError: ``expected_member`` is given and differs from the
+            handoff's member.
+    """
+
+    def _as_text(raw: Any) -> str:
+        # Values exposing ``.value`` contribute that; others are stringified.
+        return str(getattr(raw, "value", raw))
+
+    member = handoff.member
+    member_mismatch = expected_member is not None and member != expected_member
+    if member_mismatch:
+        raise ValueError(
+            "Prefetched retained realization member does not match expected "
+            f"placement: prefetched={member}, expected={expected_member}"
+        )
+
+    payload: dict[str, Any] = {
+        "group_id": member.group_id or "",
+        "member_ref": _model_dump(member),
+        "daemon_id": handoff.daemon_id,
+        "daemon_session_id": handoff.daemon_session_id,
+        "device_uuid": handoff.device_uuid,
+        "binding_value_ref": _model_dump(handoff.binding_value_ref),
+        "reservation_capability": _model_dump(handoff.reservation_capability),
+        "local_serving_ref": handoff.local_serving_ref,
+        "readiness": _as_text(handoff.readiness),
+        "verification_state": _as_text(handoff.verification_state),
+        "serving_artifact_id": handoff.serving_artifact_id,
+        "trusted_reservation_bytes": handoff.reservation_bytes,
+    }
+    # Digests are appended after the handoff-derived keys to keep key order.
+    payload["expected"] = {
+        "target_layout_hash": target.resolved_layout.target_layout_hash,
+        "tensor_schema_hash": target.resolved_layout.tensor_schema_hash,
+        "runtime_build_digest": target.runtime_build_digest,
+        "resolved_spec_digest": target.resolved_layout.spec_digest,
+    }
+
+    group_acquire = handoff.group_realization_acquire
+    if group_acquire is not None:
+        payload["group_realization_acquire"] = _model_dump(group_acquire)
+    return payload
+
+
+def _retained_realization_claim_extra(
+    *,
+    authority: dict[str, Any],
+    config_key: str,
+) -> dict[str, Any]:
+    """Wrap an authority payload as external-mode config under ``config_key``.
+
+    The ``authority`` dict is referenced, not copied.
+    """
+    acquire_section: dict[str, Any] = {"mode": "external", "authority": authority}
+    return {config_key: acquire_section}
+
+
+def _model_dump(value: Any) -> dict[str, Any]:
+    """Copy a ``model_dump``-capable object or a mapping into a new ``dict``.
+
+    Raises:
+        TypeError: ``value`` is neither of those.
+    """
+    if hasattr(value, "model_dump"):
+        dumped = value.model_dump(mode="python")
+    elif isinstance(value, Mapping):
+        dumped = value
+    else:
+        raise TypeError(f"Cannot serialize {type(value)!r}")
+    return dict(dumped)
+
+
+__all__ = [
+    "RetainedRealizationClaim",
+    "RetainedRealizationExpectedDigests",
+    "parse_retained_realization_authority",
+    "parse_retained_realization_claim",
+    "retained_realization_claim_mode",
+    "retained_realization_trusted_reservation_bytes",
+    "retained_realization_claim_extra_from_handoff",
+    "retained_realization_claim_extra_json_from_handoff",
+]
diff --git a/tensorcast/retained_realization_authority.py b/tensorcast/retained_realization_authority.py
new file mode 100644
index 00000000..66dd93ba
--- /dev/null
+++ b/tensorcast/retained_realization_authority.py
@@ -0,0 +1,148 @@
+#  Copyright (c) 2026, TensorCast Team.
+
+"""Typed retained realization authority models."""
+
+from __future__ import annotations
+
+from dataclasses import dataclass
+from typing import Any
+
+from pydantic import BaseModel, ConfigDict, Field, field_validator, model_validator
+
+from tensorcast.types import (
+    BindingReservationCapability,
+    BindingValueRef,
+    GroupRealizationAcquireRef,
+    RuntimeBindingMemberRef,
+)
+
+# Readiness values accepted for ``RetainedRealizationAuthority.readiness``.
+# The field's validator lower-cases and strips the input before checking
+# membership, and rejects anything not listed here.
+_READINESS_STATES = {
+    "runtime_reserved",
+    "runtime_local_ready",
+    "runtime_published_ready",
+}
+
+
+def _normalize_optional_text(value: Any) -> str | None:
+    """Stringify and strip ``value``; ``None`` or blank text becomes ``None``."""
+    if value is None:
+        return None
+    stripped = str(value).strip()
+    return stripped if stripped else None
+
+
+def _normalize_enum(value: Any, *, allowed: set[str], field_name: str) -> str:
+    """Lower-case and strip ``value`` and require it to be in ``allowed``.
+
+    Raises:
+        ValueError: the normalized text is not a member of ``allowed``; the
+            message names ``field_name`` and shows the original value.
+    """
+    candidate = str(value).strip().lower()
+    if candidate in allowed:
+        return candidate
+    raise ValueError(f"{field_name} must be one of {sorted(allowed)}, got: {value!r}")
+
+
+# Frozen pydantic model holding the four digests a retained realization
+# authority is expected to match. Unknown keys are rejected and every digest
+# must be non-blank text. Documented with comments rather than a docstring so
+# the model's ``__doc__`` is left untouched.
+class RetainedRealizationExpectedDigests(BaseModel):
+    model_config = ConfigDict(frozen=True, extra="forbid")
+
+    target_layout_hash: str
+    tensor_schema_hash: str
+    runtime_build_digest: str
+    resolved_spec_digest: str
+
+    @field_validator(
+        "target_layout_hash",
+        "tensor_schema_hash",
+        "runtime_build_digest",
+        "resolved_spec_digest",
+        mode="before",
+    )
+    @classmethod
+    def _normalize_required_text(cls, value: Any) -> str:
+        """Strip the digest text and reject ``None`` or blank input."""
+        text = _normalize_optional_text(value)
+        if text is not None:
+            return text
+        raise ValueError("expected digest fields must be non-empty")
+
+
+# Serialized form of one retained realization authority entry. The model is
+# frozen and rejects unknown keys. The nested references (``member_ref``,
+# ``binding_value_ref``, ``reservation_capability``,
+# ``group_realization_acquire``) are kept as raw dicts at this layer.
+# Documented with comments rather than a docstring so the model's ``__doc__``
+# is left untouched.
+class RetainedRealizationAuthority(BaseModel):
+    model_config = ConfigDict(frozen=True, extra="forbid")
+
+    group_id: str
+    member_ref: dict[str, Any]
+    daemon_id: str
+    daemon_session_id: str
+    device_uuid: str
+    binding_value_ref: dict[str, Any]
+    reservation_capability: dict[str, Any]
+    group_realization_acquire: dict[str, Any] | None = None
+    local_serving_ref: str | None = None
+    # One of ``_READINESS_STATES`` after normalization.
+    readiness: str
+    # Defaults to "local_only" when omitted, null, or blank.
+    verification_state: str = "local_only"
+    # Required when readiness is "runtime_published_ready".
+    serving_artifact_id: str | None = None
+    trusted_reservation_bytes: int = Field(ge=0)
+    expected: RetainedRealizationExpectedDigests
+
+    @field_validator(
+        "group_id",
+        "daemon_id",
+        "daemon_session_id",
+        "device_uuid",
+        mode="before",
+    )
+    @classmethod
+    def _normalize_required_text(cls, value: Any) -> str:
+        """Strip the text and reject ``None`` or blank input."""
+        normalized = _normalize_optional_text(value)
+        if normalized is None:
+            raise ValueError("retained binding authority text fields required")
+        return normalized
+
+    @field_validator(
+        "local_serving_ref",
+        "serving_artifact_id",
+        mode="before",
+    )
+    @classmethod
+    def _normalize_optional_fields(cls, value: Any) -> Any:
+        """Strip the text; ``None`` or blank input becomes ``None``."""
+        return _normalize_optional_text(value)
+
+    @field_validator("verification_state", mode="before")
+    @classmethod
+    def _normalize_verification_state(cls, value: Any) -> str:
+        """Strip the text, falling back to the default for null/blank input.
+
+        ``verification_state`` is a required-``str`` field with a default, so
+        it cannot share the optional-text validator: that would hand ``None``
+        to ``str`` validation and reject an explicit null or blank value
+        instead of applying the ``"local_only"`` default.
+        """
+        return _normalize_optional_text(value) or "local_only"
+
+    @field_validator("readiness", mode="before")
+    @classmethod
+    def _normalize_readiness(cls, value: Any) -> str:
+        """Normalize case/whitespace and require a known readiness state."""
+        return _normalize_enum(
+            value,
+            allowed=_READINESS_STATES,
+            field_name="retained_binding_acquire.authority.readiness",
+        )
+
+    @model_validator(mode="after")
+    def _validate_published_ready(self) -> RetainedRealizationAuthority:
+        """Require ``serving_artifact_id`` for published-ready authorities."""
+        if self.readiness == "runtime_published_ready" and not self.serving_artifact_id:
+            raise ValueError(
+                "retained_binding_acquire.authority.serving_artifact_id is required when "
+                "readiness='runtime_published_ready'"
+            )
+        return self
+
+
+@dataclass(frozen=True)
+class ParsedRetainedRealizationAuthority:
+    """Immutable retained realization authority with typed nested references.
+
+    Unlike ``RetainedRealizationAuthority``, which keeps its nested references
+    as raw dicts, this record holds them as their typed models.
+    """
+
+    group_id: str
+    local_serving_ref: str | None
+    # Typed counterparts of the raw-dict fields on RetainedRealizationAuthority.
+    binding_value_ref: BindingValueRef
+    reservation_capability: BindingReservationCapability
+    daemon_id: str
+    daemon_session_id: str
+    device_uuid: str
+    # Typed counterpart of ``RetainedRealizationAuthority.member_ref``.
+    member: RuntimeBindingMemberRef
+    # NOTE(review): presumably carries the config's trusted_reservation_bytes
+    # value under a shorter name — confirm against the parser.
+    reservation_bytes: int
+    expected: RetainedRealizationExpectedDigests
+    readiness: str
+    verification_state: str
+    serving_artifact_id: str | None = None
+    group_realization_acquire: GroupRealizationAcquireRef | None = None
+
+
+__all__ = [
+    "ParsedRetainedRealizationAuthority",
+    "RetainedRealizationAuthority",
+    "RetainedRealizationExpectedDigests",
+]
diff --git a/tensorcast/serving/__init__.py b/tensorcast/serving/__init__.py
deleted file mode 100644
index 9b93f041..00000000
--- a/tensorcast/serving/__init__.py
+++ /dev/null
@@ -1,190 +0,0 @@
-#  Copyright (c) 2026, TensorCast Team.
-"""Curated public facade for TensorCast serving integrations."""
-
-from tensorcast.serving.binding_plan import ServingBindingPlan
-from tensorcast.serving.config import (
-    ArtifactBindStartPlan,
-    BootstrapSettings,
-    DiagnosticsSettings,
-    MaterializationSettings,
-    ReplicaPublicationPolicy,
-    RetainedBindingAcquireStartPlan,
-    ServingConfig,
-    ServingSettings,
-    ServingStartPlan,
-    ServingStartPlanError,
-    SourceBootstrapToBindingStartPlan,
-    plan_serving_start,
-)
-from tensorcast.serving.errors import (
-    AdmissionRejectedError,
-    ArtifactLocatorResolutionError,
-    AttachFinalizeError,
-    AuthorityValidationError,
-    CapabilityMissingError,
-    ConfigConflictError,
-    OwnershipTransferError,
-    PlacementAdmissionError,
-    PolicyMismatchError,
-    PublicationRequiredError,
-    ReplicaPublicationError,
-    RuntimeSwapError,
-    SchemaMismatchError,
-    ServingIntegrationError,
-    SourceProviderError,
-    TensorCastServingRuntimeError,
-)
-from tensorcast.serving.hosts import (
-    PLACEMENT_ADMISSION_FACTS_SCHEMA_VERSION,
-    PLACEMENT_IDENTITY_FACTS_SCHEMA_VERSION,
-    RECIPE_CACHE_POLICY_SCHEMA_VERSION,
-    SOURCE_CATALOG_REQUEST_SCHEMA_VERSION,
-    SOURCE_CATALOG_SCHEMA_VERSION,
-    SOURCE_DOWNLOAD_POLICY_SCHEMA_VERSION,
-    AdmissionDecision,
-    AdmissionPolicy,
-    AdmissionRequest,
-    CollectiveHost,
-    DefaultAdmissionPolicy,
-    FinalizeHookHost,
-    FinalizePhase,
-    FinalizePolicy,
-    FrameworkHost,
-    FrameworkIdentity,
-    IntegrationHost,
-    MaterializationExecutionFacts,
-    NativeLoadHost,
-    ObservabilitySink,
-    PlacementAdmissionFacts,
-    PlacementHost,
-    PlacementIdentityFacts,
-    PlacementMemberFacts,
-    RecipeCachePolicy,
-    RecipeTraceHost,
-    SourceCatalogProvider,
-    SourceCatalogRequest,
-    SourceDownloadPolicy,
-    SourceHost,
-    SourceSelector,
-    TensorCastEvent,
-    TensorSurfaceHost,
-    TorchTensorHost,
-    semantic_placement_digest,
-    serving_placement_from_framework_facts,
-)
-from tensorcast.serving.policy import (
-    SERVING_ARTIFACT_LOCATOR_SCHEMA_VERSION,
-    SERVING_POLICY_SCHEMA_VERSION,
-    ServingArtifactLocator,
-    ServingPolicy,
-    merge_serving_reload_extra_config,
-    normalize_serving_reload_request_payload,
-    ranked_version_key_for_member,
-)
-from tensorcast.serving.retained_binding import RetainedBindingAcquireSettings
-from tensorcast.serving.runtime_attachment import RuntimeAttachment
-from tensorcast.serving.runtime_config import (
-    DEFAULT_RUNTIME_PROFILE,
-    RuntimeConfigProfile,
-    RuntimeDaemonSettings,
-    RuntimeGlobalStoreSettings,
-    RuntimeSettings,
-    resolve_runtime_config_profile,
-)
-from tensorcast.serving.runtime_intent import (
-    BootstrapPolicy,
-    ExistingServingArtifact,
-    LocalSourceBootstrap,
-    RequestContext,
-    RetainedBindingAcquire,
-)
-from tensorcast.serving.runtime_view import RuntimeWorkerView
-
-__all__ = [
-    "AdmissionDecision",
-    "AdmissionPolicy",
-    "AdmissionRejectedError",
-    "AdmissionRequest",
-    "ArtifactBindStartPlan",
-    "ArtifactLocatorResolutionError",
-    "AttachFinalizeError",
-    "AuthorityValidationError",
-    "BootstrapPolicy",
-    "BootstrapSettings",
-    "CapabilityMissingError",
-    "CollectiveHost",
-    "ConfigConflictError",
-    "DEFAULT_RUNTIME_PROFILE",
-    "DefaultAdmissionPolicy",
-    "DiagnosticsSettings",
-    "ExistingServingArtifact",
-    "FinalizeHookHost",
-    "FinalizePhase",
-    "FinalizePolicy",
-    "FrameworkHost",
-    "FrameworkIdentity",
-    "IntegrationHost",
-    "LocalSourceBootstrap",
-    "MaterializationExecutionFacts",
-    "MaterializationSettings",
-    "NativeLoadHost",
-    "ObservabilitySink",
-    "OwnershipTransferError",
-    "PLACEMENT_ADMISSION_FACTS_SCHEMA_VERSION",
-    "PLACEMENT_IDENTITY_FACTS_SCHEMA_VERSION",
-    "PlacementAdmissionError",
-    "PlacementAdmissionFacts",
-    "PlacementHost",
-    "PlacementIdentityFacts",
-    "PlacementMemberFacts",
-    "PolicyMismatchError",
-    "PublicationRequiredError",
-    "RECIPE_CACHE_POLICY_SCHEMA_VERSION",
-    "ReplicaPublicationError",
-    "ReplicaPublicationPolicy",
-    "RecipeCachePolicy",
-    "RecipeTraceHost",
-    "RequestContext",
-    "RetainedBindingAcquire",
-    "RetainedBindingAcquireSettings",
-    "RetainedBindingAcquireStartPlan",
-    "RuntimeAttachment",
-    "RuntimeConfigProfile",
-    "RuntimeDaemonSettings",
-    "RuntimeGlobalStoreSettings",
-    "RuntimeSettings",
-    "RuntimeSwapError",
-    "RuntimeWorkerView",
-    "SERVING_ARTIFACT_LOCATOR_SCHEMA_VERSION",
-    "SERVING_POLICY_SCHEMA_VERSION",
-    "SOURCE_CATALOG_REQUEST_SCHEMA_VERSION",
-    "SOURCE_CATALOG_SCHEMA_VERSION",
-    "SOURCE_DOWNLOAD_POLICY_SCHEMA_VERSION",
-    "SchemaMismatchError",
-    "ServingArtifactLocator",
-    "ServingBindingPlan",
-    "ServingConfig",
-    "ServingIntegrationError",
-    "ServingPolicy",
-    "ServingSettings",
-    "ServingStartPlan",
-    "ServingStartPlanError",
-    "SourceBootstrapToBindingStartPlan",
-    "SourceCatalogProvider",
-    "SourceCatalogRequest",
-    "SourceDownloadPolicy",
-    "SourceHost",
-    "SourceProviderError",
-    "SourceSelector",
-    "TensorCastEvent",
-    "TensorCastServingRuntimeError",
-    "TensorSurfaceHost",
-    "TorchTensorHost",
-    "merge_serving_reload_extra_config",
-    "normalize_serving_reload_request_payload",
-    "plan_serving_start",
-    "ranked_version_key_for_member",
-    "resolve_runtime_config_profile",
-    "semantic_placement_digest",
-    "serving_placement_from_framework_facts",
-]
diff --git a/tensorcast/serving/_runtime_impl/__init__.py b/tensorcast/serving/_runtime_impl/__init__.py
deleted file mode 100644
index a72f7800..00000000
--- a/tensorcast/serving/_runtime_impl/__init__.py
+++ /dev/null
@@ -1,2 +0,0 @@
-#  Copyright (c) 2026, TensorCast Team.
-"""Private serving runtime lifecycle implementation package."""
diff --git a/tensorcast/serving/artifact_manifest.py b/tensorcast/serving/artifact_manifest.py
deleted file mode 100644
index 8cbde919..00000000
--- a/tensorcast/serving/artifact_manifest.py
+++ /dev/null
@@ -1,130 +0,0 @@
-#  Copyright (c) 2026, TensorCast Team.
-"""Serving artifact manifest parse and validation helpers."""
-
-from __future__ import annotations
-
-from typing import Any
-
-import torch
-
-import tensorcast as tc
-
-SERVING_ARTIFACT_SCHEMA_VERSION = int(
-    tc.ServingArtifactManifest.model_fields["schema_version"].default
-)
-SERVING_MANIFEST_TENSOR_NAME = tc.SERVING_MANIFEST_TENSOR_NAME
-
-
-def serving_manifest_from_tensor_bytes(
-    data: bytes | bytearray,
-) -> tc.ServingArtifactManifest:
-    return tc.ServingArtifactManifest.from_bytes(bytes(data))
-
-
-def read_serving_artifact_manifest_tensor(
-    artifact: Any,
-    *,
-    artifact_ref: str,
-    manifest_tensor_name: str = SERVING_MANIFEST_TENSOR_NAME,
-) -> tc.ServingArtifactManifest:
-    subset = artifact.subset([manifest_tensor_name])
-    try:
-        manifest_tensor = subset.tensor_dict(device="cpu")[manifest_tensor_name]
-    except Exception as cpu_exc:
-        try:
-            cuda_device = torch.device("cuda", torch.cuda.current_device())
-            manifest_tensor = subset.tensor_dict(device=cuda_device)[
-                manifest_tensor_name
-            ]
-        except Exception as cuda_exc:
-            raise RuntimeError(
-                f"Failed to materialize serving manifest from '{artifact_ref}' "
-                f"(cpu_error={cpu_exc!r}; cuda_error={cuda_exc!r})"
-            ) from cuda_exc
-    if manifest_tensor.dtype != torch.uint8 or manifest_tensor.dim() != 1:
-        raise RuntimeError("TensorCast serving manifest tensor must be 1D torch.uint8")
-    return serving_manifest_from_tensor_bytes(
-        bytes(manifest_tensor.detach().cpu().tolist())
-    )
-
-
-def cross_check_serving_artifact_manifest(
-    *,
-    manifest: Any | None,
-    descriptor_tensor_schema_hash: str,
-    tensor_names: tuple[str, ...],
-    expected_tensor_schema_hash: str,
-    serving_runtime_policy: tc.ServingRuntimePolicy | None = None,
-    expected_schema_version: int = SERVING_ARTIFACT_SCHEMA_VERSION,
-) -> Any:
-    if manifest is None:
-        raise RuntimeError("TensorCast serving artifact manifest is missing")
-    if manifest.schema_version != expected_schema_version:
-        raise RuntimeError(
-            "TensorCast serving artifact schema version mismatch: "
-            f"{manifest.schema_version} != {expected_schema_version}"
-        )
-    if manifest.artifact_kind != "serving":
-        raise RuntimeError(
-            f"TensorCast artifact is not a serving artifact: {manifest.artifact_kind}"
-        )
-    if (
-        serving_runtime_policy is not None
-        and serving_runtime_policy.serving_manifest_ref is not None
-        and manifest.serving_manifest_ref != serving_runtime_policy.serving_manifest_ref
-    ):
-        raise RuntimeError("TensorCast serving artifact manifest ref mismatch")
-    if (
-        serving_runtime_policy is not None
-        and serving_runtime_policy.expected_representation_contract_hash is not None
-        and manifest.representation_contract_hash
-        != serving_runtime_policy.expected_representation_contract_hash
-    ):
-        raise RuntimeError(
-            "TensorCast serving artifact representation contract mismatch"
-        )
-    if (
-        serving_runtime_policy is not None
-        and serving_runtime_policy.expected_serving_build_digest is not None
-        and manifest.serving_build_digest
-        != serving_runtime_policy.expected_serving_build_digest
-    ):
-        raise RuntimeError("TensorCast serving artifact build digest mismatch")
-    if (
-        serving_runtime_policy is not None
-        and getattr(
-            serving_runtime_policy,
-            "expected_topology_admission_digest",
-            None,
-        )
-        is not None
-        and getattr(manifest, "topology_admission_digest", None)
-        != serving_runtime_policy.expected_topology_admission_digest
-    ):
-        raise RuntimeError(
-            "TensorCast serving artifact topology admission digest mismatch"
-        )
-    if manifest.tensor_schema_hash != expected_tensor_schema_hash:
-        raise RuntimeError(
-            "TensorCast serving artifact tensor schema hash mismatch: "
-            f"manifest={manifest.tensor_schema_hash}, "
-            f"expected={expected_tensor_schema_hash}"
-        )
-    if descriptor_tensor_schema_hash != expected_tensor_schema_hash:
-        raise RuntimeError(
-            "TensorCast serving artifact descriptor schema hash mismatch: "
-            f"descriptor={descriptor_tensor_schema_hash}, "
-            f"expected={expected_tensor_schema_hash}"
-        )
-    if manifest.canonical_tensor_count != len(tensor_names):
-        raise RuntimeError("TensorCast serving artifact tensor count mismatch")
-    return manifest
-
-
-__all__ = [
-    "SERVING_ARTIFACT_SCHEMA_VERSION",
-    "SERVING_MANIFEST_TENSOR_NAME",
-    "cross_check_serving_artifact_manifest",
-    "read_serving_artifact_manifest_tensor",
-    "serving_manifest_from_tensor_bytes",
-]
diff --git a/tensorcast/serving/builder/__init__.py b/tensorcast/serving/builder/__init__.py
deleted file mode 100644
index d95ef31b..00000000
--- a/tensorcast/serving/builder/__init__.py
+++ /dev/null
@@ -1,214 +0,0 @@
-#  Copyright (c) 2026, TensorCast Team.
-"""Serving builder primitives shared by framework integrations."""
-
-from tensorcast.serving.binding_plan import ServingBindingPlan
-from tensorcast.serving.builder.binding_plan import (
-    TargetShapes,
-    lower_trace_plan_for_binding,
-    lower_trace_plan_for_realization,
-    range_spec_to_tensorcast_ranges,
-)
-from tensorcast.serving.builder.compiler import (
-    CompiledServingRecipe,
-    RecipeCompileInputs,
-    SemanticValidationSpec,
-    ServingBuildObserver,
-    ServingFacts,
-    SourceHullEntry,
-    TensorcastSemanticValidationSpec,
-    TensorcastServingFacts,
-    TensorSchemaEntry,
-    compile_serving_recipe,
-    compute_recipe_compile_key,
-    filter_tensor_schema_for_trace_plan,
-)
-from tensorcast.serving.builder.materialization import (
-    BindingFinalizeMaterializationResult,
-    allocate_tensors_from_schema,
-    apply_copy_plan,
-    collect_serving_tensors_from_model,
-    dtype_from_string,
-    iter_ranges,
-    load_source_tensors_for_recipe,
-    materialize_binding_finalize_serving_tensors,
-    materialize_pure_transform_serving_tensors,
-    materialize_recipe_copy_plan_tensors,
-    narrow_by_range_spec,
-    narrow_source_view,
-    run_binding_finalize_semantic_validation,
-    tensorcast_view_slices_from_trace_plan,
-    update_dst_coverage,
-    validate_binding_finalize_tensor_schema,
-    validate_dst_coverage,
-    validate_source_tensor_names,
-)
-from tensorcast.serving.builder.publication import (
-    RecipePublicationContext,
-    build_binding_finalize_admission_facts_from_context,
-    build_binding_finalize_build_intent,
-    build_binding_finalize_publication_bundle_from_context,
-    build_pure_transform_build_intent,
-    build_pure_transform_publication_bundle_from_context,
-    build_pure_transform_publication_spec_from_context,
-    build_recipe_serving_build_intent,
-    complete_pure_transform_recipe_publication,
-    prepare_binding_finalize_serving_registration_from_context,
-    prepare_pure_transform_serving_registration_from_context,
-)
-from tensorcast.serving.builder.recipe_cache import (
-    RECIPE_CACHE_PAYLOAD_VERSION,
-    compiled_recipe_from_dict,
-    compiled_recipe_to_dict,
-    load_compiled_recipe_cache,
-    write_compiled_recipe_cache,
-)
-from tensorcast.serving.builder.recipe_validation import (
-    serving_support_level_at_least,
-    serving_support_level_display_name,
-    validate_recipe_for_builder_mode,
-)
-from tensorcast.serving.builder.semantic_validation import (
-    evaluate_semantic_validation_spec,
-)
-from tensorcast.serving.builder.tensor_parity import (
-    TensorParityMismatch,
-    TensorParityProbe,
-    TensorParityReport,
-    build_tensor_parity_probes_from_realization_plan,
-    build_tensor_parity_probes_from_realization_plan_proto,
-    build_tensor_parity_probes_from_recipe,
-    build_tensor_parity_probes_from_trace_plan,
-    evaluate_recipe_tensor_parity,
-    evaluate_tensor_parity_probes,
-)
-from tensorcast.serving.builder.tensor_schema import (
-    validate_tensor_schema_against_tensors,
-)
-from tensorcast.serving.builder.trace_cache import (
-    TRACE_PLAN_CACHE_PAYLOAD_VERSION,
-    dump_trace_plan_debug,
-    load_trace_plan_cache,
-    trace_plan_debug_payload,
-    write_trace_plan_cache,
-)
-from tensorcast.serving.builder.trace_ir import (
-    CopyPlanEntry,
-    MultiRange,
-    Range,
-    RangeSpec,
-    TracePlan,
-    copy_plan_from_dict,
-    copy_plan_to_dict,
-    range_from_dict,
-    range_to_dict,
-    single_range_from_dict,
-    trace_plan_from_dict,
-    trace_plan_to_dict,
-)
-from tensorcast.serving.source_catalog import (
-    SourceCatalog,
-    SourceFileEntry,
-    SourceManifest,
-    SourceTensorMeta,
-    compute_source_metadata_fingerprint,
-    resolve_source_artifact_ref,
-    source_catalog_from_all_safetensors_dir,
-    source_catalog_from_canonical_index,
-    source_catalog_from_manifest,
-    source_catalog_from_selected_safetensors,
-)
-
-__all__ = [
-    "BindingFinalizeMaterializationResult",
-    "CompiledServingRecipe",
-    "CopyPlanEntry",
-    "MultiRange",
-    "Range",
-    "RangeSpec",
-    "RecipeCompileInputs",
-    "RecipePublicationContext",
-    "RECIPE_CACHE_PAYLOAD_VERSION",
-    "SemanticValidationSpec",
-    "SourceCatalog",
-    "SourceFileEntry",
-    "SourceManifest",
-    "SourceHullEntry",
-    "SourceTensorMeta",
-    "ServingBuildObserver",
-    "ServingBindingPlan",
-    "ServingFacts",
-    "TargetShapes",
-    "TRACE_PLAN_CACHE_PAYLOAD_VERSION",
-    "TracePlan",
-    "TensorSchemaEntry",
-    "TensorParityMismatch",
-    "TensorParityProbe",
-    "TensorParityReport",
-    "TensorcastSemanticValidationSpec",
-    "TensorcastServingFacts",
-    "allocate_tensors_from_schema",
-    "apply_copy_plan",
-    "build_binding_finalize_admission_facts_from_context",
-    "build_binding_finalize_build_intent",
-    "build_binding_finalize_publication_bundle_from_context",
-    "build_pure_transform_build_intent",
-    "build_pure_transform_publication_bundle_from_context",
-    "build_pure_transform_publication_spec_from_context",
-    "build_recipe_serving_build_intent",
-    "build_tensor_parity_probes_from_realization_plan",
-    "build_tensor_parity_probes_from_realization_plan_proto",
-    "build_tensor_parity_probes_from_recipe",
-    "build_tensor_parity_probes_from_trace_plan",
-    "collect_serving_tensors_from_model",
-    "compile_serving_recipe",
-    "complete_pure_transform_recipe_publication",
-    "compute_source_metadata_fingerprint",
-    "compute_recipe_compile_key",
-    "copy_plan_from_dict",
-    "copy_plan_to_dict",
-    "compiled_recipe_from_dict",
-    "compiled_recipe_to_dict",
-    "dump_trace_plan_debug",
-    "dtype_from_string",
-    "evaluate_semantic_validation_spec",
-    "evaluate_recipe_tensor_parity",
-    "evaluate_tensor_parity_probes",
-    "iter_ranges",
-    "load_source_tensors_for_recipe",
-    "lower_trace_plan_for_binding",
-    "lower_trace_plan_for_realization",
-    "materialize_binding_finalize_serving_tensors",
-    "materialize_pure_transform_serving_tensors",
-    "materialize_recipe_copy_plan_tensors",
-    "narrow_by_range_spec",
-    "narrow_source_view",
-    "prepare_binding_finalize_serving_registration_from_context",
-    "prepare_pure_transform_serving_registration_from_context",
-    "range_spec_to_tensorcast_ranges",
-    "range_from_dict",
-    "range_to_dict",
-    "filter_tensor_schema_for_trace_plan",
-    "resolve_source_artifact_ref",
-    "run_binding_finalize_semantic_validation",
-    "single_range_from_dict",
-    "source_catalog_from_all_safetensors_dir",
-    "source_catalog_from_canonical_index",
-    "source_catalog_from_manifest",
-    "source_catalog_from_selected_safetensors",
-    "tensorcast_view_slices_from_trace_plan",
-    "trace_plan_debug_payload",
-    "trace_plan_from_dict",
-    "trace_plan_to_dict",
-    "update_dst_coverage",
-    "serving_support_level_at_least",
-    "serving_support_level_display_name",
-    "validate_binding_finalize_tensor_schema",
-    "validate_dst_coverage",
-    "validate_recipe_for_builder_mode",
-    "validate_source_tensor_names",
-    "validate_tensor_schema_against_tensors",
-    "load_trace_plan_cache",
-    "load_compiled_recipe_cache",
-    "write_trace_plan_cache",
-    "write_compiled_recipe_cache",
-]
diff --git a/tensorcast/serving/config.py b/tensorcast/serving/config.py
deleted file mode 100644
index 865220d1..00000000
--- a/tensorcast/serving/config.py
+++ /dev/null
@@ -1,338 +0,0 @@
-#  Copyright (c) 2026, TensorCast Team.
-
-"""Public TensorCast serving artifact runtime configuration schema."""
-
-from __future__ import annotations
-
-from dataclasses import dataclass, field
-from typing import Any, Mapping
-
-from pydantic import BaseModel, ConfigDict, Field, field_validator
-
-from tensorcast.serving.policy import ServingArtifactLocator, ServingPolicy
-from tensorcast.serving.retained_binding import RetainedBindingAcquireSettings
-from tensorcast.serving.runtime_config import RuntimeSettings
-
-_BOOTSTRAP_MODES = {"disabled", "auto", "required"}
-_COLLECTIVE_MODES = {"auto", "required", "disabled"}
-_REPLICA_PUBLICATION_MODES = {"disabled", "optional", "required"}
-_REPLICA_PUBLICATION_TRIGGERS = {"after_vllm_ready"}
-_TOP_LEVEL_KEYS = {
-    "runtime",
-    "serving",
-    "bootstrap",
-    "materialization",
-    "retained_binding_acquire",
-    "diagnostics",
-    "replica_publication",
-}
-
-
-def _normalize_optional_text(value: Any) -> str | None:
-    if value is None:
-        return None
-    normalized = str(value).strip()
-    return normalized or None
-
-
-def _normalize_enum(value: Any, *, allowed: set[str], field_name: str) -> str:
-    normalized = str(value).strip().lower()
-    if normalized not in allowed:
-        raise ValueError(
-            f"{field_name} must be one of {sorted(allowed)}, got: {value!r}"
-        )
-    return normalized
-
-
-class ServingSettings(BaseModel):
-    model_config = ConfigDict(frozen=True, extra="forbid")
-
-    artifact_locator: ServingArtifactLocator | None = None
-    policy: ServingPolicy = ServingPolicy()
-
-
-class BootstrapSettings(BaseModel):
-    model_config = ConfigDict(frozen=True, extra="forbid")
-
-    mode: str = "auto"
-    cache_dir: str | None = None
-    verify_source_checksums: bool = True
-
-    @field_validator("mode", mode="before")
-    @classmethod
-    def _normalize_mode(cls, value: Any) -> str:
-        if value is None:
-            return "auto"
-        return _normalize_enum(
-            value,
-            allowed=_BOOTSTRAP_MODES,
-            field_name="bootstrap.mode",
-        )
-
-    @field_validator("cache_dir", mode="before")
-    @classmethod
-    def _normalize_optional_fields(cls, value: Any) -> Any:
-        return _normalize_optional_text(value)
-
-
-class MaterializationSettings(BaseModel):
-    model_config = ConfigDict(frozen=True, extra="forbid")
-
-    collective: str = "auto"
-
-    @field_validator("collective", mode="before")
-    @classmethod
-    def _normalize_collective(cls, value: Any) -> str:
-        if value is None:
-            return "auto"
-        return _normalize_enum(
-            value,
-            allowed=_COLLECTIVE_MODES,
-            field_name="materialization.collective",
-        )
-
-    def collective_policy_value(self) -> str:
-        return {
-            "auto": "collective_first",
-            "required": "require_collective",
-            "disabled": "disable_collective",
-        }[self.collective]
-
-
-class DiagnosticsSettings(BaseModel):
-    model_config = ConfigDict(frozen=True, extra="forbid")
-
-    debug_path: str | None = None
-    verify_tensors: bool = False
-
-    @field_validator("debug_path", mode="before")
-    @classmethod
-    def _normalize_debug_path(cls, value: Any) -> Any:
-        return _normalize_optional_text(value)
-
-
-class ReplicaPublicationPolicy(BaseModel):
-    model_config = ConfigDict(frozen=True, extra="forbid")
-
-    mode: str = "disabled"
-    trigger: str = "after_vllm_ready"
-    async_publish: bool = True
-    timeout_s: float = 30.0
-    ttl_ms: int | None = None
-    drain_timeout_s: float = 30.0
-
-    @field_validator("mode", mode="before")
-    @classmethod
-    def _normalize_mode(cls, value: Any) -> str:
-        if value is None:
-            return "disabled"
-        return _normalize_enum(
-            value,
-            allowed=_REPLICA_PUBLICATION_MODES,
-            field_name="replica_publication.mode",
-        )
-
-    @field_validator("trigger", mode="before")
-    @classmethod
-    def _normalize_trigger(cls, value: Any) -> str:
-        if value is None:
-            return "after_vllm_ready"
-        return _normalize_enum(
-            value,
-            allowed=_REPLICA_PUBLICATION_TRIGGERS,
-            field_name="replica_publication.trigger",
-        )
-
-    @field_validator("async_publish")
-    @classmethod
-    def _validate_async_publish(cls, value: bool) -> bool:
-        if not value:
-            raise ValueError("replica_publication.async_publish=false is not supported")
-        return value
-
-    @field_validator("timeout_s", "drain_timeout_s")
-    @classmethod
-    def _validate_positive_timeout(cls, value: float) -> float:
-        normalized = float(value)
-        if normalized <= 0:
-            raise ValueError("replica_publication timeouts must be positive")
-        return normalized
-
-    @field_validator("ttl_ms")
-    @classmethod
-    def _reject_ttl(cls, value: int | None) -> int | None:
-        if value is not None:
-            raise ValueError("replica_publication.ttl_ms is not supported yet")
-        return value
-
-
-class ServingConfig(BaseModel):
-    model_config = ConfigDict(frozen=True, extra="forbid")
-
-    runtime: RuntimeSettings = RuntimeSettings()
-    serving: ServingSettings = ServingSettings()
-    bootstrap: BootstrapSettings = BootstrapSettings()
-    materialization: MaterializationSettings = MaterializationSettings()
-    retained_binding_acquire: RetainedBindingAcquireSettings = Field(
-        default_factory=RetainedBindingAcquireSettings,
-    )
-    diagnostics: DiagnosticsSettings = DiagnosticsSettings()
-    replica_publication: ReplicaPublicationPolicy = ReplicaPublicationPolicy()
-
-    @classmethod
-    def from_mapping(cls, data: Mapping[str, Any] | None) -> ServingConfig:
-        payload: Mapping[str, Any] = {} if data is None else data
-        if not isinstance(payload, Mapping):
-            raise ValueError("model_loader_extra_config must be a mapping")
-        unknown = {str(key) for key in payload if str(key) not in _TOP_LEVEL_KEYS}
-        if unknown:
-            raise ValueError(
-                "Unexpected TensorCast serving config keys in "
-                "model_loader_extra_config: "
-                f"{sorted(unknown)}"
-            )
-        serving = payload.get("serving")
-        if isinstance(serving, Mapping) and "selector" in serving:
-            raise ValueError(
-                "serving.selector is not supported; use serving.artifact_locator"
-            )
-        return cls.model_validate(dict(payload))
-
-    def to_mapping(self) -> dict[str, Any]:
-        return self.model_dump(mode="python")
-
-
-class ServingStartPlanError(ValueError):
-    """Startup configuration cannot be lowered into one serving plan."""
-
-
-@dataclass(frozen=True)
-class ServingStartPlan:
-    """Typed serving startup intent selected before runtime allocation."""
-
-    kind: str = field(init=False)
-
-
-@dataclass(frozen=True)
-class ArtifactBindStartPlan(ServingStartPlan):
-    """Bind a durable serving artifact selected by an artifact locator."""
-
-    artifact_locator: ServingArtifactLocator
-    policy: ServingPolicy
-    kind: str = field(default="artifact_bind", init=False)
-
-
-@dataclass(frozen=True)
-class SourceBootstrapToBindingStartPlan(ServingStartPlan):
-    """Bootstrap a source artifact into a daemon-owned binding value."""
-
-    source_selector: Any
-    bootstrap_policy: BootstrapSettings
-    kind: str = field(default="source_bootstrap_to_binding", init=False)
-
-
-@dataclass(frozen=True)
-class RetainedBindingAcquireStartPlan(ServingStartPlan):
-    """Acquire a retained binding authority prepared by artifact prefetch."""
-
-    authority: Any
-    kind: str = field(default="retained_binding_acquire", init=False)
-
-
-def _candidate_rejection_reasons(
-    *,
-    has_retained_authority: bool,
-    has_artifact_locator: bool,
-    has_source_selector: bool,
-    bootstrap_mode: str,
-) -> dict[str, str]:
-    source_reason = (
-        "bootstrap.mode is disabled"
-        if bootstrap_mode == "disabled"
-        else "source selector is unavailable"
-    )
-    return {
-        "retained_binding_acquire": (
-            "selected"
-            if has_retained_authority
-            else "retained_binding_acquire.mode is not external"
-        ),
-        "artifact_bind": (
-            "selected" if has_artifact_locator else "serving artifact locator missing"
-        ),
-        "source_bootstrap_to_binding": (
-            "selected"
-            if has_source_selector and bootstrap_mode in {"auto", "required"}
-            else source_reason
-        ),
-    }
-
-
-def _format_rejection_reasons(reasons: Mapping[str, str]) -> str:
-    return "; ".join(f"{name}: {reason}" for name, reason in reasons.items())
-
-
-def plan_serving_start(
-    *,
-    config: ServingConfig,
-    source_selector: Any | None,
-    expected_member: Any | None = None,
-) -> ServingStartPlan:
-    """Classify serving startup into exactly one canonical start plan."""
-
-    retained_requested = config.retained_binding_acquire.mode == "external"
-    artifact_locator = config.serving.artifact_locator
-    has_artifact_locator = artifact_locator is not None
-    bootstrap_mode = config.bootstrap.mode
-    has_source_selector = source_selector is not None
-
-    if retained_requested and has_artifact_locator:
-        raise ServingStartPlanError(
-            "TensorCast serving config cannot request both retained binding "
-            "acquire and durable serving artifact bind"
-        )
-    if bootstrap_mode == "required" and (retained_requested or has_artifact_locator):
-        raise ServingStartPlanError(
-            "TensorCast bootstrap.mode='required' is mutually exclusive with "
-            "retained binding acquire and durable serving artifact bind"
-        )
-    if bootstrap_mode == "disabled" and not (
-        retained_requested or has_artifact_locator
-    ):
-        raise ServingStartPlanError(
-            "TensorCast bootstrap.mode='disabled' requires retained binding "
-            "authority or durable serving artifact locator"
-        )
-
-    if retained_requested:
-        from tensorcast.serving.retained_binding import (
-            parse_retained_serving_binding_authority,
-        )
-
-        return RetainedBindingAcquireStartPlan(
-            authority=parse_retained_serving_binding_authority(
-                config,
-                expected_member=expected_member,
-            )
-        )
-    if artifact_locator is not None:
-        return ArtifactBindStartPlan(
-            artifact_locator=artifact_locator,
-            policy=config.serving.policy,
-        )
-    if bootstrap_mode in {"auto", "required"} and source_selector is not None:
-        return SourceBootstrapToBindingStartPlan(
-            source_selector=source_selector,
-            bootstrap_policy=config.bootstrap,
-        )
-
-    reasons = _candidate_rejection_reasons(
-        has_retained_authority=retained_requested,
-        has_artifact_locator=has_artifact_locator,
-        has_source_selector=has_source_selector,
-        bootstrap_mode=bootstrap_mode,
-    )
-    raise ServingStartPlanError(
-        "TensorCast serving config did not resolve to one startup plan; "
-        f"rejected candidates: {_format_rejection_reasons(reasons)}"
-    )
diff --git a/tensorcast/serving/contract.py b/tensorcast/serving/contract.py
deleted file mode 100644
index 09e21f81..00000000
--- a/tensorcast/serving/contract.py
+++ /dev/null
@@ -1,180 +0,0 @@
-#  Copyright (c) 2026, TensorCast Team.
-
-"""Canonical serving runtime identity and hash helpers."""
-
-from __future__ import annotations
-
-import json
-from collections.abc import Mapping, Sequence
-from dataclasses import dataclass
-
-import torch
-
-from tensorcast.api.store.serving_builder import (
-    _hash_versioned_payload_to_multihash,
-    _normalize_logical_topology_payload,
-    compute_serving_tensor_schema_hash,
-)
-from tensorcast.api.store.types import CanonicalIndex, CanonicalIndexEntry
-from tensorcast.types import ServingBindingMemberRef, ServingTopologyRef
-
-
-@dataclass(frozen=True)
-class RuntimeTensorSchemaEntry:
-    name: str
-    dtype: str
-    shape: tuple[int, ...]
-    stride: tuple[int, ...]
-    element_size: int
-    storage_offset: int
-
-
-def collect_runtime_tensor_schema(
-    tensors: Mapping[str, torch.Tensor],
-    *,
-    remove_duplicate: bool,
-) -> tuple[RuntimeTensorSchemaEntry, ...]:
-    schema: list[RuntimeTensorSchemaEntry] = []
-    seen_ptrs: set[int] = set()
-    for name, tensor in sorted(tensors.items()):
-        data_ptr = int(tensor.data_ptr())
-        if remove_duplicate and data_ptr in seen_ptrs:
-            continue
-        seen_ptrs.add(data_ptr)
-        storage_offset = int(tensor.storage_offset())
-        if storage_offset != 0:
-            raise ValueError(
-                "runtime tensor schema hash requires storage_offset == 0: "
-                f"{name} has storage_offset={storage_offset}"
-            )
-        schema.append(
-            RuntimeTensorSchemaEntry(
-                name=str(name),
-                dtype=str(tensor.dtype),
-                shape=tuple(int(dim) for dim in tensor.shape),
-                stride=tuple(int(dim) for dim in tensor.stride()),
-                element_size=int(tensor.element_size()),
-                storage_offset=storage_offset,
-            )
-        )
-    return tuple(schema)
-
-
-def compute_runtime_tensor_schema_hash(
-    schema: Sequence[RuntimeTensorSchemaEntry],
-) -> str:
-    entries: list[CanonicalIndexEntry] = []
-    segment_offset = 0
-    for entry in sorted(schema, key=lambda item: item.name):
-        if int(entry.storage_offset) != 0:
-            raise ValueError(
-                "runtime tensor schema hash requires storage_offset == 0: "
-                f"{entry.name} has storage_offset={entry.storage_offset}"
-            )
-        size_bytes = _schema_entry_size_bytes(entry)
-        entries.append(
-            CanonicalIndexEntry(
-                name=entry.name,
-                dtype=_torch_dtype_from_name(entry.dtype),
-                shape=entry.shape,
-                stride=entry.stride,
-                storage_offset=0,
-                segment_offset=segment_offset,
-                size_bytes=size_bytes,
-            )
-        )
-        segment_offset += size_bytes
-    return compute_serving_tensor_schema_hash(
-        CanonicalIndex(
-            entries=tuple(entries),
-            total_size_bytes=segment_offset,
-            avbs_hash="",
-        )
-    )
-
-
-def logical_topology_json(
-    topology_ref: ServingTopologyRef,
-    *,
-    framework_payload: Mapping[str, object],
-) -> str:
-    del topology_ref
-    normalized = _normalize_logical_topology_payload(
-        json.dumps(
-            dict(framework_payload),
-            sort_keys=True,
-            separators=(",", ":"),
-        )
-    )
-    if normalized is None:
-        raise ValueError("framework_payload must define a logical topology")
-    return json.dumps(normalized, sort_keys=True, separators=(",", ":"))
-
-
-def compute_runtime_representation_contract_hash(
-    *,
-    tensor_schema_hash: str,
-    topology_ref: ServingTopologyRef,
-    member_ref: ServingBindingMemberRef,
-    framework_name: str,
-    framework_version: str,
-    adapter_version: str,
-    serving_abi_version: str,
-    source_identity: Mapping[str, object],
-) -> str:
-    if not tensor_schema_hash:
-        raise ValueError("tensor_schema_hash must not be empty")
-    payload = {
-        "framework": {
-            "name": str(framework_name),
-            "version": str(framework_version),
-            "adapter_version": str(adapter_version),
-            "serving_abi_version": str(serving_abi_version),
-        },
-        "topology_ref": _stable_payload(topology_ref.model_dump(mode="python")),
-        "member_ref": _stable_payload(member_ref.model_dump(mode="python")),
-        "source_identity": _stable_payload(dict(source_identity)),
-        "tensor_schema_hash": str(tensor_schema_hash),
-    }
-    return _hash_versioned_payload_to_multihash(
-        "tensorcast.representation.runtime_contract.v1",
-        payload,
-    )
-
-
-def _schema_entry_size_bytes(entry: RuntimeTensorSchemaEntry) -> int:
-    elements = 1
-    for dim in entry.shape:
-        elements *= int(dim)
-    return int(elements * entry.element_size)
-
-
-def _torch_dtype_from_name(dtype_name: str) -> torch.dtype:
-    normalized = dtype_name.removeprefix("torch.")
-    dtype = getattr(torch, normalized, None)
-    if not isinstance(dtype, torch.dtype):
-        raise ValueError(f"unsupported runtime tensor dtype: {dtype_name}")
-    return dtype
-
-
-def _stable_payload(value: object) -> object:
-    if isinstance(value, Mapping):
-        return {
-            str(key): _stable_payload(value[key])
-            for key in sorted(value, key=lambda item: str(item))
-            if value[key] is not None
-        }
-    if isinstance(value, (list, tuple)):
-        return [_stable_payload(item) for item in value]
-    if isinstance(value, (str, int, float, bool)) or value is None:
-        return value
-    return str(value)
-
-
-__all__ = [
-    "RuntimeTensorSchemaEntry",
-    "collect_runtime_tensor_schema",
-    "compute_runtime_representation_contract_hash",
-    "compute_runtime_tensor_schema_hash",
-    "logical_topology_json",
-]
diff --git a/tensorcast/serving/policy.py b/tensorcast/serving/policy.py
deleted file mode 100644
index 91e1b7ff..00000000
--- a/tensorcast/serving/policy.py
+++ /dev/null
@@ -1,256 +0,0 @@
-#  Copyright (c) 2026, TensorCast Team.
-
-"""Serving artifact locator and runtime policy schema."""
-
-from __future__ import annotations
-
-from collections.abc import Mapping
-from typing import Any
-from urllib.parse import quote
-
-from pydantic import BaseModel, ConfigDict, field_validator, model_validator
-
-import tensorcast as tc
-
-_ARTIFACT_LOCATOR_KINDS = {"version_key", "artifact_ref", "ranked_version_key"}
-_POLICY_MODES = {"from_manifest", "pinned"}
-SERVING_ARTIFACT_LOCATOR_SCHEMA_VERSION = 1
-SERVING_POLICY_SCHEMA_VERSION = 1
-RANKED_VERSION_KEY_MEMBER_SEGMENT = "members"
-
-
-def _normalize_optional_text(value: Any) -> str | None:
-    if value is None:
-        return None
-    normalized = str(value).strip()
-    return normalized or None
-
-
-def _normalize_enum(value: Any, *, allowed: set[str], field_name: str) -> str:
-    normalized = str(value).strip().lower()
-    if normalized not in allowed:
-        raise ValueError(
-            f"{field_name} must be one of {sorted(allowed)}, got: {value!r}"
-        )
-    return normalized
-
-
-def _member_id_from_ref(member: Any) -> str:
-    if member is None:
-        raise ValueError(
-            "ranked_version_key artifact locator resolution requires a serving member"
-        )
-    if isinstance(member, Mapping):
-        member_id = member.get("member_id")
-    else:
-        member_id = getattr(member, "member_id", None)
-    normalized = _normalize_optional_text(member_id)
-    if normalized is None:
-        raise ValueError(
-            "ranked_version_key artifact locator resolution requires member.member_id"
-        )
-    return normalized
-
-
-def _member_from_placement(placement: Any | None) -> Any | None:
-    if placement is None:
-        return None
-    if isinstance(placement, Mapping):
-        return placement.get("member")
-    return getattr(placement, "member", None)
-
-
-def ranked_version_key_for_member(version_key: str, member: Any) -> str:
-    base_key = _normalize_optional_text(version_key)
-    if base_key is None:
-        raise ValueError("ranked_version_key base value is required")
-    member_id = quote(_member_id_from_ref(member), safe=":._-")
-    return f"{base_key.rstrip('/')}/{RANKED_VERSION_KEY_MEMBER_SEGMENT}/{member_id}"
-
-
-class ServingArtifactLocator(BaseModel):
-    model_config = ConfigDict(frozen=True, extra="forbid")
-
-    kind: str
-    value: str
-    schema_version: int = SERVING_ARTIFACT_LOCATOR_SCHEMA_VERSION
-
-    @field_validator("kind", mode="before")
-    @classmethod
-    def _normalize_kind(cls, value: Any) -> str:
-        return _normalize_enum(
-            value,
-            allowed=_ARTIFACT_LOCATOR_KINDS,
-            field_name="serving.artifact_locator.kind",
-        )
-
-    @field_validator("value", mode="before")
-    @classmethod
-    def _normalize_value(cls, value: Any) -> str:
-        normalized = _normalize_optional_text(value)
-        if normalized is None:
-            raise ValueError("serving.artifact_locator.value is required")
-        return normalized
-
-    @classmethod
-    def artifact_ref(cls, artifact_ref: str) -> ServingArtifactLocator:
-        return cls(kind="artifact_ref", value=str(artifact_ref))
-
-    @classmethod
-    def version_key(cls, version_key: str) -> ServingArtifactLocator:
-        return cls(kind="version_key", value=str(version_key))
-
-    @classmethod
-    def ranked_version_key(cls, version_key: str) -> ServingArtifactLocator:
-        return cls(kind="ranked_version_key", value=str(version_key))
-
-    def resolve_version_key(
-        self,
-        *,
-        member: Any | None = None,
-        placement: Any | None = None,
-    ) -> str:
-        if self.kind == "artifact_ref":
-            return self.value
-        if self.kind == "ranked_version_key":
-            if member is None:
-                member = _member_from_placement(placement)
-            return ranked_version_key_for_member(self.value, member)
-        return self.value
-
-    def resolve_artifact_ref(
-        self,
-        *,
-        member: Any | None = None,
-        placement: Any | None = None,
-    ) -> str:
-        if self.kind == "artifact_ref":
-            return self.value
-
-        from tensorcast.api.store import get_runtime_context
-
-        artifact_id, _disk_path = get_runtime_context().resolve_key_mapping_cached(
-            key=self.resolve_version_key(member=member, placement=placement)
-        )
-        if not artifact_id:
-            raise ValueError(
-                "serving artifact locator version key did not resolve to a serving "
-                f"artifact: {self.value!r}"
-            )
-        return artifact_id
-
-
-class ServingPolicy(BaseModel):
-    model_config = ConfigDict(frozen=True, extra="forbid")
-
-    mode: str = "from_manifest"
-    manifest_ref: str | None = None
-    representation_contract_hash: str | None = None
-    serving_build_digest: str | None = None
-    schema_version: int = SERVING_POLICY_SCHEMA_VERSION
-
-    @field_validator("mode", mode="before")
-    @classmethod
-    def _normalize_mode(cls, value: Any) -> str:
-        if value is None:
-            return "from_manifest"
-        return _normalize_enum(
-            value,
-            allowed=_POLICY_MODES,
-            field_name="serving.policy.mode",
-        )
-
-    @field_validator(
-        "manifest_ref",
-        "representation_contract_hash",
-        "serving_build_digest",
-        mode="before",
-    )
-    @classmethod
-    def _normalize_optional_fields(cls, value: Any) -> Any:
-        return _normalize_optional_text(value)
-
-    @model_validator(mode="after")
-    def _validate_pinned_policy(self) -> ServingPolicy:
-        if self.mode != "pinned":
-            return self
-        missing = [
-            name
-            for name, value in (
-                ("manifest_ref", self.manifest_ref),
-                (
-                    "representation_contract_hash",
-                    self.representation_contract_hash,
-                ),
-                ("serving_build_digest", self.serving_build_digest),
-            )
-            if value is None
-        ]
-        if missing:
-            raise ValueError(
-                f"serving.policy.mode='pinned' requires {', '.join(missing)}"
-            )
-        return self
-
-    def to_runtime_policy(self) -> Any | None:
-        if self.mode == "from_manifest":
-            return None
-        return tc.ServingRuntimePolicy(
-            require_manifest=True,
-            serving_manifest_ref=self.manifest_ref,
-            expected_representation_contract_hash=(self.representation_contract_hash),
-            expected_serving_build_digest=self.serving_build_digest,
-        )
-
-
-def normalize_serving_reload_request_payload(
-    *,
-    artifact_locator: ServingArtifactLocator | Mapping[str, Any],
-    policy: ServingPolicy | Mapping[str, Any] | None = None,
-) -> tuple[dict[str, Any], dict[str, Any]]:
-    """Normalize public reload locator/policy data to the stable wire shape."""
-
-    parsed_locator = (
-        artifact_locator
-        if isinstance(artifact_locator, ServingArtifactLocator)
-        else ServingArtifactLocator.model_validate(artifact_locator)
-    )
-    parsed_policy = (
-        policy
-        if isinstance(policy, ServingPolicy)
-        else ServingPolicy.model_validate(policy or {"mode": "from_manifest"})
-    )
-    locator_payload = {
-        "kind": parsed_locator.kind,
-        "value": parsed_locator.value,
-    }
-    policy_payload: dict[str, Any] = {"mode": parsed_policy.mode}
-    if parsed_policy.manifest_ref is not None:
-        policy_payload["manifest_ref"] = parsed_policy.manifest_ref
-    if parsed_policy.representation_contract_hash is not None:
-        policy_payload["representation_contract_hash"] = (
-            parsed_policy.representation_contract_hash
-        )
-    if parsed_policy.serving_build_digest is not None:
-        policy_payload["serving_build_digest"] = parsed_policy.serving_build_digest
-    return locator_payload, policy_payload
-
-
-def merge_serving_reload_extra_config(
-    extra: Mapping[str, Any] | None,
-    *,
-    artifact_locator: ServingArtifactLocator | Mapping[str, Any],
-    policy: ServingPolicy | Mapping[str, Any] | None = None,
-) -> dict[str, Any]:
-    """Return model_loader_extra_config with a normalized serving reload request."""
-
-    normalized_locator, normalized_policy = normalize_serving_reload_request_payload(
-        artifact_locator=artifact_locator,
-        policy=policy,
-    )
-    merged_extra = dict(extra or {})
-    serving = dict(merged_extra.get("serving", {}))
-    serving["artifact_locator"] = normalized_locator
-    serving["policy"] = normalized_policy
-    merged_extra["serving"] = serving
-    return merged_extra
diff --git a/tensorcast/serving/runtime.py b/tensorcast/serving/runtime.py
deleted file mode 100644
index ef865e12..00000000
--- a/tensorcast/serving/runtime.py
+++ /dev/null
@@ -1,105 +0,0 @@
-#  Copyright (c) 2026, TensorCast Team.
-
-"""Framework-facing TensorCast serving runtime API."""
-
-from __future__ import annotations
-
-from tensorcast.serving._runtime_impl.lifecycle import ServingRuntimeSession
-from tensorcast.serving.config import (
-    ArtifactBindStartPlan,
-    ReplicaPublicationPolicy,
-    RetainedBindingAcquireStartPlan,
-    ServingConfig,
-    ServingStartPlan,
-    ServingStartPlanError,
-    SourceBootstrapToBindingStartPlan,
-    plan_serving_start,
-)
-from tensorcast.serving.errors import (
-    AdmissionRejectedError,
-    ArtifactLocatorResolutionError,
-    AttachFinalizeError,
-    AuthorityValidationError,
-    CapabilityMissingError,
-    ConfigConflictError,
-    OwnershipTransferError,
-    PlacementAdmissionError,
-    PolicyMismatchError,
-    PublicationRequiredError,
-    ReplicaPublicationError,
-    RuntimeSwapError,
-    SchemaMismatchError,
-    ServingIntegrationError,
-    SourceProviderError,
-    TensorCastServingRuntimeError,
-)
-from tensorcast.serving.hosts import SourceSelector
-from tensorcast.serving.policy import (
-    ServingArtifactLocator,
-    ServingPolicy,
-    merge_serving_reload_extra_config,
-    normalize_serving_reload_request_payload,
-)
-from tensorcast.serving.runtime_attachment import RuntimeAttachment
-from tensorcast.serving.runtime_config import (
-    DEFAULT_RUNTIME_PROFILE,
-    RuntimeConfigProfile,
-    RuntimeDaemonSettings,
-    RuntimeGlobalStoreSettings,
-    RuntimeSettings,
-    resolve_runtime_config_profile,
-)
-from tensorcast.serving.runtime_intent import (
-    BootstrapPolicy,
-    ExistingServingArtifact,
-    LocalSourceBootstrap,
-    RequestContext,
-    RetainedBindingAcquire,
-)
-from tensorcast.serving.runtime_view import RuntimeWorkerView
-
-__all__ = [
-    "AdmissionRejectedError",
-    "ArtifactBindStartPlan",
-    "ArtifactLocatorResolutionError",
-    "AttachFinalizeError",
-    "AuthorityValidationError",
-    "BootstrapPolicy",
-    "CapabilityMissingError",
-    "ConfigConflictError",
-    "DEFAULT_RUNTIME_PROFILE",
-    "ExistingServingArtifact",
-    "LocalSourceBootstrap",
-    "OwnershipTransferError",
-    "PlacementAdmissionError",
-    "PolicyMismatchError",
-    "PublicationRequiredError",
-    "ReplicaPublicationError",
-    "ReplicaPublicationPolicy",
-    "RequestContext",
-    "RetainedBindingAcquire",
-    "RetainedBindingAcquireStartPlan",
-    "RuntimeAttachment",
-    "RuntimeConfigProfile",
-    "RuntimeDaemonSettings",
-    "RuntimeGlobalStoreSettings",
-    "RuntimeSettings",
-    "RuntimeSwapError",
-    "RuntimeWorkerView",
-    "SchemaMismatchError",
-    "ServingArtifactLocator",
-    "ServingConfig",
-    "ServingIntegrationError",
-    "ServingPolicy",
-    "ServingRuntimeSession",
-    "ServingStartPlan",
-    "ServingStartPlanError",
-    "SourceBootstrapToBindingStartPlan",
-    "SourceProviderError",
-    "SourceSelector",
-    "TensorCastServingRuntimeError",
-    "merge_serving_reload_extra_config",
-    "normalize_serving_reload_request_payload",
-    "plan_serving_start",
-    "resolve_runtime_config_profile",
-]
diff --git a/tensorcast/serving/runtime_config.py b/tensorcast/serving/runtime_config.py
deleted file mode 100644
index eb85fc3e..00000000
--- a/tensorcast/serving/runtime_config.py
+++ /dev/null
@@ -1,297 +0,0 @@
-#  Copyright (c) 2026, TensorCast Team.
-
-"""Runtime startup settings for TensorCast serving artifact consumers."""
-
-from __future__ import annotations
-
-import importlib.resources
-import re
-from dataclasses import dataclass
-from pathlib import Path
-from threading import Lock
-from typing import Any
-
-from pydantic import BaseModel, ConfigDict, field_validator
-
-import tensorcast as tc
-
-_INIT_LOCK = Lock()
-_INIT_KWARGS: dict[str, Any] | None = None
-_DEFAULT_GLOBAL_STORE_ADDRESS = "127.0.0.1:50051"
-DEFAULT_RUNTIME_PROFILE = "serving_single_node"
-
-_RUNTIME_MODES = {"auto", "connect", "create"}
-_GLOBAL_STORE_MODES = {"auto", "connect", "start", "none"}
-_PROFILE_NAME_PATTERN = re.compile(r"^[A-Za-z0-9_.-]+$")
-
-
-def _normalize_optional_text(value: Any) -> str | None:
-    if value is None:
-        return None
-    normalized = str(value).strip()
-    return normalized or None
-
-
-def _normalize_enum(value: Any, *, allowed: set[str], field_name: str) -> str:
-    normalized = str(value).strip().lower()
-    if normalized not in allowed:
-        raise ValueError(
-            f"{field_name} must be one of {sorted(allowed)}, got: {value!r}"
-        )
-    return normalized
-
-
-def _validate_existing_file(path: str, *, field_name: str) -> str:
-    candidate = Path(path).expanduser()
-    if not candidate.is_file():
-        raise ValueError(f"{field_name} must point to an existing file, got: {path!r}")
-    return str(candidate)
-
-
-def _default_resource_path(package: str, name: str) -> str | None:
-    try:
-        resource = importlib.resources.files(package).joinpath(name)
-    except (FileNotFoundError, ModuleNotFoundError):
-        return None
-    path = Path(str(resource))
-    return str(path) if path.is_file() else None
-
-
-def _normalize_profile_name(value: Any) -> str | None:
-    normalized = _normalize_optional_text(value)
-    if normalized is None:
-        return None
-    if not _PROFILE_NAME_PATTERN.fullmatch(normalized):
-        raise ValueError(
-            "runtime.profile must contain only letters, digits, '.', '_', or '-'"
-        )
-    return normalized
-
-
-def _profile_resource_path(profile: str, filename: str) -> str:
-    profile_name = _normalize_profile_name(profile)
-    if profile_name is None:
-        raise ValueError("runtime.profile must be non-empty")
-    try:
-        resource = (
-            importlib.resources.files("tensorcast")
-            .joinpath("config")
-            .joinpath("profiles")
-            .joinpath(profile_name)
-            .joinpath(filename)
-        )
-    except (FileNotFoundError, ModuleNotFoundError) as exc:
-        raise ValueError(
-            f"Unknown TensorCast runtime config profile: {profile_name!r}"
-        ) from exc
-    path = Path(str(resource))
-    if not path.is_file():
-        raise ValueError(f"Unknown TensorCast runtime config profile: {profile_name!r}")
-    return str(path)
-
-
-@dataclass(frozen=True)
-class RuntimeConfigProfile:
-    name: str
-    daemon_config_path: str
-    global_store_config_path: str
-
-
-def resolve_runtime_config_profile(profile: str) -> RuntimeConfigProfile:
-    profile_name = _normalize_profile_name(profile)
-    if profile_name is None:
-        raise ValueError("runtime.profile must be non-empty")
-    return RuntimeConfigProfile(
-        name=profile_name,
-        daemon_config_path=_profile_resource_path(
-            profile_name, "store_daemon_config.yaml"
-        ),
-        global_store_config_path=_profile_resource_path(
-            profile_name, "global_store_config.yaml"
-        ),
-    )
-
-
-class RuntimeDaemonSettings(BaseModel):
-    model_config = ConfigDict(frozen=True, extra="forbid")
-
-    address: str | None = None
-    config_path: str | None = None
-    show_logs: bool = False
-
-    @field_validator("address", "config_path", mode="before")
-    @classmethod
-    def _normalize_optional_fields(cls, value: Any) -> Any:
-        return _normalize_optional_text(value)
-
-
-class RuntimeGlobalStoreSettings(BaseModel):
-    model_config = ConfigDict(frozen=True, extra="forbid")
-
-    mode: str = "auto"
-    address: str | None = None
-    config_path: str | None = None
-
-    @field_validator("mode", mode="before")
-    @classmethod
-    def _normalize_mode(cls, value: Any) -> str:
-        if value is None:
-            return "auto"
-        return _normalize_enum(
-            value,
-            allowed=_GLOBAL_STORE_MODES,
-            field_name="runtime.global_store.mode",
-        )
-
-    @field_validator("address", "config_path", mode="before")
-    @classmethod
-    def _normalize_optional_fields(cls, value: Any) -> Any:
-        return _normalize_optional_text(value)
-
-    def resolved_mode(self, runtime_mode: str) -> str:
-        if self.mode != "auto":
-            return self.mode
-        if self.address is not None:
-            return "connect"
-        if self.config_path is not None:
-            return "start"
-        if runtime_mode in {"create", "auto"}:
-            return "start"
-        return "none"
-
-
-class RuntimeSettings(BaseModel):
-    model_config = ConfigDict(frozen=True, extra="forbid")
-
-    profile: str | None = DEFAULT_RUNTIME_PROFILE
-    mode: str = "auto"
-    daemon: RuntimeDaemonSettings = RuntimeDaemonSettings()
-    global_store: RuntimeGlobalStoreSettings = RuntimeGlobalStoreSettings()
-
-    @field_validator("profile", mode="before")
-    @classmethod
-    def _normalize_profile(cls, value: Any) -> str | None:
-        return _normalize_profile_name(value)
-
-    @field_validator("mode", mode="before")
-    @classmethod
-    def _normalize_mode(cls, value: Any) -> str:
-        if value is None:
-            return "auto"
-        return _normalize_enum(
-            value,
-            allowed=_RUNTIME_MODES,
-            field_name="runtime.mode",
-        )
-
-    @staticmethod
-    def _default_daemon_config_path() -> str | None:
-        return _default_resource_path("tensorcast", "daemon_config.yaml")
-
-    @staticmethod
-    def _default_global_store_config_path() -> str | None:
-        return _default_resource_path("tensorcast", "global_store_config.yaml")
-
-    def to_init_kwargs(
-        self,
-        *,
-        default_daemon_config_path: str | None = None,
-        default_global_store_config_path: str | None = None,
-    ) -> dict[str, Any]:
-        profile = (
-            resolve_runtime_config_profile(self.profile)
-            if self.profile is not None
-            else None
-        )
-        kwargs: dict[str, Any] = {
-            "mode": self.mode,
-            "show_daemon_logs": self.daemon.show_logs,
-        }
-        if self.daemon.address is not None:
-            kwargs["address"] = self.daemon.address
-
-        daemon_config_path = self.daemon.config_path
-        if daemon_config_path is None and self.mode in {"create", "auto"}:
-            daemon_config_path = (
-                profile.daemon_config_path
-                if profile is not None
-                else default_daemon_config_path or self._default_daemon_config_path()
-            )
-        if daemon_config_path is not None:
-            kwargs["daemon_config_path"] = _validate_existing_file(
-                daemon_config_path,
-                field_name="runtime.daemon.config_path",
-            )
-        elif self.mode in {"create", "auto"}:
-            raise ValueError(
-                "runtime.mode requires a daemon config file for create/auto; "
-                "set runtime.profile or runtime.daemon.config_path"
-            )
-
-        global_store_mode = self.global_store.resolved_mode(self.mode)
-        if global_store_mode != "none":
-            kwargs["global_store_mode"] = global_store_mode
-        if global_store_mode == "connect":
-            kwargs["global_store_address"] = (
-                self.global_store.address or _DEFAULT_GLOBAL_STORE_ADDRESS
-            )
-        elif global_store_mode == "start":
-            global_store_config_path = self.global_store.config_path
-            if global_store_config_path is None:
-                global_store_config_path = (
-                    profile.global_store_config_path
-                    if profile is not None
-                    else default_global_store_config_path
-                    or self._default_global_store_config_path()
-                )
-            if global_store_config_path is not None:
-                kwargs["global_store_config_path"] = _validate_existing_file(
-                    global_store_config_path,
-                    field_name="runtime.global_store.config_path",
-                )
-            else:
-                raise ValueError(
-                    "runtime.global_store.mode='start' requires a Global "
-                    "Store config file; set runtime.profile or "
-                    "runtime.global_store.config_path"
-                )
-
-        return kwargs
-
-    def ensure_initialized(
-        self,
-        *,
-        default_daemon_config_path: str | None = None,
-        default_global_store_config_path: str | None = None,
-    ) -> None:
-        init_kwargs = self.to_init_kwargs(
-            default_daemon_config_path=default_daemon_config_path,
-            default_global_store_config_path=default_global_store_config_path,
-        )
-        with _INIT_LOCK:
-            global _INIT_KWARGS
-            if tc.is_initialized():
-                if _INIT_KWARGS is None:
-                    raise RuntimeError(
-                        "TensorCast runtime was already initialized outside "
-                        "tensorcast.serving.RuntimeSettings."
-                    )
-                if init_kwargs != _INIT_KWARGS:
-                    raise RuntimeError(
-                        "TensorCast runtime already initialized with different "
-                        "settings. Existing="
-                        f"{_INIT_KWARGS}, requested={init_kwargs}"
-                    )
-                return
-            tc.init(**init_kwargs)
-            _INIT_KWARGS = dict(init_kwargs)
-
-
-__all__ = [
-    "DEFAULT_RUNTIME_PROFILE",
-    "RuntimeConfigProfile",
-    "RuntimeDaemonSettings",
-    "RuntimeGlobalStoreSettings",
-    "RuntimeSettings",
-    "resolve_runtime_config_profile",
-]
diff --git a/tensorcast/serving/runtime_contract.py b/tensorcast/serving/runtime_contract.py
deleted file mode 100644
index c8dd96e4..00000000
--- a/tensorcast/serving/runtime_contract.py
+++ /dev/null
@@ -1,98 +0,0 @@
-#  Copyright (c) 2026, TensorCast Team.
-
-"""Source-bound serving runtime contract readiness helpers."""
-
-from __future__ import annotations
-
-from dataclasses import dataclass
-from typing import Any, Callable
-
-import tensorcast as tc
-from tensorcast.types import SourceBoundCapability
-
-MIN_SOURCE_BOUND_CONTRACT_VERSION = 4
-SOURCE_BOUND_CONTRACT_PATH_COLLECTIVE_FIRST_V4 = "collective_first_v4"
-REQUIRED_SOURCE_BOUND_CAPABILITIES = (
-    SourceBoundCapability.FIRST_CLASS_COLLECTIVE_INGRESS,
-    SourceBoundCapability.TYPED_EXECUTION_DIAGNOSTICS,
-    SourceBoundCapability.SINGLE_MINT_BINDING_CLOSEOUT,
-)
-
-
-@dataclass(frozen=True)
-class SourceBoundContractState:
-    server_config_present: bool
-    source_bound_contract_version: int
-    source_bound_capability_flags: int
-    source_bound_capability_names: tuple[str, ...]
-    source_bound_contract_ready: bool
-
-    @classmethod
-    def unavailable(cls) -> SourceBoundContractState:
-        return cls(
-            server_config_present=False,
-            source_bound_contract_version=0,
-            source_bound_capability_flags=0,
-            source_bound_capability_names=(),
-            source_bound_contract_ready=False,
-        )
-
-    @classmethod
-    def from_server_config(
-        cls,
-        server_config: Any | None,
-    ) -> SourceBoundContractState:
-        if server_config is None:
-            return cls.unavailable()
-        flags = int(getattr(server_config, "source_bound_capability_flags", 0) or 0)
-        version = int(getattr(server_config, "source_bound_contract_version", 0) or 0)
-        capability_names = tuple(
-            str(capability.name)
-            for capability in SourceBoundCapability
-            if flags & int(capability)
-        )
-        contract_ready = version >= MIN_SOURCE_BOUND_CONTRACT_VERSION and all(
-            flags & int(capability) for capability in REQUIRED_SOURCE_BOUND_CAPABILITIES
-        )
-        return cls(
-            server_config_present=True,
-            source_bound_contract_version=version,
-            source_bound_capability_flags=flags,
-            source_bound_capability_names=capability_names,
-            source_bound_contract_ready=contract_ready,
-        )
-
-
-def read_source_bound_contract_state(
-    *,
-    store_fn: Callable[[], Any] | None = None,
-) -> SourceBoundContractState:
-    try:
-        store = (store_fn or tc.store)()
-        capabilities = store.capabilities
-        server_config = getattr(capabilities, "server_config", None)
-    except Exception:
-        return SourceBoundContractState.unavailable()
-    return SourceBoundContractState.from_server_config(server_config)
-
-
-def source_bound_contract_profile_fields(
-    state: SourceBoundContractState,
-    path: str,
-) -> dict[str, object]:
-    return {
-        "source_bound_contract_version": int(state.source_bound_contract_version),
-        "source_bound_capability_flags": list(state.source_bound_capability_names),
-        "source_bound_contract_ready": bool(state.source_bound_contract_ready),
-        "source_bound_contract_path": path,
-    }
-
-
-__all__ = [
-    "MIN_SOURCE_BOUND_CONTRACT_VERSION",
-    "REQUIRED_SOURCE_BOUND_CAPABILITIES",
-    "SOURCE_BOUND_CONTRACT_PATH_COLLECTIVE_FIRST_V4",
-    "SourceBoundContractState",
-    "read_source_bound_contract_state",
-    "source_bound_contract_profile_fields",
-]
diff --git a/tensorcast/serving/runtime_intent.py b/tensorcast/serving/runtime_intent.py
deleted file mode 100644
index 5e6c56c1..00000000
--- a/tensorcast/serving/runtime_intent.py
+++ /dev/null
@@ -1,66 +0,0 @@
-#  Copyright (c) 2026, TensorCast Team.
-
-"""Serving runtime intent DTOs."""
-
-from __future__ import annotations
-
-from collections.abc import Mapping
-from dataclasses import dataclass, field
-from typing import Any
-
-from tensorcast.serving.errors import AuthorityValidationError
-from tensorcast.serving.hosts import RecipeCachePolicy, SourceSelector
-from tensorcast.serving.policy import ServingArtifactLocator, ServingPolicy
-from tensorcast.serving.retained_binding import ParsedRetainedServingBindingAuthority
-
-
-@dataclass(frozen=True)
-class BootstrapPolicy:
-    fields: Mapping[str, object] = field(default_factory=dict)
-
-
-class ServingIntent:
-    """Marker base class for serving lifecycle intent DTOs."""
-
-
-@dataclass(frozen=True)
-class ExistingServingArtifact(ServingIntent):
-    artifact_locator: ServingArtifactLocator | object
-    policy: ServingPolicy | object | None = None
-
-
-@dataclass(frozen=True)
-class LocalSourceBootstrap(ServingIntent):
-    source_selector: SourceSelector
-    bootstrap_policy: Any
-    cache_policy: RecipeCachePolicy | None = None
-
-
-@dataclass(frozen=True)
-class RetainedBindingAcquire(ServingIntent):
-    authority: ParsedRetainedServingBindingAuthority
-
-    def __post_init__(self) -> None:
-        if not isinstance(self.authority, ParsedRetainedServingBindingAuthority):
-            raise AuthorityValidationError(
-                "RetainedBindingAcquire.authority must be "
-                "ParsedRetainedServingBindingAuthority"
-            )
-
-
-@dataclass(frozen=True)
-class RequestContext:
-    framework_config: object | None = None
-    model_config: object | None = None
-    target_device: object | None = None
-    timeout_s: float | None = 30.0
-
-
-__all__ = [
-    "BootstrapPolicy",
-    "ExistingServingArtifact",
-    "LocalSourceBootstrap",
-    "RequestContext",
-    "RetainedBindingAcquire",
-    "ServingIntent",
-]
diff --git a/tensorcast/serving/session.py b/tensorcast/serving/session.py
deleted file mode 100644
index e947f089..00000000
--- a/tensorcast/serving/session.py
+++ /dev/null
@@ -1,30 +0,0 @@
-#  Copyright (c) 2026, TensorCast Team.
-
-"""Common serving binding session state shell."""
-
-from __future__ import annotations
-
-from typing import Any
-
-from pydantic import BaseModel, ConfigDict
-
-from tensorcast.serving.policy import ServingArtifactLocator
-from tensorcast.types import BindingValueRef
-
-
-class ServingBindingState(BaseModel):
-    model_config = ConfigDict(frozen=True, extra="forbid")
-
-    state: str
-    artifact_locator: ServingArtifactLocator | None = None
-    serving_artifact_ref: str | None = None
-    manifest_ref: str | None = None
-    representation_contract_hash: str | None = None
-    serving_build_digest: str | None = None
-    binding_value_ref: BindingValueRef | None = None
-    local_serving_ref: str | None = None
-    readiness: str | None = None
-    updated_at: str | None = None
-
-    def to_response(self) -> dict[str, Any]:
-        return self.model_dump(mode="python")
diff --git a/tensorcast/serving/testing.py b/tensorcast/serving/testing.py
deleted file mode 100644
index 3124b346..00000000
--- a/tensorcast/serving/testing.py
+++ /dev/null
@@ -1,1115 +0,0 @@
-#  Copyright (c) 2026, TensorCast Team.
-"""Reusable conformance checks for framework serving integrations."""
-
-from __future__ import annotations
-
-from collections.abc import Iterable, Mapping
-from contextlib import contextmanager
-from dataclasses import dataclass, field
-from types import ModuleType, SimpleNamespace
-from typing import Any, cast
-
-import torch
-
-import tensorcast as tc
-from tensorcast.serving._runtime_impl import lifecycle as _integration
-from tensorcast.serving.retained_binding import (
-    ParsedRetainedServingBindingAuthority,
-    RetainedServingBindingExpectedDigests,
-)
-
-
-@dataclass(frozen=True)
-class ConformanceResult:
-    """Result from a lightweight serving runtime conformance check."""
-
-    checks: Mapping[str, bool] = field(default_factory=dict)
-    messages: Mapping[str, str] = field(default_factory=dict)
-    level: str | None = None
-
-    @property
-    def failed_checks(self) -> tuple[str, ...]:
-        return tuple(name for name, passed in self.checks.items() if not passed)
-
-    def failure_summary(self) -> str:
-        failed = self.failed_checks
-        if not failed:
-            return "TensorCast serving conformance checks passed"
-        lines = [
-            "TensorCast serving conformance checks failed"
-            + (f" for {self.level}" if self.level else "")
-            + ":"
-        ]
-        for name in failed:
-            message = self.messages.get(name, "No remediation hint available")
-            lines.append(f"- {name}: {message}")
-        return "\n".join(lines)
-
-    def assert_passed(self) -> None:
-        if self.failed_checks:
-            raise AssertionError(self.failure_summary())
-
-
-def _result(
-    *,
-    level: str,
-    checks: Mapping[str, bool],
-    messages: Mapping[str, str],
-) -> ConformanceResult:
-    result = ConformanceResult(checks=checks, messages=messages, level=level)
-    result.assert_passed()
-    return result
-
-
-_PUBLIC_BOUNDARY_MESSAGES = {
-    "has_session": (
-        "Expose ServingRuntimeSession from tensorcast.serving.runtime; Level 1 "
-        "frameworks should not construct lower-level lifecycle helpers."
-    ),
-    "has_attachment": (
-        "Expose RuntimeAttachment as the framework-held lifecycle token."
-    ),
-    "has_request_context": (
-        "Expose RequestContext so framework facts enter lifecycle calls through "
-        "one typed context object."
-    ),
-    "hides_admin_local_bootstrap": (
-        "Keep admin/local-bootstrap override DTOs out of the framework runtime "
-        "module; route them through admin/offline surfaces."
-    ),
-    "hides_low_level_bind": (
-        "Do not expose bind/swap/restore helpers from the runtime module; "
-        "frameworks should call ServingRuntimeSession.start/reload."
-    ),
-    "hides_projection_dtos": (
-        "Runtime endpoint projection DTOs live in tensorcast.serving.runtime_view."
-    ),
-    "hides_state_helpers": (
-        "Model attribute helpers live in tensorcast.serving.state."
-    ),
-}
-
-_FRAMEWORK_ISOLATION_MESSAGES = {
-    "no_vllm_imports": (
-        "Reference and conformance frameworks must not import vLLM. Move any "
-        "needed generic fact extraction into TensorCast hosts or testing helpers."
-    ),
-    "no_internal_runtime_imports": (
-        "Framework examples should not import TensorCast private/internal "
-        "runtime modules."
-    ),
-}
-
-
-def assert_public_runtime_boundary(runtime_module: ModuleType) -> ConformanceResult:
-    """Check that runtime imports expose framework APIs, not admin helpers."""
-
-    public_names = set(getattr(runtime_module, "__all__", ()))
-    checks = {
-        "has_session": "ServingRuntimeSession" in public_names,
-        "has_attachment": "RuntimeAttachment" in public_names,
-        "has_request_context": "RequestContext" in public_names,
-        "hides_admin_local_bootstrap": "AdminLocalSourceBootstrap" not in public_names
-        and "_AdminLocalSourceBootstrap" not in public_names,
-        "hides_low_level_bind": "bind_serving_artifact" not in public_names
-        and "swap_serving_artifact" not in public_names
-        and "restore_retained_binding" not in public_names,
-        "hides_projection_dtos": {
-            "PublishedReplicaProjection",
-            "ReloadResponseProjection",
-            "RuntimeEndpointProjection",
-            "SourceSelectionProjection",
-            "WeightVersionProjection",
-        }.isdisjoint(public_names),
-        "hides_state_helpers": {
-            "ModelAttributeRuntimeState",
-            "RuntimeAttachmentRecord",
-            "RuntimeAttachmentStore",
-        }.isdisjoint(public_names),
-    }
-    return _result(
-        level="public-runtime-boundary",
-        checks=checks,
-        messages=_PUBLIC_BOUNDARY_MESSAGES,
-    )
-
-
-def assert_framework_isolation(module_names: Iterable[str]) -> ConformanceResult:
-    """Check that a fake/reference framework avoids vLLM imports."""
-
-    names = tuple(str(name) for name in module_names)
-    checks = {
-        "no_vllm_imports": not any(
-            name == "vllm" or name.startswith("vllm.") for name in names
-        ),
-        "no_internal_runtime_imports": not any(
-            name.startswith("tensorcast.serving.internal") for name in names
-        ),
-    }
-    return _result(
-        level="framework-isolation",
-        checks=checks,
-        messages=_FRAMEWORK_ISOLATION_MESSAGES,
-    )
-
-
-class FakeArtifactView:
-    def __init__(self, names: Iterable[str] = ()) -> None:
-        self.names = tuple(names)
-
-    def bind(self, **kwargs: Any) -> "FakeBinding":
-        binding = FakeBinding()
-        binding.names = self.names
-        binding.bind_kwargs = kwargs
-        return binding
-
-
-class FakeArtifact:
-    def subset(self, names: Iterable[str]) -> FakeArtifactView:
-        return FakeArtifactView(names)
-
-
-class FakeBinding:
-    def __init__(self) -> None:
-        self.tensors = {"w": torch.ones((1,), dtype=torch.float16)}
-        self.binding_layout_id = "layout-1"
-        self.names: tuple[str, ...] = ()
-        self.bind_kwargs: dict[str, Any] = {}
-        self.swapped: tuple[object, dict[str, Any]] | None = None
-        self.closed = False
-
-    def swap(self, artifact: object, **kwargs: Any) -> "FakeBinding":
-        self.swapped = (artifact, kwargs)
-        self.tensors = {"w": torch.full((1,), 2.0, dtype=torch.float16)}
-        return self
-
-    def close(self) -> None:
-        self.closed = True
-
-
-class FakeRestoredRetainedBinding:
-    def __init__(self) -> None:
-        self.tensors = {"w": torch.ones((1,), dtype=torch.float16)}
-        self.binding_layout_id = "layout-1"
-        self.binding_value_ref = SimpleNamespace(
-            binding_id="binding-1",
-            binding_layout_id="layout-1",
-            binding_value_id="value-1",
-            seal_generation=1,
-        )
-        self.reservation_bytes = 4096
-        self.closed = False
-        self.transferred = False
-
-    def transfer_to_runtime(self) -> object:
-        self.transferred = True
-        return SimpleNamespace(close=lambda: None)
-
-    def close(self) -> None:
-        self.closed = True
-
-
-def _retained_authority(runtime_module: ModuleType) -> object:
-    member = tc.ServingBindingMemberRef(
-        member_id="member-0",
-        member_index=0,
-        member_count=1,
-        group_id="group-1",
-    )
-    binding_ref = tc.BindingValueRef(
-        binding_id="binding-1",
-        binding_layout_id="layout-1",
-        binding_value_id="value-1",
-        seal_generation=1,
-    )
-    capability = tc.BindingReservationCapability(
-        capability_id="capability-1",
-        binding_value_ref=binding_ref,
-        daemon_id="daemon-1",
-        daemon_session_id="session-1",
-        device_uuid="gpu-0",
-        member=member,
-        reservation_bytes=4096,
-        scope_digest="scope-1",
-    )
-    return ParsedRetainedServingBindingAuthority(
-        group_id="group-1",
-        local_serving_ref="binding-local:fake",
-        binding_value_ref=binding_ref,
-        reservation_capability=capability,
-        daemon_id="daemon-1",
-        daemon_session_id="session-1",
-        device_uuid="gpu-0",
-        member=member,
-        reservation_bytes=4096,
-        expected=RetainedServingBindingExpectedDigests(
-            target_layout_hash="layout-hash",
-            tensor_schema_hash="fake-schema",
-            serving_build_digest="build-digest",
-            resolved_spec_digest="spec-digest",
-        ),
-        readiness="serving_local_ready",
-        verification_state="local_only",
-    )
-
-
-class FakeRuntimeModel:
-    def __init__(self) -> None:
-        self.tensors = {"w": torch.empty((1,), dtype=torch.float16, device="meta")}
-
-
-class FakeFrameworkHost:
-    def identity(self, model_config: object) -> _integration.FrameworkIdentity:
-        del model_config
-        return _integration.FrameworkIdentity(
-            framework_name="fakefw",
-            framework_version="fakefw-v1",
-            adapter_version="adapter-v1",
-            serving_abi_version="abi-v1",
-        )
-
-    def prepare_model_construction(
-        self,
-        framework_config: object | None,
-        model_config: object | None,
-    ) -> None:
-        del framework_config, model_config
-
-    def build_meta_model(
-        self,
-        framework_config: object | None,
-        model_config: object | None,
-    ) -> FakeRuntimeModel:
-        del framework_config, model_config
-        return FakeRuntimeModel()
-
-    def build_runtime_model(
-        self,
-        framework_config: object | None,
-        model_config: object | None,
-        target_device: object | None,
-    ) -> FakeRuntimeModel:
-        del framework_config, model_config, target_device
-        return FakeRuntimeModel()
-
-    def assert_model_ready_for_runtime_binding(
-        self,
-        model: FakeRuntimeModel,
-        *,
-        context: object,
-    ) -> None:
-        del context
-        if "w" not in model.tensors:
-            raise AssertionError("fake model missing runtime tensor 'w'")
-
-    def semantic_probes(
-        self,
-        model: FakeRuntimeModel,
-        model_config: object | None,
-    ) -> dict[str, object]:
-        del model, model_config
-        return {}
-
-
-class FakePlacementHost:
-    def identity_facts(
-        self,
-        framework_config: object | None,
-    ) -> _integration.PlacementIdentityFacts:
-        del framework_config
-        return _integration.PlacementIdentityFacts(
-            tensor_parallel_rank=0,
-            tensor_parallel_size=1,
-            pipeline_parallel_rank=0,
-            pipeline_parallel_size=1,
-            data_parallel_rank=0,
-            data_parallel_size=1,
-        )
-
-    def admission_facts(
-        self,
-        framework_config: object | None,
-    ) -> _integration.PlacementAdmissionFacts:
-        del framework_config
-        return _integration.PlacementAdmissionFacts()
-
-    def member_facts(
-        self,
-        framework_config: object | None,
-    ) -> _integration.PlacementMemberFacts:
-        del framework_config
-        return _integration.PlacementMemberFacts(
-            runtime_rank=0,
-            runtime_world_size=1,
-            member_id="member-0",
-            member_index=0,
-            member_count=1,
-            group_id_hint="group-1",
-        )
-
-    def execution_facts(
-        self,
-        framework_config: object | None,
-    ) -> _integration.MaterializationExecutionFacts:
-        del framework_config
-        return _integration.MaterializationExecutionFacts(
-            collective_rank=0,
-            collective_world_size=1,
-            tensor_parallel_ranks=(0,),
-        )
-
-
-class FakeTensorSurface:
-    def runtime_only_tensor_names(self, model: FakeRuntimeModel) -> tuple[str, ...]:
-        del model
-        return ()
-
-    def align_runtime_tensor_names(
-        self,
-        model: FakeRuntimeModel,
-        expected_names: Iterable[str],
-    ) -> int:
-        if set(expected_names) != set(model.tensors):
-            raise AssertionError("fake runtime tensor names do not match")
-        return 0
-
-    def collect_runtime_tensors(
-        self,
-        model: FakeRuntimeModel,
-        *,
-        remove_duplicate: bool = False,
-    ) -> dict[str, object]:
-        del remove_duplicate
-        return dict(model.tensors)
-
-    def collect_runtime_tensor_view(
-        self,
-        tensors: Mapping[str, object],
-    ) -> tuple[object, ...]:
-        del tensors
-        return ()
-
-    def compute_runtime_tensor_schema_hash(
-        self,
-        tensors: Mapping[str, object],
-        *,
-        remove_duplicate: bool = False,
-    ) -> str:
-        del tensors, remove_duplicate
-        return "fake-schema"
-
-    def attach_bound_tensors(
-        self,
-        model: FakeRuntimeModel,
-        tensors: Mapping[str, object],
-        *,
-        replace_meta_params: bool,
-    ) -> FakeRuntimeModel:
-        del replace_meta_params
-        model.tensors.update(cast(Mapping[str, torch.Tensor], tensors))
-        return model
-
-    def allocate_runtime_only_tensors(
-        self,
-        model: FakeRuntimeModel,
-        target_device: torch.device,
-    ) -> dict[str, object]:
-        del model, target_device
-        return {}
-
-    def snapshot_tensor_invariants(
-        self,
-        tensors: Mapping[str, object],
-    ) -> tuple[str, ...]:
-        return tuple(sorted(tensors))
-
-    def validate_tensor_invariants(
-        self,
-        before: tuple[str, ...],
-        after: Mapping[str, object],
-    ) -> None:
-        if before != tuple(sorted(after)):
-            raise AssertionError("fake tensor invariants changed")
-
-
-class SchemaMismatchTensorSurface(FakeTensorSurface):
-    def compute_runtime_tensor_schema_hash(
-        self,
-        tensors: Mapping[str, object],
-        *,
-        remove_duplicate: bool = False,
-    ) -> str:
-        del tensors, remove_duplicate
-        return "wrong-schema"
-
-
-class FakeSourceHost:
-    def source_selector(
-        self,
-        framework_config: object | None,
-        model_config: object | None,
-    ) -> object:
-        del framework_config, model_config
-        return _integration.SourceSelector.local_path("/tmp/fakefw-model")
-
-    def source_catalog_config(
-        self,
-        framework_config: object | None,
-        model_config: object | None,
-    ) -> None:
-        del framework_config, model_config
-        return None
-
-    def recipe_cache_policy(
-        self,
-        framework_config: object | None,
-        model_config: object | None,
-    ) -> None:
-        del framework_config, model_config
-        return None
-
-
-class FakeSourceCatalogProvider:
-    def __init__(self) -> None:
-        self.requests: list[object] = []
-
-    def build_catalog(self, request: object) -> object:
-        self.requests.append(request)
-        return SimpleNamespace(
-            source_artifact_ref=request.source_artifact_ref,
-            selected_files=(),
-        )
-
-
-class FakeServingArtifactResolver:
-    def resolve(self, artifact_ref: str) -> SimpleNamespace:
-        return SimpleNamespace(
-            artifact=FakeArtifact(),
-            artifact_ref=artifact_ref,
-            tensor_names=("w",),
-            manifest=SimpleNamespace(
-                representation_contract_hash=f"repr:{artifact_ref}",
-                source_artifact_ref="mi2:source",
-                serving_build_digest=f"build:{artifact_ref}",
-            ),
-        )
-
-    def cross_check(
-        self,
-        resolved_artifact: SimpleNamespace,
-        **kwargs: object,
-    ) -> SimpleNamespace:
-        del kwargs
-        return resolved_artifact
-
-
-def build_fake_runtime_host(hosts_module: ModuleType) -> object:
-    """Build a minimal non-vLLM host for runtime conformance checks."""
-
-    return hosts_module.IntegrationHost(
-        framework=FakeFrameworkHost(),
-        placement=FakePlacementHost(),
-        tensor_surface=FakeTensorSurface(),
-    )
-
-
-_LEVEL1_MESSAGES = {
-    "direct_start": (
-        "Durable serving artifact startup failed. Verify framework model "
-        "construction, tensor surface attach/schema behavior, placement facts, "
-        "and artifact resolver output."
-    ),
-    "runtime_initialized": (
-        "ServingRuntimeSession.start did not initialize RuntimeSettings before "
-        "binding the serving artifact."
-    ),
-    "describe": (
-        "ServingRuntimeSession.describe must return the typed RuntimeWorkerView "
-        "for the current attachment."
-    ),
-    "reload": (
-        "Durable serving artifact reload failed. Level 1 reload must use a "
-        "typed ServingArtifactLocator and ServingPolicy."
-    ),
-    "reload_identity_from_runtime_view": (
-        "Reload response identity must come from the runtime view, not from the "
-        "request payload."
-    ),
-    "source_capability_not_required": (
-        "Level 1 direct serving artifact start/reload must not require SourceHost."
-    ),
-    "source_catalog_not_required": (
-        "Level 1 direct serving artifact start/reload must not require "
-        "SourceCatalogProvider."
-    ),
-    "rejects_local_reload_artifact_locator": (
-        "Reload must reject local source selectors; local paths belong to "
-        "Level 2 bootstrap, not durable serving artifact reload."
-    ),
-    "rejects_untyped_reload_artifact_locator": (
-        "Reload must reject untyped artifact locator dictionaries on the public "
-        "runtime path. Use ServingArtifactLocator."
-    ),
-    "rejects_untyped_reload_policy": (
-        "Reload must reject untyped policy dictionaries on the public runtime "
-        "path. Use ServingPolicy."
-    ),
-}
-
-_LEVEL2_MESSAGES = {
-    "missing_source_catalog_fails_closed": (
-        "Local bootstrap requires a SourceCatalogProvider; TensorCast core owns "
-        "source identity and catalog request construction."
-    ),
-    "source_catalog_request_core_owned": (
-        "Source catalog providers must receive a core-owned SourceCatalogRequest "
-        "with typed source selector and source artifact identity."
-    ),
-    "recipe_build_receives_core_catalog": (
-        "Recipe build should consume the core source catalog, not framework "
-        "private catalog state."
-    ),
-    "missing_trace_capability_is_explicit": (
-        "Cache-miss local bootstrap must fail with a clear missing trace/native "
-        "load capability instead of AttributeError or fallback loading."
-    ),
-    "local_path_is_not_reload_artifact_locator": (
-        "Local path selectors must stay in bootstrap; reload accepts only durable "
-        "serving artifact locators."
-    ),
-}
-
-_LEVEL3_MESSAGES = {
-    "retained_acquire_public_start": (
-        "Retained binding acquire must enter through ServingRuntimeSession.start and "
-        "return a RuntimeAttachment with typed endpoint projection."
-    ),
-    "retained_acquire_uses_host_member": (
-        "Retained acquire must validate authority member facts against the "
-        "framework placement host."
-    ),
-    "retained_acquire_transfers_ownership": (
-        "Retained binding ownership must transfer into TensorCast runtime state "
-        "only after attach/finalize succeeds."
-    ),
-    "missing_authority_fails_closed": (
-        "Retained binding acquire config must include typed retained authority."
-    ),
-    "authority_mismatch_fails_closed": (
-        "Daemon/session/member authority mismatches must fail closed."
-    ),
-    "failure_cleanup_closes_untransferred_handle": (
-        "Attach/finalize failure must close an untransferred retained handle."
-    ),
-    "failure_path_used_retained_restore": (
-        "Retained binding failure coverage did not exercise restore ownership."
-    ),
-    "rejects_arbitrary_retained_authority": (
-        "Retained acquire must reject arbitrary authority objects; use the parsed "
-        "retained serving binding authority."
-    ),
-}
-
-
-def _retained_binding_acquire_config(runtime_module: ModuleType) -> dict[str, Any]:
-    authority = _retained_authority(runtime_module)
-    return {
-        "retained_binding_acquire": {
-            "mode": "external",
-            "authority": {
-                "group_id": authority.group_id,
-                "member_ref": authority.member.model_dump(mode="python"),
-                "daemon_id": authority.daemon_id,
-                "daemon_session_id": authority.daemon_session_id,
-                "device_uuid": authority.device_uuid,
-                "binding_value_ref": (
-                    authority.binding_value_ref.model_dump(mode="python")
-                ),
-                "reservation_capability": (
-                    authority.reservation_capability.model_dump(mode="python")
-                ),
-                "local_serving_ref": authority.local_serving_ref,
-                "readiness": authority.readiness,
-                "verification_state": authority.verification_state,
-                "serving_artifact_id": authority.serving_artifact_id,
-                "trusted_reservation_bytes": authority.reservation_bytes,
-                "expected": {
-                    "target_layout_hash": authority.expected.target_layout_hash,
-                    "tensor_schema_hash": authority.expected.tensor_schema_hash,
-                    "serving_build_digest": authority.expected.serving_build_digest,
-                    "resolved_spec_digest": authority.expected.resolved_spec_digest,
-                },
-            },
-        },
-    }
-
-
-@contextmanager
-def _patched_fake_runtime(runtime_module: ModuleType):
-    integration_module = cast(Any, _integration)
-    original_ensure_initialized = runtime_module.RuntimeSettings.ensure_initialized
-    original_contract_reader = integration_module.read_source_bound_contract_state
-    original_materialization_options = (
-        integration_module.ServingIntegration.build_materialization_options
-    )
-    initialized: list[object] = []
-
-    def ensure_initialized(self) -> None:
-        initialized.append(self)
-
-    runtime_module.RuntimeSettings.ensure_initialized = ensure_initialized
-    integration_module.read_source_bound_contract_state = lambda: SimpleNamespace(
-        source_bound_contract_ready=True,
-        source_bound_contract_version=4,
-        source_bound_capability_names=("collective",),
-    )
-    integration_module.ServingIntegration.build_materialization_options = (
-        lambda self, **kwargs: ("fake-materialization-options", kwargs)
-    )
-    try:
-        yield initialized
-    finally:
-        runtime_module.RuntimeSettings.ensure_initialized = original_ensure_initialized
-        integration_module.read_source_bound_contract_state = original_contract_reader
-        integration_module.ServingIntegration.build_materialization_options = (
-            original_materialization_options
-        )
-
-
-def assert_level1_runtime_conformance(
-    runtime_module: ModuleType,
-    hosts_module: ModuleType,
-    *,
-    host: object | None = None,
-) -> ConformanceResult:
-    """Run Level 1 durable serving artifact runtime conformance.
-
-    The suite intentionally uses only ``tensorcast.serving.runtime`` and
-    ``tensorcast.serving.hosts`` plus this testing module's fake host fixtures.
-    It covers direct artifact start, reload, describe, capability optionality,
-    strict public DTO rejection and no-vLLM-import contracts. It does not
-    instantiate local bootstrap or retained binding acquire intent DTOs.
-    """
-
-    checks: dict[str, bool] = {}
-    assert_public_runtime_boundary(runtime_module)
-    assert_framework_isolation(
-        (runtime_module.__name__, hosts_module.__name__, __name__)
-    )
-
-    with _patched_fake_runtime(runtime_module) as initialized:
-        host = host if host is not None else build_fake_runtime_host(hosts_module)
-        session = runtime_module.ServingRuntimeSession.from_config(
-            {
-                "bootstrap": {
-                    "mode": "disabled",
-                },
-                "serving": {
-                    "artifact_locator": {
-                        "kind": "artifact_ref",
-                        "value": "mi2:serving",
-                    },
-                },
-            },
-            host=host,
-            resolver=FakeServingArtifactResolver(),
-        )
-        attachment = session.start(
-            runtime_module.RequestContext(
-                framework_config=SimpleNamespace(),
-                model_config=SimpleNamespace(model="fake-model"),
-                target_device=torch.device("cuda:0"),
-            )
-        )
-        direct_payload = attachment.view.endpoint.to_weight_version_payload()
-        checks["direct_start"] = (
-            direct_payload.get("serving_artifact_ref") == "mi2:serving"
-            and direct_payload.get("source_artifact_ref") == "mi2:source"
-        )
-        checks["runtime_initialized"] = bool(initialized)
-
-        described = session.describe(attachment)
-        checks["describe"] = (
-            described.endpoint.to_weight_version_payload().get("serving_artifact_ref")
-            == "mi2:serving"
-        )
-
-        reloaded = session.reload(
-            current_attachment=attachment,
-            artifact_locator=runtime_module.ServingArtifactLocator.artifact_ref(
-                "mi2:serving-next"
-            ),
-            policy=runtime_module.ServingPolicy(),
-            context=runtime_module.RequestContext(
-                framework_config=SimpleNamespace(),
-                model_config=SimpleNamespace(model="fake-model"),
-            ),
-            model=attachment.model,
-        )
-        reload_response = reloaded.view.endpoint.to_reload_response_payload()
-        checks["reload"] = (
-            reload_response.get("serving_artifact_ref") == "mi2:serving-next"
-        )
-        checks["reload_identity_from_runtime_view"] = (
-            reloaded.state.runtime_view.serving_artifact_ref
-            == reload_response.get("serving_artifact_ref")
-        )
-        checks["source_capability_not_required"] = True
-        checks["source_catalog_not_required"] = True
-
-        try:
-            session.reload(
-                current_attachment=reloaded,
-                artifact_locator=runtime_module.SourceSelector.local_path("/tmp/model"),
-                policy=runtime_module.ServingPolicy(),
-                context=runtime_module.RequestContext(),
-            )
-        except _integration.ConfigConflictError:
-            checks["rejects_local_reload_artifact_locator"] = True
-        else:
-            checks["rejects_local_reload_artifact_locator"] = False
-
-        try:
-            session.reload(
-                current_attachment=reloaded,
-                artifact_locator={
-                    "kind": "artifact_ref",
-                    "value": "mi2:serving-next",
-                },
-                policy=runtime_module.ServingPolicy(),
-                context=runtime_module.RequestContext(),
-            )
-        except _integration.ConfigConflictError:
-            checks["rejects_untyped_reload_artifact_locator"] = True
-        else:
-            checks["rejects_untyped_reload_artifact_locator"] = False
-
-        try:
-            session.reload(
-                current_attachment=reloaded,
-                artifact_locator=runtime_module.ServingArtifactLocator.artifact_ref(
-                    "mi2:serving-next"
-                ),
-                policy={"mode": "from_manifest"},
-                context=runtime_module.RequestContext(),
-            )
-        except _integration.ConfigConflictError:
-            checks["rejects_untyped_reload_policy"] = True
-        else:
-            checks["rejects_untyped_reload_policy"] = False
-
-    return _result(level="level1-runtime", checks=checks, messages=_LEVEL1_MESSAGES)
-
-
-def assert_level2_local_bootstrap_conformance(
-    runtime_module: ModuleType,
-    hosts_module: ModuleType,
-) -> ConformanceResult:
-    """Run Level 2 local source bootstrap planning conformance."""
-
-    checks: dict[str, bool] = {}
-    with _patched_fake_runtime(runtime_module):
-        integration_module = cast(Any, _integration)
-        host_without_catalog = hosts_module.IntegrationHost(
-            framework=FakeFrameworkHost(),
-            placement=FakePlacementHost(),
-            tensor_surface=FakeTensorSurface(),
-            source=FakeSourceHost(),
-        )
-        session = runtime_module.ServingRuntimeSession.from_config(
-            {
-                "bootstrap": {
-                    "mode": "required",
-                },
-            },
-            host=host_without_catalog,
-        )
-        original_resolve_source_subject = (
-            integration_module.ServingIntegration.resolve_source_subject
-        )
-
-        def fake_resolve_source_subject(self, selector, **kwargs):
-            del self, selector, kwargs
-            return _integration.SourceSubject(
-                artifact_ref="mi2:source",
-                subject=SimpleNamespace(),
-                source_kind="fake",
-                metadata_fingerprint="meta",
-            )
-
-        integration_module.ServingIntegration.resolve_source_subject = (
-            fake_resolve_source_subject
-        )
-        try:
-            try:
-                session.start(
-                    runtime_module.RequestContext(
-                        framework_config=SimpleNamespace(),
-                        model_config=SimpleNamespace(model="fake-model"),
-                        target_device=torch.device("cuda:0"),
-                    )
-                )
-            except _integration.CapabilityMissingError as exc:
-                checks["missing_source_catalog_fails_closed"] = "source_catalog" in str(
-                    exc
-                )
-            else:
-                checks["missing_source_catalog_fails_closed"] = False
-
-            catalog_provider = FakeSourceCatalogProvider()
-            host_with_catalog = hosts_module.IntegrationHost(
-                framework=FakeFrameworkHost(),
-                placement=FakePlacementHost(),
-                tensor_surface=FakeTensorSurface(),
-                source=FakeSourceHost(),
-                source_catalog=catalog_provider,
-            )
-            session_with_catalog = runtime_module.ServingRuntimeSession.from_config(
-                {
-                    "bootstrap": {
-                        "mode": "required",
-                    },
-                },
-                host=host_with_catalog,
-            )
-            original_build_recipe = integration_module.RecipeBuildSession.build_recipe
-            captured_builds: list[Mapping[str, object]] = []
-
-            def fake_build_recipe(self, **kwargs):
-                del self
-                captured_builds.append(kwargs)
-                kwargs["framework_adapter"].trace_model_load(
-                    FakeRuntimeModel(),
-                    ["w"],
-                    {"w": SimpleNamespace(name="w")},
-                )
-
-            integration_module.RecipeBuildSession.build_recipe = fake_build_recipe
-            try:
-                try:
-                    session_with_catalog.start(
-                        runtime_module.RequestContext(
-                            framework_config=SimpleNamespace(),
-                            model_config=SimpleNamespace(model="fake-model"),
-                            target_device=torch.device("cuda:0"),
-                        )
-                    )
-                except _integration.CapabilityMissingError as exc:
-                    checks["missing_trace_capability_is_explicit"] = (
-                        "RecipeTraceHost" in str(exc) or "trace_model_load" in str(exc)
-                    )
-                else:
-                    checks["missing_trace_capability_is_explicit"] = False
-            finally:
-                integration_module.RecipeBuildSession.build_recipe = (
-                    original_build_recipe
-                )
-        finally:
-            integration_module.ServingIntegration.resolve_source_subject = (
-                original_resolve_source_subject
-            )
-
-        catalog_request = (
-            catalog_provider.requests[0] if catalog_provider.requests else None
-        )
-        checks["source_catalog_request_core_owned"] = (
-            catalog_request is not None
-            and getattr(catalog_request, "source_artifact_ref", None) == "mi2:source"
-            and isinstance(
-                getattr(catalog_request, "source_selector", None),
-                _integration.SourceSelector,
-            )
-        )
-        checks["recipe_build_receives_core_catalog"] = (
-            bool(captured_builds)
-            and captured_builds[0].get("source_catalog") is not None
-        )
-
-        attachment = runtime_module.RuntimeAttachment(
-            model=object(),
-            state=_integration.RuntimeBindingState(
-                runtime_view=_integration.RuntimeBindingView()
-            ),
-            view=runtime_module.RuntimeWorkerView.from_runtime_view(
-                _integration.RuntimeBindingView()
-            ),
-        )
-        try:
-            session.reload(
-                current_attachment=attachment,
-                artifact_locator=runtime_module.SourceSelector.local_path(
-                    "/tmp/fakefw-model"
-                ),
-                policy=runtime_module.ServingPolicy(),
-                context=runtime_module.RequestContext(),
-            )
-        except _integration.ConfigConflictError:
-            checks["local_path_is_not_reload_artifact_locator"] = True
-        else:
-            checks["local_path_is_not_reload_artifact_locator"] = False
-
-    return _result(
-        level="level2-local-bootstrap",
-        checks=checks,
-        messages=_LEVEL2_MESSAGES,
-    )
-
-
-def assert_level3_retained_binding_conformance(
-    runtime_module: ModuleType,
-    hosts_module: ModuleType,
-) -> ConformanceResult:
-    """Run Level 3 retained binding acquire conformance."""
-
-    checks: dict[str, bool] = {}
-    with _patched_fake_runtime(runtime_module):
-        integration_module = cast(Any, _integration)
-        host = build_fake_runtime_host(hosts_module)
-        retained_calls: list[Mapping[str, object]] = []
-        restored = FakeRestoredRetainedBinding()
-        original_restore_retained = integration_module.restore_retained_binding
-
-        @contextmanager
-        def fake_restore_retained(**kwargs: object):
-            retained_calls.append(kwargs)
-            yield restored
-
-        integration_module.restore_retained_binding = fake_restore_retained
-        try:
-            session = runtime_module.ServingRuntimeSession.from_config(
-                _retained_binding_acquire_config(runtime_module),
-                host=host,
-            )
-            retained = session.start(
-                runtime_module.RequestContext(
-                    framework_config=SimpleNamespace(),
-                    model_config=SimpleNamespace(model="fake-model"),
-                    target_device=torch.device("cuda:0"),
-                )
-            )
-        finally:
-            integration_module.restore_retained_binding = original_restore_retained
-        retained_payload = retained.view.endpoint.to_weight_version_payload()
-        checks["retained_acquire_public_start"] = (
-            retained_payload.get("local_serving_ref") == "binding-local:fake"
-            and retained_payload.get("binding_value_ref", {}).get("binding_value_id")
-            == "value-1"
-        )
-        checks["retained_acquire_uses_host_member"] = (
-            bool(retained_calls)
-            and getattr(retained_calls[0].get("expected_member"), "member_index", None)
-            == 0
-        )
-        checks["retained_acquire_transfers_ownership"] = restored.transferred
-
-        try:
-            runtime_module.ServingConfig.from_mapping(
-                {
-                    "retained_binding_acquire": {
-                        "mode": "external",
-                    },
-                }
-            )
-        except Exception:
-            checks["missing_authority_fails_closed"] = True
-        else:
-            checks["missing_authority_fails_closed"] = False
-
-        mismatch_config = dict(_retained_binding_acquire_config(runtime_module))
-        acquire = dict(mismatch_config["retained_binding_acquire"])
-        authority = dict(acquire["authority"])
-        capability = dict(authority["reservation_capability"])
-        capability["daemon_session_id"] = "wrong-session"
-        authority["reservation_capability"] = capability
-        acquire["authority"] = authority
-        mismatch_config["retained_binding_acquire"] = acquire
-        try:
-            mismatch_session = runtime_module.ServingRuntimeSession.from_config(
-                mismatch_config,
-                host=host,
-            )
-            mismatch_session.start(
-                runtime_module.RequestContext(
-                    framework_config=SimpleNamespace(),
-                    model_config=SimpleNamespace(model="fake-model"),
-                    target_device=torch.device("cuda:0"),
-                )
-            )
-        except Exception:
-            checks["authority_mismatch_fails_closed"] = True
-        else:
-            checks["authority_mismatch_fails_closed"] = False
-
-        failing_host = hosts_module.IntegrationHost(
-            framework=FakeFrameworkHost(),
-            placement=FakePlacementHost(),
-            tensor_surface=SchemaMismatchTensorSurface(),
-        )
-        failing_restored = FakeRestoredRetainedBinding()
-        failure_calls: list[Mapping[str, object]] = []
-
-        @contextmanager
-        def fake_restore_for_failure(**kwargs: object):
-            failure_calls.append(kwargs)
-            yield failing_restored
-
-        integration_module.restore_retained_binding = fake_restore_for_failure
-        try:
-            failing_session = runtime_module.ServingRuntimeSession.from_config(
-                _retained_binding_acquire_config(runtime_module),
-                host=failing_host,
-            )
-            try:
-                failing_session.start(
-                    runtime_module.RequestContext(
-                        framework_config=SimpleNamespace(),
-                        model_config=SimpleNamespace(model="fake-model"),
-                        target_device=torch.device("cuda:0"),
-                    )
-                )
-            except _integration.SchemaMismatchError:
-                checks["failure_cleanup_closes_untransferred_handle"] = (
-                    failing_restored.closed and not failing_restored.transferred
-                )
-            else:
-                checks["failure_cleanup_closes_untransferred_handle"] = False
-        finally:
-            integration_module.restore_retained_binding = original_restore_retained
-
-        checks["failure_path_used_retained_restore"] = bool(failure_calls)
-
-        try:
-            runtime_module.RetainedBindingAcquire(SimpleNamespace())
-        except _integration.AuthorityValidationError:
-            checks["rejects_arbitrary_retained_authority"] = True
-        else:
-            checks["rejects_arbitrary_retained_authority"] = False
-
-    return _result(
-        level="level3-retained-binding",
-        checks=checks,
-        messages=_LEVEL3_MESSAGES,
-    )
-
-
-__all__ = [
-    "ConformanceResult",
-    "FakeArtifact",
-    "FakeArtifactView",
-    "FakeBinding",
-    "FakeFrameworkHost",
-    "FakePlacementHost",
-    "FakeRestoredRetainedBinding",
-    "FakeRuntimeModel",
-    "FakeServingArtifactResolver",
-    "FakeSourceCatalogProvider",
-    "FakeSourceHost",
-    "FakeTensorSurface",
-    "SchemaMismatchTensorSurface",
-    "assert_framework_isolation",
-    "assert_level1_runtime_conformance",
-    "assert_level2_local_bootstrap_conformance",
-    "assert_level3_retained_binding_conformance",
-    "assert_public_runtime_boundary",
-    "build_fake_runtime_host",
-]
diff --git a/tensorcast/types.py b/tensorcast/types.py
index 8cef482d..4d22d17d 100644
--- a/tensorcast/types.py
+++ b/tensorcast/types.py
@@ -667,6 +667,9 @@ class SealAssemblyResult(BaseModel):
 _ASSEMBLY_CANONICAL_COVERAGE_CONTRACT = "canonical_full"
 SERVING_MANIFEST_TENSOR_NAME = "__tensorcast_meta__.manifest_json"
 SERVING_BUILD_DIGEST_VERSION = "tensorcast.serving_build_digest.v1"
+# The serving-manifest names below are a persisted publication/wire ABI; they
+# do not denote a separate runtime source of authority. Runtime-facing DTOs use
+# artifact/runtime names and map to these fields only at serialization boundaries.
 
 
 def _canonical_json_bytes(payload: object) -> bytes:
@@ -718,7 +721,7 @@ class FinalizeClass(str, Enum):
     UNKNOWN_BLOCKED = "unknown_blocked"
 
 
-class ServingSupportLevel(str, Enum):
+class RuntimeSupportLevel(str, Enum):
     BLOCKED = "blocked"
     SOURCE_BIND_BOOTSTRAP_ONLY = "source_bind_bootstrap_only"
     BUILDER_PUBLICATION_READY = "builder_publication_ready"
@@ -749,30 +752,30 @@ class ServingSupportLevel(str, Enum):
         FinalizeClass.UNKNOWN_BLOCKED
     ),
 }
-_PUBLICATION_SERVING_SUPPORT_LEVEL_TO_PROTO: dict[
-    ServingSupportLevel, publication_pb2.ServingSupportLevel
+_PUBLICATION_RUNTIME_SUPPORT_LEVEL_TO_PROTO: dict[
+    RuntimeSupportLevel, publication_pb2.ServingSupportLevel
 ] = {
-    ServingSupportLevel.BLOCKED: publication_pb2.SERVING_SUPPORT_LEVEL_BLOCKED,
-    ServingSupportLevel.SOURCE_BIND_BOOTSTRAP_ONLY: (
+    RuntimeSupportLevel.BLOCKED: publication_pb2.SERVING_SUPPORT_LEVEL_BLOCKED,
+    RuntimeSupportLevel.SOURCE_BIND_BOOTSTRAP_ONLY: (
         publication_pb2.SERVING_SUPPORT_LEVEL_SOURCE_BIND_BOOTSTRAP_ONLY
     ),
-    ServingSupportLevel.BUILDER_PUBLICATION_READY: (
+    RuntimeSupportLevel.BUILDER_PUBLICATION_READY: (
         publication_pb2.SERVING_SUPPORT_LEVEL_BUILDER_PUBLICATION_READY
     ),
-    ServingSupportLevel.RUNTIME_BIND_SWAP_READY: (
+    RuntimeSupportLevel.RUNTIME_BIND_SWAP_READY: (
         publication_pb2.SERVING_SUPPORT_LEVEL_RUNTIME_BIND_SWAP_READY
     ),
 }
-_PUBLICATION_SERVING_SUPPORT_LEVEL_FROM_PROTO: dict[int, ServingSupportLevel] = {
-    int(publication_pb2.SERVING_SUPPORT_LEVEL_BLOCKED): ServingSupportLevel.BLOCKED,
+_PUBLICATION_RUNTIME_SUPPORT_LEVEL_FROM_PROTO: dict[int, RuntimeSupportLevel] = {
+    int(publication_pb2.SERVING_SUPPORT_LEVEL_BLOCKED): RuntimeSupportLevel.BLOCKED,
     int(publication_pb2.SERVING_SUPPORT_LEVEL_SOURCE_BIND_BOOTSTRAP_ONLY): (
-        ServingSupportLevel.SOURCE_BIND_BOOTSTRAP_ONLY
+        RuntimeSupportLevel.SOURCE_BIND_BOOTSTRAP_ONLY
     ),
     int(publication_pb2.SERVING_SUPPORT_LEVEL_BUILDER_PUBLICATION_READY): (
-        ServingSupportLevel.BUILDER_PUBLICATION_READY
+        RuntimeSupportLevel.BUILDER_PUBLICATION_READY
     ),
     int(publication_pb2.SERVING_SUPPORT_LEVEL_RUNTIME_BIND_SWAP_READY): (
-        ServingSupportLevel.RUNTIME_BIND_SWAP_READY
+        RuntimeSupportLevel.RUNTIME_BIND_SWAP_READY
     ),
 }
 _PUBLICATION_ASSEMBLY_TARGET_KIND_TO_PROTO: dict[
@@ -1122,7 +1125,7 @@ def from_publication_proto(
         )
 
 
-class ServingBuildIntent(BaseModel):
+class RuntimeArtifactBuildIntent(BaseModel):
     model_config = ConfigDict(frozen=True)
 
     representation_contract_hash: str | None = None
@@ -1134,7 +1137,7 @@ class ServingBuildIntent(BaseModel):
     source_artifact_ref: str | None = None
 
     @model_validator(mode="after")
-    def _validate_fields(self) -> "ServingBuildIntent":
+    def _validate_fields(self) -> "RuntimeArtifactBuildIntent":
         if (
             self.representation_contract_hash is not None
             and not self.representation_contract_hash
@@ -1182,7 +1185,7 @@ def to_publication_proto(self) -> publication_pb2.ServingBuildIntent:
     def from_publication_proto(
         cls,
         proto: publication_pb2.ServingBuildIntent,
-    ) -> "ServingBuildIntent":
+    ) -> "RuntimeArtifactBuildIntent":
         builder_mode = BuilderMode.PURE_TRANSFORM
         if int(proto.builder_mode) != int(publication_pb2.BUILDER_MODE_UNSPECIFIED):
             builder_mode = _PUBLICATION_BUILDER_MODE_FROM_PROTO[int(proto.builder_mode)]
@@ -1202,7 +1205,7 @@ def from_publication_proto(
 class PureTransformPublicationSpec(BaseModel):
     model_config = ConfigDict(frozen=True)
 
-    build_intent: ServingBuildIntent
+    build_intent: RuntimeArtifactBuildIntent
     contract_family: AssemblyContractFamily | None = None
     source_version_key: str | None = None
     serving_version_key: str | None = None
@@ -1212,7 +1215,7 @@ class PureTransformPublicationSpec(BaseModel):
     requirements: AssemblyRequirementSetRef | None = None
     readiness_policy: AssemblyReadinessPolicy | None = None
     structural_view_ids: tuple[str, ...] = ()
-    admission_facts: ServingAdmissionFacts | None = None
+    admission_facts: RuntimeAdmissionFacts | None = None
 
     @model_validator(mode="after")
     def _validate_publication_spec(self) -> "PureTransformPublicationSpec":
@@ -1262,7 +1265,9 @@ def from_proto(
         proto: publication_pb2.PureTransformPublicationSpec,
     ) -> "PureTransformPublicationSpec":
         return cls(
-            build_intent=ServingBuildIntent.from_publication_proto(proto.build_intent),
+            build_intent=RuntimeArtifactBuildIntent.from_publication_proto(
+                proto.build_intent
+            ),
             contract_family=cast(
                 AssemblyContractFamily | None,
                 str(proto.contract_family or "") or None,
@@ -1284,18 +1289,18 @@ def from_proto(
             ),
             structural_view_ids=tuple(str(item) for item in proto.structural_view_ids),
             admission_facts=(
-                ServingAdmissionFacts.from_publication_proto(proto.admission_facts)
+                RuntimeAdmissionFacts.from_publication_proto(proto.admission_facts)
                 if proto.HasField("admission_facts")
                 else None
             ),
         )
 
 
-class ServingAdmissionFacts(BaseModel):
+class RuntimeAdmissionFacts(BaseModel):
     model_config = ConfigDict(frozen=True)
 
     finalize_class: FinalizeClass
-    support_level: ServingSupportLevel
+    support_level: RuntimeSupportLevel
     topology_admission_digest: str | None = None
     same_binding_fast_path_validated: bool = False
 
@@ -1307,7 +1312,7 @@ def _empty_digest_is_none(cls, value: object) -> object:
         return value
 
     @model_validator(mode="after")
-    def _validate_admission_facts(self) -> "ServingAdmissionFacts":
+    def _validate_admission_facts(self) -> "RuntimeAdmissionFacts":
         if (
             self.finalize_class == FinalizeClass.REPRESENTATION_CHANGING
             and not self.same_binding_fast_path_validated
@@ -1324,8 +1329,8 @@ def validate_for_representation_publish(self, *, builder_mode: BuilderMode) -> N
                 "representation publish requires a non-blocked finalize_class"
             )
         if self.support_level not in {
-            ServingSupportLevel.BUILDER_PUBLICATION_READY,
-            ServingSupportLevel.RUNTIME_BIND_SWAP_READY,
+            RuntimeSupportLevel.BUILDER_PUBLICATION_READY,
+            RuntimeSupportLevel.RUNTIME_BIND_SWAP_READY,
         }:
             raise ValueError(
                 "representation publish requires support_level to admit builder publication"
@@ -1354,12 +1359,12 @@ def validate_for_representation_publish(self, *, builder_mode: BuilderMode) -> N
 
     def admits_builder_publication(self) -> bool:
         return self.support_level in {
-            ServingSupportLevel.BUILDER_PUBLICATION_READY,
-            ServingSupportLevel.RUNTIME_BIND_SWAP_READY,
+            RuntimeSupportLevel.BUILDER_PUBLICATION_READY,
+            RuntimeSupportLevel.RUNTIME_BIND_SWAP_READY,
         }
 
     def admits_runtime_bind_swap(self) -> bool:
-        return self.support_level == ServingSupportLevel.RUNTIME_BIND_SWAP_READY
+        return self.support_level == RuntimeSupportLevel.RUNTIME_BIND_SWAP_READY
 
     def require_runtime_bind_swap_ready(self) -> None:
         if not self.admits_runtime_bind_swap():
@@ -1376,7 +1381,7 @@ def require_serving_key_activation_ready(self) -> None:
     def to_publication_proto(self) -> publication_pb2.ServingAdmissionFacts:
         proto = publication_pb2.ServingAdmissionFacts(
             finalize_class=_PUBLICATION_FINALIZE_CLASS_TO_PROTO[self.finalize_class],
-            support_level=_PUBLICATION_SERVING_SUPPORT_LEVEL_TO_PROTO[
+            support_level=_PUBLICATION_RUNTIME_SUPPORT_LEVEL_TO_PROTO[
                 self.support_level
             ],
             same_binding_fast_path_validated=bool(
@@ -1391,18 +1396,18 @@ def to_publication_proto(self) -> publication_pb2.ServingAdmissionFacts:
     def from_publication_proto(
         cls,
         proto: publication_pb2.ServingAdmissionFacts,
-    ) -> "ServingAdmissionFacts":
+    ) -> "RuntimeAdmissionFacts":
         if int(proto.finalize_class) == int(publication_pb2.FINALIZE_CLASS_UNSPECIFIED):
-            raise ValueError("ServingAdmissionFacts.finalize_class must be specified")
+            raise ValueError("RuntimeAdmissionFacts.finalize_class must be specified")
         if int(proto.support_level) == int(
             publication_pb2.SERVING_SUPPORT_LEVEL_UNSPECIFIED
         ):
-            raise ValueError("ServingAdmissionFacts.support_level must be specified")
+            raise ValueError("RuntimeAdmissionFacts.support_level must be specified")
         return cls(
             finalize_class=_PUBLICATION_FINALIZE_CLASS_FROM_PROTO[
                 int(proto.finalize_class)
             ],
-            support_level=_PUBLICATION_SERVING_SUPPORT_LEVEL_FROM_PROTO[
+            support_level=_PUBLICATION_RUNTIME_SUPPORT_LEVEL_FROM_PROTO[
                 int(proto.support_level)
             ],
             topology_admission_digest=(
@@ -1414,7 +1419,7 @@ def from_publication_proto(
         )
 
 
-class ServingArtifactManifest(BaseModel):
+class RuntimeArtifactManifest(BaseModel):
     model_config = ConfigDict(frozen=True)
 
     schema_version: int = 1
@@ -1435,7 +1440,7 @@ class ServingArtifactManifest(BaseModel):
     topology_admission_digest: str | None = None
 
     @model_validator(mode="after")
-    def _validate_manifest(self) -> "ServingArtifactManifest":
+    def _validate_manifest(self) -> "RuntimeArtifactManifest":
         if self.schema_version <= 0:
             raise ValueError("schema_version must be positive")
         if self.artifact_kind != "serving":
@@ -1465,14 +1470,14 @@ def _validate_manifest(self) -> "ServingArtifactManifest":
     def from_build_intent(
         cls,
         *,
-        intent: ServingBuildIntent,
+        intent: RuntimeArtifactBuildIntent,
         representation_contract_hash: str | None = None,
         tensor_schema_hash: str,
         canonical_tensor_count: int,
         serving_manifest_ref: str | None = None,
         logical_topology_json: str | None = None,
         topology_admission_digest: str | None = None,
-    ) -> "ServingArtifactManifest":
+    ) -> "RuntimeArtifactManifest":
         resolved_representation_contract_hash = (
             representation_contract_hash or intent.representation_contract_hash
         )
@@ -1505,7 +1510,7 @@ def to_bytes(self) -> bytes:
         return _canonical_json_bytes(self.model_dump(mode="json"))
 
     @classmethod
-    def from_bytes(cls, payload: bytes | bytearray | str) -> "ServingArtifactManifest":
+    def from_bytes(cls, payload: bytes | bytearray | str) -> "RuntimeArtifactManifest":
         raw = (
             payload.decode("utf-8")
             if isinstance(payload, (bytes, bytearray))
@@ -1517,8 +1522,8 @@ def to_runtime_policy(
         self,
         *,
         require_manifest: bool = True,
-    ) -> "ServingRuntimePolicy":
-        return ServingRuntimePolicy(
+    ) -> "RuntimeArtifactPolicy":
+        return RuntimeArtifactPolicy(
             require_manifest=bool(require_manifest),
             serving_manifest_ref=str(self.serving_manifest_ref),
             expected_representation_contract_hash=str(
@@ -1533,7 +1538,7 @@ def to_runtime_policy(
         )
 
 
-class ServingRuntimePolicy(BaseModel):
+class RuntimeArtifactPolicy(BaseModel):
     model_config = ConfigDict(frozen=True)
 
     require_manifest: bool = True
@@ -1543,7 +1548,7 @@ class ServingRuntimePolicy(BaseModel):
     expected_topology_admission_digest: str | None = None
 
     @model_validator(mode="after")
-    def _validate_policy(self) -> "ServingRuntimePolicy":
+    def _validate_policy(self) -> "RuntimeArtifactPolicy":
         if self.serving_manifest_ref is not None:
             parse_serving_manifest_ref(self.serving_manifest_ref)
         return self
@@ -1578,7 +1583,7 @@ def to_proto(self) -> store_daemon_pb2.ServingArtifactRuntimePolicy:
     def from_proto(
         cls,
         proto: store_daemon_pb2.ServingArtifactRuntimePolicy,
-    ) -> "ServingRuntimePolicy":
+    ) -> "RuntimeArtifactPolicy":
         return cls(
             require_manifest=bool(proto.require_manifest),
             serving_manifest_ref=str(proto.serving_manifest_ref or "") or None,
@@ -1643,25 +1648,25 @@ def from_proto(
         )
 
 
-ServingBindingReadiness = Literal[
-    "serving_reserved",
-    "serving_local_ready",
-    "serving_published_ready",
+RuntimeBindingReadiness = Literal[
+    "runtime_reserved",
+    "runtime_local_ready",
+    "runtime_published_ready",
 ]
 
 _SERVING_READINESS_TO_PROTO: dict[
-    ServingBindingReadiness, operation_pb2.ServingBindingReadiness
+    RuntimeBindingReadiness, operation_pb2.ServingBindingReadiness
 ] = {
-    "serving_reserved": operation_pb2.SERVING_BINDING_READINESS_RESERVED,
-    "serving_local_ready": operation_pb2.SERVING_BINDING_READINESS_LOCAL_READY,
-    "serving_published_ready": operation_pb2.SERVING_BINDING_READINESS_PUBLISHED_READY,
+    "runtime_reserved": operation_pb2.SERVING_BINDING_READINESS_RESERVED,
+    "runtime_local_ready": operation_pb2.SERVING_BINDING_READINESS_LOCAL_READY,
+    "runtime_published_ready": operation_pb2.SERVING_BINDING_READINESS_PUBLISHED_READY,
 }
-_SERVING_READINESS_FROM_PROTO: dict[int, ServingBindingReadiness] = {
+_SERVING_READINESS_FROM_PROTO: dict[int, RuntimeBindingReadiness] = {
     int(value): key for key, value in _SERVING_READINESS_TO_PROTO.items()
 }
 
 
-class ServingTopologyRef(BaseModel):
+class RuntimeTopologyRef(BaseModel):
     model_config = ConfigDict(frozen=True)
 
     schema_version: int = 1
@@ -1671,7 +1676,7 @@ class ServingTopologyRef(BaseModel):
     runtime_topology_diagnostics_ref: str | None = None
 
     @model_validator(mode="after")
-    def _validate_topology(self) -> "ServingTopologyRef":
+    def _validate_topology(self) -> "RuntimeTopologyRef":
         if int(self.schema_version) <= 0:
             raise ValueError("schema_version must be positive")
         if not self.schema_topology_digest:
@@ -1696,7 +1701,7 @@ def to_proto(self) -> operation_pb2.ServingTopologyRef:
     @classmethod
     def from_proto(
         cls, proto: operation_pb2.ServingTopologyRef
-    ) -> "ServingTopologyRef":
+    ) -> "RuntimeTopologyRef":
         return cls(
             schema_version=int(proto.schema_version),
             schema_topology_digest=str(proto.schema_topology_digest),
@@ -1718,7 +1723,7 @@ def from_proto(
         )
 
 
-class ServingBindingMemberRef(BaseModel):
+class RuntimeBindingMemberRef(BaseModel):
     model_config = ConfigDict(frozen=True)
 
     member_id: str
@@ -1727,7 +1732,7 @@ class ServingBindingMemberRef(BaseModel):
     group_id: str | None = None
 
     @model_validator(mode="after")
-    def _validate_member(self) -> "ServingBindingMemberRef":
+    def _validate_member(self) -> "RuntimeBindingMemberRef":
         if not self.member_id:
             raise ValueError("member_id must not be empty")
         if int(self.member_index) < 0:
@@ -1753,7 +1758,7 @@ def to_proto(self) -> operation_pb2.ServingBindingMemberRef:
     @classmethod
     def from_proto(
         cls, proto: operation_pb2.ServingBindingMemberRef
-    ) -> "ServingBindingMemberRef":
+    ) -> "RuntimeBindingMemberRef":
         return cls(
             member_id=str(proto.member_id),
             member_index=int(proto.member_index),
@@ -1795,52 +1800,52 @@ def from_proto(cls, proto: operation_pb2.BlobRef) -> "BlobRef":
         )
 
 
-ServingBindingSourceKind = Literal[
+RuntimeBindingSourceKind = Literal[
     "checkpoint_artifact",
-    "serving_artifact",
-    "serving_artifact_set",
+    "runtime_artifact",
+    "runtime_artifact_set",
 ]
-ServingBindingSourceReuseMode = Literal[
-    "checkpoint_to_serving",
-    "serving_direct_member_copy",
-    "serving_transform_required",
+RuntimeBindingSourceReuseMode = Literal[
+    "checkpoint_to_runtime",
+    "runtime_direct_member_copy",
+    "runtime_transform_required",
     "unsupported",
 ]
 
 _SOURCE_KIND_TO_PROTO: dict[
-    ServingBindingSourceKind, operation_pb2.ServingBindingSourceKind
+    RuntimeBindingSourceKind, operation_pb2.ServingBindingSourceKind
 ] = {
     "checkpoint_artifact": operation_pb2.SERVING_BINDING_SOURCE_KIND_CHECKPOINT_ARTIFACT,
-    "serving_artifact": operation_pb2.SERVING_BINDING_SOURCE_KIND_SERVING_ARTIFACT,
-    "serving_artifact_set": operation_pb2.SERVING_BINDING_SOURCE_KIND_SERVING_ARTIFACT_SET,
+    "runtime_artifact": operation_pb2.SERVING_BINDING_SOURCE_KIND_SERVING_ARTIFACT,
+    "runtime_artifact_set": operation_pb2.SERVING_BINDING_SOURCE_KIND_SERVING_ARTIFACT_SET,
 }
-_SOURCE_KIND_FROM_PROTO: dict[int, ServingBindingSourceKind] = {
+_SOURCE_KIND_FROM_PROTO: dict[int, RuntimeBindingSourceKind] = {
     int(value): key for key, value in _SOURCE_KIND_TO_PROTO.items()
 }
 _SOURCE_REUSE_TO_PROTO: dict[
-    ServingBindingSourceReuseMode, operation_pb2.ServingBindingSourceReuseMode
+    RuntimeBindingSourceReuseMode, operation_pb2.ServingBindingSourceReuseMode
 ] = {
-    "checkpoint_to_serving": operation_pb2.SERVING_BINDING_SOURCE_REUSE_MODE_CHECKPOINT_TO_SERVING,
-    "serving_direct_member_copy": operation_pb2.SERVING_BINDING_SOURCE_REUSE_MODE_SERVING_DIRECT_MEMBER_COPY,
-    "serving_transform_required": operation_pb2.SERVING_BINDING_SOURCE_REUSE_MODE_SERVING_TRANSFORM_REQUIRED,
+    "checkpoint_to_runtime": operation_pb2.SERVING_BINDING_SOURCE_REUSE_MODE_CHECKPOINT_TO_SERVING,
+    "runtime_direct_member_copy": operation_pb2.SERVING_BINDING_SOURCE_REUSE_MODE_SERVING_DIRECT_MEMBER_COPY,
+    "runtime_transform_required": operation_pb2.SERVING_BINDING_SOURCE_REUSE_MODE_SERVING_TRANSFORM_REQUIRED,
     "unsupported": operation_pb2.SERVING_BINDING_SOURCE_REUSE_MODE_UNSUPPORTED,
 }
-_SOURCE_REUSE_FROM_PROTO: dict[int, ServingBindingSourceReuseMode] = {
+_SOURCE_REUSE_FROM_PROTO: dict[int, RuntimeBindingSourceReuseMode] = {
     int(value): key for key, value in _SOURCE_REUSE_TO_PROTO.items()
 }
 
 
-class ServingBindingSourceMemberRef(BaseModel):
+class RuntimeBindingSourceMemberRef(BaseModel):
     model_config = ConfigDict(frozen=True)
 
-    member: ServingBindingMemberRef
+    member: RuntimeBindingMemberRef
     artifact_ref: str
     serving_manifest_ref: str | None = None
     tensor_schema_hash: str | None = None
     target_layout_hash: str | None = None
 
     @model_validator(mode="after")
-    def _validate_source_member(self) -> "ServingBindingSourceMemberRef":
+    def _validate_source_member(self) -> "RuntimeBindingSourceMemberRef":
         if not self.artifact_ref:
             raise ValueError("artifact_ref must not be empty")
         for field_name in (
@@ -1869,9 +1874,9 @@ def to_proto(self) -> operation_pb2.ServingBindingSourceMemberRef:
     @classmethod
     def from_proto(
         cls, proto: operation_pb2.ServingBindingSourceMemberRef
-    ) -> "ServingBindingSourceMemberRef":
+    ) -> "RuntimeBindingSourceMemberRef":
         return cls(
-            member=ServingBindingMemberRef.from_proto(proto.member),
+            member=RuntimeBindingMemberRef.from_proto(proto.member),
             artifact_ref=str(proto.artifact_ref),
             serving_manifest_ref=(
                 str(proto.serving_manifest_ref)
@@ -1891,21 +1896,21 @@ def from_proto(
         )
 
 
-class ServingBindingSourceRef(BaseModel):
+class RuntimeBindingSourceRef(BaseModel):
     model_config = ConfigDict(frozen=True)
 
-    source_kind: ServingBindingSourceKind
+    source_kind: RuntimeBindingSourceKind
     artifact_selection_digest: str
     source_artifact_ref: str | None = None
     source_schema_hash: str
     representation_contract_hash: str | None = None
-    serving_build_digest: str | None = None
+    runtime_build_digest: str | None = None
     tensor_schema_hash: str | None = None
-    topology: ServingTopologyRef | None = None
-    members: tuple[ServingBindingSourceMemberRef, ...] = ()
+    topology: RuntimeTopologyRef | None = None
+    members: tuple[RuntimeBindingSourceMemberRef, ...] = ()
 
     @model_validator(mode="after")
-    def _validate_source(self) -> "ServingBindingSourceRef":
+    def _validate_source(self) -> "RuntimeBindingSourceRef":
         if not self.artifact_selection_digest:
             raise ValueError("artifact_selection_digest must not be empty")
         if not self.source_schema_hash:
@@ -1917,7 +1922,7 @@ def _validate_source(self) -> "ServingBindingSourceRef":
                 )
             if self.members:
                 raise ValueError("checkpoint_artifact sources must not carry members")
-        if self.source_kind == "serving_artifact_set":
+        if self.source_kind == "runtime_artifact_set":
             if self.topology is None:
                 raise ValueError(
-                    "topology is required for serving_artifact_set sources"
+                    "topology is required for runtime_artifact_set sources"
@@ -1938,8 +1943,8 @@ def to_proto(self) -> operation_pb2.ServingBindingSourceRef:
             proto.source_artifact_ref = str(self.source_artifact_ref)
         if self.representation_contract_hash is not None:
             proto.representation_contract_hash = str(self.representation_contract_hash)
-        if self.serving_build_digest is not None:
-            proto.serving_build_digest = str(self.serving_build_digest)
+        if self.runtime_build_digest is not None:
+            proto.serving_build_digest = str(self.runtime_build_digest)
         if self.tensor_schema_hash is not None:
             proto.tensor_schema_hash = str(self.tensor_schema_hash)
         if self.topology is not None:
@@ -1950,10 +1955,10 @@ def to_proto(self) -> operation_pb2.ServingBindingSourceRef:
     @classmethod
     def from_proto(
         cls, proto: operation_pb2.ServingBindingSourceRef
-    ) -> "ServingBindingSourceRef":
+    ) -> "RuntimeBindingSourceRef":
         source_kind = _SOURCE_KIND_FROM_PROTO.get(int(proto.source_kind))
         if source_kind is None:
-            raise ValueError("ServingBindingSourceRef source_kind is required")
+            raise ValueError("RuntimeBindingSourceRef source_kind is required")
         return cls(
             source_kind=source_kind,
             artifact_selection_digest=str(proto.artifact_selection_digest),
@@ -1968,7 +1973,7 @@ def from_proto(
                 if proto.HasField("representation_contract_hash")
                 else None
             ),
-            serving_build_digest=(
+            runtime_build_digest=(
                 str(proto.serving_build_digest)
                 if proto.HasField("serving_build_digest")
                 else None
@@ -1979,36 +1984,36 @@ def from_proto(
                 else None
             ),
             topology=(
-                ServingTopologyRef.from_proto(proto.topology)
+                RuntimeTopologyRef.from_proto(proto.topology)
                 if proto.HasField("topology")
                 else None
             ),
             members=tuple(
-                ServingBindingSourceMemberRef.from_proto(member)
+                RuntimeBindingSourceMemberRef.from_proto(member)
                 for member in proto.members
             ),
         )
 
 
-class ServingBindingSourceReuseDecision(BaseModel):
+class RuntimeBindingSourceReuseDecision(BaseModel):
     model_config = ConfigDict(frozen=True)
 
-    mode: ServingBindingSourceReuseMode
+    mode: RuntimeBindingSourceReuseMode
     representation_contract_hash: str | None = None
     work_plan_hash: str | None = None
     reason: str | None = None
 
     @model_validator(mode="after")
-    def _validate_reuse(self) -> "ServingBindingSourceReuseDecision":
+    def _validate_reuse(self) -> "RuntimeBindingSourceReuseDecision":
         for field_name in ("representation_contract_hash", "work_plan_hash", "reason"):
             value = getattr(self, field_name)
             if value is not None and not value:
                 raise ValueError(f"{field_name} must not be empty when provided")
-        if self.mode == "serving_transform_required" and not (
+        if self.mode == "runtime_transform_required" and not (
             self.work_plan_hash or self.reason
         ):
             raise ValueError(
-                "serving_transform_required requires work_plan_hash or reason"
+                "runtime_transform_required requires work_plan_hash or reason"
             )
         if self.mode == "unsupported" and not self.reason:
             raise ValueError("unsupported source reuse requires reason")
@@ -2029,10 +2034,10 @@ def to_proto(self) -> operation_pb2.ServingBindingSourceReuseDecision:
     @classmethod
     def from_proto(
         cls, proto: operation_pb2.ServingBindingSourceReuseDecision
-    ) -> "ServingBindingSourceReuseDecision":
+    ) -> "RuntimeBindingSourceReuseDecision":
         mode = _SOURCE_REUSE_FROM_PROTO.get(int(proto.mode))
         if mode is None:
-            raise ValueError("ServingBindingSourceReuseDecision mode is required")
+            raise ValueError("RuntimeBindingSourceReuseDecision mode is required")
         return cls(
             mode=mode,
             representation_contract_hash=(
@@ -2047,45 +2052,45 @@ def from_proto(
         )
 
 
-def plan_serving_binding_source_reuse(
+def plan_runtime_binding_source_reuse(
     *,
-    source: ServingBindingSourceRef,
-    topology: ServingTopologyRef,
-    member: ServingBindingMemberRef,
+    source: RuntimeBindingSourceRef,
+    topology: RuntimeTopologyRef,
+    member: RuntimeBindingMemberRef,
     tensor_schema_hash: str,
     target_layout_hash: str,
     representation_contract_hash: str | None = None,
-) -> ServingBindingSourceReuseDecision:
+) -> RuntimeBindingSourceReuseDecision:
     if source.source_kind == "checkpoint_artifact":
-        return ServingBindingSourceReuseDecision(
-            mode="checkpoint_to_serving",
+        return RuntimeBindingSourceReuseDecision(
+            mode="checkpoint_to_runtime",
             representation_contract_hash=representation_contract_hash,
         )
-    if source.source_kind not in {"serving_artifact", "serving_artifact_set"}:
-        return ServingBindingSourceReuseDecision(
+    if source.source_kind not in {"runtime_artifact", "runtime_artifact_set"}:
+        return RuntimeBindingSourceReuseDecision(
             mode="unsupported",
-            reason=f"unsupported serving binding source kind: {source.source_kind}",
+            reason=f"unsupported runtime binding source kind: {source.source_kind}",
         )
     if (
         representation_contract_hash is not None
         and source.representation_contract_hash is not None
         and representation_contract_hash != source.representation_contract_hash
     ):
-        return ServingBindingSourceReuseDecision(
-            mode="serving_transform_required",
+        return RuntimeBindingSourceReuseDecision(
+            mode="runtime_transform_required",
             reason="source representation contract does not match target",
         )
     if source.topology is not None and source.topology != topology:
-        return ServingBindingSourceReuseDecision(
-            mode="serving_transform_required",
+        return RuntimeBindingSourceReuseDecision(
+            mode="runtime_transform_required",
             reason="source topology does not match target topology",
         )
     if (
         source.tensor_schema_hash is not None
         and source.tensor_schema_hash != tensor_schema_hash
     ):
-        return ServingBindingSourceReuseDecision(
-            mode="serving_transform_required",
+        return RuntimeBindingSourceReuseDecision(
+            mode="runtime_transform_required",
             reason="source tensor schema does not match target tensor schema",
         )
     matching_members = [
@@ -2093,43 +2098,43 @@ def plan_serving_binding_source_reuse(
         for source_member in source.members
         if source_member.member == member
     ]
-    if source.source_kind == "serving_artifact_set" and not matching_members:
-        return ServingBindingSourceReuseDecision(
-            mode="serving_transform_required",
-            reason="source serving set does not contain target member",
+    if source.source_kind == "runtime_artifact_set" and not matching_members:
+        return RuntimeBindingSourceReuseDecision(
+            mode="runtime_transform_required",
+            reason="source runtime set does not contain target member",
         )
     for source_member in matching_members:
         if (
             source_member.tensor_schema_hash is not None
             and source_member.tensor_schema_hash != tensor_schema_hash
         ):
-            return ServingBindingSourceReuseDecision(
-                mode="serving_transform_required",
+            return RuntimeBindingSourceReuseDecision(
+                mode="runtime_transform_required",
                 reason="source member tensor schema does not match target",
             )
         if (
             source_member.target_layout_hash is not None
             and source_member.target_layout_hash != target_layout_hash
         ):
-            return ServingBindingSourceReuseDecision(
-                mode="serving_transform_required",
+            return RuntimeBindingSourceReuseDecision(
+                mode="runtime_transform_required",
                 reason="source member layout does not match target layout",
             )
-    return ServingBindingSourceReuseDecision(
-        mode="serving_direct_member_copy",
+    return RuntimeBindingSourceReuseDecision(
+        mode="runtime_direct_member_copy",
         representation_contract_hash=representation_contract_hash
         or source.representation_contract_hash,
     )
 
 
-class ServingBindingResolvedLayout(BaseModel):
+class RuntimeBindingResolvedLayout(BaseModel):
     model_config = ConfigDict(frozen=True)
 
     binding_layout_id: str
-    source: ServingBindingSourceRef
-    source_reuse: ServingBindingSourceReuseDecision
-    topology: ServingTopologyRef
-    member: ServingBindingMemberRef
+    source: RuntimeBindingSourceRef
+    source_reuse: RuntimeBindingSourceReuseDecision
+    topology: RuntimeTopologyRef
+    member: RuntimeBindingMemberRef
     target_layout: bytes
     target_index_bytes: bytes
     target_layout_hash: str
@@ -2140,7 +2145,7 @@ class ServingBindingResolvedLayout(BaseModel):
     dst_specs_bytes: bytes | None = None
 
     @model_validator(mode="after")
-    def _validate_layout(self) -> "ServingBindingResolvedLayout":
+    def _validate_layout(self) -> "RuntimeBindingResolvedLayout":
         if not self.binding_layout_id:
             raise ValueError("binding_layout_id must not be empty")
         if not self.target_layout:
@@ -2153,13 +2158,13 @@ def _validate_layout(self) -> "ServingBindingResolvedLayout":
             raise ValueError("tensor_schema_hash must not be empty")
         if not self.spec_digest:
             raise ValueError("spec_digest must not be empty")
-        if self.source_reuse.mode == "serving_direct_member_copy":
+        if self.source_reuse.mode == "runtime_direct_member_copy":
             if self.source.source_kind not in {
-                "serving_artifact",
-                "serving_artifact_set",
+                "runtime_artifact",
+                "runtime_artifact_set",
             }:
                 raise ValueError(
-                    "serving_direct_member_copy requires a serving artifact source"
+                    "runtime_direct_member_copy requires a runtime artifact source"
                 )
             if (
                 self.source.representation_contract_hash is not None
@@ -2174,7 +2179,7 @@ def _validate_layout(self) -> "ServingBindingResolvedLayout":
                 self.source.tensor_schema_hash != self.tensor_schema_hash
             ):
                 raise ValueError(
-                    "serving_direct_member_copy tensor_schema_hash must match target"
+                    "runtime_direct_member_copy tensor_schema_hash must match target"
                 )
             matching_members = [
                 source_member
@@ -2182,11 +2187,11 @@ def _validate_layout(self) -> "ServingBindingResolvedLayout":
                 if source_member.member == self.member
             ]
             if (
-                self.source.source_kind == "serving_artifact_set"
+                self.source.source_kind == "runtime_artifact_set"
                 and not matching_members
             ):
                 raise ValueError(
-                    "serving_direct_member_copy requires a matching source member"
+                    "runtime_direct_member_copy requires a matching source member"
                 )
             for source_member in matching_members:
                 if (
@@ -2194,14 +2199,14 @@ def _validate_layout(self) -> "ServingBindingResolvedLayout":
                     and source_member.target_layout_hash != self.target_layout_hash
                 ):
                     raise ValueError(
-                        "serving_direct_member_copy target_layout_hash must match source member"
+                        "runtime_direct_member_copy target_layout_hash must match source member"
                     )
                 if (
                     source_member.tensor_schema_hash is not None
                     and source_member.tensor_schema_hash != self.tensor_schema_hash
                 ):
                     raise ValueError(
-                        "serving_direct_member_copy tensor_schema_hash must match source member"
+                        "runtime_direct_member_copy tensor_schema_hash must match source member"
                     )
         return self
 
@@ -2229,15 +2234,15 @@ def to_proto(self) -> operation_pb2.ServingBindingResolvedLayout:
     @classmethod
     def from_proto(
         cls, proto: operation_pb2.ServingBindingResolvedLayout
-    ) -> "ServingBindingResolvedLayout":
+    ) -> "RuntimeBindingResolvedLayout":
         return cls(
             binding_layout_id=str(proto.binding_layout_id),
-            source=ServingBindingSourceRef.from_proto(proto.source),
-            source_reuse=ServingBindingSourceReuseDecision.from_proto(
+            source=RuntimeBindingSourceRef.from_proto(proto.source),
+            source_reuse=RuntimeBindingSourceReuseDecision.from_proto(
                 proto.source_reuse
             ),
-            topology=ServingTopologyRef.from_proto(proto.topology),
-            member=ServingBindingMemberRef.from_proto(proto.member),
+            topology=RuntimeTopologyRef.from_proto(proto.topology),
+            member=RuntimeBindingMemberRef.from_proto(proto.member),
             target_layout=bytes(proto.target_layout),
             target_index_bytes=bytes(proto.target_index_bytes),
             target_layout_hash=str(proto.target_layout_hash),
@@ -2261,22 +2266,22 @@ def from_proto(
         )
 
 
-class ServingBindingTarget(BaseModel):
+class RealizationTarget(BaseModel):
     model_config = ConfigDict(frozen=True)
 
     runtime: str
     device: str | int
     device_uuid: str | None = None
-    source: ServingBindingSourceRef
-    topology: ServingTopologyRef
-    member: ServingBindingMemberRef
+    source: RuntimeBindingSourceRef
+    topology: RuntimeTopologyRef
+    member: RuntimeBindingMemberRef
     model_config_digest: str
     load_config_digest: str | None = None
-    serving_build_digest: str
-    resolved_layout: ServingBindingResolvedLayout
+    runtime_build_digest: str
+    resolved_layout: RuntimeBindingResolvedLayout
 
     @model_validator(mode="after")
-    def _validate_target(self) -> "ServingBindingTarget":
+    def _validate_target(self) -> "RealizationTarget":
         if not self.runtime:
             raise ValueError("runtime must not be empty")
         if str(self.device) == "":
@@ -2287,19 +2292,19 @@ def _validate_target(self) -> "ServingBindingTarget":
             raise ValueError("model_config_digest must not be empty")
         if self.load_config_digest is not None and not self.load_config_digest:
             raise ValueError("load_config_digest must not be empty when provided")
-        if not self.serving_build_digest:
-            raise ValueError("serving_build_digest must not be empty")
+        if not self.runtime_build_digest:
+            raise ValueError("runtime_build_digest must not be empty")
         if self.source != self.resolved_layout.source:
             raise ValueError("resolved_layout.source must match target source")
         if self.source.topology is not None and self.source.topology != self.topology:
             raise ValueError("source topology must match target topology when provided")
         if (
-            self.resolved_layout.source_reuse.mode == "serving_direct_member_copy"
-            and self.source.serving_build_digest is not None
-            and self.source.serving_build_digest != self.serving_build_digest
+            self.resolved_layout.source_reuse.mode == "runtime_direct_member_copy"
+            and self.source.runtime_build_digest is not None
+            and self.source.runtime_build_digest != self.runtime_build_digest
         ):
             raise ValueError(
-                "serving_direct_member_copy serving_build_digest must match source"
+                "runtime_direct_member_copy runtime_build_digest must match source"
             )
         if self.topology != self.resolved_layout.topology:
             raise ValueError("resolved_layout.topology must match target topology")
@@ -2312,7 +2317,7 @@ def to_proto(self) -> operation_pb2.ServingBindingTarget:
             runtime=str(self.runtime),
             device=str(self.device),
             model_config_digest=str(self.model_config_digest),
-            serving_build_digest=str(self.serving_build_digest),
+            serving_build_digest=str(self.runtime_build_digest),
         )
         if self.device_uuid is not None:
             proto.device_uuid = str(self.device_uuid)
@@ -2327,40 +2332,40 @@ def to_proto(self) -> operation_pb2.ServingBindingTarget:
     @classmethod
     def from_proto(
         cls, proto: operation_pb2.ServingBindingTarget
-    ) -> "ServingBindingTarget":
+    ) -> "RealizationTarget":
         return cls(
             runtime=str(proto.runtime),
             device=str(proto.device),
             device_uuid=str(proto.device_uuid)
             if proto.HasField("device_uuid")
             else None,
-            source=ServingBindingSourceRef.from_proto(proto.source),
-            topology=ServingTopologyRef.from_proto(proto.topology),
-            member=ServingBindingMemberRef.from_proto(proto.member),
+            source=RuntimeBindingSourceRef.from_proto(proto.source),
+            topology=RuntimeTopologyRef.from_proto(proto.topology),
+            member=RuntimeBindingMemberRef.from_proto(proto.member),
             model_config_digest=str(proto.model_config_digest),
             load_config_digest=(
                 str(proto.load_config_digest)
                 if proto.HasField("load_config_digest")
                 else None
             ),
-            serving_build_digest=str(proto.serving_build_digest),
-            resolved_layout=ServingBindingResolvedLayout.from_proto(
+            runtime_build_digest=str(proto.serving_build_digest),
+            resolved_layout=RuntimeBindingResolvedLayout.from_proto(
                 proto.resolved_layout
             ),
         )
 
 
-class ServingBindingSetTarget(BaseModel):
+class RealizationTargetSet(BaseModel):
     model_config = ConfigDict(frozen=True)
 
     runtime: str
-    source: ServingBindingSourceRef
-    topology: ServingTopologyRef
+    source: RuntimeBindingSourceRef
+    topology: RuntimeTopologyRef
     group_id: str
-    members: tuple[ServingBindingTarget, ...]
+    members: tuple[RealizationTarget, ...]
 
     @model_validator(mode="after")
-    def _validate_set_target(self) -> "ServingBindingSetTarget":
+    def _validate_set_target(self) -> "RealizationTargetSet":
         if not self.runtime:
             raise ValueError("runtime must not be empty")
         if not self.group_id:
@@ -2391,40 +2396,40 @@ def to_proto(self) -> operation_pb2.ServingBindingSetTarget:
     @classmethod
     def from_proto(
         cls, proto: operation_pb2.ServingBindingSetTarget
-    ) -> "ServingBindingSetTarget":
+    ) -> "RealizationTargetSet":
         return cls(
             runtime=str(proto.runtime),
-            source=ServingBindingSourceRef.from_proto(proto.source),
-            topology=ServingTopologyRef.from_proto(proto.topology),
+            source=RuntimeBindingSourceRef.from_proto(proto.source),
+            topology=RuntimeTopologyRef.from_proto(proto.topology),
             group_id=str(proto.group_id),
             members=tuple(
-                ServingBindingTarget.from_proto(member) for member in proto.members
+                RealizationTarget.from_proto(member) for member in proto.members
             ),
         )
 
 
-class ServingBindingResolvedSpecCacheEntry(BaseModel):
+class RuntimeRealizationSpecCacheEntry(BaseModel):
     model_config = ConfigDict(frozen=True)
 
     schema_version: int
     cache_key_digest: str
     spec_digest: str
     runtime: str
-    source: ServingBindingSourceRef
-    source_reuse: ServingBindingSourceReuseDecision
-    topology: ServingTopologyRef
-    member: ServingBindingMemberRef
+    source: RuntimeBindingSourceRef
+    source_reuse: RuntimeBindingSourceReuseDecision
+    topology: RuntimeTopologyRef
+    member: RuntimeBindingMemberRef
     source_schema_hash: str
     model_config_digest: str
     load_config_digest: str | None = None
-    serving_build_digest: str
+    runtime_build_digest: str
     binding_layout_id: str
     target_layout_hash: str
     tensor_schema_hash: str
     blob_refs: Mapping[str, BlobRef]
 
     @model_validator(mode="after")
-    def _validate_cache_entry(self) -> "ServingBindingResolvedSpecCacheEntry":
+    def _validate_cache_entry(self) -> "RuntimeRealizationSpecCacheEntry":
         if int(self.schema_version) <= 0:
             raise ValueError("schema_version must be positive")
         for field_name in (
@@ -2433,7 +2438,7 @@ def _validate_cache_entry(self) -> "ServingBindingResolvedSpecCacheEntry":
             "runtime",
             "source_schema_hash",
             "model_config_digest",
-            "serving_build_digest",
+            "runtime_build_digest",
             "binding_layout_id",
             "target_layout_hash",
             "tensor_schema_hash",
@@ -2453,7 +2458,7 @@ def canonical_key_json(self) -> str:
             "load_config_digest": self.load_config_digest,
             "topology": self.topology.model_dump(mode="json", exclude_none=True),
             "member": self.member.model_dump(mode="json", exclude_none=True),
-            "serving_build_digest": self.serving_build_digest,
+            "runtime_build_digest": self.runtime_build_digest,
             "source": self.source.model_dump(mode="json", exclude_none=True),
             "source_reuse": self.source_reuse.model_dump(
                 mode="json", exclude_none=True
@@ -2472,7 +2477,7 @@ def canonical_spec_core_json(self) -> str:
             "target_layout_hash": self.target_layout_hash,
             "tensor_schema_hash": self.tensor_schema_hash,
             "source_schema_hash": self.source_schema_hash,
-            "serving_build_digest": self.serving_build_digest,
+            "runtime_build_digest": self.runtime_build_digest,
             "source_reuse": self.source_reuse.model_dump(
                 mode="json", exclude_none=True
             ),
@@ -2496,7 +2501,7 @@ def to_proto(self) -> operation_pb2.ServingBindingResolvedSpecCacheEntry:
             runtime=str(self.runtime),
             source_schema_hash=str(self.source_schema_hash),
             model_config_digest=str(self.model_config_digest),
-            serving_build_digest=str(self.serving_build_digest),
+            serving_build_digest=str(self.runtime_build_digest),
             binding_layout_id=str(self.binding_layout_id),
             target_layout_hash=str(self.target_layout_hash),
             tensor_schema_hash=str(self.tensor_schema_hash),
@@ -2514,18 +2519,18 @@ def to_proto(self) -> operation_pb2.ServingBindingResolvedSpecCacheEntry:
     @classmethod
     def from_proto(
         cls, proto: operation_pb2.ServingBindingResolvedSpecCacheEntry
-    ) -> "ServingBindingResolvedSpecCacheEntry":
+    ) -> "RuntimeRealizationSpecCacheEntry":
         return cls(
             schema_version=int(proto.schema_version),
             cache_key_digest=str(proto.cache_key_digest),
             spec_digest=str(proto.spec_digest),
             runtime=str(proto.runtime),
-            source=ServingBindingSourceRef.from_proto(proto.source),
-            source_reuse=ServingBindingSourceReuseDecision.from_proto(
+            source=RuntimeBindingSourceRef.from_proto(proto.source),
+            source_reuse=RuntimeBindingSourceReuseDecision.from_proto(
                 proto.source_reuse
             ),
-            topology=ServingTopologyRef.from_proto(proto.topology),
-            member=ServingBindingMemberRef.from_proto(proto.member),
+            topology=RuntimeTopologyRef.from_proto(proto.topology),
+            member=RuntimeBindingMemberRef.from_proto(proto.member),
             source_schema_hash=str(proto.source_schema_hash),
             model_config_digest=str(proto.model_config_digest),
             load_config_digest=(
@@ -2533,7 +2538,7 @@ def from_proto(
                 if proto.HasField("load_config_digest")
                 else None
             ),
-            serving_build_digest=str(proto.serving_build_digest),
+            runtime_build_digest=str(proto.serving_build_digest),
             binding_layout_id=str(proto.binding_layout_id),
             target_layout_hash=str(proto.target_layout_hash),
             tensor_schema_hash=str(proto.tensor_schema_hash),
@@ -2614,7 +2619,7 @@ class BindingReservationCapability(BaseModel):
     daemon_id: str
     daemon_session_id: str
     device_uuid: str
-    member: ServingBindingMemberRef
+    member: RuntimeBindingMemberRef
     reservation_bytes: int
     scope_digest: str
     expires_at_ms: int | None = None
@@ -2661,7 +2666,7 @@ def from_proto(
             daemon_id=str(proto.daemon_id),
             daemon_session_id=str(proto.daemon_session_id),
             device_uuid=str(proto.device_uuid),
-            member=ServingBindingMemberRef.from_proto(proto.member),
+            member=RuntimeBindingMemberRef.from_proto(proto.member),
             reservation_bytes=int(proto.reservation_bytes),
             scope_digest=str(proto.scope_digest),
             expires_at_ms=(
@@ -2718,7 +2723,7 @@ def from_proto(
         )
 
 
-class PrefetchedServingBinding(BaseModel):
+class PrefetchHandoff(BaseModel):
     model_config = ConfigDict(frozen=True)
 
     local_serving_ref: str | None = None
@@ -2726,10 +2731,10 @@ class PrefetchedServingBinding(BaseModel):
     daemon_id: str
     daemon_session_id: str
     device_uuid: str
-    member: ServingBindingMemberRef
+    member: RuntimeBindingMemberRef
     reservation_bytes: int
     reservation_capability: BindingReservationCapability
-    readiness: ServingBindingReadiness
+    readiness: RuntimeBindingReadiness
     verification_state: BindingValueVerificationState
     serving_artifact_id: str | None = None
     expires_at_ms: int | None = None
@@ -2738,7 +2743,7 @@ class PrefetchedServingBinding(BaseModel):
     report: object | None = Field(default=None, exclude=True, repr=False)
 
     @model_validator(mode="after")
-    def _validate_result(self) -> "PrefetchedServingBinding":
+    def _validate_result(self) -> "PrefetchHandoff":
         if self.local_serving_ref is not None and not self.local_serving_ref:
             raise ValueError("local_serving_ref must not be empty when provided")
         for field_name in ("daemon_id", "daemon_session_id", "device_uuid"):
@@ -2799,7 +2804,7 @@ def to_proto(self) -> operation_pb2.PrefetchServingBindingResult:
     @classmethod
     def from_proto(
         cls, proto: operation_pb2.PrefetchServingBindingResult
-    ) -> "PrefetchedServingBinding":
+    ) -> "PrefetchHandoff":
         readiness = _SERVING_READINESS_FROM_PROTO.get(int(proto.readiness))
         if readiness is None:
             raise ValueError("PrefetchServingBindingResult readiness is required")
@@ -2831,7 +2836,7 @@ def from_proto(
             daemon_id=str(proto.daemon_id),
             daemon_session_id=str(proto.daemon_session_id),
             device_uuid=str(proto.device_uuid),
-            member=ServingBindingMemberRef.from_proto(proto.member),
+            member=RuntimeBindingMemberRef.from_proto(proto.member),
             reservation_bytes=int(proto.reservation_bytes),
             reservation_capability=BindingReservationCapability.from_proto(
                 proto.reservation_capability
@@ -2853,10 +2858,10 @@ def from_proto(
         )
 
 
-class PrefetchedServingBindingMemberFailure(BaseModel):
+class PrefetchHandoffMemberFailure(BaseModel):
     model_config = ConfigDict(frozen=True)
 
-    member: ServingBindingMemberRef
+    member: RuntimeBindingMemberRef
     code: str
     message: str
     phase: str | None = None
@@ -2864,7 +2869,7 @@ class PrefetchedServingBindingMemberFailure(BaseModel):
     spec_digest: str | None = None
 
     @model_validator(mode="after")
-    def _validate_failure(self) -> "PrefetchedServingBindingMemberFailure":
+    def _validate_failure(self) -> "PrefetchHandoffMemberFailure":
         if not self.code:
             raise ValueError("code must not be empty")
         if not self.message:
@@ -2894,9 +2899,9 @@ def to_proto(self) -> operation_pb2.PrefetchServingBindingMemberFailure:
     @classmethod
     def from_proto(
         cls, proto: operation_pb2.PrefetchServingBindingMemberFailure
-    ) -> "PrefetchedServingBindingMemberFailure":
+    ) -> "PrefetchHandoffMemberFailure":
         return cls(
-            member=ServingBindingMemberRef.from_proto(proto.member),
+            member=RuntimeBindingMemberRef.from_proto(proto.member),
             code=str(proto.code),
             message=str(proto.message),
             phase=str(proto.phase) if proto.HasField("phase") else None,
@@ -2911,21 +2916,21 @@ def from_proto(
         )
 
 
-class PrefetchedServingBindingSet(BaseModel):
+class PrefetchHandoffSet(BaseModel):
     model_config = ConfigDict(frozen=True)
 
     runtime: str
-    topology: ServingTopologyRef
+    topology: RuntimeTopologyRef
     group_id: str
-    members: tuple[PrefetchedServingBinding, ...]
-    readiness: ServingBindingReadiness
+    members: tuple[PrefetchHandoff, ...]
+    readiness: RuntimeBindingReadiness
     expires_at_ms: int | None = None
-    member_failures: tuple[PrefetchedServingBindingMemberFailure, ...] = ()
+    member_failures: tuple[PrefetchHandoffMemberFailure, ...] = ()
     partial: bool = False
     report: object | None = Field(default=None, exclude=True, repr=False)
 
     @model_validator(mode="after")
-    def _validate_result_set(self) -> "PrefetchedServingBindingSet":
+    def _validate_result_set(self) -> "PrefetchHandoffSet":
         if not self.runtime:
             raise ValueError("runtime must not be empty")
         if not self.group_id:
@@ -2935,7 +2940,7 @@ def _validate_result_set(self) -> "PrefetchedServingBindingSet":
         if self.expires_at_ms is not None and int(self.expires_at_ms) < 0:
             raise ValueError("expires_at_ms must be non-negative")
         if self.partial and not self.member_failures:
-            raise ValueError("partial serving binding set requires member_failures")
+            raise ValueError("partial runtime binding set requires member_failures")
         success_member_ids = {member.member.member_id for member in self.members}
         failed_member_ids = {
             failure.member.member_id for failure in self.member_failures
@@ -2943,7 +2948,7 @@ def _validate_result_set(self) -> "PrefetchedServingBindingSet":
         overlap = success_member_ids & failed_member_ids
         if overlap:
             raise ValueError(
-                "serving binding set member cannot be both success and failure"
+                "runtime binding set member cannot be both success and failure"
             )
         return self
 
@@ -2966,44 +2971,44 @@ def to_proto(self) -> operation_pb2.PrefetchServingBindingSetResult:
     @classmethod
     def from_proto(
         cls, proto: operation_pb2.PrefetchServingBindingSetResult
-    ) -> "PrefetchedServingBindingSet":
+    ) -> "PrefetchHandoffSet":
         readiness = _SERVING_READINESS_FROM_PROTO.get(int(proto.readiness))
         if readiness is None:
             raise ValueError("PrefetchServingBindingSetResult readiness is required")
         return cls(
             runtime=str(proto.runtime),
-            topology=ServingTopologyRef.from_proto(proto.topology),
+            topology=RuntimeTopologyRef.from_proto(proto.topology),
             group_id=str(proto.group_id),
             members=tuple(
-                PrefetchedServingBinding.from_proto(member) for member in proto.members
+                PrefetchHandoff.from_proto(member) for member in proto.members
             ),
             readiness=readiness,
             expires_at_ms=(
                 int(proto.expires_at_ms) if proto.HasField("expires_at_ms") else None
             ),
             member_failures=tuple(
-                PrefetchedServingBindingMemberFailure.from_proto(failure)
+                PrefetchHandoffMemberFailure.from_proto(failure)
                 for failure in proto.member_failures
             ),
             partial=bool(proto.partial),
         )
 
 
-class ServingPublicationSubject(BaseModel):
+class RuntimePublicationSubject(BaseModel):
     model_config = ConfigDict(frozen=True)
 
     serving_artifact_id: str | None = None
     binding_value_ref: BindingValueRef | None = None
 
     @model_validator(mode="after")
-    def _validate_subject(self) -> "ServingPublicationSubject":
+    def _validate_subject(self) -> "RuntimePublicationSubject":
         artifact_id = self.serving_artifact_id
         binding_value_ref = self.binding_value_ref
         if artifact_id is not None and not artifact_id:
             raise ValueError("serving_artifact_id must not be empty")
         if (artifact_id is None) == (binding_value_ref is None):
             raise ValueError(
-                "ServingPublicationSubject requires exactly one of serving_artifact_id or binding_value_ref"
+                "RuntimePublicationSubject requires exactly one of serving_artifact_id or binding_value_ref"
             )
         return self
 
@@ -3048,9 +3053,8 @@ def to_store_proto(self) -> publication_pb2.ServingPublicationSubject:
     @classmethod
     def from_proto(
         cls,
-        proto: publication_pb2.ServingPublicationSubject
-        | publication_pb2.ServingPublicationSubject,
-    ) -> "ServingPublicationSubject":
+        proto: publication_pb2.ServingPublicationSubject,
+    ) -> "RuntimePublicationSubject":
         ref_case = proto.WhichOneof("ref")
         if ref_case == "serving_artifact_id":
             return cls(serving_artifact_id=str(proto.serving_artifact_id))
@@ -3058,13 +3062,13 @@ def from_proto(
             return cls(
                 binding_value_ref=BindingValueRef.from_proto(proto.binding_value)
             )
-        raise ValueError("ServingPublicationSubject requires exactly one ref")
+        raise ValueError("RuntimePublicationSubject requires exactly one ref")
 
 
 class RepresentationPublishContract(BaseModel):
     model_config = ConfigDict(frozen=True)
 
-    subject: ServingPublicationSubject
+    subject: RuntimePublicationSubject
     serving_manifest_ref: str
     representation_contract_hash: str
     serving_build_digest: str
@@ -3117,7 +3121,7 @@ def from_proto(
             raise ValueError(
                 "RepresentationPublishContract requires a serving publication subject"
             )
-        subject = ServingPublicationSubject.from_proto(proto.subject)
+        subject = RuntimePublicationSubject.from_proto(proto.subject)
         return cls(
             subject=subject,
             serving_manifest_ref=str(proto.serving_manifest_ref),
@@ -3130,7 +3134,7 @@ def from_proto(
 
     def validate_against_manifest(
         self,
-        manifest: ServingArtifactManifest,
+        manifest: RuntimeArtifactManifest,
     ) -> None:
         if manifest.serving_manifest_ref != self.serving_manifest_ref:
             raise ValueError(
@@ -3157,13 +3161,13 @@ def to_runtime_policy(
         self,
         *,
         require_manifest: bool = True,
-    ) -> ServingRuntimePolicy:
+    ) -> RuntimeArtifactPolicy:
         serving_artifact_id = self.serving_artifact_id
         if serving_artifact_id is None:
             raise ValueError(
                 "binding publication subjects do not resolve to a serving runtime policy until closeout promotion completes"
             )
-        return ServingRuntimePolicy(
+        return RuntimeArtifactPolicy(
             require_manifest=bool(require_manifest),
             serving_manifest_ref=str(self.serving_manifest_ref),
             expected_representation_contract_hash=str(
@@ -3194,7 +3198,7 @@ def from_publication_proto(
             raise ValueError(
                 "RepresentationPublishContract requires a serving publication subject"
             )
-        subject = ServingPublicationSubject.from_proto(proto.subject)
+        subject = RuntimePublicationSubject.from_proto(proto.subject)
         return cls(
             subject=subject,
             serving_manifest_ref=str(proto.serving_manifest_ref),
@@ -3316,7 +3320,7 @@ class RepresentationPublishSpec(BaseModel):
 
     serving_artifact_id: str | None = None
     serving_manifest_ref: str
-    serving_manifest: ServingArtifactManifest
+    serving_manifest: RuntimeArtifactManifest
     serving_manifest_bytes: bytes
     canonical_index: object | None = None
     representation_publish_contract: RepresentationPublishContract
@@ -3327,7 +3331,7 @@ class RepresentationPublishSpec(BaseModel):
     layout_id: str | None = None
     requirements: AssemblyRequirementSetRef | None = None
     readiness_policy: AssemblyReadinessPolicy | None = None
-    admission_facts: ServingAdmissionFacts | None = None
+    admission_facts: RuntimeAdmissionFacts | None = None
 
     @model_validator(mode="after")
     def _validate_representation_publish_spec(self) -> "RepresentationPublishSpec":
@@ -3338,7 +3342,7 @@ def _validate_representation_publish_spec(self) -> "RepresentationPublishSpec":
             "canonical_full",
         }:
             raise ValueError("contract_family must be one of: pp, ep, canonical_full")
-        manifest_from_bytes = ServingArtifactManifest.from_bytes(
+        manifest_from_bytes = RuntimeArtifactManifest.from_bytes(
             self.serving_manifest_bytes
         )
         if manifest_from_bytes != self.serving_manifest:
@@ -3408,11 +3412,11 @@ def _validate_representation_publish_spec(self) -> "RepresentationPublishSpec":
     def manifest_tensor_name(self) -> str:
         return parse_serving_manifest_ref(self.serving_manifest_ref)
 
-    def require_serving_runtime_policy(
+    def require_runtime_artifact_policy(
         self,
         *,
         require_manifest: bool = True,
-    ) -> ServingRuntimePolicy:
+    ) -> RuntimeArtifactPolicy:
         if self.admission_facts is not None:
             self.admission_facts.require_runtime_bind_swap_ready()
         return self.representation_publish_contract.to_runtime_policy(
@@ -3465,7 +3469,7 @@ def from_proto(
             representation_publish_contract=representation_publish_contract,
         )
         manifest_bytes = bytes(proto.serving_manifest_bytes)
-        manifest = ServingArtifactManifest.from_bytes(manifest_bytes)
+        manifest = RuntimeArtifactManifest.from_bytes(manifest_bytes)
         return cls(
             serving_artifact_id=representation_publish_contract.serving_artifact_id,
             serving_manifest_ref=representation_publish_contract.serving_manifest_ref,
@@ -3491,7 +3495,7 @@ def from_proto(
                 else None
             ),
             admission_facts=(
-                ServingAdmissionFacts.from_publication_proto(proto.admission_facts)
+                RuntimeAdmissionFacts.from_publication_proto(proto.admission_facts)
                 if proto.HasField("admission_facts")
                 else None
             ),
@@ -3671,7 +3675,7 @@ class PublishedModelVersion(BaseModel):
     serving_manifest_ref: str | None = None
     serving_execution_diagnostics: ExecutionDiagnostics | None = None
 
-    def require_serving_runtime_policy(self) -> ServingRuntimePolicy:
+    def require_runtime_artifact_policy(self) -> RuntimeArtifactPolicy:
         if not self.serving_manifest_ref:
             raise ValueError(
                 "PublishedModelVersion does not carry serving_manifest_ref"
@@ -3684,7 +3688,7 @@ def require_serving_runtime_policy(self) -> ServingRuntimePolicy:
             raise ValueError(
                 "PublishedModelVersion does not carry serving_build_digest"
             )
-        return ServingRuntimePolicy(
+        return RuntimeArtifactPolicy(
             require_manifest=True,
             serving_manifest_ref=str(self.serving_manifest_ref),
             expected_representation_contract_hash=str(
@@ -3694,32 +3698,32 @@ def require_serving_runtime_policy(self) -> ServingRuntimePolicy:
         )
 
 
-ServingRuntimePolicyInput = Union[
-    ServingRuntimePolicy,
-    ServingArtifactManifest,
+RuntimeArtifactPolicyInput = Union[
+    RuntimeArtifactPolicy,
+    RuntimeArtifactManifest,
     RepresentationPublishContract,
     RepresentationPublishSpec,
     PublishedModelVersion,
 ]
 
 
-def coerce_serving_runtime_policy(
-    value: ServingRuntimePolicyInput | None,
-) -> ServingRuntimePolicy | None:
+def coerce_runtime_artifact_policy(
+    value: RuntimeArtifactPolicyInput | None,
+) -> RuntimeArtifactPolicy | None:
     if value is None:
         return None
-    if isinstance(value, ServingRuntimePolicy):
+    if isinstance(value, RuntimeArtifactPolicy):
         return value
-    if isinstance(value, ServingArtifactManifest):
+    if isinstance(value, RuntimeArtifactManifest):
         return value.to_runtime_policy()
     if isinstance(value, RepresentationPublishContract):
         return value.to_runtime_policy()
     if isinstance(value, RepresentationPublishSpec):
-        return value.require_serving_runtime_policy()
+        return value.require_runtime_artifact_policy()
     if isinstance(value, PublishedModelVersion):
-        return value.require_serving_runtime_policy()
+        return value.require_runtime_artifact_policy()
     raise TypeError(
-        "serving runtime policy requires ServingRuntimePolicy, ServingArtifactManifest, "
+        "runtime artifact policy requires RuntimeArtifactPolicy, RuntimeArtifactManifest, "
         "RepresentationPublishContract, RepresentationPublishSpec, or PublishedModelVersion"
     )
 
@@ -3939,28 +3943,28 @@ class DeregisterArtifactOutcome(BaseModel):
     "BeginRegisterArtifactResult",
     "ArtifactDescriptor",
     "BindingValueRef",
-    "ServingBindingReadiness",
-    "ServingBindingSourceKind",
-    "ServingBindingSourceReuseMode",
-    "ServingTopologyRef",
-    "ServingBindingMemberRef",
+    "RuntimeBindingReadiness",
+    "RuntimeBindingSourceKind",
+    "RuntimeBindingSourceReuseMode",
+    "RuntimeTopologyRef",
+    "RuntimeBindingMemberRef",
     "BlobRef",
-    "ServingBindingSourceMemberRef",
-    "ServingBindingSourceRef",
-    "ServingBindingSourceReuseDecision",
-    "plan_serving_binding_source_reuse",
-    "ServingBindingResolvedLayout",
-    "ServingBindingTarget",
-    "ServingBindingSetTarget",
-    "ServingBindingResolvedSpecCacheEntry",
+    "RuntimeBindingSourceMemberRef",
+    "RuntimeBindingSourceRef",
+    "RuntimeBindingSourceReuseDecision",
+    "plan_runtime_binding_source_reuse",
+    "RuntimeBindingResolvedLayout",
+    "RealizationTarget",
+    "RealizationTargetSet",
+    "RuntimeRealizationSpecCacheEntry",
     "PrefetchRetentionPolicy",
     "BindingReservationCapability",
     "GroupRealizationAcquireRef",
-    "PrefetchedServingBinding",
-    "PrefetchedServingBindingMemberFailure",
-    "PrefetchedServingBindingSet",
+    "PrefetchHandoff",
+    "PrefetchHandoffMemberFailure",
+    "PrefetchHandoffSet",
     "BuilderMode",
-    "ServingPublicationSubject",
+    "RuntimePublicationSubject",
     "AssemblyCloseoutContract",
     "AssemblyAttemptRef",
     "AssemblyContractFamily",
@@ -3977,15 +3981,15 @@ class DeregisterArtifactOutcome(BaseModel):
     "RepresentationPublishContract",
     "RepresentationPublishSpec",
     "PublicDiskSourceHandle",
-    "ServingAdmissionFacts",
+    "RuntimeArtifactBuildIntent",
+    "RuntimeArtifactManifest",
+    "RuntimeArtifactPolicy",
+    "RuntimeArtifactPolicyInput",
+    "RuntimeAdmissionFacts",
     "ViewRegistrationKind",
     "SealAssemblyResult",
-    "ServingArtifactManifest",
-    "ServingBuildIntent",
     "SERVING_BUILD_DIGEST_VERSION",
-    "ServingRuntimePolicy",
-    "ServingRuntimePolicyInput",
-    "ServingSupportLevel",
+    "RuntimeSupportLevel",
     "SERVING_MANIFEST_TENSOR_NAME",
     "PlanBase",
     "CoalescedPlan",
@@ -4002,6 +4006,6 @@ class DeregisterArtifactOutcome(BaseModel):
     "VramRegionHandle",
     "DeregisterArtifactOutcome",
     "build_serving_manifest_ref",
-    "coerce_serving_runtime_policy",
+    "coerce_runtime_artifact_policy",
     "parse_serving_manifest_ref",
 ]
diff --git a/tests/python/api/test_artifact_handle.py b/tests/python/api/test_artifact_handle.py
index 091c5ad6..2ce63633 100644
--- a/tests/python/api/test_artifact_handle.py
+++ b/tests/python/api/test_artifact_handle.py
@@ -30,8 +30,8 @@
 from tensorcast.proto.daemon.v2 import store_daemon_pb2
 from tensorcast.types import (
     BuilderMode,
-    ServingArtifactManifest,
-    ServingRuntimePolicy,
+    RuntimeArtifactManifest,
+    RuntimeArtifactPolicy,
     build_serving_manifest_ref,
 )
 
@@ -211,7 +211,7 @@ def __init__(self, client: _ClientStub) -> None:
         self._artifact_cache = ArtifactCache(
             daemon_endpoint="daemon", ttl_seconds=10, max_entries=8
         )
-        self._key_cache: dict[str, tuple[str | None, str | None]] = {}
+        self._key_cache: dict[str, tuple[str | None, str | None, int | None]] = {}
         self._client = client
 
     def ensure_client(self) -> _ClientStub:
@@ -228,8 +228,10 @@ def invalidate_artifact(
     ) -> None:
         self._artifact_cache.invalidate_artifact(artifact_id or "", reason=reason)
 
-    def resolve_key_mapping_cached(self, *, key: str) -> tuple[str | None, str | None]:
-        return self._key_cache.get(key, (None, None))
+    def resolve_key_mapping_cached(
+        self, *, key: str
+    ) -> tuple[str | None, str | None, int | None]:
+        return self._key_cache.get(key, (None, None, None))
 
     def cache_key_mapping(
         self,
@@ -237,10 +239,11 @@ def cache_key_mapping(
         *,
         artifact_id: str | None,
         disk_path: str | None = None,
+        generation: int | None = None,
         ttl_override=None,
     ) -> None:
         del ttl_override
-        self._key_cache[key] = (artifact_id, disk_path)
+        self._key_cache[key] = (artifact_id, disk_path, generation)
 
 
 class _PipelineStub:
@@ -718,7 +721,7 @@ def _fake_bind_owned(self, **kwargs):
 
     monkeypatch.setattr(Artifact, "_bind_owned", _fake_bind_owned)
 
-    manifest = ServingArtifactManifest(
+    manifest = RuntimeArtifactManifest(
         framework_name="torch",
         adapter_version="adapter-v1",
         serving_abi_version="abi-v1",
@@ -733,12 +736,12 @@ def _fake_bind_owned(self, **kwargs):
 
     result = artifact.bind(
         device="cuda:0",
-        serving_runtime_policy=manifest,
+        runtime_artifact_policy=manifest,
     )
 
     assert result is fake_binding
     assert captured["device"] == torch.device("cuda:0")
-    assert captured["serving_runtime_policy"] == ServingRuntimePolicy(
+    assert captured["runtime_artifact_policy"] == RuntimeArtifactPolicy(
         require_manifest=True,
         serving_manifest_ref="tensor:__alt_manifest__.json",
         expected_representation_contract_hash="bafkrepresentation",
@@ -746,7 +749,7 @@ def _fake_bind_owned(self, **kwargs):
     )
 
 
-def test_tensor_dict_and_adopted_binding_share_source_selection_with_separate_target_digests(
+def test_tensor_dict_and_mapped_bindings_share_source_selection_with_separate_target_digests(
     monkeypatch: pytest.MonkeyPatch,
 ) -> None:
     canonical_bytes, payload = _build_payload({"foo": torch.ones(2)})
@@ -800,6 +803,50 @@ def test_tensor_dict_and_adopted_binding_share_source_selection_with_separate_ta
             "last_source_bound_plan_diagnostics": None,
         },
     )()
+    owned_binding_value = type(
+        "_OwnedBindingValueStub",
+        (),
+        {
+            "binding_id": "owned-binding",
+            "binding_layout_id": "bl1:owned",
+            "binding_value_id": "owned-value-1",
+            "seal_generation": 1,
+            "source_artifact_id": "aid",
+            "is_artifact_backed": True,
+            "verification_state": 0,
+            "is_published": False,
+        },
+    )()
+    owned_layout = type(
+        "_OwnedLayoutStub",
+        (),
+        {
+            "binding_layout_id": "bl1:owned",
+            "target_layout": store_daemon_pb2.TargetLayout(
+                view_id="mapped:v1:owned-target"
+            ),
+            "target_index_bytes": canonical_bytes,
+            "dst_specs": (),
+        },
+    )()
+    fake_owned_binding = type(
+        "_OwnedBindingStub",
+        (),
+        {
+            "binding_id": "owned-binding",
+            "binding_layout_id": "bl1:owned",
+            "layout": owned_layout,
+            "current_value": owned_binding_value,
+            "staged_value": None,
+            "last_materialization_diagnostics": {
+                "source": "disk",
+                "total_bytes": 8,
+                "retry_reason_buckets": {},
+            },
+            "last_execution_diagnostics": None,
+            "last_source_bound_plan_diagnostics": None,
+        },
+    )()
 
     def _fake_execute_bind_into(self, target_tensors, **kwargs):
         del self
@@ -807,7 +854,16 @@ def _fake_execute_bind_into(self, target_tensors, **kwargs):
         captured.update(kwargs)
         return fake_binding
 
+    owned_captured: dict[str, object] = {}
+
+    def _fake_execute_bind_owned(self, device, **kwargs):
+        del self
+        owned_captured["device"] = device
+        owned_captured.update(kwargs)
+        return fake_owned_binding
+
     monkeypatch.setattr(Artifact, "_execute_bind_into", _fake_execute_bind_into)
+    monkeypatch.setattr(Artifact, "_execute_bind_owned", _fake_execute_bind_owned)
 
     tensor_handle = artifact.realize(ArtifactRealizationSpec.tensor_dict(device="cpu"))
     adopted_handle = artifact.realize(
@@ -817,29 +873,49 @@ def _fake_execute_bind_into(self, target_tensors, **kwargs):
             packing="byte_space",
         )
     )
+    owned_handle = artifact.realize(
+        ArtifactRealizationSpec.binding(
+            device="cuda:0",
+            mapping=copy_plan,
+            packing="byte_space",
+        )
+    )
 
     tensor_report = tensor_handle.report
     adopted_report = adopted_handle.report
+    owned_report = owned_handle.report
     assert adopted_handle.binding() is fake_binding
     assert captured["mapping"] == copy_plan
+    assert owned_handle.binding() is fake_owned_binding
+    assert owned_captured["mapping"] == copy_plan
     assert (
         adopted_report.source_selection_digest == tensor_report.source_selection_digest
     )
+    assert owned_report.source_selection_digest == tensor_report.source_selection_digest
     assert adopted_report.target_layout_digest
     assert adopted_report.copy_plan_digest
+    assert owned_report.target_layout_digest == "binding-layout:bl1:owned"
+    assert owned_report.copy_plan_digest == "mapped:v1:owned-target"
+    assert owned_report.representation_admission is not None
+    assert owned_report.representation_admission.transform_required is True
     assert adopted_report.target_layout_digest != adopted_report.source_selection_digest
     assert adopted_report.copy_plan_digest != adopted_report.target_layout_digest
+    assert owned_report.target_layout_digest != owned_report.source_selection_digest
+    assert owned_report.copy_plan_digest != owned_report.target_layout_digest
     assert str(adopted_report.copy_plan_digest).startswith("mapped:v1:")
     assert adopted_report.target_plan is not None
+    assert owned_report.target_plan is not None
     assert adopted_report.target_plan.target_layout_digest == (
         adopted_report.target_layout_digest
     )
     assert (
         adopted_report.target_plan.copy_plan_digest == adopted_report.copy_plan_digest
     )
+    assert owned_report.target_plan.copy_plan_digest == owned_report.copy_plan_digest
 
     tensor_handle.close()
     adopted_handle.close()
+    owned_handle.close()
 
 
 def test_tensor_into_materializes_subset_only():
@@ -971,6 +1047,27 @@ def test_subset_clone_handles_multiple_identifiers():
     assert clone.tensor_names == ("foo",)
 
 
+def test_key_mapping_generation_flows_into_realization_selection_digest():
+    canonical_bytes, payload = _build_payload({"foo": torch.ones(1)})
+    runtime = _RuntimeStub(_ClientStub(canonical_bytes))
+    runtime.cache_key_mapping("mapped-v7", artifact_id="aid", generation=7)
+    runtime.cache_key_mapping("mapped-v8", artifact_id="aid", generation=8)
+    store = _StoreStub(runtime, _PipelineStub(payload))
+
+    selection_v7 = Artifact(
+        store_ref=_store_ref(store),
+        key="mapped-v7",
+    )._resolve_realization_selection()
+    selection_v8 = Artifact(
+        store_ref=_store_ref(store),
+        key="mapped-v8",
+    )._resolve_realization_selection()
+
+    assert selection_v7.generation_hint == 7
+    assert selection_v8.generation_hint == 8
+    assert selection_v7.source_selection_digest != selection_v8.source_selection_digest
+
+
 def test_describe_uses_cached_generation_without_fetch():
     canonical_bytes, payload = _build_payload({"foo": torch.ones(1)})
     runtime = _RuntimeStub(_ClientStub(canonical_bytes))
diff --git a/tests/python/api/test_artifact_tensor_subset.py b/tests/python/api/test_artifact_tensor_subset.py
index 71cb8578..1725985b 100644
--- a/tests/python/api/test_artifact_tensor_subset.py
+++ b/tests/python/api/test_artifact_tensor_subset.py
@@ -11,8 +11,8 @@
 from tensorcast.api.store.artifact import Artifact
 from tensorcast.api.store.cache import ArtifactCache
 from tensorcast.api.store.common import canonical_index_from_bytes
-from tensorcast.api.store.types import ArtifactError, StoreOptions
 from tensorcast.api.store.retry import build_retry_policies
+from tensorcast.api.store.types import ArtifactError, StoreOptions
 
 
 def _canonical_index_bytes() -> bytes:
diff --git a/tests/python/api/test_mapped_binding.py b/tests/python/api/test_mapped_binding.py
index 1a10bd1c..2c906586 100644
--- a/tests/python/api/test_mapped_binding.py
+++ b/tests/python/api/test_mapped_binding.py
@@ -819,7 +819,7 @@ def test_bind_into_mapping_propagates_collective_hint_in_operation_id(
     import tensorcast.api._device as device_mod
 
     artifact_mod = importlib.import_module("tensorcast.api.store.artifact")
-    store_mod = importlib.import_module("tensorcast.api.store.__init__")
+    store_mod = importlib.import_module("tensorcast.api.store")
 
     monkeypatch.setattr(device_mod, "device_uuid_for", lambda device_id: "gpu-0")
     monkeypatch.setattr(artifact_mod, "device_uuid_for", lambda device_id: "gpu-0")
diff --git a/tests/python/api/test_plan_spec.py b/tests/python/api/test_plan_spec.py
index e07fc582..021d563d 100644
--- a/tests/python/api/test_plan_spec.py
+++ b/tests/python/api/test_plan_spec.py
@@ -25,7 +25,7 @@
 from tensorcast.api.store import (
     BuilderMode,
     RepresentationPublishSpec,
-    ServingBuildIntent,
+    RuntimeArtifactBuildIntent,
     build_pure_transform_publication_bundle_from_registered_artifact,
 )
 from tensorcast.api.store.artifact import Artifact
@@ -44,13 +44,64 @@
 from tensorcast.proto.common.v1 import common_pb2
 from tensorcast.proto.node_agent.v1 import node_agent_pb2
 from tensorcast.proto.plan.v1 import plan_pb2
-from tensorcast.types import build_serving_manifest_ref
+from tensorcast.types import (
+    RealizationTarget,
+    RuntimeBindingMemberRef,
+    RuntimeBindingResolvedLayout,
+    RuntimeBindingSourceRef,
+    RuntimeBindingSourceReuseDecision,
+    RuntimeTopologyRef,
+    build_serving_manifest_ref,
+)
 
 
 def _canonical_index_bytes() -> bytes:
     return b'{"w":[0,4,[1],[1],"torch.float32",0]}'
 
 
+def _realization_target() -> RealizationTarget:
+    topology = RuntimeTopologyRef(schema_topology_digest="topology-schema")
+    member = RuntimeBindingMemberRef(
+        member_id="member-0",
+        member_index=0,
+        member_count=1,
+        group_id="group-1",
+    )
+    source = RuntimeBindingSourceRef(
+        source_kind="checkpoint_artifact",
+        artifact_selection_digest="selection-digest",
+        source_artifact_ref="mi2:source",
+        source_schema_hash="source-schema",
+    )
+    resolved_layout = RuntimeBindingResolvedLayout(
+        binding_layout_id="layout-1",
+        source=source,
+        source_reuse=RuntimeBindingSourceReuseDecision(
+            mode="checkpoint_to_runtime",
+            representation_contract_hash="repr-contract",
+        ),
+        topology=topology,
+        member=member,
+        target_layout=b"target-layout",
+        target_index_bytes=b"target-index",
+        target_layout_hash="target-layout-hash",
+        tensor_schema_hash="tensor-schema",
+        spec_digest="spec-digest",
+        source_schema_hash="source-schema",
+    )
+    return RealizationTarget(
+        runtime="vllm",
+        device="cuda:0",
+        device_uuid="GPU-0",
+        source=source,
+        topology=topology,
+        member=member,
+        model_config_digest="model-config",
+        runtime_build_digest="serving-build",
+        resolved_layout=resolved_layout,
+    )
+
+
 def _sample_publish_manifest() -> PublishManifest:
     artifact_manifest = ManifestResult.from_artifact_selections(
         engine_request_id="rid-transfer",
@@ -148,6 +199,33 @@ def test_plan_view_selection_hash_populated() -> None:
     assert list(selection.tensor_names) == ["w"]
 
 
+def test_plan_prefetch_accepts_realization_target() -> None:
+    store = _StoreStub()
+    canonical_bytes = _canonical_index_bytes()
+    artifact = Artifact(
+        store_ref=_store_ref(store),
+        artifact_id="mi2:target-test",
+        canonical_index_bytes=canonical_bytes,
+        canonical_index=canonical_index_from_bytes(canonical_bytes),
+    )
+    target = _realization_target()
+    plan = Plan(CallContext(request_id="req-target"))
+    worker = Worker(
+        worker_id="worker-target",
+        daemon_address="127.0.0.1:50051",
+        daemon_id="daemon-target",
+    )
+
+    ref = plan.on_worker(worker).prefetch(artifact, target=target)
+
+    spec = plan.to_spec()
+    assert spec.steps[0].step_id == ref.step_id
+    prefetch = spec.steps[0].action.prefetch
+    assert prefetch.HasField("serving_binding_target")
+    assert prefetch.serving_binding_target.runtime == "vllm"
+    assert prefetch.serving_binding_target.member.member_id == "member-0"
+
+
 def test_plan_publish_serializes_canonical_action() -> None:
     ctx = CallContext(request_id="req-cache", idempotency_key="idem-cache")
     plan = Plan(ctx)
@@ -233,7 +311,7 @@ def test_plan_transform_register_pure_transform_builds_repo_owned_spec() -> None
 
     step_ref = plan.on_instance(inst).transform_register_pure_transform(
         artifact,
-        build_intent=ServingBuildIntent(
+        build_intent=RuntimeArtifactBuildIntent(
             builder_mode=BuilderMode.PURE_TRANSFORM,
             framework_name="torch",
             adapter_version="adapter-v7",
@@ -681,7 +759,7 @@ def test_plan_result_decodes_pure_transform_publication_result() -> None:
         lease=None,
     )
     bundle = build_pure_transform_publication_bundle_from_registered_artifact(
-        build_intent=ServingBuildIntent(
+        build_intent=RuntimeArtifactBuildIntent(
             representation_contract_hash="bafkrepresentation",
             builder_mode=BuilderMode.PURE_TRANSFORM,
             framework_name="torch",
diff --git a/tests/python/api/test_prefetch_operation.py b/tests/python/api/test_prefetch_operation.py
index ffc3ea22..24667ea1 100644
--- a/tests/python/api/test_prefetch_operation.py
+++ b/tests/python/api/test_prefetch_operation.py
@@ -29,16 +29,16 @@
     BindingValueRef,
     BindingValueVerificationState,
     GroupRealizationAcquireRef,
-    PrefetchedServingBinding,
-    PrefetchedServingBindingSet,
-    ServingBindingMemberRef,
-    ServingBindingResolvedLayout,
-    ServingBindingSetTarget,
-    ServingBindingSourceMemberRef,
-    ServingBindingSourceRef,
-    ServingBindingSourceReuseDecision,
-    ServingBindingTarget,
-    ServingTopologyRef,
+    PrefetchHandoff,
+    PrefetchHandoffSet,
+    RealizationTarget,
+    RealizationTargetSet,
+    RuntimeBindingMemberRef,
+    RuntimeBindingResolvedLayout,
+    RuntimeBindingSourceMemberRef,
+    RuntimeBindingSourceRef,
+    RuntimeBindingSourceReuseDecision,
+    RuntimeTopologyRef,
 )
 
 
@@ -85,12 +85,12 @@ def release_replica(self, ticket: store_daemon_pb2.ReplicaTicket):
 
     def _prefetched_binding(
         self,
-        target: ServingBindingTarget,
+        target: RealizationTarget,
         *,
         readiness: object,
         staged_value: bool = False,
         wait_for_publish: bool = False,
-    ) -> PrefetchedServingBinding:
+    ) -> PrefetchHandoff:
         device_uuid = str(target.device_uuid or "GPU-0")
         suffix = target.member.member_index + 1
         value_id = f"staged-value-{suffix}" if staged_value else f"value-{suffix}"
@@ -111,7 +111,7 @@ def _prefetched_binding(
             scope_digest=f"scope-digest-{suffix}",
             expires_at_ms=1234,
         )
-        return PrefetchedServingBinding(
+        return PrefetchHandoff(
             local_serving_ref=f"binding-local:binding-{suffix}:{value_id}",
             binding_value_ref=binding_ref,
             daemon_id="daemon-1",
@@ -141,12 +141,12 @@ def _prefetched_binding(
 
     def prefetch_serving_binding(self, **kwargs):
         self.prefetch_binding_calls.append(kwargs)
-        target = cast(ServingBindingTarget | ServingBindingSetTarget, kwargs["target"])
+        target = cast(RealizationTarget | RealizationTargetSet, kwargs["target"])
         operation_id = str(kwargs.get("operation_id") or "prefetch-binding-op")
         readiness = kwargs["requested_readiness"]
-        if isinstance(target, ServingBindingSetTarget):
-            staged_members = target.source.source_kind == "serving_artifact_set"
-            result = PrefetchedServingBindingSet(
+        if isinstance(target, RealizationTargetSet):
+            staged_members = target.source.source_kind == "runtime_artifact_set"
+            result = PrefetchHandoffSet(
                 runtime=target.runtime,
                 topology=target.topology,
                 group_id=target.group_id,
@@ -224,10 +224,10 @@ def _store_ref(store: _Store) -> Any:
     return cast(Any, weakref.ref(store))
 
 
-def _serving_target(
+def _realization_target(
     *,
-    topology: ServingTopologyRef | None = None,
-    source: ServingBindingSourceRef | None = None,
+    topology: RuntimeTopologyRef | None = None,
+    source: RuntimeBindingSourceRef | None = None,
     member_id: str = "member-0",
     member_index: int = 0,
     member_count: int = 1,
@@ -238,25 +238,25 @@ def _serving_target(
     target_index_bytes: bytes = b"target-index",
     target_layout_hash: str = "target-layout-hash",
     spec_digest: str = "spec-digest",
-) -> ServingBindingTarget:
-    topology = topology or ServingTopologyRef(schema_topology_digest="topology-schema")
-    member = ServingBindingMemberRef(
+) -> RealizationTarget:
+    topology = topology or RuntimeTopologyRef(schema_topology_digest="topology-schema")
+    member = RuntimeBindingMemberRef(
         member_id=member_id,
         member_index=member_index,
         member_count=member_count,
         group_id="group-1",
     )
-    source = source or ServingBindingSourceRef(
+    source = source or RuntimeBindingSourceRef(
         source_kind="checkpoint_artifact",
         artifact_selection_digest="selection-digest",
         source_artifact_ref="mi2:source",
         source_schema_hash="source-schema",
     )
-    source_reuse = ServingBindingSourceReuseDecision(
-        mode="checkpoint_to_serving",
+    source_reuse = RuntimeBindingSourceReuseDecision(
+        mode="checkpoint_to_runtime",
         representation_contract_hash="repr-contract",
     )
-    resolved_layout = ServingBindingResolvedLayout(
+    resolved_layout = RuntimeBindingResolvedLayout(
         binding_layout_id=binding_layout_id,
         source=source,
         source_reuse=source_reuse,
@@ -271,7 +271,7 @@ def _serving_target(
         copy_plan_bytes=b"copy-plan",
         dst_specs_bytes=b"dst-specs",
     )
-    return ServingBindingTarget(
+    return RealizationTarget(
         runtime="vllm",
         device=device,
         device_uuid=device_uuid,
@@ -279,20 +279,20 @@ def _serving_target(
         topology=topology,
         member=member,
         model_config_digest="model-config",
-        serving_build_digest="serving-build",
+        runtime_build_digest="serving-build",
         resolved_layout=resolved_layout,
     )
 
 
-def _serving_target_set() -> ServingBindingSetTarget:
-    topology = ServingTopologyRef(schema_topology_digest="topology-schema")
-    source = ServingBindingSourceRef(
+def _realization_target_set() -> RealizationTargetSet:
+    topology = RuntimeTopologyRef(schema_topology_digest="topology-schema")
+    source = RuntimeBindingSourceRef(
         source_kind="checkpoint_artifact",
         artifact_selection_digest="selection-digest",
         source_artifact_ref="mi2:source",
         source_schema_hash="source-schema",
     )
-    target_0 = _serving_target(
+    target_0 = _realization_target(
         topology=topology,
         source=source,
         member_id="member-0",
@@ -306,7 +306,7 @@ def _serving_target_set() -> ServingBindingSetTarget:
         target_layout_hash="target-layout-hash-0",
         spec_digest="spec-digest-0",
     )
-    target_1 = _serving_target(
+    target_1 = _realization_target(
         topology=topology,
         source=source,
         member_id="member-1",
@@ -320,7 +320,7 @@ def _serving_target_set() -> ServingBindingSetTarget:
         target_layout_hash="target-layout-hash-1",
         spec_digest="spec-digest-1",
     )
-    return ServingBindingSetTarget(
+    return RealizationTargetSet(
         runtime="vllm",
         source=source,
         topology=topology,
@@ -329,37 +329,37 @@ def _serving_target_set() -> ServingBindingSetTarget:
     )
 
 
-def _serving_artifact_set_target_set() -> ServingBindingSetTarget:
-    topology = ServingTopologyRef(schema_topology_digest="topology-schema")
-    member_0 = ServingBindingMemberRef(
+def _serving_artifact_realization_target_set() -> RealizationTargetSet:
+    topology = RuntimeTopologyRef(schema_topology_digest="topology-schema")
+    member_0 = RuntimeBindingMemberRef(
         member_id="member-0",
         member_index=0,
         member_count=2,
         group_id="group-1",
     )
-    member_1 = ServingBindingMemberRef(
+    member_1 = RuntimeBindingMemberRef(
         member_id="member-1",
         member_index=1,
         member_count=2,
         group_id="group-1",
     )
-    source = ServingBindingSourceRef(
-        source_kind="serving_artifact_set",
+    source = RuntimeBindingSourceRef(
+        source_kind="runtime_artifact_set",
         artifact_selection_digest="artifact-set-selection",
         source_schema_hash="source-schema",
         topology=topology,
         members=(
-            ServingBindingSourceMemberRef(
+            RuntimeBindingSourceMemberRef(
                 member=member_0,
                 artifact_ref="mi2:serving-member-0",
             ),
-            ServingBindingSourceMemberRef(
+            RuntimeBindingSourceMemberRef(
                 member=member_1,
                 artifact_ref="mi2:serving-member-1",
             ),
         ),
     )
-    target_0 = _serving_target(
+    target_0 = _realization_target(
         topology=topology,
         source=source,
         member_id=member_0.member_id,
@@ -373,7 +373,7 @@ def _serving_artifact_set_target_set() -> ServingBindingSetTarget:
         target_layout_hash="target-layout-hash-0",
         spec_digest="spec-digest-0",
     )
-    target_1 = _serving_target(
+    target_1 = _realization_target(
         topology=topology,
         source=source,
         member_id=member_1.member_id,
@@ -387,7 +387,7 @@ def _serving_artifact_set_target_set() -> ServingBindingSetTarget:
         target_layout_hash="target-layout-hash-1",
         spec_digest="spec-digest-1",
     )
-    return ServingBindingSetTarget(
+    return RealizationTargetSet(
         runtime="vllm",
         source=source,
         topology=topology,
@@ -518,11 +518,11 @@ def test_realize_async_prefetch_targets_emit_report_shaped_profile_events(
     )
     _ = replica_op.result(timeout_s=1.0)
     retained_op = artifact.realize_async(
-        ArtifactRealizationSpec.retained_binding(target=_serving_target())
+        ArtifactRealizationSpec.retained_binding(target=_realization_target())
     )
     _ = retained_op.result(timeout_s=1.0)
     target_set_op = artifact.realize_async(
-        ArtifactRealizationSpec.target_set(target=_serving_target_set())
+        ArtifactRealizationSpec.target_set(target=_realization_target_set())
     )
     _ = target_set_op.result(timeout_s=1.0)
 
@@ -551,7 +551,7 @@ def test_realize_async_prefetch_targets_emit_report_shaped_profile_events(
 def test_realize_async_retained_binding_completed_operation_status_and_cancel() -> None:
     store = _Store()
     artifact = Artifact(store_ref=_store_ref(store), artifact_id="aid")
-    target = _serving_target()
+    target = _realization_target()
 
     op = artifact.realize_async(ArtifactRealizationSpec.retained_binding(target=target))
 
@@ -559,18 +559,18 @@ def test_realize_async_retained_binding_completed_operation_status_and_cancel()
     assert op.done() is True
     assert op.cancel() is False
     result = op.result(timeout_s=1.0)
-    assert isinstance(result, PrefetchedServingBinding)
+    assert isinstance(result, PrefetchHandoff)
 
 
 def test_realize_async_retained_binding_attaches_report_to_result() -> None:
     store = _Store()
     artifact = Artifact(store_ref=_store_ref(store), artifact_id="aid")
-    target = _serving_target()
+    target = _realization_target()
 
     op = artifact.realize_async(ArtifactRealizationSpec.retained_binding(target=target))
     result = op.result(timeout_s=1.0)
 
-    assert isinstance(result, PrefetchedServingBinding)
+    assert isinstance(result, PrefetchHandoff)
     assert store._runtime.ensure_client().prefetch_binding_calls
     assert result.report is not None
     report = cast(ArtifactRealizationReport, result.report)
@@ -598,19 +598,19 @@ def test_realize_async_retained_binding_attaches_report_to_result() -> None:
     assert retained.binding_layout_id == "layout-1"
     assert retained.binding_value_id == "value-1"
     assert retained.reservation_bytes == 1024
-    assert retained.readiness == "serving_local_ready"
+    assert retained.readiness == "runtime_local_ready"
     assert retained.verification_state == "local_only"
 
 
 def test_realize_async_retained_binding_set_attaches_target_set_report() -> None:
     store = _Store()
     artifact = Artifact(store_ref=_store_ref(store), artifact_id="aid")
-    target = _serving_target_set()
+    target = _realization_target_set()
 
     op = artifact.realize_async(ArtifactRealizationSpec.target_set(target=target))
     result = op.result(timeout_s=1.0)
 
-    assert isinstance(result, PrefetchedServingBindingSet)
+    assert isinstance(result, PrefetchHandoffSet)
     assert store._runtime.ensure_client().prefetch_binding_calls
     assert result.report is not None
     report = cast(ArtifactRealizationReport, result.report)
@@ -668,22 +668,22 @@ def test_retained_binding_realization_rejects_target_set_bypass() -> None:
 
     with pytest.raises(
         tc.ArtifactError,
-        match="ServingBindingSetTarget requires target_set realization",
+        match="RealizationTargetSet requires target_set realization",
     ):
         artifact.realize_async(
-            ArtifactRealizationSpec.retained_binding(target=_serving_target_set())
+            ArtifactRealizationSpec.retained_binding(target=_realization_target_set())
         )
 
 
 def test_realize_async_target_set_per_part_selection_reports_group_lifecycle() -> None:
     store = _Store()
     artifact = Artifact(store_ref=_store_ref(store), artifact_id="aid")
-    target = _serving_artifact_set_target_set()
+    target = _serving_artifact_realization_target_set()
 
     op = artifact.realize_async(ArtifactRealizationSpec.target_set(target=target))
     result = op.result(timeout_s=1.0)
 
-    assert isinstance(result, PrefetchedServingBindingSet)
+    assert isinstance(result, PrefetchHandoffSet)
     assert result.report is not None
     report = cast(ArtifactRealizationReport, result.report)
     assert report.target_kind == "target_set"
@@ -692,7 +692,7 @@ def test_realize_async_target_set_per_part_selection_reports_group_lifecycle() -
         "release_group_staged_acquire",
     )
     assert report.target_set is not None
-    assert report.target_set.source_kind == "serving_artifact_set"
+    assert report.target_set.source_kind == "runtime_artifact_set"
     assert report.target_set.source_selection_mode == "per_part_selection"
     assert report.target_set.publish_barrier is True
     assert report.target_set.group_realization_transaction_ids == ("txn-1",)
@@ -729,12 +729,12 @@ def test_realize_async_target_set_per_part_selection_reports_group_lifecycle() -
 def test_prefetch_target_set_uses_target_set_realization_spec() -> None:
     store = _Store()
     artifact = Artifact(store_ref=_store_ref(store), artifact_id="aid")
-    target = _serving_target_set()
+    target = _realization_target_set()
 
     op = artifact.prefetch(target=target)
     result = op.result(timeout_s=1.0)
 
-    assert isinstance(result, PrefetchedServingBindingSet)
+    assert isinstance(result, PrefetchHandoffSet)
     assert store._runtime.ensure_client().prefetch_binding_calls[0]["target"] == target
     report = cast(ArtifactRealizationReport, result.report)
     assert report.target_kind == "target_set"
diff --git a/tests/python/api/test_public_surface.py b/tests/python/api/test_public_surface.py
index aa2624c4..0b0cf585 100644
--- a/tests/python/api/test_public_surface.py
+++ b/tests/python/api/test_public_surface.py
@@ -2,10 +2,92 @@
 
 from __future__ import annotations
 
+import importlib
+import importlib.util
 import inspect
+from pathlib import Path
 
 import tensorcast as tc
-from tensorcast.api.store import Store
+import tensorcast.artifact_runtime.diagnostics as tc_runtime_diagnostics
+import tensorcast.artifact_runtime.readiness as tc_runtime_readiness
+from tensorcast.api.store import (
+    ArtifactRealizationHandle,
+    ArtifactRealizationReport,
+    ArtifactRealizationSpec,
+    PrefetchHandoff,
+    PrefetchHandoffMemberFailure,
+    PrefetchHandoffSet,
+    RealizationTarget,
+    RealizationTargetSet,
+    RuntimeArtifactBuildIntent,
+    RuntimeArtifactManifest,
+    RuntimeArtifactPolicy,
+    RuntimeBindingMemberRef,
+    RuntimeBindingReadiness,
+    RuntimeBindingResolvedLayout,
+    RuntimeBindingSourceKind,
+    RuntimeBindingSourceMemberRef,
+    RuntimeBindingSourceRef,
+    RuntimeBindingSourceReuseDecision,
+    RuntimeBindingSourceReuseMode,
+    RuntimeRealizationSpecCacheEntry,
+    RuntimeTopologyRef,
+    Store,
+)
+from tensorcast.artifact_runtime.attachment import (
+    RuntimeAttachment,
+    RuntimeBindingState,
+)
+from tensorcast.artifact_runtime.config import (
+    RuntimeArtifactLocator,
+    RuntimeStartPlanError,
+    TensorCastRuntimeConfig,
+    plan_runtime_start,
+)
+from tensorcast.artifact_runtime.diagnostics import RuntimeRealizationReport
+from tensorcast.artifact_runtime.host import (
+    RuntimeAdmissionDecision,
+    RuntimeAdmissionPolicy,
+    RuntimeAdmissionRequest,
+    RuntimeHostCapabilities,
+    RuntimePlacement,
+    RuntimeProfile,
+    RuntimeTensorView,
+)
+from tensorcast.artifact_runtime.intent import RuntimeRequestContext
+from tensorcast.artifact_runtime.locator import ArtifactLocator
+from tensorcast.artifact_runtime.policy import RuntimePolicy
+from tensorcast.artifact_runtime.publication.actions import (
+    RuntimeReplicaPublicationSettings,
+    project_runtime_replica_publication_state,
+    publish_runtime_replica,
+    retire_runtime_replica,
+    runtime_replica_publication_settings,
+)
+from tensorcast.artifact_runtime.reload import (
+    merge_runtime_reload_extra_config,
+    normalize_runtime_reload_request_payload,
+    reload_runtime_attachment,
+)
+from tensorcast.artifact_runtime.state import (
+    ModelAttributeNames,
+    ModelAttributeRuntimeState,
+    OneShotRuntimeHook,
+)
+from tensorcast.artifact_runtime.view import (
+    BindingValueRefProjection,
+    RuntimeEndpointProjection,
+    RuntimeWorkerView,
+    SourceSelectionProjection,
+    WeightVersionProjection,
+    aggregate_runtime_view_outputs,
+)
+from tensorcast.retained_realization import (
+    RetainedRealizationClaim,
+    RetainedRealizationExpectedDigests,
+    retained_realization_claim_extra_from_handoff,
+    retained_realization_claim_extra_json_from_handoff,
+)
 
 
 def test_tensorcast_exports_artifact_helpers() -> None:
@@ -13,6 +95,267 @@ def test_tensorcast_exports_artifact_helpers() -> None:
     assert callable(tc.artifact)
     assert hasattr(tc, "artifact_async")
     assert callable(tc.artifact_async)
+    assert tc.ArtifactRealizationSpec is ArtifactRealizationSpec
+    assert tc.ArtifactRealizationHandle is ArtifactRealizationHandle
+    assert tc.ArtifactRealizationReport is ArtifactRealizationReport
+    assert "ArtifactRealizationSpec" in tc.__all__
+    assert tc.RetainedRealizationClaim is RetainedRealizationClaim
+    assert "RetainedRealizationClaim" in tc.__all__
+    assert tc.RetainedRealizationExpectedDigests is RetainedRealizationExpectedDigests
+    assert "RetainedRealizationExpectedDigests" in tc.__all__
+    assert callable(tc.parse_retained_realization_claim)
+    assert (
+        tc.retained_realization_claim_extra_from_handoff
+        is retained_realization_claim_extra_from_handoff
+    )
+    assert "retained_realization_claim_extra_from_handoff" in tc.__all__
+    assert (
+        tc.retained_realization_claim_extra_json_from_handoff
+        is retained_realization_claim_extra_json_from_handoff
+    )
+    assert "retained_realization_claim_extra_json_from_handoff" in tc.__all__
+    assert tc.RuntimeAttachment is RuntimeAttachment
+    assert tc.RuntimeBindingState is RuntimeBindingState
+    assert tc.RuntimeRequestContext is RuntimeRequestContext
+    assert tc.RuntimeAdmissionDecision is RuntimeAdmissionDecision
+    assert tc.RuntimeAdmissionPolicy is RuntimeAdmissionPolicy
+    assert tc.RuntimeAdmissionRequest is RuntimeAdmissionRequest
+    assert tc.RuntimeHostCapabilities is RuntimeHostCapabilities
+    assert tc.RuntimePlacement is RuntimePlacement
+    assert tc.RuntimeProfile is RuntimeProfile
+    assert tc.RuntimeTensorView is RuntimeTensorView
+    assert tc.ArtifactLocator is ArtifactLocator
+    assert tc.RuntimeArtifactLocator is RuntimeArtifactLocator
+    assert tc.RuntimePolicy is RuntimePolicy
+    assert tc.RuntimeRealizationReport is RuntimeRealizationReport
+    assert tc.RuntimeArtifactBuildIntent is RuntimeArtifactBuildIntent
+    assert tc.RuntimeArtifactManifest is RuntimeArtifactManifest
+    assert tc.RuntimeArtifactPolicy is RuntimeArtifactPolicy
+    assert tc.RealizationTarget is RealizationTarget
+    assert tc.RealizationTargetSet is RealizationTargetSet
+    assert tc.RuntimeBindingMemberRef is RuntimeBindingMemberRef
+    assert tc.RuntimeBindingReadiness is RuntimeBindingReadiness
+    assert tc.RuntimeBindingResolvedLayout is RuntimeBindingResolvedLayout
+    assert tc.RuntimeBindingSourceKind is RuntimeBindingSourceKind
+    assert tc.RuntimeBindingSourceMemberRef is RuntimeBindingSourceMemberRef
+    assert tc.RuntimeBindingSourceRef is RuntimeBindingSourceRef
+    assert tc.RuntimeBindingSourceReuseDecision is RuntimeBindingSourceReuseDecision
+    assert tc.RuntimeBindingSourceReuseMode is RuntimeBindingSourceReuseMode
+    assert tc.RuntimeRealizationSpecCacheEntry is RuntimeRealizationSpecCacheEntry
+    assert tc.RuntimeTopologyRef is RuntimeTopologyRef
+    assert tc.PrefetchHandoff is PrefetchHandoff
+    assert tc.PrefetchHandoffMemberFailure is PrefetchHandoffMemberFailure
+    assert tc.PrefetchHandoffSet is PrefetchHandoffSet
+    for removed_name in (
+        "ServingBindingTarget",
+        "ServingBindingSetTarget",
+        "PrefetchedServingBinding",
+        "PrefetchedServingBindingSet",
+        "ServingBuildIntent",
+        "ServingArtifactManifest",
+        "ServingRuntimePolicy",
+        "ServingRealizationReport",
+        "ServingBindingMemberRef",
+        "ServingBindingReadiness",
+        "ServingBindingResolvedLayout",
+        "ServingBindingResolvedSpecCacheEntry",
+        "ServingBindingSourceKind",
+        "ServingBindingSourceMemberRef",
+        "ServingBindingSourceRef",
+        "ServingBindingSourceReuseDecision",
+        "ServingBindingSourceReuseMode",
+        "ServingTopologyRef",
+        "ServingAdmissionFacts",
+        "ServingPublicationSubject",
+        "ServingSupportLevel",
+        "PreparedServingRegistration",
+        "RegisteredServingPublication",
+        "CapabilityDirectoryClient",
+        "CapabilityDirectoryOptions",
+        "RegisteredRuntimeArtifactPublication",
+        "PreparedRuntimeArtifactRegistration",
+        "RuntimePublicationSubject",
+        "RuntimeAdmissionFacts",
+        "RuntimeSupportLevel",
+        "build_serving_publication_bundle",
+        "build_serving_publication_bundle_from_registered_artifact",
+        "build_runtime_artifact_publication_bundle",
+        "build_runtime_artifact_publication_bundle_from_registered_artifact",
+        "build_serving_manifest_ref",
+        "SERVING_BUILD_DIGEST_VERSION",
+        "compute_serving_tensor_schema_hash",
+        "compute_runtime_artifact_tensor_schema_hash",
+        "count_canonical_serving_tensors",
+        "count_canonical_runtime_tensors",
+        "prepare_pure_transform_serving_registration",
+        "prepare_binding_finalize_serving_registration",
+        "prepare_serving_registration",
+        "prepare_pure_transform_runtime_registration",
+        "prepare_binding_finalize_runtime_registration",
+        "prepare_runtime_artifact_registration",
+        "parse_serving_manifest_ref",
+        "plan_serving_binding_source_reuse",
+        "retained_realization_claim_extra_from_prefetched_binding",
+        "retained_realization_claim_extra_json",
+    ):
+        assert removed_name not in tc.__all__
+        assert not hasattr(tc, removed_name)
+    assert tc.RuntimeStartPlanError is RuntimeStartPlanError
+    assert tc.TensorCastRuntimeConfig is TensorCastRuntimeConfig
+    assert tc.plan_runtime_start is plan_runtime_start
+    assert tc.runtime is importlib.import_module("tensorcast.runtime")
+    assert "runtime" in tc.__all__
+    assert "serving" not in tc.__all__
+    assert not hasattr(tc, "serving")
+    assert "RuntimeAttachment" in tc.__all__
+    assert "RuntimeBindingState" in tc.__all__
+    assert "RuntimeRequestContext" in tc.__all__
+    assert "RuntimeAdmissionDecision" in tc.__all__
+    assert "RuntimeAdmissionPolicy" in tc.__all__
+    assert "RuntimeAdmissionRequest" in tc.__all__
+    assert "RuntimeHostCapabilities" in tc.__all__
+    assert "RuntimePlacement" in tc.__all__
+    assert "RuntimeProfile" in tc.__all__
+    assert "RuntimeTensorView" in tc.__all__
+    assert "ArtifactLocator" in tc.__all__
+    assert "RuntimeArtifactLocator" in tc.__all__
+    assert "RuntimePolicy" in tc.__all__
+    assert "RuntimeRealizationReport" in tc.__all__
+
+    assert "RuntimeArtifactBuildIntent" in tc.__all__
+    assert "RuntimeArtifactManifest" in tc.__all__
+    assert "RuntimeArtifactPolicy" in tc.__all__
+    assert "RealizationTarget" in tc.__all__
+    assert "RealizationTargetSet" in tc.__all__
+    assert "RuntimeBindingMemberRef" in tc.__all__
+    assert "RuntimeBindingReadiness" in tc.__all__
+    assert "RuntimeBindingResolvedLayout" in tc.__all__
+    assert "RuntimeBindingSourceKind" in tc.__all__
+    assert "RuntimeBindingSourceMemberRef" in tc.__all__
+    assert "RuntimeBindingSourceRef" in tc.__all__
+    assert "RuntimeBindingSourceReuseDecision" in tc.__all__
+    assert "RuntimeBindingSourceReuseMode" in tc.__all__
+    assert "RuntimeRealizationSpecCacheEntry" in tc.__all__
+    assert "RuntimeTopologyRef" in tc.__all__
+    assert "PrefetchHandoff" in tc.__all__
+    assert "PrefetchHandoffMemberFailure" in tc.__all__
+    assert "PrefetchHandoffSet" in tc.__all__
+    assert "RuntimeStartPlanError" in tc.__all__
+    assert "TensorCastRuntimeConfig" in tc.__all__
+    assert "plan_runtime_start" in tc.__all__
+    assert "ServingRealizationReport" not in tc_runtime_diagnostics.__all__
+    assert not hasattr(tc_runtime_diagnostics, "ServingRealizationReport")
+    for removed_module in (
+        "tensorcast.serving",
+        "tensorcast.serving.runtime",
+        "tensorcast.serving.diagnostics",
+        "tensorcast.serving.config",
+        "tensorcast.serving.contract",
+        "tensorcast.serving.hosts",
+        "tensorcast.serving.readiness",
+        "tensorcast.serving.runtime_attachment",
+        "tensorcast.serving.runtime_config",
+        "tensorcast.serving.runtime_contract",
+        "tensorcast.serving.runtime_intent",
+        "tensorcast.serving.runtime_view",
+        "tensorcast.serving.policy",
+        "tensorcast.serving.session",
+        "tensorcast.serving.source_catalog",
+        "tensorcast.serving.state",
+        "tensorcast.serving._runtime_impl",
+        "tensorcast.serving._runtime_impl.lifecycle",
+        "tensorcast.serving.admin",
+        "tensorcast.serving.artifact_manifest",
+        "tensorcast.serving.binding_runtime",
+        "tensorcast.serving.builder",
+        "tensorcast.serving.builder.tensor_parity",
+        "tensorcast.serving.dto",
+        "tensorcast.serving.errors",
+        "tensorcast.serving.local_ready",
+        "tensorcast.serving.replica_publication",
+        "tensorcast.serving.resolver",
+        "tensorcast.serving.retained_binding",
+        "tensorcast.serving.testing",
+    ):
+        try:
+            spec = importlib.util.find_spec(removed_module)
+        except ModuleNotFoundError:
+            spec = None
+        assert spec is None
+    assert tc.ModelAttributeNames is ModelAttributeNames
+    assert tc.ModelAttributeRuntimeState is ModelAttributeRuntimeState
+    assert tc.OneShotRuntimeHook is OneShotRuntimeHook
+    assert "ModelAttributeNames" in tc.__all__
+    assert "ModelAttributeRuntimeState" in tc.__all__
+    assert "OneShotRuntimeHook" in tc.__all__
+    assert tc.BindingValueRefProjection is BindingValueRefProjection
+    assert tc.RuntimeEndpointProjection is RuntimeEndpointProjection
+    assert tc.RuntimeWorkerView is RuntimeWorkerView
+    assert tc.SourceSelectionProjection is SourceSelectionProjection
+    assert tc.WeightVersionProjection is WeightVersionProjection
+    assert "BindingValueRefProjection" in tc.__all__
+    assert "RuntimeEndpointProjection" in tc.__all__
+    assert "RuntimeWorkerView" in tc.__all__
+    assert "SourceSelectionProjection" in tc.__all__
+    assert "WeightVersionProjection" in tc.__all__
+    assert tc.aggregate_runtime_view_outputs is aggregate_runtime_view_outputs
+    assert "aggregate_runtime_view_outputs" in tc.__all__
+    assert tc.publish_runtime_replica is publish_runtime_replica
+    assert tc.project_runtime_replica_publication_state is (
+        project_runtime_replica_publication_state
+    )
+    assert tc.retire_runtime_replica is retire_runtime_replica
+    assert "publish_runtime_replica" in tc.__all__
+    assert "retire_runtime_replica" in tc.__all__
+    assert tc.RuntimeReplicaPublicationSettings is (RuntimeReplicaPublicationSettings)
+    assert tc.runtime_replica_publication_settings is (
+        runtime_replica_publication_settings
+    )
+    assert "RuntimeReplicaPublicationSettings" in tc.__all__
+    assert "runtime_replica_publication_settings" in tc.__all__
+    assert tc.reload_runtime_attachment is reload_runtime_attachment
+    assert "reload_runtime_attachment" in tc.__all__
+    assert tc.merge_runtime_reload_extra_config is merge_runtime_reload_extra_config
+    assert tc.normalize_runtime_reload_request_payload is (
+        normalize_runtime_reload_request_payload
+    )
+    assert "merge_runtime_reload_extra_config" in tc.__all__
+    assert "normalize_runtime_reload_request_payload" in tc.__all__
+    assert callable(tc_runtime_diagnostics.binding_layout_tensor_count)
+    assert tc_runtime_readiness.ReadinessInventoryAdmissionPolicy is not None
+
+
+def test_public_sdk_surface_does_not_open_global_store_channels() -> None:
+    """Public SDK sources must not reference global-store or raw gRPC channel APIs."""
+    forbidden = (
+        "GlobalStoreCompositeStub",
+        "tensorcast.global_store",
+        "global_store_pb2",
+        "global_store_pb2_grpc",
+        "tensorcast.proto.global_store",
+        "grpc.insecure_channel",
+        "grpc.secure_channel",
+        "grpc.aio.insecure_channel",
+        "grpc.aio.secure_channel",
+    )
+    # Resolve from the imported package: a missing cwd-relative root scans nothing.
+    package_dir = Path(inspect.getfile(tc)).resolve().parent
+    names = ("__init__.py", "api", "artifact_runtime", "retained_realization.py")
+    roots = tuple(package_dir / name for name in names)
+    assert all(root.exists() for root in roots), roots
+    checked = [
+        path
+        for root in roots
+        for path in ([root] if root.is_file() else sorted(root.rglob("*.py")))
+    ]
+    offenders = [
+        f"{path}:{token}"
+        for path in checked
+        for token in forbidden
+        if token in path.read_text(encoding="utf-8")
+    ]
+
+    assert offenders == []
 
 
 def test_tensorcast_exports_programmable_primitives() -> None:
diff --git a/tests/python/api/test_realization_kernel.py b/tests/python/api/test_realization_kernel.py
index c72c6b9a..4a3da07e 100644
--- a/tests/python/api/test_realization_kernel.py
+++ b/tests/python/api/test_realization_kernel.py
@@ -35,6 +35,7 @@
     envelope_for_target_set,
     envelope_for_tensor_dict,
     lifecycle_plan_for_envelope,
+    materialization_source_label,
     model_runtime_report_for,
     mounted_source_target_digest,
     publishability_report_for,
@@ -65,7 +66,7 @@
     BindingReservationCapability,
     BindingValueRef,
     GroupRealizationAcquireRef,
-    ServingBindingMemberRef,
+    RuntimeBindingMemberRef,
 )
 
 
@@ -95,6 +96,21 @@ def _canonical_index_bytes() -> bytes:
     )
 
 
+def test_materialization_source_label_uses_realization_report_vocabulary() -> None:
+    """Check the report label produced for each materialization source.
+
+    The expected strings are the labels asserted for the LOCAL_REPLICA, P2P
+    and DISK enum values of ``store_daemon_pb2``.
+    """
+    expected_labels = (
+        (store_daemon_pb2.MATERIALIZATION_SOURCE_LOCAL_REPLICA, "local_replica"),
+        (store_daemon_pb2.MATERIALIZATION_SOURCE_P2P, "p2p"),
+        (store_daemon_pb2.MATERIALIZATION_SOURCE_DISK, "disk"),
+    )
+    for source, label in expected_labels:
+        assert materialization_source_label(source) == label
+
+
 def test_resolve_artifact_selection_subset_digest_is_stable() -> None:
     index_bytes = _canonical_index_bytes()
 
@@ -563,7 +579,7 @@ def retained_state(
             binding_value_id=f"value-{member_index}",
             seal_generation=1,
         )
-        member = ServingBindingMemberRef(
+        member = RuntimeBindingMemberRef(
             member_id=f"member-{member_index}",
             member_index=member_index,
             member_count=2,
@@ -588,7 +604,7 @@ def retained_state(
             daemon_session_id="session-1",
             device_uuid=f"GPU-{member_index}",
             reservation_bytes=reservation_bytes,
-            readiness="serving_local_ready",
+            readiness="runtime_local_ready",
             verification_state="local_only",
             staged_value=staged_value,
             group_realization_acquire=GroupRealizationAcquireRef(
@@ -1267,7 +1283,7 @@ def test_model_runtime_report_wraps_runtime_attachment_report() -> None:
         artifact_id="mi2:test:serving",
         canonical_index_bytes=_canonical_index_bytes(),
         tensor_names=("a",),
-        artifact_profile="serving_artifact",
+        artifact_profile="runtime_artifact",
         authority_scope="daemon_mediated_runtime_attachment",
     )
     target_plan = RealizationTargetPlan(
@@ -1350,7 +1366,7 @@ def test_publication_spec_and_handle_facade_own_release_contract() -> None:
         publish_replica=lambda: None,
         size_bytes=1024,
     )
-    spec = ArtifactRealizationSpec.publication(target=projection, timeout_s=5)
+    spec = ArtifactRealizationSpec._publication(target=projection, timeout_s=5)
     target_plan = RealizationTargetPlan(
         kind=spec.target_kind,
         target_layout_digest="layout-1",
@@ -1502,6 +1518,7 @@ def test_binding_envelope_and_report_capture_identity_diagnostics() -> None:
     target_plan = RealizationTargetPlan(
         kind="binding_owned",
         device="cuda:0",
+        target_layout_digest="binding-layout:bl1:test",
         binding_layout_id="bl1:test",
     )
 
@@ -1543,6 +1560,9 @@ def test_binding_envelope_and_report_capture_identity_diagnostics() -> None:
     assert report.binding.binding_layout_id == "bl1:test"
     assert report.binding.binding_value_id == "value-1"
     assert report.binding.value_state == "current"
+    assert report.view_subset_hash == selection.view_subset_hash.hex()
+    assert report.logical_layout_hash == selection.logical_layout_hash.hex()
+    assert report.selection_hash == selection.selection_hash.hex()
     assert report.binding.publication_eligible is True
     assert report.binding.publish_requested is True
     assert report.binding.published is True
@@ -1627,6 +1647,8 @@ def test_binding_envelope_and_report_capture_identity_diagnostics() -> None:
     assert execution_dict["planner_reject_reason_buckets"] == {"not_collective": 2}
 
     profile_payload = artifact_realization_profile_payload(report)
+    assert profile_payload["logical_layout_hash"] == selection.logical_layout_hash.hex()
+    assert profile_payload["selection_hash"] == selection.selection_hash.hex()
     assert profile_payload["execution_actual_executor_path"] == "mixed_collective"
     assert profile_payload["execution_residual_bytes"] == 4
     assert profile_payload["execution_plan_kind"] == "collective_first_mixed"
@@ -1710,6 +1732,7 @@ def test_local_ready_pending_verification_report_records_admission_state() -> No
     )
     target_plan = RealizationTargetPlan(
         kind="binding_owned",
+        target_layout_digest="binding-layout:layout-local-ready",
         binding_layout_id="layout-local-ready",
     )
     envelope = envelope_for_binding(binding, target_kind="binding_owned")
@@ -1744,7 +1767,7 @@ def test_runtime_attachment_envelope_and_report_capture_release_contract() -> No
         artifact_id="mi2:test:serving",
         canonical_index_bytes=_canonical_index_bytes(),
         tensor_names=("a",),
-        artifact_profile="serving_artifact",
+        artifact_profile="runtime_artifact",
         authority_scope="daemon_mediated_runtime_attachment",
     )
     target_plan = RealizationTargetPlan(
@@ -1832,7 +1855,7 @@ def _target_set_retained_member(member_index: int) -> RealizationRetainedBinding
         reservation_bytes=1024,
         reservation_capability_id=f"cap-{suffix}",
         reservation_scope_digest=f"scope-{suffix}",
-        readiness="serving_local_ready",
+        readiness="runtime_local_ready",
         verification_state="local_only",
     )
 
@@ -1862,7 +1885,7 @@ def test_retained_binding_report_captures_capability_expiry() -> None:
         daemon_session_id="session-1",
         device_uuid="GPU-0",
         reservation_bytes=4096,
-        readiness="serving_local_ready",
+        readiness="runtime_local_ready",
         verification_state="local_only",
         expires_at_ms=4_102_444_800_000,
     )
@@ -1892,7 +1915,7 @@ def test_target_set_report_groups_retained_member_facts() -> None:
             reservation_bytes=1024,
             reservation_capability_id="cap-0",
             reservation_scope_digest="scope-0",
-            readiness="serving_local_ready",
+            readiness="runtime_local_ready",
             verification_state="local_only",
             staged_value=True,
             group_realization_transaction_id="txn-1",
@@ -1915,7 +1938,7 @@ def test_target_set_report_groups_retained_member_facts() -> None:
             reservation_bytes=2048,
             reservation_capability_id="cap-1",
             reservation_scope_digest="scope-1",
-            readiness="serving_local_ready",
+            readiness="runtime_local_ready",
             verification_state="local_only",
             staged_value=True,
             group_realization_transaction_id="txn-1",
@@ -1950,7 +1973,7 @@ def test_target_set_report_groups_retained_member_facts() -> None:
         runtime="vllm",
         group_id="group-1",
         topology=target.topology,
-        readiness="serving_local_ready",
+        readiness="runtime_local_ready",
         partial=False,
         member_failures=(),
     )
@@ -2040,6 +2063,7 @@ def test_reports_share_core_realization_fields_across_targets() -> None:
     binding_target = RealizationTargetPlan(
         kind="binding_owned",
         device="cuda:0",
+        target_layout_digest="binding-layout:bl1:test",
         binding_layout_id="bl1:test",
     )
     binding_envelope = envelope_for_binding(binding, target_kind="binding_owned")
@@ -2208,7 +2232,7 @@ def test_reports_share_core_realization_fields_across_targets() -> None:
     assert retained_profile["retained_binding_capability_expires_at_ms"] == (
         4_102_444_800_000,
     )
-    assert retained_profile["retained_binding_readiness"] == ("serving_local_ready",)
+    assert retained_profile["retained_binding_readiness"] == ("runtime_local_ready",)
     assert retained_profile["retained_binding_verification_states"] == ("local_only",)
 
 
@@ -2258,7 +2282,7 @@ def test_target_set_strategy_and_lifecycle_plans_capture_group_barriers() -> Non
                     group_id="group-1",
                 ),
                 model_config_digest="model-config",
-                serving_build_digest="serving-build",
+                runtime_build_digest="serving-build",
                 source=source,
                 resolved_layout=SimpleNamespace(
                     target_layout_hash="target-layout-0",
@@ -2276,7 +2300,7 @@ def test_target_set_strategy_and_lifecycle_plans_capture_group_barriers() -> Non
                     group_id="group-1",
                 ),
                 model_config_digest="model-config",
-                serving_build_digest="serving-build",
+                runtime_build_digest="serving-build",
                 source=source,
                 resolved_layout=SimpleNamespace(
                     target_layout_hash="target-layout-1",
@@ -2333,7 +2357,7 @@ def test_target_set_report_marks_serving_artifact_set_as_per_part() -> None:
         runtime="vllm",
         group_id="group-1",
         source=SimpleNamespace(
-            source_kind="serving_artifact_set",
+            source_kind="runtime_artifact_set",
             artifact_selection_digest="artifact-set-selection",
             source_artifact_ref=None,
             members=(
@@ -2366,7 +2390,7 @@ def test_target_set_report_marks_serving_artifact_set_as_per_part() -> None:
         source_selection_digest="fallback-selection",
     )
 
-    assert report.source_kind == "serving_artifact_set"
+    assert report.source_kind == "runtime_artifact_set"
     assert report.source_selection_mode == "per_part_selection"
     assert [member.source_artifact_ref for member in report.members] == [
         "mi2:serving-member-0",
@@ -2466,6 +2490,154 @@ def test_risk_labels_are_derived_from_target_plan_and_envelope() -> None:
     )
 
 
+_RISK_CLOSURE_MATRIX: tuple[dict[str, str], ...] = (  # one dict per tracked risk
+    {
+        "risk": "Selection resolver becomes too broad.",
+        "admission_field": "artifact_id/key exclusivity, view_id, generation_hint",
+        "envelope_field": "target_layout_digest remains target-plan owned",
+        "report_field": "source_selection_digest",
+        "guardrail_test": "test_resolve_artifact_selection_keeps_target_plan_identity_separate",
+        "blocking_condition": "target layout or copy-plan policy moves into selection",
+    },
+    {
+        "risk": "SDK direct Global Store access survives behind helper APIs.",
+        "admission_field": "authority_scope",
+        "envelope_field": "owner_kind",
+        "report_field": "authority_scope",
+        "guardrail_test": "test_sdk_api_paths_do_not_open_global_store_channels",
+        "blocking_condition": "SDK artifact realization opens Global Store channels",
+    },
+    {
+        "risk": "`PublicDiskSourceHandle` becomes a permanent source authority.",
+        "admission_field": "artifact_profile=mounted_source",
+        "envelope_field": "backing_kind=mounted_source_metadata",
+        "report_field": "mounted_source.source_artifact_id",
+        "guardrail_test": "test_mounted_source_realize_rejects_non_msa1_subject",
+        "blocking_condition": "mounted source executes without msa1 identity",
+    },
+    {
+        "risk": "Mapped target layout is confused with source selection.",
+        "admission_field": "target_layout_digest",
+        "envelope_field": "projection_kind",
+        "report_field": "copy_plan_digest",
+        "guardrail_test": "test_resolve_artifact_selection_accepts_mapped_source_view_hint",
+        "blocking_condition": "mapped/adopted target reports reuse selection digest as layout",
+    },
+    {
+        "risk": "TensorDict accidentally inherits binding lifecycle.",
+        "admission_field": "target_kind=tensor_dict",
+        "envelope_field": "projection_kind=tensor_dict",
+        "report_field": "publishability.reason",
+        "guardrail_test": "test_tensor_dict_handle_rejects_binding_lifecycle_capabilities",
+        "blocking_condition": "TensorDict handle can publish, promote, or retain",
+    },
+    {
+        "risk": "TensorDict projections release daemon payloads too early or leak them.",
+        "admission_field": "release_strictness",
+        "envelope_field": "release_policy",
+        "report_field": "envelope.release_policy",
+        "guardrail_test": "test_tensor_subset_materialization_and_release",
+        "blocking_condition": "projection close does not unload daemon payload exactly once",
+    },
+    {
+        "risk": "Resource lifecycle remains path-specific under a unified API.",
+        "admission_field": "release_strictness",
+        "envelope_field": "release_policy",
+        "report_field": "lifecycle_plan.capability",
+        "guardrail_test": "test_release_contract_lifecycle_matrix_runs_policy_actions_once",
+        "blocking_condition": "cleanup action exists outside a release contract",
+    },
+    {
+        "risk": "Handle-lease mint failure silently weakens export lifetime.",
+        "admission_field": "export_lifetime_kind",
+        "envelope_field": "export_kind",
+        "report_field": "envelope.export_lifetime_kind",
+        "guardrail_test": "test_cpu_memfd_materialization_fails_before_tensor_restore_without_export_authority",
+        "blocking_condition": "CPU memfd or CUDA IPC export succeeds without token authority",
+    },
+    {
+        "risk": "CPU TensorDict mutability stays ambiguous.",
+        "admission_field": "mutability_contract",
+        "envelope_field": "mutability_contract",
+        "report_field": "envelope.mutability_contract",
+        "guardrail_test": "test_tensor_dict_projection_rejects_mapping_mutations",
+        "blocking_condition": "TensorDict mapping mutation succeeds",
+    },
+    {
+        "risk": "`get_into` hides expensive fallback copies.",
+        "admission_field": "fallback_policy",
+        "envelope_field": "fallback_reason_buckets",
+        "report_field": "copy_bytes",
+        "guardrail_test": "test_get_into_returns_fallback_result_and_unloads",
+        "blocking_condition": "temporary-payload fallback has no report bucket",
+    },
+    {
+        "risk": "Prefetch grows a second continuation model.",
+        "admission_field": "operation_id",
+        "envelope_field": "projection_kind=prefetch_handoff",
+        "report_field": "operation_backend",
+        "guardrail_test": "test_realize_async_retained_replica_operation_status_wait_and_cancel",
+        "blocking_condition": "prefetch bypasses Operation status/wait/cancel",
+    },
+    {
+        "risk": "Binding paths bypass strategy planning.",
+        "admission_field": "fallback_policy",
+        "envelope_field": "direct_write_bytes",
+        "report_field": "strategy_plan.fallback_policy",
+        "guardrail_test": "test_binding_envelope_and_report_capture_identity_diagnostics",
+        "blocking_condition": "binding materialization report lacks strategy facts",
+    },
+    {
+        "risk": "Tensor-aware strategy loses lane/residual visibility.",
+        "admission_field": "execution_plan_kind",
+        "envelope_field": "temporary_replica_bytes",
+        "report_field": "execution_commit.lane_allocation_bytes",
+        "guardrail_test": "test_binding_envelope_and_report_capture_identity_diagnostics",
+        "blocking_condition": "mixed execution omits lane, residual, or reject buckets",
+    },
+    {
+        "risk": "TP grows special-case orchestration.",
+        "admission_field": "target_set.source_selection_mode",
+        "envelope_field": "projection_kind=target_set",
+        "report_field": "target_set.members",
+        "guardrail_test": "test_group_member_same_and_per_part_selection_identity",
+        "blocking_condition": "TP path adds non-target-set realization state",
+    },
+    {
+        "risk": "RPC cleanup is attempted too early.",
+        "admission_field": "controller plan validation",
+        "envelope_field": "resource_authorities",
+        "report_field": "controller plan spans",
+        "guardrail_test": "daemon controller realization plan tests",
+        "blocking_condition": "proto cleanup lands before shared controller path",
+    },
+    {
+        "risk": "Target-state behavior regresses while compatibility code is deleted.",
+        "admission_field": "scenario acceptance coverage",
+        "envelope_field": "runtime_attachment release_policy",
+        "report_field": "model_runtime.runtime_attachment_target_kind",
+        "guardrail_test": "serving integration/runtime publication scenarios",
+        "blocking_condition": "compatibility code deletion lacks runtime scenario coverage",
+    },
+)
+
+
+def test_risk_closure_matrix_has_unique_risks_and_enforcement_fields() -> None:
+    """Each risk appears once; every enforcement field is present and non-empty."""
+    required_fields = (
+        "admission_field",
+        "envelope_field",
+        "report_field",
+        "guardrail_test",
+        "blocking_condition",
+    )
+    risks = [entry["risk"] for entry in _RISK_CLOSURE_MATRIX]
+    assert len(set(risks)) == len(risks), "duplicate risk in closure matrix"
+    for entry in _RISK_CLOSURE_MATRIX:
+        for field in required_fields:
+            assert entry.get(field), f"{entry['risk']} missing {field}"
+
+
 def test_sdk_realization_paths_do_not_import_selection_builder() -> None:
     checked = (
         Path("tensorcast/api/store/__init__.py"),
@@ -2473,6 +2645,7 @@ def test_sdk_realization_paths_do_not_import_selection_builder() -> None:
         Path("tensorcast/api/store/artifact.py"),
         Path("tensorcast/api/store/materialization.py"),
         Path("tensorcast/api/store/inplace_slot.py"),
+        Path("tensorcast/api/store/runtime_realization_reference_consumer.py"),
         Path("tensorcast/api/plan/plan.py"),
     )
     offenders: list[str] = []
@@ -2583,7 +2756,9 @@ def test_client_binding_rollbacks_log_cleanup_failures_instead_of_suppressing()
     assert "logger.exception" in helper_source
 
 
-def test_realization_lifecycle_code_does_not_silently_suppress_broad_exceptions() -> None:
+def test_realization_lifecycle_code_does_not_silently_suppress_broad_exceptions() -> (
+    None
+):
     guarded_paths = (
         Path("tensorcast/api/_register.py"),
         Path("tensorcast/api/store/__init__.py"),
@@ -2601,10 +2776,10 @@ def test_realization_lifecycle_code_does_not_silently_suppress_broad_exceptions(
         Path("tensorcast/global_store/rpc/replica_registration_rpc_handler.py"),
         Path("tensorcast/global_store/rpc/transport_rpc_handler.py"),
         Path("tensorcast/global_store/services/instance_service.py"),
-        Path("tensorcast/serving/retained_binding.py"),
-        Path("tensorcast/serving/_runtime_impl/lifecycle.py"),
-        Path("tensorcast/serving/local_ready.py"),
-        Path("tensorcast/serving/recipe_build.py"),
+        Path("tensorcast/artifact_runtime/binding/retained.py"),
+        Path("tensorcast/artifact_runtime/lifecycle.py"),
+        Path("tensorcast/artifact_runtime/recipe/local_ready.py"),
+        Path("tensorcast/artifact_runtime/recipe/build.py"),
     )
     offenders = [
         str(path)
@@ -2674,7 +2849,9 @@ def test_mounted_source_config_no_longer_exposes_absolute_fallback_mode() -> Non
     assert offenders == []
 
 
-def test_daemon_canonical_index_loading_uses_explicit_authority_not_disk_fallback() -> None:
+def test_daemon_canonical_index_loading_uses_explicit_authority_not_disk_fallback() -> (
+    None
+):
     guarded_paths = (
         Path("daemon/service/controllers/materialization_index_source_utils.h"),
         Path("daemon/service/controllers/materialization_index_source_utils.cc"),
diff --git a/tests/python/api/test_register_stable_dram_streaming.py b/tests/python/api/test_register_stable_dram_streaming.py
index c310b977..a9ec88f8 100644
--- a/tests/python/api/test_register_stable_dram_streaming.py
+++ b/tests/python/api/test_register_stable_dram_streaming.py
@@ -8,8 +8,8 @@
 from dataclasses import dataclass
 from typing import Any
 
-import torch
 import pytest
+import torch
 
 from tensorcast.api._config import PlanType, RegisterArtifactOptions
 from tensorcast.api._errors import TensorCastError
diff --git a/tests/python/api/test_retrieval_options.py b/tests/python/api/test_retrieval_options.py
index f7024f73..e8b8ee48 100644
--- a/tests/python/api/test_retrieval_options.py
+++ b/tests/python/api/test_retrieval_options.py
@@ -50,6 +50,27 @@ def test_get_options_parse_topology() -> None:
     )
 
 
+def test_execution_topology_keeps_unspecified_collective_policy() -> None:
+    """A topology built without a collective policy keeps the policy as None."""
+    group = CollectiveLoadGroup(group_id="group-a", world_size=4, rank=2)
+    options = GetArtifactOptions(
+        execution_topology=ExecutionTopologyContext(collective_group=group)
+    )
+
+    topology = options.execution_topology
+    # The topology and the group passed in must both still be attached.
+    assert topology is not None
+    assert topology.collective_group is not None
+
+    # No policy was supplied above, so the field is expected to stay None.
+    assert topology.collective_policy is None
+
+
+def test_collective_policy_parse_rejects_unspecified_value() -> None:
+    with pytest.raises(ValueError, match="must be explicit"):
+        CollectivePolicyMode.parse(None)  # None is the "unspecified" input under test
+
+
 def test_store_options_accept_execution_scoped_defaults() -> None:
     opts = StoreOptions(get=GetArtifactOptions(source=RetrievalPreset.DISK_ONLY))
     assert opts.get is not None
diff --git a/tests/python/api/test_serving_binding_reference_consumer.py b/tests/python/api/test_runtime_realization_reference_consumer.py
similarity index 88%
rename from tests/python/api/test_serving_binding_reference_consumer.py
rename to tests/python/api/test_runtime_realization_reference_consumer.py
index 91ccc645..4ea0c01b 100644
--- a/tests/python/api/test_serving_binding_reference_consumer.py
+++ b/tests/python/api/test_runtime_realization_reference_consumer.py
@@ -2,13 +2,16 @@
 
 from __future__ import annotations
 
+import importlib.util
+
 from google.protobuf.any_pb2 import Any
 
+import tensorcast.api.store as store_api
 from tensorcast.api.context import GroupRealization
-from tensorcast.api.store.serving_binding_reference_consumer import (
+from tensorcast.api.store.runtime_realization_reference_consumer import (
     REFERENCE_RUNTIME,
-    ReferenceServingAcquireResult,
-    ReferenceServingTensorSpec,
+    ReferenceRuntimeAcquireResult,
+    ReferenceRuntimeTensorSpec,
     acquire_reference_binding,
     build_reference_resolved_spec,
     prefetch_reference_binding,
@@ -23,12 +26,28 @@
     BindingValueRef,
     BindingValueVerificationState,
     GroupRealizationAcquireRef,
-    PrefetchedServingBinding,
+    PrefetchHandoff,
 )
 
 
+def test_legacy_reference_consumer_surface_is_removed() -> None:
+    """The old module has no import spec and its names are gone from the store API."""
+    legacy_module = "tensorcast.api.store.serving_binding_reference_consumer"
+    assert importlib.util.find_spec(legacy_module) is None
+    legacy_names = (
+        "ReferenceServingAcquireResult",
+        "ReferenceServingResolvedSpec",
+        "ReferenceServingTensorSpec",
+        "unpack_prefetched_serving_binding",
+        "unpack_prefetched_serving_binding_set",
+    )
+    for legacy_name in legacy_names:
+        assert legacy_name not in store_api.__all__
+        assert not hasattr(store_api, legacy_name)
+
+
 class _FakeDaemonClient:
-    def __init__(self, prefetched: PrefetchedServingBinding) -> None:
+    def __init__(self, prefetched: PrefetchHandoff) -> None:
         self.prefetched = prefetched
         self.prefetch_calls: list[dict[str, object]] = []
         self.acquire_calls: list[dict[str, object]] = []
@@ -68,7 +87,7 @@ def release_placement_lease(self, **kwargs: object):
         return store_daemon_pb2.ReleasePlacementLeaseResponse()
 
 
-def _prefetched() -> PrefetchedServingBinding:
+def _prefetched() -> PrefetchHandoff:
     member = build_reference_resolved_spec(
         source_artifact_id="mi2:source",
         artifact_selection_digest="selection",
@@ -90,7 +109,7 @@ def _prefetched() -> PrefetchedServingBinding:
         reservation_bytes=4,
         scope_digest="scope",
     )
-    return PrefetchedServingBinding(
+    return PrefetchHandoff(
         local_serving_ref="binding-local:binding-1:value-1",
         binding_value_ref=ref,
         daemon_id="daemon-1",
@@ -99,12 +118,12 @@ def _prefetched() -> PrefetchedServingBinding:
         member=member,
         reservation_bytes=4,
         reservation_capability=capability,
-        readiness="serving_local_ready",
+        readiness="runtime_local_ready",
         verification_state=BindingValueVerificationState.LOCAL_ONLY,
     )
 
 
-def _staged_prefetched() -> PrefetchedServingBinding:
+def _staged_prefetched() -> PrefetchHandoff:
     base = _prefetched()
     ref = BindingValueRef(
         binding_id=base.binding_value_ref.binding_id,
@@ -139,7 +158,7 @@ def test_reference_consumer_writes_cache_and_rebuilds_target(tmp_path) -> None:
         source_artifact_id="mi2:source",
         artifact_selection_digest="selection",
         device_uuid="gpu-0",
-        tensor=ReferenceServingTensorSpec(name="weight", size_bytes=16, shape=(4,)),
+        tensor=ReferenceRuntimeTensorSpec(name="weight", size_bytes=16, shape=(4,)),
     )
 
     record = write_reference_resolved_spec_cache_entry(
@@ -182,7 +201,7 @@ def test_reference_consumer_prefetch_acquire_and_release_lifecycle(tmp_path) ->
     )
     release_reference_acquire(fake_client, acquire_result=acquired)
 
-    assert isinstance(acquired, ReferenceServingAcquireResult)
+    assert isinstance(acquired, ReferenceRuntimeAcquireResult)
     assert acquired.has_cuda_ipc_handle is True
     assert acquired.lease_token == b"lease-token"
     assert fake_client.released_tokens == [b"lease-token"]
diff --git a/tests/python/api/test_serving_binding_spec_cache.py b/tests/python/api/test_runtime_realization_spec_cache.py
similarity index 88%
rename from tests/python/api/test_serving_binding_spec_cache.py
rename to tests/python/api/test_runtime_realization_spec_cache.py
index cd57508d..606151d0 100644
--- a/tests/python/api/test_serving_binding_spec_cache.py
+++ b/tests/python/api/test_runtime_realization_spec_cache.py
@@ -3,44 +3,56 @@
 from __future__ import annotations
 
 import hashlib
+import importlib.util
 import json
 import threading
 
 import pytest
 
-from tensorcast.api.store.serving_binding_spec_cache import (
-    ServingBindingSpecCacheGroupIndex,
+from tensorcast.api.store.runtime_realization_spec_cache import (
+    RuntimeRealizationSpecCacheGroupIndex,
     read_matching_resolved_spec_cache_entry,
     read_resolved_spec_cache_entry,
     read_resolved_spec_cache_group_index,
-    serving_binding_spec_cache_root,
+    runtime_realization_spec_cache_root,
     write_resolved_spec_cache_entry,
     write_resolved_spec_cache_group_index,
 )
 from tensorcast.types import (
     BlobRef,
-    ServingBindingMemberRef,
-    ServingBindingResolvedSpecCacheEntry,
-    ServingBindingSourceRef,
-    ServingBindingSourceReuseDecision,
-    ServingTopologyRef,
+    RuntimeBindingMemberRef,
+    RuntimeBindingSourceRef,
+    RuntimeBindingSourceReuseDecision,
+    RuntimeRealizationSpecCacheEntry,
+    RuntimeTopologyRef,
 )
 
 
+def _find_spec_or_none(module_name: str):
+    try:
+        return importlib.util.find_spec(module_name)
+    except ModuleNotFoundError:  # find_spec imports parents; a missing one raises
+        return None  # report an unimportable parent the same as "no spec found"
+
+
+def test_old_serving_binding_spec_cache_module_is_removed() -> None:  # old module path
+    assert _find_spec_or_none("tensorcast.api.store.serving_binding_spec_cache") is None
+
+
 def _entry(
     *,
     blob: bytes = b"layout-bytes",
     member_index: int = 0,
     member_count: int = 1,
-) -> ServingBindingResolvedSpecCacheEntry:
-    topology = ServingTopologyRef(schema_topology_digest="topology-schema")
-    member = ServingBindingMemberRef(
+) -> RuntimeRealizationSpecCacheEntry:
+    topology = RuntimeTopologyRef(schema_topology_digest="topology-schema")
+    member = RuntimeBindingMemberRef(
         member_id=f"member-{member_index}",
         member_index=member_index,
         member_count=member_count,
         group_id="group-1",
     )
-    source = ServingBindingSourceRef(
+    source = RuntimeBindingSourceRef(
         source_kind="checkpoint_artifact",
         artifact_selection_digest="selection-digest",
         source_artifact_ref="mi2:checkpoint",
@@ -51,21 +63,21 @@ def _entry(
         sha256=hashlib.sha256(blob).hexdigest(),
         size_bytes=len(blob),
     )
-    draft = ServingBindingResolvedSpecCacheEntry(
+    draft = RuntimeRealizationSpecCacheEntry(
         schema_version=1,
         cache_key_digest="placeholder",
         spec_digest="placeholder",
         runtime="vllm",
         source=source,
-        source_reuse=ServingBindingSourceReuseDecision(
-            mode="checkpoint_to_serving",
+        source_reuse=RuntimeBindingSourceReuseDecision(
+            mode="checkpoint_to_runtime",
             representation_contract_hash="repr-contract",
         ),
         topology=topology,
         member=member,
         source_schema_hash="source-schema",
         model_config_digest="model-config",
-        serving_build_digest="serving-build",
+        runtime_build_digest="serving-build",
         binding_layout_id="layout-1",
         target_layout_hash="target-layout-hash",
         tensor_schema_hash="tensor-schema",
@@ -80,14 +92,14 @@ def _entry(
 
 
 def _with_recomputed_spec_digest(
-    entry: ServingBindingResolvedSpecCacheEntry,
-) -> ServingBindingResolvedSpecCacheEntry:
+    entry: RuntimeRealizationSpecCacheEntry,
+) -> RuntimeRealizationSpecCacheEntry:
     return entry.model_copy(update={"spec_digest": entry.computed_spec_digest()})
 
 
 def _with_recomputed_digests(
-    entry: ServingBindingResolvedSpecCacheEntry,
-) -> ServingBindingResolvedSpecCacheEntry:
+    entry: RuntimeRealizationSpecCacheEntry,
+) -> RuntimeRealizationSpecCacheEntry:
     with_cache_key = entry.model_copy(
         update={"cache_key_digest": entry.computed_cache_key_digest()}
     )
@@ -121,7 +133,7 @@ def test_first_cold_start_cache_write_publishes_readable_entry(tmp_path) -> None
 
     record = read_resolved_spec_cache_entry(tmp_path, entry.cache_key_digest)
     spec_dir = (
-        serving_binding_spec_cache_root(tmp_path)
+        runtime_realization_spec_cache_root(tmp_path)
         / "specs"
         / "sha256"
         / entry.spec_digest
@@ -229,7 +241,7 @@ def test_resolved_spec_cache_rejects_unsupported_key_schema_version(tmp_path) ->
         blobs={"target_layout": blob},
     )
     key_path = (
-        serving_binding_spec_cache_root(tmp_path)
+        runtime_realization_spec_cache_root(tmp_path)
         / "keys"
         / "sha256"
         / f"{entry.cache_key_digest}.json"
@@ -253,7 +265,7 @@ def test_resolved_spec_cache_rejects_unsupported_manifest_schema_version(
         blobs={"target_layout": blob},
     )
     manifest_path = (
-        serving_binding_spec_cache_root(tmp_path)
+        runtime_realization_spec_cache_root(tmp_path)
         / "specs"
         / "sha256"
         / entry.spec_digest
@@ -278,7 +290,7 @@ def test_resolved_spec_cache_rejects_unsupported_manifest_producer_version(
         blobs={"target_layout": blob},
     )
     manifest_path = (
-        serving_binding_spec_cache_root(tmp_path)
+        runtime_realization_spec_cache_root(tmp_path)
         / "specs"
         / "sha256"
         / entry.spec_digest
@@ -318,7 +330,7 @@ def test_resolved_spec_cache_cleans_tmp_after_publish(tmp_path) -> None:
         blobs={"target_layout": blob},
     )
 
-    tmp_dir = serving_binding_spec_cache_root(tmp_path) / "tmp"
+    tmp_dir = runtime_realization_spec_cache_root(tmp_path) / "tmp"
     assert tmp_dir.exists()
     assert list(tmp_dir.iterdir()) == []
 
@@ -384,7 +396,7 @@ def test_resolved_spec_cache_group_index_roundtrip(tmp_path) -> None:
         entry=entry_1,
         blobs={"target_layout": blob_1},
     )
-    draft = ServingBindingSpecCacheGroupIndex(
+    draft = RuntimeRealizationSpecCacheGroupIndex(
         group_cache_key_digest="placeholder",
         runtime="vllm",
         topology=entry_0.topology,
@@ -414,7 +426,7 @@ def test_resolved_spec_cache_group_index_rejects_empty_runtime(tmp_path) -> None
         entry=entry,
         blobs={"target_layout": b"layout-bytes"},
     )
-    draft = ServingBindingSpecCacheGroupIndex(
+    draft = RuntimeRealizationSpecCacheGroupIndex(
         group_cache_key_digest="placeholder",
         runtime="",
         topology=entry.topology,
@@ -437,7 +449,7 @@ def test_resolved_spec_cache_group_index_rejects_member_mismatch(tmp_path) -> No
         entry=entry,
         blobs={"target_layout": blob},
     )
-    draft = ServingBindingSpecCacheGroupIndex(
+    draft = RuntimeRealizationSpecCacheGroupIndex(
         group_cache_key_digest="placeholder",
         runtime="vllm",
         topology=entry.topology,
@@ -460,7 +472,7 @@ def test_resolved_spec_cache_group_lookup_validates_member_cache(tmp_path) -> No
         entry=entry,
         blobs={"target_layout": blob},
     )
-    draft = ServingBindingSpecCacheGroupIndex(
+    draft = RuntimeRealizationSpecCacheGroupIndex(
         group_cache_key_digest="placeholder",
         runtime="vllm",
         topology=entry.topology,
@@ -472,7 +484,7 @@ def test_resolved_spec_cache_group_lookup_validates_member_cache(tmp_path) -> No
     )
     write_resolved_spec_cache_group_index(tmp_path, index=index)
     key_path = (
-        serving_binding_spec_cache_root(tmp_path)
+        runtime_realization_spec_cache_root(tmp_path)
         / "keys"
         / "sha256"
         / f"{entry.cache_key_digest}.json"
diff --git a/tests/python/api/test_prefetch_serving_binding_target.py b/tests/python/api/test_runtime_realization_target.py
similarity index 72%
rename from tests/python/api/test_prefetch_serving_binding_target.py
rename to tests/python/api/test_runtime_realization_target.py
index e685a9d9..5020bc2d 100644
--- a/tests/python/api/test_prefetch_serving_binding_target.py
+++ b/tests/python/api/test_runtime_realization_target.py
@@ -12,31 +12,31 @@
     BindingValueRef,
     BindingValueVerificationState,
     GroupRealizationAcquireRef,
-    PrefetchedServingBinding,
-    PrefetchedServingBindingMemberFailure,
-    PrefetchedServingBindingSet,
+    PrefetchHandoff,
+    PrefetchHandoffMemberFailure,
+    PrefetchHandoffSet,
     PrefetchRetentionPolicy,
-    ServingBindingMemberRef,
-    ServingBindingResolvedLayout,
-    ServingBindingSetTarget,
-    ServingBindingSourceMemberRef,
-    ServingBindingSourceRef,
-    ServingBindingSourceReuseDecision,
-    ServingBindingTarget,
-    ServingTopologyRef,
-    plan_serving_binding_source_reuse,
+    RealizationTarget,
+    RealizationTargetSet,
+    RuntimeBindingMemberRef,
+    RuntimeBindingResolvedLayout,
+    RuntimeBindingSourceMemberRef,
+    RuntimeBindingSourceRef,
+    RuntimeBindingSourceReuseDecision,
+    RuntimeTopologyRef,
+    plan_runtime_binding_source_reuse,
 )
 
 
-def _topology() -> ServingTopologyRef:
-    return ServingTopologyRef(
+def _topology() -> RuntimeTopologyRef:
+    return RuntimeTopologyRef(
         schema_topology_digest="topology-schema",
         admission_topology_digest="topology-admission",
     )
 
 
-def _member() -> ServingBindingMemberRef:
-    return ServingBindingMemberRef(
+def _member() -> RuntimeBindingMemberRef:
+    return RuntimeBindingMemberRef(
         member_id="member-0",
         member_index=0,
         member_count=1,
@@ -44,8 +44,8 @@ def _member() -> ServingBindingMemberRef:
     )
 
 
-def _checkpoint_source() -> ServingBindingSourceRef:
-    return ServingBindingSourceRef(
+def _checkpoint_source() -> RuntimeBindingSourceRef:
+    return RuntimeBindingSourceRef(
         source_kind="checkpoint_artifact",
         artifact_selection_digest="selection-digest",
         source_artifact_ref="mi2:checkpoint",
@@ -55,17 +55,17 @@ def _checkpoint_source() -> ServingBindingSourceRef:
 
 def _resolved_layout(
     *,
-    source: ServingBindingSourceRef | None = None,
-    source_reuse: ServingBindingSourceReuseDecision | None = None,
-    topology: ServingTopologyRef | None = None,
-    member: ServingBindingMemberRef | None = None,
-) -> ServingBindingResolvedLayout:
-    return ServingBindingResolvedLayout(
+    source: RuntimeBindingSourceRef | None = None,
+    source_reuse: RuntimeBindingSourceReuseDecision | None = None,
+    topology: RuntimeTopologyRef | None = None,
+    member: RuntimeBindingMemberRef | None = None,
+) -> RuntimeBindingResolvedLayout:
+    return RuntimeBindingResolvedLayout(
         binding_layout_id="layout-1",
         source=source or _checkpoint_source(),
         source_reuse=source_reuse
-        or ServingBindingSourceReuseDecision(
-            mode="checkpoint_to_serving",
+        or RuntimeBindingSourceReuseDecision(
+            mode="checkpoint_to_runtime",
             representation_contract_hash="repr-contract",
         ),
         topology=topology or _topology(),
@@ -83,16 +83,16 @@ def _resolved_layout(
 
 def _target(
     *,
-    source: ServingBindingSourceRef | None = None,
-    source_reuse: ServingBindingSourceReuseDecision | None = None,
-    topology: ServingTopologyRef | None = None,
-    member: ServingBindingMemberRef | None = None,
+    source: RuntimeBindingSourceRef | None = None,
+    source_reuse: RuntimeBindingSourceReuseDecision | None = None,
+    topology: RuntimeTopologyRef | None = None,
+    member: RuntimeBindingMemberRef | None = None,
     device: str = "cuda:0",
     device_uuid: str = "GPU-0",
     target_layout: bytes = b"target-layout",
     target_index_bytes: bytes = b"target-index",
     target_layout_hash: str = "target-layout-hash",
-) -> ServingBindingTarget:
+) -> RealizationTarget:
     resolved_source = source or _checkpoint_source()
     resolved_topology = topology or _topology()
     resolved_member = member or _member()
@@ -108,7 +108,7 @@ def _target(
             "target_layout_hash": target_layout_hash,
         }
     )
-    return ServingBindingTarget(
+    return RealizationTarget(
         runtime="vllm",
         device=device,
         device_uuid=device_uuid,
@@ -116,7 +116,7 @@ def _target(
         topology=resolved_topology,
         member=resolved_member,
         model_config_digest="model-config",
-        serving_build_digest="serving-build",
+        runtime_build_digest="serving-build",
         resolved_layout=resolved_layout,
     )
 
@@ -131,19 +131,19 @@ def test_prefetch_signature_keeps_device_and_adds_target() -> None:
     assert "retention" in params
 
 
-def test_serving_binding_target_proto_roundtrip_includes_source() -> None:
+def test_runtime_target_proto_roundtrip_includes_source() -> None:
     target = _target()
 
-    roundtripped = ServingBindingTarget.from_proto(target.to_proto())
+    roundtripped = RealizationTarget.from_proto(target.to_proto())
 
     assert roundtripped == target
     assert roundtripped.source.source_kind == "checkpoint_artifact"
-    assert roundtripped.resolved_layout.source_reuse.mode == "checkpoint_to_serving"
+    assert roundtripped.resolved_layout.source_reuse.mode == "checkpoint_to_runtime"
 
 
-def test_serving_binding_set_requires_shared_source() -> None:
+def test_runtime_target_set_requires_shared_source() -> None:
     target = _target()
-    serving_set = ServingBindingSetTarget(
+    runtime_set = RealizationTargetSet(
         runtime="vllm",
         source=target.source,
         topology=target.topology,
@@ -151,19 +151,19 @@ def test_serving_binding_set_requires_shared_source() -> None:
         members=(target,),
     )
 
-    assert ServingBindingSetTarget.from_proto(serving_set.to_proto()) == serving_set
+    assert RealizationTargetSet.from_proto(runtime_set.to_proto()) == runtime_set
 
 
-def test_serving_binding_set_allows_distinct_member_device_and_layout_specs() -> None:
+def test_runtime_target_set_allows_distinct_member_device_and_layout_specs() -> None:
     topology = _topology()
     source = _checkpoint_source()
-    member_0 = ServingBindingMemberRef(
+    member_0 = RuntimeBindingMemberRef(
         member_id="member-0",
         member_index=0,
         member_count=2,
         group_id="group-1",
     )
-    member_1 = ServingBindingMemberRef(
+    member_1 = RuntimeBindingMemberRef(
         member_id="member-1",
         member_index=1,
         member_count=2,
@@ -190,16 +190,16 @@ def test_serving_binding_set_allows_distinct_member_device_and_layout_specs() ->
         target_layout_hash="target-layout-hash-member-1",
     )
 
-    serving_set = ServingBindingSetTarget(
+    runtime_set = RealizationTargetSet(
         runtime="vllm",
         source=source,
         topology=topology,
         group_id="group-1",
         members=(target_0, target_1),
     )
-    roundtripped = ServingBindingSetTarget.from_proto(serving_set.to_proto())
+    roundtripped = RealizationTargetSet.from_proto(runtime_set.to_proto())
 
-    assert roundtripped == serving_set
+    assert roundtripped == runtime_set
     assert {member.device_uuid for member in roundtripped.members} == {
         "GPU-0",
         "GPU-1",
@@ -210,21 +210,21 @@ def test_serving_binding_set_allows_distinct_member_device_and_layout_specs() ->
     assert all(member.topology == topology for member in roundtripped.members)
 
 
-def test_direct_serving_member_copy_requires_matching_member_schema_and_layout() -> (
+def test_direct_runtime_member_copy_requires_matching_member_schema_and_layout() -> (
     None
 ):
     topology = _topology()
     member = _member()
-    source = ServingBindingSourceRef(
-        source_kind="serving_artifact_set",
+    source = RuntimeBindingSourceRef(
+        source_kind="runtime_artifact_set",
         artifact_selection_digest="selection-digest",
         source_schema_hash="source-schema",
         representation_contract_hash="repr-contract",
-        serving_build_digest="serving-build",
+        runtime_build_digest="serving-build",
         tensor_schema_hash="tensor-schema",
         topology=topology,
         members=(
-            ServingBindingSourceMemberRef(
+            RuntimeBindingSourceMemberRef(
                 member=member,
                 artifact_ref="mi2:serving-member",
                 tensor_schema_hash="tensor-schema",
@@ -232,29 +232,29 @@ def test_direct_serving_member_copy_requires_matching_member_schema_and_layout()
             ),
         ),
     )
-    reuse = ServingBindingSourceReuseDecision(
-        mode="serving_direct_member_copy",
+    reuse = RuntimeBindingSourceReuseDecision(
+        mode="runtime_direct_member_copy",
         representation_contract_hash="repr-contract",
     )
 
     target = _target(source=source, source_reuse=reuse)
 
-    assert target.resolved_layout.source_reuse.mode == "serving_direct_member_copy"
+    assert target.resolved_layout.source_reuse.mode == "runtime_direct_member_copy"
 
 
-def test_direct_serving_member_copy_planner_admits_only_matching_source() -> None:
+def test_direct_runtime_member_copy_planner_admits_only_matching_source() -> None:
     topology = _topology()
     member = _member()
-    source = ServingBindingSourceRef(
-        source_kind="serving_artifact_set",
+    source = RuntimeBindingSourceRef(
+        source_kind="runtime_artifact_set",
         artifact_selection_digest="selection-digest",
         source_schema_hash="source-schema",
         representation_contract_hash="repr-contract",
-        serving_build_digest="serving-build",
+        runtime_build_digest="serving-build",
         tensor_schema_hash="tensor-schema",
         topology=topology,
         members=(
-            ServingBindingSourceMemberRef(
+            RuntimeBindingSourceMemberRef(
                 member=member,
                 artifact_ref="mi2:serving-member",
                 tensor_schema_hash="tensor-schema",
@@ -263,7 +263,7 @@ def test_direct_serving_member_copy_planner_admits_only_matching_source() -> Non
         ),
     )
 
-    decision = plan_serving_binding_source_reuse(
+    decision = plan_runtime_binding_source_reuse(
         source=source,
         topology=topology,
         member=member,
@@ -272,22 +272,22 @@ def test_direct_serving_member_copy_planner_admits_only_matching_source() -> Non
         representation_contract_hash="repr-contract",
     )
 
-    assert decision.mode == "serving_direct_member_copy"
+    assert decision.mode == "runtime_direct_member_copy"
 
 
-def test_direct_serving_member_copy_planner_returns_transform_for_topology_mismatch() -> (
+def test_direct_runtime_member_copy_planner_returns_transform_for_topology_mismatch() -> (
     None
 ):
-    source = ServingBindingSourceRef(
-        source_kind="serving_artifact_set",
+    source = RuntimeBindingSourceRef(
+        source_kind="runtime_artifact_set",
         artifact_selection_digest="selection-digest",
         source_schema_hash="source-schema",
         representation_contract_hash="repr-contract",
-        serving_build_digest="serving-build",
+        runtime_build_digest="serving-build",
         tensor_schema_hash="tensor-schema",
-        topology=ServingTopologyRef(schema_topology_digest="different-topology"),
+        topology=RuntimeTopologyRef(schema_topology_digest="different-topology"),
         members=(
-            ServingBindingSourceMemberRef(
+            RuntimeBindingSourceMemberRef(
                 member=_member(),
                 artifact_ref="mi2:serving-member",
                 tensor_schema_hash="tensor-schema",
@@ -295,7 +295,7 @@ def test_direct_serving_member_copy_planner_returns_transform_for_topology_misma
             ),
         ),
     )
-    decision = plan_serving_binding_source_reuse(
+    decision = plan_runtime_binding_source_reuse(
         source=source,
         topology=_topology(),
         member=_member(),
@@ -304,20 +304,20 @@ def test_direct_serving_member_copy_planner_returns_transform_for_topology_misma
         representation_contract_hash="repr-contract",
     )
 
-    assert decision.mode == "serving_transform_required"
+    assert decision.mode == "runtime_transform_required"
     assert "topology" in (decision.reason or "")
 
 
 def test_transform_required_decision_is_serializable_but_not_direct_copy() -> None:
-    decision = ServingBindingSourceReuseDecision(
-        mode="serving_transform_required",
+    decision = RuntimeBindingSourceReuseDecision(
+        mode="runtime_transform_required",
         work_plan_hash="work-plan",
     )
 
-    assert ServingBindingSourceReuseDecision.from_proto(decision.to_proto()) == decision
+    assert RuntimeBindingSourceReuseDecision.from_proto(decision.to_proto()) == decision
 
 
-def test_prefetched_serving_binding_result_proto_roundtrip() -> None:
+def test_prefetch_handoff_result_proto_roundtrip() -> None:
     member = _member()
     binding_ref = BindingValueRef(
         binding_id="binding-1",
@@ -336,7 +336,7 @@ def test_prefetched_serving_binding_result_proto_roundtrip() -> None:
         scope_digest="scope",
         expires_at_ms=1234,
     )
-    result = PrefetchedServingBinding(
+    result = PrefetchHandoff(
         local_serving_ref="binding-local:binding-1:value-1",
         binding_value_ref=binding_ref,
         daemon_id="daemon-1",
@@ -345,16 +345,16 @@ def test_prefetched_serving_binding_result_proto_roundtrip() -> None:
         member=member,
         reservation_bytes=1024,
         reservation_capability=capability,
-        readiness="serving_local_ready",
+        readiness="runtime_local_ready",
         verification_state=BindingValueVerificationState.LOCAL_ONLY,
         serving_artifact_id=None,
         expires_at_ms=1234,
     )
 
-    assert PrefetchedServingBinding.from_proto(result.to_proto()) == result
+    assert PrefetchHandoff.from_proto(result.to_proto()) == result
 
 
-def test_prefetched_serving_binding_staged_result_proto_roundtrip() -> None:
+def test_prefetch_handoff_staged_result_proto_roundtrip() -> None:
     member = _member()
     binding_ref = BindingValueRef(
         binding_id="binding-1",
@@ -372,7 +372,7 @@ def test_prefetched_serving_binding_staged_result_proto_roundtrip() -> None:
         reservation_bytes=1024,
         scope_digest="scope",
     )
-    result = PrefetchedServingBinding(
+    result = PrefetchHandoff(
         local_serving_ref="binding-local:binding-1:staged-value-1",
         binding_value_ref=binding_ref,
         daemon_id="daemon-1",
@@ -381,7 +381,7 @@ def test_prefetched_serving_binding_staged_result_proto_roundtrip() -> None:
         member=member,
         reservation_bytes=1024,
         reservation_capability=capability,
-        readiness="serving_local_ready",
+        readiness="runtime_local_ready",
         verification_state=BindingValueVerificationState.LOCAL_ONLY,
         staged_value=True,
         group_realization_acquire=GroupRealizationAcquireRef(
@@ -394,10 +394,10 @@ def test_prefetched_serving_binding_staged_result_proto_roundtrip() -> None:
         ),
     )
 
-    assert PrefetchedServingBinding.from_proto(result.to_proto()) == result
+    assert PrefetchHandoff.from_proto(result.to_proto()) == result
 
 
-def test_prefetched_serving_binding_set_partial_diagnostics_roundtrip() -> None:
+def test_prefetch_handoff_set_partial_diagnostics_roundtrip() -> None:
     member = _member()
     binding_ref = BindingValueRef(
         binding_id="binding-1",
@@ -415,7 +415,7 @@ def test_prefetched_serving_binding_set_partial_diagnostics_roundtrip() -> None:
         reservation_bytes=1024,
         scope_digest="scope",
     )
-    success = PrefetchedServingBinding(
+    success = PrefetchHandoff(
         local_serving_ref="binding-local:binding-1:value-1",
         binding_value_ref=binding_ref,
         daemon_id="daemon-1",
@@ -424,16 +424,16 @@ def test_prefetched_serving_binding_set_partial_diagnostics_roundtrip() -> None:
         member=member,
         reservation_bytes=1024,
         reservation_capability=capability,
-        readiness="serving_local_ready",
+        readiness="runtime_local_ready",
         verification_state=BindingValueVerificationState.LOCAL_ONLY,
     )
-    failed_member = ServingBindingMemberRef(
+    failed_member = RuntimeBindingMemberRef(
         member_id="member-1",
         member_index=1,
         member_count=2,
         group_id="group-1",
     )
-    failure = PrefetchedServingBindingMemberFailure(
+    failure = PrefetchHandoffMemberFailure(
         member=failed_member,
         code="FAILED_PRECONDITION",
         message="resolved spec mismatch",
@@ -441,20 +441,20 @@ def test_prefetched_serving_binding_set_partial_diagnostics_roundtrip() -> None:
         cache_key_digest="cache-key",
         spec_digest="spec",
     )
-    result = PrefetchedServingBindingSet(
+    result = PrefetchHandoffSet(
         runtime="vllm",
         topology=_topology(),
         group_id="group-1",
         members=(success,),
-        readiness="serving_local_ready",
+        readiness="runtime_local_ready",
         member_failures=(failure,),
         partial=True,
     )
 
-    assert PrefetchedServingBindingSet.from_proto(result.to_proto()) == result
+    assert PrefetchHandoffSet.from_proto(result.to_proto()) == result
 
 
-def test_prefetched_serving_binding_set_rejects_overlap_between_success_and_failure() -> (
+def test_prefetch_handoff_set_rejects_overlap_between_success_and_failure() -> (
     None
 ):
     member = _member()
@@ -474,7 +474,7 @@ def test_prefetched_serving_binding_set_rejects_overlap_between_success_and_fail
         reservation_bytes=1024,
         scope_digest="scope",
     )
-    success = PrefetchedServingBinding(
+    success = PrefetchHandoff(
         binding_value_ref=binding_ref,
         daemon_id="daemon-1",
         daemon_session_id="session-1",
@@ -482,22 +482,22 @@ def test_prefetched_serving_binding_set_rejects_overlap_between_success_and_fail
         member=member,
         reservation_bytes=1024,
         reservation_capability=capability,
-        readiness="serving_local_ready",
+        readiness="runtime_local_ready",
         verification_state=BindingValueVerificationState.LOCAL_ONLY,
     )
-    failure = PrefetchedServingBindingMemberFailure(
+    failure = PrefetchHandoffMemberFailure(
         member=member,
         code="FAILED_PRECONDITION",
         message="same member failed",
     )
 
     with pytest.raises(ValueError, match="both success and failure"):
-        PrefetchedServingBindingSet(
+        PrefetchHandoffSet(
             runtime="vllm",
             topology=_topology(),
             group_id="group-1",
             members=(success,),
-            readiness="serving_local_ready",
+            readiness="runtime_local_ready",
             member_failures=(failure,),
             partial=True,
         )
diff --git a/tests/python/test_serving_artifact_manifest.py b/tests/python/artifact_runtime/artifact/test_manifest.py
similarity index 58%
rename from tests/python/test_serving_artifact_manifest.py
rename to tests/python/artifact_runtime/artifact/test_manifest.py
index 06145dce..3004a362 100644
--- a/tests/python/test_serving_artifact_manifest.py
+++ b/tests/python/artifact_runtime/artifact/test_manifest.py
@@ -6,14 +6,14 @@
 import torch
 
 import tensorcast as tc
-from tensorcast.serving.artifact_manifest import (
+from tensorcast.artifact_runtime.artifact.manifest import (
     SERVING_MANIFEST_TENSOR_NAME,
-    cross_check_serving_artifact_manifest,
-    read_serving_artifact_manifest_tensor,
+    cross_check_runtime_artifact_manifest,
+    read_runtime_artifact_manifest_tensor,
 )
 
 
-def _manifest(**overrides) -> tc.ServingArtifactManifest:
+def _manifest(**overrides) -> tc.RuntimeArtifactManifest:
     values = {
         "framework_name": "vllm",
         "adapter_version": "adapter-v1",
@@ -26,7 +26,22 @@ def _manifest(**overrides) -> tc.ServingArtifactManifest:
         "build_pipeline_version": "pipeline-v1",
     }
     values.update(overrides)
-    return tc.ServingArtifactManifest(**values)
+    return tc.RuntimeArtifactManifest(**values)
+
+
+class _ManifestTensorResult:
+    def __init__(
+        self,
+        tensors: dict[str, torch.Tensor],
+        releases: list[str],
+        marker: object,
+    ) -> None:
+        self.tensors = tensors
+        self._releases = releases
+        self._marker = marker
+
+    def release(self) -> None:
+        self._releases.append(str(self._marker))
 
 
 def test_serving_artifact_manifest_builds_runtime_policy() -> None:
@@ -48,14 +63,14 @@ def test_serving_artifact_manifest_policy_round_trips_topology_digest() -> None:
     ).to_runtime_policy()
 
     assert policy.expected_topology_admission_digest == "topology-digest"
-    assert tc.ServingRuntimePolicy.from_proto(policy.to_proto()) == policy
+    assert tc.RuntimeArtifactPolicy.from_proto(policy.to_proto()) == policy
 
 
-def test_cross_check_serving_artifact_manifest_accepts_matching_contract() -> None:
+def test_cross_check_runtime_artifact_manifest_accepts_matching_contract() -> None:
     manifest = _manifest()
 
     assert (
-        cross_check_serving_artifact_manifest(
+        cross_check_runtime_artifact_manifest(
             manifest=manifest,
             descriptor_tensor_schema_hash="schema-hash",
             tensor_names=("w",),
@@ -65,9 +80,9 @@ def test_cross_check_serving_artifact_manifest_accepts_matching_contract() -> No
     )
 
 
-def test_cross_check_serving_artifact_manifest_rejects_mismatch() -> None:
+def test_cross_check_runtime_artifact_manifest_rejects_mismatch() -> None:
     with pytest.raises(RuntimeError, match="tensor schema hash mismatch"):
-        cross_check_serving_artifact_manifest(
+        cross_check_runtime_artifact_manifest(
             manifest=_manifest(tensor_schema_hash="other"),
             descriptor_tensor_schema_hash="schema-hash",
             tensor_names=("w",),
@@ -75,62 +90,80 @@ def test_cross_check_serving_artifact_manifest_rejects_mismatch() -> None:
         )
 
 
-def test_read_serving_artifact_manifest_tensor_reads_uint8_payload() -> None:
+def test_read_runtime_artifact_manifest_tensor_reads_uint8_payload() -> None:
     manifest = _manifest()
 
     class _Artifact:
+        def __init__(self) -> None:
+            self.releases: list[str] = []
+
         def subset(self, names):
             assert names == [SERVING_MANIFEST_TENSOR_NAME]
             return self
 
-        def tensor_dict(self, *, device: str):
+        def tensor_dict_with_diagnostics(self, *, device: str):
             assert device == "cpu"
-            return {
-                SERVING_MANIFEST_TENSOR_NAME: torch.tensor(
-                    list(manifest.to_bytes()), dtype=torch.uint8
-                )
-            }
-
+            return _ManifestTensorResult(
+                {
+                    SERVING_MANIFEST_TENSOR_NAME: torch.tensor(
+                        list(manifest.to_bytes()), dtype=torch.uint8
+                    )
+                },
+                self.releases,
+                device,
+            )
+
+    artifact = _Artifact()
     assert (
-        read_serving_artifact_manifest_tensor(
-            _Artifact(),
+        read_runtime_artifact_manifest_tensor(
+            artifact,
             artifact_ref="mi2:serving",
         )
         == manifest
     )
+    assert artifact.releases == ["cpu"]
 
 
-def test_read_serving_artifact_manifest_tensor_falls_back_to_cuda_payload(
+def test_read_runtime_artifact_manifest_tensor_falls_back_to_cuda_payload(
     monkeypatch,
 ) -> None:
     manifest = _manifest()
     monkeypatch.setattr(torch.cuda, "current_device", lambda: 2)
 
     class _Artifact:
+        def __init__(self) -> None:
+            self.releases: list[str] = []
+
         def subset(self, names):
             assert names == [SERVING_MANIFEST_TENSOR_NAME]
             return self
 
-        def tensor_dict(self, *, device: str):
+        def tensor_dict_with_diagnostics(self, *, device: str):
             if device == "cpu":
                 raise RuntimeError("CPU replica is not loaded")
             assert device == torch.device("cuda", 2)
-            return {
-                SERVING_MANIFEST_TENSOR_NAME: torch.tensor(
-                    list(manifest.to_bytes()), dtype=torch.uint8
-                )
-            }
-
+            return _ManifestTensorResult(
+                {
+                    SERVING_MANIFEST_TENSOR_NAME: torch.tensor(
+                        list(manifest.to_bytes()), dtype=torch.uint8
+                    )
+                },
+                self.releases,
+                device,
+            )
+
+    artifact = _Artifact()
     assert (
-        read_serving_artifact_manifest_tensor(
-            _Artifact(),
+        read_runtime_artifact_manifest_tensor(
+            artifact,
             artifact_ref="mi2:serving",
         )
         == manifest
     )
+    assert artifact.releases == ["cuda:2"]
 
 
-def test_read_serving_artifact_manifest_tensor_reports_cpu_and_cuda_errors(
+def test_read_runtime_artifact_manifest_tensor_reports_cpu_and_cuda_errors(
     monkeypatch,
 ) -> None:
     monkeypatch.setattr(torch.cuda, "current_device", lambda: 3)
@@ -140,14 +173,14 @@ def subset(self, names):
             assert names == [SERVING_MANIFEST_TENSOR_NAME]
             return self
 
-        def tensor_dict(self, *, device: str):
+        def tensor_dict_with_diagnostics(self, *, device: str):
             if device == "cpu":
                 raise RuntimeError("CPU replica is not loaded")
             assert device == torch.device("cuda", 3)
             raise RuntimeError("CUDA replica is not loaded")
 
     with pytest.raises(RuntimeError) as exc_info:
-        read_serving_artifact_manifest_tensor(
+        read_runtime_artifact_manifest_tensor(
             _Artifact(),
             artifact_ref="mi2:serving",
         )
@@ -157,30 +190,61 @@ def tensor_dict(self, *, device: str):
     assert "CUDA replica is not loaded" in message
 
 
-def test_cross_check_serving_artifact_manifest_enforces_runtime_policy() -> None:
+def test_read_runtime_artifact_manifest_tensor_releases_invalid_payload() -> None:
+    class _Artifact:
+        def __init__(self) -> None:
+            self.releases: list[str] = []
+
+        def subset(self, names):
+            assert names == [SERVING_MANIFEST_TENSOR_NAME]
+            return self
+
+        def tensor_dict_with_diagnostics(self, *, device: str):
+            assert device == "cpu"
+            return _ManifestTensorResult(
+                {
+                    SERVING_MANIFEST_TENSOR_NAME: torch.tensor(
+                        [1.0],
+                        dtype=torch.float32,
+                    )
+                },
+                self.releases,
+                device,
+            )
+
+    artifact = _Artifact()
+    with pytest.raises(RuntimeError, match="1D torch.uint8"):
+        read_runtime_artifact_manifest_tensor(
+            artifact,
+            artifact_ref="mi2:serving",
+        )
+    assert artifact.releases == ["cpu"]
+
+
+def test_cross_check_runtime_artifact_manifest_enforces_runtime_policy() -> None:
     manifest = _manifest(topology_admission_digest="topology-digest")
     policy = manifest.to_runtime_policy()
 
     assert (
-        cross_check_serving_artifact_manifest(
+        cross_check_runtime_artifact_manifest(
             manifest=manifest,
             descriptor_tensor_schema_hash="schema-hash",
             tensor_names=("w",),
             expected_tensor_schema_hash="schema-hash",
-            serving_runtime_policy=policy,
+            runtime_artifact_policy=policy,
         )
         == manifest
     )
 
     with pytest.raises(RuntimeError, match="manifest ref mismatch"):
-        cross_check_serving_artifact_manifest(
+        cross_check_runtime_artifact_manifest(
             manifest=manifest.model_copy(
                 update={"serving_manifest_ref": "tensor:other_manifest"}
             ),
             descriptor_tensor_schema_hash="schema-hash",
             tensor_names=("w",),
             expected_tensor_schema_hash="schema-hash",
-            serving_runtime_policy=policy,
+            runtime_artifact_policy=policy,
         )
 
 
@@ -209,7 +273,7 @@ def test_cross_check_serving_artifact_manifest_enforces_runtime_policy() -> None
         ),
     ],
 )
-def test_cross_check_serving_artifact_manifest_rejects_pinned_policy_mismatch(
+def test_cross_check_runtime_artifact_manifest_rejects_pinned_policy_mismatch(
     manifest_update,
     policy_update,
     match,
@@ -218,10 +282,10 @@ def test_cross_check_serving_artifact_manifest_rejects_pinned_policy_mismatch(
     policy = manifest.to_runtime_policy().model_copy(update=policy_update)
 
     with pytest.raises(RuntimeError, match=match):
-        cross_check_serving_artifact_manifest(
+        cross_check_runtime_artifact_manifest(
             manifest=manifest.model_copy(update=manifest_update),
             descriptor_tensor_schema_hash="schema-hash",
             tensor_names=("w",),
             expected_tensor_schema_hash="schema-hash",
-            serving_runtime_policy=policy,
+            runtime_artifact_policy=policy,
         )
diff --git a/tests/python/test_serving_resolver.py b/tests/python/artifact_runtime/artifact/test_resolver.py
similarity index 76%
rename from tests/python/test_serving_resolver.py
rename to tests/python/artifact_runtime/artifact/test_resolver.py
index ce023231..3c9be22f 100644
--- a/tests/python/test_serving_resolver.py
+++ b/tests/python/artifact_runtime/artifact/test_resolver.py
@@ -9,9 +9,9 @@
 import torch
 
 import tensorcast as tc
-import tensorcast.serving.resolver as resolver_mod
-from tensorcast.serving.dto import PreparedServingArtifact
-from tensorcast.serving.resolver import ServingArtifactResolver
+import tensorcast.artifact_runtime.artifact.resolver as resolver_mod
+from tensorcast.artifact_runtime.artifact.resolver import RuntimeArtifactResolver
+from tensorcast.artifact_runtime.dto import PreparedRuntimeArtifact
 
 
 class _FakeArtifact:
@@ -41,8 +41,8 @@ def _descriptor(*, include_manifest: bool = True) -> Any:
     )
 
 
-def _manifest(*, tensor_schema_hash: str) -> tc.ServingArtifactManifest:
-    return tc.ServingArtifactManifest(
+def _manifest(*, tensor_schema_hash: str) -> tc.RuntimeArtifactManifest:
+    return tc.RuntimeArtifactManifest(
         framework_name="vllm",
         adapter_version="adapter-v1",
         serving_abi_version="abi-v1",
@@ -56,21 +56,21 @@ def _manifest(*, tensor_schema_hash: str) -> tc.ServingArtifactManifest:
     )
 
 
-def test_resolve_serving_artifact_cross_checks_manifest(monkeypatch) -> None:
+def test_resolve_runtime_artifact_cross_checks_manifest(monkeypatch) -> None:
     descriptor = _descriptor()
     artifact = _FakeArtifact(descriptor)
-    resolver = ServingArtifactResolver(
+    resolver = RuntimeArtifactResolver(
         manifest_tensor_name=tc.SERVING_MANIFEST_TENSOR_NAME,
         schema_version=int(
-            tc.ServingArtifactManifest.model_fields["schema_version"].default
+            tc.RuntimeArtifactManifest.model_fields["schema_version"].default
         ),
+        open_artifact_fn=lambda _ref: artifact,
     )
     tensor_schema_hash = resolver.compute_descriptor_tensor_schema_hash(descriptor)
     manifest = _manifest(tensor_schema_hash=tensor_schema_hash)
-    monkeypatch.setattr(resolver_mod, "open_artifact", lambda ref: artifact)
     monkeypatch.setattr(
         resolver_mod.tc_artifact_manifest,
-        "read_serving_artifact_manifest_tensor",
+        "read_runtime_artifact_manifest_tensor",
         lambda *_args, **_kwargs: manifest,
     )
 
@@ -82,7 +82,7 @@ def test_resolve_serving_artifact_cross_checks_manifest(monkeypatch) -> None:
         resolver.cross_check(
             resolved,
             expected_tensor_schema_hash=tensor_schema_hash,
-            serving_runtime_policy=manifest.to_runtime_policy(),
+            runtime_artifact_policy=manifest.to_runtime_policy(),
         )
         is resolved
     )
@@ -93,15 +93,15 @@ def test_resolve_serving_artifact_cross_checks_manifest(monkeypatch) -> None:
         )
 
 
-def test_resolve_serving_artifact_rejects_missing_manifest_tensor() -> None:
-    resolver = ServingArtifactResolver(
+def test_resolve_runtime_artifact_rejects_missing_manifest_tensor() -> None:
+    resolver = RuntimeArtifactResolver(
         manifest_tensor_name=tc.SERVING_MANIFEST_TENSOR_NAME,
         schema_version=int(
-            tc.ServingArtifactManifest.model_fields["schema_version"].default
+            tc.RuntimeArtifactManifest.model_fields["schema_version"].default
         ),
     )
 
-    with pytest.raises(RuntimeError, match="missing serving manifest tensor"):
+    with pytest.raises(RuntimeError, match="missing runtime manifest tensor"):
         resolver.read_manifest(
             _FakeArtifact(_descriptor(include_manifest=False)),
             artifact_ref="mi2:test:serving",
@@ -109,19 +109,19 @@ def test_resolve_serving_artifact_rejects_missing_manifest_tensor() -> None:
 
 
 def test_resolve_prepared_rejects_local_ready_only_summary() -> None:
-    resolver = ServingArtifactResolver(
+    resolver = RuntimeArtifactResolver(
         manifest_tensor_name=tc.SERVING_MANIFEST_TENSOR_NAME,
         schema_version=int(
-            tc.ServingArtifactManifest.model_fields["schema_version"].default
+            tc.RuntimeArtifactManifest.model_fields["schema_version"].default
         ),
     )
-    prepared = PreparedServingArtifact(
+    prepared = PreparedRuntimeArtifact(
         source_artifact_ref="disk:/model",
         serving_artifact_ref=None,
         serving_manifest_ref="tensor:manifest",
         representation_contract_hash="repr-hash",
         serving_build_digest="build-digest",
-        readiness="serving_local_ready",
+        readiness="runtime_local_ready",
         family="demo",
         tensor_schema_hash="schema-hash",
     )
@@ -133,35 +133,34 @@ def test_resolve_prepared_rejects_local_ready_only_summary() -> None:
 def test_resolve_prepared_reads_manifest_tensor(monkeypatch) -> None:
     descriptor = _descriptor()
     artifact = _FakeArtifact(descriptor)
-    resolver = ServingArtifactResolver(
+    resolver = RuntimeArtifactResolver(
         manifest_tensor_name=tc.SERVING_MANIFEST_TENSOR_NAME,
         schema_version=int(
-            tc.ServingArtifactManifest.model_fields["schema_version"].default
+            tc.RuntimeArtifactManifest.model_fields["schema_version"].default
         ),
+        open_artifact_fn=lambda _ref: artifact,
     )
     tensor_schema_hash = resolver.compute_descriptor_tensor_schema_hash(descriptor)
     manifest = _manifest(tensor_schema_hash=tensor_schema_hash)
-    prepared = PreparedServingArtifact(
+    prepared = PreparedRuntimeArtifact(
         source_artifact_ref="mi2:test:source",
         serving_artifact_ref="mi2:test:serving",
         serving_manifest_ref="tensor:manifest",
         representation_contract_hash="repr-hash",
         serving_build_digest="build-digest",
-        readiness="serving_published_ready",
+        readiness="runtime_published_ready",
         family="demo",
         tensor_schema_hash=tensor_schema_hash,
     )
     calls = {"read_manifest": 0}
 
-    monkeypatch.setattr(resolver_mod, "open_artifact", lambda ref: artifact)
-
     def _read_manifest(*_args, **_kwargs):
         calls["read_manifest"] += 1
         return manifest
 
     monkeypatch.setattr(
         resolver_mod.tc_artifact_manifest,
-        "read_serving_artifact_manifest_tensor",
+        "read_runtime_artifact_manifest_tensor",
         _read_manifest,
     )
 
@@ -174,30 +173,30 @@ def _read_manifest(*_args, **_kwargs):
 def test_resolve_prepared_rejects_manifest_summary_mismatch(monkeypatch) -> None:
     descriptor = _descriptor()
     artifact = _FakeArtifact(descriptor)
-    resolver = ServingArtifactResolver(
+    resolver = RuntimeArtifactResolver(
         manifest_tensor_name=tc.SERVING_MANIFEST_TENSOR_NAME,
         schema_version=int(
-            tc.ServingArtifactManifest.model_fields["schema_version"].default
+            tc.RuntimeArtifactManifest.model_fields["schema_version"].default
         ),
+        open_artifact_fn=lambda _ref: artifact,
     )
     tensor_schema_hash = resolver.compute_descriptor_tensor_schema_hash(descriptor)
     manifest = _manifest(tensor_schema_hash=tensor_schema_hash).model_copy(
         update={"serving_build_digest": "other-build"}
     )
-    prepared = PreparedServingArtifact(
+    prepared = PreparedRuntimeArtifact(
         source_artifact_ref="mi2:test:source",
         serving_artifact_ref="mi2:test:serving",
         serving_manifest_ref="tensor:manifest",
         representation_contract_hash="repr-hash",
         serving_build_digest="build-digest",
-        readiness="serving_published_ready",
+        readiness="runtime_published_ready",
         family="demo",
         tensor_schema_hash=tensor_schema_hash,
     )
-    monkeypatch.setattr(resolver_mod, "open_artifact", lambda ref: artifact)
     monkeypatch.setattr(
         resolver_mod.tc_artifact_manifest,
-        "read_serving_artifact_manifest_tensor",
+        "read_runtime_artifact_manifest_tensor",
         lambda *_args, **_kwargs: manifest,
     )
 
diff --git a/tests/python/test_serving_binding_runtime.py b/tests/python/artifact_runtime/binding/test_execution.py
similarity index 74%
rename from tests/python/test_serving_binding_runtime.py
rename to tests/python/artifact_runtime/binding/test_execution.py
index 868ea9a5..805c4f6b 100644
--- a/tests/python/test_serving_binding_runtime.py
+++ b/tests/python/artifact_runtime/binding/test_execution.py
@@ -5,25 +5,24 @@
 from types import SimpleNamespace
 
 import tensorcast as tc
-from tensorcast.serving.binding_runtime import (
-    bind_serving_artifact,
+from tensorcast.api._config import CollectivePolicyMode
+from tensorcast.artifact_runtime.binding.execution import (
+    bind_runtime_artifact,
     build_materialization_execution_context,
-    swap_serving_artifact,
+    swap_runtime_artifact,
 )
 from tensorcast.types import CollectivePolicy
 
 
-def test_bind_and_swap_serving_artifact_delegate_to_artifact_handles() -> None:
+def test_bind_and_swap_runtime_artifact_delegate_to_artifact_handles() -> None:
     calls: list[tuple[str, object]] = []
 
     class _Subset:
-
         def bind(self, **kwargs):
             calls.append(("bind", kwargs))
             return "binding"
 
     class _Artifact:
-
         def subset(self, names):
             calls.append(("subset", tuple(names)))
             return _Subset()
@@ -37,19 +36,25 @@ def swap(self, artifact, **kwargs):
 
     resolved = SimpleNamespace(artifact=_Artifact())
 
-    assert bind_serving_artifact(
-        resolved_artifact=resolved,
-        tensor_names=("a", "b"),
-        device="cuda:0",
-        serving_runtime_policy="policy",
-        options="options",
-    ) == "binding"
-    assert swap_serving_artifact(
-        binding=_Binding(),
-        resolved_artifact=resolved,
-        serving_runtime_policy="policy",
-        options="options",
-    ) == "swapped"
+    assert (
+        bind_runtime_artifact(
+            resolved_artifact=resolved,
+            tensor_names=("a", "b"),
+            device="cuda:0",
+            runtime_artifact_policy="policy",
+            options="options",
+        )
+        == "binding"
+    )
+    assert (
+        swap_runtime_artifact(
+            binding=_Binding(),
+            resolved_artifact=resolved,
+            runtime_artifact_policy="policy",
+            options="options",
+        )
+        == "swapped"
+    )
 
     swapped_artifact = calls[3][1][0]
     assert isinstance(swapped_artifact, _Subset)
@@ -59,7 +64,7 @@ def swap(self, artifact, **kwargs):
             "bind",
             {
                 "device": "cuda:0",
-                "serving_runtime_policy": "policy",
+                "runtime_artifact_policy": "policy",
                 "options": "options",
             },
         ),
@@ -69,7 +74,7 @@ def swap(self, artifact, **kwargs):
             (
                 swapped_artifact,
                 {
-                    "serving_runtime_policy": "policy",
+                    "runtime_artifact_policy": "policy",
                     "options": "options",
                 },
             ),
@@ -77,14 +82,13 @@ def swap(self, artifact, **kwargs):
     ]
 
 
-def test_swap_serving_artifact_prefers_binding_target_tensor_names() -> None:
+def test_swap_runtime_artifact_prefers_binding_target_tensor_names() -> None:
     calls: list[tuple[str, object]] = []
 
     class _Subset:
         pass
 
     class _Artifact:
-
         def subset(self, names):
             calls.append(("subset", tuple(names)))
             return _Subset()
@@ -96,15 +100,18 @@ def swap(self, artifact, **kwargs):
             calls.append(("swap", artifact))
             return "swapped"
 
-    resolved = SimpleNamespace(artifact=_Artifact(), tensor_names=("a", ))
+    resolved = SimpleNamespace(artifact=_Artifact(), tensor_names=("a",))
 
-    assert swap_serving_artifact(
-        binding=_Binding(),
-        resolved_artifact=resolved,
-        tensor_names=("a", ),
-        serving_runtime_policy=None,
-        options=None,
-    ) == "swapped"
+    assert (
+        swap_runtime_artifact(
+            binding=_Binding(),
+            resolved_artifact=resolved,
+            tensor_names=("a",),
+            runtime_artifact_policy=None,
+            options=None,
+        )
+        == "swapped"
+    )
 
     assert calls[0] == (
         "subset",
@@ -113,14 +120,13 @@ def swap(self, artifact, **kwargs):
     assert isinstance(calls[1][1], _Subset)
 
 
-def test_swap_serving_artifact_prefers_binding_layout_tensor_order() -> None:
+def test_swap_runtime_artifact_prefers_binding_layout_tensor_order() -> None:
     calls: list[tuple[str, object]] = []
 
     class _Subset:
         pass
 
     class _Artifact:
-
         def subset(self, names):
             calls.append(("subset", tuple(names)))
             return _Subset()
@@ -147,13 +153,16 @@ def swap(self, artifact, **kwargs):
 
     resolved = SimpleNamespace(artifact=_Artifact(), tensor_names=("a", "b"))
 
-    assert swap_serving_artifact(
-        binding=_Binding(),
-        resolved_artifact=resolved,
-        tensor_names=("b", ),
-        serving_runtime_policy=None,
-        options=None,
-    ) == "swapped"
+    assert (
+        swap_runtime_artifact(
+            binding=_Binding(),
+            resolved_artifact=resolved,
+            tensor_names=("b",),
+            runtime_artifact_policy=None,
+            options=None,
+        )
+        == "swapped"
+    )
 
     assert calls[0] == (
         "subset",
@@ -181,12 +190,17 @@ def test_materialization_execution_context_builds_collective_options() -> None:
     assert isinstance(options, tc.GetArtifactOptions)
     assert options.execution_topology.collective_group is not None
     assert options.execution_topology.collective_group.group_id == "group-1"
+    assert (
+        options.execution_topology.collective_policy
+        is CollectivePolicyMode.COLLECTIVE_FIRST
+    )
     assert profile["collective_requested"] is True
     assert profile["source_locality"] == "shared_source"
 
 
-def test_materialization_execution_context_disables_collective_when_unavailable(
-) -> None:
+def test_materialization_execution_context_disables_collective_when_unavailable() -> (
+    None
+):
     options, profile = build_materialization_execution_context(
         artifact_ref="mi2:test:serving",
         operation_scope="startup.bind",
diff --git a/tests/python/test_serving_builder_binding_plan.py b/tests/python/artifact_runtime/binding/test_plan.py
similarity index 97%
rename from tests/python/test_serving_builder_binding_plan.py
rename to tests/python/artifact_runtime/binding/test_plan.py
index 6c9e2d1c..d37f3427 100644
--- a/tests/python/test_serving_builder_binding_plan.py
+++ b/tests/python/artifact_runtime/binding/test_plan.py
@@ -2,11 +2,11 @@
 
 from __future__ import annotations
 
-from tensorcast.serving.builder.binding_plan import (
+from tensorcast.artifact_runtime.binding.plan import (
     lower_trace_plan_for_binding,
     lower_trace_plan_for_realization,
 )
-from tensorcast.serving.builder.trace_ir import (
+from tensorcast.artifact_runtime.recipe.trace_ir import (
     CopyPlanEntry,
     MultiRange,
     Range,
diff --git a/tests/python/test_serving_retained_binding_acquire.py b/tests/python/artifact_runtime/binding/test_retained.py
similarity index 74%
rename from tests/python/test_serving_retained_binding_acquire.py
rename to tests/python/artifact_runtime/binding/test_retained.py
index dceb8a97..be74b272 100644
--- a/tests/python/test_serving_retained_binding_acquire.py
+++ b/tests/python/artifact_runtime/binding/test_retained.py
@@ -2,6 +2,7 @@
 
 from __future__ import annotations
 
+import json
 from contextlib import contextmanager
 from dataclasses import replace
 from types import SimpleNamespace
@@ -10,32 +11,44 @@
 import pytest
 import torch
 
-import tensorcast as tc
-from tensorcast.serving.retained_binding import (
-    ParsedRetainedServingBindingAuthority,
-    RetainedServingBindingAuthority,
-    RetainedServingBindingExpectedDigests,
-    acquire_retained_serving_binding,
-    acquire_retained_serving_binding_lease,
-    parse_retained_serving_binding_authority,
+import tensorcast.retained_realization as retained_realization_module
+import tensorcast.retained_realization_authority as retained_authority_module
+from tensorcast.artifact_runtime.binding.retained import (
+    acquire_retained_binding,
+    acquire_retained_binding_lease,
     promote_current_value_and_wait,
     retained_binding_acquire_mode,
-    retained_serving_binding_extra_from_prefetched_binding,
-    retained_serving_binding_extra_json,
-    retained_serving_binding_trusted_reservation_bytes,
+)
+from tensorcast.retained_realization import (
+    RetainedRealizationClaim,
+    RetainedRealizationExpectedDigests,
+    parse_retained_realization_authority,
+    parse_retained_realization_claim,
+    retained_realization_claim_extra_from_handoff,
+    retained_realization_claim_extra_json_from_handoff,
+    retained_realization_claim_mode,
+    retained_realization_trusted_reservation_bytes,
+)
+from tensorcast.retained_realization_authority import (
+    ParsedRetainedRealizationAuthority,
+    RetainedRealizationAuthority,
+)
+from tensorcast.retained_realization_authority import (
+    RetainedRealizationExpectedDigests as RetainedRealizationAuthorityExpectedDigests,
 )
 from tensorcast.types import (
     BindingReservationCapability,
     BindingValueRef,
     BindingValueVerificationState,
     GroupRealizationAcquireRef,
+    PrefetchHandoff,
     PrefetchRetentionPolicy,
-    ServingBindingMemberRef,
-    ServingBindingResolvedLayout,
-    ServingBindingSourceRef,
-    ServingBindingSourceReuseDecision,
-    ServingBindingTarget,
-    ServingTopologyRef,
+    RealizationTarget,
+    RuntimeBindingMemberRef,
+    RuntimeBindingResolvedLayout,
+    RuntimeBindingSourceRef,
+    RuntimeBindingSourceReuseDecision,
+    RuntimeTopologyRef,
 )
 
 
@@ -45,9 +58,9 @@ def _authority(
     member_index: int = 0,
     member_count: int = 1,
     expires_at_ms: int | None = None,
-) -> ParsedRetainedServingBindingAuthority:
+) -> ParsedRetainedRealizationAuthority:
     suffix = member_index + 1
-    member = ServingBindingMemberRef(
+    member = RuntimeBindingMemberRef(
         member_id=f"member-{member_index}",
         member_index=member_index,
         member_count=member_count,
@@ -70,7 +83,7 @@ def _authority(
         scope_digest="scope-1",
         expires_at_ms=expires_at_ms,
     )
-    return ParsedRetainedServingBindingAuthority(
+    return ParsedRetainedRealizationAuthority(
         group_id="group-1",
         local_serving_ref=f"binding-local:binding-{suffix}:value-{suffix}",
         binding_value_ref=binding_ref,
@@ -80,19 +93,19 @@ def _authority(
         device_uuid=f"gpu-{member_index}",
         member=member,
         reservation_bytes=reservation_bytes,
-        expected=RetainedServingBindingExpectedDigests(
+        expected=RetainedRealizationAuthorityExpectedDigests(
             target_layout_hash="layout-hash",
             tensor_schema_hash="schema-hash",
-            serving_build_digest="build-digest",
+            runtime_build_digest="build-digest",
             resolved_spec_digest="spec-digest",
         ),
-        readiness="serving_local_ready",
+        readiness="runtime_local_ready",
         verification_state="local_only",
     )
 
 
 def _authority_payload(
-    authority: ParsedRetainedServingBindingAuthority,
+    authority: ParsedRetainedRealizationAuthority,
 ) -> dict[str, object]:
     return {
         "group_id": authority.group_id,
@@ -125,6 +138,44 @@ def _set_nested(
     current[path[-1]] = value
 
 
+def test_serving_retained_binding_does_not_export_legacy_authority_aliases() -> None:
+    import tensorcast.artifact_runtime.binding.retained as retained_binding_module
+
+    assert not hasattr(
+        retained_binding_module, "ParsedRetainedServingBindingAuthority"
+    )
+    assert not hasattr(retained_binding_module, "RetainedServingBindingAuthority")
+    assert not hasattr(
+        retained_binding_module, "RetainedServingBindingExpectedDigests"
+    )
+
+
+def test_retained_realization_authority_module_hides_serving_aliases() -> None:
+    public_names = set(retained_authority_module.__all__)
+
+    assert "ParsedRetainedServingBindingAuthority" not in public_names
+    assert "RetainedServingBindingAuthority" not in public_names
+    assert "RetainedServingBindingExpectedDigests" not in public_names
+    assert not hasattr(
+        retained_authority_module, "ParsedRetainedServingBindingAuthority"
+    )
+    assert not hasattr(retained_authority_module, "RetainedServingBindingAuthority")
+    assert not hasattr(
+        retained_authority_module, "RetainedServingBindingExpectedDigests"
+    )
+
+
+def test_retained_realization_module_hides_prefetched_compat_helpers() -> None:
+    public_names = set(retained_realization_module.__all__)
+
+    for removed_name in (
+        "retained_realization_claim_extra_from_prefetched_binding",
+        "retained_realization_claim_extra_json",
+    ):
+        assert removed_name not in public_names
+        assert not hasattr(retained_realization_module, removed_name)
+
+
 def _response(*, reservation_bytes: int = 4096, lease_token: bytes = b"lease"):
     return SimpleNamespace(
         reservation_bytes=reservation_bytes,
@@ -138,15 +189,15 @@ def _response(*, reservation_bytes: int = 4096, lease_token: bytes = b"lease"):
     )
 
 
-def _topology() -> ServingTopologyRef:
-    return ServingTopologyRef(
+def _topology() -> RuntimeTopologyRef:
+    return RuntimeTopologyRef(
         schema_topology_digest="topology-schema",
         admission_topology_digest="topology-admission",
     )
 
 
-def _source() -> ServingBindingSourceRef:
-    return ServingBindingSourceRef(
+def _source() -> RuntimeBindingSourceRef:
+    return RuntimeBindingSourceRef(
         source_kind="checkpoint_artifact",
         artifact_selection_digest="selection-digest",
         source_artifact_ref="mi2:checkpoint",
@@ -155,17 +206,17 @@ def _source() -> ServingBindingSourceRef:
 
 
 def _target(
-    member: ServingBindingMemberRef,
+    member: RuntimeBindingMemberRef,
     *,
-    topology: ServingTopologyRef | None = None,
-) -> ServingBindingTarget:
+    topology: RuntimeTopologyRef | None = None,
+) -> RealizationTarget:
     resolved_topology = topology or _topology()
     source = _source()
-    source_reuse = ServingBindingSourceReuseDecision(
-        mode="checkpoint_to_serving",
+    source_reuse = RuntimeBindingSourceReuseDecision(
+        mode="checkpoint_to_runtime",
         representation_contract_hash="repr-contract",
     )
-    resolved_layout = ServingBindingResolvedLayout(
+    resolved_layout = RuntimeBindingResolvedLayout(
         binding_layout_id="layout-1",
         source=source,
         source_reuse=source_reuse,
@@ -178,7 +229,7 @@ def _target(
         spec_digest="spec-digest",
         source_schema_hash="source-schema",
     )
-    return ServingBindingTarget(
+    return RealizationTarget(
         runtime="vllm",
         device="cuda:0",
         device_uuid="GPU-0",
@@ -186,16 +237,16 @@ def _target(
         topology=resolved_topology,
         member=member,
         model_config_digest="model-config",
-        serving_build_digest="serving-build",
+        runtime_build_digest="serving-build",
         resolved_layout=resolved_layout,
     )
 
 
 def _prefetched(
-    member: ServingBindingMemberRef,
+    member: RuntimeBindingMemberRef,
     *,
     reservation_bytes: int = 4096,
-) -> tc.PrefetchedServingBinding:
+) -> PrefetchHandoff:
     binding_ref = BindingValueRef(
         binding_id="binding-1",
         binding_layout_id="layout-1",
@@ -212,7 +263,7 @@ def _prefetched(
         reservation_bytes=reservation_bytes,
         scope_digest="scope-1",
     )
-    return tc.PrefetchedServingBinding(
+    return PrefetchHandoff(
         local_serving_ref="binding-local:binding-1:value-1",
         binding_value_ref=binding_ref,
         daemon_id="daemon-1",
@@ -221,7 +272,7 @@ def _prefetched(
         member=member,
         reservation_bytes=reservation_bytes,
         reservation_capability=capability,
-        readiness="serving_local_ready",
+        readiness="runtime_local_ready",
         verification_state=BindingValueVerificationState.LOCAL_ONLY,
     )
 
@@ -252,11 +303,11 @@ def ensure_client(self):
         return self.client
 
 
-def test_acquire_retained_serving_binding_lease_releases_unrestored_lease_on_context_exit():
+def test_acquire_retained_binding_lease_releases_unrestored_lease_on_context_exit():
     authority = _authority()
     client = _Client(_response())
 
-    with acquire_retained_serving_binding_lease(
+    with acquire_retained_binding_lease(
         authority,
         runtime=_Runtime(client),
         caller_pid=123,
@@ -276,11 +327,11 @@ def test_acquire_retained_serving_binding_lease_releases_unrestored_lease_on_con
     assert lease.release_contract.released is True
 
 
-def test_acquire_retained_serving_binding_uses_authority():
+def test_acquire_retained_binding_uses_authority():
     authority = _authority()
     client = _Client(_response())
 
-    with acquire_retained_serving_binding(
+    with acquire_retained_binding(
         authority=authority,
         runtime=_Runtime(client),
         caller_pid=456,
@@ -298,7 +349,7 @@ def test_acquire_retained_binding_rejects_expired_capability_before_daemon_call(
 
     with (
         pytest.raises(ValueError, match="reservation_capability has expired"),
-        acquire_retained_serving_binding_lease(authority, runtime=_Runtime(client)),
+        acquire_retained_binding_lease(authority, runtime=_Runtime(client)),
     ):
         pass
 
@@ -310,7 +361,7 @@ def test_retained_binding_debug_status_tracks_capability_ttl_and_lifecycle():
     authority = _authority(expires_at_ms=4_102_444_800_000)
     client = _Client(_response())
 
-    with acquire_retained_serving_binding_lease(
+    with acquire_retained_binding_lease(
         authority, runtime=_Runtime(client)
     ) as lease:
         acquired_status = lease.debug_status()
@@ -318,7 +369,7 @@ def test_retained_binding_debug_status_tracks_capability_ttl_and_lifecycle():
         assert acquired_status["state"] == "acquired"
         assert acquired_status["reservation_capability_id"] == "capability-1"
         assert acquired_status["reservation_expires_at_ms"] == 4_102_444_800_000
-        assert acquired_status["readiness"] == "serving_local_ready"
+        assert acquired_status["readiness"] == "runtime_local_ready"
         assert acquired_status["verification_state"] == "local_only"
         assert acquired_status["lease_token_present"] is True
         assert acquired_status["release_policy"] == (
@@ -361,8 +412,8 @@ def test_retained_prefetch_retention_policy_round_trips_ttl_and_idle_retire():
     assert round_tripped.allow_acquire_after_creator_exit is True
 
 
-def test_acquire_retained_serving_binding_acquires_local_ready(monkeypatch):
-    member = ServingBindingMemberRef(
+def test_acquire_retained_binding_acquires_local_ready(monkeypatch):
+    member = RuntimeBindingMemberRef(
         member_id="member-0",
         member_index=0,
         member_count=1,
@@ -375,7 +426,7 @@ def test_acquire_retained_serving_binding_acquires_local_ready(monkeypatch):
         store_api, "device_uuid_for", lambda device_index: f"gpu-{device_index}"
     )
 
-    with acquire_retained_serving_binding(
+    with acquire_retained_binding(
         local_serving_ref="binding-local:binding-1:value-1",
         target_device=torch.device("cuda:3"),
         expected_member=member,
@@ -384,7 +435,7 @@ def test_acquire_retained_serving_binding_acquires_local_ready(monkeypatch):
         runtime=_Runtime(client),
         caller_pid=789,
     ) as lease:
-        assert lease.authority.readiness == "serving_local_ready"
+        assert lease.authority.readiness == "runtime_local_ready"
 
     assert (
         client.acquire_calls[0]["local_serving_ref"]
@@ -404,7 +455,7 @@ def fail_restore(**_kwargs):
 
     with (
         pytest.raises(RuntimeError, match="restore failed"),
-        acquire_retained_serving_binding_lease(
+        acquire_retained_binding_lease(
             authority, runtime=_Runtime(client)
         ) as lease,
     ):
@@ -420,7 +471,7 @@ def test_attached_close_releases_once_after_successful_restore():
     authority = _authority()
     client = _Client(_response())
 
-    with acquire_retained_serving_binding_lease(
+    with acquire_retained_binding_lease(
         authority, runtime=_Runtime(client)
     ) as lease:
         attached = lease.restore(
@@ -437,7 +488,7 @@ def test_transfer_to_runtime_moves_close_ownership():
     authority = _authority()
     client = _Client(_response())
 
-    with acquire_retained_serving_binding_lease(
+    with acquire_retained_binding_lease(
         authority, runtime=_Runtime(client)
     ) as lease:
         attached = lease.restore(
@@ -463,7 +514,7 @@ def test_restored_lease_releases_on_context_exit_when_not_transferred():
 
     with (
         pytest.raises(RuntimeError, match="attach failed"),
-        acquire_retained_serving_binding_lease(
+        acquire_retained_binding_lease(
             authority, runtime=_Runtime(client)
         ) as lease,
     ):
@@ -480,7 +531,7 @@ def test_retained_binding_lifecycle_rejects_invalid_transitions():
     authority = _authority()
     client = _Client(_response())
 
-    with acquire_retained_serving_binding_lease(
+    with acquire_retained_binding_lease(
         authority, runtime=_Runtime(client)
     ) as lease:
         lease.close()
@@ -491,7 +542,7 @@ def test_retained_binding_lifecycle_rejects_invalid_transitions():
             )
 
     client = _Client(_response())
-    with acquire_retained_serving_binding_lease(
+    with acquire_retained_binding_lease(
         authority, runtime=_Runtime(client)
     ) as lease:
         attached = lease.restore(
@@ -508,7 +559,7 @@ def test_retained_binding_lifecycle_rejects_invalid_transitions():
             attached.transfer_to_runtime()
 
 
-def test_acquire_retained_serving_binding_lease_rejects_mismatched_acquire_response():
+def test_acquire_retained_binding_lease_rejects_mismatched_acquire_response():
     authority = _authority()
     response = _response()
     response.current_value.binding_value_id = "other-value"
@@ -516,20 +567,20 @@ def test_acquire_retained_serving_binding_lease_rejects_mismatched_acquire_respo
 
     with (
         pytest.raises(RuntimeError, match="different binding value"),
-        acquire_retained_serving_binding_lease(authority, runtime=_Runtime(client)),
+        acquire_retained_binding_lease(authority, runtime=_Runtime(client)),
     ):
         pass
 
     assert client.released_tokens == [b"lease"]
 
 
-def test_acquire_retained_serving_binding_lease_releases_mismatched_reservation_response():
+def test_acquire_retained_binding_lease_releases_mismatched_reservation_response():
     authority = _authority(reservation_bytes=4096)
     client = _Client(_response(reservation_bytes=8192))
 
     with (
         pytest.raises(RuntimeError, match="reservation byte mismatch"),
-        acquire_retained_serving_binding_lease(authority, runtime=_Runtime(client)),
+        acquire_retained_binding_lease(authority, runtime=_Runtime(client)),
     ):
         pass
 
@@ -587,7 +638,7 @@ def test_parse_retained_binding_authority_rejects_inconsistent_authority(
     }
 
     with pytest.raises(ValueError, match=match):
-        parse_retained_serving_binding_authority(extra)
+        parse_retained_realization_authority(extra)
 
 
 def test_parse_retained_binding_authority_rejects_member_group_mismatch():
@@ -604,13 +655,13 @@ def test_parse_retained_binding_authority_rejects_member_group_mismatch():
     }
 
     with pytest.raises(ValueError, match="member_ref.group_id"):
-        parse_retained_serving_binding_authority(extra)
+        parse_retained_realization_authority(extra)
 
 
 def test_parse_retained_binding_authority_requires_published_artifact_scope():
     authority = replace(
         _authority(),
-        readiness="serving_published_ready",
+        readiness="runtime_published_ready",
         serving_artifact_id=None,
     )
     extra = {
@@ -621,16 +672,16 @@ def test_parse_retained_binding_authority_requires_published_artifact_scope():
     }
 
     with pytest.raises(ValueError, match="serving_artifact_id"):
-        parse_retained_serving_binding_authority(extra)
+        parse_retained_realization_authority(extra)
 
 
 def test_acquire_retained_binding_rejects_reserved_authority_before_daemon_call():
-    authority = replace(_authority(), readiness="serving_reserved")
+    authority = replace(_authority(), readiness="runtime_reserved")
     client = _Client(_response())
 
     with (
-        pytest.raises(ValueError, match="serving_reserved"),
-        acquire_retained_serving_binding_lease(authority, runtime=_Runtime(client)),
+        pytest.raises(ValueError, match="runtime_reserved"),
+        acquire_retained_binding_lease(authority, runtime=_Runtime(client)),
     ):
         pass
 
@@ -653,7 +704,7 @@ def test_acquire_retained_binding_requires_group_publish_wait_before_attach():
 
     with (
         pytest.raises(ValueError, match="wait for group publish"),
-        acquire_retained_serving_binding_lease(authority, runtime=_Runtime(client)),
+        acquire_retained_binding_lease(authority, runtime=_Runtime(client)),
     ):
         pass
 
@@ -673,30 +724,30 @@ def test_acquire_retained_binding_passes_group_publish_wait_authority():
     authority = replace(_authority(), group_realization_acquire=group_acquire)
     client = _Client(_response())
 
-    with acquire_retained_serving_binding_lease(authority, runtime=_Runtime(client)):
+    with acquire_retained_binding_lease(authority, runtime=_Runtime(client)):
         pass
 
     assert client.acquire_calls[0]["group_realization_acquire"] == group_acquire
     assert client.released_tokens == [b"lease"]
 
 
-def test_retained_binding_public_helpers_build_extra_from_prefetched_binding():
+def test_retained_binding_public_helpers_build_extra_from_handoff():
     member = _authority().member
     prefetched = _prefetched(member, reservation_bytes=8192)
     target = _target(member)
 
-    extra = retained_serving_binding_extra_from_prefetched_binding(
-        prefetched=prefetched,
+    extra = retained_realization_claim_extra_from_handoff(
+        handoff=prefetched,
         target=target,
         expected_member=member,
     )
-    authority = parse_retained_serving_binding_authority(extra)
+    authority = parse_retained_realization_authority(extra)
 
     assert "retained_binding_acquire" in extra
     assert retained_binding_acquire_mode(extra) == "external"
     assert isinstance(extra["retained_binding_acquire"]["authority"], dict)
     assert (
-        RetainedServingBindingAuthority.model_validate(
+        RetainedRealizationAuthority.model_validate(
             extra["retained_binding_acquire"]["authority"]
         ).trusted_reservation_bytes
         == 8192
@@ -705,25 +756,95 @@ def test_retained_binding_public_helpers_build_extra_from_prefetched_binding():
     assert authority.reservation_bytes == 8192
     assert authority.expected.target_layout_hash == "target-layout-hash"
     assert authority.expected.tensor_schema_hash == "tensor-schema"
-    assert authority.expected.serving_build_digest == "serving-build"
+    assert authority.expected.runtime_build_digest == "serving-build"
     assert authority.expected.resolved_spec_digest == "spec-digest"
-    assert retained_serving_binding_trusted_reservation_bytes(extra) == 8192
+    assert retained_realization_trusted_reservation_bytes(extra) == 8192
     assert (
-        retained_serving_binding_trusted_reservation_bytes(
+        retained_realization_trusted_reservation_bytes(
             SimpleNamespace(model_loader_extra_config=extra)
         )
         == 8192
     )
-    assert '"mode":"external"' in retained_serving_binding_extra_json(
-        prefetched=prefetched,
+    assert '"mode":"external"' in retained_realization_claim_extra_json_from_handoff(
+        handoff=prefetched,
         target=target,
         expected_member=member,
     )
-    assert '"retained_binding_acquire"' in retained_serving_binding_extra_json(
-        prefetched=prefetched,
+    assert '"retained_binding_acquire"' in retained_realization_claim_extra_json_from_handoff(
+        handoff=prefetched,
+        target=target,
+        expected_member=member,
+    )
+
+
+def test_retained_realization_claim_helpers_use_primary_authority_contract():
+    member = _authority().member
+    handoff = _prefetched(member, reservation_bytes=8192)
+    target = _target(member)
+
+    extra = retained_realization_claim_extra_from_handoff(
+        handoff=handoff,
         target=target,
         expected_member=member,
     )
+    claim = parse_retained_realization_claim(extra, expected_member=member)
+    authority = parse_retained_realization_authority(extra, expected_member=member)
+
+    assert isinstance(claim, RetainedRealizationClaim)
+    assert claim.authority == authority
+    assert claim.as_authority() == authority
+    assert parse_retained_realization_authority(extra, expected_member=member) == (
+        authority
+    )
+    assert claim.group_id == authority.group_id
+    assert claim.local_ref == authority.local_serving_ref
+    assert claim.binding_value_ref == authority.binding_value_ref
+    assert claim.reservation_capability == authority.reservation_capability
+    assert claim.daemon_id == authority.daemon_id
+    assert claim.daemon_session_id == authority.daemon_session_id
+    assert claim.device_uuid == authority.device_uuid
+    assert claim.member == member
+    assert claim.reservation_bytes == 8192
+    assert isinstance(claim.expected, RetainedRealizationExpectedDigests)
+    assert claim.expected.tensor_schema_hash == "tensor-schema"
+    assert claim.readiness == "runtime_local_ready"
+    assert claim.verification_state == "local_only"
+    assert claim.serving_artifact_id == authority.serving_artifact_id
+    assert claim.group_realization_acquire == authority.group_realization_acquire
+    assert retained_realization_claim_mode(extra) == "external"
+    assert retained_realization_trusted_reservation_bytes(extra) == 8192
+    assert (
+        retained_realization_trusted_reservation_bytes(
+            SimpleNamespace(model_loader_extra_config=extra)
+        )
+        == 8192
+    )
+    assert retained_realization_claim_extra_from_handoff(
+        handoff=handoff,
+        target=target,
+        expected_member=member,
+    ) == extra
+    assert json.loads(
+        retained_realization_claim_extra_json_from_handoff(
+            handoff=handoff,
+            target=target,
+            expected_member=member,
+        )
+    ) == extra
+
+
+def test_retained_realization_claim_trusted_bytes_fail_closed_on_mismatch():
+    payload = _authority_payload(_authority())
+    _set_nested(payload, ("reservation_capability", "reservation_bytes"), 8192)
+    extra = {
+        "retained_binding_acquire": {
+            "mode": "external",
+            "authority": payload,
+        },
+    }
+
+    with pytest.raises(ValueError, match="reservation_bytes"):
+        retained_realization_trusted_reservation_bytes(extra)
 
 
 def test_retained_binding_authority_set_selects_expected_member():
@@ -747,7 +868,7 @@ def test_retained_binding_authority_set_selects_expected_member():
         },
     }
 
-    selected = parse_retained_serving_binding_authority(
+    selected = parse_retained_realization_authority(
         extra,
         expected_member=authority1.member,
     )
@@ -755,7 +876,7 @@ def test_retained_binding_authority_set_selects_expected_member():
     assert selected.member == authority1.member
     assert selected.reservation_bytes == 8192
     assert (
-        retained_serving_binding_trusted_reservation_bytes(
+        retained_realization_trusted_reservation_bytes(
             extra,
             expected_member=authority1.member,
         )
@@ -777,7 +898,7 @@ def test_retained_binding_authority_set_requires_expected_member():
     }
 
     with pytest.raises(ValueError, match="expected serving member"):
-        parse_retained_serving_binding_authority(extra)
+        parse_retained_realization_authority(extra)
 
 
 def test_retained_binding_extra_preserves_group_realization_acquire():
@@ -797,12 +918,12 @@ def test_retained_binding_extra_preserves_group_realization_acquire():
         }
     )
 
-    extra = retained_serving_binding_extra_from_prefetched_binding(
-        prefetched=prefetched,
+    extra = retained_realization_claim_extra_from_handoff(
+        handoff=prefetched,
         target=target,
         expected_member=member,
     )
-    authority = parse_retained_serving_binding_authority(extra)
+    authority = parse_retained_realization_authority(extra)
 
     assert authority.group_realization_acquire is not None
     assert authority.group_realization_acquire.transaction_id == "txn-1"
@@ -814,8 +935,8 @@ def test_retained_binding_extra_rejects_unexpected_member():
     unexpected = member.model_copy(update={"member_id": "other"})
 
     with pytest.raises(ValueError, match="does not match expected placement"):
-        retained_serving_binding_extra_from_prefetched_binding(
-            prefetched=_prefetched(member),
+        retained_realization_claim_extra_from_handoff(
+            handoff=_prefetched(member),
             target=_target(member),
             expected_member=unexpected,
         )
diff --git a/tests/python/test_serving_replica_publication.py b/tests/python/artifact_runtime/publication/test_replica.py
similarity index 78%
rename from tests/python/test_serving_replica_publication.py
rename to tests/python/artifact_runtime/publication/test_replica.py
index 28572b81..9771248c 100644
--- a/tests/python/test_serving_replica_publication.py
+++ b/tests/python/artifact_runtime/publication/test_replica.py
@@ -9,18 +9,24 @@
 
 import pytest
 
-import tensorcast.serving._runtime_impl.lifecycle as integration_mod
-from tensorcast.serving.config import ServingConfig
-from tensorcast.serving.errors import ReplicaPublicationError
-from tensorcast.serving.hosts import IntegrationHost
-from tensorcast.serving.policy import ServingArtifactLocator
-from tensorcast.serving.runtime import RequestContext, ServingRuntimeSession
-from tensorcast.serving.runtime_attachment import (
+import tensorcast.artifact_runtime.lifecycle as integration_mod
+from tensorcast.artifact_runtime.attachment import (
     RuntimeAttachment,
     RuntimeBindingState,
     RuntimeBindingView,
 )
-from tensorcast.serving.runtime_view import (
+from tensorcast.artifact_runtime.errors import ReplicaPublicationError
+from tensorcast.artifact_runtime.intent import RuntimeRequestContext
+from tensorcast.artifact_runtime.locator import ArtifactLocator
+from tensorcast.artifact_runtime.policy import RuntimePolicy
+from tensorcast.artifact_runtime.publication.actions import (
+    project_runtime_replica_publication_state,
+    publish_runtime_replica,
+    retire_runtime_replica,
+    runtime_replica_publication_settings,
+)
+from tensorcast.artifact_runtime.reload import reload_runtime_attachment
+from tensorcast.artifact_runtime.view import (
     PublishedReplicaProjection,
     RuntimeWorkerView,
 )
@@ -100,18 +106,56 @@ def publish_replica_operation(self) -> _Operation:
         raise RuntimeError("publish failed")
 
 
-def _session(
+def _settings(
+    config: dict[str, object] | None = None,
+):
+    return runtime_replica_publication_settings(config)
+
+
+def _publish(
+    attachment: RuntimeAttachment,
     config: dict[str, object] | None = None,
     *,
     profile_sink: object | None = None,
-) -> ServingRuntimeSession:
-    return ServingRuntimeSession.from_config(
-        ServingConfig.from_mapping(config),
-        host=IntegrationHost(
-            framework=cast(Any, object()),
-            placement=cast(Any, object()),
+) -> RuntimeAttachment:
+    settings = _settings(config or {"replica_publication": {"mode": "required"}})
+    return publish_runtime_replica(
+        current_attachment=attachment,
+        policy=settings.policy,
+        ensure_runtime_initialized=settings.ensure_runtime_initialized,
+        profile_sink=cast(Any, profile_sink),
+    )
+
+
+def _retire(
+    attachment: RuntimeAttachment,
+    config: dict[str, object] | None = None,
+    *,
+    reason: str = "retire",
+    drain_timeout_s: float | None = None,
+    profile_sink: object | None = None,
+) -> RuntimeAttachment:
+    settings = _settings(config or {"replica_publication": {"mode": "required"}})
+    return retire_runtime_replica(
+        current_attachment=attachment,
+        reason=reason,
+        drain_timeout_s=drain_timeout_s,
+        default_drain_timeout_s=settings.drain_timeout_s,
+        ensure_runtime_initialized=settings.ensure_runtime_initialized,
+        profile_sink=cast(Any, profile_sink),
+    )
+
+
+def _reload(attachment: RuntimeAttachment) -> RuntimeAttachment:
+    return reload_runtime_attachment(
+        current_attachment=attachment,
+        artifact_locator=ArtifactLocator.artifact_ref("mi2:next"),
+        policy=RuntimePolicy(),
+        runtime_host=cast(Any, object()),
+        runtime_context=RuntimeRequestContext(),
+        ensure_runtime_initialized=lambda: pytest.fail(
+            "active publication rejection must precede runtime init"
         ),
-        profile_sink=profile_sink,
     )
 
 
@@ -132,7 +176,7 @@ def _attachment(
         representation_contract_hash="repr-hash",
         tensor_schema_hash="schema-hash",
         binding_value_ref=binding_value_ref,
-        readiness="serving",
+        readiness="runtime_ready",
     )
     state = RuntimeBindingState(
         binding=binding,
@@ -173,8 +217,8 @@ def _with_published_replica(
     return replace(attachment, view=replace(attachment.view, endpoint=endpoint))
 
 
-def test_serving_config_parses_replica_publication_policy() -> None:
-    config = ServingConfig.from_mapping(
+def test_runtime_config_parses_replica_publication_policy() -> None:
+    settings = _settings(
         {
             "replica_publication": {
                 "mode": "REQUIRED",
@@ -184,11 +228,12 @@ def test_serving_config_parses_replica_publication_policy() -> None:
             },
         }
     )
+    policy = settings.policy
 
-    assert config.replica_publication.mode == "required"
-    assert config.replica_publication.trigger == "after_vllm_ready"
-    assert config.replica_publication.timeout_s == 5
-    assert config.replica_publication.drain_timeout_s == 7
+    assert policy.mode == "required"
+    assert policy.trigger == "after_vllm_ready"
+    assert policy.timeout_s == 5
+    assert policy.drain_timeout_s == 7
 
 
 @pytest.mark.parametrize(
@@ -205,11 +250,11 @@ def test_serving_config_parses_replica_publication_policy() -> None:
         },
     ],
 )
-def test_serving_config_rejects_invalid_replica_publication_policy(
+def test_runtime_config_rejects_invalid_replica_publication_policy(
     payload: dict[str, object],
 ) -> None:
     with pytest.raises(ValueError):
-        ServingConfig.from_mapping({"replica_publication": payload})
+        _settings({"replica_publication": payload})
 
 
 def test_publish_current_replica_rejects_local_ready_attachment(
@@ -220,21 +265,21 @@ def test_publish_current_replica_rejects_local_ready_attachment(
         "ensure_initialized",
         lambda self: None,
     )
-    session = _session(
-        {
-            "replica_publication": {
-                "mode": "required",
-                "drain_timeout_s": 3,
-            },
-        }
-    )
     attachment = _attachment(
         _PublicationBinding(),
         serving_artifact_ref=None,
     )
 
     with pytest.raises(ReplicaPublicationError, match="artifact-backed"):
-        session.publish_current_replica(current_attachment=attachment)
+        _publish(
+            attachment,
+            {
+                "replica_publication": {
+                    "mode": "required",
+                    "drain_timeout_s": 3,
+                },
+            },
+        )
 
 
 def test_publish_current_replica_rejects_missing_publication_capability(
@@ -245,18 +290,15 @@ def test_publish_current_replica_rejects_missing_publication_capability(
         "ensure_initialized",
         lambda self: None,
     )
-    session = _session(
-        {
-            "replica_publication": {
-                "mode": "required",
-                "drain_timeout_s": 3,
-            },
-        }
-    )
-
     with pytest.raises(ReplicaPublicationError, match="publish_replica"):
-        session.publish_current_replica(
-            current_attachment=_attachment(_MissingPublicationCapabilityBinding())
+        _publish(
+            _attachment(_MissingPublicationCapabilityBinding()),
+            {
+                "replica_publication": {
+                    "mode": "required",
+                    "drain_timeout_s": 3,
+                },
+            },
         )
 
 
@@ -270,10 +312,9 @@ def test_publish_current_replica_rejects_artifact_scope_mismatch(
     )
     binding = _PublicationBinding()
     binding.artifact_id = "mi2:other:serving"
-    session = _session({"replica_publication": {"mode": "required"}})
 
     with pytest.raises(ReplicaPublicationError, match="does not match"):
-        session.publish_current_replica(current_attachment=_attachment(binding))
+        _publish(_attachment(binding), {"replica_publication": {"mode": "required"}})
 
 
 def test_publish_current_replica_returns_published_projection(
@@ -288,9 +329,8 @@ def test_publish_current_replica_returns_published_projection(
     )
     binding = _PublicationBinding()
     attachment = _attachment(binding)
-    session = _session({"replica_publication": {"mode": "required"}})
 
-    published = session.publish_current_replica(current_attachment=attachment)
+    published = _publish(attachment, {"replica_publication": {"mode": "required"}})
 
     projection = published.view.endpoint.weight_version.published_replica
     assert projection is not None
@@ -362,14 +402,13 @@ def test_project_current_replica_publication_state_returns_typed_projection(
     )
     binding = _PublicationBinding()
     attachment = _attachment(binding)
-    session = _session({"replica_publication": {"mode": "required"}})
 
-    publishing = session.project_current_replica_publication_state(
+    publishing = project_runtime_replica_publication_state(
         current_attachment=attachment,
         state="publishing",
         reason="after_vllm_ready",
     )
-    published = session.publish_current_replica(current_attachment=publishing)
+    published = _publish(publishing, {"replica_publication": {"mode": "required"}})
 
     pending = publishing.view.endpoint.weight_version.published_replica
     assert pending is not None
@@ -405,10 +444,9 @@ def test_publish_current_replica_error_carries_failed_projection(
         lambda self: None,
     )
     attachment = _attachment(_FailingPublicationBinding())
-    session = _session({"replica_publication": {"mode": "required"}})
 
     with pytest.raises(ReplicaPublicationError) as raised:
-        session.publish_current_replica(current_attachment=attachment)
+        _publish(attachment, {"replica_publication": {"mode": "required"}})
 
     failed = raised.value.attachment
     assert isinstance(failed, RuntimeAttachment)
@@ -428,19 +466,18 @@ def test_publish_and_retire_emit_profile_metrics(
     )
     events: list[dict[str, Any]] = []
     binding = _PublicationBinding()
-    session = _session(
-        {
-            "replica_publication": {
-                "mode": "required",
-            }
-        },
-        profile_sink=events.append,
-    )
+    config = {
+        "replica_publication": {
+            "mode": "required",
+        }
+    }
 
-    published = session.publish_current_replica(current_attachment=_attachment(binding))
-    retired = session.retire_current_replica(
-        current_attachment=published,
+    published = _publish(_attachment(binding), config, profile_sink=events.append)
+    retired = _retire(
+        published,
+        config,
         reason="shutdown",
+        profile_sink=events.append,
     )
 
     assert retired.view.endpoint.weight_version.published_replica is not None
@@ -465,10 +502,9 @@ def test_publish_current_replica_is_idempotent_for_matching_active_projection(
     )
     binding = _PublicationBinding()
     attachment = _attachment(binding)
-    session = _session({"replica_publication": {"mode": "required"}})
 
-    published = session.publish_current_replica(current_attachment=attachment)
-    replayed = session.publish_current_replica(current_attachment=published)
+    published = _publish(attachment, {"replica_publication": {"mode": "required"}})
+    replayed = _publish(published, {"replica_publication": {"mode": "required"}})
 
     assert replayed is published
     assert binding.publish_calls == 1
@@ -483,12 +519,14 @@ def test_publish_current_replica_rejects_mismatched_active_projection(
         lambda self: None,
     )
     binding = _PublicationBinding()
-    session = _session({"replica_publication": {"mode": "required"}})
-    published = session.publish_current_replica(current_attachment=_attachment(binding))
+    published = _publish(
+        _attachment(binding),
+        {"replica_publication": {"mode": "required"}},
+    )
     binding.published_lease_id = "lease-2"
 
     with pytest.raises(ReplicaPublicationError, match="does not match"):
-        session.publish_current_replica(current_attachment=published)
+        _publish(published, {"replica_publication": {"mode": "required"}})
 
 
 def test_publish_current_replica_rejects_stale_publish_result(
@@ -499,12 +537,11 @@ def test_publish_current_replica_rejects_stale_publish_result(
         "ensure_initialized",
         lambda self: None,
     )
-    session = _session({"replica_publication": {"mode": "required"}})
     binding = _PublicationBinding(seal_generation=2)
     attachment = _attachment(binding)
 
     with pytest.raises(ReplicaPublicationError, match="stale") as raised:
-        session.publish_current_replica(current_attachment=attachment)
+        _publish(attachment, {"replica_publication": {"mode": "required"}})
 
     stale = raised.value.attachment
     assert isinstance(stale, RuntimeAttachment)
@@ -532,10 +569,9 @@ def test_publish_current_replica_rejects_binding_value_scope_mismatch(
         seal_generation=1,
         serving_artifact_id="mi2:test:serving",
     )
-    session = _session({"replica_publication": {"mode": "required"}})
 
     with pytest.raises(ReplicaPublicationError, match="stale"):
-        session.publish_current_replica(current_attachment=_attachment(binding))
+        _publish(_attachment(binding), {"replica_publication": {"mode": "required"}})
 
 
 def test_retire_current_replica_is_idempotent_for_unpublished_attachment(
@@ -546,10 +582,11 @@ def test_retire_current_replica_is_idempotent_for_unpublished_attachment(
         "ensure_initialized",
         lambda self: None,
     )
-    session = _session({"replica_publication": {"mode": "required"}})
     attachment = _attachment(_PublicationBinding())
 
-    assert session.retire_current_replica(current_attachment=attachment) is attachment
+    assert (
+        _retire(attachment, {"replica_publication": {"mode": "required"}}) is attachment
+    )
 
 
 def test_runtime_binding_state_close_retires_binding_only_publication() -> None:
@@ -588,17 +625,16 @@ def test_retire_current_replica_handles_binding_only_publication(
     binding = _PublicationBinding()
     binding.published_lease_id = lease_id
     binding.published_replica_id = replica_id
-    session = _session(
-        {
-            "replica_publication": {
-                "mode": "required",
-                "drain_timeout_s": 3,
-            },
-        }
-    )
+    config = {
+        "replica_publication": {
+            "mode": "required",
+            "drain_timeout_s": 3,
+        },
+    }
 
-    retired = session.retire_current_replica(
-        current_attachment=_attachment(binding),
+    retired = _retire(
+        _attachment(binding),
+        config,
         reason="reload",
     )
 
@@ -622,14 +658,12 @@ def test_retire_current_replica_refreshes_stale_terminal_projection(
     binding = _PublicationBinding()
     binding.published_lease_id = "lease-orphan"
     binding.published_replica_id = "replica-orphan"
-    session = _session(
-        {
-            "replica_publication": {
-                "mode": "required",
-                "drain_timeout_s": 3,
-            },
-        }
-    )
+    config = {
+        "replica_publication": {
+            "mode": "required",
+            "drain_timeout_s": 3,
+        },
+    }
     attachment = _with_published_replica(
         _attachment(binding),
         state="retired",
@@ -637,8 +671,9 @@ def test_retire_current_replica_refreshes_stale_terminal_projection(
         replica_id=None,
     )
 
-    retired = session.retire_current_replica(
-        current_attachment=attachment,
+    retired = _retire(
+        attachment,
+        config,
         reason="reload",
     )
 
@@ -662,21 +697,20 @@ def test_retire_current_replica_terminalizes_publishing_projection(
     binding = _PublicationBinding()
     binding.published_lease_id = "lease-1"
     binding.published_replica_id = "replica-1"
-    session = _session(
-        {
-            "replica_publication": {
-                "mode": "required",
-                "drain_timeout_s": 3,
-            },
-        }
-    )
+    config = {
+        "replica_publication": {
+            "mode": "required",
+            "drain_timeout_s": 3,
+        },
+    }
     publishing = _with_published_replica(
         _attachment(binding),
         state="publishing",
     )
 
-    retired = session.retire_current_replica(
-        current_attachment=publishing,
+    retired = _retire(
+        publishing,
+        config,
         reason="reload",
     )
 
@@ -696,18 +730,17 @@ def test_retire_current_replica_updates_projection(
         lambda self: None,
     )
     binding = _PublicationBinding()
-    session = _session(
-        {
-            "replica_publication": {
-                "mode": "required",
-                "drain_timeout_s": 3,
-            },
-        }
-    )
-    published = session.publish_current_replica(current_attachment=_attachment(binding))
+    config = {
+        "replica_publication": {
+            "mode": "required",
+            "drain_timeout_s": 3,
+        },
+    }
+    published = _publish(_attachment(binding), config)
 
-    retired = session.retire_current_replica(
-        current_attachment=published,
+    retired = _retire(
+        published,
+        config,
         reason="reload",
     )
 
@@ -726,18 +759,13 @@ def test_reload_rejects_active_published_replica(
         "ensure_initialized",
         lambda self: None,
     )
-    session = _session({"replica_publication": {"mode": "required"}})
-    published = session.publish_current_replica(
-        current_attachment=_attachment(_PublicationBinding())
+    published = _publish(
+        _attachment(_PublicationBinding()),
+        {"replica_publication": {"mode": "required"}},
     )
 
     with pytest.raises(ReplicaPublicationError, match="retiring"):
-        session.reload(
-            current_attachment=published,
-            artifact_locator=ServingArtifactLocator.artifact_ref("mi2:next"),
-            policy=None,
-            context=RequestContext(),
-        )
+        _reload(published)
 
 
 @pytest.mark.parametrize("projection_state", [None, "retired"])
@@ -761,12 +789,6 @@ def test_reload_rejects_binding_lease_without_active_projection(
             lease_id="lease-orphan",
             replica_id="replica-orphan",
         )
-    session = _session({"replica_publication": {"mode": "required"}})
 
     with pytest.raises(ReplicaPublicationError, match="retire"):
-        session.reload(
-            current_attachment=attachment,
-            artifact_locator=ServingArtifactLocator.artifact_ref("mi2:next"),
-            policy=None,
-            context=RequestContext(),
-        )
+        _reload(attachment)
diff --git a/tests/python/test_serving_recipe_build_session.py b/tests/python/artifact_runtime/recipe/test_build_session.py
similarity index 92%
rename from tests/python/test_serving_recipe_build_session.py
rename to tests/python/artifact_runtime/recipe/test_build_session.py
index 53ff206b..aa9d7a46 100644
--- a/tests/python/test_serving_recipe_build_session.py
+++ b/tests/python/artifact_runtime/recipe/test_build_session.py
@@ -5,27 +5,31 @@
 import torch
 from torch import nn
 
-from tensorcast.serving.dto import ServingPlacement
-from tensorcast.serving.recipe_build import (
+from tensorcast.artifact_runtime.dto import RuntimePlacement
+from tensorcast.artifact_runtime.recipe.build import (
     COMPILED_RECIPE_MEMORY_CACHE,
     DEFAULT_RECIPE_BUILD_MEMORY_CACHE_ENTRIES,
     TRACE_PLAN_MEMORY_CACHE,
     RecipeBuildCacheConfig,
     RecipeBuildMemoryCache,
     RecipeBuildSession,
-    ServingBindingPlan,
+    RuntimeBindingPlan,
     compute_recipe_cache_key,
     compute_trace_cache_key,
 )
-from tensorcast.types import ServingBindingMemberRef, ServingTopologyRef
+from tensorcast.types import (
+    RuntimeBindingMemberRef,
+    RuntimeSupportLevel,
+    RuntimeTopologyRef,
+)
 
 
 def _identity(**updates):
-    topology = ServingTopologyRef(
+    topology = RuntimeTopologyRef(
         schema_topology_digest="topology-a",
         logical_topology_ref="tensorcast://topology/a",
     )
-    member = ServingBindingMemberRef(
+    member = RuntimeBindingMemberRef(
         member_id="dp0:pp0:tp0",
         member_index=0,
         member_count=1,
@@ -49,7 +53,7 @@ def _identity(**updates):
         "placement": {"tp_rank": 0},
     }
     payload.update(updates)
-    return ServingBindingPlan(**payload)
+    return RuntimeBindingPlan(**payload)
 
 
 def test_recipe_build_session_keys_track_framework_and_placement():
@@ -132,8 +136,8 @@ def test_recipe_build_session_owns_cache_io(monkeypatch):
     session = RecipeBuildSession(_identity())
     calls = []
 
-    import tensorcast.serving.builder.recipe_cache as recipe_cache
-    import tensorcast.serving.builder.trace_cache as trace_cache
+    import tensorcast.artifact_runtime.recipe.cache as recipe_cache
+    import tensorcast.artifact_runtime.recipe.trace_cache as trace_cache
 
     monkeypatch.setattr(
         trace_cache,
@@ -285,7 +289,7 @@ def test_default_recipe_build_memory_caches_are_bounded():
 def test_recipe_build_session_owns_compile_identity_and_cached_rebind():
     session = RecipeBuildSession(_identity(tp_rank=2, tp_world_size=4))
 
-    serving_facts = type(
+    runtime_facts = type(
         "Facts",
         (),
         {
@@ -296,7 +300,7 @@ def test_recipe_build_session_owns_compile_identity_and_cached_rebind():
         },
     )()
 
-    identity = session.compile_identity(serving_facts=serving_facts)
+    identity = session.compile_identity(runtime_facts=runtime_facts)
 
     assert identity.model_id == "model"
     assert identity.tp_rank == 2
@@ -314,27 +318,27 @@ class _Catalog:
         canonical_index_hash: str
 
     # Use a dataclass recipe so rebind_cached_recipe_template can preserve type
-    # through dataclasses.replace, matching the real CompiledServingRecipe.
+    # through dataclasses.replace, matching the real CompiledRuntimeRecipe.
     import tensorcast as tc
-    from tensorcast.serving.builder.compiler import (
-        CompiledServingRecipe,
+    from tensorcast.artifact_runtime.recipe.compiler import (
+        CompiledRuntimeRecipe,
+        TensorcastRuntimeFacts,
         TensorcastSemanticValidationSpec,
-        TensorcastServingFacts,
         TensorSchemaEntry,
         realization_plan_digest,
         target_tensor_schema_hash,
     )
-    from tensorcast.serving.builder.trace_ir import TracePlan
+    from tensorcast.artifact_runtime.recipe.trace_ir import TracePlan
 
-    real_recipe = CompiledServingRecipe(
+    real_recipe = CompiledRuntimeRecipe(
         compile_key="old",
         source_artifact_ref="old",
         source_metadata_fingerprint="old",
-        serving_facts=TensorcastServingFacts(
+        runtime_facts=TensorcastRuntimeFacts(
             framework_name="vllm",
             adapter_version="adapter-v1",
             serving_abi_version="abi-v1",
-            support_level=tc.ServingSupportLevel.BUILDER_PUBLICATION_READY,
+            support_level=RuntimeSupportLevel.BUILDER_PUBLICATION_READY,
             runtime_only_tensor_names=(),
             process_after_load_class=tc.FinalizeClass.RUNTIME_ONLY,
             post_bind_finalize_class=tc.FinalizeClass.RUNTIME_ONLY,
@@ -358,7 +362,7 @@ class _Catalog:
     compile_payload = identity.compile_payload(
         source_artifact_ref="mi2:test:source",
         source_metadata_fingerprint="meta-b",
-        serving_facts=real_recipe.serving_facts,
+        runtime_facts=real_recipe.runtime_facts,
         tensor_schema=(
             TensorSchemaEntry(
                 name="w",
@@ -411,7 +415,7 @@ class _Catalog:
     assert rebound.binding_plan.realization_plan_digest == realization_plan_digest(
         rebound.realization_plan_proto
     )
-    assert rebound.binding_plan.serving_facts is rebound.serving_facts
+    assert rebound.binding_plan.runtime_facts is rebound.runtime_facts
     assert rebound.binding_plan.trace_plan is rebound.trace_plan
     assert rebound.binding_plan.tensor_schema == rebound.tensor_schema
     assert rebound.binding_plan.source_hull == rebound.source_hull
@@ -433,14 +437,14 @@ class _Catalog:
     resolved_compile_payload = rebound.binding_plan.compile_payload(
         source_artifact_ref=rebound.source_artifact_ref,
         source_metadata_fingerprint=rebound.source_metadata_fingerprint,
-        serving_facts=rebound.serving_facts,
+        runtime_facts=rebound.runtime_facts,
         tensor_schema=rebound.tensor_schema,
         semantic_validation_spec=rebound.semantic_validation_spec,
     )
     identity_compile_payload = identity.compile_payload(
         source_artifact_ref=rebound.source_artifact_ref,
         source_metadata_fingerprint=rebound.source_metadata_fingerprint,
-        serving_facts=rebound.serving_facts,
+        runtime_facts=rebound.runtime_facts,
         tensor_schema=rebound.tensor_schema,
         semantic_validation_spec=rebound.semantic_validation_spec,
     )
@@ -456,7 +460,7 @@ class _Catalog:
 
 def test_recipe_build_session_owns_recipe_metadata_collection():
     import tensorcast as tc
-    from tensorcast.serving.builder.compiler import (
+    from tensorcast.artifact_runtime.recipe.compiler import (
         TensorcastSemanticValidationSpec,
     )
 
@@ -482,7 +486,7 @@ def serving_abi_version(self, model_config):
 
         def support_level(self, model, model_config):
             assert model_config == "model-config"
-            return tc.ServingSupportLevel.BUILDER_PUBLICATION_READY
+            return RuntimeSupportLevel.BUILDER_PUBLICATION_READY
 
         def runtime_only_tensor_names(self, model):
             return ("runtime_only",)
@@ -500,14 +504,14 @@ def semantic_probes(self, model, model_config):
     model = _Model()
     adapter = _Adapter()
 
-    facts = session.collect_serving_facts(model, "model-config", adapter)
+    facts = session.collect_runtime_facts(model, "model-config", adapter)
     assert facts.framework_name == "fakefw"
     assert facts.runtime_only_tensor_names == ("runtime_only",)
 
     schema = session.collect_tensor_schema(
         model,
         runtime_only_tensor_names=facts.runtime_only_tensor_names,
-        is_reserved_serving_tensor_name=lambda name: name.startswith(
+        is_reserved_runtime_tensor_name=lambda name: name.startswith(
             "__tensorcast_meta__."
         ),
     )
@@ -536,8 +540,8 @@ def semantic_probes(self, model, model_config):
 
 def test_recipe_build_session_build_recipe_runs_core_orchestration():
     import tensorcast as tc
-    from tensorcast.serving.builder.trace_ir import CopyPlanEntry, TracePlan
-    from tensorcast.serving.source_catalog import (
+    from tensorcast.artifact_runtime.recipe.trace_ir import CopyPlanEntry, TracePlan
+    from tensorcast.artifact_runtime.source import (
         SourceCatalog,
         SourceTensorMeta,
         compute_source_metadata_fingerprint,
@@ -567,7 +571,7 @@ def serving_abi_version(self, model_config):
 
         def support_level(self, model, model_config):
             assert model_config.model == "fake-model"
-            return tc.ServingSupportLevel.BUILDER_PUBLICATION_READY
+            return RuntimeSupportLevel.BUILDER_PUBLICATION_READY
 
         def runtime_only_tensor_names(self, model):
             return ("runtime_only",)
@@ -641,7 +645,7 @@ def cleanup_after_recipe_build(
             trace_tp_slices=True,
             trace_cache_schema_version=7,
         ),
-        is_reserved_serving_tensor_name=lambda name: name.startswith(
+        is_reserved_runtime_tensor_name=lambda name: name.startswith(
             "__tensorcast_meta__."
         ),
         trace_capture_fn=lambda *_args: trace_plan,
@@ -672,7 +676,7 @@ def test_recipe_build_session_owns_cached_recipe_context_match():
         {
             "source_metadata_fingerprint": "meta-a",
             "topology_ref": None,
-            "member_ref": ServingBindingMemberRef(
+            "member_ref": RuntimeBindingMemberRef(
                 member_id="member-1",
                 member_index=1,
                 member_count=2,
@@ -683,7 +687,7 @@ def test_recipe_build_session_owns_cached_recipe_context_match():
         "Placement",
         (),
         {
-            "member": ServingBindingMemberRef(
+            "member": RuntimeBindingMemberRef(
                 member_id="member-1",
                 member_index=1,
                 member_count=2,
@@ -714,7 +718,7 @@ def test_recipe_build_session_owns_cached_recipe_context_match():
             "Placement",
             (),
             {
-                "member": ServingBindingMemberRef(
+                "member": RuntimeBindingMemberRef(
                     member_id="member-0",
                     member_index=0,
                     member_count=2,
@@ -730,19 +734,19 @@ def test_recipe_cache_match_uses_serving_member_identity():
     recipe = SimpleNamespace(
         source_metadata_fingerprint="meta-a",
         topology_ref=None,
-        member_ref=ServingBindingMemberRef(
+        member_ref=RuntimeBindingMemberRef(
             member_id="dp0:pp0:tp1",
             member_index=9,
             member_count=16,
             group_id="group-1",
         ),
     )
-    placement = ServingPlacement(
-        topology=ServingTopologyRef(
+    placement = RuntimePlacement(
+        topology=RuntimeTopologyRef(
             schema_topology_digest="topology-digest",
             logical_topology_ref="tensorcast://placement/topology",
         ),
-        member=ServingBindingMemberRef(
+        member=RuntimeBindingMemberRef(
             member_id="dp0:pp0:tp1",
             member_index=9,
             member_count=16,
@@ -766,9 +770,9 @@ def test_recipe_cache_match_uses_serving_member_identity():
     assert not session.cached_recipe_matches_context(
         recipe,
         source_catalog=source_catalog,
-        placement=ServingPlacement(
+        placement=RuntimePlacement(
             topology=placement.topology,
-            member=ServingBindingMemberRef(
+            member=RuntimeBindingMemberRef(
                 member_id="dp0:pp0:tp2",
                 member_index=10,
                 member_count=16,
diff --git a/tests/python/test_serving_builder_recipe_cache.py b/tests/python/artifact_runtime/recipe/test_cache.py
similarity index 86%
rename from tests/python/test_serving_builder_recipe_cache.py
rename to tests/python/artifact_runtime/recipe/test_cache.py
index 8fb15d6c..ad7db2dc 100644
--- a/tests/python/test_serving_builder_recipe_cache.py
+++ b/tests/python/artifact_runtime/recipe/test_cache.py
@@ -6,28 +6,28 @@
 
 from tensorcast.api.store import BindingRealizationEntry
 from tensorcast.api.store import Range as StoreRange
-from tensorcast.serving.builder.compiler import (
-    CompiledServingRecipe,
-    SourceHullEntry,
-    TensorcastSemanticValidationSpec,
-    TensorcastServingFacts,
-    TensorSchemaEntry,
-)
-from tensorcast.serving.builder.recipe_cache import (
+from tensorcast.artifact_runtime.recipe.cache import (
     RECIPE_CACHE_PAYLOAD_VERSION,
     load_compiled_recipe_cache,
     write_compiled_recipe_cache,
 )
-from tensorcast.serving.builder.trace_ir import CopyPlanEntry, Range, TracePlan
+from tensorcast.artifact_runtime.recipe.compiler import (
+    CompiledRuntimeRecipe,
+    SourceHullEntry,
+    TensorcastRuntimeFacts,
+    TensorcastSemanticValidationSpec,
+    TensorSchemaEntry,
+)
+from tensorcast.artifact_runtime.recipe.trace_ir import CopyPlanEntry, Range, TracePlan
 from tensorcast.types import (
     FinalizeClass,
-    ServingBindingMemberRef,
-    ServingSupportLevel,
-    ServingTopologyRef,
+    RuntimeBindingMemberRef,
+    RuntimeSupportLevel,
+    RuntimeTopologyRef,
 )
 
 
-def _recipe() -> CompiledServingRecipe:
+def _recipe() -> CompiledRuntimeRecipe:
     trace_plan = TracePlan(
         copy_plan=[
             CopyPlanEntry(
@@ -43,16 +43,16 @@ def _recipe() -> CompiledServingRecipe:
         tensorcast_slices={"x": Range(dim=0, start=0, end=4)},
         src_hull={"x": Range(dim=0, start=0, end=4)},
     )
-    return CompiledServingRecipe(
+    return CompiledRuntimeRecipe(
         compile_key="compile-key",
         source_artifact_ref="msa1:test-source",
         source_metadata_fingerprint="metadata-fingerprint",
-        serving_facts=TensorcastServingFacts(
+        runtime_facts=TensorcastRuntimeFacts(
             framework_name="vllm",
             framework_version="vllm-test",
             adapter_version="adapter-v1",
             serving_abi_version="abi-v1",
-            support_level=ServingSupportLevel.RUNTIME_BIND_SWAP_READY,
+            support_level=RuntimeSupportLevel.RUNTIME_BIND_SWAP_READY,
             runtime_only_tensor_names=("runtime",),
             process_after_load_class=FinalizeClass.RUNTIME_ONLY,
             post_bind_finalize_class=FinalizeClass.RUNTIME_ONLY,
@@ -77,11 +77,11 @@ def _recipe() -> CompiledServingRecipe:
             ),
         ),
         realization_fallback_plan=(),
-        topology_ref=ServingTopologyRef(
+        topology_ref=RuntimeTopologyRef(
             schema_topology_digest="topology-digest",
             logical_topology_ref="tensorcast://topology/topology-digest",
         ),
-        member_ref=ServingBindingMemberRef(
+        member_ref=RuntimeBindingMemberRef(
             member_id="dp0:pp0:tp0",
             member_index=0,
             member_count=2,
@@ -119,7 +119,7 @@ def test_compiled_recipe_cache_round_trips(tmp_path: Path) -> None:
     assert payload["compiled_recipe"]["trace_plan_summary"]["expected_dst_names"] == [
         "w"
     ]
-    assert payload["compiled_recipe"]["serving_facts"]["framework_version"] == (
+    assert payload["compiled_recipe"]["runtime_facts"]["framework_version"] == (
         "vllm-test"
     )
     assert payload["compiled_recipe"]["topology_ref"]["schema_topology_digest"] == (
diff --git a/tests/python/test_serving_builder_compiler.py b/tests/python/artifact_runtime/recipe/test_compiler.py
similarity index 79%
rename from tests/python/test_serving_builder_compiler.py
rename to tests/python/artifact_runtime/recipe/test_compiler.py
index 77dd8da9..eecc1b45 100644
--- a/tests/python/test_serving_builder_compiler.py
+++ b/tests/python/artifact_runtime/recipe/test_compiler.py
@@ -7,23 +7,23 @@
 import pytest
 import torch
 
-from tensorcast.serving.builder.compiler import (
+from tensorcast.artifact_runtime.recipe.compiler import (
     RecipeCompileInputs,
-    ServingBindingPlan,
+    RuntimeBindingPlan,
+    TensorcastRuntimeFacts,
     TensorcastSemanticValidationSpec,
-    TensorcastServingFacts,
     TensorSchemaEntry,
-    compile_serving_recipe,
+    compile_runtime_recipe,
     realization_plan_digest,
     target_tensor_schema_hash,
 )
-from tensorcast.serving.builder.trace_ir import CopyPlanEntry, Range, TracePlan
-from tensorcast.serving.source_catalog import SourceCatalog, SourceTensorMeta
+from tensorcast.artifact_runtime.recipe.trace_ir import CopyPlanEntry, Range, TracePlan
+from tensorcast.artifact_runtime.source import SourceCatalog, SourceTensorMeta
 from tensorcast.types import (
     FinalizeClass,
-    ServingBindingMemberRef,
-    ServingSupportLevel,
-    ServingTopologyRef,
+    RuntimeBindingMemberRef,
+    RuntimeSupportLevel,
+    RuntimeTopologyRef,
 )
 
 
@@ -64,28 +64,28 @@ def _trace_plan() -> TracePlan:
     )
 
 
-def _serving_facts(adapter_version: str = "adapter-v1") -> TensorcastServingFacts:
-    return TensorcastServingFacts(
+def _runtime_facts(adapter_version: str = "adapter-v1") -> TensorcastRuntimeFacts:
+    return TensorcastRuntimeFacts(
         framework_name="vllm",
         framework_version="vllm-test",
         adapter_version=adapter_version,
         serving_abi_version="abi-v1",
-        support_level=ServingSupportLevel.BUILDER_PUBLICATION_READY,
+        support_level=RuntimeSupportLevel.BUILDER_PUBLICATION_READY,
         runtime_only_tensor_names=("runtime_only",),
         process_after_load_class=FinalizeClass.RUNTIME_ONLY,
         post_bind_finalize_class=FinalizeClass.RUNTIME_ONLY,
     )
 
 
-def _topology_ref(digest: str = "topology-digest") -> ServingTopologyRef:
-    return ServingTopologyRef(
+def _topology_ref(digest: str = "topology-digest") -> RuntimeTopologyRef:
+    return RuntimeTopologyRef(
         schema_topology_digest=digest,
         logical_topology_ref=f"tensorcast://topology/{digest}",
     )
 
 
-def _member_ref(index: int = 0, count: int = 1) -> ServingBindingMemberRef:
-    return ServingBindingMemberRef(
+def _member_ref(index: int = 0, count: int = 1) -> RuntimeBindingMemberRef:
+    return RuntimeBindingMemberRef(
         member_id=f"dp0:pp0:tp{index}",
         member_index=index,
         member_count=count,
@@ -93,8 +93,8 @@ def _member_ref(index: int = 0, count: int = 1) -> ServingBindingMemberRef:
     )
 
 
-def _identity() -> ServingBindingPlan:
-    return ServingBindingPlan(
+def _identity() -> RuntimeBindingPlan:
+    return RuntimeBindingPlan(
         model_id="fake-model",
         model_revision=None,
         dtype="torch.float16",
@@ -113,7 +113,7 @@ def _inputs(**overrides) -> RecipeCompileInputs:
     values = {
         "source_catalog": _source_catalog(),
         "trace_plan": _trace_plan(),
-        "serving_facts": _serving_facts(),
+        "runtime_facts": _runtime_facts(),
         "tensor_schema": (
             TensorSchemaEntry(
                 name="w",
@@ -142,10 +142,10 @@ def event(self, name: str, payload) -> None:
         self.events.append((name, dict(payload)))
 
 
-def test_compile_serving_recipe_assembles_recipe_from_pure_inputs() -> None:
+def test_compile_runtime_recipe_assembles_recipe_from_pure_inputs() -> None:
     observer = _Observer()
 
-    recipe = compile_serving_recipe(
+    recipe = compile_runtime_recipe(
         identity=_identity(),
         inputs=_inputs(),
         observer=observer,
@@ -154,7 +154,7 @@ def test_compile_serving_recipe_assembles_recipe_from_pure_inputs() -> None:
     assert recipe.compile_key
     assert recipe.source_artifact_ref == "mi2:test:source"
     assert recipe.source_metadata_fingerprint == "metadata-fingerprint"
-    assert recipe.serving_facts.framework_name == "vllm"
+    assert recipe.runtime_facts.framework_name == "vllm"
     assert recipe.trace_plan.expected_src_names == {"x"}
     assert [entry.name for entry in recipe.tensor_schema] == ["w"]
     assert [entry.name for entry in recipe.source_hull] == ["x"]
@@ -173,7 +173,7 @@ def test_compile_serving_recipe_assembles_recipe_from_pure_inputs() -> None:
     assert recipe.binding_plan.realization_plan_digest == realization_plan_digest(
         recipe.realization_plan_proto
     )
-    assert recipe.binding_plan.serving_facts is recipe.serving_facts
+    assert recipe.binding_plan.runtime_facts is recipe.runtime_facts
     assert recipe.binding_plan.trace_plan is recipe.trace_plan
     assert recipe.binding_plan.tensor_schema == recipe.tensor_schema
     assert recipe.binding_plan.source_hull == recipe.source_hull
@@ -210,13 +210,13 @@ def test_compile_serving_recipe_assembles_recipe_from_pure_inputs() -> None:
     ]
 
 
-def test_compile_serving_recipe_compile_key_invalidates_on_pure_inputs() -> None:
-    recipe_a = compile_serving_recipe(identity=_identity(), inputs=_inputs())
-    recipe_b = compile_serving_recipe(
+def test_compile_runtime_recipe_compile_key_invalidates_on_pure_inputs() -> None:
+    recipe_a = compile_runtime_recipe(identity=_identity(), inputs=_inputs())
+    recipe_b = compile_runtime_recipe(
         identity=replace(_identity(), adapter_version="adapter-v2"),
-        inputs=_inputs(serving_facts=_serving_facts("adapter-v2")),
+        inputs=_inputs(runtime_facts=_runtime_facts("adapter-v2")),
     )
-    recipe_c = compile_serving_recipe(
+    recipe_c = compile_runtime_recipe(
         identity=replace(
             _identity(),
             topology_ref=_topology_ref("topology-digest-b"),
@@ -229,25 +229,25 @@ def test_compile_serving_recipe_compile_key_invalidates_on_pure_inputs() -> None
     assert recipe_a.compile_key != recipe_c.compile_key
 
 
-def test_compile_serving_recipe_rejects_identity_fact_mismatch() -> None:
-    with pytest.raises(ValueError, match="ServingBindingPlan must match"):
-        compile_serving_recipe(
+def test_compile_runtime_recipe_rejects_identity_fact_mismatch() -> None:
+    with pytest.raises(ValueError, match="RuntimeBindingPlan must match"):
+        compile_runtime_recipe(
             identity=replace(_identity(), adapter_version="adapter-v2"),
             inputs=_inputs(),
         )
 
 
-def test_compile_serving_recipe_rejects_missing_destination_schema() -> None:
+def test_compile_runtime_recipe_rejects_missing_destination_schema() -> None:
     with pytest.raises(ValueError, match="tensor_schema is missing"):
-        compile_serving_recipe(
+        compile_runtime_recipe(
             identity=_identity(),
             inputs=_inputs(tensor_schema=()),
         )
 
 
-def test_compile_serving_recipe_rejects_synthetic_source_identity() -> None:
+def test_compile_runtime_recipe_rejects_synthetic_source_identity() -> None:
     with pytest.raises(ValueError, match="real imported source artifact"):
-        compile_serving_recipe(
+        compile_runtime_recipe(
             identity=_identity(),
             inputs=_inputs(source_catalog=_source_catalog("disk:/tmp/fake")),
         )
diff --git a/tests/python/test_serving_builder_materialization.py b/tests/python/artifact_runtime/recipe/test_materialization.py
similarity index 95%
rename from tests/python/test_serving_builder_materialization.py
rename to tests/python/artifact_runtime/recipe/test_materialization.py
index 74cea99d..d2013998 100644
--- a/tests/python/test_serving_builder_materialization.py
+++ b/tests/python/artifact_runtime/recipe/test_materialization.py
@@ -3,11 +3,11 @@
 import pytest
 import torch
 
-from tensorcast.serving.builder.materialization import (
+from tensorcast.artifact_runtime.recipe.materialization import (
     apply_copy_plan,
     validate_dst_coverage,
 )
-from tensorcast.serving.builder.trace_ir import (
+from tensorcast.artifact_runtime.recipe.trace_ir import (
     CopyPlanEntry,
     MultiRange,
     Range,
diff --git a/tests/python/test_serving_builder_publication.py b/tests/python/artifact_runtime/recipe/test_publication.py
similarity index 93%
rename from tests/python/test_serving_builder_publication.py
rename to tests/python/artifact_runtime/recipe/test_publication.py
index f082ff34..cd8f3852 100644
--- a/tests/python/test_serving_builder_publication.py
+++ b/tests/python/artifact_runtime/recipe/test_publication.py
@@ -2,10 +2,12 @@
 
 import pytest
 
-from tensorcast.serving.builder.publication import (
+from tensorcast.artifact_runtime.publication.context import (
     RecipePublicationContext,
-    build_binding_finalize_publication_bundle_from_context,
     build_pure_transform_build_intent,
+)
+from tensorcast.artifact_runtime.recipe.publication import (
+    build_binding_finalize_publication_bundle_from_context,
     build_pure_transform_publication_spec_from_context,
 )
 from tensorcast.types import BindingValueRef, BuilderMode
@@ -46,8 +48,7 @@ def test_publication_context_passes_framework_topology_json() -> None:
     assert spec.logical_topology_json == '{"topology":"framework-owned"}'
 
 
-def test_binding_finalize_publication_requires_explicit_admission_facts(
-) -> None:
+def test_binding_finalize_publication_requires_explicit_admission_facts() -> None:
     with pytest.raises(ValueError, match="explicit admission_facts"):
         build_binding_finalize_publication_bundle_from_context(
             _context(),
diff --git a/tests/python/test_serving_builder_tensor_parity.py b/tests/python/artifact_runtime/recipe/test_tensor_parity.py
similarity index 98%
rename from tests/python/test_serving_builder_tensor_parity.py
rename to tests/python/artifact_runtime/recipe/test_tensor_parity.py
index 5fa321bb..3ebf50a7 100644
--- a/tests/python/test_serving_builder_tensor_parity.py
+++ b/tests/python/artifact_runtime/recipe/test_tensor_parity.py
@@ -6,8 +6,7 @@
 
 from tensorcast.api.store import BindingRealizationEntry
 from tensorcast.api.store import Range as StoreRange
-from tensorcast.proto.daemon.v2 import store_daemon_pb2
-from tensorcast.serving.builder.tensor_parity import (
+from tensorcast.artifact_runtime.recipe.tensor_parity import (
     build_tensor_parity_probes_from_realization_plan,
     build_tensor_parity_probes_from_realization_plan_proto,
     build_tensor_parity_probes_from_recipe,
@@ -15,12 +14,13 @@
     evaluate_recipe_tensor_parity,
     evaluate_tensor_parity_probes,
 )
-from tensorcast.serving.builder.trace_ir import (
+from tensorcast.artifact_runtime.recipe.trace_ir import (
     CopyPlanEntry,
     MultiRange,
     Range,
     TracePlan,
 )
+from tensorcast.proto.daemon.v2 import store_daemon_pb2
 
 
 def _trace_plan() -> TracePlan:
diff --git a/tests/python/test_serving_builder_trace_cache.py b/tests/python/artifact_runtime/recipe/test_trace_cache.py
similarity index 90%
rename from tests/python/test_serving_builder_trace_cache.py
rename to tests/python/artifact_runtime/recipe/test_trace_cache.py
index 4e2d034b..44075777 100644
--- a/tests/python/test_serving_builder_trace_cache.py
+++ b/tests/python/artifact_runtime/recipe/test_trace_cache.py
@@ -4,13 +4,13 @@
 
 import json
 
-from tensorcast.serving.builder.trace_cache import (
+from tensorcast.artifact_runtime.recipe.trace_cache import (
     dump_trace_plan_debug,
     load_trace_plan_cache,
     trace_plan_debug_payload,
     write_trace_plan_cache,
 )
-from tensorcast.serving.builder.trace_ir import CopyPlanEntry, Range, TracePlan
+from tensorcast.artifact_runtime.recipe.trace_ir import CopyPlanEntry, Range, TracePlan
 
 
 def _trace_plan() -> TracePlan:
@@ -42,7 +42,7 @@ def test_trace_plan_cache_round_trips_versioned_payload(tmp_path) -> None:
 
 
 def test_trace_plan_cache_rejects_unversioned_raw_payload(tmp_path) -> None:
-    from tensorcast.serving.builder.trace_ir import trace_plan_to_dict
+    from tensorcast.artifact_runtime.recipe.trace_ir import trace_plan_to_dict
 
     path = tmp_path / "unversioned.json"
     trace_plan = _trace_plan()
diff --git a/tests/python/test_serving_builder_trace_ir.py b/tests/python/artifact_runtime/recipe/test_trace_ir.py
similarity index 95%
rename from tests/python/test_serving_builder_trace_ir.py
rename to tests/python/artifact_runtime/recipe/test_trace_ir.py
index 5f03898c..2d8a474a 100644
--- a/tests/python/test_serving_builder_trace_ir.py
+++ b/tests/python/artifact_runtime/recipe/test_trace_ir.py
@@ -1,6 +1,6 @@
 #  Copyright (c) 2026, TensorCast Team.
 
-from tensorcast.serving.builder.trace_ir import (
+from tensorcast.artifact_runtime.recipe.trace_ir import (
     CopyPlanEntry,
     MultiRange,
     Range,
diff --git a/tests/python/test_serving_builder_validation.py b/tests/python/artifact_runtime/recipe/test_validation.py
similarity index 70%
rename from tests/python/test_serving_builder_validation.py
rename to tests/python/artifact_runtime/recipe/test_validation.py
index 2eed9055..a7c64ddf 100644
--- a/tests/python/test_serving_builder_validation.py
+++ b/tests/python/artifact_runtime/recipe/test_validation.py
@@ -7,35 +7,35 @@
 import pytest
 import torch
 
-from tensorcast.serving.builder.compiler import (
-    CompiledServingRecipe,
+from tensorcast.artifact_runtime.recipe.compiler import (
+    CompiledRuntimeRecipe,
+    TensorcastRuntimeFacts,
     TensorcastSemanticValidationSpec,
-    TensorcastServingFacts,
     TensorSchemaEntry,
 )
-from tensorcast.serving.builder.recipe_validation import (
-    validate_recipe_for_builder_mode,
-)
-from tensorcast.serving.builder.semantic_validation import (
+from tensorcast.artifact_runtime.recipe.semantic_validation import (
     evaluate_semantic_validation_spec,
 )
-from tensorcast.serving.builder.tensor_schema import (
+from tensorcast.artifact_runtime.recipe.tensor_schema import (
     validate_tensor_schema_against_tensors,
 )
-from tensorcast.serving.builder.trace_ir import TracePlan
-from tensorcast.types import BuilderMode, FinalizeClass, ServingSupportLevel
+from tensorcast.artifact_runtime.recipe.trace_ir import TracePlan
+from tensorcast.artifact_runtime.recipe.validation import (
+    validate_recipe_for_builder_mode,
+)
+from tensorcast.types import BuilderMode, FinalizeClass, RuntimeSupportLevel
 
 
-def _recipe() -> CompiledServingRecipe:
-    return CompiledServingRecipe(
+def _recipe() -> CompiledRuntimeRecipe:
+    return CompiledRuntimeRecipe(
         compile_key="compile-key",
         source_artifact_ref="mi2:test:source",
         source_metadata_fingerprint="metadata-fingerprint",
-        serving_facts=TensorcastServingFacts(
+        runtime_facts=TensorcastRuntimeFacts(
             framework_name="vllm",
             adapter_version="adapter-v1",
             serving_abi_version="abi-v1",
-            support_level=ServingSupportLevel.BUILDER_PUBLICATION_READY,
+            support_level=RuntimeSupportLevel.BUILDER_PUBLICATION_READY,
             runtime_only_tensor_names=("runtime_only",),
             process_after_load_class=FinalizeClass.RUNTIME_ONLY,
             post_bind_finalize_class=FinalizeClass.RUNTIME_ONLY,
@@ -79,9 +79,9 @@ def test_validate_recipe_for_builder_mode_rejects_binding_finalize_fact_mismatch
 def test_validate_recipe_for_builder_mode_rejects_non_publication_ready() -> None:
     recipe = replace(
         _recipe(),
-        serving_facts=replace(
-            _recipe().serving_facts,
-            support_level=ServingSupportLevel.SOURCE_BIND_BOOTSTRAP_ONLY,
+        runtime_facts=replace(
+            _recipe().runtime_facts,
+            support_level=RuntimeSupportLevel.SOURCE_BIND_BOOTSTRAP_ONLY,
         ),
     )
 
@@ -116,6 +116,28 @@ def test_evaluate_semantic_validation_spec_rejects_explicit_mismatch() -> None:
         evaluate_semantic_validation_spec(spec, {"values": [3, 2, 1]})
 
 
+def test_evaluate_semantic_validation_spec_compares_framework_probes() -> None:
+    spec = TensorcastSemanticValidationSpec(
+        kind="framework_semantic_probes",
+        payload={"values": [1, 2, 3]},
+    )
+
+    assert evaluate_semantic_validation_spec(
+        spec,
+        _ProbePayload(values=(1, 2, 3)),
+    ) == {"values": [1, 2, 3]}
+
+
+def test_evaluate_semantic_validation_spec_rejects_framework_probe_mismatch() -> None:
+    spec = TensorcastSemanticValidationSpec(
+        kind="framework_semantic_probes",
+        payload={"values": [1, 2, 3]},
+    )
+
+    with pytest.raises(RuntimeError, match="semantic validation failed"):
+        evaluate_semantic_validation_spec(spec, {"values": [1, 2]})
+
+
 def test_validate_tensor_schema_against_tensors_checks_names_shape_stride_dtype() -> (
     None
 ):
diff --git a/tests/python/test_serving_config.py b/tests/python/artifact_runtime/test_config.py
similarity index 74%
rename from tests/python/test_serving_config.py
rename to tests/python/artifact_runtime/test_config.py
index 156455d9..7fed30a9 100644
--- a/tests/python/test_serving_config.py
+++ b/tests/python/artifact_runtime/test_config.py
@@ -2,50 +2,48 @@
 
 from __future__ import annotations
 
+import importlib.util
+
 import pytest
 
-from tensorcast.serving import (
-    ArtifactBindStartPlan,
+import tensorcast.artifact_runtime.dto as serving_dto
+from tensorcast.artifact_runtime.config import (
     RetainedBindingAcquireSettings,
-    RetainedBindingAcquireStartPlan,
-    ServingArtifactLocator,
-    ServingConfig,
-    ServingPolicy,
-    ServingStartPlanError,
-    SourceBootstrapToBindingStartPlan,
-    merge_serving_reload_extra_config,
-    plan_serving_start,
+    RuntimeArtifactBindStartPlan,
+    RuntimeRetainedRealizationStartPlan,
+    RuntimeSourceBootstrapStartPlan,
+    RuntimeStartPlanError,
+    TensorCastRuntimeConfig,
+    plan_runtime_start,
+)
+from tensorcast.artifact_runtime.locator import (
+    ArtifactLocator,
     ranked_version_key_for_member,
 )
-from tensorcast.serving import dto as serving_dto
-from tensorcast.serving.retained_binding import parse_retained_serving_binding_authority
-from tensorcast.types import ServingBindingMemberRef, ServingTopologyRef
+from tensorcast.artifact_runtime.policy import (
+    RuntimePolicy,
+    merge_runtime_reload_extra_config,
+)
+from tensorcast.retained_realization import parse_retained_realization_authority
+from tensorcast.types import RuntimeBindingMemberRef, RuntimeTopologyRef
 
 FrameworkIntegrationContext = serving_dto.FrameworkIntegrationContext
-PreparedServingArtifact = serving_dto.PreparedServingArtifact
+PreparedRuntimeArtifact = serving_dto.PreparedRuntimeArtifact
 RuntimeTensorView = serving_dto.RuntimeTensorView
-ServingBindingValue = serving_dto.ServingBindingValue
-ServingPlacement = serving_dto.ServingPlacement
-
-
-def test_serving_root_does_not_reexport_lifecycle_dtos() -> None:
-    import tensorcast.serving as serving
-
-    for name in (
-        "BootstrapEndpointProjection",
-        "BootstrapSummary",
-        "BindingValueRefProjection",
-        "FrameworkIntegrationContext",
-        "MaterializationDiagnosticsProjection",
-        "PreparedServingArtifact",
-        "ReloadRequestProjection",
-        "RuntimeTensorView",
-        "ServingBindingValue",
-        "ServingPlacement",
-        "SourceBoundContractProjection",
-    ):
-        with pytest.raises(AttributeError):
-            getattr(serving, name)
+RuntimeBindingValue = serving_dto.RuntimeBindingValue
+RuntimePlacement = serving_dto.RuntimePlacement
+
+
+def _find_spec_or_none(module_name: str):
+    try:
+        return importlib.util.find_spec(module_name)
+    except ModuleNotFoundError:
+        return None
+
+
+def test_serving_public_package_is_removed() -> None:
+    assert _find_spec_or_none("tensorcast.serving") is None
+    assert _find_spec_or_none("tensorcast.serving.runtime") is None
 
 
 def _retained_binding_acquire_config() -> dict:
@@ -81,13 +79,13 @@ def _retained_binding_acquire_config() -> dict:
                     "reservation_bytes": 4096,
                     "scope_digest": "scope-1",
                 },
-                "readiness": "serving_published_ready",
+                "readiness": "runtime_published_ready",
                 "serving_artifact_id": "mi2:test:serving",
                 "trusted_reservation_bytes": 4096,
                 "expected": {
                     "target_layout_hash": "layout-hash",
                     "tensor_schema_hash": "schema-hash",
-                    "serving_build_digest": "build-digest",
+                    "runtime_build_digest": "build-digest",
                     "resolved_spec_digest": "spec-digest",
                 },
             },
@@ -95,8 +93,8 @@ def _retained_binding_acquire_config() -> dict:
     }
 
 
-def test_serving_config_parses_nested_schema_defaults() -> None:
-    config = ServingConfig.from_mapping(
+def test_runtime_config_parses_nested_schema_defaults() -> None:
+    config = TensorCastRuntimeConfig.from_mapping(
         {
             "runtime": {
                 "mode": "CONNECT",
@@ -107,7 +105,7 @@ def test_serving_config_parses_nested_schema_defaults() -> None:
                     "address": "127.0.0.1:50051",
                 },
             },
-            "serving": {
+            "runtime_artifact": {
                 "artifact_locator": {
                     "kind": "version_key",
                     "value": " models/demo/serving/v1 ",
@@ -126,11 +124,11 @@ def test_serving_config_parses_nested_schema_defaults() -> None:
     assert config.runtime.mode == "connect"
     assert config.runtime.daemon.show_logs is True
     assert config.runtime.global_store.resolved_mode("connect") == "connect"
-    assert config.serving.artifact_locator == ServingArtifactLocator(
+    assert config.runtime_artifact.artifact_locator == ArtifactLocator(
         kind="version_key",
         value="models/demo/serving/v1",
     )
-    assert config.to_mapping()["serving"]["artifact_locator"] == {
+    assert config.to_mapping()["runtime_artifact"]["artifact_locator"] == {
         "kind": "version_key",
         "value": "models/demo/serving/v1",
         "schema_version": 1,
@@ -140,11 +138,25 @@ def test_serving_config_parses_nested_schema_defaults() -> None:
     assert config.materialization.collective_policy_value() == "require_collective"
 
 
-def test_serving_config_rejects_selector_alias() -> None:
-    with pytest.raises(ValueError, match="artifact_locator"):
-        ServingConfig.from_mapping(
+def test_runtime_config_rejects_removed_serving_section() -> None:
+    with pytest.raises(ValueError, match="serving.*removed"):
+        TensorCastRuntimeConfig.from_mapping(
             {
                 "serving": {
+                    "artifact_locator": {
+                        "kind": "artifact_ref",
+                        "value": "mi2:test:serving",
+                    },
+                },
+            }
+        )
+
+
+def test_runtime_config_rejects_runtime_artifact_selector_alias() -> None:
+    with pytest.raises(ValueError, match="runtime_artifact.artifact_locator"):
+        TensorCastRuntimeConfig.from_mapping(
+            {
+                "runtime_artifact": {
                     "selector": {
                         "kind": "artifact_ref",
                         "value": "mi2:test:serving",
@@ -154,8 +166,8 @@ def test_serving_config_rejects_selector_alias() -> None:
         )
 
 
-def test_serving_config_emits_retained_binding_acquire_canonical_field() -> None:
-    config = ServingConfig.from_mapping(_retained_binding_acquire_config())
+def test_runtime_config_emits_retained_binding_acquire_canonical_field() -> None:
+    config = TensorCastRuntimeConfig.from_mapping(_retained_binding_acquire_config())
 
     assert isinstance(config.retained_binding_acquire, RetainedBindingAcquireSettings)
     mapping = config.to_mapping()
@@ -163,9 +175,9 @@ def test_serving_config_emits_retained_binding_acquire_canonical_field() -> None
     assert mapping["retained_binding_acquire"]["mode"] == "external"
 
 
-def test_serving_config_rejects_preload_key() -> None:
-    with pytest.raises(ValueError, match="Unexpected TensorCast serving config"):
-        ServingConfig.from_mapping(
+def test_runtime_config_rejects_preload_key() -> None:
+    with pytest.raises(ValueError, match="Unexpected TensorCast runtime config"):
+        TensorCastRuntimeConfig.from_mapping(
             {
                 "preload": {
                     "mode": "external",
@@ -175,13 +187,13 @@ def test_serving_config_rejects_preload_key() -> None:
 
 
 def test_ranked_version_key_locator_scopes_by_serving_member(monkeypatch) -> None:
-    member = ServingBindingMemberRef(
+    member = RuntimeBindingMemberRef(
         member_id="dp0:pp0:tp1",
         member_index=1,
         member_count=2,
         group_id="group-1",
     )
-    locator = ServingArtifactLocator.ranked_version_key("models/demo/serving/v1/")
+    locator = ArtifactLocator.ranked_version_key("models/demo/serving/v1/")
 
     assert (
         ranked_version_key_for_member(
@@ -193,8 +205,8 @@ def test_ranked_version_key_locator_scopes_by_serving_member(monkeypatch) -> Non
     assert locator.resolve_version_key(member=member) == (
         "models/demo/serving/v1/members/dp0:pp0:tp1"
     )
-    placement = ServingPlacement(
-        topology=ServingTopologyRef(
+    placement = RuntimePlacement(
+        topology=RuntimeTopologyRef(
             schema_topology_digest="topology-digest",
             logical_topology_ref="fake://topology",
         ),
@@ -218,18 +230,18 @@ def resolve_key_mapping_cached(self, *, key):
     assert locator.resolve_artifact_ref(member=member) == "mi2:test:serving-rank-1"
 
 
-def test_serving_artifact_locator_is_canonical_name() -> None:
-    locator = ServingArtifactLocator.ranked_version_key("models/demo/serving/v1")
+def test_artifact_locator_is_runtime_canonical_name() -> None:
+    locator = ArtifactLocator.ranked_version_key("models/demo/serving/v1")
 
-    assert isinstance(locator, ServingArtifactLocator)
+    assert isinstance(locator, ArtifactLocator)
     assert locator.kind == "ranked_version_key"
     assert locator.value == "models/demo/serving/v1"
 
 
-def test_plan_serving_start_classifies_three_canonical_variants() -> None:
-    artifact_config = ServingConfig.from_mapping(
+def test_plan_runtime_start_classifies_three_canonical_variants() -> None:
+    artifact_config = TensorCastRuntimeConfig.from_mapping(
         {
-            "serving": {
+            "runtime_artifact": {
                 "artifact_locator": {
                     "kind": "artifact_ref",
                     "value": "mi2:test:serving",
@@ -237,57 +249,57 @@ def test_plan_serving_start_classifies_three_canonical_variants() -> None:
             },
         }
     )
-    artifact_plan = plan_serving_start(
+    artifact_plan = plan_runtime_start(
         config=artifact_config,
         source_selector=object(),
     )
-    assert isinstance(artifact_plan, ArtifactBindStartPlan)
+    assert isinstance(artifact_plan, RuntimeArtifactBindStartPlan)
     assert artifact_plan.kind == "artifact_bind"
     assert artifact_plan.artifact_locator.value == "mi2:test:serving"
 
     source_selector = object()
-    source_plan = plan_serving_start(
-        config=ServingConfig.from_mapping({}),
+    source_plan = plan_runtime_start(
+        config=TensorCastRuntimeConfig.from_mapping({}),
         source_selector=source_selector,
     )
-    assert isinstance(source_plan, SourceBootstrapToBindingStartPlan)
+    assert isinstance(source_plan, RuntimeSourceBootstrapStartPlan)
     assert source_plan.kind == "source_bootstrap_to_binding"
     assert source_plan.source_selector is source_selector
 
-    retained_plan = plan_serving_start(
-        config=ServingConfig.from_mapping(_retained_binding_acquire_config()),
+    retained_plan = plan_runtime_start(
+        config=TensorCastRuntimeConfig.from_mapping(_retained_binding_acquire_config()),
         source_selector=source_selector,
     )
-    assert isinstance(retained_plan, RetainedBindingAcquireStartPlan)
+    assert isinstance(retained_plan, RuntimeRetainedRealizationStartPlan)
     assert retained_plan.kind == "retained_binding_acquire"
     assert retained_plan.authority.binding_value_ref.binding_id == "binding-1"
 
 
-def test_plan_serving_start_reports_no_selected_candidate() -> None:
-    with pytest.raises(ServingStartPlanError, match="rejected candidates"):
-        plan_serving_start(
-            config=ServingConfig.from_mapping({}),
+def test_plan_runtime_start_reports_no_selected_candidate() -> None:
+    with pytest.raises(RuntimeStartPlanError, match="rejected candidates"):
+        plan_runtime_start(
+            config=TensorCastRuntimeConfig.from_mapping({}),
             source_selector=None,
         )
 
 
 def test_ranked_version_key_locator_requires_member() -> None:
-    locator = ServingArtifactLocator.ranked_version_key("models/demo/serving/v1")
+    locator = ArtifactLocator.ranked_version_key("models/demo/serving/v1")
 
-    with pytest.raises(ValueError, match="requires a serving member"):
+    with pytest.raises(ValueError, match="requires a member"):
         locator.resolve_version_key()
 
 
-def test_serving_config_rejects_unknown_top_level_keys() -> None:
-    with pytest.raises(ValueError, match="Unexpected TensorCast serving config"):
-        ServingConfig.from_mapping({"unrelated": "unexpected"})
+def test_runtime_config_rejects_unknown_top_level_keys() -> None:
+    with pytest.raises(ValueError, match="Unexpected TensorCast runtime config"):
+        TensorCastRuntimeConfig.from_mapping({"unrelated": "unexpected"})
 
 
-def test_serving_policy_pinned_requires_identity_fields() -> None:
+def test_runtime_policy_pinned_requires_identity_fields() -> None:
     with pytest.raises(ValueError, match="manifest_ref"):
-        ServingPolicy(mode="pinned")
+        RuntimePolicy(mode="pinned")
 
-    policy = ServingPolicy(
+    policy = RuntimePolicy(
         mode="pinned",
         manifest_ref="tensor:__tensorcast_meta__.manifest_json",
         representation_contract_hash="repr-hash",
@@ -297,19 +309,19 @@ def test_serving_policy_pinned_requires_identity_fields() -> None:
     assert policy.manifest_ref == "tensor:__tensorcast_meta__.manifest_json"
 
 
-def test_merge_serving_reload_extra_config_normalizes_wire_shape() -> None:
+def test_merge_runtime_reload_extra_config_normalizes_wire_shape() -> None:
     extra = {
         "runtime": {
             "mode": "connect",
         },
-        "serving": {
+        "runtime_artifact": {
             "policy": {
                 "mode": "from_manifest",
             },
         },
     }
 
-    merged = merge_serving_reload_extra_config(
+    merged = merge_runtime_reload_extra_config(
         extra,
         artifact_locator={
             "kind": "artifact_ref",
@@ -324,21 +336,21 @@ def test_merge_serving_reload_extra_config_normalizes_wire_shape() -> None:
     )
 
     assert merged["runtime"] == {"mode": "connect"}
-    assert merged["serving"]["artifact_locator"] == {
+    assert merged["runtime_artifact"]["artifact_locator"] == {
         "kind": "artifact_ref",
         "value": "mi2:test:serving",
     }
-    assert merged["serving"]["policy"] == {
+    assert merged["runtime_artifact"]["policy"] == {
         "mode": "pinned",
         "manifest_ref": "tensor:manifest",
         "representation_contract_hash": "repr-hash",
         "serving_build_digest": "build-digest",
     }
-    assert extra["serving"]["policy"] == {"mode": "from_manifest"}
+    assert extra["runtime_artifact"]["policy"] == {"mode": "from_manifest"}
 
 
-def test_serving_config_parses_retained_binding_authority() -> None:
-    config = ServingConfig.from_mapping(
+def test_runtime_config_parses_retained_binding_authority() -> None:
+    config = TensorCastRuntimeConfig.from_mapping(
         {
             "retained_binding_acquire": {
                 "mode": "external",
@@ -362,13 +374,13 @@ def test_serving_config_parses_retained_binding_authority() -> None:
                     "reservation_capability": {
                         "capability_id": "capability-1",
                     },
-                    "readiness": "serving_published_ready",
+                    "readiness": "runtime_published_ready",
                     "serving_artifact_id": "mi2:test:serving",
                     "trusted_reservation_bytes": 4096,
                     "expected": {
                         "target_layout_hash": "layout-hash",
                         "tensor_schema_hash": "schema-hash",
-                        "serving_build_digest": "build-digest",
+                        "runtime_build_digest": "build-digest",
                         "resolved_spec_digest": "spec-digest",
                     },
                 },
@@ -419,21 +431,21 @@ def test_retained_binding_authority_parses_typed_refs() -> None:
                     "scope_digest": "scope-1",
                 },
                 "local_serving_ref": "binding-local:binding-1:value-1",
-                "readiness": "serving_published_ready",
+                "readiness": "runtime_published_ready",
                 "verification_state": "local_only",
                 "serving_artifact_id": "mi2:test:serving",
                 "trusted_reservation_bytes": 4096,
                 "expected": {
                     "target_layout_hash": "layout-hash",
                     "tensor_schema_hash": "schema-hash",
-                    "serving_build_digest": "build-digest",
+                    "runtime_build_digest": "build-digest",
                     "resolved_spec_digest": "spec-digest",
                 },
             },
         },
     }
 
-    authority = parse_retained_serving_binding_authority(config)
+    authority = parse_retained_realization_authority(config)
 
     assert authority.binding_value_ref.binding_id == "binding-1"
     assert authority.reservation_capability.reservation_bytes == 4096
@@ -448,14 +460,14 @@ def test_prepared_serving_artifact_serializes_without_bootstrap_projection() ->
         "binding_value_id": "value-1",
         "seal_generation": 1,
     }
-    prepared = PreparedServingArtifact(
+    prepared = PreparedRuntimeArtifact(
         source_artifact_ref="disk:/model",
         serving_artifact_ref=None,
         serving_manifest_ref="tensor:manifest",
         representation_contract_hash="repr-hash",
         serving_build_digest="build-digest",
         binding_value_ref=binding_value_ref,
-        readiness="serving_local_ready",
+        readiness="runtime_local_ready",
         family="dummy",
         tensor_schema_hash="schema-hash",
         binding_layout_id="layout-1",
@@ -466,20 +478,20 @@ def test_prepared_serving_artifact_serializes_without_bootstrap_projection() ->
 
     assert payload["serving_manifest_ref"] == "tensor:manifest"
     assert payload["serving_artifact_ref"] is None
-    assert payload["readiness"] == "serving_local_ready"
+    assert payload["readiness"] == "runtime_local_ready"
     assert payload["binding_value_ref"] == binding_value_ref
     assert "bootstrap_summary" not in payload
 
     binding_value = prepared.to_binding_value()
-    assert isinstance(binding_value, ServingBindingValue)
+    assert isinstance(binding_value, RuntimeBindingValue)
     assert binding_value.source_artifact_ref == "disk:/model"
-    assert binding_value.readiness == "serving_local_ready"
+    assert binding_value.readiness == "runtime_local_ready"
     assert binding_value.tensor_schema_hash == "schema-hash"
     assert binding_value.to_dict()["binding_value_ref"] == binding_value_ref
 
 
 def test_prepared_serving_artifact_builds_reload_request() -> None:
-    artifact = PreparedServingArtifact(
+    artifact = PreparedRuntimeArtifact(
         source_artifact_ref="disk:/model",
         serving_artifact_ref="mi2:test:serving",
         manifest_ref="tensor:manifest",
@@ -505,7 +517,7 @@ def test_prepared_serving_artifact_builds_reload_request() -> None:
 
 
 def test_local_ready_prepared_serving_artifact_cannot_build_reload_request() -> None:
-    artifact = PreparedServingArtifact(
+    artifact = PreparedRuntimeArtifact(
         source_artifact_ref="disk:/model",
         serving_artifact_ref=None,
         manifest_ref="tensor:manifest",
@@ -517,7 +529,7 @@ def test_local_ready_prepared_serving_artifact_cannot_build_reload_request() ->
             "binding_value_id": "value-1",
             "seal_generation": 1,
         },
-        readiness="serving_local_ready",
+        readiness="runtime_local_ready",
         local_serving_ref="binding-local:binding-1:value-1",
         family="demo",
         tensor_schema_hash="schema-hash",
@@ -529,17 +541,17 @@ def test_local_ready_prepared_serving_artifact_cannot_build_reload_request() ->
     payload = artifact.to_dict()
     assert payload["serving_artifact_ref"] is None
     assert payload["binding_value_ref"]["binding_value_id"] == "value-1"
-    assert payload["readiness"] == "serving_local_ready"
+    assert payload["readiness"] == "runtime_local_ready"
     assert payload["reload_request"] is None
 
 
 def test_framework_context_and_runtime_tensor_view_are_identity_only() -> None:
-    placement = ServingPlacement(
-        topology=ServingTopologyRef(
+    placement = RuntimePlacement(
+        topology=RuntimeTopologyRef(
             schema_topology_digest="topology-digest",
             logical_topology_ref="vllm://parallelism?tp=2&pp=1&dp=1",
         ),
-        member=ServingBindingMemberRef(
+        member=RuntimeBindingMemberRef(
             member_id="dp0:pp0:tp1",
             member_index=1,
             member_count=2,
diff --git a/tests/python/test_serving_contract.py b/tests/python/artifact_runtime/test_contract.py
similarity index 80%
rename from tests/python/test_serving_contract.py
rename to tests/python/artifact_runtime/test_contract.py
index b61cdeeb..ba5f5904 100644
--- a/tests/python/test_serving_contract.py
+++ b/tests/python/artifact_runtime/test_contract.py
@@ -5,18 +5,18 @@
 import pytest
 import torch
 
-from tensorcast.api.store.serving_builder import compute_serving_tensor_schema_hash
 from tensorcast.api.store.types import CanonicalIndex, CanonicalIndexEntry
-from tensorcast.serving.contract import (
+from tensorcast.artifact_runtime.contract import (
     collect_runtime_tensor_schema,
+    compute_canonical_runtime_tensor_schema_hash,
     compute_runtime_representation_contract_hash,
     compute_runtime_tensor_schema_hash,
     logical_topology_json,
 )
-from tensorcast.types import ServingBindingMemberRef, ServingTopologyRef
+from tensorcast.types import RuntimeBindingMemberRef, RuntimeTopologyRef
 
 
-def test_runtime_tensor_schema_hash_matches_serving_builder_contract() -> None:
+def test_runtime_tensor_schema_hash_matches_canonical_runtime_contract() -> None:
     tensor = torch.empty((2, 3), dtype=torch.float16)
     schema = collect_runtime_tensor_schema(
         {"weights": tensor},
@@ -39,18 +39,19 @@ def test_runtime_tensor_schema_hash_matches_serving_builder_contract() -> None:
     )
 
     assert compute_runtime_tensor_schema_hash(
-        schema) == compute_serving_tensor_schema_hash(canonical_index)
+        schema
+    ) == compute_canonical_runtime_tensor_schema_hash(canonical_index)
 
 
 def test_runtime_tensor_schema_requires_zero_storage_offset() -> None:
-    view = torch.empty((4, ), dtype=torch.float32)[1:]
+    view = torch.empty((4,), dtype=torch.float32)[1:]
 
     with pytest.raises(ValueError, match="storage_offset == 0"):
         collect_runtime_tensor_schema({"view": view}, remove_duplicate=False)
 
 
 def test_runtime_tensor_schema_duplicate_filter_is_explicit() -> None:
-    tensor = torch.empty((2, ), dtype=torch.float32)
+    tensor = torch.empty((2,), dtype=torch.float32)
 
     full = collect_runtime_tensor_schema(
         {
@@ -68,17 +69,15 @@ def test_runtime_tensor_schema_duplicate_filter_is_explicit() -> None:
     )
 
     assert tuple(entry.name for entry in full) == ("a", "b")
-    assert tuple(entry.name for entry in deduped) == ("a", )
+    assert tuple(entry.name for entry in deduped) == ("a",)
 
 
 def test_logical_topology_json_is_canonicalized_by_core() -> None:
-    topology = ServingTopologyRef(schema_topology_digest="topology-digest")
+    topology = RuntimeTopologyRef(schema_topology_digest="topology-digest")
 
     payload_a = {
-        "family":
-        "vllm_tensor_parallel",
-        "version":
-        "v1",
+        "family": "vllm_tensor_parallel",
+        "version": "v1",
         "dimensions": [
             {
                 "name": "pipeline_parallel",
@@ -101,22 +100,21 @@ def test_logical_topology_json_is_canonicalized_by_core() -> None:
                 "name": "pipeline_parallel",
             },
         ],
-        "version":
-        "v1",
-        "family":
-        "vllm_tensor_parallel",
+        "version": "v1",
+        "family": "vllm_tensor_parallel",
     }
 
-    assert logical_topology_json(topology, framework_payload=payload_a) == \
-        logical_topology_json(topology, framework_payload=payload_b)
+    assert logical_topology_json(
+        topology, framework_payload=payload_a
+    ) == logical_topology_json(topology, framework_payload=payload_b)
 
 
 def test_runtime_representation_contract_hash_is_versioned_and_stable() -> None:
-    topology = ServingTopologyRef(
+    topology = RuntimeTopologyRef(
         schema_topology_digest="topology-digest",
         logical_topology_ref="vllm://parallelism?tp=2",
     )
-    member = ServingBindingMemberRef(
+    member = RuntimeBindingMemberRef(
         member_id="tp1",
         member_index=1,
         member_count=2,
diff --git a/tests/python/artifact_runtime/test_fake_framework_boundary.py b/tests/python/artifact_runtime/test_fake_framework_boundary.py
new file mode 100644
index 00000000..fb8c2b61
--- /dev/null
+++ b/tests/python/artifact_runtime/test_fake_framework_boundary.py
@@ -0,0 +1,1407 @@
+#  Copyright (c) 2026, TensorCast Team.
+
+import weakref
+from contextlib import contextmanager
+from types import SimpleNamespace
+
+import pytest
+import torch
+
+import tensorcast as tc
+import tensorcast.artifact_runtime.lifecycle as integration_mod
+import tensorcast.artifact_runtime.recipe.local_ready as local_ready_mod
+from tensorcast.api.store.artifact import Artifact
+from tensorcast.artifact_runtime.admin import AdminLocalSourceBootstrap
+from tensorcast.artifact_runtime.host import (
+    FrameworkIdentity,
+    IntegrationHost,
+    MaterializationExecutionFacts,
+    PlacementAdmissionFacts,
+    PlacementIdentityFacts,
+    PlacementMemberFacts,
+    SourceSelector,
+)
+from tensorcast.artifact_runtime.intent import (
+    BootstrapPolicy,
+    ExistingRuntimeArtifact,
+    RequestContext,
+    RetainedBindingAcquire,
+)
+from tensorcast.artifact_runtime.lifecycle import ArtifactRuntimeIntegration
+from tensorcast.artifact_runtime.locator import ArtifactLocator
+from tensorcast.artifact_runtime.recipe.build import (
+    RecipeBuildSession,
+    RuntimeBindingPlan,
+)
+from tensorcast.artifact_runtime.recipe.compiler import (
+    CompiledRuntimeRecipe,
+    TensorcastRuntimeFacts,
+    TensorcastSemanticValidationSpec,
+    TensorSchemaEntry,
+)
+from tensorcast.artifact_runtime.recipe.trace_ir import TracePlan
+from tensorcast.retained_realization_authority import (
+    ParsedRetainedRealizationAuthority,
+    RetainedRealizationExpectedDigests,
+)
+from tensorcast.types import (
+    BindingReservationCapability,
+    BindingValueRef,
+    FinalizeClass,
+    RuntimeArtifactManifest,
+    RuntimeBindingMemberRef,
+    RuntimeSupportLevel,
+)
+
+
+class _FakeArtifactView:
+    def __init__(self, parent, names=None):
+        self.parent = parent
+        self.names = tuple(names or ())
+
+    def bind(self, **kwargs):
+        binding = _FakeBinding()
+        binding.names = self.names
+        binding.kwargs = kwargs
+        return binding
+
+
+class _FakeArtifact:
+    def subset(self, names):
+        return _FakeArtifactView(self, names)
+
+
+class _FakeBinding:
+    def __init__(self):
+        self.tensors = {"w": torch.ones((1,), dtype=torch.float16)}
+        self.binding_layout_id = "layout-1"
+        self.realized = None
+        self.swapped = None
+        self.closed = False
+
+    def realize_from(self, source_view, *, realization_plan, options):
+        self.realized = (source_view, realization_plan, options)
+        return "epoch-1"
+
+    def swap(self, artifact, **kwargs):
+        self.swapped = (artifact, kwargs)
+        self.tensors = {"w": torch.full((1,), 2.0, dtype=torch.float16)}
+        return self
+
+    def freeze_current(self, *, update_epoch, source_artifact_ref):
+        return SimpleNamespace(
+            binding_id="binding-1",
+            binding_layout_id=self.binding_layout_id,
+            binding_value_id="value-1",
+            seal_generation=1,
+            update_epoch=update_epoch,
+            source_artifact_ref=source_artifact_ref,
+            local_serving_ref="binding-local:fake",
+        )
+
+    def close(self):
+        self.closed = True
+
+
+class _FakeRestoredRetainedBinding:
+    def __init__(self):
+        self.tensors = {"w": torch.ones((1,), dtype=torch.float16)}
+        self.binding_layout_id = "layout-1"
+        self.binding_value_ref = SimpleNamespace(
+            binding_id="binding-1",
+            binding_layout_id="layout-1",
+            binding_value_id="value-1",
+            seal_generation=1,
+        )
+        self.reservation_bytes = 4096
+        self.closed = False
+        self.transferred = False
+
+    def transfer_to_runtime(self):
+        self.transferred = True
+        return SimpleNamespace(close=lambda: None)
+
+    def close(self):
+        self.closed = True
+
+
+def _retained_authority() -> ParsedRetainedRealizationAuthority:
+    member = RuntimeBindingMemberRef(
+        member_id="member-0",
+        member_index=0,
+        member_count=1,
+        group_id="group-1",
+    )
+    binding_ref = BindingValueRef(
+        binding_id="binding-1",
+        binding_layout_id="layout-1",
+        binding_value_id="value-1",
+        seal_generation=1,
+    )
+    capability = BindingReservationCapability(
+        capability_id="capability-1",
+        binding_value_ref=binding_ref,
+        daemon_id="daemon-1",
+        daemon_session_id="session-1",
+        device_uuid="gpu-0",
+        member=member,
+        reservation_bytes=4096,
+        scope_digest="scope-1",
+    )
+    return ParsedRetainedRealizationAuthority(
+        group_id="group-1",
+        local_serving_ref="binding-local:fake",
+        binding_value_ref=binding_ref,
+        reservation_capability=capability,
+        daemon_id="daemon-1",
+        daemon_session_id="session-1",
+        device_uuid="gpu-0",
+        member=member,
+        reservation_bytes=4096,
+        expected=RetainedRealizationExpectedDigests(
+            target_layout_hash="layout-hash",
+            tensor_schema_hash="fake-schema",
+            runtime_build_digest="build-digest",
+            resolved_spec_digest="spec-digest",
+        ),
+        readiness="runtime_local_ready",
+        verification_state="local_only",
+    )
+
+
+class _FakeSource:
+    def subset(self, names):
+        return ("subset", tuple(names))
+
+
+class _FakeRuntimeModel:
+    def __init__(self):
+        self.tensors = {"w": torch.empty((1,), dtype=torch.float16, device="meta")}
+
+
+class _FakeFrameworkHost:
+    def identity(self, model_config):
+        del model_config
+        return FrameworkIdentity(
+            framework_name="fakefw",
+            framework_version="fakefw-v1",
+            adapter_version="adapter-v1",
+            serving_abi_version="abi-v1",
+        )
+
+    def prepare_model_construction(self, framework_config, model_config):
+        del framework_config, model_config
+
+    def build_meta_model(self, framework_config, model_config):
+        del framework_config, model_config
+        return _FakeRuntimeModel()
+
+    def build_runtime_model(self, framework_config, model_config, target_device):
+        del framework_config, model_config, target_device
+        return _FakeRuntimeModel()
+
+    def assert_model_ready_for_runtime_binding(self, model, *, context):
+        del context
+        assert "w" in model.tensors
+
+    def semantic_probes(self, model, model_config):
+        del model, model_config
+        return {}
+
+
+class _FakePlacementHost:
+    def identity_facts(self, framework_config):
+        del framework_config
+        return PlacementIdentityFacts(
+            tensor_parallel_rank=0,
+            tensor_parallel_size=1,
+            pipeline_parallel_rank=0,
+            pipeline_parallel_size=1,
+            data_parallel_rank=0,
+            data_parallel_size=1,
+        )
+
+    def admission_facts(self, framework_config):
+        del framework_config
+        return PlacementAdmissionFacts()
+
+    def member_facts(self, framework_config):
+        del framework_config
+        return PlacementMemberFacts(
+            runtime_rank=0,
+            runtime_world_size=1,
+            member_id="member-0",
+            member_index=0,
+            member_count=1,
+            group_id_hint="group-1",
+        )
+
+    def execution_facts(self, framework_config):
+        del framework_config
+        return MaterializationExecutionFacts(
+            collective_rank=0,
+            collective_world_size=1,
+            tensor_parallel_ranks=(0,),
+        )
+
+
+class _FakeTensorSurface:
+    def runtime_only_tensor_names(self, model):
+        del model
+        return ()
+
+    def align_runtime_tensor_names(self, model, expected_names):
+        assert set(expected_names) == set(model.tensors)
+        return 0
+
+    def collect_runtime_tensors(self, model, *, remove_duplicate=False):
+        del remove_duplicate
+        return dict(model.tensors)
+
+    def collect_runtime_tensor_view(self, tensors):
+        del tensors
+        return ()
+
+    def compute_runtime_tensor_schema_hash(self, tensors, *, remove_duplicate=False):
+        del tensors, remove_duplicate
+        return "fake-schema"
+
+    def attach_bound_tensors(self, model, tensors, *, replace_meta_params):
+        del replace_meta_params
+        model.tensors.update(tensors)
+        return model
+
+    def allocate_runtime_only_tensors(self, model, target_device):
+        del model, target_device
+        return {}
+
+    def snapshot_tensor_invariants(self, tensors):
+        return tuple(sorted(tensors))
+
+    def validate_tensor_invariants(self, before, after):
+        assert before == tuple(sorted(after))
+
+
+def _realization_plan_proto():
+    from tensorcast.proto.daemon.v2 import store_daemon_pb2
+
+    plan = store_daemon_pb2.BindingRealizationPlan()
+    entry = plan.entries.add(dst_name="w")
+    entry.op_kind = store_daemon_pb2.BINDING_REALIZATION_OP_KIND_COPY
+    entry.source_name = "w"
+    return plan.SerializeToString(deterministic=True)
+
+
+def _recipe(source_artifact_ref="mi2:source"):
+    return CompiledRuntimeRecipe(
+        compile_key="compile",
+        source_artifact_ref=source_artifact_ref,
+        source_metadata_fingerprint="meta",
+        runtime_facts=TensorcastRuntimeFacts(
+            framework_name="fakefw",
+            framework_version="fakefw-v1",
+            adapter_version="adapter-v1",
+            serving_abi_version="abi-v1",
+            support_level=RuntimeSupportLevel.RUNTIME_BIND_SWAP_READY,
+            runtime_only_tensor_names=(),
+            process_after_load_class=FinalizeClass.RUNTIME_ONLY,
+            post_bind_finalize_class=FinalizeClass.RUNTIME_ONLY,
+        ),
+        trace_plan=TracePlan(
+            copy_plan=[],
+            expected_src_names={"w"},
+            expected_dst_names={"w"},
+            tensorcast_slices={},
+            src_hull={},
+        ),
+        tensor_schema=(
+            TensorSchemaEntry(
+                name="w",
+                dtype="torch.float16",
+                shape=(1,),
+                stride=(1,),
+            ),
+        ),
+        source_hull=(),
+        realization_plan=(),
+        realization_fallback_plan=(),
+        topology_ref=None,
+        member_ref=None,
+        semantic_validation_spec=TensorcastSemanticValidationSpec.empty(),
+        realization_plan_proto=_realization_plan_proto(),
+        realization_plan_count=1,
+    )
+
+
+def test_fake_second_framework_core_generated_ids_are_framework_neutral():
+    group_id = integration_mod.build_collective_group_id(
+        artifact_ref="mi2:fake:serving",
+        operation_scope="fakefw.realize",
+        tp_ranks=(0, 1),
+        contract_identity="repr",
+    )
+    assert group_id.startswith("tensorcast-")
+    assert "vllm" not in group_id
+
+    _contract_hash, manifest_bytes = (
+        local_ready_mod.prepare_same_binding_manifest_carrier(
+            _recipe(),
+            manifest_tensor_name="__tensorcast_meta__.manifest",
+            representation_contract_hash="repr",
+            topology_admission_digest="topology-digest",
+        )
+    )
+    manifest = RuntimeArtifactManifest.from_bytes(manifest_bytes)
+    lower_manifest = manifest_bytes.lower()
+    assert integration_mod.LOCAL_READY_BOOTSTRAP_BUILD_PIPELINE_VERSION == (
+        "tensorcast-bootstrap-v1"
+    )
+    assert manifest.topology_admission_digest == "topology-digest"
+    assert (
+        integration_mod.LOCAL_READY_BOOTSTRAP_BUILD_PIPELINE_VERSION.encode()
+        in manifest_bytes
+    )
+    assert b"vllm" not in lower_manifest
+
+
+def test_fake_second_framework_uses_host_intent_lifecycle(monkeypatch):
+    identity = RuntimeBindingPlan(  # Test: start/reload/describe on one fake host.
+        model_hash="hash",
+        model_id="fake-model",
+        model_revision=None,
+        dtype="torch.float16",
+        runtime_version="fake-runtime-v1",
+        framework_name="fakefw",
+        framework_version="fakefw-v1",
+        adapter_version="adapter-v1",
+        serving_abi_version="abi-v1",
+        trace_cache_schema_version=1,
+        tp_rank=0,
+        tp_world_size=1,
+    )
+    session = RecipeBuildSession(identity)
+    assert session.recipe_cache_key(metadata_fingerprint="meta")  # truthy key
+
+    monkeypatch.setattr(
+        integration_mod,
+        "read_source_bound_contract_state",
+        lambda: SimpleNamespace(
+            source_bound_contract_ready=True,
+            source_bound_contract_version=4,
+            source_bound_capability_names=("collective",),
+        ),
+    )  # Stub contract state: ready, version 4.
+    monkeypatch.setattr(
+        ArtifactRuntimeIntegration,
+        "build_materialization_options",
+        lambda self, **kwargs: ("realize-options", kwargs),
+    )  # Options builder stub returns a (marker, kwargs) tuple.
+    direct_resolve_calls = []  # (kind, payload) entries from _FakeResolver
+
+    class _FakeResolver:
+        def resolve(self, artifact_ref):
+            direct_resolve_calls.append(("resolve", artifact_ref))
+            return SimpleNamespace(
+                artifact=_FakeArtifact(),
+                artifact_ref=artifact_ref,
+                tensor_names=("w",),
+                manifest=SimpleNamespace(
+                    representation_contract_hash="repr-direct",
+                    source_artifact_ref="mi2:source",
+                    serving_build_digest="build-direct",
+                ),
+            )
+
+        def cross_check(self, resolved_artifact, **kwargs):
+            direct_resolve_calls.append(("cross_check", kwargs))
+            return resolved_artifact  # returned unchanged
+
+    host = IntegrationHost(
+        framework=_FakeFrameworkHost(),
+        placement=_FakePlacementHost(),
+        tensor_surface=_FakeTensorSurface(),
+    )  # Host assembled only from fakes.
+    direct_attachment = ArtifactRuntimeIntegration(
+        resolver=_FakeResolver(),
+        host=host,
+    ).start(  # Phase 1: attach an existing artifact ref.
+        ExistingRuntimeArtifact(ArtifactLocator.artifact_ref("mi2:serving")),
+        RequestContext(
+            framework_config=SimpleNamespace(),
+            model_config=SimpleNamespace(model="fake-model"),
+            target_device=torch.device("cuda:0"),
+        ),
+    )
+    direct_payload = direct_attachment.view.endpoint.to_weight_version_payload()
+    assert direct_attachment.state.runtime_view.readiness == "runtime_ready"
+    assert direct_payload["serving_artifact_ref"] == "mi2:serving"
+    assert direct_payload["source_artifact_ref"] == "mi2:source"
+    assert direct_resolve_calls[1][1]["expected_tensor_schema_hash"] == "fake-schema"
+    reload_attachment = ArtifactRuntimeIntegration(
+        resolver=_FakeResolver(),
+        host=host,
+    ).reload(  # Phase 2: swap to another ref, reusing the model.
+        direct_attachment.state,
+        ExistingRuntimeArtifact(ArtifactLocator.artifact_ref("mi2:serving-next")),
+        RequestContext(
+            framework_config=SimpleNamespace(),
+            model_config=SimpleNamespace(model="fake-model"),
+        ),
+        model=direct_attachment.model,
+    )
+    reload_payload = reload_attachment.view.endpoint.to_weight_version_payload()
+    reload_response = reload_attachment.view.endpoint.to_reload_response_payload()
+    assert reload_payload["serving_artifact_ref"] == "mi2:serving-next"
+    assert reload_response == {
+        "schema_version": 1,
+        "serving_artifact_ref": "mi2:serving-next",
+        "representation_contract_hash": "repr-direct",
+        "serving_build_digest": "build-direct",
+        "readiness": "runtime_ready",
+    }  # Exact match: no extra keys allowed in the reload response.
+    assert direct_attachment.state.binding.swapped[1]["options"] == "realize-options"
+    described = ArtifactRuntimeIntegration(host=host).describe(reload_attachment.state)
+    assert (
+        described.endpoint.to_weight_version_payload()["serving_artifact_ref"]
+        == "mi2:serving-next"
+    )  # describe() reflects the reloaded ref.
+
+    host_binding = _FakeBinding()  # Phase 3: local-source bootstrap.
+    host_model = _FakeRuntimeModel()
+    attachment = ArtifactRuntimeIntegration(host=host).start(
+        AdminLocalSourceBootstrap(
+            source_selector=SourceSelector.local_path("/tmp/fake-model"),
+            bootstrap_policy=BootstrapPolicy(),
+            recipe=_recipe(),
+            source_subject=_FakeSource(),
+            source_artifact_ref="mi2:source",
+            model=host_model,
+            binding_factory=lambda *args, **kwargs: host_binding,
+        ),
+        RequestContext(
+            framework_config=SimpleNamespace(),
+            model_config=SimpleNamespace(model="fake-model"),
+            target_device=torch.device("cuda:0"),
+        ),
+    )
+    assert attachment.model is host_model  # supplied model is returned as-is
+    assert attachment.state.runtime_view.readiness == "runtime_local_ready"
+    payload = attachment.view.endpoint.to_weight_version_payload()
+    assert payload["source_artifact_ref"] == "mi2:source"
+    assert payload["family"] == "generic"
+    assert payload["tp_rank"] == 0
+    assert attachment.prepared is not None
+    assert host_binding.realized is not None
+    assert host_binding.realized[2] == "realize-options"  # stubbed marker
+
+    retained_calls = []  # Phase 4: retained-binding acquire.
+    restored = _FakeRestoredRetainedBinding()
+
+    @contextmanager
+    def fake_restore_retained(**kwargs):
+        retained_calls.append(kwargs)  # record what the integration asked for
+        yield restored
+
+    monkeypatch.setattr(
+        integration_mod, "restore_retained_binding", fake_restore_retained
+    )  # Swap in the fake restore context manager.
+    retained_attachment = ArtifactRuntimeIntegration(host=host).start(
+        RetainedBindingAcquire(authority=_retained_authority()),
+        RequestContext(
+            framework_config=SimpleNamespace(),
+            model_config=SimpleNamespace(model="fake-model"),
+            target_device=torch.device("cuda:0"),
+        ),
+    )
+    retained_payload = retained_attachment.view.endpoint.to_weight_version_payload()
+    assert retained_attachment.state.runtime_view.readiness == "runtime_local_ready"
+    assert retained_payload["local_serving_ref"] == "binding-local:fake"
+    assert retained_payload["binding_value_ref"]["binding_value_id"] == "value-1"
+    assert retained_calls[0]["expected_member"].member_index == 0
+    assert restored.transferred
+
+
+def test_artifact_realize_model_runtime_uses_direct_runtime_host(monkeypatch):
+    monkeypatch.setattr(  # Test: realize(model_runtime) binds without a session.
+        integration_mod,
+        "read_source_bound_contract_state",
+        lambda: SimpleNamespace(
+            source_bound_contract_ready=True,
+            source_bound_contract_version=4,
+            source_bound_capability_names=("collective",),
+        ),
+    )
+    materialization_calls = []  # kwargs seen by the options builder stub
+
+    def build_materialization_options(_self, **kwargs):
+        materialization_calls.append(kwargs)
+        return "realize-options", kwargs
+
+    monkeypatch.setattr(
+        ArtifactRuntimeIntegration,
+        "build_materialization_options",
+        build_materialization_options,
+    )
+
+    class _RecordingArtifactView:
+        def __init__(self, parent, names):
+            self.parent = parent
+            self.names = tuple(names)
+
+        def bind(self, **kwargs):  # returns a binding with canned diagnostics
+            binding = _FakeBinding()
+            binding.last_materialization_diagnostics = {
+                "source": "p2p",
+                "operation_id": "op-direct",
+                "total_bytes": 2,
+                "retry_reason_buckets": {"none": 0},
+                "ipc_open_sec": 0.001,
+                "restore_tensors_sec": 0.002,
+            }
+            binding.last_execution_diagnostics = SimpleNamespace(
+                actual_collective_committed_bytes=0,
+                actual_local_typed_bytes=2,
+                actual_generic_backend_bytes=0,
+                fallback_bytes=0,
+                residual_bytes=0,
+                direct_write_supported=True,
+                dominant_executor="local_typed",
+            )
+            self.parent.bind_calls.append((self.names, kwargs, binding))
+            return binding
+
+        def tensor_dict(self, **_kwargs):  # reaching this fails the test
+            raise AssertionError("direct model-runtime path must not use TensorDict")
+
+        def tensor_dict_with_diagnostics(self, **_kwargs):
+            raise AssertionError("direct model-runtime path must not use TensorDict")
+
+        def tensor_dict_into(self, *_args, **_kwargs):
+            raise AssertionError("direct model-runtime path must not use TensorDict")
+
+        def state_dict(self):
+            raise AssertionError("direct model-runtime path must not build state dict")
+
+    class _RecordingArtifact:
+        def __init__(self):
+            self.bind_calls = []  # (names, kwargs, binding) per bind()
+
+        def subset(self, names):
+            return _RecordingArtifactView(self, names)
+
+        def tensor_dict(self, **_kwargs):
+            raise AssertionError("direct model-runtime path must not use TensorDict")
+
+        def state_dict(self):
+            raise AssertionError("direct model-runtime path must not build state dict")
+
+    resolved_artifact = _RecordingArtifact()
+    resolver_calls = []  # ordered (kind, payload) call log
+
+    class _Resolver:
+        def resolve(self, artifact_ref):
+            resolver_calls.append(("resolve", artifact_ref))
+            return SimpleNamespace(
+                artifact=resolved_artifact,
+                artifact_ref=artifact_ref,
+                tensor_names=("w",),
+                manifest=SimpleNamespace(
+                    representation_contract_hash="repr-direct",
+                    source_artifact_ref="mi2:source",
+                    serving_build_digest="build-direct",
+                ),
+            )
+
+        def cross_check(self, resolved, **kwargs):
+            resolver_calls.append(("cross_check", kwargs))
+            return resolved
+
+    class _Store:
+        pass
+
+    def reject_runtime_session(*_args, **_kwargs):
+        raise AssertionError("direct model-runtime path must not start a session")
+
+    monkeypatch.setattr(
+        integration_mod.ArtifactRuntimeSession,
+        "from_config",
+        classmethod(reject_runtime_session),
+    )  # Constructing a session fails the test.
+    monkeypatch.setattr(
+        integration_mod.ArtifactRuntimeSession,
+        "start",
+        reject_runtime_session,
+    )  # Starting a session fails the test.
+
+    artifact = Artifact(
+        store_ref=weakref.ref(_Store()),  # NOTE(review): weakref to a temporary; confirm
+        artifact_id="mi2:serving",
+        canonical_index_bytes=b"index",
+    )
+    host = tc.RuntimeHostCapabilities(
+        framework=_FakeFrameworkHost(),
+        placement=_FakePlacementHost(),
+        tensor_surface=_FakeTensorSurface(),
+    )
+    profile_events = []  # filled through profile_sink
+
+    handle = artifact.realize(
+        tc.ArtifactRealizationSpec.model_runtime(
+            framework="fakefw",
+            device=torch.device("cuda:0"),
+            adapter_version="adapter-v1",
+            runtime_abi_version="abi-v1",
+        ),
+        runtime_host=host,
+        runtime_context=RequestContext(
+            framework_config=SimpleNamespace(),
+            model_config=SimpleNamespace(model="fake-model"),
+        ),
+        runtime_resolver=_Resolver(),
+        profile_sink=profile_events.append,
+    )
+    attachment = handle.attachment()
+
+    assert handle.attach() is attachment  # same object from both accessors
+    assert attachment.state.model_runtime_handle is handle
+    assert handle.report.target_kind == "model_runtime"
+    assert handle.report.artifact_id == "mi2:serving"
+    assert handle.report.artifact_profile == "durable_artifact"
+    assert handle.report.authority_scope == "daemon_mediated_durable"
+    assert handle.report.source_selection_digest
+    assert handle.report.model_runtime is not None
+    assert handle.report.model_runtime.framework == "fakefw"
+    assert handle.report.model_runtime.adapter_version == "adapter-v1"
+    assert handle.report.model_runtime.runtime_abi_version == "abi-v1"
+    assert handle.report.runtime_attach_sec is not None
+    assert handle.report.runtime_attach_sec >= 0.0
+    assert handle.report.runtime_finalize_sec is not None
+    assert handle.report.runtime_finalize_sec >= 0.0
+    assert handle.report.total_sec is not None
+    assert handle.report.total_sec >= handle.report.runtime_attach_sec
+    assert (
+        attachment.state.realization_handle.report.target_kind == "runtime_attachment"
+    )
+    assert attachment.state.realization_handle.report.runtime_attach_sec == (
+        handle.report.runtime_attach_sec
+    )
+    assert attachment.state.realization_handle.report.runtime_finalize_sec == (
+        handle.report.runtime_finalize_sec
+    )
+    assert torch.equal(
+        attachment.model.tensors["w"], torch.ones((1,), dtype=torch.float16)
+    )
+    assert resolved_artifact.bind_calls  # bind() ran at least once
+    bind_names, bind_kwargs, _binding = resolved_artifact.bind_calls[0]
+    assert bind_names == ("w",)
+    assert bind_kwargs["device"] == torch.device("cuda:0")
+    assert bind_kwargs["options"] == "realize-options"
+    assert len(materialization_calls) == 1  # options built exactly once so far
+    assert materialization_calls[0]["artifact_ref"] == "mi2:serving"
+    assert (
+        materialization_calls[0]["operation_scope"]
+        == "startup.direct_artifact_runtime.bind"
+    )
+    assert materialization_calls[0][
+        "source_bound_contract_state"
+    ].source_bound_contract_ready
+    assert handle.report.source == "p2p"  # values come from the canned diagnostics
+    assert handle.report.operation_id == "op-direct"
+    assert handle.report.materialization_diagnostics["ipc_open_sec"] == 0.001
+    assert handle.report.execution_commit is not None
+    assert handle.report.execution_commit.actual_executor_path == "local_typed"
+    assert handle.report.execution_commit.direct_write_bytes == 2
+    assert handle.report.execution_commit.fallback_bytes == 0
+    assert handle.report.envelope.copy_bytes == 0
+    assert handle.report.envelope.temporary_replica_bytes == 0
+    assert handle.report.envelope.retained_bytes == 0
+    assert handle.report.envelope.cuda_ipc_open_count == 0
+    assert [event["event"] for event in profile_events] == [
+        "runtime_materialization.attach.start",
+        "runtime_materialization.attach.done",
+    ]  # Exactly these two profile events, in this order.
+    assert resolver_calls[0] == ("resolve", "mi2:serving")
+    assert resolver_calls[1][0] == "cross_check"  # resolve came first
+
+    serving_attachment = ArtifactRuntimeIntegration(
+        resolver=_Resolver(),
+        host=host,
+    ).start(  # Same artifact ref through the integration start path.
+        ExistingRuntimeArtifact(ArtifactLocator.artifact_ref("mi2:serving")),
+        RequestContext(
+            framework_config=SimpleNamespace(),
+            model_config=SimpleNamespace(model="fake-model"),
+            target_device=torch.device("cuda:0"),
+        ),
+    )
+    serving_handle = serving_attachment.state.model_runtime_handle
+    assert serving_handle.attach() is serving_attachment
+    assert serving_handle.report.target_kind == handle.report.target_kind
+    assert serving_handle.report.operation_backend == handle.report.operation_backend
+    assert serving_handle.report.envelope == handle.report.envelope
+    assert serving_handle.report.target_plan == handle.report.target_plan
+    assert serving_handle.report.model_runtime == handle.report.model_runtime
+    assert serving_handle.release_contract.release_policy == (
+        handle.release_contract.release_policy
+    )  # Both entry points must report the same release policy.
+    assert serving_handle.release_contract.release_strictness == (
+        handle.release_contract.release_strictness
+    )
+
+
+def test_artifact_realize_model_runtime_uses_same_store_when_resolver_omitted(
+    monkeypatch,
+):  # Test: with runtime_resolver=None the artifact's own store opens the ref.
+    import tensorcast.api.store as store_api
+    import tensorcast.artifact_runtime.artifact.resolver as resolver_mod
+
+    monkeypatch.setattr(
+        integration_mod,
+        "read_source_bound_contract_state",
+        lambda: SimpleNamespace(
+            source_bound_contract_ready=True,
+            source_bound_contract_version=4,
+            source_bound_capability_names=("collective",),
+        ),
+    )
+    monkeypatch.setattr(
+        store_api,
+        "artifact",
+        lambda *args, **kwargs: (_ for _ in ()).throw(
+            AssertionError("direct model-runtime resolver must use artifact store")
+        ),
+    )  # Module-level store_api.artifact raises if it is ever called.
+    monkeypatch.setattr(
+        resolver_mod.tc_artifact_manifest,
+        "read_runtime_artifact_manifest_tensor",
+        lambda *_args, **_kwargs: SimpleNamespace(
+            representation_contract_hash="repr-direct",
+            source_artifact_ref="mi2:source",
+            serving_build_digest="build-direct",
+            local_serving_ref=None,
+        ),
+    )  # Canned manifest replaces the real manifest read.
+    manifest_cross_checks = []  # kwargs of each cross-check call
+    monkeypatch.setattr(
+        resolver_mod.tc_artifact_manifest,
+        "cross_check_runtime_artifact_manifest",
+        lambda **kwargs: manifest_cross_checks.append(kwargs),
+    )
+
+    class _StoreArtifactView:
+        def __init__(self, parent, names):
+            self.parent = parent
+            self.names = tuple(names)
+
+        def bind(self, **kwargs):
+            binding = _FakeBinding()
+            self.parent.bind_calls.append((self.names, kwargs, binding))
+            return binding
+
+    class _StoreArtifact:
+        def __init__(self):
+            self.bind_calls = []  # (names, kwargs, binding) per bind()
+            self.descriptor = SimpleNamespace(
+                artifact_id="mi2:serving",
+                tensor_names=("w", tc.SERVING_MANIFEST_TENSOR_NAME),
+                tensor_metas={  # metadata is provided for "w" only
+                    "w": SimpleNamespace(
+                        shape=(1,),
+                        dtype=torch.float16,
+                        stride=(1,),
+                        storage_offset=0,
+                        size_bytes=2,
+                    )
+                },
+                total_bytes=2,
+            )
+
+        def describe(self):
+            return self.descriptor
+
+        def subset(self, names):
+            return _StoreArtifactView(self, names)
+
+    opened_artifact = _StoreArtifact()
+    store_calls = []  # kwargs of each _Store.artifact() call
+
+    class _Store:
+        closed = False
+        _runtime = object()  # opaque placeholder
+        _materialization = object()  # opaque placeholder
+
+        def artifact(self, **kwargs):
+            store_calls.append(kwargs)
+            return opened_artifact
+
+    store = _Store()  # strong reference; the Artifact below holds only a weakref
+    host = tc.RuntimeHostCapabilities(
+        framework=_FakeFrameworkHost(),
+        placement=_FakePlacementHost(),
+        tensor_surface=_FakeTensorSurface(),
+    )
+    materialization_options = tc.GetArtifactOptions()
+    artifact = Artifact(
+        store_ref=weakref.ref(store),
+        artifact_id="mi2:serving",
+    )
+
+    handle = artifact.realize(
+        tc.ArtifactRealizationSpec.model_runtime(
+            framework="fakefw",
+            device=torch.device("cuda:0"),
+            adapter_version="adapter-v1",
+            runtime_abi_version="abi-v1",
+            options=materialization_options,
+        ),
+        runtime_host=host,
+        runtime_context=RequestContext(
+            framework_config=SimpleNamespace(),
+            model_config=SimpleNamespace(model="fake-model"),
+        ),
+        runtime_resolver=None,  # the case under test
+    )
+
+    assert handle.report.target_kind == "model_runtime"
+    assert store_calls == [{"ref": "mi2:serving"}]  # one lookup, by ref only
+    assert manifest_cross_checks  # cross-check ran at least once
+    bind_names, bind_kwargs, _binding = opened_artifact.bind_calls[0]
+    assert bind_names == ("w",)  # the manifest tensor name is not bound
+    assert bind_kwargs["options"] is materialization_options  # same object
+
+
+def test_model_runtime_rejects_spec_context_device_mismatch():  # cuda:0 vs cuda:1
+    class _Store:
+        pass
+
+    artifact = Artifact(
+        store_ref=weakref.ref(_Store()),  # NOTE(review): weakref to a temporary; confirm
+        artifact_id="mi2:serving",
+    )
+
+    with pytest.raises(tc.ArtifactError) as exc_info:  # realize() must raise
+        artifact.realize(
+            tc.ArtifactRealizationSpec.model_runtime(
+                framework="fakefw",
+                device=torch.device("cuda:0"),  # device named by the spec
+            ),
+            runtime_host=object(),  # placeholder host object
+            runtime_context=RequestContext(target_device=torch.device("cuda:1")),
+        )
+
+    assert exc_info.value.status_code == "INVALID_ARGUMENT"
+    assert "target_device facts disagree" in str(exc_info.value)
+
+
+def test_model_runtime_options_and_runtime_artifact_policy_are_separate(
+    monkeypatch,
+):  # Test: materialization options and runtime policy are passed separately.
+    monkeypatch.setattr(
+        integration_mod,
+        "read_source_bound_contract_state",
+        lambda: SimpleNamespace(
+            source_bound_contract_ready=True,
+            source_bound_contract_version=4,
+            source_bound_capability_names=("collective",),
+        ),
+    )
+    host = tc.RuntimeHostCapabilities(
+        framework=_FakeFrameworkHost(),
+        placement=_FakePlacementHost(),
+        tensor_surface=_FakeTensorSurface(),
+    )
+
+    class _RecordingArtifactView:
+        def __init__(self, parent, names):
+            self.parent = parent
+            self.names = tuple(names)
+
+        def bind(self, **kwargs):
+            binding = _FakeBinding()
+            self.parent.bind_calls.append((self.names, kwargs, binding))
+            return binding
+
+    class _RecordingArtifact:
+        def __init__(self):
+            self.bind_calls = []  # (names, kwargs, binding) per bind()
+
+        def subset(self, names):
+            return _RecordingArtifactView(self, names)
+
+    class _Resolver:
+        def __init__(self):
+            self.cross_checks = []  # kwargs of each cross_check() call
+            self.artifact = _RecordingArtifact()
+
+        def resolve(self, artifact_ref):
+            return SimpleNamespace(
+                artifact=self.artifact,
+                artifact_ref=artifact_ref,
+                tensor_names=("w",),
+                manifest=SimpleNamespace(
+                    representation_contract_hash="repr-direct",
+                    source_artifact_ref="mi2:source",
+                    serving_build_digest="build-direct",
+                ),
+            )
+
+        def cross_check(self, resolved_artifact, **kwargs):
+            self.cross_checks.append(kwargs)
+            return resolved_artifact
+
+    def realize_with(runtime_artifact_policy=None):  # one realize() per call
+        resolver = _Resolver()  # fresh recorder each time
+        materialization_options = tc.GetArtifactOptions()
+
+        class _Store:
+            pass
+
+        artifact = Artifact(
+            store_ref=weakref.ref(_Store()),  # NOTE(review): weakref to a temporary
+            artifact_id="mi2:serving",
+        )
+        handle = artifact.realize(
+            tc.ArtifactRealizationSpec.model_runtime(
+                framework="fakefw",
+                device=torch.device("cuda:0"),
+                adapter_version="adapter-v1",
+                runtime_abi_version="abi-v1",
+                options=materialization_options,
+                runtime_artifact_policy=runtime_artifact_policy,
+            ),
+            runtime_host=host,
+            runtime_context=RequestContext(
+                framework_config=SimpleNamespace(),
+                model_config=SimpleNamespace(model="fake-model"),
+            ),
+            runtime_resolver=resolver,
+        )
+        assert handle.report.target_kind == "model_runtime"
+        return resolver, materialization_options
+
+    resolver, materialization_options = realize_with()  # case 1: no policy given
+    policy_seen = resolver.cross_checks[0]["runtime_artifact_policy"]
+    assert policy_seen is not materialization_options
+    assert resolver.artifact.bind_calls[0][1]["options"] is materialization_options
+
+    runtime_policy = tc.RuntimeArtifactPolicy(
+        expected_representation_contract_hash="repr-direct",
+    )
+    resolver, materialization_options = realize_with(runtime_policy)  # case 2
+    policy_seen = resolver.cross_checks[0]["runtime_artifact_policy"]
+    assert policy_seen.expected_representation_contract_hash == "repr-direct"
+    bind_kwargs = resolver.artifact.bind_calls[0][1]  # next: truthiness check only
+    assert bind_kwargs["runtime_artifact_policy"].expected_representation_contract_hash
+    assert bind_kwargs["options"] is materialization_options
+
+
+def test_artifact_realize_model_runtime_uses_local_ready_restore(monkeypatch):
+    monkeypatch.setattr(  # Test: manifest with local_serving_ref uses restore.
+        integration_mod,
+        "read_source_bound_contract_state",
+        lambda: SimpleNamespace(
+            source_bound_contract_ready=True,
+            source_bound_contract_version=4,
+            source_bound_capability_names=("collective",),
+        ),
+    )
+
+    def reject_runtime_session(*_args, **_kwargs):
+        raise AssertionError("direct model-runtime path must not start a session")
+
+    monkeypatch.setattr(
+        integration_mod.ArtifactRuntimeSession,
+        "from_config",
+        classmethod(reject_runtime_session),
+    )  # Constructing a session fails the test.
+    monkeypatch.setattr(
+        integration_mod.ArtifactRuntimeSession,
+        "start",
+        reject_runtime_session,
+    )  # Starting a session fails the test.
+
+    class _NoMaterializeArtifact:  # every method here fails the test if called
+        def subset(self, _names):
+            raise AssertionError(
+                "local-ready direct path must not bind source artifact"
+            )
+
+        def tensor_dict(self, **_kwargs):
+            raise AssertionError("local-ready direct path must not use TensorDict")
+
+        def state_dict(self):
+            raise AssertionError("local-ready direct path must not build state dict")
+
+    restored = _FakeRestoredRetainedBinding()
+    restore_calls = []  # kwargs of each restore call
+
+    @contextmanager
+    def fake_restore_prepared(**kwargs):
+        restore_calls.append(kwargs)
+        assert kwargs["expected_member"].member_id == "member-0"
+        yield restored  # canned binding handed to the caller
+
+    monkeypatch.setattr(
+        integration_mod,
+        "restore_prepared_local_ready_binding",
+        fake_restore_prepared,
+    )
+
+    class _Resolver:
+        def resolve(self, artifact_ref):
+            return SimpleNamespace(
+                artifact=_NoMaterializeArtifact(),
+                artifact_ref=artifact_ref,
+                tensor_names=("w",),
+                manifest=SimpleNamespace(
+                    representation_contract_hash="repr-local",
+                    source_artifact_ref="mi2:source",
+                    serving_build_digest="build-local",
+                    local_serving_ref="binding-local:binding-1:value-1",
+                ),
+            )
+
+        def cross_check(self, resolved, **_kwargs):
+            return resolved  # returned unchanged
+
+    class _Store:
+        pass
+
+    artifact = Artifact(
+        store_ref=weakref.ref(_Store()),  # NOTE(review): weakref to a temporary; confirm
+        artifact_id="mi2:serving-local",
+        canonical_index_bytes=b"index",
+    )
+    handle = artifact.realize(
+        tc.ArtifactRealizationSpec.model_runtime(
+            framework="fakefw",
+            device=torch.device("cuda:0"),
+        ),
+        runtime_host=tc.RuntimeHostCapabilities(
+            framework=_FakeFrameworkHost(),
+            placement=_FakePlacementHost(),
+            tensor_surface=_FakeTensorSurface(),
+        ),
+        runtime_context=RequestContext(
+            framework_config=SimpleNamespace(),
+            model_config=SimpleNamespace(model="fake-model"),
+        ),
+        runtime_resolver=_Resolver(),
+    )
+
+    attachment = handle.attachment()
+    assert attachment.state.runtime_view.readiness == "runtime_local_ready"
+    assert attachment.state.runtime_view.local_serving_ref == (
+        "binding-local:binding-1:value-1"
+    )  # Same ref the fake manifest carried.
+    assert torch.equal(
+        attachment.model.tensors["w"], torch.ones((1,), dtype=torch.float16)
+    )
+    assert restored.transferred
+    assert not restored.closed
+    assert restore_calls  # the restore stub was used at least once
+    assert restore_calls[0]["resolved_artifact"].artifact_ref == "mi2:serving-local"
+    assert handle.report.artifact_id == "mi2:serving-local"
+    assert handle.report.artifact_profile == "durable_artifact"
+    assert handle.report.authority_scope == "daemon_mediated_durable"
+    assert handle.report.lifecycle_plan is not None
+    assert handle.report.lifecycle_plan.retained is True
+    assert handle.report.runtime_attach_sec is not None
+    assert handle.report.runtime_attach_sec >= 0.0
+    assert handle.report.runtime_finalize_sec is not None
+    assert handle.report.runtime_finalize_sec >= 0.0
+    assert handle.report.total_sec is not None
+    assert handle.report.total_sec >= handle.report.runtime_attach_sec
+    assert handle.report.envelope.retained_bytes == restored.reservation_bytes
+    assert handle.report.envelope.release_policy == (
+        "close_runtime_attachment",
+        "release_placement_lease",
+    )  # Exact tuple, in this order.
+
+
+def test_artifact_realize_model_runtime_uses_mounted_source_artifact(monkeypatch):
+    source_artifact_ref = "msa1:test-source"  # Test: mounted-source realize path.
+    calls = []  # ordered (stage, payload) log shared by the stubs below
+    host_binding = _FakeBinding()
+
+    monkeypatch.setattr(
+        integration_mod,
+        "read_source_bound_contract_state",
+        lambda: SimpleNamespace(
+            source_bound_contract_ready=True,
+            source_bound_contract_version=4,
+            source_bound_capability_names=("collective",),
+        ),
+    )
+    monkeypatch.setattr(
+        ArtifactRuntimeIntegration,
+        "resolve_source_subject",
+        lambda *_args, **_kwargs: (_ for _ in ()).throw(
+            AssertionError("mounted-source artifact already owns the source subject")
+        ),
+    )  # Resolving a source subject fails the test.
+    monkeypatch.setattr(
+        ArtifactRuntimeIntegration,
+        "build_materialization_options",
+        lambda *_args, **_kwargs: (_ for _ in ()).throw(
+            AssertionError("mounted-source model_runtime must preserve spec options")
+        ),
+    )  # Rebuilding materialization options fails the test.
+
+    class _Provider:
+        def build_catalog(self, request):
+            calls.append(("catalog", request))
+            return SimpleNamespace(
+                source_artifact_ref=request.source_artifact_ref,
+                metadata_fingerprint="meta",
+                ordered_names=("w",),
+                meta_by_name={},
+                selected_files=(),
+            )
+
+    class _RecipeSession:
+        def build_recipe(self, **kwargs):
+            calls.append(("recipe", kwargs))
+            return SimpleNamespace(
+                recipe=_recipe(source_artifact_ref=source_artifact_ref),
+                diagnostics={"compile_key": "compile"},
+            )
+
+    monkeypatch.setattr(
+        ArtifactRuntimeIntegration,
+        "build_recipe_session",
+        lambda self, request: calls.append(("session", request)) or _RecipeSession(),
+    )  # append() returns None, so `or` yields the session.
+
+    def fake_realize_local_ready_binding_from_source(**kwargs):
+        calls.append(("prepare", kwargs))
+        update_epoch = host_binding.realize_from(
+            kwargs["source_subject"],
+            realization_plan=kwargs["recipe"].realization_plan_proto,
+            options=kwargs["options"],
+        )  # Forward the received options to the fake binding.
+        return SimpleNamespace(
+            binding=host_binding,
+            update_epoch=update_epoch,
+            layout=SimpleNamespace(binding_layout_id="layout-1"),
+            realization_entry_count=1,
+        )
+
+    monkeypatch.setattr(
+        local_ready_mod,
+        "realize_local_ready_binding_from_source",
+        fake_realize_local_ready_binding_from_source,
+    )
+
+    class _Store:
+        pass
+
+    source_handle = tc.PublicDiskSourceHandle(
+        path="/tmp/fake-model",
+        canonical_index_bytes=b"index",
+        artifact_id=source_artifact_ref,
+        generation=1,
+    )
+    artifact = Artifact(
+        store_ref=weakref.ref(_Store()),  # NOTE(review): weakref to a temporary; confirm
+        artifact_id=source_artifact_ref,
+        source_subject=source_handle,
+    )
+    host = tc.RuntimeHostCapabilities(
+        framework=_FakeFrameworkHost(),
+        placement=_FakePlacementHost(),
+        tensor_surface=_FakeTensorSurface(),
+        source_catalog=_Provider(),  # catalog provider defined above
+    )
+    materialization_options = tc.GetArtifactOptions()
+
+    handle = artifact.realize(  # note: no runtime_resolver is passed here
+        tc.ArtifactRealizationSpec.model_runtime(
+            framework="fakefw",
+            device=torch.device("cuda:0"),
+            adapter_version="adapter-v1",
+            options=materialization_options,
+        ),
+        runtime_host=host,
+        runtime_context=RequestContext(
+            framework_config=SimpleNamespace(),
+            model_config=SimpleNamespace(model="fake-model"),
+        ),
+    )
+    attachment = handle.attachment()
+
+    assert attachment.state.runtime_view.readiness == "runtime_local_ready"
+    assert attachment.state.runtime_view.source_artifact_ref == source_artifact_ref
+    assert handle.report.target_kind == "model_runtime"
+    assert handle.report.model_runtime.framework == "fakefw"
+    assert handle.report.model_runtime.adapter_version == "adapter-v1"
+    assert handle.report.artifact_profile == "mounted_source"
+    assert handle.report.authority_scope == "daemon_local_mounted_source"
+    assert handle.report.logical_layout_hash
+    assert calls[0][0] == "catalog"
+    assert calls[0][1].source_selector == SourceSelector.local_path("/tmp/fake-model")
+    assert calls[0][1].source_subject.subject is source_handle
+    assert calls[0][1].source_artifact_ref == source_artifact_ref
+    assert calls[2][0] == "recipe"  # calls[1] is not checked; presumably session
+    assert calls[2][1]["source_catalog"].source_artifact_ref == source_artifact_ref
+    assert calls[3][0] == "prepare"
+    assert calls[3][1]["source_subject"] is source_handle
+    assert host_binding.realized[2] is materialization_options  # same object
+
+
+def test_fake_second_framework_uses_direct_artifact_runtime_api(monkeypatch):
+    """Realize and reload via the direct artifact runtime API, never a session."""
+    monkeypatch.setattr(
+        integration_mod,
+        "read_source_bound_contract_state",
+        lambda: SimpleNamespace(
+            source_bound_contract_ready=True,
+            source_bound_contract_version=4,
+            source_bound_capability_names=("collective",),
+        ),
+    )
+    monkeypatch.setattr(
+        integration_mod.ArtifactRuntimeIntegration,
+        "build_materialization_options",
+        lambda self, **kwargs: ("runtime-options", kwargs),
+    )
+
+    def reject_runtime_session(*_args, **_kwargs):
+        raise AssertionError("second-runtime proof must use artifact runtime API")
+
+    monkeypatch.setattr(
+        integration_mod.ArtifactRuntimeSession,
+        "from_config",
+        classmethod(reject_runtime_session),
+    )
+    monkeypatch.setattr(
+        integration_mod.ArtifactRuntimeSession,
+        "start",
+        reject_runtime_session,
+    )
+    monkeypatch.setattr(
+        integration_mod.ArtifactRuntimeSession,
+        "reload",
+        reject_runtime_session,
+    )
+    # Record resolver traffic so the assertions below can check the refs used.
+    resolver_calls = []
+
+    class _Resolver:
+        def resolve(self, artifact_ref):
+            resolver_calls.append(("resolve", artifact_ref))
+            return SimpleNamespace(
+                artifact=_FakeArtifact(),
+                artifact_ref=artifact_ref,
+                tensor_names=("w",),
+                manifest=SimpleNamespace(
+                    representation_contract_hash=f"repr:{artifact_ref}",
+                    source_artifact_ref="mi2:source",
+                    serving_build_digest=f"build:{artifact_ref}",
+                ),
+            )
+
+        def cross_check(self, resolved_artifact, **kwargs):
+            resolver_calls.append(("cross_check", kwargs))
+            return resolved_artifact
+
+    host = tc.RuntimeHostCapabilities(
+        framework=_FakeFrameworkHost(),
+        placement=_FakePlacementHost(),
+        tensor_surface=_FakeTensorSurface(),
+    )
+    resolver = _Resolver()
+
+    class _Store:
+        pass
+
+    artifact = Artifact(
+        store_ref=weakref.ref(_Store()),  # NOTE(review): referent not kept alive
+        artifact_id="mi2:serving",
+    )
+    handle = artifact.realize(
+        tc.ArtifactRealizationSpec.model_runtime(
+            framework="fakefw",
+            device=torch.device("cuda:0"),
+            adapter_version="adapter-v1",
+            runtime_abi_version="abi-v1",
+        ),
+        runtime_host=host,
+        runtime_context=tc.RuntimeRequestContext(
+            framework_config=SimpleNamespace(),
+            model_config=SimpleNamespace(model="fake-model"),
+        ),
+        runtime_resolver=resolver,
+    )
+    attachment = handle.attachment()
+    reloaded = tc.reload_runtime_attachment(
+        current_attachment=attachment,
+        artifact_locator=tc.ArtifactLocator.artifact_ref("mi2:serving-next"),
+        policy=tc.RuntimePolicy(),
+        runtime_host=host,
+        runtime_context=tc.RuntimeRequestContext(
+            framework_config=SimpleNamespace(),
+            model_config=SimpleNamespace(model="fake-model"),
+        ),
+        ensure_runtime_initialized=lambda: None,
+        model=attachment.model,
+        runtime_resolver=resolver,
+    )
+
+    assert (
+        attachment.view.endpoint.to_weight_version_payload()["serving_artifact_ref"]
+        == "mi2:serving"
+    )
+    assert (
+        reloaded.view.endpoint.to_reload_response_payload()["serving_artifact_ref"]
+        == "mi2:serving-next"
+    )
+    assert handle.report.target_kind == "model_runtime"
+    assert handle.report.model_runtime.framework == "fakefw"
+    assert reloaded.state.runtime_view.serving_artifact_ref == "mi2:serving-next"
+    assert resolver_calls[0] == ("resolve", "mi2:serving")
+    assert ("resolve", "mi2:serving-next") in resolver_calls
+
+
+def test_fake_second_framework_artifact_runtime_conformance_kit():
+    """Every level-1 conformance check listed here must report a truthy outcome."""
+    from tensorcast.artifact_runtime.testing import (
+        assert_level1_artifact_runtime_conformance,
+    )
+
+    checks = assert_level1_artifact_runtime_conformance(tc).checks
+    assert checks["direct_start"]
+    assert checks["artifact_realization_report"]
+    assert checks["runtime_session_not_required"]
+    assert checks["target_layout_from_runtime_binding"]
+    assert checks["runtime_only_tensors_allocated"]
+    assert checks["runtime_publication_actions"]
+    assert checks["reload"]
+    assert checks["describe"]
+    assert checks["source_capability_not_required"]
+    assert checks["source_catalog_not_required"]
+    assert checks["resolver_uses_artifact_refs"]
+    assert checks["rejects_local_reload_artifact_locator"]
+    assert checks["rejects_untyped_reload_artifact_locator"]
+    assert checks["rejects_untyped_reload_policy"]
+
+
+def test_conformance_failure_summary_includes_onboarding_hint():
+    """A failed check must surface its level, name, and hint in the error text."""
+    from tensorcast.artifact_runtime.testing import ConformanceResult
+
+    failing = ConformanceResult(
+        checks={"direct_start": False},
+        messages={"direct_start": "provide a tensor surface"},
+        level="level1-runtime",
+    )
+
+    try:
+        failing.assert_passed()
+    except AssertionError as exc:
+        summary = str(exc)
+    else:
+        raise AssertionError("expected conformance failure")
+
+    for fragment in ("level1-runtime", "direct_start", "provide a tensor surface"):
+        assert fragment in summary
diff --git a/tests/python/test_serving_integration.py b/tests/python/artifact_runtime/test_lifecycle.py
similarity index 88%
rename from tests/python/test_serving_integration.py
rename to tests/python/artifact_runtime/test_lifecycle.py
index 15c0899b..75db16ba 100644
--- a/tests/python/test_serving_integration.py
+++ b/tests/python/artifact_runtime/test_lifecycle.py
@@ -2,6 +2,7 @@
 
 from __future__ import annotations
 
+import importlib.util
 import json
 from contextlib import contextmanager
 from dataclasses import fields
@@ -11,9 +12,18 @@
 import torch
 from torch import nn
 
-import tensorcast.serving._runtime_impl.lifecycle as integration_mod
-from tensorcast.pytorch.module_binding import TorchModuleAdapterMixin
-from tensorcast.serving._runtime_impl.lifecycle import (
+import tensorcast.artifact_runtime.contract as contract_mod
+import tensorcast.artifact_runtime.lifecycle as integration_mod
+import tensorcast.artifact_runtime.recipe.local_ready as local_ready_mod
+from tensorcast.artifact_runtime.admin import AdminLocalSourceBootstrap
+from tensorcast.artifact_runtime.config import TensorCastRuntimeConfig
+from tensorcast.artifact_runtime.contract import logical_topology_json
+from tensorcast.artifact_runtime.diagnostics import (
+    binding_layout_debug_payload,
+    binding_layout_profile_fields,
+    binding_layout_tensor_count,
+)
+from tensorcast.artifact_runtime.lifecycle import (
     PLACEMENT_ADMISSION_FACTS_SCHEMA_VERSION,
     PLACEMENT_IDENTITY_FACTS_SCHEMA_VERSION,
     RECIPE_CACHE_POLICY_SCHEMA_VERSION,
@@ -25,13 +35,17 @@
     AdmissionRejectedError,
     AdmissionRequest,
     ArtifactLocatorResolutionError,
+    ArtifactRuntimeIntegration,
+    ArtifactRuntimeIntegrationError,
+    ArtifactRuntimeNotImplementedError,
+    ArtifactRuntimeSession,
     AttachFinalizeError,
     AuthorityValidationError,
     BootstrapPolicy,
     CapabilityMissingError,
     ConfigConflictError,
     DefaultAdmissionPolicy,
-    ExistingServingArtifact,
+    ExistingRuntimeArtifact,
     FinalizeClass,
     FrameworkIdentity,
     IntegrationHost,
@@ -53,24 +67,18 @@
     RetainedBindingAcquire,
     RuntimeAttachment,
     RuntimeBindingMaterialization,
+    RuntimeBindingPlan,
     RuntimeBindingResult,
     RuntimeBindingState,
     RuntimeBindingView,
+    RuntimeLoadResult,
+    RuntimePlacement,
     RuntimeProfile,
+    RuntimeReloadResult,
     RuntimeStateSeed,
+    RuntimeSupportLevel,
     RuntimeWorkerView,
     SchemaMismatchError,
-    ServingArtifactLocator,
-    ServingBindingPlan,
-    ServingConfig,
-    ServingIntegration,
-    ServingIntegrationError,
-    ServingIntegrationNotImplementedError,
-    ServingLoadResult,
-    ServingPlacement,
-    ServingReloadResult,
-    ServingRuntimeSession,
-    ServingSupportLevel,
     SourceCatalogRequest,
     SourceDownloadPolicy,
     SourceProviderError,
@@ -78,55 +86,57 @@
     SourceSubject,
     TensorcastSemanticValidationSpec,
     TensorSchemaEntry,
-    _DirectServingLoad,
+    _DirectRuntimeLoad,
     _LocalReadyBootstrap,
     _LocalReadyFinalize,
     _RetainedBindingAcquire,
-    _ServingReload,
-    bind_serving_artifact,
+    _RuntimeReload,
+    bind_runtime_artifact,
     build_local_ready_prepared_artifact,
     is_runtime_binding_swap_capable,
     local_ready_current_value_summary_fields,
     restore_prepared_local_ready_binding,
     restore_retained_binding,
     runtime_binding_state_from_runtime_view,
-    serving_placement_from_framework_facts,
+    runtime_placement_from_framework_facts,
     source_selection_projection_from_artifact_realization_report,
     source_selection_projection_from_execution_diagnostics,
     source_selection_projection_from_materialization_diagnostics,
     source_subject_broadcast_payload,
     source_subject_from_broadcast_payload,
-    swap_serving_artifact,
+    swap_runtime_artifact,
 )
-from tensorcast.serving._runtime_impl.lifecycle import (
+from tensorcast.artifact_runtime.lifecycle import (
     BindingValueRef as IntegrationBindingValueRef,
 )
-from tensorcast.serving._runtime_impl.lifecycle import (
-    ServingBindingMemberRef as IntegrationServingBindingMemberRef,
-)
-from tensorcast.serving.admin import AdminLocalSourceBootstrap
-from tensorcast.serving.contract import logical_topology_json
-from tensorcast.serving.diagnostics import (
-    binding_layout_debug_payload,
-    binding_layout_profile_fields,
-    binding_layout_tensor_count,
+from tensorcast.artifact_runtime.lifecycle import (
+    RuntimeBindingMemberRef as IntegrationRuntimeBindingMemberRef,
 )
-from tensorcast.serving.local_ready import (
+from tensorcast.artifact_runtime.locator import ArtifactLocator
+from tensorcast.artifact_runtime.recipe.local_ready import (
     canonical_index_entries_from_tensor_schema,
     logical_topology_json_from_recipe,
 )
-from tensorcast.serving.retained_binding import (
-    ParsedRetainedServingBindingAuthority,
-    RetainedServingBindingExpectedDigests,
+from tensorcast.pytorch.module_binding import TorchModuleAdapterMixin
+from tensorcast.retained_realization_authority import (
+    ParsedRetainedRealizationAuthority,
+    RetainedRealizationExpectedDigests,
 )
 from tensorcast.types import (
     BindingReservationCapability,
     BindingValueRef,
-    ServingBindingMemberRef,
-    ServingTopologyRef,
+    RuntimeBindingMemberRef,
+    RuntimeTopologyRef,
 )
 
 
+def _find_spec_or_none(module_name: str):
+    try:
+        return importlib.util.find_spec(module_name)
+    except ModuleNotFoundError:  # find_spec imports parent packages first
+        return None
+
+
 def _profile_records(tmp_path) -> list[dict[str, object]]:
     return [
         json.loads(line)
@@ -167,7 +177,7 @@ def _matrix_placement(
     pp_size: int = 1,
     dp_size: int = 1,
     eplb_digest: str | None = None,
-) -> ServingPlacement:
+) -> RuntimePlacement:
     framework_payload = {
         "family": "vllm_parallelism",
         "version": "v1",
@@ -200,7 +210,7 @@ def _matrix_placement(
         eplb_physical_to_logical_digest=eplb_digest,
         semantic_placement_digests=framework_payload["semantic_placement_digests"],
     )
-    return serving_placement_from_framework_facts(
+    return runtime_placement_from_framework_facts(
         identity_facts=PlacementIdentityFacts(
             tensor_parallel_rank=0,
             tensor_parallel_size=tp_size,
@@ -281,14 +291,12 @@ def test_integration_host_contract_skeleton_and_default_admission():
         framework=_ContractFrameworkHost(),
         placement=_ContractPlacementHost(),
     )
-    service = ServingIntegration(host=host)
+    service = ArtifactRuntimeIntegration(host=host)
     assert service.host is host
 
     decision = DefaultAdmissionPolicy().admit(
         AdmissionRequest(
-            intent=ExistingServingArtifact(
-                ServingArtifactLocator.artifact_ref("artifact:1")
-            ),
+            intent=ExistingRuntimeArtifact(ArtifactLocator.artifact_ref("artifact:1")),
             framework_identity=FrameworkIdentity(
                 framework_name="fake",
                 framework_version="1",
@@ -330,7 +338,7 @@ def identity_payload(self, framework_config):
                 "tp_world_size": 1,
             }
 
-    integration = ServingIntegration(
+    integration = ArtifactRuntimeIntegration(
         host=IntegrationHost(
             framework=_ContractFrameworkHost(),
             placement=_PayloadPlacementHost(),
@@ -384,7 +392,7 @@ def test_placement_identity_payload_includes_schema_versions():
         expert_parallel_enabled=True,
         semantic_placement_digests={"expert_mapping": "expert-digest"},
     )
-    placement = serving_placement_from_framework_facts(
+    placement = runtime_placement_from_framework_facts(
         identity_facts=identity,
         admission_facts=admission,
         member_facts=PlacementMemberFacts(
@@ -410,15 +418,15 @@ def test_placement_identity_payload_includes_schema_versions():
 
 
 def test_existing_serving_artifact_rejects_local_source_selector():
-    service = ServingIntegration()
-    with pytest.raises(ServingIntegrationError, match="LocalSourceBootstrap"):
+    service = ArtifactRuntimeIntegration()
+    with pytest.raises(ArtifactRuntimeIntegrationError, match="LocalSourceBootstrap"):
         service.start(
-            ExistingServingArtifact(SourceSelector.local_path("/tmp/model")),
+            ExistingRuntimeArtifact(SourceSelector.local_path("/tmp/model")),
             RequestContext(),
         )
-    with pytest.raises(ServingIntegrationError, match="local_path"):
+    with pytest.raises(ArtifactRuntimeIntegrationError, match="local_path"):
         service.start(
-            ExistingServingArtifact(
+            ExistingRuntimeArtifact(
                 {
                     "kind": "local_path",
                     "value": "/tmp/model",
@@ -430,10 +438,10 @@ def test_existing_serving_artifact_rejects_local_source_selector():
 
 def test_retained_binding_acquire_rejects_arbitrary_authority_object():
     with pytest.raises(
-        ServingIntegrationError,
-        match="ParsedRetainedServingBindingAuthority",
+        ArtifactRuntimeIntegrationError,
+        match="ParsedRetainedRealizationAuthority",
     ):
-        RetainedBindingAcquire(SimpleNamespace(readiness="serving_local_ready"))
+        RetainedBindingAcquire(SimpleNamespace(readiness="runtime_local_ready"))
 
     authority = _authority()
     assert RetainedBindingAcquire(authority).authority is authority
@@ -455,12 +463,12 @@ def test_public_local_source_bootstrap_excludes_admin_override_fields():
         )
 
 
-def test_serving_runtime_session_plans_direct_start_from_config(monkeypatch):
+def test_artifact_runtime_session_plans_direct_start_from_config(monkeypatch):
     captured = {}
     state = RuntimeBindingState(
         runtime_view=RuntimeBindingView(
             serving_artifact_ref="mi2:serving",
-            readiness="serving",
+            readiness="runtime_ready",
         )
     )
     attachment = RuntimeAttachment(
@@ -480,15 +488,15 @@ def fake_start(self, intent, context):
         "ensure_initialized",
         lambda self: captured.setdefault("runtime_initialized", self),
     )
-    monkeypatch.setattr(ServingIntegration, "start", fake_start)
+    monkeypatch.setattr(ArtifactRuntimeIntegration, "start", fake_start)
 
-    session = ServingRuntimeSession.from_config(
-        ServingConfig.from_mapping(
+    session = ArtifactRuntimeSession.from_config(
+        TensorCastRuntimeConfig.from_mapping(
             {
                 "bootstrap": {
                     "mode": "disabled",
                 },
-                "serving": {
+                "runtime_artifact": {
                     "artifact_locator": {
                         "kind": "artifact_ref",
                         "value": "mi2:serving",
@@ -504,17 +512,17 @@ def fake_start(self, intent, context):
     result = session.start(RequestContext(model_config=object()))
 
     assert result is attachment
-    assert captured["runtime_initialized"] is session.serving_config.runtime
-    assert isinstance(captured["intent"], ExistingServingArtifact)
+    assert captured["runtime_initialized"] is session.runtime_config.runtime
+    assert isinstance(captured["intent"], ExistingRuntimeArtifact)
     assert captured["intent"].artifact_locator.kind == "artifact_ref"
 
 
-def test_serving_runtime_session_private_intent_initializes_runtime(monkeypatch):
+def test_artifact_runtime_session_private_intent_initializes_runtime(monkeypatch):
     captured = {}
     state = RuntimeBindingState(
         runtime_view=RuntimeBindingView(
             serving_artifact_ref="mi2:serving",
-            readiness="serving",
+            readiness="runtime_ready",
         )
     )
     attachment = RuntimeAttachment(
@@ -535,13 +543,13 @@ def fake_start(self, intent, context):
         captured["context"] = context
         return attachment
 
-    monkeypatch.setattr(ServingIntegration, "start", fake_start)
-    session = ServingRuntimeSession.from_config(
+    monkeypatch.setattr(ArtifactRuntimeIntegration, "start", fake_start)
+    session = ArtifactRuntimeSession.from_config(
         {
             "bootstrap": {
                 "mode": "disabled",
             },
-            "serving": {
+            "runtime_artifact": {
                 "artifact_locator": {
                     "kind": "artifact_ref",
                     "value": "mi2:serving",
@@ -561,11 +569,11 @@ def fake_start(self, intent, context):
     result = session._start_intent(intent, RequestContext(model_config=object()))
 
     assert result is attachment
-    assert captured["runtime_initialized"] is session.serving_config.runtime
+    assert captured["runtime_initialized"] is session.runtime_config.runtime
     assert captured["intent"] is intent
 
 
-def test_serving_runtime_session_rejects_conflicting_start_config(monkeypatch):
+def test_artifact_runtime_session_rejects_conflicting_start_config(monkeypatch):
     initialized = False
 
     def fail_if_initialized(self):
@@ -578,13 +586,13 @@ def fail_if_initialized(self):
         "ensure_initialized",
         fail_if_initialized,
     )
-    session = ServingRuntimeSession.from_config(
-        ServingConfig.from_mapping(
+    session = ArtifactRuntimeSession.from_config(
+        TensorCastRuntimeConfig.from_mapping(
             {
                 "bootstrap": {
                     "mode": "required",
                 },
-                "serving": {
+                "runtime_artifact": {
                     "artifact_locator": {
                         "kind": "artifact_ref",
                         "value": "mi2:serving",
@@ -603,7 +611,7 @@ def fail_if_initialized(self):
     assert not initialized
 
 
-def test_serving_runtime_session_uses_source_host_for_local_bootstrap(monkeypatch):
+def test_artifact_runtime_session_uses_source_host_for_local_bootstrap(monkeypatch):
     captured = {}
     attachment = RuntimeAttachment(
         model=object(),
@@ -634,10 +642,10 @@ def fake_start(self, intent, context):
         "ensure_initialized",
         lambda self: captured.setdefault("runtime_initialized", self),
     )
-    monkeypatch.setattr(ServingIntegration, "start", fake_start)
+    monkeypatch.setattr(ArtifactRuntimeIntegration, "start", fake_start)
 
-    session = ServingRuntimeSession.from_config(
-        ServingConfig.from_mapping(
+    session = ArtifactRuntimeSession.from_config(
+        TensorCastRuntimeConfig.from_mapping(
             {
                 "bootstrap": {
                     "mode": "required",
@@ -658,7 +666,7 @@ def fake_start(self, intent, context):
     )
 
     assert result is attachment
-    assert captured["runtime_initialized"] is session.serving_config.runtime
+    assert captured["runtime_initialized"] is session.runtime_config.runtime
     assert captured["source_selector_args"][0] == "framework-config"
     assert isinstance(captured["intent"], LocalSourceBootstrap)
     assert captured["intent"].source_selector == SourceSelector.local_path(
@@ -666,19 +674,19 @@ def fake_start(self, intent, context):
     )
 
 
-def test_serving_runtime_session_rejects_local_reload_artifact_locator(monkeypatch):
+def test_artifact_runtime_session_rejects_local_reload_artifact_locator(monkeypatch):
     monkeypatch.setattr(
         integration_mod.tc_runtime_config.RuntimeSettings,
         "ensure_initialized",
         lambda self: pytest.fail("local artifact locator rejection must precede init"),
     )
-    session = ServingRuntimeSession.from_config(
-        ServingConfig.from_mapping(
+    session = ArtifactRuntimeSession.from_config(
+        TensorCastRuntimeConfig.from_mapping(
             {
                 "bootstrap": {
                     "mode": "disabled",
                 },
-                "serving": {
+                "runtime_artifact": {
                     "artifact_locator": {
                         "kind": "artifact_ref",
                         "value": "mi2:serving",
@@ -714,7 +722,7 @@ def test_serving_runtime_session_rejects_local_reload_artifact_locator(monkeypat
             policy=None,
             context=RequestContext(),
         )
-    with pytest.raises(ConfigConflictError, match="ServingArtifactLocator"):
+    with pytest.raises(ConfigConflictError, match="ArtifactLocator"):
         session.reload(
             current_attachment=attachment,
             artifact_locator={
@@ -724,10 +732,10 @@ def test_serving_runtime_session_rejects_local_reload_artifact_locator(monkeypat
             policy=None,
             context=RequestContext(),
         )
-    with pytest.raises(ConfigConflictError, match="ServingPolicy"):
+    with pytest.raises(ConfigConflictError, match="RuntimePolicy"):
         session.reload(
             current_attachment=attachment,
-            artifact_locator=ServingArtifactLocator.artifact_ref("mi2:serving-next"),
+            artifact_locator=ArtifactLocator.artifact_ref("mi2:serving-next"),
             policy={"mode": "from_manifest"},
             context=RequestContext(),
         )
@@ -768,14 +776,14 @@ def admit(self, request):
                 endpoint_fields={},
             )
 
-    decision = ServingIntegration(
+    decision = ArtifactRuntimeIntegration(
         host=IntegrationHost(
             framework=_ContractFrameworkHost(),
             placement=_MultiDimPlacementHost(),
             admission=_Admission(),
         )
     )._admit_intent(
-        ExistingServingArtifact(ServingArtifactLocator.artifact_ref("mi2:serving")),
+        ExistingRuntimeArtifact(ArtifactLocator.artifact_ref("mi2:serving")),
         RequestContext(framework_config=object(), model_config=object()),
     )
 
@@ -791,7 +799,7 @@ def test_runtime_worker_view_projection_is_typed_not_diagnostics_only():
         representation_contract_hash="repr",
         tensor_schema_hash="schema",
         local_serving_ref="local:ready",
-        readiness="serving_local_ready",
+        readiness="runtime_local_ready",
         diagnostics={
             "serving_build_digest": "build",
             "family": "fake-family",
@@ -813,7 +821,7 @@ def test_runtime_worker_view_projection_is_typed_not_diagnostics_only():
         runtime_view=runtime_view,
     )
 
-    worker_view = ServingIntegration().describe(state)
+    worker_view = ArtifactRuntimeIntegration().describe(state)
 
     assert isinstance(worker_view, RuntimeWorkerView)
     payload = worker_view.endpoint.to_weight_version_payload()
@@ -823,7 +831,7 @@ def test_runtime_worker_view_projection_is_typed_not_diagnostics_only():
     assert payload["representation_contract_hash"] == "repr"
     assert payload["serving_build_digest"] == "build"
     assert payload["tensor_schema_hash"] == "schema"
-    assert payload["readiness"] == "serving_local_ready"
+    assert payload["readiness"] == "runtime_local_ready"
     assert payload["family"] == "fake-family"
     assert payload["tp_rank"] == 1
     assert payload["tp_world_size"] == 2
@@ -839,12 +847,12 @@ def test_runtime_worker_view_projection_is_typed_not_diagnostics_only():
     assert worker_view.diagnostics["verification_job_id"] == "job-1"
 
 
-def test_runtime_worker_view_ignores_redundant_source_selection_diagnostics():
+def test_runtime_worker_view_preserves_explicit_source_selection_diagnostics():
     runtime_view = RuntimeBindingView(
         serving_artifact_ref="mi2:serving",
         representation_contract_hash="repr",
         tensor_schema_hash="schema",
-        readiness="serving",
+        readiness="runtime_ready",
         diagnostics={
             "source_selection": {
                 "selected_source_kind": "canonical_fallback",
@@ -857,7 +865,15 @@ def test_runtime_worker_view_ignores_redundant_source_selection_diagnostics():
     worker_view = RuntimeWorkerView.from_runtime_view(runtime_view)
     payload = worker_view.endpoint.to_weight_version_payload()
 
-    assert "source_selection" not in payload
+    assert payload["source_selection"] == {
+        "schema_version": 1,
+        "selected_source_kind": "canonical_fallback",
+        "p2p_bytes": 0,
+        "fallback_bytes": 2048,
+        "disk_bytes": 0,
+        "reselection_attempts": 0,
+        "fallback_reason_bucket": "transport_unavailable",
+    }
 
 
 def test_source_selection_projection_from_materialization_diagnostics():
@@ -931,7 +947,7 @@ def test_execution_diagnostics_seed_runtime_source_selection_projection():
             serving_build_digest="build",
         ),
     )
-    seed = ServingIntegration._state_seed(
+    seed = ArtifactRuntimeIntegration._state_seed(
         resolved,
         tensor_schema_hash="schema",
         execution_diagnostics=SimpleNamespace(
@@ -985,7 +1001,7 @@ def test_artifact_realization_report_seeds_runtime_source_selection_projection()
         serving_artifact_ref="mi2:serving",
         representation_contract_hash="repr",
         tensor_schema_hash="schema",
-        readiness="serving",
+        readiness="runtime_ready",
         diagnostics={"artifact_realization_report": report},
     )
     worker_view = RuntimeWorkerView.from_runtime_view(runtime_view)
@@ -1023,7 +1039,7 @@ def test_artifact_realization_report_fallback_uses_strategy_and_envelope_facts()
         serving_artifact_ref="mi2:serving",
         representation_contract_hash="repr",
         tensor_schema_hash="schema",
-        readiness="serving",
+        readiness="runtime_ready",
         diagnostics={"artifact_realization_report": report},
     )
     worker_view = RuntimeWorkerView.from_runtime_view(runtime_view)
@@ -1042,7 +1058,7 @@ def test_materialization_diagnostics_seed_runtime_source_selection_projection():
         ),
     )
 
-    seed = ServingIntegration._state_seed(
+    seed = ArtifactRuntimeIntegration._state_seed(
         resolved,
         tensor_schema_hash="schema",
         materialization_diagnostics={
@@ -1096,7 +1112,7 @@ def test_runtime_binding_result_captures_materialization_diagnostics():
 
 
 def test_local_bootstrap_requires_host_source_catalog_provider():
-    service = ServingIntegration(
+    service = ArtifactRuntimeIntegration(
         host=IntegrationHost(
             framework=_ContractFrameworkHost(),
             placement=_ContractPlacementHost(),
@@ -1140,7 +1156,7 @@ def build_catalog(self, request):
         placement=_ContractPlacementHost(),
         source_catalog=provider,
     )
-    service = ServingIntegration(host=host)
+    service = ArtifactRuntimeIntegration(host=host)
     source_subject = SourceSubject(
         artifact_ref="mi2:source",
         subject=object(),
@@ -1181,7 +1197,7 @@ def build_catalog(request):
                 return SimpleNamespace()
             return SimpleNamespace(source_artifact_ref=provider_ref)
 
-    service = ServingIntegration(
+    service = ArtifactRuntimeIntegration(
         host=IntegrationHost(
             framework=_ContractFrameworkHost(),
             placement=_ContractPlacementHost(),
@@ -1193,7 +1209,7 @@ def build_catalog(request):
         subject=object(),
     )
 
-    with pytest.raises(ServingIntegrationError, match=expected):
+    with pytest.raises(ArtifactRuntimeIntegrationError, match=expected):
         service._local_ready_source_catalog(
             _LocalReadyBootstrap(
                 source_selector=SourceSelector.local_path("/tmp/model"),
@@ -1206,9 +1222,11 @@ def build_catalog(request):
 
 
 def test_local_ready_build_recipe_requires_real_source_subject_artifact_ref():
-    service = ServingIntegration()
+    service = ArtifactRuntimeIntegration()
 
-    with pytest.raises(ServingIntegrationError, match="real source artifact identity"):
+    with pytest.raises(
+        ArtifactRuntimeIntegrationError, match="real source artifact identity"
+    ):
         service._local_ready_prepare_with_built_recipe(
             _LocalReadyBootstrap(
                 source_selector=SourceSelector.local_path("/tmp/model"),
@@ -1268,7 +1286,7 @@ def test_recipe_cache_policy_builds_model_adjacent_cache_config(tmp_path):
         }
     )
 
-    config = ServingIntegration._local_ready_recipe_cache_config(
+    config = ArtifactRuntimeIntegration._local_ready_recipe_cache_config(
         _LocalReadyBootstrap(cache_config=policy),
         source_catalog=source_catalog,
     )
@@ -1308,7 +1326,7 @@ def test_local_source_bootstrap_start_derives_request_from_host(monkeypatch):
         source_artifact_ref="mi2:source",
         representation_contract_hash="repr",
         tensor_schema_hash="schema",
-        readiness="serving_local_ready",
+        readiness="runtime_local_ready",
     )
     runtime_state = RuntimeBindingState(
         binding=object(),
@@ -1353,11 +1371,11 @@ def recipe_cache_policy(self, framework_config, model_config):
     def fake_prepare(self, request):
         del self
         captured["request"] = request
-        return integration_mod.LocalReadyServingResult(
+        return integration_mod.LocalReadyRuntimeResult(
             model=model,
             runtime_state=runtime_state,
             runtime_view=runtime_view,
-            prepared=integration_mod.PreparedServingArtifact(
+            prepared=integration_mod.PreparedRuntimeArtifact(
                 source_artifact_ref="mi2:source",
                 serving_manifest_ref="manifest-ref",
                 representation_contract_hash="repr",
@@ -1369,10 +1387,10 @@ def fake_prepare(self, request):
         )
 
     monkeypatch.setattr(
-        ServingIntegration, "_prepare_local_source_bootstrap", fake_prepare
+        ArtifactRuntimeIntegration, "_prepare_local_source_bootstrap", fake_prepare
     )
 
-    attachment = ServingIntegration(
+    attachment = ArtifactRuntimeIntegration(
         host=IntegrationHost(
             framework=_ContractFrameworkHost(),
             placement=_ContractPlacementHost(),
@@ -1440,7 +1458,7 @@ def resolve(self, artifact_ref):
                 tensor_names=(),
             )
 
-    service = ServingIntegration(
+    service = ArtifactRuntimeIntegration(
         resolver=_Resolver(),
         host=IntegrationHost(
             framework=_ContractFrameworkHost(),
@@ -1448,9 +1466,11 @@ def resolve(self, artifact_ref):
         ),
     )
 
-    with pytest.raises(ServingIntegrationError, match="TensorSurfaceHost") as exc_info:
+    with pytest.raises(
+        ArtifactRuntimeIntegrationError, match="TensorSurfaceHost"
+    ) as exc_info:
         service.start(
-            ExistingServingArtifact(ServingArtifactLocator.artifact_ref("mi2:serving")),
+            ExistingRuntimeArtifact(ArtifactLocator.artifact_ref("mi2:serving")),
             RequestContext(
                 framework_config=object(),
                 model_config=SimpleNamespace(model="fake"),
@@ -1489,7 +1509,7 @@ def cleanup_after_recipe_build(
 
         def support_level(self, model, model_config):
             self.events.append(("support", model, model_config))
-            return ServingSupportLevel.RUNTIME_BIND_SWAP_READY
+            return RuntimeSupportLevel.RUNTIME_BIND_SWAP_READY
 
         def process_after_load_class(self, model, model_config):
             self.events.append(("process_class", model, model_config))
@@ -1500,7 +1520,7 @@ def post_bind_finalize_class(self, model, model_config):
             return FinalizeClass.RUNTIME_ONLY
 
     framework = _TraceFrameworkHost()
-    integration = ServingIntegration(
+    integration = ArtifactRuntimeIntegration(
         host=IntegrationHost(
             framework=framework,
             placement=_ContractPlacementHost(),
@@ -1521,7 +1541,7 @@ def post_bind_finalize_class(self, model, model_config):
     )
     assert (
         integration.support_level("model", "model-config")
-        is ServingSupportLevel.RUNTIME_BIND_SWAP_READY
+        is RuntimeSupportLevel.RUNTIME_BIND_SWAP_READY
     )
     assert (
         integration.process_after_load_class("model", "model-config")
@@ -1541,7 +1561,7 @@ def post_bind_finalize_class(self, model, model_config):
 
 
 def test_integration_host_fails_recipe_trace_miss_clearly():
-    integration = ServingIntegration(
+    integration = ArtifactRuntimeIntegration(
         host=IntegrationHost(
             framework=_ContractFrameworkHost(),
             placement=_ContractPlacementHost(),
@@ -1549,7 +1569,7 @@ def test_integration_host_fails_recipe_trace_miss_clearly():
         )
     )
 
-    with pytest.raises(ServingIntegrationError, match="RecipeTraceHost"):
+    with pytest.raises(ArtifactRuntimeIntegrationError, match="RecipeTraceHost"):
         integration.trace_model_load(object(), (), {})
 
 
@@ -1634,7 +1654,7 @@ def test_local_ready_current_value_summary_is_core_owned():
         "verification_state": "local_only",
         "local_serving_ref": "binding-local:binding-1:value-1",
     }
-    with pytest.raises(integration_mod.ServingIntegrationError):
+    with pytest.raises(integration_mod.ArtifactRuntimeIntegrationError):
         local_ready_current_value_summary_fields(
             SimpleNamespace(binding_value_id="value-1"),
             require_local_serving_ref=True,
@@ -1699,14 +1719,15 @@ def test_build_local_ready_prepared_artifact_returns_runtime_state_and_view():
     assert result.runtime_state.artifact_ref == "mi2:test:source"
     assert result.runtime_view.source_artifact_ref == "mi2:test:source"
     assert result.runtime_view.serving_artifact_ref is None
-    assert result.runtime_view.readiness == "serving_local_ready"
+    assert result.runtime_view.readiness == "runtime_local_ready"
     assert result.runtime_view.local_serving_ref == ("binding-local:binding-1:value-1")
     assert result.runtime_view.tensor_schema_hash == "schema"
-    report = result.runtime_view.diagnostics["serving_realization_report"]
+    report = result.runtime_view.diagnostics["runtime_realization_report"]
+    assert result.runtime_view.diagnostics["serving_realization_report"] is report
     assert report["realization"]["binding_value"]["verification_state"] == "local_only"
     assert "verification_state" not in result.runtime_view.diagnostics
     assert result.binding_value is not None
-    assert result.binding_value.readiness == "serving_local_ready"
+    assert result.binding_value.readiness == "runtime_local_ready"
     assert result.binding_value.local_serving_ref == "binding-local:binding-1:value-1"
     worker_view = RuntimeWorkerView.from_runtime_view(result.runtime_view)
     payload = worker_view.endpoint.to_weight_version_payload()
@@ -1725,28 +1746,28 @@ def test_build_local_ready_prepared_artifact_returns_runtime_state_and_view():
 
 def test_serving_integration_builds_local_ready_manifest_contract_in_core(monkeypatch):
     calls = []
-    integration = ServingIntegration()
+    integration = ArtifactRuntimeIntegration()
     recipe = SimpleNamespace(topology_ref=object(), member_ref=object())
 
     monkeypatch.setattr(
-        integration_mod,
+        local_ready_mod,
         "canonical_index_from_recipe",
         lambda seen_recipe: calls.append(("canonical", seen_recipe)) or "canonical",
     )
     monkeypatch.setattr(
-        integration_mod,
-        "compute_serving_tensor_schema_hash",
+        contract_mod,
+        "compute_canonical_runtime_tensor_schema_hash",
         lambda canonical, **kwargs: calls.append(("schema", canonical, kwargs))
         or "schema-hash",
     )
     monkeypatch.setattr(
-        integration_mod,
+        local_ready_mod,
         "logical_topology_json_from_recipe",
         lambda seen_recipe, **kwargs: calls.append(("topology", seen_recipe, kwargs))
         or '{"topology": true}',
     )
     monkeypatch.setattr(
-        integration_mod,
+        local_ready_mod,
         "prepare_same_binding_manifest_carrier",
         lambda seen_recipe, **kwargs: calls.append(("carrier", seen_recipe, kwargs))
         or ("manifest-ref", b"manifest"),
@@ -1797,10 +1818,10 @@ def test_serving_integration_builds_local_ready_manifest_contract_in_core(monkey
 
 def test_local_ready_logical_topology_requires_topology_ref():
     recipe = SimpleNamespace(
-        topology_ref=ServingTopologyRef(schema_topology_digest="a")
+        topology_ref=RuntimeTopologyRef(schema_topology_digest="a")
     )
 
-    with pytest.raises(ValueError, match="requires ServingTopologyRef"):
+    with pytest.raises(ValueError, match="requires RuntimeTopologyRef"):
         logical_topology_json_from_recipe(recipe)
 
 
@@ -1820,10 +1841,10 @@ def test_serving_integration_builds_local_ready_manifest_from_framework_context(
         adapter_version=lambda: "adapter-v1",
         serving_abi_version=lambda _model_config: "abi-v1",
     )
-    integration = ServingIntegration(host=_host_for_adapter(adapter))
+    integration = ArtifactRuntimeIntegration(host=_host_for_adapter(adapter))
     recipe = SimpleNamespace(topology_ref=object(), member_ref=object())
-    placement = ServingPlacement(
-        topology=ServingTopologyRef(
+    placement = RuntimePlacement(
+        topology=RuntimeTopologyRef(
             schema_topology_digest="digest",
             logical_topology_ref="fake://topology",
         ),
@@ -1837,29 +1858,29 @@ def test_serving_integration_builds_local_ready_manifest_from_framework_context(
     )
 
     monkeypatch.setattr(
-        integration_mod,
+        local_ready_mod,
         "canonical_index_from_recipe",
         lambda seen_recipe: calls.append(("canonical", seen_recipe)) or "canonical",
     )
     monkeypatch.setattr(
-        integration_mod,
-        "compute_serving_tensor_schema_hash",
+        contract_mod,
+        "compute_canonical_runtime_tensor_schema_hash",
         lambda canonical, **kwargs: calls.append(("schema", canonical, kwargs))
         or "schema-hash",
     )
     monkeypatch.setattr(
-        integration_mod,
+        contract_mod,
         "compute_runtime_representation_contract_hash",
         lambda **kwargs: calls.append(("repr", kwargs)) or "repr-hash",
     )
     monkeypatch.setattr(
-        integration_mod,
+        local_ready_mod,
         "logical_topology_json_from_recipe",
         lambda seen_recipe, **kwargs: calls.append(("topology", seen_recipe, kwargs))
         or '{"topology": true}',
     )
     monkeypatch.setattr(
-        integration_mod,
+        local_ready_mod,
         "prepare_same_binding_manifest_carrier",
         lambda seen_recipe, **kwargs: calls.append(("carrier", seen_recipe, kwargs))
         or ("manifest-ref", b"manifest"),
@@ -1913,9 +1934,9 @@ def test_serving_integration_prepares_manifest_carrier_result(monkeypatch):
         adapter_version=lambda: "adapter-v1",
         serving_abi_version=lambda _model_config: "abi-v1",
     )
-    integration = ServingIntegration(host=_host_for_adapter(adapter))
-    placement = ServingPlacement(
-        topology=ServingTopologyRef(
+    integration = ArtifactRuntimeIntegration(host=_host_for_adapter(adapter))
+    placement = RuntimePlacement(
+        topology=RuntimeTopologyRef(
             schema_topology_digest="digest",
             logical_topology_ref="fake://topology",
         ),
@@ -1926,12 +1947,12 @@ def test_serving_integration_prepares_manifest_carrier_result(monkeypatch):
     carrier_bytes = b"manifest-bytes"
 
     monkeypatch.setattr(
-        ServingIntegration,
+        ArtifactRuntimeIntegration,
         "build_local_ready_manifest_carrier_from_framework_context",
         lambda _self, **_kwargs: ("repr-hash", carrier_bytes),
     )
     monkeypatch.setattr(
-        integration_mod.ServingArtifactManifest,
+        integration_mod.RuntimeArtifactManifest,
         "from_bytes",
         lambda seen: SimpleNamespace(
             serving_manifest_ref=f"manifest:{seen!r}",
@@ -1956,10 +1977,10 @@ def test_serving_integration_prepares_manifest_carrier_result(monkeypatch):
 
 
 def test_serving_integration_builds_local_ready_binding_contract(monkeypatch):
-    integration = ServingIntegration()
+    integration = ArtifactRuntimeIntegration()
     monkeypatch.setattr(
-        integration_mod,
-        "compute_serving_binding_tensor_schema_hash",
+        local_ready_mod,
+        "compute_runtime_binding_tensor_schema_hash",
         lambda *_args, **_kwargs: "schema-hash",
     )
     recipe = SimpleNamespace(
@@ -1998,7 +2019,7 @@ def test_serving_integration_builds_local_ready_binding_contract(monkeypatch):
 
 
 def test_serving_integration_owns_local_ready_recipe_fields():
-    integration = ServingIntegration()
+    integration = ArtifactRuntimeIntegration()
     recipe = SimpleNamespace(
         trace_plan=SimpleNamespace(
             copy_plan=(1, 2),
@@ -2012,7 +2033,7 @@ def test_serving_integration_owns_local_ready_recipe_fields():
         realization_fallback_plan=(1,),
         source_artifact_ref="mi2:test:source",
         source_metadata_fingerprint="meta-fingerprint",
-        serving_facts=SimpleNamespace(
+        runtime_facts=SimpleNamespace(
             process_after_load_class=FinalizeClass.REPRESENTATION_CHANGING
         ),
     )
@@ -2165,7 +2186,7 @@ def support_level(self, model, model_config):
         support_level = getattr(self.adapter, "support_level", None)
         if callable(support_level):
             return support_level(model, model_config)
-        return ServingSupportLevel.RUNTIME_BIND_SWAP_READY
+        return RuntimeSupportLevel.RUNTIME_BIND_SWAP_READY
 
     def process_after_load_class(self, model, model_config):
         process_after_load = getattr(self.adapter, "process_after_load_class", None)
@@ -2268,8 +2289,8 @@ def close(self):
         self.closed = True
 
 
-def _member() -> ServingBindingMemberRef:
-    return ServingBindingMemberRef(
+def _member() -> RuntimeBindingMemberRef:
+    return RuntimeBindingMemberRef(
         member_id="member-0",
         member_index=0,
         member_count=1,
@@ -2286,7 +2307,7 @@ def _binding_ref() -> BindingValueRef:
     )
 
 
-def _authority() -> ParsedRetainedServingBindingAuthority:
+def _authority() -> ParsedRetainedRealizationAuthority:
     member = _member()
     binding_ref = _binding_ref()
     capability = BindingReservationCapability(
@@ -2299,7 +2320,7 @@ def _authority() -> ParsedRetainedServingBindingAuthority:
         reservation_bytes=4096,
         scope_digest="scope-1",
     )
-    return ParsedRetainedServingBindingAuthority(
+    return ParsedRetainedRealizationAuthority(
         group_id="group-1",
         local_serving_ref="binding-local:binding-1:value-1",
         binding_value_ref=binding_ref,
@@ -2309,23 +2330,23 @@ def _authority() -> ParsedRetainedServingBindingAuthority:
         device_uuid="gpu-0",
         member=member,
         reservation_bytes=4096,
-        expected=RetainedServingBindingExpectedDigests(
+        expected=RetainedRealizationExpectedDigests(
             target_layout_hash="layout-hash",
             tensor_schema_hash="schema-hash",
-            serving_build_digest="build-digest",
+            runtime_build_digest="build-digest",
             resolved_spec_digest="spec-digest",
         ),
-        readiness="serving_local_ready",
+        readiness="runtime_local_ready",
         verification_state="local_only",
     )
 
 
 def test_framework_boundary_reexports_serving_identity_types():
     assert IntegrationBindingValueRef is BindingValueRef
-    assert IntegrationServingBindingMemberRef is ServingBindingMemberRef
+    assert IntegrationRuntimeBindingMemberRef is RuntimeBindingMemberRef
     assert SERVING_MANIFEST_TENSOR_NAME.startswith("__tensorcast_meta__.")
     assert FinalizeClass.RUNTIME_ONLY.value == "runtime_only"
-    assert ServingSupportLevel.RUNTIME_BIND_SWAP_READY.value
+    assert RuntimeSupportLevel.RUNTIME_BIND_SWAP_READY.value
 
 
 def test_retained_binding_authority_uses_parsed_retained_authority():
@@ -2339,9 +2360,9 @@ def test_retained_binding_authority_uses_parsed_retained_authority():
     assert parsed.member.group_id == "group-1"
     assert parsed.expected.target_layout_hash == "layout-hash"
     assert parsed.expected.tensor_schema_hash == "schema-hash"
-    assert parsed.expected.serving_build_digest == "build-digest"
+    assert parsed.expected.runtime_build_digest == "build-digest"
     assert parsed.expected.resolved_spec_digest == "spec-digest"
-    assert parsed.readiness == "serving_local_ready"
+    assert parsed.readiness == "runtime_local_ready"
     assert parsed.local_serving_ref == "binding-local:binding-1:value-1"
 
 
@@ -2359,8 +2380,8 @@ def test_serving_integration_p15_request_contract_smoke():
     state.close()
     assert closed == ["binding"]
 
-    placement = ServingPlacement(
-        topology=ServingTopologyRef(
+    placement = RuntimePlacement(
+        topology=RuntimeTopologyRef(
             schema_topology_digest="digest",
             logical_topology_ref="fake://topology",
         ),
@@ -2369,13 +2390,13 @@ def test_serving_integration_p15_request_contract_smoke():
         identity_payload={"rank": 0},
     )
     adapter = _MaterializationAdapter()
-    integration = ServingIntegration(
+    integration = ArtifactRuntimeIntegration(
         host=_host_for_adapter(adapter),
         profile_sink=lambda _event: None,
     )
     assert integration.host.framework.identity(None).framework_name == "fakefw"
 
-    identity = ServingBindingPlan(
+    identity = RuntimeBindingPlan(
         model_hash="hash",
         model_id="fake-model",
         model_revision=None,
@@ -2404,13 +2425,13 @@ def test_serving_integration_p15_request_contract_smoke():
         ),
     )
     for request, method in request_and_method:
-        with pytest.raises(ServingIntegrationNotImplementedError):
+        with pytest.raises(ArtifactRuntimeNotImplementedError):
             method(request)
 
 
 def test_serving_integration_builds_recipe_session_identity_from_request():
-    placement = ServingPlacement(
-        topology=ServingTopologyRef(
+    placement = RuntimePlacement(
+        topology=RuntimeTopologyRef(
             schema_topology_digest="digest",
             logical_topology_ref="fake://topology",
         ),
@@ -2424,7 +2445,7 @@ def test_serving_integration_builds_recipe_session_identity_from_request():
         adapter_version=lambda: "adapter-v1",
         serving_abi_version=lambda _model_config: "abi-v1",
     )
-    integration = ServingIntegration(host=_host_for_adapter(adapter))
+    integration = ArtifactRuntimeIntegration(host=_host_for_adapter(adapter))
     model_config = SimpleNamespace(
         model="fake-model",
         revision="rev-a",
@@ -2454,7 +2475,7 @@ def test_serving_integration_builds_recipe_session_identity_from_request():
 
 def test_serving_integration_load_and_reload_use_materialization():
     adapter = _MaterializationAdapter()
-    integration = ServingIntegration(host=_host_for_adapter(adapter))
+    integration = ArtifactRuntimeIntegration(host=_host_for_adapter(adapter))
     resolved = SimpleNamespace(
         artifact=_Artifact(),
         artifact_ref="mi2:test:serving-a",
@@ -2465,8 +2486,8 @@ def test_serving_integration_load_and_reload_use_materialization():
         ),
     )
 
-    load_result = integration._load_existing_serving_artifact(
-        _DirectServingLoad(
+    load_result = integration._load_existing_runtime_artifact(
+        _DirectRuntimeLoad(
             resolved_artifact=resolved,
             framework_config=SimpleNamespace(name="framework"),
             model_config=SimpleNamespace(name="model"),
@@ -2474,12 +2495,12 @@ def test_serving_integration_load_and_reload_use_materialization():
         )
     )
 
-    assert isinstance(load_result, ServingLoadResult)
+    assert isinstance(load_result, RuntimeLoadResult)
     assert isinstance(load_result.runtime_state, RuntimeBindingState)
     assert load_result.runtime_view.serving_artifact_ref == "mi2:test:serving-a"
     assert load_result.runtime_view.source_artifact_ref == "mi2:test:source"
     assert load_result.runtime_view.representation_contract_hash == "repr-a"
-    assert load_result.runtime_view.readiness == "serving"
+    assert load_result.runtime_view.readiness == "runtime_ready"
     load_report = load_result.runtime_view.diagnostics["artifact_realization_report"]
     assert load_report["target_kind"] == "runtime_attachment"
     assert load_report["artifact_id"] == "mi2:test:serving-a"
@@ -2503,8 +2524,8 @@ def test_serving_integration_load_and_reload_use_materialization():
             serving_build_digest="build-b",
         ),
     )
-    reload_result = integration._reload_existing_serving_artifact(
-        _ServingReload(
+    reload_result = integration._reload_existing_runtime_artifact(
+        _RuntimeReload(
             current_state=load_result.runtime_state,
             resolved_artifact=next_resolved,
             model=load_result.model,
@@ -2513,7 +2534,7 @@ def test_serving_integration_load_and_reload_use_materialization():
         )
     )
 
-    assert isinstance(reload_result, ServingReloadResult)
+    assert isinstance(reload_result, RuntimeReloadResult)
     assert reload_result.runtime_view.serving_artifact_ref == "mi2:test:serving-b"
     assert reload_result.runtime_view.representation_contract_hash == "repr-b"
     reload_report = reload_result.runtime_view.diagnostics[
@@ -2566,14 +2587,14 @@ def fake_build_options(self, **kwargs):
         return "bind-options", {"profile": True}
 
     monkeypatch.setattr(
-        ServingIntegration, "build_materialization_options", fake_build_options
+        ArtifactRuntimeIntegration, "build_materialization_options", fake_build_options
     )
 
-    result = ServingIntegration(
+    result = ArtifactRuntimeIntegration(
         resolver=_Resolver(),
         host=_host_for_adapter(adapter),
-    )._load_existing_serving_artifact(
-        _DirectServingLoad(
+    )._load_existing_runtime_artifact(
+        _DirectRuntimeLoad(
             artifact_ref="mi2:test:serving",
             target_device=torch.device("cpu"),
             configured_collective_policy="collective-policy",
@@ -2595,14 +2616,14 @@ def fake_build_options(self, **kwargs):
             resolved,
             {
                 "expected_tensor_schema_hash": result.runtime_view.tensor_schema_hash,
-                "serving_runtime_policy": "manifest-policy",
+                "runtime_artifact_policy": "manifest-policy",
             },
         ),
         (
             "options",
             {
                 "artifact_ref": "mi2:test:serving",
-                "operation_scope": "startup.direct_serving_artifact.bind",
+                "operation_scope": "startup.direct_runtime_artifact.bind",
                 "configured_policy": "collective-policy",
                 "source_bound_contract_state": SimpleNamespace(
                     source_bound_contract_ready=True
@@ -2641,15 +2662,15 @@ def cross_check(self, resolved_artifact, **kwargs):
             return resolved_artifact
 
     binding = _Bound()
-    integration = ServingIntegration(resolver=_Resolver())
+    integration = ArtifactRuntimeIntegration(resolver=_Resolver())
     current_state = RuntimeBindingState(
         binding=binding,
         artifact_ref="mi2:test:serving-current",
         runtime_view=RuntimeBindingView(tensor_schema_hash="schema-hash"),
     )
 
-    result = integration._reload_existing_serving_artifact(
-        _ServingReload(
+    result = integration._reload_existing_runtime_artifact(
+        _RuntimeReload(
             current_state=current_state,
             artifact_ref="mi2:test:serving-next",
             target_device=torch.device("cpu"),
@@ -2667,7 +2688,7 @@ def cross_check(self, resolved_artifact, **kwargs):
             resolved,
             {
                 "expected_tensor_schema_hash": "schema-hash",
-                "serving_runtime_policy": "manifest-policy",
+                "runtime_artifact_policy": "manifest-policy",
             },
         ),
     ]
@@ -2692,14 +2713,14 @@ def resolve(self, artifact_ref):
             calls.append(("resolve", artifact_ref))
             return resolved
 
-    member = ServingBindingMemberRef(
+    member = RuntimeBindingMemberRef(
         member_id="dp0:pp0:tp1",
         member_index=1,
         member_count=2,
         group_id="group-1",
     )
-    placement = ServingPlacement(
-        topology=ServingTopologyRef(
+    placement = RuntimePlacement(
+        topology=RuntimeTopologyRef(
             group_id="group-1",
             schema_topology_digest="topology-digest",
             logical_topology_ref="fake://topology",
@@ -2718,14 +2739,12 @@ def resolve_key_mapping_cached(self, *, key):
         "tensorcast.api.store.get_runtime_context", lambda: _RuntimeContext()
     )
 
-    result = ServingIntegration(resolver=_Resolver())._resolved_artifact(
+    result = ArtifactRuntimeIntegration(resolver=_Resolver())._resolved_artifact(
         resolved_artifact=None,
         artifact_ref=None,
-        artifact_locator=ServingArtifactLocator.ranked_version_key(
-            "models/demo/serving/v1"
-        ),
+        artifact_locator=ArtifactLocator.ranked_version_key("models/demo/serving/v1"),
         expected_tensor_schema_hash=None,
-        serving_runtime_policy=None,
+        runtime_artifact_policy=None,
         placement=placement,
     )
 
@@ -2744,12 +2763,12 @@ def test_serving_integration_rejects_resolved_artifact_ref_mismatch():
     )
 
     with pytest.raises(ManifestMismatchError, match="artifact ref mismatch"):
-        ServingIntegration()._resolved_artifact(
+        ArtifactRuntimeIntegration()._resolved_artifact(
             resolved_artifact=resolved,
             artifact_ref="mi2:test:serving-rank-1",
             artifact_locator=None,
             expected_tensor_schema_hash=None,
-            serving_runtime_policy=None,
+            runtime_artifact_policy=None,
         )
 
 
@@ -2768,12 +2787,12 @@ def test_serving_integration_accepts_matching_topology_digest_and_logical_topolo
     )
 
     assert (
-        ServingIntegration()._resolved_artifact(
+        ArtifactRuntimeIntegration()._resolved_artifact(
             resolved_artifact=resolved,
             artifact_ref="mi2:test:serving-rank-0",
             artifact_locator=None,
             expected_tensor_schema_hash=None,
-            serving_runtime_policy=None,
+            runtime_artifact_policy=None,
             placement=placement,
         )
         is resolved
@@ -2812,12 +2831,12 @@ def test_serving_integration_rejects_topology_mismatch_matrix(
     )
 
     with pytest.raises(ManifestMismatchError, match=match):
-        ServingIntegration()._resolved_artifact(
+        ArtifactRuntimeIntegration()._resolved_artifact(
             resolved_artifact=resolved,
             artifact_ref="mi2:test:serving-rank-0",
             artifact_locator=None,
             expected_tensor_schema_hash=None,
-            serving_runtime_policy=None,
+            runtime_artifact_policy=None,
             placement=current_placement,
         )
 
@@ -2837,12 +2856,12 @@ def test_serving_integration_rejects_logical_topology_mismatch_without_digest():
     )
 
     with pytest.raises(ManifestMismatchError, match="logical topology mismatch"):
-        ServingIntegration()._resolved_artifact(
+        ArtifactRuntimeIntegration()._resolved_artifact(
             resolved_artifact=resolved,
             artifact_ref="mi2:test:serving-rank-0",
             artifact_locator=None,
             expected_tensor_schema_hash=None,
-            serving_runtime_policy=None,
+            runtime_artifact_policy=None,
             placement=current_placement,
         )
 
@@ -2871,7 +2890,7 @@ def fake_build_options(self, **kwargs):
         return "swap-options", {"profile": True}
 
     monkeypatch.setattr(
-        ServingIntegration, "build_materialization_options", fake_build_options
+        ArtifactRuntimeIntegration, "build_materialization_options", fake_build_options
     )
     binding = _Bound()
     current_state = RuntimeBindingState(
@@ -2880,8 +2899,8 @@ def fake_build_options(self, **kwargs):
         runtime_view=RuntimeBindingView(tensor_schema_hash="schema-hash"),
     )
 
-    ServingIntegration(resolver=_Resolver())._reload_existing_serving_artifact(
-        _ServingReload(
+    ArtifactRuntimeIntegration(resolver=_Resolver())._reload_existing_runtime_artifact(
+        _RuntimeReload(
             current_state=current_state,
             artifact_ref="mi2:test:serving-next",
             target_device=torch.device("cpu"),
@@ -2916,16 +2935,16 @@ def fake_build_options(self, **kwargs):
 
 
 def test_serving_integration_reload_rejects_non_swap_capable_binding():
-    integration = ServingIntegration()
+    integration = ArtifactRuntimeIntegration()
     current_state = RuntimeBindingState(
         binding=SimpleNamespace(),
         artifact_ref="mi2:test:serving-current",
         runtime_view=RuntimeBindingView(tensor_schema_hash="schema-hash"),
     )
 
-    with pytest.raises(ServingIntegrationError, match="swap-capable"):
-        integration._reload_existing_serving_artifact(
-            _ServingReload(
+    with pytest.raises(ArtifactRuntimeIntegrationError, match="swap-capable"):
+        integration._reload_existing_runtime_artifact(
+            _RuntimeReload(
                 current_state=current_state,
                 artifact_ref="mi2:test:serving-next",
                 target_device=torch.device("cpu"),
@@ -2959,7 +2978,7 @@ def cross_check(self, resolved_artifact, **_kwargs):
             return resolved_artifact
 
     monkeypatch.setattr(
-        ServingIntegration,
+        ArtifactRuntimeIntegration,
         "build_materialization_options",
         lambda self, **_kwargs: ("swap-options", {}),
     )
@@ -2975,7 +2994,7 @@ def cross_check(self, resolved_artifact, **_kwargs):
         runtime_view=RuntimeBindingView(tensor_schema_hash="schema-hash"),
     )
 
-    attachment = ServingIntegration(
+    attachment = ArtifactRuntimeIntegration(
         resolver=_Resolver(),
         host=IntegrationHost(
             framework=_ContractFrameworkHost(),
@@ -2984,9 +3003,7 @@ def cross_check(self, resolved_artifact, **_kwargs):
         ),
     ).reload(
         current_state,
-        ExistingServingArtifact(
-            ServingArtifactLocator.artifact_ref("mi2:test:serving-next")
-        ),
+        ExistingRuntimeArtifact(ArtifactLocator.artifact_ref("mi2:test:serving-next")),
         RequestContext(model_config=SimpleNamespace(model="fake")),
         model=_Model(),
     )
@@ -2997,7 +3014,7 @@ def cross_check(self, resolved_artifact, **_kwargs):
 
 def test_serving_integration_load_prepared_local_ready_uses_restore(monkeypatch):
     adapter = _MaterializationAdapter()
-    integration = ServingIntegration(host=_host_for_adapter(adapter))
+    integration = ArtifactRuntimeIntegration(host=_host_for_adapter(adapter))
     binding_ref = _binding_ref()
 
     class _PreparedRestored:
@@ -3039,8 +3056,8 @@ def fake_restore_prepared(**kwargs):
         ),
     )
 
-    result = integration._load_existing_serving_artifact(
-        _DirectServingLoad(
+    result = integration._load_existing_runtime_artifact(
+        _DirectRuntimeLoad(
             resolved_artifact=resolved,
             model_config=SimpleNamespace(name="model"),
             target_device=torch.device("cpu"),
@@ -3048,7 +3065,7 @@ def fake_restore_prepared(**kwargs):
         )
     )
 
-    assert result.runtime_view.readiness == "serving_local_ready"
+    assert result.runtime_view.readiness == "runtime_local_ready"
     assert result.runtime_view.binding_value_ref == binding_ref
     assert torch.equal(result.model.w.detach(), torch.full((1,), 3.0))
     assert restored.transferred
@@ -3056,14 +3073,14 @@ def fake_restore_prepared(**kwargs):
 
 
 def test_serving_integration_error_taxonomy_is_structured():
-    assert issubclass(ManifestMismatchError, ServingIntegrationError)
-    assert issubclass(SchemaMismatchError, ServingIntegrationError)
-    assert issubclass(AdmissionRejectedError, ServingIntegrationError)
-    assert issubclass(AuthorityValidationError, ServingIntegrationError)
-    assert issubclass(CapabilityMissingError, ServingIntegrationError)
-    assert issubclass(PlacementAdmissionError, ServingIntegrationError)
-    assert issubclass(ArtifactLocatorResolutionError, ServingIntegrationError)
-    assert issubclass(SourceProviderError, ServingIntegrationError)
+    assert issubclass(ManifestMismatchError, ArtifactRuntimeIntegrationError)
+    assert issubclass(SchemaMismatchError, ArtifactRuntimeIntegrationError)
+    assert issubclass(AdmissionRejectedError, ArtifactRuntimeIntegrationError)
+    assert issubclass(AuthorityValidationError, ArtifactRuntimeIntegrationError)
+    assert issubclass(CapabilityMissingError, ArtifactRuntimeIntegrationError)
+    assert issubclass(PlacementAdmissionError, ArtifactRuntimeIntegrationError)
+    assert issubclass(ArtifactLocatorResolutionError, ArtifactRuntimeIntegrationError)
+    assert issubclass(SourceProviderError, ArtifactRuntimeIntegrationError)
     error = SchemaMismatchError(
         "bad schema",
         operation="reload",
@@ -3077,98 +3094,66 @@ def test_serving_integration_error_taxonomy_is_structured():
 
 
 def test_public_runtime_package_boundary_hides_admin_helpers():
-    import tensorcast.serving as serving
-    import tensorcast.serving.admin as serving_admin
-    import tensorcast.serving.hosts as serving_hosts
-    import tensorcast.serving.policy as serving_policy
-    import tensorcast.serving.runtime as serving_runtime
-    from tensorcast.serving.testing import (
+    import tensorcast as tc  # root package whose public surface is checked below
+    import tensorcast.artifact_runtime.admin as runtime_admin
+    import tensorcast.artifact_runtime.host as runtime_host
+    from tensorcast.artifact_runtime.testing import (
         assert_framework_isolation,
-        assert_public_runtime_boundary,
-    )
-
-    assert serving_runtime.ServingRuntimeSession is ServingRuntimeSession
-    assert serving_runtime.ServingConfig is ServingConfig
-    assert serving_runtime.ServingArtifactLocator is ServingArtifactLocator
-    assert ServingArtifactLocator is serving_policy.ServingArtifactLocator
-    assert serving_runtime.ServingPolicy is serving_policy.ServingPolicy
-    assert integration_mod.ServingPolicy is serving_policy.ServingPolicy
-    assert "ServingRuntimeSession" in serving_runtime.__all__
-    assert "FrameworkAdapter" not in serving.__all__
+        assert_public_artifact_runtime_boundary,
+    )
+
+    assert tc.TensorCastRuntimeConfig is TensorCastRuntimeConfig
+    assert tc.plan_runtime_start is integration_mod.tc_runtime_config.plan_runtime_start
+    assert "ServingConfig" not in tc.__all__  # legacy Serving* names must be gone
+    assert "plan_serving_start" not in tc.__all__
+    assert not hasattr(tc, "ServingConfig")
+    assert not hasattr(tc, "plan_serving_start")
+    assert not hasattr(integration_mod, "ServingPolicy")
+    assert _find_spec_or_none("tensorcast.serving") is None
+    assert _find_spec_or_none("tensorcast.serving.runtime") is None
+    assert _find_spec_or_none("tensorcast.serving.config") is None
+    assert _find_spec_or_none("tensorcast.serving.contract") is None
+    assert _find_spec_or_none("tensorcast.serving.hosts") is None
+    assert _find_spec_or_none("tensorcast.serving.policy") is None
+    assert _find_spec_or_none("tensorcast.serving.runtime_contract") is None
+    assert "ServingArtifactLocator" not in tc.__all__
+    assert "ServingPolicy" not in tc.__all__
+    assert not hasattr(tc, "ServingArtifactLocator")
+    assert not hasattr(tc, "ServingPolicy")
+    assert "ArtifactRuntimeSession" not in tc.__all__  # not exported from the root
+    assert not hasattr(tc, "ArtifactRuntimeSession")
     assert not hasattr(integration_mod, "FrameworkAdapter")
-    assert not hasattr(ServingIntegration, "framework_adapter")
-    assert "AdminLocalSourceBootstrap" not in serving_runtime.__all__
-    assert "_AdminLocalSourceBootstrap" not in serving_runtime.__all__
-    assert "bind_serving_artifact" not in serving_runtime.__all__
-    assert not hasattr(ServingIntegration, "bind")
-    assert not hasattr(ServingIntegration, "swap")
-    assert not hasattr(ServingIntegration, "restore_retained")
-    assert not hasattr(ServingIntegration, "restore_prepared_local_ready")
-    assert serving_admin.AdminLocalSourceBootstrap is AdminLocalSourceBootstrap
-    assert serving_hosts.IntegrationHost is IntegrationHost
-    assert serving_hosts.SourceHost is integration_mod.SourceHost
-    assert serving_hosts.RecipeCachePolicy is RecipeCachePolicy
-    assert serving.PLACEMENT_IDENTITY_FACTS_SCHEMA_VERSION == (
+    assert not hasattr(ArtifactRuntimeIntegration, "framework_adapter")
+    assert "AdminLocalSourceBootstrap" not in tc.__all__  # admin helper stays hidden
+    assert "_AdminLocalSourceBootstrap" not in tc.__all__
+    assert "bind_runtime_artifact" not in tc.__all__
+    assert not hasattr(ArtifactRuntimeIntegration, "bind")
+    assert not hasattr(ArtifactRuntimeIntegration, "swap")
+    assert not hasattr(ArtifactRuntimeIntegration, "restore_retained")
+    assert not hasattr(ArtifactRuntimeIntegration, "restore_prepared_local_ready")
+    assert runtime_admin.AdminLocalSourceBootstrap is AdminLocalSourceBootstrap
+    assert runtime_host.RuntimeHostCapabilities is IntegrationHost
+    assert runtime_host.IntegrationHost is IntegrationHost
+    assert runtime_host.SourceHost is integration_mod.SourceHost
+    assert runtime_host.RecipeCachePolicy is RecipeCachePolicy
+    assert runtime_host.PLACEMENT_IDENTITY_FACTS_SCHEMA_VERSION == (
         PLACEMENT_IDENTITY_FACTS_SCHEMA_VERSION
     )
-    assert serving_hosts.PLACEMENT_ADMISSION_FACTS_SCHEMA_VERSION == (
+    assert runtime_host.PLACEMENT_ADMISSION_FACTS_SCHEMA_VERSION == (
         PLACEMENT_ADMISSION_FACTS_SCHEMA_VERSION
     )
-    assert serving_hosts.SOURCE_CATALOG_REQUEST_SCHEMA_VERSION == (
+    assert runtime_host.SOURCE_CATALOG_REQUEST_SCHEMA_VERSION == (
         SOURCE_CATALOG_REQUEST_SCHEMA_VERSION
     )
-    assert serving.SOURCE_CATALOG_SCHEMA_VERSION == SOURCE_CATALOG_SCHEMA_VERSION
-    assert serving_hosts.SOURCE_CATALOG_SCHEMA_VERSION == (
-        SOURCE_CATALOG_SCHEMA_VERSION
-    )
-
-    assert_public_runtime_boundary(serving_runtime)
-    assert_framework_isolation(
-        ("tensorcast.serving.runtime", "tensorcast.serving.hosts")
-    )
-
-
-def test_serving_root_facade_is_explicit_and_curated():
-    import tensorcast.serving as serving
-
-    hidden_names = {
-        "bind_serving_artifact",
-        "swap_serving_artifact",
-        "restore_retained_binding",
-        "build_materialization_execution_context",
-        "compile_recipe_from_inputs",
-        "load_compiled_recipe_cache",
-        "write_compiled_recipe_cache",
-        "materialize_recipe_copy_plan_tensors",
-        "complete_pure_transform_recipe_publication",
-        "build_binding_finalize_build_intent",
-        "build_pure_transform_build_intent",
-        "RecipeBuildSession",
-        "RecipePublicationContext",
-        "CompiledServingRecipe",
-        "PublishedReplicaProjection",
-        "ReloadResponseProjection",
-        "RuntimeEndpointProjection",
-        "SourceSelectionProjection",
-        "WeightVersionProjection",
-        "RuntimeAttachmentStore",
-        "RuntimeAttachmentRecord",
-        "ModelAttributeRuntimeState",
-        "ReadinessInventoryAdmissionPolicy",
-        "aggregate_runtime_view_outputs",
-        "publication_aggregate",
-    }
-    assert hidden_names.isdisjoint(serving.__all__)
-    assert "ServingRuntimeSession" not in serving.__all__
-    assert "IntegrationHost" in serving.__all__
-    assert "ConformanceResult" not in serving.__all__
-    assert "ServingConfig" in serving.__all__
-    assert "RuntimeSettings" in serving.__all__
-    assert not hasattr(serving, "__getattr__")
+    assert runtime_host.SOURCE_CATALOG_SCHEMA_VERSION == (SOURCE_CATALOG_SCHEMA_VERSION)
+
+    assert_public_artifact_runtime_boundary(tc)
+    assert_framework_isolation(("tensorcast", "tensorcast.artifact_runtime.host"))
 
-    hidden_name = "CompiledServingRecipe"
-    with pytest.raises(AttributeError):
-        getattr(serving, hidden_name)
+
+def test_serving_public_package_is_removed():  # NOTE(review): subset of boundary test
+    assert _find_spec_or_none("tensorcast.serving") is None
+    assert _find_spec_or_none("tensorcast.serving.runtime") is None
 
 
 def test_source_subject_broadcast_round_trips_non_public_subjects():
@@ -3198,7 +3183,7 @@ def test_source_subject_broadcast_round_trips_non_public_subjects():
         "metadata_fingerprint": "meta",
     }
 
-    integration = ServingIntegration()
+    integration = ArtifactRuntimeIntegration()
     payload = integration.source_subject_broadcast_payload(subject)
     assert integration.source_subject_from_broadcast_payload(payload) == restored
 
@@ -3234,7 +3219,7 @@ def broadcast_object(payload, *, src):
             calls.append((payload, src))
             return payload
 
-    resolved = ServingIntegration().resolve_source_subject(
+    resolved = ArtifactRuntimeIntegration().resolve_source_subject(
         SourceSelector.local_path("/tmp/model"),
         verify_checksums=True,
         coordinator=_Coordinator(),
@@ -3290,7 +3275,7 @@ def test_runtime_binding_materialization_attaches_and_transfers_ownership(
                 binding_handle=binding,
                 target_device=torch.device("cpu"),
                 tensor_schema_hash="schema",
-                artifact_profile="serving_artifact",
+                artifact_profile="runtime_artifact",
                 authority_scope="daemon_mediated_runtime_attachment",
             ),
         ),
@@ -3447,7 +3432,7 @@ def _local_ready_recipe() -> SimpleNamespace:
 
 def _representation_changing_local_ready_recipe() -> SimpleNamespace:
     recipe = _local_ready_recipe()
-    recipe.serving_facts = SimpleNamespace(
+    recipe.runtime_facts = SimpleNamespace(
         process_after_load_class=FinalizeClass.REPRESENTATION_CHANGING
     )
     recipe.semantic_validation_spec = TensorcastSemanticValidationSpec(
@@ -3483,7 +3468,7 @@ def _local_ready_finalize_request(**overrides) -> _LocalReadyFinalize:
 
 def test_serving_integration_finalizes_local_ready_runtime_in_core():
     adapter = _MaterializationAdapter()
-    integration = ServingIntegration(host=_host_for_adapter(adapter))
+    integration = ArtifactRuntimeIntegration(host=_host_for_adapter(adapter))
     model = _MaterializedModel()
     model_config = SimpleNamespace(name="model-config")
     binding = _LocalReadyBinding()
@@ -3519,7 +3504,7 @@ def test_serving_integration_finalizes_local_ready_runtime_in_core():
     assert result.recipe is recipe
     assert result.binding is binding
     assert result.current_value.local_serving_ref == ("binding-local:binding-1:value-1")
-    assert result.runtime_view.readiness == "serving_local_ready"
+    assert result.runtime_view.readiness == "runtime_local_ready"
     assert result.runtime_view.source_artifact_ref == "mi2:test:source"
     report = result.runtime_view.diagnostics["artifact_realization_report"]
     assert report["target_kind"] == "runtime_attachment"
@@ -3550,14 +3535,14 @@ def test_serving_integration_finalizes_local_ready_runtime_in_core():
 def test_serving_integration_validates_local_ready_representation_contract(monkeypatch):
     calls = []
     adapter = _MaterializationAdapter()
-    integration = ServingIntegration(host=_host_for_adapter(adapter))
+    integration = ArtifactRuntimeIntegration(host=_host_for_adapter(adapter))
     model = _MaterializedModel()
     model_config = SimpleNamespace(
         model="fake-model",
         compute_hash=lambda: "model-hash",
     )
-    placement = ServingPlacement(
-        topology=ServingTopologyRef(
+    placement = RuntimePlacement(
+        topology=RuntimeTopologyRef(
             schema_topology_digest="digest",
             logical_topology_ref="fake://topology",
         ),
@@ -3567,7 +3552,7 @@ def test_serving_integration_validates_local_ready_representation_contract(monke
     )
 
     monkeypatch.setattr(
-        integration_mod,
+        contract_mod,
         "compute_runtime_representation_contract_hash",
         lambda **kwargs: calls.append(kwargs) or "repr",
     )
@@ -3620,8 +3605,8 @@ def test_serving_integration_closes_local_ready_binding_on_representation_drift(
 ):
     binding = _LocalReadyBinding()
     model_config = SimpleNamespace(model="fake-model")
-    placement = ServingPlacement(
-        topology=ServingTopologyRef(
+    placement = RuntimePlacement(
+        topology=RuntimeTopologyRef(
             schema_topology_digest="digest",
             logical_topology_ref="fake://topology",
         ),
@@ -3630,14 +3615,14 @@ def test_serving_integration_closes_local_ready_binding_on_representation_drift(
         identity_payload={"rank": 0},
     )
     monkeypatch.setattr(
-        integration_mod,
+        contract_mod,
         "compute_runtime_representation_contract_hash",
         lambda **_kwargs: "actual",
     )
 
     with pytest.raises(ManifestMismatchError, match="contract hash drifted"):
         adapter = _MaterializationAdapter()
-        ServingIntegration(
+        ArtifactRuntimeIntegration(
             host=_host_for_adapter(adapter),
         )._finalize_local_ready_runtime(
             _LocalReadyFinalize(
@@ -3671,8 +3656,10 @@ def test_serving_integration_closes_local_ready_binding_on_representation_drift(
 def test_serving_integration_rejects_representation_changing_finalize_without_semantic_validation():
     binding = _LocalReadyBinding()
 
-    with pytest.raises(ServingIntegrationError, match="explicit semantic validation"):
-        ServingIntegration(
+    with pytest.raises(
+        ArtifactRuntimeIntegrationError, match="explicit semantic validation"
+    ):
+        ArtifactRuntimeIntegration(
             host=_host_for_adapter(_MaterializationAdapter())
         )._finalize_local_ready_runtime(
             _local_ready_finalize_request(
@@ -3690,10 +3677,10 @@ def test_serving_integration_rejects_representation_changing_finalize_without_co
     binding = _LocalReadyBinding()
 
     with pytest.raises(
-        ServingIntegrationError,
+        ArtifactRuntimeIntegrationError,
         match="requires representation contract validation",
     ):
-        ServingIntegration(
+        ArtifactRuntimeIntegration(
             host=_host_for_adapter(_MaterializationAdapter())
         )._finalize_local_ready_runtime(
             _local_ready_finalize_request(
@@ -3711,8 +3698,8 @@ def test_serving_integration_rejects_representation_changing_finalize_without_re
     monkeypatch,
 ):
     binding = _LocalReadyBinding()
-    placement = ServingPlacement(
-        topology=ServingTopologyRef(
+    placement = RuntimePlacement(
+        topology=RuntimeTopologyRef(
             schema_topology_digest="digest",
             logical_topology_ref="fake://topology",
         ),
@@ -3721,16 +3708,16 @@ def test_serving_integration_rejects_representation_changing_finalize_without_re
         identity_payload={"rank": 0},
     )
     monkeypatch.setattr(
-        ServingIntegration,
+        ArtifactRuntimeIntegration,
         "local_ready_representation_contract_hash",
         lambda _self, **_kwargs: "repr",
     )
 
     with pytest.raises(
-        ServingIntegrationError,
+        ArtifactRuntimeIntegrationError,
         match="ready same-binding contract proof",
     ):
-        ServingIntegration(
+        ArtifactRuntimeIntegration(
             host=_host_for_adapter(_MaterializationAdapter())
         )._finalize_local_ready_runtime(
             _local_ready_finalize_request(
@@ -3759,7 +3746,7 @@ def test_serving_integration_prepare_local_ready_owns_contract_and_options(monke
     adapter.align_runtime_tensor_names = (
         lambda model, names: align_calls.append(tuple(names)) or 0
     )
-    integration = ServingIntegration(host=_host_for_adapter(adapter))
+    integration = ArtifactRuntimeIntegration(host=_host_for_adapter(adapter))
     calls = []
     source_bound_contract_state = SimpleNamespace(
         source_bound_contract_ready=True,
@@ -3781,9 +3768,11 @@ def fake_prepare(**kwargs):
         )
 
     monkeypatch.setattr(
-        ServingIntegration, "build_materialization_options", fake_build_options
+        ArtifactRuntimeIntegration, "build_materialization_options", fake_build_options
+    )
+    monkeypatch.setattr(
+        local_ready_mod, "realize_local_ready_binding_from_source", fake_prepare
     )
-    monkeypatch.setattr(integration_mod, "prepare_local_ready_serving", fake_prepare)
 
     result = integration._prepare_local_source_bootstrap(
         _LocalReadyBootstrap(
@@ -3848,7 +3837,7 @@ def runtime_only_tensor_names(self, model):
             del model
             return ("runtime_only",)
 
-    integration = ServingIntegration(
+    integration = ArtifactRuntimeIntegration(
         host=_host_for_adapter(
             adapter,
             placement=_PlacementWithExecutionFacts(),
@@ -3863,13 +3852,13 @@ def runtime_only_tensor_names(self, model):
     )
 
     monkeypatch.setattr(
-        ServingIntegration,
+        ArtifactRuntimeIntegration,
         "build_materialization_options",
         lambda self, **kwargs: calls.append(kwargs) or ("realize-options", {}),
     )
     monkeypatch.setattr(
-        integration_mod,
-        "prepare_local_ready_serving",
+        local_ready_mod,
+        "realize_local_ready_binding_from_source",
         lambda **kwargs: SimpleNamespace(
             binding=_LocalReadyBinding(),
             update_epoch="epoch-1",
@@ -3945,25 +3934,25 @@ def build_recipe(self, **kwargs):
             )
 
     monkeypatch.setattr(
-        ServingIntegration,
+        ArtifactRuntimeIntegration,
         "resolve_source_subject",
         lambda self, selector, **kwargs: calls.append(("resolve", selector, kwargs))
         or source_subject,
     )
     monkeypatch.setattr(
-        ServingIntegration,
+        ArtifactRuntimeIntegration,
         "build_recipe_session",
         lambda self, request: calls.append(("session", request)) or _Session(),
     )
     monkeypatch.setattr(
-        ServingIntegration,
+        ArtifactRuntimeIntegration,
         "build_materialization_options",
         lambda self, **kwargs: calls.append(("options", kwargs))
         or ("realize-options", {}),
     )
     monkeypatch.setattr(
-        integration_mod,
-        "prepare_local_ready_serving",
+        local_ready_mod,
+        "realize_local_ready_binding_from_source",
         lambda **kwargs: calls.append(("prepare", kwargs))
         or SimpleNamespace(
             binding=_LocalReadyBinding(),
@@ -3973,7 +3962,7 @@ def build_recipe(self, **kwargs):
         ),
     )
 
-    result = ServingIntegration(
+    result = ArtifactRuntimeIntegration(
         host=IntegrationHost(
             framework=_ContractFrameworkHost(),
             placement=_ContractPlacementHost(),
@@ -4034,7 +4023,7 @@ def build_recipe(self, **kwargs):
 
 def test_serving_integration_prepare_local_ready_builds_framework_context(monkeypatch):
     adapter = _MaterializationAdapter()
-    integration = ServingIntegration(host=_host_for_adapter(adapter))
+    integration = ArtifactRuntimeIntegration(host=_host_for_adapter(adapter))
     calls = []
     source_bound_contract_state = SimpleNamespace(
         source_bound_contract_ready=True,
@@ -4043,8 +4032,8 @@ def test_serving_integration_prepare_local_ready_builds_framework_context(monkey
     )
     recipe = _representation_changing_local_ready_recipe()
     model_config = SimpleNamespace(name="model-config")
-    placement = ServingPlacement(
-        topology=ServingTopologyRef(
+    placement = RuntimePlacement(
+        topology=RuntimeTopologyRef(
             schema_topology_digest="digest",
             logical_topology_ref="fake://topology",
         ),
@@ -4063,7 +4052,7 @@ def fake_prepare(**kwargs):
         )
 
     monkeypatch.setattr(
-        ServingIntegration,
+        ArtifactRuntimeIntegration,
         "prepare_local_ready_manifest_carrier_from_framework_context",
         lambda self, **kwargs: calls.append(("carrier", kwargs))
         or LocalReadyManifestCarrierResult(
@@ -4074,16 +4063,18 @@ def fake_prepare(**kwargs):
         ),
     )
     monkeypatch.setattr(
-        ServingIntegration,
+        ArtifactRuntimeIntegration,
         "build_materialization_options",
         lambda self, **kwargs: ("realize-options", {}),
     )
     monkeypatch.setattr(
-        ServingIntegration,
+        ArtifactRuntimeIntegration,
         "local_ready_representation_contract_hash",
         lambda self, **kwargs: "repr",
     )
-    monkeypatch.setattr(integration_mod, "prepare_local_ready_serving", fake_prepare)
+    monkeypatch.setattr(
+        local_ready_mod, "realize_local_ready_binding_from_source", fake_prepare
+    )
 
     result = integration._prepare_local_source_bootstrap(
         _LocalReadyBootstrap(
@@ -4125,7 +4116,7 @@ def fake_prepare(**kwargs):
 
 def test_serving_integration_finalizes_local_ready_runtime_runs_semantic_validation():
     adapter = _MaterializationAdapter()
-    integration = ServingIntegration(host=_host_for_adapter(adapter))
+    integration = ArtifactRuntimeIntegration(host=_host_for_adapter(adapter))
     model_config = SimpleNamespace(name="model-config")
 
     result = integration._finalize_local_ready_runtime(
@@ -4167,7 +4158,7 @@ def test_serving_integration_finalizes_local_ready_runtime_closes_on_error():
 
     with pytest.raises(SchemaMismatchError, match="tensor set"):
         adapter = _MaterializationAdapter()
-        ServingIntegration(
+        ArtifactRuntimeIntegration(
             host=_host_for_adapter(adapter),
         )._finalize_local_ready_runtime(
             _LocalReadyFinalize(
@@ -4193,7 +4184,7 @@ def test_serving_integration_finalizes_local_ready_runtime_closes_on_error():
     assert binding.closed
 
 
-def test_serving_integration_acquire_retained_binding_uses_materialization():
+def test_artifact_runtime_acquire_retained_binding_uses_materialization():
     client = _Client()
     adapter = _MaterializationAdapter()
     adapter.compute_runtime_tensor_schema_hash = (
@@ -4202,7 +4193,7 @@ def test_serving_integration_acquire_retained_binding_uses_materialization():
     adapter.allocate_runtime_only_tensors = (
         lambda model, _target_device: _allocate_cpu_runtime_only(model)
     )
-    integration = ServingIntegration(host=_host_for_adapter(adapter))
+    integration = ArtifactRuntimeIntegration(host=_host_for_adapter(adapter))
     model_config = SimpleNamespace(name="model-config")
 
     result = integration._restore_retained_for_intent(
@@ -4221,7 +4212,7 @@ def test_serving_integration_acquire_retained_binding_uses_materialization():
     assert result.model is not None
     assert torch.equal(result.model.w.detach(), torch.ones((1,)))
     assert not result.model.runtime_only.is_meta
-    assert result.runtime_view.readiness == "serving_local_ready"
+    assert result.runtime_view.readiness == "runtime_local_ready"
     assert result.runtime_view.tensor_schema_hash == "schema-hash"
     assert result.runtime_view.binding_value_ref == _binding_ref()
     report = result.runtime_view.diagnostics["artifact_realization_report"]
@@ -4246,16 +4237,16 @@ def test_serving_integration_acquire_retained_binding_uses_materialization():
     assert result.runtime_state.release_contract.released is True
 
 
-def test_serving_integration_acquire_retained_binding_rejects_published_ready():
+def test_artifact_runtime_acquire_retained_binding_rejects_published_ready():
     authority = _authority()
-    authority = ParsedRetainedServingBindingAuthority(
+    authority = ParsedRetainedRealizationAuthority(
         **{
             **authority.__dict__,
-            "readiness": "serving_published_ready",
+            "readiness": "runtime_published_ready",
         }
     )
     adapter = _MaterializationAdapter()
-    integration = ServingIntegration(host=_host_for_adapter(adapter))
+    integration = ArtifactRuntimeIntegration(host=_host_for_adapter(adapter))
 
     with pytest.raises(RestoreBindingError, match="swap-capable"):
         integration._restore_retained_for_intent(
@@ -4302,11 +4293,11 @@ def ensure_client(self):
 
 def test_bind_and_swap_return_attach_ready_results():
     resolved = SimpleNamespace(artifact=_Artifact(), tensor_names=("w",))
-    result = bind_serving_artifact(
+    result = bind_runtime_artifact(
         resolved_artifact=resolved,
         tensor_names=("w",),
         device=torch.device("cuda:0"),
-        serving_runtime_policy=None,
+        runtime_artifact_policy=None,
         options=None,
     )
 
@@ -4320,10 +4311,10 @@ def swap(self, artifact, **kwargs):
 
     binding = _SwapBinding()
     binding.tensors[SERVING_MANIFEST_TENSOR_NAME] = torch.ones((1,), dtype=torch.uint8)
-    swap_result = swap_serving_artifact(
+    swap_result = swap_runtime_artifact(
         binding=binding,
         resolved_artifact=resolved,
-        serving_runtime_policy="policy",
+        runtime_artifact_policy="policy",
         options="options",
     )
 
@@ -4331,7 +4322,7 @@ def swap(self, artifact, **kwargs):
     assert isinstance(binding.swapped[0], _Subset)
     assert binding.swapped[0].names == ("w", SERVING_MANIFEST_TENSOR_NAME)
     assert binding.swapped[1] == {
-        "serving_runtime_policy": "policy",
+        "runtime_artifact_policy": "policy",
         "options": "options",
     }
 
@@ -4369,7 +4360,7 @@ def test_restore_retained_binding_keeps_runtime_owned_attachment():
 
 
 def test_restore_retained_binding_rejects_member_mismatch():
-    expected_member = ServingBindingMemberRef(
+    expected_member = RuntimeBindingMemberRef(
         member_id="other-member",
         member_index=0,
         member_count=1,
diff --git a/tests/python/test_serving_readiness.py b/tests/python/artifact_runtime/test_readiness.py
similarity index 87%
rename from tests/python/test_serving_readiness.py
rename to tests/python/artifact_runtime/test_readiness.py
index ca0dceb7..26f25df1 100644
--- a/tests/python/test_serving_readiness.py
+++ b/tests/python/artifact_runtime/test_readiness.py
@@ -2,21 +2,21 @@
 
 from types import SimpleNamespace
 
-from tensorcast.serving._runtime_impl.lifecycle import (
+from tensorcast.artifact_runtime.lifecycle import (
     AdmissionRequest,
     FrameworkIdentity,
     PlacementAdmissionFacts,
     PlacementIdentityFacts,
     RuntimeProfile,
 )
-from tensorcast.serving.readiness import (
+from tensorcast.artifact_runtime.readiness import (
     ReadinessInventoryAdmissionPolicy,
     is_binding_finalize_publication_allowlisted,
     is_pure_transform_publication_allowlisted,
     is_runtime_bind_swap_allowlisted,
-    serving_support_level_display_name,
+    runtime_support_level_display_name,
 )
-from tensorcast.types import FinalizeClass, ServingSupportLevel
+from tensorcast.types import FinalizeClass, RuntimeSupportLevel
 
 
 def _row(**overrides):
@@ -24,9 +24,9 @@ def _row(**overrides):
         "family": "fake",
         "process_after_load_class": FinalizeClass.RUNTIME_ONLY,
         "post_bind_finalize_class": FinalizeClass.RUNTIME_ONLY,
-        "support_level": ServingSupportLevel.RUNTIME_BIND_SWAP_READY,
+        "support_level": RuntimeSupportLevel.RUNTIME_BIND_SWAP_READY,
         "pure_transform_candidate": True,
-        "serving_only_runtime_allowed": True,
+        "runtime_bind_swap_allowed": True,
     }
     values.update(overrides)
     return SimpleNamespace(**values)
@@ -36,7 +36,7 @@ def test_readiness_helpers_accept_framework_inventory_rows() -> None:
     row = _row()
 
     assert (
-        serving_support_level_display_name(row.support_level)
+        runtime_support_level_display_name(row.support_level)
         == "runtime_bind_swap_ready"
     )
     assert is_pure_transform_publication_allowlisted(row) is True
diff --git a/tests/python/test_serving_runtime.py b/tests/python/artifact_runtime/test_runtime_config.py
similarity index 98%
rename from tests/python/test_serving_runtime.py
rename to tests/python/artifact_runtime/test_runtime_config.py
index b3b62d3d..34a050e6 100644
--- a/tests/python/test_serving_runtime.py
+++ b/tests/python/artifact_runtime/test_runtime_config.py
@@ -4,7 +4,7 @@
 
 import pytest
 
-from tensorcast.serving import (
+from tensorcast.artifact_runtime.config import (
     DEFAULT_RUNTIME_PROFILE,
     RuntimeSettings,
     resolve_runtime_config_profile,
diff --git a/tests/python/test_serving_runtime_contract.py b/tests/python/artifact_runtime/test_runtime_contract.py
similarity index 97%
rename from tests/python/test_serving_runtime_contract.py
rename to tests/python/artifact_runtime/test_runtime_contract.py
index 09c1465d..11071b15 100644
--- a/tests/python/test_serving_runtime_contract.py
+++ b/tests/python/artifact_runtime/test_runtime_contract.py
@@ -4,7 +4,7 @@
 
 from types import SimpleNamespace
 
-from tensorcast.serving.runtime_contract import (
+from tensorcast.artifact_runtime.contract import (
     SourceBoundContractState,
     read_source_bound_contract_state,
 )
diff --git a/tests/python/test_serving_builder_source_catalog.py b/tests/python/artifact_runtime/test_source.py
similarity index 99%
rename from tests/python/test_serving_builder_source_catalog.py
rename to tests/python/artifact_runtime/test_source.py
index 9576e86c..a1a2ed73 100644
--- a/tests/python/test_serving_builder_source_catalog.py
+++ b/tests/python/artifact_runtime/test_source.py
@@ -7,7 +7,7 @@
 from safetensors.torch import save_file
 
 from tensorcast.api.store.types import CanonicalIndex, CanonicalIndexEntry
-from tensorcast.serving.source_catalog import (
+from tensorcast.artifact_runtime.source import (
     SOURCE_CATALOG_SCHEMA_VERSION,
     SourceCatalog,
     SourceManifest,
diff --git a/tests/python/test_serving_state.py b/tests/python/artifact_runtime/test_state.py
similarity index 94%
rename from tests/python/test_serving_state.py
rename to tests/python/artifact_runtime/test_state.py
index b6bc026b..3784f1ed 100644
--- a/tests/python/test_serving_state.py
+++ b/tests/python/artifact_runtime/test_state.py
@@ -2,18 +2,18 @@
 
 from types import SimpleNamespace
 
-from tensorcast.serving.runtime_attachment import (
+from tensorcast.artifact_runtime.attachment import (
     RuntimeAttachment,
     RuntimeBindingState,
     RuntimeBindingView,
 )
-from tensorcast.serving.runtime_view import (
-    RuntimeWorkerView,
-)
-from tensorcast.serving.state import (
+from tensorcast.artifact_runtime.state import (
     ModelAttributeRuntimeState,
     RuntimeAttachmentStore,
 )
+from tensorcast.artifact_runtime.view import (
+    RuntimeWorkerView,
+)
 
 
 def _attachment(value_id: str) -> RuntimeAttachment:
@@ -28,7 +28,7 @@ def _attachment(value_id: str) -> RuntimeAttachment:
         representation_contract_hash=f"repr-{value_id}",
         tensor_schema_hash=f"schema-{value_id}",
         binding_value_ref=binding_value_ref,
-        readiness="serving",
+        readiness="runtime_ready",
     )
     return RuntimeAttachment(
         model=object(),
diff --git a/tests/python/test_serving_runtime_view.py b/tests/python/artifact_runtime/test_view.py
similarity index 97%
rename from tests/python/test_serving_runtime_view.py
rename to tests/python/artifact_runtime/test_view.py
index eb0042bf..f3e12ac8 100644
--- a/tests/python/test_serving_runtime_view.py
+++ b/tests/python/artifact_runtime/test_view.py
@@ -2,7 +2,7 @@
 
 import pytest
 
-from tensorcast.serving.runtime_view import aggregate_runtime_view_outputs
+from tensorcast.artifact_runtime.view import aggregate_runtime_view_outputs
 
 
 def test_runtime_view_aggregate_reports_partial_publication():
diff --git a/tests/python/daemon/test_prefetch_serving_binding_real_cuda_e2e.py b/tests/python/daemon/test_prefetch_serving_binding_real_cuda_e2e.py
index 2b8c939d..ef78caaa 100644
--- a/tests/python/daemon/test_prefetch_serving_binding_real_cuda_e2e.py
+++ b/tests/python/daemon/test_prefetch_serving_binding_real_cuda_e2e.py
@@ -19,8 +19,8 @@
 from tensorcast.api._device import device_uuid_for
 from tensorcast.api.store.owned_binding_slot import restore_owned_binding_tensors
 from tensorcast.api.store.runtime import StoreRuntimeContext
-from tensorcast.api.store.serving_binding_reference_consumer import (
-    ReferenceServingTensorSpec,
+from tensorcast.api.store.runtime_realization_reference_consumer import (
+    ReferenceRuntimeTensorSpec,
     acquire_reference_binding_response,
     build_reference_resolved_spec,
     build_reference_tensor_index_bytes,
@@ -32,9 +32,9 @@
 from tensorcast.daemon_ctl import DaemonCtl
 from tensorcast.types import (
     PrefetchRetentionPolicy,
-    ServingBindingMemberRef,
-    ServingBindingSetTarget,
-    ServingTopologyRef,
+    RealizationTargetSet,
+    RuntimeBindingMemberRef,
+    RuntimeTopologyRef,
 )
 from tests.python.utils.daemon import start_daemon_binary
 from tests.python.utils.ports import get_free_port
@@ -51,20 +51,20 @@
 
 from tensorcast.api.store.owned_binding_slot import restore_owned_binding_tensors
 from tensorcast.api.store.runtime import StoreRuntimeContext
-from tensorcast.api.store.serving_binding_reference_consumer import (
+from tensorcast.api.store.runtime_realization_reference_consumer import (
     acquire_reference_binding_response,
 )
 from tensorcast.daemon_ctl import DaemonCtl
 from tensorcast.proto.operation.v1 import operation_pb2
-from tensorcast.types import PrefetchedServingBinding, ServingBindingTarget
+from tensorcast.types import PrefetchHandoff, RealizationTarget
 
 daemon_addr, target_path, prefetched_path = sys.argv[1:4]
 target_proto = operation_pb2.ServingBindingTarget()
 target_proto.ParseFromString(open(target_path, "rb").read())
 prefetched_proto = operation_pb2.PrefetchServingBindingResult()
 prefetched_proto.ParseFromString(open(prefetched_path, "rb").read())
-target = ServingBindingTarget.from_proto(target_proto)
-prefetched = PrefetchedServingBinding.from_proto(prefetched_proto)
+target = RealizationTarget.from_proto(target_proto)
+prefetched = PrefetchHandoff.from_proto(prefetched_proto)
 
 runtime = StoreRuntimeContext(daemon_addr)
 client = DaemonCtl(daemon_addr)
@@ -99,20 +99,20 @@
 
 from tensorcast.api.store.owned_binding_slot import restore_owned_binding_tensors
 from tensorcast.api.store.runtime import StoreRuntimeContext
-from tensorcast.api.store.serving_binding_reference_consumer import (
+from tensorcast.api.store.runtime_realization_reference_consumer import (
     acquire_reference_binding_response,
 )
 from tensorcast.daemon_ctl import DaemonCtl
 from tensorcast.proto.operation.v1 import operation_pb2
-from tensorcast.types import PrefetchedServingBindingSet, ServingBindingSetTarget
+from tensorcast.types import PrefetchHandoffSet, RealizationTargetSet
 
 daemon_addr, target_path, prefetched_path = sys.argv[1:4]
 target_proto = operation_pb2.ServingBindingSetTarget()
 target_proto.ParseFromString(open(target_path, "rb").read())
 prefetched_proto = operation_pb2.PrefetchServingBindingSetResult()
 prefetched_proto.ParseFromString(open(prefetched_path, "rb").read())
-target_set = ServingBindingSetTarget.from_proto(target_proto)
-prefetched_set = PrefetchedServingBindingSet.from_proto(prefetched_proto)
+target_set = RealizationTargetSet.from_proto(target_proto)
+prefetched_set = PrefetchHandoffSet.from_proto(prefetched_proto)
 targets_by_member = {target.member.member_id: target for target in target_set.members}
 
 runtime = StoreRuntimeContext(daemon_addr)
@@ -152,7 +152,7 @@ def _skip_without_real_cuda() -> None:
 
 
 def _write_single_float_artifact(artifact_dir: Path, value: float) -> None:
-    tensor = ReferenceServingTensorSpec(
+    tensor = ReferenceRuntimeTensorSpec(
         name="alpha",
         size_bytes=4,
         dtype="torch.float32",
@@ -248,7 +248,7 @@ def test_prefetch_serving_binding_real_cuda_worker_read_and_release(tmp_path) ->
         ).source
         assert source.artifact_id.startswith("msa1:")
         assert source.canonical_index_bytes == build_reference_tensor_index_bytes(
-            ReferenceServingTensorSpec(
+            ReferenceRuntimeTensorSpec(
                 name="alpha",
                 size_bytes=4,
                 dtype="torch.float32",
@@ -262,7 +262,7 @@ def test_prefetch_serving_binding_real_cuda_worker_read_and_release(tmp_path) ->
             artifact_selection_digest=source.trusted_content_artifact_id
             or source.artifact_id,
             device_uuid=device_uuid_for(0),
-            tensor=ReferenceServingTensorSpec(
+            tensor=ReferenceRuntimeTensorSpec(
                 name="alpha",
                 size_bytes=4,
                 dtype="torch.float32",
@@ -344,7 +344,7 @@ def test_prefetch_serving_binding_set_real_cuda_worker_reads_members(tmp_path) -
     source_root = tmp_path / "public-source-root"
     artifact_dir = source_root / "model"
     expected_value = 7.5
-    tensor = ReferenceServingTensorSpec(
+    tensor = ReferenceRuntimeTensorSpec(
         name="alpha",
         size_bytes=4,
         dtype="torch.float32",
@@ -369,13 +369,13 @@ def test_prefetch_serving_binding_set_real_cuda_worker_reads_members(tmp_path) -
         selection_digest = source.trusted_content_artifact_id or source.artifact_id
         members = []
         cache_root = tmp_path / "resolved-spec-cache"
-        topology = ServingTopologyRef(
+        topology = RuntimeTopologyRef(
             schema_topology_digest="vllm-tp2-schema",
             admission_topology_digest="vllm-tp2-admission",
             logical_topology_ref="vllm://parallelism?tp=2&pp=1&dp=1",
         )
         for index in range(2):
-            member = ServingBindingMemberRef(
+            member = RuntimeBindingMemberRef(
                 member_id=f"dp0:pp0:tp{index}",
                 member_index=index,
                 member_count=2,
@@ -401,7 +401,7 @@ def test_prefetch_serving_binding_set_real_cuda_worker_reads_members(tmp_path) -
                     device_uuid=device_uuid_for(0),
                 )
             )
-        target_set = ServingBindingSetTarget(
+        target_set = RealizationTargetSet(
             runtime="vllm",
             source=members[0].source,
             topology=topology,
diff --git a/tests/python/examples/test_serving_runtime_reference_framework.py b/tests/python/examples/test_runtime_reference_framework.py
similarity index 72%
rename from tests/python/examples/test_serving_runtime_reference_framework.py
rename to tests/python/examples/test_runtime_reference_framework.py
index 6e249700..1a81146c 100644
--- a/tests/python/examples/test_serving_runtime_reference_framework.py
+++ b/tests/python/examples/test_runtime_reference_framework.py
@@ -9,14 +9,14 @@
 _EXAMPLE = (
     Path(__file__).resolve().parents[3]
     / "examples"
-    / "serving_runtime_reference_framework"
+    / "runtime_reference_framework"
     / "reference_framework.py"
 )
 
 
 def _load_example_module():
     spec = importlib.util.spec_from_file_location(
-        "serving_runtime_reference_framework",
+        "runtime_reference_framework",
         _EXAMPLE,
     )
     assert spec is not None
@@ -26,7 +26,7 @@ def _load_example_module():
     return module
 
 
-def test_reference_framework_uses_public_serving_surfaces_only():
+def test_reference_framework_uses_public_artifact_runtime_surfaces_only():
     module = ast.parse(_EXAMPLE.read_text(encoding="utf-8"), filename=str(_EXAMPLE))
     imported: set[str] = set()
     for node in ast.walk(module):
@@ -40,10 +40,11 @@ def test_reference_framework_uses_public_serving_surfaces_only():
         for name in imported
         if name == "vllm"
         or name.startswith("vllm.")
-        or name == "tensorcast.serving.integration"
-        or name.startswith("tensorcast.serving.builder")
-        or name.startswith("tensorcast.serving.admin")
-        or name.startswith("tensorcast.serving._runtime_impl")
+        or name == "tensorcast.serving"
+        or name.startswith("tensorcast.serving.")
+        or name.startswith("tensorcast.artifact_runtime.recipe.builder")
+        or name.startswith("tensorcast.artifact_runtime.admin")
+        or name.startswith("tensorcast.artifact_runtime.lifecycle")
     }
     assert forbidden == set()
 
@@ -53,7 +54,7 @@ def test_reference_framework_runs_level1_conformance():
 
     result = module.run_level1_conformance()
 
-    assert result.level == "level1-runtime"
+    assert result.level == "level1-artifact-runtime"
     assert result.checks["direct_start"]
     assert result.checks["reload"]
     assert result.checks["describe"]
diff --git a/tests/python/node_agent/test_plan_execution.py b/tests/python/node_agent/test_plan_execution.py
index 6270c64b..64d9a98e 100644
--- a/tests/python/node_agent/test_plan_execution.py
+++ b/tests/python/node_agent/test_plan_execution.py
@@ -15,7 +15,7 @@
 from tensorcast.api.store import (
     BuilderMode,
     RepresentationPublishSpec,
-    ServingBuildIntent,
+    RuntimeArtifactBuildIntent,
     build_pure_transform_publication_bundle_from_registered_artifact,
     build_pure_transform_transform_spec,
     compute_pure_transform_representation_contract_hash,
@@ -41,8 +41,8 @@
 from tensorcast.proto.node_agent.v1 import node_agent_pb2
 from tensorcast.proto.plan.v1 import plan_pb2
 from tensorcast.types import (
+    RuntimeArtifactManifest,
     ServerConfig,
-    ServingArtifactManifest,
     build_serving_manifest_ref,
 )
 
@@ -227,7 +227,7 @@ def register(self, tensors, key, policy):  # noqa: ANN001
             manifest_tensor = tensors["__tensorcast_meta__.manifest_json"]
             assert manifest_tensor.dtype == torch.uint8
             assert manifest_tensor.ndim == 1
-            manifest = ServingArtifactManifest.from_bytes(
+            manifest = RuntimeArtifactManifest.from_bytes(
                 bytes(manifest_tensor.tolist())
             )
             assert manifest.framework_name == "torch"
@@ -236,7 +236,7 @@ def register(self, tensors, key, policy):  # noqa: ANN001
     result = adapter.execute_transform_register(
         spec=build_pure_transform_transform_spec(
             transform_name="identity.v1",
-            build_intent=ServingBuildIntent(
+            build_intent=RuntimeArtifactBuildIntent(
                 builder_mode=BuilderMode.PURE_TRANSFORM,
                 framework_name="torch",
                 adapter_version="adapter-v1",
@@ -809,7 +809,7 @@ def test_node_agent_servicer_serializes_pure_transform_publication_result() -> N
         instance_id="inst-1", engine="test", register_identity_transform=False
     )
     bundle = build_pure_transform_publication_bundle_from_registered_artifact(
-        build_intent=ServingBuildIntent(
+        build_intent=RuntimeArtifactBuildIntent(
             representation_contract_hash="bafkrepresentation",
             builder_mode=BuilderMode.PURE_TRANSFORM,
             framework_name="torch",
diff --git a/tests/python/test_assembly_attempt.py b/tests/python/test_assembly_attempt.py
index 6603dee0..6790131b 100644
--- a/tests/python/test_assembly_attempt.py
+++ b/tests/python/test_assembly_attempt.py
@@ -16,16 +16,16 @@
     AssemblyCloseoutContract,
     AssemblyRequirementSetRef,
     PublishedModelVersion,
-    RegisteredServingPublication,
+    RegisteredRuntimeArtifactPublication,
     RepresentationPublishContract,
     RepresentationPublishSpec,
-    ServingArtifactManifest,
-    ServingBuildIntent,
+    RuntimeArtifactBuildIntent,
+    RuntimeArtifactManifest,
     Store,
     build_binding_finalize_admission_facts,
     build_representation_publish_requirements,
     build_serving_manifest_ref,
-    prepare_pure_transform_serving_registration,
+    prepare_pure_transform_runtime_registration,
 )
 from tensorcast.api.store.artifact import Artifact
 from tensorcast.api.store.common import (
@@ -44,8 +44,8 @@
     AssemblyAttemptRef,
     BindingValueRef,
     BuilderMode,
-    ServingPublicationSubject,
-    ServingSupportLevel,
+    RuntimePublicationSubject,
+    RuntimeSupportLevel,
 )
 
 
@@ -192,8 +192,8 @@ def _canonical_index_bytes() -> bytes:
     return b'{"w":[0,4,[1],[1],"torch.float32",0]}'
 
 
-def _serving_build_intent() -> ServingBuildIntent:
-    return ServingBuildIntent(
+def _serving_build_intent() -> RuntimeArtifactBuildIntent:
+    return RuntimeArtifactBuildIntent(
         representation_contract_hash="bafkrepresentation",
         builder_mode=BuilderMode.PURE_TRANSFORM,
         framework_name="torch",
@@ -204,8 +204,8 @@ def _serving_build_intent() -> ServingBuildIntent:
     )
 
 
-def _binding_finalize_build_intent() -> ServingBuildIntent:
-    return ServingBuildIntent(
+def _binding_finalize_build_intent() -> RuntimeArtifactBuildIntent:
+    return RuntimeArtifactBuildIntent(
         representation_contract_hash="bafkbindingrepr",
         builder_mode=BuilderMode.BINDING_FINALIZE,
         framework_name="torch",
@@ -217,7 +217,7 @@ def _binding_finalize_build_intent() -> ServingBuildIntent:
 
 
 def _representation_publish_bundle() -> RepresentationPublishSpec:
-    manifest = ServingArtifactManifest(
+    manifest = RuntimeArtifactManifest(
         framework_name="torch",
         adapter_version="adapter-v1",
         serving_abi_version="abi-v1",
@@ -230,7 +230,7 @@ def _representation_publish_bundle() -> RepresentationPublishSpec:
         build_pipeline_version="pipeline-v1",
     )
     contract = RepresentationPublishContract(
-        subject=ServingPublicationSubject(
+        subject=RuntimePublicationSubject(
             serving_artifact_id="mi2:test:serving",
         ),
         serving_manifest_ref=build_serving_manifest_ref(),
@@ -286,7 +286,7 @@ def test_register_pure_transform_publication_registers_manifest_bearing_artifact
     store = Store("fake://daemon", runtime=runtime)
     tensors = {"w": torch.ones((1,), dtype=torch.float32)}
     build_intent = _serving_build_intent()
-    prepared = prepare_pure_transform_serving_registration(
+    prepared = prepare_pure_transform_runtime_registration(
         build_intent=build_intent,
         source_artifact=None,
         tensors=tensors,
@@ -390,7 +390,7 @@ def test_complete_pure_transform_publication_runs_register_and_closeout() -> Non
     store = Store("fake://daemon", runtime=runtime)
     tensors = {"w": torch.ones((1,), dtype=torch.float32)}
     build_intent = _serving_build_intent()
-    prepared = prepare_pure_transform_serving_registration(
+    prepared = prepare_pure_transform_runtime_registration(
         build_intent=build_intent,
         source_artifact=None,
         tensors=tensors,
@@ -457,7 +457,7 @@ def test_complete_pure_transform_publication_canonical_full_routes_source_artifa
     runtime = FakeRuntime(client)
     store = Store("fake://daemon", runtime=runtime)
     captured: dict[str, object] = {}
-    prepared = prepare_pure_transform_serving_registration(
+    prepared = prepare_pure_transform_runtime_registration(
         build_intent=_serving_build_intent(),
         source_artifact=source_artifact,
         tensors={"w": torch.ones((1,), dtype=torch.float32)},
@@ -466,13 +466,13 @@ def test_complete_pure_transform_publication_canonical_full_routes_source_artifa
     def _register(
         tensors: dict[str, torch.Tensor],
         **kwargs: object,
-    ) -> RegisteredServingPublication:
+    ) -> RegisteredRuntimeArtifactPublication:
         del tensors
         del kwargs
         bundle = _representation_publish_bundle().model_copy(
             update={"contract_family": "canonical_full"}
         )
-        return RegisteredServingPublication(
+        return RegisteredRuntimeArtifactPublication(
             registered_artifact=RegisteredArtifact(
                 artifact_id="mi2:test:serving",
                 replica=ReplicaInfo(
@@ -577,13 +577,13 @@ def test_complete_pure_transform_publication_routes_structural_view_contribution
     def _register_publication(
         tensors: dict[str, torch.Tensor],
         **kwargs: object,
-    ) -> RegisteredServingPublication:
+    ) -> RegisteredRuntimeArtifactPublication:
         del tensors
         del kwargs
         bundle = _representation_publish_bundle().model_copy(
             update={"contract_family": "pp"}
         )
-        return RegisteredServingPublication(
+        return RegisteredRuntimeArtifactPublication(
             registered_artifact=RegisteredArtifact(
                 artifact_id="mi2:test:serving",
                 replica=ReplicaInfo(
@@ -596,7 +596,7 @@ def _register_publication(
                 canonical_index=canonical_index_from_bytes(_canonical_index_bytes()),
                 lease=None,
             ),
-            prepared_registration=prepare_pure_transform_serving_registration(
+            prepared_registration=prepare_pure_transform_runtime_registration(
                 build_intent=_serving_build_intent(),
                 source_artifact=source_artifact,
                 tensors={"w": torch.ones((1,), dtype=torch.float32)},
@@ -801,7 +801,7 @@ def test_start_representation_publish_attempt_provisions_binding_subject_layout_
     canonical_index = canonical_index_from_bytes(
         b'{"serving.weight":[0,4,[1],[1],"torch.float32",0]}'
     )
-    manifest = ServingArtifactManifest(
+    manifest = RuntimeArtifactManifest(
         framework_name="torch",
         adapter_version="adapter-vbinding",
         serving_abi_version="abi-vbinding",
@@ -814,7 +814,7 @@ def test_start_representation_publish_attempt_provisions_binding_subject_layout_
         build_pipeline_version="pipeline-vbinding",
     )
     contract = RepresentationPublishContract(
-        subject=ServingPublicationSubject(
+        subject=RuntimePublicationSubject(
             binding_value_ref=BindingValueRef(
                 binding_id="binding-1",
                 binding_layout_id="layout-1",
@@ -839,7 +839,7 @@ def test_start_representation_publish_attempt_provisions_binding_subject_layout_
             representation_publish_contract=contract,
         ),
         admission_facts=build_binding_finalize_admission_facts(
-            support_level=ServingSupportLevel.BUILDER_PUBLICATION_READY,
+            support_level=RuntimeSupportLevel.BUILDER_PUBLICATION_READY,
             same_binding_fast_path_validated=True,
         ),
         contract_family="canonical_full",
@@ -1291,7 +1291,7 @@ def test_representation_publish_closeout_contract_accepts_matching_typed_child()
         serving_version_key="models/demo/serving/v3",
         serving_manifest_ref=build_serving_manifest_ref(),
         representation_publish_contract=RepresentationPublishContract(
-            subject=ServingPublicationSubject(
+            subject=RuntimePublicationSubject(
                 serving_artifact_id="mi2:test:serving",
             ),
             serving_manifest_ref=build_serving_manifest_ref(),
@@ -1331,7 +1331,7 @@ def test_representation_publish_closeout_contract_rejects_outer_serving_artifact
             serving_artifact_id="mi2:test:serving",
             serving_manifest_ref=build_serving_manifest_ref(),
             representation_publish_contract=RepresentationPublishContract(
-                subject=ServingPublicationSubject(
+                subject=RuntimePublicationSubject(
                     serving_artifact_id="mi2:test:serving",
                 ),
                 serving_manifest_ref=build_serving_manifest_ref(),
@@ -1348,7 +1348,7 @@ def test_representation_publish_closeout_contract_accepts_binding_subject_child(
         kind="representation_publish",
         serving_manifest_ref=build_serving_manifest_ref(),
         representation_publish_contract=RepresentationPublishContract(
-            subject=ServingPublicationSubject(
+            subject=RuntimePublicationSubject(
                 binding_value_ref=BindingValueRef(
                     binding_id="binding-1",
                     binding_layout_id="layout-1",
diff --git a/tests/python/test_binding.py b/tests/python/test_binding.py
index 1cfcb237..c5d1fc38 100644
--- a/tests/python/test_binding.py
+++ b/tests/python/test_binding.py
@@ -38,8 +38,8 @@
     HashLocation,
     IdentityMintStrategy,
     PublishedModelVersion,
+    RuntimeArtifactPolicy,
     ServerConfig,
-    ServingRuntimePolicy,
     SourceBoundCapability,
     SourceBoundPlanDiagnostics,
     VramRegionHandle,
@@ -322,7 +322,7 @@ def commit_binding_artifact(self, **kwargs: Any) -> Any:
             current_value=self._make_binding_value(
                 binding_id=binding_id,
                 selection=selection,
-            )
+            ),
         )
 
     def begin_binding_update(self, **kwargs: Any) -> Any:
@@ -701,7 +701,7 @@ def test_binding_swap_forwards_first_class_execution_topology(
     assert execution_topology.source_sharing_domain == "node-a"
     assert (
         refill_call["collective_policy"]
-        == store_daemon_pb2.COLLECTIVE_POLICY_REQUIRE_COLLECTIVE
+        == store_daemon_pb2.COLLECTIVE_POLICY_COLLECTIVE_FIRST
     )
     assert "clid=same-host-tp-load" not in str(refill_call["operation_id"])
 
@@ -1103,6 +1103,42 @@ def test_binding_realize_from_accepts_rank_zero_collective_group(
     assert "clid=same-host-tp-load" not in str(refill_call["operation_id"])
 
 
+def test_binding_realize_from_defaults_collective_group_to_collective_first(
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    store, _runtime, client = _setup_store(monkeypatch)
+    artifact = store.artifact(artifact_id="artifact-1")
+    layout = artifact.bind(device="cuda:0", packing="byte_space").layout
+    binding = store.create_binding(layout, ownership="daemon", device="cuda:0")
+
+    binding.realize_from(
+        artifact,
+        realization_plan=(
+            store_mod.BindingRealizationEntry(
+                op="copy",
+                source_name="alpha",
+                dst_name="alpha",
+            ),
+        ),
+        options=GetArtifactOptions(
+            execution_topology=ExecutionTopologyContext(
+                collective_group=CollectiveLoadGroup(
+                    group_id="same-host-tp-load",
+                    world_size=8,
+                    rank=0,
+                ),
+                source_locality="shared_source",
+            )
+        ),
+    )
+
+    refill_call = client.refill_calls[-1]
+    assert (
+        refill_call["collective_policy"]
+        == store_daemon_pb2.COLLECTIVE_POLICY_COLLECTIVE_FIRST
+    )
+
+
 def test_binding_realize_from_serializes_partial_const_fill_ranges(
     monkeypatch: pytest.MonkeyPatch,
 ) -> None:
@@ -1855,7 +1891,7 @@ def _wait_attempt(
 
     result = store.complete_binding_finalize_publication_from_binding(
         binding,
-        build_intent=store_mod.ServingBuildIntent(
+        build_intent=store_mod.RuntimeArtifactBuildIntent(
             builder_mode=store_mod.BuilderMode.BINDING_FINALIZE,
             framework_name="pytest",
             adapter_version="adapter-v1",
@@ -1865,7 +1901,7 @@ def _wait_attempt(
             source_artifact_ref="mi2:test:source",
         ),
         admission_facts=store_mod.build_binding_finalize_admission_facts(
-            support_level=store_mod.ServingSupportLevel.BUILDER_PUBLICATION_READY,
+            support_level=store_mod.RuntimeSupportLevel.BUILDER_PUBLICATION_READY,
             same_binding_fast_path_validated=True,
         ),
         contract_family="canonical_full",
@@ -2280,10 +2316,10 @@ def test_binding_swap_coerces_published_model_version_into_runtime_policy() -> N
         serving_manifest_ref=build_serving_manifest_ref("__alt_manifest__.json"),
     )
 
-    binding.swap("artifact-2", serving_runtime_policy=version)
+    binding.swap("artifact-2", runtime_artifact_policy=version)
 
     assert len(slot.swap_calls) == 1
-    assert slot.swap_calls[0]["serving_runtime_policy"] == ServingRuntimePolicy(
+    assert slot.swap_calls[0]["runtime_artifact_policy"] == RuntimeArtifactPolicy(
         require_manifest=True,
         serving_manifest_ref="tensor:__alt_manifest__.json",
         expected_representation_contract_hash="bafkrepresentation",
@@ -2291,6 +2327,21 @@ def test_binding_swap_coerces_published_model_version_into_runtime_policy() -> N
     )
 
 
+def test_binding_swap_uses_only_runtime_artifact_policy_name() -> None:
+    slot = _FakeBindingSlot()
+    binding = Binding(slot)
+    neutral_policy = RuntimeArtifactPolicy(serving_manifest_ref="tensor:a.json")
+
+    with pytest.raises(TypeError, match="serving_runtime_policy"):
+        binding.swap(
+            "artifact-2",
+            runtime_artifact_policy=neutral_policy,
+            serving_runtime_policy=neutral_policy,
+        )
+
+    assert slot.swap_calls == []
+
+
 def test_bind_does_not_delegate_to_bind_into(monkeypatch: pytest.MonkeyPatch) -> None:
     store, _runtime, _client = _setup_store(monkeypatch)
     artifact = store.artifact(artifact_id="artifact-1")
diff --git a/tests/python/test_dense_piece_assembly_sealing_acceptance.py b/tests/python/test_dense_piece_assembly_sealing_acceptance.py
index 64e8da7d..8b2667e1 100644
--- a/tests/python/test_dense_piece_assembly_sealing_acceptance.py
+++ b/tests/python/test_dense_piece_assembly_sealing_acceptance.py
@@ -28,7 +28,7 @@
     AssemblyReadinessPolicy,
     AssemblyRequirementSetRef,
     BuilderMode,
-    ServingBuildIntent,
+    RuntimeArtifactBuildIntent,
     Store,
     build_serving_manifest_ref,
 )
@@ -1043,7 +1043,7 @@ def test_complete_pure_transform_publication_publishes_serving_lineage(
 
         result = store.complete_pure_transform_publication(
             source_tensors,
-            build_intent=ServingBuildIntent(
+            build_intent=RuntimeArtifactBuildIntent(
                 builder_mode=BuilderMode.PURE_TRANSFORM,
                 framework_name="torch",
                 adapter_version="adapter-v1",
@@ -1124,7 +1124,7 @@ def test_complete_pure_transform_publication_structural_pp_publishes_serving_lin
 
         result = store.complete_pure_transform_publication(
             source_tensors,
-            build_intent=ServingBuildIntent(
+            build_intent=RuntimeArtifactBuildIntent(
                 builder_mode=BuilderMode.PURE_TRANSFORM,
                 framework_name="torch",
                 adapter_version="adapter-v1",
@@ -1196,7 +1196,7 @@ def test_complete_pure_transform_publication_serving_binding_swap(
         source_handle_v1 = store.artifact(artifact_id=source_artifact_v1)
         result_v1 = store.complete_pure_transform_publication(
             _artifact_tensor_dict(store, artifact_id=source_artifact_v1),
-            build_intent=ServingBuildIntent(
+            build_intent=RuntimeArtifactBuildIntent(
                 builder_mode=BuilderMode.PURE_TRANSFORM,
                 framework_name="torch",
                 adapter_version="adapter-v1",
@@ -1230,7 +1230,7 @@ def test_complete_pure_transform_publication_serving_binding_swap(
         source_handle_v2 = store.artifact(artifact_id=source_artifact_v2)
         result_v2 = store.complete_pure_transform_publication(
             _artifact_tensor_dict(store, artifact_id=source_artifact_v2),
-            build_intent=ServingBuildIntent(
+            build_intent=RuntimeArtifactBuildIntent(
                 builder_mode=BuilderMode.PURE_TRANSFORM,
                 framework_name="torch",
                 adapter_version="adapter-v1",
@@ -1253,7 +1253,7 @@ def test_complete_pure_transform_publication_serving_binding_swap(
         binding = store.artifact(key="models/demo/serving/swap/v1").bind(
             device="cuda:0",
             packing="byte_space",
-            serving_runtime_policy=result_v1.require_serving_runtime_policy(),
+            runtime_artifact_policy=result_v1.require_runtime_artifact_policy(),
         )
         torch.testing.assert_close(
             binding.tensors["weights"].cpu(),
@@ -1267,7 +1267,7 @@ def test_complete_pure_transform_publication_serving_binding_swap(
         )
         binding.swap(
             store.artifact(artifact_id=str(result_v2.serving_artifact_id)),
-            serving_runtime_policy=result_v2.require_serving_runtime_policy(),
+            runtime_artifact_policy=result_v2.require_runtime_artifact_policy(),
         )
         synchronize_cuda()
         torch.testing.assert_close(
diff --git a/tests/python/test_pytorch_module_binding.py b/tests/python/test_pytorch_module_binding.py
index 90bfa93b..217356ac 100644
--- a/tests/python/test_pytorch_module_binding.py
+++ b/tests/python/test_pytorch_module_binding.py
@@ -6,6 +6,7 @@
 import torch
 from torch import nn
 
+from tensorcast.artifact_runtime.host import TorchTensorHost
 from tensorcast.pytorch.module_binding import (
     TorchModuleAdapterMixin,
     align_runtime_binding_exclude_names,
@@ -19,22 +20,18 @@
     snapshot_tensor_invariants,
     validate_tensor_invariants,
 )
-from tensorcast.serving.hosts import TorchTensorHost
 
 
 class _TaggedParameter(nn.Parameter):
     pass
 
 
-def test_attach_tensors_materializes_meta_parameter_aliases_and_subclass(
-) -> None:
-
+def test_attach_tensors_materializes_meta_parameter_aliases_and_subclass() -> None:
     class _Model(nn.Module):
-
         def __init__(self) -> None:
             super().__init__()
             self.w = _TaggedParameter(
-                torch.empty((2, ), device="meta", dtype=torch.float32),
+                torch.empty((2,), device="meta", dtype=torch.float32),
                 requires_grad=False,
             )
             self.alias = self.w
@@ -62,13 +59,12 @@ def __init__(self) -> None:
 
 
 def test_attach_tensors_materializes_meta_buffer_aliases() -> None:
-
     class _Model(nn.Module):
-
         def __init__(self) -> None:
             super().__init__()
             self.register_buffer(
-                "b", torch.empty((2, ), device="meta", dtype=torch.float32))
+                "b", torch.empty((2,), device="meta", dtype=torch.float32)
+            )
             self.register_buffer("alias_b", self.b)
             self.captured = [self.b]
 
@@ -94,58 +90,54 @@ def __init__(self) -> None:
 
 def test_attach_tensors_skips_reserved_tensorcast_names() -> None:
     model = nn.Module()
-    model.register_parameter("w", nn.Parameter(torch.zeros((1, ))))
+    model.register_parameter("w", nn.Parameter(torch.zeros((1,))))
 
     result = attach_tensors_to_module(
         model,
         {
-            "w":
-            torch.ones((1, )),
-            "__tensorcast_meta__.manifest_json":
-            torch.ones((4, ), dtype=torch.uint8),
+            "w": torch.ones((1,)),
+            "__tensorcast_meta__.manifest_json": torch.ones((4,), dtype=torch.uint8),
         },
         replace_meta_params=False,
         fail_on_missing=False,
     )
 
-    assert result.attached == ("w", )
-    assert result.skipped == ("__tensorcast_meta__.manifest_json", )
-    assert torch.equal(model.w, torch.ones((1, )))
+    assert result.attached == ("w",)
+    assert result.skipped == ("__tensorcast_meta__.manifest_json",)
+    assert torch.equal(model.w, torch.ones((1,)))
 
 
 def test_attach_tensors_fail_closed_on_missing_and_unexpected_names() -> None:
     model = nn.Module()
-    model.register_parameter("w", nn.Parameter(torch.zeros((1, ))))
-    model.register_buffer("b", torch.zeros((1, )))
+    model.register_parameter("w", nn.Parameter(torch.zeros((1,))))
+    model.register_buffer("b", torch.zeros((1,)))
 
     with pytest.raises(RuntimeError, match="missing required"):
         attach_tensors_to_module(
             model,
-            {"w": torch.ones((1, ))},
+            {"w": torch.ones((1,))},
             replace_meta_params=False,
         )
 
     with pytest.raises(RuntimeError, match="unexpected tensor names"):
         attach_tensors_to_module(
             model,
-            {"unexpected": torch.ones((1, ))},
+            {"unexpected": torch.ones((1,))},
             replace_meta_params=False,
             fail_on_missing=False,
         )
 
 
-def test_collect_module_tensors_handles_excludes_reserved_and_duplicates(
-) -> None:
+def test_collect_module_tensors_handles_excludes_reserved_and_duplicates() -> None:
     model = nn.Module()
-    model.register_parameter(
-        "w", nn.Parameter(torch.ones((1, ), dtype=torch.float32)))
+    model.register_parameter("w", nn.Parameter(torch.ones((1,), dtype=torch.float32)))
     model.register_parameter("alias", model.w)
     model.register_parameter(
         "other_meta",
-        nn.Parameter(torch.empty((1, ), device="meta", dtype=torch.float32)),
+        nn.Parameter(torch.empty((1,), device="meta", dtype=torch.float32)),
     )
     reserved = nn.Module()
-    reserved.register_buffer("manifest_json", torch.ones((1, )))
+    reserved.register_buffer("manifest_json", torch.ones((1,)))
     model.add_module("__tensorcast_meta__", reserved)
 
     with pytest.raises(RuntimeError, match="reserved names"):
@@ -167,23 +159,17 @@ def test_collect_module_tensors_handles_excludes_reserved_and_duplicates(
 
 
 def test_allocate_unbound_module_tensors_materializes_aliases() -> None:
-
     class _Model(nn.Module):
-
         def __init__(self) -> None:
             super().__init__()
             self.w = nn.Parameter(
-                torch.empty_strided((2, 3), (3, 1),
-                                    device="meta",
-                                    dtype=torch.float16),
+                torch.empty_strided((2, 3), (3, 1), device="meta", dtype=torch.float16),
                 requires_grad=False,
             )
             self.alias = self.w
             self.register_buffer(
                 "b",
-                torch.empty_strided((4, ), (1, ),
-                                    device="meta",
-                                    dtype=torch.float32),
+                torch.empty_strided((4,), (1,), device="meta", dtype=torch.float32),
             )
             self.register_buffer("alias_b", self.b)
 
@@ -203,7 +189,7 @@ def __init__(self) -> None:
     assert model.w.shape == (2, 3)
     assert model.w.stride() == (3, 1)
     assert model.w.dtype == torch.float16
-    assert model.b.shape == (4, )
+    assert model.b.shape == (4,)
     assert model.b.dtype == torch.float32
     assert allocated["w"].data_ptr() == model.w.data.data_ptr()
     assert allocated["b"].data_ptr() == model.b.data_ptr()
@@ -211,8 +197,8 @@ def __init__(self) -> None:
 
 def test_align_and_assert_runtime_tensor_names() -> None:
     model = nn.Module()
-    model.register_parameter("w", nn.Parameter(torch.ones((1, ))))
-    model.register_buffer("runtime_only", torch.ones((1, )))
+    model.register_parameter("w", nn.Parameter(torch.ones((1,))))
+    model.register_buffer("runtime_only", torch.ones((1,)))
     captured: list[tuple[str, ...]] = []
 
     count = align_runtime_binding_exclude_names(
@@ -223,64 +209,59 @@ def test_align_and_assert_runtime_tensor_names() -> None:
     )
 
     assert count == 1
-    assert captured == [("runtime_only", )]
+    assert captured == [("runtime_only",)]
     assert collect_module_tensor_names(model) == {"w", "runtime_only"}
     assert_runtime_tensors_match_expected_names({"w": model.w}, {"w"})
     with pytest.raises(RuntimeError, match="tensor set mismatch"):
-        assert_runtime_tensors_match_expected_names({"w": model.w},
-                                                    {"missing"})
+        assert_runtime_tensors_match_expected_names({"w": model.w}, {"missing"})
 
 
 def test_assert_module_tensors_are_meta_reports_materialized_tensors() -> None:
     meta_model = nn.Module()
     meta_model.register_parameter(
         "w",
-        nn.Parameter(torch.empty((1, ), device="meta")),
+        nn.Parameter(torch.empty((1,), device="meta")),
     )
     assert_module_tensors_are_meta(meta_model, context="test context")
 
     materialized = nn.Module()
-    materialized.register_parameter("w", nn.Parameter(torch.ones((2, ))))
+    materialized.register_parameter("w", nn.Parameter(torch.ones((2,))))
     with pytest.raises(RuntimeError, match="test context"):
         assert_module_tensors_are_meta(materialized, context="test context")
 
 
 def test_runtime_tensor_schema_hash_and_invariants() -> None:
-    tensors = {"w": torch.ones((2, ), dtype=torch.float32)}
+    tensors = {"w": torch.ones((2,), dtype=torch.float32)}
 
     schema_hash = compute_runtime_tensor_schema_hash(tensors)
     before = snapshot_tensor_invariants(tensors)
     validate_tensor_invariants(before, tensors)
 
     assert schema_hash
-    changed = {"w": torch.ones((3, ), dtype=torch.float32)}
+    changed = {"w": torch.ones((3,), dtype=torch.float32)}
     with pytest.raises(RuntimeError, match="invariant changed"):
         validate_tensor_invariants(before, changed)
 
 
 def test_torch_module_adapter_mixin_provides_default_binding_ops() -> None:
-
     class _Adapter(TorchModuleAdapterMixin):
-
-        def runtime_only_tensor_names(self,
-                                      model: nn.Module) -> tuple[str, ...]:
+        def runtime_only_tensor_names(self, model: nn.Module) -> tuple[str, ...]:
             del model
-            return ("runtime_only", )
+            return ("runtime_only",)
 
     model = nn.Module()
     model.register_parameter(
         "w",
-        nn.Parameter(torch.ones((1, ), dtype=torch.float32),
-                     requires_grad=False),
+        nn.Parameter(torch.ones((1,), dtype=torch.float32), requires_grad=False),
     )
     model.register_buffer(
         "runtime_only",
-        torch.empty((1, ), device="meta", dtype=torch.float32),
+        torch.empty((1,), device="meta", dtype=torch.float32),
     )
 
     adapter = _Adapter()
     tensors = adapter.collect_runtime_binding_tensors(model)
-    assert tuple(tensors) == ("w", )
+    assert tuple(tensors) == ("w",)
     assert adapter.compute_runtime_tensor_schema_hash(tensors)
 
     bound = torch.tensor([2.0], dtype=torch.float32)
@@ -296,15 +277,11 @@ def runtime_only_tensor_names(self,
     adapter.validate_tensor_invariants(invariants, {"w": model.w})
 
 
-def test_torch_module_adapter_mixin_rehydrates_runtime_only_tensors(
-) -> None:
-
+def test_torch_module_adapter_mixin_rehydrates_runtime_only_tensors() -> None:
     class _Adapter(TorchModuleAdapterMixin):
-
-        def runtime_only_tensor_names(self,
-                                      model: nn.Module) -> tuple[str, ...]:
+        def runtime_only_tensor_names(self, model: nn.Module) -> tuple[str, ...]:
             del model
-            return ("runtime_only", )
+            return ("runtime_only",)
 
         def rehydrate_runtime_only_tensors(
             self,
@@ -313,14 +290,14 @@ def rehydrate_runtime_only_tensors(
             target_device: torch.device,
         ) -> Mapping[str, torch.Tensor]:
             assert set(allocated) == {"runtime_only"}
-            tensor = torch.full((2, ), 7.0, device=target_device)
+            tensor = torch.full((2,), 7.0, device=target_device)
             model._buffers["runtime_only"] = tensor
             return {"runtime_only": tensor}
 
     model = nn.Module()
     model.register_buffer(
         "runtime_only",
-        torch.empty((2, ), device="meta", dtype=torch.float32),
+        torch.empty((2,), device="meta", dtype=torch.float32),
     )
 
     allocated = _Adapter().allocate_runtime_only_tensors(
@@ -328,18 +305,15 @@ def rehydrate_runtime_only_tensors(
         torch.device("cpu"),
     )
 
-    assert torch.equal(model.runtime_only, torch.full((2, ), 7.0))
-    assert torch.equal(allocated["runtime_only"], torch.full((2, ), 7.0))
+    assert torch.equal(model.runtime_only, torch.full((2,), 7.0))
+    assert torch.equal(allocated["runtime_only"], torch.full((2,), 7.0))
 
 
 def test_torch_tensor_host_rehydrates_runtime_only_tensors() -> None:
-
     class _Surface(TorchTensorHost):
-
-        def runtime_only_tensor_names(self,
-                                      model: object) -> tuple[str, ...]:
+        def runtime_only_tensor_names(self, model: object) -> tuple[str, ...]:
             del model
-            return ("runtime_only", )
+            return ("runtime_only",)
 
         def rehydrate_runtime_only_tensors(
             self,
@@ -348,14 +322,14 @@ def rehydrate_runtime_only_tensors(
             target_device: object,
         ) -> Mapping[str, object]:
             del allocated
-            tensor = torch.full((2, ), 11.0, device=target_device)
+            tensor = torch.full((2,), 11.0, device=target_device)
             model._buffers["runtime_only"] = tensor
             return {"runtime_only": tensor}
 
     model = nn.Module()
     model.register_buffer(
         "runtime_only",
-        torch.empty((2, ), device="meta", dtype=torch.float32),
+        torch.empty((2,), device="meta", dtype=torch.float32),
     )
 
     allocated = _Surface().allocate_runtime_only_tensors(
@@ -363,5 +337,5 @@ def rehydrate_runtime_only_tensors(
         torch.device("cpu"),
     )
 
-    assert torch.equal(model.runtime_only, torch.full((2, ), 11.0))
-    assert torch.equal(allocated["runtime_only"], torch.full((2, ), 11.0))
+    assert torch.equal(model.runtime_only, torch.full((2,), 11.0))
+    assert torch.equal(allocated["runtime_only"], torch.full((2,), 11.0))
diff --git a/tests/python/test_pytorch_trace_capture.py b/tests/python/test_pytorch_trace_capture.py
index 6ef58a13..683f52da 100644
--- a/tests/python/test_pytorch_trace_capture.py
+++ b/tests/python/test_pytorch_trace_capture.py
@@ -8,8 +8,8 @@
 from torch import nn
 
 from tensorcast.pytorch.trace_capture import TraceActivation, trace_model_load
-from tensorcast.serving.builder.materialization import apply_copy_plan
-from tensorcast.serving.builder.trace_ir import MultiRange, Range
+from tensorcast.artifact_runtime.recipe.materialization import apply_copy_plan
+from tensorcast.artifact_runtime.recipe.trace_ir import MultiRange, Range
 
 
 @dataclass(frozen=True)
diff --git a/tests/python/test_serving_publication_types.py b/tests/python/test_runtime_publication_types.py
similarity index 72%
rename from tests/python/test_serving_publication_types.py
rename to tests/python/test_runtime_publication_types.py
index ab32c823..8c7c3a4b 100644
--- a/tests/python/test_serving_publication_types.py
+++ b/tests/python/test_runtime_publication_types.py
@@ -2,6 +2,7 @@
 
 from __future__ import annotations
 
+import importlib.util
 import inspect
 from pathlib import Path
 
@@ -18,13 +19,13 @@
     build_pure_transform_publication_bundle_from_registered_artifact,
     build_pure_transform_publication_spec,
     build_pure_transform_transform_spec,
-    build_serving_publication_bundle_from_registered_artifact,
+    build_runtime_artifact_publication_bundle_from_registered_artifact,
     compute_pure_transform_representation_contract_hash,
-    compute_serving_tensor_schema_hash,
-    count_canonical_serving_tensors,
-    prepare_binding_finalize_serving_registration,
-    prepare_pure_transform_serving_registration,
-    prepare_serving_registration,
+    compute_runtime_artifact_tensor_schema_hash,
+    count_canonical_runtime_tensors,
+    prepare_binding_finalize_runtime_registration,
+    prepare_pure_transform_runtime_registration,
+    prepare_runtime_artifact_registration,
 )
 from tensorcast.api.store.handles import RegisteredArtifact
 from tensorcast.api.store.types import (
@@ -43,14 +44,14 @@
     PublishedModelVersion,
     RepresentationPublishContract,
     RepresentationPublishSpec,
-    ServingAdmissionFacts,
-    ServingArtifactManifest,
-    ServingBuildIntent,
-    ServingPublicationSubject,
-    ServingRuntimePolicy,
-    ServingSupportLevel,
+    RuntimeAdmissionFacts,
+    RuntimeArtifactBuildIntent,
+    RuntimeArtifactManifest,
+    RuntimeArtifactPolicy,
+    RuntimePublicationSubject,
+    RuntimeSupportLevel,
     build_serving_manifest_ref,
-    coerce_serving_runtime_policy,
+    coerce_runtime_artifact_policy,
     parse_serving_manifest_ref,
 )
 from tensorcast.types import ArtifactDescriptor as PublishedArtifactDescriptor
@@ -69,9 +70,8 @@ def _canonical_index(
     )
 
 
-def test_serving_build_digest_ignores_source_and_semantic_hash_inputs(
-) -> None:
-    intent_a = ServingBuildIntent(
+def test_serving_build_digest_ignores_source_and_semantic_hash_inputs() -> None:
+    intent_a = RuntimeArtifactBuildIntent(
         representation_contract_hash="bafksemantic-a",
         builder_mode=BuilderMode.BINDING_FINALIZE,
         framework_name="torch",
@@ -80,7 +80,7 @@ def test_serving_build_digest_ignores_source_and_semantic_hash_inputs(
         build_pipeline_version="pipeline-v1",
         source_artifact_ref="mi2:source-a",
     )
-    intent_b = ServingBuildIntent(
+    intent_b = RuntimeArtifactBuildIntent(
         representation_contract_hash="bafksemantic-b",
         builder_mode=BuilderMode.BINDING_FINALIZE,
         framework_name="torch",
@@ -90,26 +90,42 @@ def test_serving_build_digest_ignores_source_and_semantic_hash_inputs(
         source_artifact_ref="mi2:source-b",
     )
 
-    assert (intent_a.compute_serving_build_digest() ==
-            intent_b.compute_serving_build_digest())
+    assert (
+        intent_a.compute_serving_build_digest()
+        == intent_b.compute_serving_build_digest()
+    )
 
 
-def test_tensorcast_top_level_exports_cover_vllm_serving_contract() -> None:
-    assert tc.prepare_serving_registration is prepare_serving_registration
-    assert (tc.prepare_binding_finalize_serving_registration
-            is prepare_binding_finalize_serving_registration)
-    assert (tc.build_serving_publication_bundle_from_registered_artifact
-            is build_serving_publication_bundle_from_registered_artifact)
+def test_tensorcast_top_level_hides_runtime_publication_helpers() -> None:
+    assert not hasattr(tc, "prepare_runtime_artifact_registration")
+    assert not hasattr(tc, "prepare_binding_finalize_runtime_registration")
+    assert not hasattr(
+        tc,
+        "build_runtime_artifact_publication_bundle_from_registered_artifact",
+    )
+    assert not hasattr(tc, "RuntimeAdmissionFacts")
+    assert not hasattr(tc, "ServingAdmissionFacts")
+    assert not hasattr(tc, "ServingSupportLevel")
+    assert not hasattr(tc, "ServingPublicationSubject")
+    assert not hasattr(tc, "prepare_serving_registration")
+    assert not hasattr(tc, "prepare_binding_finalize_serving_registration")
+    assert not hasattr(tc, "prepare_pure_transform_serving_registration")
+    assert not hasattr(tc, "build_serving_publication_bundle")
+    assert not hasattr(tc, "SERVING_BUILD_DIGEST_VERSION")
     assert tc.PublishedModelVersion is PublishedModelVersion
     assert tc.RepresentationPublishContract is RepresentationPublishContract
-    assert tc.ServingAdmissionFacts is ServingAdmissionFacts
-    assert tc.ServingArtifactManifest is ServingArtifactManifest
-    assert tc.SERVING_BUILD_DIGEST_VERSION == SERVING_BUILD_DIGEST_VERSION
-    assert tc.ServingRuntimePolicy is ServingRuntimePolicy
+    assert tc.RuntimeArtifactManifest is RuntimeArtifactManifest
+    assert tc.RuntimeArtifactPolicy is RuntimeArtifactPolicy
+    assert SERVING_BUILD_DIGEST_VERSION == "tensorcast.serving_build_digest.v1"
+
 
+def test_legacy_serving_builder_module_path_is_removed() -> None:
+    spec = importlib.util.find_spec("tensorcast.api.store.serving_builder")
+    assert spec is None
 
-def test_serving_artifact_manifest_round_trips_via_json_payload() -> None:
-    intent = ServingBuildIntent(
+
+def test_runtime_artifact_manifest_round_trips_via_json_payload() -> None:
+    intent = RuntimeArtifactBuildIntent(
         representation_contract_hash="bafksemantic",
         builder_mode=BuilderMode.BINDING_FINALIZE,
         framework_name="torch",
@@ -118,23 +134,22 @@ def test_serving_artifact_manifest_round_trips_via_json_payload() -> None:
         build_pipeline_version="pipeline-v2",
         source_artifact_ref="mi2:source",
     )
-    manifest = ServingArtifactManifest.from_build_intent(
+    manifest = RuntimeArtifactManifest.from_build_intent(
         intent=intent,
         tensor_schema_hash="bafktensorschema",
         canonical_tensor_count=17,
     )
 
-    restored = ServingArtifactManifest.from_bytes(manifest.to_bytes())
+    restored = RuntimeArtifactManifest.from_bytes(manifest.to_bytes())
 
     assert restored == manifest
     assert restored.serving_manifest_ref == build_serving_manifest_ref()
-    assert restored.serving_build_digest == intent.compute_serving_build_digest(
-    )
+    assert restored.serving_build_digest == intent.compute_serving_build_digest()
     assert restored.serving_build_digest_version == SERVING_BUILD_DIGEST_VERSION
 
 
 def test_representation_publish_contract_matches_serving_manifest() -> None:
-    intent = ServingBuildIntent(
+    intent = RuntimeArtifactBuildIntent(
         representation_contract_hash="bafksemantic",
         builder_mode=BuilderMode.PURE_TRANSFORM,
         framework_name="torch",
@@ -142,48 +157,53 @@ def test_representation_publish_contract_matches_serving_manifest() -> None:
         serving_abi_version="abi-v3",
         build_pipeline_version="pipeline-v3",
     )
-    manifest = ServingArtifactManifest.from_build_intent(
+    manifest = RuntimeArtifactManifest.from_build_intent(
         intent=intent,
         tensor_schema_hash="bafktensorschema",
         canonical_tensor_count=9,
     )
     contract = RepresentationPublishContract(
-        subject=ServingPublicationSubject(
-            serving_artifact_id="mi2:test:serving", ),
+        subject=RuntimePublicationSubject(
+            serving_artifact_id="mi2:test:serving",
+        ),
         serving_manifest_ref=build_serving_manifest_ref(),
         representation_contract_hash=manifest.representation_contract_hash,
         serving_build_digest=manifest.serving_build_digest,
     )
 
     contract.validate_against_manifest(manifest)
-    assert (parse_serving_manifest_ref(
-        contract.serving_manifest_ref) == "__tensorcast_meta__.manifest_json")
+    assert (
+        parse_serving_manifest_ref(contract.serving_manifest_ref)
+        == "__tensorcast_meta__.manifest_json"
+    )
     runtime_policy = contract.to_runtime_policy()
     assert runtime_policy.require_manifest is True
     assert runtime_policy.serving_manifest_ref == build_serving_manifest_ref()
-    assert (runtime_policy.expected_representation_contract_hash ==
-            manifest.representation_contract_hash)
+    assert (
+        runtime_policy.expected_representation_contract_hash
+        == manifest.representation_contract_hash
+    )
 
 
-def test_serving_admission_facts_require_fast_path_validation() -> None:
-    with pytest.raises(ValueError,
-                       match="same_binding_fast_path_validated=True"):
-        ServingAdmissionFacts(
+def test_runtime_admission_facts_require_fast_path_validation() -> None:
+    with pytest.raises(ValueError, match="same_binding_fast_path_validated=True"):
+        RuntimeAdmissionFacts(
             finalize_class=FinalizeClass.REPRESENTATION_CHANGING,
-            support_level=ServingSupportLevel.BUILDER_PUBLICATION_READY,
+            support_level=RuntimeSupportLevel.BUILDER_PUBLICATION_READY,
             same_binding_fast_path_validated=False,
         )
 
 
-def test_representation_publish_contract_accepts_binding_value_subject(
-) -> None:
+def test_representation_publish_contract_accepts_binding_value_subject() -> None:
     contract = RepresentationPublishContract(
-        subject=ServingPublicationSubject(binding_value_ref=BindingValueRef(
-            binding_id="binding-1",
-            binding_layout_id="layout-1",
-            binding_value_id="value-1",
-            seal_generation=7,
-        )),
+        subject=RuntimePublicationSubject(
+            binding_value_ref=BindingValueRef(
+                binding_id="binding-1",
+                binding_layout_id="layout-1",
+                binding_value_id="value-1",
+                seal_generation=7,
+            )
+        ),
         serving_manifest_ref=build_serving_manifest_ref(),
         representation_contract_hash="bafkrepresentation",
         serving_build_digest="bafkbuilddigest",
@@ -192,20 +212,22 @@ def test_representation_publish_contract_accepts_binding_value_subject(
     assert contract.serving_artifact_id is None
     assert contract.binding_value_ref is not None
     restored = RepresentationPublishContract.from_publication_proto(
-        contract.to_publication_proto())
+        contract.to_publication_proto()
+    )
     assert restored.binding_value_ref is not None
     assert restored.binding_value_ref.binding_id == "binding-1"
 
 
-def test_binding_subject_contract_rejects_runtime_policy_until_promoted(
-) -> None:
+def test_binding_subject_contract_rejects_runtime_policy_until_promoted() -> None:
     contract = RepresentationPublishContract(
-        subject=ServingPublicationSubject(binding_value_ref=BindingValueRef(
-            binding_id="binding-2",
-            binding_layout_id="layout-2",
-            binding_value_id="value-2",
-            seal_generation=3,
-        )),
+        subject=RuntimePublicationSubject(
+            binding_value_ref=BindingValueRef(
+                binding_id="binding-2",
+                binding_layout_id="layout-2",
+                binding_value_id="value-2",
+                seal_generation=3,
+            )
+        ),
         serving_manifest_ref=build_serving_manifest_ref(),
         representation_contract_hash="bafkrepresentation",
         serving_build_digest="bafkbuilddigest",
@@ -215,34 +237,37 @@ def test_binding_subject_contract_rejects_runtime_policy_until_promoted(
         contract.to_runtime_policy()
 
 
-def test_build_binding_finalize_admission_facts_requires_same_binding_proof(
-) -> None:
+def test_build_binding_finalize_admission_facts_requires_same_binding_proof() -> None:
     facts = build_binding_finalize_admission_facts(
-        support_level=ServingSupportLevel.BUILDER_PUBLICATION_READY,
+        support_level=RuntimeSupportLevel.BUILDER_PUBLICATION_READY,
         same_binding_fast_path_validated=True,
     )
 
     assert facts.finalize_class == FinalizeClass.REPRESENTATION_CHANGING
     assert facts.same_binding_fast_path_validated is True
-    assert facts.support_level == ServingSupportLevel.BUILDER_PUBLICATION_READY
+    assert facts.support_level == RuntimeSupportLevel.BUILDER_PUBLICATION_READY
 
 
-def test_build_binding_finalize_publication_bundle_has_no_artifact_subject_parameter(
-) -> (None):
-    assert ("serving_artifact" not in inspect.signature(
-        build_binding_finalize_publication_bundle).parameters)
+def test_build_binding_finalize_publication_bundle_has_no_artifact_subject_parameter() -> (
+    None
+):
+    assert (
+        "runtime_artifact"
+        not in inspect.signature(build_binding_finalize_publication_bundle).parameters
+    )
 
     canonical_index = _canonical_index(
         CanonicalIndexEntry(
             name="weights",
             dtype=torch.float16,
-            shape=(8, ),
-            stride=(1, ),
+            shape=(8,),
+            stride=(1,),
             storage_offset=0,
             segment_offset=0,
             size_bytes=16,
-        ), )
-    intent = ServingBuildIntent(
+        ),
+    )
+    intent = RuntimeArtifactBuildIntent(
         representation_contract_hash="bafkbindingrepr",
         framework_name="torch",
         adapter_version="adapter-mounted-source",
@@ -257,25 +282,27 @@ def test_build_binding_finalize_publication_bundle_has_no_artifact_subject_param
             build_intent=intent,
             canonical_index=canonical_index,
             admission_facts=build_binding_finalize_admission_facts(
-                support_level=ServingSupportLevel.BUILDER_PUBLICATION_READY,
+                support_level=RuntimeSupportLevel.BUILDER_PUBLICATION_READY,
                 same_binding_fast_path_validated=True,
             ),
         )
 
 
-def test_build_binding_finalize_publication_bundle_accepts_binding_value_subject(
-) -> (None):
+def test_build_binding_finalize_publication_bundle_accepts_binding_value_subject() -> (
+    None
+):
     canonical_index = _canonical_index(
         CanonicalIndexEntry(
             name="weights",
             dtype=torch.float16,
-            shape=(8, ),
-            stride=(1, ),
+            shape=(8,),
+            stride=(1,),
             storage_offset=0,
             segment_offset=0,
             size_bytes=16,
-        ), )
-    intent = ServingBuildIntent(
+        ),
+    )
+    intent = RuntimeArtifactBuildIntent(
         representation_contract_hash="bafkbindingrepr",
         builder_mode=BuilderMode.BINDING_FINALIZE,
         framework_name="torch",
@@ -296,7 +323,7 @@ def test_build_binding_finalize_publication_bundle_accepts_binding_value_subject
         ),
         canonical_index=canonical_index,
         admission_facts=build_binding_finalize_admission_facts(
-            support_level=ServingSupportLevel.BUILDER_PUBLICATION_READY,
+            support_level=RuntimeSupportLevel.BUILDER_PUBLICATION_READY,
             same_binding_fast_path_validated=True,
         ),
     )
@@ -306,58 +333,64 @@ def test_build_binding_finalize_publication_bundle_accepts_binding_value_subject
     assert bundle.representation_publish_contract.binding_value_ref is not None
 
 
-def test_compute_serving_tensor_schema_hash_excludes_reserved_manifest_tensor(
-) -> None:
+def test_compute_runtime_artifact_tensor_schema_hash_excludes_reserved_manifest_tensor() -> (
+    None
+):
     canonical_without_manifest = CanonicalIndex(
-        entries=(CanonicalIndexEntry(
-            name="weights",
-            dtype=torch.float16,
-            shape=(8, ),
-            stride=(1, ),
-            storage_offset=0,
-            segment_offset=0,
-            size_bytes=16,
-        ), ),
+        entries=(
+            CanonicalIndexEntry(
+                name="weights",
+                dtype=torch.float16,
+                shape=(8,),
+                stride=(1,),
+                storage_offset=0,
+                segment_offset=0,
+                size_bytes=16,
+            ),
+        ),
         total_size_bytes=16,
         avbs_hash="bafkavbs",
     )
     canonical_with_manifest = CanonicalIndex(
-        entries=canonical_without_manifest.entries + (CanonicalIndexEntry(
-            name="__tensorcast_meta__.manifest_json",
-            dtype=torch.uint8,
-            shape=(32, ),
-            stride=(1, ),
-            storage_offset=0,
-            segment_offset=16,
-            size_bytes=32,
-        ), ),
+        entries=canonical_without_manifest.entries
+        + (
+            CanonicalIndexEntry(
+                name="__tensorcast_meta__.manifest_json",
+                dtype=torch.uint8,
+                shape=(32,),
+                stride=(1,),
+                storage_offset=0,
+                segment_offset=16,
+                size_bytes=32,
+            ),
+        ),
         total_size_bytes=48,
         avbs_hash="bafkavbs",
     )
 
-    assert compute_serving_tensor_schema_hash(
-        canonical_with_manifest) == compute_serving_tensor_schema_hash(
-            canonical_without_manifest)
-    assert count_canonical_serving_tensors(canonical_with_manifest) == 1
+    assert compute_runtime_artifact_tensor_schema_hash(
+        canonical_with_manifest
+    ) == compute_runtime_artifact_tensor_schema_hash(canonical_without_manifest)
+    assert count_canonical_runtime_tensors(canonical_with_manifest) == 1
 
 
-def test_compute_pure_transform_representation_contract_hash_accepts_tensor_mapping(
-) -> (None):
+def test_compute_pure_transform_representation_contract_hash_accepts_tensor_mapping() -> (
+    None
+):
     source_canonical_index = _canonical_index(
         CanonicalIndexEntry(
             name="weights",
             dtype=torch.float16,
-            shape=(8, ),
-            stride=(1, ),
+            shape=(8,),
+            stride=(1,),
             storage_offset=0,
             segment_offset=0,
             size_bytes=16,
-        ), )
+        ),
+    )
     serving_tensors = {
-        "weights":
-        torch.empty((8, ), dtype=torch.float16),
-        SERVING_MANIFEST_TENSOR_NAME:
-        torch.tensor(
+        "weights": torch.empty((8,), dtype=torch.float16),
+        SERVING_MANIFEST_TENSOR_NAME: torch.tensor(
             list(b'{"schema_version":1}'),
             dtype=torch.uint8,
         ),
@@ -373,25 +406,25 @@ def test_compute_pure_transform_representation_contract_hash_accepts_tensor_mapp
             CanonicalIndexEntry(
                 name="weights",
                 dtype=torch.float16,
-                shape=(8, ),
-                stride=(1, ),
+                shape=(8,),
+                stride=(1,),
                 storage_offset=0,
                 segment_offset=0,
                 size_bytes=16,
-            ), ),
+            ),
+        ),
     )
 
     assert hash_from_tensors == hash_from_index
 
 
-def test_build_pure_transform_publication_bundle_from_registered_artifact(
-) -> None:
+def test_build_pure_transform_publication_bundle_from_registered_artifact() -> None:
     canonical_index = _canonical_index(
         CanonicalIndexEntry(
             name="weights",
             dtype=torch.float16,
-            shape=(8, ),
-            stride=(1, ),
+            shape=(8,),
+            stride=(1,),
             storage_offset=0,
             segment_offset=0,
             size_bytes=16,
@@ -399,8 +432,8 @@ def test_build_pure_transform_publication_bundle_from_registered_artifact(
         CanonicalIndexEntry(
             name="__tensorcast_meta__.manifest_json",
             dtype=torch.uint8,
-            shape=(64, ),
-            stride=(1, ),
+            shape=(64,),
+            stride=(1,),
             storage_offset=0,
             segment_offset=16,
             size_bytes=64,
@@ -419,7 +452,7 @@ def test_build_pure_transform_publication_bundle_from_registered_artifact(
         canonical_index=canonical_index,
         lease=None,
     )
-    intent = ServingBuildIntent(
+    intent = RuntimeArtifactBuildIntent(
         representation_contract_hash="bafkrepresentation",
         builder_mode=BuilderMode.PURE_TRANSFORM,
         framework_name="torch",
@@ -441,27 +474,37 @@ def test_build_pure_transform_publication_bundle_from_registered_artifact(
     assert bundle.manifest_tensor_name == "__tensorcast_meta__.manifest_json"
     assert bundle.serving_manifest_ref == build_serving_manifest_ref()
     assert bundle.serving_manifest.canonical_tensor_count == 1
-    assert (bundle.serving_manifest.tensor_schema_hash ==
-            compute_serving_tensor_schema_hash(canonical_index))
-    assert (bundle.representation_publish_contract.serving_artifact_id ==
-            "mi2:test:serving")
-    assert (bundle.closeout_contract.representation_publish_contract ==
-            bundle.representation_publish_contract)
+    assert (
+        bundle.serving_manifest.tensor_schema_hash
+        == compute_runtime_artifact_tensor_schema_hash(canonical_index)
+    )
+    assert (
+        bundle.representation_publish_contract.serving_artifact_id == "mi2:test:serving"
+    )
+    assert (
+        bundle.closeout_contract.representation_publish_contract
+        == bundle.representation_publish_contract
+    )
     assert bundle.closeout_contract.kind == "representation_publish"
-    assert (ServingArtifactManifest.from_bytes(
-        bundle.serving_manifest_bytes) == bundle.serving_manifest)
-    assert (bundle.representation_publish_contract.serving_build_digest_version
-            == SERVING_BUILD_DIGEST_VERSION)
+    assert (
+        RuntimeArtifactManifest.from_bytes(bundle.serving_manifest_bytes)
+        == bundle.serving_manifest
+    )
+    assert (
+        bundle.representation_publish_contract.serving_build_digest_version
+        == SERVING_BUILD_DIGEST_VERSION
+    )
 
 
-def test_compute_pure_transform_representation_contract_hash_normalizes_logical_topology(
-) -> (None):
+def test_compute_pure_transform_representation_contract_hash_normalizes_logical_topology() -> (
+    None
+):
     source_index = _canonical_index(
         CanonicalIndexEntry(
             name="weights",
             dtype=torch.float16,
-            shape=(8, ),
-            stride=(1, ),
+            shape=(8,),
+            stride=(1,),
             storage_offset=0,
             segment_offset=0,
             size_bytes=16,
@@ -469,8 +512,8 @@ def test_compute_pure_transform_representation_contract_hash_normalizes_logical_
         CanonicalIndexEntry(
             name="bias",
             dtype=torch.float16,
-            shape=(8, ),
-            stride=(1, ),
+            shape=(8,),
+            stride=(1,),
             storage_offset=0,
             segment_offset=16,
             size_bytes=16,
@@ -480,8 +523,8 @@ def test_compute_pure_transform_representation_contract_hash_normalizes_logical_
         CanonicalIndexEntry(
             name="bias",
             dtype=torch.float16,
-            shape=(8, ),
-            stride=(1, ),
+            shape=(8,),
+            stride=(1,),
             storage_offset=0,
             segment_offset=16,
             size_bytes=16,
@@ -489,8 +532,8 @@ def test_compute_pure_transform_representation_contract_hash_normalizes_logical_
         CanonicalIndexEntry(
             name="weights",
             dtype=torch.float16,
-            shape=(8, ),
-            stride=(1, ),
+            shape=(8,),
+            stride=(1,),
             storage_offset=0,
             segment_offset=0,
             size_bytes=16,
@@ -500,44 +543,43 @@ def test_compute_pure_transform_representation_contract_hash_normalizes_logical_
     hash_a = compute_pure_transform_representation_contract_hash(
         source_artifact=source_index,
         serving_artifact=serving_index,
-        logical_topology_json=
-        '{"family":"tp","version":"v1","dimensions":[{"name":"tp","size":2},{"name":"pp","size":1}]}',
+        logical_topology_json='{"family":"tp","version":"v1","dimensions":[{"name":"tp","size":2},{"name":"pp","size":1}]}',
     )
     hash_b = compute_pure_transform_representation_contract_hash(
         source_artifact=source_index,
         serving_artifact=serving_index,
-        logical_topology_json=
-        '{"dimensions":[{"name":"pp","size":1},{"name":"tp","size":2}],"version":"v1","family":"tp"}',
+        logical_topology_json='{"dimensions":[{"name":"pp","size":1},{"name":"tp","size":2}],"version":"v1","family":"tp"}',
     )
     hash_c = compute_pure_transform_representation_contract_hash(
         source_artifact=source_index,
         serving_artifact=serving_index,
-        logical_topology_json=
-        '{"family":"tp","version":"v1","dimensions":[{"name":"tp","size":4},{"name":"pp","size":1}]}',
+        logical_topology_json='{"family":"tp","version":"v1","dimensions":[{"name":"tp","size":4},{"name":"pp","size":1}]}',
     )
 
     assert hash_a == hash_b
     assert hash_a != hash_c
 
 
-def test_build_pure_transform_publication_bundle_auto_derives_representation_hash(
-) -> (None):
+def test_build_pure_transform_publication_bundle_auto_derives_representation_hash() -> (
+    None
+):
     source_index = _canonical_index(
         CanonicalIndexEntry(
             name="weights",
             dtype=torch.float16,
-            shape=(8, ),
-            stride=(1, ),
+            shape=(8,),
+            stride=(1,),
             storage_offset=0,
             segment_offset=0,
             size_bytes=16,
-        ), )
+        ),
+    )
     serving_index = _canonical_index(
         CanonicalIndexEntry(
             name="weights",
             dtype=torch.float16,
-            shape=(8, ),
-            stride=(1, ),
+            shape=(8,),
+            stride=(1,),
             storage_offset=0,
             segment_offset=0,
             size_bytes=16,
@@ -545,8 +587,8 @@ def test_build_pure_transform_publication_bundle_auto_derives_representation_has
         CanonicalIndexEntry(
             name="__tensorcast_meta__.manifest_json",
             dtype=torch.uint8,
-            shape=(64, ),
-            stride=(1, ),
+            shape=(64,),
+            stride=(1,),
             storage_offset=0,
             segment_offset=16,
             size_bytes=64,
@@ -565,7 +607,7 @@ def test_build_pure_transform_publication_bundle_auto_derives_representation_has
         canonical_index=serving_index,
         lease=None,
     )
-    intent = ServingBuildIntent(
+    intent = RuntimeArtifactBuildIntent(
         builder_mode=BuilderMode.PURE_TRANSFORM,
         framework_name="torch",
         adapter_version="adapter-v4-auto",
@@ -581,35 +623,34 @@ def test_build_pure_transform_publication_bundle_auto_derives_representation_has
         serving_artifact=registered_artifact,
         source_version_key="models/demo/source/auto",
         serving_version_key="models/demo/serving/auto",
-        logical_topology_json=
-        '{"family":"tp","version":"v1","dimensions":[{"name":"tp","size":2}]}',
+        logical_topology_json='{"family":"tp","version":"v1","dimensions":[{"name":"tp","size":2}]}',
     )
 
     expected_hash = compute_pure_transform_representation_contract_hash(
         source_artifact=source_index,
         serving_artifact=registered_artifact,
-        logical_topology_json=
-        '{"family":"tp","version":"v1","dimensions":[{"name":"tp","size":2}]}',
+        logical_topology_json='{"family":"tp","version":"v1","dimensions":[{"name":"tp","size":2}]}',
     )
     assert bundle.representation_publish_contract.representation_contract_hash == (
-        expected_hash)
+        expected_hash
+    )
     assert bundle.contract_family == "pp"
     assert bundle.serving_manifest.representation_contract_hash == expected_hash
 
 
-def test_prepare_pure_transform_serving_registration_embeds_manifest_tensor(
-) -> None:
+def test_prepare_pure_transform_runtime_registration_embeds_manifest_tensor() -> None:
     source_index = _canonical_index(
         CanonicalIndexEntry(
             name="weights",
             dtype=torch.float16,
-            shape=(8, ),
-            stride=(1, ),
+            shape=(8,),
+            stride=(1,),
             storage_offset=0,
             segment_offset=0,
             size_bytes=16,
-        ), )
-    intent = ServingBuildIntent(
+        ),
+    )
+    intent = RuntimeArtifactBuildIntent(
         builder_mode=BuilderMode.PURE_TRANSFORM,
         framework_name="torch",
         adapter_version="adapter-v4-prep",
@@ -618,7 +659,7 @@ def test_prepare_pure_transform_serving_registration_embeds_manifest_tensor(
         source_artifact_ref="mi2:test:prep-source",
     )
 
-    prepared = prepare_pure_transform_serving_registration(
+    prepared = prepare_pure_transform_runtime_registration(
         build_intent=intent,
         source_artifact=source_index,
         tensors={"weights": torch.zeros(8, dtype=torch.float16)},
@@ -628,21 +669,28 @@ def test_prepare_pure_transform_serving_registration_embeds_manifest_tensor(
     assert "__tensorcast_meta__.manifest_json" in prepared.tensors
     assert len(prepared.serving_manifest_bytes) % 8 == 0
     assert prepared.canonical_index.total_size_bytes == sum(
-        int(entry.size_bytes) for entry in prepared.canonical_index.entries)
-    assert (ServingArtifactManifest.from_bytes(
-        prepared.serving_manifest_bytes) == prepared.serving_manifest)
-    assert (ServingArtifactManifest.from_bytes(
-        bytes(prepared.tensors["__tensorcast_meta__.manifest_json"].tolist()))
-            == prepared.serving_manifest)
+        int(entry.size_bytes) for entry in prepared.canonical_index.entries
+    )
+    assert (
+        RuntimeArtifactManifest.from_bytes(prepared.serving_manifest_bytes)
+        == prepared.serving_manifest
+    )
+    assert (
+        RuntimeArtifactManifest.from_bytes(
+            bytes(prepared.tensors["__tensorcast_meta__.manifest_json"].tolist())
+        )
+        == prepared.serving_manifest
+    )
     assert prepared.representation_contract_hash == (
         compute_pure_transform_representation_contract_hash(
             source_artifact=source_index,
             serving_artifact=prepared.canonical_index,
-        ))
+        )
+    )
 
 
-def test_prepare_serving_registration_supports_binding_finalize() -> None:
-    intent = ServingBuildIntent(
+def test_prepare_runtime_artifact_registration_supports_binding_finalize() -> None:
+    intent = RuntimeArtifactBuildIntent(
         builder_mode=BuilderMode.BINDING_FINALIZE,
         framework_name="torch",
         adapter_version="adapter-v4-binding",
@@ -651,7 +699,7 @@ def test_prepare_serving_registration_supports_binding_finalize() -> None:
         source_artifact_ref="mi2:test:binding-source",
     )
 
-    prepared = prepare_serving_registration(
+    prepared = prepare_runtime_artifact_registration(
         build_intent=intent,
         tensors={"weights": torch.ones(8, dtype=torch.float16)},
         representation_contract_hash="bafkbindingrepr",
@@ -666,9 +714,10 @@ def test_prepare_serving_registration_supports_binding_finalize() -> None:
     assert prepared.representation_contract_hash == "bafkbindingrepr"
 
 
-def test_prepare_binding_finalize_serving_registration_requires_binding_finalize(
-) -> (None):
-    intent = ServingBuildIntent(
+def test_prepare_binding_finalize_runtime_registration_requires_binding_finalize() -> (
+    None
+):
+    intent = RuntimeArtifactBuildIntent(
         builder_mode=BuilderMode.PURE_TRANSFORM,
         framework_name="torch",
         adapter_version="adapter-v4-wrong",
@@ -678,16 +727,17 @@ def test_prepare_binding_finalize_serving_registration_requires_binding_finalize
     )
 
     with pytest.raises(Exception, match="builder_mode=BINDING_FINALIZE"):
-        prepare_binding_finalize_serving_registration(
+        prepare_binding_finalize_runtime_registration(
             build_intent=intent,
             tensors={"weights": torch.ones(8, dtype=torch.float16)},
             representation_contract_hash="bafkbindingrepr",
         )
 
 
-def test_prepare_binding_finalize_serving_registration_supports_binding_finalize(
-) -> (None):
-    intent = ServingBuildIntent(
+def test_prepare_binding_finalize_runtime_registration_supports_binding_finalize() -> (
+    None
+):
+    intent = RuntimeArtifactBuildIntent(
         builder_mode=BuilderMode.BINDING_FINALIZE,
         framework_name="torch",
         adapter_version="adapter-v4-binding-helper",
@@ -696,7 +746,7 @@ def test_prepare_binding_finalize_serving_registration_supports_binding_finalize
         source_artifact_ref="mi2:test:binding-source",
     )
 
-    prepared = prepare_binding_finalize_serving_registration(
+    prepared = prepare_binding_finalize_runtime_registration(
         build_intent=intent,
         tensors={"weights": torch.ones(8, dtype=torch.float16)},
         representation_contract_hash="bafkbindingrepr",
@@ -705,14 +755,16 @@ def test_prepare_binding_finalize_serving_registration_supports_binding_finalize
 
     assert prepared.serving_manifest.builder_mode == BuilderMode.BINDING_FINALIZE
     assert prepared.serving_manifest.topology_admission_digest == "bafktopology"
-    manifest_from_tensor = ServingArtifactManifest.from_bytes(
-        bytes(prepared.tensors[prepared.manifest_tensor_name].tolist()))
+    manifest_from_tensor = RuntimeArtifactManifest.from_bytes(
+        bytes(prepared.tensors[prepared.manifest_tensor_name].tolist())
+    )
     assert manifest_from_tensor.topology_admission_digest == "bafktopology"
 
 
-def test_prepare_binding_finalize_serving_registration_rejects_stale_manifest_topology(
-) -> None:
-    intent = ServingBuildIntent(
+def test_prepare_binding_finalize_runtime_registration_rejects_stale_manifest_topology() -> (
+    None
+):
+    intent = RuntimeArtifactBuildIntent(
         builder_mode=BuilderMode.BINDING_FINALIZE,
         framework_name="torch",
         adapter_version="adapter-v4-binding-helper",
@@ -720,14 +772,14 @@ def test_prepare_binding_finalize_serving_registration_rejects_stale_manifest_to
         build_pipeline_version="pipeline-v4-binding-helper",
         source_artifact_ref="mi2:test:binding-source",
     )
-    prepared = prepare_binding_finalize_serving_registration(
+    prepared = prepare_binding_finalize_runtime_registration(
         build_intent=intent,
         tensors={"weights": torch.ones(8, dtype=torch.float16)},
         representation_contract_hash="bafkbindingrepr",
     )
 
     with pytest.raises(ArtifactError, match="topology_admission_digest"):
-        prepare_binding_finalize_serving_registration(
+        prepare_binding_finalize_runtime_registration(
             build_intent=intent,
             tensors=dict(prepared.tensors),
             representation_contract_hash="bafkbindingrepr",
@@ -735,11 +787,13 @@ def test_prepare_binding_finalize_serving_registration_rejects_stale_manifest_to
         )
 
 
-def test_prepare_serving_registration_keeps_manifest_on_tensor_device(
-) -> None:
-    device = (torch.device("cuda:0")
-              if torch.cuda.is_available() else torch.device("cpu"))
-    intent = ServingBuildIntent(
+def test_prepare_runtime_artifact_registration_keeps_manifest_on_tensor_device() -> (
+    None
+):
+    device = (
+        torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")
+    )
+    intent = RuntimeArtifactBuildIntent(
         builder_mode=BuilderMode.BINDING_FINALIZE,
         framework_name="torch",
         adapter_version="adapter-v4-device",
@@ -748,25 +802,25 @@ def test_prepare_serving_registration_keeps_manifest_on_tensor_device(
         source_artifact_ref="mi2:test:binding-source",
     )
 
-    prepared = prepare_serving_registration(
+    prepared = prepare_runtime_artifact_registration(
         build_intent=intent,
         tensors={"weights": torch.ones(8, dtype=torch.float16, device=device)},
         representation_contract_hash="bafkbindingrepr",
     )
 
     assert prepared.tensors["weights"].device == device
-    assert prepared.tensors[
-        "__tensorcast_meta__.manifest_json"].device == device
+    assert prepared.tensors["__tensorcast_meta__.manifest_json"].device == device
 
 
-def test_build_serving_publication_bundle_from_registered_artifact_rejects_binding_finalize(
-) -> (None):
+def test_build_runtime_artifact_publication_bundle_from_registered_artifact_rejects_binding_finalize() -> (
+    None
+):
     canonical_index = _canonical_index(
         CanonicalIndexEntry(
             name="weights",
             dtype=torch.float16,
-            shape=(8, ),
-            stride=(1, ),
+            shape=(8,),
+            stride=(1,),
             storage_offset=0,
             segment_offset=0,
             size_bytes=16,
@@ -774,8 +828,8 @@ def test_build_serving_publication_bundle_from_registered_artifact_rejects_bindi
         CanonicalIndexEntry(
             name="__tensorcast_meta__.manifest_json",
             dtype=torch.uint8,
-            shape=(64, ),
-            stride=(1, ),
+            shape=(64,),
+            stride=(1,),
             storage_offset=0,
             segment_offset=16,
             size_bytes=64,
@@ -794,7 +848,7 @@ def test_build_serving_publication_bundle_from_registered_artifact_rejects_bindi
         canonical_index=canonical_index,
         lease=None,
     )
-    intent = ServingBuildIntent(
+    intent = RuntimeArtifactBuildIntent(
         representation_contract_hash="bafkbindingrepr",
         builder_mode=BuilderMode.BINDING_FINALIZE,
         framework_name="torch",
@@ -805,7 +859,7 @@ def test_build_serving_publication_bundle_from_registered_artifact_rejects_bindi
     )
 
     with pytest.raises(ValueError, match="binding_value_ref subject"):
-        build_serving_publication_bundle_from_registered_artifact(
+        build_runtime_artifact_publication_bundle_from_registered_artifact(
             build_intent=intent,
             serving_artifact=registered_artifact,
             source_version_key="models/demo/source/v4",
@@ -813,14 +867,13 @@ def test_build_serving_publication_bundle_from_registered_artifact_rejects_bindi
         )
 
 
-def test_build_binding_finalize_publication_bundle_uses_admission_facts(
-) -> None:
+def test_build_binding_finalize_publication_bundle_uses_admission_facts() -> None:
     canonical_index = _canonical_index(
         CanonicalIndexEntry(
             name="weights",
             dtype=torch.float16,
-            shape=(8, ),
-            stride=(1, ),
+            shape=(8,),
+            stride=(1,),
             storage_offset=0,
             segment_offset=0,
             size_bytes=16,
@@ -828,15 +881,15 @@ def test_build_binding_finalize_publication_bundle_uses_admission_facts(
         CanonicalIndexEntry(
             name="__tensorcast_meta__.manifest_json",
             dtype=torch.uint8,
-            shape=(64, ),
-            stride=(1, ),
+            shape=(64,),
+            stride=(1,),
             storage_offset=0,
             segment_offset=16,
             size_bytes=64,
         ),
         total_size_bytes=80,
     )
-    intent = ServingBuildIntent(
+    intent = RuntimeArtifactBuildIntent(
         representation_contract_hash="bafkbindingrepr",
         builder_mode=BuilderMode.BINDING_FINALIZE,
         framework_name="torch",
@@ -846,7 +899,7 @@ def test_build_binding_finalize_publication_bundle_uses_admission_facts(
         source_artifact_ref="mi2:test:binding-source",
     )
     admission_facts = build_binding_finalize_admission_facts(
-        support_level=ServingSupportLevel.RUNTIME_BIND_SWAP_READY,
+        support_level=RuntimeSupportLevel.RUNTIME_BIND_SWAP_READY,
         topology_admission_digest="bafktopology",
         same_binding_fast_path_validated=True,
     )
@@ -869,14 +922,15 @@ def test_build_binding_finalize_publication_bundle_uses_admission_facts(
     assert bundle.admission_facts == admission_facts
 
 
-def test_build_binding_finalize_publication_bundle_rejects_serving_key_without_runtime_ready(
-) -> (None):
+def test_build_binding_finalize_publication_bundle_rejects_serving_key_without_runtime_ready() -> (
+    None
+):
     canonical_index = _canonical_index(
         CanonicalIndexEntry(
             name="weights",
             dtype=torch.float16,
-            shape=(8, ),
-            stride=(1, ),
+            shape=(8,),
+            stride=(1,),
             storage_offset=0,
             segment_offset=0,
             size_bytes=16,
@@ -884,15 +938,15 @@ def test_build_binding_finalize_publication_bundle_rejects_serving_key_without_r
         CanonicalIndexEntry(
             name="__tensorcast_meta__.manifest_json",
             dtype=torch.uint8,
-            shape=(64, ),
-            stride=(1, ),
+            shape=(64,),
+            stride=(1,),
             storage_offset=0,
             segment_offset=16,
             size_bytes=64,
         ),
         total_size_bytes=80,
     )
-    intent = ServingBuildIntent(
+    intent = RuntimeArtifactBuildIntent(
         representation_contract_hash="bafkbindingrepr",
         builder_mode=BuilderMode.BINDING_FINALIZE,
         framework_name="torch",
@@ -914,13 +968,13 @@ def test_build_binding_finalize_publication_bundle_rejects_serving_key_without_r
             canonical_index=canonical_index,
             serving_version_key="models/demo/serving/v4",
             admission_facts=build_binding_finalize_admission_facts(
-                support_level=ServingSupportLevel.BUILDER_PUBLICATION_READY,
+                support_level=RuntimeSupportLevel.BUILDER_PUBLICATION_READY,
                 same_binding_fast_path_validated=True,
             ),
         )
 
 
-def test_published_model_version_builds_serving_runtime_policy() -> None:
+def test_published_model_version_builds_runtime_artifact_policy() -> None:
     version = PublishedModelVersion(
         assembly_id="cgid:test-assembly",
         source_artifact_id="mi2:test:source",
@@ -935,22 +989,20 @@ def test_published_model_version_builds_serving_runtime_policy() -> None:
         ),
         representation_contract_hash="bafkrepresentation",
         serving_build_digest="bafkbuilddigest",
-        serving_manifest_ref=build_serving_manifest_ref(
-            "__serving_manifest__.json"),
+        serving_manifest_ref=build_serving_manifest_ref("__serving_manifest__.json"),
     )
 
-    policy = version.require_serving_runtime_policy()
+    policy = version.require_runtime_artifact_policy()
 
-    assert isinstance(policy, ServingRuntimePolicy)
+    assert isinstance(policy, RuntimeArtifactPolicy)
     assert policy.require_manifest is True
     assert policy.serving_manifest_ref == "tensor:__serving_manifest__.json"
     assert policy.expected_representation_contract_hash == "bafkrepresentation"
     assert policy.expected_serving_build_digest == "bafkbuilddigest"
 
 
-def test_coerce_serving_runtime_policy_accepts_manifest_lineage_models(
-) -> None:
-    manifest = ServingArtifactManifest(
+def test_coerce_runtime_artifact_policy_accepts_manifest_lineage_models() -> None:
+    manifest = RuntimeArtifactManifest(
         framework_name="torch",
         adapter_version="adapter-v6",
         serving_abi_version="abi-v6",
@@ -958,26 +1010,25 @@ def test_coerce_serving_runtime_policy_accepts_manifest_lineage_models(
         serving_build_digest="bafkbuilddigest",
         tensor_schema_hash="bafktensorschema",
         canonical_tensor_count=1,
-        serving_manifest_ref=build_serving_manifest_ref(
-            "__alt_manifest__.json"),
+        serving_manifest_ref=build_serving_manifest_ref("__alt_manifest__.json"),
         builder_mode=BuilderMode.PURE_TRANSFORM,
         build_pipeline_version="pipeline-v6",
     )
 
-    policy = coerce_serving_runtime_policy(manifest)
+    policy = coerce_runtime_artifact_policy(manifest)
 
-    assert isinstance(policy, ServingRuntimePolicy)
+    assert isinstance(policy, RuntimeArtifactPolicy)
     assert policy.serving_manifest_ref == "tensor:__alt_manifest__.json"
     assert policy.expected_representation_contract_hash == "bafkrepresentation"
     assert policy.expected_serving_build_digest == "bafkbuilddigest"
 
 
-def test_coerce_serving_runtime_policy_accepts_contract_and_version() -> None:
+def test_coerce_runtime_artifact_policy_accepts_contract_and_version() -> None:
     contract = RepresentationPublishContract(
-        subject=ServingPublicationSubject(
-            serving_artifact_id="mi2:test:serving", ),
-        serving_manifest_ref=build_serving_manifest_ref(
-            "__alt_manifest__.json"),
+        subject=RuntimePublicationSubject(
+            serving_artifact_id="mi2:test:serving",
+        ),
+        serving_manifest_ref=build_serving_manifest_ref("__alt_manifest__.json"),
         representation_contract_hash="bafkrepresentation",
         serving_build_digest="bafkbuilddigest",
     )
@@ -995,14 +1046,13 @@ def test_coerce_serving_runtime_policy_accepts_contract_and_version() -> None:
         ),
         representation_contract_hash="bafkrepresentation",
         serving_build_digest="bafkbuilddigest",
-        serving_manifest_ref=build_serving_manifest_ref(
-            "__alt_manifest__.json"),
+        serving_manifest_ref=build_serving_manifest_ref("__alt_manifest__.json"),
     )
 
-    contract_policy = coerce_serving_runtime_policy(contract)
-    version_policy = coerce_serving_runtime_policy(version)
+    contract_policy = coerce_runtime_artifact_policy(contract)
+    version_policy = coerce_runtime_artifact_policy(version)
 
-    assert contract_policy == ServingRuntimePolicy(
+    assert contract_policy == RuntimeArtifactPolicy(
         require_manifest=True,
         serving_manifest_ref="tensor:__alt_manifest__.json",
         expected_representation_contract_hash="bafkrepresentation",
@@ -1011,9 +1061,10 @@ def test_coerce_serving_runtime_policy_accepts_contract_and_version() -> None:
     assert version_policy == contract_policy
 
 
-def test_coerce_serving_runtime_policy_accepts_runtime_ready_representation_publish_spec(
-) -> (None):
-    manifest = ServingArtifactManifest(
+def test_coerce_runtime_artifact_policy_accepts_runtime_ready_representation_publish_spec() -> (
+    None
+):
+    manifest = RuntimeArtifactManifest(
         framework_name="torch",
         adapter_version="adapter-v6-runtime",
         serving_abi_version="abi-v6-runtime",
@@ -1025,8 +1076,9 @@ def test_coerce_serving_runtime_policy_accepts_runtime_ready_representation_publ
         build_pipeline_version="pipeline-v6-runtime",
     )
     contract = RepresentationPublishContract(
-        subject=ServingPublicationSubject(
-            serving_artifact_id="mi2:test:serving", ),
+        subject=RuntimePublicationSubject(
+            serving_artifact_id="mi2:test:serving",
+        ),
         serving_manifest_ref=build_serving_manifest_ref(),
         representation_contract_hash="bafkrepresentation",
         serving_build_digest="bafkbuilddigest",
@@ -1042,15 +1094,15 @@ def test_coerce_serving_runtime_policy_accepts_runtime_ready_representation_publ
             kind="representation_publish",
             representation_publish_contract=contract,
         ),
-        admission_facts=ServingAdmissionFacts(
+        admission_facts=RuntimeAdmissionFacts(
             finalize_class=FinalizeClass.RUNTIME_ONLY,
-            support_level=ServingSupportLevel.RUNTIME_BIND_SWAP_READY,
+            support_level=RuntimeSupportLevel.RUNTIME_BIND_SWAP_READY,
         ),
     )
 
-    policy = coerce_serving_runtime_policy(spec)
+    policy = coerce_runtime_artifact_policy(spec)
 
-    assert policy == ServingRuntimePolicy(
+    assert policy == RuntimeArtifactPolicy(
         require_manifest=True,
         serving_manifest_ref=build_serving_manifest_ref(),
         expected_representation_contract_hash="bafkrepresentation",
@@ -1058,9 +1110,10 @@ def test_coerce_serving_runtime_policy_accepts_runtime_ready_representation_publ
     )
 
 
-def test_coerce_serving_runtime_policy_rejects_builder_only_representation_publish_spec(
-) -> (None):
-    manifest = ServingArtifactManifest(
+def test_coerce_runtime_artifact_policy_rejects_builder_only_representation_publish_spec() -> (
+    None
+):
+    manifest = RuntimeArtifactManifest(
         framework_name="torch",
         adapter_version="adapter-v6-runtime-blocked",
         serving_abi_version="abi-v6-runtime-blocked",
@@ -1072,8 +1125,9 @@ def test_coerce_serving_runtime_policy_rejects_builder_only_representation_publi
         build_pipeline_version="pipeline-v6-runtime-blocked",
     )
     contract = RepresentationPublishContract(
-        subject=ServingPublicationSubject(
-            serving_artifact_id="mi2:test:serving", ),
+        subject=RuntimePublicationSubject(
+            serving_artifact_id="mi2:test:serving",
+        ),
         serving_manifest_ref=build_serving_manifest_ref(),
         representation_contract_hash="bafkrepresentation",
         serving_build_digest="bafkbuilddigest",
@@ -1088,18 +1142,18 @@ def test_coerce_serving_runtime_policy_rejects_builder_only_representation_publi
             kind="representation_publish",
             representation_publish_contract=contract,
         ),
-        admission_facts=ServingAdmissionFacts(
+        admission_facts=RuntimeAdmissionFacts(
             finalize_class=FinalizeClass.RUNTIME_ONLY,
-            support_level=ServingSupportLevel.BUILDER_PUBLICATION_READY,
+            support_level=RuntimeSupportLevel.BUILDER_PUBLICATION_READY,
         ),
     )
 
     with pytest.raises(ValueError, match="RUNTIME_BIND_SWAP_READY"):
-        coerce_serving_runtime_policy(spec)
+        coerce_runtime_artifact_policy(spec)
 
 
 def test_build_pure_transform_transform_spec_wraps_transform_args() -> None:
-    intent = ServingBuildIntent(
+    intent = RuntimeArtifactBuildIntent(
         representation_contract_hash="bafkrepresentation",
         builder_mode=BuilderMode.PURE_TRANSFORM,
         framework_name="torch",
@@ -1126,9 +1180,8 @@ def test_build_pure_transform_transform_spec_wraps_transform_args() -> None:
     assert spec.publication_spec.serving_version_key == "models/demo/serving/v6"
 
 
-def test_build_pure_transform_transform_spec_can_omit_representation_hash(
-) -> None:
-    intent = ServingBuildIntent(
+def test_build_pure_transform_transform_spec_can_omit_representation_hash() -> None:
+    intent = RuntimeArtifactBuildIntent(
         builder_mode=BuilderMode.PURE_TRANSFORM,
         framework_name="torch",
         adapter_version="adapter-v6-auto",
@@ -1146,12 +1199,12 @@ def test_build_pure_transform_transform_spec_can_omit_representation_hash(
 
 
 def test_build_pure_transform_publication_spec_wraps_typed_inputs() -> None:
-    admission_facts = ServingAdmissionFacts(
+    admission_facts = RuntimeAdmissionFacts(
         finalize_class=FinalizeClass.RUNTIME_ONLY,
-        support_level=ServingSupportLevel.RUNTIME_BIND_SWAP_READY,
+        support_level=RuntimeSupportLevel.RUNTIME_BIND_SWAP_READY,
         topology_admission_digest="bafktopology",
     )
-    intent = ServingBuildIntent(
+    intent = RuntimeArtifactBuildIntent(
         builder_mode=BuilderMode.PURE_TRANSFORM,
         framework_name="torch",
         adapter_version="adapter-v7",
@@ -1164,9 +1217,8 @@ def test_build_pure_transform_publication_spec_wraps_typed_inputs() -> None:
         contract_family="canonical_full",
         source_version_key="models/demo/source/v7",
         serving_version_key="models/demo/serving/v7",
-        serving_manifest_ref=build_serving_manifest_ref(
-            "__alt_manifest__.json"),
-        structural_view_ids=("view-a", ),
+        serving_manifest_ref=build_serving_manifest_ref("__alt_manifest__.json"),
+        structural_view_ids=("view-a",),
         admission_facts=admission_facts,
     )
 
@@ -1175,7 +1227,7 @@ def test_build_pure_transform_publication_spec_wraps_typed_inputs() -> None:
     assert publication_spec.source_version_key == "models/demo/source/v7"
     assert publication_spec.serving_version_key == "models/demo/serving/v7"
     assert publication_spec.serving_manifest_ref == "tensor:__alt_manifest__.json"
-    assert publication_spec.structural_view_ids == ("view-a", )
+    assert publication_spec.structural_view_ids == ("view-a",)
     assert publication_spec.admission_facts == admission_facts
 
 
@@ -1184,9 +1236,9 @@ def test_pure_transform_publication_no_longer_exposes_string_arg_fallback() -> N
 
     removed_markers: list[str] = []
     for path in (
-        Path("tensorcast/api/store/serving_builder.py"),
+        Path("tensorcast/api/store/publication_builder.py"),
         Path("tensorcast/engine_adapter/adapter.py"),
-        Path("tensorcast/serving/builder/publication.py"),
+        Path("tensorcast/artifact_runtime/recipe/publication.py"),
     ):
         text = path.read_text(encoding="utf-8")
         if "tc_serving_" in text or "build_pure_transform_serving_args" in text:
@@ -1195,14 +1247,15 @@ def test_pure_transform_publication_no_longer_exposes_string_arg_fallback() -> N
     assert removed_markers == []
 
 
-def test_representation_publish_spec_round_trips_admission_facts_and_digest_version(
-) -> (None):
+def test_representation_publish_spec_round_trips_admission_facts_and_digest_version() -> (
+    None
+):
     canonical_index = _canonical_index(
         CanonicalIndexEntry(
             name="weights",
             dtype=torch.float16,
-            shape=(8, ),
-            stride=(1, ),
+            shape=(8,),
+            stride=(1,),
             storage_offset=0,
             segment_offset=0,
             size_bytes=16,
@@ -1210,8 +1263,8 @@ def test_representation_publish_spec_round_trips_admission_facts_and_digest_vers
         CanonicalIndexEntry(
             name="__tensorcast_meta__.manifest_json",
             dtype=torch.uint8,
-            shape=(64, ),
-            stride=(1, ),
+            shape=(64,),
+            stride=(1,),
             storage_offset=0,
             segment_offset=16,
             size_bytes=64,
@@ -1230,13 +1283,13 @@ def test_representation_publish_spec_round_trips_admission_facts_and_digest_vers
         canonical_index=canonical_index,
         lease=None,
     )
-    admission_facts = ServingAdmissionFacts(
+    admission_facts = RuntimeAdmissionFacts(
         finalize_class=FinalizeClass.RUNTIME_ONLY,
-        support_level=ServingSupportLevel.RUNTIME_BIND_SWAP_READY,
+        support_level=RuntimeSupportLevel.RUNTIME_BIND_SWAP_READY,
         topology_admission_digest="bafktopology",
     )
     bundle = build_pure_transform_publication_bundle_from_registered_artifact(
-        build_intent=ServingBuildIntent(
+        build_intent=RuntimeArtifactBuildIntent(
             representation_contract_hash="bafkrepresentation",
             builder_mode=BuilderMode.PURE_TRANSFORM,
             framework_name="torch",
@@ -1253,18 +1306,21 @@ def test_representation_publish_spec_round_trips_admission_facts_and_digest_vers
     restored = RepresentationPublishSpec.from_proto(bundle.to_proto())
 
     assert restored.admission_facts == admission_facts
-    assert (restored.representation_publish_contract.
-            serving_build_digest_version == SERVING_BUILD_DIGEST_VERSION)
+    assert (
+        restored.representation_publish_contract.serving_build_digest_version
+        == SERVING_BUILD_DIGEST_VERSION
+    )
 
 
-def test_topology_admission_digest_does_not_change_representation_or_build_identity(
-) -> (None):
+def test_topology_admission_digest_does_not_change_representation_or_build_identity() -> (
+    None
+):
     canonical_index = _canonical_index(
         CanonicalIndexEntry(
             name="weights",
             dtype=torch.float16,
-            shape=(8, ),
-            stride=(1, ),
+            shape=(8,),
+            stride=(1,),
             storage_offset=0,
             segment_offset=0,
             size_bytes=16,
@@ -1272,15 +1328,15 @@ def test_topology_admission_digest_does_not_change_representation_or_build_ident
         CanonicalIndexEntry(
             name="__tensorcast_meta__.manifest_json",
             dtype=torch.uint8,
-            shape=(64, ),
-            stride=(1, ),
+            shape=(64,),
+            stride=(1,),
             storage_offset=0,
             segment_offset=16,
             size_bytes=64,
         ),
         total_size_bytes=80,
     )
-    intent = ServingBuildIntent(
+    intent = RuntimeArtifactBuildIntent(
         representation_contract_hash="bafkbindingrepr",
         builder_mode=BuilderMode.BINDING_FINALIZE,
         framework_name="torch",
@@ -1300,7 +1356,7 @@ def test_topology_admission_digest_does_not_change_representation_or_build_ident
         publication_subject=binding_value,
         canonical_index=canonical_index,
         admission_facts=build_binding_finalize_admission_facts(
-            support_level=ServingSupportLevel.BUILDER_PUBLICATION_READY,
+            support_level=RuntimeSupportLevel.BUILDER_PUBLICATION_READY,
             topology_admission_digest="bafktopology-a",
             same_binding_fast_path_validated=True,
         ),
@@ -1310,17 +1366,20 @@ def test_topology_admission_digest_does_not_change_representation_or_build_ident
         publication_subject=binding_value,
         canonical_index=canonical_index,
         admission_facts=build_binding_finalize_admission_facts(
-            support_level=ServingSupportLevel.BUILDER_PUBLICATION_READY,
+            support_level=RuntimeSupportLevel.BUILDER_PUBLICATION_READY,
             topology_admission_digest="bafktopology-b",
             same_binding_fast_path_validated=True,
         ),
     )
 
-    assert (bundle_a.representation_publish_contract.
-            representation_contract_hash == bundle_b.
-            representation_publish_contract.representation_contract_hash)
-    assert (bundle_a.representation_publish_contract.serving_build_digest ==
-            bundle_b.representation_publish_contract.serving_build_digest)
+    assert (
+        bundle_a.representation_publish_contract.representation_contract_hash
+        == bundle_b.representation_publish_contract.representation_contract_hash
+    )
+    assert (
+        bundle_a.representation_publish_contract.serving_build_digest
+        == bundle_b.representation_publish_contract.serving_build_digest
+    )
     assert bundle_a.serving_manifest.topology_admission_digest == "bafktopology-a"
     assert bundle_b.serving_manifest.topology_admission_digest == "bafktopology-b"
     assert bundle_a.admission_facts != bundle_b.admission_facts
diff --git a/tests/python/test_serving_fake_framework_boundary.py b/tests/python/test_serving_fake_framework_boundary.py
deleted file mode 100644
index 5dc437ce..00000000
--- a/tests/python/test_serving_fake_framework_boundary.py
+++ /dev/null
@@ -1,670 +0,0 @@
-#  Copyright (c) 2026, TensorCast Team.
-
-from contextlib import contextmanager
-from types import SimpleNamespace
-
-import torch
-
-import tensorcast.serving._runtime_impl.lifecycle as integration_mod
-from tensorcast.serving._runtime_impl.lifecycle import (
-    FrameworkIdentity,
-    IntegrationHost,
-    MaterializationExecutionFacts,
-    PlacementAdmissionFacts,
-    PlacementIdentityFacts,
-    PlacementMemberFacts,
-    ServingIntegration,
-    SourceSelector,
-)
-from tensorcast.serving.admin import AdminLocalSourceBootstrap
-from tensorcast.serving.builder.compiler import (
-    CompiledServingRecipe,
-    TensorcastSemanticValidationSpec,
-    TensorcastServingFacts,
-    TensorSchemaEntry,
-)
-from tensorcast.serving.builder.trace_ir import TracePlan
-from tensorcast.serving.recipe_build import (
-    RecipeBuildSession,
-    ServingBindingPlan,
-)
-from tensorcast.serving.retained_binding import (
-    ParsedRetainedServingBindingAuthority,
-    RetainedServingBindingExpectedDigests,
-)
-from tensorcast.serving.runtime import (
-    BootstrapPolicy,
-    ExistingServingArtifact,
-    RequestContext,
-    RetainedBindingAcquire,
-    ServingArtifactLocator,
-)
-from tensorcast.types import (
-    BindingReservationCapability,
-    BindingValueRef,
-    FinalizeClass,
-    ServingArtifactManifest,
-    ServingBindingMemberRef,
-    ServingSupportLevel,
-)
-
-
-class _FakeArtifactView:
-    def __init__(self, parent, names=None):
-        self.parent = parent
-        self.names = tuple(names or ())
-
-    def bind(self, **kwargs):
-        binding = _FakeBinding()
-        binding.names = self.names
-        binding.kwargs = kwargs
-        return binding
-
-
-class _FakeArtifact:
-    def subset(self, names):
-        return _FakeArtifactView(self, names)
-
-
-class _FakeBinding:
-    def __init__(self):
-        self.tensors = {"w": torch.ones((1,), dtype=torch.float16)}
-        self.binding_layout_id = "layout-1"
-        self.realized = None
-        self.swapped = None
-        self.closed = False
-
-    def realize_from(self, source_view, *, realization_plan, options):
-        self.realized = (source_view, realization_plan, options)
-        return "epoch-1"
-
-    def swap(self, artifact, **kwargs):
-        self.swapped = (artifact, kwargs)
-        self.tensors = {"w": torch.full((1,), 2.0, dtype=torch.float16)}
-        return self
-
-    def freeze_current(self, *, update_epoch, source_artifact_ref):
-        return SimpleNamespace(
-            binding_id="binding-1",
-            binding_layout_id=self.binding_layout_id,
-            binding_value_id="value-1",
-            seal_generation=1,
-            update_epoch=update_epoch,
-            source_artifact_ref=source_artifact_ref,
-            local_serving_ref="binding-local:fake",
-        )
-
-    def close(self):
-        self.closed = True
-
-
-class _FakeRestoredRetainedBinding:
-    def __init__(self):
-        self.tensors = {"w": torch.ones((1,), dtype=torch.float16)}
-        self.binding_layout_id = "layout-1"
-        self.binding_value_ref = SimpleNamespace(
-            binding_id="binding-1",
-            binding_layout_id="layout-1",
-            binding_value_id="value-1",
-            seal_generation=1,
-        )
-        self.reservation_bytes = 4096
-        self.closed = False
-        self.transferred = False
-
-    def transfer_to_runtime(self):
-        self.transferred = True
-        return SimpleNamespace(close=lambda: None)
-
-    def close(self):
-        self.closed = True
-
-
-def _retained_authority() -> ParsedRetainedServingBindingAuthority:
-    member = ServingBindingMemberRef(
-        member_id="member-0",
-        member_index=0,
-        member_count=1,
-        group_id="group-1",
-    )
-    binding_ref = BindingValueRef(
-        binding_id="binding-1",
-        binding_layout_id="layout-1",
-        binding_value_id="value-1",
-        seal_generation=1,
-    )
-    capability = BindingReservationCapability(
-        capability_id="capability-1",
-        binding_value_ref=binding_ref,
-        daemon_id="daemon-1",
-        daemon_session_id="session-1",
-        device_uuid="gpu-0",
-        member=member,
-        reservation_bytes=4096,
-        scope_digest="scope-1",
-    )
-    return ParsedRetainedServingBindingAuthority(
-        group_id="group-1",
-        local_serving_ref="binding-local:fake",
-        binding_value_ref=binding_ref,
-        reservation_capability=capability,
-        daemon_id="daemon-1",
-        daemon_session_id="session-1",
-        device_uuid="gpu-0",
-        member=member,
-        reservation_bytes=4096,
-        expected=RetainedServingBindingExpectedDigests(
-            target_layout_hash="layout-hash",
-            tensor_schema_hash="fake-schema",
-            serving_build_digest="build-digest",
-            resolved_spec_digest="spec-digest",
-        ),
-        readiness="serving_local_ready",
-        verification_state="local_only",
-    )
-
-
-class _FakeSource:
-    def subset(self, names):
-        return ("subset", tuple(names))
-
-
-class _FakeRuntimeModel:
-    def __init__(self):
-        self.tensors = {"w": torch.empty((1,), dtype=torch.float16, device="meta")}
-
-
-class _FakeFrameworkHost:
-    def identity(self, model_config):
-        del model_config
-        return FrameworkIdentity(
-            framework_name="fakefw",
-            framework_version="fakefw-v1",
-            adapter_version="adapter-v1",
-            serving_abi_version="abi-v1",
-        )
-
-    def prepare_model_construction(self, framework_config, model_config):
-        del framework_config, model_config
-
-    def build_meta_model(self, framework_config, model_config):
-        del framework_config, model_config
-        return _FakeRuntimeModel()
-
-    def build_runtime_model(self, framework_config, model_config, target_device):
-        del framework_config, model_config, target_device
-        return _FakeRuntimeModel()
-
-    def assert_model_ready_for_runtime_binding(self, model, *, context):
-        del context
-        assert "w" in model.tensors
-
-    def semantic_probes(self, model, model_config):
-        del model, model_config
-        return {}
-
-
-class _FakePlacementHost:
-    def identity_facts(self, framework_config):
-        del framework_config
-        return PlacementIdentityFacts(
-            tensor_parallel_rank=0,
-            tensor_parallel_size=1,
-            pipeline_parallel_rank=0,
-            pipeline_parallel_size=1,
-            data_parallel_rank=0,
-            data_parallel_size=1,
-        )
-
-    def admission_facts(self, framework_config):
-        del framework_config
-        return PlacementAdmissionFacts()
-
-    def member_facts(self, framework_config):
-        del framework_config
-        return PlacementMemberFacts(
-            runtime_rank=0,
-            runtime_world_size=1,
-            member_id="member-0",
-            member_index=0,
-            member_count=1,
-            group_id_hint="group-1",
-        )
-
-    def execution_facts(self, framework_config):
-        del framework_config
-        return MaterializationExecutionFacts(
-            collective_rank=0,
-            collective_world_size=1,
-            tensor_parallel_ranks=(0,),
-        )
-
-
-class _FakeTensorSurface:
-    def runtime_only_tensor_names(self, model):
-        del model
-        return ()
-
-    def align_runtime_tensor_names(self, model, expected_names):
-        assert set(expected_names) == set(model.tensors)
-        return 0
-
-    def collect_runtime_tensors(self, model, *, remove_duplicate=False):
-        del remove_duplicate
-        return dict(model.tensors)
-
-    def collect_runtime_tensor_view(self, tensors):
-        del tensors
-        return ()
-
-    def compute_runtime_tensor_schema_hash(self, tensors, *, remove_duplicate=False):
-        del tensors, remove_duplicate
-        return "fake-schema"
-
-    def attach_bound_tensors(self, model, tensors, *, replace_meta_params):
-        del replace_meta_params
-        model.tensors.update(tensors)
-        return model
-
-    def allocate_runtime_only_tensors(self, model, target_device):
-        del model, target_device
-        return {}
-
-    def snapshot_tensor_invariants(self, tensors):
-        return tuple(sorted(tensors))
-
-    def validate_tensor_invariants(self, before, after):
-        assert before == tuple(sorted(after))
-
-
-def _realization_plan_proto():
-    from tensorcast.proto.daemon.v2 import store_daemon_pb2
-
-    plan = store_daemon_pb2.BindingRealizationPlan()
-    entry = plan.entries.add(dst_name="w")
-    entry.op_kind = store_daemon_pb2.BINDING_REALIZATION_OP_KIND_COPY
-    entry.source_name = "w"
-    return plan.SerializeToString(deterministic=True)
-
-
-def _recipe():
-    return CompiledServingRecipe(
-        compile_key="compile",
-        source_artifact_ref="mi2:source",
-        source_metadata_fingerprint="meta",
-        serving_facts=TensorcastServingFacts(
-            framework_name="fakefw",
-            framework_version="fakefw-v1",
-            adapter_version="adapter-v1",
-            serving_abi_version="abi-v1",
-            support_level=ServingSupportLevel.RUNTIME_BIND_SWAP_READY,
-            runtime_only_tensor_names=(),
-            process_after_load_class=FinalizeClass.RUNTIME_ONLY,
-            post_bind_finalize_class=FinalizeClass.RUNTIME_ONLY,
-        ),
-        trace_plan=TracePlan(
-            copy_plan=[],
-            expected_src_names={"w"},
-            expected_dst_names={"w"},
-            tensorcast_slices={},
-            src_hull={},
-        ),
-        tensor_schema=(
-            TensorSchemaEntry(
-                name="w",
-                dtype="torch.float16",
-                shape=(1,),
-                stride=(1,),
-            ),
-        ),
-        source_hull=(),
-        realization_plan=(),
-        realization_fallback_plan=(),
-        topology_ref=None,
-        member_ref=None,
-        semantic_validation_spec=TensorcastSemanticValidationSpec.empty(),
-        realization_plan_proto=_realization_plan_proto(),
-        realization_plan_count=1,
-    )
-
-
-def test_fake_second_framework_core_generated_ids_are_framework_neutral():
-    group_id = integration_mod.build_collective_group_id(
-        artifact_ref="mi2:fake:serving",
-        operation_scope="fakefw.realize",
-        tp_ranks=(0, 1),
-        contract_identity="repr",
-    )
-    assert group_id.startswith("tensorcast-")
-    assert "vllm" not in group_id
-
-    _contract_hash, manifest_bytes = (
-        integration_mod.prepare_same_binding_manifest_carrier(
-            _recipe(),
-            manifest_tensor_name="__tensorcast_meta__.manifest",
-            representation_contract_hash="repr",
-            topology_admission_digest="topology-digest",
-        )
-    )
-    manifest = ServingArtifactManifest.from_bytes(manifest_bytes)
-    lower_manifest = manifest_bytes.lower()
-    assert integration_mod.LOCAL_READY_BOOTSTRAP_BUILD_PIPELINE_VERSION == (
-        "tensorcast-bootstrap-v1"
-    )
-    assert manifest.topology_admission_digest == "topology-digest"
-    assert (
-        integration_mod.LOCAL_READY_BOOTSTRAP_BUILD_PIPELINE_VERSION.encode()
-        in manifest_bytes
-    )
-    assert b"vllm" not in lower_manifest
-
-
-def test_fake_second_framework_uses_host_intent_lifecycle(monkeypatch):
-    identity = ServingBindingPlan(
-        model_hash="hash",
-        model_id="fake-model",
-        model_revision=None,
-        dtype="torch.float16",
-        runtime_version="fake-runtime-v1",
-        framework_name="fakefw",
-        framework_version="fakefw-v1",
-        adapter_version="adapter-v1",
-        serving_abi_version="abi-v1",
-        trace_cache_schema_version=1,
-        tp_rank=0,
-        tp_world_size=1,
-    )
-    session = RecipeBuildSession(identity)
-    assert session.recipe_cache_key(metadata_fingerprint="meta")
-
-    monkeypatch.setattr(
-        integration_mod,
-        "read_source_bound_contract_state",
-        lambda: SimpleNamespace(
-            source_bound_contract_ready=True,
-            source_bound_contract_version=4,
-            source_bound_capability_names=("collective",),
-        ),
-    )
-    monkeypatch.setattr(
-        ServingIntegration,
-        "build_materialization_options",
-        lambda self, **kwargs: ("realize-options", kwargs),
-    )
-    direct_resolve_calls = []
-
-    class _FakeResolver:
-        def resolve(self, artifact_ref):
-            direct_resolve_calls.append(("resolve", artifact_ref))
-            return SimpleNamespace(
-                artifact=_FakeArtifact(),
-                artifact_ref=artifact_ref,
-                tensor_names=("w",),
-                manifest=SimpleNamespace(
-                    representation_contract_hash="repr-direct",
-                    source_artifact_ref="mi2:source",
-                    serving_build_digest="build-direct",
-                ),
-            )
-
-        def cross_check(self, resolved_artifact, **kwargs):
-            direct_resolve_calls.append(("cross_check", kwargs))
-            return resolved_artifact
-
-    host = IntegrationHost(
-        framework=_FakeFrameworkHost(),
-        placement=_FakePlacementHost(),
-        tensor_surface=_FakeTensorSurface(),
-    )
-    direct_attachment = ServingIntegration(
-        resolver=_FakeResolver(),
-        host=host,
-    ).start(
-        ExistingServingArtifact(ServingArtifactLocator.artifact_ref("mi2:serving")),
-        RequestContext(
-            framework_config=SimpleNamespace(),
-            model_config=SimpleNamespace(model="fake-model"),
-            target_device=torch.device("cuda:0"),
-        ),
-    )
-    direct_payload = direct_attachment.view.endpoint.to_weight_version_payload()
-    assert direct_attachment.state.runtime_view.readiness == "serving"
-    assert direct_payload["serving_artifact_ref"] == "mi2:serving"
-    assert direct_payload["source_artifact_ref"] == "mi2:source"
-    assert direct_resolve_calls[1][1]["expected_tensor_schema_hash"] == "fake-schema"
-    reload_attachment = ServingIntegration(
-        resolver=_FakeResolver(),
-        host=host,
-    ).reload(
-        direct_attachment.state,
-        ExistingServingArtifact(
-            ServingArtifactLocator.artifact_ref("mi2:serving-next")
-        ),
-        RequestContext(
-            framework_config=SimpleNamespace(),
-            model_config=SimpleNamespace(model="fake-model"),
-        ),
-        model=direct_attachment.model,
-    )
-    reload_payload = reload_attachment.view.endpoint.to_weight_version_payload()
-    reload_response = reload_attachment.view.endpoint.to_reload_response_payload()
-    assert reload_payload["serving_artifact_ref"] == "mi2:serving-next"
-    assert reload_response == {
-        "schema_version": 1,
-        "serving_artifact_ref": "mi2:serving-next",
-        "representation_contract_hash": "repr-direct",
-        "serving_build_digest": "build-direct",
-        "readiness": "serving",
-    }
-    assert direct_attachment.state.binding.swapped[1]["options"] == "realize-options"
-    described = ServingIntegration(host=host).describe(reload_attachment.state)
-    assert (
-        described.endpoint.to_weight_version_payload()["serving_artifact_ref"]
-        == "mi2:serving-next"
-    )
-
-    host_binding = _FakeBinding()
-    host_model = _FakeRuntimeModel()
-    attachment = ServingIntegration(host=host).start(
-        AdminLocalSourceBootstrap(
-            source_selector=SourceSelector.local_path("/tmp/fake-model"),
-            bootstrap_policy=BootstrapPolicy(),
-            recipe=_recipe(),
-            source_subject=_FakeSource(),
-            source_artifact_ref="mi2:source",
-            model=host_model,
-            binding_factory=lambda *args, **kwargs: host_binding,
-        ),
-        RequestContext(
-            framework_config=SimpleNamespace(),
-            model_config=SimpleNamespace(model="fake-model"),
-            target_device=torch.device("cuda:0"),
-        ),
-    )
-    assert attachment.model is host_model
-    assert attachment.state.runtime_view.readiness == "serving_local_ready"
-    payload = attachment.view.endpoint.to_weight_version_payload()
-    assert payload["source_artifact_ref"] == "mi2:source"
-    assert payload["family"] == "generic"
-    assert payload["tp_rank"] == 0
-    assert attachment.prepared is not None
-    assert host_binding.realized is not None
-    assert host_binding.realized[2] == "realize-options"
-
-    retained_calls = []
-    restored = _FakeRestoredRetainedBinding()
-
-    @contextmanager
-    def fake_restore_retained(**kwargs):
-        retained_calls.append(kwargs)
-        yield restored
-
-    monkeypatch.setattr(
-        integration_mod, "restore_retained_binding", fake_restore_retained
-    )
-    retained_attachment = ServingIntegration(host=host).start(
-        RetainedBindingAcquire(authority=_retained_authority()),
-        RequestContext(
-            framework_config=SimpleNamespace(),
-            model_config=SimpleNamespace(model="fake-model"),
-            target_device=torch.device("cuda:0"),
-        ),
-    )
-    retained_payload = retained_attachment.view.endpoint.to_weight_version_payload()
-    assert retained_attachment.state.runtime_view.readiness == "serving_local_ready"
-    assert retained_payload["local_serving_ref"] == "binding-local:fake"
-    assert retained_payload["binding_value_ref"]["binding_value_id"] == "value-1"
-    assert retained_calls[0]["expected_member"].member_index == 0
-    assert restored.transferred
-
-
-def test_fake_second_framework_uses_public_runtime_session(monkeypatch):
-    import tensorcast.serving.hosts as tc_hosts
-    import tensorcast.serving.runtime as tc_runtime
-    from tensorcast.serving.testing import assert_framework_isolation
-
-    monkeypatch.setattr(
-        tc_runtime.RuntimeSettings, "ensure_initialized", lambda self: None
-    )
-    monkeypatch.setattr(
-        integration_mod,
-        "read_source_bound_contract_state",
-        lambda: SimpleNamespace(
-            source_bound_contract_ready=True,
-            source_bound_contract_version=4,
-            source_bound_capability_names=("collective",),
-        ),
-    )
-    monkeypatch.setattr(
-        integration_mod.ServingIntegration,
-        "build_materialization_options",
-        lambda self, **kwargs: ("runtime-options", kwargs),
-    )
-
-    class _Resolver:
-        def resolve(self, artifact_ref):
-            return SimpleNamespace(
-                artifact=_FakeArtifact(),
-                artifact_ref=artifact_ref,
-                tensor_names=("w",),
-                manifest=SimpleNamespace(
-                    representation_contract_hash=f"repr:{artifact_ref}",
-                    source_artifact_ref="mi2:source",
-                    serving_build_digest=f"build:{artifact_ref}",
-                ),
-            )
-
-        def cross_check(self, resolved_artifact, **kwargs):
-            return resolved_artifact
-
-    host = tc_hosts.IntegrationHost(
-        framework=_FakeFrameworkHost(),
-        placement=_FakePlacementHost(),
-        tensor_surface=_FakeTensorSurface(),
-    )
-    session = tc_runtime.ServingRuntimeSession.from_config(
-        {
-            "bootstrap": {
-                "mode": "disabled",
-            },
-            "serving": {
-                "artifact_locator": {
-                    "kind": "artifact_ref",
-                    "value": "mi2:serving",
-                },
-            },
-        },
-        host=host,
-        resolver=_Resolver(),
-    )
-
-    attachment = session.start(
-        tc_runtime.RequestContext(
-            framework_config=SimpleNamespace(),
-            model_config=SimpleNamespace(model="fake-model"),
-            target_device=torch.device("cuda:0"),
-        )
-    )
-    reloaded = session.reload(
-        current_attachment=attachment,
-        artifact_locator=tc_runtime.ServingArtifactLocator.artifact_ref(
-            "mi2:serving-next"
-        ),
-        policy=tc_runtime.ServingPolicy(),
-        context=tc_runtime.RequestContext(
-            framework_config=SimpleNamespace(),
-            model_config=SimpleNamespace(model="fake-model"),
-        ),
-        model=attachment.model,
-    )
-
-    assert (
-        attachment.view.endpoint.to_weight_version_payload()["serving_artifact_ref"]
-        == "mi2:serving"
-    )
-    assert (
-        reloaded.view.endpoint.to_reload_response_payload()["serving_artifact_ref"]
-        == "mi2:serving-next"
-    )
-    assert_framework_isolation(
-        ("tensorcast.serving.runtime", "tensorcast.serving.hosts")
-    )
-
-
-def test_fake_second_framework_runtime_conformance_kit():
-    import tensorcast.serving.hosts as tc_hosts
-    import tensorcast.serving.runtime as tc_runtime
-    from tensorcast.serving.testing import (
-        assert_level1_runtime_conformance,
-        assert_level2_local_bootstrap_conformance,
-        assert_level3_retained_binding_conformance,
-    )
-
-    result = assert_level1_runtime_conformance(tc_runtime, tc_hosts)
-
-    assert result.checks["direct_start"]
-    assert result.checks["reload"]
-    assert result.checks["describe"]
-    assert result.checks["source_capability_not_required"]
-    assert result.checks["source_catalog_not_required"]
-    assert result.checks["rejects_local_reload_artifact_locator"]
-    assert result.checks["rejects_untyped_reload_artifact_locator"]
-    assert result.checks["rejects_untyped_reload_policy"]
-
-    local = assert_level2_local_bootstrap_conformance(tc_runtime, tc_hosts)
-    assert local.checks["missing_source_catalog_fails_closed"]
-    assert local.checks["source_catalog_request_core_owned"]
-    assert local.checks["recipe_build_receives_core_catalog"]
-    assert local.checks["missing_trace_capability_is_explicit"]
-    assert local.checks["local_path_is_not_reload_artifact_locator"]
-
-    retained = assert_level3_retained_binding_conformance(tc_runtime, tc_hosts)
-    assert retained.checks["retained_acquire_public_start"]
-    assert retained.checks["retained_acquire_uses_host_member"]
-    assert retained.checks["retained_acquire_transfers_ownership"]
-    assert retained.checks["missing_authority_fails_closed"]
-    assert retained.checks["authority_mismatch_fails_closed"]
-    assert retained.checks["failure_path_used_retained_restore"]
-    assert retained.checks["failure_cleanup_closes_untransferred_handle"]
-    assert retained.checks["rejects_arbitrary_retained_authority"]
-
-
-def test_conformance_failure_summary_includes_onboarding_hint():
-    from tensorcast.serving.testing import ConformanceResult
-
-    result = ConformanceResult(
-        checks={"direct_start": False},
-        messages={"direct_start": "provide a tensor surface"},
-        level="level1-runtime",
-    )
-
-    try:
-        result.assert_passed()
-    except AssertionError as exc:
-        message = str(exc)
-    else:
-        raise AssertionError("expected conformance failure")
-
-    assert "level1-runtime" in message
-    assert "direct_start" in message
-    assert "provide a tensor surface" in message