diff --git a/Cargo.lock b/Cargo.lock
index bbbccb1..dbcdcf4 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -6,14 +6,18 @@ version = 4
 name = "agentic-core"
 version = "0.1.0"
 dependencies = [
+ "async-stream",
+ "axum",
  "bytes",
  "chrono",
  "criterion",
+ "either",
  "futures",
  "http",
  "reqwest",
  "serde",
  "serde_json",
+ "serde_yaml",
  "sqlx",
  "thiserror",
  "tokio",
@@ -133,6 +137,28 @@ version = "1.0.102"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "7f202df86484c868dbad7eaa557ef785d5c66295e41b460ef922eca0723b842c"
 
+[[package]]
+name = "async-stream"
+version = "0.3.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0b5a71a6f37880a80d1d7f19efd781e4b5de42c88f0722cc13bcb6cc2cfe8476"
+dependencies = [
+ "async-stream-impl",
+ "futures-core",
+ "pin-project-lite",
+]
+
+[[package]]
+name = "async-stream-impl"
+version = "0.3.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c7c24de15d275a1ecfd47a380fb4d5ec9bfe0933f309ed5e705b775596a3574d"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
 [[package]]
 name = "atoi"
 version = "2.0.0"
@@ -150,9 +176,9 @@ checksum = "1505bd5d3d116872e7271a6d4e16d81d0c8570876c8de68093a09ac269d8aac0"
 
 [[package]]
 name = "autocfg"
-version = "1.5.0"
+version = "1.5.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8"
+checksum = "f2032f911046de80f0a198e0901378627c33f59ea0ac00e363d481118bd70a53"
 
 [[package]]
 name = "axum"
@@ -220,9 +246,9 @@ checksum = "2af50177e190e07a26ab74f8b1efbfe2ef87da2116221318cb1c2e82baf7de06"
 
 [[package]]
 name = "bitflags"
-version = "2.11.1"
+version = "2.12.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c4512299f36f043ab09a583e57bceb5a5aab7a73db1805848e8fef3c9e8c78b3"
+checksum = "84d7ced0ae9557296835c32bf1b1e02b44c746701f898460fb000d7eaa84f00a"
 dependencies = [
  "serde_core",
 ]
@@ -238,9 +264,9 @@ dependencies = [
 
 [[package]]
 name = "bumpalo"
-version = "3.20.2"
+version = "3.20.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5d20789868f4b01b2f2caec9f5c4e0213b41e3e5702a50157d699ae31ced2fcb"
+checksum = "72f5acc6cb2ba439de613abc23857ec3d78374d8ed5ac84e9d11336e87da8649"
 
 [[package]]
 name = "byteorder"
@@ -262,9 +288,9 @@ checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5"
 
 [[package]]
 name = "cc"
-version = "1.2.62"
+version = "1.2.63"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a1dce859f0832a7d088c4f1119888ab94ef4b5d6795d1ce05afb7fe159d79f98"
+checksum = "556e016178bb5662a08681bbe0f00f8e17631781a4dfc8c45e466e4b185ec27f"
 dependencies = [
  "find-msvc-tools",
  "shlex",
@@ -537,9 +563,9 @@ dependencies = [
 
 [[package]]
 name = "displaydoc"
-version = "0.2.5"
+version = "0.2.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0"
+checksum = "1ac70aa55017e108007fbaf5aa0f54b021c98f92ff8af59d42eda9da96e3dd4f"
 dependencies = [
  "proc-macro2",
  "quote",
@@ -554,9 +580,9 @@ checksum = "1aaf95b3e5c8f23aa320147307562d361db0ae0d51242340f558153b4eb2439b"
 
 [[package]]
 name = "either"
-version = "1.15.0"
+version = "1.16.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719"
+checksum = "91622ff5e7162018101f2fea40d6ebf4a78bbe5a49736a2020649edf9693679e"
 dependencies = [
  "serde",
 ]
@@ -885,9 +911,9 @@ dependencies = [
 
 [[package]]
 name = "http"
-version = "1.4.0"
+version = "1.4.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e3ba2a386d7f85a81f119ad7498ebe444d2e22c2af0b86b069416ace48b3311a"
+checksum = "8be7462df143984c4598a256ef469b251d7d7f9e271135073e78fc535414f3d0"
 dependencies = [
  "bytes",
  "itoa",
@@ -930,9 +956,9 @@ checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9"
 
 [[package]]
 name = "hyper"
-version = "1.9.0"
+version = "1.10.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6299f016b246a94207e63da54dbe807655bf9e00044f73ded42c3ac5305fbcca"
+checksum = "55281c53a1894c864990125767da440a4e630446785086f52523b20033b74498"
 dependencies = [
  "atomic-waker",
  "bytes",
@@ -1189,9 +1215,9 @@ checksum = "8f42a60cbdf9a97f5d2305f08a87dc4e09308d1276d28c869c684d7777685682"
 
 [[package]]
 name = "js-sys"
-version = "0.3.98"
+version = "0.3.99"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "67df7112613f8bfd9150013a0314e196f4800d3201ae742489d999db2f979f08"
+checksum = "142bc4740e452c1e57ade0cbc129f139c9093e354346f0872ef985f4f5cf5f11"
 dependencies = [
  "cfg-if",
  "futures-util",
@@ -1235,7 +1261,7 @@ dependencies = [
  "bitflags",
  "libc",
  "plain",
- "redox_syscall 0.8.0",
+ "redox_syscall 0.8.1",
 ]
 
 [[package]]
@@ -1272,9 +1298,9 @@ dependencies = [
 
 [[package]]
 name = "log"
-version = "0.4.29"
+version = "0.4.31"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897"
+checksum = "113b30b4cd05f7c06868fdb2854f66a7b9fece9a48425351cd532e810d74024f"
 
 [[package]]
 name = "lru-slab"
@@ -1309,9 +1335,9 @@ dependencies = [
 
 [[package]]
 name = "memchr"
-version = "2.8.0"
+version = "2.8.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79"
+checksum = "6b947ae49db0d222b1dbc6b113ce7248a3fc3a6ca21b696717bfc000ba4484d8"
 
 [[package]]
 name = "mime"
@@ -1321,9 +1347,9 @@ checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a"
 
 [[package]]
 name = "mio"
-version = "1.2.0"
+version = "1.2.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "50b7e5b27aa02a74bac8c3f23f448f8d87ff11f92d3aac1a6ed369ee08cc56c1"
+checksum = "02bd0af71c67b473010cbbc60715ee815645a4dc942899111f494b4b737d6fda"
 dependencies = [
  "libc",
  "wasi",
@@ -1422,9 +1448,9 @@ checksum = "d6790f58c7ff633d8771f42965289203411a5e5c68388703c06e14f24770b41e"
 
 [[package]]
 name = "openssl"
-version = "0.10.79"
+version = "0.10.80"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "bf0b434746ee2832f4f0baf10137e1cabb18cbe6912c69e2e33263c45250f542"
+checksum = "a45fa2aa886c42762255da344f0a0d313e254066c46aad76f300c3d3da62d967"
 dependencies = [
  "bitflags",
  "cfg-if",
@@ -1453,9 +1479,9 @@ checksum = "7c87def4c32ab89d880effc9e097653c8da5d6ef28e6b539d313baaacfbafcbe"
 
 [[package]]
 name = "openssl-sys"
-version = "0.9.115"
+version = "0.9.116"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "158fe5b292746440aa6e7a7e690e55aeb72d41505e2804c23c6973ad0e9c9781"
+checksum = "f28a22dc7140cda5f096e5e7724a6962ca81a7f8bfd2979f9b18c11af56318c4"
 dependencies = [
  "cc",
  "libc",
@@ -1663,7 +1689,7 @@ dependencies = [
  "once_cell",
  "socket2",
  "tracing",
- "windows-sys 0.52.0",
+ "windows-sys 0.60.2",
 ]
 
 [[package]]
@@ -1777,9 +1803,9 @@ dependencies = [
 
 [[package]]
 name = "redox_syscall"
-version = "0.8.0"
+version = "0.8.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7c7591fa2c6b601dfcfe5f043f65a1c39fcdf50efefcd7f1572e538c1f4b398d"
+checksum = "5b44b894f2a6e36457d665d1e08c3866add6ed5e70050c1b4ba8a8ddedb02ce7"
 dependencies = [
  "bitflags",
 ]
@@ -2042,9 +2068,9 @@ dependencies = [
 
 [[package]]
 name = "serde_json"
-version = "1.0.149"
+version = "1.0.150"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "83fc039473c5595ace860d8c4fafa220ff474b3fc6bfdb4293327f1a37e94d86"
+checksum = "e8014e44b4736ed0538adeecded0fce2a272f22dc9578a7eb6b2d9993c74cfb9"
 dependencies = [
  "itoa",
  "memchr",
@@ -2076,6 +2102,19 @@ dependencies = [
  "serde",
 ]
 
+[[package]]
+name = "serde_yaml"
+version = "0.9.34+deprecated"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6a8b1a1a2ebf674015cc02edccce75287f1a0130d394307b36743c2f5d504b47"
+dependencies = [
+ "indexmap",
+ "itoa",
+ "ryu",
+ "serde",
+ "unsafe-libyaml",
+]
+
 [[package]]
 name = "sha1"
 version = "0.10.6"
@@ -2109,9 +2148,9 @@ dependencies = [
 
 [[package]]
 name = "shlex"
-version = "1.3.0"
+version = "2.0.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64"
+checksum = "f8fadd59c855ef2080decdef8ff161eb6661b86933c9d82e5ba29dc602a55aba"
 
 [[package]]
 name = "signal-hook-registry"
@@ -2150,9 +2189,9 @@ dependencies = [
 
 [[package]]
 name = "socket2"
-version = "0.6.3"
+version = "0.6.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3a766e1110788c36f4fa1c2b71b387a7815aa65f88ce0229841826633d93723e"
+checksum = "52d1cfed4120b4d927bf7c0f86d2087a4a7d6027c906d9f9d525a80573b9be51"
 dependencies = [
  "libc",
  "windows-sys 0.61.2",
@@ -2594,9 +2633,9 @@ dependencies = [
 
 [[package]]
 name = "tower-http"
-version = "0.6.10"
+version = "0.6.11"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "68d6fdd9f81c2819c9a8b0e0cd91660e7746a8e6ea2ba7c6b2b057985f6bcb51"
+checksum = "4cfcf7e2740e6fc6d4d688b4ef00650406bb94adf4731e43c096c3a19fe40840"
 dependencies = [
  "bitflags",
  "bytes",
@@ -2692,9 +2731,9 @@ checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b"
 
 [[package]]
 name = "typenum"
-version = "1.20.0"
+version = "1.20.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "40ce102ab67701b8526c123c1bab5cbe42d7040ccfd0f64af1a385808d2f43de"
+checksum = "b6f5e870be6c3b371b77fe0ee0bafb859fa4964b4404c27de1d380043c4dda20"
 
 [[package]]
 name = "unicode-bidi"
@@ -2729,6 +2768,12 @@ version = "0.2.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853"
 
+[[package]]
+name = "unsafe-libyaml"
+version = "0.2.11"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "673aac59facbab8a9007c7f6108d11f63b603f7cabff99fabf650fea5c32b861"
+
 [[package]]
 name = "untrusted"
 version = "0.9.0"
@@ -2761,9 +2806,9 @@ checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821"
 
 [[package]]
 name = "uuid"
-version = "1.23.1"
+version = "1.23.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ddd74a9687298c6858e9b88ec8935ec45d22e8fd5e6394fa1bd4e99a87789c76"
+checksum = "d258b83ceec21034727ecee8c382cfa6c3e133699b0742c64571814fb420c9f7"
 dependencies = [
  "getrandom 0.4.2",
  "js-sys",
@@ -2840,9 +2885,9 @@ checksum = "b8dad83b4f25e74f184f64c43b150b91efe7647395b42289f38e50566d82855b"
 
 [[package]]
 name = "wasm-bindgen"
-version = "0.2.121"
+version = "0.2.122"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "49ace1d07c165b0864824eee619580c4689389afa9dc9ed3a4c75040d82e6790"
+checksum = "3ed04576f974d2b2fba0f38c51dbc5518011e38c36bf1143164be765528fd409"
 dependencies = [
  "cfg-if",
  "once_cell",
@@ -2853,9 +2898,9 @@ dependencies = [
 
 [[package]]
 name = "wasm-bindgen-futures"
-version = "0.4.71"
+version = "0.4.72"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "96492d0d3ffba25305a7dc88720d250b1401d7edca02cc3bcd50633b424673b8"
+checksum = "9473dbd2991ae90b6291c3c32c30c6187ac49aa32f9905d1cce280ec1e110b0f"
 dependencies = [
  "js-sys",
  "wasm-bindgen",
@@ -2863,9 +2908,9 @@ dependencies = [
 
 [[package]]
 name = "wasm-bindgen-macro"
-version = "0.2.121"
+version = "0.2.122"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8e68e6f4afd367a562002c05637acb8578ff2dea1943df76afb9e83d177c8578"
+checksum = "916151b09da36bd82f6615cbf3a419e2f0ba23a03c6160e8e92eb6bd4aa1dec6"
 dependencies = [
  "quote",
  "wasm-bindgen-macro-support",
@@ -2873,9 +2918,9 @@ dependencies = [
 
 [[package]]
 name = "wasm-bindgen-macro-support"
-version = "0.2.121"
+version = "0.2.122"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d95a9ec35c64b2a7cb35d3fead40c4238d0940c86d107136999567a4703259f2"
+checksum = "299047362ccbfce148b67ab7e73349f77748e00c8296f9542adfad2ad82c5c5e"
 dependencies = [
  "bumpalo",
  "proc-macro2",
@@ -2886,9 +2931,9 @@ dependencies = [
 
 [[package]]
 name = "wasm-bindgen-shared"
-version = "0.2.121"
+version = "0.2.122"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c4e0100b01e9f0d03189a92b96772a1fb998639d981193d7dbab487302513441"
+checksum = "9a929b2c61f11ba3e9bc35b50c1f25cb38e0e892c0c231ae2b8cf78d5dad4437"
 dependencies = [
  "unicode-ident",
 ]
@@ -2942,9 +2987,9 @@ dependencies = [
 
 [[package]]
 name = "web-sys"
-version = "0.3.98"
+version = "0.3.99"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4b572dff8bcf38bad0fa19729c89bb5748b2b9b1d8be70cf90df697e3a8f32aa"
+checksum = "6d621441cfc37b84979402712047321980c178f299193a3589d05b99e8763436"
 dependencies = [
  "js-sys",
  "wasm-bindgen",
@@ -3074,6 +3119,15 @@ dependencies = [
  "windows-targets 0.52.6",
 ]
 
+[[package]]
+name = "windows-sys"
+version = "0.60.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f2f500e4d28234f72040990ec9d39e3a6b950f9f22d3dba18416c35882612bcb"
+dependencies = [
+ "windows-targets 0.53.5",
+]
+
 [[package]]
 name = "windows-sys"
 version = "0.61.2"
@@ -3107,13 +3161,30 @@ dependencies = [
  "windows_aarch64_gnullvm 0.52.6",
  "windows_aarch64_msvc 0.52.6",
  "windows_i686_gnu 0.52.6",
- "windows_i686_gnullvm",
+ "windows_i686_gnullvm 0.52.6",
  "windows_i686_msvc 0.52.6",
  "windows_x86_64_gnu 0.52.6",
  "windows_x86_64_gnullvm 0.52.6",
  "windows_x86_64_msvc 0.52.6",
 ]
 
+[[package]]
+name = "windows-targets"
+version = "0.53.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4945f9f551b88e0d65f3db0bc25c33b8acea4d9e41163edf90dcd0b19f9069f3"
+dependencies = [
+ "windows-link",
+ "windows_aarch64_gnullvm 0.53.1",
+ "windows_aarch64_msvc 0.53.1",
+ "windows_i686_gnu 0.53.1",
+ "windows_i686_gnullvm 0.53.1",
+ "windows_i686_msvc 0.53.1",
+ "windows_x86_64_gnu 0.53.1",
+ "windows_x86_64_gnullvm 0.53.1",
+ "windows_x86_64_msvc 0.53.1",
+]
+
 [[package]]
 name = "windows_aarch64_gnullvm"
 version = "0.48.5"
@@ -3126,6 +3197,12 @@ version = "0.52.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3"
 
+[[package]]
+name = "windows_aarch64_gnullvm"
+version = "0.53.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a9d8416fa8b42f5c947f8482c43e7d89e73a173cead56d044f6a56104a6d1b53"
+
 [[package]]
 name = "windows_aarch64_msvc"
 version = "0.48.5"
@@ -3138,6 +3215,12 @@ version = "0.52.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469"
 
+[[package]]
+name = "windows_aarch64_msvc"
+version = "0.53.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b9d782e804c2f632e395708e99a94275910eb9100b2114651e04744e9b125006"
+
 [[package]]
 name = "windows_i686_gnu"
 version = "0.48.5"
@@ -3150,12 +3233,24 @@ version = "0.52.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b"
 
+[[package]]
+name = "windows_i686_gnu"
+version = "0.53.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "960e6da069d81e09becb0ca57a65220ddff016ff2d6af6a223cf372a506593a3"
+
 [[package]]
 name = "windows_i686_gnullvm"
 version = "0.52.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66"
 
+[[package]]
+name = "windows_i686_gnullvm"
+version = "0.53.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "fa7359d10048f68ab8b09fa71c3daccfb0e9b559aed648a8f95469c27057180c"
+
 [[package]]
 name = "windows_i686_msvc"
 version = "0.48.5"
@@ -3168,6 +3263,12 @@ version = "0.52.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66"
 
+[[package]]
+name = "windows_i686_msvc"
+version = "0.53.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1e7ac75179f18232fe9c285163565a57ef8d3c89254a30685b57d83a38d326c2"
+
 [[package]]
 name = "windows_x86_64_gnu"
 version = "0.48.5"
@@ -3180,6 +3281,12 @@ version = "0.52.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78"
 
+[[package]]
+name = "windows_x86_64_gnu"
+version = "0.53.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9c3842cdd74a865a8066ab39c8a7a473c0778a3f29370b5fd6b4b9aa7df4a499"
+
 [[package]]
 name = "windows_x86_64_gnullvm"
 version = "0.48.5"
@@ -3192,6 +3299,12 @@ version = "0.52.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d"
 
+[[package]]
+name = "windows_x86_64_gnullvm"
+version = "0.53.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0ffa179e2d07eee8ad8f57493436566c7cc30ac536a3379fdf008f47f6bb7ae1"
+
 [[package]]
 name = "windows_x86_64_msvc"
 version = "0.48.5"
@@ -3204,6 +3317,12 @@ version = "0.52.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec"
 
+[[package]]
+name = "windows_x86_64_msvc"
+version = "0.53.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d6bbff5f0aada427a1e5a6da5f1f98158182f26556f345ac9e04d36d0ebed650"
+
 [[package]]
 name = "wit-bindgen"
 version = "0.51.0"
@@ -3329,18 +3448,18 @@ dependencies = [
 
 [[package]]
 name = "zerocopy"
-version = "0.8.48"
+version = "0.8.50"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "eed437bf9d6692032087e337407a86f04cd8d6a16a37199ed57949d415bd68e9"
+checksum = "3b065d4f0e55f82fae73202e189638116a87c55ab6b8e6c2721e13dd9d854ad1"
 dependencies = [
  "zerocopy-derive",
 ]
 
 [[package]]
 name = "zerocopy-derive"
-version = "0.8.48"
+version = "0.8.50"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "70e3cd084b1788766f53af483dd21f93881ff30d7320490ec3ef7526d203bad4"
+checksum = "0b631b19d36a892ab55420c92dbc83ccd79274f25be714855d3074aa71cab639"
 dependencies = [
  "proc-macro2",
  "quote",
@@ -3349,9 +3468,9 @@ dependencies = [
 
 [[package]]
 name = "zerofrom"
-version = "0.1.7"
+version = "0.1.8"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "69faa1f2a1ea75661980b013019ed6687ed0e83d069bc1114e2cc74c6c04c4df"
+checksum = "0ec05a11813ea801ff6d75110ad09cd0824ddba17dfe17128ea0d5f68e6c5272"
 dependencies = [
  "zerofrom-derive",
 ]
diff --git a/Cargo.toml b/Cargo.toml
index 4289b79..49fd32f 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -17,7 +17,9 @@ pedantic = { level = "warn", priority = -1 }
 
 [workspace.dependencies]
 agentic-core = { path = "crates/agentic-core" }
+async-stream = "0.3"
 axum = "0.8"
+either = "1"
 bytes = "1"
 clap = { version = "4", features = ["derive", "env"] }
 criterion = { version = "0.5", features = ["async_tokio"] }
diff --git a/crates/agentic-core/Cargo.toml b/crates/agentic-core/Cargo.toml
index 281333d..612e0fe 100644
--- a/crates/agentic-core/Cargo.toml
+++ b/crates/agentic-core/Cargo.toml
@@ -7,7 +7,9 @@ license.workspace = true
 repository.workspace = true
 
 [dependencies]
+async-stream.workspace = true
 bytes.workspace = true
+either.workspace = true
 futures.workspace = true
 http.workspace = true
 reqwest = { workspace = true, features = ["default-tls", "stream"] }
@@ -22,12 +24,14 @@ chrono = { version = "0.4", features = ["serde"] }
 uuid = { version = "1", features = ["v7", "serde"] }
 
 [dev-dependencies]
+axum.workspace = true
 criterion = { workspace = true }
+serde_yaml = "0.9"
+tokio = { workspace = true, features = ["full"] }
 
 [[bench]]
-name = "storage_crud"
+name = "benches"
 harness = false
 
-
 [lints]
 workspace = true
diff --git a/crates/agentic-core/benches/benches.rs b/crates/agentic-core/benches/benches.rs
new file mode 100644
index 0000000..b49511f
--- /dev/null
+++ b/crates/agentic-core/benches/benches.rs
@@ -0,0 +1,6 @@
+mod executor_throughput;
+mod storage_crud;
+
+use criterion::criterion_main;
+
+criterion_main!(storage_crud::storage_benches, executor_throughput::executor_benches);
diff --git a/crates/agentic-core/benches/executor_throughput.rs b/crates/agentic-core/benches/executor_throughput.rs
new file mode 100644
index 0000000..8774e5d
--- /dev/null
+++ b/crates/agentic-core/benches/executor_throughput.rs
@@ -0,0 +1,304 @@
+//! Throughput benchmarks for the executor agentic loop (`execute`).
+//!
+//! Measures wall-clock time per turn across chain depths 1–N, for both
+//! blocking (non-streaming) and streaming execution paths.
+//!
+//! | Group              | What grows with depth                              |
+//! |--------------------|----------------------------------------------------|
+//! | `execute/blocking` | rehydrate cost (DB reads) + JSON fetch + persist   |
+//! | `execute/streaming`| rehydrate cost + SSE accumulate + persist          |
+//! | `rehydrate_only`   | pure rehydrate step, no LLM call                   |
+//!
+//! # Configuring max depth
+//!
+//! Set `BENCH_MAX_DEPTH` before running to control how many depths are swept:
+//!
+//! ```bash
+//! BENCH_MAX_DEPTH=3 cargo bench --bench benches
+//! ```
+//!
+//! Defaults to 5 when the variable is unset or unparsable; 0 is clamped to 1.
+//!
+//! # Sample size
+//!
+//! Pass `-- --sample-size=N` (criterion flag) to override the number of
+//! iterations criterion collects per benchmark:
+//!
+//! ```bash
+//! cargo bench --bench benches -- --sample-size=20
+//! ```
+
+use std::sync::{Arc, Mutex};
+
+use axum::Router;
+use axum::http::header;
+use axum::response::IntoResponse;
+use axum::routing::post;
+use criterion::{BatchSize, BenchmarkId, Criterion, black_box, criterion_group};
+use either::Either;
+use futures::StreamExt;
+
+use agentic_core::executor::{ConversationHandler, ExecutionContext, ResponseHandler, execute, rehydrate_conversation};
+use agentic_core::storage::{ConversationStore, DbPool, ResponseStore, create_pool_with_schema};
+use agentic_core::types::io::{ResponsesInput, ToolChoice};
+use agentic_core::types::request_response::RequestPayload;
+
+fn max_depth() -> usize {
+    std::env::var("BENCH_MAX_DEPTH")
+        .ok()
+        .and_then(|v| v.parse::<usize>().ok())
+        .unwrap_or(5)
+        .max(1)
+}
+
+const NON_STREAMING_BODY: &str = r#"{
+  "id": "resp_bench_upstream",
+  "object": "response",
+  "created_at": 1700000000,
+  "status": "completed",
+  "model": "test-model",
+  "output": [{
+    "type": "message",
+    "id": "msg_bench",
+    "role": "assistant",
+    "status": "completed",
+    "content": [{"type": "output_text", "text": "OK", "annotations": []}]
+  }],
+  "usage": {
+    "input_tokens": 5, "output_tokens": 1, "total_tokens": 6,
+    "input_tokens_details": {"cached_tokens": 0},
+    "output_tokens_details": {"reasoning_tokens": 0}
+  }
+}"#;
+
+const STREAMING_BODY: &str = concat!(
+    "data: {\"type\":\"response.created\",\"response\":{\"id\":\"resp_bench_upstream\",\"status\":\"in_progress\"}}\n\n",
+    "data: {\"type\":\"response.output_item.added\",\"item\":{\"id\":\"msg_bench\",\"type\":\"message\",\"status\":\"in_progress\",\"content\":[],\"role\":\"assistant\"}}\n\n",
+    "data: {\"type\":\"response.output_text.delta\",\"delta\":\"OK\"}\n\n",
+    "data: {\"type\":\"response.completed\",\"response\":{",
+    "\"id\":\"resp_bench_upstream\",\"object\":\"response\",\"created_at\":1700000000,",
+    "\"status\":\"completed\",\"model\":\"test-model\",",
+    "\"output\":[{\"type\":\"message\",\"id\":\"msg_bench\",\"role\":\"assistant\",",
+    "\"status\":\"completed\",\"content\":[{\"type\":\"output_text\",\"text\":\"OK\",\"annotations\":[]}]}],",
+    "\"usage\":{\"input_tokens\":5,\"output_tokens\":1,\"total_tokens\":6,",
+    "\"input_tokens_details\":{\"cached_tokens\":0},",
+    "\"output_tokens_details\":{\"reasoning_tokens\":0}}",
+    "}}\n\n",
+    "data: [DONE]\n\n",
+);
+
+fn start_mock_server(rt: &tokio::runtime::Runtime) -> String {
+    let listener = rt.block_on(async { tokio::net::TcpListener::bind("127.0.0.1:0").await.unwrap() });
+    let addr = listener.local_addr().unwrap();
+
+    rt.spawn(async move {
+        let app = Router::new()
+            .route(
+                "/v1/responses",
+                post(|body: axum::body::Bytes| async move {
+                    let is_stream = serde_json::from_slice::<serde_json::Value>(&body)
+                        .ok()
+                        .and_then(|j| j["stream"].as_bool())
+                        .unwrap_or(false);
+
+                    if is_stream {
+                        axum::http::Response::builder()
+                            .status(200)
+                            .header(header::CONTENT_TYPE, "text/event-stream; charset=utf-8")
+                            .body(axum::body::Body::from(STREAMING_BODY))
+                            .unwrap()
+                            .into_response()
+                    } else {
+                        axum::http::Response::builder()
+                            .status(200)
+                            .header(header::CONTENT_TYPE, "application/json")
+                            .body(axum::body::Body::from(NON_STREAMING_BODY))
+                            .unwrap()
+                            .into_response()
+                    }
+                }),
+            )
+            .route(
+                "/v1/conversations",
+                post(|| async { (axum::http::StatusCode::OK, "{}") }),
+            );
+        axum::serve(listener, app).await.ok();
+    });
+
+    format!("http://{addr}")
+}
+
+fn make_request(input: &str, stream: bool, prev_id: Option<String>) -> RequestPayload {
+    RequestPayload {
+        model: "test-model".to_string(),
+        input: ResponsesInput::Text(input.to_string()),
+        instructions: None,
+        previous_response_id: prev_id,
+        conversation_id: None,
+        tools: None,
+        tool_choice: ToolChoice::Auto,
+        stream,
+        store: true,
+        include: None,
+        temperature: None,
+        top_p: None,
+        max_output_tokens: None,
+        truncation: None,
+        metadata: None,
+    }
+}
+
+fn build_exec_ctx(rt: &tokio::runtime::Runtime, mock_url: String) -> (Arc<ExecutionContext>, Arc<DbPool>) {
+    let pool = rt.block_on(async { create_pool_with_schema(None).await.expect("bench pool creation failed") });
+    let conv_handler = ConversationHandler::new(ConversationStore::new(pool.clone()));
+    let resp_handler = ResponseHandler::new(ResponseStore::new(pool.clone()));
+    let client = Arc::new(reqwest::Client::new());
+    let exec_ctx = Arc::new(ExecutionContext::new(
+        conv_handler,
+        resp_handler,
+        client,
+        mock_url,
+        None,
+    ));
+    (exec_ctx, pool)
+}
+
+/// Delete all rows from the `items`, `responses` and `conversations` tables so
+/// the next bench group starts from a clean state and the DB does not grow
+/// without bound.  Failed `DELETE`s are ignored (`.ok()`), i.e. best effort.
+fn clear_db(rt: &tokio::runtime::Runtime, pool: &DbPool) {
+    rt.block_on(async {
+        sqlx::query("DELETE FROM items").execute(pool).await.ok();
+        sqlx::query("DELETE FROM responses").execute(pool).await.ok();
+        sqlx::query("DELETE FROM conversations").execute(pool).await.ok();
+    });
+}
+
+/// Build a chain of `depth - 1` non-streaming turns and return the last
+/// response ID (`None` when `depth <= 1`).  Callers run this via `block_on`
+/// before the measurement loop, so its cost is NOT part of the timing.
+async fn seed_chain(exec_ctx: &Arc<ExecutionContext>, depth: usize) -> Option<String> {
+    let mut prev_id: Option<String> = None;
+    for i in 0..depth.saturating_sub(1) {
+        let req = make_request(&format!("seed {i}"), false, prev_id.take());
+        if let Either::Left(p) = execute(req, Arc::clone(exec_ctx)).await.expect("seed") {
+            prev_id = Some(p.id);
+        }
+    }
+    prev_id
+}
+
+// Bench: blocking path, depths 1–max_depth
+//
+// The chain of N-1 prior turns is seeded with `rt.block_on()` BEFORE criterion
+// starts the measurement loop, so only turn N is timed.
+fn bench_execute_blocking(c: &mut Criterion, exec_ctx: &Arc<ExecutionContext>) {
+    let mut group = c.benchmark_group("execute/blocking");
+    let rt = tokio::runtime::Runtime::new().unwrap();
+
+    for depth in 1..=max_depth() {
+        // Pre-seed N-1 turns outside criterion — their cost is NOT measured.
+        let prev_id = rt.block_on(seed_chain(exec_ctx, depth));
+
+        group.bench_with_input(BenchmarkId::new("turns", depth), &depth, |b, _| {
+            b.to_async(tokio::runtime::Runtime::new().unwrap()).iter_batched(
+                // Synchronous setup: just hand the pre-seeded prev_id to each sample.
+                || prev_id.clone(),
+                |prev_id| {
+                    let exec_ctx = Arc::clone(exec_ctx);
+                    async move {
+                        let req = make_request("bench turn", false, black_box(prev_id));
+                        execute(req, exec_ctx).await.expect("execute")
+                    }
+                },
+                BatchSize::SmallInput,
+            );
+        });
+    }
+    group.finish();
+}
+
+// Bench: streaming path, depths 1–max_depth (same pre-seed approach).
+fn bench_execute_streaming(c: &mut Criterion, exec_ctx: &Arc<ExecutionContext>) {
+    let mut group = c.benchmark_group("execute/streaming");
+    let rt = tokio::runtime::Runtime::new().unwrap();
+
+    for depth in 1..=max_depth() {
+        let prev_id = rt.block_on(seed_chain(exec_ctx, depth));
+
+        group.bench_with_input(BenchmarkId::new("turns", depth), &depth, |b, _| {
+            b.to_async(tokio::runtime::Runtime::new().unwrap()).iter_batched(
+                || prev_id.clone(),
+                |prev_id| {
+                    let exec_ctx = Arc::clone(exec_ctx);
+                    async move {
+                        let req = make_request("bench turn", true, black_box(prev_id));
+                        let result = execute(req, exec_ctx).await.expect("execute");
+                        if let Either::Right(stream) = result {
+                            let mut stream = Box::pin(stream);
+                            while stream.next().await.is_some() {}
+                        }
+                    }
+                },
+                BatchSize::SmallInput,
+            );
+        });
+    }
+    group.finish();
+}
+
+/// Bench: `rehydrate_conversation` alone, against a chain of 1..=max_depth turns.
+/// One seed turn is appended per depth, outside the timed closure.
+fn bench_rehydrate_only(c: &mut Criterion, exec_ctx: &Arc<ExecutionContext>) {
+    let mut group = c.benchmark_group("rehydrate_only");
+
+    // `chain_tip` holds the ID of the newest response in the shared chain;
+    // each depth extends that chain instead of rebuilding it from scratch.
+    let chain_tip: Arc<Mutex<Option<String>>> = Arc::new(Mutex::new(None));
+    let rt = tokio::runtime::Runtime::new().unwrap();
+
+    for depth in 1..=max_depth() {
+        // Append one turn unconditionally so the chain is exactly `depth` turns
+        // deep when measured; seeding runs outside the timed closure.
+        rt.block_on(async {
+            let prev_id = chain_tip.lock().unwrap().clone();
+            let req = make_request("seed", false, prev_id);
+            if let Either::Left(p) = execute(req, Arc::clone(exec_ctx)).await.expect("seed") {
+                *chain_tip.lock().unwrap() = Some(p.id);
+            }
+        });
+
+        group.bench_with_input(BenchmarkId::new("prev_response_depth", depth), &depth, |b, _| {
+            b.to_async(tokio::runtime::Runtime::new().unwrap()).iter_batched(
+                || chain_tip.lock().unwrap().clone(),
+                |prev_id| {
+                    let exec_ctx = Arc::clone(exec_ctx);
+                    async move {
+                        let req = make_request("bench", false, black_box(prev_id));
+                        rehydrate_conversation(req, &exec_ctx).await.expect("rehydrate")
+                    }
+                },
+                BatchSize::SmallInput,
+            );
+        });
+    }
+
+    group.finish();
+}
+
+fn init_benches(c: &mut Criterion) {
+    let rt = tokio::runtime::Runtime::new().unwrap();
+    let mock_url = start_mock_server(&rt);
+    let (exec_ctx, pool) = build_exec_ctx(&rt, mock_url);
+
+    bench_execute_blocking(c, &exec_ctx);
+    clear_db(&rt, &pool);
+
+    bench_execute_streaming(c, &exec_ctx);
+    clear_db(&rt, &pool);
+
+    bench_rehydrate_only(c, &exec_ctx);
+    clear_db(&rt, &pool);
+}
+
+criterion_group!(executor_benches, init_benches);
diff --git a/crates/agentic-core/benches/storage_crud.rs b/crates/agentic-core/benches/storage_crud.rs
index ff551da..221903d 100644
--- a/crates/agentic-core/benches/storage_crud.rs
+++ b/crates/agentic-core/benches/storage_crud.rs
@@ -1,4 +1,4 @@
-use criterion::{BatchSize, Criterion, black_box, criterion_group, criterion_main};
+use criterion::{BatchSize, Criterion, black_box, criterion_group};
 
 use agentic_core::storage::{ConversationStore, InOutItem, ResponseMetadata, ResponseStore, create_pool_with_schema};
 use agentic_core::types::io::{InputItem, InputMessage, InputMessageContent, OutputItem, OutputMessage};
@@ -205,5 +205,4 @@ fn init_benches(c: &mut Criterion) {
     });
 }
 
-criterion_group!(benches, init_benches);
-criterion_main!(benches);
+criterion_group!(storage_benches, init_benches);
diff --git a/crates/agentic-core/src/executor/accumulator.rs b/crates/agentic-core/src/executor/accumulator.rs
new file mode 100644
index 0000000..5b94e8e
--- /dev/null
+++ b/crates/agentic-core/src/executor/accumulator.rs
@@ -0,0 +1,329 @@
+//! Response accumulation and parsing utilities.
+//!
+//! Handles both streaming (SSE) and non-streaming JSON response formats,
+//! accumulating chunks into a unified `ResponsePayload` structure.
+//!
+//! Streaming path uses a channel + `spawn_blocking` so that SSE JSON parsing
+//! runs on a blocking thread while the async task continues reading from the
+//! network — keeping the tokio executor thread free between chunk arrivals.
+
+use std::pin::Pin;
+use std::sync::mpsc;
+
+use futures::{Stream, StreamExt};
+
+use crate::executor::error::{ExecutorError, ExecutorResult};
+use crate::types::event::{MessageStatus, ResponseStatus, SSEEventType};
+use crate::types::io::{OutputItem, OutputMessage, OutputTextContent, ResponseUsage};
+use crate::types::request_response::{IncompleteDetails, ResponsePayload};
+use crate::utils::common::{deserialize_from_str, deserialize_from_value, deserialize_from_value_opt};
+use crate::utils::uuid7_str;
+
+/// Accumulates LLM response chunks from streaming or non-streaming sources.
+#[derive(Debug)]
+pub struct ResponseAccumulator {
+    response_id: String, // replaced by the id carried in a `ResponseCreated` event, if one arrives
+    conversation_id: Option<String>, // copied unchanged into the final payload
+    output: Vec<OutputItem>, // finished output items, in the order they were pushed
+    usage: Option<ResponseUsage>, // filled from the response body or the `ResponseDone` event
+    status: ResponseStatus, // `InProgress` when built via `new`
+    incomplete_details: Option<IncompleteDetails>, // only `mark_incomplete` sets this to `Some`
+    // In-flight message state — owned here so process_sse_line takes only &mut self.
+    current_message: Option<OutputMessage>,
+    accumulated_text: String,
+}
+
+impl ResponseAccumulator {
+    /// Creates an empty accumulator: status `InProgress`, no output, usage, or in-flight message.
+    #[must_use]
+    pub fn new(response_id: String, conversation_id: Option<String>) -> Self {
+        Self {
+            response_id,
+            conversation_id,
+            output: Vec::new(),
+            usage: None,
+            status: ResponseStatus::InProgress,
+            incomplete_details: None,
+            current_message: None,
+            accumulated_text: String::new(),
+        }
+    }
+
+    /// Builds an accumulator from a complete (non-streaming) JSON response body.
+    ///
+    /// `conversation_id` is stored as given; it is not read from `body`.
+    ///
+    /// # Errors
+    /// Returns `ExecutorError::JsonError` if `body` is not valid JSON, or
+    /// `ExecutorError::ParseError` if the top-level `id` string is missing.
+    pub fn from_json(body: &str, conversation_id: Option<&str>) -> ExecutorResult<Self> {
+        let root: serde_json::Value = deserialize_from_str(body).map_err(ExecutorError::JsonError)?;
+
+        let Some(id) = root["id"].as_str() else {
+            return Err(ExecutorError::ParseError("missing 'id' field in response".into()));
+        };
+
+        // Items for which `deserialize_from_value_opt` yields `None` are dropped, not treated as errors.
+        let mut output = Vec::new();
+        if let Some(items) = root["output"].as_array() {
+            output.reserve(items.len());
+            for item in items {
+                if let Some(parsed) = deserialize_from_value_opt::<OutputItem>(item.clone()) {
+                    output.push(parsed);
+                }
+            }
+        }
+
+        // A missing `status` means completed; an unparsable one falls back to the enum default.
+        let status: ResponseStatus = match root["status"].as_str() {
+            Some(raw) => raw.parse().unwrap_or_default(),
+            None => ResponseStatus::Completed,
+        };
+
+        Ok(Self {
+            response_id: id.to_string(),
+            conversation_id: conversation_id.map(str::to_string),
+            output,
+            usage: deserialize_from_value_opt::<ResponseUsage>(root["usage"].clone()),
+            status,
+            incomplete_details: None,
+            current_message: None,
+            accumulated_text: String::new(),
+        })
+    }
+
+    /// Drains an async stream of raw SSE lines into an accumulator.
+    ///
+    /// Lines are forwarded over a `std::sync::mpsc` channel to a
+    /// `spawn_blocking` worker, which parses them while this task keeps
+    /// awaiting the next line.
+    ///
+    /// Forwarding stops early if the worker's receiver has gone away; the
+    /// worker's result is still awaited in that case.
+    ///
+    /// # Errors
+    /// Returns the first `Err` item produced by `stream` unchanged, or
+    /// `ExecutorError::StreamError` if joining the worker task fails.
+    pub async fn from_stream(
+        mut stream: Pin<Box<dyn Stream<Item = Result<String, ExecutorError>> + Send>>,
+        conversation_id: Option<&str>,
+    ) -> ExecutorResult<Self> {
+        // The worker closure must be 'static, so it gets an owned copy of the id.
+        let owned_conv_id = conversation_id.map(str::to_string);
+        let (line_tx, line_rx) = mpsc::channel::<String>();
+
+        // Parsing runs on the blocking pool; this task only forwards lines.
+        let worker = tokio::task::spawn_blocking(move || Self::process_stream_chunks(line_rx, owned_conv_id));
+
+        while let Some(item) = stream.next().await {
+            // A stream error aborts immediately; dropping `line_tx` on return ends the worker loop.
+            let line = item?;
+            // `send` fails only once the receiver is gone, i.e. the worker has already stopped.
+            if line_tx.send(line).is_err() {
+                break;
+            }
+        }
+
+        // Closing the sender is the worker's end-of-input signal.
+        drop(line_tx);
+
+        // Awaiting the join handle does not block the executor thread.
+        match worker.await {
+            Ok(acc) => Ok(acc),
+            Err(_) => Err(ExecutorError::StreamError("Worker thread panicked".into())),
+        }
+    }
+
+    /// Blocking worker: drains `rx` until it closes, finalizes, and promotes a leftover `InProgress` to `Completed`.
+    fn process_stream_chunks(rx: mpsc::Receiver<String>, conversation_id: Option<String>) -> Self {
+        let mut acc = Self::new(uuid7_str("resp_"), conversation_id);
+        while let Ok(line) = rx.recv() {
+            acc.process_sse_line(&line);
+        }
+        acc.finalize_current_message();
+        if matches!(acc.status, ResponseStatus::InProgress) {
+            acc.status = ResponseStatus::Completed;
+        }
+        acc
+    }
+
+    /// Builds an accumulator from already-buffered raw SSE lines, synchronously.
+    ///
+    /// Intended for lines collected ahead of time (e.g. a recorded stream);
+    /// use [`from_stream`](Self::from_stream) for a live async stream.
+    /// Never fails: lines that cannot be parsed are ignored.
+    /// Unlike `from_stream`, the status is left as `InProgress` when no
+    /// `ResponseDone` event is seen.
+    #[must_use]
+    pub fn from_sse_lines(lines: impl IntoIterator<Item = String>, conversation_id: Option<&str>) -> Self {
+        let mut acc = Self::new(uuid7_str("resp_"), conversation_id.map(str::to_string));
+        lines.into_iter().for_each(|line| acc.process_sse_line(&line));
+        acc.finalize_current_message();
+        acc
+    }
+
+    /// Closes the in-flight message (if any), marking it completed and pushing it to `output` with the accumulated text.
+    fn finalize_current_message(&mut self) {
+        if let Some(mut msg) = self.current_message.take() {
+            if !self.accumulated_text.is_empty() { // an empty buffer adds no content part
+                msg.content.push(OutputTextContent::new(&self.accumulated_text));
+            }
+            msg.status = MessageStatus::Completed.as_str().to_string();
+            self.output.push(OutputItem::Message(msg));
+        }
+        self.accumulated_text.clear(); // runs even when no message was open, discarding any buffered text
+    }
+
+    /// Processes a single raw SSE line, updating accumulator state.
+    ///
+    /// Lines without the exact `data: ` prefix, `[DONE]`, and malformed JSON are silently skipped.
+    fn process_sse_line(&mut self, line: &str) {
+        let Some(data_str) = line.strip_prefix("data: ") else {
+            return;
+        };
+        if data_str == "[DONE]" {
+            return;
+        }
+        let Ok(json) = deserialize_from_str::<serde_json::Value>(data_str) else {
+            return;
+        };
+        // Dispatch on the event `type`: a missing one maps to `Other`, an unparsable one to the enum default.
+        match json["type"]
+            .as_str()
+            .map_or(SSEEventType::Other, |s| s.parse().unwrap_or_default())
+        {
+            SSEEventType::ResponseCreated => {
+                if let Some(id) = json["response"]["id"].as_str() {
+                    self.response_id = id.to_string(); // replaces the locally generated id
+                }
+            }
+            SSEEventType::ResponseOutputItemAdded => {
+                self.finalize_current_message(); // close the previous message before opening a new one
+                let item_id = json["item"]["id"]
+                    .as_str()
+                    .map_or_else(|| uuid7_str("msg_"), str::to_string);
+                self.current_message = Some(OutputMessage::new(&item_id, MessageStatus::InProgress.as_str()));
+            }
+            SSEEventType::ResponseOutputTextDelta => {
+                // NOTE(review): a delta arriving with no open message is dropped by the next finalize — confirm intended.
+                if let Some(delta) = json["delta"].as_str() {
+                    self.accumulated_text.push_str(delta);
+                }
+            }
+            SSEEventType::ResponseDone => {
+                self.finalize_current_message();
+                self.status = ResponseStatus::Completed;
+                if let Ok(usage) = deserialize_from_value::<ResponseUsage>(json["response"]["usage"].clone()) {
+                    self.usage = Some(usage); // usage that fails to deserialize is ignored
+                }
+            }
+            SSEEventType::Other => {}
+        }
+    }
+
+    /// Sets the status to `Incomplete` and records `reason` in `incomplete_details`, replacing any earlier value.
+    pub fn mark_incomplete(&mut self, reason: impl Into<String>) {
+        self.status = ResponseStatus::Incomplete;
+        self.incomplete_details = Some(IncompleteDetails {
+            reason: Some(reason.into()),
+        });
+    }
+
+    /// Consumes the accumulator and builds the `ResponsePayload`.
+    ///
+    /// `model`, `previous_response_id` and `instructions` are supplied by the caller
+    /// because they come from the original request, not from the LLM response.
+    #[must_use]
+    pub fn finalize(
+        self,
+        model: &str,
+        previous_response_id: Option<&str>,
+        instructions: Option<&str>,
+    ) -> ResponsePayload {
+        ResponsePayload {
+            id: self.response_id,
+            object: "response".to_string(),
+            created_at: chrono::Utc::now().timestamp(), // stamped when finalize runs
+            model: model.to_string(),
+            status: self.status.as_str().to_string(), // the status is copied as-is, not forced to completed
+            output: self.output,
+            usage: self.usage,
+            incomplete_details: self.incomplete_details,
+            error: None, // this accumulator never populates `error`
+            previous_response_id: previous_response_id.map(str::to_string),
+            conversation_id: self.conversation_id,
+            instructions: instructions.map(str::to_string),
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    // Unit tests for `ResponseAccumulator`; they read private fields through `super::*`.
+    #[test]
+    fn test_accumulator_new() {
+        let acc = ResponseAccumulator::new("resp_123".into(), Some("conv_456".into()));
+        assert_eq!(acc.response_id, "resp_123");
+        assert_eq!(acc.conversation_id, Some("conv_456".into()));
+        assert_eq!(acc.status, ResponseStatus::InProgress);
+    }
+
+    #[test]
+    fn test_accumulator_mark_incomplete() {
+        let mut acc = ResponseAccumulator::new("resp_123".into(), None);
+        acc.mark_incomplete("Stream interrupted");
+        assert_eq!(acc.status, ResponseStatus::Incomplete);
+        assert!(acc.incomplete_details.is_some());
+    }
+
+    #[test]
+    fn test_accumulator_finalize() {
+        let acc = ResponseAccumulator::new("resp_123".into(), Some("conv_456".into()));
+        let payload = acc.finalize("gpt-4o", Some("resp_prev"), Some("be helpful"));
+        assert_eq!(payload.id, "resp_123");
+        assert_eq!(payload.model, "gpt-4o");
+        assert_eq!(payload.conversation_id, Some("conv_456".into()));
+        assert_eq!(payload.previous_response_id, Some("resp_prev".into()));
+        assert_eq!(payload.instructions, Some("be helpful".into()));
+        assert_eq!(payload.status, ResponseStatus::InProgress.as_str()); // finalize copies the status unchanged
+    }
+
+    #[test]
+    fn test_accumulator_from_sse_lines_empty() {
+        let acc = ResponseAccumulator::from_sse_lines(vec![], None);
+        assert_eq!(acc.status, ResponseStatus::InProgress); // from_sse_lines does not promote to Completed
+        assert!(acc.output.is_empty());
+    }
+
+    #[test]
+    fn test_accumulator_text_delta_assigned_to_message() {
+        let lines = vec![
+            r#"data: {"type":"response.created","response":{"id":"resp_abc"}}"#.to_string(),
+            r#"data: {"type":"response.output_item.added","item":{"id":"msg_1"}}"#.to_string(),
+            r#"data: {"type":"response.output_text.delta","delta":"Hello"}"#.to_string(),
+            r#"data: {"type":"response.output_text.delta","delta":" world"}"#.to_string(),
+            r#"data: {"type":"response.done","response":{"usage":{"input_tokens":5,"output_tokens":2,"total_tokens":7}}}"#.to_string(),
+        ];
+
+        let acc = ResponseAccumulator::from_sse_lines(lines, None);
+        assert_eq!(acc.status, ResponseStatus::Completed);
+        assert_eq!(acc.output.len(), 1);
+        // Both deltas must end up as one text part on the single message.
+        if let OutputItem::Message(msg) = &acc.output[0] {
+            assert_eq!(msg.content.len(), 1);
+            assert_eq!(msg.content[0].text, "Hello world");
+        } else {
+            panic!("expected OutputItem::Message");
+        }
+
+        assert!(acc.usage.is_some());
+        let usage = acc.usage.unwrap();
+        assert_eq!(usage.total_tokens, 7);
+    }
+
+    #[test]
+    fn test_message_status_enum() {
+        assert_eq!(MessageStatus::Completed.as_str(), "completed");
+        assert_eq!(MessageStatus::InProgress.as_str(), "in_progress");
+    }
+}
diff --git a/crates/agentic-core/src/executor/engine.rs b/crates/agentic-core/src/executor/engine.rs
new file mode 100644
index 0000000..f888501
--- /dev/null
+++ b/crates/agentic-core/src/executor/engine.rs
@@ -0,0 +1,410 @@
+//! Agentic loop executor.
+//!
+//! Exposes each step of the loop as a public function so consumers can compose
+//! them directly (e.g. as Praxis filters). [`execute`] is the convenience entry
+//! point that composes all steps with the default control flow.
+
+use std::pin::Pin;
+use std::sync::Arc;
+
+use async_stream::stream;
+use either::Either;
+use futures::{Stream, StreamExt};
+use tracing::warn;
+
+use crate::executor::accumulator::ResponseAccumulator;
+use crate::executor::error::{ExecutorError, ExecutorResult};
+use crate::executor::modes::{ConversationHandler, ResponseHandler};
+use crate::executor::request::{ExecutionContext, RequestContext};
+use crate::storage::InOutItem;
+use crate::types::event::ResponseStatus;
+use crate::types::io::{InputItem, ResponsesInput, resolve_tool_choice, resolve_tools};
+use crate::types::request_response::{RequestPayload, ResponsePayload};
+use crate::utils::common::serialize_to_string;
+use crate::utils::uuid7_str;
+
+use std::time::Duration;
+
+/// SSE stream of raw lines sent to the client (`data: …\n\n` per event).
+pub type BoxStream = Pin<Box<dyn Stream<Item = String> + Send>>;
+
+/// Wire-format marker signalling end-of-stream to the client.
+const DONE_MARKER: &str = "data: [DONE]\n\n";
+
+/// Awaits the next raw chunk of a streaming response body.
+///
+/// Resolves to `Ok(Some(bytes))` for data and `Ok(None)` once the stream ends. A read failure
+/// becomes `ExecutorError::NetworkError`; waiting longer than `timeout` for one chunk becomes
+/// `ExecutorError::StreamError`. A zero `timeout` disables the deadline entirely.
+async fn next_chunk<S>(stream: &mut S, timeout: Duration) -> ExecutorResult<Option<bytes::Bytes>>
+where
+    S: futures::Stream<Item = Result<bytes::Bytes, reqwest::Error>> + Unpin,
+{
+    if timeout.is_zero() {
+        return stream.next().await.transpose().map_err(ExecutorError::NetworkError);
+    }
+    let timed_out =
+        |_| ExecutorError::StreamError("chunk timeout: no data received within the configured window".into());
+    let item = tokio::time::timeout(timeout, stream.next()).await.map_err(timed_out)?;
+    item.transpose().map_err(ExecutorError::NetworkError)
+}
+
+/// Posts `body` as JSON to `url` and returns the response only if its status is 2xx.
+///
+/// Used by both the blocking path (caller reads `.text()`) and the streaming
+/// path (caller reads `.bytes_stream()`). Send failures and non-2xx statuses are
+/// both reported as [`ExecutorError::LLMRequest`].
+async fn send_request(
+    client: &reqwest::Client,
+    url: &str,
+    body: String,
+    auth: Option<&str>,
+) -> ExecutorResult<reqwest::Response> {
+    let base = client.post(url).header("Content-Type", "application/json").body(body);
+    let req = match auth {
+        Some(key) => base.bearer_auth(key),
+        None => base,
+    };
+
+    let resp = req.send().await.map_err(|e| {
+        // A timeout is surfaced as 504; every other send failure as 502.
+        let (status, body) = if e.is_timeout() {
+            (http::StatusCode::GATEWAY_TIMEOUT, "upstream timeout")
+        } else {
+            (http::StatusCode::BAD_GATEWAY, "upstream unavailable")
+        };
+        ExecutorError::LLMRequest {
+            status,
+            body: body.into(),
+        }
+    })?;
+
+    if resp.status().is_success() {
+        return Ok(resp);
+    }
+
+    let code = resp.status().as_u16();
+    // The status code is the primary signal: a failed body read is only logged and yields an empty body.
+    let body = resp
+        .text()
+        .await
+        .inspect_err(|e| tracing::debug!("failed to read error response body: {e}"))
+        .unwrap_or_default();
+    Err(ExecutorError::LLMRequest {
+        status: http::StatusCode::from_u16(code).unwrap_or(http::StatusCode::INTERNAL_SERVER_ERROR),
+        body,
+    })
+}
+
+/// Sends one POST via [`send_request`] and returns the full response body as text.
+///
+/// Used by [`run_blocking`] so it can pass the result to [`ResponseAccumulator::from_json`].
+async fn fetch_response_json(
+    upstream_json: String,
+    url: &str,
+    client: &reqwest::Client,
+    auth: Option<&str>,
+) -> ExecutorResult<String> {
+    let resp = send_request(client, url, upstream_json, auth).await?;
+    // A body-read failure keeps the reqwest::Error as the typed source (NetworkError).
+    resp.text().await.map_err(ExecutorError::NetworkError)
+}
+
+/// Step 1 — Build [`RequestContext`] by rehydrating conversation history.
+///
+/// `request` is moved into the context as `enriched_request`; one clone is taken
+/// for `original_request` so the engine retains an unmodified copy for persistence
+/// and ID resolution.
+///
+/// Dispatches on the `store` flag and which ID is present (first match wins):
+/// - `store=false`: validate `previous_response_id` if one is given; no history is loaded
+/// - `store=true`  + `previous_response_id`: [`rehydrate_from_response`]
+/// - `store=true`  + `conversation_id`:      [`rehydrate_from_conversation`]
+/// - `store=true`  + no ids:                 create a new conversation
+///
+/// # Errors
+/// Returns [`ExecutorError`] if storage is unavailable or a referenced ID does not exist.
+pub async fn rehydrate_conversation(
+    request: RequestPayload,
+    exec_ctx: &ExecutionContext,
+) -> ExecutorResult<RequestContext> {
+    let response_id = uuid7_str("resp_");
+    let new_input_items: Vec<InputItem> = Vec::from(&request.input);
+
+    // One clone for the unmodified original; `request` is moved as enriched_request.
+    let original_request = request.clone();
+    let mut ctx = RequestContext {
+        enriched_request: request,
+        original_request,
+        new_input_items,
+        response_id,
+        conversation_id: None,
+    };
+
+    if !ctx.original_request.store {
+        // Non-store path: validate previous_response_id only; no history needed.
+        if ctx.original_request.previous_response_id.is_some() {
+            exec_ctx.resp_handler.validate_exists(&ctx).await?;
+        }
+        return Ok(ctx);
+    }
+    // From here on `store` is true; `previous_response_id` takes precedence over `conversation_id`.
+    if ctx.original_request.previous_response_id.is_some() {
+        rehydrate_from_response(&mut ctx, exec_ctx).await?;
+        return Ok(ctx);
+    }
+
+    if ctx.original_request.conversation_id.is_some() {
+        rehydrate_from_conversation(&mut ctx, exec_ctx).await?;
+        return Ok(ctx);
+    }
+
+    // Store + no ids: create a fresh conversation.
+    let conv_data = exec_ctx.conv_handler.create().await?;
+    ctx.conversation_id = Some(conv_data.conversation_id);
+    ctx.enriched_request.input = ResponsesInput::Items(ctx.new_input_items.clone());
+    Ok(ctx)
+}
+
+/// Rebuilds `ctx` from a stored response; called when the request carries a `previous_response_id`.
+///
+/// Fetches the stored response and its history, places the history ahead of
+/// the new input items, resolves tools and tool choice against the stored
+/// metadata, and adopts the stored response's conversation id.
+async fn rehydrate_from_response(ctx: &mut RequestContext, exec_ctx: &ExecutionContext) -> ExecutorResult<()> {
+    let stored = exec_ctx.resp_handler.get(ctx).await?;
+    let history = exec_ctx.resp_handler.rehydrate(ctx).await?;
+
+    // History first, then this turn's input items.
+    let mut merged = InOutItem::into_input_items(history);
+    merged.extend_from_slice(&ctx.new_input_items);
+
+    ctx.enriched_request.previous_response_id = None;
+    ctx.enriched_request.input = ResponsesInput::Items(merged);
+    ctx.enriched_request.tools = resolve_tools(
+        ctx.original_request.tools.as_deref(),
+        stored.metadata.effective_tools.as_deref(),
+        ctx.original_request.tools.is_some(),
+    );
+    ctx.enriched_request.tool_choice = resolve_tool_choice(
+        &ctx.original_request.tool_choice,
+        &stored.metadata.effective_tool_choice,
+        false,
+    );
+    ctx.conversation_id = stored.conversation_id;
+    Ok(())
+}
+
+/// Rebuilds `ctx` from the conversation store.
+///
+/// Runs `get_or_create` and `rehydrate` concurrently via `try_join!`, then places
+/// the stored history ahead of the new input items in the enriched request.
+async fn rehydrate_from_conversation(ctx: &mut RequestContext, exec_ctx: &ExecutionContext) -> ExecutorResult<()> {
+    let (conv_data, history) = tokio::try_join!(
+        exec_ctx.conv_handler.get_or_create(ctx),
+        exec_ctx.conv_handler.rehydrate(ctx),
+    )?;
+
+    // History first, then this turn's input items.
+    let mut merged = InOutItem::into_input_items(history);
+    merged.extend_from_slice(&ctx.new_input_items);
+
+    ctx.enriched_request.input = ResponsesInput::Items(merged);
+    ctx.conversation_id = Some(conv_data.conversation_id);
+    Ok(())
+}
+
+/// Step 2 — Call the LLM inference backend; yields raw SSE lines (`data: …`).
+///
+/// Response bytes are buffered and split on `\n` before being decoded, so a multi-byte
+/// UTF-8 character that straddles two network chunks is not corrupted. Stops on `[DONE]`.
+///
+/// # Errors
+/// Each stream item is `Result<String, ExecutorError>`; the stream ends after yielding an `Err`:
+/// - [`ExecutorError::LLMRequest`] — the send failed or the backend answered with a non-2xx status
+/// - [`ExecutorError::NetworkError`] / [`ExecutorError::StreamError`] — body read failure / chunk timeout
+pub fn call_inference(
+    upstream_json: String,
+    url: String,
+    client: Arc<reqwest::Client>,
+    auth: Option<String>,
+    chunk_timeout: Duration,
+) -> impl Stream<Item = Result<String, ExecutorError>> + Send + 'static {
+    stream! {
+        let resp = match send_request(&client, &url, upstream_json, auth.as_deref()).await {
+            Ok(r) => r,
+            Err(e) => { yield Err(e); return; }
+        };
+
+        let mut bytes = resp.bytes_stream();
+        let mut buf: Vec<u8> = Vec::with_capacity(8192);
+
+        loop {
+            let chunk = match next_chunk(&mut bytes, chunk_timeout).await {
+                Ok(Some(c)) => c,
+                Ok(None) => break,
+                Err(e) => { yield Err(e); return; }
+            };
+            buf.extend_from_slice(&chunk);
+            // 0x0A never appears inside a multi-byte UTF-8 sequence, so scanning raw bytes is safe.
+            let mut start = 0;
+            while let Some(len) = buf[start..].iter().position(|&b| b == b'\n') {
+                let decoded = String::from_utf8_lossy(&buf[start..start + len]);
+                let line = decoded.trim_end_matches('\r');
+                start += len + 1;
+                match line {
+                    "data: [DONE]" => return,
+                    l if l.starts_with("data: ") => yield Ok(l.to_string()),
+                    _ => {}
+                }
+            }
+            // Discard consumed lines once per chunk; a trailing partial line stays buffered.
+            buf.drain(..start);
+        }
+    }
+}
+
+/// Step 3 — Persist the completed response to storage.
+///
+/// Does nothing (returns `Ok`) unless the payload status parses to
+/// `Completed` or `Incomplete` and `payload.id` is non-empty.
+/// Routes to [`ConversationHandler`] when `ctx.conversation_id` is set,
+/// otherwise to [`ResponseHandler`].
+///
+/// # Errors
+/// Returns [`ExecutorError`] if the storage operation fails.
+pub async fn persist_response(
+    payload: ResponsePayload,
+    ctx: RequestContext,
+    conv_handler: ConversationHandler,
+    resp_handler: ResponseHandler,
+) -> ExecutorResult<()> {
+    // Compare through the typed enum rather than against status strings;
+    // an unparsable status falls back to the enum default.
+    let status = payload.status.parse::<ResponseStatus>().unwrap_or_default();
+    let persistable = matches!(status, ResponseStatus::Completed | ResponseStatus::Incomplete);
+    if !persistable || payload.id.is_empty() {
+        return Ok(());
+    }
+
+    // Only the output items are handed over; the handlers build the
+    // ResponseMetadata from `ctx` themselves.
+    let output = payload.output;
+
+    if ctx.conversation_id.is_none() {
+        return resp_handler.execute_turn(ctx, output).await;
+    }
+    conv_handler.execute_turn(ctx, output).await
+}
+
+/// Non-streaming path: one upstream request, parsed into a payload, then optionally persisted.
+///
+/// A persistence failure is only logged; the payload is still returned.
+async fn run_blocking(ctx: RequestContext, exec_ctx: &ExecutionContext) -> ExecutorResult<ResponsePayload> {
+    let url = exec_ctx.responses_url();
+    // Non-streaming request: stream=false → full JSON body → from_json.
+    let upstream_json =
+        serialize_to_string(&ctx.enriched_request.to_upstream_request(false)).map_err(ExecutorError::JsonError)?;
+
+    let body = fetch_response_json(upstream_json, &url, &exec_ctx.client, exec_ctx.client_auth.as_deref()).await?;
+
+    let mut payload = ResponseAccumulator::from_json(&body, ctx.conversation_id.as_deref())?.finalize(
+        &ctx.enriched_request.model,
+        ctx.original_request.previous_response_id.as_deref(),
+        ctx.original_request.instructions.as_deref(),
+    );
+    ctx.inject_ids(&mut payload);
+
+    if ctx.original_request.store {
+        let (ch, rh) = (exec_ctx.conv_handler.clone(), exec_ctx.resp_handler.clone());
+        if let Err(e) = persist_response(payload.clone(), ctx, ch, rh).await {
+            warn!("persist failed: {e}");
+        }
+    }
+    Ok(payload)
+}
+
+/// Streaming path: accumulates the upstream SSE stream, then yields the final payload chunk and `[DONE]`.
+///
+/// Failures are yielded in-band as `data: {"error": …}` plus `[DONE]`, with the message JSON-escaped.
+fn run_stream(ctx: RequestContext, exec_ctx: Arc<ExecutionContext>) -> BoxStream {
+    let error_line = |msg: String| format!("data: {{\"error\": {}}}\n\n", serde_json::Value::String(msg));
+    let url = exec_ctx.responses_url();
+    // Streaming request: stream=true → SSE lines → from_stream.
+    let upstream_json = match serialize_to_string(&ctx.enriched_request.to_upstream_request(true)) {
+        Ok(s) => s,
+        Err(e) => {
+            return Box::pin(stream! {
+                yield error_line(format!("serialize error: {e}"));
+                yield DONE_MARKER.to_string();
+            });
+        }
+    };
+
+    let store = ctx.original_request.store;
+    Box::pin(stream! {
+        let line_stream = Box::pin(call_inference(
+            upstream_json,
+            url,
+            Arc::clone(&exec_ctx.client),
+            exec_ctx.client_auth.clone(),
+            exec_ctx.streaming_timeout,
+        ));
+        // from_stream feeds SSE lines to a spawn_blocking worker via channel.
+        match ResponseAccumulator::from_stream(line_stream, ctx.conversation_id.as_deref()).await {
+            Err(e) => {
+                yield error_line(e.to_string());
+                yield DONE_MARKER.to_string();
+            }
+            Ok(acc) => {
+                let mut payload = acc.finalize(
+                    &ctx.enriched_request.model,
+                    ctx.original_request.previous_response_id.as_deref(),
+                    ctx.original_request.instructions.as_deref(),
+                );
+                ctx.inject_ids(&mut payload);
+                yield payload.as_responses_chunk();
+                yield DONE_MARKER.to_string();
+                // NOTE(review): runs only if the consumer keeps polling after `[DONE]` — confirm callers drain.
+                if store {
+                    let (ch, rh) = (exec_ctx.conv_handler.clone(), exec_ctx.resp_handler.clone());
+                    if let Err(e) = persist_response(payload, ctx, ch, rh).await {
+                        warn!("persist failed: {e}");
+                    }
+                }
+            }
+        }
+    })
+}
+
+/// Creates a new conversation through `exec_ctx.conv_handler` and returns its data.
+///
+/// Exposes the conversation-creation step as a standalone function so callers
+/// (e.g. `agentic-server`, Praxis filters, or tests) can pre-create a
+/// conversation before submitting response turns.
+///
+/// # Errors
+/// Returns whatever [`ExecutorError`] `conv_handler.create()` reports.
+pub async fn create_conversation(exec_ctx: &ExecutionContext) -> ExecutorResult<crate::ConversationData> {
+    exec_ctx.conv_handler.create().await
+}
+
+/// Run the full agentic loop.
+///
+/// Non-streaming requests resolve to `Either::Left(ResponsePayload)`. Streaming
+/// requests resolve to `Either::Right(BoxStream)`, where every yielded `String`
+/// is an SSE line ready to forward to the client.
+///
+/// # Errors
+/// Returns [`ExecutorError`] if rehydration or (non-streaming) LLM inference fails.
+pub async fn execute(
+    request: RequestPayload,
+    exec_ctx: Arc<ExecutionContext>,
+) -> ExecutorResult<Either<ResponsePayload, BoxStream>> {
+    let ctx = rehydrate_conversation(request, &exec_ctx).await?;
+    if !ctx.original_request.stream {
+        return run_blocking(ctx, &exec_ctx).await.map(Either::Left);
+    }
+    // The stream is returned unpolled; streaming failures surface in-band, not here.
+    Ok(Either::Right(run_stream(ctx, exec_ctx)))
+}
diff --git a/crates/agentic-core/src/executor/error.rs b/crates/agentic-core/src/executor/error.rs
new file mode 100644
index 0000000..6c6e41b
--- /dev/null
+++ b/crates/agentic-core/src/executor/error.rs
@@ -0,0 +1,102 @@
+use http::StatusCode;
+use thiserror::Error;
+
+use crate::StorageError;
+
+/// Errors produced by the executor while rehydrating history, calling the
+/// LLM backend, or parsing its response.
+#[non_exhaustive]
+#[derive(Debug, Error)]
+pub enum ExecutorError {
+    /// A storage layer operation failed.
+    #[error("storage error: {0}")]
+    Storage(#[from] StorageError),
+
+    /// The LLM backend returned a non-2xx status or was unreachable.
+    #[error("LLM request failed ({status}): {body}")]
+    LLMRequest { status: StatusCode, body: String },
+
+    /// A network error occurred reading from the LLM response stream.
+    ///
+    /// The original `reqwest::Error` is preserved as the error source
+    /// (`#[from]` implies `#[source]`), so callers can inspect the
+    /// underlying network failure.
+    #[error("network error: {0}")]
+    NetworkError(#[from] reqwest::Error),
+
+    /// JSON serialisation or deserialisation failed.
+    ///
+    /// The original `serde_json::Error` is preserved as the error source
+    /// (`#[from]` implies `#[source]`), so callers can inspect the exact
+    /// failure location and kind.
+    #[error("json error: {0}")]
+    JsonError(#[from] serde_json::Error),
+
+    /// A general stream processing error with a human-readable message.
+    ///
+    /// Used for non-network stream failures (e.g. a chunk timeout or a
+    /// failed worker task).
+    #[error("stream error: {0}")]
+    StreamError(String),
+
+    /// A payload was structurally invalid, with a human-readable message.
+    ///
+    /// Used when a required field is missing (e.g. `id` in an LLM response body).
+    #[error("parse error: {0}")]
+    ParseError(String),
+
+    /// A referenced entity (named by `entity`) with the given `id` does not exist.
+    #[error("{entity} not found: {id}")]
+    NotFound { entity: String, id: String },
+
+    /// The request is invalid; the message explains why.
+    #[error("invalid request: {0}")]
+    InvalidRequest(String),
+}
+
+/// Result alias used throughout the executor.
+pub type ExecutorResult<T> = Result<T, ExecutorError>;
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    // Display-format and source-chaining checks for `ExecutorError`.
+    #[test]
+    fn test_executor_error_display() {
+        let err = ExecutorError::InvalidRequest("test message".into());
+        assert!(err.to_string().contains("invalid request"));
+        assert!(err.to_string().contains("test message"));
+    }
+
+    #[test]
+    fn test_executor_error_stream() {
+        let err = ExecutorError::StreamError("connection lost".into());
+        assert!(err.to_string().contains("stream error"));
+    }
+
+    #[test]
+    fn test_executor_error_not_found() {
+        let err = ExecutorError::NotFound {
+            entity: "Conversation".into(),
+            id: "conv_123".into(),
+        };
+        assert!(err.to_string().contains("Conversation"));
+        assert!(err.to_string().contains("conv_123"));
+    }
+
+    #[test]
+    fn test_executor_error_from_storage() {
+        let storage_err = StorageError::NotConfigured;
+        let exec_err = ExecutorError::from(storage_err); // exercises the `#[from]` conversion
+        assert!(exec_err.to_string().contains("storage error"));
+    }
+
+    #[test]
+    fn test_executor_error_json_preserves_source() {
+        use std::error::Error;
+        let json_err: serde_json::Error = serde_json::from_str::<serde_json::Value>("{bad}").unwrap_err();
+        let exec_err = ExecutorError::from(json_err);
+        assert!(exec_err.source().is_some(), "source should be chained");
+        assert!(exec_err.to_string().contains("json error"));
+    }
+}
diff --git a/crates/agentic-core/src/executor/mod.rs b/crates/agentic-core/src/executor/mod.rs
new file mode 100644
index 0000000..32fbabc
--- /dev/null
+++ b/crates/agentic-core/src/executor/mod.rs
@@ -0,0 +1,13 @@
+//! Agentic loop executor.
+
+pub mod accumulator;
+pub mod engine;
+pub mod error;
+pub mod modes;
+pub mod request;
+
+pub use engine::{BoxStream, call_inference, create_conversation, execute, persist_response, rehydrate_conversation};
+pub use error::{ExecutorError, ExecutorResult};
+pub use modes::{ConversationHandler, ResponseHandler};
+pub use request::ExecutionContext;
+pub use request::RequestContext;
diff --git a/crates/agentic-core/src/executor/modes/conversation.rs b/crates/agentic-core/src/executor/modes/conversation.rs
new file mode 100644
index 0000000..bc89476
--- /dev/null
+++ b/crates/agentic-core/src/executor/modes/conversation.rs
@@ -0,0 +1,167 @@
+//! Conversation storage handler — owns all conversation store operations.
+
+use crate::storage::{ConversationData, ConversationStore, InOutItem, ResponseMetadata};
+use crate::types::io::OutputItem;
+
+use crate::executor::error::{ExecutorError, ExecutorResult};
+use crate::executor::request::RequestContext;
+
+/// Executor-facing wrapper around [`ConversationStore`]: creation, rehydration, and persistence.
+#[derive(Clone, Debug)]
+pub struct ConversationHandler {
+    store: ConversationStore,
+}
+
+impl ConversationHandler {
+    #[must_use]
+    pub fn new(store: ConversationStore) -> Self {
+        Self { store }
+    }
+
+    /// Returns the existing conversation for the request's ID, or creates one.
+    ///
+    /// The ID is taken from `ctx.original_request.conversation_id`.
+    ///
+    /// # Errors
+    /// Returns [`ExecutorError::InvalidRequest`] when the request carries no `conversation_id`,
+    /// and [`ExecutorError::Storage`] for any failure reported by the store.
+    pub async fn get_or_create(&self, ctx: &RequestContext) -> ExecutorResult<ConversationData> {
+        // Guard first: without a client-supplied ID the store is never consulted.
+        let Some(conv_id) = ctx.original_request.conversation_id.as_deref() else {
+            return Err(ExecutorError::InvalidRequest("conversation_id is required for get_or_create".into()));
+        };
+        let outcome = self.store.get_or_create(conv_id).await;
+        outcome.map_err(ExecutorError::Storage)
+    }
+
+    /// Creates a brand-new conversation with a freshly generated ID.
+    ///
+    /// # Errors
+    /// Returns [`ExecutorError::Storage`] for any failure reported by the store.
+    pub async fn create(&self) -> ExecutorResult<ConversationData> {
+        self.store.create().await.map_err(ExecutorError::Storage)
+    }
+
+    /// Loads the stored history for the conversation named by the request.
+    ///
+    /// The ID is taken from `ctx.original_request.conversation_id`. A conversation that
+    /// exists but has no items yet yields an empty vec.
+    ///
+    /// # Errors
+    /// Returns [`ExecutorError::InvalidRequest`] when the request carries no `conversation_id`,
+    /// and [`ExecutorError::Storage`] for any failure reported by the store.
+    pub async fn rehydrate(&self, ctx: &RequestContext) -> ExecutorResult<Vec<InOutItem>> {
+        // Guard first: without a client-supplied ID the store is never consulted.
+        let Some(conv_id) = ctx.original_request.conversation_id.as_deref() else {
+            return Err(ExecutorError::InvalidRequest("conversation_id is required for rehydrate".into()));
+        };
+        let outcome = self.store.rehydrate(conv_id).await;
+        outcome.map_err(ExecutorError::Storage)
+    }
+
+    /// Persists a single conversation turn, writing only the items produced this turn.
+    ///
+    /// `ctx` and `output_items` are consumed so their fields move straight into
+    /// [`ResponseMetadata`] and the item list without cloning. Earlier history must not be
+    /// passed again: the store tracks sequence numbers and appends.
+    ///
+    /// # Errors
+    /// Returns [`ExecutorError::InvalidRequest`] when `ctx.conversation_id` is `None`,
+    /// and [`ExecutorError::Storage`] for any failure reported by the store.
+    pub async fn execute_turn(&self, ctx: RequestContext, output_items: Vec<OutputItem>) -> ExecutorResult<()> {
+        let RequestContext {
+            original_request,
+            enriched_request,
+            new_input_items,
+            response_id,
+            conversation_id,
+        } = ctx;
+        let Some(conversation_id) = conversation_id else {
+            return Err(ExecutorError::InvalidRequest("conversation_id is required for execute_turn".into()));
+        };
+
+        let metadata = ResponseMetadata {
+            model: enriched_request.model,
+            previous_response_id: original_request.previous_response_id,
+            effective_tools: original_request.tools,
+            effective_tool_choice: original_request.tool_choice,
+            effective_instructions: original_request.instructions,
+        };
+
+        // Only this turn's items are written: the new inputs followed by the outputs.
+        let inputs = new_input_items.into_iter().map(InOutItem::Input);
+        let outputs = output_items.into_iter().map(InOutItem::Output);
+        let new_items: Vec<InOutItem> = inputs.chain(outputs).collect();
+        let previous = metadata.previous_response_id.as_deref();
+        let persisted = self.store.persist(&conversation_id, &response_id, previous, new_items, &metadata).await;
+        persisted.map_err(ExecutorError::Storage)
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::types::io::{ResponsesInput, ToolChoice};
+    use crate::types::request_response::RequestPayload;
+
+    fn disabled_handler() -> ConversationHandler {
+        ConversationHandler::new(ConversationStore::disabled())
+    }
+
+    fn make_ctx(conversation_id: Option<&str>) -> RequestContext {
+        let req = RequestPayload {
+            model: "test".into(),
+            input: ResponsesInput::Text("hi".into()),
+            instructions: None,
+            previous_response_id: None,
+            conversation_id: conversation_id.map(str::to_string),
+            tools: None,
+            tool_choice: ToolChoice::Auto,
+            stream: false,
+            store: true,
+            include: None,
+            temperature: None,
+            top_p: None,
+            max_output_tokens: None,
+            truncation: None,
+            metadata: None,
+        };
+        RequestContext {
+            enriched_request: req.clone(),
+            original_request: req,
+            new_input_items: vec![],
+            response_id: "resp_test".into(),
+            conversation_id: conversation_id.map(str::to_string),
+        }
+    }
+
+    #[tokio::test]
+    async fn test_get_or_create_missing_id_returns_error() {
+        let result = disabled_handler().get_or_create(&make_ctx(None)).await;
+        assert!(matches!(result, Err(ExecutorError::InvalidRequest(_))));
+    }
+
+    #[tokio::test]
+    async fn test_rehydrate_missing_id_returns_error() {
+        let result = disabled_handler().rehydrate(&make_ctx(None)).await;
+        assert!(matches!(result, Err(ExecutorError::InvalidRequest(_))));
+    }
+
+    #[tokio::test]
+    async fn test_get_or_create_disabled_store_returns_error() {
+        let result = disabled_handler().get_or_create(&make_ctx(Some("conv_1"))).await;
+        assert!(matches!(result, Err(ExecutorError::Storage(_))));
+    }
+
+    #[tokio::test]
+    async fn test_rehydrate_disabled_store_returns_error() {
+        let result = disabled_handler().rehydrate(&make_ctx(Some("conv_1"))).await;
+        assert!(matches!(result, Err(ExecutorError::Storage(_))));
+    }
+
+    #[tokio::test]
+    async fn test_execute_turn_missing_conv_id_returns_error() {
+        let result = disabled_handler().execute_turn(make_ctx(None), vec![]).await;
+        assert!(matches!(result, Err(ExecutorError::InvalidRequest(_))));
+    }
+}
diff --git a/crates/agentic-core/src/executor/modes/mod.rs b/crates/agentic-core/src/executor/modes/mod.rs
new file mode 100644
index 0000000..1e57c67
--- /dev/null
+++ b/crates/agentic-core/src/executor/modes/mod.rs
@@ -0,0 +1,5 @@
+pub mod conversation;
+pub mod response;
+
+pub use conversation::ConversationHandler;
+pub use response::ResponseHandler;
diff --git a/crates/agentic-core/src/executor/modes/response.rs b/crates/agentic-core/src/executor/modes/response.rs
new file mode 100644
index 0000000..a747776
--- /dev/null
+++ b/crates/agentic-core/src/executor/modes/response.rs
@@ -0,0 +1,163 @@
+//! Response storage handler — owns all response store operations.
+
+use crate::storage::{InOutItem, ResponseData, ResponseMetadata, ResponseStore};
+use crate::types::io::OutputItem;
+
+use crate::executor::error::{ExecutorError, ExecutorResult};
+use crate::executor::request::RequestContext;
+
+/// Executor-facing wrapper around [`ResponseStore`]: lookup, rehydration, and persistence.
+#[derive(Clone, Debug)]
+pub struct ResponseHandler {
+    store: ResponseStore,
+}
+
+impl ResponseHandler {
+    #[must_use]
+    pub fn new(store: ResponseStore) -> Self {
+        Self { store }
+    }
+
+    /// Fetches the stored response that the request names as its predecessor.
+    ///
+    /// The ID is taken from `ctx.original_request.previous_response_id`.
+    ///
+    /// # Errors
+    /// Returns [`ExecutorError::InvalidRequest`] when the request has no `previous_response_id`,
+    /// and [`ExecutorError::Storage`] for any failure reported by the store.
+    pub async fn get(&self, ctx: &RequestContext) -> ExecutorResult<ResponseData> {
+        // Guard first: without a `previous_response_id` the store is never consulted.
+        let Some(prev_id) = ctx.original_request.previous_response_id.as_deref() else {
+            return Err(ExecutorError::InvalidRequest("previous_response_id is required for get".into()));
+        };
+        let outcome = self.store.get(prev_id).await;
+        outcome.map_err(ExecutorError::Storage)
+    }
+
+    /// Confirms that the response named by `previous_response_id` can be fetched.
+    ///
+    /// Intended for the `store=false` path, where only the validity of the ID matters.
+    /// Delegates to [`Self::get`] and discards the returned record.
+    ///
+    /// # Errors
+    /// Same as [`Self::get`]: [`ExecutorError::InvalidRequest`] when the ID is absent,
+    /// [`ExecutorError::Storage`] for any failure reported by the store.
+    pub async fn validate_exists(&self, ctx: &RequestContext) -> ExecutorResult<()> {
+        self.get(ctx).await.map(drop)
+    }
+
+    /// Loads all history items referenced by the previous response.
+    ///
+    /// The ID is taken from `ctx.original_request.previous_response_id`; when it is `None`
+    /// the store is not consulted and an empty vec is returned.
+    ///
+    /// # Errors
+    /// Returns [`ExecutorError::Storage`] for any failure reported by the store.
+    pub async fn rehydrate(&self, ctx: &RequestContext) -> ExecutorResult<Vec<InOutItem>> {
+        match ctx.original_request.previous_response_id.as_deref() {
+            Some(prev_id) => self.store.rehydrate(prev_id).await.map_err(ExecutorError::Storage),
+            None => Ok(Vec::new()),
+        }
+    }
+
+    /// Persists one response record, writing only the items produced this turn.
+    ///
+    /// `ctx` and `output_items` are consumed so their fields move straight into
+    /// [`ResponseMetadata`] and the item list without cloning. Earlier history must not be
+    /// passed again; the response store records item IDs for this response only.
+    ///
+    /// # Errors
+    /// Returns [`ExecutorError::Storage`] for any failure reported by the store.
+    pub async fn execute_turn(&self, ctx: RequestContext, output_items: Vec<OutputItem>) -> ExecutorResult<()> {
+        let RequestContext {
+            original_request,
+            enriched_request,
+            new_input_items,
+            response_id,
+            ..
+        } = ctx;
+        let metadata = ResponseMetadata {
+            model: enriched_request.model,
+            previous_response_id: original_request.previous_response_id,
+            effective_tools: original_request.tools,
+            effective_tool_choice: original_request.tool_choice,
+            effective_instructions: original_request.instructions,
+        };
+
+        let inputs = new_input_items.into_iter().map(InOutItem::Input);
+        let outputs = output_items.into_iter().map(InOutItem::Output);
+        let new_items: Vec<InOutItem> = inputs.chain(outputs).collect();
+        let previous = metadata.previous_response_id.as_deref();
+        let persisted = self.store.persist(&response_id, previous, new_items, &metadata).await;
+        persisted.map_err(ExecutorError::Storage)
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::types::io::{ResponsesInput, ToolChoice};
+    use crate::types::request_response::RequestPayload;
+
+    fn disabled_handler() -> ResponseHandler {
+        ResponseHandler::new(ResponseStore::disabled())
+    }
+
+    fn make_ctx(previous_response_id: Option<&str>) -> RequestContext {
+        let req = RequestPayload {
+            model: "test".into(),
+            input: ResponsesInput::Text("hi".into()),
+            instructions: None,
+            previous_response_id: previous_response_id.map(str::to_string),
+            conversation_id: None,
+            tools: None,
+            tool_choice: ToolChoice::Auto,
+            stream: false,
+            store: true,
+            include: None,
+            temperature: None,
+            top_p: None,
+            max_output_tokens: None,
+            truncation: None,
+            metadata: None,
+        };
+        RequestContext {
+            enriched_request: req.clone(),
+            original_request: req,
+            new_input_items: vec![],
+            response_id: "resp_test".into(),
+            conversation_id: None,
+        }
+    }
+
+    #[tokio::test]
+    async fn test_get_missing_prev_id_returns_error() {
+        let result = disabled_handler().get(&make_ctx(None)).await;
+        assert!(matches!(result, Err(ExecutorError::InvalidRequest(_))));
+    }
+
+    #[tokio::test]
+    async fn test_validate_exists_missing_prev_id_returns_error() {
+        let result = disabled_handler().validate_exists(&make_ctx(None)).await;
+        assert!(matches!(result, Err(ExecutorError::InvalidRequest(_))));
+    }
+
+    #[tokio::test]
+    async fn test_rehydrate_no_prev_id_returns_empty() {
+        let result = disabled_handler().rehydrate(&make_ctx(None)).await;
+        assert!(result.is_ok());
+        assert!(result.unwrap().is_empty());
+    }
+
+    #[tokio::test]
+    async fn test_rehydrate_disabled_store_returns_error() {
+        let result = disabled_handler().rehydrate(&make_ctx(Some("resp_prev"))).await;
+        assert!(matches!(result, Err(ExecutorError::Storage(_))));
+    }
+
+    #[tokio::test]
+    async fn test_execute_turn_disabled_store_returns_error() {
+        let result = disabled_handler().execute_turn(make_ctx(None), vec![]).await;
+        assert!(matches!(result, Err(ExecutorError::Storage(_))));
+    }
+}
diff --git a/crates/agentic-core/src/executor/request.rs b/crates/agentic-core/src/executor/request.rs
new file mode 100644
index 0000000..17a21b7
--- /dev/null
+++ b/crates/agentic-core/src/executor/request.rs
@@ -0,0 +1,104 @@
+use std::sync::Arc;
+use std::time::Duration;
+
+use crate::executor::modes::{ConversationHandler, ResponseHandler};
+use crate::types::io::InputItem;
+use crate::types::request_response::{RequestPayload, ResponsePayload};
+
+/// Per-request state built by `rehydrate_conversation` and carried through the execute pipeline.
+#[derive(Debug)]
+pub struct RequestContext {
+    /// The client's request exactly as received.
+    pub original_request: RequestPayload,
+    /// Copy of the request whose `.input` has the rehydrated conversation history injected.
+    /// This is what gets forwarded to the LLM.
+    pub enriched_request: RequestPayload,
+    /// The input items the client submitted this turn only (used for persistence).
+    pub new_input_items: Vec<InputItem>,
+    /// The response ID we generated (uuid7 with a "resp_" prefix).
+    pub response_id: String,
+    /// Resolved conversation ID; `None` when `store=false` or the request is non-conversational.
+    pub conversation_id: Option<String>,
+}
+
+impl RequestContext {
+    /// Overwrites the IDs on a [`ResponsePayload`] received from the LLM (which carries the
+    /// upstream's own IDs) with ours: `id`, `conversation_id`, and the client's
+    /// `previous_response_id` taken from the original request.
+    pub(crate) fn inject_ids(&self, payload: &mut ResponsePayload) {
+        let client_previous_id = &self.original_request.previous_response_id;
+        payload.previous_response_id.clone_from(client_previous_id);
+        payload.conversation_id.clone_from(&self.conversation_id);
+        payload.id.clone_from(&self.response_id);
+    }
+}
+
+/// Runtime dependencies passed into `execute()`.
+///
+/// Owns the storage handlers, HTTP client, and LLM endpoint configuration.
+#[derive(Debug)]
+pub struct ExecutionContext {
+    pub conv_handler: ConversationHandler,
+    pub resp_handler: ResponseHandler,
+    pub client: Arc<reqwest::Client>,
+    /// Base URL for the LLM backend, e.g. `"http://localhost:8000"` (a trailing `/` is tolerated).
+    pub llm_base_url: String,
+    /// Bearer token forwarded from the client, if any.
+    pub client_auth: Option<String>,
+    /// Maximum wait time for the next SSE chunk.  `Duration::ZERO` disables the timeout.
+    /// Both constructors currently set this to 30 s; `Config` has no field for it yet.
+    pub streaming_timeout: Duration,
+}
+
+impl ExecutionContext {
+    /// Chunk timeout used by both constructors until `Config` exposes a setting for it.
+    const DEFAULT_STREAMING_TIMEOUT: Duration = Duration::from_secs(30);
+
+    /// Returns the full URL for the `/v1/responses` endpoint.
+    /// Trailing `/` characters on `llm_base_url` are stripped so the path never contains `//`.
+    #[must_use]
+    pub fn responses_url(&self) -> String {
+        format!("{}/v1/responses", self.llm_base_url.trim_end_matches('/'))
+    }
+
+    /// Returns the full URL for the `/v1/conversations` endpoint.
+    /// Trailing `/` characters on `llm_base_url` are stripped so the path never contains `//`.
+    #[must_use]
+    pub fn conversations_url(&self) -> String {
+        format!("{}/v1/conversations", self.llm_base_url.trim_end_matches('/'))
+    }
+
+    /// Builds a context from explicit parts.
+    /// `streaming_timeout` starts at the 30 s default; the field is public and can be overridden.
+    #[must_use]
+    pub fn new(
+        conv_handler: ConversationHandler,
+        resp_handler: ResponseHandler,
+        client: Arc<reqwest::Client>,
+        llm_base_url: String,
+        client_auth: Option<String>,
+    ) -> Self {
+        Self {
+            conv_handler,
+            resp_handler,
+            client,
+            llm_base_url,
+            client_auth,
+            streaming_timeout: Self::DEFAULT_STREAMING_TIMEOUT,
+        }
+    }
+
+    /// Builds a context whose LLM base URL is cloned from `cfg.llm_api_base`.
+    #[must_use]
+    pub fn from_config(
+        conv_handler: ConversationHandler,
+        resp_handler: ResponseHandler,
+        client: Arc<reqwest::Client>,
+        cfg: &crate::config::Config,
+        client_auth: Option<String>,
+    ) -> Self {
+        // TODO: expose `streaming_chunk_timeout_s: Option<f64>` in `Config` and read it here
+        //       once all `Config` struct literals in agentic-server use `..Config::default()`.
+        Self::new(conv_handler, resp_handler, client, cfg.llm_api_base.clone(), client_auth)
+    }
+}
diff --git a/crates/agentic-core/src/lib.rs b/crates/agentic-core/src/lib.rs
index 20877b6..700bafb 100644
--- a/crates/agentic-core/src/lib.rs
+++ b/crates/agentic-core/src/lib.rs
@@ -1,5 +1,6 @@
 pub mod config;
 pub mod error;
+pub mod executor;
 pub mod proxy;
 pub mod readiness;
 pub mod storage;
diff --git a/crates/agentic-core/src/storage/conversation.rs b/crates/agentic-core/src/storage/conversation.rs
index 621e181..5f18d7e 100644
--- a/crates/agentic-core/src/storage/conversation.rs
+++ b/crates/agentic-core/src/storage/conversation.rs
@@ -9,7 +9,7 @@ use super::types::{ConversationData, InOutItem, ResponseMetadata, StorageError,
 use crate::utils::common::{serialize_to_string, uuid7_str};
 
 /// Conversation storage operations.
-#[derive(Clone)]
+#[derive(Clone, Debug)]
 pub struct ConversationStore {
     pool: Option<Arc<DbPool>>,
 }
diff --git a/crates/agentic-core/src/storage/response.rs b/crates/agentic-core/src/storage/response.rs
index ddb4c2e..a41b49c 100644
--- a/crates/agentic-core/src/storage/response.rs
+++ b/crates/agentic-core/src/storage/response.rs
@@ -10,7 +10,7 @@ use super::types::{InOutItem, ResponseData, ResponseMetadata, StorageError, Stor
 use crate::utils::common::{serialize_to_string, uuid7_str};
 
 /// Response storage operations.
-#[derive(Clone)]
+#[derive(Clone, Debug)]
 pub struct ResponseStore {
     pool: Option<Arc<DbPool>>,
 }
diff --git a/crates/agentic-core/src/types/event.rs b/crates/agentic-core/src/types/event.rs
new file mode 100644
index 0000000..6409c81
--- /dev/null
+++ b/crates/agentic-core/src/types/event.rs
@@ -0,0 +1,185 @@
+//! Server-Sent Event (SSE) types and response status enums.
+
+use std::convert::Infallible;
+use std::str::FromStr;
+
+use serde::{Deserialize, Serialize};
+
+/// Lifecycle state of a response.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)]
+#[serde(rename_all = "snake_case")]
+pub enum ResponseStatus {
+    /// Generation is still under way (the default).
+    #[default]
+    InProgress,
+
+    /// Generation finished successfully.
+    Completed,
+
+    /// Generation stopped before finishing (e.g., stream interrupted).
+    Incomplete,
+
+    /// Generation hit an error.
+    Error,
+}
+
+impl ResponseStatus {
+    /// Wire string for this status; [`FromStr`] parses each of these back to the same variant.
+    #[must_use]
+    pub fn as_str(self) -> &'static str {
+        match self {
+            Self::Error => "error",
+            Self::Incomplete => "incomplete",
+            Self::Completed => "completed",
+            Self::InProgress => "in_progress",
+        }
+    }
+}
+
+impl FromStr for ResponseStatus {
+    type Err = Infallible;
+    /// Never fails: any string other than the three non-error wire names maps to `Error`.
+    fn from_str(s: &str) -> Result<Self, Self::Err> {
+        Ok(match s {
+            "completed" => Self::Completed,
+            "incomplete" => Self::Incomplete,
+            "in_progress" => Self::InProgress,
+            _ => Self::Error,
+        })
+    }
+}
+
+/// Lifecycle state of a single message item.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)]
+#[serde(rename_all = "snake_case")]
+pub enum MessageStatus {
+    /// Generation is still under way (the default).
+    #[default]
+    InProgress,
+
+    /// Generation finished.
+    Completed,
+}
+
+impl MessageStatus {
+    /// Wire string for this status; [`FromStr`] parses each of these back to the same variant.
+    #[must_use]
+    pub fn as_str(self) -> &'static str {
+        match self {
+            Self::Completed => "completed",
+            Self::InProgress => "in_progress",
+        }
+    }
+}
+
+impl FromStr for MessageStatus {
+    type Err = Infallible;
+
+    /// Never fails: only the exact string `"completed"` yields `Completed`;
+    /// every other input, recognized or not, yields `InProgress`.
+    fn from_str(s: &str) -> Result<Self, Self::Err> {
+        let finished = s == "completed";
+        Ok(if finished { Self::Completed } else { Self::InProgress })
+    }
+}
+
+/// Kinds of Server-Sent Events seen on an LLM streaming response.
+///
+/// Emitted by vLLM when `stream=true`. Each variant marks one step of response
+/// generation; anything unrecognized is folded into [`SSEEventType::Other`].
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
+pub enum SSEEventType {
+    /// The response object was created; carries the initial response metadata.
+    ResponseCreated,
+
+    /// An output item (message) was added; marks the start of a new message.
+    ResponseOutputItemAdded,
+
+    /// A text delta: incremental token content for the current message.
+    ResponseOutputTextDelta,
+
+    /// The response is fully complete; no further events follow.
+    ResponseDone,
+
+    /// Any event type that is unknown or not handled (the default).
+    #[default]
+    Other,
+}
+
+impl FromStr for SSEEventType {
+    type Err = Infallible;
+    /// Never fails: event names without a dedicated variant (including `""`) map to `Other`.
+    fn from_str(s: &str) -> Result<Self, Self::Err> {
+        Ok(match s {
+            // vLLM uses `response.done`; OpenAI uses `response.completed`.
+            "response.done" | "response.completed" => Self::ResponseDone,
+            "response.output_text.delta" => Self::ResponseOutputTextDelta,
+            "response.output_item.added" => Self::ResponseOutputItemAdded,
+            "response.created" => Self::ResponseCreated,
+            _ => Self::Other,
+        })
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_sse_event_type_from_str_created() {
+        assert_eq!(
+            "response.created".parse::<SSEEventType>().unwrap(),
+            SSEEventType::ResponseCreated
+        );
+    }
+
+    #[test]
+    fn test_sse_event_type_from_str_delta() {
+        assert_eq!(
+            "response.output_text.delta".parse::<SSEEventType>().unwrap(),
+            SSEEventType::ResponseOutputTextDelta
+        );
+    }
+
+    #[test]
+    fn test_sse_event_type_from_str_done() {
+        // vLLM and OpenAI name the terminal event differently; both must map to `ResponseDone`.
+        for wire in ["response.done", "response.completed"] {
+            assert_eq!(wire.parse::<SSEEventType>().unwrap(), SSEEventType::ResponseDone);
+        }
+    }
+
+    #[test]
+    fn test_sse_event_type_from_str_unknown() {
+        assert_eq!("unknown.event".parse::<SSEEventType>().unwrap(), SSEEventType::Other);
+    }
+
+    #[test]
+    fn test_sse_event_type_from_str_empty() {
+        assert_eq!("".parse::<SSEEventType>().unwrap(), SSEEventType::Other);
+    }
+
+    #[test]
+    fn test_response_status_round_trip() {
+        for (s, expected) in [
+            ("in_progress", ResponseStatus::InProgress),
+            ("completed", ResponseStatus::Completed),
+            ("incomplete", ResponseStatus::Incomplete),
+            ("error", ResponseStatus::Error),
+        ] {
+            let parsed: ResponseStatus = s.parse().unwrap();
+            assert_eq!(parsed, expected);
+            assert_eq!(parsed.as_str(), s);
+        }
+    }
+
+    #[test]
+    fn test_message_status_round_trip() {
+        for (s, expected) in [("completed", MessageStatus::Completed), ("in_progress", MessageStatus::InProgress)] {
+            let parsed: MessageStatus = s.parse().unwrap();
+            assert_eq!(parsed, expected);
+            assert_eq!(parsed.as_str(), s);
+        }
+        assert_eq!("unknown".parse::<MessageStatus>().unwrap(), MessageStatus::InProgress);
+    }
+}
diff --git a/crates/agentic-core/src/types/io.rs b/crates/agentic-core/src/types/io.rs
index 14fd7f9..d47bc4b 100644
--- a/crates/agentic-core/src/types/io.rs
+++ b/crates/agentic-core/src/types/io.rs
@@ -114,17 +114,17 @@ pub enum OutputItem {
     Unknown,
 }
 
-#[derive(Debug, Clone, Default, Serialize, Deserialize)]
+#[derive(Debug, Clone, Copy, Default, Serialize, Deserialize)]
 pub struct InputTokenDetails {
     pub cached_tokens: i64,
 }
 
-#[derive(Debug, Clone, Default, Serialize, Deserialize)]
+#[derive(Debug, Clone, Copy, Default, Serialize, Deserialize)]
 pub struct OutputTokenDetails {
     pub reasoning_tokens: i64,
 }
 
-#[derive(Debug, Clone, Default, Serialize, Deserialize)]
+#[derive(Debug, Clone, Copy, Default, Serialize, Deserialize)]
 pub struct ResponseUsage {
     pub input_tokens: i64,
     pub output_tokens: i64,
@@ -160,6 +160,32 @@ pub enum ToolChoice {
     },
 }
 
+/// Picks which tool list applies to a request.
+///
+/// When `tools_explicitly_set` is true the caller's `request_tools` are used, even if
+/// that is `None`; otherwise `stored_tools` are used. Whichever slice is selected is
+/// cloned into an owned `Vec`. If the selected side is `None`, the result is `None`;
+/// there is no fallback to the other side.
+#[inline]
+pub(crate) fn resolve_tools(
+    request_tools: Option<&[ResponsesTool]>,
+    stored_tools: Option<&[ResponsesTool]>,
+    tools_explicitly_set: bool,
+) -> Option<Vec<ResponsesTool>> {
+    let chosen = if tools_explicitly_set { request_tools } else { stored_tools };
+    chosen.map(|tools| tools.to_vec())
+}
+
+/// Picks the tool choice with the same precedence as [`resolve_tools`] and returns a clone of it.
+#[inline]
+pub(crate) fn resolve_tool_choice(
+    request_choice: &ToolChoice,
+    stored_choice: &ToolChoice,
+    explicitly_set: bool,
+) -> ToolChoice {
+    ToolChoice::clone(if explicitly_set { request_choice } else { stored_choice })
+}
+
 #[derive(Debug, Clone, Serialize, Deserialize)]
 #[serde(untagged)]
 pub enum ResponsesInput {
diff --git a/crates/agentic-core/src/types/mod.rs b/crates/agentic-core/src/types/mod.rs
index 6c7865b..675c7ba 100644
--- a/crates/agentic-core/src/types/mod.rs
+++ b/crates/agentic-core/src/types/mod.rs
@@ -1,3 +1,4 @@
+pub mod event;
 pub mod io;
 pub mod request_response;
 
diff --git a/crates/agentic-core/src/utils/common.rs b/crates/agentic-core/src/utils/common.rs
index c7545d2..cc00b91 100644
--- a/crates/agentic-core/src/utils/common.rs
+++ b/crates/agentic-core/src/utils/common.rs
@@ -73,3 +73,20 @@ pub fn deserialize_from_string_opt_or_default<T: serde::de::DeserializeOwned + D
 pub fn deserialize_from_string_opt<T: serde::de::DeserializeOwned>(json_str: &Option<String>) -> Option<T> {
     json_str.as_ref().and_then(|s| deserialize_from_str_opt::<T>(s))
 }
+
+/// Converts an owned `serde_json::Value` into `T` via `serde_json::from_value`.
+///
+/// # Errors
+/// Returns `serde_json::Error` when the value's shape does not match `T`.
+pub fn deserialize_from_value<T>(value: serde_json::Value) -> Result<T, serde_json::Error>
+where
+    T: serde::de::DeserializeOwned,
+{
+    serde_json::from_value::<T>(value)
+}
+
+/// Converts a `serde_json::Value` into `T`, discarding the error and yielding `None` on mismatch.
+#[must_use]
+pub fn deserialize_from_value_opt<T: serde::de::DeserializeOwned>(value: serde_json::Value) -> Option<T> {
+    serde_json::from_value::<T>(value).ok()
+}
diff --git a/crates/agentic-core/tests/cassettes/record_cassette.py b/crates/agentic-core/tests/cassettes/record_cassette.py
new file mode 100644
index 0000000..9b36c68
--- /dev/null
+++ b/crates/agentic-core/tests/cassettes/record_cassette.py
@@ -0,0 +1,649 @@
+"""
+Interactive multi-turn cassette recorder.
+
+Starts an embedded recording proxy between this script and the upstream API,
+then drives multi-turn conversations so every request/response is captured
+into a YAML cassette.
+
+Wiring:
+
+  [this script] → [embedded proxy:<proxy-port>] → [OpenAI API | vLLM]
+                   (cassette recorded here)
+
+Modes:
+  conv        (default) Creates a conversation via POST /v1/conversations, then
+              passes conversation id on every turn.
+  isolation   Two independent conversations (each with its own conversation id)
+              recorded into the same cassette.
+  mixed       Creates a conversation; turn 1 uses conversation id, turns 2+
+              switch to previous_response_id only (drops conversation id).
+  responses   No conversation created. Chains turns purely via
+              previous_response_id. Supports --openai and --vllm backends.
+
+Usage:
+    python tests/cassettes/record_cassette.py --turns 2 --no-stream --output path/to/cassette.yaml
+    python tests/cassettes/record_cassette.py --turns 3 --mode isolation --no-stream --output path/to/cassette.yaml
+    python tests/cassettes/record_cassette.py --turns 3 --mode mixed --no-stream --output path/to/cassette.yaml
+    python tests/cassettes/record_cassette.py --turns 3 --mode conv --branch-from 1 --branch-turn-number 2 --no-stream --output path/to/cassette.yaml
+    python tests/cassettes/record_cassette.py --turns 5 --mode conv --branch-from 1 --branch-turn-number 3 --branch-from 2 --branch-turn-number 5 --no-stream --output path/to/cassette.yaml
+    python tests/cassettes/record_cassette.py --turns 2 --mode responses --vllm http://localhost:8000 --model Qwen/Qwen3-30B-A3B-FP8 --no-stream --output path/to/cassette.yaml
+"""
+
+import json
+import logging
+import os
+import socket
+import sys
+import threading
+import time
+from contextlib import asynccontextmanager
+from pathlib import Path
+from typing import Any, AsyncGenerator
+
+import click
+import httpx
+import uvicorn
+from fastapi import FastAPI, Request, Response
+from fastapi.responses import JSONResponse, StreamingResponse
+from httpx import AsyncClient
+from yaml import dump as yaml_dump, safe_load as yaml_load
+
+logging.basicConfig(level=logging.WARNING)
+logger = logging.getLogger("cassette_proxy")
+
+MODEL = "gpt-4o"
+PROXY_HOST = "127.0.0.1"
+PROXY_PORT = 7070
+TIMEOUT = 60 * 5
+
+EXCLUDED_RESPONSE_HEADERS = {
+    "content-encoding",
+    "content-length",
+    "transfer-encoding",
+    "connection",
+}
+
+RECORDED_HEADERS = {
+    "content-type",
+    "authorization",
+    "user-agent",
+    "accept",
+    "x-run-id",
+}
+
+
+def _mask_authorization(value: str) -> str:
+    """Redact the credential in an Authorization header value, keeping a Bearer scheme."""
+    if not value:
+        return value
+    # Only the scheme marker survives; the token itself is dropped from the returned value.
+    is_bearer = value.lower().startswith("bearer ")
+    return "Bearer ***" if is_bearer else "***"
+
+
+def _filter_request_headers(headers) -> dict:
+    """Keep only headers named in RECORDED_HEADERS (case-insensitive), masking authorization."""
+    kept = {k: v for k, v in headers.items() if k.lower() in RECORDED_HEADERS}
+    masked = {k: _mask_authorization(v) for k, v in kept.items() if k.lower() == "authorization"}
+    kept.update(masked)
+    return kept
+
+
+def _filter_response_headers(headers) -> dict:
+    """Copy headers, omitting any named in EXCLUDED_RESPONSE_HEADERS (case-insensitive)."""
+    relayed = ((k, v) for k, v in headers.items() if k.lower() not in EXCLUDED_RESPONSE_HEADERS)
+    return dict(relayed)
+
+
+def _turn_number(output_file: Path) -> int:
+    """Return the 1-based index the next recorded turn will get in the cassette file."""
+    if not output_file.exists():
+        return 1
+    text = output_file.read_text(encoding="utf-8")
+    # A blank file, an empty document, or one without "turns" all mean no turns yet.
+    data = yaml_load(text) if text.strip() else None
+    if not data or "turns" not in data:
+        return 1
+    return len(data["turns"]) + 1
+
+
+def _append_turn(output_file: Path, turn: dict[str, Any]) -> None:
+    """Append ``turn`` to the cassette's ``turns`` list, rewriting the whole YAML file."""
+    output_file.parent.mkdir(parents=True, exist_ok=True)
+    has_content = output_file.exists() and output_file.stat().st_size > 0
+    data = {}
+    if has_content:
+        data = yaml_load(output_file.read_text(encoding="utf-8")) or {}
+    turns: list = data.get("turns", [])
+    data["turns"] = turns + [turn]
+    with open(output_file, "w", encoding="utf-8") as f:
+        yaml_dump(data, f, allow_unicode=True, default_flow_style=False)
+
+
+@asynccontextmanager
+async def lifespan(app: FastAPI):
+    app.state.http_client = AsyncClient(timeout=TIMEOUT)
+    yield
+    await app.state.http_client.aclose()
+
+
+proxy_app = FastAPI(lifespan=lifespan)
+
+
+@proxy_app.api_route(
+    "/{path:path}",
+    methods=["GET", "POST", "PUT", "DELETE", "PATCH", "HEAD", "OPTIONS"],
+)
+async def proxy_request(request: Request, path: str) -> Response:
+    """Forward one request to the upstream host and append the exchange to the cassette.
+
+    Requests whose JSON body sets ``stream`` are relayed line by line and recorded as
+    ``sse`` lines; all others are buffered, recorded as ``body`` and relayed as received.
+    """
+    http_client: AsyncClient = request.app.state.http_client
+    target_host: str = request.app.state.target_host
+    output_file: Path = request.app.state.output_file
+    turn_num = _turn_number(output_file)
+    filename = f"t{turn_num}"
+    target_url = f"{target_host}/{path}"
+    if str(request.query_params):
+        target_url += f"?{request.query_params}"
+    raw_body = await request.body()
+    parsed_body = json.loads(raw_body.decode("utf-8")) if raw_body else {}
+    turn: dict[str, Any] = {
+        "filename": filename,
+        "request": {
+            "method": request.method,
+            "path": f"/{path}",
+            "query_params": dict(request.query_params),
+            "body": parsed_body,
+            "headers": _filter_request_headers(request.headers),
+        },
+        "response": {},
+    }
+    forward_headers = {k: v for k, v in request.headers.items() if k.lower() != "host"}
+
+    if isinstance(parsed_body, dict) and parsed_body.get("stream", False):
+
+        async def _stream() -> AsyncGenerator[Any, None]:
+            async with http_client.stream(
+                method=request.method,
+                url=target_url,
+                headers=forward_headers,
+                content=raw_body,
+                timeout=TIMEOUT,
+            ) as response:
+                yield response  # first item is the upstream response, taken via anext() below
+                if response.status_code != 200:
+                    chunk_str = (await response.aread()).decode()
+                    try:
+                        turn["response"]["body"] = json.loads(chunk_str)
+                    except Exception:
+                        turn["response"]["body"] = chunk_str
+                    yield chunk_str
+                else:
+                    sse_events: list[str] = []
+                    try:
+                        async for line in response.aiter_lines():
+                            chunk = f"{line}\n"
+                            yield chunk
+                            sse_events.append(chunk)
+                    except Exception as e:
+                        turn["response"]["stream_error"] = (
+                            f"{e.__class__.__name__}: {e}"
+                        )
+                    finally:
+                        turn["response"]["sse"] = sse_events
+                turn["response"]["status_code"] = response.status_code
+                turn["response"]["headers"] = {
+                    "content-type": response.headers.get(
+                        "content-type", "text/event-stream"
+                    )
+                }
+                _append_turn(output_file, turn)
+                print(f"  [recorded turn {turn_num} -> {output_file.name}]")
+
+        agen = _stream()
+        upstream = await anext(agen)
+        return StreamingResponse(
+            agen,
+            status_code=upstream.status_code,
+            headers=_filter_response_headers(upstream.headers),
+            media_type=upstream.headers.get("content-type", "text/event-stream"),
+        )
+
+    else:
+        response = await http_client.request(
+            method=request.method,
+            url=target_url,
+            headers=forward_headers,
+            content=raw_body,
+            timeout=TIMEOUT,
+        )
+        media_type = response.headers.get("content-type", "application/json")
+        body: Any = response.text
+        if response.status_code == 200 or "application/json" in media_type:
+            try:
+                body = json.loads(body)
+            except ValueError:
+                pass  # empty or non-JSON body (e.g. HEAD): record the raw text instead
+        turn["response"]["body"] = body
+        turn["response"]["status_code"] = response.status_code
+        turn["response"]["headers"] = {"content-type": media_type}
+        _append_turn(output_file, turn)
+        print(f"  [recorded turn {turn_num} -> {output_file.name}]")
+        return Response(
+            content=response.content,
+            status_code=response.status_code,
+            headers=_filter_response_headers(response.headers),
+            media_type=media_type,
+        )
+
+
+# ── proxy lifecycle ───────────────────────────────────────────────────────────
+
+
+def _start_proxy(output_file: Path, target_host: str, port: int) -> uvicorn.Server:
+    """Truncate the cassette, start the proxy in a daemon thread, and wait until it listens.
+
+    Raises click.ClickException if nothing accepts connections on the port in time.
+    """
+    output_file.parent.mkdir(parents=True, exist_ok=True)
+    output_file.write_text("", encoding="utf-8")
+    proxy_app.state.output_file = output_file
+    proxy_app.state.target_host = target_host
+    config = uvicorn.Config(proxy_app, host=PROXY_HOST, port=port, log_level="warning")
+    server = uvicorn.Server(config)
+    threading.Thread(target=server.run, daemon=True).start()
+    # TCP-only readiness check — no HTTP request forwarded to upstream
+    for _ in range(40):
+        try:
+            with socket.create_connection((PROXY_HOST, port), timeout=0.3):
+                return server
+        except OSError:
+            time.sleep(0.3)
+    server.should_exit = True
+    raise click.ClickException(f"Recording proxy did not start on {PROXY_HOST}:{port}.")
+
+
+def _stop_proxy(server: uvicorn.Server) -> None:
+    server.should_exit = True
+    time.sleep(0.5)
+
+
+def _create_conversation(client: httpx.Client, proxy_url: str) -> str:
+    resp = client.post(f"{proxy_url}/v1/conversations", json={}, timeout=30)
+    resp.raise_for_status()
+    conv_id = resp.json().get("id")
+    print(f"[conversation created: {conv_id}]")
+    return conv_id
+
+
+def _send_nonstreaming(client: httpx.Client, body: dict, proxy_url: str) -> str | None:
+    resp = client.post(f"{proxy_url}/v1/responses", json=body, timeout=300)
+    resp.raise_for_status()
+    data = resp.json()
+    print(f"\n[Response]\n{json.dumps(data, indent=2)}\n")
+    return data.get("id")
+
+
+def _send_streaming(client: httpx.Client, body: dict, proxy_url: str) -> str | None:
+    response_id = None
+    print("\n[Streaming response]")
+    with client.stream(
+        "POST", f"{proxy_url}/v1/responses", json=body, timeout=300
+    ) as resp:
+        resp.raise_for_status()
+        for line in resp.iter_lines():
+            if not line:
+                continue
+            print(line)
+            if line.startswith("data:") and line != "data: [DONE]":
+                try:
+                    payload = json.loads(line[5:].strip())
+                    if payload.get("type") == "response.completed":
+                        response_id = payload.get("response", {}).get("id")
+                except Exception:
+                    pass
+    print()
+    return response_id
+
+
+def _send(client: httpx.Client, body: dict, stream: bool, proxy_url: str) -> str | None:
+    return (
+        _send_streaming(client, body, proxy_url)
+        if stream
+        else _send_nonstreaming(client, body, proxy_url)
+    )
+
+
+def _prompt(label: str) -> str:
+    try:
+        return input(label).strip()
+    except (EOFError, KeyboardInterrupt):
+        print("\nAborted.")
+        sys.exit(0)
+
+
+def run_conv(
+    client: httpx.Client,
+    turns: int,
+    model: str,
+    stream: bool,
+    store: bool,
+    branches: list[tuple[int, int | None]],
+    proxy_url: str,
+) -> None:
+    conv_id = _create_conversation(client, proxy_url)
+    response_ids: dict[int, str] = {}
+    # map: branch_turn_number -> branch_from (which turn's response to use as previous)
+    branch_map: dict[int, int] = {}
+    extra_branches: list[int] = []  # branch_from values with no branch_turn_number
+    for branch_from, branch_turn_number in branches:
+        if branch_turn_number is not None:
+            branch_map[branch_turn_number] = branch_from
+        else:
+            extra_branches.append(branch_from)
+
+    previous_response_id: str | None = None
+    for turn in range(1, turns + 1):
+        if turn in branch_map:
+            branch_from = branch_map[turn]
+            if branch_from not in response_ids:
+                raise click.UsageError(
+                    f"--branch-from {branch_from} at turn {turn} has no recorded response "
+                    f"(available: {sorted(response_ids)})"
+                )
+            previous_response_id = response_ids[branch_from]
+            click.echo(
+                f"\n[Branch] turn {turn} chains from turn {branch_from} (response_id={previous_response_id})"
+            )
+        prompt = _prompt(f"Turn {turn}/{turns} — enter prompt: ")
+        body: dict = {"model": model, "input": prompt, "stream": stream, "store": store}
+        if previous_response_id:
+            body["previous_response_id"] = previous_response_id
+        else:
+            body["conversation"] = conv_id
+        response_id = _send(client, body, stream, proxy_url)
+        if response_id:
+            response_ids[turn] = response_id
+            previous_response_id = response_id
+
+    # branches without a branch_turn_number get one extra turn each
+    for b_idx, branch_from in enumerate(extra_branches, start=1):
+        if branch_from not in response_ids:
+            raise click.UsageError(
+                f"Extra branch {b_idx}: --branch-from {branch_from} has no recorded response "
+                f"(available: {sorted(response_ids)})"
+            )
+        branch_resp_id = response_ids[branch_from]
+        click.echo(
+            f"\n[Extra branch {b_idx}] from turn {branch_from} (response_id={branch_resp_id}), turn {turns + 1}"
+        )
+        prompt = _prompt(
+            f"Turn {turns + 1} (extra branch from turn {branch_from}) — enter prompt: "
+        )
+        body = {
+            "model": model,
+            "input": prompt,
+            "stream": stream,
+            "store": store,
+            "previous_response_id": branch_resp_id,
+            "conversation": conv_id,
+        }
+        _send(client, body, stream, proxy_url)
+
+
+def run_isolation(
+    client: httpx.Client,
+    turns: int,
+    model: str,
+    stream: bool,
+    store: bool,
+    proxy_url: str,
+) -> None:
+    for conv_label in ("A", "B"):
+        click.echo(f"\n--- Conversation {conv_label} ({turns} turns) ---")
+        conv_id = _create_conversation(client, proxy_url)
+        for turn in range(1, turns + 1):
+            prompt = _prompt(
+                f"Conv {conv_label} | Turn {turn}/{turns} — enter prompt: "
+            )
+            body: dict = {
+                "model": model,
+                "input": prompt,
+                "stream": stream,
+                "store": store,
+                "conversation": conv_id,
+            }
+            _send(client, body, stream, proxy_url)
+
+
+def run_mixed(
+    client: httpx.Client,
+    turns: int,
+    model: str,
+    stream: bool,
+    store: bool,
+    proxy_url: str,
+) -> None:
+    conv_id = _create_conversation(client, proxy_url)
+    previous_response_id: str | None = None
+
+    for turn in range(1, turns + 1):
+        prompt = _prompt(f"Turn {turn}/{turns} — enter prompt: ")
+        body: dict = {"model": model, "input": prompt, "stream": stream, "store": store}
+        if previous_response_id:
+            body["previous_response_id"] = previous_response_id
+        else:
+            body["conversation"] = conv_id
+        previous_response_id = _send(client, body, stream, proxy_url)
+
+
+def run_responses(
+    client: httpx.Client,
+    turns: int,
+    model: str,
+    stream: bool,
+    store: bool,
+    branches: list[tuple[int, int | None]],
+    proxy_url: str,
+) -> None:
+    response_ids: dict[int, str] = {}
+    branch_map: dict[int, int] = {}
+    extra_branches: list[int] = []
+    for branch_from, branch_turn_number in branches:
+        if branch_turn_number is not None:
+            branch_map[branch_turn_number] = branch_from
+        else:
+            extra_branches.append(branch_from)
+
+    previous_response_id: str | None = None
+    for turn in range(1, turns + 1):
+        if turn in branch_map:
+            branch_from = branch_map[turn]
+            if branch_from not in response_ids:
+                raise click.UsageError(
+                    f"--branch-from {branch_from} at turn {turn} has no recorded response "
+                    f"(available: {sorted(response_ids)})"
+                )
+            previous_response_id = response_ids[branch_from]
+            click.echo(
+                f"\n[Branch] turn {turn} chains from turn {branch_from} (response_id={previous_response_id})"
+            )
+        prompt = _prompt(f"Turn {turn}/{turns} — enter prompt: ")
+        body: dict = {"model": model, "input": prompt, "stream": stream, "store": store}
+        if previous_response_id and store:
+            body["previous_response_id"] = previous_response_id
+        response_id = _send(client, body, stream, proxy_url)
+        previous_response_id = response_id if store else None
+        if response_id:
+            response_ids[turn] = response_id
+
+    for b_idx, branch_from in enumerate(extra_branches, start=1):
+        if branch_from not in response_ids:
+            raise click.UsageError(
+                f"Extra branch {b_idx}: --branch-from {branch_from} has no recorded response "
+                f"(available: {sorted(response_ids)})"
+            )
+        branch_resp_id = response_ids[branch_from]
+        click.echo(
+            f"\n[Extra branch {b_idx}] from turn {branch_from} (response_id={branch_resp_id}), turn {turns + 1}"
+        )
+        prompt = _prompt(
+            f"Turn {turns + 1} (extra branch from turn {branch_from}) — enter prompt: "
+        )
+        body = {
+            "model": model,
+            "input": prompt,
+            "stream": stream,
+            "store": store,
+            "previous_response_id": branch_resp_id,
+        }
+        _send(client, body, stream, proxy_url)
+
+
+# ── main ──────────────────────────────────────────────────────────────────────
+
+
+@click.command(context_settings={"help_option_names": ["-h", "--help"]})
+@click.option(
+    "--turns", "-n", required=True, type=int, help="Number of turns to record."
+)
+@click.option(
+    "--output",
+    "-o",
+    required=True,
+    type=click.Path(),
+    help="Output cassette YAML path.",
+)
+@click.option(
+    "--mode",
+    type=click.Choice(["conv", "isolation", "mixed", "responses"]),
+    default="conv",
+    show_default=True,
+    help="Recording mode.",
+)
+@click.option(
+    "--branch-from",
+    type=int,
+    multiple=True,
+    metavar="TURN",
+    help="Rewind to this turn's response (repeatable, one per branch).",
+)
+@click.option(
+    "--branch-turn-number",
+    type=int,
+    multiple=True,
+    metavar="TURN",
+    help="First turn number for the corresponding branch (repeatable, pairs with --branch-from).",
+)
+@click.option(
+    "--stream/--no-stream",
+    default=True,
+    show_default=True,
+    help="Use streaming responses.",
+)
+@click.option(
+    "--model", default=MODEL, show_default=True, help="Model name to pass in requests."
+)
+@click.option(
+    "--no-store", is_flag=True, default=False, help="Set store=false in requests."
+)
+@click.option(
+    "--proxy-port",
+    type=int,
+    default=PROXY_PORT,
+    show_default=True,
+    help="Local port for the embedded recording proxy.",
+)
+@click.option(
+    "--openai",
+    "openai_url",
+    metavar="URL",
+    default=None,
+    help="OpenAI upstream URL (default https://api.openai.com). Reads OPENAI_API_KEY.",
+)
+@click.option(
+    "--vllm",
+    "vllm_url",
+    metavar="URL",
+    default=None,
+    help="vLLM upstream URL, e.g. http://localhost:8000 (responses mode only, no auth).",
+)
+def main(
+    turns: int,
+    output: str,
+    mode: str,
+    branch_from: tuple[int, ...],
+    branch_turn_number: tuple[int, ...],
+    stream: bool,
+    model: str,
+    no_store: bool,
+    proxy_port: int,
+    openai_url: str | None,
+    vllm_url: str | None,
+) -> None:
+    """Interactive multi-turn cassette recorder (proxy embedded)."""
+    if branch_turn_number and not branch_from:
+        raise click.UsageError("--branch-turn-number requires --branch-from.")
+    if len(branch_turn_number) > len(branch_from):
+        raise click.UsageError(
+            "More --branch-turn-number values than --branch-from values."
+        )
+    # Pair each branch-from with its branch-turn-number (None if not provided)
+    branches: list[tuple[int, int | None]] = [
+        (bf, branch_turn_number[i] if i < len(branch_turn_number) else None)
+        for i, bf in enumerate(branch_from)
+    ]
+    if vllm_url and openai_url:
+        raise click.UsageError("--openai and --vllm are mutually exclusive.")
+    if vllm_url and mode != "responses":
+        raise click.UsageError(
+            f"--vllm is only supported with --mode responses (got --mode {mode})."
+        )
+
+    if vllm_url:
+        target = vllm_url.rstrip("/")
+        headers: dict = {}
+        backend_label = f"vLLM:   {target}"
+    else:
+        target = (openai_url or "https://api.openai.com").rstrip("/")
+        api_key = os.environ.get("OPENAI_API_KEY", "")
+        if not api_key:
+            raise click.ClickException(
+                "OPENAI_API_KEY environment variable is not set."
+            )
+        headers = {"Authorization": f"Bearer {api_key}"}
+        backend_label = f"OpenAI: {target}"
+
+    output_file = Path(output).resolve()
+    proxy_url = f"http://{PROXY_HOST}:{proxy_port}"
+    store = not no_store
+
+    click.echo(f"Mode: {mode} | Turns: {turns} | Stream: {stream} | Model: {model}")
+    click.echo(f"Output:  {output_file}")
+    click.echo(backend_label)
+    click.echo(f"Proxy:   {proxy_url}  (requests go through here for recording)")
+
+    server = _start_proxy(output_file, target, proxy_port)
+    click.echo(f"Proxy ready on {proxy_url}\n")
+
+    try:
+        with httpx.Client(headers=headers) as client:
+            if mode == "conv":
+                run_conv(client, turns, model, stream, store, branches, proxy_url)
+            elif mode == "isolation":
+                run_isolation(client, turns, model, stream, store, proxy_url)
+            elif mode == "mixed":
+                run_mixed(client, turns, model, stream, store, proxy_url)
+            elif mode == "responses":
+                run_responses(client, turns, model, stream, store, branches, proxy_url)
+    finally:
+        _stop_proxy(server)
+
+    click.echo(f"\nAll turns recorded -> {output_file}")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/crates/agentic-core/tests/cassettes/record_text_only_cassettes.sh b/crates/agentic-core/tests/cassettes/record_text_only_cassettes.sh
new file mode 100755
index 0000000..e7a7975
--- /dev/null
+++ b/crates/agentic-core/tests/cassettes/record_text_only_cassettes.sh
@@ -0,0 +1,248 @@
+#!/usr/bin/env bash
+# record_text_only_cassettes.sh
+#
+# Records all cassettes (responses + conversation) in sequence.
+# The proxy is embedded inside record_cassette.py — no separate proxy needed.
+#
+# Prerequisites:
+#   - OPENAI_API_KEY must be set in the environment
+#
+# Usage:
+#   bash tests/cassettes/record_text_only_cassettes.sh
+#   MODEL=gpt-4.1-mini bash tests/cassettes/record_text_only_cassettes.sh
+
+set -euo pipefail
+
+SCRIPTS_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+BASE_DIR="$SCRIPTS_DIR/text_only"
+RESPONSES_DIR="$BASE_DIR/responses"
+CONV_DIR="$BASE_DIR/conversation"
+MODEL="${MODEL:-gpt-4o}"
+MODEL_SLUG="$(echo "$MODEL" | tr '/: ' '---')"
+
+green() { printf '\033[32m%s\033[0m\n' "$*"; }
+bold()  { printf '\033[1m%s\033[0m\n'  "$*"; }
+
+next_test() {
+    echo
+    read -rp "Press ENTER when ready for the next test..."
+    echo
+}
+
+mkdir -p "$RESPONSES_DIR" "$CONV_DIR"
+
+# ══════════════════════════════════════════════════════════════════
+# RESPONSES (previous_response_id chaining, no conversation object)
+# ══════════════════════════════════════════════════════════════════
+
+# ── Test 1: single-turn non-streaming ────────────────────────────
+
+bold "═══════════════════════════════════════════════════════════════"
+bold "Test 1 of 10 — resp-single-nonstreaming"
+bold "  1 turn, non-streaming"
+bold "═══════════════════════════════════════════════════════════════"
+bold "Prompts to enter:"
+echo "  Turn 1: Reply with exactly one word: HELLO"
+echo
+python "$SCRIPTS_DIR/record_cassette.py" \
+    --mode responses \
+    --turns 1 \
+    --no-stream \
+    --model "$MODEL" \
+    --output "$RESPONSES_DIR/resp-single-${MODEL_SLUG}-nonstreaming.yaml"
+green "✓ Test 1 done."
+next_test
+
+# ── Test 2: single-turn streaming ────────────────────────────────
+
+bold "═══════════════════════════════════════════════════════════════"
+bold "Test 2 of 10 — resp-single-streaming"
+bold "  1 turn, streaming"
+bold "═══════════════════════════════════════════════════════════════"
+bold "Prompts to enter:"
+echo "  Turn 1: Reply with exactly one word: WORLD"
+echo
+python "$SCRIPTS_DIR/record_cassette.py" \
+    --mode responses \
+    --turns 1 \
+    --model "$MODEL" \
+    --output "$RESPONSES_DIR/resp-single-${MODEL_SLUG}-streaming.yaml"
+green "✓ Test 2 done."
+next_test
+
+# ── Test 3: two-turn non-streaming ───────────────────────────────
+
+bold "═══════════════════════════════════════════════════════════════"
+bold "Test 3 of 10 — resp-two-turn-nonstreaming"
+bold "  2 turns, non-streaming, previous_response_id chaining"
+bold "═══════════════════════════════════════════════════════════════"
+bold "Prompts to enter:"
+echo "  Turn 1: Remember the word APPLE. Just say: OK"
+echo "  Turn 2: What word did I ask you to remember?"
+echo
+python "$SCRIPTS_DIR/record_cassette.py" \
+    --mode responses \
+    --turns 2 \
+    --no-stream \
+    --model "$MODEL" \
+    --output "$RESPONSES_DIR/resp-two-turn-${MODEL_SLUG}-nonstreaming.yaml"
+green "✓ Test 3 done."
+next_test
+
+# ── Test 4: two-turn streaming ────────────────────────────────────
+
+bold "═══════════════════════════════════════════════════════════════"
+bold "Test 4 of 10 — resp-two-turn-streaming"
+bold "  2 turns, streaming, previous_response_id chaining"
+bold "═══════════════════════════════════════════════════════════════"
+bold "Prompts to enter:"
+echo "  Turn 1: Remember the word BANANA. Just say: OK"
+echo "  Turn 2: What word did I ask you to remember?"
+echo
+python "$SCRIPTS_DIR/record_cassette.py" \
+    --mode responses \
+    --turns 2 \
+    --model "$MODEL" \
+    --output "$RESPONSES_DIR/resp-two-turn-${MODEL_SLUG}-streaming.yaml"
+green "✓ Test 4 done."
+next_test
+
+# ── Test 5: store=false — follow-up should fail ───────────────────
+
+bold "═══════════════════════════════════════════════════════════════"
+bold "Test 5 of 10 — resp-no-store-nonstreaming"
+bold "  Turn 1: store=false | Turn 2: previous_response_id → expect error"
+bold "═══════════════════════════════════════════════════════════════"
+bold "Prompts to enter:"
+echo "  Turn 1: Say: NOT STORED"
+echo "  Turn 2: follow up"
+echo
+python "$SCRIPTS_DIR/record_cassette.py" \
+    --mode responses \
+    --turns 2 \
+    --no-stream \
+    --no-store \
+    --model "$MODEL" \
+    --output "$RESPONSES_DIR/resp-no-store-${MODEL_SLUG}-nonstreaming.yaml"
+green "✓ Test 5 done."
+next_test
+
+# ══════════════════════════════════════════════════════════════════
+# CONVERSATION (POST /v1/conversations + conversation id chaining)
+# ══════════════════════════════════════════════════════════════════
+
+# ── Test 6: 2-turn, non-streaming, conversation ───────────────────
+
+bold "═══════════════════════════════════════════════════════════════"
+bold "Test 6 of 10 — conv-two-turn-nonstreaming"
+bold "  2 turns, non-streaming, conversation created + chained"
+bold "═══════════════════════════════════════════════════════════════"
+bold "Prompts to enter:"
+echo "  Turn 1: Remember the word CHERRY. Just say: OK"
+echo "  Turn 2: What word did I ask you to remember?"
+echo
+python "$SCRIPTS_DIR/record_cassette.py" \
+    --mode conv \
+    --turns 2 \
+    --no-stream \
+    --model "$MODEL" \
+    --output "$CONV_DIR/conv-two-turn-${MODEL_SLUG}-nonstreaming.yaml"
+green "✓ Test 6 done."
+next_test
+
+# ── Test 7: 2-turn, streaming, conversation ───────────────────────
+
+bold "═══════════════════════════════════════════════════════════════"
+bold "Test 7 of 10 — conv-two-turn-streaming"
+bold "  2 turns, streaming, conversation created + chained"
+bold "═══════════════════════════════════════════════════════════════"
+bold "Prompts to enter:"
+echo "  Turn 1: Remember the word MANGO. Just say: OK"
+echo "  Turn 2: What word did I ask you to remember?"
+echo
+python "$SCRIPTS_DIR/record_cassette.py" \
+    --mode conv \
+    --turns 2 \
+    --model "$MODEL" \
+    --output "$CONV_DIR/conv-two-turn-${MODEL_SLUG}-streaming.yaml"
+green "✓ Test 7 done."
+next_test
+
+# ── Test 8: isolation — 2 independent conversations ──────────────
+
+bold "═══════════════════════════════════════════════════════════════"
+bold "Test 8 of 10 — conv-isolation-nonstreaming"
+bold "  2 independent conversations (3 turns each), non-streaming"
+bold "  Verifies conversations do not share context"
+bold "═══════════════════════════════════════════════════════════════"
+bold "Prompts to enter:"
+echo "  Conv A | Turn 1: Remember the word ORANGE. Say: OK"
+echo "  Conv A | Turn 2: Also remember the word VIOLET. Say: OK"
+echo "  Conv A | Turn 3: List every word I asked you to remember, in order, one per line."
+echo "  Conv B | Turn 1: Remember the word PURPLE. Say: OK"
+echo "  Conv B | Turn 2: Also remember the word INDIGO. Say: OK"
+echo "  Conv B | Turn 3: List every word I asked you to remember, in order, one per line."
+echo
+python "$SCRIPTS_DIR/record_cassette.py" \
+    --mode isolation \
+    --turns 3 \
+    --no-stream \
+    --model "$MODEL" \
+    --output "$CONV_DIR/conv-isolation-${MODEL_SLUG}-nonstreaming.yaml"
+green "✓ Test 8 done."
+next_test
+
+# ── Test 9: branch off turn 1 after 3-turn conversation ──────────
+
+bold "═══════════════════════════════════════════════════════════════"
+bold "Test 9 of 10 — conv-branch-nonstreaming (6D)"
+bold "  Turns 1-3: conversation chain | Turn 4: branch off turn 1"
+bold "  Math: 2+2=4, +1=5, +2=7 | branch: +1 from turn-1 = 5"
+bold "═══════════════════════════════════════════════════════════════"
+bold "Prompts to enter:"
+echo "  Turn 1: What is 2+2? Reply with just the number."
+echo "  Turn 2: Add 1 to your previous answer. Reply with just the number."
+echo "  Turn 3: Add 2 to your previous answer. Reply with just the number."
+echo "  Branch (off turn 1): Add 1 to your previous answer. Reply with just the number."
+echo
+python "$SCRIPTS_DIR/record_cassette.py" \
+    --mode conv \
+    --turns 3 \
+    --branch-from 1 \
+    --no-stream \
+    --model "$MODEL" \
+    --output "$CONV_DIR/conv-multi-turn-single-branch-${MODEL_SLUG}-nonstreaming.yaml"
+green "✓ Test 9 done."
+next_test
+
+# ── Test 10: 5-turn math, branch at turn 1, continue from turn 3 ──
+
+bold "═══════════════════════════════════════════════════════════════"
+bold "Test 10 of 10 — conv-branch-turn-number-nonstreaming"
+bold "  Turns 1-5: conversation chain | 2 branches"
+bold "  Turn1=4, Turn2(from1)=6 | Branch1 turn3(from1)=5, turn4(from3)=8"
+bold "  Branch2 turn5(from2)=10"
+bold "═══════════════════════════════════════════════════════════════"
+bold "Prompts to enter:"
+echo "  Turn 1 (answer=4): What is 2+2? Reply with just the number."
+echo "  Turn 2 (from turn 1, answer=4+2): Add 2 to your previous answer. Reply with just the number."
+echo "  Branch 1 | turn 3 (from turn 1, answer=4+1): Add 1. Reply with just the number."
+echo "  Branch 1 | turn 4 (from turn 3, answer=5+3): Add 3 to your previous answer. Reply with just the number."
+echo "  Branch 2 | turn 5 (from turn 2, answer=6+4): Add 4. Reply with just the number."
+echo
+python "$SCRIPTS_DIR/record_cassette.py" \
+    --mode conv \
+    --turns 5 \
+    --branch-from 1 \
+    --branch-turn-number 3 \
+    --branch-from 2 \
+    --branch-turn-number 5 \
+    --no-stream \
+    --model "$MODEL" \
+    --output "$CONV_DIR/conv-multi-branch-multi-turn-${MODEL_SLUG}-nonstreaming.yaml"
+green "✓ Test 10 done."
+
+echo
+green "════════════════════════════════════════════════════════════════"
+green "All 10 cassettes recorded."
+green "════════════════════════════════════════════════════════════════"
diff --git a/crates/agentic-core/tests/cassettes/text_only/conversation/conv-isolation-gpt-4o-nonstreaming.yaml b/crates/agentic-core/tests/cassettes/text_only/conversation/conv-isolation-gpt-4o-nonstreaming.yaml
new file mode 100644
index 0000000..f95b199
--- /dev/null
+++ b/crates/agentic-core/tests/cassettes/text_only/conversation/conv-isolation-gpt-4o-nonstreaming.yaml
@@ -0,0 +1,517 @@
+turns:
+- filename: t1
+  request:
+    body: {}
+    headers:
+      accept: '*/*'
+      authorization: Bearer ***
+      content-type: application/json
+      user-agent: python-httpx/0.28.1
+    method: POST
+    path: /v1/conversations
+    query_params: {}
+  response:
+    body:
+      created_at: 1776764559
+      id: conv_69e7468fe7108195abd87e93f47a6f8f091801b651b1d687
+      metadata: {}
+      object: conversation
+    headers:
+      content-type: application/json
+    status_code: 200
+- filename: t2
+  request:
+    body:
+      conversation: conv_69e7468fe7108195abd87e93f47a6f8f091801b651b1d687
+      input: 'Remember the word ORANGE. Say: OK'
+      model: gpt-4o
+      store: true
+      stream: false
+    headers:
+      accept: '*/*'
+      authorization: Bearer ***
+      content-type: application/json
+      user-agent: python-httpx/0.28.1
+    method: POST
+    path: /v1/responses
+    query_params: {}
+  response:
+    body:
+      background: false
+      billing:
+        payer: developer
+      completed_at: 1776764565
+      conversation:
+        id: conv_69e7468fe7108195abd87e93f47a6f8f091801b651b1d687
+      created_at: 1776764564
+      error: null
+      frequency_penalty: 0.0
+      id: resp_091801b651b1d6870069e74694cc1c8195b1e9477abfb4dcaf
+      incomplete_details: null
+      instructions: null
+      max_output_tokens: null
+      max_tool_calls: null
+      metadata: {}
+      model: gpt-4o-2024-08-06
+      object: response
+      output:
+      - content:
+        - annotations: []
+          logprobs: []
+          text: OK
+          type: output_text
+        id: msg_091801b651b1d6870069e746954b3c8195b7bafba493c40e4b
+        role: assistant
+        status: completed
+        type: message
+      parallel_tool_calls: true
+      presence_penalty: 0.0
+      previous_response_id: null
+      prompt_cache_key: null
+      prompt_cache_retention: in_memory
+      reasoning:
+        effort: null
+        summary: null
+      safety_identifier: null
+      service_tier: default
+      status: completed
+      store: true
+      temperature: 1.0
+      text:
+        format:
+          type: text
+        verbosity: medium
+      tool_choice: auto
+      tools: []
+      top_logprobs: 0
+      top_p: 1.0
+      truncation: disabled
+      usage:
+        input_tokens: 16
+        input_tokens_details:
+          cached_tokens: 0
+        output_tokens: 2
+        output_tokens_details:
+          reasoning_tokens: 0
+        total_tokens: 18
+      user: null
+    headers:
+      content-type: application/json
+    status_code: 200
+- filename: t3
+  request:
+    body:
+      conversation: conv_69e7468fe7108195abd87e93f47a6f8f091801b651b1d687
+      input: 'Also remember the word VIOLET. Say: OK'
+      model: gpt-4o
+      store: true
+      stream: false
+    headers:
+      accept: '*/*'
+      authorization: Bearer ***
+      content-type: application/json
+      user-agent: python-httpx/0.28.1
+    method: POST
+    path: /v1/responses
+    query_params: {}
+  response:
+    body:
+      background: false
+      billing:
+        payer: developer
+      completed_at: 1776764579
+      conversation:
+        id: conv_69e7468fe7108195abd87e93f47a6f8f091801b651b1d687
+      created_at: 1776764579
+      error: null
+      frequency_penalty: 0.0
+      id: resp_091801b651b1d6870069e746a371308195a854e5fa5d3e845f
+      incomplete_details: null
+      instructions: null
+      max_output_tokens: null
+      max_tool_calls: null
+      metadata: {}
+      model: gpt-4o-2024-08-06
+      object: response
+      output:
+      - content:
+        - annotations: []
+          logprobs: []
+          text: OK
+          type: output_text
+        id: msg_091801b651b1d6870069e746a3edf081958e8626b6fb3abbce
+        role: assistant
+        status: completed
+        type: message
+      parallel_tool_calls: true
+      presence_penalty: 0.0
+      previous_response_id: null
+      prompt_cache_key: null
+      prompt_cache_retention: in_memory
+      reasoning:
+        effort: null
+        summary: null
+      safety_identifier: null
+      service_tier: default
+      status: completed
+      store: true
+      temperature: 1.0
+      text:
+        format:
+          type: text
+        verbosity: medium
+      tool_choice: auto
+      tools: []
+      top_logprobs: 0
+      top_p: 1.0
+      truncation: disabled
+      usage:
+        input_tokens: 36
+        input_tokens_details:
+          cached_tokens: 0
+        output_tokens: 2
+        output_tokens_details:
+          reasoning_tokens: 0
+        total_tokens: 38
+      user: null
+    headers:
+      content-type: application/json
+    status_code: 200
+- filename: t4
+  request:
+    body:
+      conversation: conv_69e7468fe7108195abd87e93f47a6f8f091801b651b1d687
+      input: List every word I asked you to remember, in order, one per line
+      model: gpt-4o
+      store: true
+      stream: false
+    headers:
+      accept: '*/*'
+      authorization: Bearer ***
+      content-type: application/json
+      user-agent: python-httpx/0.28.1
+    method: POST
+    path: /v1/responses
+    query_params: {}
+  response:
+    body:
+      background: false
+      billing:
+        payer: developer
+      completed_at: 1776764588
+      conversation:
+        id: conv_69e7468fe7108195abd87e93f47a6f8f091801b651b1d687
+      created_at: 1776764587
+      error: null
+      frequency_penalty: 0.0
+      id: resp_091801b651b1d6870069e746abb2888195a22209a564c249cc
+      incomplete_details: null
+      instructions: null
+      max_output_tokens: null
+      max_tool_calls: null
+      metadata: {}
+      model: gpt-4o-2024-08-06
+      object: response
+      output:
+      - content:
+        - annotations: []
+          logprobs: []
+          text: "ORANGE  \nVIOLET"
+          type: output_text
+        id: msg_091801b651b1d6870069e746ac16e081958a232d74c3e3e6e7
+        role: assistant
+        status: completed
+        type: message
+      parallel_tool_calls: true
+      presence_penalty: 0.0
+      previous_response_id: null
+      prompt_cache_key: null
+      prompt_cache_retention: in_memory
+      reasoning:
+        effort: null
+        summary: null
+      safety_identifier: null
+      service_tier: default
+      status: completed
+      store: true
+      temperature: 1.0
+      text:
+        format:
+          type: text
+        verbosity: medium
+      tool_choice: auto
+      tools: []
+      top_logprobs: 0
+      top_p: 1.0
+      truncation: disabled
+      usage:
+        input_tokens: 60
+        input_tokens_details:
+          cached_tokens: 0
+        output_tokens: 7
+        output_tokens_details:
+          reasoning_tokens: 0
+        total_tokens: 67
+      user: null
+    headers:
+      content-type: application/json
+    status_code: 200
+- filename: t5
+  request:
+    body: {}
+    headers:
+      accept: '*/*'
+      authorization: Bearer ***
+      content-type: application/json
+      user-agent: python-httpx/0.28.1
+    method: POST
+    path: /v1/conversations
+    query_params: {}
+  response:
+    body:
+      created_at: 1776764588
+      id: conv_69e746ac94608190b94008b3021a3c6e0745b162f9c4b9d3
+      metadata: {}
+      object: conversation
+    headers:
+      content-type: application/json
+    status_code: 200
+- filename: t6
+  request:
+    body:
+      conversation: conv_69e746ac94608190b94008b3021a3c6e0745b162f9c4b9d3
+      input: 'Remember the word PURPLE. Say: OK'
+      model: gpt-4o
+      store: true
+      stream: false
+    headers:
+      accept: '*/*'
+      authorization: Bearer ***
+      content-type: application/json
+      user-agent: python-httpx/0.28.1
+    method: POST
+    path: /v1/responses
+    query_params: {}
+  response:
+    body:
+      background: false
+      billing:
+        payer: developer
+      completed_at: 1776764597
+      conversation:
+        id: conv_69e746ac94608190b94008b3021a3c6e0745b162f9c4b9d3
+      created_at: 1776764596
+      error: null
+      frequency_penalty: 0.0
+      id: resp_0745b162f9c4b9d30069e746b4988481908ebf431c400142dc
+      incomplete_details: null
+      instructions: null
+      max_output_tokens: null
+      max_tool_calls: null
+      metadata: {}
+      model: gpt-4o-2024-08-06
+      object: response
+      output:
+      - content:
+        - annotations: []
+          logprobs: []
+          text: OK
+          type: output_text
+        id: msg_0745b162f9c4b9d30069e746b51358819096cfbadec79c558e
+        role: assistant
+        status: completed
+        type: message
+      parallel_tool_calls: true
+      presence_penalty: 0.0
+      previous_response_id: null
+      prompt_cache_key: null
+      prompt_cache_retention: in_memory
+      reasoning:
+        effort: null
+        summary: null
+      safety_identifier: null
+      service_tier: default
+      status: completed
+      store: true
+      temperature: 1.0
+      text:
+        format:
+          type: text
+        verbosity: medium
+      tool_choice: auto
+      tools: []
+      top_logprobs: 0
+      top_p: 1.0
+      truncation: disabled
+      usage:
+        input_tokens: 16
+        input_tokens_details:
+          cached_tokens: 0
+        output_tokens: 2
+        output_tokens_details:
+          reasoning_tokens: 0
+        total_tokens: 18
+      user: null
+    headers:
+      content-type: application/json
+    status_code: 200
+- filename: t7
+  request:
+    body:
+      conversation: conv_69e746ac94608190b94008b3021a3c6e0745b162f9c4b9d3
+      input: 'Also remember the word INDIGO. Say: OK'
+      model: gpt-4o
+      store: true
+      stream: false
+    headers:
+      accept: '*/*'
+      authorization: Bearer ***
+      content-type: application/json
+      user-agent: python-httpx/0.28.1
+    method: POST
+    path: /v1/responses
+    query_params: {}
+  response:
+    body:
+      background: false
+      billing:
+        payer: developer
+      completed_at: 1776764603
+      conversation:
+        id: conv_69e746ac94608190b94008b3021a3c6e0745b162f9c4b9d3
+      created_at: 1776764602
+      error: null
+      frequency_penalty: 0.0
+      id: resp_0745b162f9c4b9d30069e746bad954819098ef762241bf18ae
+      incomplete_details: null
+      instructions: null
+      max_output_tokens: null
+      max_tool_calls: null
+      metadata: {}
+      model: gpt-4o-2024-08-06
+      object: response
+      output:
+      - content:
+        - annotations: []
+          logprobs: []
+          text: OK
+          type: output_text
+        id: msg_0745b162f9c4b9d30069e746bbcba88190802fd278cf6f9f12
+        role: assistant
+        status: completed
+        type: message
+      parallel_tool_calls: true
+      presence_penalty: 0.0
+      previous_response_id: null
+      prompt_cache_key: null
+      prompt_cache_retention: in_memory
+      reasoning:
+        effort: null
+        summary: null
+      safety_identifier: null
+      service_tier: default
+      status: completed
+      store: true
+      temperature: 1.0
+      text:
+        format:
+          type: text
+        verbosity: medium
+      tool_choice: auto
+      tools: []
+      top_logprobs: 0
+      top_p: 1.0
+      truncation: disabled
+      usage:
+        input_tokens: 35
+        input_tokens_details:
+          cached_tokens: 0
+        output_tokens: 2
+        output_tokens_details:
+          reasoning_tokens: 0
+        total_tokens: 37
+      user: null
+    headers:
+      content-type: application/json
+    status_code: 200
+- filename: t8
+  request:
+    body:
+      conversation: conv_69e746ac94608190b94008b3021a3c6e0745b162f9c4b9d3
+      input: List every word I asked you to remember, in order, one per line.
+      model: gpt-4o
+      store: true
+      stream: false
+    headers:
+      accept: '*/*'
+      authorization: Bearer ***
+      content-type: application/json
+      user-agent: python-httpx/0.28.1
+    method: POST
+    path: /v1/responses
+    query_params: {}
+  response:
+    body:
+      background: false
+      billing:
+        payer: developer
+      completed_at: 1776764610
+      conversation:
+        id: conv_69e746ac94608190b94008b3021a3c6e0745b162f9c4b9d3
+      created_at: 1776764609
+      error: null
+      frequency_penalty: 0.0
+      id: resp_0745b162f9c4b9d30069e746c16ad88190862312ba7d6a2ba1
+      incomplete_details: null
+      instructions: null
+      max_output_tokens: null
+      max_tool_calls: null
+      metadata: {}
+      model: gpt-4o-2024-08-06
+      object: response
+      output:
+      - content:
+        - annotations: []
+          logprobs: []
+          text: '- PURPLE
+
+            - INDIGO'
+          type: output_text
+        id: msg_0745b162f9c4b9d30069e746c22d9481909480ef82bf8f4696
+        role: assistant
+        status: completed
+        type: message
+      parallel_tool_calls: true
+      presence_penalty: 0.0
+      previous_response_id: null
+      prompt_cache_key: null
+      prompt_cache_retention: in_memory
+      reasoning:
+        effort: null
+        summary: null
+      safety_identifier: null
+      service_tier: default
+      status: completed
+      store: true
+      temperature: 1.0
+      text:
+        format:
+          type: text
+        verbosity: medium
+      tool_choice: auto
+      tools: []
+      top_logprobs: 0
+      top_p: 1.0
+      truncation: disabled
+      usage:
+        input_tokens: 60
+        input_tokens_details:
+          cached_tokens: 0
+        output_tokens: 8
+        output_tokens_details:
+          reasoning_tokens: 0
+        total_tokens: 68
+      user: null
+    headers:
+      content-type: application/json
+    status_code: 200
diff --git a/crates/agentic-core/tests/cassettes/text_only/conversation/conv-multi-branch-multi-turn-gpt-4o-nonstreaming.yaml b/crates/agentic-core/tests/cassettes/text_only/conversation/conv-multi-branch-multi-turn-gpt-4o-nonstreaming.yaml
new file mode 100644
index 0000000..c6803c1
--- /dev/null
+++ b/crates/agentic-core/tests/cassettes/text_only/conversation/conv-multi-branch-multi-turn-gpt-4o-nonstreaming.yaml
@@ -0,0 +1,408 @@
+turns:
+- filename: t1
+  request:
+    body: {}
+    headers:
+      accept: '*/*'
+      authorization: Bearer ***
+      content-type: application/json
+      user-agent: python-httpx/0.28.1
+    method: POST
+    path: /v1/conversations
+    query_params: {}
+  response:
+    body:
+      created_at: 1776767439
+      id: conv_69e751cf3ed08194977fe0915859ea4e01ffa34fbd4eb387
+      metadata: {}
+      object: conversation
+    headers:
+      content-type: application/json
+    status_code: 200
+- filename: t2
+  request:
+    body:
+      conversation: conv_69e751cf3ed08194977fe0915859ea4e01ffa34fbd4eb387
+      input: What is 2+2? Reply with just the number.
+      model: gpt-4o
+      store: true
+      stream: false
+    headers:
+      accept: '*/*'
+      authorization: Bearer ***
+      content-type: application/json
+      user-agent: python-httpx/0.28.1
+    method: POST
+    path: /v1/responses
+    query_params: {}
+  response:
+    body:
+      background: false
+      billing:
+        payer: developer
+      completed_at: 1776767449
+      conversation:
+        id: conv_69e751cf3ed08194977fe0915859ea4e01ffa34fbd4eb387
+      created_at: 1776767448
+      error: null
+      frequency_penalty: 0.0
+      id: resp_01ffa34fbd4eb3870069e751d87aa8819481fce10ac3adc2cd
+      incomplete_details: null
+      instructions: null
+      max_output_tokens: null
+      max_tool_calls: null
+      metadata: {}
+      model: gpt-4o-2024-08-06
+      object: response
+      output:
+      - content:
+        - annotations: []
+          logprobs: []
+          text: '4'
+          type: output_text
+        id: msg_01ffa34fbd4eb3870069e751d92f788194b527e18365b03b21
+        role: assistant
+        status: completed
+        type: message
+      parallel_tool_calls: true
+      presence_penalty: 0.0
+      previous_response_id: null
+      prompt_cache_key: null
+      prompt_cache_retention: in_memory
+      reasoning:
+        effort: null
+        summary: null
+      safety_identifier: null
+      service_tier: default
+      status: completed
+      store: true
+      temperature: 1.0
+      text:
+        format:
+          type: text
+        verbosity: medium
+      tool_choice: auto
+      tools: []
+      top_logprobs: 0
+      top_p: 1.0
+      truncation: disabled
+      usage:
+        input_tokens: 20
+        input_tokens_details:
+          cached_tokens: 0
+        output_tokens: 2
+        output_tokens_details:
+          reasoning_tokens: 0
+        total_tokens: 22
+      user: null
+    headers:
+      content-type: application/json
+    status_code: 200
+- filename: t3
+  request:
+    body:
+      input: Add 2 to your previous answer. Reply with just the number
+      model: gpt-4o
+      previous_response_id: resp_01ffa34fbd4eb3870069e751d87aa8819481fce10ac3adc2cd
+      store: true
+      stream: false
+    headers:
+      accept: '*/*'
+      authorization: Bearer ***
+      content-type: application/json
+      user-agent: python-httpx/0.28.1
+    method: POST
+    path: /v1/responses
+    query_params: {}
+  response:
+    body:
+      background: false
+      billing:
+        payer: developer
+      completed_at: 1776767468
+      created_at: 1776767468
+      error: null
+      frequency_penalty: 0.0
+      id: resp_01ffa34fbd4eb3870069e751ec4f4081949a66597d8284db9a
+      incomplete_details: null
+      instructions: null
+      max_output_tokens: null
+      max_tool_calls: null
+      metadata: {}
+      model: gpt-4o-2024-08-06
+      object: response
+      output:
+      - content:
+        - annotations: []
+          logprobs: []
+          text: '6'
+          type: output_text
+        id: msg_01ffa34fbd4eb3870069e751ecd60c819491e735a9891c670f
+        role: assistant
+        status: completed
+        type: message
+      parallel_tool_calls: true
+      presence_penalty: 0.0
+      previous_response_id: resp_01ffa34fbd4eb3870069e751d87aa8819481fce10ac3adc2cd
+      prompt_cache_key: null
+      prompt_cache_retention: in_memory
+      reasoning:
+        effort: null
+        summary: null
+      safety_identifier: null
+      service_tier: default
+      status: completed
+      store: true
+      temperature: 1.0
+      text:
+        format:
+          type: text
+        verbosity: medium
+      tool_choice: auto
+      tools: []
+      top_logprobs: 0
+      top_p: 1.0
+      truncation: disabled
+      usage:
+        input_tokens: 42
+        input_tokens_details:
+          cached_tokens: 0
+        output_tokens: 2
+        output_tokens_details:
+          reasoning_tokens: 0
+        total_tokens: 44
+      user: null
+    headers:
+      content-type: application/json
+    status_code: 200
+- filename: t4
+  request:
+    body:
+      input: Add 1. Reply with just the number.
+      model: gpt-4o
+      previous_response_id: resp_01ffa34fbd4eb3870069e751d87aa8819481fce10ac3adc2cd
+      store: true
+      stream: false
+    headers:
+      accept: '*/*'
+      authorization: Bearer ***
+      content-type: application/json
+      user-agent: python-httpx/0.28.1
+    method: POST
+    path: /v1/responses
+    query_params: {}
+  response:
+    body:
+      background: false
+      billing:
+        payer: developer
+      completed_at: 1776767479
+      created_at: 1776767479
+      error: null
+      frequency_penalty: 0.0
+      id: resp_01ffa34fbd4eb3870069e751f6e704819480f3d723db8296b5
+      incomplete_details: null
+      instructions: null
+      max_output_tokens: null
+      max_tool_calls: null
+      metadata: {}
+      model: gpt-4o-2024-08-06
+      object: response
+      output:
+      - content:
+        - annotations: []
+          logprobs: []
+          text: '5'
+          type: output_text
+        id: msg_01ffa34fbd4eb3870069e751f7a0048194b8e3aed03d337fba
+        role: assistant
+        status: completed
+        type: message
+      parallel_tool_calls: true
+      presence_penalty: 0.0
+      previous_response_id: resp_01ffa34fbd4eb3870069e751d87aa8819481fce10ac3adc2cd
+      prompt_cache_key: null
+      prompt_cache_retention: in_memory
+      reasoning:
+        effort: null
+        summary: null
+      safety_identifier: null
+      service_tier: default
+      status: completed
+      store: true
+      temperature: 1.0
+      text:
+        format:
+          type: text
+        verbosity: medium
+      tool_choice: auto
+      tools: []
+      top_logprobs: 0
+      top_p: 1.0
+      truncation: disabled
+      usage:
+        input_tokens: 39
+        input_tokens_details:
+          cached_tokens: 0
+        output_tokens: 2
+        output_tokens_details:
+          reasoning_tokens: 0
+        total_tokens: 41
+      user: null
+    headers:
+      content-type: application/json
+    status_code: 200
+- filename: t5
+  request:
+    body:
+      input: Add 3 to your previous answer. Reply with just the number.
+      model: gpt-4o
+      previous_response_id: resp_01ffa34fbd4eb3870069e751f6e704819480f3d723db8296b5
+      store: true
+      stream: false
+    headers:
+      accept: '*/*'
+      authorization: Bearer ***
+      content-type: application/json
+      user-agent: python-httpx/0.28.1
+    method: POST
+    path: /v1/responses
+    query_params: {}
+  response:
+    body:
+      background: false
+      billing:
+        payer: developer
+      completed_at: 1776767487
+      created_at: 1776767487
+      error: null
+      frequency_penalty: 0.0
+      id: resp_01ffa34fbd4eb3870069e751ff13748194a87830bb94240994
+      incomplete_details: null
+      instructions: null
+      max_output_tokens: null
+      max_tool_calls: null
+      metadata: {}
+      model: gpt-4o-2024-08-06
+      object: response
+      output:
+      - content:
+        - annotations: []
+          logprobs: []
+          text: '8'
+          type: output_text
+        id: msg_01ffa34fbd4eb3870069e751ff6eec8194ba43c7420d0f848b
+        role: assistant
+        status: completed
+        type: message
+      parallel_tool_calls: true
+      presence_penalty: 0.0
+      previous_response_id: resp_01ffa34fbd4eb3870069e751f6e704819480f3d723db8296b5
+      prompt_cache_key: null
+      prompt_cache_retention: in_memory
+      reasoning:
+        effort: null
+        summary: null
+      safety_identifier: null
+      service_tier: default
+      status: completed
+      store: true
+      temperature: 1.0
+      text:
+        format:
+          type: text
+        verbosity: medium
+      tool_choice: auto
+      tools: []
+      top_logprobs: 0
+      top_p: 1.0
+      truncation: disabled
+      usage:
+        input_tokens: 62
+        input_tokens_details:
+          cached_tokens: 0
+        output_tokens: 2
+        output_tokens_details:
+          reasoning_tokens: 0
+        total_tokens: 64
+      user: null
+    headers:
+      content-type: application/json
+    status_code: 200
+- filename: t6
+  request:
+    body:
+      input: Add 4. Reply with just the number.
+      model: gpt-4o
+      previous_response_id: resp_01ffa34fbd4eb3870069e751ec4f4081949a66597d8284db9a
+      store: true
+      stream: false
+    headers:
+      accept: '*/*'
+      authorization: Bearer ***
+      content-type: application/json
+      user-agent: python-httpx/0.28.1
+    method: POST
+    path: /v1/responses
+    query_params: {}
+  response:
+    body:
+      background: false
+      billing:
+        payer: developer
+      completed_at: 1776767496
+      created_at: 1776767496
+      error: null
+      frequency_penalty: 0.0
+      id: resp_01ffa34fbd4eb3870069e752080c5c819491b1fd50d74691ee
+      incomplete_details: null
+      instructions: null
+      max_output_tokens: null
+      max_tool_calls: null
+      metadata: {}
+      model: gpt-4o-2024-08-06
+      object: response
+      output:
+      - content:
+        - annotations: []
+          logprobs: []
+          text: '10'
+          type: output_text
+        id: msg_01ffa34fbd4eb3870069e75208a0a08194846c6917d99ec2d7
+        role: assistant
+        status: completed
+        type: message
+      parallel_tool_calls: true
+      presence_penalty: 0.0
+      previous_response_id: resp_01ffa34fbd4eb3870069e751ec4f4081949a66597d8284db9a
+      prompt_cache_key: null
+      prompt_cache_retention: in_memory
+      reasoning:
+        effort: null
+        summary: null
+      safety_identifier: null
+      service_tier: default
+      status: completed
+      store: true
+      temperature: 1.0
+      text:
+        format:
+          type: text
+        verbosity: medium
+      tool_choice: auto
+      tools: []
+      top_logprobs: 0
+      top_p: 1.0
+      truncation: disabled
+      usage:
+        input_tokens: 61
+        input_tokens_details:
+          cached_tokens: 0
+        output_tokens: 2
+        output_tokens_details:
+          reasoning_tokens: 0
+        total_tokens: 63
+      user: null
+    headers:
+      content-type: application/json
+    status_code: 200
diff --git a/crates/agentic-core/tests/cassettes/text_only/conversation/conv-multi-turn-single-branch-gpt-4o-nonstreaming.yaml b/crates/agentic-core/tests/cassettes/text_only/conversation/conv-multi-turn-single-branch-gpt-4o-nonstreaming.yaml
new file mode 100644
index 0000000..16c808d
--- /dev/null
+++ b/crates/agentic-core/tests/cassettes/text_only/conversation/conv-multi-turn-single-branch-gpt-4o-nonstreaming.yaml
@@ -0,0 +1,335 @@
+turns:
+- filename: t1
+  request:
+    body: {}
+    headers:
+      accept: '*/*'
+      authorization: Bearer ***
+      content-type: application/json
+      user-agent: python-httpx/0.28.1
+    method: POST
+    path: /v1/conversations
+    query_params: {}
+  response:
+    body:
+      created_at: 1776766429
+      id: conv_69e74ddd13c4819781cfed6799e7b1fc0056d4efc351f68b
+      metadata: {}
+      object: conversation
+    headers:
+      content-type: application/json
+    status_code: 200
+- filename: t2
+  request:
+    body:
+      conversation: conv_69e74ddd13c4819781cfed6799e7b1fc0056d4efc351f68b
+      input: What is 2+2? Reply with just the number.
+      model: gpt-4o
+      store: true
+      stream: false
+    headers:
+      accept: '*/*'
+      authorization: Bearer ***
+      content-type: application/json
+      user-agent: python-httpx/0.28.1
+    method: POST
+    path: /v1/responses
+    query_params: {}
+  response:
+    body:
+      background: false
+      billing:
+        payer: developer
+      completed_at: 1776766445
+      conversation:
+        id: conv_69e74ddd13c4819781cfed6799e7b1fc0056d4efc351f68b
+      created_at: 1776766442
+      error: null
+      frequency_penalty: 0.0
+      id: resp_0056d4efc351f68b0069e74de9f3a8819787d6ac39edb946bd
+      incomplete_details: null
+      instructions: null
+      max_output_tokens: null
+      max_tool_calls: null
+      metadata: {}
+      model: gpt-4o-2024-08-06
+      object: response
+      output:
+      - content:
+        - annotations: []
+          logprobs: []
+          text: '4'
+          type: output_text
+        id: msg_0056d4efc351f68b0069e74dede3188197ba46fa018e252c72
+        role: assistant
+        status: completed
+        type: message
+      parallel_tool_calls: true
+      presence_penalty: 0.0
+      previous_response_id: null
+      prompt_cache_key: null
+      prompt_cache_retention: in_memory
+      reasoning:
+        effort: null
+        summary: null
+      safety_identifier: null
+      service_tier: default
+      status: completed
+      store: true
+      temperature: 1.0
+      text:
+        format:
+          type: text
+        verbosity: medium
+      tool_choice: auto
+      tools: []
+      top_logprobs: 0
+      top_p: 1.0
+      truncation: disabled
+      usage:
+        input_tokens: 20
+        input_tokens_details:
+          cached_tokens: 0
+        output_tokens: 2
+        output_tokens_details:
+          reasoning_tokens: 0
+        total_tokens: 22
+      user: null
+    headers:
+      content-type: application/json
+    status_code: 200
+- filename: t3
+  request:
+    body:
+      conversation: conv_69e74ddd13c4819781cfed6799e7b1fc0056d4efc351f68b
+      input: Add 1 to your previous answer. Reply with just the number.
+      model: gpt-4o
+      store: true
+      stream: false
+    headers:
+      accept: '*/*'
+      authorization: Bearer ***
+      content-type: application/json
+      user-agent: python-httpx/0.28.1
+    method: POST
+    path: /v1/responses
+    query_params: {}
+  response:
+    body:
+      background: false
+      billing:
+        payer: developer
+      completed_at: 1776766448
+      conversation:
+        id: conv_69e74ddd13c4819781cfed6799e7b1fc0056d4efc351f68b
+      created_at: 1776766448
+      error: null
+      frequency_penalty: 0.0
+      id: resp_0056d4efc351f68b0069e74df009908197ab5b3a32f9c572a3
+      incomplete_details: null
+      instructions: null
+      max_output_tokens: null
+      max_tool_calls: null
+      metadata: {}
+      model: gpt-4o-2024-08-06
+      object: response
+      output:
+      - content:
+        - annotations: []
+          logprobs: []
+          text: '5'
+          type: output_text
+        id: msg_0056d4efc351f68b0069e74df0bc488197a0f09f137edd161c
+        role: assistant
+        status: completed
+        type: message
+      parallel_tool_calls: true
+      presence_penalty: 0.0
+      previous_response_id: null
+      prompt_cache_key: null
+      prompt_cache_retention: in_memory
+      reasoning:
+        effort: null
+        summary: null
+      safety_identifier: null
+      service_tier: default
+      status: completed
+      store: true
+      temperature: 1.0
+      text:
+        format:
+          type: text
+        verbosity: medium
+      tool_choice: auto
+      tools: []
+      top_logprobs: 0
+      top_p: 1.0
+      truncation: disabled
+      usage:
+        input_tokens: 43
+        input_tokens_details:
+          cached_tokens: 0
+        output_tokens: 2
+        output_tokens_details:
+          reasoning_tokens: 0
+        total_tokens: 45
+      user: null
+    headers:
+      content-type: application/json
+    status_code: 200
+- filename: t4
+  request:
+    body:
+      conversation: conv_69e74ddd13c4819781cfed6799e7b1fc0056d4efc351f68b
+      input: Add 2 to your previous answer. Reply with just the number
+      model: gpt-4o
+      store: true
+      stream: false
+    headers:
+      accept: '*/*'
+      authorization: Bearer ***
+      content-type: application/json
+      user-agent: python-httpx/0.28.1
+    method: POST
+    path: /v1/responses
+    query_params: {}
+  response:
+    body:
+      background: false
+      billing:
+        payer: developer
+      completed_at: 1776766455
+      conversation:
+        id: conv_69e74ddd13c4819781cfed6799e7b1fc0056d4efc351f68b
+      created_at: 1776766455
+      error: null
+      frequency_penalty: 0.0
+      id: resp_0056d4efc351f68b0069e74df724bc8197bf5e923fe663113f
+      incomplete_details: null
+      instructions: null
+      max_output_tokens: null
+      max_tool_calls: null
+      metadata: {}
+      model: gpt-4o-2024-08-06
+      object: response
+      output:
+      - content:
+        - annotations: []
+          logprobs: []
+          text: '7'
+          type: output_text
+        id: msg_0056d4efc351f68b0069e74df7b5d88197943f69ef335e04c4
+        role: assistant
+        status: completed
+        type: message
+      parallel_tool_calls: true
+      presence_penalty: 0.0
+      previous_response_id: null
+      prompt_cache_key: null
+      prompt_cache_retention: in_memory
+      reasoning:
+        effort: null
+        summary: null
+      safety_identifier: null
+      service_tier: default
+      status: completed
+      store: true
+      temperature: 1.0
+      text:
+        format:
+          type: text
+        verbosity: medium
+      tool_choice: auto
+      tools: []
+      top_logprobs: 0
+      top_p: 1.0
+      truncation: disabled
+      usage:
+        input_tokens: 65
+        input_tokens_details:
+          cached_tokens: 0
+        output_tokens: 2
+        output_tokens_details:
+          reasoning_tokens: 0
+        total_tokens: 67
+      user: null
+    headers:
+      content-type: application/json
+    status_code: 200
+- filename: t5
+  request:
+    body:
+      input: Add 1 to your previous answer. Reply with just the number.
+      model: gpt-4o
+      previous_response_id: resp_0056d4efc351f68b0069e74de9f3a8819787d6ac39edb946bd
+      store: true
+      stream: false
+    headers:
+      accept: '*/*'
+      authorization: Bearer ***
+      content-type: application/json
+      user-agent: python-httpx/0.28.1
+    method: POST
+    path: /v1/responses
+    query_params: {}
+  response:
+    body:
+      background: false
+      billing:
+        payer: developer
+      completed_at: 1776766466
+      created_at: 1776766466
+      error: null
+      frequency_penalty: 0.0
+      id: resp_0056d4efc351f68b0069e74e01f52c8197bb2dbef0c58938e7
+      incomplete_details: null
+      instructions: null
+      max_output_tokens: null
+      max_tool_calls: null
+      metadata: {}
+      model: gpt-4o-2024-08-06
+      object: response
+      output:
+      - content:
+        - annotations: []
+          logprobs: []
+          text: '5'
+          type: output_text
+        id: msg_0056d4efc351f68b0069e74e0271648197a686afe3b92c3cb2
+        role: assistant
+        status: completed
+        type: message
+      parallel_tool_calls: true
+      presence_penalty: 0.0
+      previous_response_id: resp_0056d4efc351f68b0069e74de9f3a8819787d6ac39edb946bd
+      prompt_cache_key: null
+      prompt_cache_retention: in_memory
+      reasoning:
+        effort: null
+        summary: null
+      safety_identifier: null
+      service_tier: default
+      status: completed
+      store: true
+      temperature: 1.0
+      text:
+        format:
+          type: text
+        verbosity: medium
+      tool_choice: auto
+      tools: []
+      top_logprobs: 0
+      top_p: 1.0
+      truncation: disabled
+      usage:
+        input_tokens: 43
+        input_tokens_details:
+          cached_tokens: 0
+        output_tokens: 2
+        output_tokens_details:
+          reasoning_tokens: 0
+        total_tokens: 45
+      user: null
+    headers:
+      content-type: application/json
+    status_code: 200
diff --git a/crates/agentic-core/tests/cassettes/text_only/conversation/conv-two-turn-gpt-4o-nonstreaming.yaml b/crates/agentic-core/tests/cassettes/text_only/conversation/conv-two-turn-gpt-4o-nonstreaming.yaml
new file mode 100644
index 0000000..f5dc41c
--- /dev/null
+++ b/crates/agentic-core/tests/cassettes/text_only/conversation/conv-two-turn-gpt-4o-nonstreaming.yaml
@@ -0,0 +1,179 @@
+turns:
+- filename: t1
+  request:
+    body: {}
+    headers:
+      accept: '*/*'
+      authorization: Bearer ***
+      content-type: application/json
+      user-agent: python-httpx/0.28.1
+    method: POST
+    path: /v1/conversations
+    query_params: {}
+  response:
+    body:
+      created_at: 1776764510
+      id: conv_69e7465e16f48195af23a8c7f0301bea05530c27e06f63f2
+      metadata: {}
+      object: conversation
+    headers:
+      content-type: application/json
+    status_code: 200
+- filename: t2
+  request:
+    body:
+      conversation: conv_69e7465e16f48195af23a8c7f0301bea05530c27e06f63f2
+      input: 'Remember the word CHERRY. Just say: OK'
+      model: gpt-4o
+      store: true
+      stream: false
+    headers:
+      accept: '*/*'
+      authorization: Bearer ***
+      content-type: application/json
+      user-agent: python-httpx/0.28.1
+    method: POST
+    path: /v1/responses
+    query_params: {}
+  response:
+    body:
+      background: false
+      billing:
+        payer: developer
+      completed_at: 1776764521
+      conversation:
+        id: conv_69e7465e16f48195af23a8c7f0301bea05530c27e06f63f2
+      created_at: 1776764520
+      error: null
+      frequency_penalty: 0.0
+      id: resp_05530c27e06f63f20069e74668c1d88195b066d73edea6fdec
+      incomplete_details: null
+      instructions: null
+      max_output_tokens: null
+      max_tool_calls: null
+      metadata: {}
+      model: gpt-4o-2024-08-06
+      object: response
+      output:
+      - content:
+        - annotations: []
+          logprobs: []
+          text: OK
+          type: output_text
+        id: msg_05530c27e06f63f20069e7466950f88195a158c1964323765e
+        role: assistant
+        status: completed
+        type: message
+      parallel_tool_calls: true
+      presence_penalty: 0.0
+      previous_response_id: null
+      prompt_cache_key: null
+      prompt_cache_retention: in_memory
+      reasoning:
+        effort: null
+        summary: null
+      safety_identifier: null
+      service_tier: default
+      status: completed
+      store: true
+      temperature: 1.0
+      text:
+        format:
+          type: text
+        verbosity: medium
+      tool_choice: auto
+      tools: []
+      top_logprobs: 0
+      top_p: 1.0
+      truncation: disabled
+      usage:
+        input_tokens: 17
+        input_tokens_details:
+          cached_tokens: 0
+        output_tokens: 2
+        output_tokens_details:
+          reasoning_tokens: 0
+        total_tokens: 19
+      user: null
+    headers:
+      content-type: application/json
+    status_code: 200
+- filename: t3
+  request:
+    body:
+      conversation: conv_69e7465e16f48195af23a8c7f0301bea05530c27e06f63f2
+      input: What word did I ask you to remember?
+      model: gpt-4o
+      store: true
+      stream: false
+    headers:
+      accept: '*/*'
+      authorization: Bearer ***
+      content-type: application/json
+      user-agent: python-httpx/0.28.1
+    method: POST
+    path: /v1/responses
+    query_params: {}
+  response:
+    body:
+      background: false
+      billing:
+        payer: developer
+      completed_at: 1776764527
+      conversation:
+        id: conv_69e7465e16f48195af23a8c7f0301bea05530c27e06f63f2
+      created_at: 1776764527
+      error: null
+      frequency_penalty: 0.0
+      id: resp_05530c27e06f63f20069e7466f296081958b2ebfa3bb4c69b4
+      incomplete_details: null
+      instructions: null
+      max_output_tokens: null
+      max_tool_calls: null
+      metadata: {}
+      model: gpt-4o-2024-08-06
+      object: response
+      output:
+      - content:
+        - annotations: []
+          logprobs: []
+          text: CHERRY
+          type: output_text
+        id: msg_05530c27e06f63f20069e7466fb0088195b0361daeaac300a4
+        role: assistant
+        status: completed
+        type: message
+      parallel_tool_calls: true
+      presence_penalty: 0.0
+      previous_response_id: null
+      prompt_cache_key: null
+      prompt_cache_retention: in_memory
+      reasoning:
+        effort: null
+        summary: null
+      safety_identifier: null
+      service_tier: default
+      status: completed
+      store: true
+      temperature: 1.0
+      text:
+        format:
+          type: text
+        verbosity: medium
+      tool_choice: auto
+      tools: []
+      top_logprobs: 0
+      top_p: 1.0
+      truncation: disabled
+      usage:
+        input_tokens: 35
+        input_tokens_details:
+          cached_tokens: 0
+        output_tokens: 3
+        output_tokens_details:
+          reasoning_tokens: 0
+        total_tokens: 38
+      user: null
+    headers:
+      content-type: application/json
+    status_code: 200
diff --git a/crates/agentic-core/tests/cassettes/text_only/conversation/conv-two-turn-gpt-4o-streaming.yaml b/crates/agentic-core/tests/cassettes/text_only/conversation/conv-two-turn-gpt-4o-streaming.yaml
new file mode 100644
index 0000000..b6b8734
--- /dev/null
+++ b/crates/agentic-core/tests/cassettes/text_only/conversation/conv-two-turn-gpt-4o-streaming.yaml
@@ -0,0 +1,234 @@
+turns:
+- filename: t1
+  request:
+    body: {}
+    headers:
+      accept: '*/*'
+      authorization: Bearer ***
+      content-type: application/json
+      user-agent: python-httpx/0.28.1
+    method: POST
+    path: /v1/conversations
+    query_params: {}
+  response:
+    body:
+      created_at: 1776764537
+      id: conv_69e746796f7481909635d37860d6cd1f0aeb5a0588bb8b2a
+      metadata: {}
+      object: conversation
+    headers:
+      content-type: application/json
+    status_code: 200
+- filename: t2
+  request:
+    body:
+      conversation: conv_69e746796f7481909635d37860d6cd1f0aeb5a0588bb8b2a
+      input: 'Remember the word MANGO. Just say: OK'
+      model: gpt-4o
+      store: true
+      stream: true
+    headers:
+      accept: '*/*'
+      authorization: Bearer ***
+      content-type: application/json
+      user-agent: python-httpx/0.28.1
+    method: POST
+    path: /v1/responses
+    query_params: {}
+  response:
+    headers:
+      content-type: text/event-stream; charset=utf-8
+    sse:
+    - 'event: response.created
+
+      '
+    - 'data: {"type":"response.created","response":{"id":"resp_0aeb5a0588bb8b2a0069e7467c813c8190a46d9810f8dc0355","object":"response","created_at":1776764540,"status":"in_progress","background":false,"completed_at":null,"conversation":{"id":"conv_69e746796f7481909635d37860d6cd1f0aeb5a0588bb8b2a"},"error":null,"frequency_penalty":0.0,"incomplete_details":null,"instructions":null,"max_output_tokens":null,"max_tool_calls":null,"model":"gpt-4o-2024-08-06","output":[],"parallel_tool_calls":true,"presence_penalty":0.0,"previous_response_id":null,"prompt_cache_key":null,"prompt_cache_retention":"in_memory","reasoning":{"effort":null,"summary":null},"safety_identifier":null,"service_tier":"auto","store":true,"temperature":1.0,"text":{"format":{"type":"text"},"verbosity":"medium"},"tool_choice":"auto","tools":[],"top_logprobs":0,"top_p":1.0,"truncation":"disabled","usage":null,"user":null,"metadata":{}},"sequence_number":0}
+
+      '
+    - '
+
+      '
+    - 'event: response.in_progress
+
+      '
+    - 'data: {"type":"response.in_progress","response":{"id":"resp_0aeb5a0588bb8b2a0069e7467c813c8190a46d9810f8dc0355","object":"response","created_at":1776764540,"status":"in_progress","background":false,"completed_at":null,"conversation":{"id":"conv_69e746796f7481909635d37860d6cd1f0aeb5a0588bb8b2a"},"error":null,"frequency_penalty":0.0,"incomplete_details":null,"instructions":null,"max_output_tokens":null,"max_tool_calls":null,"model":"gpt-4o-2024-08-06","output":[],"parallel_tool_calls":true,"presence_penalty":0.0,"previous_response_id":null,"prompt_cache_key":null,"prompt_cache_retention":"in_memory","reasoning":{"effort":null,"summary":null},"safety_identifier":null,"service_tier":"auto","store":true,"temperature":1.0,"text":{"format":{"type":"text"},"verbosity":"medium"},"tool_choice":"auto","tools":[],"top_logprobs":0,"top_p":1.0,"truncation":"disabled","usage":null,"user":null,"metadata":{}},"sequence_number":1}
+
+      '
+    - '
+
+      '
+    - 'event: response.output_item.added
+
+      '
+    - 'data: {"type":"response.output_item.added","item":{"id":"msg_0aeb5a0588bb8b2a0069e7467d19088190abae62ec47b1f0b1","type":"message","status":"in_progress","content":[],"role":"assistant"},"output_index":0,"sequence_number":2}
+
+      '
+    - '
+
+      '
+    - 'event: response.content_part.added
+
+      '
+    - 'data: {"type":"response.content_part.added","content_index":0,"item_id":"msg_0aeb5a0588bb8b2a0069e7467d19088190abae62ec47b1f0b1","output_index":0,"part":{"type":"output_text","annotations":[],"logprobs":[],"text":""},"sequence_number":3}
+
+      '
+    - '
+
+      '
+    - 'event: response.output_text.delta
+
+      '
+    - 'data: {"type":"response.output_text.delta","content_index":0,"delta":"OK","item_id":"msg_0aeb5a0588bb8b2a0069e7467d19088190abae62ec47b1f0b1","logprobs":[],"obfuscation":"LAhO6jFUD08oa2","output_index":0,"sequence_number":4}
+
+      '
+    - '
+
+      '
+    - 'event: response.output_text.done
+
+      '
+    - 'data: {"type":"response.output_text.done","content_index":0,"item_id":"msg_0aeb5a0588bb8b2a0069e7467d19088190abae62ec47b1f0b1","logprobs":[],"output_index":0,"sequence_number":5,"text":"OK"}
+
+      '
+    - '
+
+      '
+    - 'event: response.content_part.done
+
+      '
+    - 'data: {"type":"response.content_part.done","content_index":0,"item_id":"msg_0aeb5a0588bb8b2a0069e7467d19088190abae62ec47b1f0b1","output_index":0,"part":{"type":"output_text","annotations":[],"logprobs":[],"text":"OK"},"sequence_number":6}
+
+      '
+    - '
+
+      '
+    - 'event: response.output_item.done
+
+      '
+    - 'data: {"type":"response.output_item.done","item":{"id":"msg_0aeb5a0588bb8b2a0069e7467d19088190abae62ec47b1f0b1","type":"message","status":"completed","content":[{"type":"output_text","annotations":[],"logprobs":[],"text":"OK"}],"role":"assistant"},"output_index":0,"sequence_number":7}
+
+      '
+    - '
+
+      '
+    - 'event: response.completed
+
+      '
+    - 'data: {"type":"response.completed","response":{"id":"resp_0aeb5a0588bb8b2a0069e7467c813c8190a46d9810f8dc0355","object":"response","created_at":1776764540,"status":"completed","background":false,"completed_at":1776764541,"conversation":{"id":"conv_69e746796f7481909635d37860d6cd1f0aeb5a0588bb8b2a"},"error":null,"frequency_penalty":0.0,"incomplete_details":null,"instructions":null,"max_output_tokens":null,"max_tool_calls":null,"model":"gpt-4o-2024-08-06","output":[{"id":"msg_0aeb5a0588bb8b2a0069e7467d19088190abae62ec47b1f0b1","type":"message","status":"completed","content":[{"type":"output_text","annotations":[],"logprobs":[],"text":"OK"}],"role":"assistant"}],"parallel_tool_calls":true,"presence_penalty":0.0,"previous_response_id":null,"prompt_cache_key":null,"prompt_cache_retention":"in_memory","reasoning":{"effort":null,"summary":null},"safety_identifier":null,"service_tier":"default","store":true,"temperature":1.0,"text":{"format":{"type":"text"},"verbosity":"medium"},"tool_choice":"auto","tools":[],"top_logprobs":0,"top_p":1.0,"truncation":"disabled","usage":{"input_tokens":17,"input_tokens_details":{"cached_tokens":0},"output_tokens":2,"output_tokens_details":{"reasoning_tokens":0},"total_tokens":19},"user":null,"metadata":{}},"sequence_number":8}
+
+      '
+    - '
+
+      '
+    status_code: 200
+- filename: t3
+  request:
+    body:
+      conversation: conv_69e746796f7481909635d37860d6cd1f0aeb5a0588bb8b2a
+      input: What word did I ask you to remember?
+      model: gpt-4o
+      store: true
+      stream: true
+    headers:
+      accept: '*/*'
+      authorization: Bearer ***
+      content-type: application/json
+      user-agent: python-httpx/0.28.1
+    method: POST
+    path: /v1/responses
+    query_params: {}
+  response:
+    headers:
+      content-type: text/event-stream; charset=utf-8
+    sse:
+    - 'event: response.created
+
+      '
+    - 'data: {"type":"response.created","response":{"id":"resp_0aeb5a0588bb8b2a0069e74683f0388190b99690bad6b9b489","object":"response","created_at":1776764548,"status":"in_progress","background":false,"completed_at":null,"conversation":{"id":"conv_69e746796f7481909635d37860d6cd1f0aeb5a0588bb8b2a"},"error":null,"frequency_penalty":0.0,"incomplete_details":null,"instructions":null,"max_output_tokens":null,"max_tool_calls":null,"model":"gpt-4o-2024-08-06","output":[],"parallel_tool_calls":true,"presence_penalty":0.0,"previous_response_id":null,"prompt_cache_key":null,"prompt_cache_retention":"in_memory","reasoning":{"effort":null,"summary":null},"safety_identifier":null,"service_tier":"auto","store":true,"temperature":1.0,"text":{"format":{"type":"text"},"verbosity":"medium"},"tool_choice":"auto","tools":[],"top_logprobs":0,"top_p":1.0,"truncation":"disabled","usage":null,"user":null,"metadata":{}},"sequence_number":0}
+
+      '
+    - '
+
+      '
+    - 'event: response.in_progress
+
+      '
+    - 'data: {"type":"response.in_progress","response":{"id":"resp_0aeb5a0588bb8b2a0069e74683f0388190b99690bad6b9b489","object":"response","created_at":1776764548,"status":"in_progress","background":false,"completed_at":null,"conversation":{"id":"conv_69e746796f7481909635d37860d6cd1f0aeb5a0588bb8b2a"},"error":null,"frequency_penalty":0.0,"incomplete_details":null,"instructions":null,"max_output_tokens":null,"max_tool_calls":null,"model":"gpt-4o-2024-08-06","output":[],"parallel_tool_calls":true,"presence_penalty":0.0,"previous_response_id":null,"prompt_cache_key":null,"prompt_cache_retention":"in_memory","reasoning":{"effort":null,"summary":null},"safety_identifier":null,"service_tier":"auto","store":true,"temperature":1.0,"text":{"format":{"type":"text"},"verbosity":"medium"},"tool_choice":"auto","tools":[],"top_logprobs":0,"top_p":1.0,"truncation":"disabled","usage":null,"user":null,"metadata":{}},"sequence_number":1}
+
+      '
+    - '
+
+      '
+    - 'event: response.output_item.added
+
+      '
+    - 'data: {"type":"response.output_item.added","item":{"id":"msg_0aeb5a0588bb8b2a0069e7468455c48190acd151ab62b504e1","type":"message","status":"in_progress","content":[],"role":"assistant"},"output_index":0,"sequence_number":2}
+
+      '
+    - '
+
+      '
+    - 'event: response.content_part.added
+
+      '
+    - 'data: {"type":"response.content_part.added","content_index":0,"item_id":"msg_0aeb5a0588bb8b2a0069e7468455c48190acd151ab62b504e1","output_index":0,"part":{"type":"output_text","annotations":[],"logprobs":[],"text":""},"sequence_number":3}
+
+      '
+    - '
+
+      '
+    - 'event: response.output_text.delta
+
+      '
+    - 'data: {"type":"response.output_text.delta","content_index":0,"delta":"M","item_id":"msg_0aeb5a0588bb8b2a0069e7468455c48190acd151ab62b504e1","logprobs":[],"obfuscation":"uG6UWN3uEmZChjC","output_index":0,"sequence_number":4}
+
+      '
+    - '
+
+      '
+    - 'event: response.output_text.delta
+
+      '
+    - 'data: {"type":"response.output_text.delta","content_index":0,"delta":"ANGO","item_id":"msg_0aeb5a0588bb8b2a0069e7468455c48190acd151ab62b504e1","logprobs":[],"obfuscation":"v0kxXmx9ogqa","output_index":0,"sequence_number":5}
+
+      '
+    - '
+
+      '
+    - 'event: response.output_text.done
+
+      '
+    - 'data: {"type":"response.output_text.done","content_index":0,"item_id":"msg_0aeb5a0588bb8b2a0069e7468455c48190acd151ab62b504e1","logprobs":[],"output_index":0,"sequence_number":6,"text":"MANGO"}
+
+      '
+    - '
+
+      '
+    - 'event: response.content_part.done
+
+      '
+    - 'data: {"type":"response.content_part.done","content_index":0,"item_id":"msg_0aeb5a0588bb8b2a0069e7468455c48190acd151ab62b504e1","output_index":0,"part":{"type":"output_text","annotations":[],"logprobs":[],"text":"MANGO"},"sequence_number":7}
+
+      '
+    - '
+
+      '
+    - 'event: response.output_item.done
+
+      '
+    - 'data: {"type":"response.output_item.done","item":{"id":"msg_0aeb5a0588bb8b2a0069e7468455c48190acd151ab62b504e1","type":"message","status":"completed","content":[{"type":"output_text","annotations":[],"logprobs":[],"text":"MANGO"}],"role":"assistant"},"output_index":0,"sequence_number":8}
+
+      '
+    - '
+
+      '
+    - 'event: response.completed
+
+      '
+    - 'data: {"type":"response.completed","response":{"id":"resp_0aeb5a0588bb8b2a0069e74683f0388190b99690bad6b9b489","object":"response","created_at":1776764548,"status":"completed","background":false,"completed_at":1776764548,"conversation":{"id":"conv_69e746796f7481909635d37860d6cd1f0aeb5a0588bb8b2a"},"error":null,"frequency_penalty":0.0,"incomplete_details":null,"instructions":null,"max_output_tokens":null,"max_tool_calls":null,"model":"gpt-4o-2024-08-06","output":[{"id":"msg_0aeb5a0588bb8b2a0069e7468455c48190acd151ab62b504e1","type":"message","status":"completed","content":[{"type":"output_text","annotations":[],"logprobs":[],"text":"MANGO"}],"role":"assistant"}],"parallel_tool_calls":true,"presence_penalty":0.0,"previous_response_id":null,"prompt_cache_key":null,"prompt_cache_retention":"in_memory","reasoning":{"effort":null,"summary":null},"safety_identifier":null,"service_tier":"default","store":true,"temperature":1.0,"text":{"format":{"type":"text"},"verbosity":"medium"},"tool_choice":"auto","tools":[],"top_logprobs":0,"top_p":1.0,"truncation":"disabled","usage":{"input_tokens":35,"input_tokens_details":{"cached_tokens":0},"output_tokens":3,"output_tokens_details":{"reasoning_tokens":0},"total_tokens":38},"user":null,"metadata":{}},"sequence_number":9}
+
+      '
+    - '
+
+      '
+    status_code: 200
diff --git a/crates/agentic-core/tests/cassettes/text_only/responses/resp-no-store-gpt-4o-nonstreaming.yaml b/crates/agentic-core/tests/cassettes/text_only/responses/resp-no-store-gpt-4o-nonstreaming.yaml
new file mode 100644
index 0000000..015207f
--- /dev/null
+++ b/crates/agentic-core/tests/cassettes/text_only/responses/resp-no-store-gpt-4o-nonstreaming.yaml
@@ -0,0 +1,153 @@
+turns:
+- filename: t1
+  request:
+    body:
+      input: 'Say: NOT STORED'
+      model: gpt-4o
+      store: false
+      stream: false
+    headers:
+      accept: '*/*'
+      authorization: Bearer ***
+      content-type: application/json
+      user-agent: python-httpx/0.28.1
+    method: POST
+    path: /v1/responses
+    query_params: {}
+  response:
+    body:
+      background: false
+      billing:
+        payer: developer
+      completed_at: 1776764465
+      created_at: 1776764465
+      error: null
+      frequency_penalty: 0.0
+      id: resp_0a47fc2a915dece50169e74631001881968085f5b231c7abe0
+      incomplete_details: null
+      instructions: null
+      max_output_tokens: null
+      max_tool_calls: null
+      metadata: {}
+      model: gpt-4o-2024-08-06
+      object: response
+      output:
+      - content:
+        - annotations: []
+          logprobs: []
+          text: NOT STORED
+          type: output_text
+        id: msg_0a47fc2a915dece50169e7463180b48196bbaaae87363326c0
+        role: assistant
+        status: completed
+        type: message
+      parallel_tool_calls: true
+      presence_penalty: 0.0
+      previous_response_id: null
+      prompt_cache_key: null
+      prompt_cache_retention: in_memory
+      reasoning:
+        effort: null
+        summary: null
+      safety_identifier: null
+      service_tier: default
+      status: completed
+      store: false
+      temperature: 1.0
+      text:
+        format:
+          type: text
+        verbosity: medium
+      tool_choice: auto
+      tools: []
+      top_logprobs: 0
+      top_p: 1.0
+      truncation: disabled
+      usage:
+        input_tokens: 12
+        input_tokens_details:
+          cached_tokens: 0
+        output_tokens: 4
+        output_tokens_details:
+          reasoning_tokens: 0
+        total_tokens: 16
+      user: null
+    headers:
+      content-type: application/json
+    status_code: 200
+- filename: t2
+  request:
+    body:
+      input: follow up
+      model: gpt-4o
+      store: false
+      stream: false
+    headers:
+      accept: '*/*'
+      authorization: Bearer ***
+      content-type: application/json
+      user-agent: python-httpx/0.28.1
+    method: POST
+    path: /v1/responses
+    query_params: {}
+  response:
+    body:
+      background: false
+      billing:
+        payer: developer
+      completed_at: 1776764472
+      created_at: 1776764471
+      error: null
+      frequency_penalty: 0.0
+      id: resp_0ad5b1478c81aa5f0169e746378464819587a13f241d2fc344
+      incomplete_details: null
+      instructions: null
+      max_output_tokens: null
+      max_tool_calls: null
+      metadata: {}
+      model: gpt-4o-2024-08-06
+      object: response
+      output:
+      - content:
+        - annotations: []
+          logprobs: []
+          text: Of course! What would you like to follow up on?
+          type: output_text
+        id: msg_0ad5b1478c81aa5f0169e74637e7388195a1494b0590f8a49e
+        role: assistant
+        status: completed
+        type: message
+      parallel_tool_calls: true
+      presence_penalty: 0.0
+      previous_response_id: null
+      prompt_cache_key: null
+      prompt_cache_retention: in_memory
+      reasoning:
+        effort: null
+        summary: null
+      safety_identifier: null
+      service_tier: default
+      status: completed
+      store: false
+      temperature: 1.0
+      text:
+        format:
+          type: text
+        verbosity: medium
+      tool_choice: auto
+      tools: []
+      top_logprobs: 0
+      top_p: 1.0
+      truncation: disabled
+      usage:
+        input_tokens: 9
+        input_tokens_details:
+          cached_tokens: 0
+        output_tokens: 13
+        output_tokens_details:
+          reasoning_tokens: 0
+        total_tokens: 22
+      user: null
+    headers:
+      content-type: application/json
+    status_code: 200
diff --git a/crates/agentic-core/tests/cassettes/text_only/responses/resp-single-gpt-4o-nonstreaming.yaml b/crates/agentic-core/tests/cassettes/text_only/responses/resp-single-gpt-4o-nonstreaming.yaml
new file mode 100644
index 0000000..cd82940
--- /dev/null
+++ b/crates/agentic-core/tests/cassettes/text_only/responses/resp-single-gpt-4o-nonstreaming.yaml
@@ -0,0 +1,77 @@
+turns:
+- filename: t1
+  request:
+    body:
+      input: 'Reply with exactly one word: HELLO'
+      model: gpt-4o
+      store: true
+      stream: false
+    headers:
+      accept: '*/*'
+      authorization: Bearer ***
+      content-type: application/json
+      user-agent: python-httpx/0.28.1
+    method: POST
+    path: /v1/responses
+    query_params: {}
+  response:
+    body:
+      background: false
+      billing:
+        payer: developer
+      completed_at: 1776764143
+      created_at: 1776764142
+      error: null
+      frequency_penalty: 0.0
+      id: resp_0508721937e20de90069e744ee9018819394c8e011dc6d7818
+      incomplete_details: null
+      instructions: null
+      max_output_tokens: null
+      max_tool_calls: null
+      metadata: {}
+      model: gpt-4o-2024-08-06
+      object: response
+      output:
+      - content:
+        - annotations: []
+          logprobs: []
+          text: HI
+          type: output_text
+        id: msg_0508721937e20de90069e744ef1b2881939596753c5951691e
+        role: assistant
+        status: completed
+        type: message
+      parallel_tool_calls: true
+      presence_penalty: 0.0
+      previous_response_id: null
+      prompt_cache_key: null
+      prompt_cache_retention: in_memory
+      reasoning:
+        effort: null
+        summary: null
+      safety_identifier: null
+      service_tier: default
+      status: completed
+      store: true
+      temperature: 1.0
+      text:
+        format:
+          type: text
+        verbosity: medium
+      tool_choice: auto
+      tools: []
+      top_logprobs: 0
+      top_p: 1.0
+      truncation: disabled
+      usage:
+        input_tokens: 15
+        input_tokens_details:
+          cached_tokens: 0
+        output_tokens: 2
+        output_tokens_details:
+          reasoning_tokens: 0
+        total_tokens: 17
+      user: null
+    headers:
+      content-type: application/json
+    status_code: 200
diff --git a/crates/agentic-core/tests/cassettes/text_only/responses/resp-single-gpt-4o-streaming.yaml b/crates/agentic-core/tests/cassettes/text_only/responses/resp-single-gpt-4o-streaming.yaml
new file mode 100644
index 0000000..659197b
--- /dev/null
+++ b/crates/agentic-core/tests/cassettes/text_only/responses/resp-single-gpt-4o-streaming.yaml
@@ -0,0 +1,120 @@
+turns:
+- filename: t1
+  request:
+    body:
+      input: 'Reply with exactly one word: WORLD'
+      model: gpt-4o
+      store: true
+      stream: true
+    headers:
+      accept: '*/*'
+      authorization: Bearer ***
+      content-type: application/json
+      user-agent: python-httpx/0.28.1
+    method: POST
+    path: /v1/responses
+    query_params: {}
+  response:
+    headers:
+      content-type: text/event-stream; charset=utf-8
+    sse:
+    - 'event: response.created
+
+      '
+    - 'data: {"type":"response.created","response":{"id":"resp_0d119a97c73fc7550069e7450154448193b65975af8dfa2d59","object":"response","created_at":1776764161,"status":"in_progress","background":false,"completed_at":null,"error":null,"frequency_penalty":0.0,"incomplete_details":null,"instructions":null,"max_output_tokens":null,"max_tool_calls":null,"model":"gpt-4o-2024-08-06","output":[],"parallel_tool_calls":true,"presence_penalty":0.0,"previous_response_id":null,"prompt_cache_key":null,"prompt_cache_retention":"in_memory","reasoning":{"effort":null,"summary":null},"safety_identifier":null,"service_tier":"auto","store":true,"temperature":1.0,"text":{"format":{"type":"text"},"verbosity":"medium"},"tool_choice":"auto","tools":[],"top_logprobs":0,"top_p":1.0,"truncation":"disabled","usage":null,"user":null,"metadata":{}},"sequence_number":0}
+
+      '
+    - '
+
+      '
+    - 'event: response.in_progress
+
+      '
+    - 'data: {"type":"response.in_progress","response":{"id":"resp_0d119a97c73fc7550069e7450154448193b65975af8dfa2d59","object":"response","created_at":1776764161,"status":"in_progress","background":false,"completed_at":null,"error":null,"frequency_penalty":0.0,"incomplete_details":null,"instructions":null,"max_output_tokens":null,"max_tool_calls":null,"model":"gpt-4o-2024-08-06","output":[],"parallel_tool_calls":true,"presence_penalty":0.0,"previous_response_id":null,"prompt_cache_key":null,"prompt_cache_retention":"in_memory","reasoning":{"effort":null,"summary":null},"safety_identifier":null,"service_tier":"auto","store":true,"temperature":1.0,"text":{"format":{"type":"text"},"verbosity":"medium"},"tool_choice":"auto","tools":[],"top_logprobs":0,"top_p":1.0,"truncation":"disabled","usage":null,"user":null,"metadata":{}},"sequence_number":1}
+
+      '
+    - '
+
+      '
+    - 'event: response.output_item.added
+
+      '
+    - 'data: {"type":"response.output_item.added","item":{"id":"msg_0d119a97c73fc7550069e74501f204819389cc6b2751cea611","type":"message","status":"in_progress","content":[],"role":"assistant"},"output_index":0,"sequence_number":2}
+
+      '
+    - '
+
+      '
+    - 'event: response.content_part.added
+
+      '
+    - 'data: {"type":"response.content_part.added","content_index":0,"item_id":"msg_0d119a97c73fc7550069e74501f204819389cc6b2751cea611","output_index":0,"part":{"type":"output_text","annotations":[],"logprobs":[],"text":""},"sequence_number":3}
+
+      '
+    - '
+
+      '
+    - 'event: response.output_text.delta
+
+      '
+    - 'data: {"type":"response.output_text.delta","content_index":0,"delta":"G","item_id":"msg_0d119a97c73fc7550069e74501f204819389cc6b2751cea611","logprobs":[],"obfuscation":"NpACml1t70MBPur","output_index":0,"sequence_number":4}
+
+      '
+    - '
+
+      '
+    - 'event: response.output_text.delta
+
+      '
+    - 'data: {"type":"response.output_text.delta","content_index":0,"delta":"LO","item_id":"msg_0d119a97c73fc7550069e74501f204819389cc6b2751cea611","logprobs":[],"obfuscation":"sYltOblE7Hn8l8","output_index":0,"sequence_number":5}
+
+      '
+    - '
+
+      '
+    - 'event: response.output_text.delta
+
+      '
+    - 'data: {"type":"response.output_text.delta","content_index":0,"delta":"BE","item_id":"msg_0d119a97c73fc7550069e74501f204819389cc6b2751cea611","logprobs":[],"obfuscation":"4BnRKbDQPKERxH","output_index":0,"sequence_number":6}
+
+      '
+    - '
+
+      '
+    - 'event: response.output_text.done
+
+      '
+    - 'data: {"type":"response.output_text.done","content_index":0,"item_id":"msg_0d119a97c73fc7550069e74501f204819389cc6b2751cea611","logprobs":[],"output_index":0,"sequence_number":7,"text":"GLOBE"}
+
+      '
+    - '
+
+      '
+    - 'event: response.content_part.done
+
+      '
+    - 'data: {"type":"response.content_part.done","content_index":0,"item_id":"msg_0d119a97c73fc7550069e74501f204819389cc6b2751cea611","output_index":0,"part":{"type":"output_text","annotations":[],"logprobs":[],"text":"GLOBE"},"sequence_number":8}
+
+      '
+    - '
+
+      '
+    - 'event: response.output_item.done
+
+      '
+    - 'data: {"type":"response.output_item.done","item":{"id":"msg_0d119a97c73fc7550069e74501f204819389cc6b2751cea611","type":"message","status":"completed","content":[{"type":"output_text","annotations":[],"logprobs":[],"text":"GLOBE"}],"role":"assistant"},"output_index":0,"sequence_number":9}
+
+      '
+    - '
+
+      '
+    - 'event: response.completed
+
+      '
+    - 'data: {"type":"response.completed","response":{"id":"resp_0d119a97c73fc7550069e7450154448193b65975af8dfa2d59","object":"response","created_at":1776764161,"status":"completed","background":false,"completed_at":1776764161,"error":null,"frequency_penalty":0.0,"incomplete_details":null,"instructions":null,"max_output_tokens":null,"max_tool_calls":null,"model":"gpt-4o-2024-08-06","output":[{"id":"msg_0d119a97c73fc7550069e74501f204819389cc6b2751cea611","type":"message","status":"completed","content":[{"type":"output_text","annotations":[],"logprobs":[],"text":"GLOBE"}],"role":"assistant"}],"parallel_tool_calls":true,"presence_penalty":0.0,"previous_response_id":null,"prompt_cache_key":null,"prompt_cache_retention":"in_memory","reasoning":{"effort":null,"summary":null},"safety_identifier":null,"service_tier":"default","store":true,"temperature":1.0,"text":{"format":{"type":"text"},"verbosity":"medium"},"tool_choice":"auto","tools":[],"top_logprobs":0,"top_p":1.0,"truncation":"disabled","usage":{"input_tokens":14,"input_tokens_details":{"cached_tokens":0},"output_tokens":4,"output_tokens_details":{"reasoning_tokens":0},"total_tokens":18},"user":null,"metadata":{}},"sequence_number":10}
+
+      '
+    - '
+
+      '
+    status_code: 200
diff --git a/crates/agentic-core/tests/cassettes/text_only/responses/resp-two-turn-gpt-4o-nonstreaming.yaml b/crates/agentic-core/tests/cassettes/text_only/responses/resp-two-turn-gpt-4o-nonstreaming.yaml
new file mode 100644
index 0000000..42e4300
--- /dev/null
+++ b/crates/agentic-core/tests/cassettes/text_only/responses/resp-two-turn-gpt-4o-nonstreaming.yaml
@@ -0,0 +1,154 @@
+turns:
+- filename: t1
+  request:
+    body:
+      input: 'Remember the word APPLE. Just say: OK'
+      model: gpt-4o
+      store: true
+      stream: false
+    headers:
+      accept: '*/*'
+      authorization: Bearer ***
+      content-type: application/json
+      user-agent: python-httpx/0.28.1
+    method: POST
+    path: /v1/responses
+    query_params: {}
+  response:
+    body:
+      background: false
+      billing:
+        payer: developer
+      completed_at: 1776764188
+      created_at: 1776764187
+      error: null
+      frequency_penalty: 0.0
+      id: resp_0db0cfecd1a4eaa10069e7451beccc8195a9e7e09d9343aad0
+      incomplete_details: null
+      instructions: null
+      max_output_tokens: null
+      max_tool_calls: null
+      metadata: {}
+      model: gpt-4o-2024-08-06
+      object: response
+      output:
+      - content:
+        - annotations: []
+          logprobs: []
+          text: OK
+          type: output_text
+        id: msg_0db0cfecd1a4eaa10069e7451c5dac8195a040d4c3725fe3e7
+        role: assistant
+        status: completed
+        type: message
+      parallel_tool_calls: true
+      presence_penalty: 0.0
+      previous_response_id: null
+      prompt_cache_key: null
+      prompt_cache_retention: in_memory
+      reasoning:
+        effort: null
+        summary: null
+      safety_identifier: null
+      service_tier: default
+      status: completed
+      store: true
+      temperature: 1.0
+      text:
+        format:
+          type: text
+        verbosity: medium
+      tool_choice: auto
+      tools: []
+      top_logprobs: 0
+      top_p: 1.0
+      truncation: disabled
+      usage:
+        input_tokens: 17
+        input_tokens_details:
+          cached_tokens: 0
+        output_tokens: 2
+        output_tokens_details:
+          reasoning_tokens: 0
+        total_tokens: 19
+      user: null
+    headers:
+      content-type: application/json
+    status_code: 200
+- filename: t2
+  request:
+    body:
+      input: What word did I ask you to remember?
+      model: gpt-4o
+      previous_response_id: resp_0db0cfecd1a4eaa10069e7451beccc8195a9e7e09d9343aad0
+      store: true
+      stream: false
+    headers:
+      accept: '*/*'
+      authorization: Bearer ***
+      content-type: application/json
+      user-agent: python-httpx/0.28.1
+    method: POST
+    path: /v1/responses
+    query_params: {}
+  response:
+    body:
+      background: false
+      billing:
+        payer: developer
+      completed_at: 1776764196
+      created_at: 1776764195
+      error: null
+      frequency_penalty: 0.0
+      id: resp_0db0cfecd1a4eaa10069e74523ef348195adc49fff1d49863a
+      incomplete_details: null
+      instructions: null
+      max_output_tokens: null
+      max_tool_calls: null
+      metadata: {}
+      model: gpt-4o-2024-08-06
+      object: response
+      output:
+      - content:
+        - annotations: []
+          logprobs: []
+          text: APPLE
+          type: output_text
+        id: msg_0db0cfecd1a4eaa10069e7452480608195b1c2d83f819edb60
+        role: assistant
+        status: completed
+        type: message
+      parallel_tool_calls: true
+      presence_penalty: 0.0
+      previous_response_id: resp_0db0cfecd1a4eaa10069e7451beccc8195a9e7e09d9343aad0
+      prompt_cache_key: null
+      prompt_cache_retention: in_memory
+      reasoning:
+        effort: null
+        summary: null
+      safety_identifier: null
+      service_tier: default
+      status: completed
+      store: true
+      temperature: 1.0
+      text:
+        format:
+          type: text
+        verbosity: medium
+      tool_choice: auto
+      tools: []
+      top_logprobs: 0
+      top_p: 1.0
+      truncation: disabled
+      usage:
+        input_tokens: 35
+        input_tokens_details:
+          cached_tokens: 0
+        output_tokens: 2
+        output_tokens_details:
+          reasoning_tokens: 0
+        total_tokens: 37
+      user: null
+    headers:
+      content-type: application/json
+    status_code: 200
diff --git a/crates/agentic-core/tests/cassettes/text_only/responses/resp-two-turn-gpt-4o-streaming.yaml b/crates/agentic-core/tests/cassettes/text_only/responses/resp-two-turn-gpt-4o-streaming.yaml
new file mode 100644
index 0000000..d0a08cf
--- /dev/null
+++ b/crates/agentic-core/tests/cassettes/text_only/responses/resp-two-turn-gpt-4o-streaming.yaml
@@ -0,0 +1,213 @@
+turns:
+- filename: t1
+  request:
+    body:
+      input: 'Remember the word BANANA. Just say: OK'
+      model: gpt-4o
+      store: true
+      stream: true
+    headers:
+      accept: '*/*'
+      authorization: Bearer ***
+      content-type: application/json
+      user-agent: python-httpx/0.28.1
+    method: POST
+    path: /v1/responses
+    query_params: {}
+  response:
+    headers:
+      content-type: text/event-stream; charset=utf-8
+    sse:
+    - 'event: response.created
+
+      '
+    - 'data: {"type":"response.created","response":{"id":"resp_0f3cfbadf7c5eca80069e7453219a881939da5d5e61bae3387","object":"response","created_at":1776764210,"status":"in_progress","background":false,"completed_at":null,"error":null,"frequency_penalty":0.0,"incomplete_details":null,"instructions":null,"max_output_tokens":null,"max_tool_calls":null,"model":"gpt-4o-2024-08-06","output":[],"parallel_tool_calls":true,"presence_penalty":0.0,"previous_response_id":null,"prompt_cache_key":null,"prompt_cache_retention":"in_memory","reasoning":{"effort":null,"summary":null},"safety_identifier":null,"service_tier":"auto","store":true,"temperature":1.0,"text":{"format":{"type":"text"},"verbosity":"medium"},"tool_choice":"auto","tools":[],"top_logprobs":0,"top_p":1.0,"truncation":"disabled","usage":null,"user":null,"metadata":{}},"sequence_number":0}
+
+      '
+    - '
+
+      '
+    - 'event: response.in_progress
+
+      '
+    - 'data: {"type":"response.in_progress","response":{"id":"resp_0f3cfbadf7c5eca80069e7453219a881939da5d5e61bae3387","object":"response","created_at":1776764210,"status":"in_progress","background":false,"completed_at":null,"error":null,"frequency_penalty":0.0,"incomplete_details":null,"instructions":null,"max_output_tokens":null,"max_tool_calls":null,"model":"gpt-4o-2024-08-06","output":[],"parallel_tool_calls":true,"presence_penalty":0.0,"previous_response_id":null,"prompt_cache_key":null,"prompt_cache_retention":"in_memory","reasoning":{"effort":null,"summary":null},"safety_identifier":null,"service_tier":"auto","store":true,"temperature":1.0,"text":{"format":{"type":"text"},"verbosity":"medium"},"tool_choice":"auto","tools":[],"top_logprobs":0,"top_p":1.0,"truncation":"disabled","usage":null,"user":null,"metadata":{}},"sequence_number":1}
+
+      '
+    - '
+
+      '
+    - 'event: response.output_item.added
+
+      '
+    - 'data: {"type":"response.output_item.added","item":{"id":"msg_0f3cfbadf7c5eca80069e74532ab188193a649f754404be233","type":"message","status":"in_progress","content":[],"role":"assistant"},"output_index":0,"sequence_number":2}
+
+      '
+    - '
+
+      '
+    - 'event: response.content_part.added
+
+      '
+    - 'data: {"type":"response.content_part.added","content_index":0,"item_id":"msg_0f3cfbadf7c5eca80069e74532ab188193a649f754404be233","output_index":0,"part":{"type":"output_text","annotations":[],"logprobs":[],"text":""},"sequence_number":3}
+
+      '
+    - '
+
+      '
+    - 'event: response.output_text.delta
+
+      '
+    - 'data: {"type":"response.output_text.delta","content_index":0,"delta":"OK","item_id":"msg_0f3cfbadf7c5eca80069e74532ab188193a649f754404be233","logprobs":[],"obfuscation":"fFzOZt2wTSxfuW","output_index":0,"sequence_number":4}
+
+      '
+    - '
+
+      '
+    - 'event: response.output_text.done
+
+      '
+    - 'data: {"type":"response.output_text.done","content_index":0,"item_id":"msg_0f3cfbadf7c5eca80069e74532ab188193a649f754404be233","logprobs":[],"output_index":0,"sequence_number":5,"text":"OK"}
+
+      '
+    - '
+
+      '
+    - 'event: response.content_part.done
+
+      '
+    - 'data: {"type":"response.content_part.done","content_index":0,"item_id":"msg_0f3cfbadf7c5eca80069e74532ab188193a649f754404be233","output_index":0,"part":{"type":"output_text","annotations":[],"logprobs":[],"text":"OK"},"sequence_number":6}
+
+      '
+    - '
+
+      '
+    - 'event: response.output_item.done
+
+      '
+    - 'data: {"type":"response.output_item.done","item":{"id":"msg_0f3cfbadf7c5eca80069e74532ab188193a649f754404be233","type":"message","status":"completed","content":[{"type":"output_text","annotations":[],"logprobs":[],"text":"OK"}],"role":"assistant"},"output_index":0,"sequence_number":7}
+
+      '
+    - '
+
+      '
+    - 'event: response.completed
+
+      '
+    - 'data: {"type":"response.completed","response":{"id":"resp_0f3cfbadf7c5eca80069e7453219a881939da5d5e61bae3387","object":"response","created_at":1776764210,"status":"completed","background":false,"completed_at":1776764210,"error":null,"frequency_penalty":0.0,"incomplete_details":null,"instructions":null,"max_output_tokens":null,"max_tool_calls":null,"model":"gpt-4o-2024-08-06","output":[{"id":"msg_0f3cfbadf7c5eca80069e74532ab188193a649f754404be233","type":"message","status":"completed","content":[{"type":"output_text","annotations":[],"logprobs":[],"text":"OK"}],"role":"assistant"}],"parallel_tool_calls":true,"presence_penalty":0.0,"previous_response_id":null,"prompt_cache_key":null,"prompt_cache_retention":"in_memory","reasoning":{"effort":null,"summary":null},"safety_identifier":null,"service_tier":"default","store":true,"temperature":1.0,"text":{"format":{"type":"text"},"verbosity":"medium"},"tool_choice":"auto","tools":[],"top_logprobs":0,"top_p":1.0,"truncation":"disabled","usage":{"input_tokens":17,"input_tokens_details":{"cached_tokens":0},"output_tokens":2,"output_tokens_details":{"reasoning_tokens":0},"total_tokens":19},"user":null,"metadata":{}},"sequence_number":8}
+
+      '
+    - '
+
+      '
+    status_code: 200
+- filename: t2
+  request:
+    body:
+      input: What word did I ask you to remember?
+      model: gpt-4o
+      previous_response_id: resp_0f3cfbadf7c5eca80069e7453219a881939da5d5e61bae3387
+      store: true
+      stream: true
+    headers:
+      accept: '*/*'
+      authorization: Bearer ***
+      content-type: application/json
+      user-agent: python-httpx/0.28.1
+    method: POST
+    path: /v1/responses
+    query_params: {}
+  response:
+    headers:
+      content-type: text/event-stream; charset=utf-8
+    sse:
+    - 'event: response.created
+
+      '
+    - 'data: {"type":"response.created","response":{"id":"resp_0f3cfbadf7c5eca80069e7453a2be4819383b068fd4663b6c8","object":"response","created_at":1776764218,"status":"in_progress","background":false,"completed_at":null,"error":null,"frequency_penalty":0.0,"incomplete_details":null,"instructions":null,"max_output_tokens":null,"max_tool_calls":null,"model":"gpt-4o-2024-08-06","output":[],"parallel_tool_calls":true,"presence_penalty":0.0,"previous_response_id":"resp_0f3cfbadf7c5eca80069e7453219a881939da5d5e61bae3387","prompt_cache_key":null,"prompt_cache_retention":"in_memory","reasoning":{"effort":null,"summary":null},"safety_identifier":null,"service_tier":"auto","store":true,"temperature":1.0,"text":{"format":{"type":"text"},"verbosity":"medium"},"tool_choice":"auto","tools":[],"top_logprobs":0,"top_p":1.0,"truncation":"disabled","usage":null,"user":null,"metadata":{}},"sequence_number":0}
+
+      '
+    - '
+
+      '
+    - 'event: response.in_progress
+
+      '
+    - 'data: {"type":"response.in_progress","response":{"id":"resp_0f3cfbadf7c5eca80069e7453a2be4819383b068fd4663b6c8","object":"response","created_at":1776764218,"status":"in_progress","background":false,"completed_at":null,"error":null,"frequency_penalty":0.0,"incomplete_details":null,"instructions":null,"max_output_tokens":null,"max_tool_calls":null,"model":"gpt-4o-2024-08-06","output":[],"parallel_tool_calls":true,"presence_penalty":0.0,"previous_response_id":"resp_0f3cfbadf7c5eca80069e7453219a881939da5d5e61bae3387","prompt_cache_key":null,"prompt_cache_retention":"in_memory","reasoning":{"effort":null,"summary":null},"safety_identifier":null,"service_tier":"auto","store":true,"temperature":1.0,"text":{"format":{"type":"text"},"verbosity":"medium"},"tool_choice":"auto","tools":[],"top_logprobs":0,"top_p":1.0,"truncation":"disabled","usage":null,"user":null,"metadata":{}},"sequence_number":1}
+
+      '
+    - '
+
+      '
+    - 'event: response.output_item.added
+
+      '
+    - 'data: {"type":"response.output_item.added","item":{"id":"msg_0f3cfbadf7c5eca80069e7453ab4108193987ef7e3267e91c1","type":"message","status":"in_progress","content":[],"role":"assistant"},"output_index":0,"sequence_number":2}
+
+      '
+    - '
+
+      '
+    - 'event: response.content_part.added
+
+      '
+    - 'data: {"type":"response.content_part.added","content_index":0,"item_id":"msg_0f3cfbadf7c5eca80069e7453ab4108193987ef7e3267e91c1","output_index":0,"part":{"type":"output_text","annotations":[],"logprobs":[],"text":""},"sequence_number":3}
+
+      '
+    - '
+
+      '
+    - 'event: response.output_text.delta
+
+      '
+    - 'data: {"type":"response.output_text.delta","content_index":0,"delta":"BAN","item_id":"msg_0f3cfbadf7c5eca80069e7453ab4108193987ef7e3267e91c1","logprobs":[],"obfuscation":"ndpMUu4dcc0wQ","output_index":0,"sequence_number":4}
+
+      '
+    - '
+
+      '
+    - 'event: response.output_text.delta
+
+      '
+    - 'data: {"type":"response.output_text.delta","content_index":0,"delta":"ANA","item_id":"msg_0f3cfbadf7c5eca80069e7453ab4108193987ef7e3267e91c1","logprobs":[],"obfuscation":"rzt7Jc0RMv0V1","output_index":0,"sequence_number":5}
+
+      '
+    - '
+
+      '
+    - 'event: response.output_text.done
+
+      '
+    - 'data: {"type":"response.output_text.done","content_index":0,"item_id":"msg_0f3cfbadf7c5eca80069e7453ab4108193987ef7e3267e91c1","logprobs":[],"output_index":0,"sequence_number":6,"text":"BANANA"}
+
+      '
+    - '
+
+      '
+    - 'event: response.content_part.done
+
+      '
+    - 'data: {"type":"response.content_part.done","content_index":0,"item_id":"msg_0f3cfbadf7c5eca80069e7453ab4108193987ef7e3267e91c1","output_index":0,"part":{"type":"output_text","annotations":[],"logprobs":[],"text":"BANANA"},"sequence_number":7}
+
+      '
+    - '
+
+      '
+    - 'event: response.output_item.done
+
+      '
+    - 'data: {"type":"response.output_item.done","item":{"id":"msg_0f3cfbadf7c5eca80069e7453ab4108193987ef7e3267e91c1","type":"message","status":"completed","content":[{"type":"output_text","annotations":[],"logprobs":[],"text":"BANANA"}],"role":"assistant"},"output_index":0,"sequence_number":8}
+
+      '
+    - '
+
+      '
+    - 'event: response.completed
+
+      '
+    - 'data: {"type":"response.completed","response":{"id":"resp_0f3cfbadf7c5eca80069e7453a2be4819383b068fd4663b6c8","object":"response","created_at":1776764218,"status":"completed","background":false,"completed_at":1776764218,"error":null,"frequency_penalty":0.0,"incomplete_details":null,"instructions":null,"max_output_tokens":null,"max_tool_calls":null,"model":"gpt-4o-2024-08-06","output":[{"id":"msg_0f3cfbadf7c5eca80069e7453ab4108193987ef7e3267e91c1","type":"message","status":"completed","content":[{"type":"output_text","annotations":[],"logprobs":[],"text":"BANANA"}],"role":"assistant"}],"parallel_tool_calls":true,"presence_penalty":0.0,"previous_response_id":"resp_0f3cfbadf7c5eca80069e7453219a881939da5d5e61bae3387","prompt_cache_key":null,"prompt_cache_retention":"in_memory","reasoning":{"effort":null,"summary":null},"safety_identifier":null,"service_tier":"default","store":true,"temperature":1.0,"text":{"format":{"type":"text"},"verbosity":"medium"},"tool_choice":"auto","tools":[],"top_logprobs":0,"top_p":1.0,"truncation":"disabled","usage":{"input_tokens":35,"input_tokens_details":{"cached_tokens":0},"output_tokens":3,"output_tokens_details":{"reasoning_tokens":0},"total_tokens":38},"user":null,"metadata":{}},"sequence_number":9}
+
+      '
+    - '
+
+      '
+    status_code: 200
diff --git a/crates/agentic-core/tests/stateful_conversation_integration.rs b/crates/agentic-core/tests/stateful_conversation_integration.rs
new file mode 100644
index 0000000..ba70599
--- /dev/null
+++ b/crates/agentic-core/tests/stateful_conversation_integration.rs
@@ -0,0 +1,305 @@
+//! Cassette-based integration tests for the Conversation API (cases 6–10).
+//!
+//! Mirrors `test_conversation_api.py`. Each conversation cassette includes a
+//! `/v1/conversations` creation turn — mirrored here via `create_conversation()`.
+//! `TestFixture` serves only `/v1/responses` turns on the mock HTTP server.
+
+mod support;
+
+use agentic_core::executor::{create_conversation, execute};
+use std::sync::Arc;
+use support::{
+    TestFixture, collect_stream, expected_text, load_cassette, make_request, output_text, responses_turns,
+    unwrap_blocking,
+};
+
+const DIR: &str = concat!(env!("CARGO_MANIFEST_DIR"), "/tests/cassettes/text_only/conversation"); // cassette dir, rooted at the crate manifest dir at compile time
+
+/// Case 6 — two turns, non-streaming, both issued against one `conversation_id`; each reply must match the cassette.
+#[tokio::test]
+async fn test_two_turn_nonstreaming_conversation() {
+    // Arrange — load the cassette and hand every recorded turn to the fixture
+    let cassette = load_cassette(&format!("{DIR}/conv-two-turn-gpt-4o-nonstreaming.yaml"));
+    let all: Vec<_> = cassette.turns.iter().collect();
+    let fixture = TestFixture::new(&all).await;
+    let ctx = &fixture.exec_ctx;
+    let resp = responses_turns(&cassette); // presumably just the `/v1/responses` turns — confirm in `support`
+    let (t1, t2) = (resp[0], resp[1]);
+
+    // Mirrors the cassette's /v1/conversations creation turn; the returned id is attached to both requests
+    let conv_id = create_conversation(ctx).await.expect("create conv").conversation_id;
+
+    // Act — turn 1, then turn 2, sequentially against the same conversation
+    let p1 = unwrap_blocking(
+        execute(
+            make_request(&t1.request.body.input, true, false, None, Some(conv_id.clone())),
+            Arc::clone(ctx),
+        )
+        .await
+        .expect("t1"),
+    );
+    let p2 = unwrap_blocking(
+        execute(
+            make_request(&t2.request.body.input, true, false, None, Some(conv_id)),
+            Arc::clone(ctx),
+        )
+        .await
+        .expect("t2"),
+    );
+
+    // Assert — both turns completed, ids are distinct, and each text equals `expected_text` for its cassette turn
+    assert!(p1.id.starts_with("resp_"));
+    assert_eq!(p1.status, "completed");
+    assert_eq!(output_text(&p1), expected_text(t1));
+    assert_ne!(p2.id, p1.id);
+    assert_eq!(p2.status, "completed");
+    assert_eq!(output_text(&p2), expected_text(t2));
+}
+
+/// Case 7 — two turns, streaming, both issued against one `conversation_id`; each collected reply must match the cassette.
+#[tokio::test]
+async fn test_two_turn_streaming_conversation() {
+    // Arrange — load the cassette and hand every recorded turn to the fixture
+    let cassette = load_cassette(&format!("{DIR}/conv-two-turn-gpt-4o-streaming.yaml"));
+    let all: Vec<_> = cassette.turns.iter().collect();
+    let fixture = TestFixture::new(&all).await;
+    let ctx = &fixture.exec_ctx;
+    let resp = responses_turns(&cassette); // presumably just the `/v1/responses` turns — confirm in `support`
+    let (t1, t2) = (resp[0], resp[1]);
+
+    let conv_id = create_conversation(ctx).await.expect("create conv").conversation_id; // id attached to both requests below
+
+    // Act — each `execute` result is passed through `collect_stream` and awaited before the next turn starts
+    let p1 = collect_stream(
+        execute(
+            make_request(&t1.request.body.input, true, true, None, Some(conv_id.clone())),
+            Arc::clone(ctx),
+        )
+        .await
+        .expect("t1"),
+    )
+    .await;
+    let p2 = collect_stream(
+        execute(
+            make_request(&t2.request.body.input, true, true, None, Some(conv_id)),
+            Arc::clone(ctx),
+        )
+        .await
+        .expect("t2"),
+    )
+    .await;
+
+    // Assert — both turns completed, ids are distinct, and each text equals `expected_text` for its cassette turn
+    assert!(p1.id.starts_with("resp_"));
+    assert_eq!(p1.status, "completed");
+    assert_eq!(output_text(&p1), expected_text(t1));
+    assert_ne!(p2.id, p1.id);
+    assert_eq!(p2.status, "completed");
+    assert_eq!(output_text(&p2), expected_text(t2));
+}
+
+/// Case 8 — two independent conversations (three turns each, run A then B) must not share context.
+#[tokio::test]
+async fn test_conversation_isolation() {
+    // Arrange — load the cassette and hand every recorded turn to the fixture
+    let cassette = load_cassette(&format!("{DIR}/conv-isolation-gpt-4o-nonstreaming.yaml"));
+    let all: Vec<_> = cassette.turns.iter().collect();
+    let fixture = TestFixture::new(&all).await;
+    let ctx = &fixture.exec_ctx;
+    let resp = responses_turns(&cassette); // presumably just the `/v1/responses` turns — confirm in `support`
+    let (ta1, ta2, ta3, tb1, tb2, tb3) = (resp[0], resp[1], resp[2], resp[3], resp[4], resp[5]);
+
+    // Conv A — three sequential turns; each reply is checked against the cassette as soon as it returns
+    let conv_a = create_conversation(ctx).await.expect("create conv A").conversation_id;
+    let pa1 = unwrap_blocking(
+        execute(
+            make_request(&ta1.request.body.input, true, false, None, Some(conv_a.clone())),
+            Arc::clone(ctx),
+        )
+        .await
+        .expect("a1"),
+    );
+    assert_eq!(output_text(&pa1), expected_text(ta1));
+    let pa2 = unwrap_blocking(
+        execute(
+            make_request(&ta2.request.body.input, true, false, None, Some(conv_a.clone())),
+            Arc::clone(ctx),
+        )
+        .await
+        .expect("a2"),
+    );
+    assert_eq!(output_text(&pa2), expected_text(ta2));
+    let pa3 = unwrap_blocking(
+        execute(
+            make_request(&ta3.request.body.input, true, false, None, Some(conv_a.clone())),
+            Arc::clone(ctx),
+        )
+        .await
+        .expect("a3"),
+    );
+    assert_eq!(output_text(&pa3), expected_text(ta3));
+
+    // Conv B — a second, freshly created conversation driven through its own three turns on the same context
+    let conv_b = create_conversation(ctx).await.expect("create conv B").conversation_id;
+    let pb1 = unwrap_blocking(
+        execute(
+            make_request(&tb1.request.body.input, true, false, None, Some(conv_b.clone())),
+            Arc::clone(ctx),
+        )
+        .await
+        .expect("b1"),
+    );
+    assert_eq!(output_text(&pb1), expected_text(tb1));
+    let pb2 = unwrap_blocking(
+        execute(
+            make_request(&tb2.request.body.input, true, false, None, Some(conv_b.clone())),
+            Arc::clone(ctx),
+        )
+        .await
+        .expect("b2"),
+    );
+    assert_eq!(output_text(&pb2), expected_text(tb2));
+    let pb3 = unwrap_blocking(
+        execute(
+            make_request(&tb3.request.body.input, true, false, None, Some(conv_b.clone())),
+            Arc::clone(ctx),
+        )
+        .await
+        .expect("b3"),
+    );
+    assert_eq!(output_text(&pb3), expected_text(tb3));
+
+    // Assert — ids differ. NOTE(review): isolation of context is only evidenced by the per-turn text checks above
+    assert_ne!(conv_a, conv_b, "conversations must not share an id");
+}
+
+/// Case 9 — 3-turn chain on one conversation, then a 4th request branching off turn 1 via `previous_response_id`.
+#[tokio::test]
+async fn test_branch_off_turn_1() {
+    // Arrange — load the cassette and hand every recorded turn to the fixture
+    let cassette = load_cassette(&format!("{DIR}/conv-multi-turn-single-branch-gpt-4o-nonstreaming.yaml"));
+    let all: Vec<_> = cassette.turns.iter().collect();
+    let fixture = TestFixture::new(&all).await;
+    let ctx = &fixture.exec_ctx;
+    let resp = responses_turns(&cassette); // presumably just the `/v1/responses` turns — confirm in `support`
+    let (t1, t2, t3, t4) = (resp[0], resp[1], resp[2], resp[3]);
+
+    let conv_id = create_conversation(ctx).await.expect("create conv").conversation_id; // used by the main chain only
+
+    // Main chain — turns 1..3 all carry `conv_id`; each reply is checked as soon as it returns
+    let p1 = unwrap_blocking(
+        execute(
+            make_request(&t1.request.body.input, true, false, None, Some(conv_id.clone())),
+            Arc::clone(ctx),
+        )
+        .await
+        .expect("t1"),
+    );
+    assert_eq!(output_text(&p1), expected_text(t1));
+    let r1_id = p1.id.clone(); // kept so the branch below can point back at turn 1
+
+    let p2 = unwrap_blocking(
+        execute(
+            make_request(&t2.request.body.input, true, false, None, Some(conv_id.clone())),
+            Arc::clone(ctx),
+        )
+        .await
+        .expect("t2"),
+    );
+    assert_eq!(output_text(&p2), expected_text(t2));
+
+    let p3 = unwrap_blocking(
+        execute(
+            make_request(&t3.request.body.input, true, false, None, Some(conv_id)),
+            Arc::clone(ctx),
+        )
+        .await
+        .expect("t3"),
+    );
+    assert_eq!(output_text(&p3), expected_text(t3));
+
+    // Branch off turn 1 — only turn 1 context visible; the request carries `r1_id` and no conversation id
+    let p4 = unwrap_blocking(
+        execute(
+            make_request(&t4.request.body.input, true, false, Some(r1_id), None),
+            Arc::clone(ctx),
+        )
+        .await
+        .expect("t4"),
+    );
+    assert_eq!(p4.status, "completed");
+    assert_eq!(output_text(&p4), expected_text(t4));
+}
+
+/// Case 10 — 5-turn chain with 2 inline branches: t3→t4 branch off turn 1, t5 branches off turn 2.
+#[tokio::test]
+async fn test_multi_branch() {
+    // Arrange — load the cassette and hand every recorded turn to the fixture
+    let cassette = load_cassette(&format!("{DIR}/conv-multi-branch-multi-turn-gpt-4o-nonstreaming.yaml"));
+    let all: Vec<_> = cassette.turns.iter().collect();
+    let fixture = TestFixture::new(&all).await;
+    let ctx = &fixture.exec_ctx;
+    let resp = responses_turns(&cassette); // presumably just the `/v1/responses` turns — confirm in `support`
+    let (t1, t2, t3, t4, t5) = (resp[0], resp[1], resp[2], resp[3], resp[4]);
+
+    let conv_id = create_conversation(ctx).await.expect("create conv").conversation_id; // used by turns 1 and 2 only
+
+    // Turn 1 — sent with `conv_id`; its response id is saved as the root of branch 1
+    let p1 = unwrap_blocking(
+        execute(
+            make_request(&t1.request.body.input, true, false, None, Some(conv_id.clone())),
+            Arc::clone(ctx),
+        )
+        .await
+        .expect("t1"),
+    );
+    assert_eq!(output_text(&p1), expected_text(t1));
+    let r1_id = p1.id.clone();
+
+    // Turn 2 (main branch) — sent with `conv_id`; its response id is saved as the root of branch 2
+    let p2 = unwrap_blocking(
+        execute(
+            make_request(&t2.request.body.input, true, false, None, Some(conv_id)),
+            Arc::clone(ctx),
+        )
+        .await
+        .expect("t2"),
+    );
+    assert_eq!(output_text(&p2), expected_text(t2));
+    let r2_id = p2.id.clone();
+
+    // Branch 1 — off turn 1: t3 carries `r1_id`, then t4 continues from t3's own response id
+    let p3 = unwrap_blocking(
+        execute(
+            make_request(&t3.request.body.input, true, false, Some(r1_id), None),
+            Arc::clone(ctx),
+        )
+        .await
+        .expect("t3"),
+    );
+    assert_eq!(p3.status, "completed");
+    assert_eq!(output_text(&p3), expected_text(t3));
+
+    let p4 = unwrap_blocking(
+        execute(
+            make_request(&t4.request.body.input, true, false, Some(p3.id.clone()), None),
+            Arc::clone(ctx),
+        )
+        .await
+        .expect("t4"),
+    );
+    assert_eq!(p4.status, "completed");
+    assert_eq!(output_text(&p4), expected_text(t4));
+
+    // Branch 2 — off turn 2: t5 carries `r2_id` and no conversation id
+    let p5 = unwrap_blocking(
+        execute(
+            make_request(&t5.request.body.input, true, false, Some(r2_id), None),
+            Arc::clone(ctx),
+        )
+        .await
+        .expect("t5"),
+    );
+    assert_eq!(p5.status, "completed");
+    assert_eq!(output_text(&p5), expected_text(t5));
+}
diff --git a/crates/agentic-core/tests/stateful_responses_integration.rs b/crates/agentic-core/tests/stateful_responses_integration.rs
new file mode 100644
index 0000000..75dc545
--- /dev/null
+++ b/crates/agentic-core/tests/stateful_responses_integration.rs
@@ -0,0 +1,164 @@
+//! Cassette-based integration tests for the Responses API (cases 1–5).
+//!
+//! Mirrors `test_responses_api.py`. Each test replays a YAML cassette
+//! against a mock HTTP server and verifies `execute()` output.
+
+mod support;
+
+use agentic_core::executor::execute;
+use std::sync::Arc;
+use support::{TestFixture, collect_stream, expected_text, load_cassette, make_request, output_text, unwrap_blocking};
+
+const DIR: &str = concat!(env!("CARGO_MANIFEST_DIR"), "/tests/cassettes/text_only/responses"); // cassette dir, rooted at the crate manifest dir at compile time
+
+/// Case 1 — single turn, non-streaming: one `execute` call whose output text must match the cassette.
+#[tokio::test]
+async fn test_single_turn_nonstreaming() {
+    // Arrange — the fixture is given only the cassette's first turn
+    let cassette = load_cassette(&format!("{DIR}/resp-single-gpt-4o-nonstreaming.yaml"));
+    let t1 = &cassette.turns[0];
+    let fixture = TestFixture::new(&[t1]).await;
+
+    // Act — input and `store` come from the recorded request; no previous response or conversation id
+    let payload = unwrap_blocking(
+        execute(
+            make_request(&t1.request.body.input, t1.request.body.store, false, None, None),
+            Arc::clone(&fixture.exec_ctx),
+        )
+        .await
+        .expect("execute"),
+    );
+
+    // Assert — `resp_`-prefixed id (printed on failure), completed status, text equal to `expected_text(t1)`
+    assert!(payload.id.starts_with("resp_"), "id={}", payload.id);
+    assert_eq!(payload.status, "completed");
+    assert_eq!(output_text(&payload), expected_text(t1));
+}
+
+/// Case 2 — single turn, streaming: the `execute` result is gathered by `collect_stream` and must match the cassette.
+#[tokio::test]
+async fn test_single_turn_streaming() {
+    // Arrange — the fixture is given only the cassette's first turn
+    let cassette = load_cassette(&format!("{DIR}/resp-single-gpt-4o-streaming.yaml"));
+    let t1 = &cassette.turns[0];
+    let fixture = TestFixture::new(&[t1]).await;
+
+    // Act — input and `store` come from the recorded request; no previous response or conversation id
+    let payload = collect_stream(
+        execute(
+            make_request(&t1.request.body.input, t1.request.body.store, true, None, None),
+            Arc::clone(&fixture.exec_ctx),
+        )
+        .await
+        .expect("execute"),
+    )
+    .await;
+
+    // Assert — `resp_`-prefixed id (printed on failure), completed status, text equal to `expected_text(t1)`
+    assert!(payload.id.starts_with("resp_"), "id={}", payload.id);
+    assert_eq!(payload.status, "completed");
+    assert_eq!(output_text(&payload), expected_text(t1));
+}
+
+/// Case 3 — two turns, non-streaming, chained via `previous_response_id` (turn 2 carries turn 1's response id).
+#[tokio::test]
+async fn test_two_turn_nonstreaming_previous_response_id() {
+    // Arrange — both recorded turns are handed to the fixture
+    let cassette = load_cassette(&format!("{DIR}/resp-two-turn-gpt-4o-nonstreaming.yaml"));
+    let (t1, t2) = (&cassette.turns[0], &cassette.turns[1]);
+    let fixture = TestFixture::new(&[t1, t2]).await;
+
+    // Act — turn 1 has no predecessor; turn 2 passes `p1.id` as its previous response id
+    let p1 = unwrap_blocking(
+        execute(
+            make_request(&t1.request.body.input, true, false, None, None),
+            Arc::clone(&fixture.exec_ctx),
+        )
+        .await
+        .expect("t1"),
+    );
+    let p2 = unwrap_blocking(
+        execute(
+            make_request(&t2.request.body.input, true, false, Some(p1.id.clone()), None),
+            Arc::clone(&fixture.exec_ctx),
+        )
+        .await
+        .expect("t2"),
+    );
+
+    // Assert — both completed with distinct ids, turn 2 reports turn 1 as its predecessor, texts match the cassette
+    assert!(p1.id.starts_with("resp_"));
+    assert_eq!(p1.status, "completed");
+    assert_eq!(output_text(&p1), expected_text(t1));
+    assert_ne!(p2.id, p1.id);
+    assert_eq!(p2.status, "completed");
+    assert_eq!(p2.previous_response_id.as_deref(), Some(p1.id.as_str()));
+    assert_eq!(output_text(&p2), expected_text(t2));
+}
+
+/// Case 4 — two turns, streaming, chained via `previous_response_id` (turn 2 carries turn 1's response id).
+#[tokio::test]
+async fn test_two_turn_streaming_previous_response_id() {
+    // Arrange — both recorded turns are handed to the fixture
+    let cassette = load_cassette(&format!("{DIR}/resp-two-turn-gpt-4o-streaming.yaml"));
+    let (t1, t2) = (&cassette.turns[0], &cassette.turns[1]);
+    let fixture = TestFixture::new(&[t1, t2]).await;
+
+    // Act — turn 1 is fully collected before turn 2 is issued with `p1.id` as its previous response id
+    let p1 = collect_stream(
+        execute(
+            make_request(&t1.request.body.input, true, true, None, None),
+            Arc::clone(&fixture.exec_ctx),
+        )
+        .await
+        .expect("t1"),
+    )
+    .await;
+    let p2 = collect_stream(
+        execute(
+            make_request(&t2.request.body.input, true, true, Some(p1.id.clone()), None),
+            Arc::clone(&fixture.exec_ctx),
+        )
+        .await
+        .expect("t2"),
+    )
+    .await;
+
+    // Assert — NOTE(review): unlike the non-streaming Case 3, `p2.previous_response_id` is not checked here
+    assert!(p1.id.starts_with("resp_"));
+    assert_eq!(p1.status, "completed");
+    assert_eq!(output_text(&p1), expected_text(t1));
+    assert_ne!(p2.id, p1.id);
+    assert_eq!(p2.status, "completed");
+    assert_eq!(output_text(&p2), expected_text(t2));
+}
+
+/// Case 5 — `store=false` response cannot be used as `previous_response_id`: the follow-up `execute` must return `Err`.
+#[tokio::test]
+async fn test_store_disabled_not_reusable_as_previous_response_id() {
+    // Arrange — only one mock needed; follow-up errors before hitting the LLM
+    let cassette = load_cassette(&format!("{DIR}/resp-no-store-gpt-4o-nonstreaming.yaml"));
+    let t1 = &cassette.turns[0];
+    let fixture = TestFixture::new(&[t1]).await;
+
+    // Act — turn 1, store=false (passed as a literal here rather than read from the cassette request)
+    let p1 = unwrap_blocking(
+        execute(
+            make_request(&t1.request.body.input, false, false, None, None),
+            Arc::clone(&fixture.exec_ctx),
+        )
+        .await
+        .expect("t1"),
+    );
+    assert_eq!(p1.status, "completed");
+
+    // Act — follow-up with the unstored id; the result is kept un-unwrapped so the error can be inspected
+    let result = execute(
+        make_request("follow up", false, false, Some(p1.id.clone()), None),
+        Arc::clone(&fixture.exec_ctx),
+    )
+    .await;
+
+    // Assert — executor errors at rehydrate, before calling the LLM (only `is_err` is checked, not the error kind)
+    assert!(result.is_err(), "expected error for unstored previous_response_id");
+}
diff --git a/crates/agentic-core/tests/storage_integration.rs b/crates/agentic-core/tests/storage_integration.rs
index d4ac640..e12f154 100644
--- a/crates/agentic-core/tests/storage_integration.rs
+++ b/crates/agentic-core/tests/storage_integration.rs
@@ -1,21 +1,12 @@
+mod support;
+
 use agentic_core::storage::InOutItem;
 use agentic_core::storage::ResponseMetadata;
-use agentic_core::storage::{ConversationStore, DbPool, ResponseStore, create_pool_with_schema};
+use agentic_core::storage::{ConversationStore, ResponseStore};
 use agentic_core::types::io::{InputItem, InputMessage, InputMessageContent, OutputItem, OutputMessage};
 use std::sync::Arc;
 
-async fn setup_pool() -> Arc<DbPool> {
-    let db_url = format!(
-        "sqlite://{}",
-        std::env::temp_dir()
-            .join(format!("test_{}.db", uuid::Uuid::now_v7()))
-            .display()
-    );
-
-    create_pool_with_schema(Some(&db_url))
-        .await
-        .expect("failed to create pool with schema")
-}
+use support::setup_pool;
 
 fn create_input_item(text: &str) -> InOutItem {
     InOutItem::Input(InputItem::Message(InputMessage {
diff --git a/crates/agentic-core/tests/support/mod.rs b/crates/agentic-core/tests/support/mod.rs
new file mode 100644
index 0000000..bba6b3f
--- /dev/null
+++ b/crates/agentic-core/tests/support/mod.rs
@@ -0,0 +1,328 @@
+//! Shared test infrastructure for the executor and storage integration tests.
+//!
+//! - [`MockServer`] — axum-based HTTP mock whose server task is aborted on `Drop`.
+//! - [`TestFixture`] — bundles mock server + `ExecutionContext` for one test.
+//! - Cassette loading utilities and [`setup_pool`] for a per-call test database.
+//! - Request and response helpers.
+
+#![allow(dead_code)]
+
+use std::sync::Arc;
+
+use axum::Router;
+use axum::http::header;
+use axum::response::{IntoResponse, Response};
+use axum::routing::post;
+use either::Either;
+use futures::StreamExt;
+use serde::Deserialize;
+use tokio::sync::Mutex;
+use tokio::task::JoinHandle;
+
+use agentic_core::executor::{BoxStream, ConversationHandler, ExecutionContext, ResponseHandler};
+use agentic_core::storage::{ConversationStore, DbPool, ResponseStore, create_pool_with_schema};
+use agentic_core::types::io::{OutputItem, ResponsesInput, ToolChoice};
+use agentic_core::types::request_response::{RequestPayload, ResponsePayload};
+
+#[derive(Debug, Deserialize)]
+pub struct Cassette {
+    pub turns: Vec<Turn>, // recorded request/response pairs, deserialized from the cassette file
+}
+
+#[derive(Debug, Deserialize)]
+pub struct Turn {
+    pub request: TurnRequest, // what was sent: endpoint path plus the body fields the tests read
+    pub response: TurnResponse, // what came back: a JSON body or a list of SSE strings
+}
+
+#[derive(Debug, Deserialize)]
+pub struct TurnRequest {
+    pub path: String, // endpoint path; helpers here keep only turns whose path is "/v1/responses"
+    pub body: TurnBody, // the request body fields this module deserializes
+}
+
+#[derive(Debug, Deserialize, Default)]
+pub struct TurnBody {
+    #[serde(default)]
+    pub input: String, // "" when the cassette omits it
+    #[serde(default = "default_true")]
+    pub store: bool, // deserializes to `true` when omitted; NOTE: derived `Default` gives `false`
+    #[serde(default)]
+    pub stream: bool, // `false` when the cassette omits it
+}
+
+fn default_true() -> bool {
+    true // serde default for `TurnBody::store` when the field is missing
+}
+
+#[derive(Debug, Deserialize)]
+pub struct TurnResponse {
+    /// Non-streaming: full JSON response body.  The helpers in this module check this field first.
+    pub body: Option<serde_json::Value>,
+    /// Streaming: list of raw SSE strings from the recording; used only when `body` is absent.
+    pub sse: Option<Vec<String>>,
+}
+
+/// Read the cassette YAML at `path` and deserialize it in full; panics, naming `path`, on failure.
+pub fn load_cassette(path: &str) -> Cassette {
+    let yaml = std::fs::read_to_string(path).unwrap_or_else(|err| panic!("failed to read cassette {path}: {err}"));
+    serde_yaml::from_str::<Cassette>(&yaml).unwrap_or_else(|err| panic!("failed to parse cassette {path}: {err}"))
+}
+
+/// Filter to only `/v1/responses` turns — the LLM inference turns that need a
+/// mock HTTP response.
+///
+/// Conversation cassettes interleave `/v1/conversations` management turns; the
+/// Rust executor handles those internally via [`ConversationHandler`] without
+/// any HTTP call.  The returned references borrow from `cassette` and keep its
+/// turn order.
+pub fn responses_turns(cassette: &Cassette) -> Vec<&Turn> {
+    let is_inference = |turn: &&Turn| turn.request.path == "/v1/responses";
+    cassette.turns.iter().filter(is_inference).collect()
+}
+
+/// Extract the expected output text from a cassette turn.
+///
+/// - Non-streaming (`body` present): `body.output[0].content[0].text`, or `""` when that path
+///   does not hold a string.
+/// - Streaming (`sse` present): the `delta` strings of all `response.output_text.delta` events,
+///   concatenated in order.  Lines without a `data: ` prefix, and payloads that are not valid
+///   JSON, are skipped.
+/// - Neither: the empty string.
+pub fn expected_text(turn: &Turn) -> String {
+    let response = &turn.response;
+    if let Some(body) = &response.body {
+        // Indexing a `serde_json::Value` with a missing key or index yields `Null`, so an
+        // unexpected shape falls through to "" instead of panicking.
+        let text = body["output"][0]["content"][0]["text"].as_str();
+        return text.unwrap_or("").to_string();
+    }
+
+    let Some(events) = &response.sse else {
+        return String::new();
+    };
+    // Each recorded string may hold several SSE lines; only the JSON after "data: " matters.
+    events
+        .iter()
+        .flat_map(|raw| raw.lines())
+        .filter_map(|line| line.strip_prefix("data: "))
+        .filter_map(|data| serde_json::from_str::<serde_json::Value>(data).ok())
+        .filter(|event| event["type"].as_str() == Some("response.output_text.delta"))
+        .filter_map(|event| event["delta"].as_str().map(str::to_owned))
+        .collect()
+}
+
+/// A per-test HTTP mock server.  The server task is aborted when this struct
+/// is dropped, ensuring clean teardown even if a test panics.
+pub struct MockServer {
+    url: String, // base URL of the bound listener, in the form `http://<addr>`
+    handle: JoinHandle<()>, // the spawned task running `axum::serve`; aborted in `Drop`
+}
+
+impl MockServer {
+    pub fn url(&self) -> &str {
+        self.url.as_str() // base URL of the listener, in the form `http://<addr>`
+    }
+}
+
+impl Drop for MockServer {
+    fn drop(&mut self) {
+        self.handle.abort(); // stop the spawned server task once the `MockServer` is dropped
+    }
+}
+
+/// Wrap a queued [`MockResponse`] in a `200` HTTP response.  The variant only decides the
+/// `Content-Type` header (`application/json` vs. `text/event-stream; charset=utf-8`); the
+/// carried string becomes the body unchanged.
+fn build_response(resp: MockResponse) -> Response {
+    let (content_type, payload) = match resp {
+        MockResponse::Json(json) => ("application/json", json),
+        MockResponse::Sse(events) => ("text/event-stream; charset=utf-8", events),
+    };
+    // Status and header are fixed values, so building is not expected to fail.
+    Response::builder()
+        .status(200)
+        .header(header::CONTENT_TYPE, content_type)
+        .body(axum::body::Body::from(payload))
+        .unwrap()
+        .into_response()
+}
+
+/// A single queued mock response; the variant selects the `Content-Type` it is served with.
+pub enum MockResponse {
+    Json(String), // serialized JSON body, served as `application/json`
+    Sse(String), // concatenated SSE text, served as `text/event-stream; charset=utf-8`
+}
+
+impl MockResponse {
+    /// Build the queued reply for `turn`: a recorded `body` is served as JSON; otherwise the `sse`
+    /// strings are concatenated, gaining a final `[DONE]` event if none is present.  Panics if neither.
+    pub fn from_turn(turn: &Turn) -> Self {
+        match (&turn.response.body, &turn.response.sse) {
+            (Some(json), _) => Self::Json(serde_json::to_string(json).expect("cassette body is valid JSON")),
+            (None, Some(chunks)) => {
+                let mut stream = chunks.concat();
+                if !stream.contains("data: [DONE]") {
+                    stream.push_str("data: [DONE]\n\n");
+                }
+                Self::Sse(stream)
+            }
+            (None, None) => panic!("cassette turn has neither body nor sse"),
+        }
+    }
+}
+
+impl MockServer {
+    /// Bind an ephemeral localhost port and serve `responses` from it: each POST to
+    /// `/v1/responses` is answered with the next queued reply, in the order given.  The queue is
+    /// a `VecDeque` so that taking from the front is O(1).
+    ///
+    /// Panics if the listener cannot be bound; the handler panics once the queue is exhausted.
+    pub async fn start_deque(responses: Vec<MockResponse>) -> Self {
+        let listener = tokio::net::TcpListener::bind("127.0.0.1:0")
+            .await
+            .expect("bind mock server");
+        let url = format!("http://{}", listener.local_addr().expect("local addr"));
+        let pending = Arc::new(Mutex::new(std::collections::VecDeque::from(responses)));
+
+        let server_task = async move {
+            // Every request clones the shared handle and pops the next queued reply.
+            let next_reply = post(move |_body: axum::body::Bytes| {
+                let pending = Arc::clone(&pending);
+                async move {
+                    let reply = pending.lock().await.pop_front();
+                    build_response(reply.expect("mock queue exhausted — check test setup"))
+                }
+            });
+            // Conversation management calls don't go through the mock —
+            // the executor handles them via ConversationHandler (DB-only).
+            // This route is here so the server doesn't return 404 if called.
+            let conversations_stub = post(|| async { (axum::http::StatusCode::OK, "{}") });
+            let app = Router::new()
+                .route("/v1/responses", next_reply)
+                .route("/v1/conversations", conversations_stub);
+            axum::serve(listener, app).await.ok();
+        };
+
+        Self {
+            url,
+            handle: tokio::spawn(server_task),
+        }
+    }
+}
+
+/// Create a fresh `SQLite` pool with schema applied.
+///
+/// Each call targets its own database file in the system temp directory, named with a newly
+/// generated UUIDv7, so concurrent tests don't conflict.  Panics if the pool cannot be created.
+pub async fn setup_pool() -> Arc<DbPool> {
+    let file_name = format!("test_{}.db", uuid::Uuid::now_v7());
+    let db_url = format!("sqlite://{}", std::env::temp_dir().join(file_name).display());
+    let pool = create_pool_with_schema(Some(&db_url)).await;
+    pool.expect("failed to create test pool")
+}
+
+/// Bundles everything a test needs: an `ExecutionContext` plus the mock server it was given.
+pub struct TestFixture {
+    pub exec_ctx: Arc<ExecutionContext>, // shared handle; tests pass a clone to each `execute` call
+    // Kept for its Drop impl — aborts the mock server when the fixture is dropped.
+    _server: MockServer,
+}
+
+impl TestFixture {
+    /// Build a fixture from a full cassette turn slice.
+    ///
+    /// Only the `/v1/responses` turns (LLM inference) are queued on the mock server, in slice
+    /// order.  `/v1/conversations` turns are handled by the executor via
+    /// [`ConversationHandler`] (DB-only, no outbound HTTP).
+    pub async fn new(turns: &[&Turn]) -> Self {
+        // One queued mock reply per inference turn.
+        let mut queued = Vec::new();
+        for turn in turns.iter().copied() {
+            if turn.request.path == "/v1/responses" {
+                queued.push(MockResponse::from_turn(turn));
+            }
+        }
+        let server = MockServer::start_deque(queued).await;
+
+        // Both handlers share one freshly created database pool.
+        let pool = setup_pool().await;
+        let conversations = ConversationHandler::new(ConversationStore::new(Arc::clone(&pool)));
+        let responses = ResponseHandler::new(ResponseStore::new(Arc::clone(&pool)));
+        let http = Arc::new(reqwest::Client::new());
+
+        // The executor is given the mock server's URL.
+        let base_url = server.url().to_string();
+        let context = ExecutionContext::new(conversations, responses, http, base_url, None);
+
+        Self {
+            exec_ctx: Arc::new(context),
+            _server: server,
+        }
+    }
+}
+
+pub fn make_request(
+    input: &str,
+    store: bool,
+    stream: bool,
+    previous_response_id: Option<String>,
+    conversation_id: Option<String>,
+) -> RequestPayload {
+    RequestPayload {
+        model: String::from("test-model"), // same constant model name for every request
+        input: ResponsesInput::Text(input.to_owned()), // `input` is always sent as plain text
+        store,
+        stream,
+        previous_response_id,
+        conversation_id,
+        tool_choice: ToolChoice::Auto,
+        instructions: None, // this and every field below is left unset
+        tools: None,
+        include: None,
+        temperature: None,
+        top_p: None,
+        max_output_tokens: None,
+        truncation: None,
+        metadata: None,
+    }
+}
+
+/// Return the payload of a non-streaming result.
+///
+/// Panics if `result` is the streaming variant.
+pub fn unwrap_blocking(result: Either<ResponsePayload, BoxStream>) -> ResponsePayload {
+    result.left_or_else(|_| panic!("expected non-streaming response, got stream"))
+}
+
+/// Drain a streaming result and return the first `data: ` chunk (other than `[DONE]`) that parses
+/// as a `ResponsePayload`.  Panics if `result` is not a stream or no chunk parses.
+pub async fn collect_stream(result: Either<ResponsePayload, BoxStream>) -> ResponsePayload {
+    let Either::Right(chunks) = result else {
+        panic!("expected streaming response, got blocking");
+    };
+    let mut chunks = Box::pin(chunks);
+    while let Some(chunk) = chunks.next().await {
+        let data = chunk.trim_end_matches('\n').strip_prefix("data: ");
+        let parsed = data
+            .filter(|d| *d != "[DONE]")
+            .and_then(|d| serde_json::from_str::<ResponsePayload>(d).ok());
+        if let Some(payload) = parsed {
+            while chunks.next().await.is_some() {}
+            return payload;
+        }
+    }
+    panic!("stream ended without a ResponsePayload chunk");
+}
+
+/// Concatenate, in order, the text of every content part of every message in `payload.output`.
+pub fn output_text(payload: &ResponsePayload) -> String {
+    let mut text = String::new();
+    for item in payload.output.iter() {
+        match item {
+            OutputItem::Message(msg) => text.extend(msg.content.iter().map(|part| part.text.as_str())),
+            OutputItem::FunctionCall(_) | OutputItem::Unknown => {}
+        }
+    }
+    text
+}