diff --git a/Cargo.lock b/Cargo.lock index bbbccb1..dbcdcf4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6,14 +6,18 @@ version = 4 name = "agentic-core" version = "0.1.0" dependencies = [ + "async-stream", + "axum", "bytes", "chrono", "criterion", + "either", "futures", "http", "reqwest", "serde", "serde_json", + "serde_yaml", "sqlx", "thiserror", "tokio", @@ -133,6 +137,28 @@ version = "1.0.102" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7f202df86484c868dbad7eaa557ef785d5c66295e41b460ef922eca0723b842c" +[[package]] +name = "async-stream" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b5a71a6f37880a80d1d7f19efd781e4b5de42c88f0722cc13bcb6cc2cfe8476" +dependencies = [ + "async-stream-impl", + "futures-core", + "pin-project-lite", +] + +[[package]] +name = "async-stream-impl" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c7c24de15d275a1ecfd47a380fb4d5ec9bfe0933f309ed5e705b775596a3574d" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "atoi" version = "2.0.0" @@ -150,9 +176,9 @@ checksum = "1505bd5d3d116872e7271a6d4e16d81d0c8570876c8de68093a09ac269d8aac0" [[package]] name = "autocfg" -version = "1.5.0" +version = "1.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" +checksum = "f2032f911046de80f0a198e0901378627c33f59ea0ac00e363d481118bd70a53" [[package]] name = "axum" @@ -220,9 +246,9 @@ checksum = "2af50177e190e07a26ab74f8b1efbfe2ef87da2116221318cb1c2e82baf7de06" [[package]] name = "bitflags" -version = "2.11.1" +version = "2.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c4512299f36f043ab09a583e57bceb5a5aab7a73db1805848e8fef3c9e8c78b3" +checksum = "84d7ced0ae9557296835c32bf1b1e02b44c746701f898460fb000d7eaa84f00a" dependencies = [ "serde_core", ] @@ -238,9 +264,9 @@ 
dependencies = [ [[package]] name = "bumpalo" -version = "3.20.2" +version = "3.20.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5d20789868f4b01b2f2caec9f5c4e0213b41e3e5702a50157d699ae31ced2fcb" +checksum = "72f5acc6cb2ba439de613abc23857ec3d78374d8ed5ac84e9d11336e87da8649" [[package]] name = "byteorder" @@ -262,9 +288,9 @@ checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" [[package]] name = "cc" -version = "1.2.62" +version = "1.2.63" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a1dce859f0832a7d088c4f1119888ab94ef4b5d6795d1ce05afb7fe159d79f98" +checksum = "556e016178bb5662a08681bbe0f00f8e17631781a4dfc8c45e466e4b185ec27f" dependencies = [ "find-msvc-tools", "shlex", @@ -537,9 +563,9 @@ dependencies = [ [[package]] name = "displaydoc" -version = "0.2.5" +version = "0.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" +checksum = "1ac70aa55017e108007fbaf5aa0f54b021c98f92ff8af59d42eda9da96e3dd4f" dependencies = [ "proc-macro2", "quote", @@ -554,9 +580,9 @@ checksum = "1aaf95b3e5c8f23aa320147307562d361db0ae0d51242340f558153b4eb2439b" [[package]] name = "either" -version = "1.15.0" +version = "1.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" +checksum = "91622ff5e7162018101f2fea40d6ebf4a78bbe5a49736a2020649edf9693679e" dependencies = [ "serde", ] @@ -885,9 +911,9 @@ dependencies = [ [[package]] name = "http" -version = "1.4.0" +version = "1.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e3ba2a386d7f85a81f119ad7498ebe444d2e22c2af0b86b069416ace48b3311a" +checksum = "8be7462df143984c4598a256ef469b251d7d7f9e271135073e78fc535414f3d0" dependencies = [ "bytes", "itoa", @@ -930,9 +956,9 @@ checksum = 
"df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9" [[package]] name = "hyper" -version = "1.9.0" +version = "1.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6299f016b246a94207e63da54dbe807655bf9e00044f73ded42c3ac5305fbcca" +checksum = "55281c53a1894c864990125767da440a4e630446785086f52523b20033b74498" dependencies = [ "atomic-waker", "bytes", @@ -1189,9 +1215,9 @@ checksum = "8f42a60cbdf9a97f5d2305f08a87dc4e09308d1276d28c869c684d7777685682" [[package]] name = "js-sys" -version = "0.3.98" +version = "0.3.99" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "67df7112613f8bfd9150013a0314e196f4800d3201ae742489d999db2f979f08" +checksum = "142bc4740e452c1e57ade0cbc129f139c9093e354346f0872ef985f4f5cf5f11" dependencies = [ "cfg-if", "futures-util", @@ -1235,7 +1261,7 @@ dependencies = [ "bitflags", "libc", "plain", - "redox_syscall 0.8.0", + "redox_syscall 0.8.1", ] [[package]] @@ -1272,9 +1298,9 @@ dependencies = [ [[package]] name = "log" -version = "0.4.29" +version = "0.4.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897" +checksum = "113b30b4cd05f7c06868fdb2854f66a7b9fece9a48425351cd532e810d74024f" [[package]] name = "lru-slab" @@ -1309,9 +1335,9 @@ dependencies = [ [[package]] name = "memchr" -version = "2.8.0" +version = "2.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79" +checksum = "6b947ae49db0d222b1dbc6b113ce7248a3fc3a6ca21b696717bfc000ba4484d8" [[package]] name = "mime" @@ -1321,9 +1347,9 @@ checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a" [[package]] name = "mio" -version = "1.2.0" +version = "1.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "50b7e5b27aa02a74bac8c3f23f448f8d87ff11f92d3aac1a6ed369ee08cc56c1" +checksum = 
"02bd0af71c67b473010cbbc60715ee815645a4dc942899111f494b4b737d6fda" dependencies = [ "libc", "wasi", @@ -1422,9 +1448,9 @@ checksum = "d6790f58c7ff633d8771f42965289203411a5e5c68388703c06e14f24770b41e" [[package]] name = "openssl" -version = "0.10.79" +version = "0.10.80" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bf0b434746ee2832f4f0baf10137e1cabb18cbe6912c69e2e33263c45250f542" +checksum = "a45fa2aa886c42762255da344f0a0d313e254066c46aad76f300c3d3da62d967" dependencies = [ "bitflags", "cfg-if", @@ -1453,9 +1479,9 @@ checksum = "7c87def4c32ab89d880effc9e097653c8da5d6ef28e6b539d313baaacfbafcbe" [[package]] name = "openssl-sys" -version = "0.9.115" +version = "0.9.116" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "158fe5b292746440aa6e7a7e690e55aeb72d41505e2804c23c6973ad0e9c9781" +checksum = "f28a22dc7140cda5f096e5e7724a6962ca81a7f8bfd2979f9b18c11af56318c4" dependencies = [ "cc", "libc", @@ -1663,7 +1689,7 @@ dependencies = [ "once_cell", "socket2", "tracing", - "windows-sys 0.52.0", + "windows-sys 0.60.2", ] [[package]] @@ -1777,9 +1803,9 @@ dependencies = [ [[package]] name = "redox_syscall" -version = "0.8.0" +version = "0.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7c7591fa2c6b601dfcfe5f043f65a1c39fcdf50efefcd7f1572e538c1f4b398d" +checksum = "5b44b894f2a6e36457d665d1e08c3866add6ed5e70050c1b4ba8a8ddedb02ce7" dependencies = [ "bitflags", ] @@ -2042,9 +2068,9 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.149" +version = "1.0.150" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "83fc039473c5595ace860d8c4fafa220ff474b3fc6bfdb4293327f1a37e94d86" +checksum = "e8014e44b4736ed0538adeecded0fce2a272f22dc9578a7eb6b2d9993c74cfb9" dependencies = [ "itoa", "memchr", @@ -2076,6 +2102,19 @@ dependencies = [ "serde", ] +[[package]] +name = "serde_yaml" +version = "0.9.34+deprecated" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a8b1a1a2ebf674015cc02edccce75287f1a0130d394307b36743c2f5d504b47" +dependencies = [ + "indexmap", + "itoa", + "ryu", + "serde", + "unsafe-libyaml", +] + [[package]] name = "sha1" version = "0.10.6" @@ -2109,9 +2148,9 @@ dependencies = [ [[package]] name = "shlex" -version = "1.3.0" +version = "2.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" +checksum = "f8fadd59c855ef2080decdef8ff161eb6661b86933c9d82e5ba29dc602a55aba" [[package]] name = "signal-hook-registry" @@ -2150,9 +2189,9 @@ dependencies = [ [[package]] name = "socket2" -version = "0.6.3" +version = "0.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3a766e1110788c36f4fa1c2b71b387a7815aa65f88ce0229841826633d93723e" +checksum = "52d1cfed4120b4d927bf7c0f86d2087a4a7d6027c906d9f9d525a80573b9be51" dependencies = [ "libc", "windows-sys 0.61.2", @@ -2594,9 +2633,9 @@ dependencies = [ [[package]] name = "tower-http" -version = "0.6.10" +version = "0.6.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "68d6fdd9f81c2819c9a8b0e0cd91660e7746a8e6ea2ba7c6b2b057985f6bcb51" +checksum = "4cfcf7e2740e6fc6d4d688b4ef00650406bb94adf4731e43c096c3a19fe40840" dependencies = [ "bitflags", "bytes", @@ -2692,9 +2731,9 @@ checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b" [[package]] name = "typenum" -version = "1.20.0" +version = "1.20.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "40ce102ab67701b8526c123c1bab5cbe42d7040ccfd0f64af1a385808d2f43de" +checksum = "b6f5e870be6c3b371b77fe0ee0bafb859fa4964b4404c27de1d380043c4dda20" [[package]] name = "unicode-bidi" @@ -2729,6 +2768,12 @@ version = "0.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853" 
+[[package]] +name = "unsafe-libyaml" +version = "0.2.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "673aac59facbab8a9007c7f6108d11f63b603f7cabff99fabf650fea5c32b861" + [[package]] name = "untrusted" version = "0.9.0" @@ -2761,9 +2806,9 @@ checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" [[package]] name = "uuid" -version = "1.23.1" +version = "1.23.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ddd74a9687298c6858e9b88ec8935ec45d22e8fd5e6394fa1bd4e99a87789c76" +checksum = "d258b83ceec21034727ecee8c382cfa6c3e133699b0742c64571814fb420c9f7" dependencies = [ "getrandom 0.4.2", "js-sys", @@ -2840,9 +2885,9 @@ checksum = "b8dad83b4f25e74f184f64c43b150b91efe7647395b42289f38e50566d82855b" [[package]] name = "wasm-bindgen" -version = "0.2.121" +version = "0.2.122" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49ace1d07c165b0864824eee619580c4689389afa9dc9ed3a4c75040d82e6790" +checksum = "3ed04576f974d2b2fba0f38c51dbc5518011e38c36bf1143164be765528fd409" dependencies = [ "cfg-if", "once_cell", @@ -2853,9 +2898,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-futures" -version = "0.4.71" +version = "0.4.72" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "96492d0d3ffba25305a7dc88720d250b1401d7edca02cc3bcd50633b424673b8" +checksum = "9473dbd2991ae90b6291c3c32c30c6187ac49aa32f9905d1cce280ec1e110b0f" dependencies = [ "js-sys", "wasm-bindgen", @@ -2863,9 +2908,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro" -version = "0.2.121" +version = "0.2.122" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e68e6f4afd367a562002c05637acb8578ff2dea1943df76afb9e83d177c8578" +checksum = "916151b09da36bd82f6615cbf3a419e2f0ba23a03c6160e8e92eb6bd4aa1dec6" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -2873,9 +2918,9 @@ dependencies = [ [[package]] name = 
"wasm-bindgen-macro-support" -version = "0.2.121" +version = "0.2.122" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d95a9ec35c64b2a7cb35d3fead40c4238d0940c86d107136999567a4703259f2" +checksum = "299047362ccbfce148b67ab7e73349f77748e00c8296f9542adfad2ad82c5c5e" dependencies = [ "bumpalo", "proc-macro2", @@ -2886,9 +2931,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-shared" -version = "0.2.121" +version = "0.2.122" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c4e0100b01e9f0d03189a92b96772a1fb998639d981193d7dbab487302513441" +checksum = "9a929b2c61f11ba3e9bc35b50c1f25cb38e0e892c0c231ae2b8cf78d5dad4437" dependencies = [ "unicode-ident", ] @@ -2942,9 +2987,9 @@ dependencies = [ [[package]] name = "web-sys" -version = "0.3.98" +version = "0.3.99" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4b572dff8bcf38bad0fa19729c89bb5748b2b9b1d8be70cf90df697e3a8f32aa" +checksum = "6d621441cfc37b84979402712047321980c178f299193a3589d05b99e8763436" dependencies = [ "js-sys", "wasm-bindgen", @@ -3074,6 +3119,15 @@ dependencies = [ "windows-targets 0.52.6", ] +[[package]] +name = "windows-sys" +version = "0.60.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2f500e4d28234f72040990ec9d39e3a6b950f9f22d3dba18416c35882612bcb" +dependencies = [ + "windows-targets 0.53.5", +] + [[package]] name = "windows-sys" version = "0.61.2" @@ -3107,13 +3161,30 @@ dependencies = [ "windows_aarch64_gnullvm 0.52.6", "windows_aarch64_msvc 0.52.6", "windows_i686_gnu 0.52.6", - "windows_i686_gnullvm", + "windows_i686_gnullvm 0.52.6", "windows_i686_msvc 0.52.6", "windows_x86_64_gnu 0.52.6", "windows_x86_64_gnullvm 0.52.6", "windows_x86_64_msvc 0.52.6", ] +[[package]] +name = "windows-targets" +version = "0.53.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4945f9f551b88e0d65f3db0bc25c33b8acea4d9e41163edf90dcd0b19f9069f3" +dependencies = [ 
+ "windows-link", + "windows_aarch64_gnullvm 0.53.1", + "windows_aarch64_msvc 0.53.1", + "windows_i686_gnu 0.53.1", + "windows_i686_gnullvm 0.53.1", + "windows_i686_msvc 0.53.1", + "windows_x86_64_gnu 0.53.1", + "windows_x86_64_gnullvm 0.53.1", + "windows_x86_64_msvc 0.53.1", +] + [[package]] name = "windows_aarch64_gnullvm" version = "0.48.5" @@ -3126,6 +3197,12 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a9d8416fa8b42f5c947f8482c43e7d89e73a173cead56d044f6a56104a6d1b53" + [[package]] name = "windows_aarch64_msvc" version = "0.48.5" @@ -3138,6 +3215,12 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" +[[package]] +name = "windows_aarch64_msvc" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9d782e804c2f632e395708e99a94275910eb9100b2114651e04744e9b125006" + [[package]] name = "windows_i686_gnu" version = "0.48.5" @@ -3150,12 +3233,24 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" +[[package]] +name = "windows_i686_gnu" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "960e6da069d81e09becb0ca57a65220ddff016ff2d6af6a223cf372a506593a3" + [[package]] name = "windows_i686_gnullvm" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" +[[package]] +name = "windows_i686_gnullvm" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"fa7359d10048f68ab8b09fa71c3daccfb0e9b559aed648a8f95469c27057180c" + [[package]] name = "windows_i686_msvc" version = "0.48.5" @@ -3168,6 +3263,12 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" +[[package]] +name = "windows_i686_msvc" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e7ac75179f18232fe9c285163565a57ef8d3c89254a30685b57d83a38d326c2" + [[package]] name = "windows_x86_64_gnu" version = "0.48.5" @@ -3180,6 +3281,12 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" +[[package]] +name = "windows_x86_64_gnu" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c3842cdd74a865a8066ab39c8a7a473c0778a3f29370b5fd6b4b9aa7df4a499" + [[package]] name = "windows_x86_64_gnullvm" version = "0.48.5" @@ -3192,6 +3299,12 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ffa179e2d07eee8ad8f57493436566c7cc30ac536a3379fdf008f47f6bb7ae1" + [[package]] name = "windows_x86_64_msvc" version = "0.48.5" @@ -3204,6 +3317,12 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" +[[package]] +name = "windows_x86_64_msvc" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d6bbff5f0aada427a1e5a6da5f1f98158182f26556f345ac9e04d36d0ebed650" + [[package]] name = "wit-bindgen" version = "0.51.0" @@ -3329,18 +3448,18 @@ dependencies = [ [[package]] name 
= "zerocopy" -version = "0.8.48" +version = "0.8.50" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eed437bf9d6692032087e337407a86f04cd8d6a16a37199ed57949d415bd68e9" +checksum = "3b065d4f0e55f82fae73202e189638116a87c55ab6b8e6c2721e13dd9d854ad1" dependencies = [ "zerocopy-derive", ] [[package]] name = "zerocopy-derive" -version = "0.8.48" +version = "0.8.50" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "70e3cd084b1788766f53af483dd21f93881ff30d7320490ec3ef7526d203bad4" +checksum = "0b631b19d36a892ab55420c92dbc83ccd79274f25be714855d3074aa71cab639" dependencies = [ "proc-macro2", "quote", @@ -3349,9 +3468,9 @@ dependencies = [ [[package]] name = "zerofrom" -version = "0.1.7" +version = "0.1.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "69faa1f2a1ea75661980b013019ed6687ed0e83d069bc1114e2cc74c6c04c4df" +checksum = "0ec05a11813ea801ff6d75110ad09cd0824ddba17dfe17128ea0d5f68e6c5272" dependencies = [ "zerofrom-derive", ] diff --git a/Cargo.toml b/Cargo.toml index 4289b79..49fd32f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -17,7 +17,9 @@ pedantic = { level = "warn", priority = -1 } [workspace.dependencies] agentic-core = { path = "crates/agentic-core" } +async-stream = "0.3" axum = "0.8" +either = "1" bytes = "1" clap = { version = "4", features = ["derive", "env"] } criterion = { version = "0.5", features = ["async_tokio"] } diff --git a/crates/agentic-core/Cargo.toml b/crates/agentic-core/Cargo.toml index 281333d..612e0fe 100644 --- a/crates/agentic-core/Cargo.toml +++ b/crates/agentic-core/Cargo.toml @@ -7,7 +7,9 @@ license.workspace = true repository.workspace = true [dependencies] +async-stream.workspace = true bytes.workspace = true +either.workspace = true futures.workspace = true http.workspace = true reqwest = { workspace = true, features = ["default-tls", "stream"] } @@ -22,12 +24,14 @@ chrono = { version = "0.4", features = ["serde"] } uuid = { version = "1", features 
= ["v7", "serde"] } [dev-dependencies] +axum.workspace = true criterion = { workspace = true } +serde_yaml = "0.9" +tokio = { workspace = true, features = ["full"] } [[bench]] -name = "storage_crud" +name = "benches" harness = false - [lints] workspace = true diff --git a/crates/agentic-core/benches/benches.rs b/crates/agentic-core/benches/benches.rs new file mode 100644 index 0000000..b49511f --- /dev/null +++ b/crates/agentic-core/benches/benches.rs @@ -0,0 +1,6 @@ +mod executor_throughput; +mod storage_crud; + +use criterion::criterion_main; + +criterion_main!(storage_crud::storage_benches, executor_throughput::executor_benches); diff --git a/crates/agentic-core/benches/executor_throughput.rs b/crates/agentic-core/benches/executor_throughput.rs new file mode 100644 index 0000000..8774e5d --- /dev/null +++ b/crates/agentic-core/benches/executor_throughput.rs @@ -0,0 +1,304 @@ +//! Throughput benchmarks for the executor agentic loop (`execute`). +//! +//! Measures wall-clock time per turn across chain depths 1–N, for both +//! blocking (non-streaming) and streaming execution paths. +//! +//! | Group | What grows with depth | +//! |--------------------|----------------------------------------------------| +//! | `execute/blocking` | rehydrate cost (DB reads) + JSON fetch + persist | +//! | `execute/streaming`| rehydrate cost + SSE accumulate + persist | +//! | `rehydrate_only` | pure rehydrate step, no LLM call | +//! +//! # Configuring max depth +//! +//! Set `BENCH_MAX_DEPTH` before running to control how many depths are swept: +//! +//! ```bash +//! BENCH_MAX_DEPTH=3 cargo bench --bench benches +//! ``` +//! +//! Defaults to 5 when the variable is unset. +//! +//! # Sample size +//! +//! Pass `-- --sample-size=N` (criterion flag) to override the number of +//! samples criterion collects per benchmark: +//! +//! ```bash +//! cargo bench --bench benches -- --sample-size=20 +//! 
``` + +use std::sync::{Arc, Mutex}; + +use axum::Router; +use axum::http::header; +use axum::response::IntoResponse; +use axum::routing::post; +use criterion::{BatchSize, BenchmarkId, Criterion, black_box, criterion_group}; +use either::Either; +use futures::StreamExt; + +use agentic_core::executor::{ConversationHandler, ExecutionContext, ResponseHandler, execute, rehydrate_conversation}; +use agentic_core::storage::{ConversationStore, DbPool, ResponseStore, create_pool_with_schema}; +use agentic_core::types::io::{ResponsesInput, ToolChoice}; +use agentic_core::types::request_response::RequestPayload; + +fn max_depth() -> usize { + std::env::var("BENCH_MAX_DEPTH") + .ok() + .and_then(|v| v.parse::().ok()) + .unwrap_or(5) + .max(1) +} + +const NON_STREAMING_BODY: &str = r#"{ + "id": "resp_bench_upstream", + "object": "response", + "created_at": 1700000000, + "status": "completed", + "model": "test-model", + "output": [{ + "type": "message", + "id": "msg_bench", + "role": "assistant", + "status": "completed", + "content": [{"type": "output_text", "text": "OK", "annotations": []}] + }], + "usage": { + "input_tokens": 5, "output_tokens": 1, "total_tokens": 6, + "input_tokens_details": {"cached_tokens": 0}, + "output_tokens_details": {"reasoning_tokens": 0} + } +}"#; + +const STREAMING_BODY: &str = concat!( + "data: {\"type\":\"response.created\",\"response\":{\"id\":\"resp_bench_upstream\",\"status\":\"in_progress\"}}\n\n", + "data: {\"type\":\"response.output_item.added\",\"item\":{\"id\":\"msg_bench\",\"type\":\"message\",\"status\":\"in_progress\",\"content\":[],\"role\":\"assistant\"}}\n\n", + "data: {\"type\":\"response.output_text.delta\",\"delta\":\"OK\"}\n\n", + "data: {\"type\":\"response.completed\",\"response\":{", + "\"id\":\"resp_bench_upstream\",\"object\":\"response\",\"created_at\":1700000000,", + "\"status\":\"completed\",\"model\":\"test-model\",", + "\"output\":[{\"type\":\"message\",\"id\":\"msg_bench\",\"role\":\"assistant\",", + 
"\"status\":\"completed\",\"content\":[{\"type\":\"output_text\",\"text\":\"OK\",\"annotations\":[]}]}],", + "\"usage\":{\"input_tokens\":5,\"output_tokens\":1,\"total_tokens\":6,", + "\"input_tokens_details\":{\"cached_tokens\":0},", + "\"output_tokens_details\":{\"reasoning_tokens\":0}}", + "}}\n\n", + "data: [DONE]\n\n", +); + +fn start_mock_server(rt: &tokio::runtime::Runtime) -> String { + let listener = rt.block_on(async { tokio::net::TcpListener::bind("127.0.0.1:0").await.unwrap() }); + let addr = listener.local_addr().unwrap(); + + rt.spawn(async move { + let app = Router::new() + .route( + "/v1/responses", + post(|body: axum::body::Bytes| async move { + let is_stream = serde_json::from_slice::(&body) + .ok() + .and_then(|j| j["stream"].as_bool()) + .unwrap_or(false); + + if is_stream { + axum::http::Response::builder() + .status(200) + .header(header::CONTENT_TYPE, "text/event-stream; charset=utf-8") + .body(axum::body::Body::from(STREAMING_BODY)) + .unwrap() + .into_response() + } else { + axum::http::Response::builder() + .status(200) + .header(header::CONTENT_TYPE, "application/json") + .body(axum::body::Body::from(NON_STREAMING_BODY)) + .unwrap() + .into_response() + } + }), + ) + .route( + "/v1/conversations", + post(|| async { (axum::http::StatusCode::OK, "{}") }), + ); + axum::serve(listener, app).await.ok(); + }); + + format!("http://{addr}") +} + +fn make_request(input: &str, stream: bool, prev_id: Option) -> RequestPayload { + RequestPayload { + model: "test-model".to_string(), + input: ResponsesInput::Text(input.to_string()), + instructions: None, + previous_response_id: prev_id, + conversation_id: None, + tools: None, + tool_choice: ToolChoice::Auto, + stream, + store: true, + include: None, + temperature: None, + top_p: None, + max_output_tokens: None, + truncation: None, + metadata: None, + } +} + +fn build_exec_ctx(rt: &tokio::runtime::Runtime, mock_url: String) -> (Arc, Arc) { + let pool = rt.block_on(async { 
create_pool_with_schema(None).await.expect("bench pool creation failed") }); + let conv_handler = ConversationHandler::new(ConversationStore::new(pool.clone())); + let resp_handler = ResponseHandler::new(ResponseStore::new(pool.clone())); + let client = Arc::new(reqwest::Client::new()); + let exec_ctx = Arc::new(ExecutionContext::new( + conv_handler, + resp_handler, + client, + mock_url, + None, + )); + (exec_ctx, pool) +} + +/// Delete all rows from every table so the next bench group starts with a +/// clean state. Accumulated rows from setup closures are removed; this +/// prevents cross-contamination between groups and unbounded DB growth. +fn clear_db(rt: &tokio::runtime::Runtime, pool: &DbPool) { + rt.block_on(async { + sqlx::query("DELETE FROM items").execute(pool).await.ok(); + sqlx::query("DELETE FROM responses").execute(pool).await.ok(); + sqlx::query("DELETE FROM conversations").execute(pool).await.ok(); + }); +} + +/// Build a chain of `depth - 1` non-streaming turns and return the last +/// response ID. Called in the setup closure — cost does NOT count toward the +/// benchmark measurement. +async fn seed_chain(exec_ctx: &Arc, depth: usize) -> Option { + let mut prev_id: Option = None; + for i in 0..depth.saturating_sub(1) { + let req = make_request(&format!("seed {i}"), false, prev_id.take()); + if let Either::Left(p) = execute(req, Arc::clone(exec_ctx)).await.expect("seed") { + prev_id = Some(p.id); + } + } + prev_id +} + +// Bench: blocking path, depths 1–max_depth +// +// The chain of N-1 prior turns is seeded with `rt.block_on()` BEFORE criterion +// starts the measurement loop, so only turn N is timed. +fn bench_execute_blocking(c: &mut Criterion, exec_ctx: &Arc) { + let mut group = c.benchmark_group("execute/blocking"); + let rt = tokio::runtime::Runtime::new().unwrap(); + + for depth in 1..=max_depth() { + // Pre-seed N-1 turns outside criterion — their cost is NOT measured. 
+ let prev_id = rt.block_on(seed_chain(exec_ctx, depth)); + + group.bench_with_input(BenchmarkId::new("turns", depth), &depth, |b, _| { + b.to_async(tokio::runtime::Runtime::new().unwrap()).iter_batched( + // Synchronous setup: just hand the pre-seeded prev_id to each sample. + || prev_id.clone(), + |prev_id| { + let exec_ctx = Arc::clone(exec_ctx); + async move { + let req = make_request("bench turn", false, black_box(prev_id)); + execute(req, exec_ctx).await.expect("execute") + } + }, + BatchSize::SmallInput, + ); + }); + } + group.finish(); +} + +// Bench: streaming path, depths 1–max_depth (same pre-seed approach). +fn bench_execute_streaming(c: &mut Criterion, exec_ctx: &Arc) { + let mut group = c.benchmark_group("execute/streaming"); + let rt = tokio::runtime::Runtime::new().unwrap(); + + for depth in 1..=max_depth() { + let prev_id = rt.block_on(seed_chain(exec_ctx, depth)); + + group.bench_with_input(BenchmarkId::new("turns", depth), &depth, |b, _| { + b.to_async(tokio::runtime::Runtime::new().unwrap()).iter_batched( + || prev_id.clone(), + |prev_id| { + let exec_ctx = Arc::clone(exec_ctx); + async move { + let req = make_request("bench turn", true, black_box(prev_id)); + let result = execute(req, exec_ctx).await.expect("execute"); + if let Either::Right(stream) = result { + let mut stream = Box::pin(stream); + while stream.next().await.is_some() {} + } + } + }, + BatchSize::SmallInput, + ); + }); + } + group.finish(); +} + +fn bench_rehydrate_only(c: &mut Criterion, exec_ctx: &Arc) { + let mut group = c.benchmark_group("rehydrate_only"); + + // Grow the shared chain incrementally so deeper depths include all prior + // history items; the chain_tip tracks the latest response ID. + let chain_tip: Arc>> = Arc::new(Mutex::new(None)); + let rt = tokio::runtime::Runtime::new().unwrap(); + + for depth in 1..=max_depth() { + // Extend the chain to `depth` turns if not already deep enough. 
+ rt.block_on(async { + // Append exactly one turn per iteration: the chain was `depth - 1` + // turns long after the previous pass, so it is `depth` turns long + // by the time this depth is measured. + let prev_id = chain_tip.lock().unwrap().clone(); + let req = make_request("seed", false, prev_id); + if let Either::Left(p) = execute(req, Arc::clone(exec_ctx)).await.expect("seed") { + *chain_tip.lock().unwrap() = Some(p.id); + } + }); + + group.bench_with_input(BenchmarkId::new("prev_response_depth", depth), &depth, |b, _| { + b.to_async(tokio::runtime::Runtime::new().unwrap()).iter_batched( + || chain_tip.lock().unwrap().clone(), + |prev_id| { + let exec_ctx = Arc::clone(exec_ctx); + async move { + let req = make_request("bench", false, black_box(prev_id)); + rehydrate_conversation(req, &exec_ctx).await.expect("rehydrate") + } + }, + BatchSize::SmallInput, + ); + }); + } + + group.finish(); +} + +fn init_benches(c: &mut Criterion) { + let rt = tokio::runtime::Runtime::new().unwrap(); + let mock_url = start_mock_server(&rt); + let (exec_ctx, pool) = build_exec_ctx(&rt, mock_url); + + bench_execute_blocking(c, &exec_ctx); + clear_db(&rt, &pool); + + bench_execute_streaming(c, &exec_ctx); + clear_db(&rt, &pool); + + bench_rehydrate_only(c, &exec_ctx); + clear_db(&rt, &pool); +} + +criterion_group!(executor_benches, init_benches); diff --git a/crates/agentic-core/benches/storage_crud.rs b/crates/agentic-core/benches/storage_crud.rs index ff551da..221903d 100644 --- a/crates/agentic-core/benches/storage_crud.rs +++ b/crates/agentic-core/benches/storage_crud.rs @@ -1,4 +1,4 @@ -use criterion::{BatchSize, Criterion, black_box, criterion_group, criterion_main}; +use criterion::{BatchSize, Criterion, black_box, criterion_group}; use agentic_core::storage::{ConversationStore, InOutItem, ResponseMetadata, ResponseStore, create_pool_with_schema}; use agentic_core::types::io::{InputItem, InputMessage, InputMessageContent, OutputItem, OutputMessage}; @@ -205,5 +205,4 @@ fn init_benches(c: &mut Criterion) { }); } -criterion_group!(benches, init_benches); 
-criterion_main!(benches); +criterion_group!(storage_benches, init_benches); diff --git a/crates/agentic-core/src/executor/accumulator.rs b/crates/agentic-core/src/executor/accumulator.rs new file mode 100644 index 0000000..5b94e8e --- /dev/null +++ b/crates/agentic-core/src/executor/accumulator.rs @@ -0,0 +1,329 @@ +//! Response accumulation and parsing utilities. +//! +//! Handles both streaming (SSE) and non-streaming JSON response formats, +//! accumulating chunks into a unified `ResponsePayload` structure. +//! +//! Streaming path uses a channel + `spawn_blocking` so that SSE JSON parsing +//! runs on a blocking thread while the async task continues reading from the +//! network — keeping the tokio executor thread free between chunk arrivals. + +use std::pin::Pin; +use std::sync::mpsc; + +use futures::{Stream, StreamExt}; + +use crate::executor::error::{ExecutorError, ExecutorResult}; +use crate::types::event::{MessageStatus, ResponseStatus, SSEEventType}; +use crate::types::io::{OutputItem, OutputMessage, OutputTextContent, ResponseUsage}; +use crate::types::request_response::{IncompleteDetails, ResponsePayload}; +use crate::utils::common::{deserialize_from_str, deserialize_from_value, deserialize_from_value_opt}; +use crate::utils::uuid7_str; + +/// Accumulates LLM response chunks from streaming or non-streaming sources. +#[derive(Debug)] +pub struct ResponseAccumulator { + response_id: String, + conversation_id: Option, + output: Vec, + usage: Option, + status: ResponseStatus, + incomplete_details: Option, + // In-flight message state — owned here so process_sse_line takes only &mut self. + current_message: Option, + accumulated_text: String, +} + +impl ResponseAccumulator { + /// Creates a new response accumulator. 
+ #[must_use] + pub fn new(response_id: String, conversation_id: Option) -> Self { + Self { + response_id, + conversation_id, + output: Vec::new(), + usage: None, + status: ResponseStatus::InProgress, + incomplete_details: None, + current_message: None, + accumulated_text: String::new(), + } + } + + /// Parses a non-streaming JSON response body. + /// + /// # Errors + /// Returns `ExecutorError::ParseError` if JSON parsing fails or required fields are missing. + pub fn from_json(body: &str, conversation_id: Option<&str>) -> ExecutorResult { + let json: serde_json::Value = deserialize_from_str(body).map_err(ExecutorError::JsonError)?; + + let response_id = json["id"] + .as_str() + .ok_or_else(|| ExecutorError::ParseError("missing 'id' field in response".into()))? + .to_string(); + + let output = json["output"] + .as_array() + .map(|items| { + let mut out = Vec::with_capacity(items.len()); + out.extend( + items + .iter() + .filter_map(|item| deserialize_from_value_opt::(item.clone())), + ); + out + }) + .unwrap_or_default(); + + let status = json["status"] + .as_str() + .map_or(ResponseStatus::Completed, |s| s.parse().unwrap_or_default()); + + let usage = deserialize_from_value_opt::(json["usage"].clone()); + + Ok(Self { + response_id, + conversation_id: conversation_id.map(str::to_string), + output, + usage, + status, + incomplete_details: None, + current_message: None, + accumulated_text: String::new(), + }) + } + + /// Accumulates an async stream of raw SSE lines with parallel processing. + /// + /// The async task feeds raw SSE lines through a channel while a `spawn_blocking` + /// worker handles JSON parsing on a blocking thread — keeping the tokio executor + /// free between chunk arrivals. + /// + /// # Errors + /// Returns `ExecutorError::ParseError` if chunk parsing fails, or + /// `ExecutorError::StreamError` if the stream or worker encounters an error. 
+ pub async fn from_stream( + mut stream: Pin> + Send>>, + conversation_id: Option<&str>, + ) -> ExecutorResult { + let (tx, rx) = mpsc::channel::(); + // Convert to owned here — spawn_blocking closure must be 'static. + let conv_id_owned = conversation_id.map(str::to_string); + + // Spawn blocking task: JSON parsing is CPU-bound, runs off the async executor. + let worker_handle = tokio::task::spawn_blocking(move || Self::process_stream_chunks(rx, conv_id_owned)); + + // Feed raw SSE lines from the async stream to the blocking worker. + while let Some(chunk_result) = stream.next().await { + match chunk_result { + Ok(chunk) => { + if tx.send(chunk).is_err() { + // Worker exited early (e.g. saw ResponseDone). + break; + } + } + Err(e) => return Err(e), + } + } + + // Signal EOF to worker. + drop(tx); + + // Properly async join — does not block the tokio executor thread. + worker_handle + .await + .map_err(|_| ExecutorError::StreamError("Worker thread panicked".into())) + } + + /// Worker function that processes SSE lines from the channel (runs on blocking thread). + fn process_stream_chunks(rx: mpsc::Receiver, conversation_id: Option) -> Self { + let mut acc = Self::new(uuid7_str("resp_"), conversation_id); + for line in rx { + acc.process_sse_line(&line); + } + acc.finalize_current_message(); + if acc.status == ResponseStatus::InProgress { + acc.status = ResponseStatus::Completed; + } + acc + } + + /// Processes pre-collected raw SSE lines synchronously. + /// + /// Useful when lines have already been buffered (e.g. replaying a recorded stream). + /// Prefer [`from_stream`](Self::from_stream) for live async streams. + /// Line parse errors are silently skipped — this function is infallible. 
+ #[must_use] + pub fn from_sse_lines(lines: impl IntoIterator, conversation_id: Option<&str>) -> Self { + let mut acc = Self::new(uuid7_str("resp_"), conversation_id.map(str::to_string)); + for line in lines { + acc.process_sse_line(&line); + } + acc.finalize_current_message(); + acc + } + + /// Closes the in-flight message, pushing it to `output` with accumulated text. + fn finalize_current_message(&mut self) { + if let Some(mut msg) = self.current_message.take() { + if !self.accumulated_text.is_empty() { + msg.content.push(OutputTextContent::new(&self.accumulated_text)); + } + msg.status = MessageStatus::Completed.as_str().to_string(); + self.output.push(OutputItem::Message(msg)); + } + self.accumulated_text.clear(); + } + + /// Processes a single raw SSE line, updating accumulator state. + /// + /// Non-`data:` lines, `[DONE]`, and malformed JSON are silently skipped. + fn process_sse_line(&mut self, line: &str) { + let Some(data_str) = line.strip_prefix("data: ") else { + return; + }; + if data_str == "[DONE]" { + return; + } + let Ok(json) = deserialize_from_str::(data_str) else { + return; + }; + + match json["type"] + .as_str() + .map_or(SSEEventType::Other, |s| s.parse().unwrap_or_default()) + { + SSEEventType::ResponseCreated => { + if let Some(id) = json["response"]["id"].as_str() { + self.response_id = id.to_string(); + } + } + SSEEventType::ResponseOutputItemAdded => { + self.finalize_current_message(); + let item_id = json["item"]["id"] + .as_str() + .map_or_else(|| uuid7_str("msg_"), str::to_string); + self.current_message = Some(OutputMessage::new(&item_id, MessageStatus::InProgress.as_str())); + } + SSEEventType::ResponseOutputTextDelta => { + if let Some(delta) = json["delta"].as_str() { + self.accumulated_text.push_str(delta); + } + } + SSEEventType::ResponseDone => { + self.finalize_current_message(); + self.status = ResponseStatus::Completed; + if let Ok(usage) = deserialize_from_value::(json["response"]["usage"].clone()) { + self.usage = 
Some(usage); + } + } + SSEEventType::Other => {} + } + } + + /// Marks the response as incomplete due to an error or interruption. + pub fn mark_incomplete(&mut self, reason: impl Into) { + self.status = ResponseStatus::Incomplete; + self.incomplete_details = Some(IncompleteDetails { + reason: Some(reason.into()), + }); + } + + /// Finalizes the accumulator into a `ResponsePayload`. + /// + /// The caller supplies fields that come from the original request, not from + /// the LLM response stream. + #[must_use] + pub fn finalize( + self, + model: &str, + previous_response_id: Option<&str>, + instructions: Option<&str>, + ) -> ResponsePayload { + ResponsePayload { + id: self.response_id, + object: "response".to_string(), + created_at: chrono::Utc::now().timestamp(), + model: model.to_string(), + status: self.status.as_str().to_string(), + output: self.output, + usage: self.usage, + incomplete_details: self.incomplete_details, + error: None, + previous_response_id: previous_response_id.map(str::to_string), + conversation_id: self.conversation_id, + instructions: instructions.map(str::to_string), + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_accumulator_new() { + let acc = ResponseAccumulator::new("resp_123".into(), Some("conv_456".into())); + assert_eq!(acc.response_id, "resp_123"); + assert_eq!(acc.conversation_id, Some("conv_456".into())); + assert_eq!(acc.status, ResponseStatus::InProgress); + } + + #[test] + fn test_accumulator_mark_incomplete() { + let mut acc = ResponseAccumulator::new("resp_123".into(), None); + acc.mark_incomplete("Stream interrupted"); + assert_eq!(acc.status, ResponseStatus::Incomplete); + assert!(acc.incomplete_details.is_some()); + } + + #[test] + fn test_accumulator_finalize() { + let acc = ResponseAccumulator::new("resp_123".into(), Some("conv_456".into())); + let payload = acc.finalize("gpt-4o", Some("resp_prev"), Some("be helpful")); + assert_eq!(payload.id, "resp_123"); + assert_eq!(payload.model, 
"gpt-4o"); + assert_eq!(payload.conversation_id, Some("conv_456".into())); + assert_eq!(payload.previous_response_id, Some("resp_prev".into())); + assert_eq!(payload.instructions, Some("be helpful".into())); + assert_eq!(payload.status, ResponseStatus::InProgress.as_str()); + } + + #[test] + fn test_accumulator_from_sse_lines_empty() { + let acc = ResponseAccumulator::from_sse_lines(vec![], None); + assert_eq!(acc.status, ResponseStatus::InProgress); + assert!(acc.output.is_empty()); + } + + #[test] + fn test_accumulator_text_delta_assigned_to_message() { + let lines = vec![ + r#"data: {"type":"response.created","response":{"id":"resp_abc"}}"#.to_string(), + r#"data: {"type":"response.output_item.added","item":{"id":"msg_1"}}"#.to_string(), + r#"data: {"type":"response.output_text.delta","delta":"Hello"}"#.to_string(), + r#"data: {"type":"response.output_text.delta","delta":" world"}"#.to_string(), + r#"data: {"type":"response.done","response":{"usage":{"input_tokens":5,"output_tokens":2,"total_tokens":7}}}"#.to_string(), + ]; + + let acc = ResponseAccumulator::from_sse_lines(lines, None); + assert_eq!(acc.status, ResponseStatus::Completed); + assert_eq!(acc.output.len(), 1); + + if let OutputItem::Message(msg) = &acc.output[0] { + assert_eq!(msg.content.len(), 1); + assert_eq!(msg.content[0].text, "Hello world"); + } else { + panic!("expected OutputItem::Message"); + } + + assert!(acc.usage.is_some()); + let usage = acc.usage.unwrap(); + assert_eq!(usage.total_tokens, 7); + } + + #[test] + fn test_message_status_enum() { + assert_eq!(MessageStatus::Completed.as_str(), "completed"); + assert_eq!(MessageStatus::InProgress.as_str(), "in_progress"); + } +} diff --git a/crates/agentic-core/src/executor/engine.rs b/crates/agentic-core/src/executor/engine.rs new file mode 100644 index 0000000..f888501 --- /dev/null +++ b/crates/agentic-core/src/executor/engine.rs @@ -0,0 +1,410 @@ +//! Agentic loop executor. +//! +//! 
Exposes each step of the loop as a public function so consumers can compose +//! them directly (e.g. as Praxis filters). [`execute`] is the convenience entry +//! point that composes all steps with the default control flow. + +use std::pin::Pin; +use std::sync::Arc; + +use async_stream::stream; +use either::Either; +use futures::{Stream, StreamExt}; +use tracing::warn; + +use crate::executor::accumulator::ResponseAccumulator; +use crate::executor::error::{ExecutorError, ExecutorResult}; +use crate::executor::modes::{ConversationHandler, ResponseHandler}; +use crate::executor::request::{ExecutionContext, RequestContext}; +use crate::storage::InOutItem; +use crate::types::event::ResponseStatus; +use crate::types::io::{InputItem, ResponsesInput, resolve_tool_choice, resolve_tools}; +use crate::types::request_response::{RequestPayload, ResponsePayload}; +use crate::utils::common::serialize_to_string; +use crate::utils::uuid7_str; + +use std::time::Duration; + +/// SSE stream of raw lines sent to the client (`data: …\n\n` per event). +pub type BoxStream = Pin + Send>>; + +/// Wire-format marker signalling end-of-stream to the client. +const DONE_MARKER: &str = "data: [DONE]\n\n"; + +/// Fetch the next raw bytes chunk from a streaming response. +/// +/// Returns `Ok(Some(bytes))` on data, `Ok(None)` when the stream ends cleanly, +/// and `Err` on a network failure or chunk timeout. +async fn next_chunk(stream: &mut S, timeout: Duration) -> ExecutorResult> +where + S: futures::Stream> + Unpin, +{ + let item = if timeout.is_zero() { + stream.next().await + } else { + tokio::time::timeout(timeout, stream.next()).await.map_err(|_| { + ExecutorError::StreamError("chunk timeout: no data received within the configured window".into()) + })? + }; + item.transpose().map_err(ExecutorError::NetworkError) +} + +/// Build, send, and validate an HTTP POST to the LLM backend. 
+/// +/// Shared by both the blocking path (caller reads `.text()`) and the streaming +/// path (caller reads `.bytes_stream()`). Maps connect/timeout failures and +/// non-2xx status codes to [`ExecutorError::LLMRequest`]. +async fn send_request( + client: &reqwest::Client, + url: &str, + body: String, + auth: Option<&str>, +) -> ExecutorResult { + let mut req = client.post(url).header("Content-Type", "application/json").body(body); + if let Some(key) = auth { + req = req.bearer_auth(key); + } + + let resp = req.send().await.map_err(|e| ExecutorError::LLMRequest { + status: if e.is_timeout() { + http::StatusCode::GATEWAY_TIMEOUT + } else { + http::StatusCode::BAD_GATEWAY + }, + body: if e.is_timeout() { + "upstream timeout".into() + } else { + "upstream unavailable".into() + }, + })?; + + if !resp.status().is_success() { + let status = resp.status().as_u16(); + // Log and discard any error reading the error body — the status code + // is the primary signal; an empty body is acceptable here. + let body = resp + .text() + .await + .inspect_err(|e| tracing::debug!("failed to read error response body: {e}")) + .unwrap_or_default(); + return Err(ExecutorError::LLMRequest { + status: http::StatusCode::from_u16(status).unwrap_or(http::StatusCode::INTERNAL_SERVER_ERROR), + body, + }); + } + + Ok(resp) +} + +/// Makes a non-streaming HTTP POST to the LLM backend and returns the full JSON body. +/// +/// Used by [`run_blocking`] so it can pass the result to [`ResponseAccumulator::from_json`]. +async fn fetch_response_json( + upstream_json: String, + url: &str, + client: &reqwest::Client, + auth: Option<&str>, +) -> ExecutorResult { + let resp = send_request(client, url, upstream_json, auth).await?; + // Preserve the reqwest::Error as the typed source (NetworkError). + resp.text().await.map_err(ExecutorError::NetworkError) +} + +/// Step 1 — Build [`RequestContext`] by rehydrating conversation history. 
+/// +/// `request` is moved into the context as `enriched_request`; one clone is taken +/// for `original_request` so the engine retains an unmodified copy for persistence +/// and ID resolution. +/// +/// Dispatches to one of four paths based on `store` flag and which ID is present: +/// - `store=false` + `previous_response_id`: validate the prior response exists, no history loaded +/// - `store=true` + `previous_response_id`: [`rehydrate_from_response`] +/// - `store=true` + `conversation_id`: [`rehydrate_from_conversation`] +/// - `store=true` + no ids: create a new conversation +/// +/// # Errors +/// Returns [`ExecutorError`] if storage is unavailable or a referenced ID does not exist. +pub async fn rehydrate_conversation( + request: RequestPayload, + exec_ctx: &ExecutionContext, +) -> ExecutorResult { + let response_id = uuid7_str("resp_"); + let new_input_items: Vec = Vec::from(&request.input); + + // One clone for the unmodified original; `request` is moved as enriched_request. + let original_request = request.clone(); + let mut ctx = RequestContext { + enriched_request: request, + original_request, + new_input_items, + response_id, + conversation_id: None, + }; + + if !ctx.original_request.store { + // Non-store path: validate previous_response_id only; no history needed. + if ctx.original_request.previous_response_id.is_some() { + exec_ctx.resp_handler.validate_exists(&ctx).await?; + } + return Ok(ctx); + } + + if ctx.original_request.previous_response_id.is_some() { + rehydrate_from_response(&mut ctx, exec_ctx).await?; + return Ok(ctx); + } + + if ctx.original_request.conversation_id.is_some() { + rehydrate_from_conversation(&mut ctx, exec_ctx).await?; + return Ok(ctx); + } + + // Store + no ids: create a fresh conversation. 
+ let conv_data = exec_ctx.conv_handler.create().await?; + ctx.conversation_id = Some(conv_data.conversation_id); + ctx.enriched_request.input = ResponsesInput::Items(ctx.new_input_items.clone()); + Ok(ctx) +} + +/// Hydrates `ctx` from the previous response chain. +/// +/// Loads the stored response, rehydrates its history items, resolves effective +/// tools and tool choice from the stored metadata, and prepends the history to +/// the enriched request input. +async fn rehydrate_from_response(ctx: &mut RequestContext, exec_ctx: &ExecutionContext) -> ExecutorResult<()> { + let stored = exec_ctx.resp_handler.get(ctx).await?; + let history = exec_ctx.resp_handler.rehydrate(ctx).await?; + + let mut items = InOutItem::into_input_items(history); + items.reserve(ctx.new_input_items.len()); + items.extend(ctx.new_input_items.iter().cloned()); + + ctx.enriched_request.previous_response_id = None; + ctx.enriched_request.input = ResponsesInput::Items(items); + ctx.enriched_request.tools = resolve_tools( + ctx.original_request.tools.as_deref(), + stored.metadata.effective_tools.as_deref(), + ctx.original_request.tools.is_some(), + ); + ctx.enriched_request.tool_choice = resolve_tool_choice( + &ctx.original_request.tool_choice, + &stored.metadata.effective_tool_choice, + false, + ); + ctx.conversation_id = stored.conversation_id; + Ok(()) +} + +/// Hydrates `ctx` from the conversation store. +/// +/// Gets or creates the conversation and rehydrates its history in parallel, +/// then prepends the history items to the enriched request input. 
+async fn rehydrate_from_conversation(ctx: &mut RequestContext, exec_ctx: &ExecutionContext) -> ExecutorResult<()> { + let (conv_data, history) = tokio::try_join!( + exec_ctx.conv_handler.get_or_create(ctx), + exec_ctx.conv_handler.rehydrate(ctx), + )?; + + let mut items = InOutItem::into_input_items(history); + items.reserve(ctx.new_input_items.len()); + items.extend(ctx.new_input_items.iter().cloned()); + + ctx.enriched_request.input = ResponsesInput::Items(items); + ctx.conversation_id = Some(conv_data.conversation_id); + Ok(()) +} + +/// Step 2 — Call the LLM inference backend; yields raw SSE lines (`data: …`). +/// +/// Always requests `stream=true` upstream. Stops on `[DONE]`. +/// +/// # Errors +/// Each stream item is `Result`. The stream yields `Err` on: +/// - [`ExecutorError::LLMRequest`] — connect timeout (504), connection failure (502), +/// or non-2xx HTTP status from the backend +/// - [`ExecutorError::NetworkError`] — network failure while reading the response body +pub fn call_inference( + upstream_json: String, + url: String, + client: Arc, + auth: Option, + chunk_timeout: Duration, +) -> impl Stream> + Send + 'static { + stream! { + let resp = match send_request(&client, &url, upstream_json, auth.as_deref()).await { + Ok(r) => r, + Err(e) => { yield Err(e); return; } + }; + + let mut bytes = resp.bytes_stream(); + let mut buf = String::with_capacity(8192); + + loop { + let chunk = match next_chunk(&mut bytes, chunk_timeout).await { + Ok(Some(c)) => c, + Ok(None) => break, + Err(e) => { yield Err(e); return; } + }; + + match std::str::from_utf8(&chunk) { + Ok(s) => buf.push_str(s), + Err(_) => buf.push_str(&String::from_utf8_lossy(&chunk)), + } + + while let Some(pos) = buf.find('\n') { + let line = buf[..pos].trim_end_matches('\r'); + match line { + "data: [DONE]" => return, + l if l.starts_with("data: ") => yield Ok(l.to_string()), + _ => {} + } + buf.drain(..=pos); + } + } + } +} + +/// Step 3 — Persist the completed response to storage. 
+/// +/// Skipped if [`ResponseStatus`] is not `Completed`/`Incomplete` or `payload.id` is empty. +/// Routes to [`ConversationHandler`] when `ctx.conversation_id` is set, +/// otherwise [`ResponseHandler`]. +/// +/// # Errors +/// Returns [`ExecutorError`] if the storage operation fails. +pub async fn persist_response( + payload: ResponsePayload, + ctx: RequestContext, + conv_handler: ConversationHandler, + resp_handler: ResponseHandler, +) -> ExecutorResult<()> { + // Use typed enum — no hardcoded status strings. + if !matches!( + payload.status.parse::().unwrap_or_default(), + ResponseStatus::Completed | ResponseStatus::Incomplete + ) || payload.id.is_empty() + { + return Ok(()); + } + + // Move output items from payload; handlers build ResponseMetadata from ctx internally. + let output_items = payload.output; + + if ctx.conversation_id.is_some() { + conv_handler.execute_turn(ctx, output_items).await + } else { + resp_handler.execute_turn(ctx, output_items).await + } +} + +async fn run_blocking(ctx: RequestContext, exec_ctx: &ExecutionContext) -> ExecutorResult { + let url = exec_ctx.responses_url(); + // Non-streaming request: stream=false → full JSON body → from_json. 
+ let upstream_json = + serialize_to_string(&ctx.enriched_request.to_upstream_request(false)).map_err(ExecutorError::JsonError)?; + + let body = fetch_response_json(upstream_json, &url, &exec_ctx.client, exec_ctx.client_auth.as_deref()).await?; + + let acc = ResponseAccumulator::from_json(&body, ctx.conversation_id.as_deref())?; + let mut payload = acc.finalize( + &ctx.enriched_request.model, + ctx.original_request.previous_response_id.as_deref(), + ctx.original_request.instructions.as_deref(), + ); + ctx.inject_ids(&mut payload); + + if ctx.original_request.store { + let ch = exec_ctx.conv_handler.clone(); + let rh = exec_ctx.resp_handler.clone(); + if let Err(e) = persist_response(payload.clone(), ctx, ch, rh).await { + warn!("persist failed: {e}"); + } + } + + Ok(payload) +} + +fn run_stream(ctx: RequestContext, exec_ctx: Arc) -> BoxStream { + let url = exec_ctx.responses_url(); + // Streaming request: stream=true → SSE lines → from_stream. + let upstream_json = match serialize_to_string(&ctx.enriched_request.to_upstream_request(true)) { + Ok(s) => s, + Err(e) => { + return Box::pin(stream! { + yield format!("data: {{\"error\": \"serialize error: {e}\"}}\n\n"); + yield DONE_MARKER.to_string(); + }); + } + }; + + let store = ctx.original_request.store; + + Box::pin(stream! { + let line_stream = Box::pin(call_inference( + upstream_json, + url, + Arc::clone(&exec_ctx.client), + exec_ctx.client_auth.clone(), + exec_ctx.streaming_timeout, + )); + + // from_stream feeds SSE lines to a spawn_blocking worker via channel. + // All JSON parsing is CPU-bound and runs off the async executor. 
+ match ResponseAccumulator::from_stream(line_stream, ctx.conversation_id.as_deref()).await { + Err(e) => { + yield format!("data: {{\"error\": \"{e}\"}}\n\n"); + yield DONE_MARKER.to_string(); + } + Ok(acc) => { + let mut payload = acc.finalize( + &ctx.enriched_request.model, + ctx.original_request.previous_response_id.as_deref(), + ctx.original_request.instructions.as_deref(), + ); + ctx.inject_ids(&mut payload); + yield payload.as_responses_chunk(); + yield DONE_MARKER.to_string(); + + if store { + let ch = exec_ctx.conv_handler.clone(); + let rh = exec_ctx.resp_handler.clone(); + if let Err(e) = persist_response(payload, ctx, ch, rh).await { + warn!("persist failed: {e}"); + } + } + } + } + }) +} + +/// Create a new conversation and return its data. +/// +/// Exposes the conversation-creation step as a standalone function so callers +/// (e.g. `agentic-server`, Praxis filters, or tests) can pre-create a +/// conversation before submitting response turns. +/// +/// # Errors +/// Returns [`ExecutorError`] if the conversation store is unavailable. +pub async fn create_conversation(exec_ctx: &ExecutionContext) -> ExecutorResult { + exec_ctx.conv_handler.create().await +} + +/// Run the full agentic loop. +/// +/// Returns `Either::Left(ResponsePayload)` for non-streaming requests, or +/// `Either::Right(BoxStream)` for streaming, each yielded `String` is an SSE +/// line ready to forward to the client. +/// +/// # Errors +/// Returns [`ExecutorError`] if rehydration or (non-streaming) LLM inference fails. 
+pub async fn execute( + request: RequestPayload, + exec_ctx: Arc, +) -> ExecutorResult> { + let ctx = rehydrate_conversation(request, &exec_ctx).await?; + if ctx.original_request.stream { + Ok(Either::Right(run_stream(ctx, exec_ctx))) + } else { + Ok(Either::Left(run_blocking(ctx, &exec_ctx).await?)) + } +} diff --git a/crates/agentic-core/src/executor/error.rs b/crates/agentic-core/src/executor/error.rs new file mode 100644 index 0000000..6c6e41b --- /dev/null +++ b/crates/agentic-core/src/executor/error.rs @@ -0,0 +1,102 @@ +use http::StatusCode; +use thiserror::Error; + +use crate::StorageError; + +#[non_exhaustive] +#[derive(Debug, Error)] +pub enum ExecutorError { + /// A storage layer operation failed. + #[error("storage error: {0}")] + Storage(#[from] StorageError), + + /// The LLM backend returned a non-2xx status or was unreachable. + #[error("LLM request failed ({status}): {body}")] + LLMRequest { status: StatusCode, body: String }, + + /// A network error occurred reading from the LLM response stream. + /// + /// The original `reqwest::Error` is preserved as the error source so + /// callers can inspect the underlying network failure. + #[error("network error: {0}")] + NetworkError( + #[from] + #[source] + reqwest::Error, + ), + + /// JSON deserialisation failed. + /// + /// The original `serde_json::Error` is preserved as the error source so + /// callers can inspect the exact parse failure location and kind. + #[error("json error: {0}")] + JsonError( + #[from] + #[source] + serde_json::Error, + ), + + /// A general stream processing error with a human-readable message. + /// + /// Used for non-network stream failures (e.g. worker thread panic). + #[error("stream error: {0}")] + StreamError(String), + + /// A validation error on the request payload with a human-readable message. + /// + /// Used when required fields are missing or structurally invalid. 
+ #[error("parse error: {0}")] + ParseError(String), + + #[error("{entity} not found: {id}")] + NotFound { entity: String, id: String }, + + #[error("invalid request: {0}")] + InvalidRequest(String), +} + +pub type ExecutorResult = Result; + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_executor_error_display() { + let err = ExecutorError::InvalidRequest("test message".into()); + assert!(err.to_string().contains("invalid request")); + assert!(err.to_string().contains("test message")); + } + + #[test] + fn test_executor_error_stream() { + let err = ExecutorError::StreamError("connection lost".into()); + assert!(err.to_string().contains("stream error")); + } + + #[test] + fn test_executor_error_not_found() { + let err = ExecutorError::NotFound { + entity: "Conversation".into(), + id: "conv_123".into(), + }; + assert!(err.to_string().contains("Conversation")); + assert!(err.to_string().contains("conv_123")); + } + + #[test] + fn test_executor_error_from_storage() { + let storage_err = StorageError::NotConfigured; + let exec_err = ExecutorError::from(storage_err); + assert!(exec_err.to_string().contains("storage error")); + } + + #[test] + fn test_executor_error_json_preserves_source() { + use std::error::Error; + let json_err: serde_json::Error = serde_json::from_str::("{bad}").unwrap_err(); + let exec_err = ExecutorError::from(json_err); + assert!(exec_err.source().is_some(), "source should be chained"); + assert!(exec_err.to_string().contains("json error")); + } +} diff --git a/crates/agentic-core/src/executor/mod.rs b/crates/agentic-core/src/executor/mod.rs new file mode 100644 index 0000000..32fbabc --- /dev/null +++ b/crates/agentic-core/src/executor/mod.rs @@ -0,0 +1,13 @@ +//! Agentic loop executor. 
+ +pub mod accumulator; +pub mod engine; +pub mod error; +pub mod modes; +pub mod request; + +pub use engine::{BoxStream, call_inference, create_conversation, execute, persist_response, rehydrate_conversation}; +pub use error::{ExecutorError, ExecutorResult}; +pub use modes::{ConversationHandler, ResponseHandler}; +pub use request::ExecutionContext; +pub use request::RequestContext; diff --git a/crates/agentic-core/src/executor/modes/conversation.rs b/crates/agentic-core/src/executor/modes/conversation.rs new file mode 100644 index 0000000..bc89476 --- /dev/null +++ b/crates/agentic-core/src/executor/modes/conversation.rs @@ -0,0 +1,167 @@ +//! Conversation storage handler — owns all conversation store operations. + +use crate::storage::{ConversationData, ConversationStore, InOutItem, ResponseMetadata}; +use crate::types::io::OutputItem; + +use crate::executor::error::{ExecutorError, ExecutorResult}; +use crate::executor::request::RequestContext; + +/// Handles all conversation store operations: creation, rehydration, and persistence. +#[derive(Clone, Debug)] +pub struct ConversationHandler { + store: ConversationStore, +} + +impl ConversationHandler { + #[must_use] + pub fn new(store: ConversationStore) -> Self { + Self { store } + } + + /// Gets an existing conversation or creates one. + /// + /// Reads `conversation_id` from `ctx.original_request`. + /// + /// # Errors + /// Returns `ExecutorError` if `conversation_id` is absent, the store is + /// disabled, or the database query fails. + pub async fn get_or_create(&self, ctx: &RequestContext) -> ExecutorResult { + let conv_id = ctx + .original_request + .conversation_id + .as_deref() + .ok_or_else(|| ExecutorError::InvalidRequest("conversation_id is required for get_or_create".into()))?; + self.store.get_or_create(conv_id).await.map_err(ExecutorError::Storage) + } + + /// Creates a brand-new conversation with a freshly generated ID. 
+ /// + /// # Errors + /// Returns `ExecutorError` if the store is disabled or the database query fails. + pub async fn create(&self) -> ExecutorResult { + self.store.create().await.map_err(ExecutorError::Storage) + } + + /// Loads all history items for the conversation referenced by the request. + /// + /// Reads `conversation_id` from `ctx.original_request`. Returns an empty vec + /// if the conversation exists but has no items yet. + /// + /// # Errors + /// Returns `ExecutorError` if `conversation_id` is absent, the store is + /// disabled, or the database query fails. + pub async fn rehydrate(&self, ctx: &RequestContext) -> ExecutorResult> { + let conv_id = ctx + .original_request + .conversation_id + .as_deref() + .ok_or_else(|| ExecutorError::InvalidRequest("conversation_id is required for rehydrate".into()))?; + self.store.rehydrate(conv_id).await.map_err(ExecutorError::Storage) + } + + /// Persists one conversation turn — only the new items from this turn. + /// + /// Takes `ctx` and `output_items` by value so fields can be moved directly + /// into [`ResponseMetadata`] without cloning. The store tracks sequence + /// numbers and appends, so prior history must not be re-inserted. + /// + /// # Errors + /// Returns `ExecutorError` if `conversation_id` is absent on the context, + /// the store is disabled, or the database operation fails. 
+ pub async fn execute_turn(&self, ctx: RequestContext, output_items: Vec) -> ExecutorResult<()> { + let conversation_id = ctx + .conversation_id + .ok_or_else(|| ExecutorError::InvalidRequest("conversation_id is required for execute_turn".into()))?; + + let metadata = ResponseMetadata { + model: ctx.enriched_request.model, + previous_response_id: ctx.original_request.previous_response_id, + effective_tools: ctx.original_request.tools, + effective_tool_choice: ctx.original_request.tool_choice, + effective_instructions: ctx.original_request.instructions, + }; + + let mut new_items = Vec::with_capacity(ctx.new_input_items.len() + output_items.len()); + new_items.extend(ctx.new_input_items.into_iter().map(InOutItem::Input)); + new_items.extend(output_items.into_iter().map(InOutItem::Output)); + + self.store + .persist( + &conversation_id, + &ctx.response_id, + metadata.previous_response_id.as_deref(), + new_items, + &metadata, + ) + .await + .map_err(ExecutorError::Storage) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::types::io::{ResponsesInput, ToolChoice}; + use crate::types::request_response::RequestPayload; + + fn disabled_handler() -> ConversationHandler { + ConversationHandler::new(ConversationStore::disabled()) + } + + fn make_ctx(conversation_id: Option<&str>) -> RequestContext { + let req = RequestPayload { + model: "test".into(), + input: ResponsesInput::Text("hi".into()), + instructions: None, + previous_response_id: None, + conversation_id: conversation_id.map(str::to_string), + tools: None, + tool_choice: ToolChoice::Auto, + stream: false, + store: true, + include: None, + temperature: None, + top_p: None, + max_output_tokens: None, + truncation: None, + metadata: None, + }; + RequestContext { + enriched_request: req.clone(), + original_request: req, + new_input_items: vec![], + response_id: "resp_test".into(), + conversation_id: conversation_id.map(str::to_string), + } + } + + #[tokio::test] + async fn 
test_get_or_create_missing_id_returns_error() { + let result = disabled_handler().get_or_create(&make_ctx(None)).await; + assert!(result.is_err()); + } + + #[tokio::test] + async fn test_rehydrate_missing_id_returns_error() { + let result = disabled_handler().rehydrate(&make_ctx(None)).await; + assert!(result.is_err()); + } + + #[tokio::test] + async fn test_get_or_create_disabled_store_returns_error() { + let result = disabled_handler().get_or_create(&make_ctx(Some("conv_1"))).await; + assert!(result.is_err()); + } + + #[tokio::test] + async fn test_rehydrate_disabled_store_returns_error() { + let result = disabled_handler().rehydrate(&make_ctx(Some("conv_1"))).await; + assert!(result.is_err()); + } + + #[tokio::test] + async fn test_execute_turn_missing_conv_id_returns_error() { + let result = disabled_handler().execute_turn(make_ctx(None), vec![]).await; + assert!(result.is_err()); + } +} diff --git a/crates/agentic-core/src/executor/modes/mod.rs b/crates/agentic-core/src/executor/modes/mod.rs new file mode 100644 index 0000000..1e57c67 --- /dev/null +++ b/crates/agentic-core/src/executor/modes/mod.rs @@ -0,0 +1,5 @@ +pub mod conversation; +pub mod response; + +pub use conversation::ConversationHandler; +pub use response::ResponseHandler; diff --git a/crates/agentic-core/src/executor/modes/response.rs b/crates/agentic-core/src/executor/modes/response.rs new file mode 100644 index 0000000..a747776 --- /dev/null +++ b/crates/agentic-core/src/executor/modes/response.rs @@ -0,0 +1,163 @@ +//! Response storage handler — owns all response store operations. + +use crate::storage::{InOutItem, ResponseData, ResponseMetadata, ResponseStore}; +use crate::types::io::OutputItem; + +use crate::executor::error::{ExecutorError, ExecutorResult}; +use crate::executor::request::RequestContext; + +/// Handles all response store operations: lookup, rehydration, and persistence. 
+#[derive(Clone, Debug)] +pub struct ResponseHandler { + store: ResponseStore, +} + +impl ResponseHandler { + #[must_use] + pub fn new(store: ResponseStore) -> Self { + Self { store } + } + + /// Retrieves the stored response for `previous_response_id`. + /// + /// Reads `previous_response_id` from `ctx.original_request`. + /// + /// # Errors + /// Returns `ExecutorError` if `previous_response_id` is absent, the response + /// is not found, the store is disabled, or the database query fails. + pub async fn get(&self, ctx: &RequestContext) -> ExecutorResult { + let prev_id = ctx + .original_request + .previous_response_id + .as_deref() + .ok_or_else(|| ExecutorError::InvalidRequest("previous_response_id is required for get".into()))?; + self.store.get(prev_id).await.map_err(ExecutorError::Storage) + } + + /// Validates that the response for `previous_response_id` exists. + /// + /// Used in the `store=false` path where we only need to confirm the ID is + /// valid without loading any history. + /// + /// # Errors + /// Returns `ExecutorError` if `previous_response_id` is absent, the response + /// is not found, or the store is disabled. + pub async fn validate_exists(&self, ctx: &RequestContext) -> ExecutorResult<()> { + self.get(ctx).await.map(|_| ()) + } + + /// Loads all history items referenced by the previous response. + /// + /// Reads `previous_response_id` from `ctx.original_request`. Returns an empty + /// vec if there is no previous response. + /// + /// # Errors + /// Returns `ExecutorError` if the store is disabled or the database query fails. + pub async fn rehydrate(&self, ctx: &RequestContext) -> ExecutorResult> { + let Some(prev_id) = ctx.original_request.previous_response_id.as_deref() else { + return Ok(vec![]); + }; + self.store.rehydrate(prev_id).await.map_err(ExecutorError::Storage) + } + + /// Persists a response record — only the new items from this turn. 
+ /// + /// Takes `ctx` and `output_items` by value so fields can be moved directly + /// into [`ResponseMetadata`] without cloning. Prior history must not be + /// re-inserted; the response store records item IDs for this response only. + /// + /// # Errors + /// Returns `ExecutorError` if the store is disabled or the database operation fails. + pub async fn execute_turn(&self, ctx: RequestContext, output_items: Vec) -> ExecutorResult<()> { + let metadata = ResponseMetadata { + model: ctx.enriched_request.model, + previous_response_id: ctx.original_request.previous_response_id, + effective_tools: ctx.original_request.tools, + effective_tool_choice: ctx.original_request.tool_choice, + effective_instructions: ctx.original_request.instructions, + }; + + let mut new_items = Vec::with_capacity(ctx.new_input_items.len() + output_items.len()); + new_items.extend(ctx.new_input_items.into_iter().map(InOutItem::Input)); + new_items.extend(output_items.into_iter().map(InOutItem::Output)); + + self.store + .persist( + &ctx.response_id, + metadata.previous_response_id.as_deref(), + new_items, + &metadata, + ) + .await + .map_err(ExecutorError::Storage) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::types::io::{ResponsesInput, ToolChoice}; + use crate::types::request_response::RequestPayload; + + fn disabled_handler() -> ResponseHandler { + ResponseHandler::new(ResponseStore::disabled()) + } + + fn make_ctx(previous_response_id: Option<&str>) -> RequestContext { + let req = RequestPayload { + model: "test".into(), + input: ResponsesInput::Text("hi".into()), + instructions: None, + previous_response_id: previous_response_id.map(str::to_string), + conversation_id: None, + tools: None, + tool_choice: ToolChoice::Auto, + stream: false, + store: true, + include: None, + temperature: None, + top_p: None, + max_output_tokens: None, + truncation: None, + metadata: None, + }; + RequestContext { + enriched_request: req.clone(), + original_request: req, + 
new_input_items: vec![], + response_id: "resp_test".into(), + conversation_id: None, + } + } + + #[tokio::test] + async fn test_get_missing_prev_id_returns_error() { + let result = disabled_handler().get(&make_ctx(None)).await; + assert!(result.is_err()); + } + + #[tokio::test] + async fn test_validate_exists_missing_prev_id_returns_error() { + let result = disabled_handler().validate_exists(&make_ctx(None)).await; + assert!(result.is_err()); + } + + #[tokio::test] + async fn test_rehydrate_no_prev_id_returns_empty() { + let result = disabled_handler().rehydrate(&make_ctx(None)).await; + assert!(result.is_ok()); + assert!(result.unwrap().is_empty()); + } + + #[tokio::test] + async fn test_rehydrate_disabled_store_returns_error() { + let result = disabled_handler().rehydrate(&make_ctx(Some("resp_prev"))).await; + assert!(result.is_err()); + } + + #[tokio::test] + async fn test_execute_turn_disabled_store_returns_error() { + let result = disabled_handler().execute_turn(make_ctx(None), vec![]).await; + assert!(result.is_err()); + } +} diff --git a/crates/agentic-core/src/executor/request.rs b/crates/agentic-core/src/executor/request.rs new file mode 100644 index 0000000..17a21b7 --- /dev/null +++ b/crates/agentic-core/src/executor/request.rs @@ -0,0 +1,104 @@ +use std::sync::Arc; +use std::time::Duration; + +use crate::executor::modes::{ConversationHandler, ResponseHandler}; +use crate::types::io::InputItem; +use crate::types::request_response::{RequestPayload, ResponsePayload}; + +/// Context built by `rehydrate_conversation`, threaded through the execute pipeline. +#[derive(Debug)] +pub struct RequestContext { + /// Untouched original request from the client. + pub original_request: RequestPayload, + /// Enriched request with rehydrated conversation history injected into `.input`. + /// This is the request forwarded to the LLM. + pub enriched_request: RequestPayload, + /// Only the new input items submitted by the client this turn (used for persistence). 
+ pub new_input_items: Vec, + /// Our generated response ID (uuid7 with "resp_" prefix). + pub response_id: String, + /// Resolved conversation ID. `None` when `store=false` or non-conversational. + pub conversation_id: Option, +} + +impl RequestContext { + /// Inject our `response_id` and `conversation_id` into a `ResponsePayload` + /// received from the LLM (which carries the upstream's own IDs). + pub(crate) fn inject_ids(&self, payload: &mut ResponsePayload) { + payload.id.clone_from(&self.response_id); + payload.conversation_id.clone_from(&self.conversation_id); + payload + .previous_response_id + .clone_from(&self.original_request.previous_response_id); + } +} + +/// Runtime dependencies passed into `execute()`. +/// +/// Owns the storage handlers, HTTP client, and LLM endpoint configuration. +#[derive(Debug)] +pub struct ExecutionContext { + pub conv_handler: ConversationHandler, + pub resp_handler: ResponseHandler, + pub client: Arc, + /// Base URL for the LLM backend, e.g. `"http://localhost:8000"`. + pub llm_base_url: String, + /// Bearer token forwarded from the client, if any. + pub client_auth: Option, + /// Maximum wait time for the next SSE chunk. `Duration::ZERO` disables the timeout. + /// Sourced from [`Config::streaming_chunk_timeout_s`](crate::config::Config::streaming_chunk_timeout_s). + pub streaming_timeout: Duration, +} + +impl ExecutionContext { + /// Returns the full URL for the `/v1/responses` endpoint. + #[must_use] + pub fn responses_url(&self) -> String { + format!("{}/v1/responses", self.llm_base_url) + } + + /// Returns the full URL for the `/v1/conversations` endpoint. 
+ #[must_use] + pub fn conversations_url(&self) -> String { + format!("{}/v1/conversations", self.llm_base_url) + } + + #[must_use] + pub fn new( + conv_handler: ConversationHandler, + resp_handler: ResponseHandler, + client: Arc, + llm_base_url: String, + client_auth: Option, + ) -> Self { + Self { + conv_handler, + resp_handler, + client, + llm_base_url, + client_auth, + streaming_timeout: Duration::from_secs(30), + } + } + + #[must_use] + pub fn from_config( + conv_handler: ConversationHandler, + resp_handler: ResponseHandler, + client: Arc, + cfg: &crate::config::Config, + client_auth: Option, + ) -> Self { + // TODO: expose `streaming_chunk_timeout_s: Option` in `Config` and read it here + // once all `Config` struct literals in agentic-server use `..Config::default()`. + let streaming_timeout = Duration::from_secs(30); + Self { + conv_handler, + resp_handler, + client, + llm_base_url: cfg.llm_api_base.clone(), + client_auth, + streaming_timeout, + } + } +} diff --git a/crates/agentic-core/src/lib.rs b/crates/agentic-core/src/lib.rs index 20877b6..700bafb 100644 --- a/crates/agentic-core/src/lib.rs +++ b/crates/agentic-core/src/lib.rs @@ -1,5 +1,6 @@ pub mod config; pub mod error; +pub mod executor; pub mod proxy; pub mod readiness; pub mod storage; diff --git a/crates/agentic-core/src/storage/conversation.rs b/crates/agentic-core/src/storage/conversation.rs index 621e181..5f18d7e 100644 --- a/crates/agentic-core/src/storage/conversation.rs +++ b/crates/agentic-core/src/storage/conversation.rs @@ -9,7 +9,7 @@ use super::types::{ConversationData, InOutItem, ResponseMetadata, StorageError, use crate::utils::common::{serialize_to_string, uuid7_str}; /// Conversation storage operations. 
-#[derive(Clone)] +#[derive(Clone, Debug)] pub struct ConversationStore { pool: Option>, } diff --git a/crates/agentic-core/src/storage/response.rs b/crates/agentic-core/src/storage/response.rs index ddb4c2e..a41b49c 100644 --- a/crates/agentic-core/src/storage/response.rs +++ b/crates/agentic-core/src/storage/response.rs @@ -10,7 +10,7 @@ use super::types::{InOutItem, ResponseData, ResponseMetadata, StorageError, Stor use crate::utils::common::{serialize_to_string, uuid7_str}; /// Response storage operations. -#[derive(Clone)] +#[derive(Clone, Debug)] pub struct ResponseStore { pool: Option>, } diff --git a/crates/agentic-core/src/types/event.rs b/crates/agentic-core/src/types/event.rs new file mode 100644 index 0000000..6409c81 --- /dev/null +++ b/crates/agentic-core/src/types/event.rs @@ -0,0 +1,185 @@ +//! Server-Sent Event (SSE) types and response status enums. + +use std::convert::Infallible; +use std::str::FromStr; + +use serde::{Deserialize, Serialize}; + +/// Response completion status. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)] +#[serde(rename_all = "snake_case")] +pub enum ResponseStatus { + /// Response is being generated. + #[default] + InProgress, + + /// Response generation completed successfully. + Completed, + + /// Response generation incomplete (e.g., stream interrupted). + Incomplete, + + /// Response generation encountered an error. + Error, +} + +impl ResponseStatus { + /// Returns the canonical wire string for this status. 
+ #[must_use] + pub fn as_str(self) -> &'static str { + match self { + Self::InProgress => "in_progress", + Self::Completed => "completed", + Self::Incomplete => "incomplete", + Self::Error => "error", + } + } +} + +impl FromStr for ResponseStatus { + type Err = Infallible; + + fn from_str(s: &str) -> Result { + Ok(match s { + "in_progress" => Self::InProgress, + "completed" => Self::Completed, + "incomplete" => Self::Incomplete, + _ => Self::Error, + }) + } +} + +/// Message item completion status. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)] +#[serde(rename_all = "snake_case")] +pub enum MessageStatus { + /// Message is being generated. + #[default] + InProgress, + + /// Message generation completed. + Completed, +} + +impl MessageStatus { + /// Returns the canonical wire string for this status. + #[must_use] + pub fn as_str(self) -> &'static str { + match self { + Self::InProgress => "in_progress", + Self::Completed => "completed", + } + } +} + +impl FromStr for MessageStatus { + type Err = Infallible; + + fn from_str(s: &str) -> Result { + Ok(match s { + "completed" => Self::Completed, + _ => Self::InProgress, + }) + } +} + +/// Server-Sent Event types from LLM streaming responses. +/// +/// Emitted by vLLM when `stream=true`. Each variant represents one step in the +/// response generation process. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)] +pub enum SSEEventType { + /// Response object created; contains initial response metadata. + ResponseCreated, + + /// Output item (message) added; marks the start of a new message. + ResponseOutputItemAdded, + + /// Text delta; incremental token content added to the current message. + ResponseOutputTextDelta, + + /// Response fully completed; no more events will follow. + ResponseDone, + + /// Unknown or unhandled event type. 
+ #[default] + Other, +} + +impl FromStr for SSEEventType { + type Err = Infallible; + + fn from_str(s: &str) -> Result { + Ok(match s { + "response.created" => Self::ResponseCreated, + "response.output_item.added" => Self::ResponseOutputItemAdded, + "response.output_text.delta" => Self::ResponseOutputTextDelta, + // vLLM uses `response.done`; OpenAI uses `response.completed`. + "response.done" | "response.completed" => Self::ResponseDone, + _ => Self::Other, + }) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_sse_event_type_from_str_created() { + assert_eq!( + "response.created".parse::().unwrap(), + SSEEventType::ResponseCreated + ); + } + + #[test] + fn test_sse_event_type_from_str_delta() { + assert_eq!( + "response.output_text.delta".parse::().unwrap(), + SSEEventType::ResponseOutputTextDelta + ); + } + + #[test] + fn test_sse_event_type_from_str_done() { + assert_eq!( + "response.done".parse::().unwrap(), + SSEEventType::ResponseDone + ); + } + + #[test] + fn test_sse_event_type_from_str_unknown() { + assert_eq!("unknown.event".parse::().unwrap(), SSEEventType::Other); + } + + #[test] + fn test_sse_event_type_from_str_empty() { + assert_eq!("".parse::().unwrap(), SSEEventType::Other); + } + + #[test] + fn test_response_status_round_trip() { + for (s, expected) in [ + ("in_progress", ResponseStatus::InProgress), + ("completed", ResponseStatus::Completed), + ("incomplete", ResponseStatus::Incomplete), + ("error", ResponseStatus::Error), + ] { + let parsed: ResponseStatus = s.parse().unwrap(); + assert_eq!(parsed, expected); + assert_eq!(parsed.as_str(), s); + } + } + + #[test] + fn test_message_status_round_trip() { + assert_eq!("completed".parse::().unwrap(), MessageStatus::Completed); + assert_eq!( + "in_progress".parse::().unwrap(), + MessageStatus::InProgress + ); + assert_eq!("unknown".parse::().unwrap(), MessageStatus::InProgress); + } +} diff --git a/crates/agentic-core/src/types/io.rs b/crates/agentic-core/src/types/io.rs 
index 14fd7f9..d47bc4b 100644 --- a/crates/agentic-core/src/types/io.rs +++ b/crates/agentic-core/src/types/io.rs @@ -114,17 +114,17 @@ pub enum OutputItem { Unknown, } -#[derive(Debug, Clone, Default, Serialize, Deserialize)] +#[derive(Debug, Clone, Copy, Default, Serialize, Deserialize)] pub struct InputTokenDetails { pub cached_tokens: i64, } -#[derive(Debug, Clone, Default, Serialize, Deserialize)] +#[derive(Debug, Clone, Copy, Default, Serialize, Deserialize)] pub struct OutputTokenDetails { pub reasoning_tokens: i64, } -#[derive(Debug, Clone, Default, Serialize, Deserialize)] +#[derive(Debug, Clone, Copy, Default, Serialize, Deserialize)] pub struct ResponseUsage { pub input_tokens: i64, pub output_tokens: i64, @@ -160,6 +160,32 @@ pub enum ToolChoice { }, } +/// Returns the effective tool list, preferring `request_tools` when explicitly +/// set by the caller, otherwise falling back to the stored configuration. +#[inline] +pub(crate) fn resolve_tools( + request_tools: Option<&[ResponsesTool]>, + stored_tools: Option<&[ResponsesTool]>, + tools_explicitly_set: bool, +) -> Option> { + if tools_explicitly_set { + request_tools + } else { + stored_tools + } + .map(<[_]>::to_vec) +} + +/// Returns the effective tool choice using the same precedence as [`resolve_tools`]. 
+#[inline] +pub(crate) fn resolve_tool_choice( + request_choice: &ToolChoice, + stored_choice: &ToolChoice, + explicitly_set: bool, +) -> ToolChoice { + if explicitly_set { request_choice } else { stored_choice }.clone() +} + #[derive(Debug, Clone, Serialize, Deserialize)] #[serde(untagged)] pub enum ResponsesInput { diff --git a/crates/agentic-core/src/types/mod.rs b/crates/agentic-core/src/types/mod.rs index 6c7865b..675c7ba 100644 --- a/crates/agentic-core/src/types/mod.rs +++ b/crates/agentic-core/src/types/mod.rs @@ -1,3 +1,4 @@ +pub mod event; pub mod io; pub mod request_response; diff --git a/crates/agentic-core/src/utils/common.rs b/crates/agentic-core/src/utils/common.rs index c7545d2..cc00b91 100644 --- a/crates/agentic-core/src/utils/common.rs +++ b/crates/agentic-core/src/utils/common.rs @@ -73,3 +73,20 @@ pub fn deserialize_from_string_opt_or_default(json_str: &Option) -> Option { json_str.as_ref().and_then(|s| deserialize_from_str_opt::(s)) } + +/// Deserialize a `serde_json::Value` into `T`. +/// +/// # Errors +/// +/// Returns `serde_json::Error` if the value's shape does not match `T`. +pub fn deserialize_from_value( + value: serde_json::Value, +) -> Result { + serde_json::from_value(value) +} + +/// Deserialize a `serde_json::Value` into `T`, returning `None` on type mismatch. +#[must_use] +pub fn deserialize_from_value_opt(value: serde_json::Value) -> Option { + serde_json::from_value(value).ok() +} diff --git a/crates/agentic-core/tests/cassettes/record_cassette.py b/crates/agentic-core/tests/cassettes/record_cassette.py new file mode 100644 index 0000000..9b36c68 --- /dev/null +++ b/crates/agentic-core/tests/cassettes/record_cassette.py @@ -0,0 +1,649 @@ +""" +Interactive multi-turn cassette recorder. + +Starts an embedded recording proxy between this script and the upstream API, +then drives multi-turn conversations so every request/response is captured +into a YAML cassette. 
+ +Wiring: + + [this script] → [embedded proxy:] → [OpenAI API | vLLM] + (cassette recorded here) + +Modes: + conv (default) Creates a conversation via POST /v1/conversations, then + passes conversation id on every turn. + isolation Two independent conversations (each with its own conversation id) + recorded into the same cassette. + mixed Creates a conversation; turn 1 uses conversation id, turns 2+ + switch to previous_response_id only (drops conversation id). + responses No conversation created. Chains turns purely via + previous_response_id. Supports --openai and --vllm backends. + +Usage: + python tests/cassettes/record_cassette.py --turns 2 --no-stream --output path/to/cassette.yaml + python tests/cassettes/record_cassette.py --turns 3 --mode isolation --no-stream --output path/to/cassette.yaml + python tests/cassettes/record_cassette.py --turns 3 --mode mixed --no-stream --output path/to/cassette.yaml + python tests/cassettes/record_cassette.py --turns 3 --mode conv --branch-from 1 --branch-turn-number 2 --no-stream --output path/to/cassette.yaml + python tests/cassettes/record_cassette.py --turns 5 --mode conv --branch-from 1 --branch-turn-number 3 --branch-from 2 --branch-turn-number 5 --no-stream --output path/to/cassette.yaml + python tests/cassettes/record_cassette.py --turns 2 --mode responses --vllm http://localhost:8000 --model Qwen/Qwen3-30B-A3B-FP8 --no-stream --output path/to/cassette.yaml +""" + +import json +import logging +import os +import socket +import sys +import threading +import time +from contextlib import asynccontextmanager +from pathlib import Path +from typing import Any, AsyncGenerator + +import click +import httpx +import uvicorn +from fastapi import FastAPI, Request, Response +from fastapi.responses import JSONResponse, StreamingResponse +from httpx import AsyncClient +from yaml import dump as yaml_dump, safe_load as yaml_load + +logging.basicConfig(level=logging.WARNING) +logger = logging.getLogger("cassette_proxy") + +MODEL = 
"gpt-4o" +PROXY_HOST = "127.0.0.1" +PROXY_PORT = 7070 +TIMEOUT = 60 * 5 + +EXCLUDED_RESPONSE_HEADERS = { + "content-encoding", + "content-length", + "transfer-encoding", + "connection", +} + +RECORDED_HEADERS = { + "content-type", + "authorization", + "user-agent", + "accept", + "x-run-id", +} + + +def _mask_authorization(value: str) -> str: + if not value: + return value + lower = value.lower() + if lower.startswith("bearer "): + return "Bearer ***" + return "***" + + +def _filter_request_headers(headers) -> dict: + return { + k: v if k.lower() != "authorization" else _mask_authorization(v) + for k, v in headers.items() + if k.lower() in RECORDED_HEADERS + } + + +def _filter_response_headers(headers) -> dict: + return { + k: v for k, v in headers.items() if k.lower() not in EXCLUDED_RESPONSE_HEADERS + } + + +def _turn_number(output_file: Path) -> int: + if not output_file.exists(): + return 1 + content = output_file.read_text(encoding="utf-8") + if not content.strip(): + return 1 + data = yaml_load(content) + if not data or "turns" not in data: + return 1 + return len(data["turns"]) + 1 + + +def _append_turn(output_file: Path, turn: dict[str, Any]) -> None: + output_file.parent.mkdir(parents=True, exist_ok=True) + if output_file.exists() and output_file.stat().st_size > 0: + data = yaml_load(output_file.read_text(encoding="utf-8")) or {} + else: + data = {} + turns: list = data.get("turns", []) + turns.append(turn) + data["turns"] = turns + with open(output_file, "w", encoding="utf-8") as f: + yaml_dump(data, f, allow_unicode=True, default_flow_style=False) + + +@asynccontextmanager +async def lifespan(app: FastAPI): + app.state.http_client = AsyncClient(timeout=TIMEOUT) + yield + await app.state.http_client.aclose() + + +proxy_app = FastAPI(lifespan=lifespan) + + +@proxy_app.api_route( + "/{path:path}", + methods=["GET", "POST", "PUT", "DELETE", "PATCH", "HEAD", "OPTIONS"], +) +async def proxy_request(request: Request, path: str) -> Response: + http_client: 
AsyncClient = request.app.state.http_client + target_host: str = request.app.state.target_host + output_file: Path = request.app.state.output_file + + turn_num = _turn_number(output_file) + filename = f"t{turn_num}" + + target_url = f"{target_host}/{path}" + if str(request.query_params): + target_url += f"?{request.query_params}" + + raw_body = await request.body() + parsed_body = json.loads(raw_body.decode("utf-8")) if raw_body else {} + + turn: dict[str, Any] = { + "filename": filename, + "request": { + "method": request.method, + "path": f"/{path}", + "query_params": dict(request.query_params), + "body": parsed_body, + "headers": _filter_request_headers(request.headers), + }, + "response": {}, + } + + forward_headers = {k: v for k, v in request.headers.items() if k.lower() != "host"} + + if parsed_body.get("stream", False): + + async def _stream() -> AsyncGenerator[str, None]: + async with http_client.stream( + method=request.method, + url=target_url, + headers=forward_headers, + content=raw_body, + timeout=TIMEOUT, + ) as response: + yield response # type: ignore[misc] + if response.status_code != 200: + chunk_str = (await response.aread()).decode() + try: + turn["response"]["body"] = json.loads(chunk_str) + except Exception: + turn["response"]["body"] = chunk_str + yield chunk_str + else: + sse_events: list[str] = [] + try: + async for line in response.aiter_lines(): + chunk = f"{line}\n" + yield chunk + sse_events.append(chunk) + except Exception as e: + turn["response"]["stream_error"] = ( + f"{e.__class__.__name__}: {e}" + ) + finally: + turn["response"]["sse"] = sse_events + turn["response"]["status_code"] = response.status_code + turn["response"]["headers"] = { + "content-type": response.headers.get( + "content-type", "text/event-stream" + ) + } + _append_turn(output_file, turn) + print(f" [recorded turn {turn_num} -> {output_file.name}]") + + agen = _stream() + upstream = await anext(agen) + return StreamingResponse( + agen, + 
status_code=upstream.status_code, + headers=_filter_response_headers(upstream.headers), + media_type=upstream.headers.get("content-type", "text/event-stream"), + ) + + else: + response = await http_client.request( + method=request.method, + url=target_url, + headers=forward_headers, + content=raw_body, + timeout=TIMEOUT, + ) + media_type = response.headers.get("content-type", "application/json") + body: Any = response.json() if response.status_code == 200 else response.text + if response.status_code != 200 and "application/json" in media_type: + try: + body = json.loads(body) + except Exception: + pass + turn["response"]["body"] = body + turn["response"]["status_code"] = response.status_code + turn["response"]["headers"] = {"content-type": media_type} + _append_turn(output_file, turn) + print(f" [recorded turn {turn_num} -> {output_file.name}]") + return JSONResponse( + content=body, + status_code=response.status_code, + headers=_filter_response_headers(response.headers), + media_type=media_type, + ) + + +# ── proxy lifecycle ─────────────────────────────────────────────────────────── + + +def _start_proxy(output_file: Path, target_host: str, port: int) -> uvicorn.Server: + output_file.parent.mkdir(parents=True, exist_ok=True) + output_file.write_text("", encoding="utf-8") + proxy_app.state.output_file = output_file + proxy_app.state.target_host = target_host + + config = uvicorn.Config(proxy_app, host=PROXY_HOST, port=port, log_level="warning") + server = uvicorn.Server(config) + + thread = threading.Thread(target=server.run, daemon=True) + thread.start() + + # TCP-only readiness check — no HTTP request forwarded to upstream + for _ in range(40): + try: + with socket.create_connection((PROXY_HOST, port), timeout=0.3): + break + except OSError: + time.sleep(0.3) + + return server + + +def _stop_proxy(server: uvicorn.Server) -> None: + server.should_exit = True + time.sleep(0.5) + + +def _create_conversation(client: httpx.Client, proxy_url: str) -> str: + resp = 
client.post(f"{proxy_url}/v1/conversations", json={}, timeout=30) + resp.raise_for_status() + conv_id = resp.json().get("id") + print(f"[conversation created: {conv_id}]") + return conv_id + + +def _send_nonstreaming(client: httpx.Client, body: dict, proxy_url: str) -> str | None: + resp = client.post(f"{proxy_url}/v1/responses", json=body, timeout=300) + resp.raise_for_status() + data = resp.json() + print(f"\n[Response]\n{json.dumps(data, indent=2)}\n") + return data.get("id") + + +def _send_streaming(client: httpx.Client, body: dict, proxy_url: str) -> str | None: + response_id = None + print("\n[Streaming response]") + with client.stream( + "POST", f"{proxy_url}/v1/responses", json=body, timeout=300 + ) as resp: + resp.raise_for_status() + for line in resp.iter_lines(): + if not line: + continue + print(line) + if line.startswith("data:") and line != "data: [DONE]": + try: + payload = json.loads(line[5:].strip()) + if payload.get("type") == "response.completed": + response_id = payload.get("response", {}).get("id") + except Exception: + pass + print() + return response_id + + +def _send(client: httpx.Client, body: dict, stream: bool, proxy_url: str) -> str | None: + return ( + _send_streaming(client, body, proxy_url) + if stream + else _send_nonstreaming(client, body, proxy_url) + ) + + +def _prompt(label: str) -> str: + try: + return input(label).strip() + except (EOFError, KeyboardInterrupt): + print("\nAborted.") + sys.exit(0) + + +def run_conv( + client: httpx.Client, + turns: int, + model: str, + stream: bool, + store: bool, + branches: list[tuple[int, int | None]], + proxy_url: str, +) -> None: + conv_id = _create_conversation(client, proxy_url) + response_ids: dict[int, str] = {} + # map: branch_turn_number -> branch_from (which turn's response to use as previous) + branch_map: dict[int, int] = {} + extra_branches: list[int] = [] # branch_from values with no branch_turn_number + for branch_from, branch_turn_number in branches: + if branch_turn_number is 
not None: + branch_map[branch_turn_number] = branch_from + else: + extra_branches.append(branch_from) + + previous_response_id: str | None = None + for turn in range(1, turns + 1): + if turn in branch_map: + branch_from = branch_map[turn] + if branch_from not in response_ids: + raise click.UsageError( + f"--branch-from {branch_from} at turn {turn} has no recorded response " + f"(available: {sorted(response_ids)})" + ) + previous_response_id = response_ids[branch_from] + click.echo( + f"\n[Branch] turn {turn} chains from turn {branch_from} (response_id={previous_response_id})" + ) + prompt = _prompt(f"Turn {turn}/{turns} — enter prompt: ") + body: dict = {"model": model, "input": prompt, "stream": stream, "store": store} + if previous_response_id: + body["previous_response_id"] = previous_response_id + else: + body["conversation"] = conv_id + response_id = _send(client, body, stream, proxy_url) + if response_id: + response_ids[turn] = response_id + previous_response_id = response_id + + # branches without a branch_turn_number get one extra turn each + for b_idx, branch_from in enumerate(extra_branches, start=1): + if branch_from not in response_ids: + raise click.UsageError( + f"Extra branch {b_idx}: --branch-from {branch_from} has no recorded response " + f"(available: {sorted(response_ids)})" + ) + branch_resp_id = response_ids[branch_from] + click.echo( + f"\n[Extra branch {b_idx}] from turn {branch_from} (response_id={branch_resp_id}), turn {turns + 1}" + ) + prompt = _prompt( + f"Turn {turns + 1} (extra branch from turn {branch_from}) — enter prompt: " + ) + body = { + "model": model, + "input": prompt, + "stream": stream, + "store": store, + "previous_response_id": branch_resp_id, + "conversation": conv_id, + } + _send(client, body, stream, proxy_url) + + +def run_isolation( + client: httpx.Client, + turns: int, + model: str, + stream: bool, + store: bool, + proxy_url: str, +) -> None: + for conv_label in ("A", "B"): + click.echo(f"\n--- Conversation 
{conv_label} ({turns} turns) ---") + conv_id = _create_conversation(client, proxy_url) + for turn in range(1, turns + 1): + prompt = _prompt( + f"Conv {conv_label} | Turn {turn}/{turns} — enter prompt: " + ) + body: dict = { + "model": model, + "input": prompt, + "stream": stream, + "store": store, + "conversation": conv_id, + } + _send(client, body, stream, proxy_url) + + +def run_mixed( + client: httpx.Client, + turns: int, + model: str, + stream: bool, + store: bool, + proxy_url: str, +) -> None: + conv_id = _create_conversation(client, proxy_url) + previous_response_id: str | None = None + + for turn in range(1, turns + 1): + prompt = _prompt(f"Turn {turn}/{turns} — enter prompt: ") + body: dict = {"model": model, "input": prompt, "stream": stream, "store": store} + if previous_response_id: + body["previous_response_id"] = previous_response_id + else: + body["conversation"] = conv_id + previous_response_id = _send(client, body, stream, proxy_url) + + +def run_responses( + client: httpx.Client, + turns: int, + model: str, + stream: bool, + store: bool, + branches: list[tuple[int, int | None]], + proxy_url: str, +) -> None: + response_ids: dict[int, str] = {} + branch_map: dict[int, int] = {} + extra_branches: list[int] = [] + for branch_from, branch_turn_number in branches: + if branch_turn_number is not None: + branch_map[branch_turn_number] = branch_from + else: + extra_branches.append(branch_from) + + previous_response_id: str | None = None + for turn in range(1, turns + 1): + if turn in branch_map: + branch_from = branch_map[turn] + if branch_from not in response_ids: + raise click.UsageError( + f"--branch-from {branch_from} at turn {turn} has no recorded response " + f"(available: {sorted(response_ids)})" + ) + previous_response_id = response_ids[branch_from] + click.echo( + f"\n[Branch] turn {turn} chains from turn {branch_from} (response_id={previous_response_id})" + ) + prompt = _prompt(f"Turn {turn}/{turns} — enter prompt: ") + body: dict = {"model": 
model, "input": prompt, "stream": stream, "store": store} + if previous_response_id and store: + body["previous_response_id"] = previous_response_id + response_id = _send(client, body, stream, proxy_url) + previous_response_id = response_id if store else None + if response_id: + response_ids[turn] = response_id + + for b_idx, branch_from in enumerate(extra_branches, start=1): + if branch_from not in response_ids: + raise click.UsageError( + f"Extra branch {b_idx}: --branch-from {branch_from} has no recorded response " + f"(available: {sorted(response_ids)})" + ) + branch_resp_id = response_ids[branch_from] + click.echo( + f"\n[Extra branch {b_idx}] from turn {branch_from} (response_id={branch_resp_id}), turn {turns + 1}" + ) + prompt = _prompt( + f"Turn {turns + 1} (extra branch from turn {branch_from}) — enter prompt: " + ) + body = { + "model": model, + "input": prompt, + "stream": stream, + "store": store, + "previous_response_id": branch_resp_id, + } + _send(client, body, stream, proxy_url) + + +# ── main ────────────────────────────────────────────────────────────────────── + + +@click.command(context_settings={"help_option_names": ["-h", "--help"]}) +@click.option( + "--turns", "-n", required=True, type=int, help="Number of turns to record." 
+) +@click.option( + "--output", + "-o", + required=True, + type=click.Path(), + help="Output cassette YAML path.", +) +@click.option( + "--mode", + type=click.Choice(["conv", "isolation", "mixed", "responses"]), + default="conv", + show_default=True, + help="Recording mode.", +) +@click.option( + "--branch-from", + type=int, + multiple=True, + metavar="TURN", + help="Rewind to this turn's response (repeatable, one per branch).", +) +@click.option( + "--branch-turn-number", + type=int, + multiple=True, + metavar="TURN", + help="First turn number for the corresponding branch (repeatable, pairs with --branch-from).", +) +@click.option( + "--stream/--no-stream", + default=True, + show_default=True, + help="Use streaming responses.", +) +@click.option( + "--model", default=MODEL, show_default=True, help="Model name to pass in requests." +) +@click.option( + "--no-store", is_flag=True, default=False, help="Set store=false in requests." +) +@click.option( + "--proxy-port", + type=int, + default=PROXY_PORT, + show_default=True, + help="Local port for the embedded recording proxy.", +) +@click.option( + "--openai", + "openai_url", + metavar="URL", + default=None, + help="OpenAI upstream URL (default https://api.openai.com). Reads OPENAI_API_KEY.", +) +@click.option( + "--vllm", + "vllm_url", + metavar="URL", + default=None, + help="vLLM upstream URL, e.g. 
http://localhost:8000 (responses mode only, no auth).", +) +def main( + turns: int, + output: str, + mode: str, + branch_from: tuple[int, ...], + branch_turn_number: tuple[int, ...], + stream: bool, + model: str, + no_store: bool, + proxy_port: int, + openai_url: str | None, + vllm_url: str | None, +) -> None: + """Interactive multi-turn cassette recorder (proxy embedded).""" + if branch_turn_number and not branch_from: + raise click.UsageError("--branch-turn-number requires --branch-from.") + if len(branch_turn_number) > len(branch_from): + raise click.UsageError( + "More --branch-turn-number values than --branch-from values." + ) + # Pair each branch-from with its branch-turn-number (None if not provided) + branches: list[tuple[int, int | None]] = [ + (bf, branch_turn_number[i] if i < len(branch_turn_number) else None) + for i, bf in enumerate(branch_from) + ] + if vllm_url and openai_url: + raise click.UsageError("--openai and --vllm are mutually exclusive.") + if vllm_url and mode != "responses": + raise click.UsageError( + f"--vllm is only supported with --mode responses (got --mode {mode})." + ) + + if vllm_url: + target = vllm_url.rstrip("/") + headers: dict = {} + backend_label = f"vLLM: {target}" + else: + target = (openai_url or "https://api.openai.com").rstrip("/") + api_key = os.environ.get("OPENAI_API_KEY", "") + if not api_key: + raise click.ClickException( + "OPENAI_API_KEY environment variable is not set." 
+ ) + headers = {"Authorization": f"Bearer {api_key}"} + backend_label = f"OpenAI: {target}" + + output_file = Path(output).resolve() + proxy_url = f"http://{PROXY_HOST}:{proxy_port}" + store = not no_store + + click.echo(f"Mode: {mode} | Turns: {turns} | Stream: {stream} | Model: {model}") + click.echo(f"Output: {output_file}") + click.echo(backend_label) + click.echo(f"Proxy: {proxy_url} (requests go through here for recording)") + + server = _start_proxy(output_file, target, proxy_port) + click.echo(f"Proxy ready on {proxy_url}\n") + + try: + with httpx.Client(headers=headers) as client: + if mode == "conv": + run_conv(client, turns, model, stream, store, branches, proxy_url) + elif mode == "isolation": + run_isolation(client, turns, model, stream, store, proxy_url) + elif mode == "mixed": + run_mixed(client, turns, model, stream, store, proxy_url) + elif mode == "responses": + run_responses(client, turns, model, stream, store, branches, proxy_url) + finally: + _stop_proxy(server) + + click.echo(f"\nAll turns recorded -> {output_file}") + + +if __name__ == "__main__": + main() diff --git a/crates/agentic-core/tests/cassettes/record_text_only_cassettes.sh b/crates/agentic-core/tests/cassettes/record_text_only_cassettes.sh new file mode 100755 index 0000000..e7a7975 --- /dev/null +++ b/crates/agentic-core/tests/cassettes/record_text_only_cassettes.sh @@ -0,0 +1,248 @@ +#!/usr/bin/env bash +# record_text_only_cassettes.sh +# +# Records all cassettes (responses + conversation) in sequence. +# The proxy is embedded inside record_cassette.py — no separate proxy needed. 
+# +# Prerequisites: +# - OPENAI_API_KEY must be set in the environment +# +# Usage: +# bash tests/cassettes/record_text_only_cassettes.sh +# MODEL=gpt-4.1-mini bash tests/cassettes/record_text_only_cassettes.sh + +set -euo pipefail + +SCRIPTS_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +BASE_DIR="$SCRIPTS_DIR/text_only" +RESPONSES_DIR="$BASE_DIR/responses" +CONV_DIR="$BASE_DIR/conversation" +MODEL="${MODEL:-gpt-4o}" +MODEL_SLUG="$(echo "$MODEL" | tr '/: ' '---')" + +green() { printf '\033[32m%s\033[0m\n' "$*"; } +bold() { printf '\033[1m%s\033[0m\n' "$*"; } + +next_test() { + echo + read -rp "Press ENTER when ready for the next test..." + echo +} + +mkdir -p "$RESPONSES_DIR" "$CONV_DIR" + +# ══════════════════════════════════════════════════════════════════ +# RESPONSES (previous_response_id chaining, no conversation object) +# ══════════════════════════════════════════════════════════════════ + +# ── Test 1: single-turn non-streaming ──────────────────────────── + +bold "═══════════════════════════════════════════════════════════════" +bold "Test 1 of 9 — resp-single-nonstreaming" +bold " 1 turn, non-streaming" +bold "═══════════════════════════════════════════════════════════════" +bold "Prompts to enter:" +echo " Turn 1: Reply with exactly one word: HELLO" +echo +python "$SCRIPTS_DIR/record_cassette.py" \ + --mode responses \ + --turns 1 \ + --no-stream \ + --model "$MODEL" \ + --output "$RESPONSES_DIR/resp-single-${MODEL_SLUG}-nonstreaming.yaml" +green "✓ Test 1 done." 
+next_test + +# ── Test 2: single-turn streaming ──────────────────────────────── + +bold "═══════════════════════════════════════════════════════════════" +bold "Test 2 of 9 — resp-single-streaming" +bold " 1 turn, streaming" +bold "═══════════════════════════════════════════════════════════════" +bold "Prompts to enter:" +echo " Turn 1: Reply with exactly one word: WORLD" +echo +python "$SCRIPTS_DIR/record_cassette.py" \ + --mode responses \ + --turns 1 \ + --model "$MODEL" \ + --output "$RESPONSES_DIR/resp-single-${MODEL_SLUG}-streaming.yaml" +green "✓ Test 2 done." +next_test + +# ── Test 3: two-turn non-streaming ─────────────────────────────── + +bold "═══════════════════════════════════════════════════════════════" +bold "Test 3 of 9 — resp-two-turn-nonstreaming" +bold " 2 turns, non-streaming, previous_response_id chaining" +bold "═══════════════════════════════════════════════════════════════" +bold "Prompts to enter:" +echo " Turn 1: Remember the word APPLE. Just say: OK" +echo " Turn 2: What word did I ask you to remember?" +echo +python "$SCRIPTS_DIR/record_cassette.py" \ + --mode responses \ + --turns 2 \ + --no-stream \ + --model "$MODEL" \ + --output "$RESPONSES_DIR/resp-two-turn-${MODEL_SLUG}-nonstreaming.yaml" +green "✓ Test 3 done." +next_test + +# ── Test 4: two-turn streaming ──────────────────────────────────── + +bold "═══════════════════════════════════════════════════════════════" +bold "Test 4 of 9 — resp-two-turn-streaming" +bold " 2 turns, streaming, previous_response_id chaining" +bold "═══════════════════════════════════════════════════════════════" +bold "Prompts to enter:" +echo " Turn 1: Remember the word BANANA. Just say: OK" +echo " Turn 2: What word did I ask you to remember?" +echo +python "$SCRIPTS_DIR/record_cassette.py" \ + --mode responses \ + --turns 2 \ + --model "$MODEL" \ + --output "$RESPONSES_DIR/resp-two-turn-${MODEL_SLUG}-streaming.yaml" +green "✓ Test 4 done." 
+next_test + +# ── Test 5: store=false — follow-up should fail ─────────────────── + +bold "═══════════════════════════════════════════════════════════════" +bold "Test 5 of 9 — resp-no-store-nonstreaming" +bold " Turn 1: store=false | Turn 2: previous_response_id → expect error" +bold "═══════════════════════════════════════════════════════════════" +bold "Prompts to enter:" +echo " Turn 1: Say: NOT STORED" +echo " Turn 2: follow up" +echo +python "$SCRIPTS_DIR/record_cassette.py" \ + --mode responses \ + --turns 2 \ + --no-stream \ + --no-store \ + --model "$MODEL" \ + --output "$RESPONSES_DIR/resp-no-store-${MODEL_SLUG}-nonstreaming.yaml" +green "✓ Test 5 done." +next_test + +# ══════════════════════════════════════════════════════════════════ +# CONVERSATION (POST /v1/conversations + conversation id chaining) +# ══════════════════════════════════════════════════════════════════ + +# ── Test 6: 2-turn, non-streaming, conversation ─────────────────── + +bold "═══════════════════════════════════════════════════════════════" +bold "Test 6 of 9 — conv-two-turn-nonstreaming" +bold " 2 turns, non-streaming, conversation created + chained" +bold "═══════════════════════════════════════════════════════════════" +bold "Prompts to enter:" +echo " Turn 1: Remember the word CHERRY. Just say: OK" +echo " Turn 2: What word did I ask you to remember?" +echo +python "$SCRIPTS_DIR/record_cassette.py" \ + --mode conv \ + --turns 2 \ + --no-stream \ + --model "$MODEL" \ + --output "$CONV_DIR/conv-two-turn-${MODEL_SLUG}-nonstreaming.yaml" +green "✓ Test 6 done." +next_test + +# ── Test 7: 2-turn, streaming, conversation ─────────────────────── + +bold "═══════════════════════════════════════════════════════════════" +bold "Test 7 of 9 — conv-two-turn-streaming" +bold " 2 turns, streaming, conversation created + chained" +bold "═══════════════════════════════════════════════════════════════" +bold "Prompts to enter:" +echo " Turn 1: Remember the word MANGO. 
Just say: OK" +echo " Turn 2: What word did I ask you to remember?" +echo +python "$SCRIPTS_DIR/record_cassette.py" \ + --mode conv \ + --turns 2 \ + --model "$MODEL" \ + --output "$CONV_DIR/conv-two-turn-${MODEL_SLUG}-streaming.yaml" +green "✓ Test 7 done." +next_test + +# ── Test 8: isolation — 2 independent conversations ────────────── + +bold "═══════════════════════════════════════════════════════════════" +bold "Test 8 of 9 — conv-isolation-nonstreaming" +bold " 2 independent conversations (3 turns each), non-streaming" +bold " Verifies conversations do not share context" +bold "═══════════════════════════════════════════════════════════════" +bold "Prompts to enter:" +echo " Conv A | Turn 1: Remember the word ORANGE. Say: OK" +echo " Conv A | Turn 2: Also remember the word VIOLET. Say: OK" +echo " Conv A | Turn 3: List every word I asked you to remember, in order, one per line." +echo " Conv B | Turn 1: Remember the word PURPLE. Say: OK" +echo " Conv B | Turn 2: Also remember the word INDIGO. Say: OK" +echo " Conv B | Turn 3: List every word I asked you to remember, in order, one per line." +echo +python "$SCRIPTS_DIR/record_cassette.py" \ + --mode isolation \ + --turns 3 \ + --no-stream \ + --model "$MODEL" \ + --output "$CONV_DIR/conv-isolation-${MODEL_SLUG}-nonstreaming.yaml" +green "✓ Test 8 done." +next_test + +# ── Test 9: branch off turn 1 after 3-turn conversation ────────── + +bold "═══════════════════════════════════════════════════════════════" +bold "Test 9 of 9 — conv-branch-nonstreaming (6D)" +bold " Turns 1-3: conversation chain | Turn 4: branch off turn 1" +bold " Math: 2+2=4, +1=5, +2=7 | branch: +1 from turn-1 = 5" +bold "═══════════════════════════════════════════════════════════════" +bold "Prompts to enter:" +echo " Turn 1: What is 2+2? Reply with just the number." +echo " Turn 2: Add 1 to your previous answer. Reply with just the number." +echo " Turn 3: Add 2 to your previous answer. Reply with just the number."
+echo " Branch (off turn 1): Add 1 to your previous answer. Reply with just the number." +echo +python "$SCRIPTS_DIR/record_cassette.py" \ + --mode conv \ + --turns 3 \ + --branch-from 1 \ + --no-stream \ + --model "$MODEL" \ + --output "$CONV_DIR/conv-multi-turn-single-branch-${MODEL_SLUG}-nonstreaming.yaml" +green "✓ Test 9 done." +next_test + +# ── Test 10: 5-turn math, branch at turn 1, continue from turn 3 ── + +bold "═══════════════════════════════════════════════════════════════" +bold "Test 10 of 10 — conv-branch-turn-number-nonstreaming" +bold " Turns 1-5: conversation chain | 2 branches" +bold " Turn1=4, Turn2(from1)=6 | Branch1 turn3(from1)=5, turn4(from3)=8" +bold " Branch2 turn5(from2)=10" +bold "═══════════════════════════════════════════════════════════════" +bold "Prompts to enter:" +echo " Turn 1 (answer=4): What is 2+2? Reply with just the number." +echo " Turn 2 (from turn 1, answer=4+2): Add 2 to your previous answer. Reply with just the number." +echo " Branch 1 | turn 3 (from turn 1, answer=4+1): Add 1. Reply with just the number." +echo " Branch 1 | turn 4 (from turn 3, answer=5+3): Add 3 to your previous answer. Reply with just the number." +echo " Branch 2 | turn 5 (from turn 2, answer=6+4): Add 4. Reply with just the number." +echo +python "$SCRIPTS_DIR/record_cassette.py" \ + --mode conv \ + --turns 5 \ + --branch-from 1 \ + --branch-turn-number 3 \ + --branch-from 2 \ + --branch-turn-number 5 \ + --no-stream \ + --model "$MODEL" \ + --output "$CONV_DIR/conv-multi-branch-multi-turn-${MODEL_SLUG}-nonstreaming.yaml" +green "✓ Test 10 done." + +echo +green "════════════════════════════════════════════════════════════════" +green "All 10 cassettes recorded." 
+green "════════════════════════════════════════════════════════════════" diff --git a/crates/agentic-core/tests/cassettes/text_only/conversation/conv-isolation-gpt-4o-nonstreaming.yaml b/crates/agentic-core/tests/cassettes/text_only/conversation/conv-isolation-gpt-4o-nonstreaming.yaml new file mode 100644 index 0000000..f95b199 --- /dev/null +++ b/crates/agentic-core/tests/cassettes/text_only/conversation/conv-isolation-gpt-4o-nonstreaming.yaml @@ -0,0 +1,517 @@ +turns: +- filename: t1 + request: + body: {} + headers: + accept: '*/*' + authorization: Bearer *** + content-type: application/json + user-agent: python-httpx/0.28.1 + method: POST + path: /v1/conversations + query_params: {} + response: + body: + created_at: 1776764559 + id: conv_69e7468fe7108195abd87e93f47a6f8f091801b651b1d687 + metadata: {} + object: conversation + headers: + content-type: application/json + status_code: 200 +- filename: t2 + request: + body: + conversation: conv_69e7468fe7108195abd87e93f47a6f8f091801b651b1d687 + input: 'Remember the word ORANGE. 
Say: OK' + model: gpt-4o + store: true + stream: false + headers: + accept: '*/*' + authorization: Bearer *** + content-type: application/json + user-agent: python-httpx/0.28.1 + method: POST + path: /v1/responses + query_params: {} + response: + body: + background: false + billing: + payer: developer + completed_at: 1776764565 + conversation: + id: conv_69e7468fe7108195abd87e93f47a6f8f091801b651b1d687 + created_at: 1776764564 + error: null + frequency_penalty: 0.0 + id: resp_091801b651b1d6870069e74694cc1c8195b1e9477abfb4dcaf + incomplete_details: null + instructions: null + max_output_tokens: null + max_tool_calls: null + metadata: {} + model: gpt-4o-2024-08-06 + object: response + output: + - content: + - annotations: [] + logprobs: [] + text: OK + type: output_text + id: msg_091801b651b1d6870069e746954b3c8195b7bafba493c40e4b + role: assistant + status: completed + type: message + parallel_tool_calls: true + presence_penalty: 0.0 + previous_response_id: null + prompt_cache_key: null + prompt_cache_retention: in_memory + reasoning: + effort: null + summary: null + safety_identifier: null + service_tier: default + status: completed + store: true + temperature: 1.0 + text: + format: + type: text + verbosity: medium + tool_choice: auto + tools: [] + top_logprobs: 0 + top_p: 1.0 + truncation: disabled + usage: + input_tokens: 16 + input_tokens_details: + cached_tokens: 0 + output_tokens: 2 + output_tokens_details: + reasoning_tokens: 0 + total_tokens: 18 + user: null + headers: + content-type: application/json + status_code: 200 +- filename: t3 + request: + body: + conversation: conv_69e7468fe7108195abd87e93f47a6f8f091801b651b1d687 + input: 'Also remember the word VIOLET. 
Say: OK' + model: gpt-4o + store: true + stream: false + headers: + accept: '*/*' + authorization: Bearer *** + content-type: application/json + user-agent: python-httpx/0.28.1 + method: POST + path: /v1/responses + query_params: {} + response: + body: + background: false + billing: + payer: developer + completed_at: 1776764579 + conversation: + id: conv_69e7468fe7108195abd87e93f47a6f8f091801b651b1d687 + created_at: 1776764579 + error: null + frequency_penalty: 0.0 + id: resp_091801b651b1d6870069e746a371308195a854e5fa5d3e845f + incomplete_details: null + instructions: null + max_output_tokens: null + max_tool_calls: null + metadata: {} + model: gpt-4o-2024-08-06 + object: response + output: + - content: + - annotations: [] + logprobs: [] + text: OK + type: output_text + id: msg_091801b651b1d6870069e746a3edf081958e8626b6fb3abbce + role: assistant + status: completed + type: message + parallel_tool_calls: true + presence_penalty: 0.0 + previous_response_id: null + prompt_cache_key: null + prompt_cache_retention: in_memory + reasoning: + effort: null + summary: null + safety_identifier: null + service_tier: default + status: completed + store: true + temperature: 1.0 + text: + format: + type: text + verbosity: medium + tool_choice: auto + tools: [] + top_logprobs: 0 + top_p: 1.0 + truncation: disabled + usage: + input_tokens: 36 + input_tokens_details: + cached_tokens: 0 + output_tokens: 2 + output_tokens_details: + reasoning_tokens: 0 + total_tokens: 38 + user: null + headers: + content-type: application/json + status_code: 200 +- filename: t4 + request: + body: + conversation: conv_69e7468fe7108195abd87e93f47a6f8f091801b651b1d687 + input: List every word I asked you to remember, in order, one per line + model: gpt-4o + store: true + stream: false + headers: + accept: '*/*' + authorization: Bearer *** + content-type: application/json + user-agent: python-httpx/0.28.1 + method: POST + path: /v1/responses + query_params: {} + response: + body: + background: false + 
billing: + payer: developer + completed_at: 1776764588 + conversation: + id: conv_69e7468fe7108195abd87e93f47a6f8f091801b651b1d687 + created_at: 1776764587 + error: null + frequency_penalty: 0.0 + id: resp_091801b651b1d6870069e746abb2888195a22209a564c249cc + incomplete_details: null + instructions: null + max_output_tokens: null + max_tool_calls: null + metadata: {} + model: gpt-4o-2024-08-06 + object: response + output: + - content: + - annotations: [] + logprobs: [] + text: "ORANGE \nVIOLET" + type: output_text + id: msg_091801b651b1d6870069e746ac16e081958a232d74c3e3e6e7 + role: assistant + status: completed + type: message + parallel_tool_calls: true + presence_penalty: 0.0 + previous_response_id: null + prompt_cache_key: null + prompt_cache_retention: in_memory + reasoning: + effort: null + summary: null + safety_identifier: null + service_tier: default + status: completed + store: true + temperature: 1.0 + text: + format: + type: text + verbosity: medium + tool_choice: auto + tools: [] + top_logprobs: 0 + top_p: 1.0 + truncation: disabled + usage: + input_tokens: 60 + input_tokens_details: + cached_tokens: 0 + output_tokens: 7 + output_tokens_details: + reasoning_tokens: 0 + total_tokens: 67 + user: null + headers: + content-type: application/json + status_code: 200 +- filename: t5 + request: + body: {} + headers: + accept: '*/*' + authorization: Bearer *** + content-type: application/json + user-agent: python-httpx/0.28.1 + method: POST + path: /v1/conversations + query_params: {} + response: + body: + created_at: 1776764588 + id: conv_69e746ac94608190b94008b3021a3c6e0745b162f9c4b9d3 + metadata: {} + object: conversation + headers: + content-type: application/json + status_code: 200 +- filename: t6 + request: + body: + conversation: conv_69e746ac94608190b94008b3021a3c6e0745b162f9c4b9d3 + input: 'Remember the word PURPLE. 
Say: OK' + model: gpt-4o + store: true + stream: false + headers: + accept: '*/*' + authorization: Bearer *** + content-type: application/json + user-agent: python-httpx/0.28.1 + method: POST + path: /v1/responses + query_params: {} + response: + body: + background: false + billing: + payer: developer + completed_at: 1776764597 + conversation: + id: conv_69e746ac94608190b94008b3021a3c6e0745b162f9c4b9d3 + created_at: 1776764596 + error: null + frequency_penalty: 0.0 + id: resp_0745b162f9c4b9d30069e746b4988481908ebf431c400142dc + incomplete_details: null + instructions: null + max_output_tokens: null + max_tool_calls: null + metadata: {} + model: gpt-4o-2024-08-06 + object: response + output: + - content: + - annotations: [] + logprobs: [] + text: OK + type: output_text + id: msg_0745b162f9c4b9d30069e746b51358819096cfbadec79c558e + role: assistant + status: completed + type: message + parallel_tool_calls: true + presence_penalty: 0.0 + previous_response_id: null + prompt_cache_key: null + prompt_cache_retention: in_memory + reasoning: + effort: null + summary: null + safety_identifier: null + service_tier: default + status: completed + store: true + temperature: 1.0 + text: + format: + type: text + verbosity: medium + tool_choice: auto + tools: [] + top_logprobs: 0 + top_p: 1.0 + truncation: disabled + usage: + input_tokens: 16 + input_tokens_details: + cached_tokens: 0 + output_tokens: 2 + output_tokens_details: + reasoning_tokens: 0 + total_tokens: 18 + user: null + headers: + content-type: application/json + status_code: 200 +- filename: t7 + request: + body: + conversation: conv_69e746ac94608190b94008b3021a3c6e0745b162f9c4b9d3 + input: 'Also remember the word INDIGO. 
Say: OK' + model: gpt-4o + store: true + stream: false + headers: + accept: '*/*' + authorization: Bearer *** + content-type: application/json + user-agent: python-httpx/0.28.1 + method: POST + path: /v1/responses + query_params: {} + response: + body: + background: false + billing: + payer: developer + completed_at: 1776764603 + conversation: + id: conv_69e746ac94608190b94008b3021a3c6e0745b162f9c4b9d3 + created_at: 1776764602 + error: null + frequency_penalty: 0.0 + id: resp_0745b162f9c4b9d30069e746bad954819098ef762241bf18ae + incomplete_details: null + instructions: null + max_output_tokens: null + max_tool_calls: null + metadata: {} + model: gpt-4o-2024-08-06 + object: response + output: + - content: + - annotations: [] + logprobs: [] + text: OK + type: output_text + id: msg_0745b162f9c4b9d30069e746bbcba88190802fd278cf6f9f12 + role: assistant + status: completed + type: message + parallel_tool_calls: true + presence_penalty: 0.0 + previous_response_id: null + prompt_cache_key: null + prompt_cache_retention: in_memory + reasoning: + effort: null + summary: null + safety_identifier: null + service_tier: default + status: completed + store: true + temperature: 1.0 + text: + format: + type: text + verbosity: medium + tool_choice: auto + tools: [] + top_logprobs: 0 + top_p: 1.0 + truncation: disabled + usage: + input_tokens: 35 + input_tokens_details: + cached_tokens: 0 + output_tokens: 2 + output_tokens_details: + reasoning_tokens: 0 + total_tokens: 37 + user: null + headers: + content-type: application/json + status_code: 200 +- filename: t8 + request: + body: + conversation: conv_69e746ac94608190b94008b3021a3c6e0745b162f9c4b9d3 + input: List every word I asked you to remember, in order, one per line. 
+ model: gpt-4o + store: true + stream: false + headers: + accept: '*/*' + authorization: Bearer *** + content-type: application/json + user-agent: python-httpx/0.28.1 + method: POST + path: /v1/responses + query_params: {} + response: + body: + background: false + billing: + payer: developer + completed_at: 1776764610 + conversation: + id: conv_69e746ac94608190b94008b3021a3c6e0745b162f9c4b9d3 + created_at: 1776764609 + error: null + frequency_penalty: 0.0 + id: resp_0745b162f9c4b9d30069e746c16ad88190862312ba7d6a2ba1 + incomplete_details: null + instructions: null + max_output_tokens: null + max_tool_calls: null + metadata: {} + model: gpt-4o-2024-08-06 + object: response + output: + - content: + - annotations: [] + logprobs: [] + text: '- PURPLE + + - INDIGO' + type: output_text + id: msg_0745b162f9c4b9d30069e746c22d9481909480ef82bf8f4696 + role: assistant + status: completed + type: message + parallel_tool_calls: true + presence_penalty: 0.0 + previous_response_id: null + prompt_cache_key: null + prompt_cache_retention: in_memory + reasoning: + effort: null + summary: null + safety_identifier: null + service_tier: default + status: completed + store: true + temperature: 1.0 + text: + format: + type: text + verbosity: medium + tool_choice: auto + tools: [] + top_logprobs: 0 + top_p: 1.0 + truncation: disabled + usage: + input_tokens: 60 + input_tokens_details: + cached_tokens: 0 + output_tokens: 8 + output_tokens_details: + reasoning_tokens: 0 + total_tokens: 68 + user: null + headers: + content-type: application/json + status_code: 200 diff --git a/crates/agentic-core/tests/cassettes/text_only/conversation/conv-multi-branch-multi-turn-gpt-4o-nonstreaming.yaml b/crates/agentic-core/tests/cassettes/text_only/conversation/conv-multi-branch-multi-turn-gpt-4o-nonstreaming.yaml new file mode 100644 index 0000000..c6803c1 --- /dev/null +++ b/crates/agentic-core/tests/cassettes/text_only/conversation/conv-multi-branch-multi-turn-gpt-4o-nonstreaming.yaml @@ -0,0 +1,408 @@ 
+turns: +- filename: t1 + request: + body: {} + headers: + accept: '*/*' + authorization: Bearer *** + content-type: application/json + user-agent: python-httpx/0.28.1 + method: POST + path: /v1/conversations + query_params: {} + response: + body: + created_at: 1776767439 + id: conv_69e751cf3ed08194977fe0915859ea4e01ffa34fbd4eb387 + metadata: {} + object: conversation + headers: + content-type: application/json + status_code: 200 +- filename: t2 + request: + body: + conversation: conv_69e751cf3ed08194977fe0915859ea4e01ffa34fbd4eb387 + input: What is 2+2? Reply with just the number. + model: gpt-4o + store: true + stream: false + headers: + accept: '*/*' + authorization: Bearer *** + content-type: application/json + user-agent: python-httpx/0.28.1 + method: POST + path: /v1/responses + query_params: {} + response: + body: + background: false + billing: + payer: developer + completed_at: 1776767449 + conversation: + id: conv_69e751cf3ed08194977fe0915859ea4e01ffa34fbd4eb387 + created_at: 1776767448 + error: null + frequency_penalty: 0.0 + id: resp_01ffa34fbd4eb3870069e751d87aa8819481fce10ac3adc2cd + incomplete_details: null + instructions: null + max_output_tokens: null + max_tool_calls: null + metadata: {} + model: gpt-4o-2024-08-06 + object: response + output: + - content: + - annotations: [] + logprobs: [] + text: '4' + type: output_text + id: msg_01ffa34fbd4eb3870069e751d92f788194b527e18365b03b21 + role: assistant + status: completed + type: message + parallel_tool_calls: true + presence_penalty: 0.0 + previous_response_id: null + prompt_cache_key: null + prompt_cache_retention: in_memory + reasoning: + effort: null + summary: null + safety_identifier: null + service_tier: default + status: completed + store: true + temperature: 1.0 + text: + format: + type: text + verbosity: medium + tool_choice: auto + tools: [] + top_logprobs: 0 + top_p: 1.0 + truncation: disabled + usage: + input_tokens: 20 + input_tokens_details: + cached_tokens: 0 + output_tokens: 2 + 
output_tokens_details: + reasoning_tokens: 0 + total_tokens: 22 + user: null + headers: + content-type: application/json + status_code: 200 +- filename: t3 + request: + body: + input: Add 2 to your previous answer. Reply with just the number + model: gpt-4o + previous_response_id: resp_01ffa34fbd4eb3870069e751d87aa8819481fce10ac3adc2cd + store: true + stream: false + headers: + accept: '*/*' + authorization: Bearer *** + content-type: application/json + user-agent: python-httpx/0.28.1 + method: POST + path: /v1/responses + query_params: {} + response: + body: + background: false + billing: + payer: developer + completed_at: 1776767468 + created_at: 1776767468 + error: null + frequency_penalty: 0.0 + id: resp_01ffa34fbd4eb3870069e751ec4f4081949a66597d8284db9a + incomplete_details: null + instructions: null + max_output_tokens: null + max_tool_calls: null + metadata: {} + model: gpt-4o-2024-08-06 + object: response + output: + - content: + - annotations: [] + logprobs: [] + text: '6' + type: output_text + id: msg_01ffa34fbd4eb3870069e751ecd60c819491e735a9891c670f + role: assistant + status: completed + type: message + parallel_tool_calls: true + presence_penalty: 0.0 + previous_response_id: resp_01ffa34fbd4eb3870069e751d87aa8819481fce10ac3adc2cd + prompt_cache_key: null + prompt_cache_retention: in_memory + reasoning: + effort: null + summary: null + safety_identifier: null + service_tier: default + status: completed + store: true + temperature: 1.0 + text: + format: + type: text + verbosity: medium + tool_choice: auto + tools: [] + top_logprobs: 0 + top_p: 1.0 + truncation: disabled + usage: + input_tokens: 42 + input_tokens_details: + cached_tokens: 0 + output_tokens: 2 + output_tokens_details: + reasoning_tokens: 0 + total_tokens: 44 + user: null + headers: + content-type: application/json + status_code: 200 +- filename: t4 + request: + body: + input: Add 1. Reply with just the number. 
+ model: gpt-4o + previous_response_id: resp_01ffa34fbd4eb3870069e751d87aa8819481fce10ac3adc2cd + store: true + stream: false + headers: + accept: '*/*' + authorization: Bearer *** + content-type: application/json + user-agent: python-httpx/0.28.1 + method: POST + path: /v1/responses + query_params: {} + response: + body: + background: false + billing: + payer: developer + completed_at: 1776767479 + created_at: 1776767479 + error: null + frequency_penalty: 0.0 + id: resp_01ffa34fbd4eb3870069e751f6e704819480f3d723db8296b5 + incomplete_details: null + instructions: null + max_output_tokens: null + max_tool_calls: null + metadata: {} + model: gpt-4o-2024-08-06 + object: response + output: + - content: + - annotations: [] + logprobs: [] + text: '5' + type: output_text + id: msg_01ffa34fbd4eb3870069e751f7a0048194b8e3aed03d337fba + role: assistant + status: completed + type: message + parallel_tool_calls: true + presence_penalty: 0.0 + previous_response_id: resp_01ffa34fbd4eb3870069e751d87aa8819481fce10ac3adc2cd + prompt_cache_key: null + prompt_cache_retention: in_memory + reasoning: + effort: null + summary: null + safety_identifier: null + service_tier: default + status: completed + store: true + temperature: 1.0 + text: + format: + type: text + verbosity: medium + tool_choice: auto + tools: [] + top_logprobs: 0 + top_p: 1.0 + truncation: disabled + usage: + input_tokens: 39 + input_tokens_details: + cached_tokens: 0 + output_tokens: 2 + output_tokens_details: + reasoning_tokens: 0 + total_tokens: 41 + user: null + headers: + content-type: application/json + status_code: 200 +- filename: t5 + request: + body: + input: Add 3 to your previous answer. Reply with just the number. 
+ model: gpt-4o + previous_response_id: resp_01ffa34fbd4eb3870069e751f6e704819480f3d723db8296b5 + store: true + stream: false + headers: + accept: '*/*' + authorization: Bearer *** + content-type: application/json + user-agent: python-httpx/0.28.1 + method: POST + path: /v1/responses + query_params: {} + response: + body: + background: false + billing: + payer: developer + completed_at: 1776767487 + created_at: 1776767487 + error: null + frequency_penalty: 0.0 + id: resp_01ffa34fbd4eb3870069e751ff13748194a87830bb94240994 + incomplete_details: null + instructions: null + max_output_tokens: null + max_tool_calls: null + metadata: {} + model: gpt-4o-2024-08-06 + object: response + output: + - content: + - annotations: [] + logprobs: [] + text: '8' + type: output_text + id: msg_01ffa34fbd4eb3870069e751ff6eec8194ba43c7420d0f848b + role: assistant + status: completed + type: message + parallel_tool_calls: true + presence_penalty: 0.0 + previous_response_id: resp_01ffa34fbd4eb3870069e751f6e704819480f3d723db8296b5 + prompt_cache_key: null + prompt_cache_retention: in_memory + reasoning: + effort: null + summary: null + safety_identifier: null + service_tier: default + status: completed + store: true + temperature: 1.0 + text: + format: + type: text + verbosity: medium + tool_choice: auto + tools: [] + top_logprobs: 0 + top_p: 1.0 + truncation: disabled + usage: + input_tokens: 62 + input_tokens_details: + cached_tokens: 0 + output_tokens: 2 + output_tokens_details: + reasoning_tokens: 0 + total_tokens: 64 + user: null + headers: + content-type: application/json + status_code: 200 +- filename: t6 + request: + body: + input: Add 4. Reply with just the number. 
+ model: gpt-4o + previous_response_id: resp_01ffa34fbd4eb3870069e751ec4f4081949a66597d8284db9a + store: true + stream: false + headers: + accept: '*/*' + authorization: Bearer *** + content-type: application/json + user-agent: python-httpx/0.28.1 + method: POST + path: /v1/responses + query_params: {} + response: + body: + background: false + billing: + payer: developer + completed_at: 1776767496 + created_at: 1776767496 + error: null + frequency_penalty: 0.0 + id: resp_01ffa34fbd4eb3870069e752080c5c819491b1fd50d74691ee + incomplete_details: null + instructions: null + max_output_tokens: null + max_tool_calls: null + metadata: {} + model: gpt-4o-2024-08-06 + object: response + output: + - content: + - annotations: [] + logprobs: [] + text: '10' + type: output_text + id: msg_01ffa34fbd4eb3870069e75208a0a08194846c6917d99ec2d7 + role: assistant + status: completed + type: message + parallel_tool_calls: true + presence_penalty: 0.0 + previous_response_id: resp_01ffa34fbd4eb3870069e751ec4f4081949a66597d8284db9a + prompt_cache_key: null + prompt_cache_retention: in_memory + reasoning: + effort: null + summary: null + safety_identifier: null + service_tier: default + status: completed + store: true + temperature: 1.0 + text: + format: + type: text + verbosity: medium + tool_choice: auto + tools: [] + top_logprobs: 0 + top_p: 1.0 + truncation: disabled + usage: + input_tokens: 61 + input_tokens_details: + cached_tokens: 0 + output_tokens: 2 + output_tokens_details: + reasoning_tokens: 0 + total_tokens: 63 + user: null + headers: + content-type: application/json + status_code: 200 diff --git a/crates/agentic-core/tests/cassettes/text_only/conversation/conv-multi-turn-single-branch-gpt-4o-nonstreaming.yaml b/crates/agentic-core/tests/cassettes/text_only/conversation/conv-multi-turn-single-branch-gpt-4o-nonstreaming.yaml new file mode 100644 index 0000000..16c808d --- /dev/null +++ 
b/crates/agentic-core/tests/cassettes/text_only/conversation/conv-multi-turn-single-branch-gpt-4o-nonstreaming.yaml @@ -0,0 +1,335 @@ +turns: +- filename: t1 + request: + body: {} + headers: + accept: '*/*' + authorization: Bearer *** + content-type: application/json + user-agent: python-httpx/0.28.1 + method: POST + path: /v1/conversations + query_params: {} + response: + body: + created_at: 1776766429 + id: conv_69e74ddd13c4819781cfed6799e7b1fc0056d4efc351f68b + metadata: {} + object: conversation + headers: + content-type: application/json + status_code: 200 +- filename: t2 + request: + body: + conversation: conv_69e74ddd13c4819781cfed6799e7b1fc0056d4efc351f68b + input: What is 2+2? Reply with just the number. + model: gpt-4o + store: true + stream: false + headers: + accept: '*/*' + authorization: Bearer *** + content-type: application/json + user-agent: python-httpx/0.28.1 + method: POST + path: /v1/responses + query_params: {} + response: + body: + background: false + billing: + payer: developer + completed_at: 1776766445 + conversation: + id: conv_69e74ddd13c4819781cfed6799e7b1fc0056d4efc351f68b + created_at: 1776766442 + error: null + frequency_penalty: 0.0 + id: resp_0056d4efc351f68b0069e74de9f3a8819787d6ac39edb946bd + incomplete_details: null + instructions: null + max_output_tokens: null + max_tool_calls: null + metadata: {} + model: gpt-4o-2024-08-06 + object: response + output: + - content: + - annotations: [] + logprobs: [] + text: '4' + type: output_text + id: msg_0056d4efc351f68b0069e74dede3188197ba46fa018e252c72 + role: assistant + status: completed + type: message + parallel_tool_calls: true + presence_penalty: 0.0 + previous_response_id: null + prompt_cache_key: null + prompt_cache_retention: in_memory + reasoning: + effort: null + summary: null + safety_identifier: null + service_tier: default + status: completed + store: true + temperature: 1.0 + text: + format: + type: text + verbosity: medium + tool_choice: auto + tools: [] + top_logprobs: 0 
+ top_p: 1.0 + truncation: disabled + usage: + input_tokens: 20 + input_tokens_details: + cached_tokens: 0 + output_tokens: 2 + output_tokens_details: + reasoning_tokens: 0 + total_tokens: 22 + user: null + headers: + content-type: application/json + status_code: 200 +- filename: t3 + request: + body: + conversation: conv_69e74ddd13c4819781cfed6799e7b1fc0056d4efc351f68b + input: Add 1 to your previous answer. Reply with just the number. + model: gpt-4o + store: true + stream: false + headers: + accept: '*/*' + authorization: Bearer *** + content-type: application/json + user-agent: python-httpx/0.28.1 + method: POST + path: /v1/responses + query_params: {} + response: + body: + background: false + billing: + payer: developer + completed_at: 1776766448 + conversation: + id: conv_69e74ddd13c4819781cfed6799e7b1fc0056d4efc351f68b + created_at: 1776766448 + error: null + frequency_penalty: 0.0 + id: resp_0056d4efc351f68b0069e74df009908197ab5b3a32f9c572a3 + incomplete_details: null + instructions: null + max_output_tokens: null + max_tool_calls: null + metadata: {} + model: gpt-4o-2024-08-06 + object: response + output: + - content: + - annotations: [] + logprobs: [] + text: '5' + type: output_text + id: msg_0056d4efc351f68b0069e74df0bc488197a0f09f137edd161c + role: assistant + status: completed + type: message + parallel_tool_calls: true + presence_penalty: 0.0 + previous_response_id: null + prompt_cache_key: null + prompt_cache_retention: in_memory + reasoning: + effort: null + summary: null + safety_identifier: null + service_tier: default + status: completed + store: true + temperature: 1.0 + text: + format: + type: text + verbosity: medium + tool_choice: auto + tools: [] + top_logprobs: 0 + top_p: 1.0 + truncation: disabled + usage: + input_tokens: 43 + input_tokens_details: + cached_tokens: 0 + output_tokens: 2 + output_tokens_details: + reasoning_tokens: 0 + total_tokens: 45 + user: null + headers: + content-type: application/json + status_code: 200 +- filename: 
t4 + request: + body: + conversation: conv_69e74ddd13c4819781cfed6799e7b1fc0056d4efc351f68b + input: Add 2 to your previous answer. Reply with just the number + model: gpt-4o + store: true + stream: false + headers: + accept: '*/*' + authorization: Bearer *** + content-type: application/json + user-agent: python-httpx/0.28.1 + method: POST + path: /v1/responses + query_params: {} + response: + body: + background: false + billing: + payer: developer + completed_at: 1776766455 + conversation: + id: conv_69e74ddd13c4819781cfed6799e7b1fc0056d4efc351f68b + created_at: 1776766455 + error: null + frequency_penalty: 0.0 + id: resp_0056d4efc351f68b0069e74df724bc8197bf5e923fe663113f + incomplete_details: null + instructions: null + max_output_tokens: null + max_tool_calls: null + metadata: {} + model: gpt-4o-2024-08-06 + object: response + output: + - content: + - annotations: [] + logprobs: [] + text: '7' + type: output_text + id: msg_0056d4efc351f68b0069e74df7b5d88197943f69ef335e04c4 + role: assistant + status: completed + type: message + parallel_tool_calls: true + presence_penalty: 0.0 + previous_response_id: null + prompt_cache_key: null + prompt_cache_retention: in_memory + reasoning: + effort: null + summary: null + safety_identifier: null + service_tier: default + status: completed + store: true + temperature: 1.0 + text: + format: + type: text + verbosity: medium + tool_choice: auto + tools: [] + top_logprobs: 0 + top_p: 1.0 + truncation: disabled + usage: + input_tokens: 65 + input_tokens_details: + cached_tokens: 0 + output_tokens: 2 + output_tokens_details: + reasoning_tokens: 0 + total_tokens: 67 + user: null + headers: + content-type: application/json + status_code: 200 +- filename: t5 + request: + body: + input: Add 1 to your previous answer. Reply with just the number. 
+ model: gpt-4o + previous_response_id: resp_0056d4efc351f68b0069e74de9f3a8819787d6ac39edb946bd + store: true + stream: false + headers: + accept: '*/*' + authorization: Bearer *** + content-type: application/json + user-agent: python-httpx/0.28.1 + method: POST + path: /v1/responses + query_params: {} + response: + body: + background: false + billing: + payer: developer + completed_at: 1776766466 + created_at: 1776766466 + error: null + frequency_penalty: 0.0 + id: resp_0056d4efc351f68b0069e74e01f52c8197bb2dbef0c58938e7 + incomplete_details: null + instructions: null + max_output_tokens: null + max_tool_calls: null + metadata: {} + model: gpt-4o-2024-08-06 + object: response + output: + - content: + - annotations: [] + logprobs: [] + text: '5' + type: output_text + id: msg_0056d4efc351f68b0069e74e0271648197a686afe3b92c3cb2 + role: assistant + status: completed + type: message + parallel_tool_calls: true + presence_penalty: 0.0 + previous_response_id: resp_0056d4efc351f68b0069e74de9f3a8819787d6ac39edb946bd + prompt_cache_key: null + prompt_cache_retention: in_memory + reasoning: + effort: null + summary: null + safety_identifier: null + service_tier: default + status: completed + store: true + temperature: 1.0 + text: + format: + type: text + verbosity: medium + tool_choice: auto + tools: [] + top_logprobs: 0 + top_p: 1.0 + truncation: disabled + usage: + input_tokens: 43 + input_tokens_details: + cached_tokens: 0 + output_tokens: 2 + output_tokens_details: + reasoning_tokens: 0 + total_tokens: 45 + user: null + headers: + content-type: application/json + status_code: 200 diff --git a/crates/agentic-core/tests/cassettes/text_only/conversation/conv-two-turn-gpt-4o-nonstreaming.yaml b/crates/agentic-core/tests/cassettes/text_only/conversation/conv-two-turn-gpt-4o-nonstreaming.yaml new file mode 100644 index 0000000..f5dc41c --- /dev/null +++ b/crates/agentic-core/tests/cassettes/text_only/conversation/conv-two-turn-gpt-4o-nonstreaming.yaml @@ -0,0 +1,179 @@ +turns: 
+- filename: t1 + request: + body: {} + headers: + accept: '*/*' + authorization: Bearer *** + content-type: application/json + user-agent: python-httpx/0.28.1 + method: POST + path: /v1/conversations + query_params: {} + response: + body: + created_at: 1776764510 + id: conv_69e7465e16f48195af23a8c7f0301bea05530c27e06f63f2 + metadata: {} + object: conversation + headers: + content-type: application/json + status_code: 200 +- filename: t2 + request: + body: + conversation: conv_69e7465e16f48195af23a8c7f0301bea05530c27e06f63f2 + input: 'Remember the word CHERRY. Just say: OK' + model: gpt-4o + store: true + stream: false + headers: + accept: '*/*' + authorization: Bearer *** + content-type: application/json + user-agent: python-httpx/0.28.1 + method: POST + path: /v1/responses + query_params: {} + response: + body: + background: false + billing: + payer: developer + completed_at: 1776764521 + conversation: + id: conv_69e7465e16f48195af23a8c7f0301bea05530c27e06f63f2 + created_at: 1776764520 + error: null + frequency_penalty: 0.0 + id: resp_05530c27e06f63f20069e74668c1d88195b066d73edea6fdec + incomplete_details: null + instructions: null + max_output_tokens: null + max_tool_calls: null + metadata: {} + model: gpt-4o-2024-08-06 + object: response + output: + - content: + - annotations: [] + logprobs: [] + text: OK + type: output_text + id: msg_05530c27e06f63f20069e7466950f88195a158c1964323765e + role: assistant + status: completed + type: message + parallel_tool_calls: true + presence_penalty: 0.0 + previous_response_id: null + prompt_cache_key: null + prompt_cache_retention: in_memory + reasoning: + effort: null + summary: null + safety_identifier: null + service_tier: default + status: completed + store: true + temperature: 1.0 + text: + format: + type: text + verbosity: medium + tool_choice: auto + tools: [] + top_logprobs: 0 + top_p: 1.0 + truncation: disabled + usage: + input_tokens: 17 + input_tokens_details: + cached_tokens: 0 + output_tokens: 2 + 
output_tokens_details: + reasoning_tokens: 0 + total_tokens: 19 + user: null + headers: + content-type: application/json + status_code: 200 +- filename: t3 + request: + body: + conversation: conv_69e7465e16f48195af23a8c7f0301bea05530c27e06f63f2 + input: What word did I ask you to remember? + model: gpt-4o + store: true + stream: false + headers: + accept: '*/*' + authorization: Bearer *** + content-type: application/json + user-agent: python-httpx/0.28.1 + method: POST + path: /v1/responses + query_params: {} + response: + body: + background: false + billing: + payer: developer + completed_at: 1776764527 + conversation: + id: conv_69e7465e16f48195af23a8c7f0301bea05530c27e06f63f2 + created_at: 1776764527 + error: null + frequency_penalty: 0.0 + id: resp_05530c27e06f63f20069e7466f296081958b2ebfa3bb4c69b4 + incomplete_details: null + instructions: null + max_output_tokens: null + max_tool_calls: null + metadata: {} + model: gpt-4o-2024-08-06 + object: response + output: + - content: + - annotations: [] + logprobs: [] + text: CHERRY + type: output_text + id: msg_05530c27e06f63f20069e7466fb0088195b0361daeaac300a4 + role: assistant + status: completed + type: message + parallel_tool_calls: true + presence_penalty: 0.0 + previous_response_id: null + prompt_cache_key: null + prompt_cache_retention: in_memory + reasoning: + effort: null + summary: null + safety_identifier: null + service_tier: default + status: completed + store: true + temperature: 1.0 + text: + format: + type: text + verbosity: medium + tool_choice: auto + tools: [] + top_logprobs: 0 + top_p: 1.0 + truncation: disabled + usage: + input_tokens: 35 + input_tokens_details: + cached_tokens: 0 + output_tokens: 3 + output_tokens_details: + reasoning_tokens: 0 + total_tokens: 38 + user: null + headers: + content-type: application/json + status_code: 200 diff --git a/crates/agentic-core/tests/cassettes/text_only/conversation/conv-two-turn-gpt-4o-streaming.yaml 
b/crates/agentic-core/tests/cassettes/text_only/conversation/conv-two-turn-gpt-4o-streaming.yaml new file mode 100644 index 0000000..b6b8734 --- /dev/null +++ b/crates/agentic-core/tests/cassettes/text_only/conversation/conv-two-turn-gpt-4o-streaming.yaml @@ -0,0 +1,234 @@ +turns: +- filename: t1 + request: + body: {} + headers: + accept: '*/*' + authorization: Bearer *** + content-type: application/json + user-agent: python-httpx/0.28.1 + method: POST + path: /v1/conversations + query_params: {} + response: + body: + created_at: 1776764537 + id: conv_69e746796f7481909635d37860d6cd1f0aeb5a0588bb8b2a + metadata: {} + object: conversation + headers: + content-type: application/json + status_code: 200 +- filename: t2 + request: + body: + conversation: conv_69e746796f7481909635d37860d6cd1f0aeb5a0588bb8b2a + input: 'Remember the word MANGO. Just say: OK' + model: gpt-4o + store: true + stream: true + headers: + accept: '*/*' + authorization: Bearer *** + content-type: application/json + user-agent: python-httpx/0.28.1 + method: POST + path: /v1/responses + query_params: {} + response: + headers: + content-type: text/event-stream; charset=utf-8 + sse: + - 'event: response.created + + ' + - 'data: 
{"type":"response.created","response":{"id":"resp_0aeb5a0588bb8b2a0069e7467c813c8190a46d9810f8dc0355","object":"response","created_at":1776764540,"status":"in_progress","background":false,"completed_at":null,"conversation":{"id":"conv_69e746796f7481909635d37860d6cd1f0aeb5a0588bb8b2a"},"error":null,"frequency_penalty":0.0,"incomplete_details":null,"instructions":null,"max_output_tokens":null,"max_tool_calls":null,"model":"gpt-4o-2024-08-06","output":[],"parallel_tool_calls":true,"presence_penalty":0.0,"previous_response_id":null,"prompt_cache_key":null,"prompt_cache_retention":"in_memory","reasoning":{"effort":null,"summary":null},"safety_identifier":null,"service_tier":"auto","store":true,"temperature":1.0,"text":{"format":{"type":"text"},"verbosity":"medium"},"tool_choice":"auto","tools":[],"top_logprobs":0,"top_p":1.0,"truncation":"disabled","usage":null,"user":null,"metadata":{}},"sequence_number":0} + + ' + - ' + + ' + - 'event: response.in_progress + + ' + - 'data: {"type":"response.in_progress","response":{"id":"resp_0aeb5a0588bb8b2a0069e7467c813c8190a46d9810f8dc0355","object":"response","created_at":1776764540,"status":"in_progress","background":false,"completed_at":null,"conversation":{"id":"conv_69e746796f7481909635d37860d6cd1f0aeb5a0588bb8b2a"},"error":null,"frequency_penalty":0.0,"incomplete_details":null,"instructions":null,"max_output_tokens":null,"max_tool_calls":null,"model":"gpt-4o-2024-08-06","output":[],"parallel_tool_calls":true,"presence_penalty":0.0,"previous_response_id":null,"prompt_cache_key":null,"prompt_cache_retention":"in_memory","reasoning":{"effort":null,"summary":null},"safety_identifier":null,"service_tier":"auto","store":true,"temperature":1.0,"text":{"format":{"type":"text"},"verbosity":"medium"},"tool_choice":"auto","tools":[],"top_logprobs":0,"top_p":1.0,"truncation":"disabled","usage":null,"user":null,"metadata":{}},"sequence_number":1} + + ' + - ' + + ' + - 'event: response.output_item.added + + ' + - 'data: 
{"type":"response.output_item.added","item":{"id":"msg_0aeb5a0588bb8b2a0069e7467d19088190abae62ec47b1f0b1","type":"message","status":"in_progress","content":[],"role":"assistant"},"output_index":0,"sequence_number":2} + + ' + - ' + + ' + - 'event: response.content_part.added + + ' + - 'data: {"type":"response.content_part.added","content_index":0,"item_id":"msg_0aeb5a0588bb8b2a0069e7467d19088190abae62ec47b1f0b1","output_index":0,"part":{"type":"output_text","annotations":[],"logprobs":[],"text":""},"sequence_number":3} + + ' + - ' + + ' + - 'event: response.output_text.delta + + ' + - 'data: {"type":"response.output_text.delta","content_index":0,"delta":"OK","item_id":"msg_0aeb5a0588bb8b2a0069e7467d19088190abae62ec47b1f0b1","logprobs":[],"obfuscation":"LAhO6jFUD08oa2","output_index":0,"sequence_number":4} + + ' + - ' + + ' + - 'event: response.output_text.done + + ' + - 'data: {"type":"response.output_text.done","content_index":0,"item_id":"msg_0aeb5a0588bb8b2a0069e7467d19088190abae62ec47b1f0b1","logprobs":[],"output_index":0,"sequence_number":5,"text":"OK"} + + ' + - ' + + ' + - 'event: response.content_part.done + + ' + - 'data: {"type":"response.content_part.done","content_index":0,"item_id":"msg_0aeb5a0588bb8b2a0069e7467d19088190abae62ec47b1f0b1","output_index":0,"part":{"type":"output_text","annotations":[],"logprobs":[],"text":"OK"},"sequence_number":6} + + ' + - ' + + ' + - 'event: response.output_item.done + + ' + - 'data: {"type":"response.output_item.done","item":{"id":"msg_0aeb5a0588bb8b2a0069e7467d19088190abae62ec47b1f0b1","type":"message","status":"completed","content":[{"type":"output_text","annotations":[],"logprobs":[],"text":"OK"}],"role":"assistant"},"output_index":0,"sequence_number":7} + + ' + - ' + + ' + - 'event: response.completed + + ' + - 'data: 
{"type":"response.completed","response":{"id":"resp_0aeb5a0588bb8b2a0069e7467c813c8190a46d9810f8dc0355","object":"response","created_at":1776764540,"status":"completed","background":false,"completed_at":1776764541,"conversation":{"id":"conv_69e746796f7481909635d37860d6cd1f0aeb5a0588bb8b2a"},"error":null,"frequency_penalty":0.0,"incomplete_details":null,"instructions":null,"max_output_tokens":null,"max_tool_calls":null,"model":"gpt-4o-2024-08-06","output":[{"id":"msg_0aeb5a0588bb8b2a0069e7467d19088190abae62ec47b1f0b1","type":"message","status":"completed","content":[{"type":"output_text","annotations":[],"logprobs":[],"text":"OK"}],"role":"assistant"}],"parallel_tool_calls":true,"presence_penalty":0.0,"previous_response_id":null,"prompt_cache_key":null,"prompt_cache_retention":"in_memory","reasoning":{"effort":null,"summary":null},"safety_identifier":null,"service_tier":"default","store":true,"temperature":1.0,"text":{"format":{"type":"text"},"verbosity":"medium"},"tool_choice":"auto","tools":[],"top_logprobs":0,"top_p":1.0,"truncation":"disabled","usage":{"input_tokens":17,"input_tokens_details":{"cached_tokens":0},"output_tokens":2,"output_tokens_details":{"reasoning_tokens":0},"total_tokens":19},"user":null,"metadata":{}},"sequence_number":8} + + ' + - ' + + ' + status_code: 200 +- filename: t3 + request: + body: + conversation: conv_69e746796f7481909635d37860d6cd1f0aeb5a0588bb8b2a + input: What word did I ask you to remember? 
+ model: gpt-4o + store: true + stream: true + headers: + accept: '*/*' + authorization: Bearer *** + content-type: application/json + user-agent: python-httpx/0.28.1 + method: POST + path: /v1/responses + query_params: {} + response: + headers: + content-type: text/event-stream; charset=utf-8 + sse: + - 'event: response.created + + ' + - 'data: {"type":"response.created","response":{"id":"resp_0aeb5a0588bb8b2a0069e74683f0388190b99690bad6b9b489","object":"response","created_at":1776764548,"status":"in_progress","background":false,"completed_at":null,"conversation":{"id":"conv_69e746796f7481909635d37860d6cd1f0aeb5a0588bb8b2a"},"error":null,"frequency_penalty":0.0,"incomplete_details":null,"instructions":null,"max_output_tokens":null,"max_tool_calls":null,"model":"gpt-4o-2024-08-06","output":[],"parallel_tool_calls":true,"presence_penalty":0.0,"previous_response_id":null,"prompt_cache_key":null,"prompt_cache_retention":"in_memory","reasoning":{"effort":null,"summary":null},"safety_identifier":null,"service_tier":"auto","store":true,"temperature":1.0,"text":{"format":{"type":"text"},"verbosity":"medium"},"tool_choice":"auto","tools":[],"top_logprobs":0,"top_p":1.0,"truncation":"disabled","usage":null,"user":null,"metadata":{}},"sequence_number":0} + + ' + - ' + + ' + - 'event: response.in_progress + + ' + - 'data: 
{"type":"response.in_progress","response":{"id":"resp_0aeb5a0588bb8b2a0069e74683f0388190b99690bad6b9b489","object":"response","created_at":1776764548,"status":"in_progress","background":false,"completed_at":null,"conversation":{"id":"conv_69e746796f7481909635d37860d6cd1f0aeb5a0588bb8b2a"},"error":null,"frequency_penalty":0.0,"incomplete_details":null,"instructions":null,"max_output_tokens":null,"max_tool_calls":null,"model":"gpt-4o-2024-08-06","output":[],"parallel_tool_calls":true,"presence_penalty":0.0,"previous_response_id":null,"prompt_cache_key":null,"prompt_cache_retention":"in_memory","reasoning":{"effort":null,"summary":null},"safety_identifier":null,"service_tier":"auto","store":true,"temperature":1.0,"text":{"format":{"type":"text"},"verbosity":"medium"},"tool_choice":"auto","tools":[],"top_logprobs":0,"top_p":1.0,"truncation":"disabled","usage":null,"user":null,"metadata":{}},"sequence_number":1} + + ' + - ' + + ' + - 'event: response.output_item.added + + ' + - 'data: {"type":"response.output_item.added","item":{"id":"msg_0aeb5a0588bb8b2a0069e7468455c48190acd151ab62b504e1","type":"message","status":"in_progress","content":[],"role":"assistant"},"output_index":0,"sequence_number":2} + + ' + - ' + + ' + - 'event: response.content_part.added + + ' + - 'data: {"type":"response.content_part.added","content_index":0,"item_id":"msg_0aeb5a0588bb8b2a0069e7468455c48190acd151ab62b504e1","output_index":0,"part":{"type":"output_text","annotations":[],"logprobs":[],"text":""},"sequence_number":3} + + ' + - ' + + ' + - 'event: response.output_text.delta + + ' + - 'data: {"type":"response.output_text.delta","content_index":0,"delta":"M","item_id":"msg_0aeb5a0588bb8b2a0069e7468455c48190acd151ab62b504e1","logprobs":[],"obfuscation":"uG6UWN3uEmZChjC","output_index":0,"sequence_number":4} + + ' + - ' + + ' + - 'event: response.output_text.delta + + ' + - 'data: 
{"type":"response.output_text.delta","content_index":0,"delta":"ANGO","item_id":"msg_0aeb5a0588bb8b2a0069e7468455c48190acd151ab62b504e1","logprobs":[],"obfuscation":"v0kxXmx9ogqa","output_index":0,"sequence_number":5} + + ' + - ' + + ' + - 'event: response.output_text.done + + ' + - 'data: {"type":"response.output_text.done","content_index":0,"item_id":"msg_0aeb5a0588bb8b2a0069e7468455c48190acd151ab62b504e1","logprobs":[],"output_index":0,"sequence_number":6,"text":"MANGO"} + + ' + - ' + + ' + - 'event: response.content_part.done + + ' + - 'data: {"type":"response.content_part.done","content_index":0,"item_id":"msg_0aeb5a0588bb8b2a0069e7468455c48190acd151ab62b504e1","output_index":0,"part":{"type":"output_text","annotations":[],"logprobs":[],"text":"MANGO"},"sequence_number":7} + + ' + - ' + + ' + - 'event: response.output_item.done + + ' + - 'data: {"type":"response.output_item.done","item":{"id":"msg_0aeb5a0588bb8b2a0069e7468455c48190acd151ab62b504e1","type":"message","status":"completed","content":[{"type":"output_text","annotations":[],"logprobs":[],"text":"MANGO"}],"role":"assistant"},"output_index":0,"sequence_number":8} + + ' + - ' + + ' + - 'event: response.completed + + ' + - 'data: 
{"type":"response.completed","response":{"id":"resp_0aeb5a0588bb8b2a0069e74683f0388190b99690bad6b9b489","object":"response","created_at":1776764548,"status":"completed","background":false,"completed_at":1776764548,"conversation":{"id":"conv_69e746796f7481909635d37860d6cd1f0aeb5a0588bb8b2a"},"error":null,"frequency_penalty":0.0,"incomplete_details":null,"instructions":null,"max_output_tokens":null,"max_tool_calls":null,"model":"gpt-4o-2024-08-06","output":[{"id":"msg_0aeb5a0588bb8b2a0069e7468455c48190acd151ab62b504e1","type":"message","status":"completed","content":[{"type":"output_text","annotations":[],"logprobs":[],"text":"MANGO"}],"role":"assistant"}],"parallel_tool_calls":true,"presence_penalty":0.0,"previous_response_id":null,"prompt_cache_key":null,"prompt_cache_retention":"in_memory","reasoning":{"effort":null,"summary":null},"safety_identifier":null,"service_tier":"default","store":true,"temperature":1.0,"text":{"format":{"type":"text"},"verbosity":"medium"},"tool_choice":"auto","tools":[],"top_logprobs":0,"top_p":1.0,"truncation":"disabled","usage":{"input_tokens":35,"input_tokens_details":{"cached_tokens":0},"output_tokens":3,"output_tokens_details":{"reasoning_tokens":0},"total_tokens":38},"user":null,"metadata":{}},"sequence_number":9} + + ' + - ' + + ' + status_code: 200 diff --git a/crates/agentic-core/tests/cassettes/text_only/responses/resp-no-store-gpt-4o-nonstreaming.yaml b/crates/agentic-core/tests/cassettes/text_only/responses/resp-no-store-gpt-4o-nonstreaming.yaml new file mode 100644 index 0000000..015207f --- /dev/null +++ b/crates/agentic-core/tests/cassettes/text_only/responses/resp-no-store-gpt-4o-nonstreaming.yaml @@ -0,0 +1,153 @@ +turns: +- filename: t1 + request: + body: + input: 'Say: NOT STORED' + model: gpt-4o + store: false + stream: false + headers: + accept: '*/*' + authorization: Bearer *** + content-type: application/json + user-agent: python-httpx/0.28.1 + method: POST + path: /v1/responses + query_params: {} + response: + 
body: + background: false + billing: + payer: developer + completed_at: 1776764465 + created_at: 1776764465 + error: null + frequency_penalty: 0.0 + id: resp_0a47fc2a915dece50169e74631001881968085f5b231c7abe0 + incomplete_details: null + instructions: null + max_output_tokens: null + max_tool_calls: null + metadata: {} + model: gpt-4o-2024-08-06 + object: response + output: + - content: + - annotations: [] + logprobs: [] + text: NOT STORED + type: output_text + id: msg_0a47fc2a915dece50169e7463180b48196bbaaae87363326c0 + role: assistant + status: completed + type: message + parallel_tool_calls: true + presence_penalty: 0.0 + previous_response_id: null + prompt_cache_key: null + prompt_cache_retention: in_memory + reasoning: + effort: null + summary: null + safety_identifier: null + service_tier: default + status: completed + store: false + temperature: 1.0 + text: + format: + type: text + verbosity: medium + tool_choice: auto + tools: [] + top_logprobs: 0 + top_p: 1.0 + truncation: disabled + usage: + input_tokens: 12 + input_tokens_details: + cached_tokens: 0 + output_tokens: 4 + output_tokens_details: + reasoning_tokens: 0 + total_tokens: 16 + user: null + headers: + content-type: application/json + status_code: 200 +- filename: t2 + request: + body: + input: follow up + model: gpt-4o + store: false + stream: false + headers: + accept: '*/*' + authorization: Bearer *** + content-type: application/json + user-agent: python-httpx/0.28.1 + method: POST + path: /v1/responses + query_params: {} + response: + body: + background: false + billing: + payer: developer + completed_at: 1776764472 + created_at: 1776764471 + error: null + frequency_penalty: 0.0 + id: resp_0ad5b1478c81aa5f0169e746378464819587a13f241d2fc344 + incomplete_details: null + instructions: null + max_output_tokens: null + max_tool_calls: null + metadata: {} + model: gpt-4o-2024-08-06 + object: response + output: + - content: + - annotations: [] + logprobs: [] + text: Of course! 
What would you like to follow up on? + type: output_text + id: msg_0ad5b1478c81aa5f0169e74637e7388195a1494b0590f8a49e + role: assistant + status: completed + type: message + parallel_tool_calls: true + presence_penalty: 0.0 + previous_response_id: null + prompt_cache_key: null + prompt_cache_retention: in_memory + reasoning: + effort: null + summary: null + safety_identifier: null + service_tier: default + status: completed + store: false + temperature: 1.0 + text: + format: + type: text + verbosity: medium + tool_choice: auto + tools: [] + top_logprobs: 0 + top_p: 1.0 + truncation: disabled + usage: + input_tokens: 9 + input_tokens_details: + cached_tokens: 0 + output_tokens: 13 + output_tokens_details: + reasoning_tokens: 0 + total_tokens: 22 + user: null + headers: + content-type: application/json + status_code: 200 diff --git a/crates/agentic-core/tests/cassettes/text_only/responses/resp-single-gpt-4o-nonstreaming.yaml b/crates/agentic-core/tests/cassettes/text_only/responses/resp-single-gpt-4o-nonstreaming.yaml new file mode 100644 index 0000000..cd82940 --- /dev/null +++ b/crates/agentic-core/tests/cassettes/text_only/responses/resp-single-gpt-4o-nonstreaming.yaml @@ -0,0 +1,77 @@ +turns: +- filename: t1 + request: + body: + input: 'Reply with exactly one word: HELLO' + model: gpt-4o + store: true + stream: false + headers: + accept: '*/*' + authorization: Bearer *** + content-type: application/json + user-agent: python-httpx/0.28.1 + method: POST + path: /v1/responses + query_params: {} + response: + body: + background: false + billing: + payer: developer + completed_at: 1776764143 + created_at: 1776764142 + error: null + frequency_penalty: 0.0 + id: resp_0508721937e20de90069e744ee9018819394c8e011dc6d7818 + incomplete_details: null + instructions: null + max_output_tokens: null + max_tool_calls: null + metadata: {} + model: gpt-4o-2024-08-06 + object: response + output: + - content: + - annotations: [] + logprobs: [] + text: HI + type: output_text + id: 
msg_0508721937e20de90069e744ef1b2881939596753c5951691e + role: assistant + status: completed + type: message + parallel_tool_calls: true + presence_penalty: 0.0 + previous_response_id: null + prompt_cache_key: null + prompt_cache_retention: in_memory + reasoning: + effort: null + summary: null + safety_identifier: null + service_tier: default + status: completed + store: true + temperature: 1.0 + text: + format: + type: text + verbosity: medium + tool_choice: auto + tools: [] + top_logprobs: 0 + top_p: 1.0 + truncation: disabled + usage: + input_tokens: 15 + input_tokens_details: + cached_tokens: 0 + output_tokens: 2 + output_tokens_details: + reasoning_tokens: 0 + total_tokens: 17 + user: null + headers: + content-type: application/json + status_code: 200 diff --git a/crates/agentic-core/tests/cassettes/text_only/responses/resp-single-gpt-4o-streaming.yaml b/crates/agentic-core/tests/cassettes/text_only/responses/resp-single-gpt-4o-streaming.yaml new file mode 100644 index 0000000..659197b --- /dev/null +++ b/crates/agentic-core/tests/cassettes/text_only/responses/resp-single-gpt-4o-streaming.yaml @@ -0,0 +1,120 @@ +turns: +- filename: t1 + request: + body: + input: 'Reply with exactly one word: WORLD' + model: gpt-4o + store: true + stream: true + headers: + accept: '*/*' + authorization: Bearer *** + content-type: application/json + user-agent: python-httpx/0.28.1 + method: POST + path: /v1/responses + query_params: {} + response: + headers: + content-type: text/event-stream; charset=utf-8 + sse: + - 'event: response.created + + ' + - 'data: 
{"type":"response.created","response":{"id":"resp_0d119a97c73fc7550069e7450154448193b65975af8dfa2d59","object":"response","created_at":1776764161,"status":"in_progress","background":false,"completed_at":null,"error":null,"frequency_penalty":0.0,"incomplete_details":null,"instructions":null,"max_output_tokens":null,"max_tool_calls":null,"model":"gpt-4o-2024-08-06","output":[],"parallel_tool_calls":true,"presence_penalty":0.0,"previous_response_id":null,"prompt_cache_key":null,"prompt_cache_retention":"in_memory","reasoning":{"effort":null,"summary":null},"safety_identifier":null,"service_tier":"auto","store":true,"temperature":1.0,"text":{"format":{"type":"text"},"verbosity":"medium"},"tool_choice":"auto","tools":[],"top_logprobs":0,"top_p":1.0,"truncation":"disabled","usage":null,"user":null,"metadata":{}},"sequence_number":0} + + ' + - ' + + ' + - 'event: response.in_progress + + ' + - 'data: {"type":"response.in_progress","response":{"id":"resp_0d119a97c73fc7550069e7450154448193b65975af8dfa2d59","object":"response","created_at":1776764161,"status":"in_progress","background":false,"completed_at":null,"error":null,"frequency_penalty":0.0,"incomplete_details":null,"instructions":null,"max_output_tokens":null,"max_tool_calls":null,"model":"gpt-4o-2024-08-06","output":[],"parallel_tool_calls":true,"presence_penalty":0.0,"previous_response_id":null,"prompt_cache_key":null,"prompt_cache_retention":"in_memory","reasoning":{"effort":null,"summary":null},"safety_identifier":null,"service_tier":"auto","store":true,"temperature":1.0,"text":{"format":{"type":"text"},"verbosity":"medium"},"tool_choice":"auto","tools":[],"top_logprobs":0,"top_p":1.0,"truncation":"disabled","usage":null,"user":null,"metadata":{}},"sequence_number":1} + + ' + - ' + + ' + - 'event: response.output_item.added + + ' + - 'data: 
{"type":"response.output_item.added","item":{"id":"msg_0d119a97c73fc7550069e74501f204819389cc6b2751cea611","type":"message","status":"in_progress","content":[],"role":"assistant"},"output_index":0,"sequence_number":2} + + ' + - ' + + ' + - 'event: response.content_part.added + + ' + - 'data: {"type":"response.content_part.added","content_index":0,"item_id":"msg_0d119a97c73fc7550069e74501f204819389cc6b2751cea611","output_index":0,"part":{"type":"output_text","annotations":[],"logprobs":[],"text":""},"sequence_number":3} + + ' + - ' + + ' + - 'event: response.output_text.delta + + ' + - 'data: {"type":"response.output_text.delta","content_index":0,"delta":"G","item_id":"msg_0d119a97c73fc7550069e74501f204819389cc6b2751cea611","logprobs":[],"obfuscation":"NpACml1t70MBPur","output_index":0,"sequence_number":4} + + ' + - ' + + ' + - 'event: response.output_text.delta + + ' + - 'data: {"type":"response.output_text.delta","content_index":0,"delta":"LO","item_id":"msg_0d119a97c73fc7550069e74501f204819389cc6b2751cea611","logprobs":[],"obfuscation":"sYltOblE7Hn8l8","output_index":0,"sequence_number":5} + + ' + - ' + + ' + - 'event: response.output_text.delta + + ' + - 'data: {"type":"response.output_text.delta","content_index":0,"delta":"BE","item_id":"msg_0d119a97c73fc7550069e74501f204819389cc6b2751cea611","logprobs":[],"obfuscation":"4BnRKbDQPKERxH","output_index":0,"sequence_number":6} + + ' + - ' + + ' + - 'event: response.output_text.done + + ' + - 'data: {"type":"response.output_text.done","content_index":0,"item_id":"msg_0d119a97c73fc7550069e74501f204819389cc6b2751cea611","logprobs":[],"output_index":0,"sequence_number":7,"text":"GLOBE"} + + ' + - ' + + ' + - 'event: response.content_part.done + + ' + - 'data: {"type":"response.content_part.done","content_index":0,"item_id":"msg_0d119a97c73fc7550069e74501f204819389cc6b2751cea611","output_index":0,"part":{"type":"output_text","annotations":[],"logprobs":[],"text":"GLOBE"},"sequence_number":8} + + ' + - ' + + ' + - 
'event: response.output_item.done + + ' + - 'data: {"type":"response.output_item.done","item":{"id":"msg_0d119a97c73fc7550069e74501f204819389cc6b2751cea611","type":"message","status":"completed","content":[{"type":"output_text","annotations":[],"logprobs":[],"text":"GLOBE"}],"role":"assistant"},"output_index":0,"sequence_number":9} + + ' + - ' + + ' + - 'event: response.completed + + ' + - 'data: {"type":"response.completed","response":{"id":"resp_0d119a97c73fc7550069e7450154448193b65975af8dfa2d59","object":"response","created_at":1776764161,"status":"completed","background":false,"completed_at":1776764161,"error":null,"frequency_penalty":0.0,"incomplete_details":null,"instructions":null,"max_output_tokens":null,"max_tool_calls":null,"model":"gpt-4o-2024-08-06","output":[{"id":"msg_0d119a97c73fc7550069e74501f204819389cc6b2751cea611","type":"message","status":"completed","content":[{"type":"output_text","annotations":[],"logprobs":[],"text":"GLOBE"}],"role":"assistant"}],"parallel_tool_calls":true,"presence_penalty":0.0,"previous_response_id":null,"prompt_cache_key":null,"prompt_cache_retention":"in_memory","reasoning":{"effort":null,"summary":null},"safety_identifier":null,"service_tier":"default","store":true,"temperature":1.0,"text":{"format":{"type":"text"},"verbosity":"medium"},"tool_choice":"auto","tools":[],"top_logprobs":0,"top_p":1.0,"truncation":"disabled","usage":{"input_tokens":14,"input_tokens_details":{"cached_tokens":0},"output_tokens":4,"output_tokens_details":{"reasoning_tokens":0},"total_tokens":18},"user":null,"metadata":{}},"sequence_number":10} + + ' + - ' + + ' + status_code: 200 diff --git a/crates/agentic-core/tests/cassettes/text_only/responses/resp-two-turn-gpt-4o-nonstreaming.yaml b/crates/agentic-core/tests/cassettes/text_only/responses/resp-two-turn-gpt-4o-nonstreaming.yaml new file mode 100644 index 0000000..42e4300 --- /dev/null +++ b/crates/agentic-core/tests/cassettes/text_only/responses/resp-two-turn-gpt-4o-nonstreaming.yaml @@ -0,0 
+1,154 @@ +turns: +- filename: t1 + request: + body: + input: 'Remember the word APPLE. Just say: OK' + model: gpt-4o + store: true + stream: false + headers: + accept: '*/*' + authorization: Bearer *** + content-type: application/json + user-agent: python-httpx/0.28.1 + method: POST + path: /v1/responses + query_params: {} + response: + body: + background: false + billing: + payer: developer + completed_at: 1776764188 + created_at: 1776764187 + error: null + frequency_penalty: 0.0 + id: resp_0db0cfecd1a4eaa10069e7451beccc8195a9e7e09d9343aad0 + incomplete_details: null + instructions: null + max_output_tokens: null + max_tool_calls: null + metadata: {} + model: gpt-4o-2024-08-06 + object: response + output: + - content: + - annotations: [] + logprobs: [] + text: OK + type: output_text + id: msg_0db0cfecd1a4eaa10069e7451c5dac8195a040d4c3725fe3e7 + role: assistant + status: completed + type: message + parallel_tool_calls: true + presence_penalty: 0.0 + previous_response_id: null + prompt_cache_key: null + prompt_cache_retention: in_memory + reasoning: + effort: null + summary: null + safety_identifier: null + service_tier: default + status: completed + store: true + temperature: 1.0 + text: + format: + type: text + verbosity: medium + tool_choice: auto + tools: [] + top_logprobs: 0 + top_p: 1.0 + truncation: disabled + usage: + input_tokens: 17 + input_tokens_details: + cached_tokens: 0 + output_tokens: 2 + output_tokens_details: + reasoning_tokens: 0 + total_tokens: 19 + user: null + headers: + content-type: application/json + status_code: 200 +- filename: t2 + request: + body: + input: What word did I ask you to remember? 
+ model: gpt-4o + previous_response_id: resp_0db0cfecd1a4eaa10069e7451beccc8195a9e7e09d9343aad0 + store: true + stream: false + headers: + accept: '*/*' + authorization: Bearer *** + content-type: application/json + user-agent: python-httpx/0.28.1 + method: POST + path: /v1/responses + query_params: {} + response: + body: + background: false + billing: + payer: developer + completed_at: 1776764196 + created_at: 1776764195 + error: null + frequency_penalty: 0.0 + id: resp_0db0cfecd1a4eaa10069e74523ef348195adc49fff1d49863a + incomplete_details: null + instructions: null + max_output_tokens: null + max_tool_calls: null + metadata: {} + model: gpt-4o-2024-08-06 + object: response + output: + - content: + - annotations: [] + logprobs: [] + text: APPLE + type: output_text + id: msg_0db0cfecd1a4eaa10069e7452480608195b1c2d83f819edb60 + role: assistant + status: completed + type: message + parallel_tool_calls: true + presence_penalty: 0.0 + previous_response_id: resp_0db0cfecd1a4eaa10069e7451beccc8195a9e7e09d9343aad0 + prompt_cache_key: null + prompt_cache_retention: in_memory + reasoning: + effort: null + summary: null + safety_identifier: null + service_tier: default + status: completed + store: true + temperature: 1.0 + text: + format: + type: text + verbosity: medium + tool_choice: auto + tools: [] + top_logprobs: 0 + top_p: 1.0 + truncation: disabled + usage: + input_tokens: 35 + input_tokens_details: + cached_tokens: 0 + output_tokens: 2 + output_tokens_details: + reasoning_tokens: 0 + total_tokens: 37 + user: null + headers: + content-type: application/json + status_code: 200 diff --git a/crates/agentic-core/tests/cassettes/text_only/responses/resp-two-turn-gpt-4o-streaming.yaml b/crates/agentic-core/tests/cassettes/text_only/responses/resp-two-turn-gpt-4o-streaming.yaml new file mode 100644 index 0000000..d0a08cf --- /dev/null +++ b/crates/agentic-core/tests/cassettes/text_only/responses/resp-two-turn-gpt-4o-streaming.yaml @@ -0,0 +1,213 @@ +turns: +- filename: t1 + 
request: + body: + input: 'Remember the word BANANA. Just say: OK' + model: gpt-4o + store: true + stream: true + headers: + accept: '*/*' + authorization: Bearer *** + content-type: application/json + user-agent: python-httpx/0.28.1 + method: POST + path: /v1/responses + query_params: {} + response: + headers: + content-type: text/event-stream; charset=utf-8 + sse: + - 'event: response.created + + ' + - 'data: {"type":"response.created","response":{"id":"resp_0f3cfbadf7c5eca80069e7453219a881939da5d5e61bae3387","object":"response","created_at":1776764210,"status":"in_progress","background":false,"completed_at":null,"error":null,"frequency_penalty":0.0,"incomplete_details":null,"instructions":null,"max_output_tokens":null,"max_tool_calls":null,"model":"gpt-4o-2024-08-06","output":[],"parallel_tool_calls":true,"presence_penalty":0.0,"previous_response_id":null,"prompt_cache_key":null,"prompt_cache_retention":"in_memory","reasoning":{"effort":null,"summary":null},"safety_identifier":null,"service_tier":"auto","store":true,"temperature":1.0,"text":{"format":{"type":"text"},"verbosity":"medium"},"tool_choice":"auto","tools":[],"top_logprobs":0,"top_p":1.0,"truncation":"disabled","usage":null,"user":null,"metadata":{}},"sequence_number":0} + + ' + - ' + + ' + - 'event: response.in_progress + + ' + - 'data: 
{"type":"response.in_progress","response":{"id":"resp_0f3cfbadf7c5eca80069e7453219a881939da5d5e61bae3387","object":"response","created_at":1776764210,"status":"in_progress","background":false,"completed_at":null,"error":null,"frequency_penalty":0.0,"incomplete_details":null,"instructions":null,"max_output_tokens":null,"max_tool_calls":null,"model":"gpt-4o-2024-08-06","output":[],"parallel_tool_calls":true,"presence_penalty":0.0,"previous_response_id":null,"prompt_cache_key":null,"prompt_cache_retention":"in_memory","reasoning":{"effort":null,"summary":null},"safety_identifier":null,"service_tier":"auto","store":true,"temperature":1.0,"text":{"format":{"type":"text"},"verbosity":"medium"},"tool_choice":"auto","tools":[],"top_logprobs":0,"top_p":1.0,"truncation":"disabled","usage":null,"user":null,"metadata":{}},"sequence_number":1} + + ' + - ' + + ' + - 'event: response.output_item.added + + ' + - 'data: {"type":"response.output_item.added","item":{"id":"msg_0f3cfbadf7c5eca80069e74532ab188193a649f754404be233","type":"message","status":"in_progress","content":[],"role":"assistant"},"output_index":0,"sequence_number":2} + + ' + - ' + + ' + - 'event: response.content_part.added + + ' + - 'data: {"type":"response.content_part.added","content_index":0,"item_id":"msg_0f3cfbadf7c5eca80069e74532ab188193a649f754404be233","output_index":0,"part":{"type":"output_text","annotations":[],"logprobs":[],"text":""},"sequence_number":3} + + ' + - ' + + ' + - 'event: response.output_text.delta + + ' + - 'data: {"type":"response.output_text.delta","content_index":0,"delta":"OK","item_id":"msg_0f3cfbadf7c5eca80069e74532ab188193a649f754404be233","logprobs":[],"obfuscation":"fFzOZt2wTSxfuW","output_index":0,"sequence_number":4} + + ' + - ' + + ' + - 'event: response.output_text.done + + ' + - 'data: {"type":"response.output_text.done","content_index":0,"item_id":"msg_0f3cfbadf7c5eca80069e74532ab188193a649f754404be233","logprobs":[],"output_index":0,"sequence_number":5,"text":"OK"} + + ' + 
- ' + + ' + - 'event: response.content_part.done + + ' + - 'data: {"type":"response.content_part.done","content_index":0,"item_id":"msg_0f3cfbadf7c5eca80069e74532ab188193a649f754404be233","output_index":0,"part":{"type":"output_text","annotations":[],"logprobs":[],"text":"OK"},"sequence_number":6} + + ' + - ' + + ' + - 'event: response.output_item.done + + ' + - 'data: {"type":"response.output_item.done","item":{"id":"msg_0f3cfbadf7c5eca80069e74532ab188193a649f754404be233","type":"message","status":"completed","content":[{"type":"output_text","annotations":[],"logprobs":[],"text":"OK"}],"role":"assistant"},"output_index":0,"sequence_number":7} + + ' + - ' + + ' + - 'event: response.completed + + ' + - 'data: {"type":"response.completed","response":{"id":"resp_0f3cfbadf7c5eca80069e7453219a881939da5d5e61bae3387","object":"response","created_at":1776764210,"status":"completed","background":false,"completed_at":1776764210,"error":null,"frequency_penalty":0.0,"incomplete_details":null,"instructions":null,"max_output_tokens":null,"max_tool_calls":null,"model":"gpt-4o-2024-08-06","output":[{"id":"msg_0f3cfbadf7c5eca80069e74532ab188193a649f754404be233","type":"message","status":"completed","content":[{"type":"output_text","annotations":[],"logprobs":[],"text":"OK"}],"role":"assistant"}],"parallel_tool_calls":true,"presence_penalty":0.0,"previous_response_id":null,"prompt_cache_key":null,"prompt_cache_retention":"in_memory","reasoning":{"effort":null,"summary":null},"safety_identifier":null,"service_tier":"default","store":true,"temperature":1.0,"text":{"format":{"type":"text"},"verbosity":"medium"},"tool_choice":"auto","tools":[],"top_logprobs":0,"top_p":1.0,"truncation":"disabled","usage":{"input_tokens":17,"input_tokens_details":{"cached_tokens":0},"output_tokens":2,"output_tokens_details":{"reasoning_tokens":0},"total_tokens":19},"user":null,"metadata":{}},"sequence_number":8} + + ' + - ' + + ' + status_code: 200 +- filename: t2 + request: + body: + input: What word did 
I ask you to remember? + model: gpt-4o + previous_response_id: resp_0f3cfbadf7c5eca80069e7453219a881939da5d5e61bae3387 + store: true + stream: true + headers: + accept: '*/*' + authorization: Bearer *** + content-type: application/json + user-agent: python-httpx/0.28.1 + method: POST + path: /v1/responses + query_params: {} + response: + headers: + content-type: text/event-stream; charset=utf-8 + sse: + - 'event: response.created + + ' + - 'data: {"type":"response.created","response":{"id":"resp_0f3cfbadf7c5eca80069e7453a2be4819383b068fd4663b6c8","object":"response","created_at":1776764218,"status":"in_progress","background":false,"completed_at":null,"error":null,"frequency_penalty":0.0,"incomplete_details":null,"instructions":null,"max_output_tokens":null,"max_tool_calls":null,"model":"gpt-4o-2024-08-06","output":[],"parallel_tool_calls":true,"presence_penalty":0.0,"previous_response_id":"resp_0f3cfbadf7c5eca80069e7453219a881939da5d5e61bae3387","prompt_cache_key":null,"prompt_cache_retention":"in_memory","reasoning":{"effort":null,"summary":null},"safety_identifier":null,"service_tier":"auto","store":true,"temperature":1.0,"text":{"format":{"type":"text"},"verbosity":"medium"},"tool_choice":"auto","tools":[],"top_logprobs":0,"top_p":1.0,"truncation":"disabled","usage":null,"user":null,"metadata":{}},"sequence_number":0} + + ' + - ' + + ' + - 'event: response.in_progress + + ' + - 'data: 
{"type":"response.in_progress","response":{"id":"resp_0f3cfbadf7c5eca80069e7453a2be4819383b068fd4663b6c8","object":"response","created_at":1776764218,"status":"in_progress","background":false,"completed_at":null,"error":null,"frequency_penalty":0.0,"incomplete_details":null,"instructions":null,"max_output_tokens":null,"max_tool_calls":null,"model":"gpt-4o-2024-08-06","output":[],"parallel_tool_calls":true,"presence_penalty":0.0,"previous_response_id":"resp_0f3cfbadf7c5eca80069e7453219a881939da5d5e61bae3387","prompt_cache_key":null,"prompt_cache_retention":"in_memory","reasoning":{"effort":null,"summary":null},"safety_identifier":null,"service_tier":"auto","store":true,"temperature":1.0,"text":{"format":{"type":"text"},"verbosity":"medium"},"tool_choice":"auto","tools":[],"top_logprobs":0,"top_p":1.0,"truncation":"disabled","usage":null,"user":null,"metadata":{}},"sequence_number":1} + + ' + - ' + + ' + - 'event: response.output_item.added + + ' + - 'data: {"type":"response.output_item.added","item":{"id":"msg_0f3cfbadf7c5eca80069e7453ab4108193987ef7e3267e91c1","type":"message","status":"in_progress","content":[],"role":"assistant"},"output_index":0,"sequence_number":2} + + ' + - ' + + ' + - 'event: response.content_part.added + + ' + - 'data: {"type":"response.content_part.added","content_index":0,"item_id":"msg_0f3cfbadf7c5eca80069e7453ab4108193987ef7e3267e91c1","output_index":0,"part":{"type":"output_text","annotations":[],"logprobs":[],"text":""},"sequence_number":3} + + ' + - ' + + ' + - 'event: response.output_text.delta + + ' + - 'data: {"type":"response.output_text.delta","content_index":0,"delta":"BAN","item_id":"msg_0f3cfbadf7c5eca80069e7453ab4108193987ef7e3267e91c1","logprobs":[],"obfuscation":"ndpMUu4dcc0wQ","output_index":0,"sequence_number":4} + + ' + - ' + + ' + - 'event: response.output_text.delta + + ' + - 'data: 
{"type":"response.output_text.delta","content_index":0,"delta":"ANA","item_id":"msg_0f3cfbadf7c5eca80069e7453ab4108193987ef7e3267e91c1","logprobs":[],"obfuscation":"rzt7Jc0RMv0V1","output_index":0,"sequence_number":5} + + ' + - ' + + ' + - 'event: response.output_text.done + + ' + - 'data: {"type":"response.output_text.done","content_index":0,"item_id":"msg_0f3cfbadf7c5eca80069e7453ab4108193987ef7e3267e91c1","logprobs":[],"output_index":0,"sequence_number":6,"text":"BANANA"} + + ' + - ' + + ' + - 'event: response.content_part.done + + ' + - 'data: {"type":"response.content_part.done","content_index":0,"item_id":"msg_0f3cfbadf7c5eca80069e7453ab4108193987ef7e3267e91c1","output_index":0,"part":{"type":"output_text","annotations":[],"logprobs":[],"text":"BANANA"},"sequence_number":7} + + ' + - ' + + ' + - 'event: response.output_item.done + + ' + - 'data: {"type":"response.output_item.done","item":{"id":"msg_0f3cfbadf7c5eca80069e7453ab4108193987ef7e3267e91c1","type":"message","status":"completed","content":[{"type":"output_text","annotations":[],"logprobs":[],"text":"BANANA"}],"role":"assistant"},"output_index":0,"sequence_number":8} + + ' + - ' + + ' + - 'event: response.completed + + ' + - 'data: 
{"type":"response.completed","response":{"id":"resp_0f3cfbadf7c5eca80069e7453a2be4819383b068fd4663b6c8","object":"response","created_at":1776764218,"status":"completed","background":false,"completed_at":1776764218,"error":null,"frequency_penalty":0.0,"incomplete_details":null,"instructions":null,"max_output_tokens":null,"max_tool_calls":null,"model":"gpt-4o-2024-08-06","output":[{"id":"msg_0f3cfbadf7c5eca80069e7453ab4108193987ef7e3267e91c1","type":"message","status":"completed","content":[{"type":"output_text","annotations":[],"logprobs":[],"text":"BANANA"}],"role":"assistant"}],"parallel_tool_calls":true,"presence_penalty":0.0,"previous_response_id":"resp_0f3cfbadf7c5eca80069e7453219a881939da5d5e61bae3387","prompt_cache_key":null,"prompt_cache_retention":"in_memory","reasoning":{"effort":null,"summary":null},"safety_identifier":null,"service_tier":"default","store":true,"temperature":1.0,"text":{"format":{"type":"text"},"verbosity":"medium"},"tool_choice":"auto","tools":[],"top_logprobs":0,"top_p":1.0,"truncation":"disabled","usage":{"input_tokens":35,"input_tokens_details":{"cached_tokens":0},"output_tokens":3,"output_tokens_details":{"reasoning_tokens":0},"total_tokens":38},"user":null,"metadata":{}},"sequence_number":9} + + ' + - ' + + ' + status_code: 200 diff --git a/crates/agentic-core/tests/stateful_conversation_integration.rs b/crates/agentic-core/tests/stateful_conversation_integration.rs new file mode 100644 index 0000000..ba70599 --- /dev/null +++ b/crates/agentic-core/tests/stateful_conversation_integration.rs @@ -0,0 +1,305 @@ +//! Cassette-based integration tests for the Conversation API (cases 6–10). +//! +//! Mirrors `test_conversation_api.py`. Each conversation cassette includes a +//! `/v1/conversations` creation turn — mirrored here via `create_conversation()`. +//! `TestFixture` serves only `/v1/responses` turns on the mock HTTP server. 
+ +mod support; + +use agentic_core::executor::{create_conversation, execute}; +use std::sync::Arc; +use support::{ + TestFixture, collect_stream, expected_text, load_cassette, make_request, output_text, responses_turns, + unwrap_blocking, +}; + +const DIR: &str = concat!(env!("CARGO_MANIFEST_DIR"), "/tests/cassettes/text_only/conversation"); + +/// Case 6 — two turns, non-streaming, via `conversation_id`. +#[tokio::test] +async fn test_two_turn_nonstreaming_conversation() { + // Arrange + let cassette = load_cassette(&format!("{DIR}/conv-two-turn-gpt-4o-nonstreaming.yaml")); + let all: Vec<_> = cassette.turns.iter().collect(); + let fixture = TestFixture::new(&all).await; + let ctx = &fixture.exec_ctx; + let resp = responses_turns(&cassette); + let (t1, t2) = (resp[0], resp[1]); + + // Mirrors /v1/conversations creation turn + let conv_id = create_conversation(ctx).await.expect("create conv").conversation_id; + + // Act + let p1 = unwrap_blocking( + execute( + make_request(&t1.request.body.input, true, false, None, Some(conv_id.clone())), + Arc::clone(ctx), + ) + .await + .expect("t1"), + ); + let p2 = unwrap_blocking( + execute( + make_request(&t2.request.body.input, true, false, None, Some(conv_id)), + Arc::clone(ctx), + ) + .await + .expect("t2"), + ); + + // Assert + assert!(p1.id.starts_with("resp_")); + assert_eq!(p1.status, "completed"); + assert_eq!(output_text(&p1), expected_text(t1)); + assert_ne!(p2.id, p1.id); + assert_eq!(p2.status, "completed"); + assert_eq!(output_text(&p2), expected_text(t2)); +} + +/// Case 7 — two turns, streaming, via `conversation_id`. 
+#[tokio::test] +async fn test_two_turn_streaming_conversation() { + // Arrange + let cassette = load_cassette(&format!("{DIR}/conv-two-turn-gpt-4o-streaming.yaml")); + let all: Vec<_> = cassette.turns.iter().collect(); + let fixture = TestFixture::new(&all).await; + let ctx = &fixture.exec_ctx; + let resp = responses_turns(&cassette); + let (t1, t2) = (resp[0], resp[1]); + + let conv_id = create_conversation(ctx).await.expect("create conv").conversation_id; + + // Act + let p1 = collect_stream( + execute( + make_request(&t1.request.body.input, true, true, None, Some(conv_id.clone())), + Arc::clone(ctx), + ) + .await + .expect("t1"), + ) + .await; + let p2 = collect_stream( + execute( + make_request(&t2.request.body.input, true, true, None, Some(conv_id)), + Arc::clone(ctx), + ) + .await + .expect("t2"), + ) + .await; + + // Assert + assert!(p1.id.starts_with("resp_")); + assert_eq!(p1.status, "completed"); + assert_eq!(output_text(&p1), expected_text(t1)); + assert_ne!(p2.id, p1.id); + assert_eq!(p2.status, "completed"); + assert_eq!(output_text(&p2), expected_text(t2)); +} + +/// Case 8 — two independent conversations must not share context. 
+#[tokio::test] +async fn test_conversation_isolation() { + // Arrange + let cassette = load_cassette(&format!("{DIR}/conv-isolation-gpt-4o-nonstreaming.yaml")); + let all: Vec<_> = cassette.turns.iter().collect(); + let fixture = TestFixture::new(&all).await; + let ctx = &fixture.exec_ctx; + let resp = responses_turns(&cassette); + let (ta1, ta2, ta3, tb1, tb2, tb3) = (resp[0], resp[1], resp[2], resp[3], resp[4], resp[5]); + + // Conv A + let conv_a = create_conversation(ctx).await.expect("create conv A").conversation_id; + let pa1 = unwrap_blocking( + execute( + make_request(&ta1.request.body.input, true, false, None, Some(conv_a.clone())), + Arc::clone(ctx), + ) + .await + .expect("a1"), + ); + assert_eq!(output_text(&pa1), expected_text(ta1)); + let pa2 = unwrap_blocking( + execute( + make_request(&ta2.request.body.input, true, false, None, Some(conv_a.clone())), + Arc::clone(ctx), + ) + .await + .expect("a2"), + ); + assert_eq!(output_text(&pa2), expected_text(ta2)); + let pa3 = unwrap_blocking( + execute( + make_request(&ta3.request.body.input, true, false, None, Some(conv_a.clone())), + Arc::clone(ctx), + ) + .await + .expect("a3"), + ); + assert_eq!(output_text(&pa3), expected_text(ta3)); + + // Conv B + let conv_b = create_conversation(ctx).await.expect("create conv B").conversation_id; + let pb1 = unwrap_blocking( + execute( + make_request(&tb1.request.body.input, true, false, None, Some(conv_b.clone())), + Arc::clone(ctx), + ) + .await + .expect("b1"), + ); + assert_eq!(output_text(&pb1), expected_text(tb1)); + let pb2 = unwrap_blocking( + execute( + make_request(&tb2.request.body.input, true, false, None, Some(conv_b.clone())), + Arc::clone(ctx), + ) + .await + .expect("b2"), + ); + assert_eq!(output_text(&pb2), expected_text(tb2)); + let pb3 = unwrap_blocking( + execute( + make_request(&tb3.request.body.input, true, false, None, Some(conv_b.clone())), + Arc::clone(ctx), + ) + .await + .expect("b3"), + ); + assert_eq!(output_text(&pb3), 
expected_text(tb3)); + + // Assert — conversations are isolated + assert_ne!(conv_a, conv_b, "conversations must not share an id"); +} + +/// Case 9 — 3-turn chain then branch off turn 1 via `previous_response_id`. +#[tokio::test] +async fn test_branch_off_turn_1() { + // Arrange + let cassette = load_cassette(&format!("{DIR}/conv-multi-turn-single-branch-gpt-4o-nonstreaming.yaml")); + let all: Vec<_> = cassette.turns.iter().collect(); + let fixture = TestFixture::new(&all).await; + let ctx = &fixture.exec_ctx; + let resp = responses_turns(&cassette); + let (t1, t2, t3, t4) = (resp[0], resp[1], resp[2], resp[3]); + + let conv_id = create_conversation(ctx).await.expect("create conv").conversation_id; + + // Main chain + let p1 = unwrap_blocking( + execute( + make_request(&t1.request.body.input, true, false, None, Some(conv_id.clone())), + Arc::clone(ctx), + ) + .await + .expect("t1"), + ); + assert_eq!(output_text(&p1), expected_text(t1)); + let r1_id = p1.id.clone(); + + let p2 = unwrap_blocking( + execute( + make_request(&t2.request.body.input, true, false, None, Some(conv_id.clone())), + Arc::clone(ctx), + ) + .await + .expect("t2"), + ); + assert_eq!(output_text(&p2), expected_text(t2)); + + let p3 = unwrap_blocking( + execute( + make_request(&t3.request.body.input, true, false, None, Some(conv_id)), + Arc::clone(ctx), + ) + .await + .expect("t3"), + ); + assert_eq!(output_text(&p3), expected_text(t3)); + + // Branch off turn 1 — only turn 1 context visible + let p4 = unwrap_blocking( + execute( + make_request(&t4.request.body.input, true, false, Some(r1_id), None), + Arc::clone(ctx), + ) + .await + .expect("t4"), + ); + assert_eq!(p4.status, "completed"); + assert_eq!(output_text(&p4), expected_text(t4)); +} + +/// Case 10 — 5-turn chain with 2 inline branches. 
+#[tokio::test] +async fn test_multi_branch() { + // Arrange + let cassette = load_cassette(&format!("{DIR}/conv-multi-branch-multi-turn-gpt-4o-nonstreaming.yaml")); + let all: Vec<_> = cassette.turns.iter().collect(); + let fixture = TestFixture::new(&all).await; + let ctx = &fixture.exec_ctx; + let resp = responses_turns(&cassette); + let (t1, t2, t3, t4, t5) = (resp[0], resp[1], resp[2], resp[3], resp[4]); + + let conv_id = create_conversation(ctx).await.expect("create conv").conversation_id; + + // Turn 1 + let p1 = unwrap_blocking( + execute( + make_request(&t1.request.body.input, true, false, None, Some(conv_id.clone())), + Arc::clone(ctx), + ) + .await + .expect("t1"), + ); + assert_eq!(output_text(&p1), expected_text(t1)); + let r1_id = p1.id.clone(); + + // Turn 2 (main branch) + let p2 = unwrap_blocking( + execute( + make_request(&t2.request.body.input, true, false, None, Some(conv_id)), + Arc::clone(ctx), + ) + .await + .expect("t2"), + ); + assert_eq!(output_text(&p2), expected_text(t2)); + let r2_id = p2.id.clone(); + + // Branch 1 — off turn 1 + let p3 = unwrap_blocking( + execute( + make_request(&t3.request.body.input, true, false, Some(r1_id), None), + Arc::clone(ctx), + ) + .await + .expect("t3"), + ); + assert_eq!(p3.status, "completed"); + assert_eq!(output_text(&p3), expected_text(t3)); + + let p4 = unwrap_blocking( + execute( + make_request(&t4.request.body.input, true, false, Some(p3.id.clone()), None), + Arc::clone(ctx), + ) + .await + .expect("t4"), + ); + assert_eq!(p4.status, "completed"); + assert_eq!(output_text(&p4), expected_text(t4)); + + // Branch 2 — off turn 2 + let p5 = unwrap_blocking( + execute( + make_request(&t5.request.body.input, true, false, Some(r2_id), None), + Arc::clone(ctx), + ) + .await + .expect("t5"), + ); + assert_eq!(p5.status, "completed"); + assert_eq!(output_text(&p5), expected_text(t5)); +} diff --git a/crates/agentic-core/tests/stateful_responses_integration.rs 
b/crates/agentic-core/tests/stateful_responses_integration.rs new file mode 100644 index 0000000..75dc545 --- /dev/null +++ b/crates/agentic-core/tests/stateful_responses_integration.rs @@ -0,0 +1,164 @@ +//! Cassette-based integration tests for the Responses API (cases 1–5). +//! +//! Mirrors `test_responses_api.py`. Each test replays a YAML cassette +//! against a mock HTTP server and verifies `execute()` output. + +mod support; + +use agentic_core::executor::execute; +use std::sync::Arc; +use support::{TestFixture, collect_stream, expected_text, load_cassette, make_request, output_text, unwrap_blocking}; + +const DIR: &str = concat!(env!("CARGO_MANIFEST_DIR"), "/tests/cassettes/text_only/responses"); + +/// Case 1 — single turn, non-streaming. +#[tokio::test] +async fn test_single_turn_nonstreaming() { + // Arrange + let cassette = load_cassette(&format!("{DIR}/resp-single-gpt-4o-nonstreaming.yaml")); + let t1 = &cassette.turns[0]; + let fixture = TestFixture::new(&[t1]).await; + + // Act + let payload = unwrap_blocking( + execute( + make_request(&t1.request.body.input, t1.request.body.store, false, None, None), + Arc::clone(&fixture.exec_ctx), + ) + .await + .expect("execute"), + ); + + // Assert + assert!(payload.id.starts_with("resp_"), "id={}", payload.id); + assert_eq!(payload.status, "completed"); + assert_eq!(output_text(&payload), expected_text(t1)); +} + +/// Case 2 — single turn, streaming. 
+#[tokio::test] +async fn test_single_turn_streaming() { + // Arrange + let cassette = load_cassette(&format!("{DIR}/resp-single-gpt-4o-streaming.yaml")); + let t1 = &cassette.turns[0]; + let fixture = TestFixture::new(&[t1]).await; + + // Act + let payload = collect_stream( + execute( + make_request(&t1.request.body.input, t1.request.body.store, true, None, None), + Arc::clone(&fixture.exec_ctx), + ) + .await + .expect("execute"), + ) + .await; + + // Assert + assert!(payload.id.starts_with("resp_"), "id={}", payload.id); + assert_eq!(payload.status, "completed"); + assert_eq!(output_text(&payload), expected_text(t1)); +} + +/// Case 3 — two turns, non-streaming, chained via `previous_response_id`. +#[tokio::test] +async fn test_two_turn_nonstreaming_previous_response_id() { + // Arrange + let cassette = load_cassette(&format!("{DIR}/resp-two-turn-gpt-4o-nonstreaming.yaml")); + let (t1, t2) = (&cassette.turns[0], &cassette.turns[1]); + let fixture = TestFixture::new(&[t1, t2]).await; + + // Act + let p1 = unwrap_blocking( + execute( + make_request(&t1.request.body.input, true, false, None, None), + Arc::clone(&fixture.exec_ctx), + ) + .await + .expect("t1"), + ); + let p2 = unwrap_blocking( + execute( + make_request(&t2.request.body.input, true, false, Some(p1.id.clone()), None), + Arc::clone(&fixture.exec_ctx), + ) + .await + .expect("t2"), + ); + + // Assert + assert!(p1.id.starts_with("resp_")); + assert_eq!(p1.status, "completed"); + assert_eq!(output_text(&p1), expected_text(t1)); + assert_ne!(p2.id, p1.id); + assert_eq!(p2.status, "completed"); + assert_eq!(p2.previous_response_id.as_deref(), Some(p1.id.as_str())); + assert_eq!(output_text(&p2), expected_text(t2)); +} + +/// Case 4 — two turns, streaming, chained via `previous_response_id`. 
+#[tokio::test] +async fn test_two_turn_streaming_previous_response_id() { + // Arrange + let cassette = load_cassette(&format!("{DIR}/resp-two-turn-gpt-4o-streaming.yaml")); + let (t1, t2) = (&cassette.turns[0], &cassette.turns[1]); + let fixture = TestFixture::new(&[t1, t2]).await; + + // Act + let p1 = collect_stream( + execute( + make_request(&t1.request.body.input, true, true, None, None), + Arc::clone(&fixture.exec_ctx), + ) + .await + .expect("t1"), + ) + .await; + let p2 = collect_stream( + execute( + make_request(&t2.request.body.input, true, true, Some(p1.id.clone()), None), + Arc::clone(&fixture.exec_ctx), + ) + .await + .expect("t2"), + ) + .await; + + // Assert + assert!(p1.id.starts_with("resp_")); + assert_eq!(p1.status, "completed"); + assert_eq!(output_text(&p1), expected_text(t1)); + assert_ne!(p2.id, p1.id); + assert_eq!(p2.status, "completed"); + assert_eq!(output_text(&p2), expected_text(t2)); +} + +/// Case 5 — `store=false` response cannot be used as `previous_response_id`. 
+#[tokio::test]
+async fn test_store_disabled_not_reusable_as_previous_response_id() {
+    // Arrange — only one mock needed; follow-up errors before hitting the LLM
+    let cassette = load_cassette(&format!("{DIR}/resp-no-store-gpt-4o-nonstreaming.yaml"));
+    let t1 = &cassette.turns[0];
+    let fixture = TestFixture::new(&[t1]).await;
+
+    // Act — turn 1, store=false (forced here regardless of the cassette's flag)
+    let first_request = make_request(&t1.request.body.input, false, false, None, None);
+    let p1 = unwrap_blocking(
+        execute(first_request, Arc::clone(&fixture.exec_ctx))
+            .await
+            .expect("t1"),
+    );
+    // Turn 1 itself must succeed; only its later reuse is expected to fail.
+    assert_eq!(p1.status, "completed");
+
+    // Act — follow-up with the unstored id
+    let follow_up = make_request("follow up", false, false, Some(p1.id.clone()), None);
+    let result = execute(follow_up, Arc::clone(&fixture.exec_ctx)).await;
+
+    // Assert — executor errors at rehydrate, before calling the LLM
+    assert!(result.is_err(), "expected error for unstored previous_response_id");
+}
diff --git a/crates/agentic-core/tests/storage_integration.rs b/crates/agentic-core/tests/storage_integration.rs
index d4ac640..e12f154 100644
--- a/crates/agentic-core/tests/storage_integration.rs
+++ b/crates/agentic-core/tests/storage_integration.rs
@@ -1,21 +1,12 @@
+mod support;
+
 use agentic_core::storage::InOutItem;
 use agentic_core::storage::ResponseMetadata;
-use agentic_core::storage::{ConversationStore, DbPool, ResponseStore, create_pool_with_schema};
+use agentic_core::storage::{ConversationStore, ResponseStore};
 use agentic_core::types::io::{InputItem, InputMessage, InputMessageContent, OutputItem, OutputMessage};
 use std::sync::Arc;
 
-async fn setup_pool() -> Arc<DbPool> {
-    let db_url = format!(
-        "sqlite://{}",
-        std::env::temp_dir()
-            .join(format!("test_{}.db", uuid::Uuid::now_v7()))
-            .display()
-    );
-
-    create_pool_with_schema(Some(&db_url))
-        .await
-        .expect("failed to create pool with schema")
-}
+use support::setup_pool;
 
 fn create_input_item(text: &str) -> InOutItem {
     InOutItem::Input(InputItem::Message(InputMessage {
diff --git
a/crates/agentic-core/tests/support/mod.rs b/crates/agentic-core/tests/support/mod.rs
new file mode 100644
index 0000000..bba6b3f
--- /dev/null
+++ b/crates/agentic-core/tests/support/mod.rs
@@ -0,0 +1,328 @@
+//! Shared test infrastructure for executor integration tests.
+//!
+//! - [`MockServer`] — axum-based HTTP mock with RAII shutdown (`Drop`).
+//! - [`TestFixture`] — bundles mock server + `ExecutionContext` for one test.
+//! - Cassette loading utilities.
+//! - Response helpers.
+
+// Every integration-test file that declares `mod support;` compiles its own
+// copy of this module and uses only a subset of the helpers.
+#![allow(dead_code)]
+
+use std::sync::Arc;
+
+use axum::Router;
+use axum::http::header;
+use axum::response::{IntoResponse, Response};
+use axum::routing::post;
+use either::Either;
+use futures::StreamExt;
+use serde::Deserialize;
+use tokio::sync::Mutex;
+use tokio::task::JoinHandle;
+
+use agentic_core::executor::{BoxStream, ConversationHandler, ExecutionContext, ResponseHandler};
+use agentic_core::storage::{ConversationStore, DbPool, ResponseStore, create_pool_with_schema};
+use agentic_core::types::io::{OutputItem, ResponsesInput, ToolChoice};
+use agentic_core::types::request_response::{RequestPayload, ResponsePayload};
+
+/// A recorded conversation: the ordered list of request/response turns
+/// parsed from one cassette YAML file.
+#[derive(Debug, Deserialize)]
+pub struct Cassette {
+    /// Turns in recording order.
+    pub turns: Vec<Turn>,
+}
+
+/// One recorded exchange: the request that was sent and the response that
+/// came back.
+#[derive(Debug, Deserialize)]
+pub struct Turn {
+    pub request: TurnRequest,
+    pub response: TurnResponse,
+}
+
+/// The request half of a [`Turn`].
+#[derive(Debug, Deserialize)]
+pub struct TurnRequest {
+    /// Endpoint path of the recorded call (e.g. `/v1/responses`).
+    pub path: String,
+    /// The request body fields the tests read.
+    pub body: TurnBody,
+}
+
+/// The subset of the recorded request body used by the tests.
+///
+/// Every field is optional in the cassette and falls back to the default
+/// noted on the field.
+#[derive(Debug, Deserialize, Default)]
+pub struct TurnBody {
+    /// User input text; empty string when absent.
+    #[serde(default)]
+    pub input: String,
+    /// Whether the response is stored; `true` when absent.
+    #[serde(default = "default_true")]
+    pub store: bool,
+    /// Whether the request was a streaming one; `false` when absent.
+    #[serde(default)]
+    pub stream: bool,
+}
+
+/// Serde default for [`TurnBody::store`].
+fn default_true() -> bool {
+    true
+}
+
+/// The response half of a [`Turn`]: a JSON body (non-streaming) or a list
+/// of SSE strings (streaming).
+#[derive(Debug, Deserialize)]
+pub struct TurnResponse {
+    /// Non-streaming: full JSON response body.
+    pub body: Option<serde_json::Value>,
+    /// Streaming: list of raw SSE strings from the recording.
+    pub sse: Option<Vec<String>>,
+}
+
+/// Load and parse a cassette YAML file (all turns preserved).
+///
+/// # Panics
+///
+/// Panics, naming `path`, if the file cannot be read or is not a valid
+/// cassette document.
+pub fn load_cassette(path: &str) -> Cassette {
+    let text = std::fs::read_to_string(path).unwrap_or_else(|e| panic!("failed to read cassette {path}: {e}"));
+    serde_yaml::from_str(&text).unwrap_or_else(|e| panic!("failed to parse cassette {path}: {e}"))
+}
+
+/// Filter to only `/v1/responses` turns — the LLM inference turns that need a
+/// mock HTTP response. Conversation cassettes interleave `/v1/conversations`
+/// management turns; the Rust executor handles those internally via
+/// [`ConversationHandler`] without any HTTP call.
+pub fn responses_turns(cassette: &Cassette) -> Vec<&Turn> {
+    cassette
+        .turns
+        .iter()
+        .filter(|t| t.request.path == "/v1/responses")
+        .collect()
+}
+
+/// Extract the expected output text from a cassette turn.
+///
+/// - Non-streaming: `body.output[0].content[0].text`
+/// - Streaming: concatenate all `response.output_text.delta` values
+///
+/// A recorded `body` takes precedence over `sse`. Returns an empty string
+/// when the turn has neither, or when the non-streaming text is missing.
+pub fn expected_text(turn: &Turn) -> String {
+    if let Some(body) = &turn.response.body {
+        return body["output"][0]["content"][0]["text"]
+            .as_str()
+            .unwrap_or("")
+            .to_string();
+    }
+    let Some(sse) = &turn.response.sse else {
+        return String::new();
+    };
+    sse.iter()
+        // One recorded string may carry several SSE lines.
+        .flat_map(|raw| raw.lines())
+        .filter_map(|line| line.strip_prefix("data: "))
+        // Non-JSON payloads (e.g. `[DONE]`) are skipped, not treated as errors.
+        .filter_map(|data| serde_json::from_str::<serde_json::Value>(data).ok())
+        .filter(|event| event["type"].as_str() == Some("response.output_text.delta"))
+        .filter_map(|event| event["delta"].as_str().map(str::to_owned))
+        .collect()
+}
+
+/// A per-test HTTP mock server. The server task is aborted when this struct
+/// is dropped, ensuring clean teardown even if a test panics.
+pub struct MockServer { + url: String, + handle: JoinHandle<()>, +} + +impl MockServer { + pub fn url(&self) -> &str { + &self.url + } +} + +impl Drop for MockServer { + fn drop(&mut self) { + self.handle.abort(); + } +} + +fn build_response(resp: MockResponse) -> Response { + match resp { + MockResponse::Json(body) => Response::builder() + .status(200) + .header(header::CONTENT_TYPE, "application/json") + .body(axum::body::Body::from(body)) + .unwrap() + .into_response(), + MockResponse::Sse(body) => Response::builder() + .status(200) + .header(header::CONTENT_TYPE, "text/event-stream; charset=utf-8") + .body(axum::body::Body::from(body)) + .unwrap() + .into_response(), + } +} + +/// A single queued mock response. +pub enum MockResponse { + Json(String), + Sse(String), +} + +impl MockResponse { + /// Build a `MockResponse` from a cassette turn. + pub fn from_turn(turn: &Turn) -> Self { + if let Some(body) = &turn.response.body { + return Self::Json(serde_json::to_string(body).expect("cassette body is valid JSON")); + } + if let Some(sse) = &turn.response.sse { + let mut body = sse.join(""); + // Ensure the stream is terminated. + if !body.contains("data: [DONE]") { + body.push_str("data: [DONE]\n\n"); + } + return Self::Sse(body); + } + panic!("cassette turn has neither body nor sse"); + } +} + +// Use a VecDeque so pop_front is O(1). +impl MockServer { + pub async fn start_deque(responses: Vec) -> Self { + use std::collections::VecDeque; + let listener = tokio::net::TcpListener::bind("127.0.0.1:0") + .await + .expect("bind mock server"); + let addr = listener.local_addr().expect("local addr"); + let url = format!("http://{addr}"); + // Store as VecDeque for O(1) pop_front. 
+ let queue: Arc>> = Arc::new(Mutex::new(VecDeque::from(responses))); + + let handle = tokio::spawn(async move { + let app = Router::new() + .route( + "/v1/responses", + post(move |_body: axum::body::Bytes| { + let queue = Arc::clone(&queue); + async move { + let mut q = queue.lock().await; + let resp = q.pop_front().expect("mock queue exhausted — check test setup"); + build_response(resp) + } + }), + ) + // Conversation management calls don't go through the mock — + // the executor handles them via ConversationHandler (DB-only). + // This route is here so the server doesn't return 404 if called. + .route( + "/v1/conversations", + post(|| async { (axum::http::StatusCode::OK, "{}") }), + ); + axum::serve(listener, app).await.ok(); + }); + + Self { url, handle } + } +} + +/// Create a fresh `SQLite` pool with schema applied. +/// +/// Uses a unique temp-file per call so concurrent tests don't conflict. +pub async fn setup_pool() -> Arc { + let db_path = std::env::temp_dir().join(format!("test_{}.db", uuid::Uuid::now_v7())); + let db_url = format!("sqlite://{}", db_path.display()); + create_pool_with_schema(Some(&db_url)) + .await + .expect("failed to create test pool") +} + +/// Bundles everything a test needs. Dropped at end of test scope. +pub struct TestFixture { + pub exec_ctx: Arc, + // Kept for its Drop impl — aborts the mock server when the test ends. + _server: MockServer, +} + +impl TestFixture { + /// Build a fixture from a full cassette turn slice. + /// + /// The mock server queues only `/v1/responses` turns (LLM inference). + /// `/v1/conversations` turns are handled by the executor via + /// [`ConversationHandler`] (DB-only, no outbound HTTP). 
+ pub async fn new(turns: &[&Turn]) -> Self { + let responses = turns + .iter() + .filter(|t| t.request.path == "/v1/responses") + .map(|t| MockResponse::from_turn(t)) + .collect(); + let server = MockServer::start_deque(responses).await; + + let pool = setup_pool().await; + let conv_handler = ConversationHandler::new(ConversationStore::new(Arc::clone(&pool))); + let resp_handler = ResponseHandler::new(ResponseStore::new(Arc::clone(&pool))); + let client = Arc::new(reqwest::Client::new()); + let exec_ctx = Arc::new(ExecutionContext::new( + conv_handler, + resp_handler, + client, + server.url().to_string(), + None, + )); + + Self { + exec_ctx, + _server: server, + } + } +} + +pub fn make_request( + input: &str, + store: bool, + stream: bool, + previous_response_id: Option, + conversation_id: Option, +) -> RequestPayload { + RequestPayload { + model: "test-model".to_string(), + input: ResponsesInput::Text(input.to_string()), + instructions: None, + previous_response_id, + conversation_id, + tools: None, + tool_choice: ToolChoice::Auto, + stream, + store, + include: None, + temperature: None, + top_p: None, + max_output_tokens: None, + truncation: None, + metadata: None, + } +} + +pub fn unwrap_blocking(result: Either) -> ResponsePayload { + match result { + Either::Left(p) => p, + Either::Right(_) => panic!("expected non-streaming response, got stream"), + } +} + +/// Collect a streaming response to its final `ResponsePayload`. 
+///
+/// Returns the first `data: ` chunk that deserializes as a
+/// [`ResponsePayload`]; the rest of the stream is drained before returning.
+/// Chunks without the `data: ` prefix, the `[DONE]` marker, and chunks that
+/// do not parse as a payload are skipped.
+///
+/// # Panics
+///
+/// Panics if `result` is the non-streaming variant, or if the stream ends
+/// without yielding a parseable payload.
+pub async fn collect_stream(result: Either<ResponsePayload, BoxStream>) -> ResponsePayload {
+    let Either::Right(stream) = result else {
+        panic!("expected streaming response, got blocking");
+    };
+    let mut stream = Box::pin(stream);
+    while let Some(chunk) = stream.next().await {
+        let Some(data) = chunk.trim_end_matches('\n').strip_prefix("data: ") else {
+            continue;
+        };
+        if data == "[DONE]" {
+            continue;
+        }
+        if let Ok(payload) = serde_json::from_str::<ResponsePayload>(data) {
+            // Consume the remaining chunks before returning the payload.
+            while stream.next().await.is_some() {}
+            return payload;
+        }
+    }
+    panic!("stream ended without a ResponsePayload chunk");
+}
+
+/// Extract concatenated text content from a `ResponsePayload`.
+///
+/// Only message items contribute; function calls and unknown items are
+/// ignored. Text parts are joined with no separator.
+pub fn output_text(payload: &ResponsePayload) -> String {
+    payload
+        .output
+        .iter()
+        .filter_map(|item| match item {
+            OutputItem::Message(msg) => Some(msg.content.iter().map(|c| c.text.as_str()).collect::<String>()),
+            OutputItem::FunctionCall(_) | OutputItem::Unknown => None,
+        })
+        .collect::<String>()
+}