diff --git a/Cargo.lock b/Cargo.lock index be0fd27..629480c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -667,7 +667,7 @@ dependencies = [ "byteorder", "byteview", "dashmap", - "log", + "log 0.4.29", "lsm-tree", "path-absolutize", "std-semaphore", @@ -797,7 +797,7 @@ dependencies = [ "cc", "cfg-if", "libc", - "log", + "log 0.4.29", "rustversion", "windows-link", "windows-result", @@ -884,6 +884,15 @@ version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" +[[package]] +name = "hpack" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1c68f61350ad23817dd207b035b5258d91ac5eaef69e96f906628aaed8854dda" +dependencies = [ + "log 0.3.9", +] + [[package]] name = "http" version = "1.4.0" @@ -989,7 +998,7 @@ dependencies = [ "http", "hyper", "hyper-util", - "log", + "log 0.4.29", "rustls", "rustls-pki-types", "tokio", @@ -1286,6 +1295,15 @@ dependencies = [ "scopeguard", ] +[[package]] +name = "log" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e19e8d5c34a3e0e2223db8e060f9e8264aeeb5c5fc64a4ee9965c062211c024b" +dependencies = [ + "log 0.4.29", +] + [[package]] name = "log" version = "0.4.29" @@ -1329,7 +1347,7 @@ dependencies = [ "enum_dispatch", "guardian", "interval-heap", - "log", + "log 0.4.29", "lz4_flex", "path-absolutize", "quick_cache", @@ -1385,7 +1403,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a69bcab0ad47271a0234d9422b131806bf3968021e5dc9328caf2d4cd58557fc" dependencies = [ "libc", - "log", + "log 0.4.29", "wasi", "windows-sys 0.61.2", ] @@ -1582,6 +1600,7 @@ dependencies = [ name = "phantom-agent" version = "0.1.0" dependencies = [ + "hpack", "httparse", "libc", "redhook", @@ -1904,7 +1923,7 @@ version = "0.22.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bf4ef73721ac7bcd79b2b315da7779d8fc09718c6b3d2d1b2d94850eb8c18432" dependencies = [ - "log", + "log 0.4.29", "ring", "rustls-pki-types", "rustls-webpki", @@ -2338,7 +2357,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c83b561d025642014097b66e6c1bb422783339e0909e4429cde4749d1990bc38" dependencies = [ "futures-util", - "log", + "log 0.4.29", "rustls", "rustls-pki-types", "tokio", @@ -2372,7 +2391,7 @@ version = "0.1.44" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "63e71662fa4b2a2c3a26f570f037eb95bb1f85397f3cd8076caed2f026a6d100" dependencies = [ - "log", + "log 0.4.29", "pin-project-lite", "tracing-attributes", "tracing-core", @@ -2405,7 +2424,7 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ee855f1f400bd0e5c02d150ae5de3840039a3f54b025156404e34c23c03f47c3" dependencies = [ - "log", + "log 0.4.29", "once_cell", "tracing-core", ] @@ -2445,7 +2464,7 @@ dependencies = [ "data-encoding", "http", "httparse", - "log", + "log 0.4.29", "rand", "rustls", "rustls-pki-types", @@ -2564,7 +2583,7 @@ dependencies = [ "byteorder", "byteview", "interval-heap", - "log", + "log 0.4.29", "path-absolutize", "rustc-hash", "tempfile", @@ -2976,7 +2995,7 @@ dependencies = [ "anyhow", "bitflags", "indexmap", - "log", + "log 0.4.29", "serde", "serde_derive", "serde_json", @@ -2995,7 +3014,7 @@ dependencies = [ "anyhow", "id-arena", "indexmap", - "log", + "log 0.4.29", "semver", "serde", "serde_derive", diff --git a/Dockerfile b/Dockerfile index 6e1b678..bce549a 100644 --- a/Dockerfile +++ b/Dockerfile @@ -70,6 +70,7 @@ RUN apt-get update \ jq \ ncat \ openssl \ + python3 \ && rm -rf /var/lib/apt/lists/* # phantom binary diff --git a/crates/phantom-agent/Cargo.toml b/crates/phantom-agent/Cargo.toml index 338d706..e689e8b 100644 --- a/crates/phantom-agent/Cargo.toml +++ b/crates/phantom-agent/Cargo.toml @@ -13,5 +13,6 @@ crate-type = ["dylib"] redhook = "2" libc = "0.2" httparse = "1" +hpack = "0.3" serde = { version = "1", features = ["derive"] } serde_json = "1" diff --git a/crates/phantom-agent/src/lib.rs b/crates/phantom-agent/src/lib.rs index 1abb018..c744e5c 100644 --- a/crates/phantom-agent/src/lib.rs +++ b/crates/phantom-agent/src/lib.rs @@ -6,8 +6,8 @@ //! The agent hooks `send()` / `recv()` / `close()` from libc to intercept //! plain-text HTTP/1.x traffic, and `SSL_write()` / `SSL_read()` / `SSL_free()` //! from OpenSSL/LibreSSL/BoringSSL to intercept HTTPS traffic (plaintext above -//! the TLS layer). Captured traces are sent as JSON datagrams over a Unix -//! datagram socket to the phantom main process. +//! the TLS layer). Both HTTP/1.x and HTTP/2 are captured. Captured traces are +//! sent as JSON datagrams over a Unix datagram socket to the phantom main process. //! //! **Note**: HTTPS capture requires the target to dynamically link `libssl`. //! Statically-linked TLS (e.g. Go's native crypto, Rust's rustls) is not @@ -80,6 +80,7 @@ struct TraceMsg { timestamp_ms: u64, #[serde(skip_serializing_if = "Option::is_none")] dest_addr: Option, + protocol_version: String, } fn emit_msg(msg: &TraceMsg) { @@ -129,6 +130,384 @@ fn body_b64(raw: &[u8]) -> Option { } } +// ───────────────────────────────────────────────────────────────────────────── +// HTTP/2 — constants, frame parsing, per-stream/connection state +// ───────────────────────────────────────────────────────────────────────────── + +/// HTTP/2 client connection preface (RFC 7540 §3.5). +const H2_PREFACE: &[u8] = b"PRI * HTTP/2.0\r\n\r\nSM\r\n\r\n"; +/// Size of an HTTP/2 frame header in bytes. +const H2_FRAME_HDR_LEN: usize = 9; +// Frame types we care about. +const H2_TYPE_DATA: u8 = 0x0; +const H2_TYPE_HEADERS: u8 = 0x1; +const H2_TYPE_CONTINUATION: u8 = 0x9; +// Frame flags. +const H2_FLAG_END_STREAM: u8 = 0x1; +const H2_FLAG_END_HEADERS: u8 = 0x4; +const H2_FLAG_PADDED: u8 = 0x8; +const H2_FLAG_PRIORITY: u8 = 0x20; + +/// Parse the 9-byte HTTP/2 frame header. +/// Returns `(payload_len, frame_type, flags, stream_id)` or `None` if buf is too short. +fn parse_h2_frame_header(buf: &[u8]) -> Option<(usize, u8, u8, u32)> { + if buf.len() < H2_FRAME_HDR_LEN { + return None; + } + let payload_len = ((buf[0] as usize) << 16) | ((buf[1] as usize) << 8) | (buf[2] as usize); + let frame_type = buf[3]; + let flags = buf[4]; + // Mask off the reserved R bit (bit 31). + let stream_id = u32::from_be_bytes([buf[5] & 0x7f, buf[6], buf[7], buf[8]]); + Some((payload_len, frame_type, flags, stream_id)) +} + +/// Return the `[start, end)` byte range of the header block fragment within a +/// HEADERS frame payload, stripping optional padding and priority bytes. +fn h2_header_block_range(payload: &[u8], flags: u8) -> (usize, usize) { + let mut start = 0usize; + let mut end = payload.len(); + + if flags & H2_FLAG_PADDED != 0 { + if payload.is_empty() { + return (0, 0); + } + let pad_len = payload[0] as usize; + start += 1; + end = end.saturating_sub(pad_len); + } + if flags & H2_FLAG_PRIORITY != 0 { + start += 5; // 4 bytes stream dependency + 1 byte weight + } + if start > end { (0, 0) } else { (start, end) } +} + +/// Per-stream state for a single HTTP/2 request-response pair. +struct H2Stream { + req_method: Option, + req_path: Option, + req_authority: Option, + req_scheme: Option, + req_headers: HashMap, + req_body: Vec, + /// True once we have seen END_STREAM on the request side. + req_done: bool, + started_at: Instant, + timestamp_ms: u64, + resp_status: Option, + resp_headers: HashMap, + resp_body: Vec, + /// True once we have seen END_STREAM on the response side. + resp_done: bool, + tls: bool, +} + +impl H2Stream { + fn new(tls: bool) -> Self { + let ts = SystemTime::now() + .duration_since(UNIX_EPOCH) + .unwrap_or_default() + .as_millis() as u64; + Self { + req_method: None, + req_path: None, + req_authority: None, + req_scheme: None, + req_headers: HashMap::new(), + req_body: Vec::new(), + req_done: false, + started_at: Instant::now(), + timestamp_ms: ts, + resp_status: None, + resp_headers: HashMap::new(), + resp_body: Vec::new(), + resp_done: false, + tls, + } + } +} + +/// Per-connection state for an HTTP/2 connection. +struct H2ConnState { + tls: bool, + /// Buffered outgoing (app→server) bytes not yet consumed into complete frames. + send_buf: Vec, + /// Buffered incoming (server→app) bytes not yet consumed into complete frames. + recv_buf: Vec, + /// HPACK decoder for client-sent request headers. + send_hpack: hpack::Decoder<'static>, + /// HPACK decoder for server-sent response headers. + recv_hpack: hpack::Decoder<'static>, + /// Active streams keyed by HTTP/2 stream ID. + streams: HashMap, + // CONTINUATION frame accumulation (send direction). + send_cont_sid: Option, + send_cont_buf: Vec, + send_cont_end_stream: bool, + // CONTINUATION frame accumulation (recv direction). + recv_cont_sid: Option, + recv_cont_buf: Vec, + recv_cont_end_stream: bool, +} + +impl H2ConnState { + fn new(tls: bool) -> Self { + Self { + tls, + send_buf: Vec::new(), + recv_buf: Vec::new(), + send_hpack: hpack::Decoder::new(), + recv_hpack: hpack::Decoder::new(), + streams: HashMap::new(), + send_cont_sid: None, + send_cont_buf: Vec::new(), + send_cont_end_stream: false, + recv_cont_sid: None, + recv_cont_buf: Vec::new(), + recv_cont_end_stream: false, + } + } +} + +/// Apply decoded HPACK name-value pairs to a stream's request pseudo-headers and +/// regular headers. +fn apply_h2_request_headers(stream: &mut H2Stream, headers: Vec<(Vec, Vec)>) { + for (name, value) in headers { + let name = String::from_utf8_lossy(&name).into_owned(); + let value = String::from_utf8_lossy(&value).into_owned(); + match name.as_str() { + ":method" => stream.req_method = Some(value), + ":path" => stream.req_path = Some(value), + ":scheme" => stream.req_scheme = Some(value), + ":authority" => stream.req_authority = Some(value), + n if !n.starts_with(':') => { + stream.req_headers.insert(n.to_string(), value); + } + _ => {} + } + } +} + +/// Apply decoded HPACK name-value pairs to a stream's response pseudo-headers and +/// regular headers. +fn apply_h2_response_headers(stream: &mut H2Stream, headers: Vec<(Vec, Vec)>) { + for (name, value) in headers { + let name = String::from_utf8_lossy(&name).into_owned(); + let value = String::from_utf8_lossy(&value).into_owned(); + if name == ":status" { + stream.resp_status = value.parse().ok(); + } else if !name.starts_with(':') { + stream.resp_headers.insert(name, value); + } + } +} + +/// Process all complete HTTP/2 frames in `h2.send_buf` (outgoing / request side). +fn process_h2_send_frames(h2: &mut H2ConnState) { + // Skip the 24-byte client connection preface if present at the start. + if h2.send_buf.starts_with(H2_PREFACE) { + h2.send_buf.drain(..H2_PREFACE.len()); + } + + loop { + let Some((payload_len, frame_type, flags, stream_id)) = parse_h2_frame_header(&h2.send_buf) + else { + break; + }; + let total = H2_FRAME_HDR_LEN + payload_len; + if h2.send_buf.len() < total { + break; // Frame not yet fully buffered. + } + + // Clone payload so we can drain the buffer cleanly. + let payload = h2.send_buf[H2_FRAME_HDR_LEN..total].to_vec(); + h2.send_buf.drain(..total); + + let tls = h2.tls; + match frame_type { + H2_TYPE_HEADERS if stream_id > 0 => { + let end_stream = flags & H2_FLAG_END_STREAM != 0; + let end_headers = flags & H2_FLAG_END_HEADERS != 0; + let (hb_start, hb_end) = h2_header_block_range(&payload, flags); + let hblock = &payload[hb_start..hb_end]; + + if end_headers { + let decoded = h2.send_hpack.decode(hblock).unwrap_or_default(); + let stream = h2 + .streams + .entry(stream_id) + .or_insert_with(|| H2Stream::new(tls)); + apply_h2_request_headers(stream, decoded); + stream.req_done |= end_stream; + } else { + // Header block continues in CONTINUATION frames. + h2.send_cont_sid = Some(stream_id); + h2.send_cont_buf = hblock.to_vec(); + h2.send_cont_end_stream = end_stream; + } + } + H2_TYPE_DATA if stream_id > 0 => { + let end_stream = flags & H2_FLAG_END_STREAM != 0; + // Strip padding. + let (data_start, data_end) = if flags & H2_FLAG_PADDED != 0 && !payload.is_empty() { + let pad = payload[0] as usize; + (1, payload.len().saturating_sub(pad)) + } else { + (0, payload.len()) + }; + if let Some(stream) = h2.streams.get_mut(&stream_id) { + if stream.req_body.len() < MAX_BUF { + stream + .req_body + .extend_from_slice(&payload[data_start..data_end]); + } + stream.req_done |= end_stream; + } + } + H2_TYPE_CONTINUATION if stream_id > 0 => { + if h2.send_cont_sid == Some(stream_id) { + h2.send_cont_buf.extend_from_slice(&payload); + if flags & H2_FLAG_END_HEADERS != 0 { + let hblock = std::mem::take(&mut h2.send_cont_buf); + let decoded = h2.send_hpack.decode(&hblock).unwrap_or_default(); + let end_stream = h2.send_cont_end_stream; + let stream = h2 + .streams + .entry(stream_id) + .or_insert_with(|| H2Stream::new(tls)); + apply_h2_request_headers(stream, decoded); + stream.req_done |= end_stream; + h2.send_cont_sid = None; + h2.send_cont_end_stream = false; + } + } + } + _ => {} // SETTINGS, WINDOW_UPDATE, PING, GOAWAY, etc. — ignore. + } + } +} + +/// Process all complete HTTP/2 frames in `h2.recv_buf` (incoming / response side). +fn process_h2_recv_frames(h2: &mut H2ConnState) { + loop { + let Some((payload_len, frame_type, flags, stream_id)) = parse_h2_frame_header(&h2.recv_buf) + else { + break; + }; + let total = H2_FRAME_HDR_LEN + payload_len; + if h2.recv_buf.len() < total { + break; + } + + let payload = h2.recv_buf[H2_FRAME_HDR_LEN..total].to_vec(); + h2.recv_buf.drain(..total); + + let tls = h2.tls; + match frame_type { + H2_TYPE_HEADERS if stream_id > 0 => { + let end_stream = flags & H2_FLAG_END_STREAM != 0; + let end_headers = flags & H2_FLAG_END_HEADERS != 0; + let (hb_start, hb_end) = h2_header_block_range(&payload, flags); + let hblock = &payload[hb_start..hb_end]; + + if end_headers { + let decoded = h2.recv_hpack.decode(hblock).unwrap_or_default(); + let stream = h2 + .streams + .entry(stream_id) + .or_insert_with(|| H2Stream::new(tls)); + apply_h2_response_headers(stream, decoded); + stream.resp_done |= end_stream; + } else { + h2.recv_cont_sid = Some(stream_id); + h2.recv_cont_buf = hblock.to_vec(); + h2.recv_cont_end_stream = end_stream; + } + } + H2_TYPE_DATA if stream_id > 0 => { + let end_stream = flags & H2_FLAG_END_STREAM != 0; + let (data_start, data_end) = if flags & H2_FLAG_PADDED != 0 && !payload.is_empty() { + let pad = payload[0] as usize; + (1, payload.len().saturating_sub(pad)) + } else { + (0, payload.len()) + }; + if let Some(stream) = h2.streams.get_mut(&stream_id) { + if stream.resp_body.len() < MAX_BUF { + stream + .resp_body + .extend_from_slice(&payload[data_start..data_end]); + } + stream.resp_done |= end_stream; + } + } + H2_TYPE_CONTINUATION if stream_id > 0 => { + if h2.recv_cont_sid == Some(stream_id) { + h2.recv_cont_buf.extend_from_slice(&payload); + if flags & H2_FLAG_END_HEADERS != 0 { + let hblock = std::mem::take(&mut h2.recv_cont_buf); + let decoded = h2.recv_hpack.decode(&hblock).unwrap_or_default(); + let end_stream = h2.recv_cont_end_stream; + let stream = h2 + .streams + .entry(stream_id) + .or_insert_with(|| H2Stream::new(tls)); + apply_h2_response_headers(stream, decoded); + stream.resp_done |= end_stream; + h2.recv_cont_sid = None; + h2.recv_cont_end_stream = false; + } + } + } + _ => {} + } + } +} + +/// Remove and return all streams that have a complete response (status + END_STREAM). +fn drain_completed_h2_streams(h2: &mut H2ConnState) -> Vec { + let done_ids: Vec = h2 + .streams + .iter() + .filter(|(_, s)| s.resp_status.is_some() && s.resp_done) + .map(|(id, _)| *id) + .collect(); + let mut completed = Vec::with_capacity(done_ids.len()); + for id in done_ids { + if let Some(stream) = h2.streams.remove(&id) { + completed.push(stream); + } + } + completed +} + +/// Build and send a `TraceMsg` for a completed HTTP/2 stream. +fn emit_h2_stream(stream: H2Stream) { + let method = stream.req_method.unwrap_or_else(|| "GET".to_string()); + let path = stream.req_path.unwrap_or_else(|| "/".to_string()); + let authority = stream.req_authority.unwrap_or_default(); + let scheme = stream + .req_scheme + .unwrap_or_else(|| if stream.tls { "https" } else { "http" }.to_string()); + let url = format!("{scheme}://{authority}{path}"); + let status_code = stream.resp_status.unwrap_or(0); + let duration = stream.started_at.elapsed(); + + emit_msg(&TraceMsg { + method, + url, + status_code, + request_headers: stream.req_headers, + response_headers: stream.resp_headers, + request_body_b64: body_b64(&stream.req_body), + response_body_b64: body_b64(&stream.resp_body), + duration_ms: duration.as_millis() as u64, + timestamp_ms: stream.timestamp_ms, + dest_addr: None, + protocol_version: "HTTP/2".to_string(), + }); +} + // ───────────────────────────────────────────────────────────────────────────── // Per-connection state machine // @@ -161,6 +540,8 @@ enum FdState { content_length: Option, headers_end: Option, }, + /// HTTP/2 connection (may carry many multiplexed streams). + Http2(Box), } static STATE_MAP: OnceLock>> = OnceLock::new(); @@ -288,7 +669,7 @@ fn try_parse_response_headers(buf: &[u8]) -> Option { } // ───────────────────────────────────────────────────────────────────────────── -// Emit a completed trace +// Emit a completed HTTP/1.x trace // ───────────────────────────────────────────────────────────────────────────── fn do_emit( @@ -315,6 +696,7 @@ fn do_emit( duration_ms: duration.as_millis() as u64, timestamp_ms: req.timestamp_ms, dest_addr: None, + protocol_version: "HTTP/1.1".to_string(), }); } @@ -331,6 +713,25 @@ fn process_outgoing(key: usize, data: &[u8], tls: bool) { Err(_) => return, }; + // ── HTTP/2 path ────────────────────────────────────────────────────────── + // If we already know this connection is HTTP/2, route directly. + if let Some(FdState::Http2(h2)) = map.get_mut(&key) { + if h2.send_buf.len() < MAX_BUF { + h2.send_buf.extend_from_slice(data); + } + process_h2_send_frames(h2); + return; + } + // Detect a new HTTP/2 connection by its client preface. + if data.starts_with(H2_PREFACE) { + let mut h2 = Box::new(H2ConnState::new(tls)); + h2.send_buf.extend_from_slice(data); + process_h2_send_frames(&mut h2); + map.insert(key, FdState::Http2(h2)); + return; + } + + // ── HTTP/1.x path ──────────────────────────────────────────────────────── if looks_like_http_request(data) { // Start fresh tracking for this key (may overwrite stale state). let buf = data.to_vec(); @@ -380,6 +781,33 @@ fn process_outgoing(key: usize, data: &[u8], tls: bool) { } fn process_incoming(key: usize, data: &[u8]) { + // ── HTTP/2 path ────────────────────────────────────────────────────────── + // Handle HTTP/2 streams, collecting those that have a complete response. + // We release the lock before emitting. + let h2_completed = { + let mut map = match state_map().lock() { + Ok(m) => m, + Err(_) => return, + }; + if let Some(FdState::Http2(h2)) = map.get_mut(&key) { + if h2.recv_buf.len() < MAX_BUF { + h2.recv_buf.extend_from_slice(data); + } + process_h2_recv_frames(h2); + Some(drain_completed_h2_streams(h2)) + } else { + None + } + }; // lock released + + if let Some(completed) = h2_completed { + for stream in completed { + emit_h2_stream(stream); + } + return; + } + + // ── HTTP/1.x path ──────────────────────────────────────────────────────── // Phase 1: accumulate, parse headers if ready, check completeness. // Return owned FdState if the response is complete (to emit outside the lock). let to_emit = { @@ -454,23 +882,33 @@ fn process_teardown(key: usize) { map.remove(&key) }; // Lock released - // Emit partial response (e.g. chunked or connection-close semantics). - if let Some(FdState::CollectingResponse { - req, - buf, - tls, - status_code: Some(sc), - resp_headers: Some(rh), - content_length, - headers_end: Some(he), - }) = state - { - let cl = content_length.unwrap_or_else(|| buf.len().saturating_sub(he)); - let body_end = (he + cl).min(buf.len()); - let duration = req.started_at.elapsed(); - do_emit(*req, sc, rh, &buf[he..body_end], duration, tls); + match state { + // HTTP/1.x: emit partial response (e.g. chunked or connection-close semantics). + Some(FdState::CollectingResponse { + req, + buf, + tls, + status_code: Some(sc), + resp_headers: Some(rh), + content_length, + headers_end: Some(he), + }) => { + let cl = content_length.unwrap_or_else(|| buf.len().saturating_sub(he)); + let body_end = (he + cl).min(buf.len()); + let duration = req.started_at.elapsed(); + do_emit(*req, sc, rh, &buf[he..body_end], duration, tls); + } + // HTTP/2: emit any streams for which we received at least a response status. + Some(FdState::Http2(h2)) => { + for (_sid, stream) in h2.streams { + if stream.resp_status.is_some() { + emit_h2_stream(stream); + } + } + } + // If headers were never parsed, we have nothing useful to emit. + _ => {} } - // If headers were never parsed, we have nothing useful to emit. } // ───────────────────────────────────────────────────────────────────────────── diff --git a/crates/phantom-capture/src/ldpreload.rs b/crates/phantom-capture/src/ldpreload.rs index 8dea623..761ee5c 100644 --- a/crates/phantom-capture/src/ldpreload.rs +++ b/crates/phantom-capture/src/ldpreload.rs @@ -33,6 +33,8 @@ struct AgentTrace { duration_ms: u64, timestamp_ms: u64, dest_addr: Option, + #[serde(default)] + protocol_version: Option, } fn parse_method(s: &str) -> HttpMethod { @@ -84,7 +86,7 @@ fn agent_trace_to_http_trace(a: AgentTrace) -> HttpTrace { duration: Duration::from_millis(a.duration_ms), source_addr: None, dest_addr: a.dest_addr, - protocol_version: "HTTP/1.1".to_string(), + protocol_version: a.protocol_version.unwrap_or_else(|| "HTTP/1.1".to_string()), } } diff --git a/tests/integration/lib.sh b/tests/integration/lib.sh index 7bbcb08..de98e80 100755 --- a/tests/integration/lib.sh +++ b/tests/integration/lib.sh @@ -88,6 +88,196 @@ start_mock_https() { echo $! } +# Start a one-shot HTTP/2 cleartext (h2c) mock server using Python 3. +# Reads the client connection preface, negotiates a SETTINGS exchange, then +# replies to every request with a fixed response body. +# Returns the server PID. +# +# Usage: start_mock_h2c PORT RESPONSE_BODY +start_mock_h2c() { + local port="$1" + local body="$2" + local srv + srv=$(mktemp /tmp/h2c_XXXX.py) + # Write a minimal, stdlib-only HTTP/2 h2c server. + # Frame format: 3-byte length | 1-byte type | 1-byte flags | 4-byte stream-id | payload + # HPACK byte 0x88 = indexed representation of static-table entry 8 (:status: 200). + cat > "$srv" << 'PYEOF' +import socket, struct, sys, time + +PREFACE = b'PRI * HTTP/2.0\r\n\r\nSM\r\n\r\n' + +def mk(t, f, s, p=b''): + n = len(p) + return bytes([n >> 16 & 0xff, n >> 8 & 0xff, n & 0xff, t, f]) \ + + struct.pack('!I', s & 0x7fffffff) + p + +port = int(sys.argv[1]) +body = (sys.argv[2] if len(sys.argv) > 2 else '{}').encode() + +srv = socket.socket() +srv.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) +srv.bind(('127.0.0.1', port)) +srv.listen(1) +srv.settimeout(5) +conn, _ = srv.accept() +conn.settimeout(3) + +buf = b'' +while len(buf) < len(PREFACE): + try: + buf += conn.recv(4096) + except OSError: + break + +buf = buf[len(PREFACE):] +conn.sendall(mk(0x4, 0, 0)) # server SETTINGS + +replied = set() +for _ in range(500): + if len(buf) >= 9: + n = (buf[0] << 16) | (buf[1] << 8) | buf[2] + t, f = buf[3], buf[4] + s = struct.unpack('!I', buf[5:9])[0] & 0x7fffffff + if len(buf) >= 9 + n: + buf = buf[9 + n:] + if t == 0x4 and not (f & 0x1): # SETTINGS → ACK + conn.sendall(mk(0x4, 0x1, 0)) + elif t == 0x1 and s > 0 and s not in replied: # HEADERS + if f & 0x1: # END_STREAM (GET/HEAD) + conn.sendall(mk(0x1, 0x4, s, b'\x88')) + conn.sendall(mk(0x0, 0x1, s, body)) + replied.add(s) + elif t == 0x0 and s > 0 and (f & 0x1) and s not in replied: # DATA END_STREAM (POST) + conn.sendall(mk(0x1, 0x4, s, b'\x88')) + conn.sendall(mk(0x0, 0x1, s, body)) + replied.add(s) + continue + if replied: + break + try: + chunk = conn.recv(4096) + if chunk: + buf += chunk + else: + break + except OSError: + break + +time.sleep(0.2) +try: + conn.close() +except OSError: + pass +try: + srv.close() +except OSError: + pass +PYEOF + python3 "$srv" "$port" "$body" >/dev/null 2>&1 & + echo $! + # Temp file is intentionally left; the OS cleans /tmp on reboot. +} + +# Start a one-shot HTTP/2 over HTTPS (TLS + ALPN h2) mock server using Python 3. +# Identical frame logic to start_mock_h2c, but wrapped in TLS with ALPN "h2" +# so that curl --http2 negotiates HTTP/2 via ALPN rather than prior-knowledge. +# Returns the server PID. +# +# Usage: start_mock_h2tls PORT CERT_FILE KEY_FILE RESPONSE_BODY +start_mock_h2tls() { + local port="$1" + local cert="$2" + local key="$3" + local body="$4" + local srv + srv=$(mktemp /tmp/h2tls_XXXX.py) + cat > "$srv" << 'PYEOF' +import socket, ssl, struct, sys, time + +PREFACE = b'PRI * HTTP/2.0\r\n\r\nSM\r\n\r\n' + +def mk(t, f, s, p=b''): + n = len(p) + return bytes([n >> 16 & 0xff, n >> 8 & 0xff, n & 0xff, t, f]) \ + + struct.pack('!I', s & 0x7fffffff) + p + +port = int(sys.argv[1]) +cert = sys.argv[2] +key = sys.argv[3] +body = (sys.argv[4] if len(sys.argv) > 4 else '{}').encode() + +ctx = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER) +ctx.load_cert_chain(cert, key) +ctx.set_alpn_protocols(['h2']) + +raw = socket.socket() +raw.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) +raw.bind(('127.0.0.1', port)) +raw.listen(1) +raw.settimeout(5) +raw_conn, _ = raw.accept() +try: + conn = ctx.wrap_socket(raw_conn, server_side=True) +except ssl.SSLError: + sys.exit(1) +conn.settimeout(3) + +buf = b'' +while len(buf) < len(PREFACE): + try: + buf += conn.recv(4096) + except OSError: + break + +buf = buf[len(PREFACE):] +conn.sendall(mk(0x4, 0, 0)) # server SETTINGS + +replied = set() +for _ in range(500): + if len(buf) >= 9: + n = (buf[0] << 16) | (buf[1] << 8) | buf[2] + t, f = buf[3], buf[4] + s = struct.unpack('!I', buf[5:9])[0] & 0x7fffffff + if len(buf) >= 9 + n: + buf = buf[9 + n:] + if t == 0x4 and not (f & 0x1): + conn.sendall(mk(0x4, 0x1, 0)) + elif t == 0x1 and s > 0 and s not in replied: + if f & 0x1: + conn.sendall(mk(0x1, 0x4, s, b'\x88')) + conn.sendall(mk(0x0, 0x1, s, body)) + replied.add(s) + elif t == 0x0 and s > 0 and (f & 0x1) and s not in replied: + conn.sendall(mk(0x1, 0x4, s, b'\x88')) + conn.sendall(mk(0x0, 0x1, s, body)) + replied.add(s) + continue + if replied: + break + try: + chunk = conn.recv(4096) + if chunk: + buf += chunk + else: + break + except OSError: + break + +time.sleep(0.2) +try: + conn.close() +except OSError: + pass +try: + raw.close() +except OSError: + pass +PYEOF + python3 "$srv" "$port" "$cert" "$key" "$body" >/dev/null 2>&1 & + echo $! +} + # ── Phantom runner ─────────────────────────────────────────────────────────── # Run phantom in JSONL+ldpreload mode, executing the given command. diff --git a/tests/integration/run_tests.sh b/tests/integration/run_tests.sh index 2ef461f..de228a8 100755 --- a/tests/integration/run_tests.sh +++ b/tests/integration/run_tests.sh @@ -334,6 +334,113 @@ test_https_post_body() { } run_test "HTTPS POST with body" test_https_post_body +# ── Test 11: HTTP/2 cleartext GET ─────────────────────────────────────────── +# +# Uses curl --http2-prior-knowledge (h2c) to send a plain-TCP HTTP/2 request. +# The agent should detect the HTTP/2 client preface, parse the binary frames, +# and emit a trace with protocol_version="HTTP/2". + +test_h2c_get_200() { + # Require curl with HTTP/2 support. + if ! curl --version 2>&1 | grep -q 'HTTP2'; then + echo " SKIP: curl lacks HTTP/2 support" + return 0 + fi + + local port=18093 + local body='{"h2":true}' + + local pid + pid=$(start_mock_h2c "$port" "$body") + sleep 0.2 + + local output + output=$(run_phantom_capture "curl -s --http2-prior-knowledge http://127.0.0.1:$port/h2test") + + kill "$pid" 2>/dev/null; wait "$pid" 2>/dev/null || true + + [ -n "$output" ] || { echo " FAIL: no output"; return 1; } + + local line + line=$(echo "$output" | head -1) + assert_json_field "$line" '.protocol_version' 'HTTP/2' && + assert_json_field "$line" '.method' 'GET' && + assert_json_field "$line" '.status_code' '200' && + assert_json_field_contains "$line" '.url' '/h2test' && + assert_json_field_contains "$line" '.response_body' 'h2' +} +run_test "HTTP/2 cleartext GET" test_h2c_get_200 + +# ── Test 12: HTTP/2 cleartext POST with body ───────────────────────────────── + +test_h2c_post_body() { + if ! curl --version 2>&1 | grep -q 'HTTP2'; then + echo " SKIP: curl lacks HTTP/2 support" + return 0 + fi + + local port=18094 + local body='{"posted":true}' + + local pid + pid=$(start_mock_h2c "$port" "$body") + sleep 0.2 + + local output + output=$(run_phantom_capture "curl -s --http2-prior-knowledge -X POST -d {\"key\":\"val\"} http://127.0.0.1:$port/h2post") + + kill "$pid" 2>/dev/null; wait "$pid" 2>/dev/null || true + + [ -n "$output" ] || { echo " FAIL: no output"; return 1; } + + local line + line=$(echo "$output" | head -1) + assert_json_field "$line" '.protocol_version' 'HTTP/2' && + assert_json_field "$line" '.method' 'POST' && + assert_json_field "$line" '.status_code' '200' && + assert_json_field_contains "$line" '.url' '/h2post' && + assert_json_field_contains "$line" '.request_body' 'key' && + assert_json_field_contains "$line" '.response_body' 'posted' +} +run_test "HTTP/2 cleartext POST" test_h2c_post_body + +# ── Test 13: HTTP/2 over HTTPS (TLS + ALPN h2) ────────────────────────────── +# +# Uses curl --http2 (no --http2-prior-knowledge) against an HTTPS server that +# advertises "h2" via ALPN. After the TLS handshake the agent intercepts the +# plaintext HTTP/2 frames through its SSL_write()/SSL_read() hooks, exercising +# the same code path used when tracing real-world HTTPS/2 applications. + +test_h2_https_get() { + if ! curl --version 2>&1 | grep -q 'HTTP2'; then + echo " SKIP: curl lacks HTTP/2 support" + return 0 + fi + + local port=18095 + local body='{"h2tls":true}' + + local pid + pid=$(start_mock_h2tls "$port" "$CERT_DIR/cert.pem" "$CERT_DIR/key.pem" "$body") + sleep 0.5 + + local output + output=$(run_phantom_capture "curl -sk --http2 https://127.0.0.1:$port/h2tls") + + kill "$pid" 2>/dev/null; wait "$pid" 2>/dev/null || true + + [ -n "$output" ] || { echo " FAIL: no output"; return 1; } + + local line + line=$(echo "$output" | head -1) + assert_json_field "$line" '.protocol_version' 'HTTP/2' && + assert_json_field "$line" '.method' 'GET' && + assert_json_field "$line" '.status_code' '200' && + assert_json_field_contains "$line" '.url' '/h2tls' && + assert_json_field_contains "$line" '.response_body' 'h2tls' +} +run_test "HTTP/2 HTTPS (ALPN)" test_h2_https_get + # ── Results ────────────────────────────────────────────────────────────────── report_results