From 0e592fd064494011663182bb17700e890b7de41a Mon Sep 17 00:00:00 2001 From: UncleSp1d3r Date: Sun, 29 Mar 2026 20:53:53 -0400 Subject: [PATCH 01/28] chore(deps): update libmagic-rs name and dependencies versions Signed-off-by: UncleSp1d3r --- tessl.json | 27 ++++++++++++++++++++++++--- 1 file changed, 24 insertions(+), 3 deletions(-) diff --git a/tessl.json b/tessl.json index 63c7a65..65dc6d1 100644 --- a/tessl.json +++ b/tessl.json @@ -1,9 +1,9 @@ { - "name": "stringy", + "name": "libmagic-rs", "mode": "vendored", "dependencies": { "actionbook/rust-skills": { - "version": "3ea748280d2fa5680675fe4abe1a5e764f7c021e", + "version": "1f4becdcb88d1cbccc1880594479f28891102843", "source": "https://github.com/actionbook/rust-skills", "include": { "skills": [ @@ -35,6 +35,27 @@ "unsafe-checker" ] } + }, + "pantheon-ai/github-actions-generator": { + "version": "0.1.1" + }, + "pantheon-ai/mise-complete": { + "version": "0.1.1" + }, + "pantheon-ai/dockerfile-toolkit": { + "version": "0.1.0" + }, + "pantheon-ai/moscow-prioritization": { + "version": "0.1.1" + }, + "pantheon-ai/software-design-principles": { + "version": "0.1.4" + }, + "cisco/software-security": { + "version": "1.2.5" + }, + "tessl-labs/good-oss-citizen": { + "version": "1.0.1" } } -} +} \ No newline at end of file From 281b64339ee69b868873e75cf1f00308c7398372 Mon Sep 17 00:00:00 2001 From: UncleSp1d3r Date: Sun, 29 Mar 2026 20:54:08 -0400 Subject: [PATCH 02/28] chore(deps): update cargo-binstall to version 1.17.7 and rust to 1.94.1 Signed-off-by: UncleSp1d3r --- mise.lock | 50 +------------------------------------------------- 1 file changed, 1 insertion(+), 49 deletions(-) diff --git a/mise.lock b/mise.lock index fceea7b..d4f6e2b 100644 --- a/mise.lock +++ b/mise.lock @@ -96,54 +96,6 @@ url = "https://github.com/oven-sh/bun/releases/download/bun-v1.3.10/bun-windows- checksum = "sha256:715709c69b176e20994533d3292bd0b7c32de9c0c5575b916746ec6b2aa38346" url = 
"https://github.com/oven-sh/bun/releases/download/bun-v1.3.10/bun-windows-x64-baseline.zip" -[[tools.cargo-binstall]] -version = "1.17.6" -backend = "aqua:cargo-bins/cargo-binstall" - -[tools.cargo-binstall."platforms.linux-arm64"] -checksum = "sha256:e5f2c4b79b10370dff707b86a14e7a0ad399c5dc5853824e933432910741992c" -url = "https://github.com/cargo-bins/cargo-binstall/releases/download/v1.17.6/cargo-binstall-aarch64-unknown-linux-musl.tgz" - -[tools.cargo-binstall."platforms.linux-arm64-musl"] -checksum = "sha256:e5f2c4b79b10370dff707b86a14e7a0ad399c5dc5853824e933432910741992c" -url = "https://github.com/cargo-bins/cargo-binstall/releases/download/v1.17.6/cargo-binstall-aarch64-unknown-linux-musl.tgz" - -[tools.cargo-binstall."platforms.linux-x64"] -checksum = "sha256:f926d96e9f0822ded35c4ac2071ce190bd1311565695c49c45e295de0d685aaa" -url = "https://github.com/cargo-bins/cargo-binstall/releases/download/v1.17.6/cargo-binstall-x86_64-unknown-linux-musl.tgz" - -[tools.cargo-binstall."platforms.linux-x64-baseline"] -checksum = "sha256:f926d96e9f0822ded35c4ac2071ce190bd1311565695c49c45e295de0d685aaa" -url = "https://github.com/cargo-bins/cargo-binstall/releases/download/v1.17.6/cargo-binstall-x86_64-unknown-linux-musl.tgz" - -[tools.cargo-binstall."platforms.linux-x64-musl"] -checksum = "sha256:f926d96e9f0822ded35c4ac2071ce190bd1311565695c49c45e295de0d685aaa" -url = "https://github.com/cargo-bins/cargo-binstall/releases/download/v1.17.6/cargo-binstall-x86_64-unknown-linux-musl.tgz" - -[tools.cargo-binstall."platforms.linux-x64-musl-baseline"] -checksum = "sha256:f926d96e9f0822ded35c4ac2071ce190bd1311565695c49c45e295de0d685aaa" -url = "https://github.com/cargo-bins/cargo-binstall/releases/download/v1.17.6/cargo-binstall-x86_64-unknown-linux-musl.tgz" - -[tools.cargo-binstall."platforms.macos-arm64"] -checksum = "sha256:101447fa30a723ca8e1a13cec11bb1350b7179331b2aa7054d27bef7a3e19021" -url = 
"https://github.com/cargo-bins/cargo-binstall/releases/download/v1.17.6/cargo-binstall-aarch64-apple-darwin.zip" - -[tools.cargo-binstall."platforms.macos-x64"] -checksum = "sha256:cd07fd79e2848b13b994e3f83fa5377b631625b847f0734219f2706feb518258" -url = "https://github.com/cargo-bins/cargo-binstall/releases/download/v1.17.6/cargo-binstall-x86_64-apple-darwin.zip" - -[tools.cargo-binstall."platforms.macos-x64-baseline"] -checksum = "sha256:cd07fd79e2848b13b994e3f83fa5377b631625b847f0734219f2706feb518258" -url = "https://github.com/cargo-bins/cargo-binstall/releases/download/v1.17.6/cargo-binstall-x86_64-apple-darwin.zip" - -[tools.cargo-binstall."platforms.windows-x64"] -checksum = "sha256:5fcbddde2d415704d2432bbe606a5767ddaf1ef4ee2c16b7828f8be2ed1e5a5c" -url = "https://github.com/cargo-bins/cargo-binstall/releases/download/v1.17.6/cargo-binstall-x86_64-pc-windows-msvc.zip" - -[tools.cargo-binstall."platforms.windows-x64-baseline"] -checksum = "sha256:5fcbddde2d415704d2432bbe606a5767ddaf1ef4ee2c16b7828f8be2ed1e5a5c" -url = "https://github.com/cargo-bins/cargo-binstall/releases/download/v1.17.6/cargo-binstall-x86_64-pc-windows-msvc.zip" - [[tools.cargo-binstall]] version = "1.17.7" backend = "aqua:cargo-bins/cargo-binstall" @@ -440,7 +392,7 @@ checksum = "sha256:950c5f21a015c1bdd1337f233456df2470fab71e4d794407d27a84cb8b990 url = "https://github.com/astral-sh/python-build-standalone/releases/download/20260303/cpython-3.14.3+20260303-x86_64-pc-windows-msvc-install_only_stripped.tar.gz" [[tools.rust]] -version = "1.94.0" +version = "1.94.1" backend = "core:rust" [[tools.scorecard]] From d10cdb68d75d54ee5c5114ba3f4284549e5d15a9 Mon Sep 17 00:00:00 2001 From: UncleSp1d3r Date: Sun, 29 Mar 2026 20:54:23 -0400 Subject: [PATCH 03/28] docs(agents): add agent rules section and reference to RULES.md Signed-off-by: UncleSp1d3r --- AGENTS.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/AGENTS.md b/AGENTS.md index 964af34..e0c74b0 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ 
-570,3 +570,7 @@ This project has the OSSF Best Practices passing badge. Maintain these standards - SECURITY.md documents vulnerability reporting with scope, safe harbor, and PGP key - AGENTS.md must accurately reflect implemented features (not aspirational) - `docs/src/release-verification.md` documents artifact signing for users + +## Agent Rules + +@.tessl/RULES.md follow the [instructions](.tessl/RULES.md) From 44691178e58ca3f08c7d254e3d071df5bb2318da Mon Sep 17 00:00:00 2001 From: UncleSp1d3r Date: Sun, 29 Mar 2026 20:54:35 -0400 Subject: [PATCH 04/28] docs(policy): add AI usage policy to clarify accountability and guidelines Signed-off-by: UncleSp1d3r --- AI_POLICY.md | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) create mode 100644 AI_POLICY.md diff --git a/AI_POLICY.md b/AI_POLICY.md new file mode 100644 index 0000000..25e6927 --- /dev/null +++ b/AI_POLICY.md @@ -0,0 +1,31 @@ +# AI Usage Policy + +We build operator-focused security tools. AI coding assistants are part of how we do that. This policy is not anti-AI -- it is pro-accountability. + +Think of AI assistance like spellcheck. It catches typos, suggests corrections, and speeds up the mechanical parts of writing. But you are still responsible for your words and their consequences. + +## The Rule + +**You own every line you submit.** You must be able to explain what it does and how it interacts with the rest of the system without asking your AI to explain it back to you. + +Everything else follows from that. + +## How We Work + +- **Disclose your tools.** Note what you used in your PR description -- Claude Code, Copilot, Cursor, whatever. No specific format required. + +- **Review AI-generated text before posting.** Issues, discussions, and PR descriptions must reflect your understanding, not a language model's first draft. Read it, cut the filler, make sure it says what you mean. + +- **No AI-generated media.** No generated images, logos, audio, or video. 
Text-based diagrams (ASCII art, Mermaid) and code are acceptable. + +- **Unreviewed output gets closed.** Hallucinated APIs, boilerplate that ignores project conventions, suggestions you clearly did not run -- these get closed without review. We are not a QA service for your AI's output. + +## Why + +Transparent by design means knowing what the code does and why it is there. Tested under pressure means every change was understood by the person who submitted it. AI makes capable engineers faster. It does not replace the understanding that makes contributions trustworthy. + +Every pull request is reviewed by a human. Submitting work you do not understand shifts that burden onto maintainers. That is not how we operate. + +## New Contributors + +Use AI to learn the codebase. Read the code it generates. Run it. Break it. Then submit work that reflects your understanding. We will help you through review -- that deal only works if the code is yours. From 11cd856ad02d50f9271e7c1790c7776eefe6e623 Mon Sep 17 00:00:00 2001 From: UncleSp1d3r Date: Mon, 30 Mar 2026 00:06:10 -0400 Subject: [PATCH 05/28] chore(ci): update mise-action to version 4.0.1 for improved functionality Signed-off-by: UncleSp1d3r --- .github/workflows/security.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/security.yml b/.github/workflows/security.yml index 2923472..9da27ba 100644 --- a/.github/workflows/security.yml +++ b/.github/workflows/security.yml @@ -25,7 +25,7 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - - uses: jdx/mise-action@5228313ee0372e111a38da051671ca30fc5a96db # v3.6.3 + - uses: jdx/mise-action@1648a7812b9aeae629881980618f079932869151 # v4.0.1 with: install: true cache: true From ad857aebad3bd47c04afc8bbf09fac0439c28b28 Mon Sep 17 00:00:00 2001 From: UncleSp1d3r Date: Mon, 30 Mar 2026 00:06:16 -0400 Subject: [PATCH 06/28] feat(offset): implement indirect offset resolution 
functionality Signed-off-by: UncleSp1d3r --- src/evaluator/offset/indirect.rs | 740 ++++++++++++++++++++++++++++++- src/evaluator/offset/mod.rs | 26 +- 2 files changed, 733 insertions(+), 33 deletions(-) diff --git a/src/evaluator/offset/indirect.rs b/src/evaluator/offset/indirect.rs index 794f041..83a8b79 100644 --- a/src/evaluator/offset/indirect.rs +++ b/src/evaluator/offset/indirect.rs @@ -1,27 +1,735 @@ // Copyright (c) 2025-2026 the libmagic-rs contributors // SPDX-License-Identifier: Apache-2.0 -//! Indirect offset resolution (not yet implemented) +//! Indirect offset resolution +//! +//! Indirect offsets read a pointer value from the file at a base offset, +//! then use that value (with optional adjustment) as the final offset. use crate::LibmagicError; -use crate::parser::ast::OffsetSpec; +use crate::error::EvaluationError; +use crate::evaluator::types::{TypeReadError, read_byte, read_long, read_quad, read_short}; +use crate::parser::ast::{Endianness, OffsetSpec, TypeKind, Value}; -/// Resolve an indirect offset specification +use super::{map_offset_error, resolve_absolute_offset}; + +/// Resolve an indirect offset specification. +/// +/// Indirect offsets dereference a pointer stored in the file buffer: +/// 1. Resolve `base_offset` to an absolute position (supports negative/from-end). +/// 2. Read a numeric pointer value at that position using `pointer_type` and `endian`. +/// 3. Apply `adjustment` with checked arithmetic. +/// 4. Validate the final offset against `buffer.len()`. +/// +/// # Arguments /// -/// Indirect offsets read a pointer value from the file at a base offset, -/// then use that value (with optional adjustment) as the final offset. +/// * `spec` - Must be `OffsetSpec::Indirect { .. }` +/// * `buffer` - The file buffer to read from /// /// # Errors /// -/// Currently returns `LibmagicError::EvaluationError` with `UnsupportedType` -/// as indirect offset resolution is not yet implemented. 
-// TODO: Implement indirect offset resolution (issue #37) -pub fn resolve_indirect_offset(spec: &OffsetSpec, _buffer: &[u8]) -> Result { - debug_assert!( - matches!(spec, OffsetSpec::Indirect { .. }), - "resolve_indirect_offset called with non-indirect spec" - ); - Err(LibmagicError::EvaluationError( - crate::error::EvaluationError::unsupported_type("Indirect offsets not yet implemented"), - )) +/// * `EvaluationError::InvalidOffset` - If `base_offset` is out of bounds or arithmetic overflows +/// * `EvaluationError::BufferOverrun` - If the pointer read or final offset exceeds buffer bounds +/// * `EvaluationError::UnsupportedType` - If `pointer_type` is not a numeric type +pub fn resolve_indirect_offset(spec: &OffsetSpec, buffer: &[u8]) -> Result { + let (base_offset, pointer_type, adjustment, endian) = match spec { + OffsetSpec::Indirect { + base_offset, + pointer_type, + adjustment, + endian, + } => (*base_offset, pointer_type, *adjustment, *endian), + _ => { + return Err(LibmagicError::EvaluationError( + EvaluationError::internal_error( + "resolve_indirect_offset called with non-indirect spec", + ), + )); + } + }; + + // Step 1: Resolve base_offset to an absolute position + let abs_base = resolve_absolute_offset(base_offset, buffer) + .map_err(|e| map_offset_error(&e, base_offset))?; + + // Step 2: Read pointer value using the appropriate numeric reader + let pointer_value = read_pointer(buffer, abs_base, pointer_type, endian)?; + + // Step 3: Apply adjustment with checked arithmetic + let final_offset = apply_adjustment(pointer_value, adjustment)?; + + // Step 4: Validate final offset against buffer length + if final_offset >= buffer.len() { + return Err(LibmagicError::EvaluationError( + EvaluationError::BufferOverrun { + offset: final_offset, + }, + )); + } + + Ok(final_offset) +} + +/// Read a pointer value from the buffer and extract it as a raw `u64`. 
+fn read_pointer( + buffer: &[u8], + offset: usize, + pointer_type: &TypeKind, + endian: Endianness, +) -> Result { + let value = match pointer_type { + TypeKind::Byte { signed } => read_byte(buffer, offset, *signed), + TypeKind::Short { signed, .. } => read_short(buffer, offset, endian, *signed), + TypeKind::Long { signed, .. } => read_long(buffer, offset, endian, *signed), + TypeKind::Quad { signed, .. } => read_quad(buffer, offset, endian, *signed), + _ => { + return Err(LibmagicError::EvaluationError( + EvaluationError::unsupported_type(format!( + "Indirect offset pointer type not supported: {pointer_type:?}" + )), + )); + } + } + .map_err(|e| map_type_read_error(e, offset))?; + + extract_raw_unsigned(&value) +} + +/// Extract a raw unsigned integer from a `Value`, converting signed values. +fn extract_raw_unsigned(value: &Value) -> Result { + match value { + Value::Uint(v) => Ok(*v), + #[allow(clippy::cast_sign_loss)] + Value::Int(v) => Ok(*v as u64), + _ => Err(LibmagicError::EvaluationError( + EvaluationError::internal_error("Pointer read returned non-integer value"), + )), + } +} + +/// Apply an `i64` adjustment to a `u64` pointer value with checked arithmetic. +fn apply_adjustment(pointer: u64, adjustment: i64) -> Result { + let adjusted = if adjustment >= 0 { + #[allow(clippy::cast_sign_loss)] + pointer + .checked_add(adjustment as u64) + .ok_or_else(|| overflow_error(pointer, adjustment))? + } else { + // Negative adjustment + if adjustment == i64::MIN { + return Err(overflow_error(pointer, adjustment)); + } + #[allow(clippy::cast_sign_loss)] + let abs_adj = (-adjustment) as u64; + pointer + .checked_sub(abs_adj) + .ok_or_else(|| overflow_error(pointer, adjustment))? + }; + + usize::try_from(adjusted).map_err(|_| overflow_error(pointer, adjustment)) +} + +/// Map a `TypeReadError` to a `LibmagicError`. +fn map_type_read_error(e: TypeReadError, offset: usize) -> LibmagicError { + match e { + TypeReadError::BufferOverrun { .. 
} => { + LibmagicError::EvaluationError(EvaluationError::BufferOverrun { offset }) + } + other => LibmagicError::EvaluationError(EvaluationError::from(other)), + } +} + +/// Create an overflow error for failed adjustment arithmetic. +fn overflow_error(_pointer: u64, adjustment: i64) -> LibmagicError { + LibmagicError::EvaluationError(EvaluationError::InvalidOffset { offset: adjustment }) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::parser::ast::Endianness; + + /// Helper to build an `OffsetSpec::Indirect` for tests. + fn indirect( + base_offset: i64, + pointer_type: TypeKind, + adjustment: i64, + endian: Endianness, + ) -> OffsetSpec { + OffsetSpec::Indirect { + base_offset, + pointer_type, + adjustment, + endian, + } + } + + // ── Byte pointer ───────────────────────────────────────────── + + #[test] + fn test_byte_pointer_unsigned() { + // Buffer: [pointer=0x04, ..., target_byte_at_4] + let buffer = &[0x04, 0x00, 0x00, 0x00, 0xAA]; + let spec = indirect(0, TypeKind::Byte { signed: false }, 0, Endianness::Little); + assert_eq!(resolve_indirect_offset(&spec, buffer).unwrap(), 4); + } + + #[test] + fn test_byte_pointer_signed_positive() { + let buffer = &[0x03, 0x00, 0x00, 0xBB]; + let spec = indirect(0, TypeKind::Byte { signed: true }, 0, Endianness::Little); + assert_eq!(resolve_indirect_offset(&spec, buffer).unwrap(), 3); + } + + // ── Short pointer, both endiannesses ───────────────────────── + + #[test] + fn test_short_pointer_little_endian() { + // LE short at offset 0: bytes [0x04, 0x00] → 0x0004 + let mut buffer = vec![0x04, 0x00, 0x00, 0x00, 0xCC]; + buffer.resize(5, 0); + let spec = indirect( + 0, + TypeKind::Short { + endian: Endianness::Little, + signed: false, + }, + 0, + Endianness::Little, + ); + assert_eq!(resolve_indirect_offset(&spec, &buffer).unwrap(), 4); + } + + #[test] + fn test_short_pointer_big_endian() { + // BE short at offset 0: bytes [0x00, 0x04] → 0x0004 + let buffer = &[0x00, 0x04, 0x00, 0x00, 0xDD]; + let spec = 
indirect( + 0, + TypeKind::Short { + endian: Endianness::Big, + signed: false, + }, + 0, + Endianness::Big, + ); + assert_eq!(resolve_indirect_offset(&spec, buffer).unwrap(), 4); + } + + // ── Long pointer, both endiannesses ────────────────────────── + + #[test] + fn test_long_pointer_little_endian() { + // LE long at offset 0: bytes [0x08, 0x00, 0x00, 0x00] → 8 + let mut buffer = vec![0x08, 0x00, 0x00, 0x00]; + buffer.resize(9, 0xAA); + let spec = indirect( + 0, + TypeKind::Long { + endian: Endianness::Little, + signed: false, + }, + 0, + Endianness::Little, + ); + assert_eq!(resolve_indirect_offset(&spec, &buffer).unwrap(), 8); + } + + #[test] + fn test_long_pointer_big_endian() { + // BE long at offset 0: bytes [0x00, 0x00, 0x00, 0x06] → 6 + let buffer = &[0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0xFF]; + let spec = indirect( + 0, + TypeKind::Long { + endian: Endianness::Big, + signed: false, + }, + 0, + Endianness::Big, + ); + assert_eq!(resolve_indirect_offset(&spec, buffer).unwrap(), 6); + } + + // ── Quad pointer ───────────────────────────────────────────── + + #[test] + fn test_quad_pointer_little_endian() { + // LE quad at offset 0: value = 16 + let mut buffer = vec![0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00]; + buffer.resize(17, 0xBB); + let spec = indirect( + 0, + TypeKind::Quad { + endian: Endianness::Little, + signed: false, + }, + 0, + Endianness::Little, + ); + assert_eq!(resolve_indirect_offset(&spec, &buffer).unwrap(), 16); + } + + #[test] + fn test_quad_pointer_big_endian() { + // BE quad at offset 0: bytes [0x00..0x00, 0x10] → 0x0000_0000_0000_0010 = 16 + let mut buffer = vec![0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10]; + buffer.resize(17, 0xCC); + let spec = indirect( + 0, + TypeKind::Quad { + endian: Endianness::Big, + signed: false, + }, + 0, + Endianness::Big, + ); + assert_eq!(resolve_indirect_offset(&spec, &buffer).unwrap(), 16); + } + + // ── extract_raw_unsigned unit tests ──────────────────────── + + #[test] + fn 
test_extract_raw_unsigned_negative_one() { + // Value::Int(-1) must reinterpret as u64::MAX (0xFFFF_FFFF_FFFF_FFFF) + let value = Value::Int(-1); + assert_eq!(extract_raw_unsigned(&value).unwrap(), u64::MAX); + } + + #[test] + fn test_extract_raw_unsigned_negative_two() { + // Value::Int(-2) must reinterpret as u64::MAX - 1 + let value = Value::Int(-2); + assert_eq!(extract_raw_unsigned(&value).unwrap(), 0xFFFF_FFFF_FFFF_FFFE); + } + + #[test] + fn test_extract_raw_unsigned_i32_min_sign_extended() { + // A signed 32-bit -1 is sign-extended to i64 -1 by the reader, + // so extract_raw_unsigned must yield u64::MAX. + let value = Value::Int(-1); + assert_eq!(extract_raw_unsigned(&value).unwrap(), 0xFFFF_FFFF_FFFF_FFFF); + } + + #[test] + fn test_extract_raw_unsigned_positive_int() { + let value = Value::Int(42); + assert_eq!(extract_raw_unsigned(&value).unwrap(), 42); + } + + #[test] + fn test_extract_raw_unsigned_uint() { + let value = Value::Uint(0xDEAD_BEEF); + assert_eq!(extract_raw_unsigned(&value).unwrap(), 0xDEAD_BEEF); + } + + #[test] + fn test_extract_raw_unsigned_rejects_non_integer() { + let value = Value::String("hello".to_string()); + assert!(extract_raw_unsigned(&value).is_err()); + } + + // ── read_pointer signed-negative unit tests ───────────────── + + #[test] + fn test_read_pointer_signed_long_negative_one() { + // LE signed long: [0xFF, 0xFF, 0xFF, 0xFF] → i32 = -1 → i64 = -1 → u64 = 0xFFFF_FFFF_FFFF_FFFF + let buffer = &[0xFF, 0xFF, 0xFF, 0xFF]; + let raw = read_pointer( + buffer, + 0, + &TypeKind::Long { + endian: Endianness::Little, + signed: true, + }, + Endianness::Little, + ) + .unwrap(); + assert_eq!(raw, u64::MAX); + } + + #[test] + fn test_read_pointer_signed_short_negative_two() { + // LE signed short: [0xFE, 0xFF] → i16 = -2 → i64 = -2 → u64 = 0xFFFF_FFFF_FFFF_FFFE + let buffer = &[0xFE, 0xFF]; + let raw = read_pointer( + buffer, + 0, + &TypeKind::Short { + endian: Endianness::Little, + signed: true, + }, + Endianness::Little, + ) + 
.unwrap(); + assert_eq!(raw, 0xFFFF_FFFF_FFFF_FFFE); + } + + #[test] + fn test_read_pointer_signed_byte_negative_one() { + // Signed byte: [0xFF] → i8 = -1 → i64 = -1 → u64 = 0xFFFF_FFFF_FFFF_FFFF + let buffer = &[0xFF]; + let raw = read_pointer( + buffer, + 0, + &TypeKind::Byte { signed: true }, + Endianness::Little, + ) + .unwrap(); + assert_eq!(raw, u64::MAX); + } + + // ── Signed negative pointer end-to-end ────────────────────── + + #[test] + fn test_signed_short_negative_pointer_overruns_after_raw_conversion() { + // Signed LE short at offset 0: bytes [0xFE, 0xFF] → i16 = -2 + // read_pointer extracts raw u64 = 0xFFFF_FFFF_FFFF_FFFE (verified by unit tests above). + // That enormous pointer value must fail bounds validation, NOT be rejected + // during extraction. An implementation that rejects negative Value::Int early + // would not reach the bounds check. + let buffer = &[0xFE, 0xFF, 0x00, 0x00]; + let spec = indirect( + 0, + TypeKind::Short { + endian: Endianness::Little, + signed: true, + }, + 0, + Endianness::Little, + ); + let err = resolve_indirect_offset(&spec, buffer).unwrap_err(); + + // After raw unsigned reinterpretation, the pointer is 0xFFFF_FFFF_FFFF_FFFE. + // On 64-bit: usize::try_from succeeds → BufferOverrun with that exact offset. + // On 32-bit: usize::try_from overflows → InvalidOffset from apply_adjustment. + if usize::BITS == 64 { + assert!( + matches!( + err, + LibmagicError::EvaluationError(EvaluationError::BufferOverrun { offset }) + if offset == 0xFFFF_FFFF_FFFF_FFFE + ), + "Expected BufferOverrun at 0xFFFF_FFFF_FFFF_FFFE, got: {err:?}" + ); + } else { + assert!( + matches!( + err, + LibmagicError::EvaluationError(EvaluationError::InvalidOffset { .. 
}) + ), + "Expected InvalidOffset from usize::try_from overflow on 32-bit, got: {err:?}" + ); + } + } + + #[test] + fn test_signed_long_negative_pointer_with_adjustment_overruns() { + // Signed LE long at offset 0: bytes [0xFF, 0xFF, 0xFF, 0xFF] → i32 = -1 + // extract_raw_unsigned converts Value::Int(-1) → u64::MAX (0xFFFF_FFFF_FFFF_FFFF). + // Adjustment of -1 yields u64::MAX - 1 = 0xFFFF_FFFF_FFFF_FFFE via checked_sub. + // Must fail at bounds validation, not during raw extraction. + let buffer = &[0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00]; + let spec = indirect( + 0, + TypeKind::Long { + endian: Endianness::Little, + signed: true, + }, + -1, + Endianness::Little, + ); + let err = resolve_indirect_offset(&spec, buffer).unwrap_err(); + + // After raw reinterpretation: u64::MAX. After adjustment of -1: 0xFFFF_FFFF_FFFF_FFFE. + // On 64-bit: usize::try_from succeeds → BufferOverrun with that exact offset. + // On 32-bit: usize::try_from overflows → InvalidOffset from apply_adjustment. + if usize::BITS == 64 { + assert!( + matches!( + err, + LibmagicError::EvaluationError(EvaluationError::BufferOverrun { offset }) + if offset == 0xFFFF_FFFF_FFFF_FFFE + ), + "Expected BufferOverrun at 0xFFFF_FFFF_FFFF_FFFE, got: {err:?}" + ); + } else { + assert!( + matches!( + err, + LibmagicError::EvaluationError(EvaluationError::InvalidOffset { .. 
}) + ), + "Expected InvalidOffset from usize::try_from overflow on 32-bit, got: {err:?}" + ); + } + } + + // ── Positive and negative adjustments ──────────────────────── + + #[test] + fn test_positive_adjustment() { + // Pointer value = 2, adjustment = +3 → final = 5 + let buffer = &[0x02, 0x00, 0x00, 0x00, 0x00, 0xEE]; + let spec = indirect(0, TypeKind::Byte { signed: false }, 3, Endianness::Little); + assert_eq!(resolve_indirect_offset(&spec, buffer).unwrap(), 5); + } + + #[test] + fn test_negative_adjustment() { + // Pointer value = 5, adjustment = -2 → final = 3 + let buffer = &[0x05, 0x00, 0x00, 0xFF]; + let spec = indirect(0, TypeKind::Byte { signed: false }, -2, Endianness::Little); + assert_eq!(resolve_indirect_offset(&spec, buffer).unwrap(), 3); + } + + // ── From-end base offset ───────────────────────────────────── + + #[test] + fn test_from_end_base_offset() { + // 8-byte buffer, base_offset = -1 → resolves to index 7 + // Byte at index 7 = 0x02 → pointer value = 2 → final = 2 + let buffer = &[0x00, 0x00, 0xAA, 0x00, 0x00, 0x00, 0x00, 0x02]; + let spec = indirect(-1, TypeKind::Byte { signed: false }, 0, Endianness::Little); + assert_eq!(resolve_indirect_offset(&spec, buffer).unwrap(), 2); + } + + // ── Pointer read overrun ───────────────────────────────────── + + #[test] + fn test_pointer_read_overrun_short() { + // Buffer has 1 byte, trying to read a short (2 bytes) at offset 0 + let buffer = &[0x04]; + let spec = indirect( + 0, + TypeKind::Short { + endian: Endianness::Little, + signed: false, + }, + 0, + Endianness::Little, + ); + let result = resolve_indirect_offset(&spec, buffer); + assert!(result.is_err()); + assert!(matches!( + result.unwrap_err(), + LibmagicError::EvaluationError(EvaluationError::BufferOverrun { .. 
}) + )); + } + + #[test] + fn test_pointer_read_overrun_long() { + // Buffer has 3 bytes, trying to read a long (4 bytes) at offset 0 + let buffer = &[0x00, 0x00, 0x00]; + let spec = indirect( + 0, + TypeKind::Long { + endian: Endianness::Little, + signed: false, + }, + 0, + Endianness::Little, + ); + let result = resolve_indirect_offset(&spec, buffer); + assert!(result.is_err()); + assert!(matches!( + result.unwrap_err(), + LibmagicError::EvaluationError(EvaluationError::BufferOverrun { .. }) + )); + } + + // ── Final offset overrun ───────────────────────────────────── + + #[test] + fn test_final_offset_overrun() { + // Pointer value = 0xFF (255), buffer only 5 bytes → overrun + let buffer = &[0xFF, 0x00, 0x00, 0x00, 0x00]; + let spec = indirect(0, TypeKind::Byte { signed: false }, 0, Endianness::Little); + let result = resolve_indirect_offset(&spec, buffer); + assert!(result.is_err()); + assert!(matches!( + result.unwrap_err(), + LibmagicError::EvaluationError(EvaluationError::BufferOverrun { .. }) + )); + } + + #[test] + fn test_final_offset_overrun_with_adjustment() { + // Pointer value = 3, adjustment = +10, buffer only 8 bytes → 13 overruns + let buffer = &[0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00]; + let spec = indirect(0, TypeKind::Byte { signed: false }, 10, Endianness::Little); + let result = resolve_indirect_offset(&spec, buffer); + assert!(result.is_err()); + assert!(matches!( + result.unwrap_err(), + LibmagicError::EvaluationError(EvaluationError::BufferOverrun { .. 
}) + )); + } + + // ── Arithmetic overflow/underflow ──────────────────────────── + + #[test] + fn test_adjustment_overflow() { + // Unsigned quad reading u64::MAX + positive adjustment → overflow + let buffer = &[0xFF; 16]; + let spec = indirect( + 0, + TypeKind::Quad { + endian: Endianness::Little, + signed: false, + }, + 1, + Endianness::Little, + ); + let result = resolve_indirect_offset(&spec, buffer); + assert!(result.is_err()); + assert!(matches!( + result.unwrap_err(), + LibmagicError::EvaluationError(EvaluationError::InvalidOffset { .. }) + )); + } + + #[test] + fn test_adjustment_underflow() { + // Pointer value = 0, adjustment = -1 → underflow + let buffer = &[0x00, 0x00, 0x00, 0x00]; + let spec = indirect(0, TypeKind::Byte { signed: false }, -1, Endianness::Little); + let result = resolve_indirect_offset(&spec, buffer); + assert!(result.is_err()); + assert!(matches!( + result.unwrap_err(), + LibmagicError::EvaluationError(EvaluationError::InvalidOffset { .. }) + )); + } + + // ── Unsupported pointer types ──────────────────────────────── + + #[test] + fn test_unsupported_pointer_type_string() { + let buffer = &[0x00, 0x00, 0x00, 0x00]; + let spec = indirect( + 0, + TypeKind::String { max_length: None }, + 0, + Endianness::Little, + ); + let result = resolve_indirect_offset(&spec, buffer); + assert!(result.is_err()); + assert!(matches!( + result.unwrap_err(), + LibmagicError::EvaluationError(EvaluationError::UnsupportedType { .. }) + )); + } + + #[test] + fn test_unsupported_pointer_type_float() { + let buffer = &[0x00, 0x00, 0x00, 0x00]; + let spec = indirect( + 0, + TypeKind::Float { + endian: Endianness::Little, + }, + 0, + Endianness::Little, + ); + let result = resolve_indirect_offset(&spec, buffer); + assert!(result.is_err()); + assert!(matches!( + result.unwrap_err(), + LibmagicError::EvaluationError(EvaluationError::UnsupportedType { .. 
}) + )); + } + + #[test] + fn test_unsupported_pointer_type_double() { + let buffer = &[0x00; 8]; + let spec = indirect( + 0, + TypeKind::Double { + endian: Endianness::Little, + }, + 0, + Endianness::Little, + ); + let result = resolve_indirect_offset(&spec, buffer); + assert!(result.is_err()); + assert!(matches!( + result.unwrap_err(), + LibmagicError::EvaluationError(EvaluationError::UnsupportedType { .. }) + )); + } + + // ── PE-header-style 32-bit LE pointer at 0x3c ──────────────── + + #[test] + fn test_pe_header_style_offset_0x3c() { + // Simulate a PE file: 32-bit LE pointer at offset 0x3C points to PE header. + // At offset 0x3C we store LE u32 = 0x80 (128). + let mut buffer = vec![0u8; 256]; + // Write LE u32 value 0x80 at offset 0x3C + buffer[0x3C] = 0x80; + buffer[0x3D] = 0x00; + buffer[0x3E] = 0x00; + buffer[0x3F] = 0x00; + // Place "PE\0\0" signature at offset 0x80 + buffer[0x80] = b'P'; + buffer[0x81] = b'E'; + buffer[0x82] = 0x00; + buffer[0x83] = 0x00; + + let spec = indirect( + 0x3C, + TypeKind::Long { + endian: Endianness::Little, + signed: false, + }, + 0, + Endianness::Little, + ); + let offset = resolve_indirect_offset(&spec, &buffer).unwrap(); + assert_eq!(offset, 0x80); + // Verify we can read the PE signature at that offset + assert_eq!(&buffer[offset..offset + 4], b"PE\0\0"); + } + + // ── Base offset out of bounds ──────────────────────────────── + + #[test] + fn test_base_offset_out_of_bounds() { + let buffer = &[0x00, 0x01, 0x02]; + let spec = indirect(100, TypeKind::Byte { signed: false }, 0, Endianness::Little); + let result = resolve_indirect_offset(&spec, buffer); + assert!(result.is_err()); + } + + // ── Signed pointer extraction ──────────────────────────────── + + #[test] + fn test_signed_long_pointer_positive() { + // Signed long value = 4 (positive) → final offset = 4 + let buffer = &[0x04, 0x00, 0x00, 0x00, 0xAA]; + let spec = indirect( + 0, + TypeKind::Long { + endian: Endianness::Little, + signed: true, + }, + 0, + 
Endianness::Little, + ); + assert_eq!(resolve_indirect_offset(&spec, buffer).unwrap(), 4); + } + + // ── Non-indirect spec produces internal error ──────────────── + + #[test] + fn test_non_indirect_spec_returns_error() { + let buffer = &[0x00; 8]; + let spec = OffsetSpec::Absolute(0); + let result = resolve_indirect_offset(&spec, buffer); + assert!(result.is_err()); + assert!(matches!( + result.unwrap_err(), + LibmagicError::EvaluationError(EvaluationError::InternalError { .. }) + )); + } } diff --git a/src/evaluator/offset/mod.rs b/src/evaluator/offset/mod.rs index 687b6fa..b92c9a6 100644 --- a/src/evaluator/offset/mod.rs +++ b/src/evaluator/offset/mod.rs @@ -16,7 +16,7 @@ use crate::LibmagicError; use crate::parser::ast::OffsetSpec; /// Map an `OffsetError` to a `LibmagicError` for a given original offset value -fn map_offset_error(e: &OffsetError, original_offset: i64) -> LibmagicError { +pub(crate) fn map_offset_error(e: &OffsetError, original_offset: i64) -> LibmagicError { match e { OffsetError::BufferOverrun { offset, @@ -35,8 +35,8 @@ fn map_offset_error(e: &OffsetError, original_offset: i64) -> LibmagicError { /// Resolve any offset specification to an absolute position /// /// This is a higher-level function that handles all types of offset specifications. -/// Currently only supports absolute offsets, but will be extended to handle indirect, -/// relative, and from-end offsets in future tasks. +/// Supports absolute, from-end, and indirect offsets. Relative offsets are not yet +/// implemented. 
/// /// # Arguments /// @@ -127,26 +127,18 @@ mod tests { } #[test] - fn test_resolve_offset_indirect_not_implemented() { - let buffer = b"Test data"; + fn test_resolve_offset_indirect_success() { + // Byte pointer at offset 0 with value 5 → resolves to offset 5 + let buffer = b"\x05TestXdata"; let spec = OffsetSpec::Indirect { base_offset: 0, - pointer_type: crate::parser::ast::TypeKind::Byte { signed: true }, + pointer_type: crate::parser::ast::TypeKind::Byte { signed: false }, adjustment: 0, endian: crate::parser::ast::Endianness::Little, }; - let result = resolve_offset(&spec, buffer); - assert!(result.is_err()); - - match result.unwrap_err() { - LibmagicError::EvaluationError(crate::error::EvaluationError::UnsupportedType { - type_name, - }) => { - assert!(type_name.contains("Indirect offsets not yet implemented")); - } - _ => panic!("Expected EvaluationError with UnsupportedType"), - } + let result = resolve_offset(&spec, buffer).unwrap(); + assert_eq!(result, 5); } #[test] From 014fa8ac0ad428d9f51ea2ae8b68f4da0f09b3e3 Mon Sep 17 00:00:00 2001 From: UncleSp1d3r Date: Mon, 30 Mar 2026 00:06:22 -0400 Subject: [PATCH 07/28] docs(ast): enhance PString documentation with structure and behavior details Signed-off-by: UncleSp1d3r --- docs/src/ast-structures.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/docs/src/ast-structures.md b/docs/src/ast-structures.md index 0910425..9bf455c 100644 --- a/docs/src/ast-structures.md +++ b/docs/src/ast-structures.md @@ -238,17 +238,21 @@ let string_type = TypeKind::String { Pascal-style length-prefixed strings where the length prefix can be 1, 2, or 4 bytes depending on the `length_width` field. **Structure:** + - Length prefix: 1, 2, or 4 bytes indicating string length, with configurable endianness - String data: The number of bytes specified by the length prefix **Example:** + ``` 0 pstring JPEG 0 pstring/H JPEG ``` + The first line reads a 1-byte length prefix (default), then reads that many bytes as a string. 
The second line reads a 2-byte big-endian length prefix. **Behavior:** + - Returns `Value::String` containing the string data (without the length prefix) - Performs bounds checking on both the length prefix and the string data - Supports all string comparison operators @@ -275,6 +279,7 @@ pub enum PStringLengthWidth { ``` **Suffix conventions:** + - `/B` - 1-byte length prefix (default if no suffix specified) - `/H` - 2-byte big-endian length prefix - `/h` - 2-byte little-endian length prefix From 25754b0848a29bcd3bb7b2d2d07244ab9cc10acb Mon Sep 17 00:00:00 2001 From: UncleSp1d3r Date: Mon, 30 Mar 2026 00:06:27 -0400 Subject: [PATCH 08/28] docs(evaluator): clarify length interpretation in type reading section Signed-off-by: UncleSp1d3r --- docs/src/evaluator.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/src/evaluator.md b/docs/src/evaluator.md index f6f7018..7668f70 100644 --- a/docs/src/evaluator.md +++ b/docs/src/evaluator.md @@ -195,7 +195,7 @@ pub fn read_pstring( - `PStringLengthWidth::TwoByteLE` - 2-byte little-endian length prefix (`/h` suffix) - `PStringLengthWidth::FourByteBE` - 4-byte big-endian length prefix (`/L` suffix) - `PStringLengthWidth::FourByteLE` - 4-byte little-endian length prefix (`/l` suffix) -- **Length interpretation**: +- **Length interpretation**: - Reads 1, 2, or 4 bytes from buffer using `from_be_bytes` or `from_le_bytes` depending on variant - The length value specifies how many bytes of string data follow the prefix - **`/J` flag** (`length_includes_itself`): From 082b017b27d9020aceb1132c283b3bad4a8389a7 Mon Sep 17 00:00:00 2001 From: UncleSp1d3r Date: Mon, 30 Mar 2026 00:06:32 -0400 Subject: [PATCH 09/28] docs(parser): improve formatting for date and timestamp types Signed-off-by: UncleSp1d3r --- docs/src/magic-format.md | 28 ++++++++++++++-------------- docs/src/parser.md | 2 ++ 2 files changed, 16 insertions(+), 14 deletions(-) diff --git a/docs/src/magic-format.md b/docs/src/magic-format.md index 
8a372f5..b7c9775 100644 --- a/docs/src/magic-format.md +++ b/docs/src/magic-format.md @@ -179,20 +179,20 @@ Float comparison behavior: ### Date/Timestamp Types -| Type | Size | Endianness | UTC/Local | Description | -| ----------- | ------- | ------------- | --------- | ----------------------------------------------------------------------- | -| `date` | 4 bytes | native | UTC | 32-bit Unix timestamp (signed seconds since epoch), formatted as UTC | -| `ldate` | 4 bytes | native | Local | 32-bit Unix timestamp, formatted as local time | -| `bedate` | 4 bytes | big-endian | UTC | 32-bit Unix timestamp, big-endian byte order, UTC | -| `beldate` | 4 bytes | big-endian | Local | 32-bit Unix timestamp, big-endian byte order, local time | -| `ledate` | 4 bytes | little-endian | UTC | 32-bit Unix timestamp, little-endian byte order, UTC | -| `leldate` | 4 bytes | little-endian | Local | 32-bit Unix timestamp, little-endian byte order, local time | -| `qdate` | 8 bytes | native | UTC | 64-bit Unix timestamp (signed seconds since epoch), formatted as UTC | -| `qldate` | 8 bytes | native | Local | 64-bit Unix timestamp, formatted as local time | -| `beqdate` | 8 bytes | big-endian | UTC | 64-bit Unix timestamp, big-endian byte order, UTC | -| `beqldate` | 8 bytes | big-endian | Local | 64-bit Unix timestamp, big-endian byte order, local time | -| `leqdate` | 8 bytes | little-endian | UTC | 64-bit Unix timestamp, little-endian byte order, UTC | -| `leqldate` | 8 bytes | little-endian | Local | 64-bit Unix timestamp, little-endian byte order, local time | +| Type | Size | Endianness | UTC/Local | Description | +| ---------- | ------- | ------------- | --------- | -------------------------------------------------------------------- | +| `date` | 4 bytes | native | UTC | 32-bit Unix timestamp (signed seconds since epoch), formatted as UTC | +| `ldate` | 4 bytes | native | Local | 32-bit Unix timestamp, formatted as local time | +| `bedate` | 4 bytes | big-endian | UTC | 32-bit 
Unix timestamp, big-endian byte order, UTC | +| `beldate` | 4 bytes | big-endian | Local | 32-bit Unix timestamp, big-endian byte order, local time | +| `ledate` | 4 bytes | little-endian | UTC | 32-bit Unix timestamp, little-endian byte order, UTC | +| `leldate` | 4 bytes | little-endian | Local | 32-bit Unix timestamp, little-endian byte order, local time | +| `qdate` | 8 bytes | native | UTC | 64-bit Unix timestamp (signed seconds since epoch), formatted as UTC | +| `qldate` | 8 bytes | native | Local | 64-bit Unix timestamp, formatted as local time | +| `beqdate` | 8 bytes | big-endian | UTC | 64-bit Unix timestamp, big-endian byte order, UTC | +| `beqldate` | 8 bytes | big-endian | Local | 64-bit Unix timestamp, big-endian byte order, local time | +| `leqdate` | 8 bytes | little-endian | UTC | 64-bit Unix timestamp, little-endian byte order, UTC | +| `leqldate` | 8 bytes | little-endian | Local | 64-bit Unix timestamp, little-endian byte order, local time | Timestamp values are formatted as strings matching GNU file output format: "Www Mmm DD HH:MM:SS YYYY" diff --git a/docs/src/parser.md b/docs/src/parser.md index 3a25386..b59adde 100644 --- a/docs/src/parser.md +++ b/docs/src/parser.md @@ -266,6 +266,7 @@ Pascal strings store the length as a prefix (1, 2, or 4 bytes depending on the v The parser supports date and timestamp types for parsing Unix timestamps (signed seconds since epoch). 
There are 12 type keywords: **32-bit timestamps (Date):** + - `date` - Native endian, UTC - `ldate` - Native endian, local time - `bedate` - Big-endian, UTC @@ -274,6 +275,7 @@ The parser supports date and timestamp types for parsing Unix timestamps (signed - `leldate` - Little-endian, local time **64-bit timestamps (QDate):** + - `qdate` - Native endian, UTC - `qldate` - Native endian, local time - `beqdate` - Big-endian, UTC From f420dfc14fa652f4b3da7f76a585f5424c76dd5f Mon Sep 17 00:00:00 2001 From: UncleSp1d3r Date: Mon, 30 Mar 2026 00:40:42 -0400 Subject: [PATCH 10/28] chore(gitignore): remove unnecessary entries from .gitignore Signed-off-by: UncleSp1d3r --- .tessl/.gitignore | 2 -- 1 file changed, 2 deletions(-) delete mode 100644 .tessl/.gitignore diff --git a/.tessl/.gitignore b/.tessl/.gitignore deleted file mode 100644 index 7bbb394..0000000 --- a/.tessl/.gitignore +++ /dev/null @@ -1,2 +0,0 @@ -tiles/ -RULES.md From 66c0723567b5c6952b0f5e2e57152c8fd11b82d7 Mon Sep 17 00:00:00 2001 From: UncleSp1d3r Date: Mon, 30 Mar 2026 00:40:51 -0400 Subject: [PATCH 11/28] feat(offset): implement parsing and evaluation for indirect offsets Signed-off-by: UncleSp1d3r --- AGENTS.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/AGENTS.md b/AGENTS.md index e0c74b0..97750d7 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -204,7 +204,7 @@ cargo test --doc # Test documentation examples ### Currently Implemented (v0.1.0) -- **Offsets**: Absolute and from-end specifications (indirect and relative are parsed but not yet evaluated) +- **Offsets**: Absolute, from-end, and indirect specifications (relative offsets are parsed but not yet evaluated) - **Types**: `byte`, `short`, `long`, `quad`, `float`, `double`, `string`, `pstring` with endianness support; unsigned variants `ubyte`, `ushort`/`ubeshort`/`uleshort`, `ulong`/`ubelong`/`ulelong`, `uquad`/`ubequad`/`ulequad`; float/double endian variants `befloat`/`lefloat`, `bedouble`/`ledouble`; 32-bit date/timestamp 
types `date`/`ldate`/`bedate`/`beldate`/`ledate`/`leldate`; 64-bit date/timestamp types `qdate`/`qldate`/`beqdate`/`beqldate`/`leqdate`/`leqldate`; `pstring` is a Pascal string (length-prefixed) with support for 1/2/4-byte length prefixes via `/B`, `/H` (2-byte BE), `/h` (2-byte LE), `/L` (4-byte BE), `/l` (4-byte LE) suffixes, and the `/J` flag (stored length includes prefix width, JPEG convention) which is combinable with width suffixes (e.g., `pstring/HJ`); date values formatted as "Www Mmm DD HH:MM:SS YYYY" matching GNU `file` output; types are signed by default (libmagic-compatible) - **Operators**: `=` (equal), `!=` (not equal), `<` (less than), `>` (greater than), `<=` (less equal), `>=` (greater equal), `&` (bitwise AND with optional mask), `^` (bitwise XOR), `~` (bitwise NOT), `x` (any value) - **Nested Rules**: Hierarchical rule evaluation with proper indentation @@ -245,7 +245,7 @@ impl BinaryRegex for regex::bytes::Regex { ### Offset Specifications -- Indirect offsets are parsed into the AST but evaluation is not yet implemented (#37) +- Indirect offsets are fully implemented (parsing + evaluation) with specifiers: `.b/.B` (byte), `.s/.S` (short), `.l/.L` (long), `.q/.Q` (quad); lowercase = native endian, uppercase = big-endian; supports `+/-` adjustments - Relative offsets are parsed into the AST but evaluation is not yet implemented (#38) - Only absolute and from-end offsets are fully functional From a77973610addfb2503518b7c805dc3dd9b5d968a Mon Sep 17 00:00:00 2001 From: UncleSp1d3r Date: Mon, 30 Mar 2026 00:40:58 -0400 Subject: [PATCH 12/28] feat(parser): implement parsing for indirect offset specifications Signed-off-by: UncleSp1d3r --- src/parser/grammar/mod.rs | 136 ++++++++++++- src/parser/grammar/tests.rs | 382 ++++++++++++++++++++++++++++++++++++ 2 files changed, 511 insertions(+), 7 deletions(-) diff --git a/src/parser/grammar/mod.rs b/src/parser/grammar/mod.rs index ca8074a..b928bb3 100644 --- a/src/parser/grammar/mod.rs +++ 
b/src/parser/grammar/mod.rs @@ -17,7 +17,9 @@ use nom::{ sequence::pair, }; -use crate::parser::ast::{MagicRule, OffsetSpec, Operator, StrengthModifier, TypeKind, Value}; +use crate::parser::ast::{ + Endianness, MagicRule, OffsetSpec, Operator, StrengthModifier, TypeKind, Value, +}; /// Parse a decimal number with overflow protection fn parse_decimal_number(input: &str) -> IResult<&str, i64> { @@ -153,21 +155,134 @@ pub fn parse_number(input: &str) -> IResult<&str, i64> { Ok((input, result)) } -/// Parse an offset specification for absolute offsets +/// Map a single-character pointer specifier to its `TypeKind` and `Endianness`. +/// +/// Libmagic convention: lowercase = native endian, uppercase = big-endian. +/// +/// | Specifier | Width | Endianness | +/// |-----------|--------|------------| +/// | `b`, `B` | 1 byte | N/A | +/// | `s` | 2 byte | Native | +/// | `S` | 2 byte | Big | +/// | `l` | 4 byte | Native | +/// | `L` | 4 byte | Big | +/// | `q` | 8 byte | Native | +/// | `Q` | 8 byte | Big | +fn pointer_specifier_to_type(spec: char) -> Option<(TypeKind, Endianness)> { + match spec { + 'b' | 'B' => Some((TypeKind::Byte { signed: false }, Endianness::Native)), + 's' => Some(( + TypeKind::Short { + endian: Endianness::Native, + signed: false, + }, + Endianness::Native, + )), + 'S' => Some(( + TypeKind::Short { + endian: Endianness::Big, + signed: false, + }, + Endianness::Big, + )), + 'l' => Some(( + TypeKind::Long { + endian: Endianness::Native, + signed: false, + }, + Endianness::Native, + )), + 'L' => Some(( + TypeKind::Long { + endian: Endianness::Big, + signed: false, + }, + Endianness::Big, + )), + 'q' => Some(( + TypeKind::Quad { + endian: Endianness::Native, + signed: false, + }, + Endianness::Native, + )), + 'Q' => Some(( + TypeKind::Quad { + endian: Endianness::Big, + signed: false, + }, + Endianness::Big, + )), + _ => None, + } +} + +/// Parse an indirect offset specification: `(base.type)` or `(base.type+/-adj)` +/// +/// Reads a pointer specifier
after the dot, and an optional `+N` or `-N` adjustment +/// before the closing `)`. +fn parse_indirect_offset(input: &str) -> IResult<&str, OffsetSpec> { + let (input, _) = char('(')(input)?; + let (input, base_offset) = parse_number(input)?; + let (input, _) = char('.')(input)?; + let (input, spec_char) = one_of("bBsSlLqQ")(input)?; + + let (pointer_type, endian) = pointer_specifier_to_type(spec_char) + .ok_or_else(|| nom::Err::Error(NomError::new(input, nom::error::ErrorKind::OneOf)))?; + + // Optional adjustment: +N or -N + // parse_number handles '-' but not '+', so consume '+' manually + let (input, adjustment) = if input.starts_with('+') { + let (input, _) = char('+')(input)?; + parse_number(input)? + } else if input.starts_with('-') { + parse_number(input)? + } else { + (input, 0) + }; + + let (input, _) = char(')')(input)?; + + Ok(( + input, + OffsetSpec::Indirect { + base_offset, + pointer_type, + adjustment, + endian, + }, + )) +} + +/// Parse an offset specification (absolute or indirect) /// -/// Supports decimal and hexadecimal formats, both positive and negative. 
+/// Supports: +/// - Absolute offsets: decimal and hexadecimal, positive and negative +/// - Indirect offsets: `(base.type)` or `(base.type+adj)` syntax /// /// # Examples /// /// ``` /// use libmagic_rs::parser::grammar::parse_offset; -/// use libmagic_rs::parser::ast::OffsetSpec; +/// use libmagic_rs::parser::ast::{Endianness, OffsetSpec, TypeKind}; /// +/// // Absolute offsets /// assert_eq!(parse_offset("0"), Ok(("", OffsetSpec::Absolute(0)))); /// assert_eq!(parse_offset("123"), Ok(("", OffsetSpec::Absolute(123)))); /// assert_eq!(parse_offset("0x10"), Ok(("", OffsetSpec::Absolute(16)))); /// assert_eq!(parse_offset("-4"), Ok(("", OffsetSpec::Absolute(-4)))); /// assert_eq!(parse_offset("-0xFF"), Ok(("", OffsetSpec::Absolute(-255)))); +/// +/// // Indirect offset +/// assert_eq!( +/// parse_offset("(0x3c.l)"), +/// Ok(("", OffsetSpec::Indirect { +/// base_offset: 0x3c, +/// pointer_type: TypeKind::Long { endian: Endianness::Native, signed: false }, +/// adjustment: 0, +/// endian: Endianness::Native, +/// })) +/// ); /// ``` /// /// # Errors @@ -176,12 +291,19 @@ pub fn parse_number(input: &str) -> IResult<&str, i64> { /// - The input contains invalid number format (propagated from `parse_number`) /// - Input is empty or contains no parseable offset value /// - The offset value cannot be represented as a valid `i64` +/// - Indirect offset has invalid pointer specifier or missing closing `)` pub fn parse_offset(input: &str) -> IResult<&str, OffsetSpec> { let (input, _) = multispace0(input)?; - let (input, offset_value) = parse_number(input)?; - let (input, _) = multispace0(input)?; - Ok((input, OffsetSpec::Absolute(offset_value))) + if input.starts_with('(') { + let (input, spec) = parse_indirect_offset(input)?; + let (input, _) = multispace0(input)?; + Ok((input, spec)) + } else { + let (input, offset_value) = parse_number(input)?; + let (input, _) = multispace0(input)?; + Ok((input, OffsetSpec::Absolute(offset_value))) + } } /// Parse comparison operators 
for magic rules diff --git a/src/parser/grammar/tests.rs b/src/parser/grammar/tests.rs index b01dc3c..6a30ffc 100644 --- a/src/parser/grammar/tests.rs +++ b/src/parser/grammar/tests.rs @@ -281,6 +281,388 @@ fn test_parse_offset_boundary_values() { ); } +// Indirect offset parsing tests +#[test] +fn test_parse_offset_indirect_all_specifiers() { + // .b / .B - byte + assert_eq!( + parse_offset("(0.b)"), + Ok(( + "", + OffsetSpec::Indirect { + base_offset: 0, + pointer_type: TypeKind::Byte { signed: false }, + adjustment: 0, + endian: Endianness::Native, + } + )) + ); + assert_eq!( + parse_offset("(0.B)"), + Ok(( + "", + OffsetSpec::Indirect { + base_offset: 0, + pointer_type: TypeKind::Byte { signed: false }, + adjustment: 0, + endian: Endianness::Native, + } + )) + ); + + // .s - short native, .S - short big-endian + assert_eq!( + parse_offset("(0.s)"), + Ok(( + "", + OffsetSpec::Indirect { + base_offset: 0, + pointer_type: TypeKind::Short { + endian: Endianness::Native, + signed: false + }, + adjustment: 0, + endian: Endianness::Native, + } + )) + ); + assert_eq!( + parse_offset("(0.S)"), + Ok(( + "", + OffsetSpec::Indirect { + base_offset: 0, + pointer_type: TypeKind::Short { + endian: Endianness::Big, + signed: false + }, + adjustment: 0, + endian: Endianness::Big, + } + )) + ); + + // .l - long native, .L - long big-endian + assert_eq!( + parse_offset("(0x3c.l)"), + Ok(( + "", + OffsetSpec::Indirect { + base_offset: 0x3c, + pointer_type: TypeKind::Long { + endian: Endianness::Native, + signed: false + }, + adjustment: 0, + endian: Endianness::Native, + } + )) + ); + assert_eq!( + parse_offset("(0x3c.L)"), + Ok(( + "", + OffsetSpec::Indirect { + base_offset: 0x3c, + pointer_type: TypeKind::Long { + endian: Endianness::Big, + signed: false + }, + adjustment: 0, + endian: Endianness::Big, + } + )) + ); + + // .q - quad native, .Q - quad big-endian + assert_eq!( + parse_offset("(0.q)"), + Ok(( + "", + OffsetSpec::Indirect { + base_offset: 0, + pointer_type: 
TypeKind::Quad { + endian: Endianness::Native, + signed: false + }, + adjustment: 0, + endian: Endianness::Native, + } + )) + ); + assert_eq!( + parse_offset("(0.Q)"), + Ok(( + "", + OffsetSpec::Indirect { + base_offset: 0, + pointer_type: TypeKind::Quad { + endian: Endianness::Big, + signed: false + }, + adjustment: 0, + endian: Endianness::Big, + } + )) + ); +} + +#[test] +fn test_parse_offset_indirect_with_positive_adjustment() { + assert_eq!( + parse_offset("(0x3c.l+4)"), + Ok(( + "", + OffsetSpec::Indirect { + base_offset: 0x3c, + pointer_type: TypeKind::Long { + endian: Endianness::Native, + signed: false + }, + adjustment: 4, + endian: Endianness::Native, + } + )) + ); + assert_eq!( + parse_offset("(0.b+0xFF)"), + Ok(( + "", + OffsetSpec::Indirect { + base_offset: 0, + pointer_type: TypeKind::Byte { signed: false }, + adjustment: 255, + endian: Endianness::Native, + } + )) + ); +} + +#[test] +fn test_parse_offset_indirect_with_negative_adjustment() { + assert_eq!( + parse_offset("(0x3c.l-8)"), + Ok(( + "", + OffsetSpec::Indirect { + base_offset: 0x3c, + pointer_type: TypeKind::Long { + endian: Endianness::Native, + signed: false + }, + adjustment: -8, + endian: Endianness::Native, + } + )) + ); + assert_eq!( + parse_offset("(100.s-0x10)"), + Ok(( + "", + OffsetSpec::Indirect { + base_offset: 100, + pointer_type: TypeKind::Short { + endian: Endianness::Native, + signed: false + }, + adjustment: -16, + endian: Endianness::Native, + } + )) + ); +} + +#[test] +fn test_parse_offset_indirect_negative_base() { + // Negative base offsets (from end of file) + assert_eq!( + parse_offset("(-4.l)"), + Ok(( + "", + OffsetSpec::Indirect { + base_offset: -4, + pointer_type: TypeKind::Long { + endian: Endianness::Native, + signed: false + }, + adjustment: 0, + endian: Endianness::Native, + } + )) + ); + assert_eq!( + parse_offset("(-0x10.s+2)"), + Ok(( + "", + OffsetSpec::Indirect { + base_offset: -16, + pointer_type: TypeKind::Short { + endian: Endianness::Native, + 
signed: false + }, + adjustment: 2, + endian: Endianness::Native, + } + )) + ); +} + +#[test] +fn test_parse_offset_indirect_hex_base() { + assert_eq!( + parse_offset("(0xFF.l)"), + Ok(( + "", + OffsetSpec::Indirect { + base_offset: 0xFF, + pointer_type: TypeKind::Long { + endian: Endianness::Native, + signed: false + }, + adjustment: 0, + endian: Endianness::Native, + } + )) + ); +} + +#[test] +fn test_parse_offset_indirect_with_whitespace() { + // Leading whitespace should be handled + assert_eq!( + parse_offset(" (0x3c.l)"), + Ok(( + "", + OffsetSpec::Indirect { + base_offset: 0x3c, + pointer_type: TypeKind::Long { + endian: Endianness::Native, + signed: false + }, + adjustment: 0, + endian: Endianness::Native, + } + )) + ); + // Trailing content should be left unconsumed + assert_eq!( + parse_offset("(0x3c.l) string"), + Ok(( + "string", + OffsetSpec::Indirect { + base_offset: 0x3c, + pointer_type: TypeKind::Long { + endian: Endianness::Native, + signed: false + }, + adjustment: 0, + endian: Endianness::Native, + } + )) + ); +} + +#[test] +fn test_parse_offset_indirect_parse_failures() { + // Missing closing paren + assert!(parse_offset("(0x3c.l").is_err()); + // Missing dot and type + assert!(parse_offset("(0x3c)").is_err()); + // Invalid specifier character + assert!(parse_offset("(0x3c.x)").is_err()); + // Empty parens + assert!(parse_offset("()").is_err()); + // Missing base + assert!(parse_offset("(.l)").is_err()); +} + +#[test] +fn test_parse_rule_offset_indirect() { + // Level 0 indirect + assert_eq!( + parse_rule_offset("(0x3c.l)"), + Ok(( + "", + ( + 0, + OffsetSpec::Indirect { + base_offset: 0x3c, + pointer_type: TypeKind::Long { + endian: Endianness::Native, + signed: false + }, + adjustment: 0, + endian: Endianness::Native, + } + ) + )) + ); +} + +#[test] +fn test_parse_rule_offset_indirect_child() { + // Level 1 child with indirect offset: >(0x3c.l) + assert_eq!( + parse_rule_offset(">(0x3c.l)"), + Ok(( + "", + ( + 1, + OffsetSpec::Indirect { + 
base_offset: 0x3c, + pointer_type: TypeKind::Long { + endian: Endianness::Native, + signed: false + }, + adjustment: 0, + endian: Endianness::Native, + } + ) + )) + ); + // Level 2 child with indirect offset + adjustment + assert_eq!( + parse_rule_offset(">>(0x3c.l+4)"), + Ok(( + "", + ( + 2, + OffsetSpec::Indirect { + base_offset: 0x3c, + pointer_type: TypeKind::Long { + endian: Endianness::Native, + signed: false + }, + adjustment: 4, + endian: Endianness::Native, + } + ) + )) + ); +} + +#[test] +fn test_parse_rule_offset_indirect_with_remaining() { + assert_eq!( + parse_rule_offset(">(0x3c.l) string"), + Ok(( + "string", + ( + 1, + OffsetSpec::Indirect { + base_offset: 0x3c, + pointer_type: TypeKind::Long { + endian: Endianness::Native, + signed: false + }, + adjustment: 0, + endian: Endianness::Native, + } + ) + )) + ); +} + // Operator parsing tests #[test] fn test_parse_operator_equality() { From f24206b145c1d52fa0c5dc4b45be49de501ea17b Mon Sep 17 00:00:00 2001 From: UncleSp1d3r Date: Mon, 30 Mar 2026 00:41:04 -0400 Subject: [PATCH 13/28] test(offset): add integration tests for indirect offset resolution Signed-off-by: UncleSp1d3r --- tests/indirect_offset_integration.rs | 183 +++++++++++++++++++++++++++ 1 file changed, 183 insertions(+) create mode 100644 tests/indirect_offset_integration.rs diff --git a/tests/indirect_offset_integration.rs b/tests/indirect_offset_integration.rs new file mode 100644 index 0000000..98476a2 --- /dev/null +++ b/tests/indirect_offset_integration.rs @@ -0,0 +1,183 @@ +// Copyright (c) 2025-2026 the libmagic-rs contributors +// SPDX-License-Identifier: Apache-2.0 + +//! Integration tests for indirect offset parsing and evaluation +//! +//! Exercises the full pipeline: write a magic file with indirect-offset syntax, +//! load it through `MagicDatabase::load_from_file()`, evaluate buffers, and +//! assert correct match / no-match behavior. 
+ +use std::fs; +use std::io::Write; + +use libmagic_rs::MagicDatabase; +use tempfile::TempDir; + +/// Build a PE-like buffer where offset 0x3c holds a big-endian 4-byte pointer +/// to the PE signature (`PE\0\0`). +/// +/// Layout: +/// [0x00] "MZ" DOS header stub +/// [0x3c] 4-byte big-endian pointer -> 0x80 (PE header location) +/// [0x80] "PE\0\0" signature +fn build_pe_like_buffer() -> Vec<u8> { + let mut buf = vec![0u8; 0x84]; + // DOS stub magic + buf[0] = b'M'; + buf[1] = b'Z'; + // Big-endian pointer at 0x3c -> 0x80 + buf[0x3c] = 0x00; + buf[0x3d] = 0x00; + buf[0x3e] = 0x00; + buf[0x3f] = 0x80; + // PE signature at 0x80 + buf[0x80] = b'P'; + buf[0x81] = b'E'; + buf[0x82] = 0x00; + buf[0x83] = 0x00; + buf +} + +#[test] +fn test_indirect_offset_pe_detection_via_magic_file() { + let temp_dir = TempDir::new().unwrap(); + let magic_path = temp_dir.path().join("pe.magic"); + + // Use .L (big-endian long) for deterministic cross-platform behavior. + // String values must be quoted for the parser.
+ let mut f = fs::File::create(&magic_path).unwrap(); + writeln!(f, r#"0 string "MZ" DOS executable"#).unwrap(); + writeln!(f, r#">(0x3c.L) string "PE" (PE)"#).unwrap(); + + let db = MagicDatabase::load_from_file(&magic_path).unwrap(); + let buf = build_pe_like_buffer(); + let result = db.evaluate_buffer(&buf).unwrap(); + + assert!( + result.description.contains("DOS executable"), + "Expected DOS executable match, got: {}", + result.description + ); + assert!( + result.description.contains("(PE)"), + "Expected PE child match via indirect offset, got: {}", + result.description + ); +} + +#[test] +fn test_indirect_offset_no_match_when_pointer_out_of_bounds() { + let temp_dir = TempDir::new().unwrap(); + let magic_path = temp_dir.path().join("pe.magic"); + + let mut f = fs::File::create(&magic_path).unwrap(); + writeln!(f, r#"0 string "MZ" DOS executable"#).unwrap(); + writeln!(f, r#">(0x3c.L) string "PE" (PE)"#).unwrap(); + + let db = MagicDatabase::load_from_file(&magic_path).unwrap(); + + // Buffer has "MZ" but the pointer at 0x3c points beyond the buffer + let mut buf = vec![0u8; 0x40]; + buf[0] = b'M'; + buf[1] = b'Z'; + // Pointer at 0x3c -> 0xFF (beyond buffer length) + buf[0x3c] = 0x00; + buf[0x3d] = 0x00; + buf[0x3e] = 0x00; + buf[0x3f] = 0xFF; + + let result = db.evaluate_buffer(&buf).unwrap(); + + // The parent "MZ" rule should still match + assert!( + result.description.contains("DOS executable"), + "Expected DOS match even when child fails, got: {}", + result.description + ); + // But the PE child should NOT match (pointer out of bounds) + assert!( + !result.description.contains("(PE)"), + "PE child should not match when pointer is out of bounds, got: {}", + result.description + ); +} + +#[test] +fn test_indirect_offset_with_adjustment() { + let temp_dir = TempDir::new().unwrap(); + let magic_path = temp_dir.path().join("adj.magic"); + + // Indirect offset with +4 adjustment: read pointer at 0, add 4, check there + let mut f = 
fs::File::create(&magic_path).unwrap(); + writeln!(f, r#"(0.L+4) string "MAGIC" Adjusted match"#).unwrap(); + + let db = MagicDatabase::load_from_file(&magic_path).unwrap(); + + // Pointer at offset 0 = 0x00000006 (big-endian), +4 = 10, "MAGIC" at offset 10 + let mut buf = vec![0u8; 20]; + buf[0] = 0x00; + buf[1] = 0x00; + buf[2] = 0x00; + buf[3] = 0x06; + buf[10] = b'M'; + buf[11] = b'A'; + buf[12] = b'G'; + buf[13] = b'I'; + buf[14] = b'C'; + + let result = db.evaluate_buffer(&buf).unwrap(); + assert!( + result.description.contains("Adjusted match"), + "Expected adjusted indirect match, got: {}", + result.description + ); +} + +#[test] +fn test_indirect_offset_byte_specifier() { + let temp_dir = TempDir::new().unwrap(); + let magic_path = temp_dir.path().join("byte_ptr.magic"); + + // Use .b (byte pointer): read 1 byte at offset 0, use as offset + let mut f = fs::File::create(&magic_path).unwrap(); + writeln!(f, r#"(0.b) string "OK" Byte pointer match"#).unwrap(); + + let db = MagicDatabase::load_from_file(&magic_path).unwrap(); + + // Byte at offset 0 = 5, so check for "OK" at offset 5 + let mut buf = vec![0u8; 10]; + buf[0] = 5; + buf[5] = b'O'; + buf[6] = b'K'; + + let result = db.evaluate_buffer(&buf).unwrap(); + assert!( + result.description.contains("Byte pointer match"), + "Expected byte pointer match, got: {}", + result.description + ); +} + +#[test] +fn test_indirect_offset_loading_does_not_error() { + let temp_dir = TempDir::new().unwrap(); + let magic_path = temp_dir.path().join("load.magic"); + + // Verify the parsing path succeeds for all specifier variants + let mut f = fs::File::create(&magic_path).unwrap(); + writeln!(f, r#"(0.b) string "A" byte ptr"#).unwrap(); + writeln!(f, r#"(0.B) string "A" Byte ptr"#).unwrap(); + writeln!(f, r#"(0.s) string "A" short native ptr"#).unwrap(); + writeln!(f, r#"(0.S) string "A" short BE ptr"#).unwrap(); + writeln!(f, r#"(0.l) string "A" long native ptr"#).unwrap(); + writeln!(f, r#"(0.L) string "A" long BE 
ptr"#).unwrap(); + writeln!(f, r#"(0.q) string "A" quad native ptr"#).unwrap(); + writeln!(f, r#"(0.Q) string "A" quad BE ptr"#).unwrap(); + + let result = MagicDatabase::load_from_file(&magic_path); + assert!( + result.is_ok(), + "Loading magic file with all indirect specifiers should succeed: {:?}", + result.err() + ); +} From 96b0a1e746e239dd4190fa17de00094664b602a6 Mon Sep 17 00:00:00 2001 From: UncleSp1d3r Date: Mon, 30 Mar 2026 00:41:21 -0400 Subject: [PATCH 14/28] feat(parser): implement indirect offset parsing for magic file grammar Signed-off-by: UncleSp1d3r --- .../indirect-offset-parser-evaluator-sync.md | 165 ++++++++++++++++++ 1 file changed, 165 insertions(+) create mode 100644 docs/solutions/integration-issues/indirect-offset-parser-evaluator-sync.md diff --git a/docs/solutions/integration-issues/indirect-offset-parser-evaluator-sync.md b/docs/solutions/integration-issues/indirect-offset-parser-evaluator-sync.md new file mode 100644 index 0000000..bb611da --- /dev/null +++ b/docs/solutions/integration-issues/indirect-offset-parser-evaluator-sync.md @@ -0,0 +1,165 @@ +--- +title: Implement indirect offset parsing in magic file grammar +date: 2026-03-30 +status: resolved +severity: high +category: integration-issues +components: + - parser/grammar + - evaluator/offset + - integration +tags: + - parser + - indirect-offset + - nom + - magic-file-syntax + - pointer-specifier +issue: '#37' +branch: 37-evaluator-implement-indirect-offset-resolution +symptoms: + - parse_offset("(0x3c.l)") fails with parse error + - Magic files containing indirect offset syntax cannot be loaded via MagicDatabase::load_from_file() + - resolve_indirect_offset() is unreachable dead code from text-magic loading path +root_cause: parse_offset() had no branch for '('-prefixed input; always delegated to parse_number() which only handles numeric literals +solution_files: + - src/parser/grammar/mod.rs + - src/parser/grammar/tests.rs + - tests/indirect_offset_integration.rs 
+related_gotchas: + - parse_number() handles '-' prefix but not '+'; positive adjustments need manual '+' consumption + - parse_value() requires quoted strings; bare string literals cause integration test failures +--- + +# Indirect Offset Parser-Evaluator Sync + +## Problem + +The evaluator for indirect offsets (`resolve_indirect_offset()` in `src/evaluator/offset/indirect.rs`) was fully implemented with 35 unit tests, but the parser in `src/parser/grammar/mod.rs` could not produce `OffsetSpec::Indirect` AST nodes. The `parse_offset()` function only handled absolute numeric offsets and had no branch for `(`-prefixed indirect offset syntax like `(0x3c.l)` or `(0x3c.l+4)`. + +This meant the feature was unreachable through the public `MagicDatabase::load_from_file()` API -- the primary way users load text magic files. + +## Root Cause + +`parse_offset()` unconditionally delegated to `parse_number()`, which only parses numeric literals. Input starting with `(` was rejected as a parse error. The evaluator code was effectively dead code from the text-magic loading path. + +## Solution + +### 1. Added `pointer_specifier_to_type()` helper + +Maps single-character pointer specifiers to `(TypeKind, Endianness)` per libmagic convention: + +| Specifier | Width | Endianness | +| ---------- | ------ | ---------- | +| `.b`, `.B` | 1 byte | Native | +| `.s` | 2 byte | Native | +| `.S` | 2 byte | Big | +| `.l` | 4 byte | Native | +| `.L` | 4 byte | Big | +| `.q` | 8 byte | Native | +| `.Q` | 8 byte | Big | + +All pointer types are unsigned (`signed: false`). Lowercase = native endian, uppercase = big-endian. + +### 2. Added `parse_indirect_offset()` function + +Parses `(base.type)` and `(base.type+/-adj)` syntax: + +1. Consume `(` +2. Parse base offset via `parse_number()` +3. Consume `.` and type specifier character +4. Optionally parse adjustment (see gotcha below) +5. Consume `)` +6. Return `OffsetSpec::Indirect { base_offset, pointer_type, adjustment, endian }` + +### 3. 
Updated `parse_offset()` to branch on leading `(` + +```rust +pub fn parse_offset(input: &str) -> IResult<&str, OffsetSpec> { + let (input, _) = multispace0(input)?; + if input.starts_with('(') { + let (input, spec) = parse_indirect_offset(input)?; + let (input, _) = multispace0(input)?; + Ok((input, spec)) + } else { + let (input, offset_value) = parse_number(input)?; + let (input, _) = multispace0(input)?; + Ok((input, OffsetSpec::Absolute(offset_value))) + } +} +``` + +### 4. No changes needed to `parse_rule_offset()` + +It delegates to `parse_offset()`, so hierarchical forms like `>(0x3c.l)` work automatically. + +## Gotchas Discovered + +### `parse_number()` does not handle `+` prefix + +`parse_number()` handles `-` internally but not `+`. For `+N` adjustments, the `+` must be consumed manually: + +```rust +let (input, adjustment) = if input.starts_with('+') { + let (input, _) = char('+')(input)?; + parse_number(input)? +} else if input.starts_with('-') { + parse_number(input)? +} else { + (input, 0) +}; +``` + +Do NOT modify `parse_number()` globally -- it is shared by offset and value parsing, and adding `+` support would change semantics elsewhere. + +### `parse_value()` requires quoted strings + +Integration tests initially failed because `parse_value()` does not accept bare strings. Magic file string values must be quoted: + +```text +# Correct +0 string "MZ" DOS executable + +# Wrong -- parse_value() rejects bare "MZ" +0 string MZ DOS executable +``` + +### Use big-endian specifiers in cross-platform tests + +Prefer `.L` (big-endian long) over `.l` (native) in integration test magic files so byte buffers are deterministic across architectures. + +## Prevention Strategies + +### Parser-Evaluator Parity Checklist + +When adding a new AST variant, ensure: + +1. **Parser produces it** -- unit test parses raw syntax, asserts correct AST node +2. **Evaluator consumes it** -- unit test constructs AST node, asserts evaluation result +3. 
**End-to-end test exists** -- integration test through `MagicDatabase::load_from_file()` proves the full pipeline works +4. **Codegen handles it** -- if it can appear in built-in rules, update `src/parser/codegen.rs` +5. **Strength calculation covers it** -- update `src/evaluator/strength.rs` if scoring changes + +### Integration Test Template + +```rust +#[test] +fn test_feature_end_to_end() { + let temp_dir = TempDir::new().unwrap(); + let magic_path = temp_dir.path().join("test.magic"); + let mut f = fs::File::create(&magic_path).unwrap(); + writeln!(f, r#"0 string "MAGIC" Test match"#).unwrap(); + + let db = MagicDatabase::load_from_file(&magic_path).unwrap(); + let result = db.evaluate_buffer(b"MAGIC\x00data").unwrap(); + assert!(result.description.contains("Test match")); +} +``` + +## Cross-References + +- **Evaluator solution**: `docs/solutions/logic-errors/indirect-offset-resolution.md` +- **Magic format spec**: `docs/MAGIC_FORMAT.md` (lines 106-126, indirect offset section) +- **Gotchas**: `GOTCHAS.md` sections 3.5 (`parse_number` `+` limitation) and 3.6 (quoted strings) +- **Architecture**: `AGENTS.md` offset specifications section +- **Issue**: #37 (indirect offset resolution) +- **Related gotchas**: S2 (enum variant checklists), S3 (parser architecture split), S5 (numeric type pitfalls) From a9e90ebd3e8e151c112e881de367798ecada3659 Mon Sep 17 00:00:00 2001 From: UncleSp1d3r Date: Mon, 30 Mar 2026 00:41:35 -0400 Subject: [PATCH 15/28] feat(evaluator): implement indirect offset resolution for binary formats Signed-off-by: UncleSp1d3r --- .../indirect-offset-resolution.md | 82 +++++++++++++++++++ 1 file changed, 82 insertions(+) create mode 100644 docs/solutions/logic-errors/indirect-offset-resolution.md diff --git a/docs/solutions/logic-errors/indirect-offset-resolution.md b/docs/solutions/logic-errors/indirect-offset-resolution.md new file mode 100644 index 0000000..be10ce0 --- /dev/null +++ b/docs/solutions/logic-errors/indirect-offset-resolution.md @@ 
-0,0 +1,82 @@ +--- +title: Implementing Indirect Offset Resolution for Binary Format Detection +category: logic-errors +date: 2026-03-30 +tags: [evaluator, offsets, indirect, binary-formats, pe-header, pointer-chasing] +issue: '#37' +severity: high +components: [evaluator/offset/indirect.rs, evaluator/offset/mod.rs] +--- + +# Implementing Indirect Offset Resolution + +## Problem + +Indirect offsets (`OffsetSpec::Indirect`) were parsed into the AST but evaluation returned "not yet implemented." This blocked detection of complex binary formats like PE executables, where a pointer at offset `0x3C` must be read and dereferenced to locate the PE header. + +Syntax: `(0x3c.l)` -- read a 32-bit long at offset 0x3C, use that value as the actual offset. + +## Root Cause + +The evaluator's `resolve_offset()` dispatcher in `offset/mod.rs` had a stub for `OffsetSpec::Indirect` that returned `UnsupportedType`. The implementation required a multi-step pointer dereference pipeline that did not exist. + +## Solution + +Implemented a 4-step pipeline in `evaluator/offset/indirect.rs`: + +1. **Resolve base offset** to absolute position (reuses `resolve_absolute_offset`, supports negative/from-end) +2. **Read pointer value** at that position using the specified numeric type and endianness +3. **Apply adjustment** with checked arithmetic (`checked_add`/`checked_sub`) +4. **Validate final offset** against buffer bounds + +### Key Design Decisions + +**Signed pointer reinterpretation**: Signed negative pointer values (e.g., `i32(-1)` from `[0xFF, 0xFF, 0xFF, 0xFF]`) are reinterpreted as raw unsigned (`u64::MAX`) via `extract_raw_unsigned()`. This matches libmagic's behavior where the bit pattern is what matters, not the signed interpretation. The bounds check at step 4 catches these enormous values. 
+ +**Separated concerns**: `read_pointer()` handles type dispatch and endianness, `extract_raw_unsigned()` handles signed-to-unsigned conversion, `apply_adjustment()` handles arithmetic with overflow protection. Each is independently testable. + +**`i64::MIN` edge case**: `apply_adjustment` explicitly handles `i64::MIN` because `-i64::MIN` overflows. Returns an error rather than panicking. + +```rust +// Core pipeline +let abs_base = resolve_absolute_offset(base_offset, buffer)?; +let pointer_value = read_pointer(buffer, abs_base, pointer_type, endian)?; +let final_offset = apply_adjustment(pointer_value, adjustment)?; +if final_offset >= buffer.len() { return Err(BufferOverrun) } +``` + +### Dispatcher Update + +`offset/mod.rs` line 71 changed from stub to: + +```rust +OffsetSpec::Indirect { .. } => indirect::resolve_indirect_offset(spec, buffer), +``` + +## Prevention Tips + +- When adding new offset types, follow the same pattern: resolve base, read value, apply adjustment, validate bounds. The 4-step pipeline is the established pattern. +- Always use `checked_add`/`checked_sub` for offset arithmetic -- malicious files can craft values targeting overflow. +- Signed pointer values must be treated as raw bit patterns (reinterpret as unsigned), not as mathematical negatives. This is a libmagic compatibility requirement. 
+ +## Test Coverage + +35 unit tests covering: + +- All pointer types (byte, short, long, quad) with both endiannesses +- Signed and unsigned pointer values +- Positive and negative adjustments +- From-end base offsets +- Pointer read buffer overruns +- Final offset buffer overruns +- Arithmetic overflow and underflow +- Unsupported pointer types (string, float, double) +- PE-header-style real-world scenario (0x3C pointer) +- 32-bit platform awareness (conditional assertions) + +## Related + +- Issue #38: Relative offset resolution (next offset type to implement) +- `evaluator/offset/absolute.rs`: Reused for base offset resolution +- `evaluator/types/`: `read_byte`, `read_short`, `read_long`, `read_quad` reused for pointer reading +- GOTCHAS.md S5.1: `usize::from(u32)` does not compile on 32-bit targets From 2e8b0348ce34ad2f790e8d2aba556769525b7d9d Mon Sep 17 00:00:00 2001 From: UncleSp1d3r Date: Mon, 30 Mar 2026 00:41:43 -0400 Subject: [PATCH 16/28] docs(gotchas): document limitations of parse_number and parse_value functions Signed-off-by: UncleSp1d3r --- GOTCHAS.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/GOTCHAS.md b/GOTCHAS.md index dfcf651..2dc97f4 100644 --- a/GOTCHAS.md +++ b/GOTCHAS.md @@ -59,6 +59,14 @@ The nom `tuple` combinator is deprecated. Use bare tuple syntax `(a, b, c)` dire `type_keyword_to_kind` has `#[allow(clippy::too_many_lines)]` because it exceeds 100 lines with all date keywords. +### 3.5 `parse_number` Does Not Handle `+` Prefix + +`parse_number` handles `-` signs but not `+`. When parsing syntax like `+4` (e.g., indirect offset adjustments), consume the `+` character manually before calling `parse_number`. + +### 3.6 `parse_value` Requires Quoted Strings + +`parse_value()` does not accept bare unquoted strings. String values in magic file rules must be quoted (e.g., `string "MZ"` not `string MZ`). Integration tests writing magic files must use `r#"0 string "MZ" description"#` format. + ## 4. 
Module Visibility & Re-exports ### 4.1 Private Engine Module From 94d8b91d3f9d214e7a93316ac8262b14f9d699d2 Mon Sep 17 00:00:00 2001 From: UncleSp1d3r Date: Mon, 30 Mar 2026 00:58:37 -0400 Subject: [PATCH 17/28] docs(agents): clarify indirect offset specifications and GNU semantics Signed-off-by: UncleSp1d3r --- AGENTS.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/AGENTS.md b/AGENTS.md index 97750d7..7fcc444 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -245,7 +245,7 @@ impl BinaryRegex for regex::bytes::Regex { ### Offset Specifications -- Indirect offsets are fully implemented (parsing + evaluation) with specifiers: `.b/.B` (byte), `.s/.S` (short), `.l/.L` (long), `.q/.Q` (quad); lowercase = native endian, uppercase = big-endian; supports `+/-` adjustments +- Indirect offsets are fully implemented (parsing + evaluation) with specifiers: `.b/.B` (byte), `.s/.S` (short), `.l/.L` (long), `.q/.Q` (quad); lowercase = little-endian, uppercase = big-endian (GNU `file` semantics); pointer types signed by default; adjustment after closing paren: `(base.type)+adj` - Relative offsets are parsed into the AST but evaluation is not yet implemented (#38) - Only absolute and from-end offsets are fully functional From 9bd7759b017b5c19c3870672f54cb00db3dac2c5 Mon Sep 17 00:00:00 2001 From: UncleSp1d3r Date: Mon, 30 Mar 2026 00:58:44 -0400 Subject: [PATCH 18/28] docs(gotchas): document indirect offset pointer specifiers and GNU semantics Signed-off-by: UncleSp1d3r --- GOTCHAS.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/GOTCHAS.md b/GOTCHAS.md index 2dc97f4..b872928 100644 --- a/GOTCHAS.md +++ b/GOTCHAS.md @@ -67,6 +67,10 @@ The nom `tuple` combinator is deprecated. Use bare tuple syntax `(a, b, c)` dire `parse_value()` does not accept bare unquoted strings. String values in magic file rules must be quoted (e.g., `string "MZ"` not `string MZ`). Integration tests writing magic files must use `r#"0 string "MZ" description"#` format. 
+### 3.7 Indirect Offset Pointer Specifiers Follow GNU `file` Semantics + +Lowercase pointer specifiers (`.s`, `.l`, `.q`) map to **little-endian**, not native endian. Uppercase (`.S`, `.L`, `.Q`) map to big-endian. All numeric pointer types are **signed by default** (per S6.3). The adjustment is parsed **after** the closing paren: `(base.type)+adj`, not `(base.type+adj)`. + ## 4. Module Visibility & Re-exports ### 4.1 Private Engine Module From b926bec20cdf9e36ddebaabbb18bbfa121dfefb0 Mon Sep 17 00:00:00 2001 From: UncleSp1d3r Date: Mon, 30 Mar 2026 00:58:57 -0400 Subject: [PATCH 19/28] fix(parser): correct indirect offset parser for GNU file semantics Signed-off-by: UncleSp1d3r --- .../indirect-offset-gnu-file-semantics.md | 123 ++++++++++++++++++ 1 file changed, 123 insertions(+) create mode 100644 docs/solutions/logic-errors/indirect-offset-gnu-file-semantics.md diff --git a/docs/solutions/logic-errors/indirect-offset-gnu-file-semantics.md b/docs/solutions/logic-errors/indirect-offset-gnu-file-semantics.md new file mode 100644 index 0000000..87bb25c --- /dev/null +++ b/docs/solutions/logic-errors/indirect-offset-gnu-file-semantics.md @@ -0,0 +1,123 @@ +--- +title: 'Fix indirect offset parser: endianness, signedness, and adjustment placement' +date: 2026-03-30 +status: resolved +severity: high +category: logic-errors +tags: + - parser + - indirect-offset + - gnu-file-semantics + - endianness + - signed-by-default +components: + - src/parser/grammar/mod.rs + - src/parser/grammar/tests.rs + - tests/indirect_offset_integration.rs + - GOTCHAS.md + - AGENTS.md +symptoms: + - (0x3c.l)+4 parsed as indirect with adjustment=0 and leftover +4, breaking parse_magic_rule() + - Lowercase pointer specifiers (.s, .l, .q) produced Endianness::Native instead of Endianness::Little + - Pointer types were unsigned, mismatching libmagic signed-by-default convention +root_causes: + - pointer_specifier_to_type() mapped lowercase specifiers to Endianness::Native instead of 
Endianness::Little + - 'pointer_specifier_to_type() set signed: false instead of signed: true' + - parse_indirect_offset() consumed adjustment inside parentheses instead of after closing paren +references: + - GOTCHAS.md S6.3 (signed-by-default types) + - GOTCHAS.md S3.7 (added by this fix) + - 'GNU file(1) man page: indirect offset syntax' +related_issues: + - 37 +--- + +# Fix Indirect Offset Parser: GNU `file` Semantics + +## Problem + +The indirect offset parser had three semantic errors that caused it to produce incorrect AST nodes. The code compiled and tests passed, but behavior was wrong relative to the GNU `file` specification: + +1. **Endianness**: Lowercase specifiers (`.s`, `.l`, `.q`) mapped to `Endianness::Native` instead of `Endianness::Little` +2. **Signedness**: Pointer types set to `signed: false` instead of `signed: true` (GOTCHAS S6.3) +3. **Adjustment syntax**: Parsed inside parens `(0x3c.l+4)` instead of after them `(0x3c.l)+4` + +The tests validated the wrong implementation rather than the specification -- a "tests match code but not spec" anti-pattern. + +## Root Cause + +The initial implementation followed incorrect assumptions: + +- Lowercase = native endian (wrong: GNU `file` defines lowercase = little-endian) +- Pointer types = unsigned (wrong: libmagic types are signed by default per S6.3) +- Adjustment inside parens (wrong: GNU `file` syntax places adjustment after `)`) + +Tests were written alongside the code, so they confirmed the implementation's behavior rather than the spec's requirements. + +## Solution + +Three changes in `src/parser/grammar/mod.rs`: + +### Fix 1: Endianness mapping + +```rust +// Before (wrong) +'l' => Some((TypeKind::Long { endian: Endianness::Native, signed: false }, Endianness::Native)) + +// After (correct -- GNU `file` lowercase = little-endian) +'l' => Some((TypeKind::Long { endian: Endianness::Little, signed: true }, Endianness::Little)) +``` + +Applied to all lowercase specifiers (`b`, `s`, `l`, `q`). 
Uppercase specifiers were already correct (`Endianness::Big`). + +### Fix 2: Signed-by-default + +Changed all pointer types from `signed: false` to `signed: true` across every specifier arm. + +### Fix 3: Adjustment after closing paren + +```rust +// Before (wrong): adjustment consumed inside parens +let (input, adjustment) = parse_adjustment(input)?; +let (input, _) = char(')')(input)?; + +// After (correct): close paren first, then adjustment +let (input, _) = char(')')(input)?; +let (input, adjustment) = parse_adjustment(input)?; +``` + +### Test corrections + +- All parser unit tests updated to expect `Endianness::Little`, `signed: true`, and `(base.type)+adj` syntax +- Integration tests updated with little-endian byte layouts and lowercase `.l` specifier +- Added new test: `>(0x3c.l)+4` child rule with adjustment after paren + +## Prevention Strategies + +### Spec-first test writing + +Write test expectations from the spec (GNU `file` man page, GOTCHAS.md) before implementing. Document the spec reference above each test case. In TDD, the RED phase must derive expected values from the spec, not from running the code. + +### Cross-reference GOTCHAS.md for type mappings + +Treat GOTCHAS.md as a mandatory checklist when adding type mappings: + +- **S6.3**: Default to `signed: true` unless keyword has `u` prefix +- **S6.1**: Uppercase = big-endian, lowercase = little-endian +- **S3.7**: Indirect offset specifiers follow GNU `file` semantics + +### Prefer deterministic endianness + +`Endianness::Native` should never appear in indirect offset resolution. Every endianness value must be explicitly `Little` or `Big` per the spec. Tests must use explicit byte sequences, not `to_ne_bytes()`. + +### Verify against real magic files + +Extract test inputs from `/usr/share/misc/magic` or the upstream [file/file](https://github.com/file/file) repository rather than inventing syntax. 
+ +## Cross-References + +- **Evaluator solution**: `docs/solutions/logic-errors/indirect-offset-resolution.md` +- **Parser-evaluator sync**: `docs/solutions/integration-issues/indirect-offset-parser-evaluator-sync.md` +- **Magic format spec**: `docs/MAGIC_FORMAT.md` (lines 106-126) +- **Gotchas**: `GOTCHAS.md` sections 3.5, 3.6, 3.7, 6.3 +- **Issue**: #37 From 5ea12f2b0c57db2342173a35169a984a1baa287a Mon Sep 17 00:00:00 2001 From: UncleSp1d3r Date: Mon, 30 Mar 2026 00:59:05 -0400 Subject: [PATCH 20/28] feat(parser): update indirect offset parsing to align with GNU semantics Signed-off-by: UncleSp1d3r --- src/parser/grammar/mod.rs | 80 ++++--- src/parser/grammar/tests.rs | 308 +++++++++++++-------------- tests/indirect_offset_integration.rs | 87 +++++--- 3 files changed, 253 insertions(+), 222 deletions(-) diff --git a/src/parser/grammar/mod.rs b/src/parser/grammar/mod.rs index b928bb3..57af1dc 100644 --- a/src/parser/grammar/mod.rs +++ b/src/parser/grammar/mod.rs @@ -157,59 +157,60 @@ pub fn parse_number(input: &str) -> IResult<&str, i64> { /// Map a single-character pointer specifier to its `TypeKind` and `Endianness`. /// -/// Libmagic convention: lowercase = native endian, uppercase = big-endian. -/// -/// | Specifier | Width | Endianness | -/// |-----------|--------|------------| -/// | `b`, `B` | 1 byte | N/A | -/// | `s` | 2 byte | Native | -/// | `S` | 2 byte | Big | -/// | `l` | 4 byte | Native | -/// | `L` | 4 byte | Big | -/// | `q` | 4 byte | Native | -/// | `Q` | 8 byte | Big | +/// GNU `file` semantics: lowercase = little-endian, uppercase = big-endian. +/// Numeric pointer types are signed by default per GOTCHAS S6.3. 
+/// +/// | Specifier | Width | Endianness | +/// |-----------|--------|---------------| +/// | `b`, `B` | 1 byte | Little-endian | +/// | `s` | 2 byte | Little-endian | +/// | `S` | 2 byte | Big-endian | +/// | `l` | 4 byte | Little-endian | +/// | `L` | 4 byte | Big-endian | +/// | `q` | 8 byte | Little-endian | +/// | `Q` | 8 byte | Big-endian | fn pointer_specifier_to_type(spec: char) -> Option<(TypeKind, Endianness)> { match spec { - 'b' | 'B' => Some((TypeKind::Byte { signed: false }, Endianness::Native)), + 'b' | 'B' => Some((TypeKind::Byte { signed: true }, Endianness::Little)), 's' => Some(( TypeKind::Short { - endian: Endianness::Native, - signed: false, + endian: Endianness::Little, + signed: true, }, - Endianness::Native, + Endianness::Little, )), 'S' => Some(( TypeKind::Short { endian: Endianness::Big, - signed: false, + signed: true, }, Endianness::Big, )), 'l' => Some(( TypeKind::Long { - endian: Endianness::Native, - signed: false, + endian: Endianness::Little, + signed: true, }, - Endianness::Native, + Endianness::Little, )), 'L' => Some(( TypeKind::Long { endian: Endianness::Big, - signed: false, + signed: true, }, Endianness::Big, )), 'q' => Some(( TypeKind::Quad { - endian: Endianness::Native, - signed: false, + endian: Endianness::Little, + signed: true, }, - Endianness::Native, + Endianness::Little, )), 'Q' => Some(( TypeKind::Quad { endian: Endianness::Big, - signed: false, + signed: true, }, Endianness::Big, )), @@ -217,10 +218,10 @@ fn pointer_specifier_to_type(spec: char) -> Option<(TypeKind, Endianness)> { } } -/// Parse an indirect offset specification: `(base.type)` or `(base.type+/-adj)` +/// Parse an indirect offset specification: `(base.type)` or `(base.type)+/-adj` /// -/// Reads a pointer specifier after the dot, and an optional `+N` or `-N` adjustment -/// before the closing `)`. +/// Reads a pointer specifier after the dot, closes the parenthesized expression, +/// then optionally parses `+N` or `-N` adjustment after the `)`. 
fn parse_indirect_offset(input: &str) -> IResult<&str, OffsetSpec> { let (input, _) = char('(')(input)?; let (input, base_offset) = parse_number(input)?; @@ -230,7 +231,9 @@ fn parse_indirect_offset(input: &str) -> IResult<&str, OffsetSpec> { let (pointer_type, endian) = pointer_specifier_to_type(spec_char) .ok_or_else(|| nom::Err::Error(NomError::new(input, nom::error::ErrorKind::OneOf)))?; - // Optional adjustment: +N or -N + let (input, _) = char(')')(input)?; + + // Optional adjustment AFTER closing paren: (base.type)+N or (base.type)-N // parse_number handles '-' but not '+', so consume '+' manually let (input, adjustment) = if input.starts_with('+') { let (input, _) = char('+')(input)?; @@ -241,8 +244,6 @@ fn parse_indirect_offset(input: &str) -> IResult<&str, OffsetSpec> { (input, 0) }; - let (input, _) = char(')')(input)?; - Ok(( input, OffsetSpec::Indirect { @@ -258,7 +259,7 @@ fn parse_indirect_offset(input: &str) -> IResult<&str, OffsetSpec> { /// /// Supports: /// - Absolute offsets: decimal and hexadecimal, positive and negative -/// - Indirect offsets: `(base.type)` or `(base.type+adj)` syntax +/// - Indirect offsets: `(base.type)` or `(base.type)+adj` syntax /// /// # Examples /// @@ -273,14 +274,25 @@ fn parse_indirect_offset(input: &str) -> IResult<&str, OffsetSpec> { /// assert_eq!(parse_offset("-4"), Ok(("", OffsetSpec::Absolute(-4)))); /// assert_eq!(parse_offset("-0xFF"), Ok(("", OffsetSpec::Absolute(-255)))); /// -/// // Indirect offset +/// // Indirect offset (lowercase = little-endian, signed by default) /// assert_eq!( /// parse_offset("(0x3c.l)"), /// Ok(("", OffsetSpec::Indirect { /// base_offset: 0x3c, -/// pointer_type: TypeKind::Long { endian: Endianness::Native, signed: false }, +/// pointer_type: TypeKind::Long { endian: Endianness::Little, signed: true }, /// adjustment: 0, -/// endian: Endianness::Native, +/// endian: Endianness::Little, +/// })) +/// ); +/// +/// // Adjustment after closing paren +/// assert_eq!( +/// 
parse_offset("(0x3c.l)+4"), +/// Ok(("", OffsetSpec::Indirect { +/// base_offset: 0x3c, +/// pointer_type: TypeKind::Long { endian: Endianness::Little, signed: true }, +/// adjustment: 4, +/// endian: Endianness::Little, /// })) /// ); /// ``` diff --git a/src/parser/grammar/tests.rs b/src/parser/grammar/tests.rs index 6a30ffc..d41dcf8 100644 --- a/src/parser/grammar/tests.rs +++ b/src/parser/grammar/tests.rs @@ -282,157 +282,116 @@ fn test_parse_offset_boundary_values() { } // Indirect offset parsing tests +// +// GNU `file` semantics: lowercase = little-endian, uppercase = big-endian. +// Numeric pointer types are signed by default (GOTCHAS S6.3). +// Adjustment is parsed AFTER the closing `)`: (base.type)+adj + #[test] fn test_parse_offset_indirect_all_specifiers() { - // .b / .B - byte - assert_eq!( - parse_offset("(0.b)"), - Ok(( - "", - OffsetSpec::Indirect { - base_offset: 0, - pointer_type: TypeKind::Byte { signed: false }, - adjustment: 0, - endian: Endianness::Native, - } - )) - ); - assert_eq!( - parse_offset("(0.B)"), - Ok(( - "", - OffsetSpec::Indirect { - base_offset: 0, - pointer_type: TypeKind::Byte { signed: false }, - adjustment: 0, - endian: Endianness::Native, - } - )) - ); - - // .s - short native, .S - short big-endian - assert_eq!( - parse_offset("(0.s)"), - Ok(( - "", - OffsetSpec::Indirect { - base_offset: 0, - pointer_type: TypeKind::Short { - endian: Endianness::Native, - signed: false - }, - adjustment: 0, - endian: Endianness::Native, - } - )) - ); - assert_eq!( - parse_offset("(0.S)"), - Ok(( - "", - OffsetSpec::Indirect { - base_offset: 0, - pointer_type: TypeKind::Short { - endian: Endianness::Big, - signed: false - }, - adjustment: 0, + // Table-driven: (input, expected_pointer_type, expected_endian) + let cases: &[(&str, TypeKind, Endianness)] = &[ + // .b / .B - byte (little-endian, signed) + ("(0.b)", TypeKind::Byte { signed: true }, Endianness::Little), + ("(0.B)", TypeKind::Byte { signed: true }, Endianness::Little), + // .s - 
short little-endian, .S - short big-endian + ( + "(0.s)", + TypeKind::Short { + endian: Endianness::Little, + signed: true, + }, + Endianness::Little, + ), + ( + "(0.S)", + TypeKind::Short { endian: Endianness::Big, - } - )) - ); - - // .l - long native, .L - long big-endian - assert_eq!( - parse_offset("(0x3c.l)"), - Ok(( - "", - OffsetSpec::Indirect { - base_offset: 0x3c, - pointer_type: TypeKind::Long { - endian: Endianness::Native, - signed: false - }, - adjustment: 0, - endian: Endianness::Native, - } - )) - ); - assert_eq!( - parse_offset("(0x3c.L)"), - Ok(( - "", - OffsetSpec::Indirect { - base_offset: 0x3c, - pointer_type: TypeKind::Long { - endian: Endianness::Big, - signed: false - }, - adjustment: 0, + signed: true, + }, + Endianness::Big, + ), + // .l - long little-endian, .L - long big-endian + ( + "(0x3c.l)", + TypeKind::Long { + endian: Endianness::Little, + signed: true, + }, + Endianness::Little, + ), + ( + "(0x3c.L)", + TypeKind::Long { endian: Endianness::Big, - } - )) - ); - - // .q - quad native, .Q - quad big-endian - assert_eq!( - parse_offset("(0.q)"), - Ok(( - "", - OffsetSpec::Indirect { - base_offset: 0, - pointer_type: TypeKind::Quad { - endian: Endianness::Native, - signed: false - }, - adjustment: 0, - endian: Endianness::Native, - } - )) - ); - assert_eq!( - parse_offset("(0.Q)"), - Ok(( - "", - OffsetSpec::Indirect { - base_offset: 0, - pointer_type: TypeKind::Quad { - endian: Endianness::Big, - signed: false - }, - adjustment: 0, + signed: true, + }, + Endianness::Big, + ), + // .q - quad little-endian, .Q - quad big-endian + ( + "(0.q)", + TypeKind::Quad { + endian: Endianness::Little, + signed: true, + }, + Endianness::Little, + ), + ( + "(0.Q)", + TypeKind::Quad { endian: Endianness::Big, - } - )) - ); + signed: true, + }, + Endianness::Big, + ), + ]; + + for (input, expected_type, expected_endian) in cases { + let base = if input.contains("0x3c") { 0x3c } else { 0 }; + assert_eq!( + parse_offset(input), + Ok(( + "", + 
OffsetSpec::Indirect { + base_offset: base, + pointer_type: expected_type.clone(), + adjustment: 0, + endian: *expected_endian, + } + )), + "Failed for input: {input}" + ); + } } #[test] fn test_parse_offset_indirect_with_positive_adjustment() { + // Adjustment AFTER closing paren: (base.type)+adj assert_eq!( - parse_offset("(0x3c.l+4)"), + parse_offset("(0x3c.l)+4"), Ok(( "", OffsetSpec::Indirect { base_offset: 0x3c, pointer_type: TypeKind::Long { - endian: Endianness::Native, - signed: false + endian: Endianness::Little, + signed: true }, adjustment: 4, - endian: Endianness::Native, + endian: Endianness::Little, } )) ); assert_eq!( - parse_offset("(0.b+0xFF)"), + parse_offset("(0.b)+0xFF"), Ok(( "", OffsetSpec::Indirect { base_offset: 0, - pointer_type: TypeKind::Byte { signed: false }, + pointer_type: TypeKind::Byte { signed: true }, adjustment: 255, - endian: Endianness::Native, + endian: Endianness::Little, } )) ); @@ -441,32 +400,32 @@ fn test_parse_offset_indirect_with_positive_adjustment() { #[test] fn test_parse_offset_indirect_with_negative_adjustment() { assert_eq!( - parse_offset("(0x3c.l-8)"), + parse_offset("(0x3c.l)-8"), Ok(( "", OffsetSpec::Indirect { base_offset: 0x3c, pointer_type: TypeKind::Long { - endian: Endianness::Native, - signed: false + endian: Endianness::Little, + signed: true }, adjustment: -8, - endian: Endianness::Native, + endian: Endianness::Little, } )) ); assert_eq!( - parse_offset("(100.s-0x10)"), + parse_offset("(100.s)-0x10"), Ok(( "", OffsetSpec::Indirect { base_offset: 100, pointer_type: TypeKind::Short { - endian: Endianness::Native, - signed: false + endian: Endianness::Little, + signed: true }, adjustment: -16, - endian: Endianness::Native, + endian: Endianness::Little, } )) ); @@ -482,26 +441,27 @@ fn test_parse_offset_indirect_negative_base() { OffsetSpec::Indirect { base_offset: -4, pointer_type: TypeKind::Long { - endian: Endianness::Native, - signed: false + endian: Endianness::Little, + signed: true }, adjustment: 
0, - endian: Endianness::Native, + endian: Endianness::Little, } )) ); + // Negative base with adjustment after paren assert_eq!( - parse_offset("(-0x10.s+2)"), + parse_offset("(-0x10.s)+2"), Ok(( "", OffsetSpec::Indirect { base_offset: -16, pointer_type: TypeKind::Short { - endian: Endianness::Native, - signed: false + endian: Endianness::Little, + signed: true }, adjustment: 2, - endian: Endianness::Native, + endian: Endianness::Little, } )) ); @@ -516,11 +476,11 @@ fn test_parse_offset_indirect_hex_base() { OffsetSpec::Indirect { base_offset: 0xFF, pointer_type: TypeKind::Long { - endian: Endianness::Native, - signed: false + endian: Endianness::Little, + signed: true }, adjustment: 0, - endian: Endianness::Native, + endian: Endianness::Little, } )) ); @@ -536,15 +496,15 @@ fn test_parse_offset_indirect_with_whitespace() { OffsetSpec::Indirect { base_offset: 0x3c, pointer_type: TypeKind::Long { - endian: Endianness::Native, - signed: false + endian: Endianness::Little, + signed: true }, adjustment: 0, - endian: Endianness::Native, + endian: Endianness::Little, } )) ); - // Trailing content should be left unconsumed + // Trailing content after adjustment-free form assert_eq!( parse_offset("(0x3c.l) string"), Ok(( @@ -552,11 +512,11 @@ fn test_parse_offset_indirect_with_whitespace() { OffsetSpec::Indirect { base_offset: 0x3c, pointer_type: TypeKind::Long { - endian: Endianness::Native, - signed: false + endian: Endianness::Little, + signed: true }, adjustment: 0, - endian: Endianness::Native, + endian: Endianness::Little, } )) ); @@ -588,11 +548,11 @@ fn test_parse_rule_offset_indirect() { OffsetSpec::Indirect { base_offset: 0x3c, pointer_type: TypeKind::Long { - endian: Endianness::Native, - signed: false + endian: Endianness::Little, + signed: true }, adjustment: 0, - endian: Endianness::Native, + endian: Endianness::Little, } ) )) @@ -611,18 +571,18 @@ fn test_parse_rule_offset_indirect_child() { OffsetSpec::Indirect { base_offset: 0x3c, pointer_type: 
TypeKind::Long { - endian: Endianness::Native, - signed: false + endian: Endianness::Little, + signed: true }, adjustment: 0, - endian: Endianness::Native, + endian: Endianness::Little, } ) )) ); - // Level 2 child with indirect offset + adjustment + // Level 2 child with adjustment after paren: >>(0x3c.l)+4 assert_eq!( - parse_rule_offset(">>(0x3c.l+4)"), + parse_rule_offset(">>(0x3c.l)+4"), Ok(( "", ( @@ -630,11 +590,11 @@ fn test_parse_rule_offset_indirect_child() { OffsetSpec::Indirect { base_offset: 0x3c, pointer_type: TypeKind::Long { - endian: Endianness::Native, - signed: false + endian: Endianness::Little, + signed: true }, adjustment: 4, - endian: Endianness::Native, + endian: Endianness::Little, } ) )) @@ -643,6 +603,7 @@ fn test_parse_rule_offset_indirect_child() { #[test] fn test_parse_rule_offset_indirect_with_remaining() { + // >(0x3c.l) followed by type keyword assert_eq!( parse_rule_offset(">(0x3c.l) string"), Ok(( @@ -652,11 +613,30 @@ fn test_parse_rule_offset_indirect_with_remaining() { OffsetSpec::Indirect { base_offset: 0x3c, pointer_type: TypeKind::Long { - endian: Endianness::Native, - signed: false + endian: Endianness::Little, + signed: true }, adjustment: 0, - endian: Endianness::Native, + endian: Endianness::Little, + } + ) + )) + ); + // >(0x3c.l)+4 followed by type keyword + assert_eq!( + parse_rule_offset(">(0x3c.l)+4 string"), + Ok(( + "string", + ( + 1, + OffsetSpec::Indirect { + base_offset: 0x3c, + pointer_type: TypeKind::Long { + endian: Endianness::Little, + signed: true + }, + adjustment: 4, + endian: Endianness::Little, } ) )) diff --git a/tests/indirect_offset_integration.rs b/tests/indirect_offset_integration.rs index 98476a2..45312e6 100644 --- a/tests/indirect_offset_integration.rs +++ b/tests/indirect_offset_integration.rs @@ -6,6 +6,10 @@ //! Exercises the full pipeline: write a magic file with indirect-offset syntax, //! load it through `MagicDatabase::load_from_file()`, evaluate buffers, and //! 
assert correct match / no-match behavior. +//! +//! GNU `file` semantics: lowercase specifiers are little-endian, uppercase are +//! big-endian. Pointer types are signed by default (GOTCHAS S6.3). +//! Adjustment is parsed after the closing paren: `(base.type)+adj`. use std::fs; use std::io::Write; @@ -13,23 +17,23 @@ use std::io::Write; use libmagic_rs::MagicDatabase; use tempfile::TempDir; -/// Build a PE-like buffer where offset 0x3c holds a big-endian 4-byte pointer +/// Build a PE-like buffer where offset 0x3c holds a little-endian 4-byte pointer /// to the PE signature (`PE\0\0`). /// /// Layout: /// [0x00] "MZ" DOS header stub -/// [0x3c] 4-byte big-endian pointer -> 0x80 (PE header location) +/// [0x3c] 4-byte little-endian pointer -> 0x80 (PE header location) /// [0x80] "PE\0\0" signature fn build_pe_like_buffer() -> Vec { let mut buf = vec![0u8; 0x84]; // DOS stub magic buf[0] = b'M'; buf[1] = b'Z'; - // Big-endian pointer at 0x3c -> 0x80 - buf[0x3c] = 0x00; + // Little-endian pointer at 0x3c -> 0x80 + buf[0x3c] = 0x80; buf[0x3d] = 0x00; buf[0x3e] = 0x00; - buf[0x3f] = 0x80; + buf[0x3f] = 0x00; // PE signature at 0x80 buf[0x80] = b'P'; buf[0x81] = b'E'; @@ -43,11 +47,10 @@ fn test_indirect_offset_pe_detection_via_magic_file() { let temp_dir = TempDir::new().unwrap(); let magic_path = temp_dir.path().join("pe.magic"); - // Use .L (big-endian long) for deterministic cross-platform behavior. - // String values must be quoted for the parser. + // Use lowercase .l (little-endian long) -- GNU `file` semantics. 
let mut f = fs::File::create(&magic_path).unwrap(); writeln!(f, r#"0 string "MZ" DOS executable"#).unwrap(); - writeln!(f, r#">(0x3c.L) string "PE" (PE)"#).unwrap(); + writeln!(f, r#">(0x3c.l) string "PE" (PE)"#).unwrap(); let db = MagicDatabase::load_from_file(&magic_path).unwrap(); let buf = build_pe_like_buffer(); @@ -72,19 +75,19 @@ fn test_indirect_offset_no_match_when_pointer_out_of_bounds() { let mut f = fs::File::create(&magic_path).unwrap(); writeln!(f, r#"0 string "MZ" DOS executable"#).unwrap(); - writeln!(f, r#">(0x3c.L) string "PE" (PE)"#).unwrap(); + writeln!(f, r#">(0x3c.l) string "PE" (PE)"#).unwrap(); let db = MagicDatabase::load_from_file(&magic_path).unwrap(); - // Buffer has "MZ" but the pointer at 0x3c points beyond the buffer + // Buffer has "MZ" but the LE pointer at 0x3c points beyond the buffer let mut buf = vec![0u8; 0x40]; buf[0] = b'M'; buf[1] = b'Z'; - // Pointer at 0x3c -> 0xFF (beyond buffer length) - buf[0x3c] = 0x00; + // Little-endian pointer at 0x3c -> 0xFF (beyond buffer length) + buf[0x3c] = 0xFF; buf[0x3d] = 0x00; buf[0x3e] = 0x00; - buf[0x3f] = 0xFF; + buf[0x3f] = 0x00; let result = db.evaluate_buffer(&buf).unwrap(); @@ -103,22 +106,22 @@ fn test_indirect_offset_no_match_when_pointer_out_of_bounds() { } #[test] -fn test_indirect_offset_with_adjustment() { +fn test_indirect_offset_with_adjustment_after_paren() { let temp_dir = TempDir::new().unwrap(); let magic_path = temp_dir.path().join("adj.magic"); - // Indirect offset with +4 adjustment: read pointer at 0, add 4, check there + // Adjustment AFTER closing paren: (base.type)+adj let mut f = fs::File::create(&magic_path).unwrap(); - writeln!(f, r#"(0.L+4) string "MAGIC" Adjusted match"#).unwrap(); + writeln!(f, r#"(0.l)+4 string "MAGIC" Adjusted match"#).unwrap(); let db = MagicDatabase::load_from_file(&magic_path).unwrap(); - // Pointer at offset 0 = 0x00000006 (big-endian), +4 = 10, "MAGIC" at offset 10 + // LE pointer at offset 0 = 0x06 (little-endian), +4 = 10, "MAGIC" at 
offset 10 let mut buf = vec![0u8; 20]; - buf[0] = 0x00; + buf[0] = 0x06; buf[1] = 0x00; buf[2] = 0x00; - buf[3] = 0x06; + buf[3] = 0x00; buf[10] = b'M'; buf[11] = b'A'; buf[12] = b'G'; @@ -165,13 +168,13 @@ fn test_indirect_offset_loading_does_not_error() { // Verify the parsing path succeeds for all specifier variants let mut f = fs::File::create(&magic_path).unwrap(); - writeln!(f, r#"(0.b) string "A" byte ptr"#).unwrap(); - writeln!(f, r#"(0.B) string "A" Byte ptr"#).unwrap(); - writeln!(f, r#"(0.s) string "A" short native ptr"#).unwrap(); + writeln!(f, r#"(0.b) string "A" byte LE ptr"#).unwrap(); + writeln!(f, r#"(0.B) string "A" Byte LE ptr"#).unwrap(); + writeln!(f, r#"(0.s) string "A" short LE ptr"#).unwrap(); writeln!(f, r#"(0.S) string "A" short BE ptr"#).unwrap(); - writeln!(f, r#"(0.l) string "A" long native ptr"#).unwrap(); + writeln!(f, r#"(0.l) string "A" long LE ptr"#).unwrap(); writeln!(f, r#"(0.L) string "A" long BE ptr"#).unwrap(); - writeln!(f, r#"(0.q) string "A" quad native ptr"#).unwrap(); + writeln!(f, r#"(0.q) string "A" quad LE ptr"#).unwrap(); writeln!(f, r#"(0.Q) string "A" quad BE ptr"#).unwrap(); let result = MagicDatabase::load_from_file(&magic_path); @@ -181,3 +184,39 @@ fn test_indirect_offset_loading_does_not_error() { result.err() ); } + +#[test] +fn test_indirect_offset_child_with_adjustment_after_paren() { + let temp_dir = TempDir::new().unwrap(); + let magic_path = temp_dir.path().join("pe_adj.magic"); + + // Child rule with (base.type)+adj syntax + let mut f = fs::File::create(&magic_path).unwrap(); + writeln!(f, r#"0 string "MZ" DOS executable"#).unwrap(); + writeln!(f, r#">(0x3c.l)+4 string "PE" (PE+4)"#).unwrap(); + + let db = MagicDatabase::load_from_file(&magic_path).unwrap(); + + // LE pointer at 0x3c = 0x7C, +4 = 0x80, "PE" at 0x80 + let mut buf = vec![0u8; 0x84]; + buf[0] = b'M'; + buf[1] = b'Z'; + buf[0x3c] = 0x7C; + buf[0x3d] = 0x00; + buf[0x3e] = 0x00; + buf[0x3f] = 0x00; + buf[0x80] = b'P'; + buf[0x81] = b'E'; + + 
let result = db.evaluate_buffer(&buf).unwrap(); + assert!( + result.description.contains("DOS executable"), + "Expected DOS match, got: {}", + result.description + ); + assert!( + result.description.contains("(PE+4)"), + "Expected child match with adjustment, got: {}", + result.description + ); +} From 2befffa33e52c899713490c7f25a4021411f5e35 Mon Sep 17 00:00:00 2001 From: UncleSp1d3r Date: Mon, 30 Mar 2026 01:52:59 -0400 Subject: [PATCH 21/28] feat(deps): update bun and cargo-binstall versions in mise.lock Signed-off-by: UncleSp1d3r --- mise.lock | 223 ++++++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 201 insertions(+), 22 deletions(-) diff --git a/mise.lock b/mise.lock index d4f6e2b..16a0322 100644 --- a/mise.lock +++ b/mise.lock @@ -96,14 +96,82 @@ url = "https://github.com/oven-sh/bun/releases/download/bun-v1.3.10/bun-windows- checksum = "sha256:715709c69b176e20994533d3292bd0b7c32de9c0c5575b916746ec6b2aa38346" url = "https://github.com/oven-sh/bun/releases/download/bun-v1.3.10/bun-windows-x64-baseline.zip" +[[tools.bun]] +version = "1.3.11" +backend = "core:bun" + +[tools.bun."platforms.linux-arm64"] +checksum = "sha256:d13944da12a53ecc74bf6a720bd1d04c4555c038dfe422365356a7be47691fdf" +url = "https://github.com/oven-sh/bun/releases/download/bun-v1.3.11/bun-linux-aarch64.zip" + +[tools.bun."platforms.linux-arm64-musl"] +checksum = "sha256:0f5bf5dc3f276053196274bb84f90a44e2fa40c9432bd6757e3247a8d9476a3d" +url = "https://github.com/oven-sh/bun/releases/download/bun-v1.3.11/bun-linux-aarch64-musl.zip" + +[tools.bun."platforms.linux-x64"] +checksum = "sha256:8611ba935af886f05a6f38740a15160326c15e5d5d07adef966130b4493607ed" +url = "https://github.com/oven-sh/bun/releases/download/bun-v1.3.11/bun-linux-x64.zip" + +[tools.bun."platforms.linux-x64-baseline"] +checksum = "sha256:abe346f63414547cdf6b35b7a649a490c728b93d006226156923918a84c0e59b" +url = "https://github.com/oven-sh/bun/releases/download/bun-v1.3.11/bun-linux-x64-baseline.zip" + 
+[tools.bun."platforms.linux-x64-musl"] +checksum = "sha256:b0fce3bc4fab52f26a1e0d8886dc07fd0c0eb2a274cb343b59c83a2d5997b5b1" +url = "https://github.com/oven-sh/bun/releases/download/bun-v1.3.11/bun-linux-x64-musl.zip" + +[tools.bun."platforms.linux-x64-musl-baseline"] +checksum = "sha256:2fa2b697f14ada86a28df771d3876ca7606d7453b2339454893b1937aa9c0c7e" +url = "https://github.com/oven-sh/bun/releases/download/bun-v1.3.11/bun-linux-x64-musl-baseline.zip" + +[tools.bun."platforms.macos-arm64"] +checksum = "sha256:6f5a3467ed9caec4795bf78cd476507d9f870c7d57b86c945fcb338126772ffc" +url = "https://github.com/oven-sh/bun/releases/download/bun-v1.3.11/bun-darwin-aarch64.zip" + +[tools.bun."platforms.macos-x64"] +checksum = "sha256:c4fe2b9247218b0295f24e895aaec8fee62e74452679a9026b67eacbd611a286" +url = "https://github.com/oven-sh/bun/releases/download/bun-v1.3.11/bun-darwin-x64.zip" + +[tools.bun."platforms.macos-x64-baseline"] +checksum = "sha256:fb6739b08bf54550edaa7c824cd5b2dca45b6a06afef408443087a63105f6f8d" +url = "https://github.com/oven-sh/bun/releases/download/bun-v1.3.11/bun-darwin-x64-baseline.zip" + +[tools.bun."platforms.windows-x64"] +checksum = "sha256:066f8694f8b7d8df592452746d18f01710d4053e93030922dbc6e8c34a8c4b9f" +url = "https://github.com/oven-sh/bun/releases/download/bun-v1.3.11/bun-windows-x64.zip" + +[tools.bun."platforms.windows-x64-baseline"] +checksum = "sha256:9d0e0f923e9626f3bc6044fc32e0d3ab29039aea753f5678ef8801cf26f75288" +url = "https://github.com/oven-sh/bun/releases/download/bun-v1.3.11/bun-windows-x64-baseline.zip" + [[tools.cargo-binstall]] version = "1.17.7" backend = "aqua:cargo-bins/cargo-binstall" +[tools.cargo-binstall."platforms.linux-arm64"] +checksum = "sha256:b0658b0a7f0959bc1dbb4ab665931c31c7dd1109ff01cb8772af17dfdc52a9af" +url = "https://github.com/cargo-bins/cargo-binstall/releases/download/v1.17.7/cargo-binstall-aarch64-unknown-linux-musl.tgz" + +[tools.cargo-binstall."platforms.linux-arm64-musl"] +checksum = 
"sha256:b0658b0a7f0959bc1dbb4ab665931c31c7dd1109ff01cb8772af17dfdc52a9af" +url = "https://github.com/cargo-bins/cargo-binstall/releases/download/v1.17.7/cargo-binstall-aarch64-unknown-linux-musl.tgz" + [tools.cargo-binstall."platforms.linux-x64"] checksum = "sha256:29b5ecfb6e03c2511a617c77d312b06df0c54717644fbfda3d465ec8240532f0" url = "https://github.com/cargo-bins/cargo-binstall/releases/download/v1.17.7/cargo-binstall-x86_64-unknown-linux-musl.tgz" +[tools.cargo-binstall."platforms.linux-x64-baseline"] +checksum = "sha256:29b5ecfb6e03c2511a617c77d312b06df0c54717644fbfda3d465ec8240532f0" +url = "https://github.com/cargo-bins/cargo-binstall/releases/download/v1.17.7/cargo-binstall-x86_64-unknown-linux-musl.tgz" + +[tools.cargo-binstall."platforms.linux-x64-musl"] +checksum = "sha256:29b5ecfb6e03c2511a617c77d312b06df0c54717644fbfda3d465ec8240532f0" +url = "https://github.com/cargo-bins/cargo-binstall/releases/download/v1.17.7/cargo-binstall-x86_64-unknown-linux-musl.tgz" + +[tools.cargo-binstall."platforms.linux-x64-musl-baseline"] +checksum = "sha256:29b5ecfb6e03c2511a617c77d312b06df0c54717644fbfda3d465ec8240532f0" +url = "https://github.com/cargo-bins/cargo-binstall/releases/download/v1.17.7/cargo-binstall-x86_64-unknown-linux-musl.tgz" + [tools.cargo-binstall."platforms.macos-arm64"] checksum = "sha256:1ad3c0c56fa3970634cce5009ed0ce61b943515f9115f8e480fd0e41d8d89085" url = "https://github.com/cargo-bins/cargo-binstall/releases/download/v1.17.7/cargo-binstall-aarch64-apple-darwin.zip" @@ -112,10 +180,66 @@ url = "https://github.com/cargo-bins/cargo-binstall/releases/download/v1.17.7/ca checksum = "sha256:aa7174fb938e668dea4b4c3d22fe6cefed97642cc3a7a419ba96d63d63fd729b" url = "https://github.com/cargo-bins/cargo-binstall/releases/download/v1.17.7/cargo-binstall-x86_64-apple-darwin.zip" +[tools.cargo-binstall."platforms.macos-x64-baseline"] +checksum = "sha256:aa7174fb938e668dea4b4c3d22fe6cefed97642cc3a7a419ba96d63d63fd729b" +url = 
"https://github.com/cargo-bins/cargo-binstall/releases/download/v1.17.7/cargo-binstall-x86_64-apple-darwin.zip" + [tools.cargo-binstall."platforms.windows-x64"] checksum = "sha256:c5cb2444ee04480502a8ac73d96abd9f97af8300ec04ea1c1f2a9e959c02e4d6" url = "https://github.com/cargo-bins/cargo-binstall/releases/download/v1.17.7/cargo-binstall-x86_64-pc-windows-msvc.zip" +[tools.cargo-binstall."platforms.windows-x64-baseline"] +checksum = "sha256:c5cb2444ee04480502a8ac73d96abd9f97af8300ec04ea1c1f2a9e959c02e4d6" +url = "https://github.com/cargo-bins/cargo-binstall/releases/download/v1.17.7/cargo-binstall-x86_64-pc-windows-msvc.zip" + +[[tools.cargo-binstall]] +version = "1.17.8" +backend = "aqua:cargo-bins/cargo-binstall" + +[tools.cargo-binstall."platforms.linux-arm64"] +checksum = "sha256:81d6245bd1a7a89e914d29af81d82280540e94927e61492a0fc359820cd97abb" +url = "https://github.com/cargo-bins/cargo-binstall/releases/download/v1.17.8/cargo-binstall-aarch64-unknown-linux-musl.tgz" + +[tools.cargo-binstall."platforms.linux-arm64-musl"] +checksum = "sha256:81d6245bd1a7a89e914d29af81d82280540e94927e61492a0fc359820cd97abb" +url = "https://github.com/cargo-bins/cargo-binstall/releases/download/v1.17.8/cargo-binstall-aarch64-unknown-linux-musl.tgz" + +[tools.cargo-binstall."platforms.linux-x64"] +checksum = "sha256:1da1ef72448db667cc4ae6d48e37451087602c8c07dc61782a4a5e538303e015" +url = "https://github.com/cargo-bins/cargo-binstall/releases/download/v1.17.8/cargo-binstall-x86_64-unknown-linux-musl.tgz" + +[tools.cargo-binstall."platforms.linux-x64-baseline"] +checksum = "sha256:1da1ef72448db667cc4ae6d48e37451087602c8c07dc61782a4a5e538303e015" +url = "https://github.com/cargo-bins/cargo-binstall/releases/download/v1.17.8/cargo-binstall-x86_64-unknown-linux-musl.tgz" + +[tools.cargo-binstall."platforms.linux-x64-musl"] +checksum = "sha256:1da1ef72448db667cc4ae6d48e37451087602c8c07dc61782a4a5e538303e015" +url = 
"https://github.com/cargo-bins/cargo-binstall/releases/download/v1.17.8/cargo-binstall-x86_64-unknown-linux-musl.tgz" + +[tools.cargo-binstall."platforms.linux-x64-musl-baseline"] +checksum = "sha256:1da1ef72448db667cc4ae6d48e37451087602c8c07dc61782a4a5e538303e015" +url = "https://github.com/cargo-bins/cargo-binstall/releases/download/v1.17.8/cargo-binstall-x86_64-unknown-linux-musl.tgz" + +[tools.cargo-binstall."platforms.macos-arm64"] +checksum = "sha256:af87346fdb186f0a2333bc0a30cfddd6faa98b31145ef1bb19c284aedea65972" +url = "https://github.com/cargo-bins/cargo-binstall/releases/download/v1.17.8/cargo-binstall-aarch64-apple-darwin.zip" + +[tools.cargo-binstall."platforms.macos-x64"] +checksum = "sha256:db353e01b582c97382178db9b4dfe22d81109782e480a38f3db953e62f569952" +url = "https://github.com/cargo-bins/cargo-binstall/releases/download/v1.17.8/cargo-binstall-x86_64-apple-darwin.zip" + +[tools.cargo-binstall."platforms.macos-x64-baseline"] +checksum = "sha256:db353e01b582c97382178db9b4dfe22d81109782e480a38f3db953e62f569952" +url = "https://github.com/cargo-bins/cargo-binstall/releases/download/v1.17.8/cargo-binstall-x86_64-apple-darwin.zip" + +[tools.cargo-binstall."platforms.windows-x64"] +checksum = "sha256:fef07560d4e391812091bb30c6ed1bd5289f74403a0c947b47b8a8c7a597b51b" +url = "https://github.com/cargo-bins/cargo-binstall/releases/download/v1.17.8/cargo-binstall-x86_64-pc-windows-msvc.zip" + +[tools.cargo-binstall."platforms.windows-x64-baseline"] +checksum = "sha256:fef07560d4e391812091bb30c6ed1bd5289f74403a0c947b47b8a8c7a597b51b" +url = "https://github.com/cargo-bins/cargo-binstall/releases/download/v1.17.8/cargo-binstall-x86_64-pc-windows-msvc.zip" + [[tools.cargo-insta]] version = "1.46.3" backend = "aqua:mitsuhiko/insta" @@ -284,6 +408,54 @@ url = "https://github.com/casey/just/releases/download/1.46.0/just-1.46.0-x86_64 checksum = "sha256:f0acf3f8ccbcf360b481baae9cae4c921774c89d5d932012481d3e0bda78ab39" url = 
"https://github.com/casey/just/releases/download/1.46.0/just-1.46.0-x86_64-pc-windows-msvc.zip" +[[tools.just]] +version = "1.48.1" +backend = "aqua:casey/just" + +[tools.just."platforms.linux-arm64"] +checksum = "sha256:3308721b991cf88cf2b9bbb3b31ac40550ec61a0c9b6fc011564e25e87964030" +url = "https://github.com/casey/just/releases/download/1.48.1/just-1.48.1-aarch64-unknown-linux-musl.tar.gz" + +[tools.just."platforms.linux-arm64-musl"] +checksum = "sha256:3308721b991cf88cf2b9bbb3b31ac40550ec61a0c9b6fc011564e25e87964030" +url = "https://github.com/casey/just/releases/download/1.48.1/just-1.48.1-aarch64-unknown-linux-musl.tar.gz" + +[tools.just."platforms.linux-x64"] +checksum = "sha256:9293e553ce401d1b524bf4e104918f72f268e3f9c6827e0055fe98d84a1b2522" +url = "https://github.com/casey/just/releases/download/1.48.1/just-1.48.1-x86_64-unknown-linux-musl.tar.gz" + +[tools.just."platforms.linux-x64-baseline"] +checksum = "sha256:9293e553ce401d1b524bf4e104918f72f268e3f9c6827e0055fe98d84a1b2522" +url = "https://github.com/casey/just/releases/download/1.48.1/just-1.48.1-x86_64-unknown-linux-musl.tar.gz" + +[tools.just."platforms.linux-x64-musl"] +checksum = "sha256:9293e553ce401d1b524bf4e104918f72f268e3f9c6827e0055fe98d84a1b2522" +url = "https://github.com/casey/just/releases/download/1.48.1/just-1.48.1-x86_64-unknown-linux-musl.tar.gz" + +[tools.just."platforms.linux-x64-musl-baseline"] +checksum = "sha256:9293e553ce401d1b524bf4e104918f72f268e3f9c6827e0055fe98d84a1b2522" +url = "https://github.com/casey/just/releases/download/1.48.1/just-1.48.1-x86_64-unknown-linux-musl.tar.gz" + +[tools.just."platforms.macos-arm64"] +checksum = "sha256:03a73339ff55bcf7411a3c940cdcb0a726d98134b87203c83a9008575434e2a8" +url = "https://github.com/casey/just/releases/download/1.48.1/just-1.48.1-aarch64-apple-darwin.tar.gz" + +[tools.just."platforms.macos-x64"] +checksum = "sha256:4c3e9c880b8fc93d7fc24abfde3c36b0cc59f6e9f8b31f7175095700f64125a7" +url = 
"https://github.com/casey/just/releases/download/1.48.1/just-1.48.1-x86_64-apple-darwin.tar.gz" + +[tools.just."platforms.macos-x64-baseline"] +checksum = "sha256:4c3e9c880b8fc93d7fc24abfde3c36b0cc59f6e9f8b31f7175095700f64125a7" +url = "https://github.com/casey/just/releases/download/1.48.1/just-1.48.1-x86_64-apple-darwin.tar.gz" + +[tools.just."platforms.windows-x64"] +checksum = "sha256:368cd9ca827cba04d9e6fc00f7ad840773c4605b6f64b9f87bdb00325d351029" +url = "https://github.com/casey/just/releases/download/1.48.1/just-1.48.1-x86_64-pc-windows-msvc.zip" + +[tools.just."platforms.windows-x64-baseline"] +checksum = "sha256:368cd9ca827cba04d9e6fc00f7ad840773c4605b6f64b9f87bdb00325d351029" +url = "https://github.com/casey/just/releases/download/1.48.1/just-1.48.1-x86_64-pc-windows-msvc.zip" + [[tools.lychee]] version = "0.23.0" backend = "aqua:lycheeverse/lychee" @@ -348,48 +520,48 @@ version = "3.14.3" backend = "core:python" [tools.python."platforms.linux-arm64"] -checksum = "sha256:be0f4dc2932f762292b27d46ea7d3e8e66ddf3969a5eb0254a229015ed402625" -url = "https://github.com/astral-sh/python-build-standalone/releases/download/20260303/cpython-3.14.3+20260303-aarch64-unknown-linux-gnu-install_only_stripped.tar.gz" +checksum = "sha256:53700338695e402a1a1fe22be4a41fbdacc70e22bb308a48eca8ed67cb7992be" +url = "https://github.com/astral-sh/python-build-standalone/releases/download/20260324/cpython-3.14.3+20260324-aarch64-unknown-linux-gnu-install_only_stripped.tar.gz" [tools.python."platforms.linux-arm64-musl"] -checksum = "sha256:be0f4dc2932f762292b27d46ea7d3e8e66ddf3969a5eb0254a229015ed402625" -url = "https://github.com/astral-sh/python-build-standalone/releases/download/20260303/cpython-3.14.3+20260303-aarch64-unknown-linux-gnu-install_only_stripped.tar.gz" +checksum = "sha256:53700338695e402a1a1fe22be4a41fbdacc70e22bb308a48eca8ed67cb7992be" +url = 
"https://github.com/astral-sh/python-build-standalone/releases/download/20260324/cpython-3.14.3+20260324-aarch64-unknown-linux-gnu-install_only_stripped.tar.gz" [tools.python."platforms.linux-x64"] -checksum = "sha256:0a73413f89efd417871876c9accaab28a9d1e3cd6358fbfff171a38ec99302f0" -url = "https://github.com/astral-sh/python-build-standalone/releases/download/20260303/cpython-3.14.3+20260303-x86_64-unknown-linux-gnu-install_only_stripped.tar.gz" +checksum = "sha256:d7a9f970914bb4c88756fe3bdcc186d4feb90e9500e54f1db47dae4dc9687e39" +url = "https://github.com/astral-sh/python-build-standalone/releases/download/20260324/cpython-3.14.3+20260324-x86_64-unknown-linux-gnu-install_only_stripped.tar.gz" [tools.python."platforms.linux-x64-baseline"] -checksum = "sha256:0a73413f89efd417871876c9accaab28a9d1e3cd6358fbfff171a38ec99302f0" -url = "https://github.com/astral-sh/python-build-standalone/releases/download/20260303/cpython-3.14.3+20260303-x86_64-unknown-linux-gnu-install_only_stripped.tar.gz" +checksum = "sha256:d7a9f970914bb4c88756fe3bdcc186d4feb90e9500e54f1db47dae4dc9687e39" +url = "https://github.com/astral-sh/python-build-standalone/releases/download/20260324/cpython-3.14.3+20260324-x86_64-unknown-linux-gnu-install_only_stripped.tar.gz" [tools.python."platforms.linux-x64-musl"] -checksum = "sha256:0a73413f89efd417871876c9accaab28a9d1e3cd6358fbfff171a38ec99302f0" -url = "https://github.com/astral-sh/python-build-standalone/releases/download/20260303/cpython-3.14.3+20260303-x86_64-unknown-linux-gnu-install_only_stripped.tar.gz" +checksum = "sha256:d7a9f970914bb4c88756fe3bdcc186d4feb90e9500e54f1db47dae4dc9687e39" +url = "https://github.com/astral-sh/python-build-standalone/releases/download/20260324/cpython-3.14.3+20260324-x86_64-unknown-linux-gnu-install_only_stripped.tar.gz" [tools.python."platforms.linux-x64-musl-baseline"] -checksum = "sha256:0a73413f89efd417871876c9accaab28a9d1e3cd6358fbfff171a38ec99302f0" -url = 
"https://github.com/astral-sh/python-build-standalone/releases/download/20260303/cpython-3.14.3+20260303-x86_64-unknown-linux-gnu-install_only_stripped.tar.gz" +checksum = "sha256:d7a9f970914bb4c88756fe3bdcc186d4feb90e9500e54f1db47dae4dc9687e39" +url = "https://github.com/astral-sh/python-build-standalone/releases/download/20260324/cpython-3.14.3+20260324-x86_64-unknown-linux-gnu-install_only_stripped.tar.gz" [tools.python."platforms.macos-arm64"] -checksum = "sha256:4703cdf18b26798fde7b49b6b66149674c25f97127be6a10dbcf29309bdcdcdb" -url = "https://github.com/astral-sh/python-build-standalone/releases/download/20260303/cpython-3.14.3+20260303-aarch64-apple-darwin-install_only_stripped.tar.gz" +checksum = "sha256:c43aecde4a663aebff99b9b83da0efec506479f1c3f98331442f33d2c43501f9" +url = "https://github.com/astral-sh/python-build-standalone/releases/download/20260324/cpython-3.14.3+20260324-aarch64-apple-darwin-install_only_stripped.tar.gz" [tools.python."platforms.macos-x64"] -checksum = "sha256:76f1cc26e3d262eae8ca546a93e8bded10cf0323613f7e246fea2e10a8115eb7" -url = "https://github.com/astral-sh/python-build-standalone/releases/download/20260303/cpython-3.14.3+20260303-x86_64-apple-darwin-install_only_stripped.tar.gz" +checksum = "sha256:9ab41dbc2f100a2a45d1833b9c11165f51051c558b5213eda9a9731d5948a0c0" +url = "https://github.com/astral-sh/python-build-standalone/releases/download/20260324/cpython-3.14.3+20260324-x86_64-apple-darwin-install_only_stripped.tar.gz" [tools.python."platforms.macos-x64-baseline"] -checksum = "sha256:76f1cc26e3d262eae8ca546a93e8bded10cf0323613f7e246fea2e10a8115eb7" -url = "https://github.com/astral-sh/python-build-standalone/releases/download/20260303/cpython-3.14.3+20260303-x86_64-apple-darwin-install_only_stripped.tar.gz" +checksum = "sha256:9ab41dbc2f100a2a45d1833b9c11165f51051c558b5213eda9a9731d5948a0c0" +url = 
"https://github.com/astral-sh/python-build-standalone/releases/download/20260324/cpython-3.14.3+20260324-x86_64-apple-darwin-install_only_stripped.tar.gz" [tools.python."platforms.windows-x64"] -checksum = "sha256:950c5f21a015c1bdd1337f233456df2470fab71e4d794407d27a84cb8b9909a0" -url = "https://github.com/astral-sh/python-build-standalone/releases/download/20260303/cpython-3.14.3+20260303-x86_64-pc-windows-msvc-install_only_stripped.tar.gz" +checksum = "sha256:bbe19034b35b0267176a7442575ae7dc6343480fd4d35598cb7700173d431e09" +url = "https://github.com/astral-sh/python-build-standalone/releases/download/20260324/cpython-3.14.3+20260324-x86_64-pc-windows-msvc-install_only_stripped.tar.gz" [tools.python."platforms.windows-x64-baseline"] -checksum = "sha256:950c5f21a015c1bdd1337f233456df2470fab71e4d794407d27a84cb8b9909a0" -url = "https://github.com/astral-sh/python-build-standalone/releases/download/20260303/cpython-3.14.3+20260303-x86_64-pc-windows-msvc-install_only_stripped.tar.gz" +checksum = "sha256:bbe19034b35b0267176a7442575ae7dc6343480fd4d35598cb7700173d431e09" +url = "https://github.com/astral-sh/python-build-standalone/releases/download/20260324/cpython-3.14.3+20260324-x86_64-pc-windows-msvc-install_only_stripped.tar.gz" [[tools.rust]] version = "1.94.1" @@ -402,10 +574,12 @@ backend = "aqua:ossf/scorecard" [tools.scorecard."platforms.linux-arm64"] checksum = "sha256:3f8b6354c62ec0287a8e9694481d834e16bff8451cf5b5dca435e8400ce5adaf" url = "https://github.com/ossf/scorecard/releases/download/v5.4.0/scorecard_5.4.0_linux_arm64.tar.gz" +provenance = "slsa" [tools.scorecard."platforms.linux-arm64-musl"] checksum = "sha256:3f8b6354c62ec0287a8e9694481d834e16bff8451cf5b5dca435e8400ce5adaf" url = "https://github.com/ossf/scorecard/releases/download/v5.4.0/scorecard_5.4.0_linux_arm64.tar.gz" +provenance = "slsa" [tools.scorecard."platforms.linux-x64"] checksum = "sha256:e5183aeaa5aa548fbb7318a6deb3e1038be0ef9aca24e655422ae88dfbe67502" @@ -415,14 +589,17 @@ provenance = 
"slsa" [tools.scorecard."platforms.linux-x64-baseline"] checksum = "sha256:e5183aeaa5aa548fbb7318a6deb3e1038be0ef9aca24e655422ae88dfbe67502" url = "https://github.com/ossf/scorecard/releases/download/v5.4.0/scorecard_5.4.0_linux_amd64.tar.gz" +provenance = "slsa" [tools.scorecard."platforms.linux-x64-musl"] checksum = "sha256:e5183aeaa5aa548fbb7318a6deb3e1038be0ef9aca24e655422ae88dfbe67502" url = "https://github.com/ossf/scorecard/releases/download/v5.4.0/scorecard_5.4.0_linux_amd64.tar.gz" +provenance = "slsa" [tools.scorecard."platforms.linux-x64-musl-baseline"] checksum = "sha256:e5183aeaa5aa548fbb7318a6deb3e1038be0ef9aca24e655422ae88dfbe67502" url = "https://github.com/ossf/scorecard/releases/download/v5.4.0/scorecard_5.4.0_linux_amd64.tar.gz" +provenance = "slsa" [tools.scorecard."platforms.macos-arm64"] checksum = "sha256:2c672695a27d35537dd4054f690f31fa1d6a72b0957598f45181296487f537f4" @@ -437,6 +614,7 @@ provenance = "slsa" [tools.scorecard."platforms.macos-x64-baseline"] checksum = "sha256:2abfec13b8eecc9b730e3782c9b3a9544d31ae861ce21ea7fe6a369d887d7c89" url = "https://github.com/ossf/scorecard/releases/download/v5.4.0/scorecard_5.4.0_darwin_amd64.tar.gz" +provenance = "slsa" [tools.scorecard."platforms.windows-x64"] checksum = "sha256:f7d0ece0dde703e4baa5f96e9b6ed33e6e786138c90db8de2c4943f24015b9ff" @@ -446,6 +624,7 @@ provenance = "slsa" [tools.scorecard."platforms.windows-x64-baseline"] checksum = "sha256:f7d0ece0dde703e4baa5f96e9b6ed33e6e786138c90db8de2c4943f24015b9ff" url = "https://github.com/ossf/scorecard/releases/download/v5.4.0/scorecard_5.4.0_windows_amd64.tar.gz" +provenance = "slsa" [[tools.shellcheck]] version = "0.11.0" From 41763f2883ca3aa2b50167948f9905ee0ec47f61 Mon Sep 17 00:00:00 2001 From: UncleSp1d3r Date: Mon, 30 Mar 2026 01:59:21 -0400 Subject: [PATCH 22/28] refactor: address PR review feedback for indirect offset implementation - Fix AGENTS.md: remove contradictory "only absolute and from-end are fully functional" line since 
indirect offsets are now implemented - Fix ast-structures.md: add `text` language tag to fenced code block (MD040) - Split .b/.B byte pointer specifiers into separate match arms to preserve GNU file endianness distinction in the AST - Add debug_assert for endian consistency between inner TypeKind and outer OffsetSpec::Indirect endian field - Consolidate indirect.rs tests into table-driven format (735 -> 538 lines) - Extract indirect offset parser tests into dedicated submodule Signed-off-by: UncleSp1d3r --- AGENTS.md | 1 - docs/src/ast-structures.md | 2 +- src/evaluator/offset/indirect.rs | 761 ++++++++---------- src/parser/grammar/mod.rs | 6 +- src/parser/grammar/tests/indirect_offset.rs | 282 +++++++ src/parser/grammar/{tests.rs => tests/mod.rs} | 280 +------ 6 files changed, 620 insertions(+), 712 deletions(-) create mode 100644 src/parser/grammar/tests/indirect_offset.rs rename src/parser/grammar/{tests.rs => tests/mod.rs} (90%) diff --git a/AGENTS.md b/AGENTS.md index 7fcc444..91ace5b 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -247,7 +247,6 @@ impl BinaryRegex for regex::bytes::Regex { - Indirect offsets are fully implemented (parsing + evaluation) with specifiers: `.b/.B` (byte), `.s/.S` (short), `.l/.L` (long), `.q/.Q` (quad); lowercase = little-endian, uppercase = big-endian (GNU `file` semantics); pointer types signed by default; adjustment after closing paren: `(base.type)+adj` - Relative offsets are parsed into the AST but evaluation is not yet implemented (#38) -- Only absolute and from-end offsets are fully functional ### Magic File Syntax diff --git a/docs/src/ast-structures.md b/docs/src/ast-structures.md index 9bf455c..2058335 100644 --- a/docs/src/ast-structures.md +++ b/docs/src/ast-structures.md @@ -244,7 +244,7 @@ Pascal-style length-prefixed strings where the length prefix can be 1, 2, or 4 b **Example:** -``` +```text 0 pstring JPEG 0 pstring/H JPEG ``` diff --git a/src/evaluator/offset/indirect.rs b/src/evaluator/offset/indirect.rs index 
83a8b79..d6ed172 100644 --- a/src/evaluator/offset/indirect.rs +++ b/src/evaluator/offset/indirect.rs @@ -48,6 +48,21 @@ pub fn resolve_indirect_offset(spec: &OffsetSpec, buffer: &[u8]) -> Result { + debug_assert_eq!( + *inner, endian, + "Indirect offset: inner TypeKind endianness ({inner:?}) \ + contradicts outer endian field ({endian:?})" + ); + } + _ => {} + } + // Step 1: Resolve base_offset to an absolute position let abs_base = resolve_absolute_offset(base_offset, buffer) .map_err(|e| map_offset_error(&e, base_offset))?; @@ -149,7 +164,6 @@ mod tests { use super::*; use crate::parser::ast::Endianness; - /// Helper to build an `OffsetSpec::Indirect` for tests. fn indirect( base_offset: i64, pointer_type: TypeKind, @@ -164,230 +178,184 @@ mod tests { } } - // ── Byte pointer ───────────────────────────────────────────── - - #[test] - fn test_byte_pointer_unsigned() { - // Buffer: [pointer=0x04, ..., target_byte_at_4] - let buffer = &[0x04, 0x00, 0x00, 0x00, 0xAA]; - let spec = indirect(0, TypeKind::Byte { signed: false }, 0, Endianness::Little); - assert_eq!(resolve_indirect_offset(&spec, buffer).unwrap(), 4); - } - - #[test] - fn test_byte_pointer_signed_positive() { - let buffer = &[0x03, 0x00, 0x00, 0xBB]; - let spec = indirect(0, TypeKind::Byte { signed: true }, 0, Endianness::Little); - assert_eq!(resolve_indirect_offset(&spec, buffer).unwrap(), 3); - } - - // ── Short pointer, both endiannesses ───────────────────────── - - #[test] - fn test_short_pointer_little_endian() { - // LE short at offset 0: bytes [0x04, 0x00] → 0x0004 - let mut buffer = vec![0x04, 0x00, 0x00, 0x00, 0xCC]; - buffer.resize(5, 0); - let spec = indirect( - 0, - TypeKind::Short { - endian: Endianness::Little, - signed: false, - }, - 0, - Endianness::Little, - ); - assert_eq!(resolve_indirect_offset(&spec, &buffer).unwrap(), 4); - } - - #[test] - fn test_short_pointer_big_endian() { - // BE short at offset 0: bytes [0x00, 0x04] → 0x0004 - let buffer = &[0x00, 0x04, 0x00, 0x00, 0xDD]; - 
let spec = indirect( - 0, - TypeKind::Short { - endian: Endianness::Big, - signed: false, - }, - 0, - Endianness::Big, - ); - assert_eq!(resolve_indirect_offset(&spec, buffer).unwrap(), 4); - } - - // ── Long pointer, both endiannesses ────────────────────────── - #[test] - fn test_long_pointer_little_endian() { - // LE long at offset 0: bytes [0x08, 0x00, 0x00, 0x00] → 8 - let mut buffer = vec![0x08, 0x00, 0x00, 0x00]; - buffer.resize(9, 0xAA); - let spec = indirect( - 0, - TypeKind::Long { - endian: Endianness::Little, - signed: false, - }, - 0, - Endianness::Little, - ); - assert_eq!(resolve_indirect_offset(&spec, &buffer).unwrap(), 8); - } + fn test_pointer_type_and_endianness() { + let cases: &[(&str, &[u8], TypeKind, Endianness, usize)] = &[ + ( + "byte unsigned", + &[0x04, 0x00, 0x00, 0x00, 0xAA], + TypeKind::Byte { signed: false }, + Endianness::Little, + 4, + ), + ( + "byte signed positive", + &[0x03, 0x00, 0x00, 0xBB], + TypeKind::Byte { signed: true }, + Endianness::Little, + 3, + ), + ( + "short LE", + &[0x04, 0x00, 0x00, 0x00, 0xCC], + TypeKind::Short { + endian: Endianness::Little, + signed: false, + }, + Endianness::Little, + 4, + ), + ( + "short BE", + &[0x00, 0x04, 0x00, 0x00, 0xDD], + TypeKind::Short { + endian: Endianness::Big, + signed: false, + }, + Endianness::Big, + 4, + ), + ( + "long LE", + &[0x06, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF], + TypeKind::Long { + endian: Endianness::Little, + signed: false, + }, + Endianness::Little, + 6, + ), + ( + "long BE", + &[0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0xFF], + TypeKind::Long { + endian: Endianness::Big, + signed: false, + }, + Endianness::Big, + 6, + ), + ( + "signed long positive", + &[0x04, 0x00, 0x00, 0x00, 0xAA], + TypeKind::Long { + endian: Endianness::Little, + signed: true, + }, + Endianness::Little, + 4, + ), + ]; + for (name, buf, ptype, endian, expected) in cases { + let spec = indirect(0, ptype.clone(), 0, *endian); + assert_eq!( + resolve_indirect_offset(&spec, buf).unwrap(), + *expected, 
+ "Failed for case: {name}" + ); + } - #[test] - fn test_long_pointer_big_endian() { - // BE long at offset 0: bytes [0x00, 0x00, 0x00, 0x06] → 6 - let buffer = &[0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0xFF]; - let spec = indirect( - 0, - TypeKind::Long { - endian: Endianness::Big, - signed: false, - }, - 0, - Endianness::Big, - ); - assert_eq!(resolve_indirect_offset(&spec, buffer).unwrap(), 6); + // Quad types need resizable buffers (target offset > inline slice length) + let quad_cases: &[(&str, Endianness, &[u8])] = &[ + ( + "quad LE", + Endianness::Little, + &[0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00], + ), + ( + "quad BE", + Endianness::Big, + &[0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10], + ), + ]; + for (name, endian, prefix) in quad_cases { + let mut buffer = prefix.to_vec(); + buffer.resize(17, 0xBB); + let spec = indirect( + 0, + TypeKind::Quad { + endian: *endian, + signed: false, + }, + 0, + *endian, + ); + assert_eq!( + resolve_indirect_offset(&spec, &buffer).unwrap(), + 16, + "Failed for case: {name}" + ); + } } - // ── Quad pointer ───────────────────────────────────────────── - #[test] - fn test_quad_pointer_little_endian() { - // LE quad at offset 0: value = 16 - let mut buffer = vec![0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00]; - buffer.resize(17, 0xBB); - let spec = indirect( - 0, - TypeKind::Quad { - endian: Endianness::Little, - signed: false, - }, - 0, - Endianness::Little, - ); - assert_eq!(resolve_indirect_offset(&spec, &buffer).unwrap(), 16); - } + fn test_extract_raw_unsigned_values() { + let ok_cases: &[(&str, Value, u64)] = &[ + ("Int(-1) -> u64::MAX", Value::Int(-1), u64::MAX), + ( + "Int(-2) -> u64::MAX-1", + Value::Int(-2), + 0xFFFF_FFFF_FFFF_FFFE, + ), + ( + "Int(-1) sign-extended", + Value::Int(-1), + 0xFFFF_FFFF_FFFF_FFFF, + ), + ("Int(42)", Value::Int(42), 42), + ("Uint(0xDEAD_BEEF)", Value::Uint(0xDEAD_BEEF), 0xDEAD_BEEF), + ]; + for (name, value, expected) in ok_cases { + assert_eq!( + 
extract_raw_unsigned(value).unwrap(), + *expected, + "Failed for case: {name}" + ); + } - #[test] - fn test_quad_pointer_big_endian() { - // BE quad at offset 0: bytes [0x00..0x00, 0x10] → 0x0000_0000_0000_0010 = 16 - let mut buffer = vec![0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10]; - buffer.resize(17, 0xCC); - let spec = indirect( - 0, - TypeKind::Quad { - endian: Endianness::Big, - signed: false, - }, - 0, - Endianness::Big, + let err_value = Value::String("hello".to_string()); + assert!( + extract_raw_unsigned(&err_value).is_err(), + "Failed for case: rejects non-integer" ); - assert_eq!(resolve_indirect_offset(&spec, &buffer).unwrap(), 16); - } - - // ── extract_raw_unsigned unit tests ──────────────────────── - - #[test] - fn test_extract_raw_unsigned_negative_one() { - // Value::Int(-1) must reinterpret as u64::MAX (0xFFFF_FFFF_FFFF_FFFF) - let value = Value::Int(-1); - assert_eq!(extract_raw_unsigned(&value).unwrap(), u64::MAX); - } - - #[test] - fn test_extract_raw_unsigned_negative_two() { - // Value::Int(-2) must reinterpret as u64::MAX - 1 - let value = Value::Int(-2); - assert_eq!(extract_raw_unsigned(&value).unwrap(), 0xFFFF_FFFF_FFFF_FFFE); - } - - #[test] - fn test_extract_raw_unsigned_i32_min_sign_extended() { - // A signed 32-bit -1 is sign-extended to i64 -1 by the reader, - // so extract_raw_unsigned must yield u64::MAX. 
- let value = Value::Int(-1); - assert_eq!(extract_raw_unsigned(&value).unwrap(), 0xFFFF_FFFF_FFFF_FFFF); - } - - #[test] - fn test_extract_raw_unsigned_positive_int() { - let value = Value::Int(42); - assert_eq!(extract_raw_unsigned(&value).unwrap(), 42); - } - - #[test] - fn test_extract_raw_unsigned_uint() { - let value = Value::Uint(0xDEAD_BEEF); - assert_eq!(extract_raw_unsigned(&value).unwrap(), 0xDEAD_BEEF); - } - - #[test] - fn test_extract_raw_unsigned_rejects_non_integer() { - let value = Value::String("hello".to_string()); - assert!(extract_raw_unsigned(&value).is_err()); - } - - // ── read_pointer signed-negative unit tests ───────────────── - - #[test] - fn test_read_pointer_signed_long_negative_one() { - // LE signed long: [0xFF, 0xFF, 0xFF, 0xFF] → i32 = -1 → i64 = -1 → u64 = 0xFFFF_FFFF_FFFF_FFFF - let buffer = &[0xFF, 0xFF, 0xFF, 0xFF]; - let raw = read_pointer( - buffer, - 0, - &TypeKind::Long { - endian: Endianness::Little, - signed: true, - }, - Endianness::Little, - ) - .unwrap(); - assert_eq!(raw, u64::MAX); } #[test] - fn test_read_pointer_signed_short_negative_two() { - // LE signed short: [0xFE, 0xFF] → i16 = -2 → i64 = -2 → u64 = 0xFFFF_FFFF_FFFF_FFFE - let buffer = &[0xFE, 0xFF]; - let raw = read_pointer( - buffer, - 0, - &TypeKind::Short { - endian: Endianness::Little, - signed: true, - }, - Endianness::Little, - ) - .unwrap(); - assert_eq!(raw, 0xFFFF_FFFF_FFFF_FFFE); - } - - #[test] - fn test_read_pointer_signed_byte_negative_one() { - // Signed byte: [0xFF] → i8 = -1 → i64 = -1 → u64 = 0xFFFF_FFFF_FFFF_FFFF - let buffer = &[0xFF]; - let raw = read_pointer( - buffer, - 0, - &TypeKind::Byte { signed: true }, - Endianness::Little, - ) - .unwrap(); - assert_eq!(raw, u64::MAX); + fn test_read_pointer_signed_negative() { + let cases: &[(&str, &[u8], TypeKind, u64)] = &[ + ( + "signed long -1", + &[0xFF, 0xFF, 0xFF, 0xFF], + TypeKind::Long { + endian: Endianness::Little, + signed: true, + }, + u64::MAX, + ), + ( + "signed short -2", + 
&[0xFE, 0xFF], + TypeKind::Short { + endian: Endianness::Little, + signed: true, + }, + 0xFFFF_FFFF_FFFF_FFFE, + ), + ( + "signed byte -1", + &[0xFF], + TypeKind::Byte { signed: true }, + u64::MAX, + ), + ]; + for (name, buf, ptype, expected) in cases { + let raw = read_pointer(buf, 0, ptype, Endianness::Little).unwrap(); + assert_eq!(raw, *expected, "Failed for case: {name}"); + } } - // ── Signed negative pointer end-to-end ────────────────────── - #[test] fn test_signed_short_negative_pointer_overruns_after_raw_conversion() { - // Signed LE short at offset 0: bytes [0xFE, 0xFF] → i16 = -2 - // read_pointer extracts raw u64 = 0xFFFF_FFFF_FFFF_FFFE (verified by unit tests above). - // That enormous pointer value must fail bounds validation, NOT be rejected - // during extraction. An implementation that rejects negative Value::Int early - // would not reach the bounds check. let buffer = &[0xFE, 0xFF, 0x00, 0x00]; let spec = indirect( 0, @@ -399,10 +367,6 @@ mod tests { Endianness::Little, ); let err = resolve_indirect_offset(&spec, buffer).unwrap_err(); - - // After raw unsigned reinterpretation, the pointer is 0xFFFF_FFFF_FFFF_FFFE. - // On 64-bit: usize::try_from succeeds → BufferOverrun with that exact offset. - // On 32-bit: usize::try_from overflows → InvalidOffset from apply_adjustment. if usize::BITS == 64 { assert!( matches!( @@ -425,10 +389,6 @@ mod tests { #[test] fn test_signed_long_negative_pointer_with_adjustment_overruns() { - // Signed LE long at offset 0: bytes [0xFF, 0xFF, 0xFF, 0xFF] → i32 = -1 - // extract_raw_unsigned converts Value::Int(-1) → u64::MAX (0xFFFF_FFFF_FFFF_FFFF). - // Adjustment of -1 yields u64::MAX - 1 = 0xFFFF_FFFF_FFFF_FFFE via checked_sub. - // Must fail at bounds validation, not during raw extraction. 
let buffer = &[0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00]; let spec = indirect( 0, @@ -440,10 +400,6 @@ mod tests { Endianness::Little, ); let err = resolve_indirect_offset(&spec, buffer).unwrap_err(); - - // After raw reinterpretation: u64::MAX. After adjustment of -1: 0xFFFF_FFFF_FFFF_FFFE. - // On 64-bit: usize::try_from succeeds → BufferOverrun with that exact offset. - // On 32-bit: usize::try_from overflows → InvalidOffset from apply_adjustment. if usize::BITS == 64 { assert!( matches!( @@ -464,213 +420,178 @@ mod tests { } } - // ── Positive and negative adjustments ──────────────────────── - #[test] - fn test_positive_adjustment() { - // Pointer value = 2, adjustment = +3 → final = 5 - let buffer = &[0x02, 0x00, 0x00, 0x00, 0x00, 0xEE]; - let spec = indirect(0, TypeKind::Byte { signed: false }, 3, Endianness::Little); - assert_eq!(resolve_indirect_offset(&spec, buffer).unwrap(), 5); - } - - #[test] - fn test_negative_adjustment() { - // Pointer value = 5, adjustment = -2 → final = 3 - let buffer = &[0x05, 0x00, 0x00, 0xFF]; - let spec = indirect(0, TypeKind::Byte { signed: false }, -2, Endianness::Little); - assert_eq!(resolve_indirect_offset(&spec, buffer).unwrap(), 3); + fn test_adjustments() { + let cases: &[(&str, &[u8], i64, usize)] = &[ + ("positive +3", &[0x02, 0x00, 0x00, 0x00, 0x00, 0xEE], 3, 5), + ("negative -2", &[0x05, 0x00, 0x00, 0xFF], -2, 3), + ]; + for (name, buf, adj, expected) in cases { + let spec = indirect( + 0, + TypeKind::Byte { signed: false }, + *adj, + Endianness::Little, + ); + assert_eq!( + resolve_indirect_offset(&spec, buf).unwrap(), + *expected, + "Failed for case: {name}" + ); + } } - // ── From-end base offset ───────────────────────────────────── - #[test] fn test_from_end_base_offset() { - // 8-byte buffer, base_offset = -1 → resolves to index 7 - // Byte at index 7 = 0x02 → pointer value = 2 → final = 2 let buffer = &[0x00, 0x00, 0xAA, 0x00, 0x00, 0x00, 0x00, 0x02]; let spec = indirect(-1, TypeKind::Byte { signed: 
false }, 0, Endianness::Little); assert_eq!(resolve_indirect_offset(&spec, buffer).unwrap(), 2); } - // ── Pointer read overrun ───────────────────────────────────── - #[test] - fn test_pointer_read_overrun_short() { - // Buffer has 1 byte, trying to read a short (2 bytes) at offset 0 - let buffer = &[0x04]; - let spec = indirect( - 0, - TypeKind::Short { - endian: Endianness::Little, - signed: false, - }, - 0, - Endianness::Little, - ); - let result = resolve_indirect_offset(&spec, buffer); - assert!(result.is_err()); - assert!(matches!( - result.unwrap_err(), - LibmagicError::EvaluationError(EvaluationError::BufferOverrun { .. }) - )); - } - - #[test] - fn test_pointer_read_overrun_long() { - // Buffer has 3 bytes, trying to read a long (4 bytes) at offset 0 - let buffer = &[0x00, 0x00, 0x00]; - let spec = indirect( - 0, - TypeKind::Long { - endian: Endianness::Little, - signed: false, - }, - 0, - Endianness::Little, - ); - let result = resolve_indirect_offset(&spec, buffer); - assert!(result.is_err()); - assert!(matches!( - result.unwrap_err(), - LibmagicError::EvaluationError(EvaluationError::BufferOverrun { .. }) - )); + fn test_pointer_read_overrun() { + let cases: &[(&str, &[u8], TypeKind)] = &[ + ( + "short from 1-byte buffer", + &[0x04], + TypeKind::Short { + endian: Endianness::Little, + signed: false, + }, + ), + ( + "long from 3-byte buffer", + &[0x00, 0x00, 0x00], + TypeKind::Long { + endian: Endianness::Little, + signed: false, + }, + ), + ]; + for (name, buf, ptype) in cases { + let spec = indirect(0, ptype.clone(), 0, Endianness::Little); + let result = resolve_indirect_offset(&spec, buf); + assert!( + matches!( + result, + Err(LibmagicError::EvaluationError( + EvaluationError::BufferOverrun { .. 
} + )) + ), + "Failed for case: {name}" + ); + } } - // ── Final offset overrun ───────────────────────────────────── - #[test] fn test_final_offset_overrun() { - // Pointer value = 0xFF (255), buffer only 5 bytes → overrun - let buffer = &[0xFF, 0x00, 0x00, 0x00, 0x00]; - let spec = indirect(0, TypeKind::Byte { signed: false }, 0, Endianness::Little); - let result = resolve_indirect_offset(&spec, buffer); - assert!(result.is_err()); - assert!(matches!( - result.unwrap_err(), - LibmagicError::EvaluationError(EvaluationError::BufferOverrun { .. }) - )); - } - - #[test] - fn test_final_offset_overrun_with_adjustment() { - // Pointer value = 3, adjustment = +10, buffer only 8 bytes → 13 overruns - let buffer = &[0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00]; - let spec = indirect(0, TypeKind::Byte { signed: false }, 10, Endianness::Little); - let result = resolve_indirect_offset(&spec, buffer); - assert!(result.is_err()); - assert!(matches!( - result.unwrap_err(), - LibmagicError::EvaluationError(EvaluationError::BufferOverrun { .. }) - )); - } - - // ── Arithmetic overflow/underflow ──────────────────────────── - - #[test] - fn test_adjustment_overflow() { - // Unsigned quad reading u64::MAX + positive adjustment → overflow - let buffer = &[0xFF; 16]; - let spec = indirect( - 0, - TypeKind::Quad { - endian: Endianness::Little, - signed: false, - }, - 1, - Endianness::Little, - ); - let result = resolve_indirect_offset(&spec, buffer); - assert!(result.is_err()); - assert!(matches!( - result.unwrap_err(), - LibmagicError::EvaluationError(EvaluationError::InvalidOffset { .. 
}) - )); - } - - #[test] - fn test_adjustment_underflow() { - // Pointer value = 0, adjustment = -1 → underflow - let buffer = &[0x00, 0x00, 0x00, 0x00]; - let spec = indirect(0, TypeKind::Byte { signed: false }, -1, Endianness::Little); - let result = resolve_indirect_offset(&spec, buffer); - assert!(result.is_err()); - assert!(matches!( - result.unwrap_err(), - LibmagicError::EvaluationError(EvaluationError::InvalidOffset { .. }) - )); - } - - // ── Unsupported pointer types ──────────────────────────────── - - #[test] - fn test_unsupported_pointer_type_string() { - let buffer = &[0x00, 0x00, 0x00, 0x00]; - let spec = indirect( - 0, - TypeKind::String { max_length: None }, - 0, - Endianness::Little, - ); - let result = resolve_indirect_offset(&spec, buffer); - assert!(result.is_err()); - assert!(matches!( - result.unwrap_err(), - LibmagicError::EvaluationError(EvaluationError::UnsupportedType { .. }) - )); + let cases: &[(&str, &[u8], i64)] = &[ + ( + "pointer=0xFF, no adjustment", + &[0xFF, 0x00, 0x00, 0x00, 0x00], + 0, + ), + ( + "pointer=3, adjustment=+10", + &[0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00], + 10, + ), + ]; + for (name, buf, adj) in cases { + let spec = indirect( + 0, + TypeKind::Byte { signed: false }, + *adj, + Endianness::Little, + ); + let result = resolve_indirect_offset(&spec, buf); + assert!( + matches!( + result, + Err(LibmagicError::EvaluationError( + EvaluationError::BufferOverrun { .. } + )) + ), + "Failed for case: {name}" + ); + } } #[test] - fn test_unsupported_pointer_type_float() { - let buffer = &[0x00, 0x00, 0x00, 0x00]; - let spec = indirect( - 0, - TypeKind::Float { - endian: Endianness::Little, - }, - 0, - Endianness::Little, - ); - let result = resolve_indirect_offset(&spec, buffer); - assert!(result.is_err()); - assert!(matches!( - result.unwrap_err(), - LibmagicError::EvaluationError(EvaluationError::UnsupportedType { .. 
}) - )); + fn test_adjustment_overflow_underflow() { + let cases: &[(&str, &[u8], TypeKind, i64)] = &[ + ( + "overflow: u64::MAX + 1", + &[0xFF; 16], + TypeKind::Quad { + endian: Endianness::Little, + signed: false, + }, + 1, + ), + ( + "underflow: 0 - 1", + &[0x00, 0x00, 0x00, 0x00], + TypeKind::Byte { signed: false }, + -1, + ), + ]; + for (name, buf, ptype, adj) in cases { + let spec = indirect(0, ptype.clone(), *adj, Endianness::Little); + let result = resolve_indirect_offset(&spec, buf); + assert!( + matches!( + result, + Err(LibmagicError::EvaluationError( + EvaluationError::InvalidOffset { .. } + )) + ), + "Failed for case: {name}" + ); + } } #[test] - fn test_unsupported_pointer_type_double() { - let buffer = &[0x00; 8]; - let spec = indirect( - 0, - TypeKind::Double { - endian: Endianness::Little, - }, - 0, - Endianness::Little, - ); - let result = resolve_indirect_offset(&spec, buffer); - assert!(result.is_err()); - assert!(matches!( - result.unwrap_err(), - LibmagicError::EvaluationError(EvaluationError::UnsupportedType { .. }) - )); + fn test_unsupported_pointer_types() { + let cases: &[(&str, &[u8], TypeKind)] = &[ + ("string", &[0x00; 4], TypeKind::String { max_length: None }), + ( + "float", + &[0x00; 4], + TypeKind::Float { + endian: Endianness::Little, + }, + ), + ( + "double", + &[0x00; 8], + TypeKind::Double { + endian: Endianness::Little, + }, + ), + ]; + for (name, buf, ptype) in cases { + let spec = indirect(0, ptype.clone(), 0, Endianness::Little); + let result = resolve_indirect_offset(&spec, buf); + assert!( + matches!( + result, + Err(LibmagicError::EvaluationError( + EvaluationError::UnsupportedType { .. } + )) + ), + "Failed for case: {name}" + ); + } } - // ── PE-header-style 32-bit LE pointer at 0x3c ──────────────── - #[test] fn test_pe_header_style_offset_0x3c() { - // Simulate a PE file: 32-bit LE pointer at offset 0x3C points to PE header. - // At offset 0x3C we store LE u32 = 0x80 (128). 
let mut buffer = vec![0u8; 256]; - // Write LE u32 value 0x80 at offset 0x3C buffer[0x3C] = 0x80; buffer[0x3D] = 0x00; buffer[0x3E] = 0x00; buffer[0x3F] = 0x00; - // Place "PE\0\0" signature at offset 0x80 buffer[0x80] = b'P'; buffer[0x81] = b'E'; buffer[0x82] = 0x00; @@ -687,49 +608,29 @@ mod tests { ); let offset = resolve_indirect_offset(&spec, &buffer).unwrap(); assert_eq!(offset, 0x80); - // Verify we can read the PE signature at that offset assert_eq!(&buffer[offset..offset + 4], b"PE\0\0"); } - // ── Base offset out of bounds ──────────────────────────────── - #[test] fn test_base_offset_out_of_bounds() { let buffer = &[0x00, 0x01, 0x02]; let spec = indirect(100, TypeKind::Byte { signed: false }, 0, Endianness::Little); - let result = resolve_indirect_offset(&spec, buffer); - assert!(result.is_err()); + assert!(resolve_indirect_offset(&spec, buffer).is_err()); } - // ── Signed pointer extraction ──────────────────────────────── - - #[test] - fn test_signed_long_pointer_positive() { - // Signed long value = 4 (positive) → final offset = 4 - let buffer = &[0x04, 0x00, 0x00, 0x00, 0xAA]; - let spec = indirect( - 0, - TypeKind::Long { - endian: Endianness::Little, - signed: true, - }, - 0, - Endianness::Little, - ); - assert_eq!(resolve_indirect_offset(&spec, buffer).unwrap(), 4); - } - - // ── Non-indirect spec produces internal error ──────────────── - #[test] fn test_non_indirect_spec_returns_error() { let buffer = &[0x00; 8]; let spec = OffsetSpec::Absolute(0); let result = resolve_indirect_offset(&spec, buffer); - assert!(result.is_err()); - assert!(matches!( - result.unwrap_err(), - LibmagicError::EvaluationError(EvaluationError::InternalError { .. }) - )); + assert!( + matches!( + result, + Err(LibmagicError::EvaluationError( + EvaluationError::InternalError { .. 
} + )) + ), + "Expected InternalError for non-indirect spec" + ); } } diff --git a/src/parser/grammar/mod.rs b/src/parser/grammar/mod.rs index 57af1dc..62fa1bd 100644 --- a/src/parser/grammar/mod.rs +++ b/src/parser/grammar/mod.rs @@ -162,7 +162,8 @@ pub fn parse_number(input: &str) -> IResult<&str, i64> { /// /// | Specifier | Width | Endianness | /// |-----------|--------|---------------| -/// | `b`, `B` | 1 byte | Little-endian | +/// | `b` | 1 byte | Little-endian | +/// | `B` | 1 byte | Big-endian | /// | `s` | 2 byte | Little-endian | /// | `S` | 2 byte | Big-endian | /// | `l` | 4 byte | Little-endian | @@ -171,7 +172,8 @@ pub fn parse_number(input: &str) -> IResult<&str, i64> { /// | `Q` | 8 byte | Big-endian | fn pointer_specifier_to_type(spec: char) -> Option<(TypeKind, Endianness)> { match spec { - 'b' | 'B' => Some((TypeKind::Byte { signed: true }, Endianness::Little)), + 'b' => Some((TypeKind::Byte { signed: true }, Endianness::Little)), + 'B' => Some((TypeKind::Byte { signed: true }, Endianness::Big)), 's' => Some(( TypeKind::Short { endian: Endianness::Little, diff --git a/src/parser/grammar/tests/indirect_offset.rs b/src/parser/grammar/tests/indirect_offset.rs new file mode 100644 index 0000000..c3434b2 --- /dev/null +++ b/src/parser/grammar/tests/indirect_offset.rs @@ -0,0 +1,282 @@ +// Copyright (c) 2025-2026 the libmagic-rs contributors +// SPDX-License-Identifier: Apache-2.0 + +use super::*; + +// Indirect offset parsing tests +// +// GNU `file` semantics: lowercase = little-endian, uppercase = big-endian. +// Numeric pointer types are signed by default (GOTCHAS S6.3). 
+// Adjustment is parsed AFTER the closing `)`: (base.type)+adj + +#[test] +fn test_parse_offset_indirect_all_specifiers() { + // Table-driven: (input, expected_pointer_type, expected_endian) + let cases: &[(&str, TypeKind, Endianness)] = &[ + // .b / .B - byte (little-endian, signed) + ("(0.b)", TypeKind::Byte { signed: true }, Endianness::Little), + ("(0.B)", TypeKind::Byte { signed: true }, Endianness::Big), + // .s - short little-endian, .S - short big-endian + ( + "(0.s)", + TypeKind::Short { + endian: Endianness::Little, + signed: true, + }, + Endianness::Little, + ), + ( + "(0.S)", + TypeKind::Short { + endian: Endianness::Big, + signed: true, + }, + Endianness::Big, + ), + // .l - long little-endian, .L - long big-endian + ( + "(0x3c.l)", + TypeKind::Long { + endian: Endianness::Little, + signed: true, + }, + Endianness::Little, + ), + ( + "(0x3c.L)", + TypeKind::Long { + endian: Endianness::Big, + signed: true, + }, + Endianness::Big, + ), + // .q - quad little-endian, .Q - quad big-endian + ( + "(0.q)", + TypeKind::Quad { + endian: Endianness::Little, + signed: true, + }, + Endianness::Little, + ), + ( + "(0.Q)", + TypeKind::Quad { + endian: Endianness::Big, + signed: true, + }, + Endianness::Big, + ), + ]; + + for (input, expected_type, expected_endian) in cases { + let base = if input.contains("0x3c") { 0x3c } else { 0 }; + assert_eq!( + parse_offset(input), + Ok(( + "", + OffsetSpec::Indirect { + base_offset: base, + pointer_type: expected_type.clone(), + adjustment: 0, + endian: *expected_endian, + } + )), + "Failed for input: {input}" + ); + } +} + +#[test] +fn test_parse_offset_indirect_with_positive_adjustment() { + // Adjustment AFTER closing paren: (base.type)+adj + assert_eq!( + parse_offset("(0x3c.l)+4"), + Ok(( + "", + OffsetSpec::Indirect { + base_offset: 0x3c, + pointer_type: TypeKind::Long { + endian: Endianness::Little, + signed: true + }, + adjustment: 4, + endian: Endianness::Little, + } + )) + ); + assert_eq!( + 
parse_offset("(0.b)+0xFF"), + Ok(( + "", + OffsetSpec::Indirect { + base_offset: 0, + pointer_type: TypeKind::Byte { signed: true }, + adjustment: 255, + endian: Endianness::Little, + } + )) + ); +} + +#[test] +fn test_parse_offset_indirect_with_negative_adjustment() { + assert_eq!( + parse_offset("(0x3c.l)-8"), + Ok(( + "", + OffsetSpec::Indirect { + base_offset: 0x3c, + pointer_type: TypeKind::Long { + endian: Endianness::Little, + signed: true + }, + adjustment: -8, + endian: Endianness::Little, + } + )) + ); + assert_eq!( + parse_offset("(100.s)-0x10"), + Ok(( + "", + OffsetSpec::Indirect { + base_offset: 100, + pointer_type: TypeKind::Short { + endian: Endianness::Little, + signed: true + }, + adjustment: -16, + endian: Endianness::Little, + } + )) + ); +} + +#[test] +fn test_parse_offset_indirect_negative_base() { + // Negative base offsets (from end of file) + assert_eq!( + parse_offset("(-4.l)"), + Ok(( + "", + OffsetSpec::Indirect { + base_offset: -4, + pointer_type: TypeKind::Long { + endian: Endianness::Little, + signed: true + }, + adjustment: 0, + endian: Endianness::Little, + } + )) + ); + // Negative base with adjustment after paren + assert_eq!( + parse_offset("(-0x10.s)+2"), + Ok(( + "", + OffsetSpec::Indirect { + base_offset: -16, + pointer_type: TypeKind::Short { + endian: Endianness::Little, + signed: true + }, + adjustment: 2, + endian: Endianness::Little, + } + )) + ); +} + +#[test] +fn test_parse_offset_indirect_hex_base() { + assert_eq!( + parse_offset("(0xFF.l)"), + Ok(( + "", + OffsetSpec::Indirect { + base_offset: 0xFF, + pointer_type: TypeKind::Long { + endian: Endianness::Little, + signed: true + }, + adjustment: 0, + endian: Endianness::Little, + } + )) + ); +} + +#[test] +fn test_parse_offset_indirect_with_whitespace() { + // Leading whitespace should be handled + assert_eq!( + parse_offset(" (0x3c.l)"), + Ok(( + "", + OffsetSpec::Indirect { + base_offset: 0x3c, + pointer_type: TypeKind::Long { + endian: Endianness::Little, + signed: 
true + }, + adjustment: 0, + endian: Endianness::Little, + } + )) + ); + // Trailing content after adjustment-free form + assert_eq!( + parse_offset("(0x3c.l) string"), + Ok(( + "string", + OffsetSpec::Indirect { + base_offset: 0x3c, + pointer_type: TypeKind::Long { + endian: Endianness::Little, + signed: true + }, + adjustment: 0, + endian: Endianness::Little, + } + )) + ); +} + +#[test] +fn test_parse_offset_indirect_parse_failures() { + // Missing closing paren + assert!(parse_offset("(0x3c.l").is_err()); + // Missing dot and type + assert!(parse_offset("(0x3c)").is_err()); + // Invalid specifier character + assert!(parse_offset("(0x3c.x)").is_err()); + // Empty parens + assert!(parse_offset("()").is_err()); + // Missing base + assert!(parse_offset("(.l)").is_err()); +} + +#[test] +fn test_parse_rule_offset_indirect() { + // Level 0 indirect + assert_eq!( + parse_rule_offset("(0x3c.l)"), + Ok(( + "", + ( + 0, + OffsetSpec::Indirect { + base_offset: 0x3c, + pointer_type: TypeKind::Long { + endian: Endianness::Little, + signed: true + }, + adjustment: 0, + endian: Endianness::Little, + } + ) + )) + ); +} diff --git a/src/parser/grammar/tests.rs b/src/parser/grammar/tests/mod.rs similarity index 90% rename from src/parser/grammar/tests.rs rename to src/parser/grammar/tests/mod.rs index d41dcf8..c6a6958 100644 --- a/src/parser/grammar/tests.rs +++ b/src/parser/grammar/tests/mod.rs @@ -1,6 +1,8 @@ // Copyright (c) 2025-2026 the libmagic-rs contributors // SPDX-License-Identifier: Apache-2.0 +mod indirect_offset; + use super::*; use crate::parser::ast::Endianness; use crate::parser::ast::PStringLengthWidth; @@ -281,284 +283,6 @@ fn test_parse_offset_boundary_values() { ); } -// Indirect offset parsing tests -// -// GNU `file` semantics: lowercase = little-endian, uppercase = big-endian. -// Numeric pointer types are signed by default (GOTCHAS S6.3). 
-// Adjustment is parsed AFTER the closing `)`: (base.type)+adj - -#[test] -fn test_parse_offset_indirect_all_specifiers() { - // Table-driven: (input, expected_pointer_type, expected_endian) - let cases: &[(&str, TypeKind, Endianness)] = &[ - // .b / .B - byte (little-endian, signed) - ("(0.b)", TypeKind::Byte { signed: true }, Endianness::Little), - ("(0.B)", TypeKind::Byte { signed: true }, Endianness::Little), - // .s - short little-endian, .S - short big-endian - ( - "(0.s)", - TypeKind::Short { - endian: Endianness::Little, - signed: true, - }, - Endianness::Little, - ), - ( - "(0.S)", - TypeKind::Short { - endian: Endianness::Big, - signed: true, - }, - Endianness::Big, - ), - // .l - long little-endian, .L - long big-endian - ( - "(0x3c.l)", - TypeKind::Long { - endian: Endianness::Little, - signed: true, - }, - Endianness::Little, - ), - ( - "(0x3c.L)", - TypeKind::Long { - endian: Endianness::Big, - signed: true, - }, - Endianness::Big, - ), - // .q - quad little-endian, .Q - quad big-endian - ( - "(0.q)", - TypeKind::Quad { - endian: Endianness::Little, - signed: true, - }, - Endianness::Little, - ), - ( - "(0.Q)", - TypeKind::Quad { - endian: Endianness::Big, - signed: true, - }, - Endianness::Big, - ), - ]; - - for (input, expected_type, expected_endian) in cases { - let base = if input.contains("0x3c") { 0x3c } else { 0 }; - assert_eq!( - parse_offset(input), - Ok(( - "", - OffsetSpec::Indirect { - base_offset: base, - pointer_type: expected_type.clone(), - adjustment: 0, - endian: *expected_endian, - } - )), - "Failed for input: {input}" - ); - } -} - -#[test] -fn test_parse_offset_indirect_with_positive_adjustment() { - // Adjustment AFTER closing paren: (base.type)+adj - assert_eq!( - parse_offset("(0x3c.l)+4"), - Ok(( - "", - OffsetSpec::Indirect { - base_offset: 0x3c, - pointer_type: TypeKind::Long { - endian: Endianness::Little, - signed: true - }, - adjustment: 4, - endian: Endianness::Little, - } - )) - ); - assert_eq!( - 
parse_offset("(0.b)+0xFF"), - Ok(( - "", - OffsetSpec::Indirect { - base_offset: 0, - pointer_type: TypeKind::Byte { signed: true }, - adjustment: 255, - endian: Endianness::Little, - } - )) - ); -} - -#[test] -fn test_parse_offset_indirect_with_negative_adjustment() { - assert_eq!( - parse_offset("(0x3c.l)-8"), - Ok(( - "", - OffsetSpec::Indirect { - base_offset: 0x3c, - pointer_type: TypeKind::Long { - endian: Endianness::Little, - signed: true - }, - adjustment: -8, - endian: Endianness::Little, - } - )) - ); - assert_eq!( - parse_offset("(100.s)-0x10"), - Ok(( - "", - OffsetSpec::Indirect { - base_offset: 100, - pointer_type: TypeKind::Short { - endian: Endianness::Little, - signed: true - }, - adjustment: -16, - endian: Endianness::Little, - } - )) - ); -} - -#[test] -fn test_parse_offset_indirect_negative_base() { - // Negative base offsets (from end of file) - assert_eq!( - parse_offset("(-4.l)"), - Ok(( - "", - OffsetSpec::Indirect { - base_offset: -4, - pointer_type: TypeKind::Long { - endian: Endianness::Little, - signed: true - }, - adjustment: 0, - endian: Endianness::Little, - } - )) - ); - // Negative base with adjustment after paren - assert_eq!( - parse_offset("(-0x10.s)+2"), - Ok(( - "", - OffsetSpec::Indirect { - base_offset: -16, - pointer_type: TypeKind::Short { - endian: Endianness::Little, - signed: true - }, - adjustment: 2, - endian: Endianness::Little, - } - )) - ); -} - -#[test] -fn test_parse_offset_indirect_hex_base() { - assert_eq!( - parse_offset("(0xFF.l)"), - Ok(( - "", - OffsetSpec::Indirect { - base_offset: 0xFF, - pointer_type: TypeKind::Long { - endian: Endianness::Little, - signed: true - }, - adjustment: 0, - endian: Endianness::Little, - } - )) - ); -} - -#[test] -fn test_parse_offset_indirect_with_whitespace() { - // Leading whitespace should be handled - assert_eq!( - parse_offset(" (0x3c.l)"), - Ok(( - "", - OffsetSpec::Indirect { - base_offset: 0x3c, - pointer_type: TypeKind::Long { - endian: Endianness::Little, - signed: 
true - }, - adjustment: 0, - endian: Endianness::Little, - } - )) - ); - // Trailing content after adjustment-free form - assert_eq!( - parse_offset("(0x3c.l) string"), - Ok(( - "string", - OffsetSpec::Indirect { - base_offset: 0x3c, - pointer_type: TypeKind::Long { - endian: Endianness::Little, - signed: true - }, - adjustment: 0, - endian: Endianness::Little, - } - )) - ); -} - -#[test] -fn test_parse_offset_indirect_parse_failures() { - // Missing closing paren - assert!(parse_offset("(0x3c.l").is_err()); - // Missing dot and type - assert!(parse_offset("(0x3c)").is_err()); - // Invalid specifier character - assert!(parse_offset("(0x3c.x)").is_err()); - // Empty parens - assert!(parse_offset("()").is_err()); - // Missing base - assert!(parse_offset("(.l)").is_err()); -} - -#[test] -fn test_parse_rule_offset_indirect() { - // Level 0 indirect - assert_eq!( - parse_rule_offset("(0x3c.l)"), - Ok(( - "", - ( - 0, - OffsetSpec::Indirect { - base_offset: 0x3c, - pointer_type: TypeKind::Long { - endian: Endianness::Little, - signed: true - }, - adjustment: 0, - endian: Endianness::Little, - } - ) - )) - ); -} - #[test] fn test_parse_rule_offset_indirect_child() { // Level 1 child with indirect offset: >(0x3c.l) From 585ad217cc1a304acebfa2f89e65a958c6910e53 Mon Sep 17 00:00:00 2001 From: UncleSp1d3r Date: Mon, 30 Mar 2026 02:01:36 -0400 Subject: [PATCH 23/28] fix: split oversized table-driven test to satisfy clippy too_many_lines Signed-off-by: UncleSp1d3r --- src/evaluator/offset/indirect.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/evaluator/offset/indirect.rs b/src/evaluator/offset/indirect.rs index d6ed172..5e56743 100644 --- a/src/evaluator/offset/indirect.rs +++ b/src/evaluator/offset/indirect.rs @@ -254,8 +254,10 @@ mod tests { "Failed for case: {name}" ); } + } - // Quad types need resizable buffers (target offset > inline slice length) + #[test] + fn test_quad_pointer_endianness() { let quad_cases: &[(&str, Endianness, &[u8])] = 
&[ ( "quad LE", From 8ca4bd22eaa083994be1b95477e769685ad535a6 Mon Sep 17 00:00:00 2001 From: UncleSp1d3r Date: Mon, 30 Mar 2026 02:21:02 -0400 Subject: [PATCH 24/28] chore(deps): update mdformat version to 1.0.0 with new arguments Signed-off-by: UncleSp1d3r --- mise.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mise.toml b/mise.toml index 3c0e40b..69370e5 100644 --- a/mise.toml +++ b/mise.toml @@ -23,7 +23,7 @@ python = "latest" "cargo:cargo-release" = "1.1.1" "cargo:cargo-auditable" = "0.7.4" "cargo:cargo-cyclonedx" = "0.5.7" -"pipx:mdformat" = { version = "0.7.21", uvx_args = "--with mdformat-gfm --with mdformat-frontmatter --with mdformat-footnote --with mdformat-simple-breaks --with mdformat-gfm-alerts --with mdformat-toc --with mdformat-wikilink --with mdformat-tables" } +"pipx:mdformat" = { version = "1.0.0", uvx_args = "--with mdformat-gfm --with mdformat-config --with mdformat-footnote --with mdformat-front-matters --with mdformat-simple-breaks --with mdformat-web --with mdformat-wikilink --with mdformat-gfm-alerts --with mdformat-toc" } prettier = "3.8.1" actionlint = "1.7.11" lychee = "0.23.0" From aa5a2495e8de47184ce37dbdbc8968f182a81045 Mon Sep 17 00:00:00 2001 From: UncleSp1d3r Date: Mon, 30 Mar 2026 02:21:07 -0400 Subject: [PATCH 25/28] chore(deps): update bun and cargo-binstall versions in mise.lock Signed-off-by: UncleSp1d3r --- mise.lock | 148 +----------------------------------------------------- 1 file changed, 2 insertions(+), 146 deletions(-) diff --git a/mise.lock b/mise.lock index 16a0322..501bf6f 100644 --- a/mise.lock +++ b/mise.lock @@ -48,54 +48,6 @@ url = "https://github.com/rhysd/actionlint/releases/download/v1.7.11/actionlint_ checksum = "sha256:5414b7124a91f4b5abee62e5c9d84802237734f8d15b9b7032732a32c3ebffa3" url = "https://github.com/rhysd/actionlint/releases/download/v1.7.11/actionlint_1.7.11_windows_amd64.zip" -[[tools.bun]] -version = "1.3.10" -backend = "core:bun" - -[tools.bun."platforms.linux-arm64"] 
-checksum = "sha256:fa5ecb25cafa8e8f5c87a0f833719d46dd0af0a86c7837d806531212d55636d3" -url = "https://github.com/oven-sh/bun/releases/download/bun-v1.3.10/bun-linux-aarch64.zip" - -[tools.bun."platforms.linux-arm64-musl"] -checksum = "sha256:d2c81365a2e529b78a42330d3a0056e8dbd7896b4a6782c8e392b6532141e34d" -url = "https://github.com/oven-sh/bun/releases/download/bun-v1.3.10/bun-linux-aarch64-musl.zip" - -[tools.bun."platforms.linux-x64"] -checksum = "sha256:f57bc0187e39623de716ba3a389fda5486b2d7be7131a980ba54dc7b733d2e08" -url = "https://github.com/oven-sh/bun/releases/download/bun-v1.3.10/bun-linux-x64.zip" - -[tools.bun."platforms.linux-x64-baseline"] -checksum = "sha256:41201a8c5ee74a9dcbb1ce25a1104f1f929838b57a845aa78d98379b0ce7cde2" -url = "https://github.com/oven-sh/bun/releases/download/bun-v1.3.10/bun-linux-x64-baseline.zip" - -[tools.bun."platforms.linux-x64-musl"] -checksum = "sha256:48a6c32277d343db0148ce066336472ffd380358a4d26bb1329714742492d824" -url = "https://github.com/oven-sh/bun/releases/download/bun-v1.3.10/bun-linux-x64-musl.zip" - -[tools.bun."platforms.linux-x64-musl-baseline"] -checksum = "sha256:a7bc4cdea1ef255a83adbf39c7aafcd30e09f2b8f74deec4b10ee318bc024d1f" -url = "https://github.com/oven-sh/bun/releases/download/bun-v1.3.10/bun-linux-x64-musl-baseline.zip" - -[tools.bun."platforms.macos-arm64"] -checksum = "sha256:82034e87c9d9b4398ea619aee2eed5d2a68c8157e9a6ae2d1052d84d533ccd8d" -url = "https://github.com/oven-sh/bun/releases/download/bun-v1.3.10/bun-darwin-aarch64.zip" - -[tools.bun."platforms.macos-x64"] -checksum = "sha256:c1d90bf6140f20e572c473065dc6b37a4b036349b5e9e4133779cc642ad94323" -url = "https://github.com/oven-sh/bun/releases/download/bun-v1.3.10/bun-darwin-x64.zip" - -[tools.bun."platforms.macos-x64-baseline"] -checksum = "sha256:f9686c4e4e760db4cde77a0f1fad05e552648b9c9cbfa4f7fc9a7ec26b9f3267" -url = "https://github.com/oven-sh/bun/releases/download/bun-v1.3.10/bun-darwin-x64-baseline.zip" - 
-[tools.bun."platforms.windows-x64"] -checksum = "sha256:7a77b3e245e2e26965c93089a4a1332e8a326d3364c89fae1d1fd99cdd3cd73d" -url = "https://github.com/oven-sh/bun/releases/download/bun-v1.3.10/bun-windows-x64.zip" - -[tools.bun."platforms.windows-x64-baseline"] -checksum = "sha256:715709c69b176e20994533d3292bd0b7c32de9c0c5575b916746ec6b2aa38346" -url = "https://github.com/oven-sh/bun/releases/download/bun-v1.3.10/bun-windows-x64-baseline.zip" - [[tools.bun]] version = "1.3.11" backend = "core:bun" @@ -144,54 +96,6 @@ url = "https://github.com/oven-sh/bun/releases/download/bun-v1.3.11/bun-windows- checksum = "sha256:9d0e0f923e9626f3bc6044fc32e0d3ab29039aea753f5678ef8801cf26f75288" url = "https://github.com/oven-sh/bun/releases/download/bun-v1.3.11/bun-windows-x64-baseline.zip" -[[tools.cargo-binstall]] -version = "1.17.7" -backend = "aqua:cargo-bins/cargo-binstall" - -[tools.cargo-binstall."platforms.linux-arm64"] -checksum = "sha256:b0658b0a7f0959bc1dbb4ab665931c31c7dd1109ff01cb8772af17dfdc52a9af" -url = "https://github.com/cargo-bins/cargo-binstall/releases/download/v1.17.7/cargo-binstall-aarch64-unknown-linux-musl.tgz" - -[tools.cargo-binstall."platforms.linux-arm64-musl"] -checksum = "sha256:b0658b0a7f0959bc1dbb4ab665931c31c7dd1109ff01cb8772af17dfdc52a9af" -url = "https://github.com/cargo-bins/cargo-binstall/releases/download/v1.17.7/cargo-binstall-aarch64-unknown-linux-musl.tgz" - -[tools.cargo-binstall."platforms.linux-x64"] -checksum = "sha256:29b5ecfb6e03c2511a617c77d312b06df0c54717644fbfda3d465ec8240532f0" -url = "https://github.com/cargo-bins/cargo-binstall/releases/download/v1.17.7/cargo-binstall-x86_64-unknown-linux-musl.tgz" - -[tools.cargo-binstall."platforms.linux-x64-baseline"] -checksum = "sha256:29b5ecfb6e03c2511a617c77d312b06df0c54717644fbfda3d465ec8240532f0" -url = "https://github.com/cargo-bins/cargo-binstall/releases/download/v1.17.7/cargo-binstall-x86_64-unknown-linux-musl.tgz" - -[tools.cargo-binstall."platforms.linux-x64-musl"] -checksum = 
"sha256:29b5ecfb6e03c2511a617c77d312b06df0c54717644fbfda3d465ec8240532f0" -url = "https://github.com/cargo-bins/cargo-binstall/releases/download/v1.17.7/cargo-binstall-x86_64-unknown-linux-musl.tgz" - -[tools.cargo-binstall."platforms.linux-x64-musl-baseline"] -checksum = "sha256:29b5ecfb6e03c2511a617c77d312b06df0c54717644fbfda3d465ec8240532f0" -url = "https://github.com/cargo-bins/cargo-binstall/releases/download/v1.17.7/cargo-binstall-x86_64-unknown-linux-musl.tgz" - -[tools.cargo-binstall."platforms.macos-arm64"] -checksum = "sha256:1ad3c0c56fa3970634cce5009ed0ce61b943515f9115f8e480fd0e41d8d89085" -url = "https://github.com/cargo-bins/cargo-binstall/releases/download/v1.17.7/cargo-binstall-aarch64-apple-darwin.zip" - -[tools.cargo-binstall."platforms.macos-x64"] -checksum = "sha256:aa7174fb938e668dea4b4c3d22fe6cefed97642cc3a7a419ba96d63d63fd729b" -url = "https://github.com/cargo-bins/cargo-binstall/releases/download/v1.17.7/cargo-binstall-x86_64-apple-darwin.zip" - -[tools.cargo-binstall."platforms.macos-x64-baseline"] -checksum = "sha256:aa7174fb938e668dea4b4c3d22fe6cefed97642cc3a7a419ba96d63d63fd729b" -url = "https://github.com/cargo-bins/cargo-binstall/releases/download/v1.17.7/cargo-binstall-x86_64-apple-darwin.zip" - -[tools.cargo-binstall."platforms.windows-x64"] -checksum = "sha256:c5cb2444ee04480502a8ac73d96abd9f97af8300ec04ea1c1f2a9e959c02e4d6" -url = "https://github.com/cargo-bins/cargo-binstall/releases/download/v1.17.7/cargo-binstall-x86_64-pc-windows-msvc.zip" - -[tools.cargo-binstall."platforms.windows-x64-baseline"] -checksum = "sha256:c5cb2444ee04480502a8ac73d96abd9f97af8300ec04ea1c1f2a9e959c02e4d6" -url = "https://github.com/cargo-bins/cargo-binstall/releases/download/v1.17.7/cargo-binstall-x86_64-pc-windows-msvc.zip" - [[tools.cargo-binstall]] version = "1.17.8" backend = "aqua:cargo-bins/cargo-binstall" @@ -360,54 +264,6 @@ backend = "cargo:mdbook-toc" version = "0.3.156" backend = "cargo:release-plz" -[[tools.just]] -version = "1.46.0" 
-backend = "aqua:casey/just" - -[tools.just."platforms.linux-arm64"] -checksum = "sha256:b81970c8247fa64cfb30d2a3da0e487e4253f9f2d01865ed5e7d53cdc7b02188" -url = "https://github.com/casey/just/releases/download/1.46.0/just-1.46.0-aarch64-unknown-linux-musl.tar.gz" - -[tools.just."platforms.linux-arm64-musl"] -checksum = "sha256:b81970c8247fa64cfb30d2a3da0e487e4253f9f2d01865ed5e7d53cdc7b02188" -url = "https://github.com/casey/just/releases/download/1.46.0/just-1.46.0-aarch64-unknown-linux-musl.tar.gz" - -[tools.just."platforms.linux-x64"] -checksum = "sha256:79966e6e353f535ee7d1c6221641bcc8e3381c55b0d0a6dc6e54b34f9db36eaa" -url = "https://github.com/casey/just/releases/download/1.46.0/just-1.46.0-x86_64-unknown-linux-musl.tar.gz" - -[tools.just."platforms.linux-x64-baseline"] -checksum = "sha256:79966e6e353f535ee7d1c6221641bcc8e3381c55b0d0a6dc6e54b34f9db36eaa" -url = "https://github.com/casey/just/releases/download/1.46.0/just-1.46.0-x86_64-unknown-linux-musl.tar.gz" - -[tools.just."platforms.linux-x64-musl"] -checksum = "sha256:79966e6e353f535ee7d1c6221641bcc8e3381c55b0d0a6dc6e54b34f9db36eaa" -url = "https://github.com/casey/just/releases/download/1.46.0/just-1.46.0-x86_64-unknown-linux-musl.tar.gz" - -[tools.just."platforms.linux-x64-musl-baseline"] -checksum = "sha256:79966e6e353f535ee7d1c6221641bcc8e3381c55b0d0a6dc6e54b34f9db36eaa" -url = "https://github.com/casey/just/releases/download/1.46.0/just-1.46.0-x86_64-unknown-linux-musl.tar.gz" - -[tools.just."platforms.macos-arm64"] -checksum = "sha256:438eaf6468a115aa7db93e501cc7e3272f453f6b7083be3863adfab546b23358" -url = "https://github.com/casey/just/releases/download/1.46.0/just-1.46.0-aarch64-apple-darwin.tar.gz" - -[tools.just."platforms.macos-x64"] -checksum = "sha256:ec54dd60ac876261b7318f1852ef9c0319fede1e5a73c14f56d908a8edf595b8" -url = "https://github.com/casey/just/releases/download/1.46.0/just-1.46.0-x86_64-apple-darwin.tar.gz" - -[tools.just."platforms.macos-x64-baseline"] -checksum = 
"sha256:ec54dd60ac876261b7318f1852ef9c0319fede1e5a73c14f56d908a8edf595b8" -url = "https://github.com/casey/just/releases/download/1.46.0/just-1.46.0-x86_64-apple-darwin.tar.gz" - -[tools.just."platforms.windows-x64"] -checksum = "sha256:f0acf3f8ccbcf360b481baae9cae4c921774c89d5d932012481d3e0bda78ab39" -url = "https://github.com/casey/just/releases/download/1.46.0/just-1.46.0-x86_64-pc-windows-msvc.zip" - -[tools.just."platforms.windows-x64-baseline"] -checksum = "sha256:f0acf3f8ccbcf360b481baae9cae4c921774c89d5d932012481d3e0bda78ab39" -url = "https://github.com/casey/just/releases/download/1.46.0/just-1.46.0-x86_64-pc-windows-msvc.zip" - [[tools.just]] version = "1.48.1" backend = "aqua:casey/just" @@ -501,11 +357,11 @@ version = "0.21.0" backend = "npm:markdownlint-cli2" [[tools."pipx:mdformat"]] -version = "0.7.21" +version = "1.0.0" backend = "pipx:mdformat" [tools."pipx:mdformat".options] -uvx_args = "--with mdformat-gfm --with mdformat-frontmatter --with mdformat-footnote --with mdformat-simple-breaks --with mdformat-gfm-alerts --with mdformat-toc --with mdformat-wikilink --with mdformat-tables" +uvx_args = "--with mdformat-gfm --with mdformat-config --with mdformat-footnote --with mdformat-front-matters --with mdformat-simple-breaks --with mdformat-web --with mdformat-wikilink --with mdformat-gfm-alerts --with mdformat-toc" [[tools."pipx:pre-commit"]] version = "4.5.1" From 25f35b101d0935e58f6fcb72e1cbe2e2a591f56e Mon Sep 17 00:00:00 2001 From: UncleSp1d3r Date: Mon, 30 Mar 2026 02:21:17 -0400 Subject: [PATCH 26/28] style(settings): format JSON files for consistency and readability Signed-off-by: UncleSp1d3r --- .gemini/settings.json | 15 ++++++--------- .mcp.json | 15 ++++++--------- .vscode/settings.json | 10 +++------- tessl.json | 2 +- 4 files changed, 16 insertions(+), 26 deletions(-) diff --git a/.gemini/settings.json b/.gemini/settings.json index ebfccaa..c4f2241 100644 --- a/.gemini/settings.json +++ b/.gemini/settings.json @@ -1,12 +1,9 @@ { - 
"mcpServers": { - "tessl": { - "type": "stdio", - "command": "tessl", - "args": [ - "mcp", - "start" - ] + "mcpServers": { + "tessl": { + "type": "stdio", + "command": "tessl", + "args": ["mcp", "start"] + } } - } } diff --git a/.mcp.json b/.mcp.json index ebfccaa..c4f2241 100644 --- a/.mcp.json +++ b/.mcp.json @@ -1,12 +1,9 @@ { - "mcpServers": { - "tessl": { - "type": "stdio", - "command": "tessl", - "args": [ - "mcp", - "start" - ] + "mcpServers": { + "tessl": { + "type": "stdio", + "command": "tessl", + "args": ["mcp", "start"] + } } - } } diff --git a/.vscode/settings.json b/.vscode/settings.json index 24d614f..13fcd97 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -21,12 +21,8 @@ "git.rebaseWhenSync": true, "git.replaceTagsWhenPull": true, "githubPullRequests.codingAgent.uiIntegration": true, - "ruff.path": [ - "${workspaceFolder}/.vscode/mise-tools/ruff" - ], - "ruff.interpreter": [ - "${workspaceFolder}/.vscode/mise-tools/python" - ], + "ruff.path": ["${workspaceFolder}/.vscode/mise-tools/ruff"], + "ruff.interpreter": ["${workspaceFolder}/.vscode/mise-tools/python"], "python.defaultInterpreterPath": "${workspaceFolder}/.vscode/mise-tools/python", "bun.runtime": "${workspaceFolder}/.vscode/mise-tools/bun" -} \ No newline at end of file +} diff --git a/tessl.json b/tessl.json index 65dc6d1..41560da 100644 --- a/tessl.json +++ b/tessl.json @@ -58,4 +58,4 @@ "version": "1.0.1" } } -} \ No newline at end of file +} From e0a7dd2e492e7e03f95ae4d1ba06c91813b9d643 Mon Sep 17 00:00:00 2001 From: UncleSp1d3r Date: Mon, 30 Mar 2026 02:21:23 -0400 Subject: [PATCH 27/28] style(devcontainer): format rust-analyzer extraArgs for consistency Signed-off-by: UncleSp1d3r --- .devcontainer/devcontainer.json | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index 1630007..a3fafa0 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -69,10 +69,7 
@@ "--all-features" ], "rust-analyzer.cargo.features": "all", - "rust-analyzer.rustfmt.extraArgs": [ - "--edition", - "2024" - ], + "rust-analyzer.rustfmt.extraArgs": ["--edition", "2024"], "editor.formatOnSave": true, "editor.codeActionsOnSave": { "source.fixAll": "explicit" From 36b7444c88a37b9cd54abdc25889ee2bb88dbffb Mon Sep 17 00:00:00 2001 From: UncleSp1d3r Date: Mon, 30 Mar 2026 02:21:32 -0400 Subject: [PATCH 28/28] chore(format): update mdformat configuration for improved consistency Signed-off-by: UncleSp1d3r --- .mdformat.toml | 21 ++++++++++++--------- justfile | 2 +- 2 files changed, 13 insertions(+), 10 deletions(-) diff --git a/.mdformat.toml b/.mdformat.toml index d1a1c22..22c2e4e 100644 --- a/.mdformat.toml +++ b/.mdformat.toml @@ -10,20 +10,23 @@ exclude = [ "megalinter-reports/**", "**/*.result", "**/*.testfile", + "**/SKILL.md", # AI stuff + ".claude/**/*", # AI stuff + ".tessl/**/*", # AI stuff ] validate = true number = true wrap = "no" end_of_line = "lf" -# extensions = [ -# "gfm", -# "frontmatter", -# "footnote", -# "simple_breaks", -# "gfm_alerts", -# "toc", -# "wikilink", -# ] +extensions = [ + "gfm", + "footnote", + "front_matters", + "simple_breaks", + "wikilink", + "gfm_alerts", + "toc", +] [plugin.mkdocs] align_semantic_breaks_in_lists = true diff --git a/justfile b/justfile index dd22d66..0274d9e 100644 --- a/justfile +++ b/justfile @@ -44,7 +44,7 @@ format-json-yaml: @{{ mise_exec }} prettier --write "**/*.{json,yaml,yml}" format-docs: - @{{ mise_exec }} mdformat --exclude "target/*" --exclude "node_modules/*" . + @{{ mise_exec }} mdformat . fmt: @{{ mise_exec }} cargo fmt --all