diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index 1630007..a3fafa0 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -69,10 +69,7 @@ "--all-features" ], "rust-analyzer.cargo.features": "all", - "rust-analyzer.rustfmt.extraArgs": [ - "--edition", - "2024" - ], + "rust-analyzer.rustfmt.extraArgs": ["--edition", "2024"], "editor.formatOnSave": true, "editor.codeActionsOnSave": { "source.fixAll": "explicit" diff --git a/.gemini/settings.json b/.gemini/settings.json index ebfccaa..c4f2241 100644 --- a/.gemini/settings.json +++ b/.gemini/settings.json @@ -1,12 +1,9 @@ { - "mcpServers": { - "tessl": { - "type": "stdio", - "command": "tessl", - "args": [ - "mcp", - "start" - ] + "mcpServers": { + "tessl": { + "type": "stdio", + "command": "tessl", + "args": ["mcp", "start"] + } } - } } diff --git a/.mcp.json b/.mcp.json index ebfccaa..c4f2241 100644 --- a/.mcp.json +++ b/.mcp.json @@ -1,12 +1,9 @@ { - "mcpServers": { - "tessl": { - "type": "stdio", - "command": "tessl", - "args": [ - "mcp", - "start" - ] + "mcpServers": { + "tessl": { + "type": "stdio", + "command": "tessl", + "args": ["mcp", "start"] + } } - } } diff --git a/.mdformat.toml b/.mdformat.toml index d1a1c22..22c2e4e 100644 --- a/.mdformat.toml +++ b/.mdformat.toml @@ -10,20 +10,23 @@ exclude = [ "megalinter-reports/**", "**/*.result", "**/*.testfile", + "**/SKILL.md", # AI stuff + ".claude/**/*", # AI stuff + ".tessl/**/*", # AI stuff ] validate = true number = true wrap = "no" end_of_line = "lf" -# extensions = [ -# "gfm", -# "frontmatter", -# "footnote", -# "simple_breaks", -# "gfm_alerts", -# "toc", -# "wikilink", -# ] +extensions = [ + "gfm", + "footnote", + "front_matters", + "simple_breaks", + "wikilink", + "gfm_alerts", + "toc", +] [plugin.mkdocs] align_semantic_breaks_in_lists = true diff --git a/.tessl/.gitignore b/.tessl/.gitignore deleted file mode 100644 index 7bbb394..0000000 --- a/.tessl/.gitignore +++ /dev/null @@ -1,2 +0,0 
@@ -tiles/ -RULES.md diff --git a/.vscode/settings.json b/.vscode/settings.json index 24d614f..13fcd97 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -21,12 +21,8 @@ "git.rebaseWhenSync": true, "git.replaceTagsWhenPull": true, "githubPullRequests.codingAgent.uiIntegration": true, - "ruff.path": [ - "${workspaceFolder}/.vscode/mise-tools/ruff" - ], - "ruff.interpreter": [ - "${workspaceFolder}/.vscode/mise-tools/python" - ], + "ruff.path": ["${workspaceFolder}/.vscode/mise-tools/ruff"], + "ruff.interpreter": ["${workspaceFolder}/.vscode/mise-tools/python"], "python.defaultInterpreterPath": "${workspaceFolder}/.vscode/mise-tools/python", "bun.runtime": "${workspaceFolder}/.vscode/mise-tools/bun" -} \ No newline at end of file +} diff --git a/AGENTS.md b/AGENTS.md index 964af34..91ace5b 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -204,7 +204,7 @@ cargo test --doc # Test documentation examples ### Currently Implemented (v0.1.0) -- **Offsets**: Absolute and from-end specifications (indirect and relative are parsed but not yet evaluated) +- **Offsets**: Absolute, from-end, and indirect specifications (relative offsets are parsed but not yet evaluated) - **Types**: `byte`, `short`, `long`, `quad`, `float`, `double`, `string`, `pstring` with endianness support; unsigned variants `ubyte`, `ushort`/`ubeshort`/`uleshort`, `ulong`/`ubelong`/`ulelong`, `uquad`/`ubequad`/`ulequad`; float/double endian variants `befloat`/`lefloat`, `bedouble`/`ledouble`; 32-bit date/timestamp types `date`/`ldate`/`bedate`/`beldate`/`ledate`/`leldate`; 64-bit date/timestamp types `qdate`/`qldate`/`beqdate`/`beqldate`/`leqdate`/`leqldate`; `pstring` is a Pascal string (length-prefixed) with support for 1/2/4-byte length prefixes via `/B`, `/H` (2-byte BE), `/h` (2-byte LE), `/L` (4-byte BE), `/l` (4-byte LE) suffixes, and the `/J` flag (stored length includes prefix width, JPEG convention) which is combinable with width suffixes (e.g., `pstring/HJ`); date values formatted as "Www 
Mmm DD HH:MM:SS YYYY" matching GNU `file` output; types are signed by default (libmagic-compatible) - **Operators**: `=` (equal), `!=` (not equal), `<` (less than), `>` (greater than), `<=` (less equal), `>=` (greater equal), `&` (bitwise AND with optional mask), `^` (bitwise XOR), `~` (bitwise NOT), `x` (any value) - **Nested Rules**: Hierarchical rule evaluation with proper indentation @@ -245,9 +245,8 @@ impl BinaryRegex for regex::bytes::Regex { ### Offset Specifications -- Indirect offsets are parsed into the AST but evaluation is not yet implemented (#37) +- Indirect offsets are fully implemented (parsing + evaluation) with specifiers: `.b/.B` (byte), `.s/.S` (short), `.l/.L` (long), `.q/.Q` (quad); lowercase = little-endian, uppercase = big-endian (GNU `file` semantics); pointer types signed by default; adjustment after closing paren: `(base.type)+adj` - Relative offsets are parsed into the AST but evaluation is not yet implemented (#38) -- Only absolute and from-end offsets are fully functional ### Magic File Syntax @@ -570,3 +569,7 @@ This project has the OSSF Best Practices passing badge. Maintain these standards - SECURITY.md documents vulnerability reporting with scope, safe harbor, and PGP key - AGENTS.md must accurately reflect implemented features (not aspirational) - `docs/src/release-verification.md` documents artifact signing for users + +## Agent Rules + +@.tessl/RULES.md follow the [instructions](.tessl/RULES.md) diff --git a/AI_POLICY.md b/AI_POLICY.md new file mode 100644 index 0000000..25e6927 --- /dev/null +++ b/AI_POLICY.md @@ -0,0 +1,31 @@ +# AI Usage Policy + +We build operator-focused security tools. AI coding assistants are part of how we do that. This policy is not anti-AI -- it is pro-accountability. + +Think of AI assistance like spellcheck. It catches typos, suggests corrections, and speeds up the mechanical parts of writing. But you are still responsible for your words and their consequences. 
+ +## The Rule + +**You own every line you submit.** You must be able to explain what it does and how it interacts with the rest of the system without asking your AI to explain it back to you. + +Everything else follows from that. + +## How We Work + +- **Disclose your tools.** Note what you used in your PR description -- Claude Code, Copilot, Cursor, whatever. No specific format required. + +- **Review AI-generated text before posting.** Issues, discussions, and PR descriptions must reflect your understanding, not a language model's first draft. Read it, cut the filler, make sure it says what you mean. + +- **No AI-generated media.** No generated images, logos, audio, or video. Text-based diagrams (ASCII art, Mermaid) and code are acceptable. + +- **Unreviewed output gets closed.** Hallucinated APIs, boilerplate that ignores project conventions, suggestions you clearly did not run -- these get closed without review. We are not a QA service for your AI's output. + +## Why + +Transparent by design means knowing what the code does and why it is there. Tested under pressure means every change was understood by the person who submitted it. AI makes capable engineers faster. It does not replace the understanding that makes contributions trustworthy. + +Every pull request is reviewed by a human. Submitting work you do not understand shifts that burden onto maintainers. That is not how we operate. + +## New Contributors + +Use AI to learn the codebase. Read the code it generates. Run it. Break it. Then submit work that reflects your understanding. We will help you through review -- that deal only works if the code is yours. diff --git a/GOTCHAS.md b/GOTCHAS.md index dfcf651..b872928 100644 --- a/GOTCHAS.md +++ b/GOTCHAS.md @@ -59,6 +59,18 @@ The nom `tuple` combinator is deprecated. Use bare tuple syntax `(a, b, c)` dire `type_keyword_to_kind` has `#[allow(clippy::too_many_lines)]` because it exceeds 100 lines with all date keywords. 
+### 3.5 `parse_number` Does Not Handle `+` Prefix + +`parse_number` handles `-` signs but not `+`. When parsing syntax like `+4` (e.g., indirect offset adjustments), consume the `+` character manually before calling `parse_number`. + +### 3.6 `parse_value` Requires Quoted Strings + +`parse_value()` does not accept bare unquoted strings. String values in magic file rules must be quoted (e.g., `string "MZ"` not `string MZ`). Integration tests writing magic files must use `r#"0 string "MZ" description"#` format. + +### 3.7 Indirect Offset Pointer Specifiers Follow GNU `file` Semantics + +Lowercase pointer specifiers (`.s`, `.l`, `.q`) map to **little-endian**, not native endian. Uppercase (`.S`, `.L`, `.Q`) map to big-endian. All numeric pointer types are **signed by default** (per S6.3). The adjustment is parsed **after** the closing paren: `(base.type)+adj`, not `(base.type+adj)`. + ## 4. Module Visibility & Re-exports ### 4.1 Private Engine Module diff --git a/docs/solutions/integration-issues/indirect-offset-parser-evaluator-sync.md b/docs/solutions/integration-issues/indirect-offset-parser-evaluator-sync.md new file mode 100644 index 0000000..bb611da --- /dev/null +++ b/docs/solutions/integration-issues/indirect-offset-parser-evaluator-sync.md @@ -0,0 +1,165 @@ +--- +title: Implement indirect offset parsing in magic file grammar +date: 2026-03-30 +status: resolved +severity: high +category: integration-issues +components: + - parser/grammar + - evaluator/offset + - integration +tags: + - parser + - indirect-offset + - nom + - magic-file-syntax + - pointer-specifier +issue: '#37' +branch: 37-evaluator-implement-indirect-offset-resolution +symptoms: + - parse_offset("(0x3c.l)") fails with parse error + - Magic files containing indirect offset syntax cannot be loaded via MagicDatabase::load_from_file() + - resolve_indirect_offset() is unreachable dead code from text-magic loading path +root_cause: parse_offset() had no branch for '('-prefixed input; always 
delegated to parse_number() which only handles numeric literals +solution_files: + - src/parser/grammar/mod.rs + - src/parser/grammar/tests.rs + - tests/indirect_offset_integration.rs +related_gotchas: + - parse_number() handles '-' prefix but not '+'; positive adjustments need manual '+' consumption + - parse_value() requires quoted strings; bare string literals cause integration test failures +--- + +# Indirect Offset Parser-Evaluator Sync + +## Problem + +The evaluator for indirect offsets (`resolve_indirect_offset()` in `src/evaluator/offset/indirect.rs`) was fully implemented with 35 unit tests, but the parser in `src/parser/grammar/mod.rs` could not produce `OffsetSpec::Indirect` AST nodes. The `parse_offset()` function only handled absolute numeric offsets and had no branch for `(`-prefixed indirect offset syntax like `(0x3c.l)` or `(0x3c.l+4)`. + +This meant the feature was unreachable through the public `MagicDatabase::load_from_file()` API -- the primary way users load text magic files. + +## Root Cause + +`parse_offset()` unconditionally delegated to `parse_number()`, which only parses numeric literals. Input starting with `(` was rejected as a parse error. The evaluator code was effectively dead code from the text-magic loading path. + +## Solution + +### 1. Added `pointer_specifier_to_type()` helper + +Maps single-character pointer specifiers to `(TypeKind, Endianness)` per libmagic convention: + +| Specifier | Width | Endianness | +| ---------- | ------ | ---------- | +| `.b`, `.B` | 1 byte | Native | +| `.s` | 2 byte | Native | +| `.S` | 2 byte | Big | +| `.l` | 4 byte | Native | +| `.L` | 4 byte | Big | +| `.q` | 8 byte | Native | +| `.Q` | 8 byte | Big | + +All pointer types are unsigned (`signed: false`). Lowercase = native endian, uppercase = big-endian. + +### 2. Added `parse_indirect_offset()` function + +Parses `(base.type)` and `(base.type+/-adj)` syntax: + +1. Consume `(` +2. Parse base offset via `parse_number()` +3. 
Consume `.` and type specifier character +4. Optionally parse adjustment (see gotcha below) +5. Consume `)` +6. Return `OffsetSpec::Indirect { base_offset, pointer_type, adjustment, endian }` + +### 3. Updated `parse_offset()` to branch on leading `(` + +```rust +pub fn parse_offset(input: &str) -> IResult<&str, OffsetSpec> { + let (input, _) = multispace0(input)?; + if input.starts_with('(') { + let (input, spec) = parse_indirect_offset(input)?; + let (input, _) = multispace0(input)?; + Ok((input, spec)) + } else { + let (input, offset_value) = parse_number(input)?; + let (input, _) = multispace0(input)?; + Ok((input, OffsetSpec::Absolute(offset_value))) + } +} +``` + +### 4. No changes needed to `parse_rule_offset()` + +It delegates to `parse_offset()`, so hierarchical forms like `>(0x3c.l)` work automatically. + +## Gotchas Discovered + +### `parse_number()` does not handle `+` prefix + +`parse_number()` handles `-` internally but not `+`. For `+N` adjustments, the `+` must be consumed manually: + +```rust +let (input, adjustment) = if input.starts_with('+') { + let (input, _) = char('+')(input)?; + parse_number(input)? +} else if input.starts_with('-') { + parse_number(input)? +} else { + (input, 0) +}; +``` + +Do NOT modify `parse_number()` globally -- it is shared by offset and value parsing, and adding `+` support would change semantics elsewhere. + +### `parse_value()` requires quoted strings + +Integration tests initially failed because `parse_value()` does not accept bare strings. Magic file string values must be quoted: + +```text +# Correct +0 string "MZ" DOS executable + +# Wrong -- parse_value() rejects bare "MZ" +0 string MZ DOS executable +``` + +### Use big-endian specifiers in cross-platform tests + +Prefer `.L` (big-endian long) over `.l` (native) in integration test magic files so byte buffers are deterministic across architectures. 
+ +## Prevention Strategies + +### Parser-Evaluator Parity Checklist + +When adding a new AST variant, ensure: + +1. **Parser produces it** -- unit test parses raw syntax, asserts correct AST node +2. **Evaluator consumes it** -- unit test constructs AST node, asserts evaluation result +3. **End-to-end test exists** -- integration test through `MagicDatabase::load_from_file()` proves the full pipeline works +4. **Codegen handles it** -- if it can appear in built-in rules, update `src/parser/codegen.rs` +5. **Strength calculation covers it** -- update `src/evaluator/strength.rs` if scoring changes + +### Integration Test Template + +```rust +#[test] +fn test_feature_end_to_end() { + let temp_dir = TempDir::new().unwrap(); + let magic_path = temp_dir.path().join("test.magic"); + let mut f = fs::File::create(&magic_path).unwrap(); + writeln!(f, r#"0 string "MAGIC" Test match"#).unwrap(); + + let db = MagicDatabase::load_from_file(&magic_path).unwrap(); + let result = db.evaluate_buffer(b"MAGIC\x00data").unwrap(); + assert!(result.description.contains("Test match")); +} +``` + +## Cross-References + +- **Evaluator solution**: `docs/solutions/logic-errors/indirect-offset-resolution.md` +- **Magic format spec**: `docs/MAGIC_FORMAT.md` (lines 106-126, indirect offset section) +- **Gotchas**: `GOTCHAS.md` sections 3.5 (`parse_number` `+` limitation) and 3.6 (quoted strings) +- **Architecture**: `AGENTS.md` offset specifications section +- **Issue**: #37 (indirect offset resolution) +- **Related gotchas**: S2 (enum variant checklists), S3 (parser architecture split), S5 (numeric type pitfalls) diff --git a/docs/solutions/logic-errors/indirect-offset-gnu-file-semantics.md b/docs/solutions/logic-errors/indirect-offset-gnu-file-semantics.md new file mode 100644 index 0000000..87bb25c --- /dev/null +++ b/docs/solutions/logic-errors/indirect-offset-gnu-file-semantics.md @@ -0,0 +1,123 @@ +--- +title: 'Fix indirect offset parser: endianness, signedness, and adjustment placement' 
+date: 2026-03-30 +status: resolved +severity: high +category: logic-errors +tags: + - parser + - indirect-offset + - gnu-file-semantics + - endianness + - signed-by-default +components: + - src/parser/grammar/mod.rs + - src/parser/grammar/tests.rs + - tests/indirect_offset_integration.rs + - GOTCHAS.md + - AGENTS.md +symptoms: + - (0x3c.l)+4 parsed as indirect with adjustment=0 and leftover +4, breaking parse_magic_rule() + - Lowercase pointer specifiers (.s, .l, .q) produced Endianness::Native instead of Endianness::Little + - Pointer types were unsigned, mismatching libmagic signed-by-default convention +root_causes: + - pointer_specifier_to_type() mapped lowercase specifiers to Endianness::Native instead of Endianness::Little + - 'pointer_specifier_to_type() set signed: false instead of signed: true' + - parse_indirect_offset() consumed adjustment inside parentheses instead of after closing paren +references: + - GOTCHAS.md S6.3 (signed-by-default types) + - GOTCHAS.md S3.7 (added by this fix) + - 'GNU file(1) man page: indirect offset syntax' +related_issues: + - 37 +--- + +# Fix Indirect Offset Parser: GNU `file` Semantics + +## Problem + +The indirect offset parser had three semantic errors that caused it to produce incorrect AST nodes. The code compiled and tests passed, but behavior was wrong relative to the GNU `file` specification: + +1. **Endianness**: Lowercase specifiers (`.s`, `.l`, `.q`) mapped to `Endianness::Native` instead of `Endianness::Little` +2. **Signedness**: Pointer types set to `signed: false` instead of `signed: true` (GOTCHAS S6.3) +3. **Adjustment syntax**: Parsed inside parens `(0x3c.l+4)` instead of after them `(0x3c.l)+4` + +The tests validated the wrong implementation rather than the specification -- a "tests match code but not spec" anti-pattern. 
+ +## Root Cause + +The initial implementation followed incorrect assumptions: + +- Lowercase = native endian (wrong: GNU `file` defines lowercase = little-endian) +- Pointer types = unsigned (wrong: libmagic types are signed by default per S6.3) +- Adjustment inside parens (wrong: GNU `file` syntax places adjustment after `)`) + +Tests were written alongside the code, so they confirmed the implementation's behavior rather than the spec's requirements. + +## Solution + +Three changes in `src/parser/grammar/mod.rs`: + +### Fix 1: Endianness mapping + +```rust +// Before (wrong) +'l' => Some((TypeKind::Long { endian: Endianness::Native, signed: false }, Endianness::Native)) + +// After (correct -- GNU `file` lowercase = little-endian) +'l' => Some((TypeKind::Long { endian: Endianness::Little, signed: true }, Endianness::Little)) +``` + +Applied to all lowercase specifiers (`b`, `s`, `l`, `q`). Uppercase specifiers were already correct (`Endianness::Big`). + +### Fix 2: Signed-by-default + +Changed all pointer types from `signed: false` to `signed: true` across every specifier arm. + +### Fix 3: Adjustment after closing paren + +```rust +// Before (wrong): adjustment consumed inside parens +let (input, adjustment) = parse_adjustment(input)?; +let (input, _) = char(')')(input)?; + +// After (correct): close paren first, then adjustment +let (input, _) = char(')')(input)?; +let (input, adjustment) = parse_adjustment(input)?; +``` + +### Test corrections + +- All parser unit tests updated to expect `Endianness::Little`, `signed: true`, and `(base.type)+adj` syntax +- Integration tests updated with little-endian byte layouts and lowercase `.l` specifier +- Added new test: `>(0x3c.l)+4` child rule with adjustment after paren + +## Prevention Strategies + +### Spec-first test writing + +Write test expectations from the spec (GNU `file` man page, GOTCHAS.md) before implementing. Document the spec reference above each test case. 
In TDD, the RED phase must derive expected values from the spec, not from running the code. + +### Cross-reference GOTCHAS.md for type mappings + +Treat GOTCHAS.md as a mandatory checklist when adding type mappings: + +- **S6.3**: Default to `signed: true` unless keyword has `u` prefix +- **S6.1**: Uppercase = big-endian, lowercase = little-endian +- **S3.7**: Indirect offset specifiers follow GNU `file` semantics + +### Prefer deterministic endianness + +`Endianness::Native` should never appear in indirect offset resolution. Every endianness value must be explicitly `Little` or `Big` per the spec. Tests must use explicit byte sequences, not `to_ne_bytes()`. + +### Verify against real magic files + +Extract test inputs from `/usr/share/misc/magic` or the upstream [file/file](https://github.com/file/file) repository rather than inventing syntax. + +## Cross-References + +- **Evaluator solution**: `docs/solutions/logic-errors/indirect-offset-resolution.md` +- **Parser-evaluator sync**: `docs/solutions/integration-issues/indirect-offset-parser-evaluator-sync.md` +- **Magic format spec**: `docs/MAGIC_FORMAT.md` (lines 106-126) +- **Gotchas**: `GOTCHAS.md` sections 3.5, 3.6, 3.7, 6.3 +- **Issue**: #37 diff --git a/docs/solutions/logic-errors/indirect-offset-resolution.md b/docs/solutions/logic-errors/indirect-offset-resolution.md new file mode 100644 index 0000000..be10ce0 --- /dev/null +++ b/docs/solutions/logic-errors/indirect-offset-resolution.md @@ -0,0 +1,82 @@ +--- +title: Implementing Indirect Offset Resolution for Binary Format Detection +category: logic-errors +date: 2026-03-30 +tags: [evaluator, offsets, indirect, binary-formats, pe-header, pointer-chasing] +issue: '#37' +severity: high +components: [evaluator/offset/indirect.rs, evaluator/offset/mod.rs] +--- + +# Implementing Indirect Offset Resolution + +## Problem + +Indirect offsets (`OffsetSpec::Indirect`) were parsed into the AST but evaluation returned "not yet implemented." 
This blocked detection of complex binary formats like PE executables, where a pointer at offset `0x3C` must be read and dereferenced to locate the PE header. + +Syntax: `(0x3c.l)` -- read a 32-bit long at offset 0x3C, use that value as the actual offset. + +## Root Cause + +The evaluator's `resolve_offset()` dispatcher in `offset/mod.rs` had a stub for `OffsetSpec::Indirect` that returned `UnsupportedType`. The implementation required a multi-step pointer dereference pipeline that did not exist. + +## Solution + +Implemented a 4-step pipeline in `evaluator/offset/indirect.rs`: + +1. **Resolve base offset** to absolute position (reuses `resolve_absolute_offset`, supports negative/from-end) +2. **Read pointer value** at that position using the specified numeric type and endianness +3. **Apply adjustment** with checked arithmetic (`checked_add`/`checked_sub`) +4. **Validate final offset** against buffer bounds + +### Key Design Decisions + +**Signed pointer reinterpretation**: Signed negative pointer values (e.g., `i32(-1)` from `[0xFF, 0xFF, 0xFF, 0xFF]`) are reinterpreted as raw unsigned (`u64::MAX`) via `extract_raw_unsigned()`. This matches libmagic's behavior where the bit pattern is what matters, not the signed interpretation. The bounds check at step 4 catches these enormous values. + +**Separated concerns**: `read_pointer()` handles type dispatch and endianness, `extract_raw_unsigned()` handles signed-to-unsigned conversion, `apply_adjustment()` handles arithmetic with overflow protection. Each is independently testable. + +**`i64::MIN` edge case**: `apply_adjustment` explicitly handles `i64::MIN` because `-i64::MIN` overflows. Returns an error rather than panicking. 
+ +```rust +// Core pipeline +let abs_base = resolve_absolute_offset(base_offset, buffer)?; +let pointer_value = read_pointer(buffer, abs_base, pointer_type, endian)?; +let final_offset = apply_adjustment(pointer_value, adjustment)?; +if final_offset >= buffer.len() { return Err(BufferOverrun) } +``` + +### Dispatcher Update + +`offset/mod.rs` line 71 changed from stub to: + +```rust +OffsetSpec::Indirect { .. } => indirect::resolve_indirect_offset(spec, buffer), +``` + +## Prevention Tips + +- When adding new offset types, follow the same pattern: resolve base, read value, apply adjustment, validate bounds. The 4-step pipeline is the established pattern. +- Always use `checked_add`/`checked_sub` for offset arithmetic -- malicious files can craft values targeting overflow. +- Signed pointer values must be treated as raw bit patterns (reinterpret as unsigned), not as mathematical negatives. This is a libmagic compatibility requirement. + +## Test Coverage + +35 unit tests covering: + +- All pointer types (byte, short, long, quad) with both endiannesses +- Signed and unsigned pointer values +- Positive and negative adjustments +- From-end base offsets +- Pointer read buffer overruns +- Final offset buffer overruns +- Arithmetic overflow and underflow +- Unsupported pointer types (string, float, double) +- PE-header-style real-world scenario (0x3C pointer) +- 32-bit platform awareness (conditional assertions) + +## Related + +- Issue #38: Relative offset resolution (next offset type to implement) +- `evaluator/offset/absolute.rs`: Reused for base offset resolution +- `evaluator/types/`: `read_byte`, `read_short`, `read_long`, `read_quad` reused for pointer reading +- GOTCHAS.md S5.1: `usize::from(u32)` does not compile on 32-bit targets diff --git a/docs/src/ast-structures.md b/docs/src/ast-structures.md index 0910425..2058335 100644 --- a/docs/src/ast-structures.md +++ b/docs/src/ast-structures.md @@ -238,17 +238,21 @@ let string_type = TypeKind::String { 
Pascal-style length-prefixed strings where the length prefix can be 1, 2, or 4 bytes depending on the `length_width` field. **Structure:** + - Length prefix: 1, 2, or 4 bytes indicating string length, with configurable endianness - String data: The number of bytes specified by the length prefix **Example:** -``` + +```text 0 pstring JPEG 0 pstring/H JPEG ``` + The first line reads a 1-byte length prefix (default), then reads that many bytes as a string. The second line reads a 2-byte big-endian length prefix. **Behavior:** + - Returns `Value::String` containing the string data (without the length prefix) - Performs bounds checking on both the length prefix and the string data - Supports all string comparison operators @@ -275,6 +279,7 @@ pub enum PStringLengthWidth { ``` **Suffix conventions:** + - `/B` - 1-byte length prefix (default if no suffix specified) - `/H` - 2-byte big-endian length prefix - `/h` - 2-byte little-endian length prefix diff --git a/docs/src/evaluator.md b/docs/src/evaluator.md index f6f7018..7668f70 100644 --- a/docs/src/evaluator.md +++ b/docs/src/evaluator.md @@ -195,7 +195,7 @@ pub fn read_pstring( - `PStringLengthWidth::TwoByteLE` - 2-byte little-endian length prefix (`/h` suffix) - `PStringLengthWidth::FourByteBE` - 4-byte big-endian length prefix (`/L` suffix) - `PStringLengthWidth::FourByteLE` - 4-byte little-endian length prefix (`/l` suffix) -- **Length interpretation**: +- **Length interpretation**: - Reads 1, 2, or 4 bytes from buffer using `from_be_bytes` or `from_le_bytes` depending on variant - The length value specifies how many bytes of string data follow the prefix - **`/J` flag** (`length_includes_itself`): diff --git a/docs/src/magic-format.md b/docs/src/magic-format.md index 8a372f5..b7c9775 100644 --- a/docs/src/magic-format.md +++ b/docs/src/magic-format.md @@ -179,20 +179,20 @@ Float comparison behavior: ### Date/Timestamp Types -| Type | Size | Endianness | UTC/Local | Description | -| ----------- | ------- | 
------------- | --------- | ----------------------------------------------------------------------- | -| `date` | 4 bytes | native | UTC | 32-bit Unix timestamp (signed seconds since epoch), formatted as UTC | -| `ldate` | 4 bytes | native | Local | 32-bit Unix timestamp, formatted as local time | -| `bedate` | 4 bytes | big-endian | UTC | 32-bit Unix timestamp, big-endian byte order, UTC | -| `beldate` | 4 bytes | big-endian | Local | 32-bit Unix timestamp, big-endian byte order, local time | -| `ledate` | 4 bytes | little-endian | UTC | 32-bit Unix timestamp, little-endian byte order, UTC | -| `leldate` | 4 bytes | little-endian | Local | 32-bit Unix timestamp, little-endian byte order, local time | -| `qdate` | 8 bytes | native | UTC | 64-bit Unix timestamp (signed seconds since epoch), formatted as UTC | -| `qldate` | 8 bytes | native | Local | 64-bit Unix timestamp, formatted as local time | -| `beqdate` | 8 bytes | big-endian | UTC | 64-bit Unix timestamp, big-endian byte order, UTC | -| `beqldate` | 8 bytes | big-endian | Local | 64-bit Unix timestamp, big-endian byte order, local time | -| `leqdate` | 8 bytes | little-endian | UTC | 64-bit Unix timestamp, little-endian byte order, UTC | -| `leqldate` | 8 bytes | little-endian | Local | 64-bit Unix timestamp, little-endian byte order, local time | +| Type | Size | Endianness | UTC/Local | Description | +| ---------- | ------- | ------------- | --------- | -------------------------------------------------------------------- | +| `date` | 4 bytes | native | UTC | 32-bit Unix timestamp (signed seconds since epoch), formatted as UTC | +| `ldate` | 4 bytes | native | Local | 32-bit Unix timestamp, formatted as local time | +| `bedate` | 4 bytes | big-endian | UTC | 32-bit Unix timestamp, big-endian byte order, UTC | +| `beldate` | 4 bytes | big-endian | Local | 32-bit Unix timestamp, big-endian byte order, local time | +| `ledate` | 4 bytes | little-endian | UTC | 32-bit Unix timestamp, little-endian byte order, 
UTC | +| `leldate` | 4 bytes | little-endian | Local | 32-bit Unix timestamp, little-endian byte order, local time | +| `qdate` | 8 bytes | native | UTC | 64-bit Unix timestamp (signed seconds since epoch), formatted as UTC | +| `qldate` | 8 bytes | native | Local | 64-bit Unix timestamp, formatted as local time | +| `beqdate` | 8 bytes | big-endian | UTC | 64-bit Unix timestamp, big-endian byte order, UTC | +| `beqldate` | 8 bytes | big-endian | Local | 64-bit Unix timestamp, big-endian byte order, local time | +| `leqdate` | 8 bytes | little-endian | UTC | 64-bit Unix timestamp, little-endian byte order, UTC | +| `leqldate` | 8 bytes | little-endian | Local | 64-bit Unix timestamp, little-endian byte order, local time | Timestamp values are formatted as strings matching GNU file output format: "Www Mmm DD HH:MM:SS YYYY" diff --git a/docs/src/parser.md b/docs/src/parser.md index 3a25386..b59adde 100644 --- a/docs/src/parser.md +++ b/docs/src/parser.md @@ -266,6 +266,7 @@ Pascal strings store the length as a prefix (1, 2, or 4 bytes depending on the v The parser supports date and timestamp types for parsing Unix timestamps (signed seconds since epoch). There are 12 type keywords: **32-bit timestamps (Date):** + - `date` - Native endian, UTC - `ldate` - Native endian, local time - `bedate` - Big-endian, UTC @@ -274,6 +275,7 @@ The parser supports date and timestamp types for parsing Unix timestamps (signed - `leldate` - Little-endian, local time **64-bit timestamps (QDate):** + - `qdate` - Native endian, UTC - `qldate` - Native endian, local time - `beqdate` - Big-endian, UTC diff --git a/justfile b/justfile index dd22d66..0274d9e 100644 --- a/justfile +++ b/justfile @@ -44,7 +44,7 @@ format-json-yaml: @{{ mise_exec }} prettier --write "**/*.{json,yaml,yml}" format-docs: - @{{ mise_exec }} mdformat --exclude "target/*" --exclude "node_modules/*" . + @{{ mise_exec }} mdformat . 
fmt: @{{ mise_exec }} cargo fmt --all diff --git a/mise.lock b/mise.lock index fceea7b..501bf6f 100644 --- a/mise.lock +++ b/mise.lock @@ -49,120 +49,100 @@ checksum = "sha256:5414b7124a91f4b5abee62e5c9d84802237734f8d15b9b7032732a32c3ebf url = "https://github.com/rhysd/actionlint/releases/download/v1.7.11/actionlint_1.7.11_windows_amd64.zip" [[tools.bun]] -version = "1.3.10" +version = "1.3.11" backend = "core:bun" [tools.bun."platforms.linux-arm64"] -checksum = "sha256:fa5ecb25cafa8e8f5c87a0f833719d46dd0af0a86c7837d806531212d55636d3" -url = "https://github.com/oven-sh/bun/releases/download/bun-v1.3.10/bun-linux-aarch64.zip" +checksum = "sha256:d13944da12a53ecc74bf6a720bd1d04c4555c038dfe422365356a7be47691fdf" +url = "https://github.com/oven-sh/bun/releases/download/bun-v1.3.11/bun-linux-aarch64.zip" [tools.bun."platforms.linux-arm64-musl"] -checksum = "sha256:d2c81365a2e529b78a42330d3a0056e8dbd7896b4a6782c8e392b6532141e34d" -url = "https://github.com/oven-sh/bun/releases/download/bun-v1.3.10/bun-linux-aarch64-musl.zip" +checksum = "sha256:0f5bf5dc3f276053196274bb84f90a44e2fa40c9432bd6757e3247a8d9476a3d" +url = "https://github.com/oven-sh/bun/releases/download/bun-v1.3.11/bun-linux-aarch64-musl.zip" [tools.bun."platforms.linux-x64"] -checksum = "sha256:f57bc0187e39623de716ba3a389fda5486b2d7be7131a980ba54dc7b733d2e08" -url = "https://github.com/oven-sh/bun/releases/download/bun-v1.3.10/bun-linux-x64.zip" +checksum = "sha256:8611ba935af886f05a6f38740a15160326c15e5d5d07adef966130b4493607ed" +url = "https://github.com/oven-sh/bun/releases/download/bun-v1.3.11/bun-linux-x64.zip" [tools.bun."platforms.linux-x64-baseline"] -checksum = "sha256:41201a8c5ee74a9dcbb1ce25a1104f1f929838b57a845aa78d98379b0ce7cde2" -url = "https://github.com/oven-sh/bun/releases/download/bun-v1.3.10/bun-linux-x64-baseline.zip" +checksum = "sha256:abe346f63414547cdf6b35b7a649a490c728b93d006226156923918a84c0e59b" +url = 
"https://github.com/oven-sh/bun/releases/download/bun-v1.3.11/bun-linux-x64-baseline.zip" [tools.bun."platforms.linux-x64-musl"] -checksum = "sha256:48a6c32277d343db0148ce066336472ffd380358a4d26bb1329714742492d824" -url = "https://github.com/oven-sh/bun/releases/download/bun-v1.3.10/bun-linux-x64-musl.zip" +checksum = "sha256:b0fce3bc4fab52f26a1e0d8886dc07fd0c0eb2a274cb343b59c83a2d5997b5b1" +url = "https://github.com/oven-sh/bun/releases/download/bun-v1.3.11/bun-linux-x64-musl.zip" [tools.bun."platforms.linux-x64-musl-baseline"] -checksum = "sha256:a7bc4cdea1ef255a83adbf39c7aafcd30e09f2b8f74deec4b10ee318bc024d1f" -url = "https://github.com/oven-sh/bun/releases/download/bun-v1.3.10/bun-linux-x64-musl-baseline.zip" +checksum = "sha256:2fa2b697f14ada86a28df771d3876ca7606d7453b2339454893b1937aa9c0c7e" +url = "https://github.com/oven-sh/bun/releases/download/bun-v1.3.11/bun-linux-x64-musl-baseline.zip" [tools.bun."platforms.macos-arm64"] -checksum = "sha256:82034e87c9d9b4398ea619aee2eed5d2a68c8157e9a6ae2d1052d84d533ccd8d" -url = "https://github.com/oven-sh/bun/releases/download/bun-v1.3.10/bun-darwin-aarch64.zip" +checksum = "sha256:6f5a3467ed9caec4795bf78cd476507d9f870c7d57b86c945fcb338126772ffc" +url = "https://github.com/oven-sh/bun/releases/download/bun-v1.3.11/bun-darwin-aarch64.zip" [tools.bun."platforms.macos-x64"] -checksum = "sha256:c1d90bf6140f20e572c473065dc6b37a4b036349b5e9e4133779cc642ad94323" -url = "https://github.com/oven-sh/bun/releases/download/bun-v1.3.10/bun-darwin-x64.zip" +checksum = "sha256:c4fe2b9247218b0295f24e895aaec8fee62e74452679a9026b67eacbd611a286" +url = "https://github.com/oven-sh/bun/releases/download/bun-v1.3.11/bun-darwin-x64.zip" [tools.bun."platforms.macos-x64-baseline"] -checksum = "sha256:f9686c4e4e760db4cde77a0f1fad05e552648b9c9cbfa4f7fc9a7ec26b9f3267" -url = "https://github.com/oven-sh/bun/releases/download/bun-v1.3.10/bun-darwin-x64-baseline.zip" +checksum = 
"sha256:fb6739b08bf54550edaa7c824cd5b2dca45b6a06afef408443087a63105f6f8d" +url = "https://github.com/oven-sh/bun/releases/download/bun-v1.3.11/bun-darwin-x64-baseline.zip" [tools.bun."platforms.windows-x64"] -checksum = "sha256:7a77b3e245e2e26965c93089a4a1332e8a326d3364c89fae1d1fd99cdd3cd73d" -url = "https://github.com/oven-sh/bun/releases/download/bun-v1.3.10/bun-windows-x64.zip" +checksum = "sha256:066f8694f8b7d8df592452746d18f01710d4053e93030922dbc6e8c34a8c4b9f" +url = "https://github.com/oven-sh/bun/releases/download/bun-v1.3.11/bun-windows-x64.zip" [tools.bun."platforms.windows-x64-baseline"] -checksum = "sha256:715709c69b176e20994533d3292bd0b7c32de9c0c5575b916746ec6b2aa38346" -url = "https://github.com/oven-sh/bun/releases/download/bun-v1.3.10/bun-windows-x64-baseline.zip" +checksum = "sha256:9d0e0f923e9626f3bc6044fc32e0d3ab29039aea753f5678ef8801cf26f75288" +url = "https://github.com/oven-sh/bun/releases/download/bun-v1.3.11/bun-windows-x64-baseline.zip" [[tools.cargo-binstall]] -version = "1.17.6" +version = "1.17.8" backend = "aqua:cargo-bins/cargo-binstall" [tools.cargo-binstall."platforms.linux-arm64"] -checksum = "sha256:e5f2c4b79b10370dff707b86a14e7a0ad399c5dc5853824e933432910741992c" -url = "https://github.com/cargo-bins/cargo-binstall/releases/download/v1.17.6/cargo-binstall-aarch64-unknown-linux-musl.tgz" +checksum = "sha256:81d6245bd1a7a89e914d29af81d82280540e94927e61492a0fc359820cd97abb" +url = "https://github.com/cargo-bins/cargo-binstall/releases/download/v1.17.8/cargo-binstall-aarch64-unknown-linux-musl.tgz" [tools.cargo-binstall."platforms.linux-arm64-musl"] -checksum = "sha256:e5f2c4b79b10370dff707b86a14e7a0ad399c5dc5853824e933432910741992c" -url = "https://github.com/cargo-bins/cargo-binstall/releases/download/v1.17.6/cargo-binstall-aarch64-unknown-linux-musl.tgz" +checksum = "sha256:81d6245bd1a7a89e914d29af81d82280540e94927e61492a0fc359820cd97abb" +url = 
"https://github.com/cargo-bins/cargo-binstall/releases/download/v1.17.8/cargo-binstall-aarch64-unknown-linux-musl.tgz" [tools.cargo-binstall."platforms.linux-x64"] -checksum = "sha256:f926d96e9f0822ded35c4ac2071ce190bd1311565695c49c45e295de0d685aaa" -url = "https://github.com/cargo-bins/cargo-binstall/releases/download/v1.17.6/cargo-binstall-x86_64-unknown-linux-musl.tgz" +checksum = "sha256:1da1ef72448db667cc4ae6d48e37451087602c8c07dc61782a4a5e538303e015" +url = "https://github.com/cargo-bins/cargo-binstall/releases/download/v1.17.8/cargo-binstall-x86_64-unknown-linux-musl.tgz" [tools.cargo-binstall."platforms.linux-x64-baseline"] -checksum = "sha256:f926d96e9f0822ded35c4ac2071ce190bd1311565695c49c45e295de0d685aaa" -url = "https://github.com/cargo-bins/cargo-binstall/releases/download/v1.17.6/cargo-binstall-x86_64-unknown-linux-musl.tgz" +checksum = "sha256:1da1ef72448db667cc4ae6d48e37451087602c8c07dc61782a4a5e538303e015" +url = "https://github.com/cargo-bins/cargo-binstall/releases/download/v1.17.8/cargo-binstall-x86_64-unknown-linux-musl.tgz" [tools.cargo-binstall."platforms.linux-x64-musl"] -checksum = "sha256:f926d96e9f0822ded35c4ac2071ce190bd1311565695c49c45e295de0d685aaa" -url = "https://github.com/cargo-bins/cargo-binstall/releases/download/v1.17.6/cargo-binstall-x86_64-unknown-linux-musl.tgz" +checksum = "sha256:1da1ef72448db667cc4ae6d48e37451087602c8c07dc61782a4a5e538303e015" +url = "https://github.com/cargo-bins/cargo-binstall/releases/download/v1.17.8/cargo-binstall-x86_64-unknown-linux-musl.tgz" [tools.cargo-binstall."platforms.linux-x64-musl-baseline"] -checksum = "sha256:f926d96e9f0822ded35c4ac2071ce190bd1311565695c49c45e295de0d685aaa" -url = "https://github.com/cargo-bins/cargo-binstall/releases/download/v1.17.6/cargo-binstall-x86_64-unknown-linux-musl.tgz" +checksum = "sha256:1da1ef72448db667cc4ae6d48e37451087602c8c07dc61782a4a5e538303e015" +url = 
"https://github.com/cargo-bins/cargo-binstall/releases/download/v1.17.8/cargo-binstall-x86_64-unknown-linux-musl.tgz" [tools.cargo-binstall."platforms.macos-arm64"] -checksum = "sha256:101447fa30a723ca8e1a13cec11bb1350b7179331b2aa7054d27bef7a3e19021" -url = "https://github.com/cargo-bins/cargo-binstall/releases/download/v1.17.6/cargo-binstall-aarch64-apple-darwin.zip" +checksum = "sha256:af87346fdb186f0a2333bc0a30cfddd6faa98b31145ef1bb19c284aedea65972" +url = "https://github.com/cargo-bins/cargo-binstall/releases/download/v1.17.8/cargo-binstall-aarch64-apple-darwin.zip" [tools.cargo-binstall."platforms.macos-x64"] -checksum = "sha256:cd07fd79e2848b13b994e3f83fa5377b631625b847f0734219f2706feb518258" -url = "https://github.com/cargo-bins/cargo-binstall/releases/download/v1.17.6/cargo-binstall-x86_64-apple-darwin.zip" +checksum = "sha256:db353e01b582c97382178db9b4dfe22d81109782e480a38f3db953e62f569952" +url = "https://github.com/cargo-bins/cargo-binstall/releases/download/v1.17.8/cargo-binstall-x86_64-apple-darwin.zip" [tools.cargo-binstall."platforms.macos-x64-baseline"] -checksum = "sha256:cd07fd79e2848b13b994e3f83fa5377b631625b847f0734219f2706feb518258" -url = "https://github.com/cargo-bins/cargo-binstall/releases/download/v1.17.6/cargo-binstall-x86_64-apple-darwin.zip" +checksum = "sha256:db353e01b582c97382178db9b4dfe22d81109782e480a38f3db953e62f569952" +url = "https://github.com/cargo-bins/cargo-binstall/releases/download/v1.17.8/cargo-binstall-x86_64-apple-darwin.zip" [tools.cargo-binstall."platforms.windows-x64"] -checksum = "sha256:5fcbddde2d415704d2432bbe606a5767ddaf1ef4ee2c16b7828f8be2ed1e5a5c" -url = "https://github.com/cargo-bins/cargo-binstall/releases/download/v1.17.6/cargo-binstall-x86_64-pc-windows-msvc.zip" +checksum = "sha256:fef07560d4e391812091bb30c6ed1bd5289f74403a0c947b47b8a8c7a597b51b" +url = "https://github.com/cargo-bins/cargo-binstall/releases/download/v1.17.8/cargo-binstall-x86_64-pc-windows-msvc.zip" 
[tools.cargo-binstall."platforms.windows-x64-baseline"] -checksum = "sha256:5fcbddde2d415704d2432bbe606a5767ddaf1ef4ee2c16b7828f8be2ed1e5a5c" -url = "https://github.com/cargo-bins/cargo-binstall/releases/download/v1.17.6/cargo-binstall-x86_64-pc-windows-msvc.zip" - -[[tools.cargo-binstall]] -version = "1.17.7" -backend = "aqua:cargo-bins/cargo-binstall" - -[tools.cargo-binstall."platforms.linux-x64"] -checksum = "sha256:29b5ecfb6e03c2511a617c77d312b06df0c54717644fbfda3d465ec8240532f0" -url = "https://github.com/cargo-bins/cargo-binstall/releases/download/v1.17.7/cargo-binstall-x86_64-unknown-linux-musl.tgz" - -[tools.cargo-binstall."platforms.macos-arm64"] -checksum = "sha256:1ad3c0c56fa3970634cce5009ed0ce61b943515f9115f8e480fd0e41d8d89085" -url = "https://github.com/cargo-bins/cargo-binstall/releases/download/v1.17.7/cargo-binstall-aarch64-apple-darwin.zip" - -[tools.cargo-binstall."platforms.macos-x64"] -checksum = "sha256:aa7174fb938e668dea4b4c3d22fe6cefed97642cc3a7a419ba96d63d63fd729b" -url = "https://github.com/cargo-bins/cargo-binstall/releases/download/v1.17.7/cargo-binstall-x86_64-apple-darwin.zip" - -[tools.cargo-binstall."platforms.windows-x64"] -checksum = "sha256:c5cb2444ee04480502a8ac73d96abd9f97af8300ec04ea1c1f2a9e959c02e4d6" -url = "https://github.com/cargo-bins/cargo-binstall/releases/download/v1.17.7/cargo-binstall-x86_64-pc-windows-msvc.zip" +checksum = "sha256:fef07560d4e391812091bb30c6ed1bd5289f74403a0c947b47b8a8c7a597b51b" +url = "https://github.com/cargo-bins/cargo-binstall/releases/download/v1.17.8/cargo-binstall-x86_64-pc-windows-msvc.zip" [[tools.cargo-insta]] version = "1.46.3" @@ -285,52 +265,52 @@ version = "0.3.156" backend = "cargo:release-plz" [[tools.just]] -version = "1.46.0" +version = "1.48.1" backend = "aqua:casey/just" [tools.just."platforms.linux-arm64"] -checksum = "sha256:b81970c8247fa64cfb30d2a3da0e487e4253f9f2d01865ed5e7d53cdc7b02188" -url = 
"https://github.com/casey/just/releases/download/1.46.0/just-1.46.0-aarch64-unknown-linux-musl.tar.gz" +checksum = "sha256:3308721b991cf88cf2b9bbb3b31ac40550ec61a0c9b6fc011564e25e87964030" +url = "https://github.com/casey/just/releases/download/1.48.1/just-1.48.1-aarch64-unknown-linux-musl.tar.gz" [tools.just."platforms.linux-arm64-musl"] -checksum = "sha256:b81970c8247fa64cfb30d2a3da0e487e4253f9f2d01865ed5e7d53cdc7b02188" -url = "https://github.com/casey/just/releases/download/1.46.0/just-1.46.0-aarch64-unknown-linux-musl.tar.gz" +checksum = "sha256:3308721b991cf88cf2b9bbb3b31ac40550ec61a0c9b6fc011564e25e87964030" +url = "https://github.com/casey/just/releases/download/1.48.1/just-1.48.1-aarch64-unknown-linux-musl.tar.gz" [tools.just."platforms.linux-x64"] -checksum = "sha256:79966e6e353f535ee7d1c6221641bcc8e3381c55b0d0a6dc6e54b34f9db36eaa" -url = "https://github.com/casey/just/releases/download/1.46.0/just-1.46.0-x86_64-unknown-linux-musl.tar.gz" +checksum = "sha256:9293e553ce401d1b524bf4e104918f72f268e3f9c6827e0055fe98d84a1b2522" +url = "https://github.com/casey/just/releases/download/1.48.1/just-1.48.1-x86_64-unknown-linux-musl.tar.gz" [tools.just."platforms.linux-x64-baseline"] -checksum = "sha256:79966e6e353f535ee7d1c6221641bcc8e3381c55b0d0a6dc6e54b34f9db36eaa" -url = "https://github.com/casey/just/releases/download/1.46.0/just-1.46.0-x86_64-unknown-linux-musl.tar.gz" +checksum = "sha256:9293e553ce401d1b524bf4e104918f72f268e3f9c6827e0055fe98d84a1b2522" +url = "https://github.com/casey/just/releases/download/1.48.1/just-1.48.1-x86_64-unknown-linux-musl.tar.gz" [tools.just."platforms.linux-x64-musl"] -checksum = "sha256:79966e6e353f535ee7d1c6221641bcc8e3381c55b0d0a6dc6e54b34f9db36eaa" -url = "https://github.com/casey/just/releases/download/1.46.0/just-1.46.0-x86_64-unknown-linux-musl.tar.gz" +checksum = "sha256:9293e553ce401d1b524bf4e104918f72f268e3f9c6827e0055fe98d84a1b2522" +url = 
"https://github.com/casey/just/releases/download/1.48.1/just-1.48.1-x86_64-unknown-linux-musl.tar.gz" [tools.just."platforms.linux-x64-musl-baseline"] -checksum = "sha256:79966e6e353f535ee7d1c6221641bcc8e3381c55b0d0a6dc6e54b34f9db36eaa" -url = "https://github.com/casey/just/releases/download/1.46.0/just-1.46.0-x86_64-unknown-linux-musl.tar.gz" +checksum = "sha256:9293e553ce401d1b524bf4e104918f72f268e3f9c6827e0055fe98d84a1b2522" +url = "https://github.com/casey/just/releases/download/1.48.1/just-1.48.1-x86_64-unknown-linux-musl.tar.gz" [tools.just."platforms.macos-arm64"] -checksum = "sha256:438eaf6468a115aa7db93e501cc7e3272f453f6b7083be3863adfab546b23358" -url = "https://github.com/casey/just/releases/download/1.46.0/just-1.46.0-aarch64-apple-darwin.tar.gz" +checksum = "sha256:03a73339ff55bcf7411a3c940cdcb0a726d98134b87203c83a9008575434e2a8" +url = "https://github.com/casey/just/releases/download/1.48.1/just-1.48.1-aarch64-apple-darwin.tar.gz" [tools.just."platforms.macos-x64"] -checksum = "sha256:ec54dd60ac876261b7318f1852ef9c0319fede1e5a73c14f56d908a8edf595b8" -url = "https://github.com/casey/just/releases/download/1.46.0/just-1.46.0-x86_64-apple-darwin.tar.gz" +checksum = "sha256:4c3e9c880b8fc93d7fc24abfde3c36b0cc59f6e9f8b31f7175095700f64125a7" +url = "https://github.com/casey/just/releases/download/1.48.1/just-1.48.1-x86_64-apple-darwin.tar.gz" [tools.just."platforms.macos-x64-baseline"] -checksum = "sha256:ec54dd60ac876261b7318f1852ef9c0319fede1e5a73c14f56d908a8edf595b8" -url = "https://github.com/casey/just/releases/download/1.46.0/just-1.46.0-x86_64-apple-darwin.tar.gz" +checksum = "sha256:4c3e9c880b8fc93d7fc24abfde3c36b0cc59f6e9f8b31f7175095700f64125a7" +url = "https://github.com/casey/just/releases/download/1.48.1/just-1.48.1-x86_64-apple-darwin.tar.gz" [tools.just."platforms.windows-x64"] -checksum = "sha256:f0acf3f8ccbcf360b481baae9cae4c921774c89d5d932012481d3e0bda78ab39" -url = 
"https://github.com/casey/just/releases/download/1.46.0/just-1.46.0-x86_64-pc-windows-msvc.zip" +checksum = "sha256:368cd9ca827cba04d9e6fc00f7ad840773c4605b6f64b9f87bdb00325d351029" +url = "https://github.com/casey/just/releases/download/1.48.1/just-1.48.1-x86_64-pc-windows-msvc.zip" [tools.just."platforms.windows-x64-baseline"] -checksum = "sha256:f0acf3f8ccbcf360b481baae9cae4c921774c89d5d932012481d3e0bda78ab39" -url = "https://github.com/casey/just/releases/download/1.46.0/just-1.46.0-x86_64-pc-windows-msvc.zip" +checksum = "sha256:368cd9ca827cba04d9e6fc00f7ad840773c4605b6f64b9f87bdb00325d351029" +url = "https://github.com/casey/just/releases/download/1.48.1/just-1.48.1-x86_64-pc-windows-msvc.zip" [[tools.lychee]] version = "0.23.0" @@ -377,11 +357,11 @@ version = "0.21.0" backend = "npm:markdownlint-cli2" [[tools."pipx:mdformat"]] -version = "0.7.21" +version = "1.0.0" backend = "pipx:mdformat" [tools."pipx:mdformat".options] -uvx_args = "--with mdformat-gfm --with mdformat-frontmatter --with mdformat-footnote --with mdformat-simple-breaks --with mdformat-gfm-alerts --with mdformat-toc --with mdformat-wikilink --with mdformat-tables" +uvx_args = "--with mdformat-gfm --with mdformat-config --with mdformat-footnote --with mdformat-front-matters --with mdformat-simple-breaks --with mdformat-web --with mdformat-wikilink --with mdformat-gfm-alerts --with mdformat-toc" [[tools."pipx:pre-commit"]] version = "4.5.1" @@ -396,51 +376,51 @@ version = "3.14.3" backend = "core:python" [tools.python."platforms.linux-arm64"] -checksum = "sha256:be0f4dc2932f762292b27d46ea7d3e8e66ddf3969a5eb0254a229015ed402625" -url = "https://github.com/astral-sh/python-build-standalone/releases/download/20260303/cpython-3.14.3+20260303-aarch64-unknown-linux-gnu-install_only_stripped.tar.gz" +checksum = "sha256:53700338695e402a1a1fe22be4a41fbdacc70e22bb308a48eca8ed67cb7992be" +url = 
"https://github.com/astral-sh/python-build-standalone/releases/download/20260324/cpython-3.14.3+20260324-aarch64-unknown-linux-gnu-install_only_stripped.tar.gz" [tools.python."platforms.linux-arm64-musl"] -checksum = "sha256:be0f4dc2932f762292b27d46ea7d3e8e66ddf3969a5eb0254a229015ed402625" -url = "https://github.com/astral-sh/python-build-standalone/releases/download/20260303/cpython-3.14.3+20260303-aarch64-unknown-linux-gnu-install_only_stripped.tar.gz" +checksum = "sha256:53700338695e402a1a1fe22be4a41fbdacc70e22bb308a48eca8ed67cb7992be" +url = "https://github.com/astral-sh/python-build-standalone/releases/download/20260324/cpython-3.14.3+20260324-aarch64-unknown-linux-gnu-install_only_stripped.tar.gz" [tools.python."platforms.linux-x64"] -checksum = "sha256:0a73413f89efd417871876c9accaab28a9d1e3cd6358fbfff171a38ec99302f0" -url = "https://github.com/astral-sh/python-build-standalone/releases/download/20260303/cpython-3.14.3+20260303-x86_64-unknown-linux-gnu-install_only_stripped.tar.gz" +checksum = "sha256:d7a9f970914bb4c88756fe3bdcc186d4feb90e9500e54f1db47dae4dc9687e39" +url = "https://github.com/astral-sh/python-build-standalone/releases/download/20260324/cpython-3.14.3+20260324-x86_64-unknown-linux-gnu-install_only_stripped.tar.gz" [tools.python."platforms.linux-x64-baseline"] -checksum = "sha256:0a73413f89efd417871876c9accaab28a9d1e3cd6358fbfff171a38ec99302f0" -url = "https://github.com/astral-sh/python-build-standalone/releases/download/20260303/cpython-3.14.3+20260303-x86_64-unknown-linux-gnu-install_only_stripped.tar.gz" +checksum = "sha256:d7a9f970914bb4c88756fe3bdcc186d4feb90e9500e54f1db47dae4dc9687e39" +url = "https://github.com/astral-sh/python-build-standalone/releases/download/20260324/cpython-3.14.3+20260324-x86_64-unknown-linux-gnu-install_only_stripped.tar.gz" [tools.python."platforms.linux-x64-musl"] -checksum = "sha256:0a73413f89efd417871876c9accaab28a9d1e3cd6358fbfff171a38ec99302f0" -url = 
"https://github.com/astral-sh/python-build-standalone/releases/download/20260303/cpython-3.14.3+20260303-x86_64-unknown-linux-gnu-install_only_stripped.tar.gz" +checksum = "sha256:d7a9f970914bb4c88756fe3bdcc186d4feb90e9500e54f1db47dae4dc9687e39" +url = "https://github.com/astral-sh/python-build-standalone/releases/download/20260324/cpython-3.14.3+20260324-x86_64-unknown-linux-gnu-install_only_stripped.tar.gz" [tools.python."platforms.linux-x64-musl-baseline"] -checksum = "sha256:0a73413f89efd417871876c9accaab28a9d1e3cd6358fbfff171a38ec99302f0" -url = "https://github.com/astral-sh/python-build-standalone/releases/download/20260303/cpython-3.14.3+20260303-x86_64-unknown-linux-gnu-install_only_stripped.tar.gz" +checksum = "sha256:d7a9f970914bb4c88756fe3bdcc186d4feb90e9500e54f1db47dae4dc9687e39" +url = "https://github.com/astral-sh/python-build-standalone/releases/download/20260324/cpython-3.14.3+20260324-x86_64-unknown-linux-gnu-install_only_stripped.tar.gz" [tools.python."platforms.macos-arm64"] -checksum = "sha256:4703cdf18b26798fde7b49b6b66149674c25f97127be6a10dbcf29309bdcdcdb" -url = "https://github.com/astral-sh/python-build-standalone/releases/download/20260303/cpython-3.14.3+20260303-aarch64-apple-darwin-install_only_stripped.tar.gz" +checksum = "sha256:c43aecde4a663aebff99b9b83da0efec506479f1c3f98331442f33d2c43501f9" +url = "https://github.com/astral-sh/python-build-standalone/releases/download/20260324/cpython-3.14.3+20260324-aarch64-apple-darwin-install_only_stripped.tar.gz" [tools.python."platforms.macos-x64"] -checksum = "sha256:76f1cc26e3d262eae8ca546a93e8bded10cf0323613f7e246fea2e10a8115eb7" -url = "https://github.com/astral-sh/python-build-standalone/releases/download/20260303/cpython-3.14.3+20260303-x86_64-apple-darwin-install_only_stripped.tar.gz" +checksum = "sha256:9ab41dbc2f100a2a45d1833b9c11165f51051c558b5213eda9a9731d5948a0c0" +url = 
"https://github.com/astral-sh/python-build-standalone/releases/download/20260324/cpython-3.14.3+20260324-x86_64-apple-darwin-install_only_stripped.tar.gz" [tools.python."platforms.macos-x64-baseline"] -checksum = "sha256:76f1cc26e3d262eae8ca546a93e8bded10cf0323613f7e246fea2e10a8115eb7" -url = "https://github.com/astral-sh/python-build-standalone/releases/download/20260303/cpython-3.14.3+20260303-x86_64-apple-darwin-install_only_stripped.tar.gz" +checksum = "sha256:9ab41dbc2f100a2a45d1833b9c11165f51051c558b5213eda9a9731d5948a0c0" +url = "https://github.com/astral-sh/python-build-standalone/releases/download/20260324/cpython-3.14.3+20260324-x86_64-apple-darwin-install_only_stripped.tar.gz" [tools.python."platforms.windows-x64"] -checksum = "sha256:950c5f21a015c1bdd1337f233456df2470fab71e4d794407d27a84cb8b9909a0" -url = "https://github.com/astral-sh/python-build-standalone/releases/download/20260303/cpython-3.14.3+20260303-x86_64-pc-windows-msvc-install_only_stripped.tar.gz" +checksum = "sha256:bbe19034b35b0267176a7442575ae7dc6343480fd4d35598cb7700173d431e09" +url = "https://github.com/astral-sh/python-build-standalone/releases/download/20260324/cpython-3.14.3+20260324-x86_64-pc-windows-msvc-install_only_stripped.tar.gz" [tools.python."platforms.windows-x64-baseline"] -checksum = "sha256:950c5f21a015c1bdd1337f233456df2470fab71e4d794407d27a84cb8b9909a0" -url = "https://github.com/astral-sh/python-build-standalone/releases/download/20260303/cpython-3.14.3+20260303-x86_64-pc-windows-msvc-install_only_stripped.tar.gz" +checksum = "sha256:bbe19034b35b0267176a7442575ae7dc6343480fd4d35598cb7700173d431e09" +url = "https://github.com/astral-sh/python-build-standalone/releases/download/20260324/cpython-3.14.3+20260324-x86_64-pc-windows-msvc-install_only_stripped.tar.gz" [[tools.rust]] -version = "1.94.0" +version = "1.94.1" backend = "core:rust" [[tools.scorecard]] @@ -450,10 +430,12 @@ backend = "aqua:ossf/scorecard" [tools.scorecard."platforms.linux-arm64"] checksum = 
"sha256:3f8b6354c62ec0287a8e9694481d834e16bff8451cf5b5dca435e8400ce5adaf" url = "https://github.com/ossf/scorecard/releases/download/v5.4.0/scorecard_5.4.0_linux_arm64.tar.gz" +provenance = "slsa" [tools.scorecard."platforms.linux-arm64-musl"] checksum = "sha256:3f8b6354c62ec0287a8e9694481d834e16bff8451cf5b5dca435e8400ce5adaf" url = "https://github.com/ossf/scorecard/releases/download/v5.4.0/scorecard_5.4.0_linux_arm64.tar.gz" +provenance = "slsa" [tools.scorecard."platforms.linux-x64"] checksum = "sha256:e5183aeaa5aa548fbb7318a6deb3e1038be0ef9aca24e655422ae88dfbe67502" @@ -463,14 +445,17 @@ provenance = "slsa" [tools.scorecard."platforms.linux-x64-baseline"] checksum = "sha256:e5183aeaa5aa548fbb7318a6deb3e1038be0ef9aca24e655422ae88dfbe67502" url = "https://github.com/ossf/scorecard/releases/download/v5.4.0/scorecard_5.4.0_linux_amd64.tar.gz" +provenance = "slsa" [tools.scorecard."platforms.linux-x64-musl"] checksum = "sha256:e5183aeaa5aa548fbb7318a6deb3e1038be0ef9aca24e655422ae88dfbe67502" url = "https://github.com/ossf/scorecard/releases/download/v5.4.0/scorecard_5.4.0_linux_amd64.tar.gz" +provenance = "slsa" [tools.scorecard."platforms.linux-x64-musl-baseline"] checksum = "sha256:e5183aeaa5aa548fbb7318a6deb3e1038be0ef9aca24e655422ae88dfbe67502" url = "https://github.com/ossf/scorecard/releases/download/v5.4.0/scorecard_5.4.0_linux_amd64.tar.gz" +provenance = "slsa" [tools.scorecard."platforms.macos-arm64"] checksum = "sha256:2c672695a27d35537dd4054f690f31fa1d6a72b0957598f45181296487f537f4" @@ -485,6 +470,7 @@ provenance = "slsa" [tools.scorecard."platforms.macos-x64-baseline"] checksum = "sha256:2abfec13b8eecc9b730e3782c9b3a9544d31ae861ce21ea7fe6a369d887d7c89" url = "https://github.com/ossf/scorecard/releases/download/v5.4.0/scorecard_5.4.0_darwin_amd64.tar.gz" +provenance = "slsa" [tools.scorecard."platforms.windows-x64"] checksum = "sha256:f7d0ece0dde703e4baa5f96e9b6ed33e6e786138c90db8de2c4943f24015b9ff" @@ -494,6 +480,7 @@ provenance = "slsa" 
[tools.scorecard."platforms.windows-x64-baseline"] checksum = "sha256:f7d0ece0dde703e4baa5f96e9b6ed33e6e786138c90db8de2c4943f24015b9ff" url = "https://github.com/ossf/scorecard/releases/download/v5.4.0/scorecard_5.4.0_windows_amd64.tar.gz" +provenance = "slsa" [[tools.shellcheck]] version = "0.11.0" diff --git a/mise.toml b/mise.toml index 3c0e40b..69370e5 100644 --- a/mise.toml +++ b/mise.toml @@ -23,7 +23,7 @@ python = "latest" "cargo:cargo-release" = "1.1.1" "cargo:cargo-auditable" = "0.7.4" "cargo:cargo-cyclonedx" = "0.5.7" -"pipx:mdformat" = { version = "0.7.21", uvx_args = "--with mdformat-gfm --with mdformat-frontmatter --with mdformat-footnote --with mdformat-simple-breaks --with mdformat-gfm-alerts --with mdformat-toc --with mdformat-wikilink --with mdformat-tables" } +"pipx:mdformat" = { version = "1.0.0", uvx_args = "--with mdformat-gfm --with mdformat-config --with mdformat-footnote --with mdformat-front-matters --with mdformat-simple-breaks --with mdformat-web --with mdformat-wikilink --with mdformat-gfm-alerts --with mdformat-toc" } prettier = "3.8.1" actionlint = "1.7.11" lychee = "0.23.0" diff --git a/src/evaluator/offset/indirect.rs b/src/evaluator/offset/indirect.rs index 794f041..5e56743 100644 --- a/src/evaluator/offset/indirect.rs +++ b/src/evaluator/offset/indirect.rs @@ -1,27 +1,638 @@ // Copyright (c) 2025-2026 the libmagic-rs contributors // SPDX-License-Identifier: Apache-2.0 -//! Indirect offset resolution (not yet implemented) +//! Indirect offset resolution +//! +//! Indirect offsets read a pointer value from the file at a base offset, +//! then use that value (with optional adjustment) as the final offset. 
use crate::LibmagicError; -use crate::parser::ast::OffsetSpec; +use crate::error::EvaluationError; +use crate::evaluator::types::{TypeReadError, read_byte, read_long, read_quad, read_short}; +use crate::parser::ast::{Endianness, OffsetSpec, TypeKind, Value}; -/// Resolve an indirect offset specification +use super::{map_offset_error, resolve_absolute_offset}; + +/// Resolve an indirect offset specification. +/// +/// Indirect offsets dereference a pointer stored in the file buffer: +/// 1. Resolve `base_offset` to an absolute position (supports negative/from-end). +/// 2. Read a numeric pointer value at that position using `pointer_type` and `endian`. +/// 3. Apply `adjustment` with checked arithmetic. +/// 4. Validate the final offset against `buffer.len()`. +/// +/// # Arguments /// -/// Indirect offsets read a pointer value from the file at a base offset, -/// then use that value (with optional adjustment) as the final offset. +/// * `spec` - Must be `OffsetSpec::Indirect { .. }` +/// * `buffer` - The file buffer to read from /// /// # Errors /// -/// Currently returns `LibmagicError::EvaluationError` with `UnsupportedType` -/// as indirect offset resolution is not yet implemented. -// TODO: Implement indirect offset resolution (issue #37) -pub fn resolve_indirect_offset(spec: &OffsetSpec, _buffer: &[u8]) -> Result { - debug_assert!( - matches!(spec, OffsetSpec::Indirect { .. 
}), - "resolve_indirect_offset called with non-indirect spec" - ); - Err(LibmagicError::EvaluationError( - crate::error::EvaluationError::unsupported_type("Indirect offsets not yet implemented"), - )) +/// * `EvaluationError::InvalidOffset` - If `base_offset` is out of bounds or arithmetic overflows +/// * `EvaluationError::BufferOverrun` - If the pointer read or final offset exceeds buffer bounds +/// * `EvaluationError::UnsupportedType` - If `pointer_type` is not a numeric type +pub fn resolve_indirect_offset(spec: &OffsetSpec, buffer: &[u8]) -> Result { + let (base_offset, pointer_type, adjustment, endian) = match spec { + OffsetSpec::Indirect { + base_offset, + pointer_type, + adjustment, + endian, + } => (*base_offset, pointer_type, *adjustment, *endian), + _ => { + return Err(LibmagicError::EvaluationError( + EvaluationError::internal_error( + "resolve_indirect_offset called with non-indirect spec", + ), + )); + } + }; + + // Validate: outer endian must match inner TypeKind endian (single source of truth). + // Byte has no inner endian field so only multi-byte types need the check. + match pointer_type { + TypeKind::Short { endian: inner, .. } + | TypeKind::Long { endian: inner, .. } + | TypeKind::Quad { endian: inner, .. 
} => { + debug_assert_eq!( + *inner, endian, + "Indirect offset: inner TypeKind endianness ({inner:?}) \ + contradicts outer endian field ({endian:?})" + ); + } + _ => {} + } + + // Step 1: Resolve base_offset to an absolute position + let abs_base = resolve_absolute_offset(base_offset, buffer) + .map_err(|e| map_offset_error(&e, base_offset))?; + + // Step 2: Read pointer value using the appropriate numeric reader + let pointer_value = read_pointer(buffer, abs_base, pointer_type, endian)?; + + // Step 3: Apply adjustment with checked arithmetic + let final_offset = apply_adjustment(pointer_value, adjustment)?; + + // Step 4: Validate final offset against buffer length + if final_offset >= buffer.len() { + return Err(LibmagicError::EvaluationError( + EvaluationError::BufferOverrun { + offset: final_offset, + }, + )); + } + + Ok(final_offset) +} + +/// Read a pointer value from the buffer and extract it as a raw `u64`. +fn read_pointer( + buffer: &[u8], + offset: usize, + pointer_type: &TypeKind, + endian: Endianness, +) -> Result { + let value = match pointer_type { + TypeKind::Byte { signed } => read_byte(buffer, offset, *signed), + TypeKind::Short { signed, .. } => read_short(buffer, offset, endian, *signed), + TypeKind::Long { signed, .. } => read_long(buffer, offset, endian, *signed), + TypeKind::Quad { signed, .. } => read_quad(buffer, offset, endian, *signed), + _ => { + return Err(LibmagicError::EvaluationError( + EvaluationError::unsupported_type(format!( + "Indirect offset pointer type not supported: {pointer_type:?}" + )), + )); + } + } + .map_err(|e| map_type_read_error(e, offset))?; + + extract_raw_unsigned(&value) +} + +/// Extract a raw unsigned integer from a `Value`, converting signed values. 
+fn extract_raw_unsigned(value: &Value) -> Result { + match value { + Value::Uint(v) => Ok(*v), + #[allow(clippy::cast_sign_loss)] + Value::Int(v) => Ok(*v as u64), + _ => Err(LibmagicError::EvaluationError( + EvaluationError::internal_error("Pointer read returned non-integer value"), + )), + } +} + +/// Apply an `i64` adjustment to a `u64` pointer value with checked arithmetic. +fn apply_adjustment(pointer: u64, adjustment: i64) -> Result { + let adjusted = if adjustment >= 0 { + #[allow(clippy::cast_sign_loss)] + pointer + .checked_add(adjustment as u64) + .ok_or_else(|| overflow_error(pointer, adjustment))? + } else { + // Negative adjustment + if adjustment == i64::MIN { + return Err(overflow_error(pointer, adjustment)); + } + #[allow(clippy::cast_sign_loss)] + let abs_adj = (-adjustment) as u64; + pointer + .checked_sub(abs_adj) + .ok_or_else(|| overflow_error(pointer, adjustment))? + }; + + usize::try_from(adjusted).map_err(|_| overflow_error(pointer, adjustment)) +} + +/// Map a `TypeReadError` to a `LibmagicError`. +fn map_type_read_error(e: TypeReadError, offset: usize) -> LibmagicError { + match e { + TypeReadError::BufferOverrun { .. } => { + LibmagicError::EvaluationError(EvaluationError::BufferOverrun { offset }) + } + other => LibmagicError::EvaluationError(EvaluationError::from(other)), + } +} + +/// Create an overflow error for failed adjustment arithmetic. 
+fn overflow_error(_pointer: u64, adjustment: i64) -> LibmagicError { + LibmagicError::EvaluationError(EvaluationError::InvalidOffset { offset: adjustment }) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::parser::ast::Endianness; + + fn indirect( + base_offset: i64, + pointer_type: TypeKind, + adjustment: i64, + endian: Endianness, + ) -> OffsetSpec { + OffsetSpec::Indirect { + base_offset, + pointer_type, + adjustment, + endian, + } + } + + #[test] + fn test_pointer_type_and_endianness() { + let cases: &[(&str, &[u8], TypeKind, Endianness, usize)] = &[ + ( + "byte unsigned", + &[0x04, 0x00, 0x00, 0x00, 0xAA], + TypeKind::Byte { signed: false }, + Endianness::Little, + 4, + ), + ( + "byte signed positive", + &[0x03, 0x00, 0x00, 0xBB], + TypeKind::Byte { signed: true }, + Endianness::Little, + 3, + ), + ( + "short LE", + &[0x04, 0x00, 0x00, 0x00, 0xCC], + TypeKind::Short { + endian: Endianness::Little, + signed: false, + }, + Endianness::Little, + 4, + ), + ( + "short BE", + &[0x00, 0x04, 0x00, 0x00, 0xDD], + TypeKind::Short { + endian: Endianness::Big, + signed: false, + }, + Endianness::Big, + 4, + ), + ( + "long LE", + &[0x06, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF], + TypeKind::Long { + endian: Endianness::Little, + signed: false, + }, + Endianness::Little, + 6, + ), + ( + "long BE", + &[0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0xFF], + TypeKind::Long { + endian: Endianness::Big, + signed: false, + }, + Endianness::Big, + 6, + ), + ( + "signed long positive", + &[0x04, 0x00, 0x00, 0x00, 0xAA], + TypeKind::Long { + endian: Endianness::Little, + signed: true, + }, + Endianness::Little, + 4, + ), + ]; + for (name, buf, ptype, endian, expected) in cases { + let spec = indirect(0, ptype.clone(), 0, *endian); + assert_eq!( + resolve_indirect_offset(&spec, buf).unwrap(), + *expected, + "Failed for case: {name}" + ); + } + } + + #[test] + fn test_quad_pointer_endianness() { + let quad_cases: &[(&str, Endianness, &[u8])] = &[ + ( + "quad LE", + Endianness::Little, + 
&[0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00], + ), + ( + "quad BE", + Endianness::Big, + &[0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10], + ), + ]; + for (name, endian, prefix) in quad_cases { + let mut buffer = prefix.to_vec(); + buffer.resize(17, 0xBB); + let spec = indirect( + 0, + TypeKind::Quad { + endian: *endian, + signed: false, + }, + 0, + *endian, + ); + assert_eq!( + resolve_indirect_offset(&spec, &buffer).unwrap(), + 16, + "Failed for case: {name}" + ); + } + } + + #[test] + fn test_extract_raw_unsigned_values() { + let ok_cases: &[(&str, Value, u64)] = &[ + ("Int(-1) -> u64::MAX", Value::Int(-1), u64::MAX), + ( + "Int(-2) -> u64::MAX-1", + Value::Int(-2), + 0xFFFF_FFFF_FFFF_FFFE, + ), + ( + "Int(-1) sign-extended", + Value::Int(-1), + 0xFFFF_FFFF_FFFF_FFFF, + ), + ("Int(42)", Value::Int(42), 42), + ("Uint(0xDEAD_BEEF)", Value::Uint(0xDEAD_BEEF), 0xDEAD_BEEF), + ]; + for (name, value, expected) in ok_cases { + assert_eq!( + extract_raw_unsigned(value).unwrap(), + *expected, + "Failed for case: {name}" + ); + } + + let err_value = Value::String("hello".to_string()); + assert!( + extract_raw_unsigned(&err_value).is_err(), + "Failed for case: rejects non-integer" + ); + } + + #[test] + fn test_read_pointer_signed_negative() { + let cases: &[(&str, &[u8], TypeKind, u64)] = &[ + ( + "signed long -1", + &[0xFF, 0xFF, 0xFF, 0xFF], + TypeKind::Long { + endian: Endianness::Little, + signed: true, + }, + u64::MAX, + ), + ( + "signed short -2", + &[0xFE, 0xFF], + TypeKind::Short { + endian: Endianness::Little, + signed: true, + }, + 0xFFFF_FFFF_FFFF_FFFE, + ), + ( + "signed byte -1", + &[0xFF], + TypeKind::Byte { signed: true }, + u64::MAX, + ), + ]; + for (name, buf, ptype, expected) in cases { + let raw = read_pointer(buf, 0, ptype, Endianness::Little).unwrap(); + assert_eq!(raw, *expected, "Failed for case: {name}"); + } + } + + #[test] + fn test_signed_short_negative_pointer_overruns_after_raw_conversion() { + let buffer = &[0xFE, 0xFF, 0x00, 0x00]; + 
let spec = indirect( + 0, + TypeKind::Short { + endian: Endianness::Little, + signed: true, + }, + 0, + Endianness::Little, + ); + let err = resolve_indirect_offset(&spec, buffer).unwrap_err(); + if usize::BITS == 64 { + assert!( + matches!( + err, + LibmagicError::EvaluationError(EvaluationError::BufferOverrun { offset }) + if offset == 0xFFFF_FFFF_FFFF_FFFE + ), + "Expected BufferOverrun at 0xFFFF_FFFF_FFFF_FFFE, got: {err:?}" + ); + } else { + assert!( + matches!( + err, + LibmagicError::EvaluationError(EvaluationError::InvalidOffset { .. }) + ), + "Expected InvalidOffset from usize::try_from overflow on 32-bit, got: {err:?}" + ); + } + } + + #[test] + fn test_signed_long_negative_pointer_with_adjustment_overruns() { + let buffer = &[0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00]; + let spec = indirect( + 0, + TypeKind::Long { + endian: Endianness::Little, + signed: true, + }, + -1, + Endianness::Little, + ); + let err = resolve_indirect_offset(&spec, buffer).unwrap_err(); + if usize::BITS == 64 { + assert!( + matches!( + err, + LibmagicError::EvaluationError(EvaluationError::BufferOverrun { offset }) + if offset == 0xFFFF_FFFF_FFFF_FFFE + ), + "Expected BufferOverrun at 0xFFFF_FFFF_FFFF_FFFE, got: {err:?}" + ); + } else { + assert!( + matches!( + err, + LibmagicError::EvaluationError(EvaluationError::InvalidOffset { .. 
}) + ), + "Expected InvalidOffset from usize::try_from overflow on 32-bit, got: {err:?}" + ); + } + } + + #[test] + fn test_adjustments() { + let cases: &[(&str, &[u8], i64, usize)] = &[ + ("positive +3", &[0x02, 0x00, 0x00, 0x00, 0x00, 0xEE], 3, 5), + ("negative -2", &[0x05, 0x00, 0x00, 0xFF], -2, 3), + ]; + for (name, buf, adj, expected) in cases { + let spec = indirect( + 0, + TypeKind::Byte { signed: false }, + *adj, + Endianness::Little, + ); + assert_eq!( + resolve_indirect_offset(&spec, buf).unwrap(), + *expected, + "Failed for case: {name}" + ); + } + } + + #[test] + fn test_from_end_base_offset() { + let buffer = &[0x00, 0x00, 0xAA, 0x00, 0x00, 0x00, 0x00, 0x02]; + let spec = indirect(-1, TypeKind::Byte { signed: false }, 0, Endianness::Little); + assert_eq!(resolve_indirect_offset(&spec, buffer).unwrap(), 2); + } + + #[test] + fn test_pointer_read_overrun() { + let cases: &[(&str, &[u8], TypeKind)] = &[ + ( + "short from 1-byte buffer", + &[0x04], + TypeKind::Short { + endian: Endianness::Little, + signed: false, + }, + ), + ( + "long from 3-byte buffer", + &[0x00, 0x00, 0x00], + TypeKind::Long { + endian: Endianness::Little, + signed: false, + }, + ), + ]; + for (name, buf, ptype) in cases { + let spec = indirect(0, ptype.clone(), 0, Endianness::Little); + let result = resolve_indirect_offset(&spec, buf); + assert!( + matches!( + result, + Err(LibmagicError::EvaluationError( + EvaluationError::BufferOverrun { .. 
} + )) + ), + "Failed for case: {name}" + ); + } + } + + #[test] + fn test_final_offset_overrun() { + let cases: &[(&str, &[u8], i64)] = &[ + ( + "pointer=0xFF, no adjustment", + &[0xFF, 0x00, 0x00, 0x00, 0x00], + 0, + ), + ( + "pointer=3, adjustment=+10", + &[0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00], + 10, + ), + ]; + for (name, buf, adj) in cases { + let spec = indirect( + 0, + TypeKind::Byte { signed: false }, + *adj, + Endianness::Little, + ); + let result = resolve_indirect_offset(&spec, buf); + assert!( + matches!( + result, + Err(LibmagicError::EvaluationError( + EvaluationError::BufferOverrun { .. } + )) + ), + "Failed for case: {name}" + ); + } + } + + #[test] + fn test_adjustment_overflow_underflow() { + let cases: &[(&str, &[u8], TypeKind, i64)] = &[ + ( + "overflow: u64::MAX + 1", + &[0xFF; 16], + TypeKind::Quad { + endian: Endianness::Little, + signed: false, + }, + 1, + ), + ( + "underflow: 0 - 1", + &[0x00, 0x00, 0x00, 0x00], + TypeKind::Byte { signed: false }, + -1, + ), + ]; + for (name, buf, ptype, adj) in cases { + let spec = indirect(0, ptype.clone(), *adj, Endianness::Little); + let result = resolve_indirect_offset(&spec, buf); + assert!( + matches!( + result, + Err(LibmagicError::EvaluationError( + EvaluationError::InvalidOffset { .. } + )) + ), + "Failed for case: {name}" + ); + } + } + + #[test] + fn test_unsupported_pointer_types() { + let cases: &[(&str, &[u8], TypeKind)] = &[ + ("string", &[0x00; 4], TypeKind::String { max_length: None }), + ( + "float", + &[0x00; 4], + TypeKind::Float { + endian: Endianness::Little, + }, + ), + ( + "double", + &[0x00; 8], + TypeKind::Double { + endian: Endianness::Little, + }, + ), + ]; + for (name, buf, ptype) in cases { + let spec = indirect(0, ptype.clone(), 0, Endianness::Little); + let result = resolve_indirect_offset(&spec, buf); + assert!( + matches!( + result, + Err(LibmagicError::EvaluationError( + EvaluationError::UnsupportedType { .. 
} + )) + ), + "Failed for case: {name}" + ); + } + } + + #[test] + fn test_pe_header_style_offset_0x3c() { + let mut buffer = vec![0u8; 256]; + buffer[0x3C] = 0x80; + buffer[0x3D] = 0x00; + buffer[0x3E] = 0x00; + buffer[0x3F] = 0x00; + buffer[0x80] = b'P'; + buffer[0x81] = b'E'; + buffer[0x82] = 0x00; + buffer[0x83] = 0x00; + + let spec = indirect( + 0x3C, + TypeKind::Long { + endian: Endianness::Little, + signed: false, + }, + 0, + Endianness::Little, + ); + let offset = resolve_indirect_offset(&spec, &buffer).unwrap(); + assert_eq!(offset, 0x80); + assert_eq!(&buffer[offset..offset + 4], b"PE\0\0"); + } + + #[test] + fn test_base_offset_out_of_bounds() { + let buffer = &[0x00, 0x01, 0x02]; + let spec = indirect(100, TypeKind::Byte { signed: false }, 0, Endianness::Little); + assert!(resolve_indirect_offset(&spec, buffer).is_err()); + } + + #[test] + fn test_non_indirect_spec_returns_error() { + let buffer = &[0x00; 8]; + let spec = OffsetSpec::Absolute(0); + let result = resolve_indirect_offset(&spec, buffer); + assert!( + matches!( + result, + Err(LibmagicError::EvaluationError( + EvaluationError::InternalError { .. } + )) + ), + "Expected InternalError for non-indirect spec" + ); + } } diff --git a/src/evaluator/offset/mod.rs b/src/evaluator/offset/mod.rs index 687b6fa..b92c9a6 100644 --- a/src/evaluator/offset/mod.rs +++ b/src/evaluator/offset/mod.rs @@ -16,7 +16,7 @@ use crate::LibmagicError; use crate::parser::ast::OffsetSpec; /// Map an `OffsetError` to a `LibmagicError` for a given original offset value -fn map_offset_error(e: &OffsetError, original_offset: i64) -> LibmagicError { +pub(crate) fn map_offset_error(e: &OffsetError, original_offset: i64) -> LibmagicError { match e { OffsetError::BufferOverrun { offset, @@ -35,8 +35,8 @@ fn map_offset_error(e: &OffsetError, original_offset: i64) -> LibmagicError { /// Resolve any offset specification to an absolute position /// /// This is a higher-level function that handles all types of offset specifications. 
-/// Currently only supports absolute offsets, but will be extended to handle indirect, -/// relative, and from-end offsets in future tasks. +/// Supports absolute, from-end, and indirect offsets. Relative offsets are not yet +/// implemented. /// /// # Arguments /// @@ -127,26 +127,18 @@ mod tests { } #[test] - fn test_resolve_offset_indirect_not_implemented() { - let buffer = b"Test data"; + fn test_resolve_offset_indirect_success() { + // Byte pointer at offset 0 with value 5 → resolves to offset 5 + let buffer = b"\x05TestXdata"; let spec = OffsetSpec::Indirect { base_offset: 0, - pointer_type: crate::parser::ast::TypeKind::Byte { signed: true }, + pointer_type: crate::parser::ast::TypeKind::Byte { signed: false }, adjustment: 0, endian: crate::parser::ast::Endianness::Little, }; - let result = resolve_offset(&spec, buffer); - assert!(result.is_err()); - - match result.unwrap_err() { - LibmagicError::EvaluationError(crate::error::EvaluationError::UnsupportedType { - type_name, - }) => { - assert!(type_name.contains("Indirect offsets not yet implemented")); - } - _ => panic!("Expected EvaluationError with UnsupportedType"), - } + let result = resolve_offset(&spec, buffer).unwrap(); + assert_eq!(result, 5); } #[test] diff --git a/src/parser/grammar/mod.rs b/src/parser/grammar/mod.rs index ca8074a..62fa1bd 100644 --- a/src/parser/grammar/mod.rs +++ b/src/parser/grammar/mod.rs @@ -17,7 +17,9 @@ use nom::{ sequence::pair, }; -use crate::parser::ast::{MagicRule, OffsetSpec, Operator, StrengthModifier, TypeKind, Value}; +use crate::parser::ast::{ + Endianness, MagicRule, OffsetSpec, Operator, StrengthModifier, TypeKind, Value, +}; /// Parse a decimal number with overflow protection fn parse_decimal_number(input: &str) -> IResult<&str, i64> { @@ -153,21 +155,148 @@ pub fn parse_number(input: &str) -> IResult<&str, i64> { Ok((input, result)) } -/// Parse an offset specification for absolute offsets +/// Map a single-character pointer specifier to its `TypeKind` and 
`Endianness`.
+///
+/// Follows GNU `file` conventions: a lowercase letter selects little-endian and
+/// an uppercase letter selects big-endian. Every pointer type produced here is
+/// signed (GOTCHAS S6.3).
+///
+/// | Specifier | Width   | Endianness    |
+/// |-----------|---------|---------------|
+/// | `b`       | 1 byte  | Little-endian |
+/// | `B`       | 1 byte  | Big-endian    |
+/// | `s`       | 2 bytes | Little-endian |
+/// | `S`       | 2 bytes | Big-endian    |
+/// | `l`       | 4 bytes | Little-endian |
+/// | `L`       | 4 bytes | Big-endian    |
+/// | `q`       | 8 bytes | Little-endian |
+/// | `Q`       | 8 bytes | Big-endian    |
+///
+/// Returns `None` for any character that is not listed in the table.
+fn pointer_specifier_to_type(spec: char) -> Option<(TypeKind, Endianness)> {
+    // Letter case carries the byte order; the letter itself carries the width.
+    let endian = if spec.is_ascii_uppercase() {
+        Endianness::Big
+    } else {
+        Endianness::Little
+    };
+
+    let pointer_type = match spec.to_ascii_lowercase() {
+        'b' => TypeKind::Byte { signed: true },
+        's' => TypeKind::Short {
+            endian,
+            signed: true,
+        },
+        'l' => TypeKind::Long {
+            endian,
+            signed: true,
+        },
+        'q' => TypeKind::Quad {
+            endian,
+            signed: true,
+        },
+        _ => return None,
+    };
+
+    Some((pointer_type, endian))
+}
+
+/// Parse an indirect offset specification: `(base.type)` or `(base.type)+/-adj`
 ///
-/// Supports decimal and hexadecimal formats, both positive and negative.
+/// Reads a pointer specifier after the dot, closes the parenthesized expression,
+/// then optionally parses `+N` or `-N` adjustment after the `)`.
+fn parse_indirect_offset(input: &str) -> IResult<&str, OffsetSpec> {
+    // `(` base `.` specifier `)` -- every piece is mandatory, in this order.
+    let (rest, _) = char('(')(input)?;
+    let (rest, base_offset) = parse_number(rest)?;
+    let (rest, _) = char('.')(rest)?;
+    let (rest, specifier) = one_of("bBsSlLqQ")(rest)?;
+
+    // `one_of` already restricts the character set; the fallback error keeps the
+    // function total should the two tables ever drift apart.
+    let Some((pointer_type, endian)) = pointer_specifier_to_type(specifier) else {
+        return Err(nom::Err::Error(NomError::new(
+            rest,
+            nom::error::ErrorKind::OneOf,
+        )));
+    };
+
+    let (rest, _) = char(')')(rest)?;
+
+    // The adjustment, when present, comes AFTER the closing paren. A leading `+`
+    // is stripped by hand; a leading `-` is left in place for `parse_number`.
+    let (rest, adjustment) = match rest.strip_prefix('+') {
+        Some(after_plus) => parse_number(after_plus)?,
+        None if rest.starts_with('-') => parse_number(rest)?,
+        None => (rest, 0),
+    };
+
+    let spec = OffsetSpec::Indirect {
+        base_offset,
+        pointer_type,
+        adjustment,
+        endian,
+    };
+    Ok((rest, spec))
+}
+
+/// Parse an offset specification (absolute or indirect)
+///
+/// Supports:
+/// - Absolute offsets: decimal and hexadecimal, positive and negative
+/// - Indirect offsets: `(base.type)` or `(base.type)+adj` syntax
 ///
 /// # Examples
 ///
 /// ```
 /// use libmagic_rs::parser::grammar::parse_offset;
-/// use libmagic_rs::parser::ast::OffsetSpec;
+/// use libmagic_rs::parser::ast::{Endianness, OffsetSpec, TypeKind};
 ///
+/// // Absolute offsets
 /// assert_eq!(parse_offset("0"), Ok(("", OffsetSpec::Absolute(0))));
 /// assert_eq!(parse_offset("123"), Ok(("", OffsetSpec::Absolute(123))));
 /// assert_eq!(parse_offset("0x10"), Ok(("", OffsetSpec::Absolute(16))));
 /// assert_eq!(parse_offset("-4"), Ok(("", OffsetSpec::Absolute(-4))));
 /// assert_eq!(parse_offset("-0xFF"), Ok(("", OffsetSpec::Absolute(-255))));
+///
+/// // Indirect offset (lowercase = little-endian, signed by default)
+/// assert_eq!(
+///     parse_offset("(0x3c.l)"),
+///     Ok(("", OffsetSpec::Indirect {
+///         base_offset: 0x3c,
+///         pointer_type: TypeKind::Long { endian: Endianness::Little, signed: true },
+///
adjustment: 0, +/// endian: Endianness::Little, +/// })) +/// ); +/// +/// // Adjustment after closing paren +/// assert_eq!( +/// parse_offset("(0x3c.l)+4"), +/// Ok(("", OffsetSpec::Indirect { +/// base_offset: 0x3c, +/// pointer_type: TypeKind::Long { endian: Endianness::Little, signed: true }, +/// adjustment: 4, +/// endian: Endianness::Little, +/// })) +/// ); /// ``` /// /// # Errors @@ -176,12 +305,19 @@ pub fn parse_number(input: &str) -> IResult<&str, i64> { /// - The input contains invalid number format (propagated from `parse_number`) /// - Input is empty or contains no parseable offset value /// - The offset value cannot be represented as a valid `i64` +/// - Indirect offset has invalid pointer specifier or missing closing `)` pub fn parse_offset(input: &str) -> IResult<&str, OffsetSpec> { let (input, _) = multispace0(input)?; - let (input, offset_value) = parse_number(input)?; - let (input, _) = multispace0(input)?; - Ok((input, OffsetSpec::Absolute(offset_value))) + if input.starts_with('(') { + let (input, spec) = parse_indirect_offset(input)?; + let (input, _) = multispace0(input)?; + Ok((input, spec)) + } else { + let (input, offset_value) = parse_number(input)?; + let (input, _) = multispace0(input)?; + Ok((input, OffsetSpec::Absolute(offset_value))) + } } /// Parse comparison operators for magic rules diff --git a/src/parser/grammar/tests/indirect_offset.rs b/src/parser/grammar/tests/indirect_offset.rs new file mode 100644 index 0000000..c3434b2 --- /dev/null +++ b/src/parser/grammar/tests/indirect_offset.rs @@ -0,0 +1,282 @@ +// Copyright (c) 2025-2026 the libmagic-rs contributors +// SPDX-License-Identifier: Apache-2.0 + +use super::*; + +// Indirect offset parsing tests +// +// GNU `file` semantics: lowercase = little-endian, uppercase = big-endian. +// Numeric pointer types are signed by default (GOTCHAS S6.3). 
+// Adjustment is parsed AFTER the closing `)`: (base.type)+adj + +#[test] +fn test_parse_offset_indirect_all_specifiers() { + // Table-driven: (input, expected_pointer_type, expected_endian) + let cases: &[(&str, TypeKind, Endianness)] = &[ + // .b / .B - byte (little-endian, signed) + ("(0.b)", TypeKind::Byte { signed: true }, Endianness::Little), + ("(0.B)", TypeKind::Byte { signed: true }, Endianness::Big), + // .s - short little-endian, .S - short big-endian + ( + "(0.s)", + TypeKind::Short { + endian: Endianness::Little, + signed: true, + }, + Endianness::Little, + ), + ( + "(0.S)", + TypeKind::Short { + endian: Endianness::Big, + signed: true, + }, + Endianness::Big, + ), + // .l - long little-endian, .L - long big-endian + ( + "(0x3c.l)", + TypeKind::Long { + endian: Endianness::Little, + signed: true, + }, + Endianness::Little, + ), + ( + "(0x3c.L)", + TypeKind::Long { + endian: Endianness::Big, + signed: true, + }, + Endianness::Big, + ), + // .q - quad little-endian, .Q - quad big-endian + ( + "(0.q)", + TypeKind::Quad { + endian: Endianness::Little, + signed: true, + }, + Endianness::Little, + ), + ( + "(0.Q)", + TypeKind::Quad { + endian: Endianness::Big, + signed: true, + }, + Endianness::Big, + ), + ]; + + for (input, expected_type, expected_endian) in cases { + let base = if input.contains("0x3c") { 0x3c } else { 0 }; + assert_eq!( + parse_offset(input), + Ok(( + "", + OffsetSpec::Indirect { + base_offset: base, + pointer_type: expected_type.clone(), + adjustment: 0, + endian: *expected_endian, + } + )), + "Failed for input: {input}" + ); + } +} + +#[test] +fn test_parse_offset_indirect_with_positive_adjustment() { + // Adjustment AFTER closing paren: (base.type)+adj + assert_eq!( + parse_offset("(0x3c.l)+4"), + Ok(( + "", + OffsetSpec::Indirect { + base_offset: 0x3c, + pointer_type: TypeKind::Long { + endian: Endianness::Little, + signed: true + }, + adjustment: 4, + endian: Endianness::Little, + } + )) + ); + assert_eq!( + 
parse_offset("(0.b)+0xFF"), + Ok(( + "", + OffsetSpec::Indirect { + base_offset: 0, + pointer_type: TypeKind::Byte { signed: true }, + adjustment: 255, + endian: Endianness::Little, + } + )) + ); +} + +#[test] +fn test_parse_offset_indirect_with_negative_adjustment() { + assert_eq!( + parse_offset("(0x3c.l)-8"), + Ok(( + "", + OffsetSpec::Indirect { + base_offset: 0x3c, + pointer_type: TypeKind::Long { + endian: Endianness::Little, + signed: true + }, + adjustment: -8, + endian: Endianness::Little, + } + )) + ); + assert_eq!( + parse_offset("(100.s)-0x10"), + Ok(( + "", + OffsetSpec::Indirect { + base_offset: 100, + pointer_type: TypeKind::Short { + endian: Endianness::Little, + signed: true + }, + adjustment: -16, + endian: Endianness::Little, + } + )) + ); +} + +#[test] +fn test_parse_offset_indirect_negative_base() { + // Negative base offsets (from end of file) + assert_eq!( + parse_offset("(-4.l)"), + Ok(( + "", + OffsetSpec::Indirect { + base_offset: -4, + pointer_type: TypeKind::Long { + endian: Endianness::Little, + signed: true + }, + adjustment: 0, + endian: Endianness::Little, + } + )) + ); + // Negative base with adjustment after paren + assert_eq!( + parse_offset("(-0x10.s)+2"), + Ok(( + "", + OffsetSpec::Indirect { + base_offset: -16, + pointer_type: TypeKind::Short { + endian: Endianness::Little, + signed: true + }, + adjustment: 2, + endian: Endianness::Little, + } + )) + ); +} + +#[test] +fn test_parse_offset_indirect_hex_base() { + assert_eq!( + parse_offset("(0xFF.l)"), + Ok(( + "", + OffsetSpec::Indirect { + base_offset: 0xFF, + pointer_type: TypeKind::Long { + endian: Endianness::Little, + signed: true + }, + adjustment: 0, + endian: Endianness::Little, + } + )) + ); +} + +#[test] +fn test_parse_offset_indirect_with_whitespace() { + // Leading whitespace should be handled + assert_eq!( + parse_offset(" (0x3c.l)"), + Ok(( + "", + OffsetSpec::Indirect { + base_offset: 0x3c, + pointer_type: TypeKind::Long { + endian: Endianness::Little, + signed: 
true + }, + adjustment: 0, + endian: Endianness::Little, + } + )) + ); + // Trailing content after adjustment-free form + assert_eq!( + parse_offset("(0x3c.l) string"), + Ok(( + "string", + OffsetSpec::Indirect { + base_offset: 0x3c, + pointer_type: TypeKind::Long { + endian: Endianness::Little, + signed: true + }, + adjustment: 0, + endian: Endianness::Little, + } + )) + ); +} + +#[test] +fn test_parse_offset_indirect_parse_failures() { + // Missing closing paren + assert!(parse_offset("(0x3c.l").is_err()); + // Missing dot and type + assert!(parse_offset("(0x3c)").is_err()); + // Invalid specifier character + assert!(parse_offset("(0x3c.x)").is_err()); + // Empty parens + assert!(parse_offset("()").is_err()); + // Missing base + assert!(parse_offset("(.l)").is_err()); +} + +#[test] +fn test_parse_rule_offset_indirect() { + // Level 0 indirect + assert_eq!( + parse_rule_offset("(0x3c.l)"), + Ok(( + "", + ( + 0, + OffsetSpec::Indirect { + base_offset: 0x3c, + pointer_type: TypeKind::Long { + endian: Endianness::Little, + signed: true + }, + adjustment: 0, + endian: Endianness::Little, + } + ) + )) + ); +} diff --git a/src/parser/grammar/tests.rs b/src/parser/grammar/tests/mod.rs similarity index 96% rename from src/parser/grammar/tests.rs rename to src/parser/grammar/tests/mod.rs index b01dc3c..c6a6958 100644 --- a/src/parser/grammar/tests.rs +++ b/src/parser/grammar/tests/mod.rs @@ -1,6 +1,8 @@ // Copyright (c) 2025-2026 the libmagic-rs contributors // SPDX-License-Identifier: Apache-2.0 +mod indirect_offset; + use super::*; use crate::parser::ast::Endianness; use crate::parser::ast::PStringLengthWidth; @@ -281,6 +283,90 @@ fn test_parse_offset_boundary_values() { ); } +#[test] +fn test_parse_rule_offset_indirect_child() { + // Level 1 child with indirect offset: >(0x3c.l) + assert_eq!( + parse_rule_offset(">(0x3c.l)"), + Ok(( + "", + ( + 1, + OffsetSpec::Indirect { + base_offset: 0x3c, + pointer_type: TypeKind::Long { + endian: Endianness::Little, + signed: true 
+ }, + adjustment: 0, + endian: Endianness::Little, + } + ) + )) + ); + // Level 2 child with adjustment after paren: >>(0x3c.l)+4 + assert_eq!( + parse_rule_offset(">>(0x3c.l)+4"), + Ok(( + "", + ( + 2, + OffsetSpec::Indirect { + base_offset: 0x3c, + pointer_type: TypeKind::Long { + endian: Endianness::Little, + signed: true + }, + adjustment: 4, + endian: Endianness::Little, + } + ) + )) + ); +} + +#[test] +fn test_parse_rule_offset_indirect_with_remaining() { + // >(0x3c.l) followed by type keyword + assert_eq!( + parse_rule_offset(">(0x3c.l) string"), + Ok(( + "string", + ( + 1, + OffsetSpec::Indirect { + base_offset: 0x3c, + pointer_type: TypeKind::Long { + endian: Endianness::Little, + signed: true + }, + adjustment: 0, + endian: Endianness::Little, + } + ) + )) + ); + // >(0x3c.l)+4 followed by type keyword + assert_eq!( + parse_rule_offset(">(0x3c.l)+4 string"), + Ok(( + "string", + ( + 1, + OffsetSpec::Indirect { + base_offset: 0x3c, + pointer_type: TypeKind::Long { + endian: Endianness::Little, + signed: true + }, + adjustment: 4, + endian: Endianness::Little, + } + ) + )) + ); +} + // Operator parsing tests #[test] fn test_parse_operator_equality() { diff --git a/tessl.json b/tessl.json index 63c7a65..41560da 100644 --- a/tessl.json +++ b/tessl.json @@ -1,9 +1,9 @@ { - "name": "stringy", + "name": "libmagic-rs", "mode": "vendored", "dependencies": { "actionbook/rust-skills": { - "version": "3ea748280d2fa5680675fe4abe1a5e764f7c021e", + "version": "1f4becdcb88d1cbccc1880594479f28891102843", "source": "https://github.com/actionbook/rust-skills", "include": { "skills": [ @@ -35,6 +35,27 @@ "unsafe-checker" ] } + }, + "pantheon-ai/github-actions-generator": { + "version": "0.1.1" + }, + "pantheon-ai/mise-complete": { + "version": "0.1.1" + }, + "pantheon-ai/dockerfile-toolkit": { + "version": "0.1.0" + }, + "pantheon-ai/moscow-prioritization": { + "version": "0.1.1" + }, + "pantheon-ai/software-design-principles": { + "version": "0.1.4" + }, + 
"cisco/software-security": {
+      "version": "1.2.5"
+    },
+    "tessl-labs/good-oss-citizen": {
+      "version": "1.0.1"
     }
   }
 }
diff --git a/tests/indirect_offset_integration.rs b/tests/indirect_offset_integration.rs
new file mode 100644
index 0000000..45312e6
--- /dev/null
+++ b/tests/indirect_offset_integration.rs
@@ -0,0 +1,222 @@
+// Copyright (c) 2025-2026 the libmagic-rs contributors
+// SPDX-License-Identifier: Apache-2.0
+
+//! Integration tests for indirect offset parsing and evaluation
+//!
+//! Exercises the full pipeline: write a magic file with indirect-offset syntax,
+//! load it through `MagicDatabase::load_from_file()`, evaluate buffers, and
+//! assert correct match / no-match behavior.
+//!
+//! GNU `file` semantics: lowercase specifiers are little-endian, uppercase are
+//! big-endian. Pointer types are signed by default (GOTCHAS S6.3).
+//! Adjustment is parsed after the closing paren: `(base.type)+adj`.
+
+use std::fs;
+use std::io::Write;
+
+use libmagic_rs::MagicDatabase;
+use tempfile::TempDir;
+
+/// Build a PE-like buffer where offset 0x3c holds a little-endian 4-byte pointer
+/// to the PE signature (`PE\0\0`).
+///
+/// Layout:
+/// [0x00] "MZ" DOS header stub
+/// [0x3c] 4-byte little-endian pointer -> 0x80 (PE header location)
+/// [0x80] "PE\0\0" signature
+fn build_pe_like_buffer() -> Vec<u8> {
+    let mut buf = vec![0u8; 0x84];
+    // DOS stub magic
+    buf[0] = b'M';
+    buf[1] = b'Z';
+    // Little-endian pointer at 0x3c -> 0x80
+    buf[0x3c] = 0x80;
+    buf[0x3d] = 0x00;
+    buf[0x3e] = 0x00;
+    buf[0x3f] = 0x00;
+    // PE signature at 0x80
+    buf[0x80] = b'P';
+    buf[0x81] = b'E';
+    buf[0x82] = 0x00;
+    buf[0x83] = 0x00;
+    buf
+}
+
+#[test]
+fn test_indirect_offset_pe_detection_via_magic_file() {
+    let temp_dir = TempDir::new().unwrap();
+    let magic_path = temp_dir.path().join("pe.magic");
+
+    // Use lowercase .l (little-endian long) -- GNU `file` semantics.
+    let mut f = fs::File::create(&magic_path).unwrap();
+    writeln!(f, r#"0 string "MZ" DOS executable"#).unwrap();
+    writeln!(f, r#">(0x3c.l) string "PE" (PE)"#).unwrap();
+
+    let db = MagicDatabase::load_from_file(&magic_path).unwrap();
+    let buf = build_pe_like_buffer();
+    let result = db.evaluate_buffer(&buf).unwrap();
+
+    assert!(
+        result.description.contains("DOS executable"),
+        "Expected DOS executable match, got: {}",
+        result.description
+    );
+    assert!(
+        result.description.contains("(PE)"),
+        "Expected PE child match via indirect offset, got: {}",
+        result.description
+    );
+}
+
+#[test]
+fn test_indirect_offset_no_match_when_pointer_out_of_bounds() {
+    let temp_dir = TempDir::new().unwrap();
+    let magic_path = temp_dir.path().join("pe.magic");
+
+    let mut f = fs::File::create(&magic_path).unwrap();
+    writeln!(f, r#"0 string "MZ" DOS executable"#).unwrap();
+    writeln!(f, r#">(0x3c.l) string "PE" (PE)"#).unwrap();
+
+    let db = MagicDatabase::load_from_file(&magic_path).unwrap();
+
+    // Buffer has "MZ" but the LE pointer at 0x3c points beyond the buffer
+    let mut buf = vec![0u8; 0x40];
+    buf[0] = b'M';
+    buf[1] = b'Z';
+    // Little-endian pointer at 0x3c -> 0xFF (beyond buffer length)
+    buf[0x3c] = 0xFF;
+    buf[0x3d] = 0x00;
+    buf[0x3e] = 0x00;
+    buf[0x3f] = 0x00;
+
+    let result = db.evaluate_buffer(&buf).unwrap();
+
+    // The parent "MZ" rule should still match
+    assert!(
+        result.description.contains("DOS executable"),
+        "Expected DOS match even when child fails, got: {}",
+        result.description
+    );
+    // But the PE child should NOT match (pointer out of bounds)
+    assert!(
+        !result.description.contains("(PE)"),
+        "PE child should not match when pointer is out of bounds, got: {}",
+        result.description
+    );
+}
+
+#[test]
+fn test_indirect_offset_with_adjustment_after_paren() {
+    let temp_dir = TempDir::new().unwrap();
+    let magic_path = temp_dir.path().join("adj.magic");
+
+    // Adjustment AFTER closing paren: (base.type)+adj
+    let mut f = fs::File::create(&magic_path).unwrap();
+    writeln!(f, r#"(0.l)+4 string "MAGIC" Adjusted match"#).unwrap();
+
+    let db = MagicDatabase::load_from_file(&magic_path).unwrap();
+
+    // LE pointer at offset 0 = 0x06 (little-endian), +4 = 10, "MAGIC" at offset 10
+    let mut buf = vec![0u8; 20];
+    buf[0] = 0x06;
+    buf[1] = 0x00;
+    buf[2] = 0x00;
+    buf[3] = 0x00;
+    buf[10] = b'M';
+    buf[11] = b'A';
+    buf[12] = b'G';
+    buf[13] = b'I';
+    buf[14] = b'C';
+
+    let result = db.evaluate_buffer(&buf).unwrap();
+    assert!(
+        result.description.contains("Adjusted match"),
+        "Expected adjusted indirect match, got: {}",
+        result.description
+    );
+}
+
+#[test]
+fn test_indirect_offset_byte_specifier() {
+    let temp_dir = TempDir::new().unwrap();
+    let magic_path = temp_dir.path().join("byte_ptr.magic");
+
+    // Use .b (byte pointer): read 1 byte at offset 0, use as offset
+    let mut f = fs::File::create(&magic_path).unwrap();
+    writeln!(f, r#"(0.b) string "OK" Byte pointer match"#).unwrap();
+
+    let db = MagicDatabase::load_from_file(&magic_path).unwrap();
+
+    // Byte at offset 0 = 5, so check for "OK" at offset 5
+    let mut buf = vec![0u8; 10];
+    buf[0] = 5;
+    buf[5] = b'O';
+    buf[6] = b'K';
+
+    let result = db.evaluate_buffer(&buf).unwrap();
+    assert!(
+        result.description.contains("Byte pointer match"),
+        "Expected byte pointer match, got: {}",
+        result.description
+    );
+}
+
+#[test]
+fn test_indirect_offset_loading_does_not_error() {
+    let temp_dir = TempDir::new().unwrap();
+    let magic_path = temp_dir.path().join("load.magic");
+
+    // Verify the parsing path succeeds for every specifier (lowercase = LE, uppercase = BE)
+    let mut f = fs::File::create(&magic_path).unwrap();
+    writeln!(f, r#"(0.b) string "A" byte LE ptr"#).unwrap();
+    writeln!(f, r#"(0.B) string "A" byte BE ptr"#).unwrap();
+    writeln!(f, r#"(0.s) string "A" short LE ptr"#).unwrap();
+    writeln!(f, r#"(0.S) string "A" short BE ptr"#).unwrap();
+    writeln!(f, r#"(0.l) string "A" long LE ptr"#).unwrap();
+    writeln!(f, r#"(0.L) string "A" long BE ptr"#).unwrap();
+    writeln!(f, r#"(0.q) string "A" quad LE ptr"#).unwrap();
+    writeln!(f, r#"(0.Q) string "A" quad BE ptr"#).unwrap();
+
+    let result = MagicDatabase::load_from_file(&magic_path);
+    assert!(
+        result.is_ok(),
+        "Loading magic file with all indirect specifiers should succeed: {:?}",
+        result.err()
+    );
+}
+
+#[test]
+fn test_indirect_offset_child_with_adjustment_after_paren() {
+    let temp_dir = TempDir::new().unwrap();
+    let magic_path = temp_dir.path().join("pe_adj.magic");
+
+    // Child rule with (base.type)+adj syntax
+    let mut f = fs::File::create(&magic_path).unwrap();
+    writeln!(f, r#"0 string "MZ" DOS executable"#).unwrap();
+    writeln!(f, r#">(0x3c.l)+4 string "PE" (PE+4)"#).unwrap();
+
+    let db = MagicDatabase::load_from_file(&magic_path).unwrap();
+
+    // LE pointer at 0x3c = 0x7C, +4 = 0x80, "PE" at 0x80
+    let mut buf = vec![0u8; 0x84];
+    buf[0] = b'M';
+    buf[1] = b'Z';
+    buf[0x3c] = 0x7C;
+    buf[0x3d] = 0x00;
+    buf[0x3e] = 0x00;
+    buf[0x3f] = 0x00;
+    buf[0x80] = b'P';
+    buf[0x81] = b'E';
+
+    let result = db.evaluate_buffer(&buf).unwrap();
+    assert!(
+        result.description.contains("DOS executable"),
+        "Expected DOS match, got: {}",
+        result.description
+    );
+    assert!(
+        result.description.contains("(PE+4)"),
+        "Expected child match with adjustment, got: {}",
+        result.description
+    );
+}