Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
0e592fd
chore(deps): update libmagic-rs name and dependencies versions
unclesp1d3r Mar 30, 2026
281b643
chore(deps): update cargo-binstall to version 1.17.7 and rust to 1.94.1
unclesp1d3r Mar 30, 2026
d10cdb6
docs(agents): add agent rules section and reference to RULES.md
unclesp1d3r Mar 30, 2026
4469117
docs(policy): add AI usage policy to clarify accountability and guide…
unclesp1d3r Mar 30, 2026
11cd856
chore(ci): update mise-action to version 4.0.1 for improved functiona…
unclesp1d3r Mar 30, 2026
ad857ae
feat(offset): implement indirect offset resolution functionality
unclesp1d3r Mar 30, 2026
014fa8a
docs(ast): enhance PString documentation with structure and behavior …
unclesp1d3r Mar 30, 2026
25754b0
docs(evaluator): clarify length interpretation in type reading section
unclesp1d3r Mar 30, 2026
082b017
docs(parser): improve formatting for date and timestamp types
unclesp1d3r Mar 30, 2026
f420dfc
chore(gitignore): remove unnecessary entries from .gitignore
unclesp1d3r Mar 30, 2026
66c0723
feat(offset): implement parsing and evaluation for indirect offsets
unclesp1d3r Mar 30, 2026
a779736
feat(parser): implement parsing for indirect offset specifications
unclesp1d3r Mar 30, 2026
f24206b
test(offset): add integration tests for indirect offset resolution
unclesp1d3r Mar 30, 2026
96b0a1e
feat(parser): implement indirect offset parsing for magic file grammar
unclesp1d3r Mar 30, 2026
a9e90eb
feat(evaluator): implement indirect offset resolution for binary formats
unclesp1d3r Mar 30, 2026
2e8b034
docs(gotchas): document limitations of parse_number and parse_value f…
unclesp1d3r Mar 30, 2026
94d8b91
docs(agents): clarify indirect offset specifications and GNU semantics
unclesp1d3r Mar 30, 2026
9bd7759
docs(gotchas): document indirect offset pointer specifiers and GNU se…
unclesp1d3r Mar 30, 2026
b926bec
fix(parser): correct indirect offset parser for GNU file semantics
unclesp1d3r Mar 30, 2026
5ea12f2
feat(parser): update indirect offset parsing to align with GNU semantics
unclesp1d3r Mar 30, 2026
c481d42
Merge branch 'main' into 37-evaluator-implement-indirect-offset-resol…
mergify[bot] Mar 30, 2026
2befffa
feat(deps): update bun and cargo-binstall versions in mise.lock
unclesp1d3r Mar 30, 2026
41763f2
refactor: address PR review feedback for indirect offset implementation
unclesp1d3r Mar 30, 2026
585ad21
fix: split oversized table-driven test to satisfy clippy too_many_lines
unclesp1d3r Mar 30, 2026
8ca4bd2
chore(deps): update mdformat version to 1.0.0 with new arguments
unclesp1d3r Mar 30, 2026
aa5a249
chore(deps): update bun and cargo-binstall versions in mise.lock
unclesp1d3r Mar 30, 2026
25f35b1
style(settings): format JSON files for consistency and readability
unclesp1d3r Mar 30, 2026
e0a7dd2
style(devcontainer): format rust-analyzer extraArgs for consistency
unclesp1d3r Mar 30, 2026
36b7444
chore(format): update mdformat configuration for improved consistency
unclesp1d3r Mar 30, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 1 addition & 4 deletions .devcontainer/devcontainer.json
Original file line number Diff line number Diff line change
Expand Up @@ -69,10 +69,7 @@
"--all-features"
],
"rust-analyzer.cargo.features": "all",
"rust-analyzer.rustfmt.extraArgs": [
"--edition",
"2024"
],
"rust-analyzer.rustfmt.extraArgs": ["--edition", "2024"],
"editor.formatOnSave": true,
"editor.codeActionsOnSave": {
"source.fixAll": "explicit"
Expand Down
15 changes: 6 additions & 9 deletions .gemini/settings.json
Original file line number Diff line number Diff line change
@@ -1,12 +1,9 @@
{
"mcpServers": {
"tessl": {
"type": "stdio",
"command": "tessl",
"args": [
"mcp",
"start"
]
"mcpServers": {
"tessl": {
"type": "stdio",
"command": "tessl",
"args": ["mcp", "start"]
}
}
}
}
15 changes: 6 additions & 9 deletions .mcp.json
Original file line number Diff line number Diff line change
@@ -1,12 +1,9 @@
{
"mcpServers": {
"tessl": {
"type": "stdio",
"command": "tessl",
"args": [
"mcp",
"start"
]
"mcpServers": {
"tessl": {
"type": "stdio",
"command": "tessl",
"args": ["mcp", "start"]
}
}
}
}
21 changes: 12 additions & 9 deletions .mdformat.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,20 +10,23 @@ exclude = [
"megalinter-reports/**",
"**/*.result",
"**/*.testfile",
"**/SKILL.md", # AI stuff
".claude/**/*", # AI stuff
".tessl/**/*", # AI stuff
]
validate = true
number = true
wrap = "no"
end_of_line = "lf"
# extensions = [
# "gfm",
# "frontmatter",
# "footnote",
# "simple_breaks",
# "gfm_alerts",
# "toc",
# "wikilink",
# ]
extensions = [
"gfm",
"footnote",
"front_matters",
"simple_breaks",
"wikilink",
"gfm_alerts",
"toc",
]

[plugin.mkdocs]
align_semantic_breaks_in_lists = true
Expand Down
2 changes: 0 additions & 2 deletions .tessl/.gitignore

This file was deleted.

10 changes: 3 additions & 7 deletions .vscode/settings.json
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,8 @@
"git.rebaseWhenSync": true,
"git.replaceTagsWhenPull": true,
"githubPullRequests.codingAgent.uiIntegration": true,
"ruff.path": [
"${workspaceFolder}/.vscode/mise-tools/ruff"
],
"ruff.interpreter": [
"${workspaceFolder}/.vscode/mise-tools/python"
],
"ruff.path": ["${workspaceFolder}/.vscode/mise-tools/ruff"],
"ruff.interpreter": ["${workspaceFolder}/.vscode/mise-tools/python"],
"python.defaultInterpreterPath": "${workspaceFolder}/.vscode/mise-tools/python",
"bun.runtime": "${workspaceFolder}/.vscode/mise-tools/bun"
}
}
9 changes: 6 additions & 3 deletions AGENTS.md
Original file line number Diff line number Diff line change
Expand Up @@ -204,7 +204,7 @@ cargo test --doc # Test documentation examples

### Currently Implemented (v0.1.0)

- **Offsets**: Absolute and from-end specifications (indirect and relative are parsed but not yet evaluated)
- **Offsets**: Absolute, from-end, and indirect specifications (relative offsets are parsed but not yet evaluated)
- **Types**: `byte`, `short`, `long`, `quad`, `float`, `double`, `string`, `pstring` with endianness support; unsigned variants `ubyte`, `ushort`/`ubeshort`/`uleshort`, `ulong`/`ubelong`/`ulelong`, `uquad`/`ubequad`/`ulequad`; float/double endian variants `befloat`/`lefloat`, `bedouble`/`ledouble`; 32-bit date/timestamp types `date`/`ldate`/`bedate`/`beldate`/`ledate`/`leldate`; 64-bit date/timestamp types `qdate`/`qldate`/`beqdate`/`beqldate`/`leqdate`/`leqldate`; `pstring` is a Pascal string (length-prefixed) with support for 1/2/4-byte length prefixes via `/B`, `/H` (2-byte BE), `/h` (2-byte LE), `/L` (4-byte BE), `/l` (4-byte LE) suffixes, and the `/J` flag (stored length includes prefix width, JPEG convention) which is combinable with width suffixes (e.g., `pstring/HJ`); date values formatted as "Www Mmm DD HH:MM:SS YYYY" matching GNU `file` output; types are signed by default (libmagic-compatible)
- **Operators**: `=` (equal), `!=` (not equal), `<` (less than), `>` (greater than), `<=` (less equal), `>=` (greater equal), `&` (bitwise AND with optional mask), `^` (bitwise XOR), `~` (bitwise NOT), `x` (any value)
- **Nested Rules**: Hierarchical rule evaluation with proper indentation
Expand Down Expand Up @@ -245,9 +245,8 @@ impl BinaryRegex for regex::bytes::Regex {

### Offset Specifications

- Indirect offsets are parsed into the AST but evaluation is not yet implemented (#37)
- Indirect offsets are fully implemented (parsing + evaluation) with specifiers: `.b/.B` (byte), `.s/.S` (short), `.l/.L` (long), `.q/.Q` (quad); lowercase = little-endian, uppercase = big-endian (GNU `file` semantics); pointer types signed by default; adjustment after closing paren: `(base.type)+adj`
- Relative offsets are parsed into the AST but evaluation is not yet implemented (#38)
- Only absolute and from-end offsets are fully functional

### Magic File Syntax

Expand Down Expand Up @@ -570,3 +569,7 @@ This project has the OSSF Best Practices passing badge. Maintain these standards
- SECURITY.md documents vulnerability reporting with scope, safe harbor, and PGP key
- AGENTS.md must accurately reflect implemented features (not aspirational)
- `docs/src/release-verification.md` documents artifact signing for users

## Agent Rules <!-- tessl-managed -->

@.tessl/RULES.md follow the [instructions](.tessl/RULES.md)
31 changes: 31 additions & 0 deletions AI_POLICY.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# AI Usage Policy

We build operator-focused security tools. AI coding assistants are part of how we do that. This policy is not anti-AI -- it is pro-accountability.

Think of AI assistance like spellcheck. It catches typos, suggests corrections, and speeds up the mechanical parts of writing. But you are still responsible for your words and their consequences.

## The Rule

**You own every line you submit.** You must be able to explain what it does and how it interacts with the rest of the system without asking your AI to explain it back to you.

Everything else follows from that.

## How We Work

- **Disclose your tools.** Note what you used in your PR description -- Claude Code, Copilot, Cursor, whatever. No specific format required.

- **Review AI-generated text before posting.** Issues, discussions, and PR descriptions must reflect your understanding, not a language model's first draft. Read it, cut the filler, make sure it says what you mean.

- **No AI-generated media.** No generated images, logos, audio, or video. Text-based diagrams (ASCII art, Mermaid) and code are acceptable.

- **Unreviewed output gets closed.** Hallucinated APIs, boilerplate that ignores project conventions, suggestions you clearly did not run -- these get closed without review. We are not a QA service for your AI's output.

## Why

Transparent by design means knowing what the code does and why it is there. Tested under pressure means every change was understood by the person who submitted it. AI makes capable engineers faster. It does not replace the understanding that makes contributions trustworthy.

Every pull request is reviewed by a human. Submitting work you do not understand shifts that burden onto maintainers. That is not how we operate.

## New Contributors

Use AI to learn the codebase. Read the code it generates. Run it. Break it. Then submit work that reflects your understanding. We will help you through review -- that deal only works if the code is yours.
12 changes: 12 additions & 0 deletions GOTCHAS.md
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,18 @@ The nom `tuple` combinator is deprecated. Use bare tuple syntax `(a, b, c)` dire

`type_keyword_to_kind` has `#[allow(clippy::too_many_lines)]` because it exceeds 100 lines with all date keywords.

### 3.5 `parse_number` Does Not Handle `+` Prefix

`parse_number` handles `-` signs but not `+`. When parsing syntax like `+4` (e.g., indirect offset adjustments), consume the `+` character manually before calling `parse_number`.

### 3.6 `parse_value` Requires Quoted Strings

`parse_value()` does not accept bare unquoted strings. String values in magic file rules must be quoted (e.g., `string "MZ"` not `string MZ`). Integration tests writing magic files must use `r#"0 string "MZ" description"#` format.

### 3.7 Indirect Offset Pointer Specifiers Follow GNU `file` Semantics

Lowercase pointer specifiers (`.s`, `.l`, `.q`) map to **little-endian**, not native endian. Uppercase (`.S`, `.L`, `.Q`) map to big-endian. All numeric pointer types are **signed by default** (per S6.3). The adjustment is parsed **after** the closing paren: `(base.type)+adj`, not `(base.type+adj)`.

## 4. Module Visibility & Re-exports

### 4.1 Private Engine Module
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,165 @@
---
title: Implement indirect offset parsing in magic file grammar
date: 2026-03-30
status: resolved
severity: high
category: integration-issues
components:
- parser/grammar
- evaluator/offset
- integration
tags:
- parser
- indirect-offset
- nom
- magic-file-syntax
- pointer-specifier
issue: '#37'
branch: 37-evaluator-implement-indirect-offset-resolution
symptoms:
- parse_offset("(0x3c.l)") fails with parse error
- Magic files containing indirect offset syntax cannot be loaded via MagicDatabase::load_from_file()
- resolve_indirect_offset() is unreachable dead code from text-magic loading path
root_cause: parse_offset() had no branch for '('-prefixed input; always delegated to parse_number() which only handles numeric literals
solution_files:
- src/parser/grammar/mod.rs
- src/parser/grammar/tests.rs
- tests/indirect_offset_integration.rs
related_gotchas:
- parse_number() handles '-' prefix but not '+'; positive adjustments need manual '+' consumption
- parse_value() requires quoted strings; bare string literals cause integration test failures
---

# Indirect Offset Parser-Evaluator Sync

## Problem

The evaluator for indirect offsets (`resolve_indirect_offset()` in `src/evaluator/offset/indirect.rs`) was fully implemented with 35 unit tests, but the parser in `src/parser/grammar/mod.rs` could not produce `OffsetSpec::Indirect` AST nodes. The `parse_offset()` function only handled absolute numeric offsets and had no branch for `(`-prefixed indirect offset syntax like `(0x3c.l)` or `(0x3c.l)+4`.

This meant the feature was unreachable through the public `MagicDatabase::load_from_file()` API -- the primary way users load text magic files.

## Root Cause

`parse_offset()` unconditionally delegated to `parse_number()`, which only parses numeric literals. Input starting with `(` was rejected as a parse error. The evaluator code was effectively dead code from the text-magic loading path.

## Solution

### 1. Added `pointer_specifier_to_type()` helper

Maps single-character pointer specifiers to `(TypeKind, Endianness)` per libmagic convention:

| Specifier | Width | Endianness |
| ---------- | ------ | ---------- |
| `.b`, `.B` | 1 byte | Native |
| `.s` | 2 byte | Little |
| `.S` | 2 byte | Big |
| `.l` | 4 byte | Little |
| `.L` | 4 byte | Big |
| `.q` | 8 byte | Little |
| `.Q` | 8 byte | Big |

All pointer types are signed by default (`signed: true`). Lowercase = little-endian (not native endian), uppercase = big-endian, following GNU `file` semantics.

### 2. Added `parse_indirect_offset()` function

Parses `(base.type)` and `(base.type)+/-adj` syntax (the adjustment follows the closing paren):

1. Consume `(`
2. Parse base offset via `parse_number()`
3. Consume `.` and type specifier character
4. Consume `)`
5. Optionally parse the adjustment that follows the closing paren (see gotcha below)
6. Return `OffsetSpec::Indirect { base_offset, pointer_type, adjustment, endian }`

### 3. Updated `parse_offset()` to branch on leading `(`

```rust
pub fn parse_offset(input: &str) -> IResult<&str, OffsetSpec> {
let (input, _) = multispace0(input)?;
if input.starts_with('(') {
let (input, spec) = parse_indirect_offset(input)?;
let (input, _) = multispace0(input)?;
Ok((input, spec))
} else {
let (input, offset_value) = parse_number(input)?;
let (input, _) = multispace0(input)?;
Ok((input, OffsetSpec::Absolute(offset_value)))
}
}
```

### 4. No changes needed to `parse_rule_offset()`

It delegates to `parse_offset()`, so hierarchical forms like `>(0x3c.l)` work automatically.

## Gotchas Discovered

### `parse_number()` does not handle `+` prefix

`parse_number()` handles `-` internally but not `+`. For `+N` adjustments, the `+` must be consumed manually:

```rust
let (input, adjustment) = if input.starts_with('+') {
let (input, _) = char('+')(input)?;
parse_number(input)?
} else if input.starts_with('-') {
parse_number(input)?
} else {
(input, 0)
};
```

Do NOT modify `parse_number()` globally -- it is shared by offset and value parsing, and adding `+` support would change semantics elsewhere.

### `parse_value()` requires quoted strings

Integration tests initially failed because `parse_value()` does not accept bare strings. Magic file string values must be quoted:

```text
# Correct
0 string "MZ" DOS executable

# Wrong -- parse_value() rejects bare "MZ"
0 string MZ DOS executable
```

### Use big-endian specifiers in cross-platform tests

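Prefer `.L` (big-endian long) in integration test magic files, and lay out the test byte buffer in the matching byte order. Note that `.l` is **little-endian** (not native endian) under GNU `file` semantics, so either specifier is deterministic across architectures as long as the buffer's byte order matches the specifier used.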

## Prevention Strategies

### Parser-Evaluator Parity Checklist

When adding a new AST variant, ensure:

1. **Parser produces it** -- unit test parses raw syntax, asserts correct AST node
2. **Evaluator consumes it** -- unit test constructs AST node, asserts evaluation result
3. **End-to-end test exists** -- integration test through `MagicDatabase::load_from_file()` proves the full pipeline works
4. **Codegen handles it** -- if it can appear in built-in rules, update `src/parser/codegen.rs`
5. **Strength calculation covers it** -- update `src/evaluator/strength.rs` if scoring changes

### Integration Test Template

```rust
#[test]
fn test_feature_end_to_end() {
let temp_dir = TempDir::new().unwrap();
let magic_path = temp_dir.path().join("test.magic");
let mut f = fs::File::create(&magic_path).unwrap();
writeln!(f, r#"0 string "MAGIC" Test match"#).unwrap();

let db = MagicDatabase::load_from_file(&magic_path).unwrap();
let result = db.evaluate_buffer(b"MAGIC\x00data").unwrap();
assert!(result.description.contains("Test match"));
}
```

## Cross-References

- **Evaluator solution**: `docs/solutions/logic-errors/indirect-offset-resolution.md`
- **Magic format spec**: `docs/MAGIC_FORMAT.md` (lines 106-126, indirect offset section)
- **Gotchas**: `GOTCHAS.md` sections 3.5 (`parse_number` `+` limitation), 3.6 (quoted strings), and 3.7 (indirect offset pointer specifier semantics)
- **Architecture**: `AGENTS.md` offset specifications section
- **Issue**: #37 (indirect offset resolution)
- **Related gotchas**: S2 (enum variant checklists), S3 (parser architecture split), S5 (numeric type pitfalls)
Loading
Loading