From 96916e15f062a130c71917ec264c456dff453b0d Mon Sep 17 00:00:00 2001 From: Florian Kinder Date: Thu, 18 Jun 2026 19:01:38 +0200 Subject: [PATCH] Cap parser nesting depth to prevent stack overflow parseArray/parseDict recursed without limit, so deeply nested [[[...]]] or <<...>> in a hostile PDF could overflow the stack -- a fatal, unrecoverable crash. Cap nesting at maxParseDepth (1000) and return an error past it; real PDFs nest only a few levels. Tests: deeply nested array and dict are rejected; moderate nesting still resolves. --- parse.go | 15 +++++++++++ parse_test.go | 70 +++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 85 insertions(+) create mode 100644 parse_test.go diff --git a/parse.go b/parse.go index de3d276..50b5ba2 100644 --- a/parse.go +++ b/parse.go @@ -8,6 +8,10 @@ import ( "github.com/speedata/pdfdisassembler/internal/lex" ) +// maxParseDepth caps array/dict nesting so a hostile PDF can't stack-overflow +// the recursive parser. +const maxParseDepth = 1000 + // parser is a recursive descent parser over a lex.Lexer that emits direct // PDF Objects. It does not chase indirect references — every Reference // token becomes a Reference value. 
@@ -15,6 +19,7 @@ type parser struct { lx *lex.Lexer r *Reader queue []lex.Token + depth int } func newParser(lx *lex.Lexer, r *Reader) *parser { @@ -135,6 +140,11 @@ func (p *parser) parseObjectFrom(tok lex.Token) (Object, error) { } func (p *parser) parseArray() (Array, error) { + p.depth++ + defer func() { p.depth-- }() + if p.depth > maxParseDepth { + return nil, fmt.Errorf("pdfdisassembler/parse: nesting too deep (> %d)", maxParseDepth) + } var out Array for { t, err := p.peek() @@ -157,6 +167,11 @@ func (p *parser) parseArray() (Array, error) { } func (p *parser) parseDict() (*Dict, error) { + p.depth++ + defer func() { p.depth-- }() + if p.depth > maxParseDepth { + return nil, fmt.Errorf("pdfdisassembler/parse: nesting too deep (> %d)", maxParseDepth) + } d := newDict(p.r) for { t, err := p.peek() diff --git a/parse_test.go b/parse_test.go new file mode 100644 index 0000000..d657b1a --- /dev/null +++ b/parse_test.go @@ -0,0 +1,70 @@ +package pdfdisassembler + +import ( + "bytes" + "fmt" + "strings" + "testing" +) + +// buildPDFWithObjectBody puts body as object 3 in a minimal classical-xref PDF. 
+func buildPDFWithObjectBody(t *testing.T, body string) []byte {
+	t.Helper()
+	var buf bytes.Buffer
+	off := func() int { return buf.Len() } // offset at which the next write will start
+	fmt.Fprint(&buf, "%PDF-1.7\n%\xE2\xE3\xCF\xD3\n")
+	offsets := make([]int, 4) // indexed by object number; slot 0 stays unused
+	offsets[1] = off()
+	fmt.Fprint(&buf, "1 0 obj\n<< /Type /Catalog /Pages 2 0 R >>\nendobj\n")
+	offsets[2] = off()
+	fmt.Fprint(&buf, "2 0 obj\n<< /Type /Pages /Count 0 /Kids [] >>\nendobj\n")
+	offsets[3] = off()
+	fmt.Fprintf(&buf, "3 0 obj\n%s\nendobj\n", body)
+	xrefOff := off()
+	fmt.Fprint(&buf, "xref\n0 4\n")
+	fmt.Fprintf(&buf, "%010d %05d f \n", 0, 65535)
+	for i := 1; i <= 3; i++ {
+		fmt.Fprintf(&buf, "%010d %05d n \n", offsets[i], 0)
+	}
+	fmt.Fprint(&buf, "trailer\n<< /Size 4 /Root 1 0 R >>\n")
+	fmt.Fprintf(&buf, "startxref\n%d\n%%%%EOF\n", xrefOff)
+	return buf.Bytes()
+}
+
+// TestDeeplyNestedRejected checks that over-deep arrays and dicts fail with the nesting-depth error.
+func TestDeeplyNestedRejected(t *testing.T) {
+	// Far above the parser's depth cap, but well below a real stack overflow.
+	const depth = 2000
+	tests := []struct{ name, body string }{
+		{"array", strings.Repeat("[", depth) + strings.Repeat("]", depth)},
+		{"dict", strings.Repeat("<< /K ", depth) + "0" + strings.Repeat(" >>", depth)},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			r, err := Open(bytes.NewReader(buildPDFWithObjectBody(t, tt.body)))
+			if err != nil {
+				t.Fatalf("Open: %v", err)
+			}
+			defer r.Close()
+			if _, err := r.Resolve(Reference{Number: 3, Generation: 0}); err == nil || !strings.Contains(err.Error(), "nesting too deep") {
+				t.Fatalf("Resolve error = %v, want one containing %q", err, "nesting too deep") // an unrelated failure must not pass
+			}
+		})
+	}
+}
+
+// TestModeratelyNestedArrayResolves checks that 100 levels of array nesting still resolve to an Array.
+func TestModeratelyNestedArrayResolves(t *testing.T) {
+	r, err := Open(bytes.NewReader(buildPDFWithObjectBody(t, strings.Repeat("[", 100)+strings.Repeat("]", 100))))
+	if err != nil {
+		t.Fatalf("Open: %v", err)
+	}
+	defer r.Close()
+	obj, err := r.Resolve(Reference{Number: 3, Generation: 0})
+	if err != nil {
+		t.Fatalf("Resolve: %v", err)
+	}
+	if _, ok := obj.(Array); !ok {
+		t.Fatalf("got %T, want Array", obj)
+	}
+}