From 92d6fbe35efde8232804defda94089684f77337e Mon Sep 17 00:00:00 2001
From: xnacly <47723417+xNaCly@users.noreply.github.com>
Date: Wed, 18 Feb 2026 14:03:36 +0100
Subject: [PATCH 01/15] git: add pprof to gitignore

---
 .gitignore | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.gitignore b/.gitignore
index 57edc61..1a4e801 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,3 @@
 test/*.json
 test/test
-test/*.pprof
+*.pprof

From 1b952152a49d04587239b9e91022cacbb7000d3b Mon Sep 17 00:00:00 2001
From: xnacly <47723417+xNaCly@users.noreply.github.com>
Date: Wed, 18 Feb 2026 14:03:57 +0100
Subject: [PATCH 02/15] go: update to go1.26

---
 go.mod | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/go.mod b/go.mod
index 3d85827..2724904 100644
--- a/go.mod
+++ b/go.mod
@@ -1,6 +1,6 @@
 module github.com/xnacly/libjson
 
-go 1.23.0
+go 1.26.0
 
 require github.com/stretchr/testify v1.9.0
 

From c669f22b7d82f5f5dd8e9d0258544364d13cbfc7 Mon Sep 17 00:00:00 2001
From: xnacly <47723417+xNaCly@users.noreply.github.com>
Date: Wed, 18 Feb 2026 14:26:15 +0100
Subject: [PATCH 03/15] cmd/lj: rework the cli with options and flags

---
 cmd/lj.go | 61 ++++++++++++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 54 insertions(+), 7 deletions(-)

diff --git a/cmd/lj.go b/cmd/lj.go
index f87b538..7922dc8 100644
--- a/cmd/lj.go
+++ b/cmd/lj.go
@@ -1,9 +1,14 @@
 package main
 
 import (
+	"encoding/json"
+	"flag"
 	"fmt"
 	"log"
 	"os"
+	"path/filepath"
+	"runtime/debug"
+	"runtime/pprof"
 
 	"github.com/xnacly/libjson"
 )
@@ -16,17 +21,59 @@ func Must[T any](t T, err error) T {
 }
 
 func main() {
-	args := os.Args
+	noGc := flag.Bool("nogc", false, "disable the go garbage collector")
+	useLibjson := flag.Bool("libjson", true, "use libjson, if false use encoding/json")
+	usePprof := flag.Bool("pprof", false, "use pprof cpu tracing")
+	query := flag.String("q", ".", "query the parsed json")
+	silent := flag.Bool("s", false, "no stdoutput")
+	flag.Parse()
+
+	if *noGc {
+		debug.SetGCPercent(-1)
+	}
+
+	args := flag.Args()
+
+	var filePath string
 	var file *os.File
 	if info, err := os.Stdin.Stat(); err != nil || info.Mode()&os.ModeCharDevice != 0 { // we are in a pipe
-		if len(args) == 1 {
-			log.Fatalln("Wanted a file as first argument, got nothing, exiting")
+		if len(args) == 0 {
+			log.Fatalln("Wanted a file as an argument, got nothing, exiting")
 		}
-		file = Must(os.Open(args[1]))
+		filePath = args[0]
+		file = Must(os.Open(filePath))
 	} else {
 		file = os.Stdin
+		filePath = "stdin"
+	}
+
+	if *usePprof {
+		f, err := os.Create(filepath.Base(filePath) + ".pprof")
+		if err != nil {
+			panic(err)
+		}
+		pprof.StartCPUProfile(f)
+		defer pprof.StopCPUProfile()
+	}
+
+	if *useLibjson {
+		out := Must(libjson.NewReader(file))
+		if !*silent {
+			fmt.Printf("%+#v\n", Must(libjson.Get[any](&out, *query)))
+		}
+	} else {
+		if *query != "." {
+			panic("With -libjson=false, there is no support for querying the json")
+		}
+
+		decoder := json.NewDecoder(file)
+		var a any
+		if err := decoder.Decode(&a); err != nil {
+			panic(err)
+		}
+
+		if !*silent {
+			fmt.Printf("%+#v\n", a)
+		}
 	}
-	query := os.Args[len(os.Args)-1]
-	json := Must(libjson.NewReader(file))
-	fmt.Printf("%+#v\n", Must(libjson.Get[any](&json, query)))
 }

From 7410c209396338266a5f1562fdb85c595a1b63dd Mon Sep 17 00:00:00 2001
From: xnacly <47723417+xNaCly@users.noreply.github.com>
Date: Wed, 18 Feb 2026 14:26:42 +0100
Subject: [PATCH 04/15] test: replace test binary with cmd/lj

---
 test/bench.sh |  8 ++++----
 test/test.go  | 49 -------------------------------------------------
 2 files changed, 4 insertions(+), 53 deletions(-)
 delete mode 100644 test/test.go

diff --git a/test/bench.sh b/test/bench.sh
index 9585625..9162698 100755
--- a/test/bench.sh
+++ b/test/bench.sh
@@ -4,8 +4,8 @@ python3 gen.py
 
 echo "building executable"
 rm ./test
-go build ./test.go
+go build -o ./test ../cmd/lj.go
 
-hyperfine "./test ./1MB.json" "./test -libjson=false ./1MB.json"
-hyperfine "./test ./5MB.json" "./test -libjson=false ./5MB.json"
-hyperfine "./test ./10MB.json" "./test -libjson=false ./10MB.json"
+hyperfine "./test -s ./1MB.json" "./test -s -libjson=false ./1MB.json"
+hyperfine "./test -s ./5MB.json" "./test -s -libjson=false ./5MB.json"
+hyperfine "./test -s ./10MB.json" "./test -s -libjson=false ./10MB.json"
diff --git a/test/test.go b/test/test.go
deleted file mode 100644
index 3a6dfe0..0000000
--- a/test/test.go
+++ /dev/null
@@ -1,49 +0,0 @@
-package main
-
-import (
-	"encoding/json"
-	"flag"
-	"log"
-	"os"
-
-	// "runtime/pprof"
-
-	"github.com/xnacly/libjson"
-)
-
-func main() {
-	// f, err := os.Create("cpu.pprof")
-	// if err != nil {
-	// 	panic(err)
-	// }
-	// pprof.StartCPUProfile(f)
-	// defer pprof.StopCPUProfile()
-	lj := flag.Bool("libjson", true, "benchmark libjson or gojson")
-	flag.Parse()
-	args := flag.Args()
-	if len(args) == 0 {
-		log.Fatalln("Wanted a file as first argument, got nothing, exiting")
-	}
-	file, err := os.Open(args[0])
-	if err != nil {
-		log.Fatalln(err)
-	}
-	if *lj {
-		_, err := libjson.NewReader(file)
-		if err != nil {
-			log.Fatalln(err)
-		}
-	} else {
-		v := []struct {
-			Key1      string
-			Array     []any
-			Obj       any
-			AtomArray []any
-		}{}
-		d := json.NewDecoder(file)
-		err := d.Decode(&v)
-		if err != nil {
-			log.Fatalln(err)
-		}
-	}
-}

From 815f8815ea18877b49c99379351f8e38d39dda5a Mon Sep 17 00:00:00 2001
From: xnacly <47723417+xNaCly@users.noreply.github.com>
Date: Wed, 18 Feb 2026 16:16:29 +0100
Subject: [PATCH 05/15] lexer+parser: support ecma404 escape characters

---
 cmd/lj.go |  4 +--
 lexer.go  | 19 +++++++++++-
 parser.go | 93 ++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 types.go  | 25 ++++++++-------
 4 files changed, 125 insertions(+), 16 deletions(-)

diff --git a/cmd/lj.go b/cmd/lj.go
index 7922dc8..8689011 100644
--- a/cmd/lj.go
+++ b/cmd/lj.go
@@ -59,7 +59,7 @@ func main() {
 	if *useLibjson {
 		out := Must(libjson.NewReader(file))
 		if !*silent {
-			fmt.Printf("%+#v\n", Must(libjson.Get[any](&out, *query)))
+			fmt.Printf("%#+v\n", Must(libjson.Get[any](&out, *query)))
 		}
 	} else {
 		if *query != "." {
@@ -73,7 +73,7 @@ func main() {
 		}
 
 		if !*silent {
-			fmt.Printf("%+#v\n", a)
+			fmt.Printf("%#+v\n", a)
 		}
 	}
 }
diff --git a/lexer.go b/lexer.go
index e3888d7..e4715b9 100644
--- a/lexer.go
+++ b/lexer.go
@@ -58,10 +58,27 @@ func (l *lexer) next() (token, error) {
 	case '"':
 		start := l.pos
 		for i := start; i < len(l.data); i++ {
-			if l.data[i] == '"' {
+			switch l.data[i] {
+			case '"':
 				t := token{Type: t_string, Start: start, End: i}
 				l.pos = i + 1
 				return t, nil
+			case '\\': // OH NO ITS ESCAPING :O
+				i++
+				if i >= len(l.data) {
+					return empty, errors.New("Unterminated string escape")
+				}
+				switch l.data[i] {
+				case '"', '\\', '/', 'b', 'f', 'n', 'r', 't':
+					// we simply skip the escaped char, the parser has to
+				case 'u':
+					if i+4 > len(l.data) {
+						return empty, errors.New("Unterminated string")
+					}
+					i += 4
+				default:
+					return empty, fmt.Errorf("Invalid escape %q", l.data[i])
+				}
 			}
 		}
 		return empty, errors.New("Unterminated string")
diff --git a/parser.go b/parser.go
index 98faa54..49ea53f 100644
--- a/parser.go
+++ b/parser.go
@@ -1,7 +1,9 @@
 package libjson
 
 import (
+	"errors"
 	"fmt"
+	"unicode/utf8"
 	"unsafe"
 )
 
@@ -161,17 +163,106 @@ func (p *parser) array() ([]any, error) {
 	return a, p.advance()
 }
 
+func hex4(b []byte) (r rune, err error) {
+	r = 0
+	for _, c := range b {
+		r <<= 4
+		switch {
+		case '0' <= c && c <= '9':
+			r += rune(c - '0')
+		case 'a' <= c && c <= 'f':
+			r += rune(c - 'a' + 10)
+		case 'A' <= c && c <= 'F':
+			r += rune(c - 'A' + 10)
+		default:
+			return 0, fmt.Errorf("invalid hex %q", c)
+		}
+	}
+	return r, nil
+}
+
+// unescapes escapes in a buffer, returns the end of the in place escaped
+// buffer so the caller can resize to the new, smaller buffer size
+func unescapeInPlace(in []byte) (int, error) {
+	curEnd := 0
+	for i := 0; i < len(in); i++ {
+		b := in[i]
+		if b != '\\' {
+			in[curEnd] = b
+			curEnd++
+			continue
+		}
+
+		i++ // skip \
+
+		switch in[i] {
+		case '"', '\\', '/':
+			in[curEnd] = in[i]
+			curEnd++
+		case 'b':
+			in[curEnd] = '\b'
+			curEnd++
+		case 'f':
+			in[curEnd] = '\f'
+			curEnd++
+		case 'n':
+			in[curEnd] = '\n'
+			curEnd++
+		case 'r':
+			in[curEnd] = '\r'
+			curEnd++
+		case 't':
+			in[curEnd] = '\t'
+			curEnd++
+		case 'u': // \uXXXX
+
+			// From ECMA-404:
+			//
+			// However, whether a processor of JSON texts interprets such a surrogate pair
+			// as a single code point or as an explicit surrogate pair is a semantic
+			// decision that is determined by the specific processor.
+			//
+			// meaning we dont merge unicode points, firstly because fuck
+			// utf16, and secondly because its simpler to just keep two unicode
+			// points separate compared to increasing the complexity of this
+			// decoding
+
+			i++ // skip u
+
+			if i+4 > len(in) {
+				return 0, errors.New("unterminated unicode escape")
+			}
+
+			r, err := hex4(in[i : i+4])
+			if err != nil {
+				return 0, err
+			}
+
+			n := utf8.EncodeRune(in[curEnd:], r)
+			curEnd += n
+			i += 4
+		}
+	}
+
+	return curEnd, nil
+}
+
 func (p *parser) atom() (any, error) {
 	var r any
 	switch p.cur_tok.Type {
 	case t_string:
 		in := p.input[p.cur_tok.Start:p.cur_tok.End]
+		end, err := unescapeInPlace(in)
+		if err != nil {
+			return nil, err
+		}
+		in = in[:end]
 		r = *(*string)(unsafe.Pointer(&in))
 	case t_number:
 		raw := p.input[p.cur_tok.Start:p.cur_tok.End]
 		number, err := parseFloat(raw)
 		if err != nil {
-			return empty, fmt.Errorf("Invalid floating point number %q: %w", string(raw), err)
+			return nil, fmt.Errorf("Invalid floating point number %q: %w", string(raw), err)
 		}
 		r = number
 	case t_true:
diff --git a/types.go b/types.go
index 86ff403..9034629 100644
--- a/types.go
+++ b/types.go
@@ -13,18 +13,19 @@ type token struct {
 var empty = token{Type: t_eof}
 
 const (
-	t_string       t_json = iota // anything between ""
-	t_number                     // floating point, hex, etc
-	t_true                       // true
-	t_false                      // false
-	t_null                       // null
-	t_left_curly                 // {
-	t_right_curly                // }
-	t_left_braket                // [
-	t_right_braket               // ]
-	t_comma                      // ,
-	t_colon                      // :
-	t_eof                        // for any non structure characters outside of strings and numbers
+	t_string         t_json = iota // anything between ""
+	t_string_escaped               // t_string but contains an escape char
+	t_number                       // floating point, hex, etc
+	t_true                         // true
+	t_false                        // false
+	t_null                         // null
+	t_left_curly                   // {
+	t_right_curly                  // }
+	t_left_braket                  // [
+	t_right_braket                 // ]
+	t_comma                        // ,
+	t_colon                        // :
+	t_eof                          // for any non structure characters outside of strings and numbers
 )
 
 var tokennames = map[t_json]string{

From 86709d848d8a0723c42d7b869ac0f50411b243ba Mon Sep 17 00:00:00 2001
From: xnacly <47723417+xNaCly@users.noreply.github.com>
Date: Wed, 18 Feb 2026 16:20:17 +0100
Subject: [PATCH 06/15] docs: change ecma404 ref

---
 README.md | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/README.md b/README.md
index 173bc53..b913b35 100644
--- a/README.md
+++ b/README.md
@@ -1,7 +1,5 @@
 # libjson
 
-> WARNING: libjson is currently a work in progress :)
-
 Fast and minimal JSON parser written in and for Go with a JIT query language
 
 ```go
@@ -13,16 +11,16 @@ import (
 
 func main() {
 	input := `{ "hello": {"world": ["hi"] } }`
-	jsonObj, _ := New(input) // or libjson.NewReader(r io.Reader)
+	jsonObj, _ := libjson.New([]byte(input)) // or libjson.NewReader(r io.Reader)
 
 	// accessing values
-	fmt.Println(Get[string](jsonObj, ".hello.world.0")) // hi, nil
+	fmt.Println(libjson.Get[string](jsonObj, ".hello.world.0")) // hi, nil
 }
 ```
 
 ## Features
 
-- [ECMA 404](https://ecma-international.org/wp-content/uploads/ECMA-404_2nd_edition_december_2017.pdf)
+- [ECMA 404](https://ecma-international.org/publications-and-standards/standards/ecma-404/)
   and [rfc8259](https://www.rfc-editor.org/rfc/rfc8259) compliant
   - tests against [JSONTestSuite](https://github.com/nst/JSONTestSuite), see
     [Parsing JSON is a Minefield

From 74c795ccc7f26afb4fd3433701d82741ac008168 Mon Sep 17 00:00:00 2001
From: xnacly <47723417+xNaCly@users.noreply.github.com>
Date: Wed, 18 Feb 2026 16:23:22 +0100
Subject: [PATCH 07/15] types: remove t_string_escaped

---
 types.go | 25 ++++++++++++-------------
 1 file changed, 12 insertions(+), 13 deletions(-)

diff --git a/types.go b/types.go
index 9034629..86ff403 100644
--- a/types.go
+++ b/types.go
@@ -13,19 +13,18 @@ type token struct {
 var empty = token{Type: t_eof}
 
 const (
-	t_string         t_json = iota // anything between ""
-	t_string_escaped               // t_string but contains an escape char
-	t_number                       // floating point, hex, etc
-	t_true                         // true
-	t_false                        // false
-	t_null                         // null
-	t_left_curly                   // {
-	t_right_curly                  // }
-	t_left_braket                  // [
-	t_right_braket                 // ]
-	t_comma                        // ,
-	t_colon                        // :
-	t_eof                          // for any non structure characters outside of strings and numbers
+	t_string       t_json = iota // anything between ""
+	t_number                     // floating point, hex, etc
+	t_true                       // true
+	t_false                      // false
+	t_null                       // null
+	t_left_curly                 // {
+	t_right_curly                // }
+	t_left_braket                // [
+	t_right_braket               // ]
+	t_comma                      // ,
+	t_colon                      // :
+	t_eof                        // for any non structure characters outside of strings and numbers
 )
 
 var tokennames = map[t_json]string{

From 12878d75c59c0070e26e9e0da85ab087abbbb98c Mon Sep 17 00:00:00 2001
From: xnacly <47723417+xNaCly@users.noreply.github.com>
Date: Wed, 18 Feb 2026 16:26:25 +0100
Subject: [PATCH 08/15] parser: fix off by one error in unescapeInPlace unicode
 handling
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Before this change "\uD834\uDD1E" would result in "�DD1E" but should
have resulted in "��", due to both being unmerged surrogates.
---
 parser.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/parser.go b/parser.go
index 49ea53f..34e1a02 100644
--- a/parser.go
+++ b/parser.go
@@ -240,7 +240,7 @@ func unescapeInPlace(in []byte) (int, error) {
 
 			n := utf8.EncodeRune(in[curEnd:], r)
 			curEnd += n
-			i += 4
+			i += 3
 		}
 	}
 

From df358157ef0d705239f119a5ed7d264e0456833d Mon Sep 17 00:00:00 2001
From: xnacly <47723417+xNaCly@users.noreply.github.com>
Date: Fri, 20 Feb 2026 10:25:05 +0100
Subject: [PATCH 09/15] parser: update benchmarking input

---
 README.md             | 10 ++++++
 cmd/lj.go             | 18 ++++++++---
 json.go               |  5 +--
 json_test.go          | 71 ++++++++++++++++++++++++++++++++++++-------
 lexer.go              | 29 ++++++++++--------
 parser_test.go        | 10 +++---
 types.go => tokens.go |  0
 7 files changed, 109 insertions(+), 34 deletions(-)
 rename types.go => tokens.go (100%)

diff --git a/README.md b/README.md
index b913b35..382c944 100644
--- a/README.md
+++ b/README.md
@@ -20,6 +20,7 @@ func main() {
 
 ## Features
 
+- Parser consumes and mutates the input to make most operations zero copy and zero alloc
 - [ECMA 404](https://ecma-international.org/publications-and-standards/standards/ecma-404/)
   and [rfc8259](https://www.rfc-editor.org/rfc/rfc8259) compliant
   - tests against [JSONTestSuite](https://github.com/nst/JSONTestSuite), see
@@ -33,6 +34,15 @@ func main() {
 - caching of queries with `libjson.Compile`, just in time caching of queries
 - serialisation via `json.Marshal`
 
+## Why is it faster than encoding/json?
+
+- zero-copy strings
+- mutate input for string escaping instead of allocating a new one
+- no allocations for strings, views into the original input
+- no reflection
+- no copies for map keys
+- very simple lexer and parser
+
 ## Benchmarks
 
 ![libjson-vs-encodingjson](https://github.com/user-attachments/assets/b11bcce4-e7db-4c45-ab42-45a2042e2a51)
diff --git a/cmd/lj.go b/cmd/lj.go
index 8689011..c65f6fc 100644
--- a/cmd/lj.go
+++ b/cmd/lj.go
@@ -26,6 +26,7 @@ func main() {
 	usePprof := flag.Bool("pprof", false, "use pprof cpu tracing")
 	query := flag.String("q", ".", "query the parsed json")
 	silent := flag.Bool("s", false, "no stdoutput")
+	escape := flag.Bool("e", false, "escapes input with Gos '%#+v'")
 	flag.Parse()
 
 	if *noGc {
@@ -59,7 +60,12 @@ func main() {
 	if *useLibjson {
 		out := Must(libjson.NewReader(file))
 		if !*silent {
-			fmt.Printf("%#+v\n", Must(libjson.Get[any](&out, *query)))
+			out := Must(libjson.Get[any](&out, *query))
+			if *escape {
+				fmt.Printf("%#+v\n", out)
+			} else {
+				fmt.Println(out)
+			}
 		}
 	} else {
 		if *query != "." {
@@ -67,13 +73,17 @@ func main() {
 		}
 
 		decoder := json.NewDecoder(file)
-		var a any
-		if err := decoder.Decode(&a); err != nil {
+		var out any
+		if err := decoder.Decode(&out); err != nil {
 			panic(err)
 		}
 
 		if !*silent {
-			fmt.Printf("%#+v\n", a)
+			if *escape {
+				fmt.Printf("%#+v\n", out)
+			} else {
+				fmt.Println(out)
+			}
 		}
 	}
 }
diff --git a/json.go b/json.go
index 8477a8c..c433ffb 100644
--- a/json.go
+++ b/json.go
@@ -9,7 +9,7 @@ func NewReader(r io.Reader) (JSON, error) {
 	if err != nil {
 		return JSON{}, err
 	}
-	p := parser{l: lexer{data: data}}
+	p := parser{l: lexer{data: data, len: len(data)}}
 	obj, err := p.parse(data)
 	if err != nil {
 		return JSON{}, err
@@ -17,8 +17,9 @@ func NewReader(r io.Reader) (JSON, error) {
 	return JSON{obj}, nil
 }
 
+// data is consumed and possibly mutated, DO NOT REUSE
 func New(data []byte) (JSON, error) {
-	p := parser{l: lexer{data: data}}
+	p := parser{l: lexer{data: data, len: len(data)}}
 	obj, err := p.parse(data)
 	if err != nil {
 		return JSON{}, err
diff --git a/json_test.go b/json_test.go
index f7af6c3..37dcef9 100644
--- a/json_test.go
+++ b/json_test.go
@@ -9,31 +9,80 @@ import (
 )
 
 const amount = 50_000
+const naiveInput = `{"key1":"value","array":[],"obj":{},"atomArray":[11201,1e112,true,false,null,"str"]},`
+const escapedInput = `{"text":"line1\nline2\nline3","quote":"\"hello\"","path":"C:\\\\Users\\\\name","unicode":"\u0041\u0042\u0043","mix":"abc\\ndef\"ghi\u263A"},`
+const hardInput = `{
+	"id":12345,
+	"name":"very_long_string_with_no_escapes_but_large_payload_abcdefghijklmnopqrstuvwxyz_0123456789",
+	"description":"This string contains\nmultiple\nlines\nand \"quotes\" and unicode \u2764\u2764\u2764",
+	"nested":{
+		"level1":{
+			"level2":{
+				"array":[
+					"short",
+					"string_with_escape\\n",
+					"another\\tvalue",
+					"unicode\u2603",
+					1234567890,
+					-1.2345e67,
+					true,
+					false,
+					null
+				]
+			}
+		}
+	}
+},`
 
-func BenchmarkLibJson(b *testing.B) {
-	data := strings.Repeat(`{"key1": "value","array": [],"obj": {},"atomArray": [11201,1e112,true,false,null,"str"]},`, amount)
+func benchmarkWithInput(b *testing.B, input string) {
+	data := strings.Repeat(input, amount)
 	d := []byte("[" + data[:len(data)-1] + "]")
+
 	b.ResetTimer()
 	for i := 0; i < b.N; i++ {
-		_, err := New(d)
+		buf := make([]byte, len(d))
+		copy(buf, d)
+		b.StartTimer()
+		_, err := New(buf)
+		b.StopTimer()
 		assert.NoError(b, err)
 	}
 	b.ReportAllocs()
 }
 
-func BenchmarkEncodingJson(b *testing.B) {
-	data := strings.Repeat(`{"key1": "value","array": [],"obj": {},"atomArray": [11201,1e112,true,false,null,"str"]},`, amount)
+func benchmarkEncodingJsonWithInput(b *testing.B, input string) {
+	data := strings.Repeat(input, amount)
 	d := []byte("[" + data[:len(data)-1] + "]")
+
 	b.ResetTimer()
 	for i := 0; i < b.N; i++ {
-		v := []struct {
-			Key1      string
-			Array     []any
-			Obj       any
-			AtomArray []any
-		}{}
+		var v any
 		err := json.Unmarshal(d, &v)
 		assert.NoError(b, err)
 	}
 	b.ReportAllocs()
 }
+
+func BenchmarkLibJson_Naive(b *testing.B) {
+	benchmarkWithInput(b, naiveInput)
+}
+
+func BenchmarkLibJson_Escaped(b *testing.B) {
+	benchmarkWithInput(b, escapedInput)
+}
+
+func BenchmarkLibJson_Hard(b *testing.B) {
+	benchmarkWithInput(b, hardInput)
+}
+
+func BenchmarkEncodingJson_Naive(b *testing.B) {
+	benchmarkEncodingJsonWithInput(b, naiveInput)
+}
+
+func BenchmarkEncodingJson_Escaped(b *testing.B) {
+	benchmarkEncodingJsonWithInput(b, escapedInput)
+}
+
+func BenchmarkEncodingJson_Hard(b *testing.B) {
+	benchmarkEncodingJsonWithInput(b, hardInput)
+}
diff --git a/lexer.go b/lexer.go
index e4715b9..7ae9a3d 100644
--- a/lexer.go
+++ b/lexer.go
@@ -9,6 +9,7 @@ import (
 type lexer struct {
 	data []byte
 	pos  int
+	len  int
 }
 
 var numChar [256]bool
@@ -25,7 +26,7 @@ func init() {
 }
 
 func (l *lexer) next() (token, error) {
-	for l.pos < len(l.data) {
+	for l.pos < l.len {
 		cc := l.data[l.pos]
 		if cc == ' ' || cc == '\n' || cc == '\t' || cc == '\r' {
 			l.pos++
@@ -34,7 +35,7 @@ func (l *lexer) next() (token, error) {
 		}
 	}
 
-	if l.pos >= len(l.data) {
+	if l.pos >= l.len {
 		return empty, nil
 	}
 
@@ -57,22 +58,25 @@ func (l *lexer) next() (token, error) {
 		tt = t_colon
 	case '"':
 		start := l.pos
-		for i := start; i < len(l.data); i++ {
-			switch l.data[i] {
-			case '"':
+		for i := start; i < l.len; i++ {
+			if c := l.data[i]; c == '"' {
 				t := token{Type: t_string, Start: start, End: i}
+				// if hasEscaped {
+				// 	t.Type = t_string_escaped
+				// }
 				l.pos = i + 1
 				return t, nil
-			case '\\': // OH NO ITS ESCAPING :O
+			} else if c == '\\' { // OH NO ITS ESCAPING :O
 				i++
-				if i >= len(l.data) {
+				if i >= l.len {
 					return empty, errors.New("Unterminated string escape")
 				}
+
 				switch l.data[i] {
 				case '"', '\\', '/', 'b', 'f', 'n', 'r', 't':
 					// we simply skip the escaped char, the parser has to
 				case 'u':
-					if i+4 > len(l.data) {
+					if i+4 > l.len {
 						return empty, errors.New("Unterminated string")
 					}
 					i += 4
@@ -83,7 +87,7 @@ func (l *lexer) next() (token, error) {
 		}
 		return empty, errors.New("Unterminated string")
 	case 't': // this should always be the 'true' atom and is therefore optimised here
-		if l.pos+3 > len(l.data) {
+		if l.pos+3 > l.len {
 			return empty, errors.New("Failed to read the expected 'true' atom")
 		}
 		if !(l.data[l.pos] == 'r' && l.data[l.pos+1] == 'u' && l.data[l.pos+2] == 'e') {
@@ -92,7 +96,7 @@ func (l *lexer) next() (token, error) {
 		l.pos += 3
 		tt = t_true
 	case 'f': // this should always be the 'false' atom and is therefore optimised here
-		if l.pos+4 > len(l.data) {
+		if l.pos+4 > l.len {
 			return empty, errors.New("Failed to read the expected 'false' atom")
 		}
 		if !(l.data[l.pos] == 'a' && l.data[l.pos+1] == 'l' && l.data[l.pos+2] == 's' && l.data[l.pos+3] == 'e') {
@@ -101,7 +105,7 @@ func (l *lexer) next() (token, error) {
 		l.pos += 4
 		tt = t_false
 	case 'n': // this should always be the 'null' atom and is therefore optimised here
-		if l.pos+3 > len(l.data) {
+		if l.pos+3 > l.len {
 			return empty, errors.New("Failed to read the expected 'null' atom")
 		}
 		if !(l.data[l.pos] == 'u' && l.data[l.pos+1] == 'l' && l.data[l.pos+2] == 'l') {
@@ -112,7 +116,7 @@ func (l *lexer) next() (token, error) {
 	default:
 		if cc == '-' || (cc >= '0' && cc <= '9') {
 			start := l.pos - 1
-			for l.pos < len(l.data) && numChar[l.data[l.pos]] {
+			for l.pos < l.len && numChar[l.data[l.pos]] {
 				l.pos++
 			}
 
@@ -132,6 +136,7 @@ func (l *lexer) lex(r io.Reader) ([]token, error) {
 	if err != nil {
 		return nil, err
 	}
+	l.len = len(l.data)
 
 	toks := make([]token, 0, len(l.data)/2)
 	for {
diff --git a/parser_test.go b/parser_test.go
index 907473b..30731f5 100644
--- a/parser_test.go
+++ b/parser_test.go
@@ -30,7 +30,7 @@ func TestParserAtoms(t *testing.T) {
 	for i, in := range input {
 		t.Run(in, func(t *testing.T) {
 			in := []byte(in)
-			p := &parser{l: lexer{data: in}}
+			p := &parser{l: lexer{data: in, len: len(in)}}
 			out, err := p.parse(in)
 			assert.NoError(t, err)
 			assert.EqualValues(t, wanted[i], out)
@@ -54,7 +54,7 @@ func TestParserArray(t *testing.T) {
 	for i, in := range input {
 		t.Run(in, func(t *testing.T) {
 			in := []byte(in)
-			p := &parser{l: lexer{data: in}}
+			p := &parser{l: lexer{data: in, len: len(in)}}
 			out, err := p.parse(in)
 			assert.NoError(t, err)
 			assert.EqualValues(t, wanted[i], out)
@@ -82,7 +82,7 @@ func TestParserObject(t *testing.T) {
 	for i, in := range input {
 		t.Run(in, func(t *testing.T) {
 			in := []byte(in)
-			p := &parser{l: lexer{data: in}}
+			p := &parser{l: lexer{data: in, len: len(in)}}
 			out, err := p.parse(in)
 			assert.NoError(t, err)
 			assert.EqualValues(t, wanted[i], out)
@@ -110,7 +110,7 @@ func TestParserEdge(t *testing.T) {
 	for i, in := range input {
 		t.Run(in, func(t *testing.T) {
 			in := []byte(in)
-			p := &parser{l: lexer{data: in}}
+			p := &parser{l: lexer{data: in, len: len(in)}}
 			out, err := p.parse(in)
 			assert.NoError(t, err)
 			assert.EqualValues(t, wanted[i], out)
@@ -145,7 +145,7 @@ func TestParserFail(t *testing.T) {
 	for _, in := range input {
 		t.Run(in, func(t *testing.T) {
 			in := []byte(in)
-			p := &parser{l: lexer{data: in}}
+			p := &parser{l: lexer{data: in, len: len(in)}}
 			out, err := p.parse(in)
 			assert.Error(t, err)
 			assert.Nil(t, out)
diff --git a/types.go b/tokens.go
similarity index 100%
rename from types.go
rename to tokens.go

From 7ff2cb2b9fca5b8a053bf954c4f6987a3315f4ed Mon Sep 17 00:00:00 2001
From: xnacly <47723417+xNaCly@users.noreply.github.com>
Date: Fri, 20 Feb 2026 11:48:44 +0100
Subject: [PATCH 10/15] benchmarks: use heavier input in benchmarking (1-100MB)

---
 .gitignore          |  4 ++--
 benchmarks/bench.sh | 15 +++++++++++++++
 benchmarks/gen.py   | 29 +++++++++++++++++++++++++++++
 parser.go           |  4 ++--
 test/bench.sh       | 11 -----------
 test/gen.py         | 22 ----------------------
 6 files changed, 48 insertions(+), 37 deletions(-)
 create mode 100755 benchmarks/bench.sh
 create mode 100644 benchmarks/gen.py
 delete mode 100755 test/bench.sh
 delete mode 100644 test/gen.py

diff --git a/.gitignore b/.gitignore
index 1a4e801..ead7226 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,3 @@
-test/*.json
-test/test
+benchmarks/*.json
+benchmarks/test
 *.pprof
diff --git a/benchmarks/bench.sh b/benchmarks/bench.sh
new file mode 100755
index 0000000..0787939
--- /dev/null
+++ b/benchmarks/bench.sh
@@ -0,0 +1,15 @@
+#!/bin/bash
+echo "generating example data"
+python3 gen.py
+
+echo "building executable"
+rm ./test
+go build -o ./test ../cmd/lj.go
+
+for SIZE in 1MB 5MB 10MB 100MB; do
+    hyperfine \
+        --warmup 1 \
+        --runs 10 \
+        "./test -s ./${SIZE}.json" \
+        "./test -s -libjson=false ./${SIZE}.json"
+done
diff --git a/benchmarks/gen.py b/benchmarks/gen.py
new file mode 100644
index 0000000..d540015
--- /dev/null
+++ b/benchmarks/gen.py
@@ -0,0 +1,29 @@
+from os.path import exists
+import math
+import json
+
+sizes =[1,5,10,100]
+
+line = json.dumps({
+    "id": 12345,
+    "name": "very_long_string_with_no_escapes_but_large_payload_abcdefghijklmnopqrstuvwxyz_0123456789",
+    "description": "This string contains\nmultiple\nlines\nand \"quotes\" and unicode ❤❤❤",
+    "nested": {
+        "level1": {
+            "level2": {
+                "array": ["short", "string_with_escape\n", "another\tvalue", "unicode\u2603", 1234567890, -1.2345e67, True, False, None]
+            }
+        }
+    }
+})
+
+def write_data(size: int): 
+    name = f"{size}MB.json"
+    if not exists(name):
+        with open(name, mode="w", encoding="utf8") as f:
+            f.write("[\n")
+            size = math.floor((size*1000000)/len(line))
+            f.write(",\n".join([line for _ in range(0, size)]))
+            f.write("\n]")
+
+[write_data(size) for size in sizes]
diff --git a/parser.go b/parser.go
index 34e1a02..e78e802 100644
--- a/parser.go
+++ b/parser.go
@@ -59,7 +59,7 @@ func (p *parser) object() (map[string]any, error) {
 		return nil, err
 	}
 
-	m := make(map[string]any, 4)
+	m := make(map[string]any)
 
 	if p.cur_tok.Type == t_right_curly {
 		err := p.advance()
@@ -137,7 +137,7 @@ func (p *parser) array() ([]any, error) {
 		return []any{}, p.advance()
 	}
 
-	a := make([]any, 0, 8)
+	a := make([]any, 0)
 
 	for p.cur_tok.Type != t_eof && p.cur_tok.Type != t_right_braket {
 		if len(a) > 0 {
diff --git a/test/bench.sh b/test/bench.sh
deleted file mode 100755
index 9162698..0000000
--- a/test/bench.sh
+++ /dev/null
@@ -1,11 +0,0 @@
-#!/bin/bash
-echo "generating example data"
-python3 gen.py
-
-echo "building executable"
-rm ./test
-go build -o ./test ../cmd/lj.go
-
-hyperfine "./test -s ./1MB.json" "./test -s -libjson=false ./1MB.json"
-hyperfine "./test -s ./5MB.json" "./test -s -libjson=false ./5MB.json"
-hyperfine "./test -s ./10MB.json" "./test -s -libjson=false ./10MB.json"
diff --git a/test/gen.py b/test/gen.py
deleted file mode 100644
index 50d2bcb..0000000
--- a/test/gen.py
+++ /dev/null
@@ -1,22 +0,0 @@
-from os.path import exists
-import math
-
-sizes =[1,5,10]
-
-line = """\t{
-        "key1": "value",
-        "array": [],
-        "obj": {},
-        "atomArray": [11201,1e112,true,false,null,"str"]
-    }"""
-
-def write_data(size: int): 
-    name = f"{size}MB.json"
-    if not exists(name):
-        with open(name, mode="w", encoding="utf8") as f:
-            f.write("[\n")
-            size = math.floor((size*1000000)/len(line))
-            f.write(",\n".join([line for _ in range(0, size)]))
-            f.write("\n]")
-
-[write_data(size) for size in sizes]

From 12bf614fcadd6c90ee70f7ea7574fb325fe01d22 Mon Sep 17 00:00:00 2001
From: xnacly <47723417+xNaCly@users.noreply.github.com>
Date: Fri, 20 Feb 2026 13:35:13 +0100
Subject: [PATCH 11/15] parser: remove bounds checks in unescapeInPlace

Previously, of the 600ms spent parsing a 100MB JSON input, 60ms went to
a number of unnecessary bounds checks (CALL runtime.panicBounds(SB)).
This is now reduced to 20ms by moving explicit bounds checks before the
index operations, reusing already indexed values and merging manual
out-of-loop increments.
---
 lexer.go  |  3 ---
 parser.go | 29 ++++++++++++++++++-----------
 2 files changed, 18 insertions(+), 14 deletions(-)

diff --git a/lexer.go b/lexer.go
index 7ae9a3d..d49434b 100644
--- a/lexer.go
+++ b/lexer.go
@@ -61,9 +61,6 @@ func (l *lexer) next() (token, error) {
 		for i := start; i < l.len; i++ {
 			if c := l.data[i]; c == '"' {
 				t := token{Type: t_string, Start: start, End: i}
-				// if hasEscaped {
-				// 	t.Type = t_string_escaped
-				// }
 				l.pos = i + 1
 				return t, nil
 			} else if c == '\\' { // OH NO ITS ESCAPING :O
diff --git a/parser.go b/parser.go
index e78e802..f759e60 100644
--- a/parser.go
+++ b/parser.go
@@ -181,8 +181,13 @@ func hex4(b []byte) (r rune, err error) {
 	return r, nil
 }
 
-// unescapes escapes in a buffer, returns the end of the in place escaped
-// buffer so the caller can resize to the new, smaller buffer size
+// unescapes JSON escapes in a buffer into their non-JSON representation
+//
+// Returns the end of the in place escaped buffer so the caller can resize to
+// the new, smaller buffer size
+//
+// The implementation may look weird, but is optimised to have the least
+// possible branches
 func unescapeInPlace(in []byte) (int, error) {
 	curEnd := 0
 	for i := 0; i < len(in); i++ {
@@ -193,11 +198,16 @@ func unescapeInPlace(in []byte) (int, error) {
 			continue
 		}
 
+		// check if there’s at least 1 more byte for the escape
+		if i+1 >= len(in) {
+			return 0, errors.New("unterminated escape")
+		}
 		i++ // skip \
+		b = in[i]
 
-		switch in[i] {
+		switch b {
 		case '"', '\\', '/':
-			in[curEnd] = in[i]
+			in[curEnd] = b
 			curEnd++
 		case 'b':
 			in[curEnd] = '\b'
@@ -227,21 +237,18 @@ func unescapeInPlace(in []byte) (int, error) {
 			// points separate compared to increasing the complexity of this
 			// decoding
 
-			i++ // skip u
-
-			if i+4 > len(in) {
+			if i+4 >= len(in) {
 				return 0, errors.New("unterminated unicode escape")
 			}
 
-			r, err := hex4(in[i : i+4])
+			r, err := hex4(in[i+1 : i+5])
 			if err != nil {
 				return 0, err
 			}
-
 			n := utf8.EncodeRune(in[curEnd:], r)
 			curEnd += n
-			i += 3
-		}
+			i += 4
+		} // we dont need a default case since we check all possible escapes in the lexer
 	}
 
 	return curEnd, nil

From 0f98f84c93dbdc2dc06b36f19bb23fe4fe4ef3c2 Mon Sep 17 00:00:00 2001
From: xnacly <47723417+xNaCly@users.noreply.github.com>
Date: Fri, 20 Feb 2026 13:49:43 +0100
Subject: [PATCH 12/15] benchmarks: deeper nested and more escapes in benchmark

---
 benchmarks/gen.py | 22 ++++++++++++++++++++--
 json_test.go      | 48 +++++++++++++++++++++++++++--------------------
 2 files changed, 48 insertions(+), 22 deletions(-)

diff --git a/benchmarks/gen.py b/benchmarks/gen.py
index d540015..4afb15a 100644
--- a/benchmarks/gen.py
+++ b/benchmarks/gen.py
@@ -6,12 +6,30 @@
 
 line = json.dumps({
     "id": 12345,
-    "name": "very_long_string_with_no_escapes_but_large_payload_abcdefghijklmnopqrstuvwxyz_0123456789",
+    "name": "very_long_string_with_escapes_and_unicode_abcdefghijklmnopqrstuvwxyz_0123456789",
     "description": "This string contains\nmultiple\nlines\nand \"quotes\" and unicode ❤❤❤",
     "nested": {
         "level1": {
             "level2": {
-                "array": ["short", "string_with_escape\n", "another\tvalue", "unicode\u2603", 1234567890, -1.2345e67, True, False, None]
+                "level3": {
+                    "level4": {
+                        "array": [
+                            "short",
+                            "string_with_escape\\n",
+                            "another\\tvalue",
+                            "unicode\u2603",
+                            "escaped_quote_\"_and_backslash_\\",
+                            1234567890,
+                            -1.2345e67,
+                            3.1415926535897932384626433832795028841971,
+                            True,
+                            False,
+                            None,
+                            "\u0041\u0042\u0043\u00A9\u20AC",
+                            "mix\\n\\t\\r\\\\\\\"end"
+                        ]
+                    }
+                }
             }
         }
     }
diff --git a/json_test.go b/json_test.go
index 37dcef9..85ccdd2 100644
--- a/json_test.go
+++ b/json_test.go
@@ -12,26 +12,34 @@ const amount = 50_000
 const naiveInput = `{"key1":"value","array":[],"obj":{},"atomArray":[11201,1e112,true,false,null,"str"]},`
 const escapedInput = `{"text":"line1\nline2\nline3","quote":"\"hello\"","path":"C:\\\\Users\\\\name","unicode":"\u0041\u0042\u0043","mix":"abc\\ndef\"ghi\u263A"},`
 const hardInput = `{
-	"id":12345,
-	"name":"very_long_string_with_no_escapes_but_large_payload_abcdefghijklmnopqrstuvwxyz_0123456789",
-	"description":"This string contains\nmultiple\nlines\nand \"quotes\" and unicode \u2764\u2764\u2764",
-	"nested":{
-		"level1":{
-			"level2":{
-				"array":[
-					"short",
-					"string_with_escape\\n",
-					"another\\tvalue",
-					"unicode\u2603",
-					1234567890,
-					-1.2345e67,
-					true,
-					false,
-					null
-				]
-			}
-		}
-	}
+  "id": 12345,
+  "name": "very_long_string_with_escapes_and_unicode_abcdefghijklmnopqrstuvwxyz_0123456789",
+  "description": "This string contains\nmultiple\nlines\nand \"quotes\" and unicode \u2764\u2764\u2764",
+  "nested": {
+    "level1": {
+      "level2": {
+        "level3": {
+          "level4": {
+            "array": [
+              "short",
+              "string_with_escape\\n",
+              "another\\tvalue",
+              "unicode\u2603",
+              "escaped_quote_\"_and_backslash_\\",
+              1234567890,
+              -1.2345e67,
+              3.141592653589793,
+              true,
+              false,
+              null,
+              "ABC\u00a9\u20ac",
+              "mix\\n\\t\\r\\\\\\\"end"
+            ]
+          }
+        }
+      }
+    }
+  }
 },`
 
 func benchmarkWithInput(b *testing.B, input string) {

From e4b2043bbdcacf005f1e3d7bf5e151030d6ff1f6 Mon Sep 17 00:00:00 2001
From: xnacly <47723417+xNaCly@users.noreply.github.com>
Date: Fri, 20 Feb 2026 13:58:26 +0100
Subject: [PATCH 13/15] parser: rip hex4 out and make it table driven

Reduced time taken in unescapeInPlace by 30ms (from 5.75% to 3.41%)
---
 benchmarks/gen.py |  6 +++---
 hex.go            | 54 +++++++++++++++++++++++++++++++++++++++++++++++
 parser.go         | 22 +++----------------
 3 files changed, 60 insertions(+), 22 deletions(-)
 create mode 100644 hex.go

diff --git a/benchmarks/gen.py b/benchmarks/gen.py
index 4afb15a..f169beb 100644
--- a/benchmarks/gen.py
+++ b/benchmarks/gen.py
@@ -7,7 +7,7 @@
 line = json.dumps({
     "id": 12345,
     "name": "very_long_string_with_escapes_and_unicode_abcdefghijklmnopqrstuvwxyz_0123456789",
-    "description": "This string contains\nmultiple\nlines\nand \"quotes\" and unicode ❤❤❤",
+    "description": "This string contains\nmultiple\nlines\nand \"quotes\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"",
     "nested": {
         "level1": {
             "level2": {
@@ -19,13 +19,13 @@
                             "another\\tvalue",
                             "unicode\u2603",
                             "escaped_quote_\"_and_backslash_\\",
-                            1234567890,
+                            11234567890,1234567890,1234567890,1234567890,1234567890,1234567890,1234567890,1234567890,1234567890,1234567890,1234567890,1234567890,234567890,
                             -1.2345e67,
                             3.1415926535897932384626433832795028841971,
                             True,
                             False,
                             None,
-                            "\u0041\u0042\u0043\u00A9\u20AC",
+                            "\u0041\u0042\u0043\u00A9\u20AC\u0041\u0042\u0043\u00A9\u20AC\u0041\u0042\u0043\u00A9\u20AC\u0041\u0042\u0043\u00A9\u20AC\u0041\u0042\u0043\u00A9\u20AC\u0041\u0042\u0043\u00A9\u20AC\u0041\u0042\u0043\u00A9\u20AC\u0041\u0042\u0043\u00A9\u20AC\u0041\u0042\u0043\u00A9\u20AC\u0041\u0042\u0043\u00A9\u20AC\u0041\u0042\u0043\u00A9\u20AC",
                             "mix\\n\\t\\r\\\\\\\"end"
                         ]
                     }
diff --git a/hex.go b/hex.go
new file mode 100644
index 0000000..8fa4a59
--- /dev/null
+++ b/hex.go
@@ -0,0 +1,54 @@
+package libjson
+
+import "errors"
+
+var invalid_hex_err = errors.New("invalid hex")
+
+var hexTable [256]byte
+
+func init() {
+	for i := 0; i < 256; i++ {
+		hexTable[i] = 0xFF
+	}
+	for i := byte('0'); i <= '9'; i++ {
+		hexTable[i] = i - '0'
+	}
+	for i := byte('a'); i <= 'f'; i++ {
+		hexTable[i] = i - 'a' + 10
+	}
+	for i := byte('A'); i <= 'F'; i++ {
+		hexTable[i] = i - 'A' + 10
+	}
+}
+
+// hex4 converts 4 ASCII hex bytes to a rune.
+// Returns an error if any byte is invalid.
+func hex4(b []byte) (r rune, err error) {
+	var v byte
+
+	v = hexTable[b[0]]
+	if v == 0xFF {
+		return 0, invalid_hex_err
+	}
+	r = rune(v) << 12
+
+	v = hexTable[b[1]]
+	if v == 0xFF {
+		return 0, invalid_hex_err
+	}
+	r |= rune(v) << 8
+
+	v = hexTable[b[2]]
+	if v == 0xFF {
+		return 0, invalid_hex_err
+	}
+	r |= rune(v) << 4
+
+	v = hexTable[b[3]]
+	if v == 0xFF {
+		return 0, invalid_hex_err
+	}
+	r |= rune(v)
+
+	return r, nil
+}
diff --git a/parser.go b/parser.go
index f759e60..ce0a750 100644
--- a/parser.go
+++ b/parser.go
@@ -163,23 +163,7 @@ func (p *parser) array() ([]any, error) {
 	return a, p.advance()
 }
 
-func hex4(b []byte) (r rune, err error) {
-	r = 0
-	for _, c := range b {
-		r <<= 4
-		switch {
-		case '0' <= c && c <= '9':
-			r += rune(c - '0')
-		case 'a' <= c && c <= 'f':
-			r += rune(c - 'a' + 10)
-		case 'A' <= c && c <= 'F':
-			r += rune(c - 'A' + 10)
-		default:
-			return 0, fmt.Errorf("invalid hex %q", c)
-		}
-	}
-	return r, nil
-}
+var badEscapeErr = errors.New("bad escape")
 
 // unescapes JSON escapes in a buffer into their non-JSON representation
 //
@@ -200,7 +184,7 @@ func unescapeInPlace(in []byte) (int, error) {
 
 		// check if there’s at least 1 more byte for the escape
 		if i+1 >= len(in) {
-			return 0, errors.New("unterminated escape")
+			return 0, badEscapeErr
 		}
 		i++ // skip \
 		b = in[i]
@@ -238,7 +222,7 @@ func unescapeInPlace(in []byte) (int, error) {
 			// decoding
 
 			if i+4 >= len(in) {
-				return 0, errors.New("unterminated unicode escape")
+				return 0, badEscapeErr
 			}
 
 			r, err := hex4(in[i+1 : i+5])

From 79124c1badd70edb3790f05739004f95298ac2f8 Mon Sep 17 00:00:00 2001
From: xnacly <47723417+xNaCly@users.noreply.github.com>
Date: Fri, 20 Feb 2026 15:16:16 +0100
Subject: [PATCH 14/15] parser: replace parseFloat with strconv.ParseFloat due
 to it being as fast but more correct

---
 parser.go | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/parser.go b/parser.go
index ce0a750..a247c1d 100644
--- a/parser.go
+++ b/parser.go
@@ -3,6 +3,7 @@ package libjson
 import (
 	"errors"
 	"fmt"
+	"strconv"
 	"unicode/utf8"
 	"unsafe"
 )
@@ -59,16 +60,16 @@ func (p *parser) object() (map[string]any, error) {
 		return nil, err
 	}
 
-	m := make(map[string]any)
-
 	if p.cur_tok.Type == t_right_curly {
 		err := p.advance()
 		if err != nil {
 			return nil, err
 		}
-		return m, nil
+		return make(map[string]any, 0), nil
 	}
 
+	m := make(map[string]any, 8)
+
 	for p.cur_tok.Type != t_eof && p.cur_tok.Type != t_right_curly {
 		if len(m) > 0 {
 			if p.cur_tok.Type != t_comma {
@@ -137,7 +138,7 @@ func (p *parser) array() ([]any, error) {
 		return []any{}, p.advance()
 	}
 
-	a := make([]any, 0)
+	a := make([]any, 0, 8)
 
 	for p.cur_tok.Type != t_eof && p.cur_tok.Type != t_right_braket {
 		if len(a) > 0 {
@@ -251,7 +252,7 @@ func (p *parser) atom() (any, error) {
 		r = *(*string)(unsafe.Pointer(&in))
 	case t_number:
 		raw := p.input[p.cur_tok.Start:p.cur_tok.End]
-		number, err := parseFloat(raw)
+		number, err := strconv.ParseFloat(*(*string)(unsafe.Pointer(&raw)), 64)
 		if err != nil {
 			return nil, fmt.Errorf("Invalid floating point number %q: %w", string(raw), err)
 		}

From f456fe22a81d619a117646bd419f186c3c51dec7 Mon Sep 17 00:00:00 2001
From: xnacly <47723417+xNaCly@users.noreply.github.com>
Date: Fri, 20 Feb 2026 16:10:01 +0100
Subject: [PATCH 15/15] parser+object: change internal JSON object
 representation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This commit removes the need for hashing JSON object keys at parse time
by replacing the previously used map[string]any with the new obj struct.
Memory and allocation benchmarks:

    | Benchmark | LibJson B/op | EncodingJson B/op | LibJson x Less Memory | LibJson Allocs | EncodingJson Allocs | LibJson x Fewer Allocs |
    | --------- | ------------ | ----------------- | --------------------- | -------------- | ------------------- | ---------------------- |
    | Naive     | 29,632,671   | 42,744,497        | 1.44x                 | 450,023        | 1,050,031           | 2.33x                  |
    | Escaped   | 22,471,438   | 37,544,412        | 1.67x                 | 350,023        | 1,100,030           | 3.14x                  |
    | Hard      | 121,444,318  | 173,944,500       | 1.43x                 | 1,400,023      | 3,000,032           | 2.14x                  |

These changes result in a ~10-15% speedup and allow libjson to hit the
milestone of being ~2x faster than encoding/json. For instance, with 1MB,
5MB, 10MB and 100MB sized files filled with:

    {
        "id": 12345,
        "name": "very_long_string_with_escapes_and_unicode_abcdefghijklmnopqrstuvwxyz_0123456789",
        "description": "This string contains\nmultiple\nlines\nand \"quotes\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"",
        "nested": {
            "level1": {
                "level2": {
                    "level3": {
                        "level4": {
                            "array": [
                                "short",
                                "string_with_escape\\n",
                                "another\\tvalue",
                                "unicode\u2603",
                                "escaped_quote_\"_and_backslash_\\",
                                11234567890,1234567890,1234567890,1234567890,1234567890,1234567890,1234567890,1234567890,1234567890,1234567890,1234567890,1234567890,234567890,
                                -1.2345e67,
                                3.1415926535897932384626433832795028841971,
                                True,
                                False,
                                None,
                                "\u0041\u0042\u0043\u00A9\u20AC\u0041\u0042\u0043\u00A9\u20AC\u0041\u0042\u0043\u00A9\u20AC\u0041\u0042\u0043\u00A9\u20AC\u0041\u0042\u0043\u00A9\u20AC\u0041\u0042\u0043\u00A9\u20AC\u0041\u0042\u0043\u00A9\u20AC\u0041\u0042\u0043\u00A9\u20AC\u0041\u0042\u0043\u00A9\u20AC\u0041\u0042\u0043\u00A9\u20AC\u0041\u0042\u0043\u00A9\u20AC",
                                "mix\\n\\t\\r\\\\\\\"end"
                            ]
                        }
                    }
                }
            }
        }
    }

libjson now outperforms encoding/json:

    $ cd ./benchmarks
    $ ./bench.sh | rg "faster"
    1.72 ± 0.15 times faster than ./test -s -libjson=false ./1MB.json
    1.89 ± 0.11 times faster than ./test -s -libjson=false ./5MB.json
    1.90 ± 0.06 times faster than ./test -s -libjson=false ./10MB.json
    1.95 ± 0.05 times faster than ./test -s -libjson=false ./100MB.json
---
 float.go       | 92 --------------------------------------------------
 object.go      | 50 +++++++++++++++++++++++----
 parser.go      | 61 ++++++++++++++++-----------------
 parser_test.go | 10 +++---
 tokens.go      |  2 +-
 5 files changed, 79 insertions(+), 136 deletions(-)
 delete mode 100644 float.go

diff --git a/float.go b/float.go
deleted file mode 100644
index 81f1be5..0000000
--- a/float.go
+++ /dev/null
@@ -1,92 +0,0 @@
-package libjson
-
-import (
-	"errors"
-)
-
-func pow10(exp int) float64 {
-	res := 1.0
-	if exp > 0 {
-		for i := 0; i < exp; i++ {
-			res *= 10
-		}
-	} else {
-		for i := 0; i < -exp; i++ {
-			res /= 10
-		}
-	}
-	return res
-}
-
-// non allocating float parsing
-func parseFloat(input []byte) (float64, error) {
-	if len(input) == 0 {
-		return 0, errors.New("empty input")
-	}
-
-	pos := 0
-	neg := false
-	if input[pos] == '-' {
-		neg = true
-		pos++
-	}
-
-	mantissa := uint64(0)
-	exponent := 0
-	seenDot := false
-
-	for pos < len(input) {
-		c := input[pos]
-		if c >= '0' && c <= '9' {
-			mantissa = mantissa*10 + uint64(c-'0')
-			if seenDot {
-				exponent--
-			}
-			pos++
-		} else if c == '.' {
-			if seenDot {
-				return 0, errors.New("multiple dots in number")
-			}
-			seenDot = true
-			pos++
-		} else {
-			break
-		}
-	}
-
-	// weird eE+- handling
-	if pos < len(input) && (input[pos] == 'e' || input[pos] == 'E') {
-		pos++
-		expNeg := false
-		if pos < len(input) && input[pos] == '-' {
-			expNeg = true
-			pos++
-		} else if pos < len(input) && input[pos] == '+' {
-			pos++
-		}
-
-		if pos >= len(input) || input[pos] < '0' || input[pos] > '9' {
-			return 0, errors.New("missing digits in exponent")
-		}
-
-		expVal := 0
-		for pos < len(input) && input[pos] >= '0' && input[pos] <= '9' {
-			expVal = expVal*10 + int(input[pos]-'0')
-			pos++
-		}
-		if expNeg {
-			expVal = -expVal
-		}
-		exponent += expVal
-	}
-
-	if mantissa == 0 {
-		return 0, nil
-	}
-
-	result := float64(mantissa) * pow10(exponent)
-	if neg {
-		result = -result
-	}
-	return result, nil
-}
diff --git a/object.go b/object.go
index 4e59837..83ee480 100644
--- a/object.go
+++ b/object.go
@@ -8,7 +8,28 @@ import (
 )
 
 type JSON struct {
-	obj any
+	inner any
+}
+
+// takes a JSON.inner value and converts it to Go, for instance merges the obj
+// fields into a map
+func toGo(json any) any {
+	switch v := json.(type) {
+	case obj:
+		m := make(map[string]any, len(v.Fields))
+		for _, f := range v.Fields {
+			m[f.Key] = toGo(f.Value)
+		}
+		return m
+	case []any:
+		arr := make([]any, len(v))
+		for i, el := range v {
+			arr[i] = toGo(el)
+		}
+		return arr
+	default:
+		return v
+	}
 }
 
 func Get[T any](obj *JSON, path string) (T, error) {
@@ -17,6 +38,15 @@ func Get[T any](obj *JSON, path string) (T, error) {
 		var e T
 		return e, err
 	}
+
+	// normalise inner json representation into something Go can deal with
+	val = toGo(val)
+
+	if val == nil {
+		var e T
+		return e, nil
+	}
+
 	if castVal, ok := val.(T); !ok {
 		var e T
 		return e, fmt.Errorf("Expected value of type %T, got type %T", e, val)
@@ -42,14 +72,22 @@ func indexByKey(data any, key any) (any, error) {
 		} else {
 			return v[k], nil
 		}
-	case map[string]any:
-		if len(v) == 0 {
+	case obj:
+		if len(v.Fields) == 0 {
 			return nil, nil
 		}
+
 		if k, ok := key.(string); !ok {
 			return nil, fmt.Errorf("Can not use %T::%v to index into %T::%v", key, key, data, data)
 		} else {
-			return v[k], nil
+			i := 0
+			for ; i < len(v.Fields); i++ {
+				cur := v.Fields[i]
+				if cur.Key == k {
+					return cur.Value, nil
+				}
+			}
+			return nil, nil
 		}
 	default:
 		return nil, fmt.Errorf("Unsupported %T, can not index", data)
@@ -107,9 +145,9 @@ func (j *JSON) get(path string) (any, error) {
 	if err != nil {
 		return nil, fmt.Errorf("%w: %q", errors.ErrUnsupported, path)
 	}
-	return f(j.obj)
+	return f(j.inner)
 }
 
 func (j *JSON) MarshalJSON() ([]byte, error) {
-	return json.Marshal(j.obj)
+	return json.Marshal(toGo(j.inner))
 }
diff --git a/parser.go b/parser.go
index a247c1d..cf0c955 100644
--- a/parser.go
+++ b/parser.go
@@ -51,78 +51,75 @@ func (p *parser) expression() (any, error) {
 	}
 }
 
-func (p *parser) object() (map[string]any, error) {
+type field struct {
+	Key   string
+	Value any
+}
+
+type obj struct {
+	Fields []field
+}
+
+var emptyObj = obj{}
+
+func (p *parser) object() (obj, error) {
 	if p.cur_tok.Type != t_left_curly {
-		return nil, fmt.Errorf("Unexpected %q at this position, expected %q", tokennames[p.cur_tok.Type], tokennames[t_left_curly])
+		return emptyObj, fmt.Errorf("Unexpected %q at this position, expected %q", tokennames[p.cur_tok.Type], tokennames[t_left_curly])
 	}
 	err := p.advance()
 	if err != nil {
-		return nil, err
+		return emptyObj, err
 	}
 
 	if p.cur_tok.Type == t_right_curly {
-		err := p.advance()
-		if err != nil {
-			return nil, err
-		}
-		return make(map[string]any, 0), nil
+		return emptyObj, p.advance()
 	}
 
-	m := make(map[string]any, 8)
+	m := obj{
+		Fields: make([]field, 0, 8),
+	}
 
 	for p.cur_tok.Type != t_eof && p.cur_tok.Type != t_right_curly {
-		if len(m) > 0 {
+		if len(m.Fields) > 0 {
 			if p.cur_tok.Type != t_comma {
-				return nil, fmt.Errorf("Unexpected %q at this position, expected %q", tokennames[p.cur_tok.Type], tokennames[t_comma])
+				return emptyObj, fmt.Errorf("Unexpected %q at this position, expected %q", tokennames[p.cur_tok.Type], tokennames[t_comma])
 			}
 			err := p.advance()
 			if err != nil {
-				return nil, err
+				return emptyObj, err
 			}
 		}
 
 		if p.cur_tok.Type != t_string {
-			return nil, fmt.Errorf("Unexpected %q at this position, expected %q", tokennames[p.cur_tok.Type], tokennames[t_string])
+			return emptyObj, fmt.Errorf("Unexpected %q at this position, expected %q", tokennames[p.cur_tok.Type], tokennames[t_string])
 		}
 		in := p.input[p.cur_tok.Start:p.cur_tok.End]
 		key := *(*string)(unsafe.Pointer(&in))
 		err := p.advance()
 		if err != nil {
-			return nil, err
+			return emptyObj, err
 		}
 
 		if p.cur_tok.Type != t_colon {
-			return nil, fmt.Errorf("Unexpected %q at this position, expected %q", tokennames[p.cur_tok.Type], tokennames[t_colon])
+			return emptyObj, fmt.Errorf("Unexpected %q at this position, expected %q", tokennames[p.cur_tok.Type], tokennames[t_colon])
 		}
 		err = p.advance()
 		if err != nil {
-			return nil, err
+			return emptyObj, err
 		}
 
 		val, err := p.expression()
 		if err != nil {
-			return nil, err
+			return emptyObj, err
 		}
 
-		// TODO:  think about activating a uniqueness check for object keys,
-		// would add an other hashing and a branch for each object key parsed.
-		//
-		// if _, ok := m[key]; ok {
-		// 	return nil, fmt.Errorf("Key %q is already set in this object", key)
-		// }
-
-		m[key] = val
+		m.Fields = append(m.Fields, field{key, val})
 	}
 
 	if p.cur_tok.Type != t_right_curly {
-		return nil, fmt.Errorf("Unexpected %q at this position, expected %q", tokennames[p.cur_tok.Type], tokennames[t_right_curly])
+		return emptyObj, fmt.Errorf("Unexpected %q at this position, expected %q", tokennames[p.cur_tok.Type], tokennames[t_right_curly])
 	}
-	err = p.advance()
-	if err != nil {
-		return nil, err
-	}
-
-	return m, nil
+	return m, p.advance()
 }
 
 func (p *parser) array() ([]any, error) {
diff --git a/parser_test.go b/parser_test.go
index 30731f5..9a84840 100644
--- a/parser_test.go
+++ b/parser_test.go
@@ -33,7 +33,7 @@ func TestParserAtoms(t *testing.T) {
 			p := &parser{l: lexer{data: in, len: len(in)}}
 			out, err := p.parse(in)
 			assert.NoError(t, err)
-			assert.EqualValues(t, wanted[i], out)
+			assert.EqualValues(t, wanted[i], toGo(out))
 		})
 	}
 }
@@ -57,7 +57,7 @@ func TestParserArray(t *testing.T) {
 			p := &parser{l: lexer{data: in, len: len(in)}}
 			out, err := p.parse(in)
 			assert.NoError(t, err)
-			assert.EqualValues(t, wanted[i], out)
+			assert.EqualValues(t, wanted[i], toGo(out))
 		})
 	}
 }
@@ -85,7 +85,7 @@ func TestParserObject(t *testing.T) {
 			p := &parser{l: lexer{data: in, len: len(in)}}
 			out, err := p.parse(in)
 			assert.NoError(t, err)
-			assert.EqualValues(t, wanted[i], out)
+			assert.EqualValues(t, wanted[i], toGo(out))
 		})
 	}
 }
@@ -113,7 +113,7 @@ func TestParserEdge(t *testing.T) {
 			p := &parser{l: lexer{data: in, len: len(in)}}
 			out, err := p.parse(in)
 			assert.NoError(t, err)
-			assert.EqualValues(t, wanted[i], out)
+			assert.EqualValues(t, wanted[i], toGo(out))
 		})
 	}
 }
@@ -148,7 +148,7 @@ func TestParserFail(t *testing.T) {
 			p := &parser{l: lexer{data: in, len: len(in)}}
 			out, err := p.parse(in)
 			assert.Error(t, err)
-			assert.Nil(t, out)
+			assert.Nil(t, toGo(out))
 		})
 	}
 }
diff --git a/tokens.go b/tokens.go
index 86ff403..5a5f70d 100644
--- a/tokens.go
+++ b/tokens.go
@@ -1,7 +1,7 @@
 package libjson
 
 // json type
-type t_json int32
+type t_json int8
 
 type token struct {
 	Type t_json