Skip to content

Commit 623beda

Browse files
committed
feat: Add more options
1 parent 92b6ee4 commit 623beda

4 files changed

Lines changed: 358 additions & 18 deletions

File tree

README.md

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,72 @@ $data = (new JsonRepairer($broken))->decode();
3939
$data = json_repair_decode($broken);
4040
```
4141

42+
## Configuration Options
43+
44+
### Omit Empty Values
45+
46+
When repairing JSON from streaming sources (e.g., LLM responses), you may want to remove keys with missing values instead of adding empty strings:
47+
48+
```php
49+
// Missing value - defaults to adding empty string
50+
$broken = '{"name": "John", "age": }';
51+
$repaired = json_repair($broken);
52+
// {"name": "John", "age": ""}
53+
54+
// Remove keys with missing values
55+
$repaired = json_repair($broken, omitEmptyValues: true);
56+
// {"name": "John"}
57+
```
58+
59+
### Omit Incomplete Strings
60+
61+
Similarly, you can remove keys with incomplete string values instead of closing them:
62+
63+
```php
64+
// Incomplete string - defaults to closing the string
65+
$broken = '{"name": "John", "bio": "A developer who';
66+
$repaired = json_repair($broken);
67+
// {"name": "John", "bio": "A developer who"}
68+
69+
// Remove keys with incomplete strings
70+
$repaired = json_repair($broken, omitIncompleteStrings: true);
71+
// {"name": "John"}
72+
```
73+
74+
### Using Both Options Together
75+
76+
Both options can be used together, which is especially useful for streaming JSON where deltas are concatenated:
77+
78+
```php
79+
$broken = '{"name": "John", "age": , "bio": "A developer who';
80+
$repaired = json_repair($broken, omitEmptyValues: true, omitIncompleteStrings: true);
81+
// {"name": "John"}
82+
```
83+
84+
### Using with JsonRepairer Class
85+
86+
You can also pass these options to the `JsonRepairer` constructor:
87+
88+
```php
89+
$repairer = new JsonRepairer(
90+
$broken,
91+
ensureAscii: true,
92+
omitEmptyValues: true,
93+
omitIncompleteStrings: true
94+
);
95+
$repaired = $repairer->repair();
96+
```
97+
98+
Or with `json_repair_decode`:
99+
100+
```php
101+
$data = json_repair_decode(
102+
$broken,
103+
omitEmptyValues: true,
104+
omitIncompleteStrings: true
105+
);
106+
```
107+
42108
## Benchmarking
43109

44110
Run performance benchmarks using PHPBench:

src/JsonRepairer.php

Lines changed: 90 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -41,9 +41,13 @@ class JsonRepairer
4141

4242
private int $stateBeforeString = self::STATE_START;
4343

44+
private int $currentKeyStart = -1;
45+
4446
public function __construct(
4547
protected string $json,
4648
private readonly bool $ensureAscii = true,
49+
private readonly bool $omitEmptyValues = false,
50+
private readonly bool $omitIncompleteStrings = false,
4751
) {}
4852

4953
public function repair(): string
@@ -66,6 +70,7 @@ public function repair(): string
6670
$this->inString = false;
6771
$this->stringDelimiter = '';
6872
$this->stateBeforeString = self::STATE_START;
73+
$this->currentKeyStart = -1;
6974

7075
$length = strlen($json);
7176
$i = 0;
@@ -101,6 +106,12 @@ public function repair(): string
101106
$this->inString = false;
102107
$this->stringDelimiter = '';
103108
$this->state = $this->getNextStateAfterString();
109+
110+
// Reset key tracking after successfully completing a string value
111+
if ($this->state === self::STATE_EXPECTING_COMMA_OR_END) {
112+
$this->currentKeyStart = -1;
113+
}
114+
104115
$i++;
105116
continue;
106117
}
@@ -139,17 +150,26 @@ public function repair(): string
139150
// Close any unclosed strings
140151
// @phpstan-ignore if.alwaysFalse (can be true if string wasn't closed in loop)
141152
if ($this->inString) {
142-
$this->output .= '"';
153+
// Check if we should remove incomplete string values
154+
// @phpstan-ignore booleanAnd.alwaysFalse, identical.alwaysFalse (stateBeforeString is set when entering string state and can be STATE_IN_OBJECT_VALUE)
155+
if ($this->omitIncompleteStrings && $this->stateBeforeString === self::STATE_IN_OBJECT_VALUE) {
156+
$this->removeCurrentKey();
157+
// Update state after removing key
158+
$this->state = self::STATE_EXPECTING_COMMA_OR_END;
159+
} else {
160+
$this->output .= '"';
143161

144-
// If we were in a string escape state, the escape was incomplete
145-
// @phpstan-ignore identical.alwaysFalse (state can be STATE_IN_STRING_ESCAPE if string ended during escape)
146-
if ($this->state === self::STATE_IN_STRING_ESCAPE) {
147-
// Remove the incomplete escape backslash
148-
$this->output = substr($this->output, 0, -2) . substr($this->output, -1);
162+
// If we were in a string escape state, the escape was incomplete
163+
// @phpstan-ignore identical.alwaysFalse (state can be STATE_IN_STRING_ESCAPE if string ended during escape)
164+
if ($this->state === self::STATE_IN_STRING_ESCAPE) {
165+
// Remove the incomplete escape backslash
166+
$this->output = substr($this->output, 0, -2) . substr($this->output, -1);
167+
}
168+
169+
// Update state after closing string
170+
$this->state = $this->getNextStateAfterString();
149171
}
150172

151-
// Update state after closing string
152-
$this->state = $this->getNextStateAfterString();
153173
$this->inString = false;
154174
}
155175

@@ -158,21 +178,35 @@ public function repair(): string
158178
// @phpstan-ignore identical.alwaysFalse (state set to STATE_EXPECTING_COLON after closing string key)
159179
if ($this->state === self::STATE_EXPECTING_COLON) {
160180
// We have a key but no colon/value - drop the key when omitEmptyValues is set, otherwise add colon and empty value
161-
$this->output .= ':""';
181+
if ($this->omitEmptyValues) {
182+
$this->removeCurrentKey();
183+
} else {
184+
$this->output .= ':""';
185+
}
186+
162187
$this->state = self::STATE_EXPECTING_COMMA_OR_END;
163188
// @phpstan-ignore identical.alwaysFalse (state can be STATE_IN_OBJECT_KEY for unquoted keys)
164189
} elseif ($this->state === self::STATE_IN_OBJECT_KEY) {
165190
// We're still in key state - might have an incomplete unquoted key
166191
// If output ends with a quote, we have a complete key: drop it when omitEmptyValues is set, otherwise add colon and empty value
167192
if (str_ends_with($this->output, '"') && ! str_ends_with($this->output, ':"')) {
168-
$this->output .= ':""';
193+
if ($this->omitEmptyValues) {
194+
$this->removeCurrentKey();
195+
} else {
196+
$this->output .= ':""';
197+
}
169198
}
170199
}
171200

172201
// If we're in OBJECT_VALUE state and output ends with ':', the value is missing: drop the key when omitEmptyValues is set, otherwise add empty string
173202
// @phpstan-ignore booleanAnd.alwaysFalse, identical.alwaysFalse (state can change during loop)
174203
if ($this->state === self::STATE_IN_OBJECT_VALUE && str_ends_with($this->output, ':')) {
175-
$this->output .= '""';
204+
if ($this->omitEmptyValues) {
205+
$this->removeCurrentKey();
206+
} else {
207+
$this->output .= '""';
208+
}
209+
176210
$this->state = self::STATE_EXPECTING_COMMA_OR_END;
177211
}
178212

@@ -184,7 +218,11 @@ public function repair(): string
184218
$this->removeTrailingComma();
185219

186220
if ($expected === '}' && str_ends_with($this->output, ':')) {
187-
$this->output .= '""';
221+
if ($this->omitEmptyValues) {
222+
$this->removeCurrentKey();
223+
} else {
224+
$this->output .= '""';
225+
}
188226
}
189227

190228
$this->output .= $expected;
@@ -363,6 +401,8 @@ private function handleObjectKey(string $json, int $i): int
363401
}
364402

365403
if ($char === '"' || $char === "'") {
404+
// Track where the key starts
405+
$this->currentKeyStart = strlen($this->output);
366406
$this->output .= '"';
367407
$this->inString = true;
368408
$this->stringDelimiter = $char;
@@ -374,6 +414,8 @@ private function handleObjectKey(string $json, int $i): int
374414

375415
// Unquoted key
376416
if (ctype_alnum($char) || $char === '_' || $char === '-') {
417+
// Track where the key starts
418+
$this->currentKeyStart = strlen($this->output);
377419
$this->output .= '"';
378420
while ($i < strlen($json) && (ctype_alnum($json[$i]) || $json[$i] === '_' || $json[$i] === '-')) {
379421
$this->output .= $json[$i];
@@ -419,6 +461,8 @@ private function handleObjectValue(string $json, int $i): int
419461
$this->output .= '{';
420462
$this->stack[] = '}';
421463
$this->state = self::STATE_IN_OBJECT_KEY;
464+
// Reset key tracking when starting a nested object (this is a value, not a key)
465+
$this->currentKeyStart = -1;
422466

423467
return $i + 1;
424468
}
@@ -427,6 +471,8 @@ private function handleObjectValue(string $json, int $i): int
427471
$this->output .= '[';
428472
$this->stack[] = ']';
429473
$this->state = self::STATE_IN_ARRAY;
474+
// Reset key tracking when starting a nested array (this is a value, not a key)
475+
$this->currentKeyStart = -1;
430476

431477
return $i + 1;
432478
}
@@ -443,7 +489,11 @@ private function handleObjectValue(string $json, int $i): int
443489

444490
if ($char === '}') {
445491
if (str_ends_with($this->output, ':')) {
446-
$this->output .= '""';
492+
if ($this->omitEmptyValues) {
493+
$this->removeCurrentKey();
494+
} else {
495+
$this->output .= '""';
496+
}
447497
}
448498

449499
$this->removeTrailingComma();
@@ -460,6 +510,8 @@ private function handleObjectValue(string $json, int $i): int
460510
if ($matchResult === 1) {
461511
$this->output .= $this->normalizeBoolean($matches[1]);
462512
$this->state = self::STATE_EXPECTING_COMMA_OR_END;
513+
// Reset key tracking after successfully completing a boolean/null value
514+
$this->currentKeyStart = -1;
463515

464516
return $i + strlen($matches[1]);
465517
}
@@ -473,7 +525,12 @@ private function handleObjectValue(string $json, int $i): int
473525

474526
// Missing value
475527
if ($char === ',' || $char === '}') {
476-
$this->output .= '""';
528+
if ($this->omitEmptyValues) {
529+
$this->removeCurrentKey();
530+
} else {
531+
$this->output .= '""';
532+
}
533+
477534
$this->state = self::STATE_EXPECTING_COMMA_OR_END;
478535

479536
return $i;
@@ -626,6 +683,8 @@ private function handleNumber(string $json, int $i): int
626683
}
627684

628685
$this->state = self::STATE_EXPECTING_COMMA_OR_END;
686+
// Reset key tracking after successfully completing a number value
687+
$this->currentKeyStart = -1;
629688

630689
return $i;
631690
}
@@ -684,4 +743,21 @@ private function normalizeBoolean(string $value): string
684743
default => 'null',
685744
};
686745
}
746+
747+
/**
 * Discard the object key currently being tracked, together with anything
 * written to the output after it.
 *
 * The output buffer is cut back to the offset stored in $currentKeyStart
 * (the position recorded just before the key's opening quote was emitted).
 * Trailing whitespace is then stripped, and if the buffer ends with a comma
 * that single comma and the whitespace before it are stripped as well, so
 * the previous entry is not left with a dangling separator.
 *
 * Does nothing when no key is being tracked ($currentKeyStart < 0). After a
 * removal, tracking is reset to -1.
 *
 * NOTE(review): only a comma *preceding* the key is removed here; a comma
 * that follows the removed entry in the input is left to the caller's
 * comma handling - confirm the first-key case does not emit a leading comma.
 */
private function removeCurrentKey(): void
{
    // No key is being tracked: leave the output untouched.
    if ($this->currentKeyStart < 0) {
        return;
    }

    // Keep only what was emitted before the key, without trailing whitespace.
    $kept = rtrim(substr($this->output, 0, $this->currentKeyStart));

    // Drop the separator that introduced the removed entry, if there is one.
    if (str_ends_with($kept, ',')) {
        $kept = rtrim(substr($kept, 0, -1));
    }

    $this->output = $kept;
    $this->currentKeyStart = -1;
}
687763
}

src/functions.php

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -9,12 +9,18 @@
99
*
1010
* @param string $json The JSON string to repair
1111
* @param bool $ensureAscii Whether to escape non-ASCII characters (default: true)
12+
* @param bool $omitEmptyValues Whether to remove keys with missing values instead of adding empty strings (default: false)
13+
* @param bool $omitIncompleteStrings Whether to remove keys with incomplete string values instead of closing them (default: false)
1214
*
1315
* @return string The repaired JSON string
1416
*/
15-
function json_repair(string $json, bool $ensureAscii = true): string
16-
{
17-
$repairer = new JsonRepairer($json, $ensureAscii);
17+
function json_repair(
18+
string $json,
19+
bool $ensureAscii = true,
20+
bool $omitEmptyValues = false,
21+
bool $omitIncompleteStrings = false,
22+
): string {
23+
$repairer = new JsonRepairer($json, $ensureAscii, $omitEmptyValues, $omitIncompleteStrings);
1824

1925
return $repairer->repair();
2026
}
@@ -26,6 +32,8 @@ function json_repair(string $json, bool $ensureAscii = true): string
2632
* @param int<1, max> $depth Maximum nesting depth of the structure being decoded
2733
* @param int $flags Bitmask of JSON decode flags (default: JSON_THROW_ON_ERROR)
2834
* @param bool $ensureAscii Whether to escape non-ASCII characters (default: true)
35+
* @param bool $omitEmptyValues Whether to remove keys with missing values instead of adding empty strings (default: false)
36+
* @param bool $omitIncompleteStrings Whether to remove keys with incomplete string values instead of closing them (default: false)
2937
*
3038
* @return array<mixed>|object The decoded JSON data
3139
*/
@@ -34,8 +42,10 @@ function json_repair_decode(
3442
int $depth = 512,
3543
int $flags = JSON_THROW_ON_ERROR,
3644
bool $ensureAscii = true,
45+
bool $omitEmptyValues = false,
46+
bool $omitIncompleteStrings = false,
3747
): array|object {
38-
$repairer = new JsonRepairer($json, $ensureAscii);
48+
$repairer = new JsonRepairer($json, $ensureAscii, $omitEmptyValues, $omitIncompleteStrings);
3949

4050
return $repairer->decode($depth, $flags);
4151
}

0 commit comments

Comments
 (0)