Skip to content

Commit 5291a7a

Browse files
committed
add logging support
1 parent daf2f3c commit 5291a7a

5 files changed

Lines changed: 245 additions & 35 deletions

File tree

composer.json

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,9 +18,11 @@
1818
],
1919
"require": {
2020
"php": "^8.3",
21-
"ext-json": "*"
21+
"ext-json": "*",
22+
"psr/log": "^3.0"
2223
},
2324
"require-dev": {
25+
"colinodell/psr-testlogger": "^1.3",
2426
"pestphp/pest": "^4.1.4",
2527
"pestphp/pest-plugin-type-coverage": "^4.0.3",
2628
"phpbench/phpbench": "^1.4",

src/JsonRepairer.php

Lines changed: 111 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,14 @@
44

55
namespace Cortex\JsonRepair;
66

7+
use Psr\Log\LoggerAwareTrait;
8+
use Psr\Log\LoggerAwareInterface;
79
use Cortex\JsonRepair\Exceptions\JsonRepairException;
810

9-
class JsonRepairer
11+
class JsonRepairer implements LoggerAwareInterface
1012
{
13+
use LoggerAwareTrait;
14+
1115
private const int STATE_START = 0;
1216

1317
private const int STATE_IN_STRING = 1;
@@ -78,12 +82,20 @@ public function __construct(
7882
public function repair(): string
7983
{
8084
if (json_validate($this->json)) {
85+
$this->log('JSON is already valid, returning as-is');
86+
8187
return $this->json;
8288
}
8389

90+
$this->log('Starting JSON repair');
91+
8492
// Extract JSON from markdown code blocks if present
8593
$json = $this->extractJsonFromMarkdown($this->json);
8694

95+
if ($json !== $this->json) {
96+
$this->log('Extracted JSON from markdown code block');
97+
}
98+
8799
// Handle multiple JSON objects
88100
$json = $this->extractFirstValidJson($json);
89101

@@ -108,16 +120,15 @@ public function repair(): string
108120
// @phpstan-ignore identical.alwaysFalse (state changes in loop iterations)
109121
if ($this->state === self::STATE_IN_STRING_ESCAPE) {
110122
// If we're at the end of the string and in escape state, the escape is incomplete
123+
// Just drop the incomplete escape (backslash wasn't added to output yet)
111124
if ($i >= strlen($json)) {
112-
// Remove the backslash, treat as literal character
113-
$this->output = substr($this->output, 0, -1);
114125
$this->state = self::STATE_IN_STRING;
115126
break;
116127
}
117128

118-
$this->handleEscapeSequence($char);
129+
$extraCharsConsumed = $this->handleEscapeSequence($char, $json);
119130
$this->state = self::STATE_IN_STRING;
120-
$i++;
131+
$i += 1 + $extraCharsConsumed;
121132
continue;
122133
}
123134

@@ -127,6 +138,15 @@ public function repair(): string
127138
// Check for smart quotes as closing delimiter
128139
$smartQuoteLength = $this->getSmartQuoteLength($json, $i);
129140

141+
// Handle double quote inside single-quoted string - must escape it
142+
// @phpstan-ignore booleanAnd.alwaysFalse, identical.alwaysFalse (delimiter set when entering string state and can be single quote)
143+
if ($char === '"' && $this->stringDelimiter === "'") {
144+
$this->log('Escaping double quote inside single-quoted string');
145+
$this->output .= '\\"';
146+
$i++;
147+
continue;
148+
}
149+
130150
// @phpstan-ignore identical.alwaysFalse (delimiter set when entering string state)
131151
if ($char === $this->stringDelimiter || $smartQuoteLength > 0) {
132152
// Check if this quote should be escaped (it's inside the string value)
@@ -138,6 +158,7 @@ public function repair(): string
138158

139159
// @phpstan-ignore booleanAnd.leftAlwaysFalse, booleanAnd.rightAlwaysFalse, booleanAnd.alwaysFalse (variables can be true at runtime)
140160
if ($isRegularQuote && $isInValue && $this->shouldEscapeQuoteInValue($json, $i)) {
161+
$this->log('Escaping embedded quote inside string value');
141162
$this->output .= '\\"';
142163
$i++;
143164
continue;
@@ -160,7 +181,7 @@ public function repair(): string
160181
}
161182

162183
if ($char === '\\') {
163-
$this->output .= $char;
184+
// Don't output the backslash yet - let handleEscapeSequence decide
164185
$this->state = self::STATE_IN_STRING_ESCAPE;
165186
$i++;
166187
continue;
@@ -169,6 +190,9 @@ public function repair(): string
169190
// Check if this is a structural character that should close an unclosed string
170191
// This handles cases like {"key": "value with no closing quote}
171192
if (($char === '}' || $char === ']') && $this->shouldCloseStringAtStructuralChar($json, $i)) {
193+
$this->log('Closing unclosed string at structural character', [
194+
'char' => $char,
195+
]);
172196
// Close the string and let the structural character be processed
173197
$this->output .= '"';
174198
$this->inString = false;
@@ -214,18 +238,16 @@ public function repair(): string
214238
// Check if we should remove incomplete string values
215239
// @phpstan-ignore booleanAnd.alwaysFalse, identical.alwaysFalse (stateBeforeString is set when entering string state and can be STATE_IN_OBJECT_VALUE)
216240
if ($this->omitIncompleteStrings && $this->stateBeforeString === self::STATE_IN_OBJECT_VALUE) {
241+
$this->log('Removing incomplete string value (omitIncompleteStrings enabled)');
217242
$this->removeCurrentKey();
218243
// Update state after removing key
219244
$this->state = self::STATE_EXPECTING_COMMA_OR_END;
220245
} else {
246+
$this->log('Adding missing closing quote for unclosed string');
221247
$this->output .= '"';
222248

223-
// If we were in a string escape state, the escape was incomplete
224-
// @phpstan-ignore identical.alwaysFalse (state can be STATE_IN_STRING_ESCAPE if string ended during escape)
225-
if ($this->state === self::STATE_IN_STRING_ESCAPE) {
226-
// Remove the incomplete escape backslash
227-
$this->output = substr($this->output, 0, -2) . substr($this->output, -1);
228-
}
249+
// Note: If we were in escape state, the incomplete escape backslash
250+
// was never added to output (we defer adding it to handleEscapeSequence)
229251

230252
// Update state after closing string
231253
$this->state = $this->getNextStateAfterString();
@@ -240,8 +262,10 @@ public function repair(): string
240262
if ($this->state === self::STATE_EXPECTING_COLON) {
241263
// We have a key but no colon/value - add colon and empty value
242264
if ($this->omitEmptyValues) {
265+
$this->log('Removing key without value (omitEmptyValues enabled)');
243266
$this->removeCurrentKey();
244267
} else {
268+
$this->log('Adding missing colon and empty value for incomplete key');
245269
$this->output .= ':""';
246270
}
247271

@@ -278,6 +302,9 @@ public function repair(): string
278302
// Close any unclosed brackets/braces
279303
while ($this->stack !== []) {
280304
$expected = array_pop($this->stack);
305+
$this->log('Adding missing closing bracket/brace', [
306+
'char' => $expected,
307+
]);
281308

282309
// Remove trailing comma before closing
283310
$this->removeTrailingComma();
@@ -523,6 +550,7 @@ private function handleObjectKey(string $json, int $i): int
523550
$afterDoubleQuote = $json[$i + 2];
524551

525552
if (ctype_alnum($afterDoubleQuote) || $afterDoubleQuote === '_' || $afterDoubleQuote === ' ') {
553+
$this->log('Found doubled quote delimiter pattern, normalizing key');
526554
// This looks like ""key"" pattern - skip the opening "" and read the key
527555
$this->currentKeyStart = strlen($this->output);
528556
$this->output .= '"';
@@ -575,6 +603,10 @@ private function handleObjectKey(string $json, int $i): int
575603
}
576604
}
577605

606+
if ($char === "'") {
607+
$this->log('Converting single-quoted key to double quotes');
608+
}
609+
578610
// Track where the key starts
579611
$this->currentKeyStart = strlen($this->output);
580612
$this->output .= '"';
@@ -590,6 +622,7 @@ private function handleObjectKey(string $json, int $i): int
590622
$smartQuoteLength = $this->getSmartQuoteLength($json, $i);
591623

592624
if ($smartQuoteLength > 0) {
625+
$this->log('Converting smart/curly quote to standard double quote');
593626
$this->currentKeyStart = strlen($this->output);
594627
$this->output .= '"';
595628
$this->inString = true;
@@ -602,6 +635,7 @@ private function handleObjectKey(string $json, int $i): int
602635

603636
// Unquoted key
604637
if (ctype_alnum($char) || $char === '_' || $char === '-') {
638+
$this->log('Adding quotes around unquoted key');
605639
// Track where the key starts
606640
$this->currentKeyStart = strlen($this->output);
607641
$this->output .= '"';
@@ -650,6 +684,7 @@ private function handleExpectingColon(string $json, int $i): int
650684

651685
// Missing colon, insert it
652686
if (! ctype_space($char)) {
687+
$this->log('Inserting missing colon after key');
653688
$this->output .= ':';
654689
$this->state = self::STATE_IN_OBJECT_VALUE;
655690

@@ -724,8 +759,10 @@ private function handleObjectValue(string $json, int $i): int
724759
$this->output = $trimmedOutput;
725760

726761
if ($this->omitEmptyValues) {
762+
$this->log('Removing key with missing value (omitEmptyValues enabled)');
727763
$this->removeCurrentKey();
728764
} else {
765+
$this->log('Adding empty string for missing value');
729766
$this->output .= '""';
730767
}
731768
}
@@ -742,7 +779,16 @@ private function handleObjectValue(string $json, int $i): int
742779
$matchResult = preg_match('/^(true|false|null|True|False|None)\b/i', substr($json, $i), $matches);
743780

744781
if ($matchResult === 1) {
745-
$this->output .= $this->normalizeBoolean($matches[1]);
782+
$normalized = $this->normalizeBoolean($matches[1]);
783+
784+
if ($matches[1] !== $normalized) {
785+
$this->log('Normalizing boolean/null value', [
786+
'from' => $matches[1],
787+
'to' => $normalized,
788+
]);
789+
}
790+
791+
$this->output .= $normalized;
746792
$this->state = self::STATE_EXPECTING_COMMA_OR_END;
747793
// Reset key tracking after successfully completing a boolean/null value
748794
$this->currentKeyStart = -1;
@@ -760,8 +806,10 @@ private function handleObjectValue(string $json, int $i): int
760806
// Missing value
761807
if ($char === ',' || $char === '}') {
762808
if ($this->omitEmptyValues) {
809+
$this->log('Removing key with missing value (omitEmptyValues enabled)');
763810
$this->removeCurrentKey();
764811
} else {
812+
$this->log('Adding empty string for missing value');
765813
$this->output .= '""';
766814
}
767815

@@ -785,6 +833,8 @@ private function handleObjectValue(string $json, int $i): int
785833

786834
// Handle unquoted string values
787835
if (ctype_alpha($char) || $char === '_') {
836+
$this->log('Found unquoted string value, adding quotes');
837+
788838
return $this->handleUnquotedStringValue($json, $i);
789839
}
790840

@@ -902,6 +952,7 @@ private function handleExpectingCommaOrEnd(string $json, int $i): int
902952

903953
// Missing comma, insert it
904954
if (! ctype_space($char) && $char !== $top) {
955+
$this->log('Inserting missing comma');
905956
$this->output .= ',';
906957
$this->state = $top === '}' ? self::STATE_IN_OBJECT_KEY : self::STATE_IN_ARRAY;
907958

@@ -989,27 +1040,39 @@ private function handleNumber(string $json, int $i): int
9891040
* unicode escapes (\uXXXX). Invalid or incomplete escapes are treated
9901041
* as literal backslash followed by the character.
9911042
*/
992-
private function handleEscapeSequence(string $char): void
1043+
/**
 * Emit the escape sequence that follows a backslash inside a string.
 *
 * The caller has not written the backslash to the output yet; this method
 * decides how the sequence is emitted:
 *  - a single-character JSON escape (", \, /, b, f, n, r, t) is written as
 *    a backslash followed by that character;
 *  - `u` followed by four hexadecimal digits is written as \uXXXX;
 *  - anything else is written as an escaped backslash followed by the
 *    character, so the backslash survives as literal text.
 *
 * @param string $char The character immediately after the backslash
 * @param string $json The text being scanned; the hex digits of a unicode
 *                     escape are read from it at offsets relative to $this->pos
 *
 * @return int Number of extra characters consumed beyond the escape character itself
 */
private function handleEscapeSequence(string $char, string $json): int
{
    $isSimpleEscape = match ($char) {
        '"', '\\', '/', 'b', 'f', 'n', 'r', 't' => true,
        default => false,
    };

    if ($isSimpleEscape) {
        $this->output .= '\\' . $char;

        return 0;
    }

    // A unicode escape needs four more characters after the current position.
    $hasRoomForHex = strlen($json) > $this->pos + 4;

    if ($char === 'u' && $hasRoomForHex) {
        $hexDigits = substr($json, $this->pos + 1, 4);

        if (ctype_xdigit($hexDigits)) {
            $this->output .= '\\u' . $hexDigits;

            // The four hex digits were consumed in addition to the "u".
            return 4;
        }
    }

    // Unknown or incomplete escape: keep the backslash as a literal by
    // escaping it, then emit the character unchanged.
    $this->output .= '\\\\' . $char;

    return 0;
}
}
10141077

10151078
/**
@@ -1034,6 +1097,7 @@ private function removeTrailingComma(): void
10341097
$trimmed = rtrim($this->output);
10351098

10361099
if (str_ends_with($trimmed, ',')) {
1100+
$this->log('Removing trailing comma');
10371101
$this->output = substr($trimmed, 0, -1);
10381102
}
10391103
}
@@ -1382,4 +1446,32 @@ private function getSmartQuoteLength(string $json, int $pos): int
13821446

13831447
return 0;
13841448
}
1449+
1450+
/**
 * Report a repair action to the attached logger at debug level.
 *
 * Does nothing when no logger has been set. Every record carries the
 * current position and a snippet of the surrounding input; entries in
 * $context are merged on top, so a caller-supplied key with the same
 * name replaces the default.
 *
 * @param string $message Description of the repair action
 * @param array<string, mixed> $context Additional context data
 */
private function log(string $message, array $context = []): void
{
    // Bail out first so the snippet is only built when it will be used.
    if ($this->logger === null) {
        return;
    }

    $defaults = [
        'position' => $this->pos,
        'context' => $this->getContextSnippet(),
    ];

    $this->logger->debug($message, array_merge($defaults, $context));
}
1463+
1464+
/**
 * Build a short excerpt of the input around the current position for log output.
 *
 * The excerpt holds up to $window bytes before the position and up to
 * $window bytes from the position onward, with ">>>" marking the position.
 * Offsets are byte-based (strlen/substr), so a multi-byte character at
 * either edge of the excerpt may be cut.
 *
 * NOTE(review): this slices $this->json, while repair() scans the copy
 * returned by extractJsonFromMarkdown(); confirm $this->pos indexes the
 * same string when a markdown fence was stripped.
 *
 * @param int $window Maximum number of bytes to include on each side of the position
 */
private function getContextSnippet(int $window = 15): string
{
    $cursor = $this->pos;
    $from = max(0, $cursor - $window);
    $to = min(strlen($this->json), $cursor + $window);

    return substr($this->json, $from, $cursor - $from)
        . '>>>'
        . substr($this->json, $cursor, $to - $cursor);
}
13851477
}

src/functions.php

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,13 +4,16 @@
44

55
namespace Cortex\JsonRepair;
66

7+
use Psr\Log\LoggerInterface;
8+
79
/**
810
* Repair a broken JSON string.
911
*
1012
* @param string $json The JSON string to repair
1113
* @param bool $ensureAscii Whether to escape non-ASCII characters (default: true)
1214
* @param bool $omitEmptyValues Whether to remove keys with missing values instead of adding empty strings (default: false)
1315
* @param bool $omitIncompleteStrings Whether to remove keys with incomplete string values instead of closing them (default: false)
16+
* @param \Psr\Log\LoggerInterface|null $logger Optional PSR-3 logger for debugging repair actions
1417
*
1518
* @return string The repaired JSON string
1619
*/
@@ -19,9 +22,14 @@ function json_repair(
1922
bool $ensureAscii = true,
2023
bool $omitEmptyValues = false,
2124
bool $omitIncompleteStrings = false,
25+
?LoggerInterface $logger = null,
2226
): string {
2327
$repairer = new JsonRepairer($json, $ensureAscii, $omitEmptyValues, $omitIncompleteStrings);
2428

29+
if ($logger instanceof LoggerInterface) {
30+
$repairer->setLogger($logger);
31+
}
32+
2533
return $repairer->repair();
2634
}
2735

@@ -34,6 +42,7 @@ function json_repair(
3442
* @param bool $ensureAscii Whether to escape non-ASCII characters (default: true)
3543
* @param bool $omitEmptyValues Whether to remove keys with missing values instead of adding empty strings (default: false)
3644
* @param bool $omitIncompleteStrings Whether to remove keys with incomplete string values instead of closing them (default: false)
45+
* @param \Psr\Log\LoggerInterface|null $logger Optional PSR-3 logger for debugging repair actions
3746
*
3847
* @return array<mixed>|object The decoded JSON data
3948
*/
@@ -44,8 +53,13 @@ function json_repair_decode(
4453
bool $ensureAscii = true,
4554
bool $omitEmptyValues = false,
4655
bool $omitIncompleteStrings = false,
56+
?LoggerInterface $logger = null,
4757
): array|object {
4858
$repairer = new JsonRepairer($json, $ensureAscii, $omitEmptyValues, $omitIncompleteStrings);
4959

60+
if ($logger instanceof LoggerInterface) {
61+
$repairer->setLogger($logger);
62+
}
63+
5064
return $repairer->decode($depth, $flags);
5165
}

0 commit comments

Comments
 (0)