44
55namespace Cortex \JsonRepair ;
66
7+ use Psr \Log \LoggerAwareTrait ;
8+ use Psr \Log \LoggerAwareInterface ;
79use Cortex \JsonRepair \Exceptions \JsonRepairException ;
810
9- class JsonRepairer
11+ class JsonRepairer implements LoggerAwareInterface
1012{
13+ use LoggerAwareTrait;
14+
1115 private const int STATE_START = 0 ;
1216
1317 private const int STATE_IN_STRING = 1 ;
@@ -78,12 +82,20 @@ public function __construct(
7882 public function repair (): string
7983 {
8084 if (json_validate ($ this ->json )) {
85+ $ this ->log ('JSON is already valid, returning as-is ' );
86+
8187 return $ this ->json ;
8288 }
8389
90+ $ this ->log ('Starting JSON repair ' );
91+
8492 // Extract JSON from markdown code blocks if present
8593 $ json = $ this ->extractJsonFromMarkdown ($ this ->json );
8694
95+ if ($ json !== $ this ->json ) {
96+ $ this ->log ('Extracted JSON from markdown code block ' );
97+ }
98+
8799 // Handle multiple JSON objects
88100 $ json = $ this ->extractFirstValidJson ($ json );
89101
@@ -108,16 +120,15 @@ public function repair(): string
108120 // @phpstan-ignore identical.alwaysFalse (state changes in loop iterations)
109121 if ($ this ->state === self ::STATE_IN_STRING_ESCAPE ) {
110122 // If we're at the end of the string and in escape state, the escape is incomplete
123+ // Just drop the incomplete escape (backslash wasn't added to output yet)
111124 if ($ i >= strlen ($ json )) {
112- // Remove the backslash, treat as literal character
113- $ this ->output = substr ($ this ->output , 0 , -1 );
114125 $ this ->state = self ::STATE_IN_STRING ;
115126 break ;
116127 }
117128
118- $ this ->handleEscapeSequence ($ char );
129+ $ extraCharsConsumed = $ this ->handleEscapeSequence ($ char, $ json );
119130 $ this ->state = self ::STATE_IN_STRING ;
120- $ i++ ;
131+ $ i += 1 + $ extraCharsConsumed ;
121132 continue ;
122133 }
123134
@@ -127,6 +138,15 @@ public function repair(): string
127138 // Check for smart quotes as closing delimiter
128139 $ smartQuoteLength = $ this ->getSmartQuoteLength ($ json , $ i );
129140
141+ // Handle double quote inside single-quoted string - must escape it
142+ // @phpstan-ignore booleanAnd.alwaysFalse, identical.alwaysFalse (delimiter set when entering string state and can be single quote)
143+ if ($ char === '" ' && $ this ->stringDelimiter === "' " ) {
144+ $ this ->log ('Escaping double quote inside single-quoted string ' );
145+ $ this ->output .= '\\" ' ;
146+ $ i ++;
147+ continue ;
148+ }
149+
130150 // @phpstan-ignore identical.alwaysFalse (delimiter set when entering string state)
131151 if ($ char === $ this ->stringDelimiter || $ smartQuoteLength > 0 ) {
132152 // Check if this quote should be escaped (it's inside the string value)
@@ -138,6 +158,7 @@ public function repair(): string
138158
139159 // @phpstan-ignore booleanAnd.leftAlwaysFalse, booleanAnd.rightAlwaysFalse, booleanAnd.alwaysFalse (variables can be true at runtime)
140160 if ($ isRegularQuote && $ isInValue && $ this ->shouldEscapeQuoteInValue ($ json , $ i )) {
161+ $ this ->log ('Escaping embedded quote inside string value ' );
141162 $ this ->output .= '\\" ' ;
142163 $ i ++;
143164 continue ;
@@ -160,7 +181,7 @@ public function repair(): string
160181 }
161182
162183 if ($ char === '\\' ) {
163- $ this -> output .= $ char ;
184+ // Don't output the backslash yet - let handleEscapeSequence decide
164185 $ this ->state = self ::STATE_IN_STRING_ESCAPE ;
165186 $ i ++;
166187 continue ;
@@ -169,6 +190,9 @@ public function repair(): string
169190 // Check if this is a structural character that should close an unclosed string
170191 // This handles cases like {"key": "value with no closing quote}
171192 if (($ char === '} ' || $ char === '] ' ) && $ this ->shouldCloseStringAtStructuralChar ($ json , $ i )) {
193+ $ this ->log ('Closing unclosed string at structural character ' , [
194+ 'char ' => $ char ,
195+ ]);
172196 // Close the string and let the structural character be processed
173197 $ this ->output .= '" ' ;
174198 $ this ->inString = false ;
@@ -214,18 +238,16 @@ public function repair(): string
214238 // Check if we should remove incomplete string values
215239 // @phpstan-ignore booleanAnd.alwaysFalse, identical.alwaysFalse (stateBeforeString is set when entering string state and can be STATE_IN_OBJECT_VALUE)
216240 if ($ this ->omitIncompleteStrings && $ this ->stateBeforeString === self ::STATE_IN_OBJECT_VALUE ) {
241+ $ this ->log ('Removing incomplete string value (omitIncompleteStrings enabled) ' );
217242 $ this ->removeCurrentKey ();
218243 // Update state after removing key
219244 $ this ->state = self ::STATE_EXPECTING_COMMA_OR_END ;
220245 } else {
246+ $ this ->log ('Adding missing closing quote for unclosed string ' );
221247 $ this ->output .= '" ' ;
222248
223- // If we were in a string escape state, the escape was incomplete
224- // @phpstan-ignore identical.alwaysFalse (state can be STATE_IN_STRING_ESCAPE if string ended during escape)
225- if ($ this ->state === self ::STATE_IN_STRING_ESCAPE ) {
226- // Remove the incomplete escape backslash
227- $ this ->output = substr ($ this ->output , 0 , -2 ) . substr ($ this ->output , -1 );
228- }
249+ // Note: If we were in escape state, the incomplete escape backslash
250+ // was never added to output (we defer adding it to handleEscapeSequence)
229251
230252 // Update state after closing string
231253 $ this ->state = $ this ->getNextStateAfterString ();
@@ -240,8 +262,10 @@ public function repair(): string
240262 if ($ this ->state === self ::STATE_EXPECTING_COLON ) {
241263 // We have a key but no colon/value - add colon and empty value
242264 if ($ this ->omitEmptyValues ) {
265+ $ this ->log ('Removing key without value (omitEmptyValues enabled) ' );
243266 $ this ->removeCurrentKey ();
244267 } else {
268+ $ this ->log ('Adding missing colon and empty value for incomplete key ' );
245269 $ this ->output .= ':"" ' ;
246270 }
247271
@@ -278,6 +302,9 @@ public function repair(): string
278302 // Close any unclosed brackets/braces
279303 while ($ this ->stack !== []) {
280304 $ expected = array_pop ($ this ->stack );
305+ $ this ->log ('Adding missing closing bracket/brace ' , [
306+ 'char ' => $ expected ,
307+ ]);
281308
282309 // Remove trailing comma before closing
283310 $ this ->removeTrailingComma ();
@@ -523,6 +550,7 @@ private function handleObjectKey(string $json, int $i): int
523550 $ afterDoubleQuote = $ json [$ i + 2 ];
524551
525552 if (ctype_alnum ($ afterDoubleQuote ) || $ afterDoubleQuote === '_ ' || $ afterDoubleQuote === ' ' ) {
553+ $ this ->log ('Found doubled quote delimiter pattern, normalizing key ' );
526554 // This looks like ""key"" pattern - skip the opening "" and read the key
527555 $ this ->currentKeyStart = strlen ($ this ->output );
528556 $ this ->output .= '" ' ;
@@ -575,6 +603,10 @@ private function handleObjectKey(string $json, int $i): int
575603 }
576604 }
577605
606+ if ($ char === "' " ) {
607+ $ this ->log ('Converting single-quoted key to double quotes ' );
608+ }
609+
578610 // Track where the key starts
579611 $ this ->currentKeyStart = strlen ($ this ->output );
580612 $ this ->output .= '" ' ;
@@ -590,6 +622,7 @@ private function handleObjectKey(string $json, int $i): int
590622 $ smartQuoteLength = $ this ->getSmartQuoteLength ($ json , $ i );
591623
592624 if ($ smartQuoteLength > 0 ) {
625+ $ this ->log ('Converting smart/curly quote to standard double quote ' );
593626 $ this ->currentKeyStart = strlen ($ this ->output );
594627 $ this ->output .= '" ' ;
595628 $ this ->inString = true ;
@@ -602,6 +635,7 @@ private function handleObjectKey(string $json, int $i): int
602635
603636 // Unquoted key
604637 if (ctype_alnum ($ char ) || $ char === '_ ' || $ char === '- ' ) {
638+ $ this ->log ('Adding quotes around unquoted key ' );
605639 // Track where the key starts
606640 $ this ->currentKeyStart = strlen ($ this ->output );
607641 $ this ->output .= '" ' ;
@@ -650,6 +684,7 @@ private function handleExpectingColon(string $json, int $i): int
650684
651685 // Missing colon, insert it
652686 if (! ctype_space ($ char )) {
687+ $ this ->log ('Inserting missing colon after key ' );
653688 $ this ->output .= ': ' ;
654689 $ this ->state = self ::STATE_IN_OBJECT_VALUE ;
655690
@@ -724,8 +759,10 @@ private function handleObjectValue(string $json, int $i): int
724759 $ this ->output = $ trimmedOutput ;
725760
726761 if ($ this ->omitEmptyValues ) {
762+ $ this ->log ('Removing key with missing value (omitEmptyValues enabled) ' );
727763 $ this ->removeCurrentKey ();
728764 } else {
765+ $ this ->log ('Adding empty string for missing value ' );
729766 $ this ->output .= '"" ' ;
730767 }
731768 }
@@ -742,7 +779,16 @@ private function handleObjectValue(string $json, int $i): int
742779 $ matchResult = preg_match ('/^(true|false|null|True|False|None)\b/i ' , substr ($ json , $ i ), $ matches );
743780
744781 if ($ matchResult === 1 ) {
745- $ this ->output .= $ this ->normalizeBoolean ($ matches [1 ]);
782+ $ normalized = $ this ->normalizeBoolean ($ matches [1 ]);
783+
784+ if ($ matches [1 ] !== $ normalized ) {
785+ $ this ->log ('Normalizing boolean/null value ' , [
786+ 'from ' => $ matches [1 ],
787+ 'to ' => $ normalized ,
788+ ]);
789+ }
790+
791+ $ this ->output .= $ normalized ;
746792 $ this ->state = self ::STATE_EXPECTING_COMMA_OR_END ;
747793 // Reset key tracking after successfully completing a boolean/null value
748794 $ this ->currentKeyStart = -1 ;
@@ -760,8 +806,10 @@ private function handleObjectValue(string $json, int $i): int
760806 // Missing value
761807 if ($ char === ', ' || $ char === '} ' ) {
762808 if ($ this ->omitEmptyValues ) {
809+ $ this ->log ('Removing key with missing value (omitEmptyValues enabled) ' );
763810 $ this ->removeCurrentKey ();
764811 } else {
812+ $ this ->log ('Adding empty string for missing value ' );
765813 $ this ->output .= '"" ' ;
766814 }
767815
@@ -785,6 +833,8 @@ private function handleObjectValue(string $json, int $i): int
785833
786834 // Handle unquoted string values
787835 if (ctype_alpha ($ char ) || $ char === '_ ' ) {
836+ $ this ->log ('Found unquoted string value, adding quotes ' );
837+
788838 return $ this ->handleUnquotedStringValue ($ json , $ i );
789839 }
790840
@@ -902,6 +952,7 @@ private function handleExpectingCommaOrEnd(string $json, int $i): int
902952
903953 // Missing comma, insert it
904954 if (! ctype_space ($ char ) && $ char !== $ top ) {
955+ $ this ->log ('Inserting missing comma ' );
905956 $ this ->output .= ', ' ;
906957 $ this ->state = $ top === '} ' ? self ::STATE_IN_OBJECT_KEY : self ::STATE_IN_ARRAY ;
907958
@@ -989,27 +1040,39 @@ private function handleNumber(string $json, int $i): int
9891040 * unicode escapes (\uXXXX). Invalid or incomplete escapes are treated
9901041 * as literal backslash followed by the character.
9911042 */
/**
 * Handle the character that follows a backslash inside a string.
 *
 * This method appends the complete escape (backslash included) to the output:
 *  - the simple JSON escapes (\", \\, \/, \b, \f, \n, \r, \t) are copied through;
 *  - \uXXXX is copied through when four hexadecimal digits follow the "u";
 *  - anything else is not a JSON escape, so an escaped backslash (\\) is
 *    written followed by the character. A raw control byte (below 0x20) in
 *    that position is written as \u00XX, because JSON strings may not contain
 *    unescaped control characters.
 *
 * NOTE(review): the hex digits are read at $this->pos + 1, so this assumes
 * $this->pos is the offset of $char within $json — confirm the caller keeps
 * the two in sync.
 *
 * @param string $char The character immediately after the backslash
 * @param string $json The text being scanned
 *
 * @return int Number of extra characters consumed beyond the escape character itself
 */
private function handleEscapeSequence(string $char, string $json): int
{
    $validEscapes = ['"', '\\', '/', 'b', 'f', 'n', 'r', 't'];

    if (in_array($char, $validEscapes, true)) {
        $this->output .= '\\' . $char;

        return 0;
    }

    // A unicode escape needs all four hex digits to be present after the "u".
    if ($char === 'u' && $this->pos + 4 < strlen($json)) {
        $hex = substr($json, $this->pos + 1, 4);

        if (ctype_xdigit($hex)) {
            $this->output .= '\\u' . $hex;

            return 4; // Consumed 4 extra hex digits
        }
    }

    // Invalid escape sequence - keep the backslash as a literal by escaping it.
    $this->output .= '\\\\';

    // A raw control byte after the backslash (e.g. a real newline) would make
    // the resulting JSON string invalid, so emit it in \u00XX form instead.
    if (strlen($char) === 1 && ord($char) < 0x20) {
        $this->output .= sprintf('\\u%04x', ord($char));

        return 0;
    }

    $this->output .= $char;

    return 0;
}
10141077
10151078 /**
@@ -1034,6 +1097,7 @@ private function removeTrailingComma(): void
10341097 $ trimmed = rtrim ($ this ->output );
10351098
10361099 if (str_ends_with ($ trimmed , ', ' )) {
1100+ $ this ->log ('Removing trailing comma ' );
10371101 $ this ->output = substr ($ trimmed , 0 , -1 );
10381102 }
10391103 }
@@ -1382,4 +1446,32 @@ private function getSmartQuoteLength(string $json, int $pos): int
13821446
13831447 return 0 ;
13841448 }
1449+
/**
 * Emit a debug-level record describing one repair action.
 *
 * Nothing happens when no logger has been set. Each record carries the
 * current position and a context snippet; entries passed in $context are
 * merged on top of those defaults and win on key collisions.
 *
 * @param string $message Description of the repair action
 * @param array<string, mixed> $context Additional context data
 */
private function log(string $message, array $context = []): void
{
    $logger = $this->logger;

    // Bail out before building the snippet so an absent logger costs nothing.
    if ($logger === null) {
        return;
    }

    $defaults = [
        'position' => $this->pos,
        'context' => $this->getContextSnippet(),
    ];

    $logger->debug($message, array_merge($defaults, $context));
}
1463+
/**
 * Build a short excerpt of the input around the current position for log output.
 *
 * Up to $window bytes on either side of $this->pos are taken from $this->json
 * and joined with a ">>>" marker at the position itself. Offsets are byte
 * offsets (strlen/substr), not character offsets.
 *
 * NOTE(review): repair() scans a copy of the input that may have been
 * extracted from a markdown block; if $this->pos indexes that copy, the
 * marker may not line up with $this->json — confirm.
 *
 * @param int $window Maximum number of bytes to show on each side of the position
 */
private function getContextSnippet(int $window = 15): string
{
    $source = $this->json;
    $cursor = $this->pos;

    // Clamp the excerpt to the bounds of the input.
    $from = max(0, $cursor - $window);
    $to = min(strlen($source), $cursor + $window);

    $leading = substr($source, $from, $cursor - $from);
    $trailing = substr($source, $cursor, $to - $cursor);

    return $leading . '>>>' . $trailing;
}
13851477}
0 commit comments