From 324e7836b59ea661ad799db3fd26a19b758b2c0e Mon Sep 17 00:00:00 2001 From: Jesse Donat Date: Mon, 14 Apr 2025 23:20:11 -0500 Subject: [PATCH 1/4] Make DotNotationParser more efficient and robust --- src/DotNotationParser.php | 80 +++++++++++++++++++++++++-------------- 1 file changed, 52 insertions(+), 28 deletions(-) diff --git a/src/DotNotationParser.php b/src/DotNotationParser.php index b8a02d6..5d8831c 100644 --- a/src/DotNotationParser.php +++ b/src/DotNotationParser.php @@ -18,19 +18,17 @@ class DotNotationParser { */ public function parse( string $path ) : array { $out = []; - $chars = preg_split('/(?iterateGraphemes($path); - for(;;) { - $token = current($chars); - if( $token === false ) { - break; - } + while( $chars->valid() ) { + $token = $chars->current(); + $key = $chars->key(); switch( $token ) { case '.': throw new ParseException( - sprintf('failed to parse path, expected string, got "%s" at %d', $token, key($chars)), - key($chars), + sprintf('failed to parse path, expected string, got "%s" at %d', $token, $key), + $key, ParseException::CODE_UNEXPECTED_CHARACTER ); case '"': @@ -46,61 +44,87 @@ public function parse( string $path ) : array { } /** - * @param string[] $chars array of unicode characters by reference + * @param \Iterator $chars Generator of Unicode characters */ - private function scanString( array &$chars ) : string { + private function scanString( \Iterator $chars ) : string { $buff = ''; - for(;;) { - $token = current($chars); - if( $token === false || $token === '.' ) { - next($chars); + while( $chars->valid() ) { + $token = $chars->current(); + if( $token === '.' 
) { + $chars->next(); break; } $buff .= $token; - next($chars); + $chars->next(); } return $buff; } /** - * @param string[] $chars array of unicode characters by reference + * @param \Iterator $chars array of Unicode characters by reference */ - private function scanQuotedString( array &$chars ) : string { + private function scanQuotedString( \Iterator $chars ) : string { $buff = ''; - next($chars); - for(;;) { - $token = current($chars); - if( $token === false ) { + $chars->next(); + $lastKey = 0; + for( ; ; ) { + $token = $chars->current(); + $key = $chars->key(); + + if( !$chars->valid() ) { throw new ParseException( 'failed to parse path, expected ", got EOF', - key($chars) ?: count($chars), + $key ?? ($lastKey + 1), ParseException::CODE_UNEXPECTED_EOF ); } if( $token === '"' ) { - $next = next($chars); - if( $next === false || $next === '.' ) { - next($chars); + $chars->next(); + $next = $chars->current(); + $nextKey = $chars->key(); + + if( !$chars->valid() || $next === '.' ) { + $chars->next(); break; } throw new ParseException( - sprintf('failed to parse path, expected . or EOF, got "%s" at %d', $next, key($chars)), - key($chars), + sprintf('failed to parse path, expected . or EOF, got "%s" at %d', $next, $key), + $nextKey ?? $key, ParseException::CODE_UNEXPECTED_CHARACTER ); } $buff .= $token; - next($chars); + + $lastKey = $key; + $chars->next(); } return $buff; } + /** + * Yields each grapheme (user‑visible “character”) from $s. 
+ * + * @return \Generator + */ + private function iterateGraphemes( string $s ) : \Generator { + $off = 0; + $len = strlen($s); + + while( $off < $len && preg_match('/\X/u', $s, $m, 0, $off) ) { + $g = $m[0]; // one grapheme cluster, UTF‑8 safe + + yield $off => $g; + + $off += strlen($g); // advance by its byte length + } + } + } From 43d4944a606fa28774905cd7023d1386ad92887d Mon Sep 17 00:00:00 2001 From: Jesse Donat Date: Sat, 12 Apr 2025 17:45:00 -0500 Subject: [PATCH 2/4] Handle escaping quotes with backslashes --- src/DotNotationParser.php | 24 ++++++++++- .../DotNotation/DotNotationParserTest.php | 40 ++++++++++--------- 2 files changed, 45 insertions(+), 19 deletions(-) diff --git a/src/DotNotationParser.php b/src/DotNotationParser.php index 5d8831c..094fa60 100644 --- a/src/DotNotationParser.php +++ b/src/DotNotationParser.php @@ -14,6 +14,18 @@ class DotNotationParser { /** * Parse a given dot notation path into it's parts * + * The path is expected to be a string of dot separated keys, where keys can be + * quoted with double quotes. Backslashes are used to escape double quotes inside + * quoted keys. 
+ * + * Examples: + * + * - `'foo.bar.baz'` => `[ 'foo', 'bar', 'baz' ]` + * - `'foo."bar.baz"'` => `[ 'foo', 'bar.baz' ]` + * - `'foo."bar.baz".quux'` => `[ 'foo', 'bar.baz', 'quux' ]` + * - `'foo."bar\"baz".quux'` => `[ 'foo', 'bar"baz', 'quux' ]` + * + * @throws ParseException * @return string[] */ public function parse( string $path ) : array { @@ -70,7 +82,7 @@ private function scanQuotedString( \Iterator $chars ) : string { $buff = ''; $chars->next(); - $lastKey = 0; + $lastKey = $chars->key(); for( ; ; ) { $token = $chars->current(); $key = $chars->key(); @@ -100,6 +112,16 @@ private function scanQuotedString( \Iterator $chars ) : string { ); } + if( $token === '\\' ) { + $chars->next(); + $token = $chars->current(); + $key = $chars->key(); + + if( !$chars->valid() ) { + continue; + } + } + $buff .= $token; $lastKey = $key; diff --git a/test/Quorum/DotNotation/DotNotationParserTest.php b/test/Quorum/DotNotation/DotNotationParserTest.php index e1aece6..c9e3d4e 100644 --- a/test/Quorum/DotNotation/DotNotationParserTest.php +++ b/test/Quorum/DotNotation/DotNotationParserTest.php @@ -20,16 +20,17 @@ public function testParse( string $path, array $result ) : void { ); } - public function parseProvider() : \Generator { - yield [ 'foo.bar.baz', [ 'foo', 'bar', 'baz' ] ]; - - yield [ 'foo."bar.baz"', [ 'foo', 'bar.baz' ] ]; - - yield [ 'foo.bar"baz".2', [ 'foo', 'bar"baz"', '2' ] ]; - - yield [ 'foo.bar.baz.', [ 'foo', 'bar', 'baz' ] ]; - - yield [ '日.本.語', [ '日', '本', '語' ] ]; + public static function parseProvider() : array { + return [ + [ '', [] ], + [ 'foo.bar.baz', [ 'foo', 'bar', 'baz' ] ], + [ 'foo."bar.baz"', [ 'foo', 'bar.baz' ] ], + [ 'foo.bar"baz".2', [ 'foo', 'bar"baz"', '2' ] ], + [ 'foo.bar.baz.', [ 'foo', 'bar', 'baz' ] ], + [ '日.本.語', [ '日', '本', '語' ] ], + [ 'foo."bar\\"baz".quux', [ 'foo', 'bar"baz', 'quux' ] ], + [ 'foo."bar\\\\baz".quux.', [ 'foo', 'bar\\baz', 'quux' ] ], + ]; } /** @@ -48,14 +49,17 @@ public function testUnexpectedCharacters( 
string $path, int $pos ) : void { $this->fail(sprintf('"%s" failed to throw exception', $path)); } - public function unexpectedCharacterProvider() : \Generator { - yield [ 'foo."bar', 8 ]; - - yield [ 'a.foo."bar"baz', 11 ]; - - yield [ '.foo', 0 ]; - - yield [ '.', 0 ]; + public static function unexpectedCharacterProvider() : array { + return [ + [ 'foo."bar', 8 ], + [ 'a.foo."bar"baz', 11 ], + [ '.foo', 0 ], + [ '.', 0 ], + [ 'foo."👨‍👩‍👧‍👦"."broke', 38 ], + [ 'a..', 2 ], + [ 'a..b', 2 ], + [ 'a."\\', 4 ], + ]; } } From 537aed44b9d09e84b5e7c8810ae4023f6fffde3e Mon Sep 17 00:00:00 2001 From: Jesse Donat Date: Sat, 12 Apr 2025 17:45:39 -0500 Subject: [PATCH 3/4] Add types to ParseException class --- src/Exceptions/ParseException.php | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/Exceptions/ParseException.php b/src/Exceptions/ParseException.php index 65b8697..532e84e 100644 --- a/src/Exceptions/ParseException.php +++ b/src/Exceptions/ParseException.php @@ -7,9 +7,12 @@ class ParseException extends \InvalidArgumentException { public const CODE_UNEXPECTED_CHARACTER = 22; public const CODE_UNEXPECTED_EOF = 484; + /** + * @var int The index of the character that caused the exception + */ private $charIndex; - public function __construct( $message, int $charIndex, $code, ?\Throwable $previous = null ) { + public function __construct( string $message, int $charIndex, int $code, ?\Throwable $previous = null ) { parent::__construct($message, $code, $previous); $this->charIndex = $charIndex; From 6ea4bc1c05d0a1d3a86fbe950b3bd673d82414d2 Mon Sep 17 00:00:00 2001 From: Jesse Donat Date: Sat, 12 Apr 2025 17:45:52 -0500 Subject: [PATCH 4/4] Update README --- README.md | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 999d1fa..43883b9 100644 --- a/README.md +++ b/README.md @@ -36,7 +36,20 @@ Parse strings like foo."bar.baz".quux into [ 'foo', 'bar.baz', 'quux' ] function parse(string $path) : 
array ``` -Parse a given dot notation path into it's parts +Parse a given dot notation path into its parts + +The path is expected to be a string of dot separated keys, where keys can be +quoted with double quotes. Backslashes are used to escape double quotes inside +quoted keys. + +##### Examples + +- `'foo.bar.baz'` => `[ 'foo', 'bar', 'baz' ]` +- `'foo."bar.baz"'` => `[ 'foo', 'bar.baz' ]` +- `'foo."bar.baz".quux'` => `[ 'foo', 'bar.baz', 'quux' ]` +- `'foo."bar\"baz".quux'` => `[ 'foo', 'bar"baz', 'quux' ]` + +**Throws**: `\Quorum\DotNotation\Exceptions\ParseException` ##### Returns: