diff --git a/README.md b/README.md index 999d1fa..43883b9 100644 --- a/README.md +++ b/README.md @@ -36,7 +36,20 @@ Parse strings like foo."bar.baz".quux into [ 'foo', 'bar.baz', 'quux' ] function parse(string $path) : array ``` -Parse a given dot notation path into it's parts +Parse a given dot notation path into it's parts + +The path is expected to be a string of dot separated keys, where keys can be +quoted with double quotes. Backslashes are used to escape double quotes inside +quoted keys. + +##### Examples + +- `'foo.bar.baz'` => `[ 'foo', 'bar', 'baz' ]` +- `'foo."bar.baz"'` => `[ 'foo', 'bar.baz' ]` +- `'foo."bar.baz".quux'` => `[ 'foo', 'bar.baz', 'quux' ]` +- `'foo."bar\"baz".quux'` => `[ 'foo', 'bar"baz', 'quux' ]` + +**Throws**: `\Quorum\DotNotation\Exceptions\ParseException` ##### Returns: diff --git a/src/DotNotationParser.php b/src/DotNotationParser.php index b8a02d6..094fa60 100644 --- a/src/DotNotationParser.php +++ b/src/DotNotationParser.php @@ -14,23 +14,33 @@ class DotNotationParser { /** * Parse a given dot notation path into it's parts * + * The path is expected to be a string of dot separated keys, where keys can be + * quoted with double quotes. Backslashes are used to escape double quotes inside + * quoted keys. 
+ * + * Examples: + * + * - `'foo.bar.baz'` => `[ 'foo', 'bar', 'baz' ]` + * - `'foo."bar.baz"'` => `[ 'foo', 'bar.baz' ]` + * - `'foo."bar.baz".quux'` => `[ 'foo', 'bar.baz', 'quux' ]` + * - `'foo."bar\"baz".quux'` => `[ 'foo', 'bar"baz', 'quux' ]` + * + * @throws ParseException * @return string[] */ public function parse( string $path ) : array { $out = []; - $chars = preg_split('/(?iterateGraphemes($path); - for(;;) { - $token = current($chars); - if( $token === false ) { - break; - } + while( $chars->valid() ) { + $token = $chars->current(); + $key = $chars->key(); switch( $token ) { case '.': throw new ParseException( - sprintf('failed to parse path, expected string, got "%s" at %d', $token, key($chars)), - key($chars), + sprintf('failed to parse path, expected string, got "%s" at %d', $token, $key), + $key, ParseException::CODE_UNEXPECTED_CHARACTER ); case '"': @@ -46,61 +56,97 @@ public function parse( string $path ) : array { } /** - * @param string[] $chars array of unicode characters by reference + * @param \Iterator $chars Generator of Unicode characters */ - private function scanString( array &$chars ) : string { + private function scanString( \Iterator $chars ) : string { $buff = ''; - for(;;) { - $token = current($chars); - if( $token === false || $token === '.' ) { - next($chars); + while( $chars->valid() ) { + $token = $chars->current(); + if( $token === '.' 
) { + $chars->next(); break; } $buff .= $token; - next($chars); + $chars->next(); } return $buff; } /** - * @param string[] $chars array of unicode characters by reference + * @param \Iterator $chars Iterator of Unicode characters, advanced in place */ - private function scanQuotedString( array &$chars ) : string { + private function scanQuotedString( \Iterator $chars ) : string { $buff = ''; - next($chars); - for(;;) { - $token = current($chars); - if( $token === false ) { + $chars->next(); + $lastKey = $chars->key(); + for( ; ; ) { + $token = $chars->current(); + $key = $chars->key(); + + if( !$chars->valid() ) { throw new ParseException( 'failed to parse path, expected ", got EOF', - key($chars) ?: count($chars), + $key ?? ($lastKey + 1), ParseException::CODE_UNEXPECTED_EOF ); } if( $token === '"' ) { - $next = next($chars); - if( $next === false || $next === '.' ) { - next($chars); + $chars->next(); + $next = $chars->current(); + $nextKey = $chars->key(); + + if( !$chars->valid() || $next === '.' ) { + $chars->next(); break; } throw new ParseException( - sprintf('failed to parse path, expected . or EOF, got "%s" at %d', $next, key($chars)), - key($chars), + sprintf('failed to parse path, expected . or EOF, got "%s" at %d', $next, $nextKey), + $nextKey ?? $key, ParseException::CODE_UNEXPECTED_CHARACTER ); } + if( $token === '\\' ) { + $chars->next(); + $token = $chars->current(); + $key = $chars->key(); + + if( !$chars->valid() ) { + continue; + } + } + $buff .= $token; - next($chars); + + $lastKey = $key; + $chars->next(); } return $buff; } + /** + * Yields each grapheme (user‑visible “character”) from $s.
+ * + * @return \Generator + */ + private function iterateGraphemes( string $s ) : \Generator { + $off = 0; + $len = strlen($s); + + while( $off < $len && preg_match('/\X/u', $s, $m, 0, $off) ) { + $g = $m[0]; // one grapheme cluster, UTF‑8 safe + + yield $off => $g; + + $off += strlen($g); // advance by its byte length + } + } + } diff --git a/src/Exceptions/ParseException.php b/src/Exceptions/ParseException.php index 65b8697..532e84e 100644 --- a/src/Exceptions/ParseException.php +++ b/src/Exceptions/ParseException.php @@ -7,9 +7,12 @@ class ParseException extends \InvalidArgumentException { public const CODE_UNEXPECTED_CHARACTER = 22; public const CODE_UNEXPECTED_EOF = 484; + /** + * @var int The index of the character that caused the exception + */ private $charIndex; - public function __construct( $message, int $charIndex, $code, ?\Throwable $previous = null ) { + public function __construct( string $message, int $charIndex, int $code, ?\Throwable $previous = null ) { parent::__construct($message, $code, $previous); $this->charIndex = $charIndex; diff --git a/test/Quorum/DotNotation/DotNotationParserTest.php b/test/Quorum/DotNotation/DotNotationParserTest.php index e1aece6..c9e3d4e 100644 --- a/test/Quorum/DotNotation/DotNotationParserTest.php +++ b/test/Quorum/DotNotation/DotNotationParserTest.php @@ -20,16 +20,17 @@ public function testParse( string $path, array $result ) : void { ); } - public function parseProvider() : \Generator { - yield [ 'foo.bar.baz', [ 'foo', 'bar', 'baz' ] ]; - - yield [ 'foo."bar.baz"', [ 'foo', 'bar.baz' ] ]; - - yield [ 'foo.bar"baz".2', [ 'foo', 'bar"baz"', '2' ] ]; - - yield [ 'foo.bar.baz.', [ 'foo', 'bar', 'baz' ] ]; - - yield [ '日.本.語', [ '日', '本', '語' ] ]; + public static function parseProvider() : array { + return [ + [ '', [] ], + [ 'foo.bar.baz', [ 'foo', 'bar', 'baz' ] ], + [ 'foo."bar.baz"', [ 'foo', 'bar.baz' ] ], + [ 'foo.bar"baz".2', [ 'foo', 'bar"baz"', '2' ] ], + [ 'foo.bar.baz.', [ 'foo', 'bar', 'baz' ] ], + [ 
'日.本.語', [ '日', '本', '語' ] ], + [ 'foo."bar\\"baz".quux', [ 'foo', 'bar"baz', 'quux' ] ], + [ 'foo."bar\\\\baz".quux.', [ 'foo', 'bar\\baz', 'quux' ] ], + ]; } /** @@ -48,14 +49,17 @@ public function testUnexpectedCharacters( string $path, int $pos ) : void { $this->fail(sprintf('"%s" failed to throw exception', $path)); } - public function unexpectedCharacterProvider() : \Generator { - yield [ 'foo."bar', 8 ]; - - yield [ 'a.foo."bar"baz', 11 ]; - - yield [ '.foo', 0 ]; - - yield [ '.', 0 ]; + public static function unexpectedCharacterProvider() : array { + return [ + [ 'foo."bar', 8 ], + [ 'a.foo."bar"baz', 11 ], + [ '.foo', 0 ], + [ '.', 0 ], + [ 'foo."👨‍👩‍👧‍👦"."broke', 38 ], + [ 'a..', 2 ], + [ 'a..b', 2 ], + [ 'a."\\', 4 ], + ]; } }