Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 14 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,20 @@ Parse strings like foo."bar.baz".quux into [ 'foo', 'bar.baz', 'quux' ]
function parse(string $path) : array
```

Parse a given dot notation path into its parts
Parse a given dot notation path into its parts

The path is expected to be a string of dot separated keys, where keys can be
quoted with double quotes. Backslashes are used to escape double quotes inside
quoted keys.

##### Examples

- `'foo.bar.baz'` => `[ 'foo', 'bar', 'baz' ]`
- `'foo."bar.baz"'` => `[ 'foo', 'bar.baz' ]`
- `'foo."bar.baz".quux'` => `[ 'foo', 'bar.baz', 'quux' ]`
- `'foo."bar\"baz".quux'` => `[ 'foo', 'bar"baz', 'quux' ]`

**Throws**: `\Quorum\DotNotation\Exceptions\ParseException`

##### Returns:

Expand Down
102 changes: 74 additions & 28 deletions src/DotNotationParser.php
Original file line number Diff line number Diff line change
Expand Up @@ -14,23 +14,33 @@ class DotNotationParser {
/**
* Parse a given dot notation path into its parts
*
* The path is expected to be a string of dot separated keys, where keys can be
* quoted with double quotes. Backslashes are used to escape double quotes inside
* quoted keys.
*
* Examples:
*
* - `'foo.bar.baz'` => `[ 'foo', 'bar', 'baz' ]`
* - `'foo."bar.baz"'` => `[ 'foo', 'bar.baz' ]`
* - `'foo."bar.baz".quux'` => `[ 'foo', 'bar.baz', 'quux' ]`
* - `'foo."bar\"baz".quux'` => `[ 'foo', 'bar"baz', 'quux' ]`
*
* @throws ParseException
* @return string[]
*/
public function parse( string $path ) : array {
$out = [];
$chars = preg_split('/(?<!^)(?!$)/u', $path, -1, PREG_SPLIT_NO_EMPTY) ?: [];
$chars = $this->iterateGraphemes($path);

for(;;) {
$token = current($chars);
if( $token === false ) {
break;
}
while( $chars->valid() ) {
$token = $chars->current();
$key = $chars->key();

switch( $token ) {
case '.':
throw new ParseException(
sprintf('failed to parse path, expected string, got "%s" at %d', $token, key($chars)),
key($chars),
sprintf('failed to parse path, expected string, got "%s" at %d', $token, $key),
$key,
ParseException::CODE_UNEXPECTED_CHARACTER
);
case '"':
Expand All @@ -46,61 +56,97 @@ public function parse( string $path ) : array {
}

/**
 * Collect an unquoted key from the character stream.
 *
 * Tokens are appended until a `.` separator is met or the iterator is exhausted.
 * A terminating `.` is consumed, so the iterator is left on the token that follows it.
 *
 * @param \Iterator<int,string> $chars Iterator of Unicode characters, advanced in place
 * @return string The collected key (empty when the first token is already a `.`)
 */
private function scanString( \Iterator $chars ) : string {
    $segment = '';

    for( ; $chars->valid(); $chars->next() ) {
        $char = $chars->current();

        if( $char === '.' ) {
            // Step over the separator so the caller resumes on the next key.
            $chars->next();

            return $segment;
        }

        $segment .= $char;
    }

    return $segment;
}

/**
 * Scan a double-quoted key and return its unquoted, unescaped contents.
 *
 * A backslash makes the token that follows it literal, so `\"` yields `"` and `\\`
 * yields `\`. The closing quote must be followed by either a `.` separator (which is
 * consumed) or the end of the input.
 *
 * @param \Iterator<int,string> $chars Iterator of Unicode characters keyed by their offset
 *                                     in the path, advanced in place. Presumably positioned
 *                                     on the opening quote: the first token is skipped
 *                                     without being inspected.
 * @throws ParseException When the input ends before the closing quote, or when the closing
 *                        quote is followed by anything other than `.` or the end of input.
 * @return string The contents of the quoted key
 */
private function scanQuotedString( \Iterator $chars ) : string {
    $buff = '';

    // Offset just past the last consumed token, i.e. where EOF would be reported.
    // Keys are offsets into the path rather than a running count, so the token's byte
    // length is added instead of assuming every token is exactly one unit wide.
    $eofIndex = (int)$chars->key() + strlen((string)$chars->current());
    $chars->next();

    while( $chars->valid() ) {
        $token    = $chars->current();
        $eofIndex = $chars->key() + strlen($token);
        $chars->next();

        if( $token === '"' ) {
            if( !$chars->valid() ) {
                return $buff;
            }

            $next = $chars->current();
            if( $next === '.' ) {
                // Consume the separator so the caller resumes on the next key.
                $chars->next();

                return $buff;
            }

            // Report the offending token's own offset in both the message and the index.
            $nextKey = $chars->key();

            throw new ParseException(
                sprintf('failed to parse path, expected . or EOF, got "%s" at %d', $next, $nextKey),
                $nextKey,
                ParseException::CODE_UNEXPECTED_CHARACTER
            );
        }

        if( $token === '\\' ) {
            if( !$chars->valid() ) {
                // Dangling escape at the end of the input: the quote was never closed.
                break;
            }

            // The escaped token is taken literally, whatever it is.
            $token    = $chars->current();
            $eofIndex = $chars->key() + strlen($token);
            $chars->next();
        }

        $buff .= $token;
    }

    throw new ParseException(
        'failed to parse path, expected ", got EOF',
        $eofIndex,
        ParseException::CODE_UNEXPECTED_EOF
    );
}

/**
 * Lazily walks $s one grapheme cluster (PCRE `\X`) at a time.
 *
 * Each yielded value is keyed by the byte offset at which it starts in $s, so keys
 * are not consecutive when the input contains multi-byte characters.
 *
 * If preg_match() does not report a match (it returns 0 or false), iteration simply
 * stops; no error is raised here.
 *
 * @return \Generator<int,string>
 */
private function iterateGraphemes( string $s ) : \Generator {
    $total = strlen($s);

    // The step expression advances by the byte length of the cluster just yielded.
    for( $pos = 0; $pos < $total; $pos += strlen($match[0]) ) {
        if( !preg_match('/\X/u', $s, $match, 0, $pos) ) {
            return;
        }

        yield $pos => $match[0];
    }
}

}
5 changes: 4 additions & 1 deletion src/Exceptions/ParseException.php
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,12 @@ class ParseException extends \InvalidArgumentException {
public const CODE_UNEXPECTED_CHARACTER = 22;
public const CODE_UNEXPECTED_EOF = 484;

/**
* @var int The index of the character that caused the exception
*/
private $charIndex;

public function __construct( $message, int $charIndex, $code, ?\Throwable $previous = null ) {
public function __construct( string $message, int $charIndex, int $code, ?\Throwable $previous = null ) {
parent::__construct($message, $code, $previous);

$this->charIndex = $charIndex;
Expand Down
40 changes: 22 additions & 18 deletions test/Quorum/DotNotation/DotNotationParserTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -20,16 +20,17 @@ public function testParse( string $path, array $result ) : void {
);
}

/**
 * Data sets for testParse: each entry is [ path, expected parsed result ].
 */
public static function parseProvider() : array {
    $cases = [];

    $cases[] = [ '', [] ];
    $cases[] = [ 'foo.bar.baz', [ 'foo', 'bar', 'baz' ] ];
    $cases[] = [ 'foo."bar.baz"', [ 'foo', 'bar.baz' ] ];
    $cases[] = [ 'foo.bar"baz".2', [ 'foo', 'bar"baz"', '2' ] ];
    $cases[] = [ 'foo.bar.baz.', [ 'foo', 'bar', 'baz' ] ];
    $cases[] = [ '日.本.語', [ '日', '本', '語' ] ];
    $cases[] = [ 'foo."bar\\"baz".quux', [ 'foo', 'bar"baz', 'quux' ] ];
    $cases[] = [ 'foo."bar\\\\baz".quux.', [ 'foo', 'bar\\baz', 'quux' ] ];

    return $cases;
}

/**
Expand All @@ -48,14 +49,17 @@ public function testUnexpectedCharacters( string $path, int $pos ) : void {
$this->fail(sprintf('"%s" failed to throw exception', $path));
}

/**
 * Data sets for testUnexpectedCharacters: each entry is [ path, expected position ].
 */
public static function unexpectedCharacterProvider() : array {
    $cases = [];

    $cases[] = [ 'foo."bar', 8 ];
    $cases[] = [ 'a.foo."bar"baz', 11 ];
    $cases[] = [ '.foo', 0 ];
    $cases[] = [ '.', 0 ];
    $cases[] = [ 'foo."👨‍👩‍👧‍👦"."broke', 38 ];
    $cases[] = [ 'a..', 2 ];
    $cases[] = [ 'a..b', 2 ];
    $cases[] = [ 'a."\\', 4 ];

    return $cases;
}

}