Skip to content

Commit 63f2c70

Browse files
authored
fix for cyrillic chars (#10)
1 parent 44f0411 commit 63f2c70

2 files changed

Lines changed: 22 additions & 7 deletions

File tree

src/Potaka/BbCode/Tokenizer/Tokenizer.php

Lines changed: 9 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -15,12 +15,14 @@ public function tokenize($text)
1515
{
1616
$this->rootTag = new Tag(null);
1717
$curentElement = 0;
18-
$textLenght = mb_strlen($text);
18+
$textAsArray = preg_split('//u', $text);
19+
$textLenght = count($textAsArray);
20+
1921
$bufferText = '';
2022
$currentTag = $this->rootTag;
2123
while ($curentElement < $textLenght) {
22-
$currentChar = $text[$curentElement];
23-
if ($currentChar === '[' && ($curentElement+1 < $textLenght) && $text[$curentElement+1] !== ']') {
24+
$currentChar = $textAsArray[$curentElement];
25+
if ($currentChar === '[' && ($curentElement+1 < $textLenght) && $textAsArray[$curentElement+1] !== ']') {
2426
// get the close bracket
2527
$closeTagFound = false;
2628
// [tag=argument]
@@ -30,16 +32,16 @@ public function tokenize($text)
3032
$tagText = '';
3133
$tmpPosion++;
3234
while ($tmpPosion < $textLenght) {
33-
if ($text[$tmpPosion] === ']') {
35+
if ($textAsArray[$tmpPosion] === ']') {
3436
$closeTagFound = true;
3537
break;
36-
} elseif ($text[$tmpPosion] === '=') {
38+
} elseif ($textAsArray[$tmpPosion] === '=') {
3739
$argumentFound = true;
3840
} else {
3941
if ($argumentFound) {
40-
$argumentValue .= $text[$tmpPosion];
42+
$argumentValue .= $textAsArray[$tmpPosion];
4143
} else {
42-
$tagText .= $text[$tmpPosion];
44+
$tagText .= $textAsArray[$tmpPosion];
4345
}
4446
}
4547
$tmpPosion++;

tests/phpUnit/Test/Potaka/BbCode/Tokenizer/TokenizerTest.php

Lines changed: 13 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -218,4 +218,17 @@ public function testTokenizerReuse()
218218

219219
$this->assertSameTokenized($expectedReuse, $resultReuse);
220220
}
221+
222+
/**
 * Checks that a tag-free Cyrillic string passes through the tokenizer intact.
 *
 * The expected tree built here is a root Tag(null) with exactly one child
 * Tag(null) whose text is the complete, unmodified input string.
 */
public function testCyrillicChars()
223+
{
224+
$tokenizer = new Tokenizer();
225+
// Input consists only of Cyrillic letters and a space: no '[' or ']' brackets.
$text = 'удебелен текст';
226+
$result = $tokenizer->tokenize($text);
227+
// Root node of the expected tree.
$expected = new Tag(null);
228+
$expected->addTag(
229+
// Single text child carrying the same literal that was tokenized.
(new Tag(null))->setText('удебелен текст')
230+
);
231+
232+
// NOTE(review): assertSameTokenized is defined outside this view; presumably it
// compares the two tag trees structurally — confirm against the test class.
$this->assertSameTokenized($expected, $result);
233+
}
221234
}

0 commit comments

Comments
 (0)