Skip to content

Commit ca000e4

Browse files
committed
Added expandable_blockquote entity - added date_time entity - fixed ambiguous entities separator for MarkdownV2 (MV2) - fixed separation of adjacent blockquote entities
1 parent ba65341 commit ca000e4

1 file changed

Lines changed: 105 additions & 22 deletions

File tree

src/EntityDecoder.php

Lines changed: 105 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919

2020
class EntityDecoder
2121
{
22-
private $entitiesToParse = ['bold', 'italic', 'code', 'pre', 'text_mention', 'text_link', 'strikethrough', 'underline', 'spoiler', 'blockquote', 'custom_emoji'];
22+
private $entitiesToParse = ['bold', 'italic', 'code', 'pre', 'text_mention', 'text_link', 'strikethrough', 'underline', 'spoiler', 'blockquote', 'expandable_blockquote', 'custom_emoji', 'date_time'];
2323
private $entities = [];
2424
private $style;
2525

@@ -61,6 +61,10 @@ public function decode($message): string
6161
{
6262
$this->entities = $message->caption_entities;
6363
}
64+
if(empty($this->entities) && empty($message->caption_entities))
65+
{
66+
$this->entities = [];
67+
}
6468
//Get internal encoding
6569
$prevencoding = mb_internal_encoding();
6670
//Set encoding to UTF-8
@@ -78,7 +82,7 @@ public function decode($message): string
7882
//split text in char array with UTF-16 code units length
7983
$arrayText = $this->splitCharAndLength($textToDecode);
8084
$finalText = "";
81-
85+
$lastBlackquoteClosingPosition = false; //Var to manage the case of a second blockquote entity starts immediately after a blockquote entity closed (see Telegram docs for MarkdownV2 format options)
8286
$openedEntities = [];
8387
$currenPosition = 0;
8488
//Cycle characters one by one to calculate begins and ends of entities and escape special chars
@@ -90,7 +94,16 @@ public function decode($message): string
9094
{
9195
foreach ($entityCheckStart as $stEntity)
9296
{
93-
$startChar = $this->getEntityStartString($stEntity);
97+
$blockquoteJustClosed = false;
98+
if($stEntity->type === 'blockquote' || $stEntity->type === 'expandable_blockquote')
99+
{
100+
if($lastBlackquoteClosingPosition !== false && $lastBlackquoteClosingPosition == $stEntity->offset - 1)
101+
{
102+
$blockquoteJustClosed = true;
103+
$lastBlackquoteClosingPosition = false;
104+
}
105+
}
106+
$startChar = $this->getEntityStartString($stEntity, $blockquoteJustClosed);
94107
$openedEntities[] = $stEntity;
95108
$finalText .= $startChar;
96109
}
@@ -102,12 +115,22 @@ public function decode($message): string
102115
{
103116
$finalText .= $this->escapeSpecialChars($arrayText[$i]['char'], true, $openedEntities);
104117
}
105-
if ($this->style == 'MarkdownV2' && $this->checkMarkdownV2AmbiguousEntities($entityCheckStop))
118+
if ($this->style == 'MarkdownV2')
106119
{
107-
$stopChar = "_\r__";
108-
$finalText .= $stopChar;
109-
array_pop($openedEntities);
110-
array_pop($openedEntities);
120+
if($this->checkMarkdownV2AmbiguousEntities($entityCheckStop))
121+
{
122+
$stopChar = "_**__";
123+
$finalText .= $stopChar;
124+
array_pop($openedEntities);
125+
array_pop($openedEntities);
126+
}
127+
foreach ($entityCheckStop as $stEntity)
128+
{
129+
if($stEntity->type === 'blockquote' || $stEntity->type === 'expandable_blockquote')
130+
{
131+
$lastBlackquoteClosingPosition = $stEntity->offset + $stEntity->length;
132+
}
133+
}
111134
}
112135
foreach ($entityCheckStop as $stEntity)
113136
{
@@ -161,6 +184,10 @@ public function extractAllEntities($message): array
161184
{
162185
$this->entities = $message->caption_entities;
163186
}
187+
if(empty($this->entities) && empty($message->caption_entities))
188+
{
189+
$this->entities = [];
190+
}
164191
//Get internal encoding
165192
$prevencoding = mb_internal_encoding();
166193
//Set encoding to UTF-8
@@ -177,7 +204,7 @@ public function extractAllEntities($message): array
177204
}
178205
$arrayText = $this->splitCharAndLength($textToDecode);
179206
$entitytext = "";
180-
207+
$lastBlackquoteClosingPosition = false; //Var to manage the case of a second blockquote entity starts immediately after a blockquote entity closed (see Telegram docs for MarkdownV2 format options)
181208
$openedEntities = [];
182209
$currenPosition = 0;
183210
//Cycle characters one by one to calculate begins and ends of entities and escape special chars
@@ -189,7 +216,16 @@ public function extractAllEntities($message): array
189216
{
190217
foreach ($entityCheckStart as $stEntity)
191218
{
192-
$startChar = $this->getEntityStartString($stEntity);
219+
$blockquoteJustClosed = false;
220+
if($stEntity->type === 'blockquote' || $stEntity->type === 'expandable_blockquote')
221+
{
222+
if($lastBlackquoteClosingPosition !== false && $lastBlackquoteClosingPosition == $stEntity->offset - 1)
223+
{
224+
$blockquoteJustClosed = true;
225+
$lastBlackquoteClosingPosition = false;
226+
}
227+
}
228+
$startChar = $this->getEntityStartString($stEntity, $blockquoteJustClosed);
193229
$openedEntities[] = $stEntity;
194230
$entitytext .= $startChar;
195231
}
@@ -201,16 +237,26 @@ public function extractAllEntities($message): array
201237
{
202238
$entitytext .= $this->escapeSpecialChars($arrayText[$i]['char'], true, $openedEntities);
203239
}
204-
if ($this->style == 'MarkdownV2' && $this->checkMarkdownV2AmbiguousEntities($entityCheckStop))
240+
if ($this->style == 'MarkdownV2')
205241
{
206-
$stopChar = "_\r__";
207-
$entitytext .= $stopChar;
208-
array_pop($openedEntities);
209-
array_pop($openedEntities);
210-
if(empty($openedEntities))
242+
if($this->checkMarkdownV2AmbiguousEntities($entityCheckStop))
211243
{
212-
$entitiesArray[] = $entitytext;
213-
$entitytext = "";
244+
$stopChar = "_**__";
245+
$entitytext .= $stopChar;
246+
array_pop($openedEntities);
247+
array_pop($openedEntities);
248+
if(empty($openedEntities))
249+
{
250+
$entitiesArray[] = $entitytext;
251+
$entitytext = "";
252+
}
253+
}
254+
foreach ($entityCheckStop as $stEntity)
255+
{
256+
if($stEntity->type === 'blockquote' || $stEntity->type === 'expandable_blockquote')
257+
{
258+
$lastBlackquoteClosingPosition = $stEntity->offset + $stEntity->length;
259+
}
214260
}
215261
}
216262
foreach ($entityCheckStop as $stEntity)
@@ -345,7 +391,7 @@ protected function escapeSpecialChars($char, $isEntityOpen, $entities) {
345391
{
346392
$isBlockquoteOpen = false;
347393
foreach ($entities as $entity) {
348-
if ($entity->type === 'blockquote') {
394+
if ($entity->type === 'blockquote' || $entity->type === 'expandable_blockquote') {
349395
$isBlockquoteOpen = true;
350396
break;
351397
}
@@ -366,9 +412,9 @@ protected function escapeSpecialChars($char, $isEntityOpen, $entities) {
366412
}
367413

368414
/**
369-
* Get the begin string of the entity for the choosen style
415+
* Get the opening string of the entity for the chosen style
370416
*/
371-
protected function getEntityStartString($entity)
417+
protected function getEntityStartString($entity, $isBlockquoteJustClosed = false)
372418
{
373419
$startString = '';
374420
if ($this->style == 'Markdown')
@@ -471,6 +517,16 @@ protected function getEntityStartString($entity)
471517
$startString = '<blockquote>';
472518
break;
473519
}
520+
case 'expandable_blockquote':
521+
{
522+
$startString = '<blockquote expandable>';
523+
break;
524+
}
525+
case 'date_time':
526+
{
527+
$startString = '<tg-time unix="'.$entity->unix_time.'"'.(!empty($entity->date_time_format) ? ' format="'.$entity->date_time_format.'"' : '' ).'>';
528+
break;
529+
}
474530
}
475531
}
476532
else if ($this->style == 'MarkdownV2')
@@ -529,8 +585,14 @@ protected function getEntityStartString($entity)
529585
break;
530586
}
531587
case 'blockquote':
588+
case 'expandable_blockquote':
589+
{
590+
$startString = ($isBlockquoteJustClosed ? "**" : "").'>';
591+
break;
592+
}
593+
case 'date_time':
532594
{
533-
$startString = '>';
595+
$startString = '![';
534596
break;
535597
}
536598
}
@@ -658,10 +720,16 @@ protected function getEntityStopString($entity)
658720
break;
659721
}
660722
case 'blockquote':
723+
case 'expandable_blockquote':
661724
{
662725
$stopString = '</blockquote>';
663726
break;
664727
}
728+
case 'date_time':
729+
{
730+
$stopString = '</tg-time>';
731+
break;
732+
}
665733
}
666734
}
667735
else if ($this->style == 'MarkdownV2')
@@ -713,11 +781,26 @@ protected function getEntityStopString($entity)
713781
$stopString = ']('.$entity->url.')';
714782
break;
715783
}
784+
case 'blockquote':
785+
{
786+
$stopString = "\n";
787+
break;
788+
}
789+
case 'expandable_blockquote':
790+
{
791+
$stopString = "||\n";
792+
break;
793+
}
716794
case 'custom_emoji':
717795
{
718796
$stopString = '](tg://emoji?id='.$entity->custom_emoji_id.')';
719797
break;
720798
}
799+
case 'date_time':
800+
{
801+
$stopString = '](tg://time?unix='.$entity->unix_time.(!empty($entity->date_time_format) ? '&format='.$entity->date_time_format : '' ).')';
802+
break;
803+
}
721804
}
722805
}
723806
return $stopString;

0 commit comments

Comments
 (0)