From 09c86ed0505b12ee1622e42da402fb6f79745130 Mon Sep 17 00:00:00 2001 From: mscherer Date: Sun, 30 Nov 2025 03:17:49 +0100 Subject: [PATCH 1/5] Add Creole-style |= table header syntax Adds support for marking header cells with |= prefix instead of requiring a separator row. This is simpler and more intuitive, especially for users coming from wiki markup. Features: - |= Cell marks a header cell - |=< Left-aligned header - |=> Right-aligned header - |=~ Center-aligned header The traditional separator row syntax continues to work unchanged. See: https://github.com/jgm/djot/issues/354 --- src/Parser/BlockParser.php | 44 +++++++++++-- tests/TestCase/Parser/BlockParserTest.php | 80 +++++++++++++++++++++++ 2 files changed, 120 insertions(+), 4 deletions(-) diff --git a/src/Parser/BlockParser.php b/src/Parser/BlockParser.php index a8c705b..117d9d3 100644 --- a/src/Parser/BlockParser.php +++ b/src/Parser/BlockParser.php @@ -2229,13 +2229,49 @@ protected function tryParseTable(Node $parent, array $lines, int $start): ?int } // Parse regular row - $row = new TableRow(false); $cells = $this->parseTableCells($currentLine); + $rowHasHeaderCell = false; + $parsedCells = []; foreach ($cells as $index => $cellContent) { - $alignment = $alignments[$index] ?? TableCell::ALIGN_DEFAULT; - $cell = new TableCell(false, $alignment); - $this->inlineParser->parse($cell, trim($cellContent), $i); + $trimmed = trim($cellContent); + $isHeader = false; + $cellAlignment = $alignments[$index] ?? 
TableCell::ALIGN_DEFAULT; + + // Check for |= header cell syntax (Creole-style) + // Supports: |= Header |=< Left |=> Right |=~ Center + if (str_starts_with($trimmed, '=')) { + $isHeader = true; + $rowHasHeaderCell = true; + $trimmed = substr($trimmed, 1); // Remove = + + // Check for alignment marker after = + if (str_starts_with($trimmed, '<')) { + $cellAlignment = TableCell::ALIGN_LEFT; + $trimmed = substr($trimmed, 1); + } elseif (str_starts_with($trimmed, '>')) { + $cellAlignment = TableCell::ALIGN_RIGHT; + $trimmed = substr($trimmed, 1); + } elseif (str_starts_with($trimmed, '~')) { + $cellAlignment = TableCell::ALIGN_CENTER; + $trimmed = substr($trimmed, 1); + } + + $cellContent = $trimmed; + } + + $parsedCells[] = [ + 'content' => trim($cellContent), + 'isHeader' => $isHeader, + 'alignment' => $cellAlignment, + ]; + } + + // Create the row (header row if any cell has |= syntax) + $row = new TableRow($rowHasHeaderCell); + foreach ($parsedCells as $cellData) { + $cell = new TableCell($cellData['isHeader'], $cellData['alignment']); + $this->inlineParser->parse($cell, $cellData['content'], $i); $row->appendChild($cell); } diff --git a/tests/TestCase/Parser/BlockParserTest.php b/tests/TestCase/Parser/BlockParserTest.php index 20a1e38..f5dc494 100644 --- a/tests/TestCase/Parser/BlockParserTest.php +++ b/tests/TestCase/Parser/BlockParserTest.php @@ -12,6 +12,8 @@ use Djot\Node\Block\ListBlock; use Djot\Node\Block\Paragraph; use Djot\Node\Block\Table; +use Djot\Node\Block\TableCell; +use Djot\Node\Block\TableRow; use Djot\Node\Block\ThematicBreak; use Djot\Node\Document; use Djot\Parser\BlockParser; @@ -204,6 +206,84 @@ public function testParseTable(): void $this->assertInstanceOf(Table::class, $doc->getChildren()[0]); } + public function testParseTableWithEqualsHeaderSyntax(): void + { + // Creole-style |= header syntax (no separator row needed) + $doc = $this->parser->parse("|= Name |= Age |\n| Alice | 28 |"); + + $this->assertCount(1, $doc->getChildren()); + 
$table = $doc->getChildren()[0]; + $this->assertInstanceOf(Table::class, $table); + + $rows = $table->getChildren(); + $this->assertCount(2, $rows); + + // First row should be a header row + $headerRow = $rows[0]; + $this->assertInstanceOf(TableRow::class, $headerRow); + $this->assertTrue($headerRow->isHeader()); + + // Header cells should be marked as headers + $headerCells = $headerRow->getChildren(); + $this->assertCount(2, $headerCells); + $this->assertInstanceOf(TableCell::class, $headerCells[0]); + $this->assertTrue($headerCells[0]->isHeader()); + $this->assertTrue($headerCells[1]->isHeader()); + + // Second row should be a data row + $dataRow = $rows[1]; + $this->assertInstanceOf(TableRow::class, $dataRow); + $this->assertFalse($dataRow->isHeader()); + } + + public function testParseTableWithEqualsHeaderAlignment(): void + { + // |=< left, |=> right, |=~ center + $doc = $this->parser->parse("|=< Left |=> Right |=~ Center |\n| A | B | C |"); + + $table = $doc->getChildren()[0]; + $this->assertInstanceOf(Table::class, $table); + + $headerRow = $table->getChildren()[0]; + $cells = $headerRow->getChildren(); + + $this->assertSame(TableCell::ALIGN_LEFT, $cells[0]->getAlignment()); + $this->assertSame(TableCell::ALIGN_RIGHT, $cells[1]->getAlignment()); + $this->assertSame(TableCell::ALIGN_CENTER, $cells[2]->getAlignment()); + } + + public function testParseTableWithMixedHeaderCells(): void + { + // Mix of header and regular cells in a row + $doc = $this->parser->parse("|= Header | Regular |\n| Data | Data |"); + + $table = $doc->getChildren()[0]; + $rows = $table->getChildren(); + + // Row with any header cell is marked as header row + $firstRow = $rows[0]; + $this->assertTrue($firstRow->isHeader()); + + $cells = $firstRow->getChildren(); + $this->assertTrue($cells[0]->isHeader()); + $this->assertFalse($cells[1]->isHeader()); + } + + public function testParseTableWithEqualsHeaderNoSeparatorNeeded(): void + { + // Unlike traditional tables, |= syntax doesn't need 
separator row + $doc = $this->parser->parse("|= A |= B |\n| 1 | 2 |\n| 3 | 4 |"); + + $table = $doc->getChildren()[0]; + $rows = $table->getChildren(); + + // Should have 3 rows (1 header + 2 data), no separator consumed + $this->assertCount(3, $rows); + $this->assertTrue($rows[0]->isHeader()); + $this->assertFalse($rows[1]->isHeader()); + $this->assertFalse($rows[2]->isHeader()); + } + public function testParseBlockAttributes(): void { $doc = $this->parser->parse("{.highlight}\n# Heading"); From ea5d384b5b790ccbf40bbf298da1fdb66fc8541e Mon Sep 17 00:00:00 2001 From: mscherer Date: Wed, 3 Dec 2025 01:09:48 +0100 Subject: [PATCH 2/5] Propagate header cell alignment to column when no separator row When using |=< |=> |=~ alignment markers on header cells, the alignment now applies to the entire column (subsequent data rows), not just the header cell itself. Separator row alignment still takes precedence if present. --- src/Parser/BlockParser.php | 10 +++++ tests/TestCase/Parser/BlockParserTest.php | 45 +++++++++++++++++++++++ 2 files changed, 55 insertions(+) diff --git a/src/Parser/BlockParser.php b/src/Parser/BlockParser.php index 117d9d3..4628f5a 100644 --- a/src/Parser/BlockParser.php +++ b/src/Parser/BlockParser.php @@ -2246,15 +2246,25 @@ protected function tryParseTable(Node $parent, array $lines, int $start): ?int $trimmed = substr($trimmed, 1); // Remove = // Check for alignment marker after = + // This sets column alignment if no separator row defined it if (str_starts_with($trimmed, '<')) { $cellAlignment = TableCell::ALIGN_LEFT; $trimmed = substr($trimmed, 1); + if (!isset($alignments[$index])) { + $alignments[$index] = TableCell::ALIGN_LEFT; + } } elseif (str_starts_with($trimmed, '>')) { $cellAlignment = TableCell::ALIGN_RIGHT; $trimmed = substr($trimmed, 1); + if (!isset($alignments[$index])) { + $alignments[$index] = TableCell::ALIGN_RIGHT; + } } elseif (str_starts_with($trimmed, '~')) { $cellAlignment = TableCell::ALIGN_CENTER; $trimmed = 
substr($trimmed, 1); + if (!isset($alignments[$index])) { + $alignments[$index] = TableCell::ALIGN_CENTER; + } } $cellContent = $trimmed; diff --git a/tests/TestCase/Parser/BlockParserTest.php b/tests/TestCase/Parser/BlockParserTest.php index f5dc494..94a1c8c 100644 --- a/tests/TestCase/Parser/BlockParserTest.php +++ b/tests/TestCase/Parser/BlockParserTest.php @@ -284,6 +284,51 @@ public function testParseTableWithEqualsHeaderNoSeparatorNeeded(): void $this->assertFalse($rows[2]->isHeader()); } + public function testParseTableWithEqualsHeaderAlignmentPropagates(): void + { + // Header alignment should propagate to data cells when no separator row + $doc = $this->parser->parse("|=> Right |=< Left |=~ Center |\n| A | B | C |\n| D | E | F |"); + + $table = $doc->getChildren()[0]; + $rows = $table->getChildren(); + + // Header row alignments + $headerCells = $rows[0]->getChildren(); + $this->assertSame(TableCell::ALIGN_RIGHT, $headerCells[0]->getAlignment()); + $this->assertSame(TableCell::ALIGN_LEFT, $headerCells[1]->getAlignment()); + $this->assertSame(TableCell::ALIGN_CENTER, $headerCells[2]->getAlignment()); + + // Data rows should inherit column alignment from header + $dataCells1 = $rows[1]->getChildren(); + $this->assertSame(TableCell::ALIGN_RIGHT, $dataCells1[0]->getAlignment()); + $this->assertSame(TableCell::ALIGN_LEFT, $dataCells1[1]->getAlignment()); + $this->assertSame(TableCell::ALIGN_CENTER, $dataCells1[2]->getAlignment()); + + $dataCells2 = $rows[2]->getChildren(); + $this->assertSame(TableCell::ALIGN_RIGHT, $dataCells2[0]->getAlignment()); + $this->assertSame(TableCell::ALIGN_LEFT, $dataCells2[1]->getAlignment()); + $this->assertSame(TableCell::ALIGN_CENTER, $dataCells2[2]->getAlignment()); + } + + public function testParseTableSeparatorRowOverridesHeaderAlignment(): void + { + // Separator row alignment takes precedence over header |= alignment + $doc = $this->parser->parse("|=> Right |=< Left |\n|:--------|------:|\n| A | B |"); + + $table = 
$doc->getChildren()[0]; + $rows = $table->getChildren(); + + // Header cells get alignment from separator row, not from |= markers + $headerCells = $rows[0]->getChildren(); + $this->assertSame(TableCell::ALIGN_LEFT, $headerCells[0]->getAlignment()); + $this->assertSame(TableCell::ALIGN_RIGHT, $headerCells[1]->getAlignment()); + + // Data row also uses separator row alignment + $dataCells = $rows[1]->getChildren(); + $this->assertSame(TableCell::ALIGN_LEFT, $dataCells[0]->getAlignment()); + $this->assertSame(TableCell::ALIGN_RIGHT, $dataCells[1]->getAlignment()); + } + public function testParseBlockAttributes(): void { $doc = $this->parser->parse("{.highlight}\n# Heading"); From 38be3da3f31c52bcb45e9fa9d9a99ba86815d877 Mon Sep 17 00:00:00 2001 From: mscherer Date: Fri, 5 Dec 2025 23:33:14 +0100 Subject: [PATCH 3/5] Add table colspan and rowspan support Implements cell spanning syntax for tables: - Colspan: || (empty cells) after content spans multiple columns - Rowspan: |^| continues a cell from the row above - Header rowspan: |=^ for header cells that span rows Features: - TableCell node now has colspan and rowspan properties - HtmlRenderer outputs colspan/rowspan attributes - Compatible with |= header syntax from PR #8 - Standard djot tables work unchanged (opt-in extension) Syntax examples: - |= Header || (colspan=2) - |^ | (rowspan continuation) - |=^ | (header rowspan) --- docs/cookbook.md | 92 +++++++++++++++++++ src/Node/Block/TableCell.php | 22 +++++ src/Parser/BlockParser.php | 98 +++++++++++++++++++-- src/Renderer/HtmlRenderer.php | 7 ++ tests/TestCase/DjotConverterTest.php | 33 +++++++ tests/TestCase/Parser/BlockParserTest.php | 102 ++++++++++++++++++++++ 6 files changed, 345 insertions(+), 9 deletions(-) diff --git a/docs/cookbook.md b/docs/cookbook.md index 24894fc..50a9b02 100644 --- a/docs/cookbook.md +++ b/docs/cookbook.md @@ -19,6 +19,7 @@ Common recipes and customizations for djot-php. 
- [Working with the AST](#working-with-the-ast) - [Custom Inline Patterns](#custom-inline-patterns) - [Custom Block Patterns](#custom-block-patterns) +- [Tables with Spanning](#tables-with-spanning) - [Alternative Output Formats](#alternative-output-formats) ## External Links @@ -1015,6 +1016,97 @@ DJOT; echo $converter->convert($djot); ``` +## Tables with Spanning + +djot-php extends standard djot with support for table cell spanning (colspan and rowspan). + +### Column Spanning (Colspan) + +Use `||` (empty cells) after content to span multiple columns: + +```djot +|= Name |= Contact Info || +| Alice | alice@ex.com | 555-1234 | +``` + +Output: +```html + + + +
+<table>
+<tr><th>Name</th><th colspan="2">Contact Info</th></tr>
+<tr><td>Alice</td><td>alice@ex.com</td><td>555-1234</td></tr>
+</table>
+``` + +Multiple empty cells create larger spans: + +```djot +| Title spanning three columns ||| +| A | B | C | +``` + +Renders as `<td colspan="3">Title spanning three columns</td>`. + +**Note:** `| |` (with space) creates an empty cell, while `||` (no space) creates a colspan. + +### Row Spanning (Rowspan) + +Use `|^|` to continue a cell from the row above: + +```djot +|= Category |= Item | +| Fruits | Apple | +|^ | Banana | +|^ | Cherry | +``` + +Output: +```html + + + + + + 
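+<table>
+<tr><th>Category</th><th>Item</th></tr>
+<tr><td rowspan="3">Fruits</td><td>Apple</td></tr>
+<tr><td>Banana</td></tr>
+<tr><td>Cherry</td></tr>
+</table>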
+``` + +### Header Row Spanning + +Use `|=^` to create header cells that span rows: + +```djot +|= Region |= Q1 ||= Q2 || +|=^ |= Units |= Revenue |= Units |= Revenue | +| North | 100 | $500 | 150 | $750 | +``` + +Output: +```html +RegionQ1Q2 +UnitsRevenueUnitsRevenue +North100$500150$750 +``` + +### Combined Spanning + +Colspan and rowspan can be used together for complex tables: + +```djot +|= Product Report |||| +|= Category |= Q1 || Q2 || +|=^ |= A |= B |= A |= B | +| Widgets | 10 | 20 | 15 | 25 | +``` + +### Compatibility Notes + +These spanning features are **djot-php extensions** and not part of the official djot specification: + +- Standard djot tables work unchanged +- `| |` (space) creates empty cells (standard behavior) +- `||` (no space) triggers colspan (extension) +- `|^|` triggers rowspan (extension) +- Content starting with `^` like `| ^text |` is treated as normal content + ## Alternative Output Formats ### Plain Text Extraction diff --git a/src/Node/Block/TableCell.php b/src/Node/Block/TableCell.php index 67e623d..7fdb055 100644 --- a/src/Node/Block/TableCell.php +++ b/src/Node/Block/TableCell.php @@ -32,6 +32,8 @@ class TableCell extends BlockNode public function __construct( protected bool $isHeader = false, protected string $alignment = self::ALIGN_DEFAULT, + protected int $colspan = 1, + protected int $rowspan = 1, ) { } @@ -45,6 +47,26 @@ public function getAlignment(): string return $this->alignment; } + public function getColspan(): int + { + return $this->colspan; + } + + public function setColspan(int $colspan): void + { + $this->colspan = $colspan; + } + + public function getRowspan(): int + { + return $this->rowspan; + } + + public function setRowspan(int $rowspan): void + { + $this->rowspan = $rowspan; + } + public function getType(): string { return 'table_cell'; diff --git a/src/Parser/BlockParser.php b/src/Parser/BlockParser.php index 4628f5a..bff4b24 100644 --- a/src/Parser/BlockParser.php +++ b/src/Parser/BlockParser.php @@ 
-2186,6 +2186,8 @@ protected function tryParseTable(Node $parent, array $lines, int $start): ?int $count = count($lines); $alignments = []; $headerFound = false; + // Track rowspan state: colIndex => ['cell' => TableCell, 'remaining' => int] + $rowspanState = []; while ($i < $count) { $currentLine = $lines[$i]; @@ -2232,38 +2234,82 @@ protected function tryParseTable(Node $parent, array $lines, int $start): ?int $cells = $this->parseTableCells($currentLine); $rowHasHeaderCell = false; $parsedCells = []; + $colIndex = 0; foreach ($cells as $index => $cellContent) { $trimmed = trim($cellContent); $isHeader = false; - $cellAlignment = $alignments[$index] ?? TableCell::ALIGN_DEFAULT; + $cellAlignment = $alignments[$colIndex] ?? TableCell::ALIGN_DEFAULT; + + // Check for colspan: empty cell (no content at all, not even whitespace) + // || creates an empty string, | | creates a space + // Only treat as colspan if there's a previous cell to extend + if ($cellContent === '' && !empty($parsedCells)) { + // Colspan - merge with previous cell + $lastIndex = count($parsedCells) - 1; + $parsedCells[$lastIndex]['colspan'] = (int) $parsedCells[$lastIndex]['colspan'] + 1; + $colIndex++; + + continue; + } + + // Check for rowspan: cell contains only ^ (with optional whitespace) + // |^| or |^ | means "continue from cell above" + if ($trimmed === '^') { + $parsedCells[] = [ + 'content' => '', + 'isHeader' => false, + 'alignment' => $cellAlignment, + 'colspan' => 1, + 'isRowspanMarker' => true, + 'colIndex' => $colIndex, + ]; + $colIndex++; + + continue; + } // Check for |= header cell syntax (Creole-style) - // Supports: |= Header |=< Left |=> Right |=~ Center + // Supports: |= Header |=< Left |=> Right |=~ Center |=^ (header rowspan) if (str_starts_with($trimmed, '=')) { $isHeader = true; $rowHasHeaderCell = true; $trimmed = substr($trimmed, 1); // Remove = + // Check for rowspan marker in header: |=^ + if (trim($trimmed) === '^') { + $parsedCells[] = [ + 'content' => '', + 
'isHeader' => true, + 'alignment' => $cellAlignment, + 'colspan' => 1, + 'isRowspanMarker' => true, + 'colIndex' => $colIndex, + ]; + $colIndex++; + + continue; + } + // Check for alignment marker after = // This sets column alignment if no separator row defined it if (str_starts_with($trimmed, '<')) { $cellAlignment = TableCell::ALIGN_LEFT; $trimmed = substr($trimmed, 1); - if (!isset($alignments[$index])) { - $alignments[$index] = TableCell::ALIGN_LEFT; + if (!isset($alignments[$colIndex])) { + $alignments[$colIndex] = TableCell::ALIGN_LEFT; } } elseif (str_starts_with($trimmed, '>')) { $cellAlignment = TableCell::ALIGN_RIGHT; $trimmed = substr($trimmed, 1); - if (!isset($alignments[$index])) { - $alignments[$index] = TableCell::ALIGN_RIGHT; + if (!isset($alignments[$colIndex])) { + $alignments[$colIndex] = TableCell::ALIGN_RIGHT; } } elseif (str_starts_with($trimmed, '~')) { $cellAlignment = TableCell::ALIGN_CENTER; $trimmed = substr($trimmed, 1); - if (!isset($alignments[$index])) { - $alignments[$index] = TableCell::ALIGN_CENTER; + if (!isset($alignments[$colIndex])) { + $alignments[$colIndex] = TableCell::ALIGN_CENTER; } } @@ -2274,17 +2320,51 @@ protected function tryParseTable(Node $parent, array $lines, int $start): ?int 'content' => trim($cellContent), 'isHeader' => $isHeader, 'alignment' => $cellAlignment, + 'colspan' => 1, + 'isRowspanMarker' => false, + 'colIndex' => $colIndex, ]; + $colIndex++; } // Create the row (header row if any cell has |= syntax) $row = new TableRow($rowHasHeaderCell); + $newRowspanState = []; + foreach ($parsedCells as $cellData) { - $cell = new TableCell($cellData['isHeader'], $cellData['alignment']); + /** @var int<0, max> $colIdx */ + $colIdx = $cellData['colIndex']; + /** @var int<1, max> $colSpan */ + $colSpan = $cellData['colspan']; + + if ($cellData['isRowspanMarker']) { + // Find cell that owns this column and increment its rowspan + if (isset($rowspanState[$colIdx])) { + // Column is covered by an active rowspan - 
extend it + $rowspanState[$colIdx]['cell']->setRowspan( + $rowspanState[$colIdx]['cell']->getRowspan() + 1 + ); + // Keep tracking this cell for next row + $newRowspanState[$colIdx] = $rowspanState[$colIdx]; + } + // Don't create a cell for rowspan markers + + continue; + } + + $cell = new TableCell($cellData['isHeader'], $cellData['alignment'], $colSpan); $this->inlineParser->parse($cell, $cellData['content'], $i); $row->appendChild($cell); + + // Track this cell for potential rowspan extension in next row + for ($c = $colIdx; $c < $colIdx + $colSpan; $c++) { + $newRowspanState[$c] = ['cell' => $cell]; + } } + // Replace rowspan state with new state for next row + $rowspanState = $newRowspanState; + $table->appendChild($row); $i++; } diff --git a/src/Renderer/HtmlRenderer.php b/src/Renderer/HtmlRenderer.php index 91bc7ec..dc7397e 100644 --- a/src/Renderer/HtmlRenderer.php +++ b/src/Renderer/HtmlRenderer.php @@ -629,6 +629,13 @@ protected function renderTableCell(TableCell $node): string $tag = $node->isHeader() ? 'th' : 'td'; $attrs = $this->renderAttributes($node); + if ($node->getColspan() > 1) { + $attrs .= ' colspan="' . $node->getColspan() . '"'; + } + if ($node->getRowspan() > 1) { + $attrs .= ' rowspan="' . $node->getRowspan() . '"'; + } + $alignment = $node->getAlignment(); if ($alignment !== TableCell::ALIGN_DEFAULT) { $attrs .= ' style="text-align: ' . $alignment . 
';"'; diff --git a/tests/TestCase/DjotConverterTest.php b/tests/TestCase/DjotConverterTest.php index 558e97d..2d62651 100644 --- a/tests/TestCase/DjotConverterTest.php +++ b/tests/TestCase/DjotConverterTest.php @@ -1644,6 +1644,39 @@ public function testTableWithMismatchedColumns(): void $this->assertStringContainsString('', $result); } + public function testTableColspanRendering(): void + { + // || creates colspan + $djot = "|= Name |= Contact Info ||\n| Alice | alice@ex.com | 555-1234 |"; + $result = $this->converter->convert($djot); + + $this->assertStringContainsString('colspan="2"', $result); + $this->assertStringContainsString('', $result); + $this->assertStringContainsString('', $result); + } + + public function testTableRowspanRendering(): void + { + // |^| creates rowspan + $djot = "| Fruits | Apple |\n|^ | Banana |\n|^ | Cherry |"; + $result = $this->converter->convert($djot); + + $this->assertStringContainsString('rowspan="3"', $result); + $this->assertStringContainsString('', $result); + } + + public function testTableCombinedSpanning(): void + { + // Complex table with both colspan and rowspan + $djot = "|= Region |= Q1 ||\n|=^ |= A |= B |\n| North | 1 | 2 |"; + $result = $this->converter->convert($djot); + + $this->assertStringContainsString('rowspan="2"', $result); + $this->assertStringContainsString('colspan="2"', $result); + $this->assertStringContainsString('', $result); + $this->assertStringContainsString('', $result); + } + // Edge cases: Code blocks public function testCodeBlockWithLongerClosingFence(): void diff --git a/tests/TestCase/Parser/BlockParserTest.php b/tests/TestCase/Parser/BlockParserTest.php index 94a1c8c..3c37903 100644 --- a/tests/TestCase/Parser/BlockParserTest.php +++ b/tests/TestCase/Parser/BlockParserTest.php @@ -329,6 +329,108 @@ public function testParseTableSeparatorRowOverridesHeaderAlignment(): void $this->assertSame(TableCell::ALIGN_RIGHT, $dataCells[1]->getAlignment()); } + public function testParseTableColspan(): 
void + { + // || creates colspan - empty cell merges with previous + $doc = $this->parser->parse("|= Name |= Contact Info ||\n| Alice | alice@ex.com | 555-1234 |"); + + $table = $doc->getChildren()[0]; + $this->assertInstanceOf(Table::class, $table); + + $rows = $table->getChildren(); + $headerCells = $rows[0]->getChildren(); + + // "Contact Info" should have colspan=2 + $this->assertCount(2, $headerCells); + $this->assertSame(1, $headerCells[0]->getColspan()); + $this->assertSame(2, $headerCells[1]->getColspan()); + } + + public function testParseTableColspanMultiple(): void + { + // ||| creates colspan=3 + $doc = $this->parser->parse("| Spans three |||\n| A | B | C |"); + + $table = $doc->getChildren()[0]; + $rows = $table->getChildren(); + $firstRowCells = $rows[0]->getChildren(); + + $this->assertCount(1, $firstRowCells); + $this->assertSame(3, $firstRowCells[0]->getColspan()); + } + + public function testParseTableRowspan(): void + { + // |^| creates rowspan - cell continues from above + $doc = $this->parser->parse("| Fruits | Apple |\n|^ | Banana |\n|^ | Cherry |"); + + $table = $doc->getChildren()[0]; + $rows = $table->getChildren(); + + // First row: "Fruits" should have rowspan=3 + $firstRowCells = $rows[0]->getChildren(); + $this->assertSame(3, $firstRowCells[0]->getRowspan()); + $this->assertSame(1, $firstRowCells[1]->getRowspan()); + + // Second row should only have one cell (Banana) + $this->assertCount(1, $rows[1]->getChildren()); + + // Third row should only have one cell (Cherry) + $this->assertCount(1, $rows[2]->getChildren()); + } + + public function testParseTableRowspanWithHeader(): void + { + // |=^ creates header rowspan + $doc = $this->parser->parse("|= Region |= Q1 ||\n|=^ |= A |= B |\n| North | 1 | 2 |"); + + $table = $doc->getChildren()[0]; + $rows = $table->getChildren(); + + // "Region" header should have rowspan=2 + $firstRowCells = $rows[0]->getChildren(); + $this->assertSame(2, $firstRowCells[0]->getRowspan()); + 
$this->assertTrue($firstRowCells[0]->isHeader()); + + // "Q1" should have colspan=2 + $this->assertSame(2, $firstRowCells[1]->getColspan()); + } + + public function testParseTableEmptyCellNotColspan(): void + { + // | | (with space) is an empty cell, not colspan + $doc = $this->parser->parse("| A | |\n| B | C |"); + + $table = $doc->getChildren()[0]; + $rows = $table->getChildren(); + $firstRowCells = $rows[0]->getChildren(); + + // Should have 2 cells, both with colspan=1 + $this->assertCount(2, $firstRowCells); + $this->assertSame(1, $firstRowCells[0]->getColspan()); + $this->assertSame(1, $firstRowCells[1]->getColspan()); + } + + public function testParseTableCaretAsContent(): void + { + // ^text is regular content, not rowspan marker + $doc = $this->parser->parse("| ^caret | text |"); + + $table = $doc->getChildren()[0]; + $rows = $table->getChildren(); + $cells = $rows[0]->getChildren(); + + // Should have 2 cells, first contains "^caret" + $this->assertCount(2, $cells); + $firstCellContent = ''; + foreach ($cells[0]->getChildren() as $child) { + if ($child instanceof \Djot\Node\Inline\Text) { + $firstCellContent .= $child->getContent(); + } + } + $this->assertSame('^caret', $firstCellContent); + } + public function testParseBlockAttributes(): void { $doc = $this->parser->parse("{.highlight}\n# Heading"); From 80798c354424c46f7f4c3a721f0da981fecaddd3 Mon Sep 17 00:00:00 2001 From: mscherer Date: Fri, 5 Dec 2025 23:36:21 +0100 Subject: [PATCH 4/5] Fix CS. 
--- src/Parser/BlockParser.php | 6 +++--- tests/TestCase/Parser/BlockParserTest.php | 5 +++-- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/src/Parser/BlockParser.php b/src/Parser/BlockParser.php index cb5dc99..12f0f77 100644 --- a/src/Parser/BlockParser.php +++ b/src/Parser/BlockParser.php @@ -2371,10 +2371,10 @@ protected function tryParseTable(Node $parent, array $lines, int $start): ?int // Check for colspan: empty cell (no content at all, not even whitespace) // || creates an empty string, | | creates a space // Only treat as colspan if there's a previous cell to extend - if ($cellContent === '' && !empty($parsedCells)) { + if ($cellContent === '' && $parsedCells) { // Colspan - merge with previous cell $lastIndex = count($parsedCells) - 1; - $parsedCells[$lastIndex]['colspan'] = (int) $parsedCells[$lastIndex]['colspan'] + 1; + $parsedCells[$lastIndex]['colspan'] = (int)$parsedCells[$lastIndex]['colspan'] + 1; $colIndex++; continue; @@ -2469,7 +2469,7 @@ protected function tryParseTable(Node $parent, array $lines, int $start): ?int if (isset($rowspanState[$colIdx])) { // Column is covered by an active rowspan - extend it $rowspanState[$colIdx]['cell']->setRowspan( - $rowspanState[$colIdx]['cell']->getRowspan() + 1 + $rowspanState[$colIdx]['cell']->getRowspan() + 1, ); // Keep tracking this cell for next row $newRowspanState[$colIdx] = $rowspanState[$colIdx]; diff --git a/tests/TestCase/Parser/BlockParserTest.php b/tests/TestCase/Parser/BlockParserTest.php index 97c7d9e..80085d5 100644 --- a/tests/TestCase/Parser/BlockParserTest.php +++ b/tests/TestCase/Parser/BlockParserTest.php @@ -16,6 +16,7 @@ use Djot\Node\Block\TableRow; use Djot\Node\Block\ThematicBreak; use Djot\Node\Document; +use Djot\Node\Inline\Text; use Djot\Parser\BlockParser; use PHPUnit\Framework\TestCase; use function str_contains; @@ -414,7 +415,7 @@ public function testParseTableEmptyCellNotColspan(): void public function testParseTableCaretAsContent(): void { // ^text is 
regular content, not rowspan marker - $doc = $this->parser->parse("| ^caret | text |"); + $doc = $this->parser->parse('| ^caret | text |'); $table = $doc->getChildren()[0]; $rows = $table->getChildren(); @@ -424,7 +425,7 @@ public function testParseTableCaretAsContent(): void $this->assertCount(2, $cells); $firstCellContent = ''; foreach ($cells[0]->getChildren() as $child) { - if ($child instanceof \Djot\Node\Inline\Text) { + if ($child instanceof Text) { $firstCellContent .= $child->getContent(); } } From 56cc7723e5f6d157657f5a28fe19b37d806027ea Mon Sep 17 00:00:00 2001 From: mscherer Date: Sat, 6 Dec 2025 00:17:54 +0100 Subject: [PATCH 5/5] Require table markers to be directly attached to pipe Fixes false positives where spaces before markers would trigger special behavior: - | = text | now renders as literal "= text", not a header - | ^ | now renders as literal "^", not a rowspan marker - |= < text | now renders header with "< text", not left-aligned Markers must be directly attached: |=, |^, |=<, |=>, |=~ --- src/Parser/BlockParser.php | 30 ++++--- tests/TestCase/Parser/BlockParserTest.php | 100 ++++++++++++++++++++++ 2 files changed, 116 insertions(+), 14 deletions(-) diff --git a/src/Parser/BlockParser.php b/src/Parser/BlockParser.php index 12f0f77..77c9d77 100644 --- a/src/Parser/BlockParser.php +++ b/src/Parser/BlockParser.php @@ -2380,9 +2380,10 @@ protected function tryParseTable(Node $parent, array $lines, int $start): ?int continue; } - // Check for rowspan: cell contains only ^ (with optional whitespace) - // |^| or |^ | means "continue from cell above" - if ($trimmed === '^') { + // Check for rowspan: cell contains only ^ and must be attached to pipe + // |^| means "continue from cell above" + // | ^ | is literal content "^", not a rowspan marker + if ($trimmed === '^' && str_starts_with($cellContent, '^')) { $parsedCells[] = [ 'content' => '', 'isHeader' => false, @@ -2398,13 +2399,14 @@ protected function tryParseTable(Node $parent, array $lines, 
int $start): ?int // Check for |= header cell syntax (Creole-style) // Supports: |= Header |=< Left |=> Right |=~ Center |=^ (header rowspan) - if (str_starts_with($trimmed, '=')) { + // Must be directly attached to pipe: | = text | is literal, not header + if (str_starts_with($cellContent, '=')) { $isHeader = true; $rowHasHeaderCell = true; - $trimmed = substr($trimmed, 1); // Remove = + $afterEquals = substr($cellContent, 1); // Remove = // Check for rowspan marker in header: |=^ - if (trim($trimmed) === '^') { + if (str_starts_with($afterEquals, '^') && trim($afterEquals) === '^') { $parsedCells[] = [ 'content' => '', 'isHeader' => true, @@ -2418,29 +2420,29 @@ protected function tryParseTable(Node $parent, array $lines, int $start): ?int continue; } - // Check for alignment marker after = + // Check for alignment marker after = (must be directly attached: |=< not |= <) // This sets column alignment if no separator row defined it - if (str_starts_with($trimmed, '<')) { + if (str_starts_with($afterEquals, '<')) { $cellAlignment = TableCell::ALIGN_LEFT; - $trimmed = substr($trimmed, 1); + $afterEquals = substr($afterEquals, 1); if (!isset($alignments[$colIndex])) { $alignments[$colIndex] = TableCell::ALIGN_LEFT; } - } elseif (str_starts_with($trimmed, '>')) { + } elseif (str_starts_with($afterEquals, '>')) { $cellAlignment = TableCell::ALIGN_RIGHT; - $trimmed = substr($trimmed, 1); + $afterEquals = substr($afterEquals, 1); if (!isset($alignments[$colIndex])) { $alignments[$colIndex] = TableCell::ALIGN_RIGHT; } - } elseif (str_starts_with($trimmed, '~')) { + } elseif (str_starts_with($afterEquals, '~')) { $cellAlignment = TableCell::ALIGN_CENTER; - $trimmed = substr($trimmed, 1); + $afterEquals = substr($afterEquals, 1); if (!isset($alignments[$colIndex])) { $alignments[$colIndex] = TableCell::ALIGN_CENTER; } } - $cellContent = $trimmed; + $cellContent = $afterEquals; } $parsedCells[] = [ diff --git a/tests/TestCase/Parser/BlockParserTest.php 
b/tests/TestCase/Parser/BlockParserTest.php index 80085d5..add5624 100644 --- a/tests/TestCase/Parser/BlockParserTest.php +++ b/tests/TestCase/Parser/BlockParserTest.php @@ -432,6 +432,106 @@ public function testParseTableCaretAsContent(): void $this->assertSame('^caret', $firstCellContent); } + public function testParseTableMarkersRequireAttachment(): void + { + // Markers with leading space should be treated as literal content + // | ^ | should be literal "^", not rowspan + $doc = $this->parser->parse("| A |\n|---|\n| 1 |\n| ^ |"); + + $table = $doc->getChildren()[0]; + $rows = $table->getChildren(); + + // Should have 3 rows (header + 2 data rows), no rowspan + $this->assertCount(3, $rows); + $lastRowCells = $rows[2]->getChildren(); + $this->assertCount(1, $lastRowCells); + $this->assertSame(1, $lastRowCells[0]->getRowspan()); + + // Content should be "^" + $cellContent = ''; + foreach ($lastRowCells[0]->getChildren() as $child) { + if ($child instanceof Text) { + $cellContent .= $child->getContent(); + } + } + $this->assertSame('^', $cellContent); + } + + public function testParseTableEqualsWithSpaceIsLiteral(): void + { + // | = text | should be literal "= text", not header + $doc = $this->parser->parse('|= Header | = literal |'); + + $table = $doc->getChildren()[0]; + $rows = $table->getChildren(); + $cells = $rows[0]->getChildren(); + + $this->assertCount(2, $cells); + // First cell is header (|= attached) + $this->assertTrue($cells[0]->isHeader()); + // Second cell is NOT header (| = has space) + $this->assertFalse($cells[1]->isHeader()); + + // Second cell content should be "= literal" + $cellContent = ''; + foreach ($cells[1]->getChildren() as $child) { + if ($child instanceof Text) { + $cellContent .= $child->getContent(); + } + } + $this->assertSame('= literal', $cellContent); + } + + public function testParseTableAlignmentMarkersRequireAttachment(): void + { + // |=< attached should align, |= < with space should be literal + $doc = 
$this->parser->parse('|=< Left |= < literal |'); + + $table = $doc->getChildren()[0]; + $rows = $table->getChildren(); + $cells = $rows[0]->getChildren(); + + $this->assertCount(2, $cells); + // First cell has left alignment (|=< attached) + $this->assertSame(TableCell::ALIGN_LEFT, $cells[0]->getAlignment()); + // Second cell has default alignment (|= < has space before <) + $this->assertSame(TableCell::ALIGN_DEFAULT, $cells[1]->getAlignment()); + + // Second cell content should include the "<" + $cellContent = ''; + foreach ($cells[1]->getChildren() as $child) { + if ($child instanceof Text) { + $cellContent .= $child->getContent(); + } + } + $this->assertSame('< literal', $cellContent); + } + + public function testParseTableHeaderRowspanRequiresAttachment(): void + { + // |=^ attached should be header rowspan, |= ^ with space should be literal + $doc = $this->parser->parse("|= Group |= A |\n|=^| 1 |\n|= ^ | 2 |"); + + $table = $doc->getChildren()[0]; + $rows = $table->getChildren(); + + // First row header cell should have rowspan 2 (from |=^ in second row) + $this->assertSame(2, $rows[0]->getChildren()[0]->getRowspan()); + + // Third row first cell should be a header with content "^" (|= ^ has space) + $thirdRowCells = $rows[2]->getChildren(); + $this->assertTrue($thirdRowCells[0]->isHeader()); + $this->assertSame(1, $thirdRowCells[0]->getRowspan()); + + $cellContent = ''; + foreach ($thirdRowCells[0]->getChildren() as $child) { + if ($child instanceof Text) { + $cellContent .= $child->getContent(); + } + } + $this->assertSame('^', $cellContent); + } + public function testParseBlockAttributes(): void { $doc = $this->parser->parse("{.highlight}\n# Heading");
NameContact InfoFruitsRegionQ1