-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add processors for table rowspan and colspan issues
- Loading branch information
akulbii
committed
Nov 19, 2024
1 parent
1703dcd
commit fcfee2a
Showing
13 changed files
with
430 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,77 @@ | ||
<?php | ||
|
||
namespace HalloWelt\MigrateDokuwiki\Converter\PostProcessors\Table; | ||
|
||
use HalloWelt\MigrateDokuwiki\IProcessor; | ||
|
||
class Colspan implements IProcessor {

	/**
	 * Replaces "###COLSPAN_<N>###" markers inside wikitext table cells
	 * with a real colspan="<N>" cell attribute.
	 *
	 * Only lines which carry a marker are rewritten (and trimmed). All other lines are
	 * passed through untouched, so leading whitespace - which is significant in wikitext -
	 * is preserved.
	 *
	 * For a cell spanning N columns up to N-1 empty cells are removed from the end
	 * of the same table row. Lines with content, row separators and table delimiters
	 * are never removed.
	 *
	 * @param string $text
	 * @return string
	 */
	public function process( string $text ): string {
		$lines = explode( "\n", $text );
		$lineCount = count( $lines );

		for ( $lineIndex = 0; $lineIndex < $lineCount; $lineIndex++ ) {
			// Skip lines which were already removed as padding cells
			// and lines which do not carry any marker
			if (
				!isset( $lines[$lineIndex] ) ||
				strpos( $lines[$lineIndex], '###COLSPAN_' ) === false
			) {
				continue;
			}

			// In wikitext one line = one cell
			// One cell may have few blocks separated by "|"
			$originalBlocks = explode( '|', trim( $lines[$lineIndex] ) );
			$cellBlocks = $originalBlocks;

			$markerFound = false;
			$colspanCount = 0;

			// Amount of blocks inserted into $cellBlocks so far.
			// Indexes of $originalBlocks must be shifted by that amount
			// to address the same block in $cellBlocks
			$indexShift = 0;

			foreach ( $originalBlocks as $originalIndex => $originalBlock ) {
				$matches = [];
				if ( preg_match( '/###COLSPAN_(\d+)###/', $originalBlock, $matches ) !== 1 ) {
					continue;
				}

				$markerFound = true;
				$colspanCount = (int)$matches[1];
				$attribute = "colspan=\"$colspanCount\"";
				$currentIndex = $originalIndex + $indexShift;

				// If cell already contains block with HTML attributes (like "style" or "colspan")
				// Then just append "colspan" there.
				// NOTE(review): a "|" inside the cell content (e.g. a piped link) is
				// indistinguishable from an attributes block here - confirm if that matters
				if ( count( $cellBlocks ) > 2 && $currentIndex > 0 ) {
					$cellBlocks[$currentIndex - 1] .= " $attribute";
				} else {
					// Otherwise add such block right after the first one
					$cellBlocks = array_merge(
						[ $cellBlocks[0] ],
						[ $attribute ],
						array_slice( $cellBlocks, 1 )
					);

					// Everything behind the first block moved one position further
					$indexShift++;
					if ( $currentIndex > 0 ) {
						$currentIndex++;
					}
				}

				// Remove exactly the matched marker from the block which contains it
				$cellBlocks[$currentIndex] = str_replace( $matches[0], '', $cellBlocks[$currentIndex] );
			}

			if ( !$markerFound ) {
				// Only an incomplete marker on this line - leave it as it is
				continue;
			}

			$lines[$lineIndex] = implode( '|', $cellBlocks );

			// Remove the now redundant empty cells of this row
			$paddingLineIndexes = $this->findRowPaddingLines(
				$lines, $lineIndex, $lineCount, $colspanCount - 1
			);
			foreach ( $paddingLineIndexes as $paddingLineIndex ) {
				unset( $lines[$paddingLineIndex] );
			}
		}

		return implode( "\n", $lines );
	}

	/**
	 * Finds empty cells at the end of the table row which contains the given cell line.
	 *
	 * Assumes that rows made shorter by the colspan marker get padded with
	 * empty cells at the end of the row - TODO confirm against Pandoc output.
	 *
	 * @param string[] $lines All lines of the text, already removed lines are unset
	 * @param int $cellLineIndex Index of the line holding the "colspan" cell
	 * @param int $lineCount Original amount of lines in the text
	 * @param int $limit Maximum amount of padding lines to return
	 * @return int[] Indexes of lines which can be removed
	 */
	private function findRowPaddingLines(
		array $lines, int $cellLineIndex, int $lineCount, int $limit
	): array {
		// Collect all remaining lines of the current row
		$rowLineIndexes = [];
		for ( $i = $cellLineIndex + 1; $i < $lineCount; $i++ ) {
			if ( !isset( $lines[$i] ) ) {
				continue;
			}

			$candidate = trim( $lines[$i] );
			if (
				strpos( $candidate, '|-' ) === 0 ||
				strpos( $candidate, '|}' ) === 0 ||
				strpos( $candidate, '{|' ) === 0
			) {
				// Next row, end of the table or start of another table
				break;
			}

			$rowLineIndexes[] = $i;
		}

		// Walk back from the end of the row as long as there are empty cells
		$paddingLineIndexes = [];
		while ( $limit > 0 && $rowLineIndexes !== [] ) {
			$lastRowLineIndex = array_pop( $rowLineIndexes );
			if ( !$this->isEmptyCellLine( $lines[$lastRowLineIndex] ) ) {
				break;
			}

			$paddingLineIndexes[] = $lastRowLineIndex;
			$limit--;
		}

		return $paddingLineIndexes;
	}

	/**
	 * Checks if the line is a table cell without any content,
	 * like "|", "!" or "| style="..."|".
	 *
	 * @param string $line
	 * @return bool
	 */
	private function isEmptyCellLine( string $line ): bool {
		return preg_match( '/^[|!](?:[^|]*\|)?\s*$/', trim( $line ) ) === 1;
	}
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,149 @@ | ||
<?php | ||
|
||
namespace HalloWelt\MigrateDokuwiki\Converter\PostProcessors\Table; | ||
|
||
use HalloWelt\MigrateDokuwiki\IProcessor; | ||
|
||
class Rowspan implements IProcessor {

	/**
	 * Converts DokuWiki style vertical cell merging into wikitext "rowspan" attributes.
	 *
	 * Inside of each wikitext table ("{|" ... "|}") every cell which contains only ":::"
	 * is removed, and the closest cell above it in the same column, which is not ":::" itself,
	 * gets a rowspan="<N>" attribute. A ":::" cell without any cell above it is left as it is.
	 *
	 * Only lines which receive a "rowspan" attribute are rewritten (and trimmed),
	 * all other lines are passed through untouched.
	 *
	 * @param string $text
	 * @return string
	 */
	public function process( string $text ): string {
		$lines = explode( "\n", $text );

		$isTable = false;

		$rowIndex = 0;
		$colIndex = 0;

		$linesToDelete = [];

		// [row][column] => "rowspan" value of the cell
		$rowspanCounts = [];

		// [row][column] => true for ":::" cells merged into a cell above
		$mergedCells = [];

		// We'll use this mapping later to change only lines which we need,
		// Which contain "rowspan" cells
		// Without need to process all table cells once more
		$cellCoordsToLineMap = [];

		foreach ( $lines as $lineIndex => $rawLine ) {
			// Trimmed copy is used for detection only
			$line = trim( $rawLine );

			if ( strpos( $line, '{|' ) === 0 ) {
				// If we found "{|" - then we start processing the table
				$isTable = true;

				$rowIndex = 0;
				$colIndex = 0;

				// We use separate state for each table we process
				$rowspanCounts = [];
				$mergedCells = [];
				$cellCoordsToLineMap = [];

				continue;
			}

			if ( strpos( $line, '|}' ) === 0 ) {
				$isTable = false;

				// If we got finished with current table - set proper "rowspan" attributes
				// for specific cells in this table
				foreach ( $rowspanCounts as $spanRow => $rowspanCells ) {
					foreach ( $rowspanCells as $spanCol => $rowspanCount ) {
						$tableCellLineIndex = $cellCoordsToLineMap[$spanRow][$spanCol];

						$lines[$tableCellLineIndex] = $this->addCellAttribute(
							trim( $lines[$tableCellLineIndex] ),
							"rowspan=\"$rowspanCount\""
						);
					}
				}

				// Make sure a stray second "|}" does not apply the same attributes again
				$rowspanCounts = [];

				continue;
			}

			if ( !$isTable ) {
				continue;
			}

			// Row separator
			if ( strpos( $line, '|-' ) === 0 ) {
				$rowIndex++;
				$colIndex = 0;

				continue;
			}

			// Table caption and continuation lines of multi-line cell content
			// are not cells, so they must not shift the column index
			if ( !$this->isCellLine( $line ) ) {
				continue;
			}

			$colIndex++;

			// It makes sense to record lines indexes only for "cell content lines"
			$cellCoordsToLineMap[$rowIndex][$colIndex] = $lineIndex;

			if ( !$this->isRowspanPlaceholder( $line ) ) {
				continue;
			}

			// There can be any amount of cells united in one vertically using "rowspan"
			// And in that case in DokuWiki there will be corresponding amount
			// of vertical aligned cells filled with ":::"
			// So go up until we reach a cell which is not ":::" itself
			$anchorRow = $rowIndex - 1;
			while ( isset( $mergedCells[$anchorRow][$colIndex] ) ) {
				$anchorRow--;
			}

			if ( !isset( $cellCoordsToLineMap[$anchorRow][$colIndex] ) ) {
				// There is no cell above which could span over this one,
				// so there is nothing to merge with
				continue;
			}

			$mergedCells[$rowIndex][$colIndex] = true;
			$linesToDelete[] = $lineIndex;

			// Minimum amount for "rowspan" attribute is 2
			$rowspanCounts[$anchorRow][$colIndex] = ( $rowspanCounts[$anchorRow][$colIndex] ?? 1 ) + 1;
		}

		// Remove all merged cells containing only ":::"
		foreach ( $linesToDelete as $lineToDelete ) {
			unset( $lines[$lineToDelete] );
		}

		return implode( "\n", $lines );
	}

	/**
	 * Checks if the (trimmed) line starts a table cell.
	 * Table caption ("|+") is not a cell.
	 *
	 * @param string $line
	 * @return bool
	 */
	private function isCellLine( string $line ): bool {
		if ( $line === '' || strpos( $line, '|+' ) === 0 ) {
			return false;
		}

		return $line[0] === '|' || $line[0] === '!';
	}

	/**
	 * Checks if ":::" is the only content of one of the blocks of the cell.
	 *
	 * @param string $line Trimmed cell line
	 * @return bool
	 */
	private function isRowspanPlaceholder( string $line ): bool {
		// Header cells start with "!" instead of "|"
		if ( strpos( $line, '!' ) === 0 ) {
			$line = '|' . substr( $line, 1 );
		}

		foreach ( explode( '|', $line ) as $cellBlock ) {
			if ( trim( $cellBlock ) === ':::' ) {
				return true;
			}
		}

		return false;
	}

	/**
	 * Adds HTML attribute to the table cell.
	 *
	 * @param string $line Trimmed cell line
	 * @param string $attribute Attribute to add, like 'rowspan="2"'
	 * @return string
	 */
	private function addCellAttribute( string $line, string $attribute ): string {
		// Header cells start with "!" instead of "|".
		// Temporarily treat them like regular cells and restore the "!" afterwards
		$isHeader = strpos( $line, '!' ) === 0;
		if ( $isHeader ) {
			$line = '|' . substr( $line, 1 );
		}

		$cellBlocks = explode( '|', $line );

		// If cell already contains block with HTML attributes (like "style" or "colspan")
		// Then just append the attribute there.
		// NOTE(review): a "|" inside the cell content (e.g. a piped link) is
		// indistinguishable from an attributes block here - confirm if that matters
		if ( count( $cellBlocks ) > 2 ) {
			$cellBlocks[1] .= " $attribute";
		} else {
			// Otherwise add such block
			$cellBlocks = array_merge(
				[ $cellBlocks[0] ],
				[ $attribute ],
				array_slice( $cellBlocks, 1 )
			);
		}

		$line = implode( '|', $cellBlocks );
		if ( $isHeader ) {
			$line = '!' . substr( $line, 1 );
		}

		return $line;
	}
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
<?php | ||
|
||
namespace HalloWelt\MigrateDokuwiki\Converter\PreProcessors\Table; | ||
|
||
use HalloWelt\MigrateDokuwiki\IProcessor; | ||
|
||
class Colspan implements IProcessor {

	/**
	 * Marks horizontally merged DokuWiki table cells with "###COLSPAN_<N>###" markers.
	 *
	 * In each table line every run of two or more "|" characters is replaced by
	 * "###COLSPAN_<N>###|", where N is the length of that run.
	 *
	 * Only table lines are rewritten (and trimmed). All other lines are passed through
	 * untouched, because leading whitespace is significant in DokuWiki syntax
	 * (lists, preformatted blocks).
	 *
	 * @param string $text
	 * @return string
	 */
	public function process( string $text ): string {
		$lines = explode( "\n", $text );

		foreach ( $lines as $lineIndex => $rawLine ) {
			// Trimmed copy is used for detection, original line stays as it is
			$line = trim( $rawLine );

			// Each table has either "|" or "^" at the line start
			if ( strpos( $line, '|' ) !== 0 && strpos( $line, '^' ) !== 0 ) {
				continue;
			}

			$converted = preg_replace_callback(
				'/(.*?)(\|\|+)/',
				static function ( array $matches ): string {
					// Amount of consecutive "|" is the amount of columns the cell spans
					$colspanCount = strlen( $matches[2] );

					return $matches[1] . '###COLSPAN_' . $colspanCount . '###|';
				},
				$line
			);

			// "null" means that regular expression failed - keep the line then
			if ( $converted !== null ) {
				$lines[$lineIndex] = $converted;
			}
		}

		return implode( "\n", $lines );
	}
}
22 changes: 22 additions & 0 deletions
22
tests/phpunit/Converter/PostProcessor/Table/ColspanTest.php
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
<?php | ||
|
||
namespace HalloWelt\MigrateDokuwiki\Tests\Converter\PostProcessor\Table; | ||
|
||
use HalloWelt\MigrateDokuwiki\Converter\PostProcessors\Table\Colspan; | ||
use PHPUnit\Framework\TestCase; | ||
|
||
class ColspanTest extends TestCase {

	/**
	 * Feeds the "colspan" input fixture to the post-processor
	 * and compares the result with the stored output fixture.
	 *
	 * @covers \HalloWelt\MigrateDokuwiki\Converter\PostProcessors\Table\Colspan::process()
	 */
	public function testProcess() {
		$fixtureDir = dirname( __DIR__, 3 ) . '/data/Converter/PostProcessor/Table';

		$wikitext = file_get_contents( "$fixtureDir/colspan-input.txt" );
		$expectedWikitext = file_get_contents( "$fixtureDir/colspan-output.txt" );

		$colspanProcessor = new Colspan();

		$this->assertEquals(
			$expectedWikitext,
			$colspanProcessor->process( $wikitext )
		);
	}
}
22 changes: 22 additions & 0 deletions
22
tests/phpunit/Converter/PostProcessor/Table/RowspanTest.php
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
<?php | ||
|
||
namespace HalloWelt\MigrateDokuwiki\Tests\Converter\PostProcessor\Table; | ||
|
||
use HalloWelt\MigrateDokuwiki\Converter\PostProcessors\Table\Rowspan; | ||
use PHPUnit\Framework\TestCase; | ||
|
||
class RowspanTest extends TestCase {

	/**
	 * Feeds the "rowspan" input fixture to the post-processor
	 * and compares the result with the stored output fixture.
	 *
	 * @covers \HalloWelt\MigrateDokuwiki\Converter\PostProcessors\Table\Rowspan::process()
	 */
	public function testProcess() {
		$fixtureDir = dirname( __DIR__, 3 ) . '/data/Converter/PostProcessor/Table';

		$wikitext = file_get_contents( "$fixtureDir/rowspan-input.txt" );
		$expectedWikitext = file_get_contents( "$fixtureDir/rowspan-output.txt" );

		$rowspanProcessor = new Rowspan();

		$this->assertEquals(
			$expectedWikitext,
			$rowspanProcessor->process( $wikitext )
		);
	}
}
22 changes: 22 additions & 0 deletions
22
tests/phpunit/Converter/PreProcessor/Table/ColspanTest.php
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
<?php | ||
|
||
namespace HalloWelt\MigrateDokuwiki\Tests\Converter\PreProcessor\Table; | ||
|
||
use HalloWelt\MigrateDokuwiki\Converter\PreProcessors\Table\Colspan; | ||
use PHPUnit\Framework\TestCase; | ||
|
||
class ColspanTest extends TestCase {

	/**
	 * Feeds the "colspan" input fixture to the pre-processor
	 * and compares the result with the stored output fixture.
	 *
	 * @covers \HalloWelt\MigrateDokuwiki\Converter\PreProcessors\Table\Colspan::process()
	 */
	public function testProcess() {
		$fixtureDir = dirname( __DIR__, 3 ) . '/data/Converter/PreProcessor/Table';

		$dokuwikiText = file_get_contents( "$fixtureDir/colspan-input.txt" );
		$expectedText = file_get_contents( "$fixtureDir/colspan-output.txt" );

		$colspanProcessor = new Colspan();

		$this->assertEquals(
			$expectedText,
			$colspanProcessor->process( $dokuwikiText )
		);
	}
}
Oops, something went wrong.