Skip to content

Commit

Permalink
Add processors for table rowspan and colspan issues
Browse files Browse the repository at this point in the history
  • Loading branch information
akulbii committed Nov 19, 2024
1 parent 1703dcd commit fcfee2a
Show file tree
Hide file tree
Showing 13 changed files with 430 additions and 2 deletions.
11 changes: 9 additions & 2 deletions src/Converter/DokuwikiConverter.php
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,9 @@
use HalloWelt\MediaWiki\Lib\Migration\Workspace;
use HalloWelt\MigrateDokuwiki\Converter\PostProcessors\Image as ImagePostProcessor;
use HalloWelt\MigrateDokuwiki\Converter\PostProcessors\Link as PostProcessorsLink;
use HalloWelt\MigrateDokuwiki\Converter\PostProcessors\Table\Colspan as ColspanPostProcessor;
use HalloWelt\MigrateDokuwiki\Converter\PostProcessors\Table\Rowspan as RowspanPostProcessor;
use HalloWelt\MigrateDokuwiki\Converter\PreProcessors\Table\Colspan as ColspanPreProcessor;
use HalloWelt\MigrateDokuwiki\Converter\Processors\Image as ImageProcessor;
use HalloWelt\MigrateDokuwiki\Converter\Processors\Link;
use HalloWelt\MigrateDokuwiki\IProcessor;
Expand All @@ -25,7 +28,9 @@ class DokuwikiConverter extends PandocDokuwiki implements IOutputAwareInterface
* @return array
*/
private function getPreProcessors(): array {
return [];
return [
new ColspanPreProcessor()
];
}

/**
Expand All @@ -44,7 +49,9 @@ private function getProcessors(): array {
private function getPostProcessors(): array {
return [
new ImagePostProcessor(),
new PostProcessorsLink()
new PostProcessorsLink(),
new ColspanPostProcessor(),
new RowspanPostProcessor()
];
}

Expand Down
77 changes: 77 additions & 0 deletions src/Converter/PostProcessors/Table/Colspan.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
<?php

namespace HalloWelt\MigrateDokuwiki\Converter\PostProcessors\Table;

use HalloWelt\MigrateDokuwiki\IProcessor;

class Colspan implements IProcessor {

	/**
	 * Converts "###COLSPAN_<N>###" markers found in wikitext table cells into
	 * real colspan="<N>" cell attributes.
	 *
	 * The marker is removed from the cell content and the attribute is either
	 * appended to the cell's existing attribute block or put into a new one.
	 * Up to N-1 directly following empty cells are dropped afterwards (the
	 * original implementation describes them as redundant cells produced by
	 * Pandoc). Lines that are not empty cells are never dropped, so real cells,
	 * row separators and the table end survive.
	 *
	 * Lines inside a table ("{|" ... "|}") are trimmed; lines outside of tables
	 * are passed through untouched so that significant leading whitespace is kept.
	 *
	 * @param string $text Wikitext
	 * @return string Wikitext with the markers resolved
	 */
	public function process( string $text ): string {
		$lines = explode( "\n", $text );
		$lineCount = count( $lines );

		$result = [];
		$inTable = false;

		for ( $index = 0; $index < $lineCount; $index++ ) {
			$rawLine = $lines[$index];
			$line = trim( $rawLine );

			if ( strpos( $line, '{|' ) === 0 ) {
				$inTable = true;
			}
			// The closing "|}" line still belongs to the table, so pick the
			// output form before leaving the table state
			$current = $inTable ? $line : $rawLine;
			if ( strpos( $line, '|}' ) === 0 ) {
				$inTable = false;
			}

			$cell = strpos( $line, '###COLSPAN_' ) !== false ? $this->parseCell( $line ) : [];
			$matches = [];
			if ( $cell === [] || !preg_match( '/###COLSPAN_(\d+)###/', $cell[2], $matches ) ) {
				// Nothing to do for this line (no marker, or not a table cell)
				$result[] = $current;
				continue;
			}

			$colspanCount = (int)$matches[1];

			// Remove "###COLSPAN_<N>###" from the cell content
			$cell[2] = str_replace( $matches[0], '', $cell[2] );

			// colspan="1" is the default, so an attribute is only needed from 2 on
			if ( $colspanCount > 1 ) {
				$attribute = "colspan=\"$colspanCount\"";
				$cell[1] = $cell[1] === null ? $attribute : $cell[1] . ' ' . $attribute;
			}

			$result[] = $cell[0] . ( $cell[1] === null ? '' : $cell[1] . '|' ) . $cell[2];

			// Drop the empty cells directly following the spanning cell, but stop
			// at the first line which is anything else than an empty cell
			for ( $expected = $colspanCount - 1; $expected > 0 && $index + 1 < $lineCount; $expected-- ) {
				$next = $this->parseCell( trim( $lines[$index + 1] ) );
				if ( $next === [] || trim( $next[2] ) !== '' ) {
					break;
				}
				$index++;
			}
		}

		return implode( "\n", $result );
	}

	/**
	 * Splits a (trimmed) wikitext table cell line into its parts.
	 *
	 * The first "|" after the cell marker separates attributes from content,
	 * unless the part before it contains "[[" - then the pipe belongs to a link
	 * and the cell has no attribute block (same rule the MediaWiki parser applies).
	 *
	 * @param string $line Trimmed line
	 * @return array Empty array if the line is not a cell line ("|-", "|}", "|+"
	 *  and any line not starting with "|" or "!"), otherwise
	 *  [ marker, attributes or null, content ]
	 */
	private function parseCell( string $line ): array {
		if ( $line === '' ) {
			return [];
		}

		$marker = $line[0];
		if ( $marker !== '|' && $marker !== '!' ) {
			return [];
		}

		// Row separator, table end and caption are not cells
		if ( $marker === '|' && isset( $line[1] ) && strpos( '-}+', $line[1] ) !== false ) {
			return [];
		}

		$rest = (string)substr( $line, 1 );
		$parts = explode( '|', $rest, 2 );
		if ( count( $parts ) === 2 && strpos( $parts[0], '[[' ) === false ) {
			return [ $marker, $parts[0], $parts[1] ];
		}

		return [ $marker, null, $rest ];
	}
}
149 changes: 149 additions & 0 deletions src/Converter/PostProcessors/Table/Rowspan.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@
<?php

namespace HalloWelt\MigrateDokuwiki\Converter\PostProcessors\Table;

use HalloWelt\MigrateDokuwiki\IProcessor;

class Rowspan implements IProcessor {

	/**
	 * Converts DokuWiki style rowspans into rowspan="<N>" cell attributes.
	 *
	 * A cell whose only content is ":::" extends the cell above it (same column
	 * position, counted per row). Such placeholder cells are removed and the
	 * first non-placeholder cell above gets the "rowspan" attribute, either
	 * appended to its existing attribute block or put into a new one.
	 *
	 * A ":::" cell without any cell above it is left untouched, because there is
	 * nothing it could extend.
	 *
	 * Lines inside a table ("{|" ... "|}") are trimmed; lines outside of tables
	 * are passed through untouched so that significant leading whitespace is kept.
	 *
	 * @param string $text Wikitext
	 * @return string Wikitext with rowspans applied
	 */
	public function process( string $text ): string {
		$lines = explode( "\n", $text );

		$isTable = false;

		$rowIndex = 0;
		$colIndex = 0;

		$linesToDelete = [];

		// [row][col] => rowspan value for the cell at that position
		$rowspanCounts = [];
		// [row][col] => true for cells which only contain ":::"
		$placeholders = [];
		// [row][col] => index of the line holding that cell.
		// Lets us touch only the lines which need a "rowspan" attribute
		// without walking through the whole table once more
		$cellCoordsToLineMap = [];

		foreach ( $lines as $lineIndex => $rawLine ) {
			$line = trim( $rawLine );

			if ( strpos( $line, '{|' ) === 0 ) {
				// Table start - every table gets its own bookkeeping
				$isTable = true;

				$rowIndex = 0;
				$colIndex = 0;

				$rowspanCounts = [];
				$placeholders = [];
				$cellCoordsToLineMap = [];

				$lines[$lineIndex] = $line;

				continue;
			}

			if ( !$isTable ) {
				continue;
			}

			$lines[$lineIndex] = $line;

			if ( strpos( $line, '|}' ) === 0 ) {
				// Table end - now all rowspan values of this table are known
				$isTable = false;

				$lines = $this->applyRowspans( $lines, $rowspanCounts, $cellCoordsToLineMap );
				$rowspanCounts = [];

				continue;
			}

			// Row separator
			if ( strpos( $line, '|-' ) === 0 ) {
				$rowIndex++;
				$colIndex = 0;

				continue;
			}

			// Only real cell lines take part in column counting
			$cell = $this->parseCell( $line );
			if ( $cell === [] ) {
				continue;
			}

			$colIndex++;
			$cellCoordsToLineMap[$rowIndex][$colIndex] = $lineIndex;

			if ( trim( $cell[2] ) !== ':::' ) {
				continue;
			}

			// Any amount of cells can be united vertically, in that case there is
			// a corresponding amount of vertically aligned ":::" cells.
			// Walk up until the first cell which is not such a placeholder
			$anchorRow = $rowIndex - 1;
			while ( isset( $placeholders[$anchorRow][$colIndex] ) ) {
				$anchorRow--;
			}

			if ( !isset( $cellCoordsToLineMap[$anchorRow][$colIndex] ) ) {
				// There is no cell above which could be extended
				continue;
			}

			$placeholders[$rowIndex][$colIndex] = true;
			$linesToDelete[] = $lineIndex;

			// First placeholder makes it "2", each further one adds 1
			$rowspanCounts[$anchorRow][$colIndex] = ( $rowspanCounts[$anchorRow][$colIndex] ?? 1 ) + 1;
		}

		if ( $isTable ) {
			// Table was never closed, still apply what was collected for it
			$lines = $this->applyRowspans( $lines, $rowspanCounts, $cellCoordsToLineMap );
		}

		// Remove all cells containing only ":::"
		foreach ( $linesToDelete as $lineToDelete ) {
			unset( $lines[$lineToDelete] );
		}

		return implode( "\n", $lines );
	}

	/**
	 * Adds the collected "rowspan" attributes to the corresponding cell lines.
	 *
	 * @param array $lines All lines of the text
	 * @param array $rowspanCounts [row][col] => rowspan value
	 * @param array $cellCoordsToLineMap [row][col] => line index
	 * @return array Lines with "rowspan" attributes added
	 */
	private function applyRowspans( array $lines, array $rowspanCounts, array $cellCoordsToLineMap ): array {
		foreach ( $rowspanCounts as $row => $rowspanCells ) {
			foreach ( $rowspanCells as $col => $rowspanCount ) {
				if ( !isset( $cellCoordsToLineMap[$row][$col] ) ) {
					continue;
				}

				$target = $cellCoordsToLineMap[$row][$col];

				$cell = $this->parseCell( trim( $lines[$target] ) );
				if ( $cell === [] ) {
					continue;
				}

				// If the cell already has a block with HTML attributes (like "style"
				// or "colspan") - append to it, otherwise add such block
				$attribute = "rowspan=\"$rowspanCount\"";
				$attributes = $cell[1] === null ? $attribute : $cell[1] . ' ' . $attribute;

				$lines[$target] = $cell[0] . $attributes . '|' . $cell[2];
			}
		}

		return $lines;
	}

	/**
	 * Splits a (trimmed) wikitext table cell line into its parts.
	 *
	 * The first "|" after the cell marker separates attributes from content,
	 * unless the part before it contains "[[" - then the pipe belongs to a link
	 * and the cell has no attribute block (same rule the MediaWiki parser applies).
	 *
	 * @param string $line Trimmed line
	 * @return array Empty array if the line is not a cell line ("|-", "|}", "|+"
	 *  and any line not starting with "|" or "!"), otherwise
	 *  [ marker, attributes or null, content ]
	 */
	private function parseCell( string $line ): array {
		if ( $line === '' ) {
			return [];
		}

		$marker = $line[0];
		if ( $marker !== '|' && $marker !== '!' ) {
			return [];
		}

		// Row separator, table end and caption are not cells
		if ( $marker === '|' && isset( $line[1] ) && strpos( '-}+', $line[1] ) !== false ) {
			return [];
		}

		$rest = (string)substr( $line, 1 );
		$parts = explode( '|', $rest, 2 );
		if ( count( $parts ) === 2 && strpos( $parts[0], '[[' ) === false ) {
			return [ $marker, $parts[0], $parts[1] ];
		}

		return [ $marker, null, $rest ];
	}
}
46 changes: 46 additions & 0 deletions src/Converter/PreProcessors/Table/Colspan.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
<?php

namespace HalloWelt\MigrateDokuwiki\Converter\PreProcessors\Table;

use HalloWelt\MigrateDokuwiki\IProcessor;

class Colspan implements IProcessor {

	/**
	 * Replaces runs of two or more "|" in DokuWiki table lines with a
	 * "###COLSPAN_<N>###|" marker, where N is the length of the run.
	 *
	 * Only lines which (ignoring surrounding whitespace) start with "|" or "^"
	 * are considered table lines; those are trimmed and converted. All other
	 * lines are passed through byte-identical, because leading whitespace is
	 * significant in DokuWiki (lists, preformatted blocks).
	 *
	 * A run at the very start of a line is left untouched: there is no preceding
	 * cell it could belong to, and replacing it would remove the leading "|"
	 * which marks the line as a table row.
	 *
	 * @param string $text DokuWiki text
	 * @return string Text with colspan markers
	 */
	public function process( string $text ): string {
		$lines = explode( "\n", $text );

		foreach ( $lines as $index => $line ) {
			$tableLine = trim( $line );

			// Each table line has either "|" or "^" at the line start
			if ( $tableLine === '' || ( $tableLine[0] !== '|' && $tableLine[0] !== '^' ) ) {
				continue;
			}

			// The lookbehind requires a non-pipe character right before the run,
			// so the whole run is matched at once and a leading run is skipped
			$converted = preg_replace_callback(
				'/(?<=[^|])\|{2,}/',
				static function ( array $matches ) {
					return '###COLSPAN_' . strlen( $matches[0] ) . '###|';
				},
				$tableLine
			);

			// preg_replace_callback() returns null on failure, keep the line then
			$lines[$index] = $converted ?? $tableLine;
		}

		return implode( "\n", $lines );
	}
}
22 changes: 22 additions & 0 deletions tests/phpunit/Converter/PostProcessor/Table/ColspanTest.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
<?php

namespace HalloWelt\MigrateDokuwiki\Tests\Converter\PostProcessor\Table;

use HalloWelt\MigrateDokuwiki\Converter\PostProcessors\Table\Colspan;
use PHPUnit\Framework\TestCase;

class ColspanTest extends TestCase {

	/**
	 * Runs the colspan post-processor on the input fixture and compares the
	 * result with the expected output fixture.
	 *
	 * @covers \HalloWelt\MigrateDokuwiki\Converter\PostProcessors\Table\Colspan::process()
	 */
	public function testProcess() {
		$fixtureDir = dirname( __DIR__, 3 ) . '/data/Converter/PostProcessor/Table';

		$wikitext = file_get_contents( "$fixtureDir/colspan-input.txt" );
		$expected = file_get_contents( "$fixtureDir/colspan-output.txt" );

		$this->assertEquals(
			$expected,
			( new Colspan() )->process( $wikitext )
		);
	}
}
22 changes: 22 additions & 0 deletions tests/phpunit/Converter/PostProcessor/Table/RowspanTest.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
<?php

namespace HalloWelt\MigrateDokuwiki\Tests\Converter\PostProcessor\Table;

use HalloWelt\MigrateDokuwiki\Converter\PostProcessors\Table\Rowspan;
use PHPUnit\Framework\TestCase;

class RowspanTest extends TestCase {

	/**
	 * Runs the rowspan post-processor on the input fixture and compares the
	 * result with the expected output fixture.
	 *
	 * @covers \HalloWelt\MigrateDokuwiki\Converter\PostProcessors\Table\Rowspan::process()
	 */
	public function testProcess() {
		$fixtureDir = dirname( __DIR__, 3 ) . '/data/Converter/PostProcessor/Table';

		$wikitext = file_get_contents( "$fixtureDir/rowspan-input.txt" );
		$expected = file_get_contents( "$fixtureDir/rowspan-output.txt" );

		$this->assertEquals(
			$expected,
			( new Rowspan() )->process( $wikitext )
		);
	}
}
22 changes: 22 additions & 0 deletions tests/phpunit/Converter/PreProcessor/Table/ColspanTest.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
<?php

namespace HalloWelt\MigrateDokuwiki\Tests\Converter\PreProcessor\Table;

use HalloWelt\MigrateDokuwiki\Converter\PreProcessors\Table\Colspan;
use PHPUnit\Framework\TestCase;

class ColspanTest extends TestCase {

	/**
	 * Runs the colspan pre-processor on the input fixture and compares the
	 * result with the expected output fixture.
	 *
	 * @covers \HalloWelt\MigrateDokuwiki\Converter\PreProcessors\Table\Colspan::process()
	 */
	public function testProcess() {
		$fixtureDir = dirname( __DIR__, 3 ) . '/data/Converter/PreProcessor/Table';

		$dokuwikiText = file_get_contents( "$fixtureDir/colspan-input.txt" );
		$expected = file_get_contents( "$fixtureDir/colspan-output.txt" );

		$this->assertEquals(
			$expected,
			( new Colspan() )->process( $dokuwikiText )
		);
	}
}
Loading

0 comments on commit fcfee2a

Please sign in to comment.