Skip to content

Commit

Permalink
- Make TocBuilder.php more robust
Browse files Browse the repository at this point in the history
- Fix tests
  • Loading branch information
doefom committed Sep 14, 2024
1 parent 9adcfbe commit 6aa86dc
Show file tree
Hide file tree
Showing 2 changed files with 63 additions and 61 deletions.
25 changes: 14 additions & 11 deletions src/Classes/TocBuilder.php
Original file line number Diff line number Diff line change
Expand Up @@ -107,18 +107,22 @@ public function getTocMarkup(): string
public function addIdsToHeadings(): string
{
$doc = new DOMDocument;
$doc->loadHTML($this->html);
$doc->loadHTML(mb_convert_encoding($this->html, 'HTML-ENTITIES', 'UTF-8'));

if (trim($this->html) === '') {
return '';
}

$xpath = new DOMXPath($doc);
$headings = $xpath->query('//h1|//h2|//h3|//h4|//h5|//h6');

$usedSlugs = collect();
foreach ($headings as $heading) {
$slug = $this->slugify($heading->textContent);
$slug = Str::slug($heading->textContent);
$suffix = 1;

while ($usedSlugs->contains($slug)) {
$slug = $this->slugify($heading->textContent).'-'.$suffix;
$slug = Str::slug($heading->textContent).'-'.$suffix;
$suffix++;
}

Expand All @@ -140,8 +144,12 @@ private function getHeadingsFormatted(): Collection
$minLevel = $this->minLevel;
$maxLevel = $this->maxLevel;

if (trim($this->html) === '') {
return collect();
}

$doc = new DOMDocument;
$doc->loadHTML($this->html);
$doc->loadHTML(mb_convert_encoding($this->html, 'HTML-ENTITIES', 'UTF-8'));

$xpath = new DOMXPath($doc);
$range = collect(range($minLevel, $maxLevel));
Expand All @@ -155,12 +163,12 @@ private function getHeadingsFormatted(): Collection
foreach ($headingNodes as $headingNode) {
$level = intval($headingNode->nodeName[1]);
$text = $headingNode->textContent;
$slug = $this->slugify($text);
$slug = Str::slug($text);

// Ensure the slug is unique for this table of contents
$suffix = 1;
while ($usedSlugs->contains($slug)) {
$slug = $this->slugify($text).'-'.$suffix;
$slug = Str::slug($text).'-'.$suffix;
$suffix++;
}

Expand All @@ -177,9 +185,4 @@ private function getHeadingsFormatted(): Collection

return $headings;
}

private function slugify(string $text): string
{
return Str::slug(html_entity_decode($text));
}
}
99 changes: 49 additions & 50 deletions tests/TocBuilderTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -6,53 +6,33 @@

class TocBuilderTest extends TestCase
{
// Increasing heading levels
const HTML01 = <<<'HTML'
<h1>Heading 01</h1>
<h2>Heading 02</h2>
<h3>Heading 03</h3>
<h4>Heading 04</h4>
<h5>Heading 05</h5>
<h6>Heading 06</h6>
HTML;

// Decreasing heading levels
const HTML02 = <<<'HTML'
<h6>Heading 06</h6>
<h5>Heading 05</h5>
<h4>Heading 04</h4>
<h3>Heading 03</h3>
<h2>Heading 02</h2>
<h1>Heading 01</h1>
HTML;

// Increasing and decreasing heading levels
const HTML03 = <<<'HTML'
<h1>Heading 01</h1>
<h2>Heading 02</h2>
<h3>Heading 03</h3>
<h2>Heading 02</h2>
<h1>Heading 01</h1>
HTML;

// Real world structure
const HTML04 = <<<'HTML'
<h2 id="heading-02">Heading 02</h2>
const REAL_WORLD_HTML = "
<h2>Heading 02</h2>
<p>Some text after the first heading.</p>
<ul><li><p>Has this bullet point</p></li><li><p>And this one</p></li></ul>
<h3 id="heading-03">Heading 03</h3>
<h3>Heading 03</h3>
<p>Then there's a heading of a higher level.</p>
<h4 id="heading-04">Heading 04</h4><p>It has subcategories, so there's an even higher level.</p>
<h4 id="heading-04">Heading 04</h4><p>This one is one of the subcategories.</p>
<h2 id="heading-02">Heading 02</h2><p>And then at the end there's a whole other topic.</p>
HTML;
<h4>Heading 04</h4><p>It has subcategories, so there's an even higher level.</p>
<h4>Heading 04</h4><p>This one is one of the subcategories.</p>
<h2>Heading 02</h2><p>And then at the end there's a whole other topic.</p>
";

/**
* Test TOC with increasing heading levels.
*/
public function test_that_toc_matches_html_with_increasing_heading_levels(): void
{
$result = (new TocBuilder(self::HTML01))->getTocMarkup();
$html = '
<h1>Heading 01</h1>
<h2>Heading 02</h2>
<h3>Heading 03</h3>
<h4>Heading 04</h4>
<h5>Heading 05</h5>
<h6>Heading 06</h6>
';

$result = (new TocBuilder($html))->getTocMarkup();
$expected = '<ul><li><a href="#heading-01">Heading 01</a></li><ul><li><a href="#heading-02">Heading 02</a></li><ul><li><a href="#heading-03">Heading 03</a></li><ul><li><a href="#heading-04">Heading 04</a></li><ul><li><a href="#heading-05">Heading 05</a></li><ul><li><a href="#heading-06">Heading 06</a></li></ul></ul></ul></ul></ul></ul>';

$this->assertEquals($expected, $result);
Expand All @@ -63,47 +43,64 @@ public function test_that_toc_matches_html_with_increasing_heading_levels(): voi
*/
public function test_that_toc_matches_html_with_decreasing_heading_levels(): void
{
$result = (new TocBuilder(self::HTML02))->getTocMarkup();
$html = '
<h6>Heading 06</h6>
<h5>Heading 05</h5>
<h4>Heading 04</h4>
<h3>Heading 03</h3>
<h2>Heading 02</h2>
<h1>Heading 01</h1>
';

$result = (new TocBuilder($html))->getTocMarkup();
$expected = '<ul><ul><ul><ul><ul><ul><li><a href="#heading-06">Heading 06</a></li></ul><li><a href="#heading-05">Heading 05</a></li></ul><li><a href="#heading-04">Heading 04</a></li></ul><li><a href="#heading-03">Heading 03</a></li></ul><li><a href="#heading-02">Heading 02</a></li></ul><li><a href="#heading-01">Heading 01</a></li></ul>';

$this->assertEquals($expected, $result);
}

public function test_that_toc_matches_html_with_increasing_and_decreasing_heading_levels(): void
{
$result = (new TocBuilder(self::HTML03))->getTocMarkup();
$expected = '<ul><li><a href="#heading-01">Heading 01</a></li><ul><li><a href="#heading-02">Heading 02</a></li><ul><li><a href="#heading-03">Heading 03</a></li></ul><li><a href="#heading-02">Heading 02</a></li></ul><li><a href="#heading-01">Heading 01</a></li></ul>';
$html = '
<h1>Heading 01</h1>
<h2>Heading 02</h2>
<h3>Heading 03</h3>
<h2>Heading 02</h2>
<h1>Heading 01</h1>
';

$result = (new TocBuilder($html))->getTocMarkup();
$expected = '<ul><li><a href="#heading-01">Heading 01</a></li><ul><li><a href="#heading-02">Heading 02</a></li><ul><li><a href="#heading-03">Heading 03</a></li></ul><li><a href="#heading-02-1">Heading 02</a></li></ul><li><a href="#heading-01-1">Heading 01</a></li></ul>';

$this->assertEquals($expected, $result);
}

public function test_that_toc_matches_real_world_structure(): void
{
$result = (new TocBuilder(self::HTML04))->getTocMarkup();
$expected = '<ul><li><a href="#heading-02">Heading 02</a></li><ul><li><a href="#heading-03">Heading 03</a></li><ul><li><a href="#heading-04">Heading 04</a></li><li><a href="#heading-04">Heading 04</a></li></ul></ul><li><a href="#heading-02">Heading 02</a></li></ul>';
$result = (new TocBuilder(self::REAL_WORLD_HTML))->getTocMarkup();
$expected = '<ul><li><a href="#heading-02">Heading 02</a></li><ul><li><a href="#heading-03">Heading 03</a></li><ul><li><a href="#heading-04">Heading 04</a></li><li><a href="#heading-04-1">Heading 04</a></li></ul></ul><li><a href="#heading-02-1">Heading 02</a></li></ul>';

$this->assertEquals($expected, $result);
}

public function test_that_min_and_max_levels_can_be_set(): void
{
$builder = new TocBuilder(self::HTML04);
$builder = new TocBuilder(self::REAL_WORLD_HTML);
$builder->setMinLevel(2);
$builder->setMaxLevel(3);

$result = $builder->getTocMarkup();
$expected = '<ul><li><a href="#heading-02">Heading 02</a></li><ul><li><a href="#heading-03">Heading 03</a></li></ul><li><a href="#heading-02">Heading 02</a></li></ul>';
$expected = '<ul><li><a href="#heading-02">Heading 02</a></li><ul><li><a href="#heading-03">Heading 03</a></li></ul><li><a href="#heading-02-1">Heading 02</a></li></ul>';

$this->assertEquals($expected, $result);
}

public function test_that_toc_can_be_ordered(): void
{
$builder = new TocBuilder(self::HTML04);
$builder = new TocBuilder(self::REAL_WORLD_HTML);
$builder->setOrdered(true);

$result = $builder->getTocMarkup();
$expected = '<ol><li><a href="#heading-02">Heading 02</a></li><ol><li><a href="#heading-03">Heading 03</a></li><ol><li><a href="#heading-04">Heading 04</a></li><li><a href="#heading-04">Heading 04</a></li></ol></ol><li><a href="#heading-02">Heading 02</a></li></ol>';
$expected = '<ol><li><a href="#heading-02">Heading 02</a></li><ol><li><a href="#heading-03">Heading 03</a></li><ol><li><a href="#heading-04">Heading 04</a></li><li><a href="#heading-04-1">Heading 04</a></li></ol></ol><li><a href="#heading-02-1">Heading 02</a></li></ol>';

$this->assertEquals($expected, $result);
}
Expand Down Expand Up @@ -132,7 +129,7 @@ public function test_headings_with_special_characters_and_html_entities(): void
{
$html = '<h2>Some &amp; Others</h2><h3>A heading with "quotes" in it</h3>';
$result = (new TocBuilder($html))->getTocMarkup();
$expected = '<ul><li><a href="#some-others">Some &amp; Others</a></li><ul><li><a href="#a-heading-with-quotes-in-it">A heading with "quotes" in it</a></li></ul></ul>';
$expected = '<ul><li><a href="#some-others">Some & Others</a></li><ul><li><a href="#a-heading-with-quotes-in-it">A heading with "quotes" in it</a></li></ul></ul>';

$this->assertEquals($expected, $result);
}
Expand All @@ -157,8 +154,10 @@ public function test_skipped_heading_levels(): void

public function test_identical_headings(): void
{
// Working with identical headings will not crash the TOC builder but when clicking a link of one of the
// duplicates, the browser will always scroll to the first one.
$this->markTestSkipped('Identical headings are not yet checked for.');
$html = '<h2>Heading</h2><h2>Heading</h2>';
$result = (new TocBuilder($html))->getTocMarkup();
$expected = '<ul><li><a href="#heading">Heading</a></li><li><a href="#heading-1">Heading</a></li></ul>';

$this->assertEquals($expected, $result);
}
}

0 comments on commit 6aa86dc

Please sign in to comment.