Skip to content

Commit

Permalink
IBX-7987: Added extension point to skip nodes while extracting text
Browse files Browse the repository at this point in the history
  • Loading branch information
adamwojs committed Mar 20, 2024
1 parent 6c49bc4 commit b4ac7dc
Show file tree
Hide file tree
Showing 5 changed files with 125 additions and 1 deletion.
13 changes: 13 additions & 0 deletions src/bundle/Resources/config/settings/fieldtype_services.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -70,3 +70,16 @@ services:

Ibexa\FieldTypeRichText\RichText\TextExtractor\FullTextExtractor: ~

# Services that type-hint the NodeFilterInterface contract receive the aggregate filter.
Ibexa\Contracts\FieldTypeRichText\RichText\TextExtractor\NodeFilterInterface:
alias: Ibexa\FieldTypeRichText\RichText\TextExtractor\NodeFilter\AggregateFilter

# The aggregate is built from every service carrying the node_filter tag below,
# so additional filters are registered by tagging a service with that name.
Ibexa\FieldTypeRichText\RichText\TextExtractor\NodeFilter\AggregateFilter:
arguments:
$filters: !tagged ibexa.field_type.richtext.text_extractor.node_filter

# Path filter constructed with the element names "eztemplate" and "ezconfig"
# (parent first, node last) and registered in the aggregate through the tag.
ibexa.field_type.richtext.text_extractor.node_filter.template:
class: Ibexa\FieldTypeRichText\RichText\TextExtractor\NodeFilter\NodePathFilter
arguments: ['eztemplate', 'ezconfig']
tags:
- { name: ibexa.field_type.richtext.text_extractor.node_filter }

22 changes: 22 additions & 0 deletions src/contracts/RichText/TextExtractor/NodeFilterInterface.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
<?php

/**
* @copyright Copyright (C) Ibexa AS. All rights reserved.
* @license For full copyright and license information view LICENSE file distributed with this source code.
*/
declare(strict_types=1);

namespace Ibexa\Contracts\FieldTypeRichText\RichText\TextExtractor;

use DOMNode;

/**
 * Decides which DOM nodes are left out of text extraction.
 */
interface NodeFilterInterface
{
/**
 * Returns true when the given node must be skipped, false when its text should be extracted.
 *
 * FullTextExtractor returns an empty string for a node as soon as this method
 * returns true, before visiting any of the node's children.
 */
public function filter(DOMNode $node): bool;
}
14 changes: 13 additions & 1 deletion src/lib/RichText/TextExtractor/FullTextExtractor.php
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@

use DOMDocument;
use DOMNode;
use Ibexa\Contracts\FieldTypeRichText\RichText\TextExtractor\NodeFilterInterface;
use Ibexa\Contracts\FieldTypeRichText\RichText\TextExtractorInterface;

/**
Expand All @@ -19,6 +20,13 @@
*/
final class FullTextExtractor implements TextExtractorInterface
{
/** Filter consulted for each node before its text is collected. */
private NodeFilterInterface $filter;

/**
 * @param \Ibexa\Contracts\FieldTypeRichText\RichText\TextExtractor\NodeFilterInterface $filter
 *        Nodes for which this filter returns true contribute no text to the extraction result.
 */
public function __construct(NodeFilterInterface $filter)
{
$this->filter = $filter;
}

public function extractText(DOMDocument $document): string
{
return null !== $document->documentElement
Expand All @@ -28,8 +36,12 @@ public function extractText(DOMDocument $document): string

private function extractTextFromNode(DOMNode $node): string
{
$text = '';
if ($this->filter->filter($node) === true) {
// Node is excluded
return '';
}

$text = '';
if ($node->childNodes !== null && $node->childNodes->count() > 0) {
foreach ($node->childNodes as $child) {
$text .= $this->extractTextFromNode($child);
Expand Down
37 changes: 37 additions & 0 deletions src/lib/RichText/TextExtractor/NodeFilter/AggregateFilter.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
<?php

/**
* @copyright Copyright (C) Ibexa AS. All rights reserved.
* @license For full copyright and license information view LICENSE file distributed with this source code.
*/
declare(strict_types=1);

namespace Ibexa\FieldTypeRichText\RichText\TextExtractor\NodeFilter;

use DOMNode;
use Ibexa\Contracts\FieldTypeRichText\RichText\TextExtractor\NodeFilterInterface;

/**
 * Composite node filter delegating the decision to a collection of filters.
 *
 * The aggregate answers true as soon as one delegate answers true; when no
 * delegate does (or when there are no delegates at all) it answers false.
 */
final class AggregateFilter implements NodeFilterInterface
{
    /** @var iterable<\Ibexa\Contracts\FieldTypeRichText\RichText\TextExtractor\NodeFilterInterface> */
    private iterable $filters;

    /**
     * @param iterable<\Ibexa\Contracts\FieldTypeRichText\RichText\TextExtractor\NodeFilterInterface> $filters
     *        Delegates, consulted in iteration order.
     */
    public function __construct(iterable $filters)
    {
        $this->filters = $filters;
    }

    /**
     * Asks each delegate in turn and stops at the first one returning true.
     */
    public function filter(DOMNode $node): bool
    {
        $matched = false;

        foreach ($this->filters as $delegate) {
            if ($delegate->filter($node)) {
                $matched = true;
                // Remaining delegates cannot change the outcome.
                break;
            }
        }

        return $matched;
    }
}
40 changes: 40 additions & 0 deletions src/lib/RichText/TextExtractor/NodeFilter/NodePathFilter.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
<?php

/**
* @copyright Copyright (C) Ibexa AS. All rights reserved.
* @license For full copyright and license information view LICENSE file distributed with this source code.
*/
declare(strict_types=1);

namespace Ibexa\FieldTypeRichText\RichText\TextExtractor\NodeFilter;

use DOMNode;
use Ibexa\Contracts\FieldTypeRichText\RichText\TextExtractor\NodeFilterInterface;

/**
 * Node filter matching a node by its own element name and the names of its ancestors.
 *
 * Constructed with element names ordered from the outermost ancestor down to
 * the node itself, e.g. `new NodePathFilter('eztemplate', 'ezconfig')` matches
 * an `ezconfig` node whose direct parent is `eztemplate`.
 *
 * FullTextExtractor skips every node for which a filter returns true, so this
 * filter returns true only for nodes located at the configured path.
 */
final class NodePathFilter implements NodeFilterInterface
{
    /**
     * Path in reverse order (the node's own name first, then its parent, and so on).
     *
     * @var string[]
     */
    private array $path;

    /**
     * @param string ...$path Element names, outermost ancestor first, matched node last.
     */
    public function __construct(string ...$path)
    {
        $this->path = array_reverse($path);
    }

    /**
     * Returns true when the node and its ancestors match the configured path, false otherwise.
     */
    public function filter(DOMNode $node): bool
    {
        // An empty path matches no node; a vacuous match would exclude the whole document.
        if ($this->path === []) {
            return false;
        }

        // Walk up the tree with a separate, nullable cursor: parentNode is null above the document.
        $current = $node;
        foreach ($this->path as $name) {
            if ($current === null || $current->nodeName !== $name) {
                return false;
            }

            $current = $current->parentNode;
        }

        return true;
    }
}

0 comments on commit b4ac7dc

Please sign in to comment.