Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

IBX-7987: Node filter for extraction text #155

Open
wants to merge 3 commits into
base: 4.6
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 19 additions & 0 deletions src/bundle/Resources/config/settings/fieldtype_services.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -70,3 +70,22 @@ services:

Ibexa\FieldTypeRichText\RichText\TextExtractor\FullTextExtractor: ~

# Default node filter for text extraction: resolves to the aggregate of all tagged filters.
Ibexa\Contracts\FieldTypeRichText\RichText\TextExtractor\NodeFilterInterface:
alias: Ibexa\FieldTypeRichText\RichText\TextExtractor\NodeFilter\AggregateFilter

# Factory contract, resolved to the bundled implementation.
Ibexa\Contracts\FieldTypeRichText\RichText\TextExtractor\NodeFilterFactoryInterface:
alias: Ibexa\FieldTypeRichText\RichText\TextExtractor\NodeFilter\NodeFilterFactory

Ibexa\FieldTypeRichText\RichText\TextExtractor\NodeFilter\NodeFilterFactory: ~

# Receives every service tagged with ibexa.field_type.richtext.text_extractor.node_filter.
Ibexa\FieldTypeRichText\RichText\TextExtractor\NodeFilter\AggregateFilter:
arguments:
$filters: !tagged ibexa.field_type.richtext.text_extractor.node_filter

# Path filter built through the factory with the path ('eztemplate', 'ezconfig').
# With the bundled NodePathFilter this matches nodes named "ezconfig" whose parent is named "eztemplate".
ibexa.field_type.richtext.text_extractor.node_filter.template:
class: Ibexa\Contracts\FieldTypeRichText\RichText\TextExtractor\NodeFilterInterface
factory: ['@Ibexa\Contracts\FieldTypeRichText\RichText\TextExtractor\NodeFilterFactoryInterface', 'createPathFilter']
arguments: ['eztemplate', 'ezconfig']
tags:
- { name: ibexa.field_type.richtext.text_extractor.node_filter }

Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
<?php

/**
* @copyright Copyright (C) Ibexa AS. All rights reserved.
* @license For full copyright and license information view LICENSE file distributed with this source code.
*/
declare(strict_types=1);

namespace Ibexa\Contracts\FieldTypeRichText\RichText\TextExtractor;

/**
* Creates node filters used during text extraction.
*/
interface NodeFilterFactoryInterface
{
/**
* Creates a filter matching nodes by a path of node names.
*
* NOTE(review): the bundled NodePathFilter treats the last name as the node itself and the preceding
* names as its successive ancestors (closest ancestor last) - confirm this is the intended contract
* for all implementations.
*
* @param string ...$path node names making up the path
*/
public function createPathFilter(string ...$path): NodeFilterInterface;
}
22 changes: 22 additions & 0 deletions src/contracts/RichText/TextExtractor/NodeFilterInterface.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
<?php

/**
* @copyright Copyright (C) Ibexa AS. All rights reserved.
* @license For full copyright and license information view LICENSE file distributed with this source code.
*/
declare(strict_types=1);

namespace Ibexa\Contracts\FieldTypeRichText\RichText\TextExtractor;

use DOMNode;

/**
* Filters nodes for text extraction.
*/
interface NodeFilterInterface
{
/**
* Decides whether the given node is excluded from text extraction.
*
* Note that the return value is inverted compared to array_filter()-style callbacks:
* return false to preserve the node, true to remove it.
*
* @param \DOMNode $node node being considered for extraction
*
* @return bool true when the node must be removed, false when it must be preserved
*/
public function filter(DOMNode $node): bool;
Comment on lines +18 to +21
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

IMHO the result of false/true combined with filter might be a bit misleading. My first thought was that it should work the opposite way. Maybe changing it to filterOut would be more clear?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I personally stand with the original naming.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I have to agree with @alongosz here.

Most methods (ArrayCollection::filter) and functions (array_filter) work in the opposite way:

  • when true, an entry is preserved
  • when false, an entry is removed

Therefore, I'd suggest reversing the logic to comply with generally established PHP practice.

}
14 changes: 13 additions & 1 deletion src/lib/RichText/TextExtractor/FullTextExtractor.php
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@

use DOMDocument;
use DOMNode;
use Ibexa\Contracts\FieldTypeRichText\RichText\TextExtractor\NodeFilterInterface;
use Ibexa\Contracts\FieldTypeRichText\RichText\TextExtractorInterface;

/**
Expand All @@ -19,6 +20,13 @@
*/
final class FullTextExtractor implements TextExtractorInterface
{
/** Filter consulted for each visited node; a node it reports as excluded contributes an empty string. */
private NodeFilterInterface $filter;

/**
* @param \Ibexa\Contracts\FieldTypeRichText\RichText\TextExtractor\NodeFilterInterface $filter filter deciding which nodes are excluded from the extracted text
*/
public function __construct(NodeFilterInterface $filter)
{
$this->filter = $filter;
}

public function extractText(DOMDocument $document): string
{
return null !== $document->documentElement
Expand All @@ -28,8 +36,12 @@ public function extractText(DOMDocument $document): string

private function extractTextFromNode(DOMNode $node): string
{
$text = '';
if ($this->filter->filter($node) === true) {
// Node is excluded
return '';
}

$text = '';
if ($node->childNodes !== null && $node->childNodes->count() > 0) {
foreach ($node->childNodes as $child) {
$text .= $this->extractTextFromNode($child);
Expand Down
37 changes: 37 additions & 0 deletions src/lib/RichText/TextExtractor/NodeFilter/AggregateFilter.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
<?php

/**
* @copyright Copyright (C) Ibexa AS. All rights reserved.
* @license For full copyright and license information view LICENSE file distributed with this source code.
*/
declare(strict_types=1);

namespace Ibexa\FieldTypeRichText\RichText\TextExtractor\NodeFilter;

use DOMNode;
use Ibexa\Contracts\FieldTypeRichText\RichText\TextExtractor\NodeFilterInterface;

/**
 * Combines several node filters into one.
 *
 * A node is reported as excluded as soon as any of the inner filters excludes it;
 * the remaining filters are not consulted.
 */
final class AggregateFilter implements NodeFilterInterface
{
    /** @var iterable<\Ibexa\Contracts\FieldTypeRichText\RichText\TextExtractor\NodeFilterInterface> */
    private iterable $filters;

    /**
     * @param iterable<\Ibexa\Contracts\FieldTypeRichText\RichText\TextExtractor\NodeFilterInterface> $filters inner filters, consulted in iteration order
     */
    public function __construct(iterable $filters)
    {
        $this->filters = $filters;
    }

    /**
     * @return bool true when at least one inner filter excludes the node, false otherwise
     *              (including when there are no inner filters)
     */
    public function filter(DOMNode $node): bool
    {
        foreach ($this->filters as $candidate) {
            if (!$candidate->filter($node)) {
                // This filter preserves the node, keep asking the others.
                continue;
            }

            return true;
        }

        return false;
    }
}
20 changes: 20 additions & 0 deletions src/lib/RichText/TextExtractor/NodeFilter/NodeFilterFactory.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
<?php

/**
* @copyright Copyright (C) Ibexa AS. All rights reserved.
* @license For full copyright and license information view LICENSE file distributed with this source code.
*/
declare(strict_types=1);

namespace Ibexa\FieldTypeRichText\RichText\TextExtractor\NodeFilter;

use Ibexa\Contracts\FieldTypeRichText\RichText\TextExtractor\NodeFilterFactoryInterface;
use Ibexa\Contracts\FieldTypeRichText\RichText\TextExtractor\NodeFilterInterface;

/**
 * Default factory producing the bundled node filter implementations.
 */
final class NodeFilterFactory implements NodeFilterFactoryInterface
{
    /**
     * Builds a NodePathFilter for the given node names.
     *
     * @param string ...$path node names, passed to NodePathFilter unchanged
     */
    public function createPathFilter(string ...$path): NodeFilterInterface
    {
        $pathFilter = new NodePathFilter(...$path);

        return $pathFilter;
    }
}
40 changes: 40 additions & 0 deletions src/lib/RichText/TextExtractor/NodeFilter/NodePathFilter.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
<?php

/**
* @copyright Copyright (C) Ibexa AS. All rights reserved.
* @license For full copyright and license information view LICENSE file distributed with this source code.
*/
declare(strict_types=1);

namespace Ibexa\FieldTypeRichText\RichText\TextExtractor\NodeFilter;

use DOMNode;
use Ibexa\Contracts\FieldTypeRichText\RichText\TextExtractor\NodeFilterInterface;

/**
 * Matches a node by its own name and the names of its successive ancestors.
 *
 * For the path ('b', 'c') a node matches when it is named "c" and its parent is named "b".
 */
final class NodePathFilter implements NodeFilterInterface
{
    /**
     * Expected node names, stored from the node itself upwards (reverse of the constructor order).
     *
     * @var string[]
     */
    private array $path;

    /**
     * @param string ...$path node names, outermost ancestor first and the matched node last
     */
    public function __construct(string ...$path)
    {
        $this->path = array_reverse($path);
    }

    /**
     * @return bool true when the node and its ancestors match every name of the path, false otherwise
     */
    public function filter(DOMNode $node): bool
    {
        $current = $node;

        foreach ($this->path as $expectedName) {
            if ($current !== null && $current->nodeName === $expectedName) {
                // Name matches at this level; step up to check the next expected ancestor.
                $current = $current->parentNode;

                continue;
            }

            // Either the tree ended before the path did, or the name differs.
            return false;
        }

        return true;
    }
}
6 changes: 5 additions & 1 deletion tests/lib/RichText/TextExtractor/FullTextExtractorTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,17 @@

namespace Ibexa\Tests\FieldTypeRichText\RichText\TextExtractor;

use Ibexa\Contracts\FieldTypeRichText\RichText\TextExtractor\NodeFilterInterface;
use Ibexa\FieldTypeRichText\RichText\TextExtractor\FullTextExtractor;

final class FullTextExtractorTest extends BaseTest
{
/**
 * Builds the extractor under test with a filter stub that preserves every node,
 * so the extraction expectations are not affected by node filtering.
 */
protected function setUp(): void
{
    // FullTextExtractor requires a NodeFilterInterface; calling its constructor without one would throw.
    $filter = $this->createMock(NodeFilterInterface::class);
    $filter->method('filter')->willReturn(false);

    $this->textExtractor = new FullTextExtractor($filter);
}

public function providerForTestExtractText(): array
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
<?php

/**
* @copyright Copyright (C) Ibexa AS. All rights reserved.
* @license For full copyright and license information view LICENSE file distributed with this source code.
*/
declare(strict_types=1);

namespace Ibexa\Tests\FieldTypeRichText\RichText\TextExtractor\NodeFilter;

use DOMNode;
use Ibexa\Contracts\FieldTypeRichText\RichText\TextExtractor\NodeFilterInterface;
use Ibexa\FieldTypeRichText\RichText\TextExtractor\NodeFilter\AggregateFilter;
use PHPUnit\Framework\TestCase;

final class AggregateFilterTest extends TestCase
{
    /**
     * The aggregate consults its filters in order and stops at the first one that excludes the node.
     */
    public function testFilter(): void
    {
        $node = $this->createMock(DOMNode::class);

        // Preserves the node, so the aggregate must move on to the next filter.
        $preservingFilter = $this->createMock(NodeFilterInterface::class);
        $preservingFilter
            ->expects(self::once())
            ->method('filter')
            ->with($node)
            ->willReturn(false);

        // Excludes the node, which settles the result.
        $excludingFilter = $this->createMock(NodeFilterInterface::class);
        $excludingFilter
            ->expects(self::once())
            ->method('filter')
            ->with($node)
            ->willReturn(true);

        // Must never be reached once a previous filter excluded the node.
        $unreachedFilter = $this->createMock(NodeFilterInterface::class);
        $unreachedFilter
            ->expects(self::never())
            ->method('filter');

        $aggregateFilter = new AggregateFilter([$preservingFilter, $excludingFilter, $unreachedFilter]);
        $isExcluded = $aggregateFilter->filter($node);

        self::assertTrue($isExcluded);
    }
}
46 changes: 46 additions & 0 deletions tests/lib/RichText/TextExtractor/NodeFilter/NodePathFilterTest.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
<?php

/**
* @copyright Copyright (C) Ibexa AS. All rights reserved.
* @license For full copyright and license information view LICENSE file distributed with this source code.
*/
declare(strict_types=1);

namespace Ibexa\Tests\FieldTypeRichText\RichText\TextExtractor\NodeFilter;

use DOMDocument;
use DOMNode;
use DOMNodeList;
use DOMXPath;
use Ibexa\FieldTypeRichText\RichText\TextExtractor\NodeFilter\NodePathFilter;
use PHPUnit\Framework\TestCase;

final class NodePathFilterTest extends TestCase
{
    /**
     * A node matches only when its own name and the names of its ancestors follow the configured path.
     */
    public function testFilter(): void
    {
        $document = new DOMDocument();
        $document->loadXML('<a><b><c></c></b></a>');

        $nodeA = $this->getNode($document, '//a');
        $nodeB = $this->getNode($document, '//b');
        $nodeC = $this->getNode($document, '//c');

        // "b" is not named "c", so the path b/c does not match it.
        self::assertFalse((new NodePathFilter('b', 'c'))->filter($nodeB));
        // "c" with parent "b" matches the path b/c.
        self::assertTrue((new NodePathFilter('b', 'c'))->filter($nodeC));
        // "a" is not named "d", so a path ending in "d" does not match it.
        self::assertFalse((new NodePathFilter('a', 'b', 'c', 'd'))->filter($nodeA));
    }

    /**
     * Returns the first node matched by the XPath expression, failing the test when there is none.
     */
    private function getNode(DOMDocument $document, string $expression): DOMNode
    {
        $xpath = new DOMXPath($document);

        $results = $xpath->query($expression);
        // query() may return false, and item(0) returns null for an empty result list;
        // both cases must end in a readable test failure instead of a return type error.
        $node = $results instanceof DOMNodeList ? $results->item(0) : null;
        if (!$node instanceof DOMNode) {
            self::fail("Expression '$expression' did not return a node.");
        }

        return $node;
    }
}
Loading