Skip to content

Commit

Permalink
Improve internal codebase
Browse files Browse the repository at this point in the history
  • Loading branch information
nyamsprod committed Sep 28, 2023
1 parent 391fffb commit e415048
Show file tree
Hide file tree
Showing 2 changed files with 45 additions and 50 deletions.
81 changes: 34 additions & 47 deletions src/Parser.php
Original file line number Diff line number Diff line change
Expand Up @@ -51,11 +51,10 @@ final class Parser
* @param array<string, int> $includedSections
*/
private function __construct(
private readonly string $expression,
private readonly string $tableExpression,
private readonly array $tableHeader,
private readonly bool $ignoreTableHeader,
private readonly Section $tableHeaderSection,
private readonly int $tableHeaderOffset,
private readonly string $tableHeaderExpression,
private readonly bool $throwOnXmlErrors,
private readonly array $includedSections,
private readonly ?Closure $formatter,
Expand All @@ -69,8 +68,7 @@ public static function new(): self
'(//table)[1]',
[],
false,
Section::thead,
0,
'(//table/thead/tr)[1]',
false,
[Section::tbody->value => 1, Section::tr->value => 1, Section::tfoot->value => 1],
null,
Expand All @@ -82,14 +80,13 @@ public function tableXPathPosition(string $expression): self
{
set_error_handler(fn (int $errno, string $errstr, string $errfile, int $errline) => true);
$newInstace = match (true) {
$expression === $this->expression => $this,
$expression === $this->tableExpression => $this,
false === (new DOMXPath(new DOMDocument()))->query($expression) => throw new ParserError('The xpath expression `'.$expression.'` is invalie.'),
default => new self(
$expression,
$this->tableHeader,
$this->ignoreTableHeader,
$this->tableHeaderSection,
$this->tableHeaderOffset,
$this->tableHeaderExpression,
$this->throwOnXmlErrors,
$this->includedSections,
$this->formatter,
Expand Down Expand Up @@ -128,11 +125,10 @@ public function tableHeader(array $headerRow): self
$headerRow !== ($filteredHeader = array_filter($headerRow, is_string(...))) => throw new ParserError('The header record contains non string colum names.'),
$headerRow !== array_unique($filteredHeader) => throw ParserError::dueToDuplicateHeaderColumnNames($headerRow),
default => new self(
$this->expression,
$this->tableExpression,
$headerRow,
$this->ignoreTableHeader,
$this->tableHeaderSection,
$this->tableHeaderOffset,
$this->tableHeaderExpression,
$this->throwOnXmlErrors,
$this->includedSections,
$this->formatter,
Expand All @@ -146,11 +142,10 @@ public function ignoreTableHeader(): self
return match ($this->ignoreTableHeader) {
true => $this,
false => new self(
$this->expression,
$this->tableExpression,
$this->tableHeader,
true,
$this->tableHeaderSection,
$this->tableHeaderOffset,
$this->tableHeaderExpression,
$this->throwOnXmlErrors,
$this->includedSections,
$this->formatter,
Expand All @@ -164,11 +159,10 @@ public function resolveTableHeader(): self
return match ($this->ignoreTableHeader) {
false => $this,
true => new self(
$this->expression,
$this->tableExpression,
$this->tableHeader,
false,
$this->tableHeaderSection,
$this->tableHeaderOffset,
$this->tableHeaderExpression,
$this->throwOnXmlErrors,
$this->includedSections,
$this->formatter,
Expand All @@ -182,15 +176,15 @@ public function resolveTableHeader(): self
*/
public function tableHeaderPosition(Section $section, int $offset = 0): self
{
return match (true) {
$section === $this->tableHeaderSection && $offset === $this->tableHeaderOffset => $this,
$offset < 0 => throw new ParserError('The table header row offset must be a positive integer or 0.'), /* @phpstan-ignore-line */
$expression = $section->xpathRow($offset);

return match ($this->tableHeaderExpression) {
$expression => $this,
default => new self(
$this->expression,
$this->tableExpression,
$this->tableHeader,
$this->ignoreTableHeader,
$section,
$offset,
$expression,
$this->throwOnXmlErrors,
$this->includedSections,
$this->formatter,
Expand All @@ -207,11 +201,10 @@ public function includeSection(Section $section): self
return match ($this->includedSections) {
$includedSections => $this,
default => new self(
$this->expression,
$this->tableExpression,
$this->tableHeader,
$this->ignoreTableHeader,
$this->tableHeaderSection,
$this->tableHeaderOffset,
$this->tableHeaderExpression,
$this->throwOnXmlErrors,
$includedSections,
$this->formatter,
Expand All @@ -228,11 +221,10 @@ public function excludeSection(Section $section): self
return match ($this->includedSections) {
$includedSections => $this,
default => new self(
$this->expression,
$this->tableExpression,
$this->tableHeader,
$this->ignoreTableHeader,
$this->tableHeaderSection,
$this->tableHeaderOffset,
$this->tableHeaderExpression,
$this->throwOnXmlErrors,
$includedSections,
$this->formatter,
Expand All @@ -246,11 +238,10 @@ public function failOnXmlErrors(): self
return match ($this->throwOnXmlErrors) {
true => $this,
false => new self(
$this->expression,
$this->tableExpression,
$this->tableHeader,
$this->ignoreTableHeader,
$this->tableHeaderSection,
$this->tableHeaderOffset,
$this->tableHeaderExpression,
true,
$this->includedSections,
$this->formatter,
Expand All @@ -264,11 +255,10 @@ public function ignoreXmlErrors(): self
return match ($this->throwOnXmlErrors) {
false => $this,
true => new self(
$this->expression,
$this->tableExpression,
$this->tableHeader,
$this->ignoreTableHeader,
$this->tableHeaderSection,
$this->tableHeaderOffset,
$this->tableHeaderExpression,
false,
$this->includedSections,
$this->formatter,
Expand All @@ -280,11 +270,10 @@ public function ignoreXmlErrors(): self
public function withFormatter(Closure $formatter): self
{
return new self(
$this->expression,
$this->tableExpression,
$this->tableHeader,
$this->ignoreTableHeader,
$this->tableHeaderSection,
$this->tableHeaderOffset,
$this->tableHeaderExpression,
$this->throwOnXmlErrors,
$this->includedSections,
$formatter,
Expand All @@ -297,11 +286,10 @@ public function withoutFormatter(): self
return match (null) {
$this->formatter => $this,
default => new self(
$this->expression,
$this->tableExpression,
$this->tableHeader,
$this->ignoreTableHeader,
$this->tableHeaderSection,
$this->tableHeaderOffset,
$this->tableHeaderExpression,
$this->throwOnXmlErrors,
$this->includedSections,
null,
Expand All @@ -315,11 +303,10 @@ public function tableCaption(?string $caption = null): self
return match ($this->caption) {
$caption => $this,
default => new self(
$this->expression,
$this->tableExpression,
$this->tableHeader,
$this->ignoreTableHeader,
$this->tableHeaderSection,
$this->tableHeaderOffset,
$this->tableHeaderExpression,
$this->throwOnXmlErrors,
$this->includedSections,
$this->formatter,
Expand Down Expand Up @@ -365,7 +352,7 @@ public function parseFile(mixed $filenameOrStream, $filenameContext = null): Tab
public function parseHtml(DOMDocument|DOMElement|SimpleXMLElement|Stringable|string $source): Table
{
/** @var DOMNodeList<DOMElement> $query */
$query = (new DOMXPath($this->sourceToDomDocument($source)))->query($this->expression);
$query = (new DOMXPath($this->sourceToDomDocument($source)))->query($this->tableExpression);
$table = $query->item(0);
if (!$table instanceof DOMElement) {
throw new ParserError('The HTML table could not be found in the submitted html.');
Expand Down Expand Up @@ -446,9 +433,9 @@ private function sourceToDomDocument(DOMDocument|SimpleXMLElement|DOMElement|Str
private function extractTableHeader(DOMXPath $xpath): array
{
/** @var DOMNodeList<DOMElement> $query */
$query = $xpath->query($this->tableHeaderSection->xpath());
$query = $xpath->query($this->tableHeaderExpression);
/** @var DOMElement|null $tr */
$tr = $query->item($this->tableHeaderOffset);
$tr = $query->item(0);

return match (null) {
$tr => [],
Expand Down
14 changes: 11 additions & 3 deletions src/Section.php
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,19 @@ enum Section: string
case tfoot = 'tfoot';
case tr = 'tr';

public function xpath(): string
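/**
* Returns the XPath expression selecting a single `tr` row for this section.
*
* The zero-based offset is converted to the one-based position used by XPath predicates.
*
* @param int<0, max> $offset zero-based index of the row to select
*
* @throws ParserError if the offset is negative
*/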
public function xpathRow(int $offset = 0): string
{
if ($offset < 0) { /* @phpstan-ignore-line */
throw new ParserError('The table header row offset must be a positive integer or 0.');
}

++$offset;
return match ($this) {
self::tr => '//table/tr',
default => '//table/'.$this->name.'/tr',
self::tr => '(//table/tr)['.$offset.']',
default => '(//table/'.$this->name.'/tr)['.$offset.']',
};
}
}

0 comments on commit e415048

Please sign in to comment.