diff --git a/bridges/XPathBridge.php b/bridges/XPathBridge.php index 35ec6ad1001..d4a2dfb5a3c 100644 --- a/bridges/XPathBridge.php +++ b/bridges/XPathBridge.php @@ -17,7 +17,6 @@ class XPathBridge extends XPathAbstract (for example a news blog). EOL, 'type' => 'text', 'exampleValue' => 'https://news.blizzard.com/en-en', - 'defaultValue' => 'https://news.blizzard.com/en-en', 'required' => true ], @@ -30,7 +29,6 @@ class XPathBridge extends XPathAbstract starts with a single forward slash. EOL, 'type' => 'text', 'exampleValue' => '/html/body/div/div[4]/div[2]/div[2]/div/div/section/ol/li/article', - 'defaultValue' => '/html/body/div/div[4]/div[2]/div[2]/div/div/section/ol/li/article', 'required' => true ], @@ -42,7 +40,6 @@ class XPathBridge extends XPathAbstract forward slashes, referring to any descendant nodes of the article item node. EOL, 'type' => 'text', 'exampleValue' => './/div/div[2]/h2', - 'defaultValue' => './/div/div[2]/h2', 'required' => true ], @@ -55,7 +52,6 @@ class XPathBridge extends XPathAbstract article item node. EOL, 'type' => 'text', 'exampleValue' => './/div[@class="ArticleListItem-description"]/div[@class="h6"]', - 'defaultValue' => './/div[@class="ArticleListItem-description"]/div[@class="h6"]', 'required' => false ], @@ -80,7 +76,6 @@ class XPathBridge extends XPathAbstract before the attributes name. EOL, 'type' => 'text', 'exampleValue' => './/a[@class="ArticleLink ArticleLink"]/@href', - 'defaultValue' => './/a[@class="ArticleLink ArticleLink"]/@href', 'required' => false ], @@ -105,7 +100,6 @@ class XPathBridge extends XPathAbstract selected by prepending an @ char before the attributes name. EOL, 'type' => 'text', 'exampleValue' => './/time[@class="ArticleListItem-footerTimestamp"]/@timestamp', - 'defaultValue' => './/time[@class="ArticleListItem-footerTimestamp"]/@timestamp', 'required' => false ], @@ -119,7 +113,6 @@ class XPathBridge extends XPathAbstract can be selected by prepending an @ char before the attributes name. 
EOL, 'type' => 'text', 'exampleValue' => './/div[@class="ArticleListItem-image"]/@style', - 'defaultValue' => './/div[@class="ArticleListItem-image"]/@style', 'required' => false ], @@ -134,8 +127,7 @@ class XPathBridge extends XPathAbstract item node. Attributes can be selected by prepending an @ char before the attributes name. EOL, 'type' => 'text', - 'exampleValue' => './/div[@class="ArticleListItem-label"]', - 'defaultValue' => './/div[@class="ArticleListItem-label"]', + 'exampleValue' => 'polyfill:function("replace", .//div[@class="ArticleListItem-label"], "(diablo immortal|warcraft rumble)", "$1 (Mobile)", "i")', 'required' => false ], @@ -186,7 +178,7 @@ protected function getExpressionItemTitle() */ protected function getExpressionItemContent() { - return urldecode($this->getInput('content')); + return urldecode($this->getInput('content') ?? ''); } /** @@ -204,7 +196,7 @@ protected function getSettingUseRawItemContent(): bool */ protected function getExpressionItemUri() { - return urldecode($this->getInput('uri')); + return urldecode($this->getInput('uri') ?? ''); } /** @@ -213,7 +205,7 @@ protected function getExpressionItemUri() */ protected function getExpressionItemAuthor() { - return urldecode($this->getInput('author')); + return urldecode($this->getInput('author') ?? ''); } /** @@ -222,7 +214,7 @@ protected function getExpressionItemAuthor() */ protected function getExpressionItemTimestamp() { - return urldecode($this->getInput('timestamp')); + return urldecode($this->getInput('timestamp') ?? ''); } /** @@ -232,7 +224,7 @@ protected function getExpressionItemTimestamp() */ protected function getExpressionItemEnclosures() { - return urldecode($this->getInput('enclosures')); + return urldecode($this->getInput('enclosures') ?? 
/**
 * Adds xpath polyfills to the given DOMXPath instance.
 *
 * Binds the prefix "polyfill" to the PHP XPath extension namespace and makes a
 * global PHP function named "replace" callable from expressions, e.g.
 * polyfill:function("replace", .//div, "(foo|bar)", "$1 (baz)", "i").
 *
 * @param DOMXPath $xpath
 * @return void
 */
protected function registerXPathPolyfills(DOMXPath $xpath)
{
    $xpath->registerNamespace('polyfill', 'http://php.net/xpath');

    // A named function declared inside a method becomes a global function the
    // moment the method runs. Without this guard a second call would abort with
    // "Cannot redeclare replace()".
    if (!function_exists('replace')) {
        /**
         * Polyfill for XPath fn:replace, backed by preg_replace().
         * https://www.w3.org/TR/xpath-functions-31/#func-replace
         *
         * @param mixed $input string, or a node-set (array) holding at most one
         *                     DOMElement / DOMAttr / DOMText
         * @param mixed $pattern regular expression without delimiters
         * @param mixed $replacement replacement text ($1, $2, ... refer to groups)
         * @param mixed $flags optional regular expression modifiers
         * @return string
         * @throws \Exception if an argument has an unsupported type or the
         *                    regular expression cannot be applied
         */
        function replace($input, $pattern, $replacement, $flags = null)
        {
            $exceptionSuffix = ' [xpath polyfill for fn:replace]';

            if (is_array($input)) {
                if (count($input) > 1) {
                    throw new \Exception('input not supported' . $exceptionSuffix);
                }
                // fn:replace interprets the empty sequence as the zero-length
                // string, so an expression that matched nothing is not an error.
                $input = $input[0] ?? '';
            }

            if (is_string($input)) {
                $text = $input;
            } elseif ($input instanceof \DOMElement) {
                $text = $input->textContent;
            } elseif ($input instanceof \DOMAttr) {
                $text = $input->value;
            } elseif ($input instanceof \DOMText) {
                $text = $input->wholeText;
            } else {
                throw new \Exception('input not supported' . $exceptionSuffix);
            }

            if (!is_string($pattern)) {
                throw new \Exception('pattern (regular expression) must be a string' . $exceptionSuffix);
            }

            if (!is_string($replacement)) {
                throw new \Exception('replacement must be a string' . $exceptionSuffix);
            }

            $flags ??= '';
            if (!is_string($flags)) {
                throw new \Exception('flags (regular expression) must be a string' . $exceptionSuffix);
            }

            // XPath regular expressions have no delimiters, so a "/" in the
            // pattern (URLs, paths) is ordinary text. Escape every slash that is
            // not already escaped; existing escape pairs (\x) are skipped as a
            // unit so "\/" and "\\" keep their meaning.
            $delimited = preg_replace_callback(
                '~\\\\.|/~s',
                static fn (array $match): string => $match[0] === '/' ? '\\/' : $match[0],
                $pattern
            ) ?? $pattern;

            $modified = preg_replace(sprintf('/%s/%s', $delimited, $flags), $replacement, $text);
            // Only null signals failure; an empty string is a valid result.
            if ($modified === null) {
                throw new \Exception('pattern (regular expression) is invalid' . $exceptionSuffix);
            }

            return $modified;
        }
    }

    $xpath->registerPHPFunctions(['replace']);
}