Skip to content

Commit

Permalink
Validate urls before adding to queue.
Browse files Browse the repository at this point in the history
Prevents invalid url exceptions while parsing recursively
  • Loading branch information
JanPetterMG committed Aug 10, 2019
1 parent a10bd4a commit e793cae
Showing 1 changed file with 9 additions and 3 deletions.
12 changes: 9 additions & 3 deletions src/SitemapParser.php
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,10 @@ public function parseRecursive($url)
/**
 * Add an array of URLs to the parser queue.
 *
 * Every URL is first passed through urlEncode(); the encoded result is
 * appended to the queue only when urlValidate() accepts it. URLs that
 * fail validation are dropped without raising an error.
 *
 * @param array $urlArray URLs to enqueue
 * @return void
 */
public function addToQueue(array $urlArray)
{
    foreach ($urlArray as $url) {
        $url = $this->urlEncode($url);
        // Enqueue exactly once, and only after validation. Pushing the raw
        // value beforehand would let invalid URLs through and duplicate
        // every valid one.
        if ($this->urlValidate($url)) {
            $this->queue[] = $url;
        }
    }
}

Expand All @@ -173,10 +176,13 @@ public function getQueue()
public function parse($url, $urlContent = null)
{
$this->clean();
$this->currentURL = $url;
$this->currentURL = $this->urlEncode($url);
if (!$this->urlValidate($this->currentURL)) {
throw new Exceptions\SitemapParserException('Invalid URL');
}
$this->history[] = $this->currentURL;
$response = is_string($urlContent) ? $urlContent : $this->getContent();
if ($this->urlValidate($this->currentURL) && parse_url($this->currentURL, PHP_URL_PATH) === self::ROBOTSTXT_PATH) {
if (parse_url($this->currentURL, PHP_URL_PATH) === self::ROBOTSTXT_PATH) {
$this->parseRobotstxt($response);
return;
}
Expand Down

0 comments on commit e793cae

Please sign in to comment.