Skip to content
This repository has been archived by the owner on Jul 27, 2022. It is now read-only.

Commit

Permalink
Merge pull request #929 from ec-europa/regexp
Browse files Browse the repository at this point in the history
Don't parse data URL scheme when converting references
  • Loading branch information
sandervd authored Oct 2, 2017
2 parents 116ffa1 + 50c5b52 commit f6e088e
Showing 1 changed file with 10 additions and 26 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -319,12 +319,6 @@ public function prepareRow(Row $row) {
* If $markup has been changed.
*/
protected function process(&$markup) {
// Perform a bird's-eye check first and exit early if the markup contains no
// links or images to process, for performance reasons.
if (!static::needsProcessing($markup)) {
return FALSE;
}

// Build the DOM based on this markup.
$document = Html::load($markup);
$changed = FALSE;
Expand Down Expand Up @@ -562,8 +556,16 @@ protected function importUnmigratedFile($path) {
* The relative path parts or NULL.
*/
protected function getRelativePath($path) {
if ((strpos($path, '#') === 0) || !UrlHelper::isValid(UrlHelper::encodePath($path))) {
// Only fragment or invalid.
if (
// If it's only a fragment.
(strpos($path, '#') === 0) ||
// Or a Data URI file scheme.
// @see https://en.wikipedia.org/wiki/Data_URI_scheme
(substr($path, 0, 5) === 'data:') ||
// Or an invalid path.
!UrlHelper::isValid(UrlHelper::encodePath($path))
) {
// Exit early.
return NULL;
}

Expand Down Expand Up @@ -617,24 +619,6 @@ protected function getRelativePath($path) {
return parse_url($path) ?: NULL;
}

/**
 * Performs a bird's-eye check on the markup to see if processing is needed.
 *
 * This is a cheap regular-expression pre-filter that lets callers skip work
 * on markup that contains neither an anchor with a quoted "href" attribute
 * nor an image with a quoted "src" attribute.
 *
 * NOTE(review): the scheme/host part of each pattern is optional, so any
 * quoted href/src value satisfies the check, not only joinup.ec.europa.eu
 * links - confirm whether that is intended.
 *
 * @param string $markup
 *   The markup to be checked.
 *
 * @return bool
 *   TRUE if processing is needed, or if the check itself could not be
 *   completed; FALSE only when both patterns ran and found nothing.
 */
protected static function needsProcessing($markup) {
  $patterns = [
    // Anchors: <a ... href="...">.
    "@<a\s+[^>]*href\s*=\s*(['\"])??((http|https)?://joinup.ec.europa.eu)?[/]?([^\\1]*?)\\1[^>]*>@i",
    // Images: <img ... src="...">.
    "@<img\s+[^>]*src\s*=\s*(['\"])??((http|https)?://joinup.ec.europa.eu)?[/]?([^\\1]*?)\\1[^>]*>@i",
  ];

  foreach ($patterns as $pattern) {
    // preg_match() returns 1 on a match, 0 on no match and FALSE when PCRE
    // fails (e.g. the backtrack limit is hit on a very long attribute value
    // such as an inline data: URI). A failure must not be mistaken for
    // "nothing to do": this method is only an optimisation, so fail open and
    // let the caller run the real processing.
    if (preg_match($pattern, $markup) !== 0) {
      return TRUE;
    }
  }

  return FALSE;
}

/**
* Returns the fields to be migrated in a structured array.
*
Expand Down

0 comments on commit f6e088e

Please sign in to comment.