diff --git a/lib/string_tools/html.rb b/lib/string_tools/html.rb
index ff4b19d..d3d66b9 100644
--- a/lib/string_tools/html.rb
+++ b/lib/string_tools/html.rb
@@ -1,6 +1,6 @@
# coding: utf-8
require 'loofah'
-require 'uri'
+require 'addressable/uri'
module StringTools
module HTML
@@ -44,16 +44,20 @@ def initialize(options)
def scrub(node)
return unless node.name == 'a'.freeze
- uri = URI.parse(node['href'.freeze])
+ href = node['href']
+ return if href.blank?
+ uri = Addressable::URI.parse(href).normalize
+ return unless uri.host # no host (presumably a relative link) - leave the link as is
node.swap(node.children) unless whitelisted? uri.host
- rescue URI::InvalidURIError => _
+ rescue
# in any unclear situation, just remove the link
node.swap(node.children)
end
def whitelisted?(domain)
- host_parts = domain.split('.'.freeze).reverse!
- host = host_parts[0] # com, ru ...
- 1.upto(host_parts.length - 1) do |i|
+ host_parts = domain.split('.'.freeze)
+ host = host_parts[-1] # com, ru ...
+ (host_parts.length - 2).downto(0) do |i|
subdomain = host_parts[i]
host = "#{subdomain}.#{host}"
return true if @whitelist.include? host
diff --git a/spec/html_spec.rb b/spec/html_spec.rb
index 2047ec0..9c94f8a 100644
--- a/spec/html_spec.rb
+++ b/spec/html_spec.rb
@@ -48,11 +48,14 @@
end
context 'when whitelist passed' do
- subject { StringTools::HTML.remove_links(html, whitelist: ['yandex.ru']) }
+ subject { StringTools::HTML.remove_links(html, whitelist: ['yandex.ru', 'pulscen.com.ua']) }
context 'domain link match to whitelisted' do
let(:html) do
<<-MARKUP
+ firm.pulscen.com.ua
+ pulscen.com.ua
+ com.ua
google
yandex
MARKUP
@@ -60,6 +63,9 @@
it 'should keep only whitelisted links' do
is_expected.to eq(<<-MARKUP)
+ firm.pulscen.com.ua
+ pulscen.com.ua
+ com.ua
google
yandex
MARKUP
@@ -99,6 +105,22 @@
MARKUP
end
end
+
+ context 'content with relative links' do
+ let(:html) do
+ <<-MARKUP
+ google
+ yandex
+ MARKUP
+ end
+
+ it 'should keep relative links' do
+ is_expected.to eq(<<-MARKUP)
+ google
+ yandex
+ MARKUP
+ end
+ end
end
end
end