Skip to content

Commit

Permalink
fix(html): поддержка относительных путей
Browse files Browse the repository at this point in the history
  • Loading branch information
DmitryBochkarev committed Oct 23, 2015
1 parent 24bd113 commit e05076f
Show file tree
Hide file tree
Showing 2 changed files with 33 additions and 7 deletions.
16 changes: 10 additions & 6 deletions lib/string_tools/html.rb
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# coding: utf-8
require 'loofah'
require 'uri'
require 'addressable/uri'

module StringTools
module HTML
Expand Down Expand Up @@ -44,16 +44,20 @@ def initialize(options)

# Unwraps a link (replaces the <a> node with its children) unless the
# link's host is whitelisted.
#
# Nodes that are not <a> elements are ignored. Links with a blank href, or
# with an href that has no host part (relative paths), are left untouched.
#
# @param node [#name, #[], #children, #swap] the HTML node being scrubbed
# @return [Object, nil] callers are not expected to rely on the value
def scrub(node)
  return unless node.name == 'a'.freeze

  href = node['href'.freeze]
  return if href.blank?

  # Parse exactly once, with Addressable. A second, stricter parse ahead of
  # the blank check would reject hrefs before the guards above could run.
  uri = Addressable::URI.parse(href).normalize
  return unless uri.host

  node.swap(node.children) unless whitelisted?(uri.host)
rescue StandardError
  # When anything goes wrong (e.g. the href cannot be parsed), err on the
  # safe side and drop the link, keeping only its content.
  node.swap(node.children)
end

def whitelisted?(domain)
host_parts = domain.split('.'.freeze).reverse!
host = host_parts[0] # com, ru ...
1.upto(host_parts.length - 1) do |i|
host_parts = domain.split('.'.freeze)
host = host_parts[-1] # com, ru ...
(host_parts.length - 2).downto(0) do |i|
subdomain = host_parts[i]
host = "#{subdomain}.#{host}"
return true if @whitelist.include? host
Expand Down
24 changes: 23 additions & 1 deletion spec/html_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -48,18 +48,24 @@
end

context 'when whitelist passed' do
subject { StringTools::HTML.remove_links(html, whitelist: ['yandex.ru']) }
subject { StringTools::HTML.remove_links(html, whitelist: ['yandex.ru', 'pulscen.com.ua']) }

context 'domain link match to whitelisted' do
let(:html) do
<<-MARKUP
<a href="https://firm.pulscen.com.ua">firm.pulscen.com.ua</a>
<a href="https://pulscen.com.ua">pulscen.com.ua</a>
<a href="https://com.ua">com.ua</a>
<a href="https://google.com"><span>goo</span><span>gle</span></a>
<a href="https://yandex.ru"><span>yan</span><span>dex</span></a>
MARKUP
end

it 'should keep only whitelisted links' do
is_expected.to eq(<<-MARKUP)
<a href="https://firm.pulscen.com.ua">firm.pulscen.com.ua</a>
<a href="https://pulscen.com.ua">pulscen.com.ua</a>
com.ua
<span>goo</span><span>gle</span>
<a href="https://yandex.ru"><span>yan</span><span>dex</span></a>
MARKUP
Expand Down Expand Up @@ -99,6 +105,22 @@
MARKUP
end
end

# An href without a scheme or host (here "yandex.ru") is expected to be kept
# as a relative link, while the absolute google.com link is expected to be
# replaced by its inner <span> content.
context 'content with relative links' do
let(:html) do
<<-MARKUP
<a href="https://google.com"><span>goo</span><span>gle</span></a>
<a href="yandex.ru"><span>yan</span><span>dex</span></a>
MARKUP
end

it 'should keep relative links' do
is_expected.to eq(<<-MARKUP)
<span>goo</span><span>gle</span>
<a href="yandex.ru"><span>yan</span><span>dex</span></a>
MARKUP
end
end
end
end
end

0 comments on commit e05076f

Please sign in to comment.