Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

get the largest image, and also grab image size from style attributes #73

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
65 changes: 61 additions & 4 deletions lib/readability.rb
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,9 @@ class Document
:min_image_height => 80,
:ignore_image_format => [],
:blacklist => nil,
:whitelist => nil
:whitelist => nil,
:get_largest_image => false,
:url_blacklist => []
}.freeze

attr_accessor :options, :html, :best_candidate, :candidates, :best_candidate_has_image
Expand Down Expand Up @@ -81,6 +83,18 @@ def make_html(whitelist=nil, blacklist=nil)
@html.xpath('//comment()').each { |i| i.remove }
end


# Reports whether +url+ contains any of the substrings configured in
# options[:url_blacklist].
#
# url - the image URL (String) to check.
#
# Returns true when at least one blacklist entry occurs in +url+,
# false otherwise. A nil or missing :url_blacklist option is treated
# as an empty list instead of raising NoMethodError.
def is_blacklist(url)
  # Array() turns nil into [] and wraps a lone String in a one-element list.
  Array(options[:url_blacklist]).any? { |pattern| url.include?(pattern) }
end


def images(content=nil, reload=false)
begin
require 'fastimage'
Expand All @@ -90,6 +104,9 @@ def images(content=nil, reload=false)

@best_candidate_has_image = false if reload

largest_image_url = nil
largest_image_area = 0

prepare_candidates
list_images = []
tested_images = []
Expand All @@ -105,6 +122,27 @@ def images(content=nil, reload=false)
height = element["height"].nil? ? 0 : element["height"].value.to_i
width = element["width"].nil? ? 0 : element["width"].value.to_i

if is_blacklist(url)
debug("image discarded (blacklist): #{url}")
next
end

if element["style"]

width_reg = /width:(\d+)/.match(element["style"])
height_reg = /height:(\d+)/.match(element["style"])

if width_reg
width = width_reg[1].to_i
end

if height_reg
height = height_reg[1].to_i
end

end


if url =~ /\Ahttps?:\/\//i && (height.zero? || width.zero?)
image = get_image_size(url)
next unless image
Expand All @@ -121,21 +159,40 @@ def images(content=nil, reload=false)

tested_images.push(url)
if image_meets_criteria?(image)
list_images << url
if options[:get_largest_image]
area = image[:height] * image[:width]
if area > largest_image_area

if largest_image_url
debug("Image discarded by larger image: #{largest_image_url}")
end

largest_image_area = area
largest_image_url = url
end
else
list_images << url
end

else
debug("Image discarded: #{url} - height: #{image[:height]} - width: #{image[:width]} - format: #{image[:format]}")
end
end

(list_images.empty? and content != @html) ? images(@html, true) : list_images
if options[:get_largest_image] and largest_image_url
list_images << largest_image_url
end

(list_images.empty? and content != @html) ? images(@html, true) : list_images

end

# Looks up the pixel dimensions of the image at +url+ via FastImage.size.
#
# url - the image URL passed straight through to FastImage.size.
#
# Returns a Hash of the form {:width => w, :height => h}, or nil when
# either dimension is missing or any StandardError is raised during the
# lookup. Failures are reported once through +debug+, including the URL
# so the offending image can be identified.
def get_image_size(url)
  w, h = FastImage.size(url)
  # A nil width or height is routed through the rescue below so that it
  # is logged the same way as any other lookup failure.
  raise "Couldn't get size." if w.nil? || h.nil?
  {:width => w, :height => h}
rescue => e
  debug("Image error: #{e} url: #{url}")
  nil
end

Expand Down