diff --git a/app/search_engines/bento_search/catalog_engine.rb b/app/search_engines/bento_search/catalog_engine.rb index f096cb64..466d631e 100644 --- a/app/search_engines/bento_search/catalog_engine.rb +++ b/app/search_engines/bento_search/catalog_engine.rb @@ -65,12 +65,13 @@ def search_implementation(args) # TODO: Get a hidden reference to this value into the atom payload so it is referencable from the summary variable parsed_summary = Oga.parse_html(entry['summary']) - link_href = parsed_summary.at_xpath('//a/@href') - if link_href - # NOTE: this was changed mainly to avoid NPE and avoid parsing the same html 3 times. I can't vouch for the - # logic (specifically, it feels fragile to assume a correspondence between href and associated text, - # but perhaps that assumption is valid in practice?) - online_resource[link_href.text] = parsed_summary.at_xpath('//a/text()')&.text&.strip.presence || '[no link text]' + link = parsed_summary.at_xpath('//a') + if link + # If a link exists, we are not opinionated about what it must be. e.g., if href.nil? || href=='', we just pass + # it along. The exception is that we generate placeholder link text if none is present, to ensure that any + # links generated downstream will be visible/clickable. + href = link.attribute('href').to_s + online_resource[href] = link.text.strip.presence || '[no link text]' end holdings_string = mms_id.downcase.start_with?("hathi") ? '' : determine_holdings_status(holdings, mms_id, online_resource)