diff --git a/app/models/dro.rb b/app/models/dro.rb index d5deb89f5..53c563d2b 100644 --- a/app/models/dro.rb +++ b/app/models/dro.rb @@ -5,6 +5,11 @@ class Dro < RepositoryRecord validates :content_type, :access, :administrative, :description, :identification, :structural, presence: true + scope :has_admin_policy, ->(admin_policy_druid) { where("administrative ->> 'hasAdminPolicy' = '#{admin_policy_druid}'").select(:external_identifier).order(:external_identifier) } + scope :in_virtual_objects, ->(member_druid) { where("structural #> '{hasMemberOrders,0}' -> 'members' ? :druid", druid: member_druid) } + scope :members_of_collection, ->(collection_druid) { where("structural -> 'isMemberOf' ? :druid", druid: collection_druid).select(:external_identifier, :version, :content_type) } + scope :embargoed_and_releaseable, -> { where("(access -> 'embargo' ->> 'releaseDate')::timestamp <= ?", Time.zone.now).select(:external_identifier) } + def self.find_by_source_id(source_id) find_by("identification->>'sourceId' = ?", source_id) end diff --git a/app/reports/apo_catkey.rb b/app/reports/apo_catkey.rb index 4255656a8..d0affa8e6 100644 --- a/app/reports/apo_catkey.rb +++ b/app/reports/apo_catkey.rb @@ -2,7 +2,7 @@ require 'csv' -# Find items that are goverened by the provided APO and then return all catkeys and refresh status. +# Find items that are governed by the provided APO and then return all catkeys and refresh status. # https://github.com/sul-dlss/dor-services-app/issues/4373 # Invoke via: # bin/rails r -e production "ApoCatkey.report('druid:bx911tp9024')" @@ -12,17 +12,7 @@ def self.report(apo_druid) CSV.open(output_file, 'w') do |csv| csv << %w[druid catkey refresh] - query = "is_governed_by_ssim:\"info:fedora/#{apo_druid}\"&objectType_ssim:\"item\"" - druids = [] - # borrowed from bin/generate-druid-list - loop do - results = SolrService.query('*:*', fl: 'id', rows: 10000, fq: query, start: druids.length, sort: 'id asc') - break if results.empty? - - results.each { |r| druids << r['id'] } - sleep(0.5) - end - + druids = Dro.has_admin_policy(apo_druid).map(&:external_identifier) num_dros = druids.size puts "Found #{num_dros} objects that are governed by APO #{apo_druid}" druids.each_with_index do |druid, i| diff --git a/app/services/delete_service.rb b/app/services/delete_service.rb index 8cc955603..031798b52 100644 --- a/app/services/delete_service.rb +++ b/app/services/delete_service.rb @@ -4,7 +4,6 @@ class DeleteService # Tries to remove any existence of the object in our systems # Does the following: - # - Removes item from Fedora/Solr # - Removes content from dor workspace # - Removes content from assembly workspace # - Removes content from sdr export area diff --git a/app/services/embargo_release_service.rb b/app/services/embargo_release_service.rb index d819b43d3..b2baa2ca8 100644 --- a/app/services/embargo_release_service.rb +++ b/app/services/embargo_release_service.rb @@ -1,27 +1,23 @@ # frozen_string_literal: true # Finds objects where the embargo release date has passed for embargoed items -# Builds list of candidate objects by doing a Solr query +# Builds list of candidate objects by querying the database # # Should run once a day from cron class EmbargoReleaseService - RELEASEABLE_NOW_QUERY = 'embargo_status_ssim:"embargoed" AND embargo_release_dtsim:[* TO NOW]' - def self.release_all # Find objects to process - Rails.logger.info("***** Querying solr: #{RELEASEABLE_NOW_QUERY}") - solr = SolrService.get(RELEASEABLE_NOW_QUERY, 'rows' => '5000', 'fl' => 'id') + embargoed_items_to_release = Dro.embargoed_and_releaseable - num_found = solr['response']['numFound'].to_i - if num_found.zero? + if embargoed_items_to_release.none? Rails.logger.info('No objects to process') return end - Rails.logger.info("Found #{num_found} objects") + Rails.logger.info("Found #{embargoed_items_to_release.count} objects") count = 0 - solr['response']['docs'].each do |doc| - release(doc['id']) + embargoed_items_to_release.each do |item| + release(item.external_identifier) count += 1 end diff --git a/app/services/member_service.rb b/app/services/member_service.rb index 95c13348c..591f2cdb9 100644 --- a/app/services/member_service.rb +++ b/app/services/member_service.rb @@ -1,19 +1,37 @@ # frozen_string_literal: true -# Finds the members of a collection by using Solr +# Finds the members of a collection class MemberService # @param [String] druid the identifier of the collection # @param [Boolean] only_published when true, restrict to only published items # @param [Boolean] exclude_opened when true, exclude opened items # @return [Array>] the members of this collection def self.for(druid, only_published: false, exclude_opened: false) - query = "is_member_of_collection_ssim:\"info:fedora/#{druid}\"" - query += ' published_dttsim:[* TO *]' if only_published - query += ' -processing_status_text_ssi:Opened' if exclude_opened - args = { - fl: 'id,objectType_ssim', - rows: 100_000_000 - } - SolrService.query query, args + Dro + .members_of_collection(druid) + .then { |members| reject_opened_members(members, exclude_opened) } + .then { |members| select_published_members(members, only_published) } + .map do |member| + { + 'id' => member.external_identifier, + 'objectType' => member.content_type == Cocina::Models::ObjectType.agreement ? 'agreement' : 'item' + } + end + end + + def self.reject_opened_members(members, exclude_opened) + return members unless exclude_opened + + members.reject do |member| + WorkflowClientFactory.build.status(druid: member.external_identifier, version: member.version).display_simplified == 'Opened' + end + end + + def self.select_published_members(members, only_published) + return members unless only_published + + members.select do |member| + WorkflowClientFactory.build.lifecycle(druid: member.external_identifier, milestone_name: 'published', version: member.version).present? + end end end diff --git a/app/services/publish/public_desc_metadata_service.rb b/app/services/publish/public_desc_metadata_service.rb index 92774671c..5b8fd80d4 100644 --- a/app/services/publish/public_desc_metadata_service.rb +++ b/app/services/publish/public_desc_metadata_service.rb @@ -57,7 +57,7 @@ def add_doi # expand constituent relations into relatedItem references -- see JUMBO-18 # @return [Void] def add_constituent_relations! - VirtualObject.for(druid: cocina_object.externalIdentifier).each do |solr_doc| + VirtualObject.for(druid: cocina_object.externalIdentifier).each do |virtual_object_hash| # create the MODS relation relatedItem = doc.create_element('relatedItem', xmlns: MODS_NS) relatedItem['type'] = 'host' @@ -66,14 +66,14 @@ def add_constituent_relations! # load the title from the virtual object's DC.title titleInfo = doc.create_element('titleInfo', xmlns: MODS_NS) title = doc.create_element('title', xmlns: MODS_NS) - title.content = solr_doc.fetch(:title) + title.content = virtual_object_hash.fetch(:title) titleInfo << title relatedItem << titleInfo # point to the PURL for the virtual object location = doc.create_element('location', xmlns: MODS_NS) url = doc.create_element('url', xmlns: MODS_NS) - url.content = purl_url(solr_doc.fetch(:id)) + url.content = purl_url(virtual_object_hash.fetch(:id)) location << url relatedItem << location diff --git a/app/services/published_relationships_filter.rb b/app/services/published_relationships_filter.rb index c1aa88892..9a43724e8 100644 --- a/app/services/published_relationships_filter.rb +++ b/app/services/published_relationships_filter.rb @@ -36,8 +36,8 @@ def collections def virtual_objects return unless cocina_object.dro? - VirtualObject.for(druid: cocina_object.externalIdentifier).map do |solr_doc| - "" + VirtualObject.for(druid: cocina_object.externalIdentifier).map do |virtual_object_hash| + "" end.join(INDENT) end end diff --git a/app/services/solr_service.rb b/app/services/solr_service.rb deleted file mode 100644 index 7ac194855..000000000 --- a/app/services/solr_service.rb +++ /dev/null @@ -1,38 +0,0 @@ -# frozen_string_literal: true - -# Functions for querying solr -class SolrService - include Singleton - - def options - { timeout: Settings.solr.timeout, url: Settings.solr.url } - end - - def conn - @conn ||= RSolr.connect options - end - - class << self - delegate :conn, to: :instance - - def select_path - Settings.solr.select_path - end - - # @param [Hash] options - def get(query, args = {}) - args = args.merge(q: query, wt: :json) - conn.get(select_path, params: args) - end - - def query(query, args = {}) - unless args.key?(:rows) - Rails.logger.warn "Calling SolrService.get without passing an explicit value for ':rows' is not recommended. " \ - "You will end up with Solr's default (usually set to 10)\nCalled by #{caller(1..1).first}" - end - get(query, args).fetch('response').fetch('docs') - end - - delegate :add, :commit, to: :conn - end -end diff --git a/app/services/virtual_object.rb b/app/services/virtual_object.rb index 79ccf3269..d8ccc741b 100644 --- a/app/services/virtual_object.rb +++ b/app/services/virtual_object.rb @@ -6,10 +6,11 @@ class VirtualObject # @param [String] druid # @return [Array] a list of results with ids and titles def self.for(druid:) - query = "has_constituents_ssim:#{druid.sub(':', '\:')}" - response = SolrService.get(query, { fl: 'id sw_display_title_tesim' }) - response.fetch('response').fetch('docs').map do |row| - { id: row.fetch('id'), title: row.fetch('sw_display_title_tesim').first } + Dro.in_virtual_objects(druid).map do |dro| + { + id: dro.external_identifier, + title: Cocina::Models::Builders::TitleBuilder.build(dro.to_cocina.description.title) + } end end end diff --git a/bin/clean-druid-list b/bin/clean-druid-list index 5e2cf91cd..ae811ff20 100755 --- a/bin/clean-druid-list +++ b/bin/clean-druid-list @@ -23,7 +23,9 @@ count = 0 File.open(options[:output], 'w') do |file| druids.each_with_index do |druid, index| puts "Finding #{druid} (#{index + 1})" - next if SolrService.query('*:*', fl: 'id', rows: 1, fq: "id:\"#{druid}\"").empty? + next unless AdminPolicy.exists?(external_identifier: druid) || + Collection.exists?(external_identifier: druid) || + Dro.exists?(external_identifier: druid) file.write("#{druid}\n") count += 1 diff --git a/bin/generate-druid-list b/bin/generate-druid-list index bfa89ae80..4b1bf5b48 100755 --- a/bin/generate-druid-list +++ b/bin/generate-druid-list @@ -4,6 +4,8 @@ require_relative '../config/environment' require 'optparse' +# TODO: Figure out if we still want this or not, given how tightly coupled this functionality is to Solr + options = { output: 'druids.txt', quiet: false } parser = OptionParser.new do |option_parser| option_parser.banner = 'Usage: bin/generate-druid-list \'\' [options]' diff --git a/lib/tasks/missing_druids.rake b/lib/tasks/missing_druids.rake index 26cb87a5d..c1a18e18c 100644 --- a/lib/tasks/missing_druids.rake +++ b/lib/tasks/missing_druids.rake @@ -1,5 +1,7 @@ # frozen_string_literal: true +# TODO: Figure out if we still want this or not, given how tightly coupled this functionality is to Solr + namespace :missing_druids do desc 'Find unindexed druids' task unindexed_objects: :environment do