Skip to content

Commit

Permalink
Use Solr for newspaper issue listing (#158)
Browse files Browse the repository at this point in the history
* Add a solr query for issues list

* Update README

* Lots of missing variables to uninstall

* Code review and travis issues

* PHP 5.3 (git add includes/utilities.inc)

* Make OR explicit

* Remove base filters to ensure we get all results

Don't alter the results

* Need to override limit or you get what is set in the config.

* Code review

* Include toggle to disable removing solr base filters

* Add last comma
  • Loading branch information
whikloj authored and DiegoPino committed Mar 20, 2018
1 parent 7c7256a commit bd5ba36
Show file tree
Hide file tree
Showing 4 changed files with 277 additions and 26 deletions.
18 changes: 16 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,24 @@ Install as usual, see [this](https://drupal.org/documentation/install/modules-th

## Configuration

Select configuration options for page derivatives, Parent Solr Field, and select a viewer for the issue view and page view in Administration » Islandora » Solution pack configuration » Newspapers (admin/islandora/solution_pack_config/newspaper).
Configuration options are found at Administration » Islandora » Solution pack configuration » Newspapers (admin/islandora/solution_pack_config/newspaper).

Select configuration options for which issue and page derivatives are created locally.
Select the **Parent Solr Field** which holds the parent issue PID for a newspaper page.

![Configuration](https://camo.githubusercontent.com/00b3d34d5927b733689ce0d1598a79c832082937/687474703a2f2f692e696d6775722e636f6d2f56764b6a6479462e706e67)
Select the **Use Solr** option to switch from using the resource index to using Solr to generate the issue list for a newspaper.
This also reveals three required Solr fields for this option.
* Newspaper issue parent field
* Issued date field
* Sequence field

**Note**: The above three fields are for Solr records of newspaper **issues** in your repository.

![Derivative and solr configuration options](https://user-images.githubusercontent.com/2857697/33495867-b94dffc4-d68d-11e7-9002-ed419dbeec64.jpg)

Also select a viewer for the newspaper issue view and page view.

![Issue view and page view configuration options](https://user-images.githubusercontent.com/2857697/33495870-bcc7f2ae-d68d-11e7-834d-cade66be45b4.jpg)

## Documentation
:warning: <br/>Deleting a newspaper object directly (Manage > Properties > Delete Newspaper) will delete all its child Issue objects and their associated Page objects. The objects highlighted in red in this diagram are everything that will be deleted if the newspaper Locusta Newspaper is deleted.
Expand Down
107 changes: 100 additions & 7 deletions includes/admin.form.inc
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
* The Drupal form definition.
*/
function islandora_newspaper_admin_settings_form(array $form, array &$form_state) {
$get_default_value = function($name, $default) use(&$form_state) {
$get_default_value = function ($name, $default) use (&$form_state) {
return isset($form_state['values'][$name]) ? $form_state['values'][$name] : variable_get($name, $default);
};
$form = array(
Expand All @@ -39,13 +39,79 @@ function islandora_newspaper_admin_settings_form(array $form, array &$form_state
),
);

$form['islandora_newspaper_parent_issue_solr_field'] = array(
'#type' => 'textfield',
'#title' => t('Parent Solr Field'),
'#description' => t("Solr field containing the parent issue's PID."),
'#default_value' => variable_get('islandora_newspaper_parent_issue_solr_field', 'RELS_EXT_isMemberOf_uri_ms'),
'#size' => 30,
$form['islandora_newspaper_solr_options'] = array(
'#type' => 'fieldset',
'#title' => t('Solr settings'),
'islandora_newspaper_parent_issue_solr_field' => array(
'#type' => 'textfield',
'#title' => t('Parent Solr Field'),
'#description' => t("Solr field containing the parent issue's PID."),
'#default_value' => variable_get('islandora_newspaper_parent_issue_solr_field', 'RELS_EXT_isMemberOf_uri_ms'),
'#size' => 100,
'#autocomplete_path' => 'islandora_solr/autocomplete_luke',
),
'islandora_newspaper_use_solr' => array(
'#type' => 'checkbox',
'#title' => t('Use Solr for Newspaper display'),
'#disabled' => (!module_exists('islandora_solr')),
'#description' => t('Use Solr to generate lists of issues for a newspaper object.'),
'#default_value' => variable_get('islandora_newspaper_use_solr', FALSE),
),
'islandora_newspaper_solr_wrapper' => array(
'#type' => 'container',
'#states' => array(
'visible' => array(
':input[name="islandora_newspaper_use_solr"]' => array('checked' => TRUE),
),
),
'islandora_newspaper_solr_newspaper_parent_field' => array(
'#type' => 'textfield',
'#title' => t('Newspaper issue parent field'),
'#description' => t('Solr field that contains the parent newspaper of the newspaper issue.'),
'#size' => 100,
'#default_value' => variable_get('islandora_newspaper_solr_newspaper_parent_field', 'RELS_EXT_isMemberOf_uri_ms'),
'#autocomplete_path' => 'islandora_solr/autocomplete_luke',
'#states' => array(
'required' => array(
':input[name="islandora_newspaper_use_solr"]' => array('checked' => TRUE),
),
),
),
'islandora_newspaper_solr_date_field' => array(
'#type' => 'textfield',
'#title' => t('Issued date field'),
'#description' => t('Solr field that contains the date issued of the newspaper issue.'),
'#size' => 100,
'#default_value' => variable_get('islandora_newspaper_solr_date_field', 'RELS_EXT_dateIssued_literal_ms'),
'#autocomplete_path' => 'islandora_solr/autocomplete_luke',
'#states' => array(
'required' => array(
':input[name="islandora_newspaper_use_solr"]' => array('checked' => TRUE),
),
),
),
'islandora_newspaper_solr_sequence_field' => array(
'#type' => 'textfield',
'#title' => t('Sequence field'),
'#description' => t('Solr field that contains the sequence number of the issues in a newspaper.'),
'#size' => 100,
'#default_value' => variable_get('islandora_newspaper_solr_sequence_field', 'RELS_EXT_isSequenceNumber_literal_ms'),
'#autocomplete_path' => 'islandora_solr/autocomplete_luke',
'#states' => array(
'required' => array(
':input[name="islandora_newspaper_use_solr"]' => array('checked' => TRUE),
),
),
),
'islandora_newspaper_solr_remove_base_filters' => array(
'#type' => 'checkbox',
'#title' => t('Remove base Solr filters'),
'#description' => t('This option removes your configured Solr base filters from these queries. If you want your filters to be applied even though they could affect which newspaper issue objects are returned in the list, uncheck this option.'),
'#default_value' => variable_get('islandora_newspaper_solr_remove_base_filters', TRUE),
),
),
);
$form['#validate'][] = 'islandora_newspaper_admin_settings_form_validate';

module_load_include('inc', 'islandora', 'includes/solution_packs');
$form += islandora_viewers_form('islandora_newspaper_issue_viewers', array('application/pdf'), 'islandora:newspaperIssueCModel');
Expand All @@ -59,6 +125,33 @@ function islandora_newspaper_admin_settings_form(array $form, array &$form_state
return system_settings_form($form);
}

/**
 * Form validation handler for islandora_newspaper_admin_settings_form().
 *
 * When the "Use Solr" checkbox is ticked, this requires the Islandora Solr
 * Search module to be enabled and the three Solr field names (newspaper issue
 * parent, issued date and sequence) to be filled in. Nothing is validated
 * when the checkbox is not ticked.
 *
 * @param array $form
 *   The Drupal form definition.
 * @param array $form_state
 *   The Drupal form state; only $form_state['values'] is read.
 */
function islandora_newspaper_admin_settings_form_validate(array $form, array &$form_state) {
  // The Solr field names are only mandatory when Solr is in use.
  if (empty($form_state['values']['islandora_newspaper_use_solr'])) {
    return;
  }
  $values = $form_state['values'];
  $errors = array();

  if (!module_exists('islandora_solr')) {
    $errors['islandora_newspaper_use_solr'] = t('This option requires the Islandora Solr Search module.');
  }

  // Required form element => message shown when it is left blank.
  $required = array(
    'islandora_newspaper_solr_newspaper_parent_field' => t('You must set the Newspaper issue parent field.'),
    'islandora_newspaper_solr_date_field' => t('You must set the date issued field.'),
    'islandora_newspaper_solr_sequence_field' => t('You must set the sequence field.'),
  );
  foreach ($required as $field => $message) {
    // Trim first so a value made only of whitespace counts as blank.
    $value = isset($values[$field]) ? trim($values[$field]) : '';
    if (empty($value)) {
      $errors[$field] = $message;
    }
  }

  foreach ($errors as $field => $message) {
    form_set_error($field, check_plain($message));
  }
}

/**
* Check if the required resources are enabled.
*
Expand Down
167 changes: 151 additions & 16 deletions includes/utilities.inc
Original file line number Diff line number Diff line change
Expand Up @@ -65,9 +65,6 @@ function islandora_newspaper_get_newspaper($object) {
/**
* Gets all the issues that the given newspaper owns.
*
* The results are ordered by their RELS-EXT dateIssued property.
* Older to newer.
*
* @param AbstractObject $object
* An AbstractObject representing a Fedora object.
*
Expand All @@ -77,9 +74,44 @@ function islandora_newspaper_get_newspaper($object) {
* - pid: The unique persistent identifier for the issue.
* - label: A descriptive label for the issue.
* - sequence: The sequence number of the issue, starts at 1.
* - issued: A DateTime object repersenting the date the issue was released.
* - issued: A DateTime object representing the date the issue was released.
*/
function islandora_newspaper_get_issues(AbstractObject $object) {
  // Solr is used only when the Solr module is enabled and the site has opted
  // in; otherwise fall back to the resource index (SPARQL) lookup.
  $use_solr = module_exists('islandora_solr') && variable_get('islandora_newspaper_use_solr', FALSE);
  if ($use_solr) {
    $issue_list = islandora_newspaper_get_issues_solr($object);
  }
  else {
    $issue_list = islandora_newspaper_get_issues_sparql($object);
  }

  // Re-key the list so that every issue is indexed by its own PID.
  $issues_by_pid = array();
  foreach ($issue_list as $issue) {
    $issues_by_pid[$issue['pid']] = $issue;
  }
  return $issues_by_pid;
}

/**
* Gets all the issues that the given newspaper owns using the resource index.
*
* The results are ordered by their RELS-EXT dateIssued property.
* Older to newer.
*
* @param AbstractObject $object
* An AbstractObject representing a Fedora object.
*
* @return array
* An array with elements of the form of:
* - pid: The unique persistent identifier for the issue.
* - label: A descriptive label for the issue.
* - sequence: The sequence number of the issue, starts at 1.
* - issued: A DateTime object representing the date the issue was released.
*/
function islandora_newspaper_get_issues_sparql(AbstractObject $object) {
$query = <<<EOQ
PREFIX islandora-rels-ext: <http://islandora.ca/ontology/relsext#>
PREFIX fedora-rels-ext: <info:fedora/fedora-system:def/relations-external#>
Expand Down Expand Up @@ -115,14 +147,14 @@ EOQ;

$results = $object->repository->ri->sparqlQuery($query);
// Map the results using a default Datetime for missing issued dates.
$map_results = function($o) {
$map_results = function ($o) {
try {
@$issued = new DateTime($o['issued']['value']);
}
catch (Exception $e) {
// Use the current time as a place holder.
$issued = new DateTime();
$msg = 'Failed to get issued date from SPARQL query for @pid';
$msg = 'Failed to get issued date from SPARQL query for @pid';
$vars = array('@pid' => $o['object']['value']);
watchdog_exception('islandora_newspaper', $e, $msg, $vars, WATCHDOG_ERROR);
}
Expand All @@ -134,13 +166,116 @@ EOQ;
);
};
$issues = array_map($map_results, $results);
// Grab the PIDs...
$get_pid = function($o) {
return $o['pid'];
return $issues;
}

/**
 * Gets all the issues that the given newspaper owns using Solr.
 *
 * Issues are matched on the configured "Newspaper issue parent field" and on
 * the newspaper issue content model. Results are fetched in pages of 10000
 * records until every matching record has been read or a query fails.
 *
 * @param AbstractObject $object
 *   An AbstractObject representing a Fedora object.
 *
 * @return array
 *   An array with elements of the form of:
 *   - pid: The unique persistent identifier for the issue.
 *   - label: A descriptive label for the issue.
 *   - sequence: The sequence number of the issue, 0 when Solr has none.
 *   - issued: A DateTime object representing the date the issue was released.
 */
function islandora_newspaper_get_issues_solr(AbstractObject $object) {
  // This query looks up issues by their parent newspaper, so it must use the
  // issue-level parent setting from the admin form rather than the
  // page-level "Parent Solr Field" (which holds a page's parent issue).
  $parent_field = variable_get('islandora_newspaper_solr_newspaper_parent_field', 'RELS_EXT_isMemberOf_uri_ms');
  $date_field = variable_get('islandora_newspaper_solr_date_field', 'RELS_EXT_dateIssued_literal_ms');
  $sequence_field = variable_get('islandora_newspaper_solr_sequence_field', 'RELS_EXT_isSequenceNumber_literal_ms');
  $remove_base_filters = variable_get('islandora_newspaper_solr_remove_base_filters', TRUE);
  if ($remove_base_filters) {
    // Loaded once up front; provides islandora_solr_remove_base_filters().
    module_load_include('inc', 'islandora_solr', 'includes/utilities');
  }

  // Number of records to return at once.
  $rows = 10000;
  // Zero-based index of the page being requested.
  $page = 0;
  // Total number of matches as last reported by Solr.
  $count = 0;
  $failed = FALSE;
  $results = array();

  $solr_build = new IslandoraSolrQueryProcessor();
  $solr_query = format_string('!parent_field:("info:fedora/!pid" OR "!pid") AND'
    . ' RELS_EXT_hasModel_uri_ms:"info:fedora/islandora:newspaperIssueCModel"',
    array(
      '!parent_field' => $parent_field,
      '!pid' => $object->id,
    )
  );
  $field_list = format_string('!date, !sequence, PID, fgs_label_s', array(
    '!date' => $date_field,
    '!sequence' => $sequence_field,
  ));

  do {
    $solr_param = array(
      'rows' => $rows,
      // The limit has to be overridden too, otherwise the configured value
      // is used.
      'limit' => $rows,
      'fl' => $field_list,
      'start' => ($rows * $page),
      'hl' => 'false',
      'facet' => 'false',
    );

    $solr_build->buildQuery($solr_query, $solr_param);
    $solr_build->solrParams = array_replace_recursive($solr_build->solrParams, $solr_param);
    if ($remove_base_filters) {
      // Base filters could hide issues, so drop them unless the site opted
      // to keep them.
      $solr_build->solrParams = islandora_solr_remove_base_filters($solr_build->solrParams);
    }
    try {
      $solr_build->executeQuery(FALSE);
      if (isset($solr_build->islandoraSolrResult['response']['objects'])) {
        $response = $solr_build->islandoraSolrResult['response'];
        $count = isset($response['numFound']) ? $response['numFound'] : 0;
        $results = array_merge($results, $response['objects']);
      }
      else {
        // No usable response came back; stop paging instead of merging a
        // non-array into the results.
        $failed = TRUE;
      }
    }
    catch (Exception $e) {
      // The exception text is not under our control, so escape it.
      drupal_set_message(t('Error searching Solr index') . ' ' . check_plain($e->getMessage()), 'error');
      $failed = TRUE;
    }
    $page++;
  } while (!$failed && $count > ($rows * $page));

  // Check the Solr doc array top and inside 'solr_doc'.
  $check_fn = function ($o, $name) {
    $value = "";
    if (isset($o[$name])) {
      $value = $o[$name];
    }
    elseif (isset($o['solr_doc'][$name])) {
      $value = $o['solr_doc'][$name];
    }
    // Multivalued fields come back as arrays; use the first value.
    return is_array($value) ? reset($value) : $value;
  };

  // Map the results using a default Datetime for missing issued dates.
  $map_results = function ($o) use ($date_field, $check_fn, $sequence_field) {
    try {
      @$issued = new DateTime($check_fn($o, $date_field));
    }
    catch (Exception $e) {
      // Use the current time as a place holder.
      $issued = new DateTime();
      $msg = 'Failed to get issued date from Solr query for @pid';
      $vars = array('@pid' => $o['PID']);
      watchdog_exception('islandora_newspaper', $e, $msg, $vars, WATCHDOG_ERROR);
    }
    $sequence = $check_fn($o, $sequence_field);
    if (empty($sequence)) {
      $sequence = 0;
    }
    return array(
      'pid' => $o['PID'],
      'label' => $o['object_label'],
      'sequence' => $sequence,
      'issued' => $issued,
    );
  };

  return array_map($map_results, $results);
}

/**
Expand Down Expand Up @@ -193,7 +328,7 @@ function islandora_newspaper_get_date_issued_from_mods(AbstractDatastream $datas
$out = new DateTime($result);
}
catch (Exception $e) {
$msg = 'Failed to get issued date from MODS for @pid';
$msg = 'Failed to get issued date from MODS for @pid';
$vars = array('@pid' => $datastream->parent->id);
watchdog_exception('islandora_newspaper', $e, $msg, $vars, WATCHDOG_ERROR);
}
Expand Down Expand Up @@ -234,7 +369,7 @@ function islandora_newspaper_get_date_issued(AbstractObject $object) {
/**
* Fetch the issues of a newspaper that do not have a date.
*
* @param AbstractObject|NULL $object
* @param AbstractObject|null $object
* Newspaper object or empty for all issues.
*
* @return array
Expand Down Expand Up @@ -288,7 +423,7 @@ EOQ;
'!filters' => implode(' ', array_map($filter_map, $query_filters)),
));
$results = $tuque->repository->ri->sparqlQuery($query);
$map_results = function($o) {
$map_results = function ($o) {
return array(
'pid' => $o['object']['value'],
'label' => $o['label']['value'],
Expand Down Expand Up @@ -332,7 +467,7 @@ function islandora_newspaper_set_mods_date_issued(AbstractDatastream $datastream
$out = TRUE;
}
catch (Exception $e) {
$msg = 'Failed to get save MODS datastream for @pid';
$msg = 'Failed to get save MODS datastream for @pid';
$vars = array('@pid' => $datastream->parent->id);
watchdog_exception('islandora_newspaper', $e, $msg, $vars, WATCHDOG_ERROR);
}
Expand Down
Loading

0 comments on commit bd5ba36

Please sign in to comment.