From d06d835f6bfb004eef6d0ace921e2f68890db4b1 Mon Sep 17 00:00:00 2001 From: Paul Beaudoin Date: Fri, 14 Dec 2018 15:22:56 -0500 Subject: [PATCH 1/2] Add updated annotated-marc-rules, improved update Updates annotated-marc-rules based on current contents of webpub.def in catalog. Also improves the process for updating said file by: - Supporting a configured `CATALOG_WEBPUB_DEF_URL` to allow one to simplify automatically fetching latest webpub.def from catalog - Codifies the two overrides we need to apply to a straightforward interpretation of webpub.def (previously these were manual edits): - Un-comment the catch-all "Added Title" entry, originally addressed in https://jira.nypl.org/browse/SCC-854 - Change label for URLs from "Url" to "Connect to:" - Registers update script to be run via: `npm run rebuild-annotated-marc-rules` https://jira.nypl.org/browse/SCC-1260 --- .env.example | 2 + data/annotated-marc-rules.json | 13 +++++ data/webpub.def | 10 ++-- lib/annotated-marc-serializer.js | 12 +++++ package.json | 3 +- scripts/update-annotated-marc-rules.js | 74 +++++++++++++++++++++----- 6 files changed, 95 insertions(+), 19 deletions(-) mode change 100644 => 100755 scripts/update-annotated-marc-rules.js diff --git a/.env.example b/.env.example index 44742ca2..614bf8b8 100644 --- a/.env.example +++ b/.env.example @@ -16,3 +16,5 @@ NYPL_OAUTH_URL=https://url-to-our-oauth-server-including-slash.example.com/ NYPL_OAUTH_ID=who-you-will-connect-to-the-api-as NYPL_OAUTH_SECRET=that-accounts-pw NYPL_CORE_VERSION=v1.21 + +CATALOG_WEBPUB_DEF_URL=[fqdn to catalog webpub.def] diff --git a/data/annotated-marc-rules.json b/data/annotated-marc-rules.json index c2884931..a18af8b8 100644 --- a/data/annotated-marc-rules.json +++ b/data/annotated-marc-rules.json @@ -694,6 +694,19 @@ "label": "Description", "directive": "include" }, + { + "fieldTag": "r", + "marcIndicatorRegExp": "^386", + "subfieldSpec": { + "subfields": [ + "2", + "6" + ], + "directive": "exclude" + }, + "label": 
"Creator/Contributor Characteristics", + "directive": "include" + }, { "fieldTag": "r", "marcIndicatorRegExp": "^", diff --git a/data/webpub.def b/data/webpub.def index 55f16127..0bb91d48 100644 --- a/data/webpub.def +++ b/data/webpub.def @@ -1,4 +1,3 @@ - # ----------------------------------------------------- # Converted by cvpubdef from pubdef # ----------------------------------------------------- @@ -18,7 +17,7 @@ #updated 2015 01 14 by AGB to correct Holdings PUBLIC NOTE display #updated 2015 10 01 by HM to remove Help icon from Item Status header #updated 2017 07 24 by HM to suppress $0 identifier control subfield -#updated 2018 03 20 by HM to add 588 Source of Description note +#updated 2018 03 20 by HM to add 588 Source of Description note # # # converted by cvpubdef from HOLDINGS_POS: atpR @@ -108,6 +107,7 @@ b|r|350|-6|||b| b|r|35[1257]|-6|Description||b| b|r|362|-6|Publication Date||b| b|r|36[56]|-6|Description||b| +b|r|386|-26|Creator/Contributor Characteristics||b| b|r||-6|Description||b| b|s|4..|-6|Series||b| b|s|8..|-6|Series||b| @@ -179,7 +179,7 @@ b|n|590 |-67|Local Note||b| b|n|5900.||||b| b|n||-67|Note||b| b|y|255|-6|Cartographic Data||b| -b|y|856|u|Connect to:||b| +b|y|856|u|Url||b| b|y|[^8]..|u|||b| b|y|8[^5].|u|||b| b|y|85[^6]|u|||b| @@ -220,11 +220,11 @@ b|u|24701|-6|||b| b|u|24710|-6|Former Title||b| b|u|24711|-6|||b| -# modified 3/14/16 +# modified 3/14/16 # b|u|7[34]0..|-06|Added Title||b| b|u|730..|-06|Added Title||b| b|u|740..|-06|Added Title||b| -b|u||-06|Added Title||b| +# b|u||-06|Added Title||b| # New label for old-style Donor note b|u|799|-6|Donor/Sponsor||b| diff --git a/lib/annotated-marc-serializer.js b/lib/annotated-marc-serializer.js index bbf63aab..722ac069 100644 --- a/lib/annotated-marc-serializer.js +++ b/lib/annotated-marc-serializer.js @@ -100,7 +100,19 @@ AnnotatedMarcSerializer.parseWebpubToAnnotatedMarcRules = function (webpubConten * Given raw webpub.def content, builds an array of {AnnotatedMarcRule}s */ 
AnnotatedMarcSerializer.buildAnnotatedMarcRules = function (webpubContent) { + // No one can say why, but there's an "Added Title" entry that is commented + // out, but should not be. Un-comment the Added Title catch-all rule: + webpubContent = webpubContent.replace('# b|u||-06|Added Title||b|', 'b|u||-06|Added Title||b|') + return AnnotatedMarcSerializer.parseWebpubToAnnotatedMarcRules(webpubContent) + // Apply label overrides + .map((rule) => { + // Override label for URLs. We want them labeled "Connect to": + if (rule.fieldTag === 'y' && rule.marcIndicatorRegExp.source === '^856') { + rule.label = 'Connect to:' + } + return rule + }) } /** diff --git a/package.json b/package.json index 844e7796..da138f21 100644 --- a/package.json +++ b/package.json @@ -33,7 +33,8 @@ "start": "node app.js", "deploy-development": "git checkout development && git pull origin development && eb deploy discovery-api-dev --profile nypl-sandbox", "deploy-qa": "git checkout qa && git pull origin qa && eb deploy discovery-api-qa --profile nypl-digital-dev", - "deploy-production": "git checkout production && git pull origin qa && eb deploy discovery-api-production --profile nypl-digital-dev" + "deploy-production": "git checkout production && git pull origin qa && eb deploy discovery-api-production --profile nypl-digital-dev", + "rebuild-annotated-marc-rules": "./scripts/update-annotated-marc-rules.js --refetch" }, "description": "Discovery API as an AWS Lambda.", "license": "MIT", diff --git a/scripts/update-annotated-marc-rules.js b/scripts/update-annotated-marc-rules.js old mode 100644 new mode 100755 index 991dd698..89047644 --- a/scripts/update-annotated-marc-rules.js +++ b/scripts/update-annotated-marc-rules.js @@ -1,25 +1,73 @@ +#!/usr/bin/env node /** * This file rebuilds data/annotated-marc-rules.json from data/webpub.def + * + * Webpub.def is a Sierra configuration file, which controls how specific marc + * fields are rendered in the catalog. 
We use it to build our own "annotated- + * marc-rules" document, which builds a similarly formatted document for the + * front-end. This script exists to rebuild that mapping file using the + * current Sierra webpub.def, which changes occasionally (e.g. to introduce + * a new mapping). + * + * Usage: + * node ./scripts/update-annotated-marc-rules.js [--refetch] + * + * If --refetch given, script updates local webpub.def from remote. */ const fs = require('fs') +const request = require('request-promise') const AnnotatedMarcSerializer = require('../lib/annotated-marc-serializer') -// Read raw webpub.def -const mappingRulesRaw = fs.readFileSync('./data/webpub.def', 'utf8') +require('dotenv').config() -// Transform raw webpub.def into a series of mapping rules: -const mappingRules = AnnotatedMarcSerializer.buildAnnotatedMarcRules(mappingRulesRaw) - .map((rule) => { - return Object.assign({}, rule, { - // RegExp.proto.source returns .toString() without '/' bookends - marcIndicatorRegExp: rule.marcIndicatorRegExp.source +const argv = require('minimist')(process.argv.slice(2)) + +const WEBPUB_DEF_LOCAL_PATH = './data/webpub.def' + +/** + * Fetch latest webpub.def from catalog server + */ +function refetch () { + console.log(`Fetching latest webpub.def from ${process.env.CATALOG_WEBPUB_DEF_URL}`) + return request({ uri: process.env.CATALOG_WEBPUB_DEF_URL }) + .then((resp) => { + fs.writeFileSync(WEBPUB_DEF_LOCAL_PATH, resp) + console.log('Updated webpub.def') + }) +} + +/** + * Rebuild local annotated-marc-rules from local webpub.def + */ +function updateAnnotatedMarcRules () { + // Read raw webpub.def + const mappingRulesRaw = fs.readFileSync(WEBPUB_DEF_LOCAL_PATH, 'utf8') + + // Transform raw webpub.def into a series of mapping rules: + const mappingRules = AnnotatedMarcSerializer.buildAnnotatedMarcRules(mappingRulesRaw) + .map((rule) => { + return Object.assign({}, rule, { + // RegExp.proto.source returns .toString() without '/' bookends + marcIndicatorRegExp: 
rule.marcIndicatorRegExp.source + }) }) - }) -// Serialize: -const content = JSON.stringify(mappingRules, null, 2) + // Serialize: + const content = JSON.stringify(mappingRules, null, 2) + + // Write: + fs.writeFileSync('./data/annotated-marc-rules.json', content) + + console.log('Finished updating annotated-marc-rules') +} + +// If told to fetch latest webpub.def, do so: +if (argv.refetch) { + refetch().then(updateAnnotatedMarcRules) -// Write: -fs.writeFileSync('./data/annotated-marc-rules.json', content) +// Otherwise, just build from current copy of webpub.def: +} else { + updateAnnotatedMarcRules() +} From 70ec3365ef8219485af835df71b3c59d05b944ae Mon Sep 17 00:00:00 2001 From: Paul Beaudoin Date: Fri, 14 Dec 2018 16:14:36 -0500 Subject: [PATCH 2/2] Add test for new 386 annotated marc mapping --- test/annotated-marc-rules.test.js | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/test/annotated-marc-rules.test.js b/test/annotated-marc-rules.test.js index 4b78c9c8..c9cdcdb7 100644 --- a/test/annotated-marc-rules.test.js +++ b/test/annotated-marc-rules.test.js @@ -686,4 +686,21 @@ describe('Annotated Marc Rules', function () { }) }) }) + + describe('Creator/Contributor Characteristics', function () { + it('should extract Creator/Contributor Characteristics from 386', function () { + const sampleBib = { varFields: [ + { fieldTag: 'r', marcTag: '386', subfields: [ { tag: 'a', content: 'Creator/Contributor Characteristics content' }, { tag: '6', content: 'ignore' } ] } + ] } + + const serialized = AnnotatedMarcSerializer.serialize(sampleBib) + expect(serialized.bib).to.be.a('object') + expect(serialized.bib.fields).to.be.a('array') + expect(serialized.bib.fields[0]).to.be.a('object') + expect(serialized.bib.fields[0].label).to.equal('Creator/Contributor Characteristics') + expect(serialized.bib.fields[0].values).to.be.a('array') + expect(serialized.bib.fields[0].values[0]).to.be.a('object') + 
expect(serialized.bib.fields[0].values[0].content).to.equal('Creator/Contributor Characteristics content') + }) + }) })