Skip to content

Commit

Permalink
Merge pull request #126 from NYPL-discovery/pb/update-annotated-marc-…
Browse files Browse the repository at this point in the history
…rules

Add updated annotated-marc-rules, improved update
  • Loading branch information
nonword authored Jan 22, 2019
2 parents 3ef4359 + 70ec336 commit 87f81ed
Show file tree
Hide file tree
Showing 7 changed files with 112 additions and 19 deletions.
2 changes: 2 additions & 0 deletions .env.example
Original file line number Diff line number Diff line change
Expand Up @@ -16,3 +16,5 @@ NYPL_OAUTH_URL=https://url-to-our-oauth-server-including-slash.example.com/
NYPL_OAUTH_ID=who-you-will-connect-to-the-api-as
NYPL_OAUTH_SECRET=that-accounts-pw
NYPL_CORE_VERSION=v1.21

CATALOG_WEBPUB_DEF_URL=[fqdn to catalog webpub.def]
13 changes: 13 additions & 0 deletions data/annotated-marc-rules.json
Original file line number Diff line number Diff line change
Expand Up @@ -694,6 +694,19 @@
"label": "Description",
"directive": "include"
},
{
"fieldTag": "r",
"marcIndicatorRegExp": "^386",
"subfieldSpec": {
"subfields": [
"2",
"6"
],
"directive": "exclude"
},
"label": "Creator/Contributor Characteristics",
"directive": "include"
},
{
"fieldTag": "r",
"marcIndicatorRegExp": "^",
Expand Down
10 changes: 5 additions & 5 deletions data/webpub.def
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@

# -----------------------------------------------------
# Converted by cvpubdef from pubdef
# -----------------------------------------------------
Expand All @@ -18,7 +17,7 @@
#updated 2015 01 14 by AGB to correct Holdings PUBLIC NOTE display
#updated 2015 10 01 by HM to remove Help icon from Item Status header
#updated 2017 07 24 by HM to suppress $0 identifier control subfield
#updated 2018 03 20 by HM to add 588 Source of Description note
#updated 2018 03 20 by HM to add 588 Source of Description note
#
#
# converted by cvpubdef from HOLDINGS_POS: atpR
Expand Down Expand Up @@ -108,6 +107,7 @@ b|r|350|-6|||b|
b|r|35[1257]|-6|Description||b|
b|r|362|-6|Publication Date||b|
b|r|36[56]|-6|Description||b|
b|r|386|-26|Creator/Contributor Characteristics||b|
b|r||-6|Description||b|
b|s|4..|-6|Series||b|
b|s|8..|-6|Series||b|
Expand Down Expand Up @@ -179,7 +179,7 @@ b|n|590 |-67|Local Note||b|
b|n|5900.||||b|
b|n||-67|Note||b|
b|y|255|-6|Cartographic Data||b|
b|y|856|u|Connect to:||b|
b|y|856|u|Url||b|
b|y|[^8]..|u|||b|
b|y|8[^5].|u|||b|
b|y|85[^6]|u|||b|
Expand Down Expand Up @@ -220,11 +220,11 @@ b|u|24701|-6|||b|
b|u|24710|-6|Former Title||b|
b|u|24711|-6|||b|

# modified 3/14/16
# modified 3/14/16
# b|u|7[34]0..|-06|Added Title||b|
b|u|730..|-06|Added Title||b|
b|u|740..|-06|Added Title||b|
b|u||-06|Added Title||b|
# b|u||-06|Added Title||b|

# New label for old-style Donor note
b|u|799|-6|Donor/Sponsor||b|
Expand Down
12 changes: 12 additions & 0 deletions lib/annotated-marc-serializer.js
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,19 @@ AnnotatedMarcSerializer.parseWebpubToAnnotatedMarcRules = function (webpubConten
* Given raw webpub.def content, builds an array of {AnnotatedMarcRule}s
*/
AnnotatedMarcSerializer.buildAnnotatedMarcRules = function (webpubContent) {
  // webpub.def contains an "Added Title" catch-all rule that is commented out
  // but should be active. Re-enable it before parsing:
  const commentedOutRule = '# b|u||-06|Added Title||b|'
  const activeRule = 'b|u||-06|Added Title||b|'
  const patchedContent = webpubContent.replace(commentedOutRule, activeRule)

  // URL rules are the field-tag "y" rules whose indicator pattern is ^856
  const isUrlRule = (rule) => rule.fieldTag === 'y' && rule.marcIndicatorRegExp.source === '^856'

  const parsedRules = AnnotatedMarcSerializer.parseWebpubToAnnotatedMarcRules(patchedContent)

  // Apply label overrides: URLs should be labeled "Connect to:"
  return parsedRules.map((rule) => {
    if (isUrlRule(rule)) rule.label = 'Connect to:'
    return rule
  })
}

/**
Expand Down
3 changes: 2 additions & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,8 @@
"start": "node app.js",
"deploy-development": "git checkout development && git pull origin development && eb deploy discovery-api-dev --profile nypl-sandbox",
"deploy-qa": "git checkout qa && git pull origin qa && eb deploy discovery-api-qa --profile nypl-digital-dev",
"deploy-production": "git checkout production && git pull origin qa && eb deploy discovery-api-production --profile nypl-digital-dev"
"deploy-production": "git checkout production && git pull origin qa && eb deploy discovery-api-production --profile nypl-digital-dev",
"rebuild-annotated-marc-rules": "./scripts/update-annotated-marc-rules.js --refetch"
},
"description": "Discovery API as an AWS Lambda.",
"license": "MIT",
Expand Down
74 changes: 61 additions & 13 deletions scripts/update-annotated-marc-rules.js
100644 → 100755
Original file line number Diff line number Diff line change
@@ -1,25 +1,73 @@
#!/usr/bin/env node
/**
* This file rebuilds data/annotated-marc-rules.json from data/webpub.def
*
* Webpub.def is a Sierra configuration file, which controls how specific marc
* fields are rendered in the catalog. We use it to build our own "annotated-
* marc-rules" document, which builds a similarly formatted document for the
* front-end. This script exists to rebuild that mapping file using the
* current Sierra webpub.def, which changes occasionally (e.g. to introduce
* a new mapping).
*
* Usage:
* node ./scripts/update-annotated-marc-rules.js [--refetch]
*
* If --refetch given, script updates local webpub.def from remote.
*/

const fs = require('fs')
const request = require('request-promise')

const AnnotatedMarcSerializer = require('../lib/annotated-marc-serializer')

// Load environment variables (e.g. CATALOG_WEBPUB_DEF_URL) from a local .env
require('dotenv').config()

// Parse command-line flags; `--refetch` is the only one this script reads
const argv = require('minimist')(process.argv.slice(2))

// Local copy of webpub.def that the annotated-marc rules are built from
const WEBPUB_DEF_LOCAL_PATH = './data/webpub.def'

/**
 * Fetch latest webpub.def from catalog server and overwrite the local copy.
 *
 * Reads the remote location from the CATALOG_WEBPUB_DEF_URL environment
 * variable.
 *
 * @returns {Promise} Resolves once the local webpub.def has been written.
 *   Rejects if CATALOG_WEBPUB_DEF_URL is unset/empty or if the request fails.
 */
function refetch () {
  const webpubDefUrl = process.env.CATALOG_WEBPUB_DEF_URL

  // Fail early with an actionable message rather than requesting "undefined"
  if (!webpubDefUrl) {
    return Promise.reject(new Error('CATALOG_WEBPUB_DEF_URL is not set; cannot refetch webpub.def'))
  }

  console.log(`Fetching latest webpub.def from ${webpubDefUrl}`)
  return request({ uri: webpubDefUrl })
    .then((resp) => {
      fs.writeFileSync(WEBPUB_DEF_LOCAL_PATH, resp)
      console.log('Updated webpub.def')
    })
}

/**
 * Rebuild local annotated-marc-rules from local webpub.def
 *
 * Reads WEBPUB_DEF_LOCAL_PATH, converts it to annotated-marc rules, and
 * writes the result as pretty-printed JSON to
 * ./data/annotated-marc-rules.json. All file I/O is synchronous.
 */
function updateAnnotatedMarcRules () {
  // Read raw webpub.def
  const mappingRulesRaw = fs.readFileSync(WEBPUB_DEF_LOCAL_PATH, 'utf8')

  // Transform raw webpub.def into a series of mapping rules:
  const mappingRules = AnnotatedMarcSerializer.buildAnnotatedMarcRules(mappingRulesRaw)
    .map((rule) => {
      return Object.assign({}, rule, {
        // RegExp.prototype.source is the pattern text without the '/'
        // bookends, so the rule can be stored as a plain JSON string
        marcIndicatorRegExp: rule.marcIndicatorRegExp.source
      })
    })

  // Serialize:
  const content = JSON.stringify(mappingRules, null, 2)

  // Write:
  fs.writeFileSync('./data/annotated-marc-rules.json', content)

  console.log('Finished updating annotated-marc-rules')
}

// If told to fetch latest webpub.def, do so, then rebuild from the fresh copy:
if (argv.refetch) {
  refetch()
    .then(updateAnnotatedMarcRules)
    .catch((err) => {
      // Report the failure and signal it through the exit status so callers
      // (e.g. the npm script) can tell the rebuild did not complete
      console.error(`Failed to update annotated-marc-rules: ${err && err.message ? err.message : err}`)
      process.exitCode = 1
    })
// Otherwise, just build from current copy of webpub.def:
} else {
  updateAnnotatedMarcRules()
}
17 changes: 17 additions & 0 deletions test/annotated-marc-rules.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -686,4 +686,21 @@ describe('Annotated Marc Rules', function () {
})
})
})

describe('Creator/Contributor Characteristics', function () {
  it('should extract Creator/Contributor Characteristics from 386', function () {
    // A single 386 varfield carrying a displayable $a plus a $6 that the
    // expected content below does not include
    const varField386 = {
      fieldTag: 'r',
      marcTag: '386',
      subfields: [
        { tag: 'a', content: 'Creator/Contributor Characteristics content' },
        { tag: '6', content: 'ignore' }
      ]
    }
    const sampleBib = { varFields: [varField386] }

    const serialized = AnnotatedMarcSerializer.serialize(sampleBib)
    expect(serialized.bib).to.be.a('object')
    expect(serialized.bib.fields).to.be.a('array')

    // First serialized field should be the 386 group with its label
    const firstField = serialized.bib.fields[0]
    expect(firstField).to.be.a('object')
    expect(firstField.label).to.equal('Creator/Contributor Characteristics')
    expect(firstField.values).to.be.a('array')

    // Its first value should carry only the $a content
    const firstValue = firstField.values[0]
    expect(firstValue).to.be.a('object')
    expect(firstValue.content).to.equal('Creator/Contributor Characteristics content')
  })
})
})

0 comments on commit 87f81ed

Please sign in to comment.