From 976625b63422195f531e175f03a3796c40de8e20 Mon Sep 17 00:00:00 2001 From: Ethan Lee Date: Tue, 6 Dec 2016 00:21:36 +0000 Subject: [PATCH 1/3] Change filepath for Windows support --- test/static.js | 2 +- test/static/{Turtle_Article.html => turtle_article_case.html} | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename test/static/{Turtle_Article.html => turtle_article_case.html} (100%) diff --git a/test/static.js b/test/static.js index f3425b1..dbb1eb5 100644 --- a/test/static.js +++ b/test/static.js @@ -34,7 +34,7 @@ describe('static files', function() { it('should be case insensitive on Turtle Article file', function() { expected = {"bePress":{"series_title":"Turtles","author":"Turtle Lvr","author_institution":"Mediawiki","title":"Turtles are AWESOME!!1","date":"2012","pdf_url":"http://www.example.com/turtlelvr/pdf","abstract_html_url":"http://www.example.com/turtlelvr","publisher":"Turtles Society","online_date":"2012/02/04"},"coins":[{"ctx_ver":"Z39.88-2004","rft_id":"info:doi/http://dx.doi.org/10.5555/12345678","rfr_id":"info:sid/crossref.org:search","rft_val_fmt":"info:ofi/fmt:kev:mtx:journal","rft":{"atitle":"Toward a Unified Theory of High-Energy Metaphysics: Silly String Theory","jtitle":"Journal of Psychoceramics","date":"2008","volume":"5","issue":"11","spage":"1","epage":"3","aufirst":"Josiah","aulast":"Carberry","genre":"article","au":["Josiah Carberry"]}}],"dublinCore":{"title":"Turtles are AWESOME!!1","creator":"http://www.example.com/turtlelvr","description":"Exposition on the awesomeness of turtles","date":"2012-02-04 12:00:00","type":"Text.Article"},"general":{"author":"Turtle Lvr","authorlink":"http://examples.com/turtlelvr","canonical":"http://example.com/turtles","description":"Exposition on the awesomeness of turtles","publisher":"https://mediawiki.org","robots":"we welcome our robot overlords","shortlink":"http://example.com/c","title":"Turtles are AWESOME!!1 | Awesome Turtles Website", "lang":"en"},"highwirePress":{"journal_title":"Turtles","issn":"1234-5678","doi":"10.1000/123","publication_date":"2012-02-04","title":"Turtles are AWESOME!!1","author":"Turtle Lvr","author_institution":"Mediawiki","volume":"150","issue":"1","firstpage":"123","lastpage":"456","publisher":"Turtles Society","abstract":"Exposition on the awesomeness of turtles."},"openGraph":{"locale":"en_US","type":"article","title":"Turtles are AWESOME!!1","description":"Exposition on the awesomeness of turtles","url":"http://example.com","site_name":"Awesome Turtles Website","image":[{"url":"http://example.com/turtle.jpg","secure_url":"https://secure.example.com/turtle.jpg","type":"image/jpeg","width":"400","height":"300"},{"url":"http://example.com/shell.jpg","width":"200","height":"150"}],"audio":{"url":"http://example.com/sound.mp3","secure_url":"https://secure.example.com/sound.mp3","type":"audio/mpeg"},"tag":["turtles","are","awesome"],"section":["Turtles are tough","Turtles are flawless","Turtles are cute"],"published_time":"2012-02-04T12:00:00+00:00","modified_time":"2015-01-14T19:14:27+00:00","author":"http://examples.com/turtlelvr","publisher":"http://mediawiki.org"},"eprints":{"title":"Turtles are AWESOME!!1","creators_name":"http://www.example.com/turtlelvr","abstract":"Exposition on the awesomeness of turtles","datestamp":"2012-02-04 12:00:00","type":"article"},"twitter":{"card":"summary","site":"@Turtlessssssssss","creator":["@Turtlessssssssss","@Turtlezzzzzzzzzz"],"url":"http://www.example.com/turtles","title":"Turtles are AWESOME!!1","description":"Exposition on the awesomeness of turtles","image":{"url":"http://example.com/turtles.jpg","alt":"It's a bunch of turtles!"},"app":{"url":{"iphone":"turtle://","googleplay":"turtle://"},"id":{"iphone":"000","googleplay":"superturtlearticle.androidapp"}}}}; - $ = cheerio.load(fs.readFileSync('./test/static/Turtle_Article.html')); + $ = cheerio.load(fs.readFileSync('./test/static/turtle_article_case.html')); return meta.parseAll($).then(function(results){ assert.deepEqual(results, expected); }); diff --git a/test/static/Turtle_Article.html b/test/static/turtle_article_case.html similarity index 100% rename from test/static/Turtle_Article.html rename to test/static/turtle_article_case.html From 261a40c1b817e9a0ac06b5355c8491edfe018b57 Mon Sep 17 00:00:00 2001 From: Ethan Lee Date: Wed, 7 Dec 2016 06:28:32 -0800 Subject: [PATCH 2/3] Adds JSON-LD scraping capability --- index.js | 11 +++++++++++ lib/index.js | 30 +++++++++++++++++++++++++++++- test/errors.js | 16 ++++++++++++++++ test/scraping.js | 29 +++++++++++++++++++++++++++++ test/static/turtle_article.html | 7 +++++++ 5 files changed, 92 insertions(+), 1 deletion(-) diff --git a/index.js b/index.js index 84dd8bf..2c1463a 100644 --- a/index.js +++ b/index.js @@ -154,6 +154,17 @@ exports.parseTwitter = function(chtml, callback){ return index.parseTwitter(chtml).nodeify(callback); }; +/** + * Retrieves JSON-LD for given html object + * + * @param {Object} chtml html Cheerio object + * @param {Function} [callback] optional callback function + * @return {Object} BBPromise for JSON-LD + */ +exports.parseJsonLd = function(chtml, callback){ + return index.parseJsonLd(chtml).nodeify(callback); +}; + /** * Global exportable list of scraping promises with string keys * @type {Object} diff --git a/lib/index.js b/lib/index.js index 33d7254..10098ea 100644 --- a/lib/index.js +++ b/lib/index.js @@ -544,6 +544,33 @@ exports.parseTwitter = BBPromise.method(function(chtml) { }); +/** + * Returns JSON-LD provided by page given HTML object + * @param {Object} chtml html Cheerio object + * @return {Object} BBPromise for JSON-LD + */ +exports.parseJsonLd = BBPromise.method(function(chtml) { + var json = []; + var jsonLd = chtml('script[type="application/ld+json"]'); + + if (jsonLd.length === 0) { + throw new Error("No JSON-LD script tag present on page"); + } + + jsonLd.each(function() { + var contents = chtml(this).text().trim(); + try { + contents = JSON.parse(contents); + } catch (e) { + throw new Error("Could not parse JSON-LD: " + e); + } + json.push(contents); + }); + + return jsonLd.length > 1 ? json : json[0]; +}); + + /** * Global exportable list of scraping promises with string keys * @type {Object} @@ -557,5 +584,6 @@ exports.metadataFunctions = { 'highwirePress': exports.parseHighwirePress, 'openGraph': exports.parseOpenGraph, 'schemaOrg': exports.parseSchemaOrgMicrodata, - 'twitter': exports.parseTwitter + 'twitter': exports.parseTwitter, + 'jsonLd': exports.parseJsonLd }; diff --git a/test/errors.js b/test/errors.js index 33c0211..3742d82 100644 --- a/test/errors.js +++ b/test/errors.js @@ -8,6 +8,7 @@ var cheerio = require('cheerio'); var meta = require('../index'); var preq = require('preq'); // Promisified Request library var assert = require('./utils/assert.js'); +var fs = require('fs'); // mocha defines to avoid JSHint breakage @@ -98,6 +99,21 @@ describe('errors', function() { }); }); + it('should not find JSON-LD, reject promise', function() { + var url = 'http://example.com'; + return preq.get(url) + .then(function(callRes) { + var $ = cheerio.load(callRes.body); + var prom = meta.parseJsonLd($); + return assert.fails(prom); + }); + }); + + it('should reject promise with malformed JSON-LD', function() { + var $ = cheerio.load(fs.readFileSync('./test/static/turtle_article.html')); + return assert.fails(meta.parseJsonLd($)); + }); + //TODO: Add test for lacking general metadata //TODO: Add test for lacking any metadata diff --git a/test/scraping.js b/test/scraping.js index edd1411..7ce48b3 100644 --- a/test/scraping.js +++ b/test/scraping.js @@ -169,4 +169,33 @@ describe('scraping', function() { }); }); + describe('JSON-LD tests (for types of Organizations)', function() { + var urls = ['http://www.uber.com/en-GB/', 'http://www.theguardian.com/us', 'http://jsonld.com/']; + urls.forEach(function(test) { + describe(test, function() { + it('should return an object or array', function() { + return meta(test) + .then(function(res) { + assert.ok(typeof res.jsonLd === 'object'); + }); + }); + + it('should get correct JSON-LD data', function() { + return meta(test) + .then(function(res) { + var result = res.jsonLd; + if (res.jsonLd instanceof Array) { + result = res.jsonLd.filter(function(r) { + return r['@type'] === 'Organization' + })[0]; + }; + ['@context', '@type', 'url', 'logo'].forEach(function(key) { + assert.ok(result.hasOwnProperty(key)); + }); + }); + }); + }); + }); + }); + }); diff --git a/test/static/turtle_article.html b/test/static/turtle_article.html index 4b71765..0868512 100644 --- a/test/static/turtle_article.html +++ b/test/static/turtle_article.html @@ -130,6 +130,13 @@ + + From 6b1ae4c0d0fe51036308fc79ef2c9ce104ce8a53 Mon Sep 17 00:00:00 2001 From: Ethan H Lee Date: Thu, 8 Dec 2016 17:16:25 +0000 Subject: [PATCH 3/3] Reordered functions and fixed testing Fixed ordering Version bump, Uber->Apple test Line endings --- README.md | 2 +- index.js | 22 ++--- lib/index.js | 58 +++++------ package.json | 2 +- test/errors.js | 2 +- test/scraping.js | 4 +- test/static.js | 6 +- test/static/turtle_article.html | 18 +++- test/static/turtle_article.json | 130 +++++++++++++++++++++++++ test/static/turtle_article_case.html | 8 ++ test/static/turtle_article_errors.html | 18 ++++ test/static/turtle_movie.json | 57 +++++++++++ 12 files changed, 276 insertions(+), 51 deletions(-) create mode 100644 test/static/turtle_article.json create mode 100644 test/static/turtle_article_errors.html create mode 100644 test/static/turtle_movie.json diff --git a/README.md b/README.md index 1e9b8fb..7236e6f 100644 --- a/README.md +++ b/README.md @@ -3,7 +3,7 @@ html-metadata # MetaData html scraper and parser for Node.js (supports Promises and callback style) -The aim of this library is to be a comprehensive source for extracting all html embedded metadata. Currently it supports Schema.org microdata using a third party library, a native BEPress, Dublin Core, Highwire Press, Open Graph, Twitter, EPrints, and COinS implementation, and some general metadata that doesn't belong to a particular standard (for instance, the content of the title tag, or meta description tags). +The aim of this library is to be a comprehensive source for extracting all html embedded metadata. Currently it supports Schema.org microdata using a third party library, a native BEPress, Dublin Core, Highwire Press, JSON-LD, Open Graph, Twitter, EPrints, and COinS implementation, and some general metadata that doesn't belong to a particular standard (for instance, the content of the title tag, or meta description tags). Planned is support for RDFa, AGLS, and other yet unheard of metadata types. Contributions and requests for other metadata types welcome! diff --git a/index.js b/index.js index 2c1463a..db29fa4 100644 --- a/index.js +++ b/index.js @@ -121,6 +121,17 @@ exports.parseHighwirePress = function(chtml, callback){ return index.parseHighwirePress(chtml).nodeify(callback); }; +/** + * Retrieves JSON-LD for given html object + * + * @param {Object} chtml html Cheerio object + * @param {Function} [callback] optional callback function + * @return {Object} BBPromise for JSON-LD + */ +exports.parseJsonLd = function(chtml, callback){ + return index.parseJsonLd(chtml).nodeify(callback); +}; + /** * Scrapes OpenGraph data given html object * @@ -154,17 +165,6 @@ exports.parseTwitter = function(chtml, callback){ return index.parseTwitter(chtml).nodeify(callback); }; -/** - * Retrieves JSON-LD for given html object - * - * @param {Object} chtml html Cheerio object - * @param {Function} [callback] optional callback function - * @return {Object} BBPromise for JSON-LD - */ -exports.parseJsonLd = function(chtml, callback){ - return index.parseJsonLd(chtml).nodeify(callback); -}; - /** * Global exportable list of scraping promises with string keys * @type {Object} diff --git a/lib/index.js b/lib/index.js index 10098ea..823d034 100644 --- a/lib/index.js +++ b/lib/index.js @@ -338,6 +338,33 @@ exports.parseHighwirePress = BBPromise.method(function(chtml){ }); +/** + * Returns JSON-LD provided by page given HTML object + * @param {Object} chtml html Cheerio object + * @return {Object} BBPromise for JSON-LD + */ +exports.parseJsonLd = BBPromise.method(function(chtml) { + var json = []; + var jsonLd = chtml('script[type="application/ld+json"]'); + + jsonLd.each(function() { + var contents = chtml(this).text().trim(); + try { + contents = JSON.parse(contents); + } catch (e) { + // Fail silently, just in case there are valid tags + return; + } + json.push(contents); + }); + + if (json.length === 0) { + throw new Error("No JSON-LD valid script tags present on page"); + } + + return json.length > 1 ? json : json[0]; +}); + /** * Scrapes OpenGraph data given html object * @param {Object} chtml html Cheerio object @@ -544,33 +571,6 @@ exports.parseTwitter = BBPromise.method(function(chtml) { }); -/** - * Returns JSON-LD provided by page given HTML object - * @param {Object} chtml html Cheerio object - * @return {Object} BBPromise for JSON-LD - */ -exports.parseJsonLd = BBPromise.method(function(chtml) { - var json = []; - var jsonLd = chtml('script[type="application/ld+json"]'); - - if (jsonLd.length === 0) { - throw new Error("No JSON-LD script tag present on page"); - } - - jsonLd.each(function() { - var contents = chtml(this).text().trim(); - try { - contents = JSON.parse(contents); - } catch (e) { - throw new Error("Could not parse JSON-LD: " + e); - } - json.push(contents); - }); - - return jsonLd.length > 1 ? json : json[0]; -}); - - /** * Global exportable list of scraping promises with string keys * @type {Object} @@ -582,8 +582,8 @@ exports.metadataFunctions = { 'eprints': exports.parseEprints, 'general': exports.parseGeneral, 'highwirePress': exports.parseHighwirePress, + 'jsonLd': exports.parseJsonLd, 'openGraph': exports.parseOpenGraph, 'schemaOrg': exports.parseSchemaOrgMicrodata, - 'twitter': exports.parseTwitter, - 'jsonLd': exports.parseJsonLd + 'twitter': exports.parseTwitter }; diff --git a/package.json b/package.json index 9298175..e0dfa27 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "html-metadata", - "version": "1.5.0", + "version": "1.6.0", "description": "Scrapes metadata of several different standards", "main": "index.js", "dependencies": { diff --git a/test/errors.js b/test/errors.js index 3742d82..473cc47 100644 --- a/test/errors.js +++ b/test/errors.js @@ -110,7 +110,7 @@ describe('errors', function() { }); it('should reject promise with malformed JSON-LD', function() { - var $ = cheerio.load(fs.readFileSync('./test/static/turtle_article.html')); + var $ = cheerio.load(fs.readFileSync('./test/static/turtle_article_errors.html')); return assert.fails(meta.parseJsonLd($)); }); diff --git a/test/scraping.js b/test/scraping.js index 7ce48b3..9a75259 100644 --- a/test/scraping.js +++ b/test/scraping.js @@ -170,7 +170,7 @@ describe('scraping', function() { }); describe('JSON-LD tests (for types of Organizations)', function() { - var urls = ['http://www.uber.com/en-GB/', 'http://www.theguardian.com/us', 'http://jsonld.com/']; + var urls = ['http://www.theguardian.com/us', 'http://jsonld.com/', 'http://www.apple.com/']; urls.forEach(function(test) { describe(test, function() { it('should return an object or array', function() { @@ -186,7 +186,7 @@ describe('scraping', function() { var result = res.jsonLd; if (res.jsonLd instanceof Array) { result = res.jsonLd.filter(function(r) { - return r['@type'] === 'Organization' + return r['@type'] === 'Organization'; })[0]; }; ['@context', '@type', 'url', 'logo'].forEach(function(key) { diff --git a/test/static.js b/test/static.js index dbb1eb5..918a224 100644 --- a/test/static.js +++ b/test/static.js @@ -17,7 +17,7 @@ describe('static files', function() { var expected; it('should get correct info from turtle movie file', function() { - expected = {"dublinCore":{"title":"Turtles of the Jungle","creator":"http://www.example.com/turtlelvr","description":"A 2008 film about jungle turtles.","date":"2012-02-04 12:00:00","type":"Image.Moving"},"general":{"author":"Turtle Lvr","authorlink":"http://examples.com/turtlelvr","canonical":"http://example.com/turtles","description":"Exposition on the awesomeness of turtles","publisher":"https://mediawiki.org","robots":"we welcome our robot overlords","shortlink":"http://example.com/c","title":"Turtles are AWESOME!!1 | Awesome Turtles Website", "lang":"en"},"openGraph":{"locale":"en_US","type":"video.movie","title":"Turtles of the Jungle","description":"A 2008 film about jungle turtles.","url":"http://example.com","site_name":"Awesome Turtle Movies Website","image":[{"url":"http://example.com/turtle.jpg"},{"url":"http://example.com/shell.jpg"}],"tag":["turtle","movie","awesome"],"director":"http://www.example.com/PhilTheTurtle","actor":["http://www.example.com/PatTheTurtle","http://www.example.com/SaminaTheTurtle"],"writer":"http://www.example.com/TinaTheTurtle","release_date":"2015-01-14T19:14:27+00:00","duration":"1000000"},"twitter":{"card":"summary","site":"@Turtlessssssssss","creator":"@Turtlessssssssss","url":"http://www.example.com/turtles","title":"Turtles of the Jungle","description":"A 2008 film about jungle turtles.","player":{"url":"http://www.example.com/turtles/player","width":"400","height":"400","stream":{"url":"http://www.example.com/turtles/turtle.mp4","content_type":"video/mp4"}}}}; + expected = JSON.parse(fs.readFileSync('./test/static/turtle_movie.json')); $ = cheerio.load(fs.readFileSync('./test/static/turtle_movie.html')); return meta.parseAll($).then(function(results){ assert.deepEqual(results, expected); @@ -25,7 +25,7 @@ describe('static files', function() { }); it('should get correct info from turtle article file', function() { - expected = {"bePress":{"series_title":"Turtles","author":"Turtle Lvr","author_institution":"Mediawiki","title":"Turtles are AWESOME!!1","date":"2012","pdf_url":"http://www.example.com/turtlelvr/pdf","abstract_html_url":"http://www.example.com/turtlelvr","publisher":"Turtles Society","online_date":"2012/02/04"},"coins":[{"ctx_ver":"Z39.88-2004","rft_id":"info:doi/http://dx.doi.org/10.5555/12345678","rfr_id":"info:sid/crossref.org:search","rft_val_fmt":"info:ofi/fmt:kev:mtx:journal","rft":{"atitle":"Toward a Unified Theory of High-Energy Metaphysics: Silly String Theory","jtitle":"Journal of Psychoceramics","date":"2008","volume":"5","issue":"11","spage":"1","epage":"3","aufirst":"Josiah","aulast":"Carberry","genre":"article","au":["Josiah Carberry"]}}],"dublinCore":{"title":"Turtles are AWESOME!!1","creator":"http://www.example.com/turtlelvr","description":"Exposition on the awesomeness of turtles","date":"2012-02-04 12:00:00","type":"Text.Article"},"general":{"author":"Turtle Lvr","authorlink":"http://examples.com/turtlelvr","canonical":"http://example.com/turtles","description":"Exposition on the awesomeness of turtles","publisher":"https://mediawiki.org","robots":"we welcome our robot overlords","shortlink":"http://example.com/c","title":"Turtles are AWESOME!!1 | Awesome Turtles Website", "lang":"en"},"highwirePress":{"journal_title":"Turtles","issn":"1234-5678","doi":"10.1000/123","publication_date":"2012-02-04","title":"Turtles are AWESOME!!1","author":"Turtle Lvr","author_institution":"Mediawiki","volume":"150","issue":"1","firstpage":"123","lastpage":"456","publisher":"Turtles Society","abstract":"Exposition on the awesomeness of turtles."},"openGraph":{"locale":"en_US","type":"article","title":"Turtles are AWESOME!!1","description":"Exposition on the awesomeness of turtles","url":"http://example.com","site_name":"Awesome Turtles Website","image":[{"url":"http://example.com/turtle.jpg","secure_url":"https://secure.example.com/turtle.jpg","type":"image/jpeg","width":"400","height":"300"},{"url":"http://example.com/shell.jpg","width":"200","height":"150"}],"audio":{"url":"http://example.com/sound.mp3","secure_url":"https://secure.example.com/sound.mp3","type":"audio/mpeg"},"tag":["turtles","are","awesome"],"section":["Turtles are tough","Turtles are flawless","Turtles are cute"],"published_time":"2012-02-04T12:00:00+00:00","modified_time":"2015-01-14T19:14:27+00:00","author":"http://examples.com/turtlelvr","publisher":"http://mediawiki.org"},"eprints":{"title":"Turtles are AWESOME!!1","creators_name":"http://www.example.com/turtlelvr","abstract":"Exposition on the awesomeness of turtles","datestamp":"2012-02-04 12:00:00","type":"article"},"twitter":{"card":"summary","site":"@Turtlessssssssss","creator":["@Turtlessssssssss","@Turtlezzzzzzzzzz"],"url":"http://www.example.com/turtles","title":"Turtles are AWESOME!!1","description":"Exposition on the awesomeness of turtles","image":{"url":"http://example.com/turtles.jpg","alt":"It's a bunch of turtles!"},"app":{"url":{"iphone":"turtle://","googleplay":"turtle://"},"id":{"iphone":"000","googleplay":"superturtlearticle.androidapp"}}}}; + expected = JSON.parse(fs.readFileSync('./test/static/turtle_article.json')); $ = cheerio.load(fs.readFileSync('./test/static/turtle_article.html')); return meta.parseAll($).then(function(results){ assert.deepEqual(results, expected); @@ -33,7 +33,7 @@ describe('static files', function() { }); it('should be case insensitive on Turtle Article file', function() { - expected = {"bePress":{"series_title":"Turtles","author":"Turtle Lvr","author_institution":"Mediawiki","title":"Turtles are AWESOME!!1","date":"2012","pdf_url":"http://www.example.com/turtlelvr/pdf","abstract_html_url":"http://www.example.com/turtlelvr","publisher":"Turtles Society","online_date":"2012/02/04"},"coins":[{"ctx_ver":"Z39.88-2004","rft_id":"info:doi/http://dx.doi.org/10.5555/12345678","rfr_id":"info:sid/crossref.org:search","rft_val_fmt":"info:ofi/fmt:kev:mtx:journal","rft":{"atitle":"Toward a Unified Theory of High-Energy Metaphysics: Silly String Theory","jtitle":"Journal of Psychoceramics","date":"2008","volume":"5","issue":"11","spage":"1","epage":"3","aufirst":"Josiah","aulast":"Carberry","genre":"article","au":["Josiah Carberry"]}}],"dublinCore":{"title":"Turtles are AWESOME!!1","creator":"http://www.example.com/turtlelvr","description":"Exposition on the awesomeness of turtles","date":"2012-02-04 12:00:00","type":"Text.Article"},"general":{"author":"Turtle Lvr","authorlink":"http://examples.com/turtlelvr","canonical":"http://example.com/turtles","description":"Exposition on the awesomeness of turtles","publisher":"https://mediawiki.org","robots":"we welcome our robot overlords","shortlink":"http://example.com/c","title":"Turtles are AWESOME!!1 | Awesome Turtles Website", "lang":"en"},"highwirePress":{"journal_title":"Turtles","issn":"1234-5678","doi":"10.1000/123","publication_date":"2012-02-04","title":"Turtles are AWESOME!!1","author":"Turtle Lvr","author_institution":"Mediawiki","volume":"150","issue":"1","firstpage":"123","lastpage":"456","publisher":"Turtles Society","abstract":"Exposition on the awesomeness of turtles."},"openGraph":{"locale":"en_US","type":"article","title":"Turtles are AWESOME!!1","description":"Exposition on the awesomeness of turtles","url":"http://example.com","site_name":"Awesome Turtles Website","image":[{"url":"http://example.com/turtle.jpg","secure_url":"https://secure.example.com/turtle.jpg","type":"image/jpeg","width":"400","height":"300"},{"url":"http://example.com/shell.jpg","width":"200","height":"150"}],"audio":{"url":"http://example.com/sound.mp3","secure_url":"https://secure.example.com/sound.mp3","type":"audio/mpeg"},"tag":["turtles","are","awesome"],"section":["Turtles are tough","Turtles are flawless","Turtles are cute"],"published_time":"2012-02-04T12:00:00+00:00","modified_time":"2015-01-14T19:14:27+00:00","author":"http://examples.com/turtlelvr","publisher":"http://mediawiki.org"},"eprints":{"title":"Turtles are AWESOME!!1","creators_name":"http://www.example.com/turtlelvr","abstract":"Exposition on the awesomeness of turtles","datestamp":"2012-02-04 12:00:00","type":"article"},"twitter":{"card":"summary","site":"@Turtlessssssssss","creator":["@Turtlessssssssss","@Turtlezzzzzzzzzz"],"url":"http://www.example.com/turtles","title":"Turtles are AWESOME!!1","description":"Exposition on the awesomeness of turtles","image":{"url":"http://example.com/turtles.jpg","alt":"It's a bunch of turtles!"},"app":{"url":{"iphone":"turtle://","googleplay":"turtle://"},"id":{"iphone":"000","googleplay":"superturtlearticle.androidapp"}}}}; + expected = JSON.parse(fs.readFileSync('./test/static/turtle_article.json')); $ = cheerio.load(fs.readFileSync('./test/static/turtle_article_case.html')); return meta.parseAll($).then(function(results){ assert.deepEqual(results, expected); diff --git a/test/static/turtle_article.html b/test/static/turtle_article.html index 0868512..0edd4d8 100644 --- a/test/static/turtle_article.html +++ b/test/static/turtle_article.html @@ -130,10 +130,22 @@ - + + diff --git a/test/static/turtle_article.json b/test/static/turtle_article.json new file mode 100644 index 0000000..086bedf --- /dev/null +++ b/test/static/turtle_article.json @@ -0,0 +1,130 @@ +{ + "bePress": { + "series_title": "Turtles", + "author": "Turtle Lvr", + "author_institution": "Mediawiki", + "title": "Turtles are AWESOME!!1", + "date": "2012", + "pdf_url": "http://www.example.com/turtlelvr/pdf", + "abstract_html_url": "http://www.example.com/turtlelvr", + "publisher": "Turtles Society", + "online_date": "2012/02/04" + }, + "coins": [{ + "ctx_ver": "Z39.88-2004", + "rft_id": "info:doi/http://dx.doi.org/10.5555/12345678", + "rfr_id": "info:sid/crossref.org:search", + "rft_val_fmt": "info:ofi/fmt:kev:mtx:journal", + "rft": { + "atitle": "Toward a Unified Theory of High-Energy Metaphysics: Silly String Theory", + "jtitle": "Journal of Psychoceramics", + "date": "2008", + "volume": "5", + "issue": "11", + "spage": "1", + "epage": "3", + "aufirst": "Josiah", + "aulast": "Carberry", + "genre": "article", + "au": ["Josiah Carberry"] + } + }], + "dublinCore": { + "title": "Turtles are AWESOME!!1", + "creator": "http://www.example.com/turtlelvr", + "description": "Exposition on the awesomeness of turtles", + "date": "2012-02-04 12:00:00", + "type": "Text.Article" + }, + "general": { + "author": "Turtle Lvr", + "authorlink": "http://examples.com/turtlelvr", + "canonical": "http://example.com/turtles", + "description": "Exposition on the awesomeness of turtles", + "publisher": "https://mediawiki.org", + "robots": "we welcome our robot overlords", + "shortlink": "http://example.com/c", + "title": "Turtles are AWESOME!!1 | Awesome Turtles Website", + "lang": "en" + }, + "highwirePress": { + "journal_title": "Turtles", + "issn": "1234-5678", + "doi": "10.1000/123", + "publication_date": "2012-02-04", + "title": "Turtles are AWESOME!!1", + "author": "Turtle Lvr", + "author_institution": "Mediawiki", + "volume": "150", + "issue": "1", + "firstpage": "123", + "lastpage": "456", + "publisher": "Turtles Society", + "abstract": "Exposition on the awesomeness of turtles." + }, + "jsonLd": { + "@context": "http://schema.org", + "@type": "Organization", + "url": "https://www.turtles.com" + }, + "openGraph": { + "locale": "en_US", + "type": "article", + "title": "Turtles are AWESOME!!1", + "description": "Exposition on the awesomeness of turtles", + "url": "http://example.com", + "site_name": "Awesome Turtles Website", + "image": [{ + "url": "http://example.com/turtle.jpg", + "secure_url": "https://secure.example.com/turtle.jpg", + "type": "image/jpeg", + "width": "400", + "height": "300" + }, { + "url": "http://example.com/shell.jpg", + "width": "200", + "height": "150" + }], + "audio": { + "url": "http://example.com/sound.mp3", + "secure_url": "https://secure.example.com/sound.mp3", + "type": "audio/mpeg" + }, + "tag": ["turtles", "are", "awesome"], + "section": ["Turtles are tough", "Turtles are flawless", "Turtles are cute"], + "published_time": "2012-02-04T12:00:00+00:00", + "modified_time": "2015-01-14T19:14:27+00:00", + "author": "http://examples.com/turtlelvr", + "publisher": "http://mediawiki.org" + }, + "eprints": { + "title": "Turtles are AWESOME!!1", + "creators_name": "http://www.example.com/turtlelvr", + "abstract": "Exposition on the awesomeness of turtles", + "datestamp": "2012-02-04 12:00:00", + "type": "article" + }, + "twitter": { + "card": "summary", + "site": "@Turtlessssssssss", + "creator": ["@Turtlessssssssss", "@Turtlezzzzzzzzzz"], + "url": "http://www.example.com/turtles", + "title": "Turtles are AWESOME!!1", + "description": "Exposition on the awesomeness of turtles", + "image": { + "url": "http://example.com/turtles.jpg", + "alt": "It's a bunch of turtles!" + }, + "app": { + "url": { + "iphone": "turtle://", + "googleplay": "turtle://" + }, + "id": { + "iphone": "000", + "googleplay": "superturtlearticle.androidapp" + } + } + } +} + diff --git a/test/static/turtle_article_case.html b/test/static/turtle_article_case.html index 56ceb53..b9fee6e 100644 --- a/test/static/turtle_article_case.html +++ b/test/static/turtle_article_case.html @@ -130,6 +130,14 @@ + + + diff --git a/test/static/turtle_article_errors.html b/test/static/turtle_article_errors.html new file mode 100644 index 0000000..c5600d4 --- /dev/null +++ b/test/static/turtle_article_errors.html @@ -0,0 +1,18 @@ + + +Turtles are AWESOME!!1 | Invalid Turtles Website + + + + + + + + + diff --git a/test/static/turtle_movie.json b/test/static/turtle_movie.json new file mode 100644 index 0000000..1fef875 --- /dev/null +++ b/test/static/turtle_movie.json @@ -0,0 +1,57 @@ +{ + "dublinCore": { + "title": "Turtles of the Jungle", + "creator": "http://www.example.com/turtlelvr", + "description": "A 2008 film about jungle turtles.", + "date": "2012-02-04 12:00:00", + "type": "Image.Moving" + }, + "general": { + "author": "Turtle Lvr", + "authorlink": "http://examples.com/turtlelvr", + "canonical": "http://example.com/turtles", + "description": "Exposition on the awesomeness of turtles", + "publisher": "https://mediawiki.org", + "robots": "we welcome our robot overlords", + "shortlink": "http://example.com/c", + "title": "Turtles are AWESOME!!1 | Awesome Turtles Website", + "lang": "en" + }, + "openGraph": { + "locale": "en_US", + "type": "video.movie", + "title": "Turtles of the Jungle", + "description": "A 2008 film about jungle turtles.", + "url": "http://example.com", + "site_name": "Awesome Turtle Movies Website", + "image": [{ + "url": "http://example.com/turtle.jpg" + }, { + "url": "http://example.com/shell.jpg" + }], + "tag": ["turtle", "movie", "awesome"], + "director": "http://www.example.com/PhilTheTurtle", + "actor": ["http://www.example.com/PatTheTurtle", "http://www.example.com/SaminaTheTurtle"], + "writer": "http://www.example.com/TinaTheTurtle", + "release_date": "2015-01-14T19:14:27+00:00", + "duration": "1000000" + }, + "twitter": { + "card": "summary", + "site": "@Turtlessssssssss", + "creator": "@Turtlessssssssss", + "url": "http://www.example.com/turtles", + "title": "Turtles of the Jungle", + "description": "A 2008 film about jungle turtles.", + "player": { + "url": "http://www.example.com/turtles/player", + "width": "400", + "height": "400", + "stream": { + "url": "http://www.example.com/turtles/turtle.mp4", + "content_type": "video/mp4" + } + } + } +} +