Mvolz has uploaded a new change for review. https://gerrit.wikimedia.org/r/225727
Change subject: Include 5 digit registrant codes in DOI regex ...................................................................... Include 5 digit registrant codes in DOI regex * Allow registrant codes between 3-5 characters in DOI regex * Make DOI comparison in crossRef case-insensitive as the DOI system is case-insensitive and different publishers and systems use different cases. * Add test for DOI with 5 digit registrant code in upper case which is lower case in the crossRef database * Pin request package to specific version Bug: T106235 Change-Id: Id9e9c5476fb1f7839861079daa240c71a0d506e3 --- M lib/CitoidService.js M lib/crossRefRequest.js M package.json M test/features/scraping/index.js 4 files changed, 11 insertions(+), 3 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/mediawiki/services/citoid refs/changes/27/225727/1 diff --git a/lib/CitoidService.js b/lib/CitoidService.js index ac474ba..d03b1c6 100644 --- a/lib/CitoidService.js +++ b/lib/CitoidService.js @@ -62,7 +62,7 @@ var reHTTP = new RegExp('^((https?)://.+\\..+)'); // Assumes all strings with http/s protocol are URLs var reWWW = new RegExp('^((www)\\..+\\..+)'); // Assumes all strings with www substring are URLs - var reDOI = new RegExp('\\b10\\.?[0-9]{3,4}(?:[.][0-9]+)*/.*'); + var reDOI = new RegExp('\\b10\\.?[0-9]{3,5}(?:[.][0-9]+)*/.*'); var matchHTTP = search.match(reHTTP); var matchWWW = search.match(reWWW); var matchDOI = search.match(reDOI); diff --git a/lib/crossRefRequest.js b/lib/crossRefRequest.js index 3392f9c..db47870 100644 --- a/lib/crossRefRequest.js +++ b/lib/crossRefRequest.js @@ -35,7 +35,7 @@ return BBPromise.reject(message); } else { // API returns fuzzy results, so ensure the first citation corresponds to correct doi - if (body[0].doi !== 'http://dx.doi.org/' + doi){ + if (body[0].doi.toLowerCase() !== 'http://dx.doi.org/' + doi.toLowerCase()){ // Case insensitive return BBPromise.reject('DOI in return crossRef citation does not match requested doi:' + doi); } return parseCOinS(body[0].coins).then(function(metadata){ diff --git a/package.json b/package.json index 94b12c5..ea9dae8 100644 --- a/package.json +++ b/package.json @@ -21,7 +21,7 @@ "iconv-lite": "0.4.11", "js-yaml": "3.3.1", "preq": "0.4.4", - "request": "^2.58.0", + "request": "2.58.0", "service-runner": "0.2.1", "tough-cookie": "2.0.0", "striptags": "2.0.2" diff --git a/test/features/scraping/index.js b/test/features/scraping/index.js index 27377f5..0023fbf 100644 --- a/test/features/scraping/index.js +++ b/test/features/scraping/index.js @@ -240,6 +240,14 @@ }); }); + it.only('Case sensitive DOI with 5 digit registrant code and unknown genre in crossRef', function() { + return server.query('10.14344/IOC.ML.4.4').then(function(res) { + assert.status(res, 200); + assert.checkZotCitation(res, 'IOC World Bird List 4.4'); + assert.deepEqual(!!res.body[0].DOI, true, 'Missing DOI'); + }); + }); + // Ensure DOI is present in non-zotero scraped page where scraping fails it('DOI pointing to resource that can\'t be scraped - uses crossRef', function() { return server.query('10.1038/scientificamerican0200-90') -- To view, visit https://gerrit.wikimedia.org/r/225727 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: Id9e9c5476fb1f7839861079daa240c71a0d506e3 Gerrit-PatchSet: 1 Gerrit-Project: mediawiki/services/citoid Gerrit-Branch: master Gerrit-Owner: Mvolz <mv...@wikimedia.org> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits