Mvolz has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/225727

Change subject: Include 5 digit registrant codes in DOI regex
......................................................................

Include 5 digit registrant codes in DOI regex

* Allow registrant codes of 3 to 5 digits
in DOI regex
* Make DOI comparison in crossRef case-insensitive,
as the DOI system is case-insensitive and different
publishers and systems use different letter cases.
* Add test for an upper-case DOI with a 5-digit
registrant code that is stored in lower case in
the crossRef database
* Pin request package to specific version

Bug: T106235
Change-Id: Id9e9c5476fb1f7839861079daa240c71a0d506e3
---
M lib/CitoidService.js
M lib/crossRefRequest.js
M package.json
M test/features/scraping/index.js
4 files changed, 11 insertions(+), 3 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/services/citoid 
refs/changes/27/225727/1

diff --git a/lib/CitoidService.js b/lib/CitoidService.js
index ac474ba..d03b1c6 100644
--- a/lib/CitoidService.js
+++ b/lib/CitoidService.js
@@ -62,7 +62,7 @@
 
        var reHTTP = new RegExp('^((https?)://.+\\..+)'); // Assumes all 
strings with http/s protocol are URLs
        var reWWW = new RegExp('^((www)\\..+\\..+)'); // Assumes all strings 
with www substring are URLs
-       var reDOI = new RegExp('\\b10\\.?[0-9]{3,4}(?:[.][0-9]+)*/.*');
+       var reDOI = new RegExp('\\b10\\.?[0-9]{3,5}(?:[.][0-9]+)*/.*');
        var matchHTTP = search.match(reHTTP);
        var matchWWW = search.match(reWWW);
        var matchDOI = search.match(reDOI);
diff --git a/lib/crossRefRequest.js b/lib/crossRefRequest.js
index 3392f9c..db47870 100644
--- a/lib/crossRefRequest.js
+++ b/lib/crossRefRequest.js
@@ -35,7 +35,7 @@
                                return BBPromise.reject(message);
                        } else {
                                // API returns fuzzy results, so ensure the 
first citation corresponds to correct doi
-                               if (body[0].doi !== 'http://dx.doi.org/' + doi){
+                               if (body[0].doi.toLowerCase() !== 
'http://dx.doi.org/' + doi.toLowerCase()){ // Case insensitive
                                        return BBPromise.reject('DOI in return 
crossRef citation does not match requested doi:' + doi);
                                }
                                return 
parseCOinS(body[0].coins).then(function(metadata){
diff --git a/package.json b/package.json
index 94b12c5..ea9dae8 100644
--- a/package.json
+++ b/package.json
@@ -21,7 +21,7 @@
     "iconv-lite": "0.4.11",
     "js-yaml": "3.3.1",
     "preq": "0.4.4",
-    "request": "^2.58.0",
+    "request": "2.58.0",
     "service-runner": "0.2.1",
     "tough-cookie": "2.0.0",
     "striptags": "2.0.2"
diff --git a/test/features/scraping/index.js b/test/features/scraping/index.js
index 27377f5..0023fbf 100644
--- a/test/features/scraping/index.js
+++ b/test/features/scraping/index.js
@@ -240,6 +240,14 @@
                        });
                });
 
+               it.only('Case sensitive DOI with 5 digit registrant code and 
unknown genre in crossRef', function() {
+                       return 
server.query('10.14344/IOC.ML.4.4').then(function(res) {
+                               assert.status(res, 200);
+                               assert.checkZotCitation(res, 'IOC World Bird 
List 4.4');
+                               assert.deepEqual(!!res.body[0].DOI, true, 
'Missing DOI');
+                       });
+               });
+
                // Ensure DOI is present in non-zotero scraped page where 
scraping fails
                it('DOI pointing to resource that can\'t be scraped - uses 
crossRef', function() {
                        return server.query('10.1038/scientificamerican0200-90')

-- 
To view, visit https://gerrit.wikimedia.org/r/225727
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: Id9e9c5476fb1f7839861079daa240c71a0d506e3
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/services/citoid
Gerrit-Branch: master
Gerrit-Owner: Mvolz <mv...@wikimedia.org>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to