Mobrovac has submitted this change and it was merged. ( 
https://gerrit.wikimedia.org/r/354249 )

Change subject: Relax date validation significantly
......................................................................


Relax date validation significantly

* Return dates where only the month and year are known in the
format "Month YYYY" (English natural language), e.g. "February 1975".
* If the date cannot be parsed, leave it as written instead of
deleting it.

Bug: T132308
Change-Id: If35261294ede8c0942b4f7394fa17b12f38c2709
---
M lib/Exporter.js
M test/features/scraping/isbn.js
M test/features/unit/exporter.js
M test/features/unit/translators/util.js
4 files changed, 54 insertions(+), 28 deletions(-)

Approvals:
  Mobrovac: Verified; Looks good to me, approved
  jenkins-bot: Verified



diff --git a/lib/Exporter.js b/lib/Exporter.js
index 33f5f30..674f4d9 100644
--- a/lib/Exporter.js
+++ b/lib/Exporter.js
@@ -19,9 +19,10 @@
 var CachedTypes = require('./zotero/cachedTypes.js');
 var pubMedRequest = require('./pubMedRequest.js');
 
-//TODO: Remove
+/* Globals */
 var defaultLogger;
 var userAgent;
+var acceptLanguage = 'en-US'; // Set global language default
 
 /* Custom chrono parsers */
 var customChrono = new chrono.Chrono();
@@ -35,7 +36,6 @@
         text: match[0],
         index: match.index,
         start: {
-            day: 1,
             month: match[2],
             year: match[1]
         }
@@ -44,6 +44,7 @@
 customChrono.parsers.push(partialISO);
 
 //Target year first with range, i.e. 1975 Dec-Nov
+//TODO: Add to end
 var journalFormat = new chrono.Parser();
 journalFormat.pattern = function () { return 
/(\d{4})\s+([a-z]{3})\s*-\s*[a-z]{3}$/ig; };
 journalFormat.extract = function(text, ref, match, opt) {
@@ -54,7 +55,6 @@
         text: match[0],
         index: match.index,
         start: {
-            day: 1,
             month: month,
             year: match[1]
         }
@@ -553,22 +553,27 @@
             // Try to parse with chrono first
             var p = customChrono.parse(citation.date); // Create ParsedResult 
object with chrono
             if (p && p[0] && p[0].start){
-                p[0].start.assign('timezoneOffset', 0); // Remove timezone 
offset so that the user-observed date doesn't change based on offset
-                d = p[0].start.date(); // Create a Date object from 
ParsedComponents Object
-            } else {
-                // Try to parse with Date.parse() as fallback; chrono doesn't 
seem to work with ambigious dates, such as '2010'
-                d = new Date(citation.date + 
(/[0-9]T[0-9]/.test(citation.date) ? '' : ' GMT+00')); // Explicitly set to GMT 
time to avoid offset issue
+                // Remove timezone offset so that the user-observed date 
doesn't change based on offset
+                p[0].start.assign('timezoneOffset', 0);
+                // Create a Date object from ParsedComponents Object
+                d = p[0].start.date();
+                // If a Date object is formed, format it.
+                if (isFinite(d)) {
+                    // Only turn into ISO date if all fields are known
+                    if(p[0].start.knownValues.year && 
p[0].start.knownValues.month && p[0].start.knownValues.day) {
+                            // Remove time from date
+                            citation.date = d.toISOString().split('T').shift();
+                    } else if (p[0].start.knownValues.year && 
p[0].start.knownValues.month){
+                        var options = {
+                            year: 'numeric',
+                            month: 'long'
+                        };
+                        citation.date = d.toLocaleDateString(acceptLanguage, 
options);
+                    }
+                }
             }
-
-            // Lastly, remove time from date
-            if (isFinite(d)) {
-                citation.date = d.toISOString().split('T').shift();
-            } else {
-                // If no finite translation of the date is available, remove 
the field
-                delete citation.date;
-            }
-        } catch (e) { // Remove field if errors are thrown
-            delete citation.date;
+        } catch (e) {
+            // Leave field as written if errors are thrown
         }
     }
     return citation;
diff --git a/test/features/scraping/isbn.js b/test/features/scraping/isbn.js
index 3434998..037c778 100644
--- a/test/features/scraping/isbn.js
+++ b/test/features/scraping/isbn.js
@@ -109,7 +109,7 @@
                 assert.deepEqual(res.body[0].contributor, 
[['Sheen,','Martin.'],['Cohen,','Bonni.'],['Thomson,','Richard.'],['DK 
Publishing,','Inc.']], 'Unexpected value:' + res.body[0].author); // only get 
this sometimes
                 assert.deepEqual(res.body[0].studio, 'DK Pub', 'Unexpected 
value; expected DK Pub, got ' + res.body[0].studio);
                 //assert.deepEqual(res.body[0].place, 'New York', 'Unexpected 
value; expected New York, got ' + res.body[0].place);
-                //assert.deepEqual(res.body[0].date, '2010', 'Unexpected 
value; expected 2010, got ' + res.body[0].date); // Not currently working with 
worldcat; date is returned to us as '2010, ©1996'
+                assert.deepEqual(res.body[0].date, '2010, ©1996', 'Unexpected 
value; expected 2010, ©1996, got ' + res.body[0].date); // Not currently 
working with worldcat; date is returned to us as '2010, ©1996'
                 assert.isInArray(res.body[0].ISBN, '9780756662967');
                 assert.deepEqual(res.body[0].itemType, 'videoRecording', 
'Wrong itemType; expected videoRecording, got ' + res.body[0].itemType);
             });
diff --git a/test/features/unit/exporter.js b/test/features/unit/exporter.js
index f6bb020..b677571 100644
--- a/test/features/unit/exporter.js
+++ b/test/features/unit/exporter.js
@@ -82,9 +82,30 @@
             assert.deepEqual(result, expected);
         });
 
-        it('Uses year from ambiguous date', function() { // Partial ISO?
+        it('Unable to parse to leaves as written; season', function() { // 
Partial ISO?
             date = 'Fall 1975';
-            expected = {date: '1975-01-01'};
+            expected = {date: 'Fall 1975'};
+            result = exporter.fixDate({date:date});
+            assert.deepEqual(result, expected);
+        });
+
+        it('Unable to parse so leaves it as written', function() {
+            date = '2014, ©2010';
+            expected = {date: '2014, ©2010'};
+            result = exporter.fixDate({date:date});
+            assert.deepEqual(result, expected);
+        });
+
+        it('Multilingual date - Spanish - leaves as written', function() {
+            date = 'Mayo de 2010';
+            expected = {date: 'Mayo de 2010'};
+            result = exporter.fixDate({date:date});
+            assert.deepEqual(result, expected);
+        });
+
+        it('Multilingual date - Russian - leaves as written', function() {
+            date = 'Май 2010 г.';
+            expected = {date: 'Май 2010 г.'};
             result = exporter.fixDate({date:date});
             assert.deepEqual(result, expected);
         });
@@ -139,22 +160,22 @@
         });
 
         it('Year first date', function() {
-            date = '"1975 Nov-Dec';
-            expected = {date: '1975-11-01'};
+            date = '1975 Nov-Dec';
+            expected = {date: 'November 1975'};
             result = exporter.fixDate({date:date});
             assert.deepEqual(result, expected);
         });
 
         it('Partial ISO date no preceeding 0', function() {
-            date = '"1975-2';
-            expected = {date: '1975-02-01'};
+            date = '1975-2';
+            expected = {date: 'February 1975'};
             result = exporter.fixDate({date:date});
             assert.deepEqual(result, expected);
         });
 
         it('Partial ISO date proceeding 0', function() {
-            date = '"1975-02';
-            expected = {date: '1975-02-01'};
+            date = '1975-02';
+            expected = {date: 'February 1975'};
             result = exporter.fixDate({date:date});
             assert.deepEqual(result, expected);
         });
diff --git a/test/features/unit/translators/util.js 
b/test/features/unit/translators/util.js
index eb20791..aab04ab 100644
--- a/test/features/unit/translators/util.js
+++ b/test/features/unit/translators/util.js
@@ -29,7 +29,7 @@
         });
 
         it('correctly adds date with fixDate validate function', function() {
-            expected = {date: '2012-08-01'};
+            expected = {date: 'August 2012'};
             result = makeTranslator('date', fixDate).translate({}, 
{date:['August 2012']},'date');
             assert.deepEqual(result, expected);
         });

-- 
To view, visit https://gerrit.wikimedia.org/r/354249
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: If35261294ede8c0942b4f7394fa17b12f38c2709
Gerrit-PatchSet: 3
Gerrit-Project: mediawiki/services/citoid
Gerrit-Branch: master
Gerrit-Owner: Mvolz <mv...@wikimedia.org>
Gerrit-Reviewer: Mobrovac <mobro...@wikimedia.org>
Gerrit-Reviewer: Mvolz <mv...@wikimedia.org>
Gerrit-Reviewer: jenkins-bot <>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to