Mobrovac has submitted this change and it was merged. ( https://gerrit.wikimedia.org/r/354249 )
Change subject: Relax date validation significantly ...................................................................... Relax date validation significantly * Return dates where month and year are known in the format month year (English natural language). * If unable to parse date, leave the date in instead of deleting it. Bug: T132308 Change-Id: If35261294ede8c0942b4f7394fa17b12f38c2709 --- M lib/Exporter.js M test/features/scraping/isbn.js M test/features/unit/exporter.js M test/features/unit/translators/util.js 4 files changed, 54 insertions(+), 28 deletions(-) Approvals: Mobrovac: Verified; Looks good to me, approved jenkins-bot: Verified diff --git a/lib/Exporter.js b/lib/Exporter.js index 33f5f30..674f4d9 100644 --- a/lib/Exporter.js +++ b/lib/Exporter.js @@ -19,9 +19,10 @@ var CachedTypes = require('./zotero/cachedTypes.js'); var pubMedRequest = require('./pubMedRequest.js'); -//TODO: Remove +/* Globals */ var defaultLogger; var userAgent; +var acceptLanguage = 'en-US'; // Set global language default /* Custom chrono parsers */ var customChrono = new chrono.Chrono(); @@ -35,7 +36,6 @@ text: match[0], index: match.index, start: { - day: 1, month: match[2], year: match[1] } @@ -44,6 +44,7 @@ customChrono.parsers.push(partialISO); //Target year first with range, i.e. 1975 Dec-Nov +//TODO: Add to end var journalFormat = new chrono.Parser(); journalFormat.pattern = function () { return /(\d{4})\s+([a-z]{3})\s*-\s*[a-z]{3}$/ig; }; journalFormat.extract = function(text, ref, match, opt) { @@ -54,7 +55,6 @@ text: match[0], index: match.index, start: { - day: 1, month: month, year: match[1] } @@ -553,22 +553,27 @@ // Try to parse with chrono first var p = customChrono.parse(citation.date); // Create ParsedResult object with chrono if (p && p[0] && p[0].start){ - p[0].start.assign('timezoneOffset', 0); // Remove timezone offset so that the user-observed date doesn't change based on offset - d = p[0].start.date(); // Create a Date object from ParsedComponents Object - } else { - // Try to parse with Date.parse() as fallback; chrono doesn't seem to work with ambigious dates, such as '2010' - d = new Date(citation.date + (/[0-9]T[0-9]/.test(citation.date) ? '' : ' GMT+00')); // Explicitly set to GMT time to avoid offset issue + // Remove timezone offset so that the user-observed date doesn't change based on offset + p[0].start.assign('timezoneOffset', 0); + // Create a Date object from ParsedComponents Object + d = p[0].start.date(); + // If a Date object is formed, format it. + if (isFinite(d)) { + // Only turn into ISO date if an all fields are known + if(p[0].start.knownValues.year && p[0].start.knownValues.month && p[0].start.knownValues.day) { + // Remove time from date + citation.date = d.toISOString().split('T').shift(); + } else if (p[0].start.knownValues.year && p[0].start.knownValues.month){ + var options = { + year: 'numeric', + month: 'long' + }; + citation.date = d.toLocaleDateString(acceptLanguage, options); + } + } } - - // Lastly, remove time from date - if (isFinite(d)) { - citation.date = d.toISOString().split('T').shift(); - } else { - // If no finite translation of the date is available, remove the field - delete citation.date; - } - } catch (e) { // Remove field if errors are thrown - delete citation.date; + } catch (e) { + // Leave field as written if errors are thrown } } return citation; diff --git a/test/features/scraping/isbn.js b/test/features/scraping/isbn.js index 3434998..037c778 100644 --- a/test/features/scraping/isbn.js +++ b/test/features/scraping/isbn.js @@ -109,7 +109,7 @@ assert.deepEqual(res.body[0].contributor, [['Sheen,','Martin.'],['Cohen,','Bonni.'],['Thomson,','Richard.'],['DK Publishing,','Inc.']], 'Unexpected value:' + res.body[0].author); // only get this sometimes assert.deepEqual(res.body[0].studio, 'DK Pub', 'Unexpected value; expected DK Pub, got ' + res.body[0].studio); //assert.deepEqual(res.body[0].place, 'New York', 'Unexpected value; expected New York, got ' + res.body[0].place); - //assert.deepEqual(res.body[0].date, '2010', 'Unexpected value; expected 2010, got ' + res.body[0].date); // Not currently working with worldcat; date is returned to us as '2010, ©1996' + assert.deepEqual(res.body[0].date, '2010, ©1996', 'Unexpected value; expected 2010, ©1996, got ' + res.body[0].date); // Not currently working with worldcat; date is returned to us as '2010, ©1996' assert.isInArray(res.body[0].ISBN, '9780756662967'); assert.deepEqual(res.body[0].itemType, 'videoRecording', 'Wrong itemType; expected videoRecording, got ' + res.body[0].itemType); }); diff --git a/test/features/unit/exporter.js b/test/features/unit/exporter.js index f6bb020..b677571 100644 --- a/test/features/unit/exporter.js +++ b/test/features/unit/exporter.js @@ -82,9 +82,30 @@ assert.deepEqual(result, expected); }); - it('Uses year from ambiguous date', function() { // Partial ISO? + it('Unable to parse to leaves as written; season', function() { // Partial ISO? date = 'Fall 1975'; - expected = {date: '1975-01-01'}; + expected = {date: 'Fall 1975'}; + result = exporter.fixDate({date:date}); + assert.deepEqual(result, expected); + }); + + it('Unable to parse so leaves it as written', function() { + date = '2014, ©2010'; + expected = {date: '2014, ©2010'}; + result = exporter.fixDate({date:date}); + assert.deepEqual(result, expected); + }); + + it('Multilingual date - Spanish - leaves as written', function() { + date = 'Mayo de 2010'; + expected = {date: 'Mayo de 2010'}; + result = exporter.fixDate({date:date}); + assert.deepEqual(result, expected); + }); + + it('Multilingual date - Russian - leaves as written', function() { + date = 'Май 2010 г.'; + expected = {date: 'Май 2010 г.'}; result = exporter.fixDate({date:date}); assert.deepEqual(result, expected); }); @@ -139,22 +160,22 @@ }); it('Year first date', function() { - date = '"1975 Nov-Dec'; - expected = {date: '1975-11-01'}; + date = '1975 Nov-Dec'; + expected = {date: 'November 1975'}; result = exporter.fixDate({date:date}); assert.deepEqual(result, expected); }); it('Partial ISO date no preceeding 0', function() { - date = '"1975-2'; - expected = {date: '1975-02-01'}; + date = '1975-2'; + expected = {date: 'February 1975'}; result = exporter.fixDate({date:date}); assert.deepEqual(result, expected); }); it('Partial ISO date proceeding 0', function() { - date = '"1975-02'; - expected = {date: '1975-02-01'}; + date = '1975-02'; + expected = {date: 'February 1975'}; result = exporter.fixDate({date:date}); assert.deepEqual(result, expected); }); diff --git a/test/features/unit/translators/util.js b/test/features/unit/translators/util.js index eb20791..aab04ab 100644 --- a/test/features/unit/translators/util.js +++ b/test/features/unit/translators/util.js @@ -29,7 +29,7 @@ }); it('correctly adds date with fixDate validate function', function() { - expected = {date: '2012-08-01'}; + expected = {date: 'August 2012'}; result = makeTranslator('date', fixDate).translate({}, {date:['August 2012']},'date'); assert.deepEqual(result, expected); }); -- To view, visit https://gerrit.wikimedia.org/r/354249 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: merged Gerrit-Change-Id: If35261294ede8c0942b4f7394fa17b12f38c2709 Gerrit-PatchSet: 3 Gerrit-Project: mediawiki/services/citoid Gerrit-Branch: master Gerrit-Owner: Mvolz <mv...@wikimedia.org> Gerrit-Reviewer: Mobrovac <mobro...@wikimedia.org> Gerrit-Reviewer: Mvolz <mv...@wikimedia.org> Gerrit-Reviewer: jenkins-bot <> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits