Arlolra has uploaded a new change for review.
https://gerrit.wikimedia.org/r/202674
Change subject: Refactor roundtrip-test.js
......................................................................
Refactor roundtrip-test.js
* Using promises.
* Fixes the error on kowiki/이완구, whose content is the empty string.
That led to the API returning a 302 redirect, which roundtrip-test.js
doesn't follow; instead it tried to return an error with res.body,
which is undefined, tricking us into believing err was null, setting
the html to null, and resending that to the API, which wasn't
expecting it. So, a nice cascade of failures, all of which is addressed.
* There are some FIXMEs left to look at ... the most pressing being the
nodify callback on err.
Change-Id: I646f8a916add26ba60171ed31d1593c6bb6b63c5
---
M api/routes.js
M tests/client/client.js
M tests/roundtrip-test.js
3 files changed, 420 insertions(+), 480 deletions(-)
git pull ssh://gerrit.wikimedia.org:29418/mediawiki/services/parsoid
refs/changes/74/202674/1
diff --git a/api/routes.js b/api/routes.js
index 8b597d2..b348aca 100644
--- a/api/routes.js
+++ b/api/routes.js
@@ -503,7 +503,7 @@
}
var p;
- if ( wt && (!res.local('pageName') || !oldid) ) {
+ if ( typeof wt === 'string' && (!res.local('pageName') || !oldid) ) {
// don't fetch the page source
env.setPageSrcInfo( wt );
p = Promise.resolve();
@@ -511,7 +511,7 @@
p = promiseTemplateReq( env, target, oldid );
}
- if ( wt ) {
+ if ( typeof wt === 'string' ) {
p = p.then( parseWt )
.timeout( REQ_TIMEOUT )
.then(sendRes);
@@ -833,7 +833,7 @@
// Accept wikitext as a string or object{body,headers}
var wikitext = (v2.wikitext && typeof v2.wikitext !== "string")
?
v2.wikitext.body : v2.wikitext;
- if ( !wikitext ) {
+ if ( typeof v2.wikitext !== "string" ) {
if ( !res.local('pageName') ) {
return errOut( "No title or wikitext was
provided.", 400 );
}
@@ -849,7 +849,8 @@
return errOut( "No html was supplied.", 400 );
}
// Accept html as a string or object{body,headers}
- var html = (typeof v2.html === "string") ? v2.html :
v2.html.body;
+ var html = (typeof v2.html === "string") ?
+ v2.html : (v2.html.body || "");
html2wt( req, res, html );
}
};
diff --git a/tests/client/client.js b/tests/client/client.js
index 25c9cb5..3a73ec1 100755
--- a/tests/client/client.js
+++ b/tests/client/client.js
@@ -69,10 +69,19 @@
Util.retryingHTTPRequest(10, requestOptions, callback );
};
-var runTest = function( cb, test) {
- var results, callback = rtTest.cbCombinator.bind( null,
rtTest.xmlFormat, function ( err, results ) {
- if ( err ) {
- console.log( 'ERROR in ' + test.prefix + ':' +
test.title + ':\n' + err + '\n' + err.stack);
+
+var runTest = function(cb, test) {
+ rtTest.fetch(test.title, {
+ setup: config.setup,
+ prefix: test.prefix,
+ rtTestMode: true,
+ parsoidURL: parsoidURL
+ }, rtTest.xmlFormat).nodify(function(err, results) {
+ var callback = null;
+ if (err) {
+ // Log it to console (for gabriel to watch scroll by)
+ console.error('Error in %s:%s: %s\n%s', test.prefix,
test.title,
+ err, err.stack || '');
/*
* If you're looking at the line below and thinking
"Why in the
* hell would they have done that, it causes
unnecessary problems
@@ -83,29 +92,10 @@
* In sum, easier to die than to worry about having to
reset any
* broken application state.
*/
- cb( 'postResult', err, results, test, function () {
process.exit( 1 ); } );
- } else {
- cb( 'postResult', err, results, test, null );
+ callback = function() { process.exit(1); };
}
- } );
-
- try {
- rtTest.fetch( test.title, {
- setup: config.setup,
- prefix: test.prefix,
- rtTestMode: true,
- parsoidURL: parsoidURL
- }, callback );
- } catch ( err ) {
- // Log it to console (for gabriel to watch scroll by)
- console.error( "ERROR in " + test.prefix + ':' + test.title +
': ' + err + '\n' + err.stack);
-
- results = rtTest.xmlFormat( {
- page: { name: test.title },
- wiki: { iwp: test.prefix }
- }, err );
- cb( 'postResult', err, results, test, function() {
process.exit( 1 ); } );
- }
+ cb('postResult', err, results, test, callback);
+ });
};
/**
diff --git a/tests/roundtrip-test.js b/tests/roundtrip-test.js
index 8a4ffda..4020801 100755
--- a/tests/roundtrip-test.js
+++ b/tests/roundtrip-test.js
@@ -1,99 +1,101 @@
#!/usr/bin/env node
-"use strict";
-require( '../lib/core-upgrade.js' );
+'use strict';
+require('../lib/core-upgrade.js');
-var request = require( 'request' ),
- yargs = require( 'yargs' ),
- domino = require( 'domino' ),
- url = require( 'url' ),
- zlib = require( 'zlib' ),
- JSUtils = require( '../lib/jsutils.js' ).JSUtils,
- Util = require( '../lib/mediawiki.Util.js' ).Util,
- DU = require( '../lib/mediawiki.DOMUtils.js' ).DOMUtils,
- TemplateRequest = require( '../lib/mediawiki.ApiRequest.js'
).TemplateRequest,
- ParsoidConfig = require( '../lib/mediawiki.ParsoidConfig'
).ParsoidConfig,
- MWParserEnvironment = require( '../lib/mediawiki.parser.environment.js'
).MWParserEnvironment,
- Diff = require('../lib/mediawiki.Diff.js').Diff;
+var request = require('request');
+var yargs = require('yargs');
+var domino = require('domino');
+var url = require('url');
+var zlib = require('zlib');
+var JSUtils = require('../lib/jsutils.js').JSUtils;
+var Util = require('../lib/mediawiki.Util.js').Util;
+var DU = require('../lib/mediawiki.DOMUtils.js').DOMUtils;
+var TemplateRequest =
require('../lib/mediawiki.ApiRequest.js').TemplateRequest;
+var ParsoidConfig = require('../lib/mediawiki.ParsoidConfig').ParsoidConfig;
+var MWParserEnvironment =
require('../lib/mediawiki.parser.environment.js').MWParserEnvironment;
+var Diff = require('../lib/mediawiki.Diff.js').Diff;
-var plainCallback = function ( env, err, results ) {
- var i, result, output = '',
- semanticDiffs = 0, syntacticDiffs = 0,
- testDivider = ( new Array( 70 ) ).join( '=' ) + '\n',
- diffDivider = ( new Array( 70 ) ).join( '-' ) + '\n';
- if ( err ) {
+var plainFormat = function(err, prefix, title, results, profile) {
+ var output = '';
+ var semanticDiffs = 0;
+ var syntacticDiffs = 0;
+ var testDivider = '='.repeat(70) + '\n';
+ var diffDivider = '-'.repeat(70) + '\n';
+
+ if (err) {
output += 'Parser failure!\n\n';
output += diffDivider;
output += err;
+ if (err.stack) {
+ output += '\nStack trace: ' + err.stack;
+ }
} else {
- for ( i = 0; i < results.length; i++ ) {
- result = results[i];
-
+ for (var i = 0; i < results.length; i++) {
+ var result = results[i];
output += testDivider;
- if ( result.type === 'fail' ) {
- output += 'Semantic difference' +
(result.selser ? ' (selser)' : '') + ':\n\n';
+ if (result.type === 'fail') {
+ output += 'Semantic difference' +
+ (result.selser ? ' (selser)' : '') +
':\n\n';
output += result.wtDiff + '\n';
- output += diffDivider + 'HTML diff:\n\n' +
result.htmlDiff + '\n';
+ output += diffDivider + 'HTML diff:\n\n' +
+ result.htmlDiff + '\n';
semanticDiffs++;
} else {
- output += 'Syntactic difference' +
(result.selser ? ' (selser)' : '') + ':\n\n';
+ output += 'Syntactic difference' +
+ (result.selser ? ' (selser)' : '') +
':\n\n';
output += result.wtDiff + '\n';
syntacticDiffs++;
}
}
-
-
output += testDivider;
output += testDivider;
- output += "SUMMARY:\n";
- output += "Semantic differences : " + semanticDiffs + "\n";
- output += "Syntactic differences: " + syntacticDiffs + "\n";
+ output += 'SUMMARY:\n';
+ output += 'Semantic differences : ' + semanticDiffs + '\n';
+ output += 'Syntactic differences: ' + syntacticDiffs + '\n';
output += diffDivider;
- output += "ALL differences : " + (semanticDiffs +
syntacticDiffs) + "\n";
+ output += 'ALL differences : ' +
+ (semanticDiffs + syntacticDiffs) + '\n';
output += testDivider;
output += testDivider;
}
-
return output;
};
-var encodeXmlEntities = function( str ) {
- return str.replace( /&/g, '&amp;' )
- .replace( /</g, '&lt;' )
- .replace( />/g, '&gt;' );
-};
-
-function encodeAttribute (str) {
- return encodeXmlEntities(str)
- .replace(/"/g, '&quot;');
+function encodeXmlEntities(str) {
+ return str.replace(/&/g, '&amp;')
+ .replace(/</g, '&lt;')
+ .replace(/>/g, '&gt;');
}
+function encodeAttribute(str) {
+ return encodeXmlEntities(str).replace(/"/g, '&quot;');
+}
-var xmlCallback = function ( env, err, results ) {
+var xmlFormat = function(err, prefix, title, results, profile) {
var i, result;
- var prefix = ( env && env.conf && env.conf.wiki && env.conf.wiki.iwp )
|| '';
- var title = ( env && env.page && env.page.name ) || '';
-
+ var article = encodeAttribute(prefix + ':' + title);
var output = '<testsuites>\n';
- var outputTestSuite = function (selser) {
- output += '<testsuite name="Roundtrip article ' +
encodeAttribute( prefix + ':' + title );
+ var outputTestSuite = function(selser) {
+ output += '<testsuite name="Roundtrip article ' +
article;
if (selser) {
output += ' (selser)';
}
output += '">\n';
};
- if ( err ) {
+ if (err) {
outputTestSuite(false);
- output += '<testcase name="entire article"><error
type="parserFailedToFinish">';
- output += encodeXmlEntities( err.stack || err.toString() );
+ output += '<testcase name="entire article">';
+ output += '<error type="parserFailedToFinish">';
+ output += encodeXmlEntities(err.stack || err.toString());
output += '</error></testcase>';
} else if (!results.length) {
outputTestSuite(false);
} else {
var currentSelser = results[0].selser;
outputTestSuite(currentSelser);
- for ( i = 0; i < results.length; i++ ) {
+ for (i = 0; i < results.length; i++) {
result = results[i];
// When going from normal to selser results, switch to
a new
@@ -104,24 +106,24 @@
outputTestSuite(currentSelser);
}
- output += '<testcase name="' + encodeAttribute( prefix
+ ':' + title );
+ output += '<testcase name="' + article;
output += ' character ' + result.offset[0].start +
'">\n';
- if ( result.type === 'fail' ) {
+ if (result.type === 'fail') {
output += '<failure
type="significantHtmlDiff">\n';
output += '<diff class="wt">\n';
- output += encodeXmlEntities( result.wtDiff );
+ output += encodeXmlEntities(result.wtDiff);
output += '\n</diff>\n';
output += '<diff class="html">\n';
- output += encodeXmlEntities( result.htmlDiff );
+ output += encodeXmlEntities(result.htmlDiff);
output += '\n</diff>\n';
output += '</failure>\n';
} else {
output += '<skipped
type="insignificantWikitextDiff">\n';
- output += encodeXmlEntities( result.wtDiff );
+ output += encodeXmlEntities(result.wtDiff);
output += '\n</skipped>\n';
}
@@ -131,23 +133,21 @@
output += '</testsuite>\n';
// Output the profiling data
- if ( env.profile ) {
-
- // Delete the total timer to avoid serializing it
- if (env.profile.time && env.profile.time.total_timer) {
- delete( env.profile.time.total_timer );
+ if (profile) {
+ // Delete the start time to avoid serializing it
+ if (profile.time && profile.time.start) {
+ delete(profile.time.start);
}
-
output += '<perfstats>\n';
- for ( var type in env.profile ) {
- for ( var prop in env.profile[ type ] ) {
- output += '<perfstat type="' + DU.encodeXml(
type ) + ':';
- output += DU.encodeXml( prop );
+ Object.keys(profile).forEach(function(type) {
+ Object.keys(profile[type]).forEach(function(prop) {
+ output += '<perfstat type="' +
DU.encodeXml(type) + ':';
+ output += DU.encodeXml(prop);
output += '">';
- output += DU.encodeXml( env.profile[ type ][
prop ].toString() );
+ output +=
DU.encodeXml(profile[type][prop].toString());
output += '</perfstat>\n';
- }
- }
+ });
+ });
output += '</perfstats>\n';
}
output += '</testsuites>';
@@ -155,32 +155,35 @@
return output;
};
-var findMatchingNodes = function (root, targetRange, sourceLen) {
- var currentOffset = null, wasWaiting = false, waitingForEndMatch =
false;
+var findMatchingNodes = function(root, targetRange, sourceLen) {
+ var currentOffset = null;
+ var wasWaiting = false;
+ var waitingForEndMatch = false;
function walkDOM(element) {
- var elements = [],
- precedingNodes = [],
- attribs = DU.getJSONAttribute(element, 'data-parsoid');
+ var elements = [];
+ var precedingNodes = [];
+ var attribs = DU.getJSONAttribute(element, 'data-parsoid');
- if ( attribs.dsr && attribs.dsr.length ) {
- var start = attribs.dsr[0] || 0,
- end = attribs.dsr[1] || sourceLen - 1;
+ if (attribs.dsr && attribs.dsr.length) {
+ var start = attribs.dsr[0] || 0;
+ var end = attribs.dsr[1] || sourceLen - 1;
- if ( (targetRange.end - 1) < start ||
targetRange.start > (end - 1) ) {
+ if ((targetRange.end - 1) < start || targetRange.start
> (end - 1)) {
return null;
}
- if ( waitingForEndMatch ) {
- if ( end >= targetRange.end ) {
+ if (waitingForEndMatch) {
+ if (end >= targetRange.end) {
waitingForEndMatch = false;
}
return { done: true, nodes: [element] };
}
- if ( attribs.dsr[0] !== null && targetRange.start ===
start && end === targetRange.end ) {
+ if (attribs.dsr[0] !== null && targetRange.start ===
start &&
+ end === targetRange.end) {
return { done: true, nodes: [element] };
- } else if ( targetRange.start === start ) {
+ } else if (targetRange.start === start) {
waitingForEndMatch = true;
if (end < targetRange.end) {
// No need to walk children
@@ -196,38 +199,40 @@
while (c) {
wasWaiting = waitingForEndMatch;
- if ( DU.isElt(c) ) {
+ if (DU.isElt(c)) {
var res = walkDOM(c);
var matchedChildren = res ? res.nodes : null;
- if ( matchedChildren ) {
- if ( !currentOffset && attribs.dsr &&
(attribs.dsr[0] !== null) ) {
+ if (matchedChildren) {
+ if (!currentOffset && attribs.dsr &&
(attribs.dsr[0] !== null)) {
var elesOnOffset = [];
currentOffset = attribs.dsr[0];
- // Walk the preceding nodes
without dsr values and prefix matchedChildren
- // till we get the desired
matching start value.
+ // Walk the preceding nodes
without dsr values and
+ // prefix matchedChildren till
we get the desired
+ // matching start value.
var diff = currentOffset -
targetRange.start;
- while ( precedingNodes.length >
0 && diff > 0 ) {
+ while (precedingNodes.length >
0 && diff > 0) {
var n =
precedingNodes.pop();
var len =
DU.isComment(n) ?
DU.decodedCommentLength(n) :
n.nodeValue.length;
- if ( len > diff ) {
+ if (len > diff) {
break;
}
diff -= len;
- elesOnOffset.push( n );
+ elesOnOffset.push(n);
}
elesOnOffset.reverse();
- matchedChildren =
elesOnOffset.concat( matchedChildren );
+ matchedChildren =
elesOnOffset.concat(matchedChildren);
}
- // Check if there's only one child, and
make sure it's a node with getAttribute
- if ( matchedChildren.length === 1 &&
DU.isElt(matchedChildren[0]) ) {
- var childAttribs =
matchedChildren[0].getAttribute( 'data-parsoid' );
- if ( childAttribs ) {
- childAttribs =
JSON.parse( childAttribs );
- if ( childAttribs.dsr
&& childAttribs.dsr[1]) {
- if (
childAttribs.dsr[1] >= targetRange.end ) {
+ // Check if there's only one child,
+ // and make sure it's a node with
getAttribute.
+ if (matchedChildren.length === 1 &&
DU.isElt(matchedChildren[0])) {
+ var childAttribs =
matchedChildren[0].getAttribute('data-parsoid');
+ if (childAttribs) {
+ childAttribs =
JSON.parse(childAttribs);
+ if (childAttribs.dsr &&
childAttribs.dsr[1]) {
+ if
(childAttribs.dsr[1] >= targetRange.end) {
res.done = true;
} else {
currentOffset = childAttribs.dsr[1];
@@ -248,14 +253,14 @@
// Clear out when an element node is
encountered.
precedingNodes = [];
- } else if ( c.nodeType === c.TEXT_NODE || c.nodeType
=== c.COMMENT_NODE ) {
- if ( currentOffset && ( currentOffset <
targetRange.end ) ) {
+ } else if (c.nodeType === c.TEXT_NODE || c.nodeType ===
c.COMMENT_NODE) {
+ if (currentOffset && (currentOffset <
targetRange.end)) {
if (DU.isComment(c)) {
currentOffset +=
DU.decodedCommentLength(c);
} else {
currentOffset +=
c.nodeValue.length;
}
- if ( currentOffset >= targetRange.end )
{
+ if (currentOffset >= targetRange.end) {
waitingForEndMatch = false;
}
}
@@ -263,18 +268,18 @@
if (wasWaiting || waitingForEndMatch) {
// Part of target range
elements.push(c);
- } else if ( !currentOffset ) {
+ } else if (!currentOffset) {
// Accumulate nodes without dsr
- precedingNodes.push( c );
+ precedingNodes.push(c);
}
}
- if ( wasWaiting && !waitingForEndMatch ) {
+ if (wasWaiting && !waitingForEndMatch) {
break;
}
// Skip over encapsulated content
- var typeOf = DU.isElt(c) ? c.getAttribute( 'typeof' )
|| '' : '';
+ var typeOf = DU.isElt(c) ? c.getAttribute('typeof') ||
'' : '';
if
(/\bmw:(?:Transclusion\b|Param\b|Extension\/[^\s]+)/.test(typeOf)) {
c = DU.skipOverEncapsulatedContent(c);
} else {
@@ -286,7 +291,7 @@
var numChildren = element.childNodes.length;
if (numElements === 0) {
return null;
- } else if ( numElements < numChildren ) {
+ } else if (numElements < numChildren) {
return { done: !waitingForEndMatch, nodes: elements } ;
} else { /* numElements === numChildren */
return { done: !waitingForEndMatch, nodes: [element] } ;
@@ -296,61 +301,58 @@
return walkDOM(root);
};
-var checkIfSignificant = function (env, offsets, oldWt, oldBody, oldDp, newWt,
cb, err, html, dp) {
- if (err) {
- cb(err, null, []);
- return;
- }
+var normalizeWikitext = function(str) {
+ // Ignore leading tabs vs. leading spaces
+ str = str.replace(/^\t/, ' ');
+ str = str.replace(/\n\t/g, '\n ');
+ // Normalize multiple spaces to single space
+ str = str.replace(/ +/g, ' ');
+ // Eliminate spaces around wikitext chars
+ // gwicke: disabled for now- too aggressive IMO
+ // str = str.replace(/([<"'!#\*:;+-=|{}\[\]\/]) /g, "$1");
+ // Ignore capitalization of tags and void tag indications
+ str = str.replace(/<(\/?)([^ >\/]+)((?:[^>\/]|\/(?!>))*)\/?>/g,
+ function(match, close, name, remaining) {
+ return '<' + close + name.toLowerCase() +
+ remaining.replace(/ $/, '') + '>';
+ });
+ // Ignore whitespace in table cell attributes
+ str = str.replace(/(^|\n|\|(?=\|)|!(?=!))(\{\||\|[\-+]*|!) *([^|\n]*?)
*(?=[|\n]|$)/g, '$1$2$3');
+ // Ignore trailing semicolons and spaces in style attributes
+ str = str.replace(/style\s*=\s*"[^"]+"/g, function(match) {
+ return match.replace(/\s|;(?=")/g, '');
+ });
+ // Strip double-quotes
+ str = str.replace(/"([^"]*?)"/g, '$1');
+ // Ignore implicit </small> and </center> in table cells or the end
+ // of the string for now
+ str = str.replace(/(^|\n)<\/(?:small|center)>(?=\n[|!]|\n?$)/g, '');
+ str = str.replace(/([|!].*?)<\/(?:small|center)>(?=\n[|!]|\n?$)/gi,
'$1');
+ return str;
+};
- var normalizeWikitext = function (str) {
- // Ignore leading tabs vs. leading spaces
- str = str.replace(/^\t/, ' ');
- str = str.replace(/\n\t/g, '\n ');
- // Normalize multiple spaces to single space
- str = str.replace(/ +/g, " ");
- // Eliminate spaces around wikitext chars
- // gwicke: disabled for now- too aggressive IMO
- //str = str.replace(/([<"'!#\*:;+-=|{}\[\]\/]) /g, "$1");
- // Ignore capitalization of tags and void tag indications
- str = str.replace(/<(\/?)([^ >\/]+)((?:[^>\/]|\/(?!>))*)\/?>/g,
function(match, close, name, remaining) {
- return '<' + close + name.toLowerCase() +
remaining.replace(/ $/, '') + '>';
- } );
- // Ignore whitespace in table cell attributes
- str = str.replace(/(^|\n|\|(?=\|)|!(?=!))(\{\||\|[\-+]*|!)
*([^|\n]*?) *(?=[|\n]|$)/g, '$1$2$3');
- // Ignore trailing semicolons and spaces in style attributes
- str = str.replace(/style\s*=\s*"[^"]+"/g, function(match) {
- return match.replace(/\s|;(?=")/g, '');
- });
- // Strip double-quotes
- str = str.replace(/"([^"]*?)"/g, "$1");
+// Get diff substrings from offsets
+var formatDiff = function(oldWt, newWt, offset, context) {
+ return [
+ '----',
+ oldWt.substring(offset[0].start - context, offset[0].end +
context),
+ '++++',
+ newWt.substring(offset[1].start - context, offset[1].end +
context),
+ ].join('\n');
+};
- // Ignore implicit </small> and </center> in table cells or the
end
- // of the string for now
- str = str.replace(/(^|\n)<\/(?:small|center)>(?=\n[|!]|\n?$)/g,
'');
- str =
str.replace(/([|!].*?)<\/(?:small|center)>(?=\n[|!]|\n?$)/gi, '$1');
+var checkIfSignificant = function(offsets, data) {
+ var oldWt = data.oldWt;
+ var newWt = data.newWt;
- return str;
- };
-
- // Get diff substrings from offsets
- var formatDiff = function (offset, context) {
- return [
- '----',
- oldWt.substring(offset[0].start - context,
offset[0].end + context),
- '++++',
- newWt.substring(offset[1].start - context,
offset[1].end + context)
- ].join('\n');
- };
-
- var newDOC = domino.createDocument(html);
+ var oldBody = domino.createDocument(data.oldHTML.body).body;
+ var newBody = domino.createDocument(data.newHTML.body).body;
// Merge data-parsoid so that HTML nodes can be compared and diff'ed.
- DU.applyDataParsoid(oldBody.ownerDocument, oldDp.body);
- DU.applyDataParsoid(newDOC, dp.body);
- // console.warn("\nnewDOC:", newDOC)
+ DU.applyDataParsoid(oldBody.ownerDocument, data.oldDp.body);
+ DU.applyDataParsoid(newBody.ownerDocument, data.newDp.body);
var i, k, diff, offset;
- var thisResult;
var results = [];
// Use the full tests for fostered content.
@@ -360,410 +362,357 @@
// If parsoid-normalized HTML for old and new wikitext is
identical,
// the wt-diffs are purely syntactic.
var normalizedOld = DU.normalizeOut(oldBody, true);
- var normalizedNew = DU.normalizeOut(newDOC.body, true);
+ var normalizedNew = DU.normalizeOut(newBody, true);
if (normalizedOld === normalizedNew) {
for (i = 0; i < offsets.length; i++) {
offset = offsets[i];
results.push({
type: 'skip',
offset: offset,
- wtDiff: formatDiff(offset, 0),
+ wtDiff: formatDiff(oldWt, newWt,
offset, 0),
});
}
- cb( null, env, results );
- return;
+ return results;
}
}
- var origOut, newOut, origHTML, newHTML, origOrigHTML, origNewHTML;
+ var origOut, newOut, origHTML, newHTML;
// Now, proceed with full blown diffs
for (i = 0; i < offsets.length; i++) {
- thisResult = {};
- origOrigHTML = '';
- origNewHTML = '';
-
offset = offsets[i];
+ var origOrigHTML = '';
+ var origNewHTML = '';
+ var thisResult = { offset: offset };
- thisResult.offset = offset;
- // console.warn("--processing: " + JSON.stringify(offset));
-
- if (offset[0].start === offset[0].end &&
+ var implicitlyClosed = (offset[0].start === offset[0].end &&
newWt.substr(offset[1].start, offset[1].end -
offset[1].start)
- .match(/^\n?<\/[^>]+>\n?$/)) {
+ .match(/^\n?<\/[^>]+>\n?$/));
+ if (implicitlyClosed) {
// An element was implicitly closed. Fudge the orig
offset
// slightly so it finds the corresponding elements
which have the
// original (unclosed) DSR.
offset[0].start--;
}
- // console.warn("--orig--");
+
var res = findMatchingNodes(oldBody, offset[0] || {},
oldWt.length);
origOut = res ? res.nodes : [];
for (k = 0; k < origOut.length; k++) {
// node need not be an element always!
- origOrigHTML += DU.serializeNode(origOut[k],
{smartQuote: false});
+ origOrigHTML += DU.serializeNode(origOut[k], {
smartQuote: false });
}
origHTML = DU.formatHTML(DU.normalizeOut(origOrigHTML));
- // console.warn("# nodes: " + origOut.length);
- // console.warn("html: " + origHTML);
- // console.warn("--new--");
- res = findMatchingNodes(newDOC.body, offset[1] || {},
newWt.length);
+ res = findMatchingNodes(newBody, offset[1] || {}, newWt.length);
newOut = res ? res.nodes : [];
for (k = 0; k < newOut.length; k++) {
// node need not be an element always!
- origNewHTML += DU.serializeNode(newOut[k], {smartQuote:
false});
+ origNewHTML += DU.serializeNode(newOut[k], {
smartQuote: false });
}
newHTML = DU.formatHTML(DU.normalizeOut(origNewHTML));
- // console.warn("# nodes: " + newOut.length);
- // console.warn("html: " + newHTML);
// compute wt diffs
var wt1 = oldWt.substring(offset[0].start, offset[0].end);
var wt2 = newWt.substring(offset[1].start, offset[1].end);
- //thisResult.wtDiff = Util.contextDiff(wt1, wt2, false, true,
true);
+ // thisResult.wtDiff = Util.contextDiff(wt1, wt2, false, true,
true);
diff = Diff.htmlDiff(origHTML, newHTML, false, true, true);
// No context by default
- thisResult.wtDiff = formatDiff(offset, 0);
+ thisResult.wtDiff = formatDiff(oldWt, newWt, offset, 0);
// Normalize wts to check if we really have a semantic diff
thisResult.type = 'skip';
if (diff.length > 0) {
- var normWT1 = normalizeWikitext(wt1),
- normWT2 = normalizeWikitext(wt2);
-
+ var normWT1 = normalizeWikitext(wt1);
+ var normWT2 = normalizeWikitext(wt2);
if (normWT1 !== normWT2) {
- //console.log( 'normDiff: =======\n' + normWT1
+ '\n--------\n' + normWT2);
thisResult.htmlDiff = diff;
thisResult.type = 'fail';
// Provide context for semantic diffs
- thisResult.wtDiff = formatDiff(offset, 25);
+ thisResult.wtDiff = formatDiff(oldWt, newWt,
offset, 25);
}
}
results.push(thisResult);
}
- cb(null, env, results);
+ return results;
};
-var parsoidPost = function (env, uri, domain, title, text, dp, oldid,
- recordSizes, profilePrefix, cb) {
- var data = {};
+function parsoidPost(env, options, cb) {
+ var title = encodeURIComponent(options.title);
+
+ var uri = options.uri;
// make sure the Parsoid URI ends on /
- if ( !/\/$/.test(uri) ) {
+ if (!/\/$/.test(uri)) {
uri += '/';
}
- uri += 'v2/' + domain + '/';
- title = encodeURIComponent(title);
+ uri += 'v2/' + options.domain + '/';
- if ( oldid ) {
- // We want html2wt
- uri += 'wt/' + title + '/' + oldid;
- data.html = {
- body: text
- };
- data.original = {
- 'data-parsoid': dp
- };
- } else {
- // We want wt2html
+ if (options.html2wt) {
+ uri += 'wt/' + title + '/' + options.oldid;
+ } else { // wt2html
uri += 'pagebundle/' + title;
- data.wikitext = text;
}
- var options = {
+ var httpOptions = {
uri: uri,
method: 'POST',
json: true,
- body: data
+ body: options.data,
};
- Util.retryingHTTPRequest( 10, options, function( err, res, body ) {
- if (err) {
- cb( err, null );
- } else if (res.statusCode !== 200) {
- cb(res.body, null);
- } else {
- var resBody, resDP;
- if (oldid) {
- // Extract the wikitext from the response
- resBody = body.wikitext.body;
- } else {
- resBody = body.html.body;
- resDP = body['data-parsoid'];
+ return new Promise(function(resolve, reject) {
+ // FIXME: convert Util.retryingHTTPRequest to a promise
returning func
+ Util.retryingHTTPRequest(10, httpOptions, function(err, res,
body) {
+ if (!err && res.statusCode !== 200) {
+ err = new Error('Got status code: ' +
res.statusCode);
}
- if ( env.profile ) {
- if (!profilePrefix) {
- profilePrefix = '';
+ if (err) { return reject(err); }
+
+ // FIXME: Parse time was removed from profiling when we
stopped
+ // sending the x-parsoid-performance header.
+ if (options.recordSizes) {
+ var prefix = '';
+ if (options.profilePrefix) {
+ prefix += options.profilePrefix + ':';
}
- // FIXME: Parse time was removed from profiling
when we stopped
- // sending the x-parsoid-performance header.
- if (recordSizes) {
- // Record the sizes
- var sizePrefix = profilePrefix + (oldid
? 'wt' : 'html');
- env.profile.size[ sizePrefix + 'raw' ] =
- resBody.length;
- // Compress to record the gzipped size
- zlib.gzip( resBody, function( err,
gzippedbuf ) {
- if ( !err ) {
- env.profile.size[
sizePrefix + 'gzip' ] =
-
gzippedbuf.length;
- }
- cb( null, resBody, resDP );
- } );
+ var str;
+ if (options.html2wt) {
+ prefix += 'html:';
+ str = body.wikitext.body;
} else {
- cb(null, resBody, resDP);
+ prefix += 'wt:';
+ str = body.html.body;
}
+ env.profile.size[prefix + 'raw'] = str.length;
+ // Compress to record the gzipped size
+ zlib.gzip(str, function(err, gzippedbuf) {
+ if (err) { return reject(err); }
+ env.profile.size[prefix + 'gzip'] =
gzippedbuf.length;
+ resolve(body);
+ });
} else {
- cb( null, resBody, resDP );
+ resolve(body);
}
- }
- } );
-};
+ });
+ }).nodify(cb);
+}
-var doubleRoundtripDiff = function (env, uri, domain, title, offsets, src,
body, dp, out, cb) {
- if ( offsets.length > 0 ) {
- env.setPageSrcInfo( out );
- env.errCB = function ( error ) {
- cb( error, env, [] );
- process.exit( 1 );
- };
+function roundTripDiff(env, parsoidOptions, data) {
+ var diff = Diff.diffLines(data.newWt, data.oldWt);
+ var offsets = Diff.convertDiffToOffsetPairs(diff);
+ if (!diff.length || !offsets.length) { return []; }
- parsoidPost(env, uri, domain, title, out, null, null, false,
null,
- checkIfSignificant.bind(null, env, offsets, src, body,
dp, out, cb));
-
- } else {
- cb( null, env, [] );
- }
-};
-
-var roundTripDiff = function ( env, uri, domain, title, src, html, dp, out, cb
) {
- var diff, offsetPairs;
-
- try {
- diff = Diff.diffLines(out, src);
- offsetPairs = Diff.convertDiffToOffsetPairs(diff);
-
- if ( diff.length > 0 ) {
- var body = domino.createDocument( html ).body;
- doubleRoundtripDiff( env, uri, domain, title,
offsetPairs, src, body, dp, out, cb );
- } else {
- cb( null, env, [] );
- }
- } catch ( e ) {
- cb( e, env, [] );
- }
-};
-
-var selserRoundTripDiff = function (env, uri, domain, title, html, dp, out,
diffs, cb) {
- var selserDiff, offsetPairs,
- src = env.page.src.replace(/\n(?=\n)/g, '\n ');
- // Remove the selser trigger comment
- out = out.replace(/<!--rtSelserEditTestComment-->\n*$/, '');
- out = out.replace(/\n(?=\n)/g, '\n ');
-
- roundTripDiff(env, uri, domain, title, src, html, dp, out, function
(err, env, selserDiffs) {
- if (err) {
- cb(err, env, diffs);
- } else {
- for (var sD in selserDiffs) {
- selserDiffs[sD].selser = true;
- }
- if (selserDiffs.length) {
- diffs = diffs.concat(selserDiffs);
- }
- cb(null, env, diffs);
- }
+ var options = Object.assign({
+ wt2html: true,
+ data: { wikitext: data.newWt },
+ }, parsoidOptions);
+ return parsoidPost(env, options).then(function(body) {
+ data.newHTML = body.html;
+ data.newDp = body['data-parsoid'];
+ return checkIfSignificant(offsets, data);
});
-};
+}
-// Returns a Promise for an { env, rtDiffs } object. `cb` is optional.
-var fetch = function ( page, options, cb ) {
- cb = JSUtils.mkPromised( cb, [ 'env', 'rtDiffs' ] );
- var domain, prefix, apiURL,
- // options are ParsoidConfig options if module.parent,
otherwise they
- // are CLI options (so use the Util.set* helpers to process
them)
- parsoidConfig = new ParsoidConfig( module.parent ? options :
null );
+// FIXME: This is copied from api/routes.js
+function promiseTemplateReq(env, target, oldid) {
+ return new Promise(function(resolve, reject) {
+ var tpr = new TemplateRequest(env, target, oldid);
+ tpr.once('src', function(err, srcAndMetadata) {
+ if (err) {
+ reject(err);
+ } else {
+ env.setPageSrcInfo(srcAndMetadata);
+ resolve();
+ }
+ });
+ });
+}
+
// Returns a Promise for a formatted string of round-trip test results.
// `title` is the page title to test; `options` are ParsoidConfig options
// when required as a module, otherwise CLI options; `formatter` renders
// (err, prefix, title, results, profile) to a string; `cb` is an optional
// node-style callback.
function fetch(title, options, formatter, cb) {
	// options are ParsoidConfig options if module.parent, otherwise they
	// are CLI options (so use the Util.set* helpers to process them)
	var parsoidConfig = new ParsoidConfig(module.parent ? options : null);
	if (!module.parent) {
		// only process CLI flags if we're running as a CLI program.
		Util.setTemplatingAndProcessingFlags(parsoidConfig, options);
		Util.setDebuggingFlags(parsoidConfig, options);
	}
	if (options.apiURL) {
		parsoidConfig.setInterwiki(options.prefix || 'localhost', options.apiURL);
	}
	var err, domain, prefix;
	if (options.prefix) {
		// If prefix is present, use that.
		prefix = options.prefix;
		// Get the domain from the interwiki map.
		var apiURL = parsoidConfig.interwikiMap.get(prefix);
		if (!apiURL) {
			err = new Error('Couldn\'t find the domain for prefix ' + prefix);
		} else {
			// Only parse when the lookup succeeded; url.parse(undefined)
			// would throw a TypeError and mask the error set above.
			domain = url.parse(apiURL).hostname;
		}
	} else if (options.domain) {
		domain = options.domain;
		prefix = parsoidConfig.reverseIWMap.get(domain);
	} else {
		err = new Error('No domain or prefix provided.');
	}
	var env;
	// Render either an error or the diff results with the chosen formatter.
	// (Named `formatErr` to avoid shadowing the outer `err`.)
	var closeFormatter = function(formatErr, results) {
		return formatter(formatErr, prefix, title, results, env && env.profile);
	};
	var parsoidOptions = {
		uri: options.parsoidURL,
		domain: domain,
		title: title,
	};
	// Accumulates the wikitext/HTML/data-parsoid/diffs across the chain.
	var data = {};
	// If setup above failed, start the chain rejected so the error is
	// still routed through closeFormatter.
	return Promise[err ? 'reject' : 'resolve'](err).then(function() {
		return MWParserEnvironment.getParserEnv(
			parsoidConfig, null, { prefix: prefix, pageName: title }
		);
	}).then(function(_env) {
		env = _env;
		env.profile = { time: { total: 0, start: Date.now() }, size: {} };
		var target = env.resolveTitle(env.normalizeTitle(env.page.name), '');
		return promiseTemplateReq(env, target, null);
	}).then(function() {
		data.oldWt = env.page.src;
		// First, fetch the HTML for the requested page's wikitext
		var options = Object.assign({
			wt2html: true,
			recordSizes: true,
			data: { wikitext: data.oldWt },
		}, parsoidOptions);
		return parsoidPost(env, options);
	}).then(function(body) {
		data.oldHTML = body.html;
		data.oldDp = body['data-parsoid'];
		// Now, request the wikitext for the obtained HTML
		var options = Object.assign({
			html2wt: true,
			recordSizes: true,
			oldid: env.page.meta.revision.revid,
			data: {
				html: data.oldHTML,
				original: { 'data-parsoid': data.oldDp },
			},
		}, parsoidOptions);
		return parsoidPost(env, options);
	}).then(function(body) {
		data.newWt = body.wikitext.body;
		return roundTripDiff(env, parsoidOptions, data);
	}).then(function(results) {
		data.diffs = results;
		// Once we have the diffs between the round-tripped wt,
		// to test rt selser we need to modify the HTML and request
		// the wt again to compare with selser, and then concat the
		// resulting diffs to the ones we got from basic rt
		var newDocument = DU.parseHTML(data.oldHTML.body);
		var newNode = newDocument.createComment('rtSelserEditTestComment');
		newDocument.body.appendChild(newNode);
		var options = Object.assign({
			html2wt: true,
			oldid: env.page.meta.revision.revid,
			data: {
				html: newDocument.outerHTML,
				original: { 'data-parsoid': data.oldDp },
			},
			profilePrefix: 'selser',
		}, parsoidOptions);
		return parsoidPost(env, options);
	}).then(function(body) {
		var out = body.wikitext.body;

		// Finish the total time now
		// FIXME: Is this the right place to end it?
		if (env.profile && env.profile.time) {
			env.profile.time.total = Date.now() - env.profile.time.start;
		}

		// The selser diff compares the wikitext serialized from the
		// edited HTML against the wikitext from the basic round trip,
		// so the basic-rt output becomes the "old" side here.
		data.oldWt = data.newWt;

		// Remove the selser trigger comment
		out = out.replace(/<!--rtSelserEditTestComment-->\n*$/, '');
		data.newWt = out;

		return roundTripDiff(env, parsoidOptions, data);
	}).then(function(selserDiffs) {
		// Tag diffs introduced by the selser pass so formatters can
		// distinguish them from basic round-trip diffs.
		selserDiffs.forEach(function(diff) {
			diff.selser = true;
		});
		if (selserDiffs.length) {
			data.diffs = data.diffs.concat(selserDiffs);
		}
		return data.diffs;
	}).then(
		closeFormatter.bind(null, null),
		closeFormatter
	).nodify(cb); // FIXME: cb should get the closeFormatter output.
}
-if ( !module.parent ) {
- var standardOpts = Util.addStandardOptions({
- 'xml': {
+
+if (require.main === module) {
+ var options = Util.addStandardOptions({
+ xml: {
description: 'Use xml callback',
- 'boolean': true,
- 'default': false
+ boolean: true,
+ default: false,
},
- 'prefix': {
- description: 'Which wiki prefix to use; e.g. "enwiki"
for English wikipedia, "eswiki" for Spanish, "mediawikiwiki" for mediawiki.org',
- 'default': ''
+ prefix: {
+ description: 'Which wiki prefix to use; e.g. "enwiki"
for ' +
+ 'English wikipedia, "eswiki" for Spanish,
"mediawikiwiki" ' +
+ 'for mediawiki.org',
+ default: '',
},
- 'domain': {
- description: 'Which wiki to use; e.g.
"en.wikipedia.org" for English wikipedia',
- 'default': 'en.wikipedia.org'
+ domain: {
+ description: 'Which wiki to use; e.g.
"en.wikipedia.org" for' +
+ ' English wikipedia',
+ default: 'en.wikipedia.org',
},
- 'parsoidURL': {
+ parsoidURL: {
description: 'The URL for the Parsoid API',
- }
+ },
}, {
// defaults for standard options
- rtTestMode: true // suppress noise by default
+ rtTestMode: true, // suppress noise by default
});
var opts = yargs.usage(
- 'Usage: $0 [options] <page-title> \n\n',
- standardOpts
- ).check(Util.checkUnknownArgs.bind(null, standardOpts));
+ 'Usage: $0 [options] <page-title> \n\n', options
+ ).check(Util.checkUnknownArgs.bind(null, options));
- var callback;
var argv = opts.argv;
var title = argv._[0];
-
- if ( title ) {
- callback = cbCombinator.bind( null,
- Util.booleanOption( argv.xml ) ?
- xmlCallback : plainCallback,
consoleOut );
- if ( !argv.parsoidURL ) {
- // Start our own Parsoid server
- // TODO: This will not be necessary once we have a
top-level testing
- // script that takes care of setting everything up.
- var apiServer = require( './apiServer.js' ),
- parsoidOptions = {quiet: true};
- if (opts.apiURL) {
- parsoidOptions.mockUrl = opts.apiURL;
- }
-
apiServer.startParsoidServer(parsoidOptions).then(function( ret ) {
- argv.parsoidURL = ret.url;
- fetch( title, argv, callback );
- } ).done();
- apiServer.exitOnProcessTerm();
- } else {
- fetch( title, argv, callback );
- }
- } else {
- opts.showHelp();
+ if (!title) {
+ return opts.showHelp();
}
+ Promise.resolve().then(function() {
+ if (argv.parsoidURL) { return; }
+ // Start our own Parsoid server
+ // TODO: This will not be necessary once we have a top-level
testing
+ // script that takes care of setting everything up.
+ var apiServer = require('./apiServer.js');
+ var parsoidOptions = { quiet: true };
+ if (opts.apiURL) {
+ parsoidOptions.mockUrl = opts.apiURL;
+ }
+ apiServer.exitOnProcessTerm();
+ return
apiServer.startParsoidServer(parsoidOptions).then(function(ret) {
+ argv.parsoidURL = ret.url;
+ });
+ }).then(function() {
+ var formatter = Util.booleanOption(argv.xml) ? xmlFormat :
plainFormat;
+ return fetch(title, argv, formatter);
+ }).then(function(output) {
+ console.log(output);
+ process.exit(0);
+ }).done();
+} else if (typeof module === 'object') {
+ module.exports.fetch = fetch;
+ module.exports.xmlFormat = xmlFormat;
}
--
To view, visit https://gerrit.wikimedia.org/r/202674
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: I646f8a916add26ba60171ed31d1593c6bb6b63c5
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/services/parsoid
Gerrit-Branch: master
Gerrit-Owner: Arlolra <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits