C. Scott Ananian has uploaded a new change for review. (
https://gerrit.wikimedia.org/r/402080 )
Change subject: WIP: serializeaswikilink
......................................................................
WIP: serializeaswikilink
This breaks something still.
Change-Id: Ia0a4cfd0a881e9d401eff66b0602ccdc81be7358
---
M lib/html2wt/LinkHandler.js
1 file changed, 157 insertions(+), 168 deletions(-)
git pull ssh://gerrit.wikimedia.org:29418/mediawiki/services/parsoid
refs/changes/80/402080/1
diff --git a/lib/html2wt/LinkHandler.js b/lib/html2wt/LinkHandler.js
index fbe9592..935c24a 100644
--- a/lib/html2wt/LinkHandler.js
+++ b/lib/html2wt/LinkHandler.js
@@ -383,8 +383,8 @@
return canUseSimple;
};
-var serializeAsWikiLink = Promise.method(function(node, state, linkData) {
- var contentParts, p;
+var serializeAsWikiLink = Promise.async(function *(node, state, linkData) {
+ var contentParts;
var contentSrc = '';
var isPiped = false;
var requiresEscaping = true;
@@ -425,17 +425,16 @@
// we need to fully shadow the sort key.
// if ( !target.modified ) {
// The target and source key was not modified
- p = state.serializer.serializedAttrVal(node, 'mw:sortKey')
- .then(function(sortKeySrc) {
- if (sortKeySrc.value !== null) {
- linkData.contentNode = undefined;
- linkData.content.string = sortKeySrc.value;
- // TODO: generalize this flag. It is already
used by
- // getAttributeShadowInfo. Maybe use the same
- // structure as its return value?
- linkData.content.fromsrc = true;
- }
- });
+ var sortKeySrc =
+ yield state.serializer.serializedAttrVal(node,
'mw:sortKey');
+ if (sortKeySrc.value !== null) {
+ linkData.contentNode = undefined;
+ linkData.content.string = sortKeySrc.value;
+ // TODO: generalize this flag. It is already used by
+ // getAttributeShadowInfo. Maybe use the same
+ // structure as its return value?
+ linkData.content.fromsrc = true;
+ }
// }
} else if (linkData.type === 'mw:PageProp/Language') {
// Fix up the the content string
@@ -443,183 +442,173 @@
if (linkData.content.string === undefined) {
linkData.content.string =
Util.decodeEntities(target.value);
}
- p = Promise.resolve();
- } else {
- p = Promise.resolve();
}
// The string value of the content, if it is plain text.
var linkTarget, escapedTgt;
-
- return p.then(function() {
- if (linkData.isRedirect) {
- linkTarget = target.value;
- if (target.modified || !target.fromsrc) {
- linkTarget = linkTarget.replace(/^(\.\.?\/)*/,
'').replace(/_/g, ' ');
- escapedTgt = escapeLinkTarget(linkTarget,
state);
- linkTarget = escapedTgt.linkTarget;
- // Determine if it's a redirect to a category,
in which case
- // it needs a ':' on front to distingish from a
category link.
- var categoryMatch =
linkTarget.match(/^([^:]+)[:]/);
- if (categoryMatch) {
- var ns =
wiki.namespaceIds.get(Util.normalizeNamespaceName(categoryMatch[1]));
- if (ns ===
wiki.canonicalNamespaces.category) {
- // Check that the next node
isn't a category link,
- // in which case we don't want
the ':'.
- var nextNode = node.nextSibling;
- if (!(nextNode &&
DU.isElt(nextNode) && DU.hasNodeName(nextNode, "link") &&
-
nextNode.getAttribute('rel') === "mw:PageProp/Category" &&
-
nextNode.getAttribute('href') === node.getAttribute('href'))) {
- linkTarget = ':' +
linkTarget;
- }
+ if (linkData.isRedirect) {
+ linkTarget = target.value;
+ if (target.modified || !target.fromsrc) {
+ linkTarget = linkTarget.replace(/^(\.\.?\/)*/,
'').replace(/_/g, ' ');
+ escapedTgt = escapeLinkTarget(linkTarget, state);
+ linkTarget = escapedTgt.linkTarget;
+ // Determine if it's a redirect to a category, in which
case
+ // it needs a ':' on front to distinguish from a
category link.
+ var categoryMatch = linkTarget.match(/^([^:]+)[:]/);
+ if (categoryMatch) {
+ var ns =
wiki.namespaceIds.get(Util.normalizeNamespaceName(categoryMatch[1]));
+ if (ns === wiki.canonicalNamespaces.category) {
+ // Check that the next node isn't a
category link,
+ // in which case we don't want the ':'.
+ var nextNode = node.nextSibling;
+ if (!(
+ nextNode && DU.isElt(nextNode)
&& DU.hasNodeName(nextNode, "link") &&
+ nextNode.getAttribute('rel')
=== "mw:PageProp/Category" &&
+ nextNode.getAttribute('href')
=== node.getAttribute('href')
+ )) {
+ linkTarget = ':' + linkTarget;
}
}
}
- } else if (isSimpleWikiLink(env, dp, target, linkData)) {
- // Simple case
- if (!target.modified && !linkData.contentModified) {
+ }
+ } else if (isSimpleWikiLink(env, dp, target, linkData)) {
+ // Simple case
+ if (!target.modified && !linkData.contentModified) {
+ linkTarget = target.value.replace(/^\.\//, '');
+ } else {
+ // If token has templated attrs or is a subpage, use
target.value
+ // since content string will be drastically different.
+ if (DU.hasExpandedAttrsType(node) ||
+ /(^|\/)\.\.\//.test(target.value)) {
linkTarget = target.value.replace(/^\.\//, '');
} else {
- // If token has templated attrs or is a
subpage, use target.value
- // since content string will be drastically
different.
- if (DU.hasExpandedAttrsType(node) ||
- /(^|\/)\.\.\//.test(target.value)) {
- linkTarget =
target.value.replace(/^\.\//, '');
+ escapedTgt =
escapeLinkTarget(linkData.content.string, state);
+ if (!escapedTgt.invalidLink) {
+ linkTarget = addColonEscape(env,
escapedTgt.linkTarget, linkData);
} else {
- escapedTgt =
escapeLinkTarget(linkData.content.string, state);
- if (!escapedTgt.invalidLink) {
- linkTarget =
addColonEscape(env, escapedTgt.linkTarget, linkData);
- } else {
- linkTarget =
escapedTgt.linkTarget;
- }
- }
- if (linkData.isInterwikiLang &&
!/^[:]/.test(linkTarget) &&
- linkData.type !==
'mw:PageProp/Language') {
- // ensure interwiki links can't be
confused with
- // interlanguage links.
- linkTarget = ':' + linkTarget;
- }
- }
- } else if (isURLLink(state.env, node, linkData)/* &&
!linkData.isInterwiki */) {
- // Uncomment the above check if we want
[[wikipedia:Foo|http://en.wikipedia.org/wiki/Foo]]
- // for '<a
href="http://en.wikipedia.org/wiki/Foo">http://en.wikipedia.org/wiki/Foo</a>'
- linkData.linkType = "mw:URLLink";
- return;
- } else {
- // Emit piped wikilink syntax
- isPiped = true;
-
- var pp;
- // First get the content source
- if (linkData.contentNode) {
- pp = state.serializeLinkChildrenToString(
- linkData.contentNode,
-
state.serializer.wteHandlers.wikilinkHandler)
- .then(function(cs) {
- // strip off the tail and handle the
pipe trick
- contentParts =
splitLinkContentString(cs, dp);
- contentSrc = contentParts.contentString;
- dp.tail = contentParts.tail;
- linkData.tail = contentParts.tail;
- dp.prefix = contentParts.prefix;
- linkData.prefix = contentParts.prefix;
- requiresEscaping = false;
- });
- } else {
- contentSrc = linkData.content.string || '';
- requiresEscaping = !linkData.content.fromsrc;
- pp = Promise.resolve();
- }
-
- return pp.then(function() {
- if (contentSrc === '' &&
- linkData.type !==
'mw:PageProp/Category') {
- // Protect empty link content from PST
pipe trick
- contentSrc = '<nowiki/>';
- requiresEscaping = false;
- }
-
- linkTarget = target.value;
- if (target.modified || !target.fromsrc) {
- // Links starting with ./ shouldn't get
_ replaced with ' '
- var linkContentIsRelative =
- linkData.content &&
linkData.content.string &&
-
linkData.content.string.match(/^\.\//);
- linkTarget =
linkTarget.replace(/^(\.\.?\/)*/, '');
- if (!linkData.isInterwiki &&
!linkContentIsRelative) {
- linkTarget =
linkTarget.replace(/_/g, ' ');
- }
- escapedTgt =
escapeLinkTarget(linkTarget, state);
linkTarget = escapedTgt.linkTarget;
}
-
- // If we are reusing the target from source, we
don't
- // need to worry about colon-escaping because
it will
- // be in the right form already.
- //
- // Trying to eliminate this check and always
check for
- // colon-escaping seems a bit tricky when the
reused
- // target has encoded entities that won't
resolve to
- // valid titles.
- if ((!escapedTgt || !escapedTgt.invalidLink) &&
!target.fromsrc) {
- linkTarget = addColonEscape(env,
linkTarget, linkData);
- }
- });
+ }
+ if (linkData.isInterwikiLang &&
!/^[:]/.test(linkTarget) &&
+ linkData.type !== 'mw:PageProp/Language') {
+ // ensure interwiki links can't be confused with
+ // interlanguage links.
+ linkTarget = ':' + linkTarget;
+ }
}
- }).then(function() {
- if (linkData.linkType === "mw:URLLink") {
- state.emitChunk(new AutoURLLinkText(node.textContent,
node), node);
+ } else if (isURLLink(state.env, node, linkData)/* &&
!linkData.isInterwiki */) {
+ // Uncomment the above check if we want
[[wikipedia:Foo|http://en.wikipedia.org/wiki/Foo]]
+ // for '<a
href="http://en.wikipedia.org/wiki/Foo">http://en.wikipedia.org/wiki/Foo</a>'
+ linkData.linkType = "mw:URLLink";
+ return;
+ } else {
+ // Emit piped wikilink syntax
+ isPiped = true;
+
+ // First get the content source
+ if (linkData.contentNode) {
+ var cs = yield state.serializeLinkChildrenToString(
+ linkData.contentNode,
+ state.serializer.wteHandlers.wikilinkHandler
+ );
+ // strip off the tail and handle the pipe trick
+ contentParts = splitLinkContentString(cs, dp);
+ contentSrc = contentParts.contentString;
+ dp.tail = contentParts.tail;
+ linkData.tail = contentParts.tail;
+ dp.prefix = contentParts.prefix;
+ linkData.prefix = contentParts.prefix;
+ requiresEscaping = false;
+ } else {
+ contentSrc = linkData.content.string || '';
+ requiresEscaping = !linkData.content.fromsrc;
+ }
+
+ if (contentSrc === '' &&
+ linkData.type !== 'mw:PageProp/Category') {
+ // Protect empty link content from PST pipe trick
+ contentSrc = '<nowiki/>';
+ requiresEscaping = false;
+ }
+
+ linkTarget = target.value;
+ if (target.modified || !target.fromsrc) {
+ // Links starting with ./ shouldn't get _ replaced with
' '
+ var linkContentIsRelative =
+ linkData.content && linkData.content.string &&
+ linkData.content.string.match(/^\.\//);
+ linkTarget = linkTarget.replace(/^(\.\.?\/)*/, '');
+ if (!linkData.isInterwiki && !linkContentIsRelative) {
+ linkTarget = linkTarget.replace(/_/g, ' ');
+ }
+ escapedTgt = escapeLinkTarget(linkTarget, state);
+ linkTarget = escapedTgt.linkTarget;
+ }
+
+ // If we are reusing the target from source, we don't
+ // need to worry about colon-escaping because it will
+ // be in the right form already.
+ //
+ // Trying to eliminate this check and always check for
+ // colon-escaping seems a bit tricky when the reused
+ // target has encoded entities that won't resolve to
+ // valid titles.
+ if ((!escapedTgt || !escapedTgt.invalidLink) &&
!target.fromsrc) {
+ linkTarget = addColonEscape(env, linkTarget, linkData);
+ }
+ }
+ if (linkData.linkType === "mw:URLLink") {
+ state.emitChunk(new AutoURLLinkText(node.textContent, node),
node);
+ return;
+ }
+
+ if (linkData.isRedirect) {
+ // Drop duplicates
+ if (state.redirectText !== null) {
return;
}
- if (linkData.isRedirect) {
- // Drop duplicates
- if (state.redirectText !== null) {
- return;
- }
-
- // Buffer redirect text if it is not in start of file
position
- if (!REDIRECT_TEST_RE.test(state.out +
state.currLine.text)) {
- state.redirectText = linkData.prefix + '[[' +
linkTarget + ']]';
- state.emitChunk('', node); // Flush seperators
for this node
- return;
- }
-
- // Set to some non-null string
- state.redirectText = 'unbuffered';
+ // Buffer redirect text if it is not in start of file position
+ if (!REDIRECT_TEST_RE.test(state.out + state.currLine.text)) {
+ state.redirectText = linkData.prefix + '[[' +
linkTarget + ']]';
+ state.emitChunk('', node); // Flush separators for
this node
+ return;
}
- var pipedText;
- if (escapedTgt && escapedTgt.invalidLink) {
- // If the link target was invalid, instead of emitting
an invalid link,
- // omit the link and serialize just the content
instead. But, log the
- // invalid html for Parsoid clients to investigate
later.
- state.env.log("error/html2wt/link", "Bad title text",
node.outerHTML);
+ // Set to some non-null string
+ state.redirectText = 'unbuffered';
+ }
- // For non-piped content, use the original invalid link
text
- pipedText = isPiped ? contentSrc : linkTarget;
+ var pipedText;
+ if (escapedTgt && escapedTgt.invalidLink) {
+ // If the link target was invalid, instead of emitting an
invalid link,
+ // omit the link and serialize just the content instead. But,
log the
+ // invalid html for Parsoid clients to investigate later.
+ state.env.log("error/html2wt/link", "Bad title text",
node.outerHTML);
- if (requiresEscaping) {
- // Escape the text in the old sol context
- state.onSOL = oldSOLState;
- pipedText =
state.serializer.wteHandlers.escapeWikiText(pipedText, { node: node });
- }
- state.emitChunk(linkData.prefix + pipedText +
linkData.tail, node);
+ // For non-piped content, use the original invalid link text
+ pipedText = isPiped ? contentSrc : linkTarget;
+
+ if (requiresEscaping) {
+ // Escape the text in the old sol context
+ state.onSOL = oldSOLState;
+ pipedText =
state.serializer.wteHandlers.escapeWikiText(pipedText, { node: node });
+ }
+ state.emitChunk(linkData.prefix + pipedText + linkData.tail,
node);
+ } else {
+ if (isPiped && requiresEscaping) {
+ // We are definitely not in sol context since content
+ // will be preceded by "[[" or "[" text in target
wikitext.
+ pipedText = '|' + escapeLinkContent(contentSrc, state,
false, node);
+ } else if (isPiped) {
+ pipedText = '|' + contentSrc;
} else {
- if (isPiped && requiresEscaping) {
- // We are definitely not in sol context since
content
- // will be preceded by "[[" or "[" text in
target wikitext.
- pipedText = '|' + escapeLinkContent(contentSrc,
state, false, node);
- } else if (isPiped) {
- pipedText = '|' + contentSrc;
- } else {
- pipedText = '';
- }
- state.emitChunk(new WikiLinkText(
- linkData.prefix + '[[' + linkTarget + pipedText
+ ']]' + linkData.tail,
- node, wiki, linkData.type), node);
+ pipedText = '';
}
- });
+ state.emitChunk(new WikiLinkText(
+ linkData.prefix + '[[' + linkTarget + pipedText + ']]'
+ linkData.tail,
+ node, wiki, linkData.type), node);
+ }
});
var serializeAsExtLink = Promise.async(function *(node, state, linkData) {
--
To view, visit https://gerrit.wikimedia.org/r/402080
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: Ia0a4cfd0a881e9d401eff66b0602ccdc81be7358
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/services/parsoid
Gerrit-Branch: master
Gerrit-Owner: C. Scott Ananian <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits