C. Scott Ananian has uploaded a new change for review. ( 
https://gerrit.wikimedia.org/r/402080 )

Change subject: WIP: serializeaswikilink
......................................................................

WIP: serializeaswikilink

This breaks something still.

Change-Id: Ia0a4cfd0a881e9d401eff66b0602ccdc81be7358
---
M lib/html2wt/LinkHandler.js
1 file changed, 157 insertions(+), 168 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/services/parsoid 
refs/changes/80/402080/1

diff --git a/lib/html2wt/LinkHandler.js b/lib/html2wt/LinkHandler.js
index fbe9592..935c24a 100644
--- a/lib/html2wt/LinkHandler.js
+++ b/lib/html2wt/LinkHandler.js
@@ -383,8 +383,8 @@
        return canUseSimple;
 };
 
-var serializeAsWikiLink = Promise.method(function(node, state, linkData) {
-       var contentParts, p;
+var serializeAsWikiLink = Promise.async(function *(node, state, linkData) {
+       var contentParts;
        var contentSrc = '';
        var isPiped = false;
        var requiresEscaping = true;
@@ -425,17 +425,16 @@
                // we need to fully shadow the sort key.
                // if ( !target.modified ) {
                // The target and source key was not modified
-               p = state.serializer.serializedAttrVal(node, 'mw:sortKey')
-               .then(function(sortKeySrc) {
-                       if (sortKeySrc.value !== null) {
-                               linkData.contentNode = undefined;
-                               linkData.content.string = sortKeySrc.value;
-                               // TODO: generalize this flag. It is already 
used by
-                               // getAttributeShadowInfo. Maybe use the same
-                               // structure as its return value?
-                               linkData.content.fromsrc = true;
-                       }
-               });
+               var sortKeySrc =
+                       yield state.serializer.serializedAttrVal(node, 
'mw:sortKey');
+               if (sortKeySrc.value !== null) {
+                       linkData.contentNode = undefined;
+                       linkData.content.string = sortKeySrc.value;
+                       // TODO: generalize this flag. It is already used by
+                       // getAttributeShadowInfo. Maybe use the same
+                       // structure as its return value?
+                       linkData.content.fromsrc = true;
+               }
                // }
        } else if (linkData.type === 'mw:PageProp/Language') {
                // Fix up the the content string
@@ -443,183 +442,173 @@
                if (linkData.content.string === undefined) {
                        linkData.content.string = 
Util.decodeEntities(target.value);
                }
-               p = Promise.resolve();
-       } else {
-               p = Promise.resolve();
        }
 
        // The string value of the content, if it is plain text.
        var linkTarget, escapedTgt;
-
-       return p.then(function() {
-               if (linkData.isRedirect) {
-                       linkTarget = target.value;
-                       if (target.modified || !target.fromsrc) {
-                               linkTarget = linkTarget.replace(/^(\.\.?\/)*/, 
'').replace(/_/g, ' ');
-                               escapedTgt = escapeLinkTarget(linkTarget, 
state);
-                               linkTarget = escapedTgt.linkTarget;
-                               // Determine if it's a redirect to a category, 
in which case
-                               // it needs a ':' on front to distingish from a 
category link.
-                               var categoryMatch = 
linkTarget.match(/^([^:]+)[:]/);
-                               if (categoryMatch) {
-                                       var ns = 
wiki.namespaceIds.get(Util.normalizeNamespaceName(categoryMatch[1]));
-                                       if (ns === 
wiki.canonicalNamespaces.category) {
-                                               // Check that the next node 
isn't a category link,
-                                               // in which case we don't want 
the ':'.
-                                               var nextNode = node.nextSibling;
-                                               if (!(nextNode && 
DU.isElt(nextNode) && DU.hasNodeName(nextNode, "link") &&
-                                                       
nextNode.getAttribute('rel') === "mw:PageProp/Category" &&
-                                                       
nextNode.getAttribute('href') === node.getAttribute('href'))) {
-                                                       linkTarget = ':' + 
linkTarget;
-                                               }
+       if (linkData.isRedirect) {
+               linkTarget = target.value;
+               if (target.modified || !target.fromsrc) {
+                       linkTarget = linkTarget.replace(/^(\.\.?\/)*/, 
'').replace(/_/g, ' ');
+                       escapedTgt = escapeLinkTarget(linkTarget, state);
+                       linkTarget = escapedTgt.linkTarget;
+                       // Determine if it's a redirect to a category, in which 
case
+                       // it needs a ':' on front to distinguish from a 
category link.
+                       var categoryMatch = linkTarget.match(/^([^:]+)[:]/);
+                       if (categoryMatch) {
+                               var ns = 
wiki.namespaceIds.get(Util.normalizeNamespaceName(categoryMatch[1]));
+                               if (ns === wiki.canonicalNamespaces.category) {
+                                       // Check that the next node isn't a 
category link,
+                                       // in which case we don't want the ':'.
+                                       var nextNode = node.nextSibling;
+                                       if (!(
+                                               nextNode && DU.isElt(nextNode) 
&& DU.hasNodeName(nextNode, "link") &&
+                                               nextNode.getAttribute('rel') 
=== "mw:PageProp/Category" &&
+                                               nextNode.getAttribute('href') 
=== node.getAttribute('href')
+                                       )) {
+                                               linkTarget = ':' + linkTarget;
                                        }
                                }
                        }
-               } else if (isSimpleWikiLink(env, dp, target, linkData)) {
-                       // Simple case
-                       if (!target.modified && !linkData.contentModified) {
+               }
+       } else if (isSimpleWikiLink(env, dp, target, linkData)) {
+               // Simple case
+               if (!target.modified && !linkData.contentModified) {
+                       linkTarget = target.value.replace(/^\.\//, '');
+               } else {
+                       // If token has templated attrs or is a subpage, use 
target.value
+                       // since content string will be drastically different.
+                       if (DU.hasExpandedAttrsType(node) ||
+                               /(^|\/)\.\.\//.test(target.value)) {
                                linkTarget = target.value.replace(/^\.\//, '');
                        } else {
-                               // If token has templated attrs or is a 
subpage, use target.value
-                               // since content string will be drastically 
different.
-                               if (DU.hasExpandedAttrsType(node) ||
-                                       /(^|\/)\.\.\//.test(target.value)) {
-                                       linkTarget = 
target.value.replace(/^\.\//, '');
+                               escapedTgt = 
escapeLinkTarget(linkData.content.string, state);
+                               if (!escapedTgt.invalidLink) {
+                                       linkTarget = addColonEscape(env, 
escapedTgt.linkTarget, linkData);
                                } else {
-                                       escapedTgt = 
escapeLinkTarget(linkData.content.string, state);
-                                       if (!escapedTgt.invalidLink) {
-                                               linkTarget = 
addColonEscape(env, escapedTgt.linkTarget, linkData);
-                                       } else {
-                                               linkTarget = 
escapedTgt.linkTarget;
-                                       }
-                               }
-                               if (linkData.isInterwikiLang && 
!/^[:]/.test(linkTarget) &&
-                                       linkData.type !== 
'mw:PageProp/Language') {
-                                       // ensure interwiki links can't be 
confused with
-                                       // interlanguage links.
-                                       linkTarget = ':' + linkTarget;
-                               }
-                       }
-               } else if (isURLLink(state.env, node, linkData)/* && 
!linkData.isInterwiki */) {
-                       // Uncomment the above check if we want 
[[wikipedia:Foo|http://en.wikipedia.org/wiki/Foo]]
-                       // for '<a 
href="http://en.wikipedia.org/wiki/Foo">http://en.wikipedia.org/wiki/Foo</a>'
-                       linkData.linkType = "mw:URLLink";
-                       return;
-               } else {
-                       // Emit piped wikilink syntax
-                       isPiped = true;
-
-                       var pp;
-                       // First get the content source
-                       if (linkData.contentNode) {
-                               pp = state.serializeLinkChildrenToString(
-                                               linkData.contentNode,
-                                               
state.serializer.wteHandlers.wikilinkHandler)
-                               .then(function(cs) {
-                                       // strip off the tail and handle the 
pipe trick
-                                       contentParts = 
splitLinkContentString(cs, dp);
-                                       contentSrc = contentParts.contentString;
-                                       dp.tail = contentParts.tail;
-                                       linkData.tail = contentParts.tail;
-                                       dp.prefix = contentParts.prefix;
-                                       linkData.prefix = contentParts.prefix;
-                                       requiresEscaping = false;
-                               });
-                       } else {
-                               contentSrc = linkData.content.string || '';
-                               requiresEscaping = !linkData.content.fromsrc;
-                               pp = Promise.resolve();
-                       }
-
-                       return pp.then(function() {
-                               if (contentSrc === '' &&
-                                               linkData.type !== 
'mw:PageProp/Category') {
-                                       // Protect empty link content from PST 
pipe trick
-                                       contentSrc = '<nowiki/>';
-                                       requiresEscaping = false;
-                               }
-
-                               linkTarget = target.value;
-                               if (target.modified || !target.fromsrc) {
-                                       // Links starting with ./ shouldn't get 
_ replaced with ' '
-                                       var linkContentIsRelative =
-                                               linkData.content && 
linkData.content.string &&
-                                               
linkData.content.string.match(/^\.\//);
-                                       linkTarget = 
linkTarget.replace(/^(\.\.?\/)*/, '');
-                                       if (!linkData.isInterwiki && 
!linkContentIsRelative) {
-                                               linkTarget = 
linkTarget.replace(/_/g, ' ');
-                                       }
-                                       escapedTgt = 
escapeLinkTarget(linkTarget, state);
                                        linkTarget = escapedTgt.linkTarget;
                                }
-
-                               // If we are reusing the target from source, we 
don't
-                               // need to worry about colon-escaping because 
it will
-                               // be in the right form already.
-                               //
-                               // Trying to eliminate this check and always 
check for
-                               // colon-escaping seems a bit tricky when the 
reused
-                               // target has encoded entities that won't 
resolve to
-                               // valid titles.
-                               if ((!escapedTgt || !escapedTgt.invalidLink) && 
!target.fromsrc) {
-                                       linkTarget = addColonEscape(env, 
linkTarget, linkData);
-                               }
-                       });
+                       }
+                       if (linkData.isInterwikiLang && 
!/^[:]/.test(linkTarget) &&
+                               linkData.type !== 'mw:PageProp/Language') {
+                               // ensure interwiki links can't be confused with
+                               // interlanguage links.
+                               linkTarget = ':' + linkTarget;
+                       }
                }
-       }).then(function() {
-               if (linkData.linkType === "mw:URLLink") {
-                       state.emitChunk(new AutoURLLinkText(node.textContent, 
node), node);
+       } else if (isURLLink(state.env, node, linkData)/* && 
!linkData.isInterwiki */) {
+               // Uncomment the above check if we want 
[[wikipedia:Foo|http://en.wikipedia.org/wiki/Foo]]
+               // for '<a 
href="http://en.wikipedia.org/wiki/Foo">http://en.wikipedia.org/wiki/Foo</a>'
+               linkData.linkType = "mw:URLLink";
+               return;
+       } else {
+               // Emit piped wikilink syntax
+               isPiped = true;
+
+               // First get the content source
+               if (linkData.contentNode) {
+                       var cs = yield state.serializeLinkChildrenToString(
+                               linkData.contentNode,
+                               state.serializer.wteHandlers.wikilinkHandler
+                       );
+                       // strip off the tail and handle the pipe trick
+                       contentParts = splitLinkContentString(cs, dp);
+                       contentSrc = contentParts.contentString;
+                       dp.tail = contentParts.tail;
+                       linkData.tail = contentParts.tail;
+                       dp.prefix = contentParts.prefix;
+                       linkData.prefix = contentParts.prefix;
+                       requiresEscaping = false;
+               } else {
+                       contentSrc = linkData.content.string || '';
+                       requiresEscaping = !linkData.content.fromsrc;
+               }
+
+               if (contentSrc === '' &&
+                       linkData.type !== 'mw:PageProp/Category') {
+                       // Protect empty link content from PST pipe trick
+                       contentSrc = '<nowiki/>';
+                       requiresEscaping = false;
+               }
+
+               linkTarget = target.value;
+               if (target.modified || !target.fromsrc) {
+                       // Links starting with ./ shouldn't get _ replaced with 
' '
+                       var linkContentIsRelative =
+                               linkData.content && linkData.content.string &&
+                               linkData.content.string.match(/^\.\//);
+                       linkTarget = linkTarget.replace(/^(\.\.?\/)*/, '');
+                       if (!linkData.isInterwiki && !linkContentIsRelative) {
+                               linkTarget = linkTarget.replace(/_/g, ' ');
+                       }
+                       escapedTgt = escapeLinkTarget(linkTarget, state);
+                       linkTarget = escapedTgt.linkTarget;
+               }
+
+               // If we are reusing the target from source, we don't
+               // need to worry about colon-escaping because it will
+               // be in the right form already.
+               //
+               // Trying to eliminate this check and always check for
+               // colon-escaping seems a bit tricky when the reused
+               // target has encoded entities that won't resolve to
+               // valid titles.
+               if ((!escapedTgt || !escapedTgt.invalidLink) && 
!target.fromsrc) {
+                       linkTarget = addColonEscape(env, linkTarget, linkData);
+               }
+       }
+       if (linkData.linkType === "mw:URLLink") {
+               state.emitChunk(new AutoURLLinkText(node.textContent, node), 
node);
+               return;
+       }
+
+       if (linkData.isRedirect) {
+               // Drop duplicates
+               if (state.redirectText !== null) {
                        return;
                }
 
-               if (linkData.isRedirect) {
-                       // Drop duplicates
-                       if (state.redirectText !== null) {
-                               return;
-                       }
-
-                       // Buffer redirect text if it is not in start of file 
position
-                       if (!REDIRECT_TEST_RE.test(state.out + 
state.currLine.text)) {
-                               state.redirectText = linkData.prefix + '[[' + 
linkTarget + ']]';
-                               state.emitChunk('', node);  // Flush seperators 
for this node
-                               return;
-                       }
-
-                       // Set to some non-null string
-                       state.redirectText = 'unbuffered';
+               // Buffer redirect text if it is not in start of file position
+               if (!REDIRECT_TEST_RE.test(state.out + state.currLine.text)) {
+                       state.redirectText = linkData.prefix + '[[' + 
linkTarget + ']]';
+                       state.emitChunk('', node);  // Flush separators for 
this node
+                       return;
                }
 
-               var pipedText;
-               if (escapedTgt && escapedTgt.invalidLink) {
-                       // If the link target was invalid, instead of emitting 
an invalid link,
-                       // omit the link and serialize just the content 
instead. But, log the
-                       // invalid html for Parsoid clients to investigate 
later.
-                       state.env.log("error/html2wt/link", "Bad title text", 
node.outerHTML);
+               // Set to some non-null string
+               state.redirectText = 'unbuffered';
+       }
 
-                       // For non-piped content, use the original invalid link 
text
-                       pipedText = isPiped ? contentSrc : linkTarget;
+       var pipedText;
+       if (escapedTgt && escapedTgt.invalidLink) {
+               // If the link target was invalid, instead of emitting an 
invalid link,
+               // omit the link and serialize just the content instead. But, 
log the
+               // invalid html for Parsoid clients to investigate later.
+               state.env.log("error/html2wt/link", "Bad title text", 
node.outerHTML);
 
-                       if (requiresEscaping) {
-                               // Escape the text in the old sol context
-                               state.onSOL = oldSOLState;
-                               pipedText = 
state.serializer.wteHandlers.escapeWikiText(pipedText, { node: node });
-                       }
-                       state.emitChunk(linkData.prefix + pipedText + 
linkData.tail, node);
+               // For non-piped content, use the original invalid link text
+               pipedText = isPiped ? contentSrc : linkTarget;
+
+               if (requiresEscaping) {
+                       // Escape the text in the old sol context
+                       state.onSOL = oldSOLState;
+                       pipedText = 
state.serializer.wteHandlers.escapeWikiText(pipedText, { node: node });
+               }
+               state.emitChunk(linkData.prefix + pipedText + linkData.tail, 
node);
+       } else {
+               if (isPiped && requiresEscaping) {
+                       // We are definitely not in sol context since content
+                       // will be preceded by "[[" or "[" text in target 
wikitext.
+                       pipedText = '|' + escapeLinkContent(contentSrc, state, 
false, node);
+               } else if (isPiped) {
+                       pipedText = '|' + contentSrc;
                } else {
-                       if (isPiped && requiresEscaping) {
-                               // We are definitely not in sol context since 
content
-                               // will be preceded by "[[" or "[" text in 
target wikitext.
-                               pipedText = '|' + escapeLinkContent(contentSrc, 
state, false, node);
-                       } else if (isPiped) {
-                               pipedText = '|' + contentSrc;
-                       } else {
-                               pipedText = '';
-                       }
-                       state.emitChunk(new WikiLinkText(
-                               linkData.prefix + '[[' + linkTarget + pipedText 
+ ']]' + linkData.tail,
-                               node, wiki, linkData.type), node);
+                       pipedText = '';
                }
-       });
+               state.emitChunk(new WikiLinkText(
+                       linkData.prefix + '[[' + linkTarget + pipedText + ']]' 
+ linkData.tail,
+                       node, wiki, linkData.type), node);
+       }
 });
 
 var serializeAsExtLink = Promise.async(function *(node, state, linkData) {

-- 
To view, visit https://gerrit.wikimedia.org/r/402080
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: Ia0a4cfd0a881e9d401eff66b0602ccdc81be7358
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/services/parsoid
Gerrit-Branch: master
Gerrit-Owner: C. Scott Ananian <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to