Subramanya Sastry has uploaded a new change for review.
https://gerrit.wikimedia.org/r/200874
Change subject: Code cleanup in wts.LinkHandler.js
......................................................................
Code cleanup in wts.LinkHandler.js
This is one of the messiest parts of the codebase, with lots
and lots of conditions.
This first pass does some very superficial, minimal cleanup
and also identifies some dead code that could potentially be
removed.
We should extract more code into smaller property-checking
functions.
Change-Id: Idb618997faefb77c1aeb57b74074effcb6f84835
---
M lib/wts.LinkHandler.js
1 file changed, 49 insertions(+), 53 deletions(-)
git pull ssh://gerrit.wikimedia.org:29418/mediawiki/services/parsoid
refs/changes/74/200874/1
diff --git a/lib/wts.LinkHandler.js b/lib/wts.LinkHandler.js
index 1cfa57b..db582de 100644
--- a/lib/wts.LinkHandler.js
+++ b/lib/wts.LinkHandler.js
@@ -87,7 +87,16 @@
var typeMatch = rel.match( /(?:^|\s)(mw:[^\s]+)/ );
if ( typeMatch ) {
rtData.type = typeMatch[1];
+ // Strip link subtype info
+ if (/^mw:ExtLink\//.test(rtData.type)) {
+ rtData.type = 'mw:ExtLink';
+ }
}
+ }
+
+ // Default type
+ if (rtData.type === null && !node.querySelector('IMG')) {
+ rtData.type = 'mw:ExtLink';
}
var href = getHref( env, node );
@@ -96,7 +105,7 @@
rtData.href = href.replace( /^(\.\.?\/)+/, '' );
// Now get the target from rt data
- rtData.target = state.serializer.serializedAttrVal(node, 'href', {});
+ rtData.target = state.serializer.serializedAttrVal(node, 'href');
// Check if the link content has been modified
// FIXME: This will only work with selser of course. Hard to test
without
@@ -159,16 +168,11 @@
state.inLink = false;
state.wteHandlerStack.pop();
- if (!suppressLinkTest) {
- // env.isValidLinkTarget doesn't check for anything that the
tokenizer
- // already handles. So, handle those here.
- //
- // FIXME: Should we make env.isValidLinkTarget self-contained
so that
- // it can be used in other contexts without relying on
tokenizer fixups?
- if (!state.env.isValidLinkTarget(linkTarget) ||
/[\|]/.test(linkTarget)) {
- linkTarget = "MediaWiki:Badtitletext";
- state.env.log("error", "Bad title text",
node.outerHTML);
- }
+ if (!suppressLinkTest &&
+ (!state.env.isValidLinkTarget(linkTarget) ||
/[\|]/.test(linkTarget)))
+ {
+ linkTarget = "MediaWiki:Badtitletext";
+ state.env.log("error", "Bad title text", node.outerHTML);
}
return { contentSrc: res, linkTarget: linkTarget };
@@ -262,8 +266,8 @@
// Strip colon escapes from the original target as that is
// stripped when deriving the content string.
- var strippedTargetValue = target.value.replace(/^:/, ''),
- identicalTarget = function (a, b) {
+ var strippedTargetValue = target.value.replace(/^:/, '');
+ var identicalTarget = function (a, b) {
return (
a === Util.stripPipeTrickChars(b) ||
env.normalizeTitle(a) ===
env.normalizeTitle(Util.stripPipeTrickChars(Util.decodeURI(b)))
@@ -287,31 +291,22 @@
// external wiki links for html import. Might want to consider
converting
// relative links without path component and file extension to wiki
links.
var env = state.env,
+ wiki = env.conf.wiki,
dp = DU.getDataParsoid( node ),
- linkData, contentParts,
- contentSrc = '',
- rel = node.getAttribute('rel') || '';
+ contentParts,
+ contentSrc = '';
// Get the rt data from the token and tplAttrs
- linkData = getLinkRoundTripData(env, node, state);
-
- if (linkData.type === null && !node.querySelector('IMG')) {
- linkData.type = 'mw:ExtLink';
- }
-
+ var linkData = getLinkRoundTripData(env, node, state);
if ( linkData.type !== null && linkData.target.value !== null ) {
// We have a type and target info
-
- if (/^mw:ExtLink\//.test(linkData.type)) {
- linkData.type = 'mw:ExtLink';
- }
var target = linkData.target,
href = getHref( env, node );
if
(/\b(mw:ExtLink|mw:PageProp\/Language)\b/.test(linkData.type)) {
var targetVal = target.fromsrc || true ? target.value :
Util.decodeURI(target.value);
// Check if the href matches any of our interwiki URL
patterns
- var interWikiMatch =
env.conf.wiki.InterWikiMatcher().match(href);
+ var interWikiMatch =
wiki.InterWikiMatcher().match(href);
if (interWikiMatch &&
// Remaining target
// 1) is not just a fragment id (#foo),
and
@@ -329,7 +324,7 @@
}
linkData.isInterwiki = true;
// could this be confused with a language link?
- var iwi = env.conf.wiki.interwikiMap.get
+ var iwi = wiki.interwikiMap.get
(
interWikiMatch[0].toLowerCase().replace( /^:/, '' ) );
linkData.isInterwikiLang = iwi && iwi.language
!== undefined;
// is this our own wiki?
@@ -340,7 +335,7 @@
oldPrefix =
target.value.slice(localPrefix.length).
match(/^(:?[^:]+):/);
if (!oldPrefix) { break; }
- iwi = env.conf.wiki.interwikiMap.get
+ iwi = wiki.interwikiMap.get
( Util.normalizeNamespaceName
( oldPrefix[1].replace(
/^:/, '' ) ) );
if (!iwi || iwi.localinterwiki ===
undefined) { break; }
@@ -352,8 +347,8 @@
// Check if the old prefix
mapped to the same URL as
// the new one. Use the old one
if that's the case.
// Example: [[w:Foo]] vs.
[[:en:Foo]]
-
(env.conf.wiki.interwikiMap.get(oldPrefix[1].toLowerCase().trim().replace(/^:/,
'')) || {}).url ===
-
(env.conf.wiki.interwikiMap.get(interWikiMatch[0].toLowerCase().replace(/^:/,
'')) || {}).url
+
(wiki.interwikiMap.get(oldPrefix[1].toLowerCase().trim().replace(/^:/, '')) ||
{}).url ===
+
(wiki.interwikiMap.get(interWikiMatch[0].toLowerCase().replace(/^:/, '')) ||
{}).url
))
{
// Reuse old prefix capitalization
@@ -381,7 +376,7 @@
if (/^mw:WikiLink$/.test( linkData.type ) ||
Util.solTransparentLinkRegexp.test( linkData.type ) ) {
// Decode any link that did not come from the source
- if (! target.fromsrc) {
+ if (!target.fromsrc) {
target.value = Util.decodeURI(target.value);
}
@@ -406,8 +401,7 @@
contentParts = splitLinkContentString(
Util.decodeURI(
targetParts[2] )
.replace(
/%23/g, '#' )
- // gwicke:
verify that spaces are really
- //
double-encoded!
+ // gwicke:
verify that spaces are really double-encoded!
.replace(
/%20/g, ' '),
dp );
linkData.content.string =
contentParts.contentString;
@@ -427,7 +421,7 @@
// we need to fully shadow the sort key.
//if ( ! target.modified ) {
// The target and source key was not
modified
- var sortKeySrc =
this.serializedAttrVal(node, 'mw:sortKey', {});
+ var sortKeySrc =
this.serializedAttrVal(node, 'mw:sortKey');
if ( sortKeySrc.value !== null ) {
linkData.contentNode =
undefined;
linkData.content.string =
sortKeySrc.value;
@@ -452,15 +446,14 @@
linkTarget = target.value;
if (target.modified || !target.fromsrc) {
linkTarget =
linkTarget.replace(/^(\.\.?\/)*/, '').replace(/_/g, ' ');
- escapedRes =
escapeWikiLinkContentString(linkTarget,
- state, node);
+ escapedRes =
escapeWikiLinkContentString(linkTarget, state, node);
linkTarget = escapedRes.linkTarget;
// Determine if it's a redirect to a
category, in which case
// it needs a ':' on front to
distingish from a category link.
var categoryMatch =
linkTarget.match(/^([^:]+)[:]/);
if (categoryMatch) {
- var ns =
this.env.conf.wiki.namespaceIds[Util.normalizeNamespaceName(categoryMatch[1])];
- if (ns ===
this.env.conf.wiki.canonicalNamespaces.category) {
+ var ns =
wiki.namespaceIds[Util.normalizeNamespaceName(categoryMatch[1])];
+ if (ns ===
wiki.canonicalNamespaces.category) {
// Check that the next
node isn't a category link,
// in which case we
don't want the ':'.
var nextNode =
node.nextSibling;
@@ -472,7 +465,7 @@
}
}
}
- cb( new WikiLinkText( linkData.prefix + '[[' +
linkTarget + ']]', node, env.conf.wiki, linkData.type ), node );
+ cb( new WikiLinkText( linkData.prefix + '[[' +
linkTarget + ']]', node, wiki, linkData.type ), node );
return;
} else if ( isSimpleWikiLink(env, dp, target, linkData)
) {
// Simple case
@@ -481,7 +474,7 @@
} else {
escapedRes =
escapeWikiLinkContentString(linkData.content.string,
state, node);
- linkTarget = addColonEscape(this.env,
escapedRes.linkTarget, linkData);
+ linkTarget = addColonEscape(env,
escapedRes.linkTarget, linkData);
if (linkData.isInterwikiLang &&
!/^[:]/.test(linkTarget) &&
linkData.type !==
'mw:PageProp/Language') {
// ensure interwiki links can't
be confused with
@@ -489,7 +482,7 @@
linkTarget = ':' + linkTarget;
}
}
- cb( new WikiLinkText( linkData.prefix + '[[' +
linkTarget + ']]' + linkData.tail, node, env.conf.wiki, linkData.type ), node );
+ cb( new WikiLinkText( linkData.prefix + '[[' +
linkTarget + ']]' + linkData.tail, node, wiki, linkData.type ), node );
return;
} else {
var usePT = usePipeTrick(env, dp, target,
linkData);
@@ -537,12 +530,12 @@
state, node);
linkTarget = escapedRes.linkTarget;
}
- linkTarget = addColonEscape(this.env,
linkTarget, linkData);
+ linkTarget = addColonEscape(env, linkTarget,
linkData);
cb( new WikiLinkText(
linkData.prefix +
'[[' + linkTarget + '|' + contentSrc +
']]' +
- linkData.tail, node, env.conf.wiki,
linkData.type ), node );
+ linkData.tail, node, wiki,
linkData.type ), node );
return;
}
} else if ( linkData.type === 'mw:ExtLink' ) {
@@ -572,7 +565,7 @@
return;
} else {
// TODO: match vs. interwikis too
- var magicLinkMatch =
env.conf.wiki.ExtResourceURLPatternMatcher
+ var magicLinkMatch =
wiki.ExtResourceURLPatternMatcher
.match(href);
// Fully serialize the content
contentStr =
state.serializeLinkChildrenToString(node,
@@ -581,7 +574,7 @@
// First check for ISBN/RFC/PMID links. We rely
on selser to
// preserve non-minimal forms.
if (magicLinkMatch) {
- var serializer =
env.conf.wiki.ExtResourceSerializer[magicLinkMatch[0]];
+ var serializer =
wiki.ExtResourceSerializer[magicLinkMatch[0]];
cb( new MagicLinkText(
serializer(magicLinkMatch, target.value, contentStr ), node ), node );
return;
// There is an interwiki for RFCs, but
strangely none for PMIDs.
@@ -599,12 +592,12 @@
linktext = '[' + urlStr +
(contentStr ? ' ' + contentStr : '') + ']';
}
- cb( new Construct( linktext, node,
env.conf.wiki, linkData.type ), node );
+ cb( new Construct( linktext, node,
wiki, linkData.type ), node );
return;
}
}
} else if ( linkData.type.match( /mw:ExtLink\/(?:RFC|PMID)/ ) ||
- /mw:(?:Wiki|Ext)Link\/ISBN/.test(rel) )
{
+
/mw:(?:Wiki|Ext)Link\/ISBN/.test(node.getAttribute('rel') || '') ) {
// FIXME: Handle RFC/PMID in generic ExtLink handler by
matching prefixes!
// FIXME: Handle ISBN in generic WikiLink handler by
looking for
// Special:BookSources!
@@ -615,17 +608,20 @@
return;
} else {
// Unknown rel was set
- //this._htmlElementHandler(node, state, cb);
if (target.modified || !target.fromsrc) {
target.value = escapeExtLinkURL(target.value);
}
cb( new ExtLinkText(
'[' + target.value + ' ' +
state.serializeLinkChildrenToString(node,
this.wteHandlers.aHandler, false) +
- ']', node, env.conf.wiki, linkData.type ), node
);
+ ']', node, wiki, linkData.type ), node );
return;
}
} else {
+ // SSS FIXME: This whole else-part can be deleted I think.
+ // At first glance, the only time control gets here is if
+ // the href attribute is not set: Ex: "<a title='foo'>BOO</a>"
+
// TODO: default to extlink for simple links with unknown rel
set
// switch to html only when needed to support attributes
@@ -657,7 +653,7 @@
cb( new ExtLinkText(
'[' + hrefStr + ' ' +
state.serializeLinkChildrenToString(node,
this.wteHandlers.aHandler, false) +
- ']', node, env.conf.wiki, 'mw:ExtLink' ), node
);
+ ']', node, wiki, 'mw:ExtLink' ), node );
return;
}
}
@@ -705,7 +701,7 @@
// The only essential thing is the IMG tag!
if (!imgElt) {
- this.env.log("error", "In WSP.handleImage, node does not have
any img elements:", node.outerHTML );
+ env.log("error", "In WSP.handleImage, node does not have any
img elements:", node.outerHTML );
return cb( '', node );
}
@@ -718,7 +714,7 @@
// (this won't work for manual-thumb images)
var src = imgElt.getAttribute( 'src' );
if (!src) {
- this.env.log("error", "In WSP.handleImage, img does not
have resource or src:", node.outerHTML);
+ env.log("error", "In WSP.handleImage, img does not have
resource or src:", node.outerHTML);
return cb( '', node );
}
if (/^https?:/.test(src)) {
--
To view, visit https://gerrit.wikimedia.org/r/200874
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: Idb618997faefb77c1aeb57b74074effcb6f84835
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/services/parsoid
Gerrit-Branch: master
Gerrit-Owner: Subramanya Sastry <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits