BearND has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/302745

Change subject: Change hrefs so they are the same as Mobileview API emits them
......................................................................

Change hrefs so they are the same as Mobileview API emits them

Unfortunately, for wiki-internal links we have to split the link target
into its parts (namespace, title, fragment), because the title and the
fragment are encoded differently.

Compare the links in the output of
https://en.m.wikipedia.org/w/api.php?action=mobileview&format=json&formatversion=2&prop=text%7Csections%7Clanguagecount%7Cthumb%7Cimage%7Cid%7Crevision%7Cdescription%7Clastmodified%7Cnormalizedtitle%7Cdisplaytitle%7Cprotection%7Ceditable&onlyrequestedsections=1&sections=all&sectionprop=toclevel%7Cline%7Canchor&noheadings=true&page=User:BSitzmann_%28WMF%29%2FTry%2FTitleLinkEncoding&thumbsize=1024
with the output of
http://localhost:6927/en.wikipedia.org/v1/page/mobile-sections-lead/User%3ABSitzmann_(WMF)%2FMCS%2FTest%2FTitleLinkEncoding

Change-Id: I0e8e4fe011826fc4b007ed9d45b71b866dee7363
related: T136346
Bug: T136223
---
M lib/transforms.js
M test/features/mobile-sections/pagecontent.js
M test/lib/transforms/transforms-test.js
3 files changed, 173 insertions(+), 9 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/services/mobileapps 
refs/changes/45/302745/1

diff --git a/lib/transforms.js b/lib/transforms.js
index 096e12a..354ee6b 100644
--- a/lib/transforms.js
+++ b/lib/transforms.js
@@ -5,6 +5,7 @@
 
 'use strict';
 
+var a = require('./anchorencode');
 var domino = require('domino');
 var util = require('util');
 var anchorPopUpMediaTransforms = 
require('./transformations/anchorPopUpMediaTransforms');
@@ -80,14 +81,107 @@
     }
 }
 
-function _rewriteUrlAttribute(doc, selector, attribute) {
+// from Parsoid Util.js, with '+' added in regex
+function _sanitizeTitleURI(title) {
+    var bits = title.split('#');
+    var anchor = null;
+    var sanitize = function(s) {
+        return s.replace(/[+%? \[\]#|<>]/g, function(m) {
+            return encodeURIComponent(m);
+        });
+    };
+    if (bits.length > 1) { // split at first '#'
+        anchor = title.substring(bits[0].length + 1);
+        title = bits[0];
+    }
+    title = sanitize(title);
+    if (anchor !== null) {
+        title += '#' + sanitize(anchor);
+    }
+    return title;
+}
+
+// from Parsoid Util.js
+function _decodeURI(s) {
+    return s.replace(/(%[0-9a-fA-F][0-9a-fA-F])+/g, function(m) {
+        try {
+            // JS library function
+            return decodeURIComponent(m);
+        } catch (e) {
+            return m;
+        }
+    });
+}
+
+/**
+ * Re-encode links from Parsoid output to match the output MW Core would produce, so that our links are
+ * compatible with it and the reading-focused clients have an easier time transitioning to MCS output.
+ *
+ * Replaces ./ with /wiki/.
+ * Removes page title if it's the same as the current title so the client 
doesn't think this is a link to a different
+ * page.
+ * Anchorencodes fragments.
+ * Encodes title while preserving slashes and some other special characters.
+ *
+ * @param url the link to be re-encoded
+ * @param currentTitle the title of the current page, so we can detect page 
internal links
+ * @returns encoded URL the same way as MW Core would emit
+ * @private
+ */
+function _encodeTitleLikeMwParser(url, currentTitle) {
+    url = url.replace(/^\.\//, ''); // remove ./ prefix
+    let nsTitle = url;
+    let output, namespace, title, fragment;
+
+    // split out fragment
+    let indexOfHashSign = url.indexOf('#');
+    if (indexOfHashSign > -1) {
+        // remove beginning if the same as current title and has a fragment (= 
page internal link)
+        if (url.indexOf(currentTitle + '#') === 0) {
+            url = url.substring(indexOfHashSign);
+            indexOfHashSign = url.indexOf('#');
+        }
+
+        nsTitle = url.substring(0, indexOfHashSign);
+        fragment = url.substring(indexOfHashSign + 1);
+        fragment = a.anchorencode(_decodeURI(fragment));
+    }
+
+    // split out namespace
+    let indexOfColon = nsTitle.indexOf(':');
+    if (indexOfColon > -1) {
+        namespace = nsTitle.substring(0, indexOfColon);
+        title = nsTitle.substring(indexOfColon + 1);
+    } else {
+        title = nsTitle;
+    }
+
+    output = _sanitizeTitleURI(_decodeURI(title));
+
+    // now put things back together
+    if (namespace) {
+        output = namespace + ':' + output;
+    }
+
+    if (fragment) {
+        output = output + '#' + fragment;
+    }
+
+    if (indexOfHashSign !== 0) { // don't add the /wiki/ prefix if the URL 
starts with a # (page internal link)
+        output = '/wiki/' + output;
+    }
+
+    return output;
+}
+
+function _rewriteUrlAttribute(doc, selector, attribute, currentTitle) {
     var ps = doc.querySelectorAll(selector) || [],
         value;
     for (var idx = 0; idx < ps.length; idx++) {
         var node = ps[idx];
         value = node.getAttribute(attribute);
-        if (value) {
-            value = value.replace(/^\.\//, '/wiki/');
+        if (value && value.indexOf("./") === 0) {
+            value = _encodeTitleLikeMwParser(value, currentTitle);
             node.setAttribute(attribute, value);
         }
     }
@@ -122,8 +216,12 @@
     _rmComments(doc);
 }
 
+function _getCurrentParsoidPageTitle(doc) {
+    return doc.querySelector('head > title').innerHTML;
+}
+
 function _addParsoidSpecificMarkup(doc) {
-    _rewriteUrlAttribute(doc, 'a', 'href');
+    _rewriteUrlAttribute(doc, 'a', 'href', _getCurrentParsoidPageTitle(doc));
 
     // Set <a class=\'external\"
     // on all <a rel=\"mw:ExtLink\"
@@ -266,7 +364,8 @@
     }
 };
 
-// Make two internal functions visible for testing
+// Make internal functions visible for testing
+transforms._rewriteUrlAttribute = _rewriteUrlAttribute;
 transforms._rmBracketSpans = _rmBracketSpans;
 transforms._rmElementsWithSelectors = _rmElementsWithSelectors;
 
diff --git a/test/features/mobile-sections/pagecontent.js 
b/test/features/mobile-sections/pagecontent.js
index 2d21807..a76f762 100644
--- a/test/features/mobile-sections/pagecontent.js
+++ b/test/features/mobile-sections/pagecontent.js
@@ -1,6 +1,7 @@
 'use strict';
 
 var assert = require('../../utils/assert.js');
+var domino = require('domino');
 var preq   = require('preq');
 var server = require('../../utils/server.js');
 var headers = require('../../utils/headers.js');
@@ -146,4 +147,18 @@
                 assert.deepEqual(res.body.lead.redirected, 'User:BSitzmann 
(WMF)/MCS/Test/redirect test3 target#Section_.25');
             });
     });
+    it('"Sort (C++)" in link should be encoded the same way action=mobileview 
does', function() {
+        // so the apps don't have to add special cases for RB/Parsoid vs MW 
API.
+        // compare with output from
+        // 
https://en.m.wikipedia.org/w/api.php?action=mobileview&format=json&formatversion=2&prop=text%7Csections%7Clanguagecount%7Cthumb%7Cimage%7Cid%7Crevision%7Cdescription%7Clastmodified%7Cnormalizedtitle%7Cdisplaytitle%7Cprotection%7Ceditable&onlyrequestedsections=1&sections=all&sectionprop=toclevel%7Cline%7Canchor&noheadings=true&page=User:BSitzmann_%28WMF%29%2FTry%2FTitleLinkEncoding&thumbsize=1024
+        return preq.get({ uri: server.config.uri + 
'en.wikipedia.org/v1/page/mobile-sections/User:BSitzmann_%28WMF%29%2FMCS%2FTest%2FTitleLinkEncoding'
 })
+            .then(function(res) {
+                assert.deepEqual(res.status, 200);
+                let text = res.body.lead.sections[0].text;
+                assert.contains(text, '<a href="/wiki/Sort_(C%2B%2B)">');
+                assert.contains(text, '<a href="#Special_chars_.24.25.26">');
+                assert.contains(text, '<a 
href="https://phabricator.wikimedia.org/T136346";>');
+                assert.contains(text, '<a 
href="/wiki/User:BSitzmann_(WMF)/MCS/Test/Lead_paragraph_move');
+            });
+    });
 });
diff --git a/test/lib/transforms/transforms-test.js 
b/test/lib/transforms/transforms-test.js
index d2cdcb0..6cfe790 100644
--- a/test/lib/transforms/transforms-test.js
+++ b/test/lib/transforms/transforms-test.js
@@ -7,15 +7,15 @@
 describe('lib:transforms', function() {
     this.timeout(20000);
 
-    it('rmBracketSpans should remove the spans around brackets', function() {
-        var doc = 
domino.createDocument('<body><a><span>[</span>1<span>]</span></a></body>');
+    it('_rmBracketSpans should remove the spans around brackets', function() {
+        let doc = 
domino.createDocument('<body><a><span>[</span>1<span>]</span></a></body>');
         assert.selectorExistsNTimes(doc, 'body span', 2);
         transforms._rmBracketSpans(doc);
         assert.selectorExistsNTimes(doc, 'body span', 0);
     });
 
-    it('rmElementsWithSelectors should remove the spans with display:none', 
function() {
-        var doc = domino.createDocument('<body><span 
style=\"display:none\">foo</span></body>');
+    it('_rmElementsWithSelectors should remove the spans with display:none', 
function() {
+        let doc = domino.createDocument('<body><span 
style=\"display:none\">foo</span></body>');
         assert.selectorExistsNTimes(doc, 'body span', 1);
         transforms._rmElementsWithSelectors(doc, [
             //'span',                               // Remove <span 
class=\"Z3988\"></span>
@@ -25,4 +25,54 @@
         //assert.selectorExistsNTimes(doc, 'body span', 0);
         // Does not yet work. Filed https://github.com/fgnass/domino/issues/59
     });
+
+    it('_rewriteUrlAttribute should preserve : to separate namespace', 
function() {
+        let doc = domino.createDocument('<body><a 
href="./Talk:ABC">text</a></body>');
+        transforms._rewriteUrlAttribute(doc, 'a', 'href', 'Foo');
+        let href = doc.querySelector('body a').getAttribute('href');
+        assert.equal(href, '/wiki/Talk:ABC');
+    });
+
+    it('_rewriteUrlAttribute should preserve fragment', function() {
+        let doc = domino.createDocument('<body><a 
href="./User:ABC#fragment1">text</a></body>');
+        transforms._rewriteUrlAttribute(doc, 'a', 'href', 'Foo');
+        let href = doc.querySelector('body a').getAttribute('href');
+        assert.equal(href, '/wiki/User:ABC#fragment1');
+    });
+
+    it('_rewriteUrlAttribute should anchorencode fragments', function() {
+        let doc = domino.createDocument('<body><a 
href="./User:ABC#Special_chars_$%&">text</a></body>');
+        transforms._rewriteUrlAttribute(doc, 'a', 'href', 'Foo');
+        let href = doc.querySelector('body a').getAttribute('href');
+        console.log("href = " + href);
+        assert.equal(href, '/wiki/User:ABC#Special_chars_.24.25.26');
+    });
+
+    it('_rewriteUrlAttribute should replace %25 with %', function() {
+        let doc = domino.createDocument('<body><a 
href="./%25%25">text</a></body>');
+        transforms._rewriteUrlAttribute(doc, 'a', 'href', 'Foo');
+        let href = doc.querySelector('body a').getAttribute('href');
+        assert.equal(href, '/wiki/%25%25');
+    });
+
+    it('_rewriteUrlAttribute should preserve slashes in page title', 
function() {
+        let doc = domino.createDocument('<body><a 
href="./This/Page/has/Slashes">text</a></body>');
+        transforms._rewriteUrlAttribute(doc, 'a', 'href', 'Foo');
+        let href = doc.querySelector('body a').getAttribute('href');
+        assert.equal(href, '/wiki/This/Page/has/Slashes');
+    });
+
+    it('_rewriteUrlAttribute should preserve slashes in current page title', 
function() {
+        let doc = domino.createDocument('<body><a 
href="./This/Page/has/Slashes#fragment1">text</a></body>');
+        transforms._rewriteUrlAttribute(doc, 'a', 'href', 
'This/Page/has/Slashes');
+        let href = doc.querySelector('body a').getAttribute('href');
+        assert.equal(href, '#fragment1');
+    });
+
+    it('_rewriteUrlAttribute should strip page name in link if it\'s the same 
as the current page', function() {
+        let doc = domino.createDocument('<body><a 
href="./This_Page#fragment1">text</a></body>');
+        transforms._rewriteUrlAttribute(doc, 'a', 'href', 'This_Page');
+        let href = doc.querySelector('body a').getAttribute('href');
+        assert.equal(href, '#fragment1');
+    });
 });

-- 
To view, visit https://gerrit.wikimedia.org/r/302745
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I0e8e4fe011826fc4b007ed9d45b71b866dee7363
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/services/mobileapps
Gerrit-Branch: master
Gerrit-Owner: BearND <bsitzm...@wikimedia.org>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to