BearND has uploaded a new change for review. https://gerrit.wikimedia.org/r/302745
Change subject: Change hrefs so they are the same as Mobileview API emits them ...................................................................... Change hrefs so they are the same as Mobileview API emits them Unfortunately, we have to take apart the different parts of the title (namespace, title, fragment) if it's a wiki internal link. Compare the links in the output of https://en.m.wikipedia.org/w/api.php?action=mobileview&format=json&formatversion=2&prop=text%7Csections%7Clanguagecount%7Cthumb%7Cimage%7Cid%7Crevision%7Cdescription%7Clastmodified%7Cnormalizedtitle%7Cdisplaytitle%7Cprotection%7Ceditable&onlyrequestedsections=1&sections=all&sectionprop=toclevel%7Cline%7Canchor&noheadings=true&page=User:BSitzmann_%28WMF%29%2FTry%2FTitleLinkEncoding&thumbsize=1024 with the output of http://localhost:6927/en.wikipedia.org/v1/page/mobile-sections-lead/User%3ABSitzmann_(WMF)%2FMCS%2FTest%2FTitleLinkEncoding Change-Id: I0e8e4fe011826fc4b007ed9d45b71b866dee7363 related: T136346 Bug: T136223 --- M lib/transforms.js M test/features/mobile-sections/pagecontent.js M test/lib/transforms/transforms-test.js 3 files changed, 173 insertions(+), 9 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/mediawiki/services/mobileapps refs/changes/45/302745/1 diff --git a/lib/transforms.js b/lib/transforms.js index 096e12a..354ee6b 100644 --- a/lib/transforms.js +++ b/lib/transforms.js @@ -5,6 +5,7 @@ 'use strict'; +var a = require('./anchorencode'); var domino = require('domino'); var util = require('util'); var anchorPopUpMediaTransforms = require('./transformations/anchorPopUpMediaTransforms'); @@ -80,14 +81,107 @@ } } -function _rewriteUrlAttribute(doc, selector, attribute) { +// from Parsoid Util.js, with '+' added in regex +function _sanitizeTitleURI(title) { + var bits = title.split('#'); + var anchor = null; + var sanitize = function(s) { + return s.replace(/[+%? 
\[\]#|<>]/g, function(m) { + return encodeURIComponent(m); + }); + }; + if (bits.length > 1) { // split at first '#' + anchor = title.substring(bits[0].length + 1); + title = bits[0]; + } + title = sanitize(title); + if (anchor !== null) { + title += '#' + sanitize(anchor); + } + return title; +} + +// from Parsoid Util.js +function _decodeURI(s) { + return s.replace(/(%[0-9a-fA-F][0-9a-fA-F])+/g, function(m) { + try { + // JS library function + return decodeURIComponent(m); + } catch (e) { + return m; + } + }); +} + +/** + * Re-encode links based on Parsoid output to match the output MW Core would produce, so we're compatible with the + * links and the reading focused clients have an easier time to transition to MCS output. + * + * Replaces ./ with /wiki/. + * Removes page title if it's the same as the current title so the client doesn't think this is a link to a different + * page. + * Anchorencodes fragments. + * Encodes title while preserving slashes and some other special characters. 
+ * + * @param url the link to be re-encoded + * @param currentTitle the title of the current page, so we can detect page internal links + * @returns encoded URL the same way as MW Core would emit + * @private + */ +function _encodeTitleLikeMwParser(url, currentTitle) { + url = url.replace(/^\.\//, ''); // remove ./ prefix + let nsTitle = url; + let output, namespace, title, fragment; + + // split out fragment + let indexOfHashSign = url.indexOf('#'); + if (indexOfHashSign > -1) { + // remove beginning if the same as current title and has a fragment (= page internal link) + if (url.indexOf(currentTitle + '#') === 0) { + url = url.substring(indexOfHashSign); + indexOfHashSign = url.indexOf('#'); + } + + nsTitle = url.substring(0, indexOfHashSign); + fragment = url.substring(indexOfHashSign + 1); + fragment = a.anchorencode(_decodeURI(fragment)); + } + + // split out namespace + let indexOfColon = nsTitle.indexOf(':'); + if (indexOfColon > -1) { + namespace = nsTitle.substring(0, indexOfColon); + title = nsTitle.substring(indexOfColon + 1); + } else { + title = nsTitle; + } + + output = _sanitizeTitleURI(_decodeURI(title)); + + // now put things back together + if (namespace) { + output = namespace + ':' + output; + } + + if (fragment) { + output = output + '#' + fragment; + } + + if (indexOfHashSign !== 0) { // don't add the /wiki/ prefix if the URL starts with a # (page internal link) + output = '/wiki/' + output; + } + + return output; +} + +function _rewriteUrlAttribute(doc, selector, attribute, currentTitle) { var ps = doc.querySelectorAll(selector) || [], value; for (var idx = 0; idx < ps.length; idx++) { var node = ps[idx]; value = node.getAttribute(attribute); - if (value) { - value = value.replace(/^\.\//, '/wiki/'); + if (value && value.indexOf("./") === 0) { + value = _encodeTitleLikeMwParser(value, currentTitle); node.setAttribute(attribute, value); } } @@ -122,8 +216,12 @@ _rmComments(doc); } +function _getCurrentParsoidPageTitle(doc) { + return 
doc.querySelector('head > title').innerHTML; +} + function _addParsoidSpecificMarkup(doc) { - _rewriteUrlAttribute(doc, 'a', 'href'); + _rewriteUrlAttribute(doc, 'a', 'href', _getCurrentParsoidPageTitle(doc)); // Set <a class=\'external\" // on all <a rel=\"mw:ExtLink\" @@ -266,7 +364,8 @@ } }; -// Make two internal functions visible for testing +// Make internal functions visible for testing +transforms._rewriteUrlAttribute = _rewriteUrlAttribute; transforms._rmBracketSpans = _rmBracketSpans; transforms._rmElementsWithSelectors = _rmElementsWithSelectors; diff --git a/test/features/mobile-sections/pagecontent.js b/test/features/mobile-sections/pagecontent.js index 2d21807..a76f762 100644 --- a/test/features/mobile-sections/pagecontent.js +++ b/test/features/mobile-sections/pagecontent.js @@ -1,6 +1,7 @@ 'use strict'; var assert = require('../../utils/assert.js'); +var domino = require('domino'); var preq = require('preq'); var server = require('../../utils/server.js'); var headers = require('../../utils/headers.js'); @@ -146,4 +147,18 @@ assert.deepEqual(res.body.lead.redirected, 'User:BSitzmann (WMF)/MCS/Test/redirect test3 target#Section_.25'); }); }); + it('"Sort (C++)" in link should be encoded the same way action=mobileview does', function() { + // so the apps don't have to add special cases for RB/Parsoid vs MW API. 
+ // compare with output from + // https://en.m.wikipedia.org/w/api.php?action=mobileview&format=json&formatversion=2&prop=text%7Csections%7Clanguagecount%7Cthumb%7Cimage%7Cid%7Crevision%7Cdescription%7Clastmodified%7Cnormalizedtitle%7Cdisplaytitle%7Cprotection%7Ceditable&onlyrequestedsections=1&sections=all&sectionprop=toclevel%7Cline%7Canchor&noheadings=true&page=User:BSitzmann_%28WMF%29%2FTry%2FTitleLinkEncoding&thumbsize=1024 + return preq.get({ uri: server.config.uri + 'en.wikipedia.org/v1/page/mobile-sections/User:BSitzmann_%28WMF%29%2FMCS%2FTest%2FTitleLinkEncoding' }) + .then(function(res) { + assert.deepEqual(res.status, 200); + let text = res.body.lead.sections[0].text; + assert.contains(text, '<a href="/wiki/Sort_(C%2B%2B)">'); + assert.contains(text, '<a href="#Special_chars_.24.25.26">'); + assert.contains(text, '<a href="https://phabricator.wikimedia.org/T136346">'); + assert.contains(text, '<a href="/wiki/User:BSitzmann_(WMF)/MCS/Test/Lead_paragraph_move'); + }); + }); }); diff --git a/test/lib/transforms/transforms-test.js b/test/lib/transforms/transforms-test.js index d2cdcb0..6cfe790 100644 --- a/test/lib/transforms/transforms-test.js +++ b/test/lib/transforms/transforms-test.js @@ -7,15 +7,15 @@ describe('lib:transforms', function() { this.timeout(20000); - it('rmBracketSpans should remove the spans around brackets', function() { - var doc = domino.createDocument('<body><a><span>[</span>1<span>]</span></a></body>'); + it('_rmBracketSpans should remove the spans around brackets', function() { + let doc = domino.createDocument('<body><a><span>[</span>1<span>]</span></a></body>'); assert.selectorExistsNTimes(doc, 'body span', 2); transforms._rmBracketSpans(doc); assert.selectorExistsNTimes(doc, 'body span', 0); }); - it('rmElementsWithSelectors should remove the spans with 
display:none', function() { + let doc = domino.createDocument('<body><span style=\"display:none\">foo</span></body>'); assert.selectorExistsNTimes(doc, 'body span', 1); transforms._rmElementsWithSelectors(doc, [ //'span', // Remove <span class=\"Z3988\"></span> @@ -25,4 +25,54 @@ //assert.selectorExistsNTimes(doc, 'body span', 0); // Does not yet work. Filed https://github.com/fgnass/domino/issues/59 }); + + it('_rewriteUrlAttribute should preserve : to separate namespace', function() { + let doc = domino.createDocument('<body><a href="./Talk:ABC">text</a></body>'); + transforms._rewriteUrlAttribute(doc, 'a', 'href', 'Foo'); + let href = doc.querySelector('body a').getAttribute('href'); + assert.equal(href, '/wiki/Talk:ABC'); + }); + + it('_rewriteUrlAttribute should preserve fragment', function() { + let doc = domino.createDocument('<body><a href="./User:ABC#fragment1">text</a></body>'); + transforms._rewriteUrlAttribute(doc, 'a', 'href', 'Foo'); + let href = doc.querySelector('body a').getAttribute('href'); + assert.equal(href, '/wiki/User:ABC#fragment1'); + }); + + it('_rewriteUrlAttribute should anchorencode fragents', function() { + let doc = domino.createDocument('<body><a href="./User:ABC#Special_chars_$%&">text</a></body>'); + transforms._rewriteUrlAttribute(doc, 'a', 'href', 'Foo'); + let href = doc.querySelector('body a').getAttribute('href'); + console.log("href = " + href); + assert.equal(href, '/wiki/User:ABC#Special_chars_.24.25.26'); + }); + + it('_rewriteUrlAttribute should replace %25 with %', function() { + let doc = domino.createDocument('<body><a href="./%25%25">text</a></body>'); + transforms._rewriteUrlAttribute(doc, 'a', 'href', 'Foo'); + let href = doc.querySelector('body a').getAttribute('href'); + assert.equal(href, '/wiki/%25%25'); + }); + + it('_rewriteUrlAttribute should preserve slashes in page title', function() { + let doc = domino.createDocument('<body><a href="./This/Page/has/Slashes">text</a></body>'); + 
transforms._rewriteUrlAttribute(doc, 'a', 'href', 'Foo'); + let href = doc.querySelector('body a').getAttribute('href'); + assert.equal(href, '/wiki/This/Page/has/Slashes'); + }); + + it('_rewriteUrlAttribute should preserve slashes in current page title', function() { + let doc = domino.createDocument('<body><a href="./This/Page/has/Slashes#fragment1">text</a></body>'); + transforms._rewriteUrlAttribute(doc, 'a', 'href', 'This/Page/has/Slashes'); + let href = doc.querySelector('body a').getAttribute('href'); + assert.equal(href, '#fragment1'); + }); + + it('_rewriteUrlAttribute should strip page name in link if it\'s the same as the current page', function() { + let doc = domino.createDocument('<body><a href="./This_Page#fragment1">text</a></body>'); + transforms._rewriteUrlAttribute(doc, 'a', 'href', 'This_Page'); + let href = doc.querySelector('body a').getAttribute('href'); + assert.equal(href, '#fragment1'); + }); }); -- To view, visit https://gerrit.wikimedia.org/r/302745 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: I0e8e4fe011826fc4b007ed9d45b71b866dee7363 Gerrit-PatchSet: 1 Gerrit-Project: mediawiki/services/mobileapps Gerrit-Branch: master Gerrit-Owner: BearND <bsitzm...@wikimedia.org> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits