jenkins-bot has submitted this change and it was merged. ( https://gerrit.wikimedia.org/r/370295 )
Change subject: Add read-base-html route ...................................................................... Add read-base-html route This new route is currently similar to read-html, except that it is not optimized for payload size. In the future read-html will request read-base-html and then run transformations to reduce the payload and to prepare consumption by app clients. Bug: T162179 Change-Id: I3752626fd9ff86dec7c57a8ffc10e95139772494 --- M lib/parsoid-access.js M routes/read-html.js A test/features/read-html/pagecontent-base.js 3 files changed, 57 insertions(+), 8 deletions(-) Approvals: Gergő Tisza: Looks good to me, approved jenkins-bot: Verified Mholloway: Looks good to me, but someone else must approve diff --git a/lib/parsoid-access.js b/lib/parsoid-access.js index c4dc9c9..df44853 100644 --- a/lib/parsoid-access.js +++ b/lib/parsoid-access.js @@ -131,19 +131,21 @@ /** * @param {!Object} app the application object * @param {!Object} req the request object - * @param {?Boolean} [legacy] if enabled will apply additional transformations - * including a legacy version of relocation of first paragraph - * and hiding IPA via an inline style rather than clas. 
+ * @param {?Boolean} [optimized] if true will apply additional transformations + * to reduce the payload * @return {!promise} Returns a promise to retrieve the page content from Parsoid */ -function pageHtmlPromise(app, req, legacy) { +function pageHtmlPromise(app, req, optimized) { return getParsoidHtml(app, req) .then((response) => { const meta = { revision: getRevisionFromEtag(response.headers) }; const doc = domino.createDocument(response.body); - transforms.stripReferenceListContent(doc); - transforms.stripUnneededMarkup(doc, legacy); + if (optimized) { + transforms.stripReferenceListContent(doc); + transforms.stripUnneededMarkup(doc, false); + } + parsoidSections.addSectionDivs(doc); const html = doc.outerHTML; diff --git a/routes/read-html.js b/routes/read-html.js index d39f5ef..e7db2ca 100644 --- a/routes/read-html.js +++ b/routes/read-html.js @@ -15,11 +15,11 @@ let app; /** - * GET {domain}/v1/page/read-html/{title}/{revision?}/{tid?} + * GET {domain}/v1/page/read-base-html/{title}/{revision?}/{tid?} * Gets page content in HTML. This is based on Parsoid with some minor modifications more * suitable for the reading use cases. */ -router.get('/read-html/:title/:revision?/:tid?', (req, res) => { +router.get('/read-base-html/:title/:revision?/:tid?', (req, res) => { return parsoid.pageHtmlPromise(app, req, false) .then((response) => { res.status(200); @@ -29,6 +29,21 @@ }); }); +/** + * GET {domain}/v1/page/read-html/{title}/{revision?}/{tid?} + * Gets page content in HTML. This is a more optimized for direct consumption by reading + * clients. 
+ */ +router.get('/read-html/:title/:revision?/:tid?', (req, res) => { + return parsoid.pageHtmlPromise(app, req, true) + .then((response) => { + res.status(200); + mUtil.setContentType(res, mUtil.CONTENT_TYPES.readHtml, 'text/html'); + mUtil.setETag(res, response.meta.revision); + res.send(response.html).end(); + }); +}); + module.exports = function(appObj) { app = appObj; return { diff --git a/test/features/read-html/pagecontent-base.js b/test/features/read-html/pagecontent-base.js new file mode 100644 index 0000000..b7ae121 --- /dev/null +++ b/test/features/read-html/pagecontent-base.js @@ -0,0 +1,32 @@ +'use strict'; + +const domino = require('domino'); +const preq = require('preq'); +const assert = require('../../utils/assert.js'); +const headers = require('../../utils/headers.js'); +const server = require('../../utils/server.js'); + +describe('read-html', function() { + + this.timeout(20000); // eslint-disable-line no-invalid-this + + before(() => { return server.start(); }); + + const localUri = (title, domain = 'en.wikipedia.org') => { + return `${server.config.uri}${domain}/v1/page/read-base-html/${title}`; + }; + + it('should respond to GET request with expected headers, incl. 
CORS and CSP headers', () => { + const uri = localUri('Foobar'); + return headers.checkHeaders(uri, headers.HTML_CONTENT_TYPE_REGEX); + }); + + it('HTML should be sectioned', () => { + const uri = localUri('Foobar/788941783'); + return preq.get({ uri }) + .then((res) => { + const document = domino.createDocument(res.body); + assert.selectorExistsNTimes(document, 'section', 7, 'should have 7 sections'); + }); + }); +}); -- To view, visit https://gerrit.wikimedia.org/r/370295 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: merged Gerrit-Change-Id: I3752626fd9ff86dec7c57a8ffc10e95139772494 Gerrit-PatchSet: 23 Gerrit-Project: mediawiki/services/mobileapps Gerrit-Branch: master Gerrit-Owner: BearND <bsitzm...@wikimedia.org> Gerrit-Reviewer: Dbrant <dbr...@wikimedia.org> Gerrit-Reviewer: Fjalapeno <cfl...@wikimedia.org> Gerrit-Reviewer: GWicke <gwi...@wikimedia.org> Gerrit-Reviewer: Gergő Tisza <gti...@wikimedia.org> Gerrit-Reviewer: Jdlrobson <jrob...@wikimedia.org> Gerrit-Reviewer: Mholloway <mhollo...@wikimedia.org> Gerrit-Reviewer: Mhurd <mh...@wikimedia.org> Gerrit-Reviewer: Mobrovac <mobro...@wikimedia.org> Gerrit-Reviewer: Niedzielski <sniedziel...@wikimedia.org> Gerrit-Reviewer: Ppchelko <ppche...@wikimedia.org> Gerrit-Reviewer: jenkins-bot <> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits