Mholloway has uploaded a new change for review. ( https://gerrit.wikimedia.org/r/391359 )
Change subject: WIP: Get media items directly from Parsoid HTML ...................................................................... WIP: Get media items directly from Parsoid HTML Saves a MediaWiki API call. Problem: /page/html doesn't appear to automatically handle redirects... Change-Id: Iaaefc337730b494e5e639deea16dfd9968a02b8a --- M lib/media.js M routes/media.js M test/lib/media/media-test.js 3 files changed, 45 insertions(+), 61 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/mediawiki/services/mobileapps refs/changes/59/391359/1 diff --git a/lib/media.js b/lib/media.js index 049bace..af9e33a 100644 --- a/lib/media.js +++ b/lib/media.js @@ -1,10 +1,8 @@ 'use strict'; -const domino = require('domino'); const api = require('./api-util'); const Title = require('mediawiki-title').Title; -const MAX_ITEM_COUNT = 500; const MAX_IMAGE_WIDTH = 1280; @@ -14,21 +12,12 @@ /** * Sort an array of media items in place by their order of appearance in an HTML document. - * @param {!string} html a raw HTML document - * @param {!Array} media an array of media items as returned by gallery.collectionPromise + * @param {!Array} titles a list of File page titles for media items + * @param {!Array} items an array of media metadata items as returned by getMetadata * @param {!Object} si a site info object as returned by mwapi.getSiteInfo */ -function sort(html, media, si) { - const doc = domino.createDocument(html); - const images = doc.querySelectorAll('img,video'); - const titles = []; - // TODO: handle Mathoid-rendered math images - images.forEach((img) => { - if (img.hasAttribute('resource')) { - titles.push(img.getAttribute('resource').replace(/^\.\//, '')); - } - }); - media.items.sort((a, b) => { +function sort(titles, items, si) { + items.sort((a, b) => { return titles.indexOf(dbKey(a.title, si)) - titles.indexOf(dbKey(b.title, si)); }); } @@ -58,55 +47,29 @@ }); } -function getTitles(items) { - // Reject gallery items if they're too small. - // Also reject SVG and PNG items by default, because they're likely to be - // logos and/or presentational images. - return items.map((item) => { - return item.title; - }); -} - /** * Gets the gallery content from MW API * TODO: ensure that all media items are correctly accounted for on very large articles */ -function collectionPromise(app, req) { +function getMetadata(app, req, titles) { const query = { action: 'query', format: 'json', formatversion: 2, - titles: req.params.title, - continue: '', - prop: 'imageinfo', - iiprop: 'dimensions|mime', - generator: 'images', - gimlimit: MAX_ITEM_COUNT, - redirects: true + prop: 'videoinfo', + viprop: 'url|dimensions|mime|extmetadata|derivatives', + viurlwidth: MAX_IMAGE_WIDTH, + titles: titles.join('|'), + continue: '' }; return api.mwApiGet(app, req.params.domain, query).then((response) => { - if (!response.body.query || !response.body.query.pages) { - return { items: [] }; - } - const query = { - action: 'query', - format: 'json', - formatversion: 2, - prop: 'videoinfo', - viprop: 'url|dimensions|mime|extmetadata|derivatives', - viurlwidth: MAX_IMAGE_WIDTH, - titles: getTitles(response.body.query.pages).join('|'), - continue: '' - }; - return api.mwApiGet(app, req.params.domain, query).then((response) => { - const pages = response.body.query && response.body.query.pages; - const items = pages ? makeResults(pages) : []; - return { items }; - }); + const pages = response.body.query && response.body.query.pages; + const items = pages ? makeResults(pages) : []; + return { items }; }); } module.exports = { sort, - collectionPromise + getMetadata }; diff --git a/routes/media.js b/routes/media.js index 7275d50..c60ece7 100644 --- a/routes/media.js +++ b/routes/media.js @@ -1,6 +1,7 @@ 'use strict'; const BBPromise = require('bluebird'); +const domino = require('domino'); const mUtil = require('../lib/mobile-util'); const parsoid = require('../lib/parsoid-access'); const sUtil = require('../lib/util'); @@ -16,17 +17,31 @@ */ router.get('/media/:title', (req, res) => { return BBPromise.props({ - page: parsoid.pageHtmlPromise(app, req), - media: media.collectionPromise(app, req), + html: parsoid.getParsoidHtml(app, req), + // media: media.collectionPromise(app, req), siteinfo: mwapi.getSiteInfo(app, req) }).then((response) => { - if (response.media.items && response.media.items.length > 1) { - media.sort(response.page.html, response.media, response.siteinfo); - } - res.status(200); - mUtil.setETag(res, response.page.meta.revision); - mUtil.setContentType(res, mUtil.CONTENT_TYPES.unpublished); - res.json(response.media).end(); + const doc = domino.createDocument(response.html.body); + // todo: handle Mathoid-rendered math images + const selection = doc.querySelectorAll('img,video'); + // todo: handle zero-length result + const titles = [].map.call(selection, (elem) => { + return elem.getAttribute('resource').replace(/^.\//, ''); + }); + // todo: filter for size, mime + return BBPromise.props({ + titles, + metadata: media.getMetadata(app, req, titles), + siteinfo: response.siteinfo, + }).then((response) => { + if (response.metadata.items && response.metadata.items.length > 1) { + media.sort(response.titles, response.metadata.items, response.siteinfo); + } + res.status(200); + // mUtil.setETag(res, response.page.meta.revision); + mUtil.setContentType(res, mUtil.CONTENT_TYPES.unpublished); + res.send({ items: response.metadata.items }).end(); + }); }); }); diff --git a/test/lib/media/media-test.js b/test/lib/media/media-test.js index 6b38e35..d5441c6 100644 --- a/test/lib/media/media-test.js +++ b/test/lib/media/media-test.js @@ -1,5 +1,6 @@ 'use strict'; +const domino = require('domino'); const assert = require('../../utils/assert'); const sort = require('../../../lib/media').sort; @@ -22,7 +23,12 @@ it('Results should be sorted in order of appearance on the page', () => { const result = unsorted; - sort(page, result, siteInfo); + const doc = domino.createDocument(page); + const selection = doc.querySelectorAll('img,video'); + const titles = [].map.call(selection, (elem) => { + return elem.getAttribute('resource').replace(/^.\//, ''); + }); + sort(titles, result.items, siteInfo); assert.deepEqual(result, sorted); }); -- To view, visit https://gerrit.wikimedia.org/r/391359 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: Iaaefc337730b494e5e639deea16dfd9968a02b8a Gerrit-PatchSet: 1 Gerrit-Project: mediawiki/services/mobileapps Gerrit-Branch: master Gerrit-Owner: Mholloway <mhollo...@wikimedia.org> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits