Mholloway has uploaded a new change for review. ( https://gerrit.wikimedia.org/r/391359 )

Change subject: WIP: Get media items directly from Parsoid HTML
......................................................................

WIP: Get media items directly from Parsoid HTML

Saves a MediaWiki API call.

Problem: /page/html doesn't appear to automatically handle redirects...

Change-Id: Iaaefc337730b494e5e639deea16dfd9968a02b8a
---
M lib/media.js
M routes/media.js
M test/lib/media/media-test.js
3 files changed, 45 insertions(+), 61 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/services/mobileapps refs/changes/59/391359/1

diff --git a/lib/media.js b/lib/media.js
index 049bace..af9e33a 100644
--- a/lib/media.js
+++ b/lib/media.js
@@ -1,10 +1,8 @@
 'use strict';
 
-const domino = require('domino');
 const api = require('./api-util');
 const Title = require('mediawiki-title').Title;
 
-const MAX_ITEM_COUNT = 500;
 const MAX_IMAGE_WIDTH = 1280;
 
 
@@ -14,21 +12,12 @@
 
 /**
  * Sort an array of media items in place by their order of appearance in an 
HTML document.
- * @param {!string} html a raw HTML document
- * @param {!Array} media an array of media items as returned by 
gallery.collectionPromise
+ * @param {!Array} titles a list of File page titles for media items
+ * @param {!Array} items an array of media metadata items as returned by 
getMetadata
  * @param {!Object} si a site info object as returned by mwapi.getSiteInfo
  */
-function sort(html, media, si) {
-    const doc = domino.createDocument(html);
-    const images = doc.querySelectorAll('img,video');
-    const titles = [];
-    // TODO: handle Mathoid-rendered math images
-    images.forEach((img) => {
-        if (img.hasAttribute('resource')) {
-            titles.push(img.getAttribute('resource').replace(/^\.\//, ''));
-        }
-    });
-    media.items.sort((a, b) => {
+function sort(titles, items, si) {
+    items.sort((a, b) => {
         return titles.indexOf(dbKey(a.title, si)) - 
titles.indexOf(dbKey(b.title, si));
     });
 }
@@ -58,55 +47,29 @@
     });
 }
 
-function getTitles(items) {
-    // Reject gallery items if they're too small.
-    // Also reject SVG and PNG items by default, because they're likely to be
-    // logos and/or presentational images.
-    return items.map((item) => {
-        return item.title;
-    });
-}
-
 /**
  * Gets the gallery content from MW API
  * TODO: ensure that all media items are correctly accounted for on very large 
articles
  */
-function collectionPromise(app, req) {
+function getMetadata(app, req, titles) {
     const query = {
         action: 'query',
         format: 'json',
         formatversion: 2,
-        titles: req.params.title,
-        continue: '',
-        prop: 'imageinfo',
-        iiprop: 'dimensions|mime',
-        generator: 'images',
-        gimlimit: MAX_ITEM_COUNT,
-        redirects: true
+        prop: 'videoinfo',
+        viprop: 'url|dimensions|mime|extmetadata|derivatives',
+        viurlwidth: MAX_IMAGE_WIDTH,
+        titles: titles.join('|'),
+        continue: ''
     };
     return api.mwApiGet(app, req.params.domain, query).then((response) => {
-        if (!response.body.query || !response.body.query.pages) {
-            return { items: [] };
-        }
-        const query = {
-            action: 'query',
-            format: 'json',
-            formatversion: 2,
-            prop: 'videoinfo',
-            viprop: 'url|dimensions|mime|extmetadata|derivatives',
-            viurlwidth: MAX_IMAGE_WIDTH,
-            titles: getTitles(response.body.query.pages).join('|'),
-            continue: ''
-        };
-        return api.mwApiGet(app, req.params.domain, query).then((response) => {
-            const pages = response.body.query && response.body.query.pages;
-            const items = pages ? makeResults(pages) : [];
-            return { items };
-        });
+        const pages = response.body.query && response.body.query.pages;
+        const items = pages ? makeResults(pages) : [];
+        return { items };
     });
 }
 
 module.exports = {
     sort,
-    collectionPromise
+    getMetadata
 };
diff --git a/routes/media.js b/routes/media.js
index 7275d50..c60ece7 100644
--- a/routes/media.js
+++ b/routes/media.js
@@ -1,6 +1,7 @@
 'use strict';
 
 const BBPromise = require('bluebird');
+const domino = require('domino');
 const mUtil = require('../lib/mobile-util');
 const parsoid = require('../lib/parsoid-access');
 const sUtil = require('../lib/util');
@@ -16,17 +17,31 @@
  */
 router.get('/media/:title', (req, res) => {
     return BBPromise.props({
-        page: parsoid.pageHtmlPromise(app, req),
-        media: media.collectionPromise(app, req),
+        html: parsoid.getParsoidHtml(app, req),
+        // media: media.collectionPromise(app, req),
         siteinfo: mwapi.getSiteInfo(app, req)
     }).then((response) => {
-        if (response.media.items && response.media.items.length > 1) {
-            media.sort(response.page.html, response.media, response.siteinfo);
-        }
-        res.status(200);
-        mUtil.setETag(res, response.page.meta.revision);
-        mUtil.setContentType(res, mUtil.CONTENT_TYPES.unpublished);
-        res.json(response.media).end();
+        const doc = domino.createDocument(response.html.body);
+        // todo: handle Mathoid-rendered math images
+        const selection = doc.querySelectorAll('img,video');
+        // todo: handle zero-length result
+        const titles = [].map.call(selection, (elem) => {
+            return elem.getAttribute('resource').replace(/^.\//, '');
+        });
+        // todo: filter for size, mime
+        return BBPromise.props({
+            titles,
+            metadata: media.getMetadata(app, req, titles),
+            siteinfo: response.siteinfo,
+        }).then((response) => {
+            if (response.metadata.items && response.metadata.items.length > 1) 
{
+                media.sort(response.titles, response.metadata.items, 
response.siteinfo);
+            }
+            res.status(200);
+            // mUtil.setETag(res, response.page.meta.revision);
+            mUtil.setContentType(res, mUtil.CONTENT_TYPES.unpublished);
+            res.send({ items: response.metadata.items }).end();
+        });
     });
 });
 
diff --git a/test/lib/media/media-test.js b/test/lib/media/media-test.js
index 6b38e35..d5441c6 100644
--- a/test/lib/media/media-test.js
+++ b/test/lib/media/media-test.js
@@ -1,5 +1,6 @@
 'use strict';
 
+const domino = require('domino');
 const assert = require('../../utils/assert');
 const sort = require('../../../lib/media').sort;
 
@@ -22,7 +23,12 @@
 
     it('Results should be sorted in order of appearance on the page', () => {
         const result = unsorted;
-        sort(page, result, siteInfo);
+        const doc = domino.createDocument(page);
+        const selection = doc.querySelectorAll('img,video');
+        const titles = [].map.call(selection, (elem) => {
+            return elem.getAttribute('resource').replace(/^.\//, '');
+        });
+        sort(titles, result.items, siteInfo);
         assert.deepEqual(result, sorted);
     });
 

-- 
To view, visit https://gerrit.wikimedia.org/r/391359
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: Iaaefc337730b494e5e639deea16dfd9968a02b8a
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/services/mobileapps
Gerrit-Branch: master
Gerrit-Owner: Mholloway <mhollo...@wikimedia.org>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to