jenkins-bot has submitted this change and it was merged. ( https://gerrit.wikimedia.org/r/395653 )
Change subject: Media: Exclude .noviewer and .metadata items ...................................................................... Media: Exclude .noviewer and .metadata items Looks like this mostly overlaps with our size- and mime-based filtering. Bug: T177430 Change-Id: Idfe8664cf187e0811a27dbce94a1f361e42cb910 --- M lib/media.js M lib/selectors.js M test/diff/results/page_media-enwiki-Hummingbird.json M test/lib/media/media-test.js 4 files changed, 14 insertions(+), 50 deletions(-) Approvals: BearND: Looks good to me, approved jenkins-bot: Verified diff --git a/lib/media.js b/lib/media.js index aeae4e8..136aa86 100644 --- a/lib/media.js +++ b/lib/media.js @@ -4,7 +4,8 @@ const api = require('./api-util'); const mUtil = require('./mobile-util'); const Title = require('mediawiki-title').Title; -const MediaSelectors = require('./selectors').MediaSelectors; +const Selectors = require('./selectors').MediaSelectors; +const Blacklist = require('./selectors').MediaBlacklist; const SpokenWikipediaId = require('./selectors').SpokenWikipediaId; const MIN_IMAGE_SIZE = 64; @@ -55,8 +56,8 @@ function getMediaItemInfoFromPage(html) { const doc = domino.createDocument(html); // todo: handle Mathoid-rendered math images - const selection = doc.querySelectorAll(MediaSelectors.join()); - return [].map.call(selection, (elem) => { + const elems = doc.querySelectorAll(Selectors.join()).filter(e => !e.closest(Blacklist.join())); + return [].map.call(elems, (elem) => { const mediaType = getMediaType(elem); const resource = mediaType.selector && elem.querySelector(mediaType.selector); const figCaption = elem.querySelector('figcaption'); diff --git a/lib/selectors.js b/lib/selectors.js index 835aac7..3e66897 100644 --- a/lib/selectors.js +++ b/lib/selectors.js @@ -13,6 +13,11 @@ 'span.IPA+small a[rel=mw:MediaLink]' ]; +const MediaBlacklist = [ + '.noviewer', + '.metadata' +]; + const ImageSelectors = MediaSelectors.filter(selector => selector.includes('Image')); const VideoSelectors = MediaSelectors.filter(selector => selector.includes('Video')); const PronunciationSelector = MediaSelectors.filter(selector => selector.includes('IPA'))[0]; @@ -22,6 +27,7 @@ module.exports = { MediaSelectors, + MediaBlacklist, ImageSelectors, VideoSelectors, PronunciationSelector, diff --git a/test/diff/results/page_media-enwiki-Hummingbird.json b/test/diff/results/page_media-enwiki-Hummingbird.json index 1bf7b9c..e3a50d5 100644 --- a/test/diff/results/page_media-enwiki-Hummingbird.json +++ b/test/diff/results/page_media-enwiki-Hummingbird.json @@ -826,52 +826,6 @@ "license": "CC BY-SA 4.0", "license_url": "https://creativecommons.org/licenses/by-sa/4.0", "description": "A female Anna's Hummingbird <i><a href=\"//commons.wikimedia.org/wiki/Calypte_anna\" title=\"Calypte anna\">Calypte anna</a></i> perched on a small branch." - }, - { - "type": "image", - "titles": { - "canonical": "File:Caribou_from_Wagon_Trails.jpg", - "normalized": "File:Caribou from Wagon Trails.jpg", - "display": "File:Caribou from Wagon Trails.jpg" - }, - "thumbnail": { - "source": "https://upload.wikimedia.org/wikipedia/commons/thumb/e/e0/Caribou_from_Wagon_Trails.jpg/320px-Caribou_from_Wagon_Trails.jpg", - "width": 320, - "height": 219, - "mime": "image/jpeg" - }, - "original": { - "source": "https://upload.wikimedia.org/wikipedia/commons/e/e0/Caribou_from_Wagon_Trails.jpg", - "width": 2064, - "height": 1413, - "mime": "image/jpeg" - }, - "file_page": "https://commons.wikimedia.org/wiki/File:Caribou_from_Wagon_Trails.jpg", - "license": "CC BY-SA 2.5", - "license_url": "https://creativecommons.org/licenses/by-sa/2.5" - }, - { - "type": "image", - "titles": { - "canonical": "File:Issoria_lathonia.jpg", - "normalized": "File:Issoria lathonia.jpg", - "display": "File:Issoria lathonia.jpg" - }, - "thumbnail": { - "source": "https://upload.wikimedia.org/wikipedia/commons/thumb/2/2d/Issoria_lathonia.jpg/320px-Issoria_lathonia.jpg", - "width": 320, - "height": 226, - "mime": "image/jpeg" - }, - "original": { - "source": "https://upload.wikimedia.org/wikipedia/commons/2/2d/Issoria_lathonia.jpg", - "width": 629, - "height": 445, - "mime": "image/jpeg" - }, - "file_page": "https://commons.wikimedia.org/wiki/File:Issoria_lathonia.jpg", - "license": "CC-BY-SA-3.0", - "license_url": "http://creativecommons.org/licenses/by-sa/3.0/" } ] } \ No newline at end of file diff --git a/test/lib/media/media-test.js b/test/lib/media/media-test.js index 7ab95bd..c4ea7eb 100644 --- a/test/lib/media/media-test.js +++ b/test/lib/media/media-test.js @@ -29,12 +29,15 @@ const noTypeSpan = '<span><video resource="./File:Foo"/></span>'; const noTypeFigureInline = '<figure-inline><video resource="./File:Foo"/></figure-inline>'; +const imageNoViewer = '<figure typeof="mw:Image" class="noviewer"><img resource="./File:Foo"/></figure>'; +const imageMetadata = '<span class="metadata"><figure typeof="mw:Image"><img resource="./File:Foo"/></figure></span>'; + const images = [imageFigure, imageSpan, imageFigureInline, imageThumbFigure, imageThumbSpan, imageThumbFigureInline]; const videos = [videoFigure, videoSpan, videoFigureInline, videoThumbFigure, videoThumbSpan, videoThumbFigureInline]; const audio = [audioFigure, audioSpan, audioFigureInline]; const validItems = images.concat(videos).concat(audio); -const invalidItems = [noTypeFigure, noTypeSpan, noTypeFigureInline]; +const invalidItems = [noTypeFigure, noTypeSpan, noTypeFigureInline, imageNoViewer, imageMetadata]; const imageWithCaption = '<figure typeof="mw:Image">' + -- To view, visit https://gerrit.wikimedia.org/r/395653 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: merged Gerrit-Change-Id: Idfe8664cf187e0811a27dbce94a1f361e42cb910 Gerrit-PatchSet: 4 Gerrit-Project: mediawiki/services/mobileapps Gerrit-Branch: master Gerrit-Owner: Mholloway <mhollo...@wikimedia.org> Gerrit-Reviewer: BearND <bsitzm...@wikimedia.org> Gerrit-Reviewer: Fjalapeno <cfl...@wikimedia.org> Gerrit-Reviewer: Gergő Tisza <gti...@wikimedia.org> Gerrit-Reviewer: Jdlrobson <jrob...@wikimedia.org> Gerrit-Reviewer: Mholloway <mhollo...@wikimedia.org> Gerrit-Reviewer: Mhurd <mh...@wikimedia.org> Gerrit-Reviewer: Ppchelko <ppche...@wikimedia.org> Gerrit-Reviewer: jenkins-bot <> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits