[MediaWiki-commits] [Gerrit] mediawiki...mobileapps[master]: Media: Filter images based on size on the page
jenkins-bot has submitted this change and it was merged. ( https://gerrit.wikimedia.org/r/405010 ) Change subject: Media: Filter images based on size on the page .. Media: Filter images based on size on the page Reject if on-page width or height < 64px. Previously, we were using size (among other criteria) as a heuristic for excluding icons and other images unrelated to the article content, but basing the exclusion on the file's original size as reported by the MediaWiki API. The on-page size is what we actually care about. Bug: T177430 Change-Id: I9e6521b2b0ab579b97ef852e91c7517be6ff15a5 --- M lib/media.js M test/diff/results/page_media-enwiki-Hummingbird.json M test/diff/results/page_media-enwiki-Ko%C5%A1ice.json M test/lib/media/media-test-inclusion.js M test/lib/media/media-test-metadata.js 5 files changed, 36 insertions(+), 578 deletions(-) Approvals: BearND: Looks good to me, approved jenkins-bot: Verified diff --git a/lib/media.js b/lib/media.js index 7c02672..6c5107e 100644 --- a/lib/media.js +++ b/lib/media.js @@ -48,6 +48,25 @@ } /** + * Returns whether the element or an ancestor is part of a blacklisted class + * @param {!Element} elem an HTML Element + * @return {!Boolean} true if the element or an ancestor is part of a blacklisted class + */ +function isDisallowed(elem) { +return !!(elem.closest(Blacklist.join())); +} + +/** + * Returns whether the on-page size of an element is small enough to filter from the response + * @param {!Element} img an element + */ +function isTooSmall(img) { +const width = img.getAttribute('width'); +const height = img.getAttribute('height'); +return width < MIN_IMAGE_SIZE || height < MIN_IMAGE_SIZE; +} + +/** * Get file page titles from a NodeList of media elements from Parsoid HTML * @param {!String} html raw Parsoid HTML * @return {!Array} array containing the information on the media items on the page, in order of @@ -56,7 +75,11 @@ function getMediaItemInfoFromPage(html) { const doc = domino.createDocument(html); // 
todo: handle Mathoid-rendered math images -const elems = doc.querySelectorAll(Selectors.join()).filter(e => !e.closest(Blacklist.join())); +const elems = doc.querySelectorAll(Selectors.join()).filter((elem) => { +const mediaType = getMediaType(elem); +const resource = mediaType.selector && elem.querySelector(mediaType.selector); +return !isDisallowed(elem) && (mediaType === Image ? !isTooSmall(resource) : true); +}); const results = [].map.call(elems, (elem) => { const mediaType = getMediaType(elem); const resource = mediaType.selector && elem.querySelector(mediaType.selector); diff --git a/test/diff/results/page_media-enwiki-Hummingbird.json b/test/diff/results/page_media-enwiki-Hummingbird.json index b448dbf..1f3aabb 100644 --- a/test/diff/results/page_media-enwiki-Hummingbird.json +++ b/test/diff/results/page_media-enwiki-Hummingbird.json @@ -1006,75 +1006,6 @@ "license": "CC BY-SA 4.0", "license_url": "https://creativecommons.org/licenses/by-sa/4.0", "description": "A female Anna's Hummingbird Calypte anna perched on a small branch." 
-}, -{ - "section_id": 32, - "type": "image", - "titles": { -"canonical": "File:Lock-green.svg", -"normalized": "File:Lock-green.svg", -"display": "File:Lock-green.svg" - }, - "thumbnail": { -"source": "https://upload.wikimedia.org/wikipedia/commons/thumb/6/65/Lock-green.svg/320px-Lock-green.svg.png", -"width": 320, -"height": 508, -"mime": "image/png" - }, - "original": { -"source": "https://upload.wikimedia.org/wikipedia/commons/6/65/Lock-green.svg", -"mime": "image/svg+xml" - }, - "file_page": "https://commons.wikimedia.org/wiki/File:Lock-green.svg", - "artist": "User:Trappist the monk", - "credit": "<a href=\"https://en.wikipedia.org/wiki/File:Free-to-read_lock_75.svg\" class=\"extiw\" title=\"en:File:Free-to-read lock 75.svg\">en:File:Free-to-read_lock_75.svg</a>", - "license": "CC0", - "license_url": "http://creativecommons.org/publicdomain/zero/1.0/deed.en", - "description": "Copy of <a href=\"https://en.wikipedia.org/wiki/File:Free-to-read_lock_75.svg\" class=\"extiw\" title=\"en:File:Free-to-read lock 75.svg\">en:File:Free-to-read_lock_75.svg</a>, with consistent name for the series of locks" -}, -{ - "section_id": 33, - "type": "image", - "titles": { -"canonical": "File:Folder_Hexagonal_Icon.svg", -"normalized": "File:Folder Hexagonal Icon.svg", -"display": "File:Folder Hexagonal Icon.svg" - }, - "thumbnail": { -"source": "https://upload.wikimedia.org/wikipedia/en/thumb/4/48/Folder_Hexagonal_Icon.svg/320px-Folder_Hexagonal_Icon.svg.png", -"width": 320, -"height": 276, -"mime":
[MediaWiki-commits] [Gerrit] mediawiki...mobileapps[master]: Media: Filter images based on size on the page
Mholloway has uploaded a new change for review. ( https://gerrit.wikimedia.org/r/405010 ) Change subject: Media: Filter images based on size on the page .. Media: Filter images based on size on the page Reject if on-page width or height < 64px. Bug: T177430 Change-Id: I9e6521b2b0ab579b97ef852e91c7517be6ff15a5 --- M lib/media.js M test/diff/results/page_media-enwiki-Hummingbird.json M test/diff/results/page_media-enwiki-Ko%C5%A1ice.json M test/lib/media/media-test-inclusion.js M test/lib/media/media-test-metadata.js 5 files changed, 27 insertions(+), 578 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/mediawiki/services/mobileapps refs/changes/10/405010/1 diff --git a/lib/media.js b/lib/media.js index 7c02672..d7b9bf7 100644 --- a/lib/media.js +++ b/lib/media.js @@ -47,6 +47,16 @@ } } +function isDisallowed(elem) { +return !!(elem.closest(Blacklist.join())); +} + +function isTooSmall(img) { +const width = img.getAttribute('width'); +const height = img.getAttribute('height'); +return width < MIN_IMAGE_SIZE || height < MIN_IMAGE_SIZE; +} + /** * Get file page titles from a NodeList of media elements from Parsoid HTML * @param {!String} html raw Parsoid HTML @@ -56,7 +66,11 @@ function getMediaItemInfoFromPage(html) { const doc = domino.createDocument(html); // todo: handle Mathoid-rendered math images -const elems = doc.querySelectorAll(Selectors.join()).filter(e => !e.closest(Blacklist.join())); +const elems = doc.querySelectorAll(Selectors.join()).filter((elem) => { +const mediaType = getMediaType(elem); +const resource = mediaType.selector && elem.querySelector(mediaType.selector); +return !isDisallowed(elem) && (mediaType === Image ? 
!isTooSmall(resource) : true); +}); const results = [].map.call(elems, (elem) => { const mediaType = getMediaType(elem); const resource = mediaType.selector && elem.querySelector(mediaType.selector); diff --git a/test/diff/results/page_media-enwiki-Hummingbird.json b/test/diff/results/page_media-enwiki-Hummingbird.json index b448dbf..1f3aabb 100644 --- a/test/diff/results/page_media-enwiki-Hummingbird.json +++ b/test/diff/results/page_media-enwiki-Hummingbird.json @@ -1006,75 +1006,6 @@ "license": "CC BY-SA 4.0", "license_url": "https://creativecommons.org/licenses/by-sa/4.0", "description": "A female Anna's Hummingbird Calypte anna perched on a small branch." -}, -{ - "section_id": 32, - "type": "image", - "titles": { -"canonical": "File:Lock-green.svg", -"normalized": "File:Lock-green.svg", -"display": "File:Lock-green.svg" - }, - "thumbnail": { -"source": "https://upload.wikimedia.org/wikipedia/commons/thumb/6/65/Lock-green.svg/320px-Lock-green.svg.png", -"width": 320, -"height": 508, -"mime": "image/png" - }, - "original": { -"source": "https://upload.wikimedia.org/wikipedia/commons/6/65/Lock-green.svg", -"mime": "image/svg+xml" - }, - "file_page": "https://commons.wikimedia.org/wiki/File:Lock-green.svg", - "artist": "User:Trappist the monk", - "credit": "<a href=\"https://en.wikipedia.org/wiki/File:Free-to-read_lock_75.svg\" class=\"extiw\" title=\"en:File:Free-to-read lock 75.svg\">en:File:Free-to-read_lock_75.svg</a>", - "license": "CC0", - "license_url": "http://creativecommons.org/publicdomain/zero/1.0/deed.en", - "description": "Copy of <a href=\"https://en.wikipedia.org/wiki/File:Free-to-read_lock_75.svg\" class=\"extiw\" title=\"en:File:Free-to-read lock 75.svg\">en:File:Free-to-read_lock_75.svg</a>, with consistent name for the series of locks" -}, -{ - "section_id": 33, - "type": "image", - "titles": { -"canonical": "File:Folder_Hexagonal_Icon.svg", -"normalized": "File:Folder Hexagonal Icon.svg", -"display": "File:Folder Hexagonal Icon.svg" - }, - "thumbnail": { -"source": 
"https://upload.wikimedia.org/wikipedia/en/thumb/4/48/Folder_Hexagonal_Icon.svg/320px-Folder_Hexagonal_Icon.svg.png", -"width": 320, -"height": 276, -"mime": "image/png" - }, - "original": { -"source": "https://upload.wikimedia.org/wikipedia/en/4/48/Folder_Hexagonal_Icon.svg", -"mime": "image/svg+xml" - }, - "file_page": "https://commons.wikimedia.org/wiki/File:Folder_Hexagonal_Icon.svg", - "license": "Cc-by-sa-3.0", - "license_url": "https://creativecommons.org/licenses/by-sa/3.0/" -}, -{ - "section_id": 33, - "type": "image", - "titles": { -"canonical": "File:Portal-puzzle.svg", -"normalized": "File:Portal-puzzle.svg", -"display": "File:Portal-puzzle.svg" - }, - "thumbnail": { -"source":