[MediaWiki-commits] [Gerrit] mediawiki...mobileapps[master]: Media: Filter images based on size on the page

2018-01-18 Thread jenkins-bot (Code Review)
jenkins-bot has submitted this change and it was merged. ( 
https://gerrit.wikimedia.org/r/405010 )

Change subject: Media: Filter images based on size on the page
..


Media: Filter images based on size on the page

Reject if on-page width or height < 64px. Previously, we were using
size (among other criteria) as a heuristic for excluding icons and other
images unrelated to the article content, but basing the exclusion on the
file's original size as reported by the MediaWiki API. The on-page size
is what we actually care about.

Bug: T177430
Change-Id: I9e6521b2b0ab579b97ef852e91c7517be6ff15a5
---
M lib/media.js
M test/diff/results/page_media-enwiki-Hummingbird.json
M test/diff/results/page_media-enwiki-Ko%C5%A1ice.json
M test/lib/media/media-test-inclusion.js
M test/lib/media/media-test-metadata.js
5 files changed, 36 insertions(+), 578 deletions(-)

Approvals:
  BearND: Looks good to me, approved
  jenkins-bot: Verified



diff --git a/lib/media.js b/lib/media.js
index 7c02672..6c5107e 100644
--- a/lib/media.js
+++ b/lib/media.js
@@ -48,6 +48,25 @@
 }
 
 /**
+ * Returns whether the element or an ancestor is part of a blacklisted class
+ * @param {!Element} elem an HTML Element
+ * @return {!Boolean} true if the element or an ancestor is part of a 
blacklisted class
+ */
+function isDisallowed(elem) {
+return !!(elem.closest(Blacklist.join()));
+}
+
+/**
+ * Returns whether the on-page size of an <img> element is small enough to 
filter from the response
+ * @param {!Element} img an <img> element
+ */
+function isTooSmall(img) {
+const width = img.getAttribute('width');
+const height = img.getAttribute('height');
+return width < MIN_IMAGE_SIZE || height < MIN_IMAGE_SIZE;
+}
+
+/**
  * Get file page titles from a NodeList of media elements from Parsoid HTML
  * @param {!String} html raw Parsoid HTML
  * @return {!Array} array containing the information on the media items on the 
page, in order of
@@ -56,7 +75,11 @@
 function getMediaItemInfoFromPage(html) {
 const doc = domino.createDocument(html);
 // todo: handle Mathoid-rendered math images
-const elems = doc.querySelectorAll(Selectors.join()).filter(e => 
!e.closest(Blacklist.join()));
+const elems = doc.querySelectorAll(Selectors.join()).filter((elem) => {
+const mediaType = getMediaType(elem);
+const resource = mediaType.selector && 
elem.querySelector(mediaType.selector);
+return !isDisallowed(elem) && (mediaType === Image ? 
!isTooSmall(resource) : true);
+});
 const results = [].map.call(elems, (elem) => {
 const mediaType = getMediaType(elem);
 const resource = mediaType.selector && 
elem.querySelector(mediaType.selector);
diff --git a/test/diff/results/page_media-enwiki-Hummingbird.json 
b/test/diff/results/page_media-enwiki-Hummingbird.json
index b448dbf..1f3aabb 100644
--- a/test/diff/results/page_media-enwiki-Hummingbird.json
+++ b/test/diff/results/page_media-enwiki-Hummingbird.json
@@ -1006,75 +1006,6 @@
   "license": "CC BY-SA 4.0",
   "license_url": "https://creativecommons.org/licenses/by-sa/4.0",
   "description": "A female Anna's Hummingbird Calypte anna perched on a small branch."
-},
-{
-  "section_id": 32,
-  "type": "image",
-  "titles": {
-"canonical": "File:Lock-green.svg",
-"normalized": "File:Lock-green.svg",
-"display": "File:Lock-green.svg"
-  },
-  "thumbnail": {
-"source": 
"https://upload.wikimedia.org/wikipedia/commons/thumb/6/65/Lock-green.svg/320px-Lock-green.svg.png",
-"width": 320,
-"height": 508,
-"mime": "image/png"
-  },
-  "original": {
-"source": 
"https://upload.wikimedia.org/wikipedia/commons/6/65/Lock-green.svg",
-"mime": "image/svg+xml"
-  },
-  "file_page": "https://commons.wikimedia.org/wiki/File:Lock-green.svg",
-  "artist": "User:Trappist the monk",
-  "credit": "https://en.wikipedia.org/wiki/File:Free-to-read_lock_75.svg\; 
class=\"extiw\" title=\"en:File:Free-to-read lock 
75.svg\">en:File:Free-to-read_lock_75.svg",
-  "license": "CC0",
-  "license_url": 
"http://creativecommons.org/publicdomain/zero/1.0/deed.en",
-  "description": "Copy of https://en.wikipedia.org/wiki/File:Free-to-read_lock_75.svg\; 
class=\"extiw\" title=\"en:File:Free-to-read lock 
75.svg\">en:File:Free-to-read_lock_75.svg, with consistent name for the 
series of locks"
-},
-{
-  "section_id": 33,
-  "type": "image",
-  "titles": {
-"canonical": "File:Folder_Hexagonal_Icon.svg",
-"normalized": "File:Folder Hexagonal Icon.svg",
-"display": "File:Folder Hexagonal Icon.svg"
-  },
-  "thumbnail": {
-"source": 
"https://upload.wikimedia.org/wikipedia/en/thumb/4/48/Folder_Hexagonal_Icon.svg/320px-Folder_Hexagonal_Icon.svg.png",
-"width": 320,
-"height": 276,
-"mime": 

[MediaWiki-commits] [Gerrit] mediawiki...mobileapps[master]: Media: Filter images based on size on the page

2018-01-18 Thread Mholloway (Code Review)
Mholloway has uploaded a new change for review. ( 
https://gerrit.wikimedia.org/r/405010 )

Change subject: Media: Filter images based on size on the page
..

Media: Filter images based on size on the page

Reject if on-page width or height < 64px.

Bug: T177430
Change-Id: I9e6521b2b0ab579b97ef852e91c7517be6ff15a5
---
M lib/media.js
M test/diff/results/page_media-enwiki-Hummingbird.json
M test/diff/results/page_media-enwiki-Ko%C5%A1ice.json
M test/lib/media/media-test-inclusion.js
M test/lib/media/media-test-metadata.js
5 files changed, 27 insertions(+), 578 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/services/mobileapps 
refs/changes/10/405010/1

diff --git a/lib/media.js b/lib/media.js
index 7c02672..d7b9bf7 100644
--- a/lib/media.js
+++ b/lib/media.js
@@ -47,6 +47,16 @@
 }
 }
 
+function isDisallowed(elem) {
+return !!(elem.closest(Blacklist.join()));
+}
+
+function isTooSmall(img) {
+const width = img.getAttribute('width');
+const height = img.getAttribute('height');
+return width < MIN_IMAGE_SIZE || height < MIN_IMAGE_SIZE;
+}
+
 /**
  * Get file page titles from a NodeList of media elements from Parsoid HTML
  * @param {!String} html raw Parsoid HTML
@@ -56,7 +66,11 @@
 function getMediaItemInfoFromPage(html) {
 const doc = domino.createDocument(html);
 // todo: handle Mathoid-rendered math images
-const elems = doc.querySelectorAll(Selectors.join()).filter(e => 
!e.closest(Blacklist.join()));
+const elems = doc.querySelectorAll(Selectors.join()).filter((elem) => {
+const mediaType = getMediaType(elem);
+const resource = mediaType.selector && 
elem.querySelector(mediaType.selector);
+return !isDisallowed(elem) && (mediaType === Image ? 
!isTooSmall(resource) : true);
+});
 const results = [].map.call(elems, (elem) => {
 const mediaType = getMediaType(elem);
 const resource = mediaType.selector && 
elem.querySelector(mediaType.selector);
diff --git a/test/diff/results/page_media-enwiki-Hummingbird.json 
b/test/diff/results/page_media-enwiki-Hummingbird.json
index b448dbf..1f3aabb 100644
--- a/test/diff/results/page_media-enwiki-Hummingbird.json
+++ b/test/diff/results/page_media-enwiki-Hummingbird.json
@@ -1006,75 +1006,6 @@
   "license": "CC BY-SA 4.0",
   "license_url": "https://creativecommons.org/licenses/by-sa/4.0",
   "description": "A female Anna's Hummingbird Calypte anna perched on a small branch."
-},
-{
-  "section_id": 32,
-  "type": "image",
-  "titles": {
-"canonical": "File:Lock-green.svg",
-"normalized": "File:Lock-green.svg",
-"display": "File:Lock-green.svg"
-  },
-  "thumbnail": {
-"source": 
"https://upload.wikimedia.org/wikipedia/commons/thumb/6/65/Lock-green.svg/320px-Lock-green.svg.png",
-"width": 320,
-"height": 508,
-"mime": "image/png"
-  },
-  "original": {
-"source": 
"https://upload.wikimedia.org/wikipedia/commons/6/65/Lock-green.svg",
-"mime": "image/svg+xml"
-  },
-  "file_page": "https://commons.wikimedia.org/wiki/File:Lock-green.svg",
-  "artist": "User:Trappist the monk",
-  "credit": "https://en.wikipedia.org/wiki/File:Free-to-read_lock_75.svg\; 
class=\"extiw\" title=\"en:File:Free-to-read lock 
75.svg\">en:File:Free-to-read_lock_75.svg",
-  "license": "CC0",
-  "license_url": 
"http://creativecommons.org/publicdomain/zero/1.0/deed.en",
-  "description": "Copy of https://en.wikipedia.org/wiki/File:Free-to-read_lock_75.svg\; 
class=\"extiw\" title=\"en:File:Free-to-read lock 
75.svg\">en:File:Free-to-read_lock_75.svg, with consistent name for the 
series of locks"
-},
-{
-  "section_id": 33,
-  "type": "image",
-  "titles": {
-"canonical": "File:Folder_Hexagonal_Icon.svg",
-"normalized": "File:Folder Hexagonal Icon.svg",
-"display": "File:Folder Hexagonal Icon.svg"
-  },
-  "thumbnail": {
-"source": 
"https://upload.wikimedia.org/wikipedia/en/thumb/4/48/Folder_Hexagonal_Icon.svg/320px-Folder_Hexagonal_Icon.svg.png",
-"width": 320,
-"height": 276,
-"mime": "image/png"
-  },
-  "original": {
-"source": 
"https://upload.wikimedia.org/wikipedia/en/4/48/Folder_Hexagonal_Icon.svg",
-"mime": "image/svg+xml"
-  },
-  "file_page": 
"https://commons.wikimedia.org/wiki/File:Folder_Hexagonal_Icon.svg",
-  "license": "Cc-by-sa-3.0",
-  "license_url": "https://creativecommons.org/licenses/by-sa/3.0/"
-},
-{
-  "section_id": 33,
-  "type": "image",
-  "titles": {
-"canonical": "File:Portal-puzzle.svg",
-"normalized": "File:Portal-puzzle.svg",
-"display": "File:Portal-puzzle.svg"
-  },
-  "thumbnail": {
-"source":