jenkins-bot has submitted this change and it was merged.

Change subject: Update: move full headline selector into news-sites
......................................................................


Update: move full headline selector into news-sites

• Move complete selector logic into news-sites. Some of the selectors
  are now more specific but the usage of the selector is much simpler:
  it's just a selector to extract all the headlines

• Rename news "story" terminology to "headline". All the stories used
  are highlights and "headline" is slightly more accurate

• Fix selection for Danish Wikipedia to omit recently deceased

Bug: T148444
Change-Id: I02689eb7342dcb2c2f5bbfa0c164ff3a547a0858
---
M etc/feed/news-sites.js
M lib/feed/news.js
2 files changed, 25 insertions(+), 27 deletions(-)

Approvals:
  Mholloway: Looks good to me, approved
  jenkins-bot: Verified



diff --git a/etc/feed/news-sites.js b/etc/feed/news-sites.js
index a08a005..1dc0065 100644
--- a/etc/feed/news-sites.js
+++ b/etc/feed/news-sites.js
@@ -6,28 +6,27 @@
  * Wikipedia site.
  * The object consists of a title and a selector.
  * 1) The title is the page title used to scrape the news items from.
- * 2) The selector is used to find a parent HTML element for <li> elements.
- *    In most cases simply the first 'ul' suffices but there are exceptions,
- *    often because the first <ul> contains template instructions instead
- *    of the actual news items, or instead of a <ul> Parsoid uses a <div>.
+ * 2) The selector is used to query news headlines, usually found in the
+ *    first 'ul' but there are exceptions, often because the first <ul>
+ *    contains template instructions instead of the actual news items,
+ *    or instead of a <ul> Parsoid uses a <div>. This selector should
+ *    not include specific categories like deaths, sports, etc.
  */
-const NEWS_TEMPLATES = {
-    en: { title: 'Template:In_the_news', selector: 'ul[id^=mw]' },
-
-    da: { title: 'Skabelon:Forside_aktuelle_begivenheder', selector: 'div' },
-    de: { title: 'Wikipedia:Hauptseite/Aktuelles', selector: 'ul' },
-    el: { title: 'Πύλη:Τρέχοντα_γεγονότα/Επικεφαλίδες', selector: 'ul' },
-    es: { title: 'Portal:Actualidad', selector: 'ul' },
-    fi: { title: 'Malline:Uutisissa', selector: 'ul' },
-    fr: { title: 'Modèle:Accueil_actualité', selector: 'ul[id^=mw]' },
-    he: { title: 'תבנית:חדשות_ואקטואליה', selector: 'ul' },
-    ko: { title: '틀:새로_들어온_소식', selector: 'ul' },
-    no: { title: 'Mal:Aktuelt', selector: 'ul' },
-    pl: { title: 'Szablon:Aktualności', selector: 'ul:last-of-type' },
-    pt: { title: 'Portal:Eventos_atuais', selector: 'ul' },
-    ru: { title: 'Шаблон:Актуальные_события', selector: 'ul' },
-    sv: { title: 'Portal:Huvudsida/Aktuella händelser', selector: 'ul' },
-    vi: { title: 'Bản_mẫu:Tin_tức', selector: 'ul' }
+module.exports = {
+    da: { title: 'Skabelon:Forside_aktuelle_begivenheder', headlineSelector: 'div > li' },
+    de: { title: 'Wikipedia:Hauptseite/Aktuelles', headlineSelector: 'li' },
+    el: { title: 'Πύλη:Τρέχοντα_γεγονότα/Επικεφαλίδες', headlineSelector: 'li' },
+    en: { title: 'Template:In_the_news', headlineSelector: 'ul[id^=mw] li' },
+    es: { title: 'Portal:Actualidad',
+        headlineSelector: 'table:nth-of-type(1) > tbody > tr > td > ul:nth-of-type(1) > li' },
+    fi: { title: 'Malline:Uutisissa', headlineSelector: 'body > ul > li' },
+    fr: { title: 'Modèle:Accueil_actualité', headlineSelector: 'div ul[id^=mw] > li' },
+    he: { title: 'תבנית:חדשות_ואקטואליה', headlineSelector: 'body > ul > li' },
+    ko: { title: '틀:새로_들어온_소식', headlineSelector: 'body > ul > li' },
+    no: { title: 'Mal:Aktuelt', headlineSelector: 'ul > li' },
+    pl: { title: 'Szablon:Aktualności', headlineSelector: 'ul:last-of-type > li' },
+    pt: { title: 'Portal:Eventos_atuais', headlineSelector: 'div > ul > li' },
+    ru: { title: 'Шаблон:Актуальные_события', headlineSelector: 'body > ul > li' },
+    sv: { title: 'Portal:Huvudsida/Aktuella händelser', headlineSelector: 'body > ul > li' },
+    vi: { title: 'Bản_mẫu:Tin_tức', headlineSelector: 'ul > li' }
 };
-
-module.exports = NEWS_TEMPLATES;
diff --git a/lib/feed/news.js b/lib/feed/news.js
index af945d2..afb0426 100644
--- a/lib/feed/news.js
+++ b/lib/feed/news.js
@@ -54,15 +54,14 @@
     req.params.title = NEWS_TEMPLATES[lang].title;
     return parsoid.getParsoidHtml(app, req)
     .then((response) => {
-        const stories = domino.createDocument(response.body)
-                            .querySelector(NEWS_TEMPLATES[lang].selector)
-                            .getElementsByTagName('li');
+        const headlines = domino.createDocument(response.body)
+                            .querySelectorAll(NEWS_TEMPLATES[lang].headlineSelector);
         const result = {
             payload: [],
             meta: { etag: parsoid.getRevisionFromEtag(response.headers) }
         };
 
-        Array.prototype.forEach.call(stories, (storyHtml) => {
+        Array.prototype.forEach.call(headlines, (storyHtml) => {
             result.payload.push(constructStory(app.restbase_tpl, req.params.domain, storyHtml));
         });
 

-- 
To view, visit https://gerrit.wikimedia.org/r/323085
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: I02689eb7342dcb2c2f5bbfa0c164ff3a547a0858
Gerrit-PatchSet: 5
Gerrit-Project: mediawiki/services/mobileapps
Gerrit-Branch: master
Gerrit-Owner: Niedzielski <sniedziel...@wikimedia.org>
Gerrit-Reviewer: BearND <bsitzm...@wikimedia.org>
Gerrit-Reviewer: Dbrant <dbr...@wikimedia.org>
Gerrit-Reviewer: Fjalapeno <cfl...@wikimedia.org>
Gerrit-Reviewer: GWicke <gwi...@wikimedia.org>
Gerrit-Reviewer: Jdlrobson <jrob...@wikimedia.org>
Gerrit-Reviewer: Jhernandez <jhernan...@wikimedia.org>
Gerrit-Reviewer: Mholloway <mhollo...@wikimedia.org>
Gerrit-Reviewer: Mhurd <mh...@wikimedia.org>
Gerrit-Reviewer: Mobrovac <mobro...@wikimedia.org>
Gerrit-Reviewer: Niedzielski <sniedziel...@wikimedia.org>
Gerrit-Reviewer: Ppchelko <ppche...@wikimedia.org>
Gerrit-Reviewer: jenkins-bot <>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to