Nschaaf has uploaded a new change for review. ( https://gerrit.wikimedia.org/r/326903 )

Change subject: Use related-articles in translation recommendation
......................................................................

Use related-articles in translation recommendation

Bug: T151793
Change-Id: Iff6b932606cebb7fc239fdb1d64703525069c782
---
M recommendation/api/external_data/fetcher.py
M recommendation/api/external_data/wikidata.py
M recommendation/api/types/translation/candidate_finders.py
M recommendation/api/types/translation/translation.py
M recommendation/data/labs_setup.sh
5 files changed, 35 insertions(+), 4 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/research/recommendation-api refs/changes/03/326903/1

diff --git a/recommendation/api/external_data/fetcher.py b/recommendation/api/external_data/fetcher.py
index 76e8b6b..6d6fdf7 100644
--- a/recommendation/api/external_data/fetcher.py
+++ b/recommendation/api/external_data/fetcher.py
@@ -108,3 +108,8 @@
         seed = 'morelike:' + seed
     params['srsearch'] = seed
     return endpoint, params
+
+
+def get_related_articles(source, seed):
+    return get('http://recommend-related-articles.wmflabs.org/types/related_articles/v1/articles',
+               dict(source=source, seed=seed, count=500))
diff --git a/recommendation/api/external_data/wikidata.py b/recommendation/api/external_data/wikidata.py
index 82f7178..7359fbc 100644
--- a/recommendation/api/external_data/wikidata.py
+++ b/recommendation/api/external_data/wikidata.py
@@ -9,11 +9,11 @@
 WikidataItem = collections.namedtuple('WikidataItem', ['id', 'title', 'url'])
 
 
-def query(params):
+def query(params, expected_sitelinks=1):
     """
     Query the wikidata endpoint and return a list of WikidataItem
 
-     This only includes items that have exactly 1 sitelink
+     This only includes items that have exactly expected_sitelinks sitelink
     """
     endpoint = configuration.get_config_value('endpoints', 'wikidata')
     try:
@@ -28,7 +28,7 @@
 
     for id, entity in entities.items():
         sitelinks = entity.get('sitelinks', {})
-        if len(sitelinks.keys()) != 1:
+        if len(sitelinks.keys()) != expected_sitelinks:
             continue
         sitelink = sitelinks.popitem()[1]
 
@@ -43,7 +43,12 @@
 def get_items_in_source_missing_in_target_by_titles(source, target, titles):
     params = configuration.get_config_dict('wikidata_titles_to_items_params')
     params['sites'] = params['sites'].format(source=source)
+    # We want the sitefilter to include both the source and target
+    # wikis. This sets up the scenario where if there is only 1 sitelink
+    # present, that means that the article is missing in the target (since
+    # the title will have come from the source wiki)
     params['sitefilter'] = params['sitefilter'].format(target=target)
+    params['sitefilter'] += '|{}wiki'.format(source)
     params['titles'] = '|'.join(titles)
 
     items = query(params)
diff --git a/recommendation/api/types/translation/candidate_finders.py b/recommendation/api/types/translation/candidate_finders.py
index 70ae772..ba73bf2 100644
--- a/recommendation/api/types/translation/candidate_finders.py
+++ b/recommendation/api/types/translation/candidate_finders.py
@@ -112,3 +112,19 @@
             articles.append(a)
 
         return articles[:n]
+
+
+class RelatedArticleFinder(CandidateFinder):
+    def get_candidates(self, s, seed, n):
+        results = fetcher.get_related_articles(s, seed)
+        if len(results) == 0:
+            return MorelikeCandidateFinder().get_candidates(s, seed, n)
+
+        articles = []
+        for item in results:
+            a = Article(item['title'])
+            a.wikidata_id = item['wikidata_id']
+            a.rank = item['score']
+            articles.append(a)
+
+        return articles[:n]
diff --git a/recommendation/api/types/translation/translation.py b/recommendation/api/types/translation/translation.py
index d271c50..f22a880 100644
--- a/recommendation/api/types/translation/translation.py
+++ b/recommendation/api/types/translation/translation.py
@@ -166,6 +166,7 @@
     'morelike': candidate_finders.MorelikeCandidateFinder(),
     'wiki': candidate_finders.MorelikeCandidateFinder(),
     'mostpopular': candidate_finders.PageviewCandidateFinder(),
+    'related_articles': candidate_finders.RelatedArticleFinder()
 }
 
 
diff --git a/recommendation/data/labs_setup.sh b/recommendation/data/labs_setup.sh
index 8290c97..228241d 100755
--- a/recommendation/data/labs_setup.sh
+++ b/recommendation/data/labs_setup.sh
@@ -8,6 +8,10 @@
 apt-get install -y git nginx npm python3 python3-pip
 pip3 install --upgrade pip
 
+# Need to add uwsgi to the wheels
+apt-get install -y build-essential python3-dev
+pip3 install uwsgi
+
 rm -rf ${TMP_PATH}
 mkdir -p ${TMP_PATH}
 mkdir -p ${SRV_PATH}/resources
@@ -31,7 +35,7 @@
 cp ${TMP_PATH}/recommendation-api/recommendation/data/* ${ETC_PATH}
 cp ${ETC_PATH}/recommendation.nginx /etc/nginx/sites-available/recommendation
 ln -s /etc/nginx/sites-available/recommendation /etc/nginx/sites-enabled/
-cp ${ETC_PATH}/recommendation.service /etc/systemd/system/multi-user.target/wants/
+cp ${ETC_PATH}/recommendation.service /etc/systemd/system/multi-user.target.wants/
 systemctl enable recommendation.service
 systemctl daemon-reload
 

-- 
To view, visit https://gerrit.wikimedia.org/r/326903
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: Iff6b932606cebb7fc239fdb1d64703525069c782
Gerrit-PatchSet: 1
Gerrit-Project: research/recommendation-api
Gerrit-Branch: master
Gerrit-Owner: Nschaaf <nsch...@wikimedia.org>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to