[MediaWiki-commits] [Gerrit] research/recommendation-api[master]: Use related-articles in translation recommendation
jenkins-bot has submitted this change and it was merged. ( https://gerrit.wikimedia.org/r/326903 ) Change subject: Use related-articles in translation recommendation .. Use related-articles in translation recommendation Bug: T151793 Change-Id: Iff6b932606cebb7fc239fdb1d64703525069c782 --- M recommendation/api/external_data/fetcher.py M recommendation/api/external_data/wikidata.py M recommendation/api/types/translation/candidate_finders.py M recommendation/api/types/translation/translation.py M recommendation/data/labs_setup.sh 5 files changed, 31 insertions(+), 4 deletions(-) Approvals: Nschaaf: Looks good to me, approved jenkins-bot: Verified diff --git a/recommendation/api/external_data/fetcher.py b/recommendation/api/external_data/fetcher.py index 76e8b6b..6d6fdf7 100644 --- a/recommendation/api/external_data/fetcher.py +++ b/recommendation/api/external_data/fetcher.py @@ -108,3 +108,8 @@ seed = 'morelike:' + seed params['srsearch'] = seed return endpoint, params + + +def get_related_articles(source, seed): +return get('http://recommend-related-articles.wmflabs.org/types/related_articles/v1/articles', + dict(source=source, seed=seed, count=500)) diff --git a/recommendation/api/external_data/wikidata.py b/recommendation/api/external_data/wikidata.py index 82f7178..7359fbc 100644 --- a/recommendation/api/external_data/wikidata.py +++ b/recommendation/api/external_data/wikidata.py @@ -9,11 +9,11 @@ WikidataItem = collections.namedtuple('WikidataItem', ['id', 'title', 'url']) -def query(params): +def query(params, expected_sitelinks=1): """ Query the wikidata endpoint and return a list of WikidataItem - This only includes items that have exactly 1 sitelink + This only includes items that have exactly expected_sitelinks sitelink """ endpoint = configuration.get_config_value('endpoints', 'wikidata') try: @@ -28,7 +28,7 @@ for id, entity in entities.items(): sitelinks = entity.get('sitelinks', {}) -if len(sitelinks.keys()) != 1: +if len(sitelinks.keys()) != expected_sitelinks: continue sitelink = sitelinks.popitem()[1] @@ -43,7 +43,12 @@ def get_items_in_source_missing_in_target_by_titles(source, target, titles): params = configuration.get_config_dict('wikidata_titles_to_items_params') params['sites'] = params['sites'].format(source=source) +# We want the sitefilter to include both the source and target +# wikis. This sets up the scenario where if there is only 1 sitelink +# present, that means that the article is missing in the target (since +# the title will have come from the source wiki) params['sitefilter'] = params['sitefilter'].format(target=target) +params['sitefilter'] += '|{}wiki'.format(source) params['titles'] = '|'.join(titles) items = query(params) diff --git a/recommendation/api/types/translation/candidate_finders.py b/recommendation/api/types/translation/candidate_finders.py index 70ae772..ba73bf2 100644 --- a/recommendation/api/types/translation/candidate_finders.py +++ b/recommendation/api/types/translation/candidate_finders.py @@ -112,3 +112,19 @@ articles.append(a) return articles[:n] + + +class RelatedArticleFinder(CandidateFinder): +def get_candidates(self, s, seed, n): +results = fetcher.get_related_articles(s, seed) +if len(results) == 0: +return MorelikeCandidateFinder().get_candidates(s, seed, n) + +articles = [] +for item in results: +a = Article(item['title']) +a.wikidata_id = item['wikidata_id'] +a.rank = item['score'] +articles.append(a) + +return articles[:n] diff --git a/recommendation/api/types/translation/translation.py b/recommendation/api/types/translation/translation.py index d271c50..f22a880 100644 --- a/recommendation/api/types/translation/translation.py +++ b/recommendation/api/types/translation/translation.py @@ -166,6 +166,7 @@ 'morelike': candidate_finders.MorelikeCandidateFinder(), 'wiki': candidate_finders.MorelikeCandidateFinder(), 'mostpopular': candidate_finders.PageviewCandidateFinder(), +'related_articles': candidate_finders.RelatedArticleFinder() } diff --git a/recommendation/data/labs_setup.sh b/recommendation/data/labs_setup.sh index 8290c97..377b2d1 100755 --- a/recommendation/data/labs_setup.sh +++ b/recommendation/data/labs_setup.sh @@ -31,7 +31,7 @@ cp ${TMP_PATH}/recommendation-api/recommendation/data/* ${ETC_PATH} cp ${ETC_PATH}/recommendation.nginx /etc/nginx/sites-available/recommendation ln -s /etc/nginx/sites-available/recommendation /etc/nginx/sites-enabled/ -cp ${ETC_PATH}/recommendation.service /etc/systemd/system/multi-user.target/wants/ +cp ${ETC_PATH}/recommendation.service /etc/systemd/system/multi-user.target.wants/
[MediaWiki-commits] [Gerrit] research/recommendation-api[master]: Use related-articles in translation recommendation
Nschaaf has uploaded a new change for review. ( https://gerrit.wikimedia.org/r/326903 ) Change subject: Use related-articles in translation recommendation .. Use related-articles in translation recommendation Bug: T151793 Change-Id: Iff6b932606cebb7fc239fdb1d64703525069c782 --- M recommendation/api/external_data/fetcher.py M recommendation/api/external_data/wikidata.py M recommendation/api/types/translation/candidate_finders.py M recommendation/api/types/translation/translation.py M recommendation/data/labs_setup.sh 5 files changed, 35 insertions(+), 4 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/research/recommendation-api refs/changes/03/326903/1 diff --git a/recommendation/api/external_data/fetcher.py b/recommendation/api/external_data/fetcher.py index 76e8b6b..6d6fdf7 100644 --- a/recommendation/api/external_data/fetcher.py +++ b/recommendation/api/external_data/fetcher.py @@ -108,3 +108,8 @@ seed = 'morelike:' + seed params['srsearch'] = seed return endpoint, params + + +def get_related_articles(source, seed): +return get('http://recommend-related-articles.wmflabs.org/types/related_articles/v1/articles', + dict(source=source, seed=seed, count=500)) diff --git a/recommendation/api/external_data/wikidata.py b/recommendation/api/external_data/wikidata.py index 82f7178..7359fbc 100644 --- a/recommendation/api/external_data/wikidata.py +++ b/recommendation/api/external_data/wikidata.py @@ -9,11 +9,11 @@ WikidataItem = collections.namedtuple('WikidataItem', ['id', 'title', 'url']) -def query(params): +def query(params, expected_sitelinks=1): """ Query the wikidata endpoint and return a list of WikidataItem - This only includes items that have exactly 1 sitelink + This only includes items that have exactly expected_sitelinks sitelink """ endpoint = configuration.get_config_value('endpoints', 'wikidata') try: @@ -28,7 +28,7 @@ for id, entity in entities.items(): sitelinks = entity.get('sitelinks', {}) -if len(sitelinks.keys()) != 1: +if len(sitelinks.keys()) != expected_sitelinks: continue sitelink = sitelinks.popitem()[1] @@ -43,7 +43,12 @@ def get_items_in_source_missing_in_target_by_titles(source, target, titles): params = configuration.get_config_dict('wikidata_titles_to_items_params') params['sites'] = params['sites'].format(source=source) +# We want the sitefilter to include both the source and target +# wikis. This sets up the scenario where if there is only 1 sitelink +# present, that means that the article is missing in the target (since +# the title will have come from the source wiki) params['sitefilter'] = params['sitefilter'].format(target=target) +params['sitefilter'] += '|{}wiki'.format(source) params['titles'] = '|'.join(titles) items = query(params) diff --git a/recommendation/api/types/translation/candidate_finders.py b/recommendation/api/types/translation/candidate_finders.py index 70ae772..ba73bf2 100644 --- a/recommendation/api/types/translation/candidate_finders.py +++ b/recommendation/api/types/translation/candidate_finders.py @@ -112,3 +112,19 @@ articles.append(a) return articles[:n] + + +class RelatedArticleFinder(CandidateFinder): +def get_candidates(self, s, seed, n): +results = fetcher.get_related_articles(s, seed) +if len(results) == 0: +return MorelikeCandidateFinder().get_candidates(s, seed, n) + +articles = [] +for item in results: +a = Article(item['title']) +a.wikidata_id = item['wikidata_id'] +a.rank = item['score'] +articles.append(a) + +return articles[:n] diff --git a/recommendation/api/types/translation/translation.py b/recommendation/api/types/translation/translation.py index d271c50..f22a880 100644 --- a/recommendation/api/types/translation/translation.py +++ b/recommendation/api/types/translation/translation.py @@ -166,6 +166,7 @@ 'morelike': candidate_finders.MorelikeCandidateFinder(), 'wiki': candidate_finders.MorelikeCandidateFinder(), 'mostpopular': candidate_finders.PageviewCandidateFinder(), +'related_articles': candidate_finders.RelatedArticleFinder() } diff --git a/recommendation/data/labs_setup.sh b/recommendation/data/labs_setup.sh index 8290c97..228241d 100755 --- a/recommendation/data/labs_setup.sh +++ b/recommendation/data/labs_setup.sh @@ -8,6 +8,10 @@ apt-get install -y git nginx npm python3 python3-pip pip3 install --upgrade pip +# Need to add uwsgi to the wheels +apt-get install -y build-essential python3-dev +pip3 install uwsgi + rm -rf ${TMP_PATH} mkdir -p ${TMP_PATH} mkdir -p ${SRV_PATH}/resources @@ -31,7 +35,7 @@ cp ${TMP_PATH}/recommendation-api/recommendation/data/* ${ETC_PATH} cp ${ETC_PATH}/recommendation.nginx