Matěj Suchánek has uploaded a new change for review. ( 
https://gerrit.wikimedia.org/r/342588 )

Change subject: [IMPR] Make preloading generators work with arbitrary entity 
types
......................................................................

[IMPR] Make preloading generators work with arbitrary entity types

Bug: T160397
Change-Id: Ie068ca3427063ff13ba4545544a1b3965ab7d88d
---
M pywikibot/pagegenerators.py
M pywikibot/site.py
2 files changed, 47 insertions(+), 19 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/pywikibot/core 
refs/changes/88/342588/2

diff --git a/pywikibot/pagegenerators.py b/pywikibot/pagegenerators.py
index 13618af..fab81fc 100644
--- a/pywikibot/pagegenerators.py
+++ b/pywikibot/pagegenerators.py
@@ -14,7 +14,7 @@
 &params;
 """
 #
-# (C) Pywikibot team, 2008-2017
+# (C) Pywikibot team, 2008-2018
 #
 # Distributed under the terms of the MIT license.
 #
@@ -488,7 +488,7 @@
                 dupfiltergen, self.subpage_max_depth)
 
         if self.claimfilter_list:
-            dupfiltergen = PreloadingItemGenerator(dupfiltergen)
+            dupfiltergen = PreloadingEntityGenerator(dupfiltergen)
             for claim in self.claimfilter_list:
                 dupfiltergen = ItemClaimFilterPageGenerator(dupfiltergen,
                                                             claim[0], claim[1],
@@ -1948,11 +1948,11 @@
 
 
 @deprecated_args(step='groupsize')
-def PreloadingItemGenerator(generator, groupsize=50):
+def PreloadingEntityGenerator(generator, groupsize=50):
     """
     Yield preloaded pages taken from another generator.
 
-    Function basically is copied from above, but for ItemPage's
+    Function basically is copied from above, but for Wikibase entities.
 
     @param generator: pages to iterate over
     @param groupsize: how many pages to preload at once
@@ -1960,26 +1960,16 @@
     """
     sites = {}
     for page in generator:
-        if not isinstance(page, pywikibot.page.WikibasePage):
-            datasite = page.site.data_repository()
-            if page.namespace() != datasite.item_namespace:
-                pywikibot.output(
-                    u'PreloadingItemGenerator skipping %s as it is not in %s'
-                    % (page, datasite.item_namespace))
-                continue
-
-            page = pywikibot.ItemPage(datasite, page.title())
-
         site = page.site
         sites.setdefault(site, []).append(page)
         if len(sites[site]) >= groupsize:
             # if this site is at the groupsize, process it
             group = sites.pop(site)
-            for i in site.preloaditempages(group, groupsize):
+            for i in site.preload_entities(group, groupsize):
                 yield i
     for site, pages in sites.items():
         # process any leftover sites that never reached the groupsize
-        for i in site.preloaditempages(pages, groupsize):
+        for i in site.preload_entities(pages, groupsize):
             yield i
 
 
@@ -2880,6 +2870,8 @@
             yield page
 
 
+PreloadingItemGenerator = redirect_func(PreloadingEntityGenerator,
+                                        old_name='PreloadingItemGenerator')
 # Deprecated old names available for compatibility with compat.
 ImageGenerator = redirect_func(PageClassGenerator, old_name='ImageGenerator')
 FileGenerator = redirect_func(PageClassGenerator, old_name='FileGenerator')
diff --git a/pywikibot/site.py b/pywikibot/site.py
index c715902..7451931 100644
--- a/pywikibot/site.py
+++ b/pywikibot/site.py
@@ -6,7 +6,7 @@
 groups of wikis on the same topic in different languages.
 """
 #
-# (C) Pywikibot team, 2008-2017
+# (C) Pywikibot team, 2008-2018
 #
 # Distributed under the terms of the MIT license.
 #
@@ -7056,6 +7056,11 @@
 
 class DataSite(APISite):
 
+    type_to_class = {
+        'item': pywikibot.page.ItemPage,
+        'property': pywikibot.page.PropertyPage,
+    }
+
     """Wikibase data capable site."""
 
     def __init__(self, *args, **kwargs):
@@ -7255,9 +7260,9 @@
             raise api.APIError(data['errors'])
         return data['entities']
 
-    def preloaditempages(self, pagelist, groupsize=50):
+    def preload_entities(self, pagelist, groupsize=50):
         """
-        Yield ItemPages with content prefilled.
+        Yield subclasses of WikibasePage with content prefilled.
 
         Note that pages will be iterated in a different order
         than in the underlying pagelist.
@@ -7275,6 +7280,37 @@
                     for key in ident:
                         req[key].append(ident[key])
                 else:
+                    if p.site == self and p.namespace() in (
+                            self.item_namespace, self.property_namespace):
+                        req['ids'].append(p.title(withNamespace=False))
+                    else:
+                        assert p.site.has_data_repository, \
+                            'Site must have a data repository'
+                        req['sites'].append(p.site.dbName())
+                        req['titles'].append(p._link._text)
+
+            req = self._simple_request(action='wbgetentities', **req)
+            data = req.submit()
+            for entity in data['entities']:
+                if 'missing' in data['entities'][entity]:
+                    continue
+                cls = self.type_to_class[data['entities'][entity]['type']]
+                page = cls(self, entity)
+                page._content = data['entities'][entity]
+                page.get()
+                yield page
+
+    @deprecated('DataSite.preload_entities')
+    def preloaditempages(self, pagelist, groupsize=50):
+        """DEPRECATED"""
+        for sublist in itergroup(pagelist, groupsize):
+            req = {'ids': [], 'titles': [], 'sites': []}
+            for p in sublist:
+                if isinstance(p, pywikibot.page.WikibasePage):
+                    ident = p._defined_by()
+                    for key in ident:
+                        req[key].append(ident[key])
+                else:
                     assert p.site.has_data_repository, \
                         'Site must have a data repository'
                     if (p.site == p.site.data_repository() and

-- 
To view, visit https://gerrit.wikimedia.org/r/342588
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: Ie068ca3427063ff13ba4545544a1b3965ab7d88d
Gerrit-PatchSet: 2
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Owner: Matěj Suchánek <matejsuchane...@gmail.com>
Gerrit-Reviewer: jenkins-bot <>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to