ArielGlenn has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/65689


Change subject: include dump of (one of the) wikidata tables
......................................................................

include dump of (one of the) wikidata tables

Change-Id: I9126664d4ebbfabaf60a849b8d9b7a2735d0cff7
---
M xmldumps-backup/README.config
M xmldumps-backup/WikiDump.py
M xmldumps-backup/wikidump.conf.sample
M xmldumps-backup/worker.py
4 files changed, 14 insertions(+), 0 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/operations/dumps 
refs/changes/89/65689/1

diff --git a/xmldumps-backup/README.config b/xmldumps-backup/README.config
index e3613ce..41a8699 100644
--- a/xmldumps-backup/README.config
+++ b/xmldumps-backup/README.config
@@ -59,6 +59,9 @@
 flaggedrevslist -- File with list of databases which have flagged revisions 
                enabled.  (Really, we should be able to determine this 
                another way instead of keeping a separate list, right?)
+wikidatalist    -- File with list of databases which act as a wikibase
+               repo. For Wikimedia projects this currently consists
+               of the project 'wikidata'.
 biglist -- File with list of large wikis for which no history dumps are 
                generated because they are too huge. (This must be an old 
                deprecated option; these days we do not care how big they 
diff --git a/xmldumps-backup/WikiDump.py b/xmldumps-backup/WikiDump.py
index ad22b7d..0c48cf0 100644
--- a/xmldumps-backup/WikiDump.py
+++ b/xmldumps-backup/WikiDump.py
@@ -175,6 +175,7 @@
                        "dblist": "",
                        "privatelist": "",
                        "flaggedrevslist": "",
+                       "wikidatalist": "",
 #                      "dir": "",
                        "forcenormal": "0",
                        "halt": "0",
@@ -258,6 +259,7 @@
                self.skipDbList = MiscUtils.dbList(self.conf.get("wiki", 
"skipdblist"))
                self.privateList = MiscUtils.dbList(self.conf.get("wiki", 
"privatelist"))
                self.flaggedRevsList = MiscUtils.dbList(self.conf.get("wiki", 
"flaggedrevslist"))
+               self.wikidataList = MiscUtils.dbList(self.conf.get("wiki", 
"wikidatalist"))
                self.wikiDir = self.conf.get("wiki", "dir")
                self.forceNormal = self.conf.getint("wiki", "forcenormal")
                self.halt = self.conf.getint("wiki", "halt")
@@ -423,6 +425,9 @@
        
        def hasFlaggedRevs(self):
                return self.dbName in self.config.flaggedRevsList
+
+       def hasWikidata(self):
+               return self.dbName in self.config.wikidataList
        
        def isLocked(self):
                return os.path.exists(self.lockFile())
diff --git a/xmldumps-backup/wikidump.conf.sample 
b/xmldumps-backup/wikidump.conf.sample
index 5feed3b..57de2e0 100644
--- a/xmldumps-backup/wikidump.conf.sample
+++ b/xmldumps-backup/wikidump.conf.sample
@@ -5,6 +5,7 @@
 skipdblist=/home/ariel/src/mediawiki/testing/backup/skip.dblist
 privatelist=/home/ariel/src/mediawiki/testing/backup/private.dblist
 flaggedrevslist=/home/ariel/src/mediawiki/testing/backup/flagged.dblist
+wikidatalist=/home/ariel/src/mediawiki/testing/backup/wikidata.dblist
 biglist=/home/ariel/src/mediawiki/testing/backup/big.dblist
 dir=/home/ariel/src/mediawiki/1.16wmf4/phase3
 forcenormal=1
diff --git a/xmldumps-backup/worker.py b/xmldumps-backup/worker.py
index 08c3816..fc6b015 100644
--- a/xmldumps-backup/worker.py
+++ b/xmldumps-backup/worker.py
@@ -557,6 +557,7 @@
        def __init__(self, wiki, prefetch, spawn, chunkToDo, checkpointFile, 
singleJob, chunkInfo, pageIDRange, runInfoFile, dumpDir):
                self.wiki = wiki
                self._hasFlaggedRevs = self.wiki.hasFlaggedRevs()
+               self._hasWikidata = self.wiki.hasWikidata()
                self._prefetch = prefetch
                self._spawn = spawn
                self.chunkInfo = chunkInfo
@@ -663,6 +664,10 @@
                        self.dumpItems.append(
                                PublicTable( "flaggedrevs", 
"flaggedrevstable","This contains a row for each flagged revision, containing 
who flagged it, when it was flagged, reviewer comments, the flag values, and 
the quality tier those flags fall under." ))
                                              
+               if self._hasWikidata:
+                       self.dumpItems.append(
+                               PublicTable( "wb_items_per_site", 
"wbitemspersitetable","For each Wikidata item, this contains rows with the 
corresponding page name on a given wiki project." ))
+
                self.dumpItems.append(
                        BigXmlDump("meta-history",
                                   "metahistorybz2dump",

-- 
To view, visit https://gerrit.wikimedia.org/r/65689
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I9126664d4ebbfabaf60a849b8d9b7a2735d0cff7
Gerrit-PatchSet: 1
Gerrit-Project: operations/dumps
Gerrit-Branch: ariel
Gerrit-Owner: ArielGlenn <ar...@wikimedia.org>
Gerrit-Reviewer: jenkins-bot

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to