ArielGlenn has uploaded a new change for review. (
https://gerrit.wikimedia.org/r/355075 )
Change subject: script to generate pagesperchunkhistory config setting for a
given wiki
......................................................................
script to generate pagesperchunkhistory config setting for a given wiki
uses existing config file for db information, repeated db queries
to generate list of page intervals
Change-Id: I7fed8a0ca385d13c48b2be36f754fe7998247334
---
A xmldumps-backup/rebalance_pageranges.py
1 file changed, 85 insertions(+), 0 deletions(-)
git pull ssh://gerrit.wikimedia.org:29418/operations/dumps
refs/changes/75/355075/1
diff --git a/xmldumps-backup/rebalance_pageranges.py
b/xmldumps-backup/rebalance_pageranges.py
new file mode 100644
index 0000000..35a1acf
--- /dev/null
+++ b/xmldumps-backup/rebalance_pageranges.py
@@ -0,0 +1,85 @@
+"""
+generate new set of values for PagesPerChunkHistory based
+on current state of a given wiki
+"""
+import sys
+import getopt
+from dumps.WikiDump import Config
+from dumps.pagerange import PageRange
+from dumps.pagerange import QueryRunner
+
+
+def usage(message=None):
+ '''
+ display a helpful usage message with
+ an optional introductory message first
+ '''
+ if message is not None:
+ sys.stderr.write(message)
+ sys.stderr.write("\n")
+ usage_message = """
+Usage: rebalance_pagerange.py --wiki <wikiname>
+ --start <int> --end <int>
+ [--configfile <path>] [--verbose] [--help]
+
+--wiki (-w): name of db of wiki for which to run
+--jobs (-j): generate page ranges for this number of jobs
+--configfile (-c): path to config file
+--verbose (-v): display messages about what the script is doing
+--help (-h): display this help message
+"""
+ sys.stderr.write(usage_message)
+ sys.exit(1)
+
+
+def do_main():
+ """
+ main entry point
+ """
+ jobs = None
+ configpath = "wikidump.conf"
+ wikiname = None
+ verbose = False
+ try:
+ (options, remainder) = getopt.gnu_getopt(sys.argv[1:], "c:j:w:vh",
+ ["jobs=", "configfile=",
"wiki=",
+ "verbose", "help"])
+ except getopt.GetoptError as err:
+ usage("Unknown option specified: " + str(err))
+
+ for (opt, val) in options:
+ if opt in ["-c", "--configfile"]:
+ configpath = val
+ elif opt in ["-w", "--wiki"]:
+ wikiname = val
+ elif opt in ["-j", "--jobs"]:
+ if not val.isdigit():
+ usage("jobs must be a number")
+ jobs = int(val)
+ elif opt in ["-v", "--verbose"]:
+ verbose = True
+ elif opt in ["-h", "--help"]:
+ usage("Help for this script")
+
+ if not jobs or not wikiname:
+ usage("one of the mandatory arguments 'jobs' or 'wiki' was not
specified")
+
+ if len(remainder) > 0:
+ usage("Unknown option(s) specified: %s" % remainder[0])
+
+ wiki_config = Config(configpath)
+ # pick up the per-wiki settings here
+ wiki_config.parse_conffile_per_project(wikiname)
+
+ prange = PageRange(QueryRunner(wikiname, wiki_config, verbose), verbose)
+ ranges = prange.get_pageranges_for_jobs(jobs)
+ # convert ranges into the output we need for the pagesperchunkhistory
config
+ pages_per_job = [page_end - page_start for (page_start, page_end) in
ranges]
+ print "for {jobs} jobs, have ranges:".format(jobs=jobs)
+ print ranges
+ print "for {jobs} jobs, have config setting:".format(jobs=jobs)
+ print pages_per_job
+
+
+if __name__ == "__main__":
+ do_main()
--
To view, visit https://gerrit.wikimedia.org/r/355075
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: I7fed8a0ca385d13c48b2be36f754fe7998247334
Gerrit-PatchSet: 1
Gerrit-Project: operations/dumps
Gerrit-Branch: master
Gerrit-Owner: ArielGlenn <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits