ArielGlenn has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/120769

Change subject: rename wmfgetremoteimages.py to what it does 
(listmediaperproject.py)
......................................................................

rename wmfgetremoteimages.py to what it does (listmediaperproject.py)

that was a horrible name from a horrible era

Change-Id: Ie38d49eb990fd2d4305ddc84a3e25b97a26d0f35
---
A xmldumps-backup/wikiqueries/listmediaperproject.py
1 file changed, 192 insertions(+), 0 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/operations/dumps 
refs/changes/69/120769/1

diff --git a/xmldumps-backup/wikiqueries/listmediaperproject.py 
b/xmldumps-backup/wikiqueries/listmediaperproject.py
new file mode 100644
index 0000000..fca9f51
--- /dev/null
+++ b/xmldumps-backup/wikiqueries/listmediaperproject.py
@@ -0,0 +1,192 @@
+import os
+import sys
+import getopt
+import time
+from subprocess import Popen, PIPE
+from wikiqueries import Config
+
+
+class MediaPerProject(object):
+    def __init__(self, conf, outputDir, remoteRepoName,
+                 verbose, wqConfigFile, wqPath, overwrite, wiki=None):
+        self.conf = conf
+        self.outputDir = outputDir
+        self.remoteRepoName = remoteRepoName
+        self.verbose = verbose
+        self.date = time.strftime("%Y%m%d", time.gmtime())
+        self.fileNameFormat = "{w}-{d}-wikiqueries.gz"
+        self.wqConfigFile = wqConfigFile
+        self.wqPath = wqPath
+        self.overwrite = overwrite
+        if not os.path.exists(outputDir):
+            os.makedirs(outputDir)
+        if wiki is not None:
+            self.wikisToDo = [wiki]
+        else:
+            self.wikisToDo = [w for w in self.conf.allWikisList
+                              if w not in self.conf.privateWikisList and
+                              w not in self.conf.closedWikisList]
+
+    def getFileNameFormat(self, phase):
+        return "{w}-{d}-" + phase + "-wikiqueries.gz"
+
+    def writeLocalMedia(self):
+        if self.verbose:
+            print "Starting round one wikiqueries for image table"
+        if len(self.wikisToDo) == 1:
+            wiki = self.wikisToDo[0]
+        else:
+            wiki = None
+        self.doWikiQueries('select img_name, img_timestamp from image',
+                           self.getFileNameFormat("local"), wiki)
+        if self.verbose:
+            print "Done round one!!"
+
+    def doWikiQueries(self, query, fileNameFormat, wiki=None):
+        if not os.path.exists(wqConfigFile):
+            print "config file  %s does not exist" % wqConfigFile
+            sys.exit(1)
+        command = ["python", self.wqPath, "--configfile", wqConfigFile,
+                   "--query", query, "--outdir", self.outputDir,
+                   "--filenameformat", fileNameFormat]
+        if self.verbose:
+            command.append("--verbose")
+        if not self.overwrite:
+            command.append("--nooverwrite")
+        if wiki:
+            command.append(wiki)
+        commandString = " ".join(["'" + c + "'" for c in command])
+
+        if self.verbose:
+            print "About to run wikiqueries:", commandString
+        try:
+            proc = Popen(command, stderr=PIPE)
+            output_unused, error = proc.communicate()
+            if proc.returncode:
+                print ("command '%s failed with return code %s and error %s"
+                       % (command, proc.returncode, error))
+                sys.exit(1)
+        except:
+            print "command %s failed" % command
+            raise
+
+    def writeRemoteMedia(self):
+        if self.verbose:
+            print "Starting round two wikiqueries for global image links table"
+
+        for w in self.wikisToDo:
+            if w == self.remoteRepoName:
+                if self.verbose:
+                    print "Skipping", w, "because it's the remote repo"
+            else:
+                if self.verbose:
+                    print "Doing db", w
+                self.doWikiQueries('select gil_to from globalimagelinks'
+                                   ' where gil_wiki= "%s"' % w,
+                                   self.getFileNameFormat("remote").format(
+                                       w=w, d='{d}'), self.remoteRepoName)
+        if self.verbose:
+            print "Done round two!!"
+
+
+def usage(message=None):
+    if message:
+        sys.stderr.write(message + "\n")
+
+    usage_message = """Usage: python listmediaperproject.py --outputdir dirname
+                  [--remoterepo reponame] [--localonly] [--remoteonly]
+                  [--verbose] [--wqconfig filename] [wqpath filename] [wiki]
+
+This script produces a list of media files in use on the local wiki stored on a
+remote repo (e.g. commons).
+
+--outputdir:      where to put the list of remotely hosted media per project
+--remotereponame: name of the remote repo that houses media for projects
+                  default: 'commonswiki'
+--nooverwrite:    if run for the same wiki(s) on the same date, don't overwrite
+                  existing files
+--verbose:        print lots of status messages
+--wqconfig:       relative or absolute path of wikiquery config file
+                  default: wikiqueries.conf
+--wqpath:         relative or absolute path of the wikiqieries python script
+                  default: wikiqueries.py
+--localonly:      only generate the lists of local media (first half of run)
+--remoteonly:     only generate the lists of remotely hosted media (second half
+                  of run)
+"""
+    sys.stderr.write(usage_message)
+    sys.exit(1)
+
+
+if __name__ == "__main__":
+    outputDir = None
+    remoteRepoName = "commonswiki"
+    verbose = False
+    wiki = None
+    remoteOnly = False
+    localOnly = False
+    # by default we will overwrite existing files for
+    # the same date and wiki(s)
+    overwrite = True
+    wqPath = os.path.join(os.getcwd(), "wikiqueries.py")
+    wqConfigFile = os.path.join(os.getcwd(), "wikiqueries.conf")
+
+    try:
+        (options, remainder) = getopt.gnu_getopt(sys.argv[1:], "", [
+            "outputdir=", "remotereponame=", "wqconfig=", "wqpath=",
+            "remoteonly", "localonly",
+            "nooverwrite", "verbose"])
+    except:
+        usage("Unknown option specified")
+
+    for (opt, val) in options:
+        if opt == "--outputdir":
+            outputDir = val
+        elif opt == "--remotereponame":
+            remoteRepoName = val
+        elif opt == "--remoteonly":
+            remoteOnly = True
+        elif opt == "--localonly":
+            localOnly = True
+        elif opt == "--nooverwrite":
+            overwrite = False
+        elif opt == "--verbose":
+            verbose = True
+        elif opt == "--wqconfig":
+            wqConfigFile = val
+            if not os.sep in val:
+                wqConfigFile = os.path.join(os.getcwd(), wqConfigFile)
+            # bummer but we can't really avoid ita
+        elif opt == "--wqpath":
+            wqPath = val
+            if not os.sep in val:
+                wqPath = os.path.join(os.getcwd(), wqPath)
+
+    if len(remainder) == 1:
+        if not remainder[0].isalpha():
+            usage("Unknown argument(s) specified")
+        else:
+            wiki = remainder[0]
+    elif len(remainder) > 1:
+        usage("Unknown argument(s) specified")
+
+    if not outputDir:
+        usage("One or more mandatory options missing")
+    if localOnly and remoteOnly:
+        usage("Only one of 'localonly' and 'remoteonly'"
+              " may be specified at once.")
+
+    config = Config(wqConfigFile)
+
+    mpp = MediaPerProject(config, outputDir, remoteRepoName,
+                          verbose, wqConfigFile, wqPath, overwrite, wiki)
+    if not remoteOnly:
+        if verbose:
+            print "generating lists of local media on each project"
+        mpp.writeLocalMedia()
+    if not localOnly:
+        if verbose:
+            print "generating remote hosted media lists for all projects"
+        mpp.writeRemoteMedia()
+    if verbose:
+        print "all projects completed."

-- 
To view, visit https://gerrit.wikimedia.org/r/120769
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: Ie38d49eb990fd2d4305ddc84a3e25b97a26d0f35
Gerrit-PatchSet: 1
Gerrit-Project: operations/dumps
Gerrit-Branch: ariel
Gerrit-Owner: ArielGlenn <ar...@wikimedia.org>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to