Mwalker has submitted this change and it was merged.

Change subject: Banner screenshot job
......................................................................


Banner screenshot job

Renders a banner using PhantomJS, for every campaign language, and stores the
images to disk.

TODO:
* Correct high-load behavior by waiting for page load or aborting.
* Add a switch to toggle between in-context rendering and clipping to 
div#centralNotice.
* Debug empty files.
* Null file if size is zero.

Change-Id: Ia7dcdaea11fddf5d2746e7e06e58b5bc9b857147
---
A banner_screenshot/.gitignore
A banner_screenshot/config.py.example
A banner_screenshot/mediawiki/__init__.py
A banner_screenshot/mediawiki/centralnotice.py
A banner_screenshot/mediawiki/time_util.py
A banner_screenshot/rasterize.js
A banner_screenshot/shoot_banners
7 files changed, 227 insertions(+), 0 deletions(-)

Approvals:
  Mwalker: Verified; Looks good to me, approved



diff --git a/banner_screenshot/.gitignore b/banner_screenshot/.gitignore
new file mode 100644
index 0000000..df81b2c
--- /dev/null
+++ b/banner_screenshot/.gitignore
@@ -0,0 +1,2 @@
+*.pyc
+config.py
diff --git a/banner_screenshot/config.py.example 
b/banner_screenshot/config.py.example
new file mode 100644
index 0000000..b7f64ab
--- /dev/null
+++ b/banner_screenshot/config.py.example
@@ -0,0 +1,7 @@
+article_url = 
"http://en.wikipedia.org/wiki/Special:Random?banner=%(banner)s&uselang=%(lang)s&country=%(country)s"
+centralnotice_mw_api = "http://meta.wikimedia.org/w/api.php";
+phantomjs = "/usr/local/phantomjs/phantomjs"
+banner_screenshots_dir = "/tmp/banner_screenshots"
+banner_screenshot_format = "png"
+crop_height = 500
+banner_name_regex = r'^B13_.*_(?P<lang>[a-z]{2})(?P<country>[A-Z0-9]{2})$'
diff --git a/banner_screenshot/mediawiki/__init__.py 
b/banner_screenshot/mediawiki/__init__.py
new file mode 100644
index 0000000..247139a
--- /dev/null
+++ b/banner_screenshot/mediawiki/__init__.py
@@ -0,0 +1,22 @@
+'''
+Dumb interface to the MediaWiki api.
+'''
+
+import config
+
+import json
+
+def mw_call( args ):
+    import simplemediawiki
+
+    wiki = simplemediawiki.MediaWiki(
+        config.centralnotice_mw_api,
+        user_agent='bot: fr-screenshots'
+    )
+    result = wiki.call( args )
+    if 'error' in result:
+        raise RuntimeError(json.dumps(result, indent=4).replace('\\n', '\n'))
+    val = result[ args['action'] ]
+    if 'list' in args:
+        val = val[ args['list'] ]
+    return val
diff --git a/banner_screenshot/mediawiki/centralnotice.py 
b/banner_screenshot/mediawiki/centralnotice.py
new file mode 100644
index 0000000..4133d29
--- /dev/null
+++ b/banner_screenshot/mediawiki/centralnotice.py
@@ -0,0 +1,56 @@
+'''
+Interface to the MediaWiki CentralNotice api
+'''
+
+from mediawiki import mw_call
+
+cached_campaigns = {}
+
+def get_banners( **kw ):
+    if 'campaign' in kw:
+        campaign = get_campaign( kw['campaign'] )
+        return campaign['banners'].keys()
+    return get_allocations( **kw )
+
+def get_campaign( campaign ):
+    #TODO: push caching down into mediawiki.mw_call, with optional invalidation
+    global cached_campaigns
+    if campaign in cached_campaigns:
+        return cached_campaigns[campaign]
+
+    #if '__iter__' in campaign: return get_campaigns
+    result = mw_call( {
+        'action': 'centralnoticequerycampaign',
+        'campaign': campaign,
+    } )
+
+    if campaign in result:
+        cached_campaigns[campaign] = result[campaign]
+        return cached_campaigns[campaign]
+
+def get_campaigns( campaigns ):
+    #FIXME cache
+    return mw_call( {
+        'action': 'centralnoticequerycampaign',
+        'campaign': '|'.join( campaigns ),
+    } )
+
+def get_allocations( project=None, language=None, country=None, 
anonymous=True, bucket='0' ): 
+    result = mw_call( {
+        'action': 'centralnoticeallocations',
+        'project': project,
+        'language': language,
+        'country': country,
+        'anonymous': anonymous,
+        'bucket': bucket,
+        'minimal': 'false'
+    } )
+    return result['banners']
+
+def get_campaign_logs( since=None ):
+    result = mw_call( {
+        'action': 'query',
+        'list': 'centralnoticelogs',
+        'start': since,
+    } )
+    return result['logs']
diff --git a/banner_screenshot/mediawiki/time_util.py 
b/banner_screenshot/mediawiki/time_util.py
new file mode 100644
index 0000000..b02c2a6
--- /dev/null
+++ b/banner_screenshot/mediawiki/time_util.py
@@ -0,0 +1,17 @@
+from datetime import datetime, timedelta
+
+def str_time_offset(str_time=None, **delta_args):
+    if not str_time:
+        str_time = str_now()
+    time_time = datetime.strptime( str_time, '%Y%m%d%H%M%S' )
+    str_time = ( time_time + timedelta( **delta_args )).strftime( 
'%Y%m%d%H%M%S' )
+    return(str_time)
+
+def str_now():
+    return( datetime.utcnow().strftime('%Y%m%d%H%M%S') )
+
+def datetimefunix( unix_timestamp ):
+    return datetime.fromtimestamp(unix_timestamp)
+
+def strfunix( unix_timestamp ):
+    return datetime.fromtimestamp(unix_timestamp).strftime('%Y-%m-%d %H:%M')
diff --git a/banner_screenshot/rasterize.js b/banner_screenshot/rasterize.js
new file mode 100644
index 0000000..2bdd15a
--- /dev/null
+++ b/banner_screenshot/rasterize.js
@@ -0,0 +1,41 @@
+var page = require('webpage').create(),
+    address, output, size;
+
+phantom.cookiesEnabled = true;
+
+if (phantom.args.length < 2 || phantom.args.length > 3) {
+    console.log('Usage: rasterize.js URL filename');
+    phantom.exit();
+} else {
+    page.onError = function (msg, trace) {
+       console.log(msg);
+       trace.forEach(function(item) {
+           console.log('  ', item.file, ':', item.line);
+       })
+    };
+    address = phantom.args[0];
+    output = phantom.args[1];
+    //page.customHeaders = { 'Referer': address };
+    page.viewportSize = { width: 1024, height: 728 };
+    page.open(address, function (status) {
+        if (status !== 'success') {
+            console.log('Unable to load the address!');
+        } else {
+            //console.log(JSON.stringify(phantom.cookies, null, 2));
+            window.setTimeout(function () {
+                page.clipRect = page.evaluate(function() {
+                    var cn = $('#centralNotice');
+                    return {
+                        top: cn.offset().top,
+                        left: cn.offset().left,
+                        width: cn.width(),
+                        height: cn.height()
+                    };
+                });
+                console.log(page.clipRect.width + " x " + 
page.clipRect.height);
+                page.render(output);
+                phantom.exit();
+            }, 1000);
+        }
+    });
+}
diff --git a/banner_screenshot/shoot_banners b/banner_screenshot/shoot_banners
new file mode 100755
index 0000000..17c78c4
--- /dev/null
+++ b/banner_screenshot/shoot_banners
@@ -0,0 +1,82 @@
+#!/usr/bin/env python
+
+import os
+import os.path
+import subprocess
+import re
+import sys
+
+import config
+from mediawiki.centralnotice import get_campaign_logs
+from mediawiki.time_util import str_time_offset
+
+JS_RENDER_SCRIPT = os.path.join(os.path.dirname(os.path.abspath(__file__)), 
"rasterize.js")
+
+def reduce_banners(campaign_logs):
+    '''Return a map from banner names to most recent campaign settings.'''
+    banners = dict()
+    for entry in campaign_logs:
+        settings = entry['end']
+        campaign_banners = settings['banners']
+
+        # we only need one country...
+        settings['country'] = "US"
+        if settings['geo'] == "1" and settings['countries']:
+            settings['country'] = settings['countries'][0]
+
+        if hasattr(campaign_banners, 'keys'):
+            banners.update(
+                dict.fromkeys(
+                    campaign_banners.keys(), settings
+                )
+            )
+
+    return banners
+
+def get_screenshot_path(name, lang):
+    return os.path.join(
+        config.banner_screenshots_dir,
+        "%(banner)s/%(banner)s_%(lang)s.%(ext)s" % {
+            "banner": name,
+            "lang": lang,
+            "ext": config.banner_screenshot_format,
+        }
+    )
+
+def banner_screenshot_exists(name, lang):
+    return os.path.exists(get_screenshot_path(name, lang))
+
+def render(name, lang, country):
+    url = config.article_url % { "banner": name, "lang": lang, "country": 
country }
+    path = get_screenshot_path(name, lang)
+    dir = os.path.dirname(path)
+    if not os.path.exists(dir):
+        os.makedirs(dir)
+
+    print "Fetching " + url + " into " + path
+    subprocess.check_call([config.phantomjs, JS_RENDER_SCRIPT, url, path])
+
+def process_banners():
+    banners = reduce_banners(get_campaign_logs(since=str_time_offset(days=-2)))
+    for name, campaign_settings in banners.items():
+        country = "US"
+        m = re.match(config.banner_name_regex, name)
+        if m:
+            explicit_lang = m.group('lang')
+            if explicit_lang != "yy":
+                campaign_settings['languages'] = [ explicit_lang ]
+            explicit_country = m.group('country')
+            if explicit_country != "YY":
+                campaign_settings['country'] = explicit_country
+
+        for lang in campaign_settings['languages']:
+            if not banner_screenshot_exists(name, lang):
+                render(name, lang, campaign_settings['country'])
+
+
+if __name__ == "__main__":
+    if len(sys.argv) > 1:
+        for name in sys.argv[1:]:
+            screenshot_banner(name)
+    else:
+        process_banners()

-- 
To view, visit https://gerrit.wikimedia.org/r/37361
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: Ia7dcdaea11fddf5d2746e7e06e58b5bc9b857147
Gerrit-PatchSet: 13
Gerrit-Project: wikimedia/fundraising/tools
Gerrit-Branch: master
Gerrit-Owner: Adamw <awi...@wikimedia.org>
Gerrit-Reviewer: Adamw <awi...@wikimedia.org>
Gerrit-Reviewer: Katie Horn <kh...@wikimedia.org>
Gerrit-Reviewer: Mwalker <mwal...@wikimedia.org>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to