Sitic has submitted this change and it was merged. Change subject: Refactor backend ......................................................................
Refactor backend Split backend into different modules, created a python package. Created a basic MediaWiki API wrapper. Bug: T97900 Change-Id: I07768df24e9e1d13c95f85b9f96ea808cf01ece4 --- A backend/__init__.py A backend/__main__.py A backend/celery/__init__.py A backend/celery/api.py A backend/celery/tasks.py M backend/config.py.sample D backend/oauth_handler.py D backend/public M backend/requirements.txt D backend/server.py A backend/server/__init__.py R backend/server/flask_mwoauth/__init__.py A backend/server/oauth_handler.py A backend/server/public D backend/tasks.py M scripts/_sge_webserver.sh M scripts/_sge_webserver_internal.sh M scripts/build_webserver.sh M scripts/celery.grid.sh D scripts/paths.sh A scripts/restart_celery.sh M scripts/start_webserver.sh A scripts/stop_celery.sh 23 files changed, 469 insertions(+), 306 deletions(-) Approvals: Sitic: Verified; Looks good to me, approved diff --git a/backend/__init__.py b/backend/__init__.py new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/backend/__init__.py diff --git a/backend/__main__.py b/backend/__main__.py new file mode 100644 index 0000000..f55506b --- /dev/null +++ b/backend/__main__.py @@ -0,0 +1,16 @@ +# -*- coding: utf-8 -*- +# ISC License +# Copyright (C) 2015 Jan Lebert +from __future__ import print_function +from __future__ import absolute_import +import sys + +from .server import run + +if len(sys.argv) < 2: + print("ERROR: no port number as first argument given. Quitting", + file=sys.stderr) + sys.exit(1) +else: + port = int(sys.argv[1]) + run(port) \ No newline at end of file diff --git a/backend/celery/__init__.py b/backend/celery/__init__.py new file mode 100644 index 0000000..68cef10 --- /dev/null +++ b/backend/celery/__init__.py @@ -0,0 +1,40 @@ +# -*- coding: utf-8 -*- +# ISC License +# Copyright (C) 2015 Jan Lebert +from __future__ import absolute_import +from celery import Celery +from celery.utils.log import get_task_logger +from kombu import Exchange, Queue + +from .. import config + +logger = get_task_logger(__name__) + +BROKER_URL = 'redis://{server}:{port}/{db}'.format( + server=config.redis_server, + port=config.redis_port, + db=config.redis_db +) + +app = Celery(broker=BROKER_URL, + include=['backend.celery.tasks']) + +app.conf.update( + CELERY_TASK_SERIALIZER='json', + CELERY_ACCEPT_CONTENT=['json'], + CELERY_IGNORE_RESULT=True, + CELERY_DISABLE_RATE_LIMITS=True, + CELERY_DEFAULT_QUEUE=config.toolname, + CELERY_QUEUES=( + Queue(config.redis_prefix + 'q', Exchange(config.toolname), + routing_key=config.toolname), + ), + BROKER_TRANSPORT_OPTIONS={ + 'fanout_prefix': True, + 'fanout_patterns': True, + 'keyprefix_queue': config.redis_prefix + '.binding.%s', + 'unacked_key': config.redis_prefix + '_unacked', + 'unacked_index_key': config.redis_prefix + '_unacked_index', + 'unacked_mutex_key': config.redis_prefix + '_unacked_mutex' + }, +) diff --git a/backend/celery/api.py b/backend/celery/api.py new file mode 100644 index 0000000..7a4af15 --- /dev/null +++ b/backend/celery/api.py @@ -0,0 +1,152 @@ +# -*- coding: utf-8 -*- +# ISC License +# Copyright (C) 2015 Jan Lebert +from __future__ import absolute_import +import requests +from requests_oauthlib import OAuth1 +from redis import StrictRedis +import json +from datetime import datetime, timedelta + +from .. import config +from . import logger + + +class MediaWiki(object): + def __init__(self, host="https://en.wikipedia.org", path="/w/api.php", + access_token=None, redis_channel=None): + self.api_url = host + path + self.wikis = {} + + user_agent = "crosswatch (https://tools.wmflabs.org/crosswatch;" +\ + "crosswa...@tools.wmflabs.org) python-requests/" +\ + requests.__version__ + self.headers = {'User-Agent': user_agent} + + if access_token: + # Construct an auth object with the consumer and access tokens + access_token = json.loads(access_token) + self.auth = OAuth1(config.consumer_token.key, + client_secret=config.consumer_token.secret, + resource_owner_key=access_token['key'], + resource_owner_secret=access_token['secret']) + else: + self.auth = None + + self.redis_channel = redis_channel + self.redis = StrictRedis( + host=config.redis_server, + port=config.redis_port, + db=config.redis_db + ) + + def publish(self, message): + if not self.redis_channel: + raise Exception("No redis channel set to publish to") + self.redis.publish(self.redis_channel, json.dumps(message)) + + def timestamp(self, daysdelta=0): + """ + :param daysdelta: calculate timestamp in ´daysdelta´ days + :return: MediaWIki timestamp format + """ + now = datetime.utcnow() + delta = timedelta(days=daysdelta) + time = now + delta + return time.strftime("%Y%m%d%H%M%S") + + def query(self, params): + params['format'] = "json" + response = requests.get(self.api_url, params=params, auth=self.auth, + headers=self.headers).json() + + if 'error' in response: + logger.error(response['error']['code']) + if response['error']['code'] == "mwoauth-invalid-authorization": + raise Exception("OAuth authentication failed") + + raise Exception(str(response['error']['code'])) + if 'warnings' in response: + logger.warn("API-request warning: " + str(response['warnings'])) + return response + + def query_gen(self, params): + params['format'] = "json" + params['action'] = "query" + last_continue = {'continue': ""} + while True: + p = params.copy() + p.update(last_continue) + response = requests.get(self.api_url, params=p, auth=self.auth, + headers=self.headers).json() + + if 'error' in response: + raise Exception(str(response['error'])) + if 'warnings' in response: + warning = response['warnings']['query']['*'] + logger.warn("API-request warning: " + warning) + if 'query' in response: + yield response['query'] + if 'continue' not in response: + break + last_continue = response['continue'] + + def get_username(self): + try: + params = { + 'action': "query", + 'meta': "userinfo", + } + response = self.query(params) + username = response['query']['userinfo']['name'] + return username + except KeyError as e: + if response['error']['code'] == "mwoauth-invalid-authorization": + logger.error('mwoauth-invalid-authorization') + raise Exception("OAuth authentication failed") + raise e + + def get_wikis(self, use_cache=True): + key = config.redis_prefix + 'cached_wikis' + wikis = self.redis.get(key) + if use_cache and wikis: + wikis = json.loads(wikis) + else: + # Cache miss, do api request and fill cache + wikis = self._get_wikis() + self.redis.setex(key, 86400, json.dumps(wikis)) # 1 day exp. + + return wikis + + def _get_wikis(self): + params = {'action': "sitematrix"} + data = self.query(params) + for key, val in data['sitematrix'].items(): + if key == 'count': + continue + + if 'code' in val: + for site in val['site']: + self._parse_sitematrix(site, val['code'], val['name']) + else: + for site in val: + self._parse_sitematrix(site, '', '') + + return self.wikis + + def _parse_sitematrix(self, site, lang, langname): + wiki = { + 'lang': lang, + 'langname': langname, + 'url': site['url'], + 'dbname': site['dbname'], + 'group': site['code'] + } + if wiki['group'] == 'wiki': + wiki['group'] = 'wikipedia' + + inactive_wikis = ['closed', 'private', 'fishbowl'] + if any([key in site for key in inactive_wikis]): + wiki['closed'] = True + + self.wikis[site['dbname']] = wiki diff --git a/backend/celery/tasks.py b/backend/celery/tasks.py new file mode 100644 index 0000000..21079fc --- /dev/null +++ b/backend/celery/tasks.py @@ -0,0 +1,83 @@ +# -*- coding: utf-8 -*- +"""Celery tasks""" +# ISC License +# Copyright (C) 2015 Jan Lebert +from __future__ import absolute_import + +from . import app, logger +from .api import MediaWiki + + +@app.task +def initial_task(obj): + mw = MediaWiki(access_token=obj['access_token']) + wikis = mw.get_wikis() + projects = ['enwiki', 'dewiki', 'itwiki', 'frwiki', 'itwiki', + 'commonswiki', 'wikidatawiki', 'enwiktionary', 'dewiktionary', + 'metawiki', 'mediawikiwiki'] + for project in projects: + obj['wiki'] = wikis[project] + watchlistgetter.delay(obj) + + +def fix_urls(html, url): + a = u'<a target="_blank" href="' + url + u'/' + html = html.replace(u'<a href="/', a) + return html + + +@app.task +def watchlistgetter(obj): + """ + Get the watchlist for a wiki + :param obj: dict with wiki and connection information + """ + logger.info("Reading watchlist items for wiki " + + obj['wiki']['dbname']) + mw = MediaWiki(host=obj['wiki']['url'], + access_token=obj['access_token'], + redis_channel=obj['redis_channel']) + if 'watchlistperiod' in obj: + days = obj['watchlistperiod'] + else: + days = 1 + params = { + 'list': "watchlist", + 'wlallrev': "", + 'wltype': "edit|new", + 'wllimit': 500, + 'wlend': mw.timestamp(daysdelta=-days), + 'wlprop': "ids|flags|title|parsedcomment|user|timestamp|sizes|" + + "notificationtimestamp|loginfo" + } + + for response in mw.query_gen(params): + items = [] + for item in response['watchlist']: + item['project'] = obj['wiki']['dbname'] + item['projecturl'] = obj['wiki']['url'] + + if 'commenthidden' in item: + item['parsedcomment'] = "<s>edit summary removed</s>" + item['parsedcomment'] = fix_urls(item['parsedcomment'], + obj['wiki']['url']) + item['projectgroup'] = obj['wiki']['group'] + item['projectlang'] = obj['wiki']['lang'] + item['projectlangname'] = obj['wiki']['langname'] + if 'bot' in item: + item['bot'] = "b" + if 'minor' in item: + item['minor'] = "m" + if 'new' in item: + item['new'] = "n" + items.append(item) + message = { + 'msgtype': 'watchlist', + 'entires': items + } + if items: + mw.publish(message) + + +if __name__ == '__main__': + app.start() diff --git a/backend/config.py.sample b/backend/config.py.sample index c1f2e46..6056909 100644 --- a/backend/config.py.sample +++ b/backend/config.py.sample @@ -1,37 +1,21 @@ +# -*- coding: utf-8 -*- +# ISC License +# Copyright (C) 2015 Jan Lebert +from __future__ import absolute_import import socket from mwoauth import ConsumerToken -# https://pypi.python.org/pypi/celerybeat-redis/0.0.7 toolname = 'crosswatch' redis_server = 'tools-redis' + if not socket.gethostname().startswith('tools'): redis_server = 'localhost' redis_port = 6379 redis_db = 3 -redis_prefix = 'CHANGE THIS STRING' # random string +redis_prefix = 'REPLACE THIS STRING' # random string +sql_user = 'REPLACE THIS STRING' +sql_passwd = 'REPLACE THIS STRING' -oauth_consumer_key = 'CHANGE THIS STRING' -oauth_consumer_secret = 'CHANGE THIS STRING' +oauth_consumer_key = 'REPLACE THIS STRING' +oauth_consumer_secret = 'REPLACE THIS STRING' consumer_token = ConsumerToken(oauth_consumer_key, oauth_consumer_secret) - - -class celeryconfig(): - BROKER_URL = 'redis://{server}:{port}/{db}'.format( - server=redis_server, port=redis_port, db=redis_db) - CELERY_TASK_SERIALIZER = 'json' - CELERY_ACCEPT_CONTENT = ['json'] - CELERY_IGNORE_RESULT=True - CELERY_DISABLE_RATE_LIMITS=True - CELERY_DEFAULT_QUEUE=config.toolname - CELERY_QUEUES=( - Queue(config.redis_prefix + 'q', Exchange(config.toolname), - routing_key=config.toolname), - ) - BROKER_TRANSPORT_OPTIONS = { - 'fanout_prefix': True, - 'fanout_patterns': True, - 'keyprefix_queue': redis_prefix + '.binding.%s', - 'unacked_key': redis_prefix + '_unacked', - 'unacked_index_key': redis_prefix + '_unacked_index_key', - 'unacked_mutex_key': redis_prefix + '_unacked_mutex_key' - } diff --git a/backend/oauth_handler.py b/backend/oauth_handler.py deleted file mode 100644 index c5eefba..0000000 --- a/backend/oauth_handler.py +++ /dev/null @@ -1,30 +0,0 @@ -import os -from flask import Flask -from flask_mwoauth import MWOAuth -import config - -app = Flask(__name__) - -# Generate a random secret application key -# -# NOTE: this key changes every invocation. In an actual application, the key -# should not change! Otherwise you might get a different secret key for -# different requests, which means you can't read data stored in cookies, -# which in turn breaks OAuth. -# -# So, for an actual application, use app.secret_key = "some long secret key" -# (which you could generate using os.urandom(24)) -# -app.secret_key = os.urandom(24) - -consumer_key = config.oauth_consumer_key -consumer_secret = config.oauth_consumer_secret -toolname = config.toolname - -mwoauth = MWOAuth(consumer_key=consumer_key, - consumer_secret=consumer_secret) - -app.register_blueprint(mwoauth.bp, url_prefix='/' + toolname) - -if __name__ == "__main__": - app.run() diff --git a/backend/public b/backend/public deleted file mode 120000 index f640917..0000000 --- a/backend/public +++ /dev/null @@ -1 +0,0 @@ -../frontend/dist \ No newline at end of file diff --git a/backend/requirements.txt b/backend/requirements.txt index e8adf26..8ed6c96 100644 --- a/backend/requirements.txt +++ b/backend/requirements.txt @@ -1,31 +1,10 @@ -Flask==0.10.1 -Flask-OAuth==0.12 -Jinja2==2.7.3 -MarkupSafe==0.23 -PyJWT==0.2.1 -Werkzeug==0.10.1 -amqp==1.4.6 -anyjson==0.3.3 -argparse==1.2.1 -backports.ssl-match-hostname==3.4.0.2 -billiard==3.3.0.19 -celery==3.1.17 -certifi==14.05.14 -distribute==0.6.24 -httplib2==0.9 -itsdangerous==0.24 -kombu==3.0.24 -mwoauth==0.2.3 -oauth2==1.5.211 -oauthlib==0.7.2 -pycrypto==2.6.1 -pytz==2014.10 -redis==2.10.3 -requests==2.5.1 -requests-oauthlib==0.4.2 -simple-crypt==4.0.0 -six==1.9.0 -sockjs-tornado==1.0.1 -tornado==4.1 -tornado-redis==2.4.18 -wsgiref==0.1.2 +tornado +tornado-redis +sockjs-tornado +celery[redis] +requests[security] +requests-oauthlib +mwoauth +MySQL-python +flask-oauth +eventlet \ No newline at end of file diff --git a/backend/server.py b/backend/server.py deleted file mode 100644 index c5a45e6..0000000 --- a/backend/server.py +++ /dev/null @@ -1,107 +0,0 @@ -#!/usr/bin/python -# -*- coding: utf-8 -*- -# MIT License -# Copyright (C) 2014 sitic -""" - Tornado web server for find-and-replace tool - running on tools.wmflabs.org/find-and-replace -""" -from __future__ import print_function -import logging - -import tornado.ioloop -from tornado.web import StaticFileHandler, FallbackHandler, RedirectHandler -from tornado.web import Application -from tornado.wsgi import WSGIContainer -import sockjs.tornado -import tornadoredis -import tornadoredis.pubsub - -import json -import redis -from uuid import uuid4 - -import config # general config file -toolname = config.toolname - -# import celery - -from oauth_handler import app as oauth_wsgi - -from tasks import get_watchlist - -# Use the synchronous redis client to publish messages to a channel -redis_client = redis.StrictRedis(host=config.redis_server, - port=config.redis_port, - db=config.redis_db) -# Create the tornadoredis.Client instance -# and use it for redis channel subscriptions -subscriber = tornadoredis.pubsub.SockJSSubscriber(tornadoredis.Client( - host=config.redis_server, - port=config.redis_port, - selected_db=config.redis_db)) - - -class SockConnection(sockjs.tornado.SockJSConnection): - def on_open(self, info): - self.channels = [] - - def on_close(self): - for channel in self.channels: - subscriber.unsubscribe(channel, self) - - def on_message(self, message): - data = json.loads(message) - print(data) - if data[u'action'] == u'watchlist': - redis_channel = str(uuid4()) - self.channels.append(redis_channel) - logging.info('New redis channel ' + redis_channel) - subscriber.subscribe(redis_channel, self) - data['redis_channel'] = redis_channel - get_watchlist.delay(data) - - -if __name__ == "__main__": - logging.basicConfig(level=logging.DEBUG, - format='%(asctime)s %(name)-12s %(levelname)-8s %(message)s', # NOQA - datefmt='%m-%d %H:%M') - - import sys - if len(sys.argv) < 2: - print("ERROR: no first argument given. Quitting", file=sys.stderr) - sys.exit(1) - PORT = int(sys.argv[1]) - - # Config sockjs - SockRouter = sockjs.tornado.SockJSRouter(SockConnection, - '/' + toolname + '/sockjs') - - OauthApp = WSGIContainer(oauth_wsgi) - # Config oauth consumer - OauthRouter = [(r"/" + toolname + r"/login", - FallbackHandler, dict(fallback=OauthApp)), - (r"/" + toolname + r"/logout", - FallbackHandler, dict(fallback=OauthApp)), - (r"/" + toolname + r"/oauth-callback", - FallbackHandler, dict(fallback=OauthApp))] - - # Config routes - import os - pwd_path = os.path.dirname(__file__) - - static_path = os.path.join(pwd_path, 'public') - index_path = os.path.join(pwd_path, 'public/' + toolname + '/index.html') - static_handlers = [ - (r"/()", StaticFileHandler, {"path": index_path}), - (r"/" + toolname + r"/()", StaticFileHandler, {"path": index_path}), - (r"/" + toolname, RedirectHandler, {"url": "/" + toolname + "/"}), - (r"/(.*)", StaticFileHandler, {"path": static_path}) - ] - - routes = OauthRouter + SockRouter.urls + static_handlers - app = Application(routes) - - # Start the server - app.listen(PORT) - tornado.ioloop.IOLoop.instance().start() diff --git a/backend/server/__init__.py b/backend/server/__init__.py new file mode 100644 index 0000000..6cd522f --- /dev/null +++ b/backend/server/__init__.py @@ -0,0 +1,97 @@ +# -*- coding: utf-8 -*- +""" +Tornado web server for crosswatch tool +running on https://tools.wmflabs.org/crosswatch + +""" +# +# ISC License +# Copyright (C) 2014 Jan Lebert +from __future__ import absolute_import +import os + +import logging +import json +from uuid import uuid4 + +import tornado.ioloop +from tornado.web import StaticFileHandler, FallbackHandler, RedirectHandler,\ + Application +from tornado.wsgi import WSGIContainer +from sockjs.tornado import SockJSConnection, SockJSRouter +from tornadoredis import Client as RedisClient +from tornadoredis.pubsub import SockJSSubscriber + +from .. import config +toolname = config.toolname +from .oauth_handler import app as oauth_wsgi +from ..celery import app as celery_app + +# Create the tornadoredis.Client instance +# and use it for redis channel subscriptions +subscriber = SockJSSubscriber(RedisClient( + host=config.redis_server, + port=config.redis_port, + selected_db=config.redis_db + )) + + +class SockConnection(SockJSConnection): + def on_open(self, info): + self.channels = [] + + def on_close(self): + for channel in self.channels: + subscriber.unsubscribe(channel, self) + + def on_message(self, message): + data = json.loads(message) + if data[u'action'] == u'watchlist': + redis_channel = str(uuid4()) + self.channels.append(redis_channel) + + logging.info('New redis channel ' + redis_channel) + subscriber.subscribe(redis_channel, self) + + data['redis_channel'] = redis_channel + celery_app.send_task('backend.celery.tasks.initial_task', (data, )) + + +def run(port): + logging.basicConfig(level=logging.DEBUG, + format='%(asctime)s %(name)-12s %(levelname)-8s' + + ' %(message)s', + datefmt='%m-%d %H:%M') + logging.info("Starting tornado server on port %d." % port) + + # Config sockjs + sockjsrouter = SockJSRouter(SockConnection, + '/' + toolname + '/sockjs') + + oauthapp = WSGIContainer(oauth_wsgi) + # Config oauth consumer + oauthrouter = [(r"/" + toolname + r"/login", + FallbackHandler, dict(fallback=oauthapp)), + (r"/" + toolname + r"/logout", + FallbackHandler, dict(fallback=oauthapp)), + (r"/" + toolname + r"/oauth-callback", + FallbackHandler, dict(fallback=oauthapp))] + + # Config routes + pwd_path = os.path.dirname(__file__) + + static_path = os.path.join(pwd_path, 'public') + index_path = os.path.join(pwd_path, 'public/' + toolname + '/index.html') + static_handlers = [ + (r"/()", StaticFileHandler, {"path": index_path}), + (r"/" + toolname + r"/()", StaticFileHandler, {"path": index_path}), + (r"/" + toolname, RedirectHandler, {"url": "/" + toolname + "/"}), + (r"/(.*)", StaticFileHandler, {"path": static_path}) + ] + + routes = oauthrouter + sockjsrouter.urls + static_handlers + app = Application(routes) + + # Start the server + app.listen(port) + tornado.ioloop.IOLoop.instance().start() diff --git a/backend/flask_mwoauth/__init__.py b/backend/server/flask_mwoauth/__init__.py similarity index 94% rename from backend/flask_mwoauth/__init__.py rename to backend/server/flask_mwoauth/__init__.py index b64a458..0645f7e 100644 --- a/backend/flask_mwoauth/__init__.py +++ b/backend/server/flask_mwoauth/__init__.py @@ -4,7 +4,7 @@ # Requires flask-oauth # # (C) 2013 Merlijn van Deen <valhall...@arctus.nl> -# (C) 2015 Sitic +# (C) 2015 Jan Lebert # Licensed under the MIT License // http://opensource.org/licenses/MIT # @@ -12,7 +12,7 @@ import sys import urllib -from flask import request, session, Blueprint, make_response +from flask import request, session, Blueprint, make_response, redirect from flask_oauth import OAuth, OAuthRemoteApp, OAuthException, parse_response import json @@ -42,7 +42,7 @@ clean_url='https://www.mediawiki.org/wiki', consumer_key=None, consumer_secret=None, - toolname='crosswatch'): + toolname='mytool'): if not consumer_key or not consumer_secret: raise Exception('MWOAuthBlueprintFactory needs consumer key and\ secret') @@ -96,8 +96,7 @@ 'secret': resp['oauth_token_secret']} mwo_token = json.dumps(mwo_token) - resp = make_response('This tab should close automatically.' + - '<script>self.close()</script>') + resp = make_response(redirect('/' + toolname + '/')) resp.set_cookie(self.toolname + 'Auth', mwo_token, max_age=30*24*60*60, path='/' + self.toolname + '/') @@ -110,7 +109,7 @@ @self.bp.route('/logout') def logout(): - resp = make_response('You are now logged out. Goodbye :-)') + resp = make_response(redirect('/' + toolname + '/')) session['mwo_token'] = None resp.set_cookie(self.toolname + 'Auth', '', path='/' + self.toolname + '/', expires=0) diff --git a/backend/server/oauth_handler.py b/backend/server/oauth_handler.py new file mode 100644 index 0000000..774b667 --- /dev/null +++ b/backend/server/oauth_handler.py @@ -0,0 +1,27 @@ +# -*- coding: utf-8 -*- +""" +WSGI container for OAuth login/logout handling. +""" +# ISC License +# Copyright (C) 2015 Jan Lebert +from __future__ import absolute_import +import os + +from flask import Flask + +from .flask_mwoauth import MWOAuth +from ..config import oauth_consumer_key, oauth_consumer_secret, toolname + +app = Flask(__name__) +app.secret_key = os.urandom(24) + +mwoauth = MWOAuth(base_url='https://en.wikipedia.org/w', + clean_url='https://en.wikipedia.org/wiki', + consumer_key=oauth_consumer_key, + consumer_secret=oauth_consumer_secret, + toolname=toolname) + +app.register_blueprint(mwoauth.bp, url_prefix='/' + toolname) + +if __name__ == "__main__": + app.run() diff --git a/backend/server/public b/backend/server/public new file mode 120000 index 0000000..e4608c0 --- /dev/null +++ b/backend/server/public @@ -0,0 +1 @@ +../../frontend/dist \ No newline at end of file diff --git a/backend/tasks.py b/backend/tasks.py deleted file mode 100644 index fc9978b..0000000 --- a/backend/tasks.py +++ /dev/null @@ -1,86 +0,0 @@ -#!/usr/bin/python -# -*- coding: utf-8 -*- -# MIT License -# Copyright (C) 2014 sitic -""" - Tornado web server for find-and-replace tool - running on tools.wmflabs.org/find-and-replace -""" -from __future__ import print_function - -from celery import Celery -from simplecrypt import decrypt -import requests -from requests_oauthlib import OAuth1 -import json -import redis - -import config - -from celery.utils.log import get_task_logger -logger = get_task_logger(__name__) - -app = Celery() -app.config_from_object('config:celeryconfig') - -@app.task -def get_watchlist(obj): - projects = [ - ['enwiki', 'https://en.wikipedia.org/wiki', 'https://en.wikipedia.org/w/api.php'], - ['dewiki', 'https://de.wikipedia.org/wiki', 'https://de.wikipedia.org/w/api.php'], - ['frwiki', 'https://fr.wikipedia.org/wiki', 'https://fr.wikipedia.org/w/api.php'], - ['itwiki', 'https://it.wikipedia.org/wiki', 'https://it.wikipedia.org/w/api.php'], - ['commons', 'https://commons.wikimedia.org/wiki', 'https://commons.wikimedia.org/w/api.php'], - ['wikidata', 'https://www.wikidata.org/wiki/wiki', 'https://www.wikidata.org/w/api.php'], - ['metawiki', 'https://meta.wikimedia.org/wiki', 'https://meta.wikimedia.org/w/api.php'], - ['enwiktionary', 'https://en.wiktionary.org/wiki', 'https://en.wiktionary.org/w/api.php'], - ['dewiktionary', 'https://de.wiktionary.org/wiki', 'https://de.wiktionary.org/w/api.php'], - ['mediawiki', 'https://www.mediawiki.org/wiki', 'https://www.mediawiki.org/w/api.php'] - ] - for project in projects: - watchlistgetter.delay(obj, *project) - -@app.task -def watchlistgetter(obj, project, project_url, api_url): - r = redis.StrictRedis( - host=config.redis_server, - port=config.redis_port, - db=config.redis_db) - - redis_channel = obj['redis_channel'] - - access_token = obj['access_token'] - access_token = json.loads(access_token) - - # Construct an auth object with the consumer and access tokens - auth1 = OAuth1(config.consumer_token.key, - client_secret=config.consumer_token.secret, - resource_owner_key=access_token['key'], - resource_owner_secret=access_token['secret']) - - # Now, accessing the API on behalf of a user - logger.info("Reading top 70 watchlist items for wiki " + project) - response = requests.get( - api_url, - params={ - 'action': "query", - 'list': "watchlist", - 'wlallrev': "", - 'wltype': "edit", - 'wllimit': 70, - 'wlprop': "ids|flags|title|parsedcomment|user|timestamp|sizes|loginfo", - 'format': "json" - }, - auth=auth1 - ) - href = u'<a href="' + project_url + u'/' - for item in response.json()['query']['watchlist']: - item['msgtype'] = 'watchlist' - item['project'] = project - item['projecturl'] = project_url - item['parsedcomment'] = item['parsedcomment'].replace(u'<a href="/wiki/', href) - r.publish(redis_channel, json.dumps(item)) - - -if __name__ == '__main__': - app.start() diff --git a/scripts/_sge_webserver.sh b/scripts/_sge_webserver.sh index c9e66f5..411df8d 100755 --- a/scripts/_sge_webserver.sh +++ b/scripts/_sge_webserver.sh @@ -1,2 +1,3 @@ #!/bin/bash -exec portgrabber watchr $HOME/crosswatch/scripts/_sge_webserver_internal.sh +cd `dirname $0` +exec portgrabber crosswatch ./_sge_webserver_internal.sh diff --git a/scripts/_sge_webserver_internal.sh b/scripts/_sge_webserver_internal.sh index e267a93..0d22884 100755 --- a/scripts/_sge_webserver_internal.sh +++ b/scripts/_sge_webserver_internal.sh @@ -1,7 +1,6 @@ #!/bin/bash -source $HOME/.virtualenvs/backend-trusty/bin/activate -source $HOME/crosswatch/scripts/paths.sh +source $HOME/.virtualenvs/backend/bin/activate +cd `dirname $0` +cd .. export PORT=$1 -echo "running on port $PORT" -cd $BACKEND -python server.py $PORT +python -m backend $PORT diff --git a/scripts/build_webserver.sh b/scripts/build_webserver.sh index 2936905..5c57187 100755 --- a/scripts/build_webserver.sh +++ b/scripts/build_webserver.sh @@ -1,5 +1,4 @@ #!/bin/bash -source $HOME/.virtualenvs/build/bin/activate -source ./paths.sh -cd $FRONTEND +cd `dirname $0` +cd ../frontend gulp build diff --git a/scripts/celery.grid.sh b/scripts/celery.grid.sh index 3cfc495..c5a7224 100755 --- a/scripts/celery.grid.sh +++ b/scripts/celery.grid.sh @@ -1,7 +1,5 @@ #!/bin/bash -#jsub -once -continuous -l h_vmem=512M -N rahu -M jan.leb...@online.de -m abe /data/project/asurabot/scripts/rahu.grid.sh source $HOME/.virtualenvs/backend/bin/activate -source $HOME/crosswatch/scripts/paths.sh -cd $BACKEND -celery -A tasks worker -l info -# --autoreload +cd `dirname $0` +cd .. +celery -A backend worker -l info -n crosswatch$1.%n diff --git a/scripts/paths.sh b/scripts/paths.sh deleted file mode 100755 index 9809280..0000000 --- a/scripts/paths.sh +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash -export MY_ROOT="$HOME/crosswatch" -export FRONTEND="$MY_ROOT/frontend" -export BACKEND="$MY_ROOT/backend" diff --git a/scripts/restart_celery.sh b/scripts/restart_celery.sh new file mode 100755 index 0000000..0a1b6ff --- /dev/null +++ b/scripts/restart_celery.sh @@ -0,0 +1,13 @@ +#!/bin/bash +cd `dirname $0` +num=$(qstat -q continuous | grep celery | wc -l) +./stop_celery.sh +while [ "$(qstat -q continuous | grep celery | wc -l)" -gt 0 ]; +do + sleep 1 +done +for i in $(seq 1 ${num}); +do + jstart -N celery${i} -once -continuous -l h_vmem=3G -l release=trusty ./celery.grid.sh $i + sleep 10 +done diff --git a/scripts/start_webserver.sh b/scripts/start_webserver.sh index 236dbb1..a045a1a 100755 --- a/scripts/start_webserver.sh +++ b/scripts/start_webserver.sh @@ -1,4 +1,4 @@ #!/bin/bash cd `dirname $0` echo "starting webserver" -jstart -N tornado -q webgrid-generic -l release=trusty -mem 4096M ./_sge_webserver.sh +jstart -N tornado-crosswatch -once -q webgrid-generic -l release=trusty -mem 3G ./_sge_webserver.sh diff --git a/scripts/stop_celery.sh b/scripts/stop_celery.sh new file mode 100755 index 0000000..8562599 --- /dev/null +++ b/scripts/stop_celery.sh @@ -0,0 +1,3 @@ +#!/bin/bash +echo "stopping celery jobs" +qdel -u $USER "celery*" -- To view, visit https://gerrit.wikimedia.org/r/210815 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: merged Gerrit-Change-Id: I07768df24e9e1d13c95f85b9f96ea808cf01ece4 Gerrit-PatchSet: 1 Gerrit-Project: labs/tools/crosswatch Gerrit-Branch: master Gerrit-Owner: Sitic <jan.leb...@online.de> Gerrit-Reviewer: Sitic <jan.leb...@online.de> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits