Ori.livneh has uploaded a new change for review. https://gerrit.wikimedia.org/r/113351
Change subject: mwgrep: use a filtered boolean query
......................................................................

mwgrep: use a filtered boolean query

Rather than use a query string query, use a filtered boolean query, per Nik's recommendation.

Change-Id: I49406f7462cadce4b2c6ee9db04fa4c5b2c12c92
---
M files/misc/scripts/mwgrep
1 file changed, 14 insertions(+), 10 deletions(-)

  git pull ssh://gerrit.wikimedia.org:29418/operations/puppet refs/changes/51/113351/1

diff --git a/files/misc/scripts/mwgrep b/files/misc/scripts/mwgrep
index 6c5df13..e256cf0 100755
--- a/files/misc/scripts/mwgrep
+++ b/files/misc/scripts/mwgrep
@@ -9,30 +9,34 @@
 """
 
 import argparse
 import json
-import urllib
 import urllib2
 
 BASE_URI = 'http://search.svc.eqiad.wmnet:9200/_all/page/_search'
-BASE_QUERY = "text:{0} AND namespace:8 AND title:(js css)"
 
 ap = argparse.ArgumentParser(description='Grep for CSS/JS in MediaWiki: NS')
-ap.add_argument('term', type=BASE_QUERY.format, help='text to search for')
+ap.add_argument('term', help='text to search for')
 ap.add_argument('--max-results', type=int, default=100)
 args = ap.parse_args()
 
+filters = [
+    {'term': {'namespace': '8'}},
+    {'regexp': {'title.keyword': '.*\\.(js|css)'}},
+    {'script': {'script': "_source['text'].contains('%s')" % args.term}},
+]
+
 query = {
     'size': args.max_results,
-    'analyzer': 'keyword',
-    'q': args.term,
+    'fields': ['namespace', 'title'],
+    'query': {'filtered': {'filter': {'bool': {'must': filters}}}},
 }
 
-req = urllib2.urlopen(BASE_URI + '?' + urllib.urlencode(query))
+req = urllib2.urlopen(BASE_URI, json.dumps(query))
 result = json.load(req)['hits']
 
 for hit in result['hits']:
-    db_name = hit['_index'].split('_', 1)[0]
-    title = hit['_source']['title']
-    print db_name, title
+    db = hit['_index'].split('_', 1)[0]
+    title = hit['fields']['title']
+    print('{:<20}{}'.format(db, title))
 
-print '(total: %s, shown: %s)' % (result['total'], len(result['hits']))
+print('(total: %s, shown: %s)' % (result['total'], len(result['hits'])))

-- 
To view, visit https://gerrit.wikimedia.org/r/113351
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I49406f7462cadce4b2c6ee9db04fa4c5b2c12c92
Gerrit-PatchSet: 1
Gerrit-Project: operations/puppet
Gerrit-Branch: production
Gerrit-Owner: Ori.livneh <o...@wikimedia.org>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits