Ori.livneh has submitted this change and it was merged.

Change subject: mwgrep: use a filtered boolean query
......................................................................


mwgrep: use a filtered boolean query

Rather than using a query-string query, use a filtered boolean query, per
Nik's recommendation.

Change-Id: I49406f7462cadce4b2c6ee9db04fa4c5b2c12c92
---
M files/misc/scripts/mwgrep
1 file changed, 14 insertions(+), 10 deletions(-)

Approvals:
  Ori.livneh: Looks good to me, approved
  jenkins-bot: Verified



diff --git a/files/misc/scripts/mwgrep b/files/misc/scripts/mwgrep
index 6c5df13..e256cf0 100755
--- a/files/misc/scripts/mwgrep
+++ b/files/misc/scripts/mwgrep
@@ -9,30 +9,34 @@
 """
 import argparse
 import json
-import urllib
 import urllib2
 
 
 BASE_URI = 'http://search.svc.eqiad.wmnet:9200/_all/page/_search'
-BASE_QUERY = "text:{0} AND namespace:8 AND title:(js css)"
 
 ap = argparse.ArgumentParser(description='Grep for CSS/JS in MediaWiki: NS')
-ap.add_argument('term', type=BASE_QUERY.format, help='text to search for')
+ap.add_argument('term', help='text to search for')
 ap.add_argument('--max-results', type=int, default=100)
 args = ap.parse_args()
 
+filters = [
+    {'term': {'namespace': '8'}},
+    {'regexp': {'title.keyword': '.*\\.(js|css)'}},
+    {'script': {'script': "_source['text'].contains('%s')" % args.term}},
+]
+
 query = {
     'size': args.max_results,
-    'analyzer': 'keyword',
-    'q': args.term,
+    'fields': ['namespace', 'title'],
+    'query': {'filtered': {'filter': {'bool': {'must': filters}}}},
 }
 
-req = urllib2.urlopen(BASE_URI + '?' + urllib.urlencode(query))
+req = urllib2.urlopen(BASE_URI, json.dumps(query))
 result = json.load(req)['hits']
 
 for hit in result['hits']:
-    db_name = hit['_index'].split('_', 1)[0]
-    title = hit['_source']['title']
-    print db_name, title
+    db = hit['_index'].split('_', 1)[0]
+    title = hit['fields']['title']
+    print('{:<20}{}'.format(db, title))
 
-print '(total: %s, shown: %s)' % (result['total'], len(result['hits']))
+print('(total: %s, shown: %s)' % (result['total'], len(result['hits'])))

-- 
To view, visit https://gerrit.wikimedia.org/r/113351
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: I49406f7462cadce4b2c6ee9db04fa4c5b2c12c92
Gerrit-PatchSet: 1
Gerrit-Project: operations/puppet
Gerrit-Branch: production
Gerrit-Owner: Ori.livneh <o...@wikimedia.org>
Gerrit-Reviewer: Manybubbles <never...@wikimedia.org>
Gerrit-Reviewer: Ori.livneh <o...@wikimedia.org>
Gerrit-Reviewer: jenkins-bot <>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to