jenkins-bot has submitted this change and it was merged.

Change subject: Add sorted errors
......................................................................


Add sorted errors

Change-Id: I2f5d52f2f1072878f4e976f0f41c2b920e3a7d68
---
A bash/sorted_publish_events.py
A bash/sorted_restore_events.py
A bash/sorted_save_events.py
3 files changed, 280 insertions(+), 0 deletions(-)

Approvals:
  Nikerabbit: Looks good to me, approved
  jenkins-bot: Verified



diff --git a/bash/sorted_publish_events.py b/bash/sorted_publish_events.py
new file mode 100755
index 0000000..6e34fdd
--- /dev/null
+++ b/bash/sorted_publish_events.py
@@ -0,0 +1,108 @@
+#!/usr/bin/python
+# Tally 'publish-failure' rows of ContentTranslationError_11767097 by error code.
+import re
+import sys
+import MySQLdb
+
+timestamp = sys.argv[1]  # prefix matched against the timestamp column
+
+# The database password is read from rprod.txt in the current directory.
+with open('rprod.txt', 'r') as rprod_pwd_file:
+       rprod_pwd = rprod_pwd_file.read().rstrip()
+
+log_db = MySQLdb.connect(
+       host = "db1047.eqiad.wmnet",
+       user = "research_prod",
+       passwd = rprod_pwd,
+       db = "log"
+)
+
+cursor = log_db.cursor()
+# Bind the LIKE prefix as a parameter instead of concatenating argv into the SQL.
+cursor.execute("SELECT userAgent, wiki, event_sourceLanguage, event_sourceTitle, event_targetLanguage, event_targetTitle, event_token, event_trace, event_version FROM ContentTranslationError_11767097 WHERE event_context = 'publish-failure' and timestamp like %s GROUP BY event_session ORDER BY timestamp", (timestamp + "%",))
+rows = cursor.fetchall()  # read the result set while the connection is still open
+log_db.close()
+
+event_trace_col = 7  # position of event_trace in the SELECT list above
+error_counts = {}  # error code -> list of rows classified under it
+known_error_codes = [  # printed as tab-separated columns, in this order
+       'abusefilter',
+       'assertuserfailed',
+       'badtoken',
+       'blocked',
+       'docserver',
+       'globalblocking-ipblocked-range',
+       'internal_api_error_DBConnectionError',
+       'internal_api_error_DBQueryError',
+       'internal_api_error_FileBackendError',
+       'internal_api_error_JobQueueError',
+       'invalidhtml',
+       'invalidtargetlanguage',
+       'invalidtitle',
+       'parsoidserver',
+       'parsererror',
+       'protectednamespace-interface',
+       'protectedpage',
+       'ratelimited',
+       'readonly',
+       'spamblacklist',
+       'text-status-error',
+       'text-status-timeout',
+       'titleblacklist-forbidden-edit',
+       'titleblacklist-forbidden',
+       'translation-not-found',
+       'unexpected-error-while-publishing',
+       'unknownerror',
+       'wikimedia-globalblocking-ipblocked',
+]
+cryptic_counts = {}  # unclassified trace text -> list of rows
+known_traces = {  # whole-trace matches -> error code
+       '{"xhr":{"readyState":0,"responseText":"","status":0,"statusText":"error"},"textStatus":"error","exception":""}': 'text-status-error',
+       '{"xhr":{"readyState":0,"status":0,"statusText":"timeout"},"textStatus":"timeout","exception":"timeout"}': 'text-status-timeout',
+       '"[CX] Unexpected error while publishing: [object Object]"': 'unexpected-error-while-publishing',
+}
+
+for error_code in known_error_codes:
+       error_counts[error_code] = []
+
+# Classify each row: explicit "error" code first, then known trace patterns.
+for row in rows:
+       error_code = None
+       trace = row[event_trace_col]
+       trace = re.sub(r'"d":\d+,"s":\d+,', '', trace)  # drop the numeric "d"/"s" fields before matching
+
+       find_error = re.search(r'"error":\{"code":"([^"]+?)"', trace)
+       if find_error:
+               error_code = find_error.group(1)
+       elif trace in known_traces:
+               error_code = known_traces[trace]
+       elif re.search('"info":"Hit AbuseFilter', trace):
+               error_code = 'abusefilter'
+       elif re.search(r'"edit":\{"spamblacklist"', trace):
+               error_code = 'spamblacklist'
+       elif re.search('"textStatus":"parsererror"', trace):
+               error_code = 'parsererror'
+       elif re.search('"info":"Translation not found"', trace):
+               error_code = 'translation-not-found'
+
+       if error_code:
+               if error_code not in error_counts:
+                       error_counts[error_code] = []
+                       print "New kind of error! ----------> " + error_code
+
+               error_counts[error_code].append(row)
+       else:
+               if trace not in cryptic_counts:
+                       cryptic_counts[trace] = []
+
+               cryptic_counts[trace].append(row)
+
+found_counts = [ str(len(rows)) ]  # first column: total number of rows
+for error in known_error_codes:  # codes first seen at run time are not in this row
+       found_counts.append(str(len(error_counts[error])))
+
+print("\t".join(found_counts))
+
+print "cryptic traces:"
+for trace in sorted(cryptic_counts):
+       print str(len(cryptic_counts[trace])) + "\t" + trace
diff --git a/bash/sorted_restore_events.py b/bash/sorted_restore_events.py
new file mode 100644
index 0000000..a40b61e
--- /dev/null
+++ b/bash/sorted_restore_events.py
@@ -0,0 +1,76 @@
+#!/usr/bin/python
+# Tally 'restore-failure' rows of ContentTranslationError_11767097 by error code.
+import re
+import sys
+import MySQLdb
+
+timestamp = sys.argv[1]  # prefix matched against the timestamp column
+
+# The database password is read from rprod.txt in the current directory.
+with open('rprod.txt', 'r') as rprod_pwd_file:
+       rprod_pwd = rprod_pwd_file.read().rstrip()
+
+log_db = MySQLdb.connect(
+       host = "db1047.eqiad.wmnet",
+       user = "research_prod",
+       passwd = rprod_pwd,
+       db = "log"
+)
+
+cursor = log_db.cursor()
+# Bind the LIKE prefix as a parameter instead of concatenating argv into the SQL.
+cursor.execute("SELECT userAgent, wiki, event_sourceLanguage, event_sourceTitle, event_targetLanguage, event_targetTitle, event_token, event_trace, event_version FROM ContentTranslationError_11767097 WHERE event_context = 'restore-failure' and timestamp like %s GROUP BY event_session ORDER BY timestamp", (timestamp + "%",))
+rows = cursor.fetchall()  # read the result set while the connection is still open
+log_db.close()
+
+event_trace_col = 7  # position of event_trace in the SELECT list above
+error_counts = {}  # error code -> list of rows classified under it
+known_error_codes = [  # printed as tab-separated columns, in this order
+       'draft-does-not-exist',
+       'you-must-log-in',
+       'text-status-error',
+       'text-status-timeout',
+]
+cryptic_counts = {}  # unclassified trace text -> list of rows
+known_traces = {  # whole-trace matches -> error code
+       '{"xhr":{"readyState":0,"responseText":"","status":0,"statusText":"error"},"textStatus":"error","exception":"","errorCode":"http"}': 'text-status-error',
+       '{"xhr":{"readyState":0,"status":0,"statusText":"timeout"},"textStatus":"timeout","exception":"timeout","errorCode":"http"}': 'text-status-timeout',
+}
+
+for error_code in known_error_codes:
+       error_counts[error_code] = []
+
+# Classify each row by matching its trace against the known patterns.
+for row in rows:
+       error_code = None
+       trace = row[event_trace_col]
+       trace = re.sub(r'"d":\d+,"s":\d+,', '', trace)  # drop the numeric "d"/"s" fields before matching
+
+       if trace in known_traces:
+               error_code = known_traces[trace]
+       elif re.search('"info":"Draft does not exist"', trace):
+               error_code = 'draft-does-not-exist'
+       elif re.search('"info":"To view your translations, you must log in"', trace):
+               error_code = 'you-must-log-in'
+
+       if error_code:
+               if error_code not in error_counts:
+                       error_counts[error_code] = []
+                       print "New kind of error! ----------> " + error_code
+
+               error_counts[error_code].append(row)
+       else:
+               if trace not in cryptic_counts:
+                       cryptic_counts[trace] = []
+
+               cryptic_counts[trace].append(row)
+
+found_counts = [ str(len(rows)) ]  # first column: total number of rows
+for error in known_error_codes:  # codes first seen at run time are not in this row
+       found_counts.append(str(len(error_counts[error])))
+
+print("\t".join(found_counts))
+
+print "cryptic traces:"
+for trace in sorted(cryptic_counts):
+       print str(len(cryptic_counts[trace])) + "\t" + trace
diff --git a/bash/sorted_save_events.py b/bash/sorted_save_events.py
new file mode 100755
index 0000000..168ca2f
--- /dev/null
+++ b/bash/sorted_save_events.py
@@ -0,0 +1,96 @@
+#!/usr/bin/python
+# Tally 'save-failure' rows of ContentTranslationError_11767097 by error code.
+import re
+import sys
+import MySQLdb
+
+timestamp = sys.argv[1]  # prefix matched against the timestamp column
+
+# The database password is read from rprod.txt in the current directory.
+with open('rprod.txt', 'r') as rprod_pwd_file:
+       rprod_pwd = rprod_pwd_file.read().rstrip()
+
+log_db = MySQLdb.connect(
+       host = "db1047.eqiad.wmnet",
+       user = "research_prod",
+       passwd = rprod_pwd,
+       db = "log"
+)
+
+cursor = log_db.cursor()
+# Bind the LIKE prefix as a parameter instead of concatenating argv into the SQL.
+cursor.execute("SELECT userAgent, wiki, event_sourceLanguage, event_sourceTitle, event_targetLanguage, event_targetTitle, event_token, event_trace, event_version FROM ContentTranslationError_11767097 WHERE event_context = 'save-failure' and timestamp like %s GROUP BY event_session ORDER BY timestamp", (timestamp + "%",))
+rows = cursor.fetchall()  # read the result set while the connection is still open
+log_db.close()
+
+event_trace_col = 7  # position of event_trace in the SELECT list above
+error_counts = {}  # error code -> list of rows classified under it
+known_error_codes = [  # printed as tab-separated columns, in this order
+       'assertuserfailed',
+       'bad Gateway',
+       'badtoken',
+       'blocked',
+       'editconflict',
+       'internal_api_error_DBQueryError',
+       'invalidtargetlanguage',
+       'json-error-unexpected-eod',
+       'json-error-unexpected-eof',
+       'noaccess',
+       'nosourcerevision',
+       'readonly',
+       'text-status-error',
+       'text-status-timeout',
+       'unexpected end of input',
+       'wikimedia Error'
+]
+cryptic_counts = {}  # unclassified trace text -> list of rows
+known_traces = {  # whole-trace matches -> error code
+       '{"xhr":{"readyState":0,"responseText":"","status":0,"statusText":"error"},"textStatus":"error","exception":"","errorCode":"http"}': 'text-status-error',
+       '{"xhr":{"readyState":0,"status":0,"statusText":"timeout"},"textStatus":"timeout","exception":"timeout","errorCode":"http"}': 'text-status-timeout'
+}
+
+for error_code in known_error_codes:
+       error_counts[error_code] = []
+
+# Classify each row: explicit "error" code first, then known trace patterns.
+for row in rows:
+       error_code = None
+       trace = row[event_trace_col]
+       trace = re.sub(r'"d":\d+,"s":\d+,', '', trace)  # drop the numeric "d"/"s" fields before matching
+
+       find_error = re.search(r'"error":\{"code":"([^"]+?)"', trace)
+       if find_error:
+               error_code = find_error.group(1)
+       elif trace in known_traces:
+               error_code = known_traces[trace]
+       elif re.search('<title>Wikimedia Error</title>', trace):
+               error_code = 'wikimedia Error'
+       elif re.search('<h1>502 Bad Gateway</h1>', trace):
+               error_code = 'bad Gateway'
+       elif re.search('"exception":"SyntaxError: JSON.parse: unexpected end of data', trace):
+               error_code = 'json-error-unexpected-eod'
+       elif re.search('"exception":"SyntaxError: JSON Parse error: Unexpected EOF"', trace):
+               error_code = 'json-error-unexpected-eof'
+       elif re.search('"exception":"SyntaxError: Unexpected end of input"', trace):
+               error_code = 'unexpected end of input'
+       if error_code:
+               if error_code not in error_counts:
+                       error_counts[error_code] = []
+                       print "New kind of error! ----------> " + error_code
+
+               error_counts[error_code].append(row)
+       else:
+               if trace not in cryptic_counts:
+                       cryptic_counts[trace] = []
+
+               cryptic_counts[trace].append(row)
+
+found_counts = [ str(len(rows)) ]  # first column: total number of rows
+for error in known_error_codes:  # codes first seen at run time are not in this row
+       found_counts.append(str(len(error_counts[error])))
+
+print("\t".join(found_counts))
+
+print "cryptic traces:"
+for trace in sorted(cryptic_counts):
+       print str(len(cryptic_counts[trace])) + "\t" + trace

-- 
To view, visit https://gerrit.wikimedia.org/r/282228
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: I2f5d52f2f1072878f4e976f0f41c2b920e3a7d68
Gerrit-PatchSet: 9
Gerrit-Project: analytics/limn-language-data
Gerrit-Branch: master
Gerrit-Owner: Amire80 <[email protected]>
Gerrit-Reviewer: Amire80 <[email protected]>
Gerrit-Reviewer: KartikMistry <[email protected]>
Gerrit-Reviewer: Nikerabbit <[email protected]>
Gerrit-Reviewer: jenkins-bot <>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to