Alexandros Kosiaris has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/305260

Change subject: postgres: Provision a replication lag check script
......................................................................

postgres: Provision a replication lag check script

Have the postgres::slave class ship a replication lag check script that
can be used to monitor the lag exhibited by postgres slaves

Change-Id: I2f5965eea89d4a9362a37baa1a98ee15bfecd10a
---
A modules/postgresql/files/check_postgres_replication_lag.py
M modules/postgresql/manifests/slave.pp
2 files changed, 114 insertions(+), 0 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/operations/puppet 
refs/changes/60/305260/1

diff --git a/modules/postgresql/files/check_postgres_replication_lag.py 
b/modules/postgresql/files/check_postgres_replication_lag.py
new file mode 100644
index 0000000..0b38b29
--- /dev/null
+++ b/modules/postgresql/files/check_postgres_replication_lag.py
@@ -0,0 +1,105 @@
+#!/usr/bin/env python
+# written in 2.6.6 on CentOS 6. All other versions untested.
+#Header Info
+__author__= 'Kirk Hammond'
+__email__ = '[email protected]'
+__version__ = '1.0'
+__license__ = "GPLv3"
+__maintainer__ = "Kirk Hammond"
+__status__ = "Production"
+__credits__ = "Kirk Hammond"
+
+"""
+This script will check the hot standby replication delay of a postgresql 
database.
+It is more secure to provide the user executing the script with a .pgpass file 
than to include the password in the script.
+"""
+
+
+#import libraries
+from optparse import OptionParser, OptionGroup
+import psycopg2
+import sys
+
+
+# parse command arguemnts and return options
+def parse_args():
+    parser = OptionParser()
+    parser.description = "Check streaming replication delay"
+    parser.version = __version__
+    parser.add_option("-H", "--host", dest="hostname", default="127.0.0.1",
+                      help="Name of the host you are checking")
+    parser.add_option("-O", "--port", dest="port", default="5432",
+                       help="Port you will connect to the database with")
+    parser.add_option("-U", "--user", dest="username", default="postgres",
+                       help="Username for the database")
+    parser.add_option("-P", "--password", dest="password",
+                       help="Password the database")
+    parser.add_option("-D", "--database", dest="database",
+                       help="Datbase you are checking") 
+    parser.add_option("-W", "--warn", dest="warn", default="300",
+                       help="Warning alert delay in seconds") 
+    parser.add_option("-C", "--crit", dest="crit", default="1800",
+                       help="Critical alert delay in seconds") 
+    (options, args) = parser.parse_args()
+    return options
+
+
+# check delay using options from parse_args
+def check_delay(options):
+    username = str(options.username)
+    password = str(options.password)
+    port = str(options.port)
+    hostname = str(options.hostname)
+    database = str(options.database)
+    conn_string = "host=" + hostname + " dbname=" + database + " user=" + 
username + " password=" + password
+    conn = psycopg2.connect(conn_string)
+    cursor = conn.cursor()
+    cursor.execute('SELECT CASE WHEN pg_last_xlog_receive_location() = 
pg_last_xlog_replay_location() THEN 0 ELSE EXTRACT (EPOCH FROM now() - 
pg_last_xact_replay_timestamp()) END AS log_delay;')
+    delay = cursor.fetchall()
+    delay = delay.pop()
+    delay = delay[0]
+    return delay
+
+
+# return results and graphing data to Nagios
+def nagios(delay,options):
+    #nagios return codes
+    UNKNOWN = -1
+    OK = 0
+    WARNING = 1
+    CRITICAL = 2
+    warn = float(options.warn)
+    crit = float(options.crit)
+    #pop delay out of list and get float out of tuple for direct comparison to 
warn/crit float values
+    if delay > crit:
+        print  "CRITICAL - Rep Delay is:", str(delay), 'Seconds', '| Seconds=' 
+ str(delay) + 's' + str(";") + str(warn) + str(";") + str(crit) + str(";" ) + 
str("14400")
+        sys.exit(CRITICAL)
+    elif delay > warn:
+        print "WARNING  - Rep Delay is:", str(delay), 'Seconds', '| Seconds=' 
+ str(delay) + 's' + str(";") + str(warn) + str(";") + str(crit) + str(";" ) + 
str("14400")
+        sys.exit(WARNING)
+    elif delay < warn and delay < crit:
+        print "OK - Rep Delay is:", str(delay), 'Seconds', '| Seconds=' + 
str(delay) + 's' + str(";") + str(warn) + str(";") + str(crit) + str(";" ) + 
str("14400")
+        sys.exit(OK)
+    else:
+        print  "UNKNOWN"
+        sys.exit(UNKNOWN)
+
+
+
+
+# main function, controls flow of script
+def main():
+
+    #call parse_arges and return options for script
+    options = parse_args()
+
+    # execute command using options from parse_args
+    delay = check_delay(options)
+
+    #call nagios process
+    nagios(delay,options)
+
+
+# call main function
+if __name__ == '__main__':
+  main()
diff --git a/modules/postgresql/manifests/slave.pp 
b/modules/postgresql/manifests/slave.pp
index a95d4d0..22bcc88 100644
--- a/modules/postgresql/manifests/slave.pp
+++ b/modules/postgresql/manifests/slave.pp
@@ -83,4 +83,13 @@
             require     => Class['postgresql::server'],
         }
     }
+
+    # Provisioning a script to conduct replication lag checks
+    file { '/usr/lib/nagios/plugins/check_postgres_replication_lag.py':
+        ensure => $ensure,
+        owner  => 'root',
+        group  => 'root',
+        mode   => '0755',
+        source => 
'puppet:///modules/postgres/check_postgres_replication_lag.py',
+    }
 }

-- 
To view, visit https://gerrit.wikimedia.org/r/305260
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I2f5965eea89d4a9362a37baa1a98ee15bfecd10a
Gerrit-PatchSet: 1
Gerrit-Project: operations/puppet
Gerrit-Branch: production
Gerrit-Owner: Alexandros Kosiaris <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to