Muehlenhoff has uploaded a new change for review. ( https://gerrit.wikimedia.org/r/398272 )
Change subject: Add Prometheus exporter for Blazegraph ...................................................................... Add Prometheus exporter for Blazegraph Based on previous Diamond collector Bug: T182857 Change-Id: I352643896d956f59de8dfdcaebc12e8b7b6f5095 --- A prometheus-blazegraph-exporter 1 file changed, 181 insertions(+), 0 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/operations/debs/prometheus-blazegraph-exporter refs/changes/72/398272/1 diff --git a/prometheus-blazegraph-exporter b/prometheus-blazegraph-exporter new file mode 100755 index 0000000..36c0e5e --- /dev/null +++ b/prometheus-blazegraph-exporter @@ -0,0 +1,181 @@ +#!/usr/bin/python +# Copyright 2017 Moritz Muehlenhoff +# Filippo Giunchedi +# Wikimedia Foundation +# Copyright 2015 Stanislav Malyshev +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
import argparse
import logging
import sys
import time
import urllib2
import json
import urllib
import datetime
import yaml

from contextlib import closing
from datetime import timedelta, tzinfo
from dateutil.parser import parse
from xml.etree import ElementTree

from prometheus_client import start_http_server, Summary
from prometheus_client.core import (CounterMetricFamily, GaugeMetricFamily,
                                    REGISTRY)

log = logging.getLogger(__name__)

ZERO = timedelta(0)


class UTC(tzinfo):
    """Fixed zero-offset tzinfo, used to build timezone-aware datetimes."""

    def utcoffset(self, dt):
        return ZERO

    def tzname(self, dt):
        return "UTC"

    def dst(self, dt):
        return ZERO


utc = UTC()


class PrometheusBlazeGraphExporter(object):
    """Prometheus collector exposing Blazegraph counters and SPARQL stats.

    On every scrape the collector reads a fixed list of performance counters
    from the Blazegraph ``counters`` HTTP endpoint and runs one SPARQL query
    that returns the triple count and the ``schema:dateModified`` timestamp
    of <http://www.wikidata.org>.
    """

    scrape_duration = Summary(
        'blazegraph_scrape_duration_seconds',
        'Blazegraph exporter scrape duration')

    url = 'http://localhost:9999/bigdata/'
    counters = []
    sparql_endpoint = 'http://localhost:9999/bigdata/namespace/wdq/sparql'

    # (Blazegraph counter path, metric family class, exported metric name).
    # Families are instantiated per scrape because add_metric() accumulates
    # samples on the family object.
    _counter_metrics = (
        ('/Query Engine/queryStartCount',
         CounterMetricFamily, 'blazegraph_queries_start'),
        ('/Query Engine/queryDoneCount',
         CounterMetricFamily, 'blazegraph_queries_done'),
        ('/Query Engine/queryErrorCount',
         CounterMetricFamily, 'blazegraph_queries_error'),
        ('/Query Engine/queriesPerSecond',
         GaugeMetricFamily, 'blazegraph_queries_per_second'),
        ('/Query Engine/GeoSpatial/geoSpatialSearchRequests',
         GaugeMetricFamily, 'blazegraph_geospatial_search_requests'),
        ('/Journal/bytesReadPerSec',
         GaugeMetricFamily, 'blazegraph_journal_bytes_read_per_second'),
        ('/Journal/bytesWrittenPerSec',
         GaugeMetricFamily, 'blazegraph_journal_bytes_written_per_second'),
        ('/Journal/extent',
         GaugeMetricFamily, 'blazegraph_journal_extent'),
        ('/Journal/commitCount',
         CounterMetricFamily, 'blazegraph_journal_commit_count'),
        ('/Journal/commit/totalCommitSecs',
         GaugeMetricFamily, 'blazegraph_journal_total_commit_seconds'),
        ('/Journal/commit/flushWriteSetSecs',
         GaugeMetricFamily, 'blazegraph_journal_flush_write_set_seconds'),
        ('/JVM/Memory/DirectBufferPool/default/bytesUsed',
         GaugeMetricFamily,
         'blazegraph_jvm_memory_direct_buffer_pool_default_bytes_used'),
        ('/JVM/Memory/Runtime Free Memory',
         GaugeMetricFamily, 'blazegraph_jvm_memory_runtime_free_memory'),
        ('/JVM/Memory/Runtime Max Memory',
         GaugeMetricFamily, 'blazegraph_jvm_memory_runtime_max_memory'),
        ('/JVM/Memory/Runtime Total Memory',
         GaugeMetricFamily, 'blazegraph_jvm_memory_runtime_total_memory'),
        ('/JVM/Memory/Garbage Collectors/G1 Old Generation/Collection Count',
         CounterMetricFamily,
         'blazegraph_jvm_memory_gc_g1_old_collection_count'),
        ('/JVM/Memory/Garbage Collectors/G1 Old Generation/'
         'Cumulative Collection Time',
         GaugeMetricFamily,
         'blazegraph_jvm_memory_gc_g1_old_cumulative_collection_time'),
        ('/JVM/Memory/Garbage Collectors/G1 Young Generation/Collection Count',
         CounterMetricFamily,
         'blazegraph_jvm_memory_gc_g1_young_collection_count'),
        ('/JVM/Memory/Garbage Collectors/G1 Young Generation/'
         'Cumulative Collection Time',
         GaugeMetricFamily,
         'blazegraph_jvm_memory_gc_g1_young_cumulative_collection_time'),
    )

    _sparql_query = """ prefix schema: <http://schema.org/>
        SELECT * WHERE { {
          SELECT ( COUNT( * ) AS ?count ) { ?s ?p ?o }
        } UNION {
          SELECT * WHERE { <http://www.wikidata.org> schema:dateModified ?y }
        } }"""

    def query_to_metric(self, qname):
        """Turn a counter path into a dotted name without a leading dot."""
        return qname.replace(' ', '_').replace('/', '.').lstrip('.')

    def get_counter(self, cnt_name):
        """Fetch one counter value from the Blazegraph counters endpoint.

        :param cnt_name: slash-separated counter path, e.g.
            '/Journal/extent'.
        :returns: the ``value`` attribute (a string) of the first ``<c>``
            element whose ``name`` equals the last path component, or None
            when no such element (or no value attribute) is present.
        :raises: whatever urllib2.urlopen / ElementTree.fromstring raise on
            HTTP or XML errors.
        """
        # Not sure why we need depth but some counters don't work without it
        url = self.url + "counters?depth=10&" + \
            urllib.urlencode({'path': cnt_name})

        req = urllib2.Request(url)
        req.add_header('Accept', 'application/xml')
        # closing() releases the HTTP connection even if parsing fails.
        with closing(urllib2.urlopen(req)) as response:
            el = ElementTree.fromstring(response.read())
        last_name = cnt_name.split('/')[-1]

        # Element.iter() replaces the deprecated getiterator().
        for cnt in el.iter('c'):
            if cnt.attrib.get('name') == last_name:
                return cnt.attrib.get('value')
        return None

    def execute_sparql(self, query):
        """Run *query* against the SPARQL endpoint and return decoded JSON."""
        params = urllib.urlencode({'format': 'json', 'query': query})
        request = urllib2.Request(self.sparql_endpoint + "?" + params)
        with closing(urllib2.urlopen(request)) as response:
            return json.loads(response.read())

    def _collect_counters(self):
        """Return one metric family per entry of ``_counter_metrics``."""
        families = []
        for path, family_cls, name in self._counter_metrics:
            raw_value = self.get_counter(path)
            if raw_value is None:
                # The counter was not present in the response; keep the
                # series but export NaN so the scrape as a whole survives.
                log.warning('Unknown metric %r', path)
                value = float('nan')
            else:
                try:
                    value = float(raw_value)
                except ValueError:
                    value = float('nan')

            family = family_cls(name, '')
            family.add_metric([], value)
            families.append(family)
        return families

    def _collect_sparql(self):
        """Return the triple-count and update-lag metric families."""
        families = []
        data = self.execute_sparql(self._sparql_query)
        for binding in data['results']['bindings']:
            if 'count' in binding:
                triple_count = binding['count']['value']
                # A gauge: the number of triples can shrink as well as grow.
                triple_metric = GaugeMetricFamily('blazegraph_triples', '')
                triple_metric.add_metric([], float(triple_count))
                families.append(triple_metric)
            elif 'y' in binding:
                last_updated = parse(binding['y']['value'])
                if last_updated.tzinfo is None:
                    # Subtracting a naive datetime from an aware one raises
                    # TypeError. NOTE(review): assumes naive timestamps are
                    # in UTC -- confirm against the updater.
                    last_updated = last_updated.replace(tzinfo=utc)
                lag = datetime.datetime.now(utc) - last_updated
                # A gauge: lag drops whenever the updater catches up.
                lag_metric = GaugeMetricFamily('blazegraph_lag', '')
                lag_metric.add_metric([], float(lag.total_seconds()))
                families.append(lag_metric)
            else:
                raise ValueError(
                    'SPARQL binding returned with unexpected key')
        return families

    def collect(self):
        """Collect all metric families for one scrape.

        The work is done eagerly inside the ``scrape_duration`` timer.
        Decorating a generator function with ``Summary.time()`` would only
        measure the creation of the generator object, not the HTTP requests
        performed while it is consumed.

        :returns: list of metric families, SPARQL-derived ones first.
        """
        with self.scrape_duration.time():
            counter_families = self._collect_counters()
            sparql_families = self._collect_sparql()
        return sparql_families + counter_families


def main():
    """Parse arguments, register the collector and serve metrics forever.

    :returns: 1 when interrupted with Ctrl-C (used as the exit status).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-l', '--listen', metavar='ADDRESS',
                        help='Listen on this address', default=':9193')
    parser.add_argument('-d', '--debug', action='store_true',
                        help='Enable debug logging')
    args = parser.parse_args()

    if args.debug:
        logging.basicConfig(level=logging.DEBUG)
    else:
        logging.basicConfig(level=logging.WARNING)

    # rpartition() also accepts a bare port ("9193"), which made the former
    # split(':', 1) fail with an unpacking error.
    address, _, port = args.listen.rpartition(':')
    try:
        port = int(port)
    except ValueError:
        parser.error('invalid listen address %r, expected [ADDRESS]:PORT'
                     % args.listen)

    log.info('Starting blazegraph_exporter on %s:%s', address, port)

    REGISTRY.register(PrometheusBlazeGraphExporter())
    start_http_server(port, addr=address)

    try:
        while True:
            time.sleep(1)
    except KeyboardInterrupt:
        return 1


if __name__ == "__main__":
    sys.exit(main())

# --
# To view, visit https://gerrit.wikimedia.org/r/398272
# To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
#
# Gerrit-MessageType: newchange
# Gerrit-Change-Id: I352643896d956f59de8dfdcaebc12e8b7b6f5095
# Gerrit-PatchSet: 1
# Gerrit-Project: operations/debs/prometheus-blazegraph-exporter
# Gerrit-Branch: master
# Gerrit-Owner: Muehlenhoff <mmuhlenh...@wikimedia.org>
#
# _______________________________________________
# MediaWiki-commits mailing list
# MediaWiki-commits@lists.wikimedia.org
# https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits