This is an automated email from the ASF dual-hosted git repository. domino pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/madlib.git
commit eab9213a1d698f4029fa54df6152320afdf2a1bc Author: Domino Valdano <dvald...@pivotal.io> AuthorDate: Thu May 7 23:48:34 2020 +0000 Add utilities/debug.py_in Starting a module of debugging functions we can use, to easily turn on or off certain kinds of debug messages. debug.print_timings(force=False) : Used in madlib_keras_fit_multiple.py_in to print timing information for fit_multiple() Breaks down run_training() into _hop_time_, _uda_time_, _truncate_time_, _copy_time_, and _delete_time_ Enable all by uncommenting: debug.timings_enabled = True at top of madlib_keras_fit_multiple.py_in Enable a single call to debug.print_timings(), by passing force=True. Will print even if debug.timings_enabled is not set. debug.print_mst_keys(force=False) : print a map between mst_keys and seg_id's as the mst_keys hop around from segment to segment... to verify correct MOP behavior. debug.plpy.execute(query) prints query, EXPLAIN ANALYZE, and timing of a query while executing it --- src/ports/postgres/modules/utilities/debug.py_in | 145 +++++++++++++++++++++ .../postgres/modules/utilities/utilities.py_in | 21 --- 2 files changed, 145 insertions(+), 21 deletions(-) diff --git a/src/ports/postgres/modules/utilities/debug.py_in b/src/ports/postgres/modules/utilities/debug.py_in new file mode 100644 index 0000000..ccdceba --- /dev/null +++ b/src/ports/postgres/modules/utilities/debug.py_in @@ -0,0 +1,145 @@ +import plpy as plpy_orig +import time +from deep_learning.madlib_keras_model_selection import ModelSelectionSchema +from deep_learning.madlib_keras_helper import DISTRIBUTION_KEY_COLNAME + +mst_key_col = ModelSelectionSchema.MST_KEY +dist_key_col = DISTRIBUTION_KEY_COLNAME + +start_times = dict() +timings_enabled = False + +def start_timing(msg, force=False): + if timings_enabled or force: + start_times[msg] = time.time() + plpy_orig.info("|_{}_time_HDR|Elapsed (s)|Current|Current (s)|Start|Start (s)|".format(msg)) + +def print_timing(msg, force=False): + if timings_enabled or force: + try: + start_time = start_times[msg] + except: + raise Exception( + "print_timing({msg}) called with no start_timing({msg})!".format(msg=msg) + ) + current_time = time.time() + plpy_orig.info( + '|_{0}_time|{1}|{2}|{3}|{4}|{5}'.format( + msg, + current_time - start_time, + time.ctime(current_time), + current_time, + time.ctime(start_time), + start_time + ) + ) + +mst_keys_enabled = False +def print_mst_keys(table, label, force=False): + if not (mst_keys_enabled or force): + return + + res = plpy_orig.execute(""" + SELECT gp_segment_id AS seg_id, + {mst_key_col}, + {dist_key_col} + FROM {table} ORDER BY {dist_key_col} + """.format(dist_key_col=dist_key_col, + table=table, + mst_key_col=mst_key_col)) + + plpy_orig.info("|_MST_KEYS_{label}_HDR|mst_key|seg_id|dist_key|table".format(**locals())) + if not res: + plpy_orig.error("{table} is empty! Aborting".format(table=table)) + + for r in res: + seg_id = r['seg_id'] + mst_key = r['mst_key'] + dist_key = r[dist_key_col] + plpy_orig.info("|_MST_KEYS_{label}|{mst_key}|{seg_id}|{dist_key}|{table}".format(**locals())) + +plpy_execute_enabled = False +def plpy_execute(*args, **kwargs): + """ debug.plpy.execute(sql, ..., force=False) + + Replace plpy.execute(sql, ...) with + debug.plpy.execute(sql, ...) to debug + a query. Shows the query itself, the + EXPLAIN of it, and how long the query + takes to execute. + """ + + force = False + if 'force' in kwargs: + del kwargs['force'] + force = force['force'] + + plpy = plpy_orig # override global plpy, + # to avoid infinite recursion + + if not (plpy_execute_enabled or force): + return plpy.execute(*args, **kwargs) + + if len(args) > 0: + sql = args[0] + else: + raise TypeError('debug.plpy.execute() takes at least 1 parameter, 0 passed') + + if type(sql) == str: # can't print if a PLyPlan object + plpy.info(sql) + + # Print EXPLAIN of sql command + res = plpy.execute("EXPLAIN " + sql, *args[1:], **kwargs) + for r in res: + plpy.info(r['QUERY PLAN']) + + # Run actual sql command, with timing + start = time.time() + res = plpy.execute(*args, **kwargs) + + # Print how long execution of query took + plpy.info("Query took {0}s".format(time.time() - start)) + if res: + plpy.info("Query returned {} row(s)".format(len(res))) + else: + plpy.info("Query returned 0 rows") + return res + +plpy_info_enabled = False +def plpy_info(*args, **kwargs): + """ plpy_info(..., force=False) + + plpy.info() if enabled, otherwise do nothing + """ + + force = False + if 'force' in kwargs: + del kwargs['force'] + force = kwargs['force'] + + if plpy_info_enabled or force: + plpy_orig.info(*args, **kwargs) + +plpy_debug_enabled = False +def plpy_debug(*args, **kwargs): + """ debug.plpy.debug(..., force=False) + + Behaves like plpy.debug() if disabled (printing only + if DEBUG level is set high enough), but becomes a + plpy.info() if enabled. + """ + + force = False + if 'force' in kwargs: + del kwargs['force'] + force = kwargs['force'] + + if plpy_debug_enabled or force: + plpy_orig.info(*args, **kwargs) + else: + plpy_orig.debug(*args, **kwargs) + +class plpy: + execute = staticmethod(plpy_execute) + info = staticmethod(plpy_info) + debug = staticmethod(plpy_debug) diff --git a/src/ports/postgres/modules/utilities/utilities.py_in b/src/ports/postgres/modules/utilities/utilities.py_in index eb507d9..3cb219a 100644 --- a/src/ports/postgres/modules/utilities/utilities.py_in +++ b/src/ports/postgres/modules/utilities/utilities.py_in @@ -21,29 +21,8 @@ from validate_args import unquote_ident from validate_args import drop_tables import plpy - m4_changequote(`<!', `!>') -def plpy_execute_debug(sql, *args, **kwargs): - """ Replace plpy.execute(sql, ...) with - plpy_execute_debug(sql, ...) to debug - a query. Shows the query itself, the - EXPLAIN of it, and how long the query - takes to execute. - """ - plpy.info(sql) # Print sql command - - # Print EXPLAIN of sql command - res = plpy.execute("EXPLAIN " + sql, *args) - for r in res: - plpy.info(r['QUERY PLAN']) - - # Run actual sql command, with timing - start = time.time() - plpy.execute(sql, *args) - - # Print how long execution of query took - plpy.info("Query took {0}s".format(time.time() - start)) def has_function_properties(): """ __HAS_FUNCTION_PROPERTIES__ variable defined during configure """