khannaekta commented on code in PR #600: URL: https://github.com/apache/madlib/pull/600#discussion_r1145559786
########## src/madpack/configyml.py: ########## @@ -29,35 +39,39 @@ def get_version(configdir): try: conf = yaml.load(open(configdir + '/Version.yml')) except: - print "configyml : ERROR : missing or malformed Version.yml" + print("configyml : ERROR : missing or malformed Version.yml") exit(2) + # XXX Review Comment: missing comment ########## src/madpack/madpack.py: ########## @@ -26,12 +26,12 @@ # Required Python version py_min_ver = [2, 6] -# Check python version -if sys.version_info[:2] < py_min_ver: - print("ERROR: python version too old ({0}). You need {1} or greater.". - format('.'.join(map(str, sys.version_info[:3])), - '.'.join(map(str, py_min_ver)))) - exit(1) +# XXX py3 Check python version +#if sys.version_info[:2] < py_min_ver: Review Comment: don't we still want to have this check? ########## src/ports/postgres/13/madpack/SQLCommon.m4: ########## @@ -0,0 +1,124 @@ +/* Review Comment: don't we need a license header for the new files? ########## src/ports/postgres/CMakeLists.txt: ########## @@ -201,10 +207,11 @@ function(add_${PORT_LC}_library IN_PORT_VERSION) # END Legacy Code - configure_file("${PORT_SOURCE_DIR}/madpack/SQLCommon.m4_in" - "${CMAKE_CURRENT_BINARY_DIR}/madpack/SQLCommon.m4" - @ONLY - ) + # TODO py3 Review Comment: can we remove this todo? ########## src/ports/postgres/modules/graph/pagerank.py_in: ########## @@ -869,4 +868,8 @@ SELECT {schema_madlib}.pagerank('usage'); """ return help_string.format(schema_madlib=schema_madlib) +<<<<<<< HEAD Review Comment: left over merge conflict ########## src/madpack/yaml/LICENSE: ########## Review Comment: For more context, what were these yaml files used for previously? ########## src/ports/postgres/modules/dbscan/test/unit_tests/test_dbscan.py_in: ########## @@ -350,17 +350,27 @@ class DBSCANTestCase(unittest.TestCase): self.assertEqual(SD, {}, "SD was not properly cleaned up after calls to dbscan_leaf() complete") # Expected results for eps=3.0, min_samples=1 + # ext [ F, F, F T, F, T, F, F] ret_ids = [ 5555, 0, 66, 66, 21, 21, 43, 11 ] cluster_ids = [ 2, 1, 1, 1, 1, 2, 1, 2 ] external_flags = [ False, False, False, True, False, True, False, False ] core_points = { 5555, 0, 66, 21, 43, 11 } # All internal points should be marked core expected_res = self.build_expected_dbscan_leaf_output(ret_ids, cluster_ids, external_flags, core_points) - for output_rec in res: - if output_rec.leaf_id == output_rec.dist_id: - self.assertEqual(output_rec, expected_res[output_rec.id]['internal']) - else: - self.assertEqual(output_rec, expected_res[output_rec.id]['external'].pop()) + # FIXME: This test depends on the order of reading from a dictionary which is not guaranteed in py3 Review Comment: Do we plan to revisit these tests later ? ########## src/ports/postgres/modules/linalg/matrix_ops.py_in: ########## @@ -67,9 +67,7 @@ def _matrix_column_to_array_format(source_table, row_id, output_table, "Not all columns are numeric!") plpy.execute(""" - CREATE {temp_str} TABLE {output_table} - m4_ifdef(`__POSTGRESQL__', `', - `WITH (APPENDONLY=TRUE)') AS Review Comment: why did we remove `WITH (APPENDONLY=TRUE)`? ########## src/ports/postgres/modules/deep_learning/madlib_keras.py_in: ########## @@ -250,20 +250,20 @@ def fit(schema_madlib, source_table, model, model_arch_table, [serialized_weights, custom_function_map] )[0]['iteration_result'] except plpy.SPIError as e: - msg = e.args[0] - if 'TransAggDetail' in msg: - e.args[0], detail = msg.split('TransAggDetail') - elif 'MergeAggDetail' in msg: - e.args[0], detail = msg.split('MergeAggDetail') - elif 'FinalAggDetail' in msg: - e.args[0], detail = msg.split('FinalAggDetail') - else: - raise e - # Extract Traceback from segment, add to - # DETAIL of error message on coordinator - spidata = list(e.spidata) - spidata[1] = detail - e.spidata = tuple(spidata) + # msg = e.args[0] + # if 'TransAggDetail' in msg: + # e.args[0], detail = msg.split('TransAggDetail') + # elif 'MergeAggDetail' in msg: + # e.args[0], detail = msg.split('MergeAggDetail') + # elif 'FinalAggDetail' in msg: + # e.args[0], detail = msg.split('FinalAggDetail') + # else: + # raise e + # # Extract Traceback from segment, add to + # # DETAIL of error message on coordinator + # spidata = list(e.spidata) + # spidata[1] = detail + # e.spidata = tuple(spidata) Review Comment: Lets add a TODO here to take care of this later. ########## src/ports/postgres/modules/deep_learning/madlib_keras_predict.py_in: ########## @@ -158,7 +167,7 @@ class BasePredict(): {self.schema_madlib}.internal_keras_predict ({independent_varname_sql}, $1, - CASE WHEN {self.test_table}.ctid = min_ctid.ctid THEN $2 ELSE NULL END, + $2, Review Comment: The case was added to ensure we pass the weight only once for the very first row on each segment to speed up performance of predict. Doesn't it work for gp7 or with python 3 ? ########## src/madpack/madpack.py: ########## @@ -388,19 +389,21 @@ def _plpy_check(py_min_ver): # Check PL/Python existence rv = _internal_run_query("SELECT count(*) AS CNT FROM pg_language " - "WHERE lanname = 'plpythonu'", True) + "WHERE lanname = 'plpython3u'", True) if int(rv[0]['cnt']) > 0: info_(this, "> PL/Python already installed", verbose) else: info_(this, "> PL/Python not installed", verbose) info_(this, "> Creating language PL/Python...", True) try: - _internal_run_query("CREATE LANGUAGE plpythonu;", True) + # XXX py3 + _internal_run_query("CREATE LANGUAGE plpython3u;", True) except: - error_(this, """Cannot create language plpythonu. Please check if you + error_(this, """Cannot create language plpython3u. Please check if you have configured and installed portid (your platform) with `--with-python` option. Stopping installation...""", False) - raise Exception + # XXX py3 + #raise Exception Review Comment: don't we want to raise the exception after erroring out? ########## src/ports/postgres/modules/glm/multinom.py_in: ########## @@ -121,7 +121,7 @@ def __multinom_validate_args( if not (isinstance(category_list[0], int) or isinstance(category_list[0], float) or - isinstance(category_list[0], long) or + isinstance(category_list[0], int) or Review Comment: why did we change from long to int? ########## src/ports/postgres/modules/recursive_partitioning/test/decision_tree.sql_in: ########## @@ -289,7 +289,9 @@ SELECT tree_train('dt_golf'::text, -- source table ); SELECT _print_decision_tree(tree) from train_output; -SELECT tree_display('train_output', False); +-- TODO: fix displayLeafNode Review Comment: Maybe tag this as a FIXME instead of TODO, to be consistent with other places where we need to make changes ########## src/ports/postgres/modules/deep_learning/keras_model_arch_table.sql_in: ########## @@ -162,9 +162,9 @@ delete_keras_model( -# Define model architecture. Use tensorflow.keras to define the model architecture: <pre class="example"> -from tensorflow import keras -from tensorflow.keras.models import Sequential -from tensorflow.keras.layers import Dense +import keras Review Comment: we wanted to use keras that is part of tensorflow. Would this require separately installing Keras ? ########## src/ports/postgres/madpack/SQLCommon.m4_in: ########## @@ -36,15 +36,6 @@ m4_define(<!WithTracebackForwarding!>, <! try: $1 except Exception as e: - global SD Review Comment: For context, do we know why were we clearing the SD and GD for any exceptions raised? ########## src/ports/postgres/modules/utilities/debug.py_in: ########## @@ -183,17 +183,17 @@ def plpy_execute(*args, **kwargs): try: res = plpy.execute(*args, **kwargs) except plpy.SPIError as e: - msg = e.message - if 'SegmentTraceback' in msg: - e.message, detail = msg.split('SegmentTraceback') - else: - raise e - # Extract Traceback from segment, add to - # DETAIL of error message on coordinator - e.args = (e.message,) - spidata = list(e.spidata) - spidata[1] = detail - e.spidata = tuple(spidata) + # msg = e.args[0] + # if 'SegmentTraceback' in msg: + # e.args[0], detail = msg.split('SegmentTraceback') + # else: + # raise e + # # Extract Traceback from segment, add to + # # DETAIL of error message on coordinator + # e.args = (e.args[0],) + # spidata = list(e.spidata) + # spidata[1] = detail + # e.spidata = tuple(spidata) Review Comment: Add a todo to take care of it in the future ########## src/madpack/madpack.py: ########## @@ -760,8 +760,18 @@ def _execute_per_module_unit_test_algo(module, pyfile, cur_tmpdir): milliseconds = 0 run_start = datetime.datetime.now() # Run the python unit test file - runcmd = ["python", pyfile] - runenv = os.environ + runcmd = ["python3", pyfile] + # runenv = os.environ + # export LD_LIBRARY_PATH="/usr/local/greenplum-db-devel/ext/python3.9/lib:$LD_LIBRARY_PATH" + # export PATH="/usr/local/greenplum-db-devel/ext/python3.9/bin:$PATH" + # export PYTHONHOME=/usr/local/greenplum-db-devel/ext/python3.9 + # export PYTHONPATH=/usr/local/greenplum-db-devel/ext/python3.9/lib Review Comment: commented out code -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: dev-unsubscr...@madlib.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org