khannaekta commented on code in PR #600:
URL: https://github.com/apache/madlib/pull/600#discussion_r1145559786
##########
src/madpack/configyml.py:
##########
@@ -29,35 +39,39 @@ def get_version(configdir):
try:
conf = yaml.load(open(configdir + '/Version.yml'))
except:
- print "configyml : ERROR : missing or malformed Version.yml"
+ print("configyml : ERROR : missing or malformed Version.yml")
exit(2)
+ # XXX
Review Comment:
missing comment
##########
src/madpack/madpack.py:
##########
@@ -26,12 +26,12 @@
# Required Python version
py_min_ver = [2, 6]
-# Check python version
-if sys.version_info[:2] < py_min_ver:
- print("ERROR: python version too old ({0}). You need {1} or greater.".
- format('.'.join(map(str, sys.version_info[:3])),
- '.'.join(map(str, py_min_ver))))
- exit(1)
+# XXX py3 Check python version
+#if sys.version_info[:2] < py_min_ver:
Review Comment:
don't we still want to have this check?
##########
src/ports/postgres/13/madpack/SQLCommon.m4:
##########
@@ -0,0 +1,124 @@
+/*
Review Comment:
don't we need a license header for the new files?
##########
src/ports/postgres/CMakeLists.txt:
##########
@@ -201,10 +207,11 @@ function(add_${PORT_LC}_library IN_PORT_VERSION)
# END Legacy Code
- configure_file("${PORT_SOURCE_DIR}/madpack/SQLCommon.m4_in"
- "${CMAKE_CURRENT_BINARY_DIR}/madpack/SQLCommon.m4"
- @ONLY
- )
+ # TODO py3
Review Comment:
can we remove this todo?
##########
src/ports/postgres/modules/graph/pagerank.py_in:
##########
@@ -869,4 +868,8 @@ SELECT {schema_madlib}.pagerank('usage');
"""
return help_string.format(schema_madlib=schema_madlib)
+<<<<<<< HEAD
Review Comment:
left over merge conflict
##########
src/madpack/yaml/LICENSE:
##########
Review Comment:
For more context, what were these yaml files used for previously?
##########
src/ports/postgres/modules/dbscan/test/unit_tests/test_dbscan.py_in:
##########
@@ -350,17 +350,27 @@ class DBSCANTestCase(unittest.TestCase):
self.assertEqual(SD, {}, "SD was not properly cleaned up after calls
to dbscan_leaf() complete")
# Expected results for eps=3.0, min_samples=1
+ # ext [ F, F, F T, F, T, F, F]
ret_ids = [ 5555, 0, 66, 66, 21, 21, 43, 11 ]
cluster_ids = [ 2, 1, 1, 1, 1, 2, 1, 2 ]
external_flags = [ False, False, False, True, False, True, False,
False ]
core_points = { 5555, 0, 66, 21, 43, 11 } # All internal points
should be marked core
expected_res = self.build_expected_dbscan_leaf_output(ret_ids,
cluster_ids, external_flags,
core_points)
- for output_rec in res:
- if output_rec.leaf_id == output_rec.dist_id:
- self.assertEqual(output_rec,
expected_res[output_rec.id]['internal'])
- else:
- self.assertEqual(output_rec,
expected_res[output_rec.id]['external'].pop())
+ # FIXME: This test depends on the order of reading from a dictionary
which is not guaranteed in py3
Review Comment:
Do we plan to revisit these tests later ?
##########
src/ports/postgres/modules/linalg/matrix_ops.py_in:
##########
@@ -67,9 +67,7 @@ def _matrix_column_to_array_format(source_table, row_id,
output_table,
"Not all columns are numeric!")
plpy.execute("""
- CREATE {temp_str} TABLE {output_table}
- m4_ifdef(`__POSTGRESQL__', `',
- `WITH (APPENDONLY=TRUE)') AS
Review Comment:
why did we remove `WITH (APPENDONLY=TRUE)`?
##########
src/ports/postgres/modules/deep_learning/madlib_keras.py_in:
##########
@@ -250,20 +250,20 @@ def fit(schema_madlib, source_table, model,
model_arch_table,
[serialized_weights,
custom_function_map]
)[0]['iteration_result']
except plpy.SPIError as e:
- msg = e.args[0]
- if 'TransAggDetail' in msg:
- e.args[0], detail = msg.split('TransAggDetail')
- elif 'MergeAggDetail' in msg:
- e.args[0], detail = msg.split('MergeAggDetail')
- elif 'FinalAggDetail' in msg:
- e.args[0], detail = msg.split('FinalAggDetail')
- else:
- raise e
- # Extract Traceback from segment, add to
- # DETAIL of error message on coordinator
- spidata = list(e.spidata)
- spidata[1] = detail
- e.spidata = tuple(spidata)
+ # msg = e.args[0]
+ # if 'TransAggDetail' in msg:
+ # e.args[0], detail = msg.split('TransAggDetail')
+ # elif 'MergeAggDetail' in msg:
+ # e.args[0], detail = msg.split('MergeAggDetail')
+ # elif 'FinalAggDetail' in msg:
+ # e.args[0], detail = msg.split('FinalAggDetail')
+ # else:
+ # raise e
+ # # Extract Traceback from segment, add to
+ # # DETAIL of error message on coordinator
+ # spidata = list(e.spidata)
+ # spidata[1] = detail
+ # e.spidata = tuple(spidata)
Review Comment:
Lets add a TODO here to take care of this later.
##########
src/ports/postgres/modules/deep_learning/madlib_keras_predict.py_in:
##########
@@ -158,7 +167,7 @@ class BasePredict():
{self.schema_madlib}.internal_keras_predict
({independent_varname_sql},
$1,
- CASE WHEN {self.test_table}.ctid =
min_ctid.ctid THEN $2 ELSE NULL END,
+ $2,
Review Comment:
The case was added to ensure we pass the weight only once for the very first
row on each segment to speed up performance of predict. Doesn't it work for gp7
or with python 3 ?
##########
src/madpack/madpack.py:
##########
@@ -388,19 +389,21 @@ def _plpy_check(py_min_ver):
# Check PL/Python existence
rv = _internal_run_query("SELECT count(*) AS CNT FROM pg_language "
- "WHERE lanname = 'plpythonu'", True)
+ "WHERE lanname = 'plpython3u'", True)
if int(rv[0]['cnt']) > 0:
info_(this, "> PL/Python already installed", verbose)
else:
info_(this, "> PL/Python not installed", verbose)
info_(this, "> Creating language PL/Python...", True)
try:
- _internal_run_query("CREATE LANGUAGE plpythonu;", True)
+ # XXX py3
+ _internal_run_query("CREATE LANGUAGE plpython3u;", True)
except:
- error_(this, """Cannot create language plpythonu. Please check if
you
+ error_(this, """Cannot create language plpython3u. Please check if
you
have configured and installed portid (your platform) with
`--with-python` option. Stopping installation...""", False)
- raise Exception
+ # XXX py3
+ #raise Exception
Review Comment:
don't we want to raise the exception after erroring out?
##########
src/ports/postgres/modules/glm/multinom.py_in:
##########
@@ -121,7 +121,7 @@ def __multinom_validate_args(
if not (isinstance(category_list[0], int) or
isinstance(category_list[0], float) or
- isinstance(category_list[0], long) or
+ isinstance(category_list[0], int) or
Review Comment:
why did we change from long to int?
##########
src/ports/postgres/modules/recursive_partitioning/test/decision_tree.sql_in:
##########
@@ -289,7 +289,9 @@ SELECT tree_train('dt_golf'::text, -- source table
);
SELECT _print_decision_tree(tree) from train_output;
-SELECT tree_display('train_output', False);
+-- TODO: fix displayLeafNode
Review Comment:
Maybe tag this as a FIXME instead of TODO, to be consistent with other
places where we need to make changes
##########
src/ports/postgres/modules/deep_learning/keras_model_arch_table.sql_in:
##########
@@ -162,9 +162,9 @@ delete_keras_model(
-# Define model architecture. Use tensorflow.keras to define
the model architecture:
<pre class="example">
-from tensorflow import keras
-from tensorflow.keras.models import Sequential
-from tensorflow.keras.layers import Dense
+import keras
Review Comment:
we wanted to use keras that is part of tensorflow. Would this require
separately installing Keras ?
##########
src/ports/postgres/madpack/SQLCommon.m4_in:
##########
@@ -36,15 +36,6 @@ m4_define(<!WithTracebackForwarding!>, <!
try:
$1
except Exception as e:
- global SD
Review Comment:
For context, do we know why were we clearing the SD and GD for any
exceptions raised?
##########
src/ports/postgres/modules/utilities/debug.py_in:
##########
@@ -183,17 +183,17 @@ def plpy_execute(*args, **kwargs):
try:
res = plpy.execute(*args, **kwargs)
except plpy.SPIError as e:
- msg = e.message
- if 'SegmentTraceback' in msg:
- e.message, detail = msg.split('SegmentTraceback')
- else:
- raise e
- # Extract Traceback from segment, add to
- # DETAIL of error message on coordinator
- e.args = (e.message,)
- spidata = list(e.spidata)
- spidata[1] = detail
- e.spidata = tuple(spidata)
+ # msg = e.args[0]
+ # if 'SegmentTraceback' in msg:
+ # e.args[0], detail = msg.split('SegmentTraceback')
+ # else:
+ # raise e
+ # # Extract Traceback from segment, add to
+ # # DETAIL of error message on coordinator
+ # e.args = (e.args[0],)
+ # spidata = list(e.spidata)
+ # spidata[1] = detail
+ # e.spidata = tuple(spidata)
Review Comment:
Add a todo to take care of it in the future
##########
src/madpack/madpack.py:
##########
@@ -760,8 +760,18 @@ def _execute_per_module_unit_test_algo(module, pyfile,
cur_tmpdir):
milliseconds = 0
run_start = datetime.datetime.now()
# Run the python unit test file
- runcmd = ["python", pyfile]
- runenv = os.environ
+ runcmd = ["python3", pyfile]
+ # runenv = os.environ
+ # export
LD_LIBRARY_PATH="/usr/local/greenplum-db-devel/ext/python3.9/lib:$LD_LIBRARY_PATH"
+ # export PATH="/usr/local/greenplum-db-devel/ext/python3.9/bin:$PATH"
+ # export PYTHONHOME=/usr/local/greenplum-db-devel/ext/python3.9
+ # export PYTHONPATH=/usr/local/greenplum-db-devel/ext/python3.9/lib
Review Comment:
commented out code
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]