This is an automated email from the ASF dual-hosted git repository. nkak pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/madlib.git
commit ec3eee7a9bd428bbed18f82b37514c3ae717a361 Author: Nikhil Kak <n...@pivotal.io> AuthorDate: Thu Mar 19 12:27:11 2020 -0700 DL: add tests for asserting that guc values are unchanged JIRA: MADLIB-1406 This commit adds tests for making sure that the gpdb gucs that we use for constant folding and subtransaction truncate get reset after calling the respective fit functions. 1. Added a few helper functions to check for gpdb and pg versions. 2. Also enabled model_averaging_e2e tests for pg --- .../deep_learning/test/madlib_keras_fit.sql_in | 1 - .../test/madlib_keras_model_averaging_e2e.sql_in | 7 ++--- .../test/madlib_keras_model_selection.sql_in | 2 +- .../test/madlib_keras_model_selection_e2e.sql_in | 4 +++ .../test/madlib_keras_transfer_learning.sql_in | 3 +- .../utilities/test/unit_tests/test_utilities.py_in | 28 ++++++++++++++++++ .../postgres/modules/utilities/utilities.py_in | 7 ++++- .../postgres/modules/utilities/utilities.sql_in | 33 ++++++++++++++++++++++ 8 files changed, 76 insertions(+), 9 deletions(-) diff --git a/src/ports/postgres/modules/deep_learning/test/madlib_keras_fit.sql_in b/src/ports/postgres/modules/deep_learning/test/madlib_keras_fit.sql_in index 23ee058..a35eb6b 100644 --- a/src/ports/postgres/modules/deep_learning/test/madlib_keras_fit.sql_in +++ b/src/ports/postgres/modules/deep_learning/test/madlib_keras_fit.sql_in @@ -417,4 +417,3 @@ SELECT madlib_keras_fit( $$ optimizer=SGD(lr=0.01, decay=1e-6, nesterov=True), loss='categorical_crossentropy', metrics=['accuracy']$$::text, $$ batch_size=2, epochs=1, verbose=0 $$::text, 3); - diff --git a/src/ports/postgres/modules/deep_learning/test/madlib_keras_model_averaging_e2e.sql_in b/src/ports/postgres/modules/deep_learning/test/madlib_keras_model_averaging_e2e.sql_in index bbb757b..20e2332 100644 --- a/src/ports/postgres/modules/deep_learning/test/madlib_keras_model_averaging_e2e.sql_in +++ b/src/ports/postgres/modules/deep_learning/test/madlib_keras_model_averaging_e2e.sql_in @@ -26,9 +26,6 @@ m4_include(`SQLCommon.m4') `\1../../modules/deep_learning/test/madlib_keras_iris.setup.sql_in' ) -m4_changequote(`<!', `!>') -m4_ifdef(<!__POSTGRESQL__!>, <!!>, <! --- Multiple models End-to-End test DROP TABLE if exists pg_temp.iris_model, pg_temp.iris_model_summary; SELECT madlib_keras_fit( 'iris_data_packed', @@ -40,6 +37,7 @@ SELECT madlib_keras_fit( 3, FALSE ); +SELECT CASE WHEN is_ver_greater_than_gp_640_or_pg_11() is TRUE THEN assert_guc_value('plan_cache_mode', 'auto') END; SELECT assert( model_arch_table = 'iris_model_arch' AND @@ -69,6 +67,7 @@ SELECT madlib_keras_predict( 'pg_temp.iris_predict', 'prob', FALSE); +SELECT CASE WHEN is_ver_greater_than_gp_640_or_pg_11() is TRUE THEN assert_guc_value('plan_cache_mode', 'auto') END; -- Run Evaluate DROP TABLE IF EXISTS pg_temp.evaluate_out; @@ -82,6 +81,7 @@ SELECT assert(loss >= 0 AND metric >= 0 AND metrics_type = '{accuracy}', 'Evaluate output validation failed. Actual:' || __to_char(evaluate_out)) FROM pg_temp.evaluate_out; +SELECT CASE WHEN is_ver_greater_than_gp_640_or_pg_11() is TRUE THEN assert_guc_value('plan_cache_mode', 'auto') END; -- Test for one-hot encoded user input data DROP TABLE if exists iris_model, iris_model_summary, iris_model_info; @@ -137,4 +137,3 @@ SELECT assert(loss >= 0 AND metric >= 0 AND metrics_type = '{accuracy}', 'Evaluate output validation failed. Actual:' || __to_char(evaluate_out)) FROM evaluate_out; -!>) diff --git a/src/ports/postgres/modules/deep_learning/test/madlib_keras_model_selection.sql_in b/src/ports/postgres/modules/deep_learning/test/madlib_keras_model_selection.sql_in index d101135..391beb7 100644 --- a/src/ports/postgres/modules/deep_learning/test/madlib_keras_model_selection.sql_in +++ b/src/ports/postgres/modules/deep_learning/test/madlib_keras_model_selection.sql_in @@ -349,7 +349,7 @@ SELECT madlib_keras_fit_multiple_model( -- The default value of the guc 'dev_opt_unsafe_truncate_in_subtransaction' is 'off' -- but we change it to 'on' in fit_multiple.py. Assert that the value is -- reset after calling fit_multiple -select assert_guc_value('dev_opt_unsafe_truncate_in_subtransaction', 'off'); +SELECT CASE WHEN is_ver_greater_than_gp_640_or_pg_11() is TRUE THEN assert_guc_value('dev_opt_unsafe_truncate_in_subtransaction', 'off') END; SELECT assert(COUNT(*)=4, 'Info table must have exactly same rows as the number of msts.') FROM iris_multiple_model_info; diff --git a/src/ports/postgres/modules/deep_learning/test/madlib_keras_model_selection_e2e.sql_in b/src/ports/postgres/modules/deep_learning/test/madlib_keras_model_selection_e2e.sql_in index 818a013..d738f48 100644 --- a/src/ports/postgres/modules/deep_learning/test/madlib_keras_model_selection_e2e.sql_in +++ b/src/ports/postgres/modules/deep_learning/test/madlib_keras_model_selection_e2e.sql_in @@ -112,6 +112,8 @@ SELECT madlib_keras_fit_multiple_model( FALSE ); +SELECT CASE WHEN is_ver_greater_than_gp_640_or_pg_11() is TRUE THEN assert_guc_value('plan_cache_mode', 'auto') END; + SELECT assert( model_arch_table = 'iris_model_arch' AND validation_table is NULL AND @@ -142,6 +144,7 @@ SELECT madlib_keras_predict( 'prob', NULL, 1); +SELECT CASE WHEN is_ver_greater_than_gp_640_or_pg_11() is TRUE THEN assert_guc_value('plan_cache_mode', 'auto') END; -- Run Evaluate DROP TABLE IF EXISTS evaluate_out; @@ -151,6 +154,7 @@ SELECT madlib_keras_evaluate( 'evaluate_out', NULL, 1); +SELECT CASE WHEN is_ver_greater_than_gp_640_or_pg_11() is TRUE THEN assert_guc_value('plan_cache_mode', 'auto') END; SELECT assert(loss >= 0 AND metric >= 0 AND diff --git a/src/ports/postgres/modules/deep_learning/test/madlib_keras_transfer_learning.sql_in b/src/ports/postgres/modules/deep_learning/test/madlib_keras_transfer_learning.sql_in index c5c8a93..4334fa6 100644 --- a/src/ports/postgres/modules/deep_learning/test/madlib_keras_transfer_learning.sql_in +++ b/src/ports/postgres/modules/deep_learning/test/madlib_keras_transfer_learning.sql_in @@ -254,8 +254,7 @@ SELECT madlib_keras_fit_multiple_model( -- The default value of the guc 'dev_opt_unsafe_truncate_in_subtransaction' is 'off' -- but we change it to 'on' in fit_multiple.py. Assert that the value is -- reset after calling fit_multiple -select assert_guc_value('dev_opt_unsafe_truncate_in_subtransaction', 'off'); - +SELECT CASE WHEN is_ver_greater_than_gp_640_or_pg_11() is TRUE THEN assert_guc_value('dev_opt_unsafe_truncate_in_subtransaction', 'off') END; SELECT assert( 5 IN (SELECT mst_key FROM iris_multiple_model), diff --git a/src/ports/postgres/modules/utilities/test/unit_tests/test_utilities.py_in b/src/ports/postgres/modules/utilities/test/unit_tests/test_utilities.py_in index 0318516..9df924e 100644 --- a/src/ports/postgres/modules/utilities/test/unit_tests/test_utilities.py_in +++ b/src/ports/postgres/modules/utilities/test/unit_tests/test_utilities.py_in @@ -378,5 +378,33 @@ class UtilitiesTestCase(unittest.TestCase): self.py_list, self.sql_array_col, self.colname, self.coltype, self.has_one_ele, "dummy_module") + def test_is_platform_gp6_or_up_input_gpdb6(self): + + self.subject.is_platform_pg = Mock(return_value = False) + + self.plpy_mock_execute.side_effect = [[{ 'version':'PostgreSQL 9.4.24 (Greenplum Database 6.3.0 build commit:aabd)'}]] + self.assertTrue(self.subject.is_platform_gp6_or_up()) + + def test_is_platform_gp6_or_up_input_gpdb5(self): + + self.subject.is_platform_pg = Mock(return_value = False) + + self.plpy_mock_execute.side_effect = [[{ 'version':'PostgreSQL 8.3.23 (Greenplum Database 5.24.0 build commit:bdca)'}]] + self.assertFalse(self.subject.is_platform_gp6_or_up()) + + def test_is_platform_gp6_or_up_input_pg(self): + + self.subject.is_platform_pg = Mock(return_value = True) + + self.plpy_mock_execute.side_effect = [[{ 'version':'PostgreSQL 10.7'}]] + self.assertFalse(self.subject.is_platform_gp6_or_up()) + + def test_is_platform_gp6_or_up_input_gpdb7(self): + + self.subject.is_platform_pg = Mock(return_value = False) + + self.plpy_mock_execute.side_effect = [[{ 'version':'PostgreSQL 9.4.24 (Greenplum Database 7.1.0 build commit:aabd)'}]] + self.assertTrue(self.subject.is_platform_gp6_or_up()) + if __name__ == '__main__': unittest.main() diff --git a/src/ports/postgres/modules/utilities/utilities.py_in b/src/ports/postgres/modules/utilities/utilities.py_in index 73ff894..fca3a6b 100644 --- a/src/ports/postgres/modules/utilities/utilities.py_in +++ b/src/ports/postgres/modules/utilities/utilities.py_in @@ -1202,7 +1202,7 @@ def rename_table(schema_madlib, orig_name, new_name): format(orig_name)) return __do_rename_and_get_new_name(orig_name, new_name, orig_table_schema, new_table_schema, new_table_name) - +# ------------------------------------------------------------------------------ def __do_rename_and_get_new_name(orig_name, new_name, orig_table_schema, new_table_schema, new_table_name): """ @@ -1276,3 +1276,8 @@ def __do_rename_and_get_new_name(orig_name, new_name, orig_table_schema, """ALTER TABLE {new_table_schema}.{interim_temp_name} RENAME to {new_table_name}""" .format(**locals())) return new_name +# ------------------------------------------------------------------------------ + +def is_platform_gp6_or_up(): + version_wrapper = __mad_version() + return not is_platform_pg() and not version_wrapper.is_gp_version_less_than('6.0') diff --git a/src/ports/postgres/modules/utilities/utilities.sql_in b/src/ports/postgres/modules/utilities/utilities.sql_in index c028661..9e5deee 100644 --- a/src/ports/postgres/modules/utilities/utilities.sql_in +++ b/src/ports/postgres/modules/utilities/utilities.sql_in @@ -541,3 +541,36 @@ BEGIN RETURN 0; END; $$ LANGUAGE plpgsql; + +-- A few of the gucs like plan_cache_mode and dev_opt_unsafe_truncate_in_subtransaction +-- are only available in either > pg 11 or > gpdb 6.5. Using this function we +-- can make sure to run the guc assertion test (assert_guc_value) on the correct +-- platform versions. +CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.is_ver_greater_than_gp_640_or_pg_11() +RETURNS BOOLEAN AS $$ +PythonFunctionBodyOnly(utilities, utilities, is_gp_version_less_than) + from utilities.utilities import __mad_version + from utilities.utilities import is_platform_pg + from utilities.utilities import is_pg_major_version_less_than + if is_platform_pg: + is_pg_major_less_than_12 = is_pg_major_version_less_than(None, 12) + return not is_pg_major_less_than_12 + else: + is_ver_less_than_650 = __mad_version().is_gp_version_less_than('6.5.0') + return not is_ver_less_than_650 +$$ LANGUAGE plpythonu VOLATILE +m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `CONTAINS SQL', `'); + + +CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.assert_guc_value( + guc_name TEXT, + expected_guc_value TEXT) +RETURNS VOID AS $$ +import plpy +actual_guc_value = plpy.execute('show {0}'.format(guc_name))[0] +actual_guc_value = actual_guc_value[guc_name] +if actual_guc_value != expected_guc_value: + plpy.error('guc {0} assertion failed. Expected Value: {1}, ' + 'Actual Value: {2}'.format(guc_name, expected_guc_value, actual_guc_value)) +$$ LANGUAGE plpythonu VOLATILE +m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `CONTAINS SQL', `');