This is an automated email from the ASF dual-hosted git repository. okislal pushed a commit to branch madlib2-master in repository https://gitbox.apache.org/repos/asf/madlib.git
The following commit(s) were added to refs/heads/madlib2-master by this push: new f91813be Build: Add support for PG15 f91813be is described below commit f91813bedb275bb042968a5e38d4293bbad6b325 Author: Orhan Kislal <okis...@apache.org> AuthorDate: Thu Jun 15 21:02:08 2023 -0400 Build: Add support for PG15 --- methods/array_ops/src/pg_gp/array_ops.c | 4 ++++ methods/sketch/src/pg_gp/sketch_support.c | 10 ++++++++-- src/madpack/madpack.py | 12 ++++++++++++ src/ports/postgres/dbconnector/UDF_impl.hpp | 4 ++++ .../modules/bayes/test/gaussian_naive_bayes.sql_in | 2 +- src/ports/postgres/modules/crf/crf.sql_in | 14 ++++++++------ .../modules/deep_learning/madlib_keras_gpu_info.py_in | 8 +++++++- src/ports/postgres/modules/graph/wcc.sql_in | 4 ++-- src/ports/postgres/modules/sample/balance_sample.py_in | 4 ++-- .../postgres/modules/utilities/encode_categorical.py_in | 2 +- 10 files changed, 49 insertions(+), 15 deletions(-) diff --git a/methods/array_ops/src/pg_gp/array_ops.c b/methods/array_ops/src/pg_gp/array_ops.c index a842a605..574a9571 100644 --- a/methods/array_ops/src/pg_gp/array_ops.c +++ b/methods/array_ops/src/pg_gp/array_ops.c @@ -8,7 +8,11 @@ #include "utils/numeric.h" #include "utils/builtins.h" #include "utils/memutils.h" +#if GP_VERSION_NUM >= 70000 || PG_VERSION_NUM >= 130000 +#include "utils/fmgrprotos.h" +#else #include "utils/int8.h" +#endif #include "utils/datum.h" #include "utils/lsyscache.h" #include "utils/typcache.h" diff --git a/methods/sketch/src/pg_gp/sketch_support.c b/methods/sketch/src/pg_gp/sketch_support.c index 1cdeb904..f8baf6a6 100644 --- a/methods/sketch/src/pg_gp/sketch_support.c +++ b/methods/sketch/src/pg_gp/sketch_support.c @@ -306,6 +306,7 @@ bytea *sketch_md5_bytea(Datum dat, Oid typOid) bool byval = get_typbyval(typOid); int len = ExtractDatumLen(dat, get_typlen(typOid), byval, -1); void *datp = DatumExtractPointer(dat, byval); + /* * it's very common to be hashing 0 for countmin sketches. Rather than * hard-code it here, we cache on first lookup. In future a bigger cache here @@ -318,9 +319,14 @@ bytea *sketch_md5_bytea(Datum dat, Oid typOid) if (byval && len == sizeof(int64) && *(int64 *)datp == 0 && zero_cached) { return md5_of_0; } - else + else{ + #if defined(GP_VERSION_NUM) || PG_VERSION_NUM < 150000 pg_md5_hash(datp, len, outbuf); - + #else + const char *errstr = NULL; + pg_md5_hash(datp, len, outbuf, &errstr); + #endif + } hex_to_bytes(outbuf, (uint8 *)VARDATA(out), MD5_HASHLEN*2); SET_VARSIZE(out, MD5_HASHLEN+VARHDRSZ); if (byval && len == sizeof(int64) && *(int64 *)datp == 0 && !zero_cached) { diff --git a/src/madpack/madpack.py b/src/madpack/madpack.py index 6524a3de..7c91911b 100755 --- a/src/madpack/madpack.py +++ b/src/madpack/madpack.py @@ -188,6 +188,18 @@ def _run_m4_and_append(schema, maddir_mod_py, module, sqlfile, '-DMODULE_NAME=' + module, '-I' + maddir_madpack, sqlfile] + if (((portid == 'postgres') & (dbver == '15'))): + m4args = ['m4', + '-P', + '-DMADLIB_SCHEMA=' + schema, + '-DPLPYTHON_LIBDIR=' + maddir_mod_py, + '-DEXT_PYTHON_LIBDIR=' + maddir_ext_py, + '-DMODULE_PATHNAME=' + maddir_lib, + '-DMADLIB_LIBRARY_PATH=' + madlib_library_path, + '-DMODULE_NAME=' + module, + '-DIS_PG_15=TRUE', + '-I' + maddir_madpack, + sqlfile] info_(this, "> ... parsing: " + " ".join(m4args), verbose) output_filehandle.flush() diff --git a/src/ports/postgres/dbconnector/UDF_impl.hpp b/src/ports/postgres/dbconnector/UDF_impl.hpp index cb377083..6674a346 100644 --- a/src/ports/postgres/dbconnector/UDF_impl.hpp +++ b/src/ports/postgres/dbconnector/UDF_impl.hpp @@ -13,6 +13,9 @@ namespace dbconnector { namespace postgres { +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wstringop-truncation" + #define MADLIB_HANDLE_STANDARD_EXCEPTION(err) \ sqlerrcode = err; \ strncpy(msg, exc.what(), sizeof(msg)); @@ -215,6 +218,7 @@ UDF::call(FunctionCallInfo fcinfo) { #undef MADLIB_HANDLE_STANDARD_EXCEPTION +#pragma GCC diagnostic pop } // namespace postgres } // namespace dbconnector diff --git a/src/ports/postgres/modules/bayes/test/gaussian_naive_bayes.sql_in b/src/ports/postgres/modules/bayes/test/gaussian_naive_bayes.sql_in index 26c874f7..62dab91c 100644 --- a/src/ports/postgres/modules/bayes/test/gaussian_naive_bayes.sql_in +++ b/src/ports/postgres/modules/bayes/test/gaussian_naive_bayes.sql_in @@ -221,7 +221,7 @@ m4_include(`SQLCommon.m4') m4_changequote(`<!', `!>') m4_ifdef(<!__POSTGRESQL__!>, <!!>, <! SET optimizer = off; -!> +!>) SELECT assert(count(*) = 10, 'Gaussian Naive Bayes produces wrong classes!') FROM iris_clasif, iris_test diff --git a/src/ports/postgres/modules/crf/crf.sql_in b/src/ports/postgres/modules/crf/crf.sql_in index acb1612b..302de8a4 100644 --- a/src/ports/postgres/modules/crf/crf.sql_in +++ b/src/ports/postgres/modules/crf/crf.sql_in @@ -719,14 +719,16 @@ CREATE AGGREGATE MADLIB_SCHEMA.lincrf_lbfgs_step( INITCOND='{0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}' ); -DROP AGGREGATE IF EXISTS MADLIB_SCHEMA.array_union(anyarray) CASCADE; +m4_ifdef(`IS_PG_15', +`DROP AGGREGATE IF EXISTS MADLIB_SCHEMA.array_union(anycompatiblearray) CASCADE;', +`DROP AGGREGATE IF EXISTS MADLIB_SCHEMA.array_union(anyarray) CASCADE;') + CREATE m4_ifdef(`__POSTGRESQL__', `', m4_ifdef(`__HAS_ORDERED_AGGREGATES__', `ORDERED')) AGGREGATE -MADLIB_SCHEMA.array_union(anyarray) ( - SFUNC = array_cat, - STYPE = anyarray -); - +m4_ifdef(`IS_PG_15', +`MADLIB_SCHEMA.array_union(anycompatiblearray) (SFUNC = array_cat, STYPE = anycompatiblearray);', +`MADLIB_SCHEMA.array_union(anyarray) (SFUNC = array_cat, STYPE = anyarray);' +) -- We only need to document the last one (unfortunately, in Greenplum we have to -- use function overloading instead of default arguments). CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.compute_lincrf( diff --git a/src/ports/postgres/modules/deep_learning/madlib_keras_gpu_info.py_in b/src/ports/postgres/modules/deep_learning/madlib_keras_gpu_info.py_in index 5c550f65..7f1f381f 100644 --- a/src/ports/postgres/modules/deep_learning/madlib_keras_gpu_info.py_in +++ b/src/ports/postgres/modules/deep_learning/madlib_keras_gpu_info.py_in @@ -65,7 +65,13 @@ class GPUInfoFunctions: :return: list of gpu descriptions as returned by tensorflow """ current_working_dir = os.path.dirname(os.path.realpath(__file__)) - gpus = subprocess.check_output(["python3", "gpu_info_from_tf.py"], + + try: + subprocess.check_output(["which", "python3.9"]) + py_version = "python3.9" + except Exception: + py_version = "python3" + gpus = subprocess.check_output([py_version, "gpu_info_from_tf.py"], cwd=current_working_dir).splitlines() return gpus diff --git a/src/ports/postgres/modules/graph/wcc.sql_in b/src/ports/postgres/modules/graph/wcc.sql_in index 9d9b4802..cd299d3d 100644 --- a/src/ports/postgres/modules/graph/wcc.sql_in +++ b/src/ports/postgres/modules/graph/wcc.sql_in @@ -374,7 +374,7 @@ SELECT madlib.weakly_connected_components( 'edge', -- Edge table 'src=conn_src, dest=conn_dest', -- Comma delimted string of edge arguments 'wcc_out'); -- Output table of weakly connected components -SELECT * FROM wcc_out ORDER BY component_id, id; +SELECT * FROM wcc_out ORDER BY component_id, node_id; </pre> <pre class="result"> node_id | component_id @@ -407,7 +407,7 @@ SELECT madlib.weakly_connected_components( 'src=conn_src, dest=conn_dest', -- Comma delimted string of edge arguments 'wcc_out', -- Output table of weakly connected components 'user_id'); -- Grouping column name -SELECT * FROM wcc_out ORDER BY user_id, component_id, id; +SELECT * FROM wcc_out ORDER BY user_id, component_id, node_id; </pre> <pre class="result"> node_id | component_id | user_id diff --git a/src/ports/postgres/modules/sample/balance_sample.py_in b/src/ports/postgres/modules/sample/balance_sample.py_in index 0385499f..63440e9d 100644 --- a/src/ports/postgres/modules/sample/balance_sample.py_in +++ b/src/ports/postgres/modules/sample/balance_sample.py_in @@ -73,7 +73,7 @@ def _get_level_frequency_distribution(source_table, class_col, """ if grp_by_cols and grp_by_cols.lower() != 'null': is_grouping = True - grp_by_cols_comma = grp_by_cols + ', ' + grp_by_cols_comma = 'group_values, ' array_grp_by_cols_comma = "ARRAY[{0}]".format(grp_by_cols) + " AS group_values, " else: is_grouping = False @@ -96,7 +96,7 @@ def _get_level_frequency_distribution(source_table, class_col, ({class_col})::TEXT AS classes, count(*) AS class_count FROM {source_table} - GROUP BY {grp_by_cols_comma} ({class_col}) + GROUP BY {grp_by_cols_comma} classes ) q {meta_grp_by} """.format(grp_identifier="group_values" if is_grouping else "NULL", diff --git a/src/ports/postgres/modules/utilities/encode_categorical.py_in b/src/ports/postgres/modules/utilities/encode_categorical.py_in index b47f8a21..8f39f0ff 100644 --- a/src/ports/postgres/modules/utilities/encode_categorical.py_in +++ b/src/ports/postgres/modules/utilities/encode_categorical.py_in @@ -469,7 +469,7 @@ class CategoricalEncoder(object): count(*)::integer as c FROM {tbl} {filter_str} - GROUP BY {col} + GROUP BY f ) q """.format(schema_madlib=self.schema_madlib, col=col, tbl=self.source_table,