This is an automated email from the ASF dual-hosted git repository. nkak pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/madlib.git
commit 5a95e7c93b4effed6191f508624d4dc7333f9987 Author: Nikhil Kak <n...@pivotal.io> AuthorDate: Wed Mar 18 17:23:12 2020 -0700 Utilities: Improve rename table logic JIRA: MADLIB-1406 Consider the example "rename table foo to bar.foo2" Previously rename_table would fail if the table 'foo2' already existed on the search_path. This is because we first renamed the table foo to foo2 and then set the schema to bar. The new logic fixes this by renaming foo to a unique string, setting the schema to bar and then renaming the unique string to bar Also moved the function to utilities since the new logic required the use of unique_string. Also it seemed better suited to utilities.py instead of validate_args.py Co-authored-by: Domino Valdano <dvald...@pivotal.io> Co-authored-by: Ekta Khanna <ekha...@pivotal.io> --- src/ports/postgres/modules/graph/hits.py_in | 2 +- src/ports/postgres/modules/graph/pagerank.py_in | 2 +- src/ports/postgres/modules/graph/wcc.py_in | 2 +- src/ports/postgres/modules/linalg/svd.py_in | 2 +- .../modules/regress/clustered_variance.py_in | 2 +- .../postgres/modules/regress/robust_logistic.py_in | 2 +- .../modules/regress/robust_mlogistic.py_in | 2 +- .../postgres/modules/utilities/utilities.py_in | 114 +++++++++++++++++++++ .../postgres/modules/utilities/validate_args.py_in | 65 ------------ 9 files changed, 121 insertions(+), 72 deletions(-) diff --git a/src/ports/postgres/modules/graph/hits.py_in b/src/ports/postgres/modules/graph/hits.py_in index f792f15..1283070 100644 --- a/src/ports/postgres/modules/graph/hits.py_in +++ b/src/ports/postgres/modules/graph/hits.py_in @@ -47,7 +47,7 @@ from utilities.utilities import is_platform_pg from utilities.validate_args import columns_exist_in_table, drop_tables from utilities.validate_args import get_cols_and_types, table_exists -from utilities.validate_args import rename_table +from utilities.utilities import rename_table def validate_hits_args(schema_madlib, vertex_table, vertex_id, edge_table, edge_params, out_table, max_iter, threshold, diff --git a/src/ports/postgres/modules/graph/pagerank.py_in b/src/ports/postgres/modules/graph/pagerank.py_in index 41f25de..e732675 100644 --- a/src/ports/postgres/modules/graph/pagerank.py_in +++ b/src/ports/postgres/modules/graph/pagerank.py_in @@ -49,7 +49,7 @@ from utilities.utilities import py_list_to_sql_string from utilities.validate_args import columns_exist_in_table, get_cols_and_types from utilities.validate_args import table_exists -from utilities.validate_args import rename_table +from utilities.utilities import rename_table def validate_pagerank_args(schema_madlib, vertex_table, vertex_id, edge_table, diff --git a/src/ports/postgres/modules/graph/wcc.py_in b/src/ports/postgres/modules/graph/wcc.py_in index 4adc52e..28b06f5 100644 --- a/src/ports/postgres/modules/graph/wcc.py_in +++ b/src/ports/postgres/modules/graph/wcc.py_in @@ -37,7 +37,7 @@ from utilities.validate_args import columns_exist_in_table, get_expr_type from utilities.utilities import is_platform_pg from utilities.utilities import add_postfix from utilities.validate_args import table_exists -from utilities.validate_args import rename_table +from utilities.utilities import rename_table from utilities.control import MinWarning from graph_utils import validate_graph_coding, get_graph_usage from graph_utils import validate_output_and_summary_tables diff --git a/src/ports/postgres/modules/linalg/svd.py_in b/src/ports/postgres/modules/linalg/svd.py_in index 894fae2..bf0e522 100644 --- a/src/ports/postgres/modules/linalg/svd.py_in +++ b/src/ports/postgres/modules/linalg/svd.py_in @@ -11,7 +11,7 @@ from utilities.utilities import _assert from utilities.utilities import add_postfix from utilities.validate_args import columns_exist_in_table from utilities.validate_args import table_exists -from utilities.validate_args import rename_table +from utilities.utilities import rename_table from matrix_ops import get_dims from matrix_ops import validate_sparse from matrix_ops import validate_dense diff --git a/src/ports/postgres/modules/regress/clustered_variance.py_in b/src/ports/postgres/modules/regress/clustered_variance.py_in index 44b042d..8931066 100644 --- a/src/ports/postgres/modules/regress/clustered_variance.py_in +++ b/src/ports/postgres/modules/regress/clustered_variance.py_in @@ -8,7 +8,7 @@ from utilities.utilities import unique_string from utilities.utilities import _string_to_array_with_quotes from utilities.utilities import extract_keyvalue_params from utilities.validate_args import columns_exist_in_table -from utilities.validate_args import rename_table +from utilities.utilities import rename_table from utilities.validate_args import table_is_empty from utilities.validate_args import table_exists from utilities.utilities import _assert diff --git a/src/ports/postgres/modules/regress/robust_logistic.py_in b/src/ports/postgres/modules/regress/robust_logistic.py_in index 189123b..4bc72a4 100644 --- a/src/ports/postgres/modules/regress/robust_logistic.py_in +++ b/src/ports/postgres/modules/regress/robust_logistic.py_in @@ -11,7 +11,7 @@ import plpy from utilities.utilities import _assert from utilities.utilities import unique_string -from utilities.validate_args import rename_table +from utilities.utilities import rename_table # use mad_vec to process arrays passed as strings in GPDB < 4.1 and PG < 9.0 from utilities.utilities import __mad_version diff --git a/src/ports/postgres/modules/regress/robust_mlogistic.py_in b/src/ports/postgres/modules/regress/robust_mlogistic.py_in index 897b849..ffabd37 100644 --- a/src/ports/postgres/modules/regress/robust_mlogistic.py_in +++ b/src/ports/postgres/modules/regress/robust_mlogistic.py_in @@ -15,7 +15,7 @@ from utilities.utilities import _assert from utilities.utilities import add_postfix from utilities.validate_args import table_exists -from utilities.validate_args import rename_table +from utilities.utilities import rename_table from utilities.validate_args import columns_exist_in_table from regress.robust_linear import _robust_linregr_validate diff --git a/src/ports/postgres/modules/utilities/utilities.py_in b/src/ports/postgres/modules/utilities/utilities.py_in index 210fbd4..73ff894 100644 --- a/src/ports/postgres/modules/utilities/utilities.py_in +++ b/src/ports/postgres/modules/utilities/utilities.py_in @@ -15,6 +15,7 @@ from validate_args import input_tbl_valid from validate_args import is_var_valid from validate_args import output_tbl_valid from validate_args import quote_ident +from validate_args import drop_tables import plpy @@ -1162,3 +1163,116 @@ def rotate(l, n): """ return l[-n:] + l[:-n] # ------------------------------------------------------------------------------ + +def rename_table(schema_madlib, orig_name, new_name): + """ + Renames possibly schema qualified table name to a new schema qualified name + ensuring the schema qualification are changed appropriately + + Args: + @param orig_name: string, Original name of the table + (must be schema qualified if table schema is not in search path) + @param new_name: string, New name of the table + (can be schema qualified. If it is not then the original + schema is maintained) + Returns: + String. The new table name qualified with the schema name + """ + new_names_split = new_name.split(".") + if len(new_names_split) > 2: + raise AssertionError("Invalid table name") + new_table_name = new_names_split[-1] + new_table_schema = new_names_split[0] if len(new_names_split) > 1 else None + + orig_names_split = orig_name.split(".") + if len(orig_names_split) > 2: + raise AssertionError("Invalid table name") + + if len(orig_names_split) > 1: + orig_table_schema = orig_names_split[0] + else: + # we need to get the schema name of the original table if we are + # to change the schema of the new table. This is to ensure that we + # change the schema of the correct table in case there are multiple + # tables with the same new name. + orig_table_schema = get_first_schema(orig_name) + + if orig_table_schema is None: + raise AssertionError("Relation {0} not found during rename". + format(orig_name)) + return __do_rename_and_get_new_name(orig_name, new_name, orig_table_schema, + new_table_schema, new_table_name) + +def __do_rename_and_get_new_name(orig_name, new_name, orig_table_schema, + new_table_schema, new_table_name): + """ + Internal private function to perform the rename operation after all the + validation checks + """ + + """ + CASE 1 + If the output table is schema is pg_temp, we cannot alter table schemas from/to + temp schemas. If it looks like a temp schema, we stay safe and just use + create/drop + Test cases + foo.bar to pg_temp.bar + foo.bar to pg_temp.bar2 + foo to pg_temp.bar + pg_temp.foo to pg_temp.bar + """ + if new_table_schema and 'pg_temp' in new_table_schema: + """ + If both new_table_schema and orig_table_schema have pg_temp in it, + just run an alter statement instead of CTAS. Without this, pca dev-check + fails on gpdb5/6 (but not on pg) + """ + if new_table_schema != orig_table_schema: + plpy.info("""CREATE TABLE {new_name} AS SELECT * FROM {orig_name}""" + .format(**locals())) + plpy.execute("""CREATE TABLE {new_name} AS SELECT * FROM {orig_name}""" + .format(**locals())) + drop_tables([orig_name]) + return new_name + else: + plpy.execute("ALTER TABLE {orig_name} RENAME TO {new_table_name}". + format(**locals())) + return new_name + + """ + CASE 2 + Do direct rename if the new table does not have an output schema or + if the new table schema is the same as the original table schema + Test Cases + rename foo to bar + rename foo.bar to foo.bar2 + rename foo.bar to bar2 + """ + if not new_table_schema or new_table_schema == orig_table_schema: + plpy.execute("ALTER TABLE {orig_name} RENAME TO {new_table_name}". + format(**locals())) + return orig_table_schema + "." + new_table_name + + """ + CASE 3 + output table is schema qualified + 1. rename the original table to an interim temp name + 2. set the new schema on that interim table + 3. rename interim table to the new table name + Test cases + foo.bar to foo2.bar2 + foo.bar to foo2.bar + """ + interim_temp_name = unique_string("rename_table_helper") + plpy.execute( + "ALTER TABLE {orig_name} RENAME to {interim_temp_name}".format( + **locals())) + + plpy.execute( + """ALTER TABLE {interim_temp_name} SET SCHEMA {new_table_schema}""".format( + **locals())) + + plpy.execute( + """ALTER TABLE {new_table_schema}.{interim_temp_name} RENAME to {new_table_name}""" + .format(**locals())) + return new_name diff --git a/src/ports/postgres/modules/utilities/validate_args.py_in b/src/ports/postgres/modules/utilities/validate_args.py_in index ac6bd4a..a21be58 100644 --- a/src/ports/postgres/modules/utilities/validate_args.py_in +++ b/src/ports/postgres/modules/utilities/validate_args.py_in @@ -164,71 +164,6 @@ def table_exists(tbl, only_first_schema=False): # ------------------------------------------------------------------------- -def rename_table(schema_madlib, orig_name, new_name): - """ - Renames possibly schema qualified table name to a new schema qualified name - ensuring the schema qualification are changed appropriately - - Args: - @param orig_name: string, Original name of the table - (must be schema qualified if table schema is not in search path) - @param new_name: string, New name of the table - (can be schema qualified. If it is not then the original - schema is maintained) - Returns: - String. The new table name qualified with the schema name - """ - new_names_split = new_name.split(".") - if len(new_names_split) > 2: - raise AssertionError("Invalid table name") - new_table_name = new_names_split[-1] - new_table_schema = new_names_split[0] if len(new_names_split) > 1 else None - - orig_names_split = orig_name.split(".") - if len(orig_names_split) > 2: - raise AssertionError("Invalid table name") - - if len(orig_names_split) > 1: - orig_table_schema = orig_names_split[0] - else: - # we need to get the schema name of the original table if we are - # to change the schema of the new table. This is to ensure that we - # change the schema of the correct table in case there are multiple - # tables with the same new name. - orig_table_schema = get_first_schema(orig_name) - - if orig_table_schema is None: - raise AssertionError("Relation {0} not found during rename". - format(orig_name)) - - plpy.execute("ALTER TABLE {orig_table} RENAME TO {new_table}". - format(orig_table=orig_name, new_table=new_table_name)) - - if new_table_schema: - if new_table_schema != orig_table_schema: - # set schema only if a change in schema is required - before_schema_string = "{0}.{1}".format(orig_table_schema, - new_table_name) - - if 'pg_temp' in new_table_schema: - # We cannot alter table schemas from/to temp schemas - # If it looks like a temp schema, we stay safe and just use - # create/drop - plpy.execute("""CREATE TABLE {new_name} AS - SELECT * FROM {new_table_name}""". - format(**locals())) - drop_tables([new_table_name]) - else: - plpy.execute("""ALTER TABLE {new_table} - SET SCHEMA {schema_name}""". - format(new_table=before_schema_string, - schema_name=new_table_schema)) - return new_name - else: - return orig_table_schema + "." + new_table_name -# ------------------------------------------------------------------------- - - def get_first_schema(table_name): """ Return first schema name from search path that contains given table.