Github user njayaram2 commented on a diff in the pull request: https://github.com/apache/madlib/pull/291#discussion_r203890181 --- Diff: src/ports/postgres/modules/utilities/transform_vec_cols.py_in --- @@ -0,0 +1,492 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import plpy +from control import MinWarning +from internal.db_utils import is_col_1d_array +from internal.db_utils import quote_literal +from utilities import _assert +from utilities import add_postfix +from utilities import ANY_ARRAY +from utilities import is_valid_psql_type +from utilities import py_list_to_sql_string +from utilities import split_quoted_delimited_str +from validate_args import is_var_valid +from validate_args import get_cols +from validate_args import get_expr_type +from validate_args import input_tbl_valid +from validate_args import output_tbl_valid +from validate_args import table_exists + +class vec_cols_helper: + def __init__(self): + self.all_cols = None + + def get_cols_as_list(self, cols_to_process, source_table=None, exclude_cols=None): + """ + Get a list of columns based on the value of cols_to_process + Args: + @param cols_to_process: str, Either a * or a comma-separated list of col names + @param source_table: str, optional. 
Source table name + @param exclude_cols: str, optional. Comma-separated list of the col(s) to exclude + from the source table, only used if cols_to_process is * + Returns: + A list of column names (or an empty list) + """ + # If cols_to_process is empty/None, return empty list + if not cols_to_process: + return [] + if cols_to_process.strip() != "*": + # If cols_to_process is a comma separated list of names, return list + # of column names in cols_to_process. + return [col for col in split_quoted_delimited_str(cols_to_process) + if col not in split_quoted_delimited_str(exclude_cols)] + if source_table: + if not self.all_cols: + self.all_cols = get_cols(source_table) + return [col for col in self.all_cols + if col not in split_quoted_delimited_str(exclude_cols)] + return [] + +class vec2cols: + def __init__(self): + self.get_cols_helper = vec_cols_helper() + self.module_name = self.__class__.__name__ + + def validate_args(self, source_table, output_table, vector_col, feature_names, + cols_to_output): + """ + Validate args for vec2cols + """ + input_tbl_valid(source_table, self.module_name) + output_tbl_valid(output_table, self.module_name) + # cols_to_validate = self.get_cols_helper.get_cols_as_list(cols_to_output) + [vector_col] --- End diff -- I guess we can remove this commented-out line.
---