Github user njayaram2 commented on a diff in the pull request: https://github.com/apache/incubator-madlib/pull/148#discussion_r126478640 --- Diff: src/ports/postgres/modules/graph/graph_utils.py_in --- @@ -27,104 +27,103 @@ @namespace graph """ -import plpy -from utilities.control import MinWarning from utilities.utilities import _assert -from utilities.utilities import extract_keyvalue_params -from utilities.utilities import unique_string from utilities.validate_args import get_cols from utilities.validate_args import unquote_ident from utilities.validate_args import table_exists from utilities.validate_args import columns_exist_in_table from utilities.validate_args import table_is_empty + def _check_groups(tbl1, tbl2, grp_list): + """ + Helper function for joining tables with groups. + Args: + @param tbl1 Name of the first table + @param tbl2 Name of the second table + @param grp_list The list of grouping columns + """ - """ - Helper function for joining tables with groups. - Args: - @param tbl1 Name of the first table - @param tbl2 Name of the second table - @param grp_list The list of grouping columns - """ + return ' AND '.join([" {tbl1}.{i} = {tbl2}.{i} ".format(**locals()) + for i in grp_list]) - return ' AND '.join([" {tbl1}.{i} = {tbl2}.{i} ".format(**locals()) - for i in grp_list]) def _grp_from_table(tbl, grp_list): + """ + Helper function for selecting grouping columns of a table + Args: + @param tbl Name of the table + @param grp_list The list of grouping columns + """ + return ' , '.join([" {tbl}.{i} ".format(**locals()) + for i in grp_list]) - """ - Helper function for selecting grouping columns of a table - Args: - @param tbl Name of the table - @param grp_list The list of grouping columns - """ - return ' , '.join([" {tbl}.{i} ".format(**locals()) - for i in grp_list]) def validate_graph_coding(vertex_table, vertex_id, edge_table, edge_params, - out_table, func_name, **kwargs): - """ - Validates graph tables (vertex and edge) as well as the output 
table. - """ - _assert(out_table and out_table.strip().lower() not in ('null', ''), - "Graph {func_name}: Invalid output table name!".format(**locals())) - _assert(not table_exists(out_table), - "Graph {func_name}: Output table already exists!".format(**locals())) - - _assert(vertex_table and vertex_table.strip().lower() not in ('null', ''), - "Graph {func_name}: Invalid vertex table name!".format(**locals())) - _assert(table_exists(vertex_table), - "Graph {func_name}: Vertex table ({vertex_table}) is missing!".format( - **locals())) - _assert(not table_is_empty(vertex_table), - "Graph {func_name}: Vertex table ({vertex_table}) is empty!".format( - **locals())) - - _assert(edge_table and edge_table.strip().lower() not in ('null', ''), - "Graph {func_name}: Invalid edge table name!".format(**locals())) - _assert(table_exists(edge_table), - "Graph {func_name}: Edge table ({edge_table}) is missing!".format( - **locals())) - _assert(not table_is_empty(edge_table), - "Graph {func_name}: Edge table ({edge_table}) is empty!".format( - **locals())) - - existing_cols = set(unquote_ident(i) for i in get_cols(vertex_table)) - _assert(vertex_id in existing_cols, - """Graph {func_name}: The vertex column {vertex_id} is not present in vertex table ({vertex_table}) """. - format(**locals())) - _assert(columns_exist_in_table(edge_table, edge_params.values()), - """Graph {func_name}: Not all columns from {cols} are present in edge table ({edge_table})""". - format(cols=edge_params.values(), **locals())) - - return None + out_table, func_name, **kwargs): + """ + Validates graph tables (vertex and edge) as well as the output table. 
+ """ + _assert(out_table and out_table.strip().lower() not in ('null', ''), + "Graph {func_name}: Invalid output table name!".format(**locals())) + _assert(not table_exists(out_table), + "Graph {func_name}: Output table already exists!".format(**locals())) + + _assert(vertex_table and vertex_table.strip().lower() not in ('null', ''), + "Graph {func_name}: Invalid vertex table name!".format(**locals())) + _assert(table_exists(vertex_table), + "Graph {func_name}: Vertex table ({vertex_table}) is missing!".format( + **locals())) + _assert(not table_is_empty(vertex_table), + "Graph {func_name}: Vertex table ({vertex_table}) is empty!".format( + **locals())) + + _assert(edge_table and edge_table.strip().lower() not in ('null', ''), + "Graph {func_name}: Invalid edge table name!".format(**locals())) + _assert(table_exists(edge_table), + "Graph {func_name}: Edge table ({edge_table}) is missing!".format( + **locals())) + _assert(not table_is_empty(edge_table), + "Graph {func_name}: Edge table ({edge_table}) is empty!".format( + **locals())) + + existing_cols = set(unquote_ident(i) for i in get_cols(vertex_table)) + _assert(vertex_id in existing_cols, + """Graph {func_name}: The vertex column {vertex_id} is not present in vertex table ({vertex_table}) """. + format(**locals())) + _assert(columns_exist_in_table(edge_table, edge_params.values()), + """Graph {func_name}: Not all columns from {cols} are present in edge table ({edge_table})""". + format(cols=edge_params.values(), **locals())) + + return None + def get_graph_usage(schema_madlib, func_name, other_text): - usage = """ ----------------------------------------------------------------------------- - USAGE ----------------------------------------------------------------------------- - SELECT {schema_madlib}.{func_name}( - vertex_table TEXT, -- Name of the table that contains the vertex data. - vertex_id TEXT, -- Name of the column containing the vertex ids. 
- edge_table TEXT, -- Name of the table that contains the edge data. - edge_args TEXT{comma} -- A comma-delimited string containing multiple - -- named arguments of the form "name=value". - {other_text} -); - -The following parameters are supported for edge table arguments ('edge_args' - above): - -src (default = 'src') : Name of the column containing the source - vertex ids in the edge table. -dest (default = 'dest') : Name of the column containing the destination - vertex ids in the edge table. -weight (default = 'weight') : Name of the column containing the weight of - edges in the edge table. -""".format(schema_madlib=schema_madlib, func_name=func_name, - other_text=other_text, comma = ',' if other_text is not None else ' ') - - return usage + usage = """ + ---------------------------------------------------------------------------- + USAGE + ---------------------------------------------------------------------------- + SELECT {schema_madlib}.{func_name}( + vertex_table TEXT, -- Name of the table that contains the vertex data. + vertex_id TEXT, -- Name of the column containing the vertex ids. + edge_table TEXT, -- Name of the table that contains the edge data. + edge_args TEXT{comma} -- A comma-delimited string containing multiple + -- named arguments of the form "name=value". + {other_text} --- End diff -- `other_text` is also a string that is passed to this function. Shouldn't that string also be indented with a tab's space? For instance, this is how help on `sssp` looks like now: ``` graph_sssp ---------------------------------------------------------------------------------- Given a graph and a source vertex, single source shortest path (SSSP) algorithm finds a path for every vertex such that the sum of the weights of its constituent edges is minimized. 
---------------------------------------------------------------------------- USAGE ---------------------------------------------------------------------------- SELECT madlib.graph_sssp( vertex_table TEXT, -- Name of the table that contains the vertex data. vertex_id TEXT, -- Name of the column containing the vertex ids. edge_table TEXT, -- Name of the table that contains the edge data. edge_args TEXT, -- A comma-delimited string containing multiple -- named arguments of the form "name=value". source_vertex INT, -- The source vertex id for the algorithm to start. out_table TEXT, -- Name of the table to store the result of SSSP. grouping_cols TEXT -- The list of grouping columns. ); ``` The descriptions of `out_table` and `grouping_cols` come from the `other_text` param.
--- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes to enable it, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---