reductionista commented on a change in pull request #573: URL: https://github.com/apache/madlib/pull/573#discussion_r744040215
########## File path: src/ports/postgres/modules/graph/wcc.py_in ########## @@ -120,6 +121,7 @@ def wcc(schema_madlib, vertex_table, vertex_id, edge_table, edge_args, new_to_update_where_condition = '' edge_to_update_where_condition = '' edge_inverse_to_update_where_condition = '' + grouping_cols_select = '' Review comment: I think you can remove this line--looks like it is not used anywhere ########## File path: src/ports/postgres/modules/graph/wcc.py_in ########## @@ -162,52 +160,128 @@ def wcc(schema_madlib, vertex_table, vertex_id, edge_table, edge_args, edge_inverse_to_update_where_condition = ' AND ' + \ _check_groups(edge_inverse, toupdate, grouping_cols_list) join_grouping_cols = _check_groups(subq, distinct_grp_table, grouping_cols_list) - group_by_clause_newupdate = ('' if not grouping_cols else - '{0}, {1}.{2}'.format(subq_prefixed_grouping_cols, + group_by_clause_newupdate = ('{0}, {1}.{2}'.format(subq_prefixed_grouping_cols, subq, vertex_id)) - plpy.execute(""" - CREATE TABLE {newupdate} AS - SELECT {subq}.{vertex_id}, - CAST({BIGINT_MAX} AS BIGINT) AS {component_id} - {select_grouping_cols} - FROM {distinct_grp_table} INNER JOIN ( - SELECT {select_grouping_cols_clause} {src} AS {vertex_id} - FROM {edge_table} - UNION - SELECT {select_grouping_cols_clause} {dest} AS {vertex_id} - FROM {edge_inverse} - ) {subq} - ON {join_grouping_cols} - GROUP BY {group_by_clause_newupdate} - {distribution} - """.format(select_grouping_cols=',' + subq_prefixed_grouping_cols, - select_grouping_cols_clause=grouping_cols_comma, - **locals())) - # drop intermediate table - plpy.execute("DROP TABLE IF EXISTS {0}".format(distinct_grp_table)) - plpy.execute(""" - CREATE TEMP TABLE {message} AS - SELECT {vertex_id}, - CAST({vertex_id} AS BIGINT) AS {component_id} - {select_grouping_cols_clause} - FROM {newupdate} - {distribution} - """.format(select_grouping_cols_clause=comma_grouping_cols, - **locals())) + Review comment: Can we move this definition of `group_by_clause_newupdate` into the grouping_cols specific conditional block below? ########## File path: src/ports/postgres/modules/graph/wcc.py_in ########## @@ -299,24 +315,27 @@ def wcc(schema_madlib, vertex_table, vertex_id, edge_table, edge_args, rename_table(schema_madlib, newupdate, out_table) # Create summary table. We only need the vertex_id and grouping columns # in it. - plpy.execute(""" - CREATE TABLE {out_table_summary} ( - {grouping_cols_summary} - vertex_table TEXT, - vertex_id TEXT, - vertex_id_type TEXT - ) - """.format(grouping_cols_summary='' if not grouping_cols else - 'grouping_cols TEXT, ', **locals())) vertex_id_type = get_expr_type(vertex_id, vertex_table) plpy.execute(""" - INSERT INTO {out_table_summary} VALUES - ({grouping_cols_summary} '{vertex_table}', '{vertex_id}', - '{vertex_id_type}') - """.format(grouping_cols_summary='' if not grouping_cols else - "'{0}', ".format(grouping_cols), **locals())) - plpy.execute("DROP TABLE IF EXISTS {0},{1},{2},{3} ". - format(message, oldupdate, newupdate, toupdate)) + CREATE TABLE {out_table_summary} ( + {grouping_cols_summary} + vertex_table TEXT, + vertex_id TEXT, + vertex_id_type TEXT + ); + """.format(grouping_cols_summary='' if not grouping_cols else + 'grouping_cols TEXT, ', + **locals())) + + plpy.execute(""" + INSERT INTO {out_table_summary} VALUES + ({grouping_cols_txt} '{vertex_table}', '{vertex_id}', + '{vertex_id_type}'); + + DROP TABLE IF EXISTS {message},{oldupdate},{newupdate},{toupdate}; + """.format(grouping_cols_txt='' if not grouping_cols else + "'{0}', ".format(grouping_cols), + **locals())) Review comment: Since there's nothing that needs to happen in between, why not combine these two into a single `plpy.execute()` call? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: dev-unsubscr...@madlib.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org