[I] Apache AGE takes a long time to create graph node [age]

via GitHub Tue, 24 Oct 2023 06:13:17 -0700


I33Buckler opened a new issue, #1287:
URL: https://github.com/apache/age/issues/1287


   I am in the process of creating a graph node from a database table that has 
7.6 million lines. 
   The process runs a function to process the table into the graph with loops. 
   Initial run of the process it was still running after nearly 24 hours.  
   To test the loop I ran it without loading into graph. The result is nearly 5 
minutes of execution. 
   The code for creating the graph node below.
   Removing the comments from from the section with the PERFORM statement 
results in the long process. 
   Wondering if there is something missing from the process.
   
   ```SQL
   --create graph 
   --select * from drop_graph('transport_network',true);
   --SELECT create_graph('transport_network');
   
   -- function to test for existence of graph node 
   drop function if exists node_check;
   create or replace function node_check (graph_name text,node_name text)
   returns text
   LANGUAGE plpgsql
   as $node_check$
   DECLARE 
        table_exists boolean;
        node_status text;
        test text;
   begin
   drop table if exists table_test;
   -- Check for existence of  node 
   execute format('create temp table table_test as SELECT COUNT(*) as existence 
FROM information_schema.tables WHERE lower(table_schema) = lower(%1$s) 
                                                                        AND 
lower(table_type) = ''base table'' and lower(table_name) = lower(%2$s);',
                                
format('''%s''',graph_name),format('''%s''',node_name));
   -- create or empty node
   select existence != 0 from table_test into table_exists;  
   if table_exists then 
   node_status = 'Node exists';
   execute format('select * from cypher(''%1s'',$$match (v:%2$s) detach delete 
v$$) as (v agtype);',graph_name,node_name)
                                        ;
   else 
   node_status = 'Node created';
   execute format('select create_vlabel(%1$s,%2$s);',
                                
format('''%s''',graph_name),format('''%s''',node_name));
   end if;
   drop table if exists table_test;
   return node_status;
   END;
   $node_check$;
   
   -- Function for creating graph node from existing table 
   drop function if exists create_stop_times;
   CREATE OR REPLACE FUNCTION create_stop_times(graph_name text, tc_date date, 
trip_id text, stop_id text, 
                                                arrival_time text, 
arrival_time_sec bigint, departure_time text, departure_time_sec bigint,
                                                stop_sequence bigint, 
pickup_type int, drop_off_type int)
   returns text 
   LANGUAGE plpgsql
   VOLATILE
   as $stop_times$
   declare 
        nodename text := graph_name || '.' || 'stop_times';
   BEGIN
   execute 
   format ('select * from 
                        cypher(''%1$s'', 
                                $$match (t:trips),(s:stops)
                                                 where t.id=%2$s and s.id=%3$s 
and t.tc_date=%11$s and t.tc_date=s.tc_date
                                        create(t)<-
                                        [:PART_OF_TRIP]-
                                        (st:stop_times {tc_date: %11$s, 
arrival_time: %4$s, arrival_time_seconds: %5$s, departure_time: %6$s, 
departure_time_seconds: %7$s, 
                                                        stop_sequence: %8$s, 
pickup_type: %9$s, drop_off_type: %10$s})-
                                        [:LOCATED_AT]->(s)$$) as (st agtype);',
                        
quote_ident(graph_name),quote_ident(trip_id),quote_ident(stop_id),
                                        
quote_ident(arrival_time),to_char(arrival_time_sec,'999999'),
                                        
quote_ident(departure_time),to_char(departure_time_sec,'999999'),
                                        
to_char(row_num,'9999'),to_char(pickup_type,'9'),to_char(drop_off_type,'9'),
                                        
format('"%s"',to_char(tc_date,'yyyy-mm-dd')));
   return nodename;
   END
   $stop_times$
   ;
   
   -- Check for node. Reset if exists 
   select node_check('transport_network','stop_times');
   
   -- Initialise tables 
   drop table if exists stop_times, trip_ranks;
   
   -- Stop times for processing by trip ;
   create temp table stop_times as 
   select 
   dense_rank() over (order by tc_date,trip_id) as trip_rank,
   row_number() over (partition by tc_date,trip_id order by stop_sequence) as 
row_num,* from allservices.stop_times ;
   create index on stop_times (trip_rank);
   create index on stop_times (row_num);
   create unique index on stop_times (trip_rank,row_num);
   create unique index on stop_times (trip_rank,stop_sequence);
   
   -- individual trips for processing stop times
   create temp table trip_ranks as select distinct trip_rank from stop_times;
   create unique index on trip_ranks (trip_rank);
   
   -- define table for processing in loop 
   drop table if exists stop_times_loop;
   create temp table stop_times_loop (like stop_times);
   
   
   do $$
   declare temprow record;
                triprank record;
                graph_name text:='transport_network';
                row_counter integer := 0 ;
                rank_counter integer := 0;
   begin
   -- Loop through trips ranked by TC_Date
   for triprank in select trip_rank from trip_ranks order by trip_rank
   loop 
   rank_counter := rank_counter+1;
   row_counter := 0; -- reset row counter for looping through rows in trip by 
date
   for temprow in select row_num from stop_times where trip_rank=rank_counter 
order by row_num
   loop
   row_counter := row_counter+1; 
   -- Insert trip by date and associated rows for stop sequence on trip
   truncate table stop_times_loop;
   insert into stop_times_loop select * from stop_times where 
trip_rank=rank_counter and row_num=row_counter;
   --perform create_stop_times(graph_name,
   --                                                                   
a.tc_date,a.trip_id,a.stop_id,
   --                                                                   
a.arrival_time,a.arrival_time_sec,
   --                                                                   
a.departure_time,a.departure_time_sec,
   --                                                                   
a.stop_sequence,a.pickup_type,a.drop_off_type)
   --           from stop_times_loop a
   --;
   end loop;
   end loop;
   end; $$
   ```


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

[I] Apache AGE takes a long time to create graph node [age]

Reply via email to