I33Buckler opened a new issue, #1287:
URL: https://github.com/apache/age/issues/1287
I am in the process of creating a graph node from a database table that has
7.6 million lines.
The process runs a function to process the table into the graph with loops.
Initial run of the process it was still running after nearly 24 hours.
To test the loop I ran it without loading into graph. The result is nearly 5
minutes of execution.
The code for creating the graph node below.
Removing the comments from from the section with the PERFORM statement
results in the long process.
Wondering if there is something missing from the process.
```SQL
--create graph
--select * from drop_graph('transport_network',true);
--SELECT create_graph('transport_network');
-- function to test for existence of graph node
drop function if exists node_check;
create or replace function node_check (graph_name text,node_name text)
returns text
LANGUAGE plpgsql
as $node_check$
DECLARE
table_exists boolean;
node_status text;
test text;
begin
drop table if exists table_test;
-- Check for existence of node
execute format('create temp table table_test as SELECT COUNT(*) as existence
FROM information_schema.tables WHERE lower(table_schema) = lower(%1$s)
AND
lower(table_type) = ''base table'' and lower(table_name) = lower(%2$s);',
format('''%s''',graph_name),format('''%s''',node_name));
-- create or empty node
select existence != 0 from table_test into table_exists;
if table_exists then
node_status = 'Node exists';
execute format('select * from cypher(''%1s'',$$match (v:%2$s) detach delete
v$$) as (v agtype);',graph_name,node_name)
;
else
node_status = 'Node created';
execute format('select create_vlabel(%1$s,%2$s);',
format('''%s''',graph_name),format('''%s''',node_name));
end if;
drop table if exists table_test;
return node_status;
END;
$node_check$;
-- Function for creating graph node from existing table
drop function if exists create_stop_times;
CREATE OR REPLACE FUNCTION create_stop_times(graph_name text, tc_date date,
trip_id text, stop_id text,
arrival_time text,
arrival_time_sec bigint, departure_time text, departure_time_sec bigint,
stop_sequence bigint,
pickup_type int, drop_off_type int)
returns text
LANGUAGE plpgsql
VOLATILE
as $stop_times$
declare
nodename text := graph_name || '.' || 'stop_times';
BEGIN
execute
format ('select * from
cypher(''%1$s'',
$$match (t:trips),(s:stops)
where t.id=%2$s and s.id=%3$s
and t.tc_date=%11$s and t.tc_date=s.tc_date
create(t)<-
[:PART_OF_TRIP]-
(st:stop_times {tc_date: %11$s,
arrival_time: %4$s, arrival_time_seconds: %5$s, departure_time: %6$s,
departure_time_seconds: %7$s,
stop_sequence: %8$s,
pickup_type: %9$s, drop_off_type: %10$s})-
[:LOCATED_AT]->(s)$$) as (st agtype);',
quote_ident(graph_name),quote_ident(trip_id),quote_ident(stop_id),
quote_ident(arrival_time),to_char(arrival_time_sec,'999999'),
quote_ident(departure_time),to_char(departure_time_sec,'999999'),
to_char(row_num,'9999'),to_char(pickup_type,'9'),to_char(drop_off_type,'9'),
format('"%s"',to_char(tc_date,'yyyy-mm-dd')));
return nodename;
END
$stop_times$
;
-- Check for node. Reset if exists
select node_check('transport_network','stop_times');
-- Initialise tables
drop table if exists stop_times, trip_ranks;
-- Stop times for processing by trip ;
create temp table stop_times as
select
dense_rank() over (order by tc_date,trip_id) as trip_rank,
row_number() over (partition by tc_date,trip_id order by stop_sequence) as
row_num,* from allservices.stop_times ;
create index on stop_times (trip_rank);
create index on stop_times (row_num);
create unique index on stop_times (trip_rank,row_num);
create unique index on stop_times (trip_rank,stop_sequence);
-- individual trips for processing stop times
create temp table trip_ranks as select distinct trip_rank from stop_times;
create unique index on trip_ranks (trip_rank);
-- define table for processing in loop
drop table if exists stop_times_loop;
create temp table stop_times_loop (like stop_times);
do $$
declare temprow record;
triprank record;
graph_name text:='transport_network';
row_counter integer := 0 ;
rank_counter integer := 0;
begin
-- Loop through trips ranked by TC_Date
for triprank in select trip_rank from trip_ranks order by trip_rank
loop
rank_counter := rank_counter+1;
row_counter := 0; -- reset row counter for looping through rows in trip by
date
for temprow in select row_num from stop_times where trip_rank=rank_counter
order by row_num
loop
row_counter := row_counter+1;
-- Insert trip by date and associated rows for stop sequence on trip
truncate table stop_times_loop;
insert into stop_times_loop select * from stop_times where
trip_rank=rank_counter and row_num=row_counter;
--perform create_stop_times(graph_name,
--
a.tc_date,a.trip_id,a.stop_id,
--
a.arrival_time,a.arrival_time_sec,
--
a.departure_time,a.departure_time_sec,
--
a.stop_sequence,a.pickup_type,a.drop_off_type)
-- from stop_times_loop a
--;
end loop;
end loop;
end; $$
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]