arca1n opened a new issue, #1010:
URL: https://github.com/apache/age/issues/1010

   **Describe the bug**
   The GIN index needs to be crated before any data is added to the vertex. If 
the gin index on `properties` is created after data has been added to the 
graph, the GIN indexes are ignored while querying. See examples below.
   
   **How are you accessing AGE (Command line, driver, etc.)?**
   - PSQL
   
   **What data setup do we need to do?**
   ### Scenario 1 The GIN index is created before adding graph data
   ```pgsql
   ...
   -- Boilerplate
   LOAD 'age';
   SET search_path TO ag_catalog;
   -- Setup
   SELECT drop_graph('cypher_index', true);
   SELECT create_graph('cypher_index');
   SELECT create_vlabel('cypher_index', 'Movie');
   -- Create index before adding data
   CREATE UNIQUE INDEX imbd_unqiue_movies ON 
cypher_index."Movie"((properties->'id'));
   CREATE INDEX imdb_everything ON cypher_index."Movie" USING gin (properties);
   -- Add data
   SELECT * from cypher('cypher_index', $$
   CREATE
   (:Movie {id: 'movie1', name: 'The Shawshank Redemption', imdbRank : 25}),
   (:Movie {id: 'movie2', name: 'The Godfather', imdbRank : 60}),
   (:Movie {id: 'movie3', name: 'The Dark Knight', imdbRank : 100})
   $$) as (V agtype);
   --  Query shows that it uses index
   SELECT * FROM cypher('cypher_index', $$ EXPLAIN ANALYZE MATCH(n:Movie {id: 
'movie1'}) return n  $$) AS (a agtype);
   ...
   ```
   ### Result Scenario 1 shows GIN index on property is being used
   ```pgsql
   ....
   
   cypher_tests=# -- Query shows that it uses index
   cypher_tests=# SELECT * FROM cypher('cypher_index', $$ EXPLAIN ANALYZE 
MATCH(n:Movie {id: 'movie1'}) return n  $$) AS (a agtype);
                                                          QUERY PLAN
   
-------------------------------------------------------------------------------------------------------------------------
    Bitmap Heap Scan on "Movie" n  (cost=16.01..20.03 rows=1 width=32) (actual 
time=0.039..0.040 rows=1 loops=1)
      Recheck Cond: (properties @> agtype_build_map('id'::text, 
'"movie1"'::agtype))
      Heap Blocks: exact=1
      ->  Bitmap Index Scan on imdb_everything  (cost=0.00..16.01 rows=1 
width=0) (actual time=0.013..0.013 rows=1 loops=1)
            Index Cond: (properties @> agtype_build_map('id'::text, 
'"movie1"'::agtype))
    Planning Time: 0.093 ms
    Execution Time: 0.069 ms
   (7 rows)
   ```
   ### Scenario 2 GIN index on property is created after data has been added
   ```pgsql
   -- Boilerplate
   LOAD 'age';
   SET search_path TO ag_catalog;
   -- Setup
   SELECT drop_graph('cypher_index', true);
   SELECT create_graph('cypher_index');
   SELECT create_vlabel('cypher_index', 'Movie');
   -- Add data
   SELECT * from cypher('cypher_index', $$
   CREATE
   (:Movie {id: 'movie1', name: 'The Shawshank Redemption', imdbRank : 25}),
   (:Movie {id: 'movie2', name: 'The Godfather', imdbRank : 60}),
   (:Movie {id: 'movie3', name: 'The Dark Knight', imdbRank : 100})
   $$) as (V agtype);
   -- Query Shows Seq Scan
   SELECT * FROM cypher('cypher_index', $$ EXPLAIN ANALYZE MATCH(n:Movie {id: 
'movie1'}) return n  $$) AS (a agtype);
   -- Create index
   CREATE UNIQUE INDEX imbd_unqiue_movies ON 
cypher_index."Movie"((properties->'id'));
   CREATE INDEX imdb_everything ON cypher_index."Movie" USING gin (properties);
   -- Query Shows Seq Scan
   SELECT * FROM cypher('cypher_index', $$ EXPLAIN ANALYZE MATCH(n:Movie {id: 
'movie1'}) return n  $$) AS (a agtype);
   ```
   ### Result Scenario 2 shows that the GIN indexes are ignored on the 
properties when querying
   ```pgsql
   ...
   cypher_tests=# -- Add data
   cypher_tests=# SELECT * from cypher('cypher_index', $$
   cypher_tests$# CREATE
   cypher_tests$# (:Movie {id: 'movie1', name: 'The Shawshank Redemption', 
imdbRank : 25}),
   cypher_tests$# (:Movie {id: 'movie2', name: 'The Godfather', imdbRank : 60}),
   cypher_tests$# (:Movie {id: 'movie3', name: 'The Dark Knight', imdbRank : 
100})
   cypher_tests$# $$) as (V agtype);
    v
   ---
   (0 rows)
   
   cypher_tests=# -- Query Shows Seq Scan
   cypher_tests=# SELECT * FROM cypher('cypher_index', $$ EXPLAIN ANALYZE 
MATCH(n:Movie {id: 'movie1'}) return n  $$) AS (a agtype);
                                                QUERY PLAN
   
-----------------------------------------------------------------------------------------------------
    Seq Scan on "Movie" n  (cost=0.00..28.00 rows=1 width=32) (actual 
time=0.026..0.029 rows=1 loops=1)
      Filter: (properties @> agtype_build_map('id'::text, '"movie1"'::agtype))
      Rows Removed by Filter: 2
    Planning Time: 0.063 ms
    Execution Time: 0.038 ms
   (5 rows)
   
   cypher_tests=# -- Create index
   cypher_tests=# CREATE UNIQUE INDEX imbd_unqiue_movies ON 
cypher_index."Movie"((properties->'id'));
   CREATE INDEX
   cypher_tests=# CREATE INDEX imdb_everything ON cypher_index."Movie" USING 
gin (properties);
   CREATE INDEX
   cypher_tests=# -- Query Shows Seq Scan
   cypher_tests=# SELECT * FROM cypher('cypher_index', $$ EXPLAIN ANALYZE 
MATCH(n:Movie {id: 'movie1'}) return n  $$) AS (a agtype);
                                                QUERY PLAN
   
----------------------------------------------------------------------------------------------------
    Seq Scan on "Movie" n  (cost=0.00..1.05 rows=1 width=32) (actual 
time=0.016..0.018 rows=1 loops=1)
      Filter: (properties @> agtype_build_map('id'::text, '"movie1"'::agtype))
      Rows Removed by Filter: 2
    Planning Time: 0.138 ms
    Execution Time: 0.026 ms
   (5 rows)
   ...
   ```
   
   **What is the necessary configuration info needed?**
   - Just regular apache age PG13 docker image
   
   **What is the command that caused the error?**
    Details described above. The the GIN index needs to be added on the 
property before any data is added to the table
   
   
   **Expected behavior**
   No matter when the data gets added to the table, creating an index on the 
table should enable indexes on the query
   
   **Environment (please complete the following information):**
   - apache/age:PG13_latest docker image
   
   **Additional context**
   GIN index to enable faster querying on properties needs to be created before 
any data has been added to the graph.


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to