This is an automated email from the ASF dual-hosted git repository.

mtaha pushed a commit to branch PG17
in repository https://gitbox.apache.org/repos/asf/age.git

commit 6b0305fafccfba1db3f940a237e78c90a9a86fec
Author: John Gemignani <[email protected]>
AuthorDate: Tue Dec 16 08:33:28 2025 -0800

    Restrict age_load commands (#2274)
    
    This PR applies restrictions to the following age_load commands -
    
        load_labels_from_file()
        load_edges_from_file()
    
    They are now tied to a specific root directory and are required to have a
    specific file extension to eliminate any attempts to force them to access
    any other files.
    
    Nothing else has changed with the actual command formats or parameters,
    only that they work out of the /tmp/age directory and only access files
    with an extension of .csv.
    
    Added regression tests and updated the location of the csv files for
    those regression tests.
    
    modified:   regress/expected/age_load.out
    modified:   regress/sql/age_load.sql
    modified:   src/backend/utils/load/age_load.c
---
 regress/expected/age_load.out     | 44 ++++++++++++++++++++++-
 regress/sql/age_load.sql          | 38 +++++++++++++++++++-
 src/backend/utils/load/age_load.c | 76 +++++++++++++++++++++++++++++++++++----
 3 files changed, 149 insertions(+), 9 deletions(-)

diff --git a/regress/expected/age_load.out b/regress/expected/age_load.out
index 5f2bdab7..55d1ff1d 100644
--- a/regress/expected/age_load.out
+++ b/regress/expected/age_load.out
@@ -16,7 +16,9 @@
  * specific language governing permissions and limitations
  * under the License.
  */
-\! cp -r regress/age_load/data regress/instance/data/age_load
+\! rm -rf /tmp/age/age_load
+\! mkdir -p /tmp/age
+\! cp -r regress/age_load/data /tmp/age/age_load
 LOAD 'age';
 SET search_path TO ag_catalog;
 -- Create a country using CREATE clause
@@ -401,6 +403,43 @@ SELECT * FROM cypher('agload_conversion', $$ MATCH 
()-[e:Edges2]->() RETURN prop
  {"bool": "false", "string": "nUll", "numeric": "3.14"}
 (6 rows)
 
+--
+-- Check sandbox
+--
+-- check null file name
+SELECT load_labels_from_file('agload_conversion', 'Person1', NULL, true, true);
+ERROR:  file path must not be NULL
+SELECT load_edges_from_file('agload_conversion', 'Edges1', NULL, true);
+ERROR:  file path must not be NULL
+-- check no file name
+SELECT load_labels_from_file('agload_conversion', 'Person1', '', true, true);
+ERROR:  file name cannot be zero length
+SELECT load_edges_from_file('agload_conversion', 'Edges1', '', true);
+ERROR:  file name cannot be zero length
+-- check for file/path does not exist
+SELECT load_labels_from_file('agload_conversion', 'Person1', 
'age_load_xxx/conversion_vertices.csv', true, true);
+ERROR:  File or path does not exist 
[/tmp/age/age_load_xxx/conversion_vertices.csv]
+SELECT load_edges_from_file('agload_conversion', 'Edges1', 
'age_load_xxx/conversion_edges.csv', true);
+ERROR:  File or path does not exist 
[/tmp/age/age_load_xxx/conversion_edges.csv]
+SELECT load_labels_from_file('agload_conversion', 'Person1', 
'age_load/conversion_vertices.txt', true, true);
+ERROR:  File or path does not exist [/tmp/age/age_load/conversion_vertices.txt]
+SELECT load_edges_from_file('agload_conversion', 'Edges1', 
'age_load/conversion_edges.txt', true);
+ERROR:  File or path does not exist [/tmp/age/age_load/conversion_edges.txt]
+-- check wrong extension
+\! touch /tmp/age/age_load/conversion_vertices.txt
+\! touch /tmp/age/age_load/conversion_edges.txt
+SELECT load_labels_from_file('agload_conversion', 'Person1', 
'age_load/conversion_vertices.txt', true, true);
+ERROR:  You can only load files with extension [.csv].
+SELECT load_edges_from_file('agload_conversion', 'Edges1', 
'age_load/conversion_edges.txt', true);
+ERROR:  You can only load files with extension [.csv].
+-- check outside sandbox directory
+SELECT load_labels_from_file('agload_conversion', 'Person1', 
'../../etc/passwd', true, true);
+ERROR:  You can only load files located in [/tmp/age/].
+SELECT load_edges_from_file('agload_conversion', 'Edges1', '../../etc/passwd', 
true);
+ERROR:  You can only load files located in [/tmp/age/].
+--
+-- Cleanup
+--
 SELECT drop_graph('agload_conversion', true);
 NOTICE:  drop cascades to 6 other objects
 DETAIL:  drop cascades to table agload_conversion._ag_label_vertex
@@ -415,3 +454,6 @@ NOTICE:  graph "agload_conversion" has been dropped
  
 (1 row)
 
+--
+-- End
+--
diff --git a/regress/sql/age_load.sql b/regress/sql/age_load.sql
index 180248bf..cefcfb4c 100644
--- a/regress/sql/age_load.sql
+++ b/regress/sql/age_load.sql
@@ -17,7 +17,9 @@
  * under the License.
  */
 
-\! cp -r regress/age_load/data regress/instance/data/age_load
+\! rm -rf /tmp/age/age_load
+\! mkdir -p /tmp/age
+\! cp -r regress/age_load/data /tmp/age/age_load
 
 LOAD 'age';
 
@@ -160,4 +162,38 @@ SELECT create_elabel('agload_conversion','Edges2');
 SELECT load_edges_from_file('agload_conversion', 'Edges2', 
'age_load/conversion_edges.csv', false);
 SELECT * FROM cypher('agload_conversion', $$ MATCH ()-[e:Edges2]->() RETURN 
properties(e) $$) as (a agtype);
 
+--
+-- Check sandbox
+--
+-- check null file name
+SELECT load_labels_from_file('agload_conversion', 'Person1', NULL, true, true);
+SELECT load_edges_from_file('agload_conversion', 'Edges1', NULL, true);
+
+-- check no file name
+SELECT load_labels_from_file('agload_conversion', 'Person1', '', true, true);
+SELECT load_edges_from_file('agload_conversion', 'Edges1', '', true);
+
+-- check for file/path does not exist
+SELECT load_labels_from_file('agload_conversion', 'Person1', 
'age_load_xxx/conversion_vertices.csv', true, true);
+SELECT load_edges_from_file('agload_conversion', 'Edges1', 
'age_load_xxx/conversion_edges.csv', true);
+SELECT load_labels_from_file('agload_conversion', 'Person1', 
'age_load/conversion_vertices.txt', true, true);
+SELECT load_edges_from_file('agload_conversion', 'Edges1', 
'age_load/conversion_edges.txt', true);
+
+-- check wrong extension
+\! touch /tmp/age/age_load/conversion_vertices.txt
+\! touch /tmp/age/age_load/conversion_edges.txt
+SELECT load_labels_from_file('agload_conversion', 'Person1', 
'age_load/conversion_vertices.txt', true, true);
+SELECT load_edges_from_file('agload_conversion', 'Edges1', 
'age_load/conversion_edges.txt', true);
+
+-- check outside sandbox directory
+SELECT load_labels_from_file('agload_conversion', 'Person1', 
'../../etc/passwd', true, true);
+SELECT load_edges_from_file('agload_conversion', 'Edges1', '../../etc/passwd', 
true);
+
+--
+-- Cleanup
+--
 SELECT drop_graph('agload_conversion', true);
+
+--
+-- End
+--
diff --git a/src/backend/utils/load/age_load.c 
b/src/backend/utils/load/age_load.c
index 307ec335..c7cf0677 100644
--- a/src/backend/utils/load/age_load.c
+++ b/src/backend/utils/load/age_load.c
@@ -31,6 +31,62 @@ static agtype_value *csv_value_to_agtype_value(char 
*csv_val);
 static Oid get_or_create_graph(const Name graph_name);
 static int32 get_or_create_label(Oid graph_oid, char *graph_name,
                                  char *label_name, char label_kind);
+static char *build_safe_filename(char *name);
+
+#define AGE_BASE_CSV_DIRECTORY "/tmp/age/" /* sandbox root for CSV loads */
+#define AGE_CSV_FILE_EXTENSION ".csv" /* required file name suffix */
+
+/*
+ * Build the canonical, absolute path of a CSV file inside the load sandbox.
+ *
+ * name is interpreted relative to AGE_BASE_CSV_DIRECTORY. The combined path
+ * is canonicalized with realpath(), so ".." components and symbolic links
+ * are resolved before the sandbox and extension checks are applied.
+ *
+ * Raises ERROR (ERRCODE_INVALID_PARAMETER_VALUE) when:
+ *   - name is NULL or empty,
+ *   - the combined path does not fit in PATH_MAX,
+ *   - realpath() cannot resolve the path,
+ *   - the resolved path is not under AGE_BASE_CSV_DIRECTORY, or
+ *   - the resolved path does not end in AGE_CSV_FILE_EXTENSION.
+ *
+ * Returns the buffer allocated by realpath(). It is malloc'd, not palloc'd,
+ * so the caller must release it with free().
+ */
+static char *build_safe_filename(char *name)
+{
+    const size_t base_length = strlen(AGE_BASE_CSV_DIRECTORY);
+    const size_t extension_length = strlen(AGE_CSV_FILE_EXTENSION);
+    size_t resolved_length;
+    int written;
+    char path[PATH_MAX];
+    char *resolved;
+
+    if (name == NULL)
+    {
+        ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+                        errmsg("file name cannot be NULL")));
+    }
+
+    if (name[0] == '\0')
+    {
+        ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+                        errmsg("file name cannot be zero length")));
+    }
+
+    /*
+     * A truncated path could name a different file than the one requested,
+     * so refuse names that do not fit instead of resolving the prefix.
+     */
+    written = snprintf(path, sizeof(path), "%s%s", AGE_BASE_CSV_DIRECTORY,
+                       name);
+    if (written < 0 || (size_t) written >= sizeof(path))
+    {
+        ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+                        errmsg("file name is too long")));
+    }
+
+    /* realpath() with a NULL buffer returns malloc'd storage */
+    resolved = realpath(path, NULL);
+
+    if (resolved == NULL)
+    {
+        ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+                        errmsg("File or path does not exist [%s]", path)));
+    }
+
+    /*
+     * The canonical path must still start with the sandbox root; the
+     * trailing '/' in the root keeps sibling names such as "/tmp/agefoo"
+     * from matching.
+     *
+     * NOTE(review): the comparison is against the literal macro text, so if
+     * any component of AGE_BASE_CSV_DIRECTORY is itself a symbolic link the
+     * resolved path will never match and every load is rejected - confirm
+     * this is acceptable on all supported platforms.
+     */
+    if (strncmp(resolved, AGE_BASE_CSV_DIRECTORY, base_length) != 0)
+    {
+        /* ereport(ERROR) does not return, so release the buffer first */
+        free(resolved);
+        ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+                        errmsg("You can only load files located in [%s].",
+                               AGE_BASE_CSV_DIRECTORY)));
+    }
+
+    /* compare the tail of the resolved path against the required suffix */
+    resolved_length = strlen(resolved);
+    if (resolved_length < extension_length ||
+        strcmp(resolved + (resolved_length - extension_length),
+               AGE_CSV_FILE_EXTENSION) != 0)
+    {
+        /* ereport(ERROR) does not return, so release the buffer first */
+        free(resolved);
+        ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+                        errmsg("You can only load files with extension [%s].",
+                               AGE_CSV_FILE_EXTENSION)));
+    }
+
+    return resolved;
+}
 
 agtype *create_empty_agtype(void)
 {
@@ -344,7 +400,7 @@ Datum load_labels_from_file(PG_FUNCTION_ARGS)
 {
     Name graph_name;
     Name label_name;
-    text* file_path;
+    text* file_name;
     char* graph_name_str;
     char* label_name_str;
     char* file_path_str;
@@ -373,7 +429,7 @@ Datum load_labels_from_file(PG_FUNCTION_ARGS)
 
     graph_name = PG_GETARG_NAME(0);
     label_name = PG_GETARG_NAME(1);
-    file_path = PG_GETARG_TEXT_P(2);
+    file_name = PG_GETARG_TEXT_P(2);
     id_field_exists = PG_GETARG_BOOL(3);
     load_as_agtype = PG_GETARG_BOOL(4);
 
@@ -385,7 +441,7 @@ Datum load_labels_from_file(PG_FUNCTION_ARGS)
         label_name_str = AG_DEFAULT_LABEL_VERTEX;
     }
 
-    file_path_str = text_to_cstring(file_path);
+    file_path_str = build_safe_filename(text_to_cstring(file_name));
 
     graph_oid = get_or_create_graph(graph_name);
     label_id = get_or_create_label(graph_oid, graph_name_str,
@@ -394,6 +450,9 @@ Datum load_labels_from_file(PG_FUNCTION_ARGS)
     create_labels_from_csv_file(file_path_str, graph_name_str, graph_oid,
                                 label_name_str, label_id, id_field_exists,
                                 load_as_agtype);
+
+    free(file_path_str);
+
     PG_RETURN_VOID();
 }
 
@@ -403,7 +462,7 @@ Datum load_edges_from_file(PG_FUNCTION_ARGS)
 
     Name graph_name;
     Name label_name;
-    text* file_path;
+    text* file_name;
     char* graph_name_str;
     char* label_name_str;
     char* file_path_str;
@@ -431,7 +490,7 @@ Datum load_edges_from_file(PG_FUNCTION_ARGS)
 
     graph_name = PG_GETARG_NAME(0);
     label_name = PG_GETARG_NAME(1);
-    file_path = PG_GETARG_TEXT_P(2);
+    file_name = PG_GETARG_TEXT_P(2);
     load_as_agtype = PG_GETARG_BOOL(3);
 
     graph_name_str = NameStr(*graph_name);
@@ -442,7 +501,7 @@ Datum load_edges_from_file(PG_FUNCTION_ARGS)
         label_name_str = AG_DEFAULT_LABEL_EDGE;
     }
 
-    file_path_str = text_to_cstring(file_path);
+    file_path_str = build_safe_filename(text_to_cstring(file_name));
 
     graph_oid = get_or_create_graph(graph_name);
     label_id = get_or_create_label(graph_oid, graph_name_str,
@@ -450,6 +509,9 @@ Datum load_edges_from_file(PG_FUNCTION_ARGS)
 
     create_edges_from_csv_file(file_path_str, graph_name_str, graph_oid,
                                label_name_str, label_id, load_as_agtype);
+
+    free(file_path_str);
+
     PG_RETURN_VOID();
 }
 
@@ -599,4 +661,4 @@ void finish_batch_insert(batch_insert_state **batch_state)
     pfree((*batch_state)->slots);
     pfree(*batch_state);
     *batch_state = NULL;
-}
\ No newline at end of file
+}

Reply via email to