Re: [I] Streaming LIST data over ADBC [arrow-adbc]

via GitHub Thu, 08 Aug 2024 08:20:04 -0700


skarakuzu commented on issue #2066:
URL: https://github.com/apache/arrow-adbc/issues/2066#issuecomment-2276090594


   Hello @paleolimbot,
   
   Thank you very much for your response. In addition to the Python example, we 
also created a C++ example by following the code and the tests in the C++ repo. 
We were able to create a table and read from it when the table does not contain 
lists. However, the code crashes with a segmentation fault as soon as any list 
is ingested. All the queries run without reporting any error, but no table is 
created in the PostgreSQL database. The code crashes at the 
`stream.get_schema(&stream, &schema_rep);` line. I am fairly new to arrow-adbc 
and nanoarrow, so I wanted to ask whether there is a step I am missing in the 
process.
   
   I also tried commenting out the line you pointed to and rebuilding the code, 
but it did not fix the problem. 
   We would appreciate any help and suggestions. Thanks in advance! 
   
   <details>
   
   ```
   
   #include <adbc.h>
   #include <nanoarrow.h>
   #include <cstdlib>
   #include <cstring>
   #include <fstream>
   #include <iostream>
   #include <string>
   #include <vector>
   
   int main() {
     AdbcError error{};
   
     AdbcDatabase database = {};
     AdbcDatabaseNew(&database, &error);
     AdbcDatabaseSetOption(&database, "driver", "adbc_driver_postgresql", 
&error);
     AdbcDatabaseSetOption(&database, "uri", 
"postgresql://localhost:5432/postgres", &error);
     AdbcDatabaseInit(&database, &error);
   
     /// Creating a Connection
     AdbcConnection connection = {};
     AdbcConnectionNew(&connection, &error);
     AdbcConnectionInit(&connection, &database, &error);
   
     struct ArrowSchema schema;
     struct ArrowArray batch;
     static struct ArrowError global_error;
   
     ArrowSchemaInit(&schema);
     ArrowSchemaSetTypeStruct(&schema, 2);
     ArrowSchemaInit(schema.children[0]);
     ArrowSchemaSetTypeFixedSize(schema.children[0], 
NANOARROW_TYPE_FIXED_SIZE_LIST, 2);
     ArrowSchemaInit(schema.children[1]);
     ArrowSchemaSetTypeFixedSize(schema.children[1], 
NANOARROW_TYPE_FIXED_SIZE_LIST, 2);
   
     ArrowSchemaSetName(schema.children[0], "index");
     ArrowSchemaSetName(schema.children[1], "create");
   
     ArrowSchemaSetType(schema.children[0]->children[0], NANOARROW_TYPE_INT64);
     ArrowSchemaSetType(schema.children[1]->children[0], NANOARROW_TYPE_STRING);
   
     std::vector<std::vector<int>> v1{{42, 43}, {-42, -43}};
     std::vector<std::vector<std::string>> v2{{"foo", "foo1"}, {"bar", "bar1"}};
   
     ArrowArrayInitFromSchema(&batch, &schema, &global_error);
   
     ArrowArrayStartAppending(&batch);
   
     for (size_t i = 0; i < v1.size(); i++) {
     for (size_t j = 0; j < v1[i].size(); j++) {
       ArrowArrayAppendInt(batch.children[0]->children[0], v1[i][j]);
     }
     ArrowArrayFinishElement(batch.children[0]);
     ArrowArrayFinishElement(&batch);
     }
     for (size_t i = 0; i < v2.size(); i++) {
     for (size_t j = 0; j < v2[i].size(); j++) {
       ArrowArrayAppendString(
           batch.children[1]->children[0],
           ArrowStringView{v2[i][j].c_str(), 
(int64_t)strlen(v2[i][j].c_str())});
     }
     ArrowArrayFinishElement(batch.children[1]);
     ArrowArrayFinishElement(&batch);
     }
   
     batch.children[0]->length = batch.children[0]->children[0]->length;
     batch.length = batch.children[0]->length;
     ArrowArrayFinishBuildingDefault(&batch, &global_error);
   
     // Create Stream
     struct ArrowArrayStream stream {};
   
     // Creating a Statement
     struct AdbcStatement statement;
     int64_t rows_affected = -1;
   
     // Drop table if exists
     AdbcStatementNew(&connection, &statement, &error);
     std::string query = "DROP TABLE IF EXISTS \"bulk_ingest\"";
     AdbcStatementSetSqlQuery(&statement, query.c_str(), &error);
     AdbcStatementExecuteQuery(&statement, nullptr, nullptr, &error);
     // End of dropping the table if exists
   
     // Start the table
     AdbcStatementSetOption(&statement, ADBC_INGEST_OPTION_TARGET_TABLE, 
"bulk_ingest",
                            &error);
     AdbcStatementSetOption(&statement, ADBC_INGEST_OPTION_MODE,
                            ADBC_INGEST_OPTION_MODE_CREATE_APPEND, &error);
   
     AdbcStatementBind(&statement, &batch, &schema, &error);
     AdbcStatementExecuteQuery(&statement, nullptr, &rows_affected, &error);
   
     std::cout << "0 . rows affected: " << rows_affected << std::endl;
     AdbcStatementSetSqlQuery(&statement, "SELECT * FROM \"bulk_ingest\"", 
&error);
     AdbcStatementExecuteQuery(&statement, &stream, &rows_affected, &error);
   
     std::cout << "1 . rows affected: " << rows_affected << std::endl;
   
     struct ArrowSchema schema_rep = {};
     stream.get_schema(&stream, &schema_rep);
   
     // ... Some other code for post processing
   
     AdbcConnectionRelease(&connection, &error);
     AdbcDatabaseRelease(&database, &error);
   
     return 0;
   }
   
   ```
   </details>


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Re: [I] Streaming LIST data over ADBC [arrow-adbc]

Reply via email to