[I] CreateMultipartUpload failing to upload a file on aws s3 [arrow]

via GitHub Mon, 10 Feb 2025 22:44:56 -0800


kishorik-2097 opened a new issue, #45496:
URL: https://github.com/apache/arrow/issues/45496


   ### Describe the bug, including details regarding any error messages, 
version, and platform.
   
   I am attaching my POC code, which uses the Arrow version 17 API to connect 
to AWS S3, but it fails at runtime with the following error:
   IOError: When initiating multiple part upload for key 'data1.parquet' in 
bucket 'parquetpoc01': AWS Error NETWORK_CONNECTION during 
CreateMultipartUpload operation: curlCode: 28, Timeout was reached
   
   We have verified access to AWS S3 with the AWS CLI: from the command line 
we are able to perform operations on the same bucket.
   
   
   #include <arrow/api.h>
   #include <arrow/compute/cast.h>
   #include <arrow/dataset/dataset.h>
   #include <arrow/dataset/discovery.h>
   #include <arrow/dataset/file_base.h>
   #include <arrow/dataset/file_ipc.h>
   #include <arrow/dataset/file_parquet.h>
   #include <arrow/dataset/scanner.h>
   #include <arrow/filesystem/filesystem.h>
   #include <arrow/ipc/writer.h>
   #include <arrow/util/iterator.h>
   #include <parquet/arrow/reader.h>
   #include <parquet/arrow/writer.h>
   #include "arrow/compute/expression.h"
   #include  <arrow/filesystem/s3fs.h>
   #include <arrow/filesystem/azurefs.h>
   #include <arrow/status.h>
   #include <iostream>
   #include <string.h>
   #include <vector>
   #include <parquet/arrow/writer.h>
   #include <arrow/util/type_fwd.h>
   #include<curl/curl.h>
   #include <arrow/filesystem/azurefs.h>
   
   using parquet::ArrowWriterProperties;
   using parquet::WriterProperties;
   namespace ds = arrow::dataset;
   namespace fs = arrow::fs;
   namespace cp = arrow::compute;
   using parquet::ParquetDataPageVersion;
   using arrow::Compression;
   
   //namespace s3fs = arrow::fs::S3FileSystem;
   
   /**
    * \brief Run Example
    *
    * ./debug/dataset-documentation-example 
file:///<some_path>/<some_directory> parquet
    */
   
   // (Doc section: Reading Datasets)
   // Build the small in-memory table used by the rest of this example: three
   // int64 columns named "a", "b" and "c", ten rows each.
   //
   // Returns the table, or the first error reported by the array builder.
   arrow::Result<std::shared_ptr<arrow::Table>> CreateTable() {
     // Turns a list of values into a finished int64 array.
     const auto make_column = [](const std::vector<int64_t>& values)
         -> arrow::Result<std::shared_ptr<arrow::Array>> {
       arrow::NumericBuilder<arrow::Int64Type> column_builder;
       ARROW_RETURN_NOT_OK(column_builder.AppendValues(values));
       std::shared_ptr<arrow::Array> finished;
       ARROW_RETURN_NOT_OK(column_builder.Finish(&finished));
       return finished;
     };

     ARROW_ASSIGN_OR_RAISE(auto column_a, make_column({0, 1, 2, 3, 4, 5, 6, 7, 8, 9}));
     ARROW_ASSIGN_OR_RAISE(auto column_b, make_column({9, 8, 7, 6, 5, 4, 3, 2, 1, 0}));
     ARROW_ASSIGN_OR_RAISE(auto column_c, make_column({1, 2, 1, 2, 1, 2, 1, 2, 1, 2}));

     const auto table_schema = arrow::schema({
         arrow::field("a", arrow::int64()),
         arrow::field("b", arrow::int64()),
         arrow::field("c", arrow::int64()),
     });
     const std::vector<std::shared_ptr<arrow::Array>> columns = {column_a, column_b,
                                                                 column_c};
     return arrow::Table::Make(table_schema, columns);
   }
   
   // Set up a dataset by writing one ZSTD-compressed Parquet file,
   // "<root_path>/data1.parquet", through the given filesystem.
   //
   // filesystem: filesystem the file is created on.
   // root_path:  directory (or bucket path) the file is written into.
   //
   // Returns root_path on success, otherwise the first error raised while
   // building the table, opening the output stream, writing the Parquet data,
   // or closing the stream.
   arrow::Result<std::string> CreateExampleParquetDataset(
       const std::shared_ptr<fs::FileSystem>& filesystem, const std::string& root_path) {
     std::string base_path = root_path;
     std::cout<<"base_path:"<<base_path<<std::endl;

     // Create an Arrow Table
     ARROW_ASSIGN_OR_RAISE(std::shared_ptr<arrow::Table> table, CreateTable());

     // Choose compression
     std::shared_ptr<WriterProperties> props =
         WriterProperties::Builder().compression(arrow::Compression::ZSTD)->build();

     ARROW_ASSIGN_OR_RAISE(auto output,
                           filesystem->OpenOutputStream(base_path + "/data1.parquet"));

     // Actually write the table; previously the table and the writer properties
     // were built and then dropped, leaving the opened stream empty.
     ARROW_RETURN_NOT_OK(parquet::arrow::WriteTable(
         *table, arrow::default_memory_pool(), output,
         parquet::DEFAULT_MAX_ROW_GROUP_LENGTH, props));

     // Close explicitly so that a failure while completing the upload is
     // returned to the caller instead of being lost when the stream is destroyed.
     ARROW_RETURN_NOT_OK(output->Close());

     return base_path;
   }
   
   // (Doc section: Reading and writing partitioned data #3)
   
   // Resolve `uri` to a filesystem, create the example Parquet file on it, and
   // then shut down the S3 subsystem.
   //
   // format_name: dataset format to write; only "parquet" is handled.
   // uri:         filesystem URI passed to arrow::fs::FileSystemFromUri.
   //
   // Returns the first error hit while resolving the URI or creating the file;
   // if that part succeeded, returns the status of arrow::fs::FinalizeS3().
   arrow::Status RunDatasetDocumentation(const std::string& format_name,
                                         const std::string& uri) {
     // The filesystem handle is confined to this lambda so that it has been
     // released by the time the S3 subsystem is finalized below.
     const auto create_dataset = [&]() -> arrow::Status {
       std::string root_path;
       ARROW_ASSIGN_OR_RAISE(auto filesystem, fs::FileSystemFromUri(uri, &root_path));

       if (format_name != "parquet") {
         std::cerr << "Unknown format: " << format_name << std::endl;
         // Only list what this function really handles.
         std::cerr << "Supported formats: parquet" << std::endl;
         return arrow::Status::ExecutionError("Dataset creating failed.");
       }

       return CreateExampleParquetDataset(filesystem, root_path).status();
     };

     const arrow::Status dataset_status = create_dataset();

     // Finalize on every path, not just on success, so an early error does not
     // leave the S3 subsystem initialized.
     const arrow::Status finalize_status = arrow::fs::FinalizeS3();

     // The dataset error, if any, is the more useful one to report.
     ARROW_RETURN_NOT_OK(dataset_status);
     return finalize_status;
   }
   
   int main() {
   
     
     arrow::fs::S3GlobalOptions options;
     options.log_level = arrow::fs::S3LogLevel::Fatal;  // Replace with your 
region
   
     // Initialize Arrow with S3 options
     arrow::Status status = fs::InitializeS3(options);
     if (!status.ok()) {
       // Handle error
       return 1;
     }
     arrow::fs::S3Options opt;
     opt.region="us-east-1";
     arrow::Result<std::shared_ptr<arrow::fs::S3FileSystem>> s3file;
     //s3file = arrow::fs::S3FileSystem::Make(opt);
   
     
     std::string uri ="s3://AccessKey:secretkey@parquetpoc01?region=us-east-1";
    
     std::string format_name ="parquet";
   
    status = RunDatasetDocumentation(format_name, uri);
     
     if (!status.ok()) {
       std::cerr << status.ToString() << std::endl;
       return EXIT_FAILURE;
     }
    // status = arrow::fs::FinalizeS3();
    // if (!status.ok()) {
    //   std::cerr << status.ToString() << std::endl;
    //   return EXIT_FAILURE;
     //}
     //ARROW_RETURN_NOT_OK(arrow::fs::FinalizeS3());
     return EXIT_SUCCESS;
   }
   
   
   ### Component(s)
   
   C++


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

[I] CreateMultipartUpload failing to upload a file on aws s3 [arrow]

Reply via email to