r4ntix commented on code in PR #5732:
URL: https://github.com/apache/arrow-datafusion/pull/5732#discussion_r1151447436


##########
datafusion/core/src/datasource/listing_table_factory.rs:
##########
@@ -138,6 +142,73 @@ impl TableProviderFactory for ListingTableFactory {
             .with_file_sort_order(file_sort_order);
 
         let table_path = ListingTableUrl::parse(&cmd.location)?;
+
+        // try obtaining all relevant information of object store from 
cmd.options
+        match table_path.scheme() {
+            "s3" => {
+                let url: &Url = table_path.as_ref();
+                let bucket_name = url
+                    .host_str()
+                    .ok_or(DataFusionError::External("invaild bucket 
name".into()))?;
+                let mut builder =
+                    AmazonS3Builder::from_env().with_bucket_name(bucket_name);
+
+                if let (Some(access_key_id), Some(secret_access_key)) = (
+                    cmd.options.get("access_key_id"),
+                    cmd.options.get("secret_access_key"),
+                ) {
+                    builder = builder
+                        .with_access_key_id(access_key_id)
+                        .with_secret_access_key(secret_access_key);
+                }
+
+                if let Some(session_token) = cmd.options.get("session_token") {
+                    builder = builder.with_token(session_token);
+                }
+
+                if let Some(region) = cmd.options.get("region") {
+                    builder = builder.with_region(region);
+                }
+
+                let store = Arc::new(builder.build()?);
+
+                state
+                    .runtime_env()
+                    .register_object_store(table_path.as_ref(), store);

Review Comment:
   @alamb @yjshen  I created a new branch and moved the code from datafusion 
core to datafusion-cli: 
https://github.com/r4ntix/arrow-datafusion/commit/f9880465b3f2c00164422177d78622fcf7d334d0
   
   Because the information needed for the object store is fetched dynamically from the 
SQL statement (`cmd.options`), there is additional SQL parsing overhead here:
   ```rust
   /// Executes one SQL statement against `ctx` and prints its result batches.
   ///
   /// The statement is first turned into a logical plan solely to detect
   /// `CREATE EXTERNAL TABLE`; in that case `create_external_table` is invoked
   /// before the statement itself is run through `ctx.sql`.
   ///
   /// # Errors
   ///
   /// Propagates any error from planning, from `create_external_table`, from
   /// executing/collecting the query, or from printing the batches.
   async fn exec_and_print(
       ctx: &mut SessionContext,
       print_options: &PrintOptions,
       sql: String,
   ) -> Result<()> {
       // Taken before planning; handed to `print_batches` below.
       let started = Instant::now();

       // Plan the statement once up front so an external-table definition can
       // be inspected before execution.
       let plan = ctx.state().create_logical_plan(&sql).await?;
       if let LogicalPlan::CreateExternalTable(cmd) = plan {
           create_external_table(&ctx, &cmd)?;
       }

       // Every statement, external table or not, is executed from the raw SQL
       // text, which means it is parsed again here.
       let df = ctx.sql(&sql).await?;

       let batches = df.collect().await?;
       print_options.print_batches(&batches, started)?;

       Ok(())
   }
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to