This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new b1d134e9ff Add HTTP object store example (#7602)
b1d134e9ff is described below

commit b1d134e9ff37724459cb5090a6346a85152a1db7
Author: Pirmin Kalberer <[email protected]>
AuthorDate: Mon Sep 25 22:43:45 2023 +0200

    Add HTTP object store example (#7602)
    
    Co-authored-by: Andrew Lamb <[email protected]>
---
 datafusion-examples/Cargo.toml                 |  2 +-
 datafusion-examples/README.md                  |  1 +
 datafusion-examples/examples/query-http-csv.rs | 57 ++++++++++++++++++++++++++
 3 files changed, 59 insertions(+), 1 deletion(-)

diff --git a/datafusion-examples/Cargo.toml b/datafusion-examples/Cargo.toml
index d928f0177d..e5146c7fd9 100644
--- a/datafusion-examples/Cargo.toml
+++ b/datafusion-examples/Cargo.toml
@@ -46,7 +46,7 @@ futures = "0.3"
 log = "0.4"
 mimalloc = { version = "0.1", default-features = false }
 num_cpus = "1.13.0"
-object_store = { version = "0.7.0", features = ["aws"] }
+object_store = { version = "0.7.0", features = ["aws", "http"] }
 prost = { version = "0.12", default-features = false }
 prost-derive = { version = "0.11", default-features = false }
 serde = { version = "1.0.136", features = ["derive"] }
diff --git a/datafusion-examples/README.md b/datafusion-examples/README.md
index 02dd9c4173..bfed3976c9 100644
--- a/datafusion-examples/README.md
+++ b/datafusion-examples/README.md
@@ -54,6 +54,7 @@ cargo run --example csv_sql
 - [`parquet_sql.rs`](examples/parquet_sql.rs): Build and run a query plan from 
a SQL statement against a local Parquet file
 - [`parquet_sql_multiple_files.rs`](examples/parquet_sql_multiple_files.rs): 
Build and run a query plan from a SQL statement against multiple local Parquet 
files
 - [`query-aws-s3.rs`](examples/query-aws-s3.rs): Configure `object_store` and 
run a query against files stored in AWS S3
+- [`query-http-csv.rs`](examples/query-http-csv.rs): Configure `object_store` and run a query against files via HTTP
 - [`rewrite_expr.rs`](examples/rewrite_expr.rs): Define and invoke a custom 
Query Optimizer pass
 - [`simple_udaf.rs`](examples/simple_udaf.rs): Define and invoke a User 
Defined Aggregate Function (UDAF)
 - [`simple_udf.rs`](examples/simple_udf.rs): Define and invoke a User Defined 
(scalar) Function (UDF)
diff --git a/datafusion-examples/examples/query-http-csv.rs 
b/datafusion-examples/examples/query-http-csv.rs
new file mode 100644
index 0000000000..928d702711
--- /dev/null
+++ b/datafusion-examples/examples/query-http-csv.rs
@@ -0,0 +1,57 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use datafusion::error::Result;
+use datafusion::prelude::*;
+use object_store::http::HttpBuilder;
+use std::sync::Arc;
+use url::Url;
+
+/// This example demonstrates executing a simple query against an Arrow data 
source (CSV) and
+/// fetching results
+#[tokio::main]
+async fn main() -> Result<()> {
+    // create local execution context
+    let ctx = SessionContext::new();
+
+    // setup http object store
+    let base_url = Url::parse("https://github.com";).unwrap();
+    let http_store = HttpBuilder::new()
+        .with_url(base_url.clone())
+        .build()
+        .unwrap();
+    ctx.runtime_env()
+        .register_object_store(&base_url, Arc::new(http_store));
+
+    // register csv file with the execution context
+    ctx.register_csv(
+        "aggregate_test_100",
+        
"https://github.com/apache/arrow-testing/raw/master/data/csv/aggregate_test_100.csv";,
+        CsvReadOptions::new(),
+    )
+    .await?;
+
+    // execute the query
+    let df = ctx
+        .sql("SELECT c1,c2,c3 FROM aggregate_test_100 LIMIT 5")
+        .await?;
+
+    // print the results
+    df.show().await?;
+
+    Ok(())
+}

Reply via email to