This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new a7ff7a5e97 Consolidate Example: dataframe_output.rs into dataframe.rs (#13877)
a7ff7a5e97 is described below

commit a7ff7a5e97dbaf483ccde9408a53cfdf13120243
Author: Qi Zhu <[email protected]>
AuthorDate: Mon Dec 23 00:57:40 2024 +0800

    Consolidate Example: dataframe_output.rs into dataframe.rs (#13877)
---
 datafusion-examples/README.md                    |  3 +-
 datafusion-examples/examples/dataframe.rs        | 67 ++++++++++++++++++++
 datafusion-examples/examples/dataframe_output.rs | 78 ------------------------
 3 files changed, 68 insertions(+), 80 deletions(-)

diff --git a/datafusion-examples/README.md b/datafusion-examples/README.md
index aca600e50e..a155920ead 100644
--- a/datafusion-examples/README.md
+++ b/datafusion-examples/README.md
@@ -57,8 +57,7 @@ cargo run --example dataframe
 - [`custom_datasource.rs`](examples/custom_datasource.rs): Run queries against a custom datasource (TableProvider)
 - [`custom_file_format.rs`](examples/custom_file_format.rs): Write data to a custom file format
 - [`dataframe-to-s3.rs`](examples/external_dependency/dataframe-to-s3.rs): Run a query using a DataFrame against a parquet file from s3 and writing back to s3
-- [`dataframe.rs`](examples/dataframe.rs): Run a query using a DataFrame API against parquet files, csv files, and in-memory data
-- [`dataframe_output.rs`](examples/dataframe_output.rs): Examples of methods which write data out from a DataFrame
+- [`dataframe.rs`](examples/dataframe.rs): Run a query using a DataFrame API against parquet files, csv files, and in-memory data. Also demonstrates the various methods to write out a DataFrame to a table, parquet file, csv file, and json file.
 - [`deserialize_to_struct.rs`](examples/deserialize_to_struct.rs): Convert query results into rust structs using serde
 - [`expr_api.rs`](examples/expr_api.rs): Create, execute, simplify, analyze and coerce `Expr`s
 - [`file_stream_provider.rs`](examples/file_stream_provider.rs): Run a query on `FileStreamProvider` which implements `StreamProvider` for reading and writing to arbitrary stream sources / sinks.
diff --git a/datafusion-examples/examples/dataframe.rs b/datafusion-examples/examples/dataframe.rs
index 59766e881e..5d5414e3d8 100644
--- a/datafusion-examples/examples/dataframe.rs
+++ b/datafusion-examples/examples/dataframe.rs
@@ -17,8 +17,12 @@
 
 use arrow::array::{ArrayRef, Int32Array, RecordBatch, StringArray};
 use datafusion::arrow::datatypes::{DataType, Field, Schema};
+use datafusion::dataframe::DataFrameWriteOptions;
 use datafusion::error::Result;
 use datafusion::prelude::*;
+use datafusion_common::config::CsvOptions;
+use datafusion_common::parsers::CompressionTypeVariant;
+use datafusion_common::DataFusionError;
 use std::fs::File;
 use std::io::Write;
 use std::sync::Arc;
@@ -29,6 +33,11 @@ use tempfile::tempdir;
 /// * [read_parquet]: execute queries against parquet files
 /// * [read_csv]: execute queries against csv files
 /// * [read_memory]: execute queries against in-memory arrow data
+///
+/// This example demonstrates the various methods to write out a DataFrame to local storage.
+/// See datafusion-examples/examples/external_dependency/dataframe-to-s3.rs for an example
+/// using a remote object store.
+/// * [write_out]: write out a DataFrame to a table, parquet file, csv file, or json file
 #[tokio::main]
 async fn main() -> Result<()> {
     // The SessionContext is the main high level API for interacting with DataFusion
@@ -36,6 +45,7 @@ async fn main() -> Result<()> {
     read_parquet(&ctx).await?;
     read_csv(&ctx).await?;
     read_memory(&ctx).await?;
+    write_out(&ctx).await?;
     Ok(())
 }
 
@@ -139,3 +149,60 @@ async fn read_memory(ctx: &SessionContext) -> Result<()> {
 
     Ok(())
 }
+
+/// Use the DataFrame API to:
+/// 1. Write out a DataFrame to a table
+/// 2. Write out a DataFrame to a parquet file
+/// 3. Write out a DataFrame to a csv file
+/// 4. Write out a DataFrame to a json file
+async fn write_out(ctx: &SessionContext) -> std::result::Result<(), DataFusionError> {
+    let mut df = ctx.sql("values ('a'), ('b'), ('c')").await.unwrap();
+
+    // Ensure the column names and types match the target table
+    df = df.with_column_renamed("column1", "tablecol1").unwrap();
+
+    ctx.sql(
+        "create external table
+    test(tablecol1 varchar)
+    stored as parquet
+    location './datafusion-examples/test_table/'",
+    )
+    .await?
+    .collect()
+    .await?;
+
+    // This is equivalent to INSERT INTO test VALUES ('a'), ('b'), ('c').
+    // The behavior of write_table depends on the TableProvider's implementation
+    // of the insert_into method.
+    df.clone()
+        .write_table("test", DataFrameWriteOptions::new())
+        .await?;
+
+    df.clone()
+        .write_parquet(
+            "./datafusion-examples/test_parquet/",
+            DataFrameWriteOptions::new(),
+            None,
+        )
+        .await?;
+
+    df.clone()
+        .write_csv(
+            "./datafusion-examples/test_csv/",
+            // DataFrameWriteOptions contains options which control how data is written
+            // such as compression codec
+            DataFrameWriteOptions::new(),
+            Some(CsvOptions::default().with_compression(CompressionTypeVariant::GZIP)),
+        )
+        .await?;
+
+    df.clone()
+        .write_json(
+            "./datafusion-examples/test_json/",
+            DataFrameWriteOptions::new(),
+            None,
+        )
+        .await?;
+
+    Ok(())
+}
diff --git a/datafusion-examples/examples/dataframe_output.rs b/datafusion-examples/examples/dataframe_output.rs
deleted file mode 100644
index 60ca090d72..0000000000
--- a/datafusion-examples/examples/dataframe_output.rs
+++ /dev/null
@@ -1,78 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-use datafusion::{dataframe::DataFrameWriteOptions, prelude::*};
-use datafusion_common::config::CsvOptions;
-use datafusion_common::{parsers::CompressionTypeVariant, DataFusionError};
-
-/// This example demonstrates the various methods to write out a DataFrame to local storage.
-/// See datafusion-examples/examples/external_dependency/dataframe-to-s3.rs for an example
-/// using a remote object store.
-#[tokio::main]
-async fn main() -> Result<(), DataFusionError> {
-    let ctx = SessionContext::new();
-
-    let mut df = ctx.sql("values ('a'), ('b'), ('c')").await.unwrap();
-
-    // Ensure the column names and types match the target table
-    df = df.with_column_renamed("column1", "tablecol1").unwrap();
-
-    ctx.sql(
-        "create external table 
-    test(tablecol1 varchar)
-    stored as parquet 
-    location './datafusion-examples/test_table/'",
-    )
-    .await?
-    .collect()
-    .await?;
-
-    // This is equivalent to INSERT INTO test VALUES ('a'), ('b'), ('c').
-    // The behavior of write_table depends on the TableProvider's implementation
-    // of the insert_into method.
-    df.clone()
-        .write_table("test", DataFrameWriteOptions::new())
-        .await?;
-
-    df.clone()
-        .write_parquet(
-            "./datafusion-examples/test_parquet/",
-            DataFrameWriteOptions::new(),
-            None,
-        )
-        .await?;
-
-    df.clone()
-        .write_csv(
-            "./datafusion-examples/test_csv/",
-            // DataFrameWriteOptions contains options which control how data is written
-            // such as compression codec
-            DataFrameWriteOptions::new(),
-            Some(CsvOptions::default().with_compression(CompressionTypeVariant::GZIP)),
-        )
-        .await?;
-
-    df.clone()
-        .write_json(
-            "./datafusion-examples/test_json/",
-            DataFrameWriteOptions::new(),
-            None,
-        )
-        .await?;
-
-    Ok(())
-}


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to