alamb commented on code in PR #20394: URL: https://github.com/apache/datafusion/pull/20394#discussion_r2869094238
########## datafusion-examples/examples/data_io/in_memory_object_store.rs: ########## @@ -0,0 +1,117 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! See `main.rs` for how to run it. +//! +//! This follows the recommended approach: implement the `ObjectStore` trait +//! (or use an existing implementation), register it with DataFusion, and then +//! read a URL "path" from that store. +//! See the in-memory reference implementation: +//! https://docs.rs/object_store/latest/object_store/memory/struct.InMemory.html + +use std::sync::Arc; + +use arrow::array::{Int64Array, RecordBatch, StringArray}; +use arrow::datatypes::{DataType, Field, Schema}; +use datafusion::assert_batches_eq; +use datafusion::common::Result; +use datafusion::parquet::arrow::ArrowWriter; +use datafusion::prelude::{ + CsvReadOptions, JsonReadOptions, ParquetReadOptions, SessionContext, +}; +use object_store::memory::InMemory; +use object_store::path::Path; +use object_store::{ObjectStore, PutPayload}; +use url::Url; + +/// Demonstrates reading CSV/JSON/Parquet data from an in-memory object store. 
+pub async fn in_memory_object_store() -> Result<()> { + let store: Arc<dyn ObjectStore> = Arc::new(InMemory::new()); + let ctx = SessionContext::new(); + let object_store_url = Url::parse("mem://").unwrap(); + ctx.register_object_store(&object_store_url, Arc::clone(&store)); + + let schema = Schema::new(vec![ + Field::new("id", DataType::Int64, false), + Field::new("name", DataType::Utf8, false), + ]); + + println!("=== CSV from memory ===");

Review Comment:

I wonder how much value showing multiple formats adds. Maybe we could show just one format and add comments explaining that the same approach applies to the other formats?

I think it would also help if we added a few comments in the code to highlight the major steps:

1. Register a URL prefix
2. Write into memory
3. Read using the URL

What do you think?

-- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]

--------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
