This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
     new e68852c8a Port `object_store` integration tests, use github actions 
(#2148)
e68852c8a is described below

commit e68852c8a9c08da2d1e4857c0e811bd0f687f7c5
Author: Andrew Lamb <[email protected]>
AuthorDate: Mon Jul 25 09:24:01 2022 -0400

    Port `object_store` integration tests, use github actions (#2148)
    
    * Add github test skeleton
    
    * Cleanups and fmt
    
    * Run on changes to object_store
    
    * Update name
    
    * Broken yaml?
    
    * Remove uneeded lint job
    
    * Run only object store tests
    
    * Add local gcp test instructions
    
    * Allow custom http client for gcs
    
    * remove unused error
    
    * Also run clippy
    
    * Update object_store/src/gcp.rs
    
    Co-authored-by: Raphael Taylor-Davies 
<[email protected]>
    
    * rename more
    
    * Fixup test
    
    Co-authored-by: Raphael Taylor-Davies 
<[email protected]>
---
 .github/workflows/object_store.yml | 114 +++++++++++++++++++++++++++++++++++++
 object_store/CONTRIBUTING.md       |  23 +++++++-
 object_store/src/azure.rs          |  50 +++++++++++++++-
 object_store/src/gcp.rs            |  34 ++++++++---
 object_store/src/lib.rs            |   4 +-
 5 files changed, 213 insertions(+), 12 deletions(-)

diff --git a/.github/workflows/object_store.yml 
b/.github/workflows/object_store.yml
new file mode 100644
index 000000000..1b84f3ef0
--- /dev/null
+++ b/.github/workflows/object_store.yml
@@ -0,0 +1,114 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+---
+name: "Object Store"
+
+on:
+  pull_request:
+    paths:
+      # Only run when object store files or github workflows change
+      - object_store/**
+      - .github/**
+
+jobs:
+  clippy:
+    name: Clippy
+    runs-on: ubuntu-latest
+    container:
+      image: amd64/rust
+    steps:
+      - uses: actions/checkout@v2
+      - name: Setup Rust toolchain with clippy
+        run: |
+          rustup toolchain install stable
+          rustup default stable
+          rustup component add clippy
+      - name: Run clippy
+        run: |
+          cargo clippy -p object_store --all-features
+
+  # test the crate
+  linux-test:
+    name: Emulator Tests
+    runs-on: ubuntu-latest
+    services:
+      fake-gcs:
+        image: fsouza/fake-gcs-server
+        ports:
+          - 4443:4443
+      localstack:
+        image: localstack/localstack:0.14.4
+        ports:
+          - 4566:4566
+      azurite:
+        image: mcr.microsoft.com/azure-storage/azurite
+        ports:
+          - 10000:10002
+    container:
+      image: amd64/rust
+      env:
+        # Disable full debug symbol generation to speed up CI build and keep 
memory down
+        # "1" means line tables only, which is useful for panic tracebacks.
+        RUSTFLAGS: "-C debuginfo=1"
+        # https://github.com/rust-lang/cargo/issues/10280
+        CARGO_NET_GIT_FETCH_WITH_CLI: "true"
+        RUST_BACKTRACE: "1"
+        # Run integration tests
+        TEST_INTEGRATION: 1
+        AWS_DEFAULT_REGION: "us-east-1"
+        AWS_ACCESS_KEY_ID: test
+        AWS_SECRET_ACCESS_KEY: test
+        AWS_ENDPOINT: http://localstack:4566
+        AZURE_USE_EMULATOR: "1"
+        AZURITE_BLOB_STORAGE_URL: "http://azurite:10000"
+        AZURITE_QUEUE_STORAGE_URL: "http://azurite:10001"
+        GOOGLE_SERVICE_ACCOUNT: "/tmp/gcs.json"
+        OBJECT_STORE_BUCKET: test-bucket
+
+    steps:
+      - uses: actions/checkout@v2
+
+      - name: Configure Fake GCS Server (GCP emulation)
+        run: |
+          curl --insecure -v -X POST --data-binary '{"name":"test-bucket"}' -H 
"Content-Type: application/json" "https://fake-gcs:4443/storage/v1/b"
+          echo '{"gcs_base_url": "https://fake-gcs:4443", "disable_oauth": 
true, "client_email": "", "private_key": ""}' > "$GOOGLE_SERVICE_ACCOUNT"
+
+      - name: Setup LocalStack (AWS emulation)
+        run: |
+          cd /tmp
+          curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o 
"awscliv2.zip"
+          unzip awscliv2.zip
+          ./aws/install
+          aws --endpoint-url=http://localstack:4566 s3 mb s3://test-bucket
+
+      - name: Configure Azurite (Azure emulation)
+          # the magical connection string is from
+          # 
https://docs.microsoft.com/en-us/azure/storage/common/storage-use-azurite?tabs=visual-studio#http-connection-strings
+        run: |
+          curl -sL https://aka.ms/InstallAzureCLIDeb | bash
+          az storage container create -n test-bucket --connection-string 
'DefaultEndpointsProtocol=http;AccountName=devstoreaccount1;AccountKey=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==;BlobEndpoint=http://azurite:10000/devstoreaccount1;QueueEndpoint=http://azurite:10001/devstoreaccount1;'
+
+      - name: Setup Rust toolchain
+        run: |
+          rustup toolchain install stable
+          rustup default stable
+
+      - name: Run object_store tests
+        run: |
+          # run tests
+          cargo test -p object_store --features=aws,azure,azure_test,gcp
diff --git a/object_store/CONTRIBUTING.md b/object_store/CONTRIBUTING.md
index 2e216dd48..7c2832cf7 100644
--- a/object_store/CONTRIBUTING.md
+++ b/object_store/CONTRIBUTING.md
@@ -90,5 +90,24 @@ $ cargo test --features azure
 
 ### GCP
 
-We don't have a good story yet for testing the GCP integration locally. You 
will need to create a GCS bucket, a
-service account that has access to it, and use this to run the tests.
+To test the GCS integration, we use [Fake GCS 
Server](https://github.com/fsouza/fake-gcs-server)
+
+Startup the fake server:
+
+```shell
+docker run -p 4443:4443 fsouza/fake-gcs-server
+```
+
+Configure the account:
+```shell
+curl --insecure -v -X POST --data-binary '{"name":"test-bucket"}' -H 
"Content-Type: application/json" "https://localhost:4443/storage/v1/b"
+echo '{"gcs_base_url": "https://localhost:4443", "disable_oauth": true, 
"client_email": "", "private_key": ""}' > /tmp/gcs.json
+```
+
+Now run the tests:
+```shell
+TEST_INTEGRATION=1 \
+OBJECT_STORE_BUCKET=test-bucket \
+GOOGLE_SERVICE_ACCOUNT=/tmp/gcs.json \
+cargo test -p object_store --features=gcp
+```
diff --git a/object_store/src/azure.rs b/object_store/src/azure.rs
index 5f4327982..75dafef86 100644
--- a/object_store/src/azure.rs
+++ b/object_store/src/azure.rs
@@ -38,6 +38,7 @@ use futures::{
 use snafu::{ResultExt, Snafu};
 use std::collections::BTreeSet;
 use std::{convert::TryInto, sync::Arc};
+use url::Url;
 
 /// A specialized `Error` for Azure object store-related errors
 #[derive(Debug, Snafu)]
@@ -158,6 +159,18 @@ enum Error {
         "Azurite (azure emulator) support not compiled in, please add 
`azure_test` feature"
     ))]
     NoEmulatorFeature,
+
+    #[snafu(display(
+        "Unable parse emulator url {}={}, Error: {}",
+        env_name,
+        env_value,
+        source
+    ))]
+    UnableToParseEmulatorUrl {
+        env_name: String,
+        env_value: String,
+        source: url::ParseError,
+    },
 }
 
 impl From<Error> for super::Error {
@@ -507,6 +520,21 @@ fn check_if_emulator_works() -> Result<()> {
     Err(Error::NoEmulatorFeature.into())
 }
 
+/// Parses the contents of the environment variable `env_name` as a URL
+/// if present, otherwise falls back to default_url
+fn url_from_env(env_name: &str, default_url: &str) -> Result<Url> {
+    let url = match std::env::var(env_name) {
+        Ok(env_value) => {
+            Url::parse(&env_value).context(UnableToParseEmulatorUrlSnafu {
+                env_name,
+                env_value,
+            })?
+        }
+        Err(_) => Url::parse(default_url).expect("Failed to parse default 
URL"),
+    };
+    Ok(url)
+}
+
 /// Configure a connection to container with given name on Microsoft Azure
 /// Blob store.
 ///
@@ -524,7 +552,27 @@ pub fn new_azure(
 
     let (is_emulator, storage_account_client) = if use_emulator {
         check_if_emulator_works()?;
-        (true, StorageAccountClient::new_emulator_default())
+        // Allow overriding defaults. Values taken from
+        // from 
https://docs.rs/azure_storage/0.2.0/src/azure_storage/core/clients/storage_account_client.rs.html#129-141
+        let http_client = azure_core::new_http_client();
+        let blob_storage_url =
+            url_from_env("AZURITE_BLOB_STORAGE_URL", 
"http://127.0.0.1:10000")?;
+        let queue_storage_url =
+            url_from_env("AZURITE_QUEUE_STORAGE_URL", 
"http://127.0.0.1:10001")?;
+        let table_storage_url =
+            url_from_env("AZURITE_TABLE_STORAGE_URL", 
"http://127.0.0.1:10002")?;
+        let filesystem_url =
+            url_from_env("AZURITE_TABLE_STORAGE_URL", 
"http://127.0.0.1:10004")?;
+
+        let storage_client = StorageAccountClient::new_emulator(
+            http_client,
+            &blob_storage_url,
+            &table_storage_url,
+            &queue_storage_url,
+            &filesystem_url,
+        );
+
+        (true, storage_client)
     } else {
         (
             false,
diff --git a/object_store/src/gcp.rs b/object_store/src/gcp.rs
index 84fb572bd..e836caba7 100644
--- a/object_store/src/gcp.rs
+++ b/object_store/src/gcp.rs
@@ -502,9 +502,17 @@ fn reader_credentials_file(
 pub fn new_gcs(
     service_account_path: impl AsRef<std::path::Path>,
     bucket_name: impl Into<String>,
+) -> Result<GoogleCloudStorage> {
+    new_gcs_with_client(service_account_path, bucket_name, Client::new())
+}
+
+/// Configure a connection to Google Cloud Storage with the specified HTTP 
client.
+pub fn new_gcs_with_client(
+    service_account_path: impl AsRef<std::path::Path>,
+    bucket_name: impl Into<String>,
+    client: Client,
 ) -> Result<GoogleCloudStorage> {
     let credentials = reader_credentials_file(service_account_path)?;
-    let client = Client::new();
 
     // TODO: https://cloud.google.com/storage/docs/authentication#oauth-scopes
     let scope = "https://www.googleapis.com/auth/devstorage.full_control";
@@ -575,6 +583,18 @@ mod test {
         service_account: String,
     }
 
+    impl GoogleCloudConfig {
+        fn build_test(self) -> Result<GoogleCloudStorage> {
+            // ignore HTTPS errors in tests so we can use fake-gcs server
+            let client = Client::builder()
+                .danger_accept_invalid_certs(true)
+                .build()
+                .expect("Error creating http client for testing");
+
+            new_gcs_with_client(self.service_account, self.bucket, client)
+        }
+    }
+
     // Helper macro to skip tests if TEST_INTEGRATION and the GCP environment 
variables are not set.
     macro_rules! maybe_skip_integration {
         () => {{
@@ -622,7 +642,7 @@ mod test {
     #[tokio::test]
     async fn gcs_test() {
         let config = maybe_skip_integration!();
-        let integration = new_gcs(config.service_account, 
config.bucket).unwrap();
+        let integration = config.build_test().unwrap();
 
         put_get_delete_list(&integration).await.unwrap();
         list_uses_directories_correctly(&integration).await.unwrap();
@@ -633,7 +653,7 @@ mod test {
     #[tokio::test]
     async fn gcs_test_get_nonexistent_location() {
         let config = maybe_skip_integration!();
-        let integration = new_gcs(config.service_account, 
&config.bucket).unwrap();
+        let integration = config.build_test().unwrap();
 
         let location = Path::from_iter([NON_EXISTENT_NAME]);
 
@@ -650,7 +670,7 @@ mod test {
     async fn gcs_test_get_nonexistent_bucket() {
         let mut config = maybe_skip_integration!();
         config.bucket = NON_EXISTENT_NAME.into();
-        let integration = new_gcs(config.service_account, 
&config.bucket).unwrap();
+        let integration = config.build_test().unwrap();
 
         let location = Path::from_iter([NON_EXISTENT_NAME]);
 
@@ -668,7 +688,7 @@ mod test {
     #[tokio::test]
     async fn gcs_test_delete_nonexistent_location() {
         let config = maybe_skip_integration!();
-        let integration = new_gcs(config.service_account, 
&config.bucket).unwrap();
+        let integration = config.build_test().unwrap();
 
         let location = Path::from_iter([NON_EXISTENT_NAME]);
 
@@ -684,7 +704,7 @@ mod test {
     async fn gcs_test_delete_nonexistent_bucket() {
         let mut config = maybe_skip_integration!();
         config.bucket = NON_EXISTENT_NAME.into();
-        let integration = new_gcs(config.service_account, 
&config.bucket).unwrap();
+        let integration = config.build_test().unwrap();
 
         let location = Path::from_iter([NON_EXISTENT_NAME]);
 
@@ -700,7 +720,7 @@ mod test {
     async fn gcs_test_put_nonexistent_bucket() {
         let mut config = maybe_skip_integration!();
         config.bucket = NON_EXISTENT_NAME.into();
-        let integration = new_gcs(config.service_account, 
&config.bucket).unwrap();
+        let integration = config.build_test().unwrap();
 
         let location = Path::from_iter([NON_EXISTENT_NAME]);
         let data = Bytes::from("arbitrary data");
diff --git a/object_store/src/lib.rs b/object_store/src/lib.rs
index 4a56b03bf..2dc65069a 100644
--- a/object_store/src/lib.rs
+++ b/object_store/src/lib.rs
@@ -695,8 +695,8 @@ mod tests {
     #[tokio::test]
     async fn test_list_lifetimes() {
         let store = memory::InMemory::new();
-        let stream = list_store(&store, "path").await.unwrap();
-        assert_eq!(stream.count().await, 0);
+        let mut stream = list_store(&store, "path").await.unwrap();
+        assert!(stream.next().await.is_none());
     }
 
     // Tests TODO:

Reply via email to