Repository: zeppelin Updated Branches: refs/heads/master d4783040c -> 91b5d69be
[ZEPPELIN-3182] Support saving notebooks to Google Cloud Storage ### What is this PR for? Support saving notebooks to Google Cloud Storage, similar to implementations for S3 and Azure. It uses the same authentication mechanisms as the BigQuery interpreter. I am new to Maven, so please check my work on the pom.xml files. In particular, I upgraded Guava to 23.0, which was required for `google-cloud-java`. Going through hello-world with my changes seems to work. Also, I modified the BigQuery interpreter docs to point to the **latest** GCS storage docs. Is it more appropriate to pin to the version you are viewing? How can I do that? ### What type of PR is it? Improvement ### Todos * [Low priority] Support encryption keys I don't think this is particularly important, at least for v1. ### How should this be tested? * I added unit tests for the core functionality * I manually tested the authentication instructions (but that could use a second pair of eyes) ### Questions: * Does the licenses files need update? * No idea. `google-cloud-java` is Apache 2: https://github.com/GoogleCloudPlatform/google-cloud-java/blob/master/LICENSE * Is there breaking changes for older versions? * Nope. * Does this needs documentation? 
* Yes, and I tried to update the docs (but there are likely other things that need to be updated) Author: Karthik Palaniappan <karthik...@google.com> Closes #2738 from karth295/master and squashes the following commits: c4a45b7 [Karthik Palaniappan] [ZEPPELIN-3182] Support saving notebooks to Google Cloud Storage 8dc819e [Karthik Palaniappan] Unify logic to clear notebook runtime state on load from storage Project: http://git-wip-us.apache.org/repos/asf/zeppelin/repo Commit: http://git-wip-us.apache.org/repos/asf/zeppelin/commit/91b5d69b Tree: http://git-wip-us.apache.org/repos/asf/zeppelin/tree/91b5d69b Diff: http://git-wip-us.apache.org/repos/asf/zeppelin/diff/91b5d69b Branch: refs/heads/master Commit: 91b5d69be2aa8f72dc49d27800a90f8bed9781cc Parents: d478304 Author: Karthik Palaniappan <karthik...@google.com> Authored: Sun Jan 28 19:21:34 2018 -0800 Committer: Jeff Zhang <zjf...@apache.org> Committed: Wed Feb 14 12:39:03 2018 +0800 ---------------------------------------------------------------------- LICENSE | 5 +- conf/zeppelin-env.sh.template | 6 + conf/zeppelin-site.xml.template | 17 ++ docs/index.md | 1 + docs/interpreter/bigquery.md | 20 +- docs/setup/storage/storage.md | 92 ++++++++ .../zeppelin/conf/ZeppelinConfiguration.java | 11 +- zeppelin-zengine/pom.xml | 103 +++++++- .../java/org/apache/zeppelin/notebook/Note.java | 13 + .../notebook/repo/AzureNotebookRepo.java | 28 +-- .../zeppelin/notebook/repo/GCSNotebookRepo.java | 234 ++++++++++++++++++ .../notebook/repo/MongoNotebookRepo.java | 42 +--- .../zeppelin/notebook/repo/S3NotebookRepo.java | 17 +- .../zeppelin/notebook/repo/VFSNotebookRepo.java | 27 +-- .../notebook/repo/GCSNotebookRepoTest.java | 235 +++++++++++++++++++ 15 files changed, 744 insertions(+), 107 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/zeppelin/blob/91b5d69b/LICENSE ---------------------------------------------------------------------- diff --git 
a/LICENSE b/LICENSE index 2252d65..3b34053 100644 --- a/LICENSE +++ b/LICENSE @@ -259,6 +259,7 @@ The text of each license is also included at licenses/LICENSE-[project]-[version (Apache 2.0) Gson extra (https://github.com/DanySK/gson-extras) (Apache 2.0) Nimbus JOSE+JWT (https://bitbucket.org/connect2id/nimbus-jose-jwt/wiki/Home) (Apache 2.0) jarchivelib (https://github.com/thrau/jarchivelib) + (Apache 2.0) Google Cloud Client Library for Java (https://github.com/GoogleCloudPlatform/google-cloud-java) ======================================================================== BSD 3-Clause licenses @@ -274,6 +275,8 @@ The following components are provided under the BSD 3-Clause license. See file (BSD 3 Clause) diff.js (https://github.com/kpdecker/jsdiff) + (BSD 3-Clause) Google Auth Library for Java (https://github.com/google/google-auth-library-java) + ======================================================================== BSD 2-Clause licenses ======================================================================== @@ -287,4 +290,4 @@ Jython Software License ======================================================================== The following components are provided under the Jython Software License. See file headers and project links for details. - (Jython Software License) jython-standalone - http://www.jython.org/ \ No newline at end of file + (Jython Software License) jython-standalone - http://www.jython.org/ http://git-wip-us.apache.org/repos/asf/zeppelin/blob/91b5d69b/conf/zeppelin-env.sh.template ---------------------------------------------------------------------- diff --git a/conf/zeppelin-env.sh.template b/conf/zeppelin-env.sh.template index 7bc38d6..c7204bd 100644 --- a/conf/zeppelin-env.sh.template +++ b/conf/zeppelin-env.sh.template @@ -30,16 +30,22 @@ # export ZEPPELIN_NOTEBOOK_DIR # Where notebook saved # export ZEPPELIN_NOTEBOOK_HOMESCREEN # Id of notebook to be displayed in homescreen. 
ex) 2A94M5J1Z # export ZEPPELIN_NOTEBOOK_HOMESCREEN_HIDE # hide homescreen notebook from list when this value set to "true". default "false" + # export ZEPPELIN_NOTEBOOK_S3_BUCKET # Bucket where notebook saved # export ZEPPELIN_NOTEBOOK_S3_ENDPOINT # Endpoint of the bucket # export ZEPPELIN_NOTEBOOK_S3_USER # User in bucket where notebook saved. For example bucket/user/notebook/2A94M5J1Z/note.json # export ZEPPELIN_NOTEBOOK_S3_KMS_KEY_ID # AWS KMS key ID # export ZEPPELIN_NOTEBOOK_S3_KMS_KEY_REGION # AWS KMS key region # export ZEPPELIN_NOTEBOOK_S3_SSE # Server-side encryption enabled for notebooks + +# export ZEPPELIN_NOTEBOOK_GCS_STORAGE_DIR # GCS "directory" (prefix) under which notebooks are saved. E.g. gs://example-bucket/path/to/dir +# export GOOGLE_APPLICATION_CREDENTIALS # Provide a service account key file for GCS and BigQuery API calls (overrides application default credentials) + # export ZEPPELIN_NOTEBOOK_MONGO_URI # MongoDB connection URI used to connect to a MongoDB database server. Default "mongodb://localhost" # export ZEPPELIN_NOTEBOOK_MONGO_DATABASE # Database name to store notebook. Default "zeppelin" # export ZEPPELIN_NOTEBOOK_MONGO_COLLECTION # Collection name to store notebook. Default "notes" # export ZEPPELIN_NOTEBOOK_MONGO_AUTOIMPORT # If "true" import local notes under ZEPPELIN_NOTEBOOK_DIR on startup. Default "false" + # export ZEPPELIN_IDENT_STRING # A string representing this instance of zeppelin. $USER by default. # export ZEPPELIN_NICENESS # The scheduling priority for daemons. Defaults to 0. 
# export ZEPPELIN_INTERPRETER_LOCALREPO # Local repository for interpreter's additional dependency loading http://git-wip-us.apache.org/repos/asf/zeppelin/blob/91b5d69b/conf/zeppelin-site.xml.template ---------------------------------------------------------------------- diff --git a/conf/zeppelin-site.xml.template b/conf/zeppelin-site.xml.template index 9e9898b..9774f0d 100755 --- a/conf/zeppelin-site.xml.template +++ b/conf/zeppelin-site.xml.template @@ -67,6 +67,23 @@ <description>hide homescreen notebook from list when this value set to true</description> </property> +<!-- Google Cloud Storage notebook storage --> +<!-- +<property> + <name>zeppelin.notebook.gcs.dir</name> + <value></value> + <description> + A GCS path in the form gs://bucketname/path/to/dir. + Notes are stored at {zeppelin.notebook.gcs.dir}/{notebook-id}/note.json + </description> +</property> + +<property> + <name>zeppelin.notebook.storage</name> + <value>org.apache.zeppelin.notebook.repo.GCSNotebookRepo</value> + <description>notebook persistence layer implementation</description> +</property> +--> <!-- Amazon S3 notebook storage --> <!-- Creates the following directory structure: s3://{bucket}/{username}/{notebook-id}/note.json --> http://git-wip-us.apache.org/repos/asf/zeppelin/blob/91b5d69b/docs/index.md ---------------------------------------------------------------------- diff --git a/docs/index.md b/docs/index.md index 587ae93..3d42735 100644 --- a/docs/index.md +++ b/docs/index.md @@ -104,6 +104,7 @@ limitations under the License. 
* [Git Storage](./setup/storage/storage.html#notebook-storage-in-local-git-repository) * [S3 Storage](./setup/storage/storage.html#notebook-storage-in-s3) * [Azure Storage](./setup/storage/storage.html#notebook-storage-in-azure) + * [Google Cloud Storage](./setup/storage/storage.html#notebook-storage-in-gcs) * [ZeppelinHub Storage](./setup/storage/storage.html#notebook-storage-in-zeppelinhub) * [MongoDB Storage](./setup/storage/storage.html#notebook-storage-in-mongodb) * Operation http://git-wip-us.apache.org/repos/asf/zeppelin/blob/91b5d69b/docs/interpreter/bigquery.md ---------------------------------------------------------------------- diff --git a/docs/interpreter/bigquery.md b/docs/interpreter/bigquery.md index 7ebe2e2..1b90f99 100644 --- a/docs/interpreter/bigquery.md +++ b/docs/interpreter/bigquery.md @@ -58,20 +58,12 @@ Zeppelin is built against BigQuery API version v2-rev265-1.21.0 - [API Javadocs] In a notebook, to enable the **BigQuery** interpreter, click the **Gear** icon and select **bigquery**. -### Setup service account credentials - -In order to run BigQuery interpreter outside of Google Cloud Engine you need to provide authentication credentials, -by [following this instructions](https://developers.google.com/identity/protocols/application-default-credentials): - - - Go to the [API Console Credentials page](https://console.developers.google.com/project/_/apis/credentials) - - From the project drop-down, select your project. - - On the `Credentials` page, select the `Create credentials` drop-down, then select `Service account key`. - - From the Service account drop-down, select an existing service account or create a new one. - - For `Key type`, select the `JSON` key option, then select `Create`. The file automatically downloads to your computer. - - Put the `*.json` file you just downloaded in a directory of your choosing. This directory must be private (you can't let anyone get access to this), but accessible to your Zeppelin instance. 
- - Set the environment variable `GOOGLE_APPLICATION_CREDENTIALS` to the path of the JSON file downloaded. - * either though GUI: in interpreter configuration page property names in CAPITAL_CASE set up env vars - * or though `zeppelin-env.sh`: just add it to the end of the file. +### Provide Application Default Credentials + +Within Google Cloud Platform (e.g. Google App Engine, Google Compute Engine), +built-in credentials are used by default. + +Outside of GCP, follow the Google API authentication instructions for [Zeppelin Google Cloud Storage](https://zeppelin.apache.org/docs/latest/setup/storage/storage.html#notebook-storage-in-gcs) ## Using the BigQuery Interpreter http://git-wip-us.apache.org/repos/asf/zeppelin/blob/91b5d69b/docs/setup/storage/storage.md ---------------------------------------------------------------------- diff --git a/docs/setup/storage/storage.md b/docs/setup/storage/storage.md index f34fc2c..6f2ace4 100644 --- a/docs/setup/storage/storage.md +++ b/docs/setup/storage/storage.md @@ -33,6 +33,7 @@ There are few notebook storage systems available for a use out of the box: * all notes are saved in the notebook folder in hadoop compatible file system - `FileSystemNotebookRepo` * storage using Amazon S3 service - `S3NotebookRepo` * storage using Azure service - `AzureNotebookRepo` + * storage using Google Cloud Storage - `GCSNotebookRepo` * storage using MongoDB - `MongoNotebookRepo` * storage using GitHub - `GitHubNotebookRepo` @@ -264,6 +265,97 @@ Optionally, you can specify Azure folder structure name in the file **zeppelin-s ``` </br> +## Notebook Storage in Google Cloud Storage<a name="GCS"></a> + +Using `GCSNotebookRepo` you can connect Zeppelin with Google Cloud Storage using [Application Default Credentials](https://cloud.google.com/docs/authentication/production). + +First, choose a GCS path under which to store notebooks. 
+ +``` +<property> + <name>zeppelin.notebook.gcs.dir</name> + <value></value> + <description> + A GCS path in the form gs://bucketname/path/to/dir. + Notes are stored at {zeppelin.notebook.gcs.dir}/{notebook-id}/note.json + </description> +</property> +``` + +Then, initialize the `GCSNotebookRepo` class in the file **zeppelin-site.xml** by commenting out the next property: + +``` +<property> + <name>zeppelin.notebook.storage</name> + <value>org.apache.zeppelin.notebook.repo.GitNotebookRepo</value> + <description>versioned notebook persistence layer implementation</description> +</property> +``` + +and uncommenting: + +``` +<property> + <name>zeppelin.notebook.storage</name> + <value>org.apache.zeppelin.notebook.repo.GCSNotebookRepo</value> + <description>notebook persistence layer implementation</description> +</property> +``` + +Or, if you want to simultaneously use your local git storage with GCS, use the following property instead: + + ``` +<property> + <name>zeppelin.notebook.storage</name> + <value>org.apache.zeppelin.notebook.repo.GitNotebookRepo,org.apache.zeppelin.notebook.repo.GCSNotebookRepo</value> + <description>notebook persistence layer implementation</description> +</property> +``` + +### Google Cloud API Authentication + +Note: On Google App Engine, Google Cloud Shell, and Google Compute Engine, these +steps are not necessary, as built-in credentials are used by default. + +For more information, see [Application Default Credentials](https://cloud.google.com/docs/authentication/production) + +#### Using gcloud auth application-default login + +See the [gcloud docs](https://cloud.google.com/sdk/gcloud/reference/auth/application-default/login) + +As the user running the zeppelin daemon, run: + +```bash +gcloud auth application-default login +``` + +You can also use `--scopes` to restrict access to specific Google APIs, such as +Cloud Storage and BigQuery. 
+ +#### Using service account key files + +Alternatively, to use a [service account](https://cloud.google.com/compute/docs/access/service-accounts) +for authentication with GCS, you will need a JSON service account key file. + +1. Navigate to the [service accounts page](https://console.cloud.google.com/iam-admin/serviceaccounts/project) +2. Click `CREATE SERVICE ACCOUNT` +3. Select at least `Storage -> Storage Object Admin`. Note that this is + **different** than `Storage Admin`. +4. If you are also using the BigQuery Interpreter, add the appropriate + permissions (e.g. `Bigquery -> Bigquery Data Viewer and BigQuery User`) +5. Name your service account, and select "Furnish a new private key" to download + a `.json` file. Click "Create". +6. Move the downloaded file to a location of your choice (e.g. + `/path/to/my/key.json`), and give it appropriate permissions. Ensure at + least the user running the zeppelin daemon can read it. + +Then, point `GOOGLE_APPLICATION_CREDENTIALS` at your new key file in **zeppelin-env.sh**. For example: + +```bash +export GOOGLE_APPLICATION_CREDENTIALS=/path/to/my/key.json +``` + +</br> ## Notebook Storage in ZeppelinHub <a name="ZeppelinHub"></a> ZeppelinHub storage layer allows out of the box connection of Zeppelin instance with your ZeppelinHub account. 
First of all, you need to either comment out the following property in **zeppelin-site.xml**: http://git-wip-us.apache.org/repos/asf/zeppelin/blob/91b5d69b/zeppelin-interpreter/src/main/java/org/apache/zeppelin/conf/ZeppelinConfiguration.java ---------------------------------------------------------------------- diff --git a/zeppelin-interpreter/src/main/java/org/apache/zeppelin/conf/ZeppelinConfiguration.java b/zeppelin-interpreter/src/main/java/org/apache/zeppelin/conf/ZeppelinConfiguration.java index f7b3d7b..5beb2c7 100644 --- a/zeppelin-interpreter/src/main/java/org/apache/zeppelin/conf/ZeppelinConfiguration.java +++ b/zeppelin-interpreter/src/main/java/org/apache/zeppelin/conf/ZeppelinConfiguration.java @@ -360,15 +360,19 @@ public class ZeppelinConfiguration extends XMLConfiguration { "org.apache.zeppelin.interpreter.recovery.NullRecoveryStorage"); } - public String getUser() { + public String getGCSStorageDir() { + return getString(ConfVars.ZEPPELIN_NOTEBOOK_GCS_STORAGE_DIR); + } + + public String getS3User() { return getString(ConfVars.ZEPPELIN_NOTEBOOK_S3_USER); } - public String getBucketName() { + public String getS3BucketName() { return getString(ConfVars.ZEPPELIN_NOTEBOOK_S3_BUCKET); } - public String getEndpoint() { + public String getS3Endpoint() { return getString(ConfVars.ZEPPELIN_NOTEBOOK_S3_ENDPOINT); } @@ -697,6 +701,7 @@ public class ZeppelinConfiguration extends XMLConfiguration { ZEPPELIN_NOTEBOOK_HOMESCREEN("zeppelin.notebook.homescreen", null), // whether homescreen notebook will be hidden from notebook list or not ZEPPELIN_NOTEBOOK_HOMESCREEN_HIDE("zeppelin.notebook.homescreen.hide", false), + ZEPPELIN_NOTEBOOK_GCS_STORAGE_DIR("zeppelin.notebook.gcs.dir", ""), ZEPPELIN_NOTEBOOK_S3_BUCKET("zeppelin.notebook.s3.bucket", "zeppelin"), ZEPPELIN_NOTEBOOK_S3_ENDPOINT("zeppelin.notebook.s3.endpoint", "s3.amazonaws.com"), ZEPPELIN_NOTEBOOK_S3_USER("zeppelin.notebook.s3.user", "user"), 
http://git-wip-us.apache.org/repos/asf/zeppelin/blob/91b5d69b/zeppelin-zengine/pom.xml ---------------------------------------------------------------------- diff --git a/zeppelin-zengine/pom.xml b/zeppelin-zengine/pom.xml index a864fdf..81ce716 100644 --- a/zeppelin-zengine/pom.xml +++ b/zeppelin-zengine/pom.xml @@ -39,6 +39,7 @@ <hadoop.version>2.7.3</hadoop.version> <commons.lang3.version>3.4</commons.lang3.version> <commons.vfs2.version>2.0</commons.vfs2.version> + <gcs.storage.version>1.14.0</gcs.storage.version> <aws.sdk.s3.version>1.10.62</aws.sdk.s3.version> <adl.sdk.version>2.1.4</adl.sdk.version> <jackrabbit.webdav.version>1.5.2</jackrabbit.webdav.version> @@ -51,6 +52,7 @@ <!--test library versions--> <google.truth.version>0.27</google.truth.version> + <google.testing.nio.version>0.32.0-alpha</google.testing.nio.version> </properties> <dependencies> @@ -115,9 +117,83 @@ </dependency> <dependency> + <groupId>com.google.cloud</groupId> + <artifactId>google-cloud-storage</artifactId> + <version>${gcs.storage.version}</version> + <exclusions> + <exclusion> + <groupId>com.google.code.findbugs</groupId> + <artifactId>jsr305</artifactId> + </exclusion> + <exclusion> + <groupId>com.google.protobuf</groupId> + <artifactId>protobuf-java</artifactId> + </exclusion> + <exclusion> + <groupId>com.google.guava</groupId> + <artifactId>guava</artifactId> + </exclusion> + <exclusion> + <groupId>com.google.api</groupId> + <artifactId>api-common</artifactId> + </exclusion> + <exclusion> + <groupId>com.google.http-client</groupId> + <artifactId>google-http-client-jackson2</artifactId> + </exclusion> + <exclusion> + <groupId>com.google.http-client</groupId> + <artifactId>google-http-client</artifactId> + </exclusion> + <exclusion> + <groupId>org.codehaus.jackson</groupId> + <artifactId>jackson-core-asl</artifactId> + </exclusion> + </exclusions> + </dependency> + + <dependency> + <groupId>com.google.api</groupId> + <artifactId>api-common</artifactId> + 
<version>1.2.0</version> + <exclusions> + <exclusion> + <groupId>com.google.guava</groupId> + <artifactId>guava</artifactId> + </exclusion> + <exclusion> + <groupId>com.google.code.findbugs</groupId> + <artifactId>jsr305</artifactId> + </exclusion> + </exclusions> + </dependency> + + <dependency> + <groupId>com.google.http-client</groupId> + <artifactId>google-http-client-jackson2</artifactId> + <version>1.23.0</version> + <exclusions> + <exclusion> + <groupId>com.fasterxml.jackson.core</groupId> + <artifactId>jackson-core</artifactId> + </exclusion> + <exclusion> + <groupId>com.google.code.findbugs</groupId> + <artifactId>jsr305</artifactId> + </exclusion> + </exclusions> + </dependency> + + <dependency> <groupId>com.amazonaws</groupId> <artifactId>aws-java-sdk-s3</artifactId> <version>${aws.sdk.s3.version}</version> + <exclusions> + <exclusion> + <groupId>joda-time</groupId> + <artifactId>joda-time</artifactId> + </exclusion> + </exclusions> </dependency> <dependency> @@ -146,7 +222,7 @@ <dependency> <groupId>com.google.guava</groupId> <artifactId>guava</artifactId> - <version>15.0</version> + <version>20.0</version> </dependency> <dependency> @@ -228,6 +304,23 @@ </dependency> <dependency> + <groupId>com.google.cloud</groupId> + <artifactId>google-cloud-nio</artifactId> + <version>${google.testing.nio.version}</version> + <scope>test</scope> + <exclusions> + <exclusion> + <groupId>com.google.code.findbugs</groupId> + <artifactId>jsr305</artifactId> + </exclusion> + <exclusion> + <groupId>com.google.guava</groupId> + <artifactId>guava</artifactId> + </exclusion> + </exclusions> + </dependency> + + <dependency> <groupId>com.google.truth</groupId> <artifactId>truth</artifactId> <version>${google.truth.version}</version> @@ -612,6 +705,10 @@ <artifactId>protobuf-java</artifactId> </exclusion> <exclusion> + <groupId>com.google.protobuf</groupId> + <artifactId>protobuf-java-util</artifactId> + </exclusion> + <exclusion> <groupId>com.google.guava</groupId> 
<artifactId>guava</artifactId> </exclusion> @@ -623,6 +720,10 @@ <groupId>io.grpc</groupId> <artifactId>grpc-context</artifactId> </exclusion> + <exclusion> + <groupId>com.google.api.grpc</groupId> + <artifactId>proto-google-common-protos</artifactId> + </exclusion> </exclusions> </dependency> http://git-wip-us.apache.org/repos/asf/zeppelin/blob/91b5d69b/zeppelin-zengine/src/main/java/org/apache/zeppelin/notebook/Note.java ---------------------------------------------------------------------- diff --git a/zeppelin-zengine/src/main/java/org/apache/zeppelin/notebook/Note.java b/zeppelin-zengine/src/main/java/org/apache/zeppelin/notebook/Note.java index 281c4de..0a8fb12 100644 --- a/zeppelin-zengine/src/main/java/org/apache/zeppelin/notebook/Note.java +++ b/zeppelin-zengine/src/main/java/org/apache/zeppelin/notebook/Note.java @@ -963,6 +963,19 @@ public class Note implements ParagraphJobListener, JsonSerializable { for (Paragraph p : paragraphs) { p.clearRuntimeInfos(); p.parseText(); + + if (p.getStatus() == Status.PENDING || p.getStatus() == Status.RUNNING) { + p.setStatus(Status.ABORT); + } + + List<ApplicationState> appStates = p.getAllApplicationStates(); + if (appStates != null) { + for (ApplicationState app : appStates) { + if (app.getStatus() != ApplicationState.Status.ERROR) { + app.setStatus(ApplicationState.Status.UNLOADED); + } + } + } } } http://git-wip-us.apache.org/repos/asf/zeppelin/blob/91b5d69b/zeppelin-zengine/src/main/java/org/apache/zeppelin/notebook/repo/AzureNotebookRepo.java ---------------------------------------------------------------------- diff --git a/zeppelin-zengine/src/main/java/org/apache/zeppelin/notebook/repo/AzureNotebookRepo.java b/zeppelin-zengine/src/main/java/org/apache/zeppelin/notebook/repo/AzureNotebookRepo.java index de337fa..731a3e8 100644 --- a/zeppelin-zengine/src/main/java/org/apache/zeppelin/notebook/repo/AzureNotebookRepo.java +++ 
b/zeppelin-zengine/src/main/java/org/apache/zeppelin/notebook/repo/AzureNotebookRepo.java @@ -17,6 +17,13 @@ package org.apache.zeppelin.notebook.repo; +import com.microsoft.azure.storage.CloudStorageAccount; +import com.microsoft.azure.storage.StorageException; +import com.microsoft.azure.storage.file.CloudFile; +import com.microsoft.azure.storage.file.CloudFileClient; +import com.microsoft.azure.storage.file.CloudFileDirectory; +import com.microsoft.azure.storage.file.CloudFileShare; +import com.microsoft.azure.storage.file.ListFileItem; import java.io.ByteArrayOutputStream; import java.io.IOException; import java.io.InputStream; @@ -28,26 +35,15 @@ import java.util.Collections; import java.util.LinkedList; import java.util.List; import java.util.Map; - import org.apache.commons.io.IOUtils; import org.apache.commons.lang.StringUtils; import org.apache.zeppelin.conf.ZeppelinConfiguration; import org.apache.zeppelin.notebook.Note; import org.apache.zeppelin.notebook.NoteInfo; -import org.apache.zeppelin.notebook.Paragraph; -import org.apache.zeppelin.scheduler.Job; import org.apache.zeppelin.user.AuthenticationInfo; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.microsoft.azure.storage.CloudStorageAccount; -import com.microsoft.azure.storage.StorageException; -import com.microsoft.azure.storage.file.CloudFile; -import com.microsoft.azure.storage.file.CloudFileClient; -import com.microsoft.azure.storage.file.CloudFileDirectory; -import com.microsoft.azure.storage.file.CloudFileShare; -import com.microsoft.azure.storage.file.ListFileItem; - /** * Azure storage backend for notebooks */ @@ -128,15 +124,7 @@ public class AzureNotebookRepo implements NotebookRepo { String json = IOUtils.toString(ins, conf.getString(ZeppelinConfiguration.ConfVars.ZEPPELIN_ENCODING)); ins.close(); - Note note = Note.fromJson(json); - - for (Paragraph p : note.getParagraphs()) { - if (p.getStatus() == Job.Status.PENDING || p.getStatus() == Job.Status.RUNNING) { - 
p.setStatus(Job.Status.ABORT); - } - } - - return note; + return Note.fromJson(json); } @Override http://git-wip-us.apache.org/repos/asf/zeppelin/blob/91b5d69b/zeppelin-zengine/src/main/java/org/apache/zeppelin/notebook/repo/GCSNotebookRepo.java ---------------------------------------------------------------------- diff --git a/zeppelin-zengine/src/main/java/org/apache/zeppelin/notebook/repo/GCSNotebookRepo.java b/zeppelin-zengine/src/main/java/org/apache/zeppelin/notebook/repo/GCSNotebookRepo.java new file mode 100644 index 0000000..591c532 --- /dev/null +++ b/zeppelin-zengine/src/main/java/org/apache/zeppelin/notebook/repo/GCSNotebookRepo.java @@ -0,0 +1,234 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.zeppelin.notebook.repo; + +import com.google.cloud.storage.Blob; +import com.google.cloud.storage.BlobId; +import com.google.cloud.storage.BlobInfo; +import com.google.cloud.storage.Storage; +import com.google.cloud.storage.Storage.BlobListOption; +import com.google.cloud.storage.StorageException; +import com.google.cloud.storage.StorageOptions; +import com.google.common.base.Optional; +import com.google.common.base.Preconditions; +import com.google.common.base.Strings; +import com.google.gson.JsonParseException; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import java.util.Map; +import java.util.regex.Matcher; +import java.util.regex.Pattern; +import org.apache.commons.lang.StringUtils; +import org.apache.zeppelin.conf.ZeppelinConfiguration; +import org.apache.zeppelin.conf.ZeppelinConfiguration.ConfVars; +import org.apache.zeppelin.notebook.Note; +import org.apache.zeppelin.notebook.NoteInfo; +import org.apache.zeppelin.user.AuthenticationInfo; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * A NotebookRepo implementation for storing notebooks in Google Cloud Storage. + * + * Notes are stored in the GCS "directory" specified by zeppelin.notebook.gcs.dir. This path + * must be in the form gs://bucketName/path/to/Dir. The bucket must already exist. N.B: GCS is an + * object store, so this "directory" should not itself be an object. Instead, it represents the base + * path for the note.json files. + * + * Authentication is provided by google-auth-library-java. + * @see <a href="https://github.com/google/google-auth-library-java"> + * google-auth-library-java</a>. 
+ */ +public class GCSNotebookRepo implements NotebookRepo { + + private static final Logger LOG = LoggerFactory.getLogger(GCSNotebookRepo.class); + private String encoding; + private String bucketName; + private Optional<String> basePath; + private Pattern noteNamePattern; + private Storage storage; + + public GCSNotebookRepo(ZeppelinConfiguration conf) throws IOException { + this(conf, StorageOptions.getDefaultInstance().getService()); + } + + // For tests to use an in-memory storage implementation + GCSNotebookRepo(ZeppelinConfiguration conf, Storage storage) throws IOException { + this.encoding = conf.getString(ConfVars.ZEPPELIN_ENCODING); + + String gcsStorageDir = conf.getGCSStorageDir(); + if (gcsStorageDir.isEmpty()) { + throw new IOException("GCS storage directory must be set using 'zeppelin.notebook.gcs.dir'"); + } + if (!gcsStorageDir.startsWith("gs://")) { + throw new IOException(String.format( + "GCS storage directory '%s' must start with 'gs://'.", gcsStorageDir)); + } + String storageDirWithoutScheme = gcsStorageDir.substring("gs://".length()); + + // pathComponents excludes empty string if trailing slash is present + List<String> pathComponents = Arrays.asList(storageDirWithoutScheme.split("/")); + if (pathComponents.size() < 1) { + throw new IOException(String.format( + "GCS storage directory '%s' must be in the form gs://bucketname/path/to/dir", + gcsStorageDir)); + } + this.bucketName = pathComponents.get(0); + if (pathComponents.size() > 1) { + this.basePath = Optional.of(StringUtils.join( + pathComponents.subList(1, pathComponents.size()), "/")); + } else { + this.basePath = Optional.absent(); + } + + // Notes are stored at gs://bucketName/basePath/<note-id>/note.json + if (basePath.isPresent()) { + this.noteNamePattern = Pattern.compile( + "^" + Pattern.quote(basePath.get() + "/") + "([^/]+)/note\\.json$"); + } else { + this.noteNamePattern = Pattern.compile("^([^/]+)/note\\.json$"); + } + + this.storage = storage; + } + + private BlobId 
makeBlobId(String noteId) { /* Object name: [basePath/]noteId/note.json within bucketName. */ + if (basePath.isPresent()) { + return BlobId.of(bucketName, basePath.get() + "/" + noteId + "/note.json"); + } else { + return BlobId.of(bucketName, noteId + "/note.json"); + } + } + /** Lists the notes stored in the bucket, restricted to the basePath prefix when one is set. Blob names are matched against noteNamePattern and group 1 is used as the note id; no note content is fetched. Throws IOException if the GCS listing fails. */ + @Override + public List<NoteInfo> list(AuthenticationInfo subject) throws IOException { + try { + List<NoteInfo> infos = new ArrayList<>(); + Iterable<Blob> blobsUnderDir; + if (basePath.isPresent()) { + blobsUnderDir = storage + .list(bucketName, BlobListOption.prefix(this.basePath.get() + "/")) + .iterateAll(); + } else { + blobsUnderDir = storage + .list(bucketName) + .iterateAll(); + } + for (Blob b : blobsUnderDir) { + Matcher matcher = noteNamePattern.matcher(b.getName()); + if (matcher.matches()) { + // Callers only use the id field, so do not fetch each note + // This matches the implementation in FileSystemNoteRepo#list + infos.add(new NoteInfo(matcher.group(1), "", null)); + } + } + return infos; + } catch (StorageException se) { + throw new IOException("Could not list GCS directory: " + se.getMessage(), se); + } + } + /** Reads the note.json blob for noteId and parses it with Note.fromJson, decoding the bytes with the configured encoding. Throws IOException if the blob cannot be read or its content is not valid JSON. */ + @Override + public Note get(String noteId, AuthenticationInfo subject) throws IOException { + BlobId blobId = makeBlobId(noteId); + byte[] contents; + try { + contents = storage.readAllBytes(blobId); + } catch (StorageException se) { + throw new IOException("Could not read " + blobId.toString() + ": " + se.getMessage(), se); + } + + try { + return Note.fromJson(new String(contents, encoding)); + } catch (JsonParseException jpe) { + throw new IOException( + "Could not parse as json " + blobId.toString() + ": " + jpe.getMessage(), jpe); + } + } + /** Writes note.toJson() to the blob for the note id with content type application/json. Throws IOException if the write fails. */ + @Override + public void save(Note note, AuthenticationInfo subject) throws IOException { + BlobInfo info = BlobInfo.newBuilder(makeBlobId(note.getId())) + .setContentType("application/json") + .build(); + try { + storage.create(info, note.toJson().getBytes(encoding)); /* same charset get() decodes with, so notes round-trip */ + } catch (StorageException se) { + throw new IOException("Could not write " + info.toString() + ": " + se.getMessage(), se); + 
} + } + /** Deletes the note.json blob for noteId. Throws IllegalArgumentException (via Preconditions.checkArgument) if noteId is null or empty, and IOException if the blob does not exist or the delete call fails. */ + @Override + public void remove(String noteId, AuthenticationInfo subject) throws IOException { + Preconditions.checkArgument(!Strings.isNullOrEmpty(noteId)); + BlobId blobId = makeBlobId(noteId); + try { + boolean deleted = storage.delete(blobId); + if (!deleted) { + throw new IOException("Tried to remove nonexistent blob " + blobId.toString()); + } + } catch (StorageException se) { + throw new IOException("Could not remove " + blobId.toString() + ": " + se.getMessage(), se); + } + } + /** Nothing to release; intentionally a no-op. */ + @Override + public void close() { + //no-op + } + /** Not implemented for this repo: logs a warning and returns null. */ + @Override + public Revision checkpoint(String noteId, String checkpointMsg, AuthenticationInfo subject) + throws IOException { + LOG.warn("checkpoint is not implemented for GCSNotebookRepo"); + return null; + } + /** Not implemented for this repo: logs a warning and returns null regardless of revId. */ + @Override + public Note get(String noteId, String revId, AuthenticationInfo subject) throws IOException { + LOG.warn("get revId is not implemented for GCSNotebookRepo"); + return null; + } + /** Not implemented for this repo: logs a warning and returns an empty list. */ + @Override + public List<Revision> revisionHistory(String noteId, AuthenticationInfo subject) { + LOG.warn("revisionHistory is not implemented for GCSNotebookRepo"); + return Collections.emptyList(); + } + /** Not implemented for this repo: logs a warning and returns null. */ + @Override + public Note setNoteRevision(String noteId, String revId, AuthenticationInfo subject) + throws IOException { + LOG.warn("setNoteRevision is not implemented for GCSNotebookRepo"); + return null; + } + /** Not implemented for this repo: logs a warning and returns an empty list. */ + @Override + public List<NotebookRepoSettingsInfo> getSettings(AuthenticationInfo subject) { + LOG.warn("getSettings is not implemented for GCSNotebookRepo"); + return Collections.emptyList(); + } + /** Not implemented for this repo: logs a warning and ignores the supplied settings. */ + @Override + public void updateSettings(Map<String, String> settings, AuthenticationInfo subject) { + LOG.warn("updateSettings is not implemented for GCSNotebookRepo"); + } +} http://git-wip-us.apache.org/repos/asf/zeppelin/blob/91b5d69b/zeppelin-zengine/src/main/java/org/apache/zeppelin/notebook/repo/MongoNotebookRepo.java ---------------------------------------------------------------------- diff --git 
a/zeppelin-zengine/src/main/java/org/apache/zeppelin/notebook/repo/MongoNotebookRepo.java b/zeppelin-zengine/src/main/java/org/apache/zeppelin/notebook/repo/MongoNotebookRepo.java index 273d75d..376a986 100644 --- a/zeppelin-zengine/src/main/java/org/apache/zeppelin/notebook/repo/MongoNotebookRepo.java +++ b/zeppelin-zengine/src/main/java/org/apache/zeppelin/notebook/repo/MongoNotebookRepo.java @@ -1,5 +1,9 @@ package org.apache.zeppelin.notebook.repo; +import static com.mongodb.client.model.Filters.eq; +import static com.mongodb.client.model.Filters.in; +import static com.mongodb.client.model.Filters.type; + import com.mongodb.MongoBulkWriteException; import com.mongodb.MongoClient; import com.mongodb.MongoClientURI; @@ -7,18 +11,18 @@ import com.mongodb.bulk.BulkWriteError; import com.mongodb.client.MongoCollection; import com.mongodb.client.MongoCursor; import com.mongodb.client.MongoDatabase; -import static com.mongodb.client.model.Filters.eq; -import static com.mongodb.client.model.Filters.type; -import static com.mongodb.client.model.Filters.in; - import com.mongodb.client.model.InsertManyOptions; import com.mongodb.client.model.UpdateOptions; +import java.io.IOException; +import java.util.Collections; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; +import java.util.regex.Matcher; +import java.util.regex.Pattern; import org.apache.zeppelin.conf.ZeppelinConfiguration; import org.apache.zeppelin.notebook.Note; import org.apache.zeppelin.notebook.NoteInfo; -import org.apache.zeppelin.notebook.Paragraph; -import org.apache.zeppelin.notebook.ApplicationState; -import org.apache.zeppelin.scheduler.Job; import org.apache.zeppelin.user.AuthenticationInfo; import org.bson.BsonType; import org.bson.Document; @@ -26,11 +30,6 @@ import org.bson.types.ObjectId; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.io.IOException; -import java.util.*; -import java.util.regex.Matcher; -import java.util.regex.Pattern; - /** * 
Backend for storing Notebook on MongoDB */ @@ -161,24 +160,7 @@ public class MongoNotebookRepo implements NotebookRepo { // document to JSON String json = doc.toJson(); // JSON to note - Note note = Note.fromJson(json); - - for (Paragraph p : note.getParagraphs()) { - if (p.getStatus() == Job.Status.PENDING || p.getStatus() == Job.Status.RUNNING) { - p.setStatus(Job.Status.ABORT); - } - - List<ApplicationState> appStates = p.getAllApplicationStates(); - if (appStates != null) { - for (ApplicationState app : appStates) { - if (app.getStatus() != ApplicationState.Status.ERROR) { - app.setStatus(ApplicationState.Status.UNLOADED); - } - } - } - } - - return note; + return Note.fromJson(json); } /** http://git-wip-us.apache.org/repos/asf/zeppelin/blob/91b5d69b/zeppelin-zengine/src/main/java/org/apache/zeppelin/notebook/repo/S3NotebookRepo.java ---------------------------------------------------------------------- diff --git a/zeppelin-zengine/src/main/java/org/apache/zeppelin/notebook/repo/S3NotebookRepo.java b/zeppelin-zengine/src/main/java/org/apache/zeppelin/notebook/repo/S3NotebookRepo.java index 8828985..7d64702 100644 --- a/zeppelin-zengine/src/main/java/org/apache/zeppelin/notebook/repo/S3NotebookRepo.java +++ b/zeppelin-zengine/src/main/java/org/apache/zeppelin/notebook/repo/S3NotebookRepo.java @@ -90,8 +90,8 @@ public class S3NotebookRepo implements NotebookRepo { public S3NotebookRepo(ZeppelinConfiguration conf) throws IOException { this.conf = conf; - bucketName = conf.getBucketName(); - user = conf.getUser(); + bucketName = conf.getS3BucketName(); + user = conf.getS3User(); useServerSideEncryption = conf.isS3ServerSideEncryption(); // always use the default provider chain @@ -123,7 +123,7 @@ public class S3NotebookRepo implements NotebookRepo { } // set S3 endpoint to use - s3client.setEndpoint(conf.getEndpoint()); + s3client.setEndpoint(conf.getS3Endpoint()); } /** @@ -205,19 +205,10 @@ public class S3NotebookRepo implements NotebookRepo { throw new 
IOException("Unable to retrieve object from S3: " + ace, ace); } - Note note; try (InputStream ins = s3object.getObjectContent()) { String json = IOUtils.toString(ins, conf.getString(ConfVars.ZEPPELIN_ENCODING)); - note = Note.fromJson(json); + return Note.fromJson(json); } - - for (Paragraph p : note.getParagraphs()) { - if (p.getStatus() == Status.PENDING || p.getStatus() == Status.RUNNING) { - p.setStatus(Status.ABORT); - } - } - - return note; } private NoteInfo getNoteInfo(String key) throws IOException { http://git-wip-us.apache.org/repos/asf/zeppelin/blob/91b5d69b/zeppelin-zengine/src/main/java/org/apache/zeppelin/notebook/repo/VFSNotebookRepo.java ---------------------------------------------------------------------- diff --git a/zeppelin-zengine/src/main/java/org/apache/zeppelin/notebook/repo/VFSNotebookRepo.java b/zeppelin-zengine/src/main/java/org/apache/zeppelin/notebook/repo/VFSNotebookRepo.java index 63395f9..481ea3d 100644 --- a/zeppelin-zengine/src/main/java/org/apache/zeppelin/notebook/repo/VFSNotebookRepo.java +++ b/zeppelin-zengine/src/main/java/org/apache/zeppelin/notebook/repo/VFSNotebookRepo.java @@ -17,6 +17,7 @@ package org.apache.zeppelin.notebook.repo; +import com.google.common.collect.Lists; import java.io.File; import java.io.IOException; import java.io.InputStream; @@ -24,11 +25,9 @@ import java.io.OutputStream; import java.net.URI; import java.net.URISyntaxException; import java.util.Collections; -import java.util.Date; import java.util.LinkedList; import java.util.List; import java.util.Map; - import org.apache.commons.io.IOUtils; import org.apache.commons.lang.StringUtils; import org.apache.commons.vfs2.FileContent; @@ -40,17 +39,12 @@ import org.apache.commons.vfs2.Selectors; import org.apache.commons.vfs2.VFS; import org.apache.zeppelin.conf.ZeppelinConfiguration; import org.apache.zeppelin.conf.ZeppelinConfiguration.ConfVars; -import org.apache.zeppelin.notebook.ApplicationState; import org.apache.zeppelin.notebook.Note; import 
org.apache.zeppelin.notebook.NoteInfo; -import org.apache.zeppelin.notebook.Paragraph; -import org.apache.zeppelin.scheduler.Job.Status; import org.apache.zeppelin.user.AuthenticationInfo; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.google.common.collect.Lists; - /** * */ @@ -167,24 +161,7 @@ public class VFSNotebookRepo implements NotebookRepo { String json = IOUtils.toString(ins, conf.getString(ConfVars.ZEPPELIN_ENCODING)); ins.close(); - Note note = Note.fromJson(json); - - for (Paragraph p : note.getParagraphs()) { - if (p.getStatus() == Status.PENDING || p.getStatus() == Status.RUNNING) { - p.setStatus(Status.ABORT); - } - - List<ApplicationState> appStates = p.getAllApplicationStates(); - if (appStates != null) { - for (ApplicationState app : appStates) { - if (app.getStatus() != ApplicationState.Status.ERROR) { - app.setStatus(ApplicationState.Status.UNLOADED); - } - } - } - } - - return note; + return Note.fromJson(json); } private NoteInfo getNoteInfo(FileObject noteDir) throws IOException { http://git-wip-us.apache.org/repos/asf/zeppelin/blob/91b5d69b/zeppelin-zengine/src/test/java/org/apache/zeppelin/notebook/repo/GCSNotebookRepoTest.java ---------------------------------------------------------------------- diff --git a/zeppelin-zengine/src/test/java/org/apache/zeppelin/notebook/repo/GCSNotebookRepoTest.java b/zeppelin-zengine/src/test/java/org/apache/zeppelin/notebook/repo/GCSNotebookRepoTest.java new file mode 100644 index 0000000..c1fae67 --- /dev/null +++ b/zeppelin-zengine/src/test/java/org/apache/zeppelin/notebook/repo/GCSNotebookRepoTest.java @@ -0,0 +1,235 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.zeppelin.notebook.repo; + +import static com.google.common.truth.Truth.assertThat; +import static junit.framework.TestCase.fail; + +import com.google.cloud.storage.BlobId; +import com.google.cloud.storage.BlobInfo; +import com.google.cloud.storage.Storage; +import com.google.cloud.storage.contrib.nio.testing.LocalStorageHelper; +import com.google.common.base.Optional; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.List; +import org.apache.zeppelin.conf.ZeppelinConfiguration; +import org.apache.zeppelin.conf.ZeppelinConfiguration.ConfVars; +import org.apache.zeppelin.notebook.Note; +import org.apache.zeppelin.notebook.NoteInfo; +import org.apache.zeppelin.notebook.Paragraph; +import org.apache.zeppelin.scheduler.Job.Status; +import org.apache.zeppelin.user.AuthenticationInfo; +import org.junit.Before; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; +import org.junit.runners.Parameterized.Parameter; +import org.junit.runners.Parameterized.Parameters; +/** Exercises GCSNotebookRepo against the Storage obtained from LocalStorageHelper. Parameterized: each run supplies a storage-dir URI (uriPath) together with the bucket name and optional base path that the test helpers use to address blobs directly. */ +@RunWith(Parameterized.class) +public class GCSNotebookRepoTest { + private static final AuthenticationInfo AUTH_INFO = AuthenticationInfo.ANONYMOUS; + + private GCSNotebookRepo 
notebookRepo; + private Storage storage; + /** Rows are {bucketName, basePath, uriPath}: a bare bucket and a bucket with a base path, each with and without a trailing slash in the URI. */ + @Parameters + public static Collection<Object[]> data() { + return Arrays.asList(new Object[][] { + { "bucketname", Optional.absent(), "gs://bucketname" }, + { "bucketname-with-slash", Optional.absent(), "gs://bucketname-with-slash/" }, + { "bucketname", Optional.of("path/to/dir"), "gs://bucketname/path/to/dir" }, + { "bucketname", Optional.of("trailing/slash"), "gs://bucketname/trailing/slash/" } + }); + } + + @Parameter(0) + public String bucketName; + + @Parameter(1) + public Optional<String> basePath; + + @Parameter(2) + public String uriPath; + + private Note runningNote; + /** Builds a fresh note and Storage, publishes uriPath through the ZEPPELIN_NOTEBOOK_GCS_STORAGE_DIR system property, and constructs the repo under test. NOTE(review): nothing in this class clears that system property afterwards. */ + @Before + public void setUp() throws Exception { + this.runningNote = makeRunningNote(); + + this.storage = LocalStorageHelper.getOptions().getService(); + + System.setProperty(ConfVars.ZEPPELIN_NOTEBOOK_GCS_STORAGE_DIR.getVarName(), uriPath); + this.notebookRepo = new GCSNotebookRepo(new ZeppelinConfiguration(), storage); + } + /** Returns a note with one config entry and a single paragraph whose status is RUNNING. */ + private static Note makeRunningNote() { + Note note = new Note(); + note.setConfig(ImmutableMap.<String, Object>of("key", "value")); + + Paragraph p = new Paragraph(note, null, null); + p.setText("text"); + p.setStatus(Status.RUNNING); + note.addParagraph(p); + return note; + } + /** With nothing stored, list() must return an empty collection. */ + @Test + public void testList_nonexistent() throws Exception { + assertThat(notebookRepo.list(AUTH_INFO)).isEmpty(); + } + /** Six blobs are written at various paths; only the two of the form noteid/note.json directly under the base path may be reported. */ + @Test + public void testList() throws Exception { + createAt(runningNote, "note.json"); + createAt(runningNote, "/note.json"); + createAt(runningNote, "validid/note.json"); + createAt(runningNote, "validid-2/note.json"); + createAt(runningNote, "cannot-be-dir/note.json/foo"); + createAt(runningNote, "cannot/be/nested/note.json"); + + List<NoteInfo> infos = notebookRepo.list(AUTH_INFO); + List<String> noteIds = new ArrayList<>(); + for (NoteInfo info : infos) { + noteIds.add(info.getId()); + } + // Only valid paths are gs://bucketname/path/<noteid>/note.json + 
assertThat(noteIds).containsExactlyElementsIn(ImmutableList.of("validid", "validid-2")); + } + /** get() of an id with no blob must throw IOException; fail() is reached only if it does not. */ + @Test + public void testGet_nonexistent() throws Exception { + try { + notebookRepo.get("id", AUTH_INFO); + fail(); + } catch (IOException e) {} + } + /** get() must return the stored note with the RUNNING paragraph status changed to ABORT and everything else equal. */ + @Test + public void testGet() throws Exception { + create(runningNote); + + // Status of saved running note is removed in get() + Note got = notebookRepo.get(runningNote.getId(), AUTH_INFO); + assertThat(got.getLastParagraph().getStatus()).isEqualTo(Status.ABORT); + + // But otherwise equal + got.getLastParagraph().setStatus(Status.RUNNING); + assertThat(got).isEqualTo(runningNote); + } + /** A blob whose body is not valid JSON must surface from get() as IOException. */ + @Test + public void testGet_malformed() throws Exception { + createMalformed("id"); + try { + notebookRepo.get("id", AUTH_INFO); + fail(); + } catch (IOException e) {} + } + /** save() of a new note must store exactly the UTF-8 bytes of note.toJson(). */ + @Test + public void testSave_create() throws Exception { + notebookRepo.save(runningNote, AUTH_INFO); + // Output is saved + assertThat(storage.readAllBytes(makeBlobId(runningNote.getId()))) + .isEqualTo(runningNote.toJson().getBytes("UTF-8")); + } + /** A second save() of the same note id must replace the stored bytes with the updated JSON. */ + @Test + public void testSave_update() throws Exception { + notebookRepo.save(runningNote, AUTH_INFO); + // Change name of runningNote + runningNote.setName("new-name"); + notebookRepo.save(runningNote, AUTH_INFO); + assertThat(storage.readAllBytes(makeBlobId(runningNote.getId()))) + .isEqualTo(runningNote.toJson().getBytes("UTF-8")); + } + /** remove() of an id with no blob must throw IOException. */ + @Test + public void testRemove_nonexistent() throws Exception { + try { + notebookRepo.remove("id", AUTH_INFO); + fail(); + } catch (IOException e) {} + } + /** After remove(), looking the blob up directly in storage must return null. */ + @Test + public void testRemove() throws Exception { + create(runningNote); + notebookRepo.remove(runningNote.getId(), AUTH_INFO); + assertThat(storage.get(makeBlobId(runningNote.getId()))).isNull(); + } + /** Prefixes relativePath with the base path and a slash when a base path is present; otherwise returns it unchanged. */ + private String makeName(String relativePath) { + if (basePath.isPresent()) { + return basePath.get() + "/" + relativePath; + } else { + return relativePath; + } + } + /** Blob id the test expects the repo to use for noteId: noteId/note.json under the optional base path. */ + private BlobId makeBlobId(String noteId) { + 
return BlobId.of(bucketName, makeName(noteId + "/note.json")); + } + /** Writes the note JSON straight into storage at relativePath (under the base path, if any), bypassing the repo. */ + private void createAt(Note note, String relativePath) throws IOException { + BlobId id = BlobId.of(bucketName, makeName(relativePath)); + BlobInfo info = BlobInfo.newBuilder(id).setContentType("application/json").build(); + storage.create(info, note.toJson().getBytes("UTF-8")); + } + /** Writes the note JSON straight into storage at the blob id derived from the note id, bypassing the repo. */ + private void create(Note note) throws IOException { + BlobInfo info = BlobInfo.newBuilder(makeBlobId(note.getId())) + .setContentType("application/json") + .build(); + storage.create(info, note.toJson().getBytes("UTF-8")); + } + /** Stores a body that is not valid JSON at the blob id for noteId. */ + private void createMalformed(String noteId) throws IOException { + BlobInfo info = BlobInfo.newBuilder(makeBlobId(noteId)) + .setContentType("application/json") + .build(); + storage.create(info, "{ invalid-json }".getBytes("UTF-8")); + } + + /* These tests test path parsing for illegal paths, and do not use the parameterized vars */ + /** An empty storage-dir property must make the GCSNotebookRepo constructor throw IOException. */ + @Test + public void testInitialization_pathNotSet() throws Exception { + try { + System.setProperty(ConfVars.ZEPPELIN_NOTEBOOK_GCS_STORAGE_DIR.getVarName(), ""); + new GCSNotebookRepo(new ZeppelinConfiguration(), storage); + fail(); + } catch (IOException e) {} + } + /** The storage-dir value foo must make the GCSNotebookRepo constructor throw IOException. */ + @Test + public void testInitialization_malformedPath() throws Exception { + try { + System.setProperty(ConfVars.ZEPPELIN_NOTEBOOK_GCS_STORAGE_DIR.getVarName(), "foo"); + new GCSNotebookRepo(new ZeppelinConfiguration(), storage); + fail(); + } catch (IOException e) {} + } +}