This is an automated email from the ASF dual-hosted git repository. zjffdu pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/zeppelin.git
The following commit(s) were added to refs/heads/master by this push: new eb416e7 [ZEPPELIN-4659]. S3NotebookRepo doesn't work for new note file structure eb416e7 is described below commit eb416e7e4026c03ed430261a7e5e51cf2f7b0938 Author: Jeff Zhang <zjf...@apache.org> AuthorDate: Wed Mar 4 14:45:18 2020 +0800 [ZEPPELIN-4659]. S3NotebookRepo doesn't work for new note file structure ### What is this PR for? This PR improve the S3NotebookRepo to adapt the new note file structure. Besides that I use s3proxy to add unit test for S3NotebookRepo. Unfortunately I don't have aws account, so I didn't test it against the real s3. I would be very appreciated if someone can help test it against rest s3. ### What type of PR is it? [Bug Fix] ### Todos * [ ] - Task ### What is the Jira issue? * https://issues.apache.org/jira/browse/ZEPPELIN-4659 ### How should this be tested? CI pass ### Screenshots (if appropriate) ### Questions: * Does the licenses files need update? no * Is there breaking changes for older versions? no * Does this needs documentation? no Author: Jeff Zhang <zjf...@apache.org> Closes #3675 from zjffdu/ZEPPELIN-4659 and squashes the following commits: c2b28d14e [Jeff Zhang] [ZEPPELIN-4659]. S3NotebookRepo doesn't work for new note file structure --- LICENSE | 1 + zeppelin-plugins/notebookrepo/s3/pom.xml | 42 ++++++- .../zeppelin/notebook/repo/S3NotebookRepo.java | 55 ++++++--- .../zeppelin/notebook/repo/S3NotebookRepoTest.java | 135 +++++++++++++++++++++ zeppelin-server/pom.xml | 4 + zeppelin-zengine/pom.xml | 2 +- 6 files changed, 221 insertions(+), 18 deletions(-) diff --git a/LICENSE b/LICENSE index 2edb8a8..d6460e9 100644 --- a/LICENSE +++ b/LICENSE @@ -266,6 +266,7 @@ The text of each license is also included at licenses/LICENSE-[project]-[version (Apache 2.0) concurrentunit (https://github.com/jhalterman/concurrentunit) (Apache 2.0) Embedded MongoDB (https://github.com/flapdoodle-oss/de.flapdoodle.embed.mongo) (Apache 2.0) Kotlin (https://github.com/JetBrains/kotlin) + (Apache 2.0) s3proxy (https://github.com/gaul/s3proxy) ======================================================================== BSD 3-Clause licenses diff --git a/zeppelin-plugins/notebookrepo/s3/pom.xml b/zeppelin-plugins/notebookrepo/s3/pom.xml index 1385907..744840e 100644 --- a/zeppelin-plugins/notebookrepo/s3/pom.xml +++ b/zeppelin-plugins/notebookrepo/s3/pom.xml @@ -36,7 +36,7 @@ <description>NotebookRepo implementation based on S3</description> <properties> - <aws.sdk.s3.version>1.10.62</aws.sdk.s3.version> + <aws.sdk.s3.version>1.11.736</aws.sdk.s3.version> <plugin.name>NotebookRepo/S3NotebookRepo</plugin.name> </properties> @@ -46,6 +46,46 @@ <artifactId>aws-java-sdk-s3</artifactId> <version>${aws.sdk.s3.version}</version> </dependency> + + <dependency> + <groupId>org.gaul</groupId> + <artifactId>s3proxy</artifactId> + <version>1.6.1</version> + <exclusions> + <exclusion> + <groupId>com.google.guava</groupId> + <artifactId>guava</artifactId> + </exclusion> + <exclusion> + <groupId>javax.xml.bind</groupId> + <artifactId>jaxb-api</artifactId> + </exclusion> + <exclusion> + <groupId>com.fasterxml.jackson.core</groupId> + <artifactId>jackson-core</artifactId> + </exclusion> + <exclusion> + <groupId>org.eclipse.jetty</groupId> + <artifactId>jetty-util</artifactId> + </exclusion> + <exclusion> + <groupId>com.fasterxml.jackson.core</groupId> + <artifactId>jackson-annotations</artifactId> + </exclusion> + <exclusion> + <groupId>com.fasterxml.jackson.core</groupId> + <artifactId>jackson-databind</artifactId> + </exclusion> + <exclusion> + <groupId>com.google.errorprone</groupId> + <artifactId>error_prone_annotations</artifactId> + </exclusion> + <exclusion> + <groupId>com.google.code.findbugs</groupId> + <artifactId>jsr305</artifactId> + </exclusion> + </exclusions> + </dependency> </dependencies> <build> diff --git a/zeppelin-plugins/notebookrepo/s3/src/main/java/org/apache/zeppelin/notebook/repo/S3NotebookRepo.java b/zeppelin-plugins/notebookrepo/s3/src/main/java/org/apache/zeppelin/notebook/repo/S3NotebookRepo.java index c86c627..8df3149 100644 --- a/zeppelin-plugins/notebookrepo/s3/src/main/java/org/apache/zeppelin/notebook/repo/S3NotebookRepo.java +++ b/zeppelin-plugins/notebookrepo/s3/src/main/java/org/apache/zeppelin/notebook/repo/S3NotebookRepo.java @@ -196,7 +196,7 @@ public class S3NotebookRepo implements NotebookRepo { listObjectsRequest.setMarker(objectListing.getNextMarker()); } while (objectListing.isTruncated()); } catch (AmazonClientException ace) { - throw new IOException("Unable to list objects in S3: " + ace, ace); + throw new IOException("Fail to list objects in S3", ace); } return notesInfo; } @@ -213,7 +213,7 @@ public class S3NotebookRepo implements NotebookRepo { rootFolder + "/" + buildNoteFileName(noteId, notePath))); } catch (AmazonClientException ace) { - throw new IOException("Unable to retrieve object from S3: " + ace, ace); + throw new IOException("Fail to get note: " + notePath + " from S3", ace); } try (InputStream ins = s3object.getObjectContent()) { String json = IOUtils.toString(ins, conf.getString(ConfVars.ZEPPELIN_ENCODING)); @@ -240,7 +240,7 @@ public class S3NotebookRepo implements NotebookRepo { s3client.putObject(putRequest); } catch (AmazonClientException ace) { - throw new IOException("Unable to store note in S3: " + ace, ace); + throw new IOException("Fail to store note: " + note.getPath() + " in S3", ace); } finally { FileUtils.deleteQuietly(file); @@ -257,16 +257,43 @@ public class S3NotebookRepo implements NotebookRepo { } @Override - public void move(String folderPath, String newFolderPath, AuthenticationInfo subject) { - + public void move(String folderPath, String newFolderPath, AuthenticationInfo subject) throws IOException { + try { + ListObjectsRequest listObjectsRequest = new ListObjectsRequest() + .withBucketName(bucketName) + .withPrefix(rootFolder + folderPath + "/"); + ObjectListing objectListing; + do { + objectListing = s3client.listObjects(listObjectsRequest); + for (S3ObjectSummary objectSummary : objectListing.getObjectSummaries()) { + if (objectSummary.getKey().endsWith(".zpln")) { + String noteId = getNoteId(objectSummary.getKey()); + String notePath = getNotePath(rootFolder, objectSummary.getKey()); + String newNotePath = newFolderPath + notePath.substring(folderPath.length()); + move(noteId, notePath, newNotePath, subject); + } + } + listObjectsRequest.setMarker(objectListing.getNextMarker()); + } while (objectListing.isTruncated()); + } catch (AmazonClientException ace) { + throw new IOException("Fail to move folder: " + folderPath + " to " + newFolderPath + " in S3" , ace); + } } @Override public void remove(String noteId, String notePath, AuthenticationInfo subject) throws IOException { - String key = rootFolder + "/" + buildNoteFileName(noteId, notePath); + try { + s3client.deleteObject(bucketName, rootFolder + "/" + buildNoteFileName(noteId, notePath)); + } catch (AmazonClientException ace) { + throw new IOException("Fail to remove note: " + notePath + " from S3", ace); + } + } + + @Override + public void remove(String folderPath, AuthenticationInfo subject) throws IOException { final ListObjectsRequest listObjectsRequest = new ListObjectsRequest() - .withBucketName(bucketName).withPrefix(key); + .withBucketName(bucketName).withPrefix(rootFolder + folderPath + "/"); try { ObjectListing objects = s3client.listObjects(listObjectsRequest); do { @@ -275,20 +302,16 @@ public class S3NotebookRepo implements NotebookRepo { } objects = s3client.listNextBatchOfObjects(objects); } while (objects.isTruncated()); - } - catch (AmazonClientException ace) { - throw new IOException("Unable to remove note in S3: " + ace, ace); + } catch (AmazonClientException ace) { + throw new IOException("Unable to remove folder " + folderPath + " in S3", ace); } } @Override - public void remove(String folderPath, AuthenticationInfo subject) { - - } - - @Override public void close() { - //no-op + if (s3client != null) { + s3client.shutdown(); + } } @Override diff --git a/zeppelin-plugins/notebookrepo/s3/src/test/java/org/apache/zeppelin/notebook/repo/S3NotebookRepoTest.java b/zeppelin-plugins/notebookrepo/s3/src/test/java/org/apache/zeppelin/notebook/repo/S3NotebookRepoTest.java new file mode 100644 index 0000000..2daff99 --- /dev/null +++ b/zeppelin-plugins/notebookrepo/s3/src/test/java/org/apache/zeppelin/notebook/repo/S3NotebookRepoTest.java @@ -0,0 +1,135 @@ +package org.apache.zeppelin.notebook.repo; + +import com.amazonaws.auth.AWSStaticCredentialsProvider; +import com.amazonaws.auth.BasicAWSCredentials; +import com.amazonaws.client.builder.AwsClientBuilder; +import com.amazonaws.regions.Regions; +import com.amazonaws.services.s3.AmazonS3; +import com.amazonaws.services.s3.AmazonS3ClientBuilder; +import org.apache.zeppelin.conf.ZeppelinConfiguration; +import org.apache.zeppelin.notebook.Note; +import org.apache.zeppelin.notebook.NoteInfo; +import org.apache.zeppelin.user.AuthenticationInfo; +import org.gaul.s3proxy.junit.S3ProxyRule; +import org.junit.After; +import org.junit.Before; +import org.junit.Rule; +import org.junit.Test; + +import java.io.IOException; +import java.util.Map; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; + +public class S3NotebookRepoTest { + + private AuthenticationInfo anonymous = AuthenticationInfo.ANONYMOUS; + private S3NotebookRepo notebookRepo; + + @Rule + public S3ProxyRule s3Proxy = S3ProxyRule.builder() + .withCredentials("access", "secret") + .build(); + + + @Before + public void setUp() throws IOException { + String bucket = "test-bucket"; + notebookRepo = new S3NotebookRepo(); + ZeppelinConfiguration conf = ZeppelinConfiguration.create(); + System.setProperty(ZeppelinConfiguration.ConfVars.ZEPPELIN_NOTEBOOK_S3_ENDPOINT.getVarName(), + s3Proxy.getUri().toString()); + System.setProperty(ZeppelinConfiguration.ConfVars.ZEPPELIN_NOTEBOOK_S3_BUCKET.getVarName(), + bucket); + System.setProperty("aws.accessKeyId", s3Proxy.getAccessKey()); + System.setProperty("aws.secretKey", s3Proxy.getSecretKey()); + + notebookRepo.init(conf); + + // create bucket for notebook + AmazonS3 s3Client = AmazonS3ClientBuilder + .standard() + .withCredentials( + new AWSStaticCredentialsProvider( + new BasicAWSCredentials(s3Proxy.getAccessKey(), + s3Proxy.getSecretKey()))) + .withEndpointConfiguration( + new AwsClientBuilder.EndpointConfiguration(s3Proxy.getUri().toString(), + Regions.US_EAST_1.getName())) + .build(); + s3Client.createBucket(bucket); + } + + @After + public void tearDown() { + if (notebookRepo != null) { + notebookRepo.close(); + } + } + + @Test + public void testNotebookRepo() throws IOException { + Map<String, NoteInfo> notesInfo = notebookRepo.list(anonymous); + assertEquals(0, notesInfo.size()); + + // create Note note1 + Note note1 = new Note(); + note1.setPath("/spark/note_1"); + notebookRepo.save(note1, anonymous); + + notesInfo = notebookRepo.list(anonymous); + assertEquals(1, notesInfo.size()); + assertEquals("/spark/note_1", notesInfo.get(note1.getId()).getPath()); + + // Get note1 + Note noteFromRepo = notebookRepo.get(note1.getId(), note1.getPath(), anonymous); + assertEquals(note1.getName(), noteFromRepo.getName()); + + // Get non-existed note + try { + notebookRepo.get("invalid_id", "/invalid_path", anonymous); + fail("Should fail to get non-existed note1"); + } catch (IOException e) { + assertEquals("Fail to get note: /invalid_path from S3", e.getMessage()); + } + + // create another Note note2 + Note note2 = new Note(); + note2.setPath("/spark/note_2"); + notebookRepo.save(note2, anonymous); + + notesInfo = notebookRepo.list(anonymous); + assertEquals(2, notesInfo.size()); + assertEquals("/spark/note_1", notesInfo.get(note1.getId()).getPath()); + assertEquals("/spark/note_2", notesInfo.get(note2.getId()).getPath()); + + // move note1 + notebookRepo.move(note1.getId(), note1.getPath(), "/spark2/note_1", anonymous); + + notesInfo = notebookRepo.list(anonymous); + assertEquals(2, notesInfo.size()); + assertEquals("/spark2/note_1", notesInfo.get(note1.getId()).getPath()); + assertEquals("/spark/note_2", notesInfo.get(note2.getId()).getPath()); + + // move folder + notebookRepo.move("/spark2", "/spark3", anonymous); + + notesInfo = notebookRepo.list(anonymous); + assertEquals(2, notesInfo.size()); + assertEquals("/spark3/note_1", notesInfo.get(note1.getId()).getPath()); + assertEquals("/spark/note_2", notesInfo.get(note2.getId()).getPath()); + + // delete note + notebookRepo.remove(note1.getId(), notesInfo.get(note1.getId()).getPath(), anonymous); + notesInfo = notebookRepo.list(anonymous); + assertEquals(1, notesInfo.size()); + assertEquals("/spark/note_2", notesInfo.get(note2.getId()).getPath()); + + // delete folder + notebookRepo.remove("/spark", anonymous); + notesInfo = notebookRepo.list(anonymous); + assertEquals(0, notesInfo.size()); + } +} diff --git a/zeppelin-server/pom.xml b/zeppelin-server/pom.xml index 783ca17..2371f2a 100644 --- a/zeppelin-server/pom.xml +++ b/zeppelin-server/pom.xml @@ -76,6 +76,10 @@ <groupId>com.sun.jersey</groupId> <artifactId>jersey-server</artifactId> </exclusion> + <exclusion> + <groupId>com.fasterxml.jackson.core</groupId> + <artifactId>jackson-core</artifactId> + </exclusion> </exclusions> </dependency> diff --git a/zeppelin-zengine/pom.xml b/zeppelin-zengine/pom.xml index 08c29f4..da320a8 100644 --- a/zeppelin-zengine/pom.xml +++ b/zeppelin-zengine/pom.xml @@ -45,7 +45,7 @@ <org.reflections.version>0.9.8</org.reflections.version> <xml.apis.version>1.4.01</xml.apis.version> <frontend.maven.plugin.version>1.3</frontend.maven.plugin.version> - <aws.sdk.s3.version>1.10.62</aws.sdk.s3.version> + <aws.sdk.s3.version>1.11.736</aws.sdk.s3.version> <commons.vfs2.version>2.2</commons.vfs2.version> <eclipse.jgit.version>4.5.4.201711221230-r</eclipse.jgit.version> <jettison.version>1.4.0</jettison.version>