This is an automated email from the ASF dual-hosted git repository. zjffdu pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/zeppelin.git
The following commit(s) were added to refs/heads/master by this push: new 0c491e4 ZEPPELIN-4272: Zeppelin fails to compile when hadoop3 is enabled 0c491e4 is described below commit 0c491e466525e547b1b8cd2bfb49d26cd9d264b3 Author: Bhavik Patel <bhavikpatel...@gmail.com> AuthorDate: Mon Jul 22 15:06:14 2019 +0530 ZEPPELIN-4272: Zeppelin fails to compile when hadoop3 is enabled ### What is this PR for? Adds a couple of missing dependencies so that Zeppelin compiles and builds with hadoop3 ### What type of PR is it? [Bug Fix] ### What is the Jira issue? * [ZEPPELIN-4272](https://issues.apache.org/jira/browse/ZEPPELIN-4272) ### How should this be tested? * CI green ### Screenshots (if appropriate) ![apache_zepp](https://user-images.githubusercontent.com/9100442/61625654-dc261300-ac98-11e9-88c3-a85668e050e3.png) ### Questions: * Do the license files need an update? No * Are there breaking changes for older versions? Yes, added 'kerberos-client' dependency for hadoop3 * Does this need documentation? No Author: Bhavik Patel <bhavikpatel...@gmail.com> Closes #3411 from bhavikpatel9977/ZEPPELIN-4272 and squashes the following commits: 40f65d605 [Bhavik Patel] ZEPPELIN-4272: Zeppelin fails to use s3a configured for zeppelin.notebook.dir --- zeppelin-zengine/pom.xml | 29 +++++++++++++---- .../zeppelin/notebook/FileSystemStorage.java | 36 +++++++++++++++++++--- .../zeppelinhub/websocket/ZeppelinhubClient.java | 6 ++-- 3 files changed, 57 insertions(+), 14 deletions(-) diff --git a/zeppelin-zengine/pom.xml b/zeppelin-zengine/pom.xml index 3fd44c7..715f6f9 100644 --- a/zeppelin-zengine/pom.xml +++ b/zeppelin-zengine/pom.xml @@ -48,6 +48,8 @@ <aws.sdk.s3.version>1.10.62</aws.sdk.s3.version> <commons.vfs2.version>2.2</commons.vfs2.version> <eclipse.jgit.version>4.5.4.201711221230-r</eclipse.jgit.version> + <jettison.version>1.4.0</jettison.version> + <kerberos-client.version>2.0.0-M15</kerberos-client.version> <!--test library versions--> <google.truth.version>0.27</google.truth.version> 
<google.testing.nio.version>0.32.0-alpha</google.testing.nio.version> @@ -160,12 +162,6 @@ </dependency> <dependency> - <groupId>com.amazonaws</groupId> - <artifactId>aws-java-sdk-s3</artifactId> - <version>${aws.sdk.s3.version}</version> - </dependency> - - <dependency> <groupId>com.github.eirslett</groupId> <artifactId>frontend-maven-plugin</artifactId> <version>${frontend.maven.plugin.version}</version> @@ -313,6 +309,12 @@ <dependencies> <dependency> + <groupId>com.amazonaws</groupId> + <artifactId>aws-java-sdk-s3</artifactId> + <version>${aws.sdk.s3.version}</version> + </dependency> + + <dependency> <groupId>org.apache.hadoop</groupId> <artifactId>hadoop-client</artifactId> <version>${hadoop.version}</version> @@ -650,6 +652,21 @@ <version>${hadoop.version}</version> <scope>test</scope> </dependency> + <dependency> + <groupId>org.codehaus.jettison</groupId> + <artifactId>jettison</artifactId> + <version>${jettison.version}</version> + </dependency> + <dependency> + <groupId>org.apache.hadoop</groupId> + <artifactId>hadoop-aws</artifactId> + <version>${hadoop.version}</version> + </dependency> + <dependency> + <groupId>org.apache.directory.server</groupId> + <artifactId>kerberos-client</artifactId> + <version>${kerberos-client.version}</version> + </dependency> </dependencies> </profile> </profiles> diff --git a/zeppelin-zengine/src/main/java/org/apache/zeppelin/notebook/FileSystemStorage.java b/zeppelin-zengine/src/main/java/org/apache/zeppelin/notebook/FileSystemStorage.java index 122848e..f47cf65 100644 --- a/zeppelin-zengine/src/main/java/org/apache/zeppelin/notebook/FileSystemStorage.java +++ b/zeppelin-zengine/src/main/java/org/apache/zeppelin/notebook/FileSystemStorage.java @@ -29,6 +29,8 @@ import java.util.List; public class FileSystemStorage { private static Logger LOGGER = LoggerFactory.getLogger(FileSystemStorage.class); + private static final String S3A = "s3a"; + private static final String FS_DEFAULTFS = "fs.defaultFS"; // only do 
UserGroupInformation.loginUserFromKeytab one time, otherwise you will still get // your ticket expired. @@ -60,18 +62,42 @@ public class FileSystemStorage { public FileSystemStorage(ZeppelinConfiguration zConf, String path) throws IOException { this.zConf = zConf; this.hadoopConf = new Configuration(); + URI zepConfigURI; + URI defaultFSURI; + + try { + zepConfigURI = new URI(path); + } catch (URISyntaxException e) { + LOGGER.error("Failed to get Zeppelin config URI"); + throw new IOException(e); + } // disable checksum for local file system. because interpreter.json may be updated by // non-hadoop filesystem api - // disable caching for file:// scheme to avoid getting LocalFS which does CRC checks - this.hadoopConf.setBoolean("fs.file.impl.disable.cache", true); - this.hadoopConf.set("fs.file.impl", RawLocalFileSystem.class.getName()); - this.isSecurityEnabled = UserGroupInformation.isSecurityEnabled(); + // disable caching for file:// scheme to avoid getting LocalFS which does CRC checks. 
+ this.hadoopConf.setBoolean("fs.file.impl.disable.cache", true); + String defaultFS = this.hadoopConf.get(FS_DEFAULTFS); try { - this.fs = FileSystem.get(new URI(path), this.hadoopConf); + defaultFSURI = new URI(defaultFS); } catch (URISyntaxException e) { + LOGGER.error("Failed to get defaultFS URI"); throw new IOException(e); } + + // to check whether underlying fileSystemStorage is S3A or not + if (!isS3AFileSystem(defaultFSURI, zepConfigURI)) { + this.hadoopConf.set("fs.file.impl", RawLocalFileSystem.class.getName()); + } + + this.isSecurityEnabled = UserGroupInformation.isSecurityEnabled(); + + this.fs = FileSystem.get(zepConfigURI, this.hadoopConf); + } + + public boolean isS3AFileSystem(URI defaultFSURI, URI zepConfigURI) { + return defaultFSURI.getScheme().equals(S3A) + || (StringUtils.isNotEmpty(zepConfigURI.getScheme()) + && zepConfigURI.getScheme().equals(S3A)); } public FileSystem getFs() { diff --git a/zeppelin-zengine/src/main/java/org/apache/zeppelin/notebook/repo/zeppelinhub/websocket/ZeppelinhubClient.java b/zeppelin-zengine/src/main/java/org/apache/zeppelin/notebook/repo/zeppelinhub/websocket/ZeppelinhubClient.java index 4c03a66..0a14393 100644 --- a/zeppelin-zengine/src/main/java/org/apache/zeppelin/notebook/repo/zeppelinhub/websocket/ZeppelinhubClient.java +++ b/zeppelin-zengine/src/main/java/org/apache/zeppelin/notebook/repo/zeppelinhub/websocket/ZeppelinhubClient.java @@ -37,6 +37,9 @@ import org.apache.zeppelin.notebook.repo.zeppelinhub.websocket.utils.Zeppelinhub import org.apache.zeppelin.notebook.socket.Message; import org.apache.zeppelin.notebook.socket.Message.OP; import org.apache.zeppelin.ticket.TicketContainer; +import org.codehaus.jettison.json.JSONArray; +import org.codehaus.jettison.json.JSONException; +import org.codehaus.jettison.json.JSONObject; import org.eclipse.jetty.util.ssl.SslContextFactory; import org.eclipse.jetty.websocket.api.Session; import org.eclipse.jetty.websocket.client.ClientUpgradeRequest; @@ -44,9 +47,6 @@ 
import org.eclipse.jetty.websocket.client.WebSocketClient; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.amazonaws.util.json.JSONArray; -import com.amazonaws.util.json.JSONException; -import com.amazonaws.util.json.JSONObject; import com.google.common.collect.Lists; import com.google.gson.Gson; import com.google.gson.reflect.TypeToken;