[ https://issues.apache.org/jira/browse/HUDI-2083?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17628459#comment-17628459 ]

Benoit COLAS commented on HUDI-2083:
------------------------------------

Not working for me. Steps I followed (an illustrative command sketch follows this list):
 # Install Java 
 # Reference JAVA_HOME
 # Install Maven
 # Install Spark
 # Reference SPARK_HOME
 # Copy Hudi Sources
 # Build Hudi
 # Download Client jars for S3 and REFERENCE CLIENT_JAR
 # Launch Hudi CLI
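
For reference, the steps above roughly correspond to the shell session below. This is only a sketch: the paths, package manager, and versions are placeholders rather than the exact ones I used.

{code:bash}
# 1-2. Install Java and reference JAVA_HOME (path is an example)
export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64
export PATH=$JAVA_HOME/bin:$PATH

# 3. Install Maven
sudo apt-get install -y maven

# 4-5. Install Spark and reference SPARK_HOME (path is an example)
export SPARK_HOME=/opt/spark-3.2.0-bin-hadoop3.2

# 6-7. Copy the Hudi sources and build them
git clone https://github.com/apache/hudi.git && cd hudi
mvn clean package -DskipTests

# 8. Download client jars for S3 and reference CLIENT_JAR (paths are placeholders)
export CLIENT_JAR=/path/to/hadoop-aws.jar:/path/to/aws-java-sdk-bundle.jar

# 9. Launch the Hudi CLI
./hudi-cli/hudi-cli.sh
{code}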

{code:java}
hudi->stacktrace
org.apache.hudi.exception.HoodieIOException: Failed to get instance of org.apache.hadoop.fs.FileSystem
        at org.apache.hudi.common.fs.FSUtils.getFs(FSUtils.java:112)
        at org.apache.hudi.common.table.HoodieTableMetaClient.getFs(HoodieTableMetaClient.java:295)
        at org.apache.hudi.common.table.HoodieTableMetaClient.<init>(HoodieTableMetaClient.java:127)
        at org.apache.hudi.common.table.HoodieTableMetaClient.newMetaClient(HoodieTableMetaClient.java:642)
        at org.apache.hudi.common.table.HoodieTableMetaClient.access$000(HoodieTableMetaClient.java:80)
        at org.apache.hudi.common.table.HoodieTableMetaClient$Builder.build(HoodieTableMetaClient.java:711)
        at org.apache.hudi.cli.HoodieCLI.refreshTableMetadata(HoodieCLI.java:89)
        at org.apache.hudi.cli.HoodieCLI.connectTo(HoodieCLI.java:95)
        at org.apache.hudi.cli.commands.TableCommand.connect(TableCommand.java:86)
        at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
        at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
        at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
        at java.lang.reflect.Method.invoke(Method.java:498)
        at org.springframework.shell.command.invocation.InvocableShellMethod.doInvoke(InvocableShellMethod.java:306)
        at org.springframework.shell.command.invocation.InvocableShellMethod.invoke(InvocableShellMethod.java:232)
        at org.springframework.shell.command.CommandExecution$DefaultCommandExecution.evaluate(CommandExecution.java:158)
        at org.springframework.shell.Shell.evaluate(Shell.java:208)
        at org.springframework.shell.Shell.run(Shell.java:140)
        at org.springframework.shell.jline.InteractiveShellRunner.run(InteractiveShellRunner.java:73)
        at org.springframework.shell.DefaultShellApplicationRunner.run(DefaultShellApplicationRunner.java:65)
        at org.springframework.boot.SpringApplication.callRunner(SpringApplication.java:762)
        at org.springframework.boot.SpringApplication.callRunners(SpringApplication.java:752)
        at org.springframework.boot.SpringApplication.run(SpringApplication.java:315)
        at org.springframework.boot.SpringApplication.run(SpringApplication.java:1306)
        at org.springframework.boot.SpringApplication.run(SpringApplication.java:1295)
        at org.apache.hudi.cli.Main.main(Main.java:34)
Caused by: org.apache.hadoop.fs.UnsupportedFileSystemException: No FileSystem for scheme "s3"
        at org.apache.hadoop.fs.FileSystem.getFileSystemClass(FileSystem.java:3225)
        at org.apache.hadoop.fs.FileSystem.createFileSystem(FileSystem.java:3245)
        at org.apache.hadoop.fs.FileSystem.access$200(FileSystem.java:121)
        at org.apache.hadoop.fs.FileSystem$Cache.getInternal(FileSystem.java:3296)
        at org.apache.hadoop.fs.FileSystem$Cache.get(FileSystem.java:3264)
        at org.apache.hadoop.fs.FileSystem.get(FileSystem.java:475)
        at org.apache.hadoop.fs.Path.getFileSystem(Path.java:356)
        at org.apache.hudi.common.fs.FSUtils.getFs(FSUtils.java:110)
{code}
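
The root cause ("No FileSystem for scheme \"s3\"") is that the Hadoop client inside the CLI has no FileSystem implementation registered for the s3 scheme. As a hedged workaround sketch (not something confirmed in this ticket), assuming hadoop-aws and aws-java-sdk-bundle are already on the CLI classpath via CLIENT_JAR and that the CLI's Hadoop client honours HADOOP_CONF_DIR, the s3 and s3a schemes can be mapped to S3AFileSystem in a core-site.xml:

{code:bash}
# Hedged workaround sketch, not from this ticket: register S3AFileSystem for the
# s3/s3a schemes. Assumes hadoop-aws and aws-java-sdk-bundle are on the CLI
# classpath (CLIENT_JAR) and that the CLI's Hadoop client picks up HADOOP_CONF_DIR.
mkdir -p ~/hudi-cli-conf && export HADOOP_CONF_DIR=~/hudi-cli-conf
cat > ~/hudi-cli-conf/core-site.xml <<'EOF'
<configuration>
  <property>
    <name>fs.s3a.impl</name>
    <value>org.apache.hadoop.fs.s3a.S3AFileSystem</value>
  </property>
  <property>
    <!-- let s3:// paths resolve through S3A as well -->
    <name>fs.s3.impl</name>
    <value>org.apache.hadoop.fs.s3a.S3AFileSystem</value>
  </property>
</configuration>
EOF
{code}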

> Hudi CLI does not work with S3
> ------------------------------
>
>                 Key: HUDI-2083
>                 URL: https://issues.apache.org/jira/browse/HUDI-2083
>             Project: Apache Hudi
>          Issue Type: Task
>          Components: cli
>            Reporter: Vinay
>            Assignee: Vinay
>            Priority: Major
>              Labels: pull-request-available, query-eng, sev:high
>             Fix For: 0.11.1
>
>
> Hudi CLI throws an exception when trying to connect to an S3 path:
> {code:java}
> create --path s3://some-bucket/tmp/hudi/test_mor --tableName test_mor_s3 --tableType MERGE_ON_READ
> Failed to get instance of org.apache.hadoop.fs.FileSystem
> org.apache.hudi.exception.HoodieIOException: Failed to get instance of org.apache.hadoop.fs.FileSystem
>     at org.apache.hudi.common.fs.FSUtils.getFs(FSUtils.java:98)
> =========
> create --path s3a://some-bucket/tmp/hudi/test_mor --tableName test_mor_s3 --tableType MERGE_ON_READ
> Command failed java.lang.RuntimeException: java.lang.ClassNotFoundException: Class org.apache.hadoop.fs.s3a.S3AFileSystem not found
> java.lang.ClassNotFoundException: Class org.apache.hadoop.fs.s3a.S3AFileSystem not found
> java.lang.RuntimeException: java.lang.ClassNotFoundException: Class org.apache.hadoop.fs.s3a.S3AFileSystem not found
>     at org.apache.hadoop.conf.Configuration.getClass(Configuration.java:2195)
>     at org.apache.hadoop.fs.FileSystem.getFileSystemClass(FileSystem.java:2654)
> {code}
> This could be because the target/lib folder does not contain the hadoop-aws or AWS S3 SDK dependency.
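> A quick check (a hedged sketch, not part of the original report) is to look for the AWS jars in the packaged CLI classpath after building:
> {code:bash}
> # The CLI bundles its runtime jars under hudi-cli/target/lib after mvn package.
> # If neither jar shows up here, the s3/s3a schemes cannot be resolved.
> ls hudi-cli/target/lib | grep -E 'hadoop-aws|aws-java-sdk' || echo "no AWS jars bundled"
> {code}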
>  
> Update from Sivabalan:
> Something that works for me even without the patch linked, in case someone wants to try the latest master hudi-cli against an S3 dataset:
> 1. Replace the local hudi-cli.sh contents with [this|https://gist.github.com/nsivabalan/a31d56891353fe84413951972484f21f].
> 2. Run mvn package.
> 3. Tar the entire hudi-cli directory.
> 4. Copy it to the EMR master.
> 5. Untar hudi-cli.tar.
> 6. Ensure SPARK_HOME is set to /usr/lib/spark.
> 7. Download the AWS jars into some directory and point CLIENT_JARS at them:
> mkdir client_jars && cd client_jars
> export HADOOP_VERSION=3.2.0
> wget https://repo1.maven.org/maven2/org/apache/hadoop/hadoop-aws/${HADOOP_VERSION}/hadoop-aws-${HADOOP_VERSION}.jar -O hadoop-aws.jar
> export AWS_SDK_VERSION=1.11.375
> wget https://repo1.maven.org/maven2/com/amazonaws/aws-java-sdk-bundle/${AWS_SDK_VERSION}/aws-java-sdk-bundle-${AWS_SDK_VERSION}.jar -O aws-java-sdk.jar
> export CLIENT_JARS=/home/hadoop/client_jars/aws-java-sdk.jar:/home/hadoop/client_jars/hadoop-aws.jar
> 8. Then launch hudi-cli.sh.
> I verified that CLI commands that launch Spark succeed with this for an S3 dataset.
> With the patch from Vinay, I am running into EMR FS issues.
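> For reference (a hedged sketch, not from Sivabalan's notes), once the CLI is up this way, a minimal session against the S3 table looks roughly like this; the bucket and path are placeholders:
> {code:bash}
> hudi-> connect --path s3a://some-bucket/tmp/hudi/test_mor
> hudi:test_mor-> desc
> {code}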
>  
> Ethan: locally running hudi-cli against an S3 Hudi table:
> {code:java}
> # Build Hudi with the corresponding Spark version first
> export AWS_REGION=us-east-2
> export AWS_ACCESS_KEY_ID=<key_id>
> export AWS_SECRET_ACCESS_KEY=<secret_key>
> export SPARK_HOME=<spark_home>
> # Note: AWS jar versions below are specific to Spark 3.2.0
> export CLIENT_JAR=/lib/spark-3.2.0-bin-hadoop3.2/jars/aws-java-sdk-bundle-1.12.48.jar:/lib/spark-3.2.0-bin-hadoop3.2/jars/hadoop-aws-3.3.1.jar
> ./hudi-cli/hudi-cli.sh
> {code}
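> Since the hadoop-aws jar must match the Hadoop version the Spark distribution was built against, one hedged way to pick the right jars, when the distribution bundles them as in the paths above, is to look inside the distribution itself:
> {code:bash}
> # Locate the AWS jars shipped alongside Spark and point CLIENT_JAR at them.
> ls $SPARK_HOME/jars | grep -E 'hadoop-aws|aws-java-sdk-bundle'
> {code}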


