Jose Luis Pedrosa created HADOOP-16417:
------------------------------------------

             Summary: abfs can't access storage account without password
                 Key: HADOOP-16417
                 URL: https://issues.apache.org/jira/browse/HADOOP-16417
             Project: Hadoop Common
          Issue Type: Bug
          Components: fs/azure
    Affects Versions: 3.2.0
            Reporter: Jose Luis Pedrosa


It does not seem possible to access storage accounts without passwords using 
abfs, but it is possible using wasb.

 

This sample code (Spark based) illustrates the issue: the following code using 
abfs_path will throw an exception
{noformat}
Exception in thread "main" java.lang.IllegalArgumentException: Invalid account 
key.
        at 
org.apache.hadoop.fs.azurebfs.services.SharedKeyCredentials.<init>(SharedKeyCredentials.java:70)
        at 
org.apache.hadoop.fs.azurebfs.AzureBlobFileSystemStore.initializeClient(AzureBlobFileSystemStore.java:812)
        at 
org.apache.hadoop.fs.azurebfs.AzureBlobFileSystemStore.<init>(AzureBlobFileSystemStore.java:149)
        at 
org.apache.hadoop.fs.azurebfs.AzureBlobFileSystem.initialize(AzureBlobFileSystem.java:108)
        at 
org.apache.hadoop.fs.FileSystem.createFileSystem(FileSystem.java:3303)
        at org.apache.hadoop.fs.FileSystem.access$200(FileSystem.java:124)
        at 
org.apache.hadoop.fs.FileSystem$Cache.getInternal(FileSystem.java:3352)
        at org.apache.hadoop.fs.FileSystem$Cache.get(FileSystem.java:3320)
        at org.apache.hadoop.fs.FileSystem.get(FileSystem.java:479)
        at org.apache.hadoop.fs.Path.getFileSystem(Path.java:361)
{noformat}
  While using the wasbs_path will work normally,
{code:java}
import org.apache.spark.api.java.function.FilterFunction;
import org.apache.spark.sql.RuntimeConfig;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;

public class SimpleApp {

    static String blob_account_name = "azureopendatastorage";
    static String blob_container_name = "gfsweatherdatacontainer";
    static String blob_relative_path = "GFSWeather/GFSProcessed";
    static String blob_sas_token = "";
    static String abfs_path = 
"abfs://"+blob_container_name+"@"+blob_account_name+".dfs.core.windows.net/"+blob_relative_path;
    static String wasbs_path = "wasbs://"+blob_container_name + 
"@"+blob_account_name+".blob.core.windows.net/" + blob_relative_path;


    public static void main(String[] args) {
       
        SparkSession spark = SparkSession.builder().appName("NOAAGFS 
Run").getOrCreate();
        configureAzureHadoopConnetor(spark);
        RuntimeConfig conf = spark.conf();

        
conf.set("fs.azure.account.key."+blob_account_name+".dfs.core.windows.net", 
blob_sas_token);
        
conf.set("fs.azure.account.key."+blob_account_name+".blob.core.windows.net", 
blob_sas_token);

        System.out.println("Creating parquet dataset");
        Dataset<Row> logData = spark.read().parquet(abfs_path);

        System.out.println("Creating temp view");
        logData.createOrReplaceTempView("source");

        System.out.println("SQL");
        spark.sql("SELECT * FROM source LIMIT 10").show();
        spark.stop();
    }

    public static void configureAzureHadoopConnetor(SparkSession session) {
        RuntimeConfig conf = session.conf();

        
conf.set("fs.AbstractFileSystem.wasb.impl","org.apache.hadoop.fs.azure.Wasb");
        
conf.set("fs.AbstractFileSystem.wasbs.impl","org.apache.hadoop.fs.azure.Wasbs");
        
conf.set("fs.wasb.impl","org.apache.hadoop.fs.azure.NativeAzureFileSystem");
        
conf.set("fs.wasbs.impl","org.apache.hadoop.fs.azure.NativeAzureFileSystem$Secure");

        conf.set("fs.azure.secure.mode", false);

        conf.set("fs.abfs.impl",  
"org.apache.hadoop.fs.azurebfs.AzureBlobFileSystem");
        conf.set("fs.abfss.impl", 
"org.apache.hadoop.fs.azurebfs.SecureAzureBlobFileSystem");

        
conf.set("fs.AbstractFileSystem.abfs.impl","org.apache.hadoop.fs.azurebfs.Abfs");
        
conf.set("fs.AbstractFileSystem.abfss.impl","org.apache.hadoop.fs.azurebfs.Abfss");

        // Works in conjuction with fs.azure.secure.mode. Setting this config 
to true
        //    results in fs.azure.NativeAzureFileSystem using the local SAS key 
generation
        //    where the SAS keys are generating in the same process as 
fs.azure.NativeAzureFileSystem.
        //    If fs.azure.secure.mode flag is set to false, this flag has no 
effect.
        conf.set("fs.azure.local.sas.key.mode", false);
    }
}
{code}
Sample build.gradle
{noformat}
plugins {
    id 'java'
}

group 'org.samples'
version '1.0-SNAPSHOT'

sourceCompatibility = 1.8

repositories {
    mavenCentral()
}

dependencies {
    // NOTE(review): the 'compile' configuration is deprecated in newer Gradle
    // versions — 'implementation' is its replacement.
    compile  'org.apache.spark:spark-sql_2.12:2.4.3'
}
{noformat}



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)

---------------------------------------------------------------------
To unsubscribe, e-mail: common-dev-unsubscr...@hadoop.apache.org
For additional commands, e-mail: common-dev-h...@hadoop.apache.org

Reply via email to