[ https://issues.apache.org/jira/browse/HADOOP-14700?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Cheng Lian updated HADOOP-14700: -------------------------------- Description: {{NativeAzureFileSystem}} instances are associated with the blob container used to initialize the file system. Assuming that a file system instance {{fs}} is associated with a container {{A}}, when trying to access a blob inside another container {{B}}, {{fs}} still tries to find the blob inside container {{A}}. If there happens to be two blobs with the same name inside both containers, the user may get a wrong result because {{fs}} reads the contents from the blob inside container {{A}} instead of container {{B}}. The following self-contained Scala code snippet illustrates this issue. You may reproduce it by running the script inside the [Ammonite REPL|http://ammonite.io/]. {code} import $ivy.`com.jsuereth::scala-arm:2.0` import $ivy.`com.microsoft.azure:azure-storage:5.2.0` import $ivy.`org.apache.hadoop:hadoop-azure:3.0.0-alpha4` import $ivy.`org.apache.hadoop:hadoop-common:3.0.0-alpha4` import $ivy.`org.scalatest::scalatest:3.0.3` import java.io.{BufferedReader, InputStreamReader} import java.net.URI import java.time.{Duration, Instant} import java.util.{Date, EnumSet} import com.microsoft.azure.storage.{CloudStorageAccount, StorageCredentialsAccountAndKey} import com.microsoft.azure.storage.blob.{SharedAccessBlobPermissions, SharedAccessBlobPolicy} import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.{FileSystem, Path} import org.apache.hadoop.fs.azure.{AzureException, NativeAzureFileSystem} import org.scalatest.Assertions._ import resource._ // Utility implicit conversion for auto resource management. implicit def `Closable->Resource`[T <: { def close() }]: Resource[T] = new Resource[T] { override def close(closable: T): Unit = closable.close() } // Credentials information val ACCOUNT = "** REDACTED **" val ACCESS_KEY = "** REDACTED **" // We'll create two different containers, both contain a blob named "test-blob" but with different // contents. 
val CONTAINER_A = "container-a" val CONTAINER_B = "container-b" val TEST_BLOB = "test-blob" val blobClient = { val credentials = new StorageCredentialsAccountAndKey(ACCOUNT, ACCESS_KEY) val account = new CloudStorageAccount(credentials, /* useHttps */ true) account.createCloudBlobClient() } // Generates a read-only SAS key restricted within "container-a". val sasKeyForContainerA = { val since = Instant.now() minus Duration.ofMinutes(10) val duration = Duration.ofHours(1) val policy = new SharedAccessBlobPolicy() policy.setSharedAccessStartTime(Date.from(since)) policy.setSharedAccessExpiryTime(Date.from(since plus duration)) policy.setPermissions(EnumSet.of( SharedAccessBlobPermissions.READ, SharedAccessBlobPermissions.LIST )) blobClient .getContainerReference(CONTAINER_A) .generateSharedAccessSignature(policy, null) } // Sets up testing containers and blobs using the Azure storage SDK: // // container-a/test-blob => "foo" // container-b/test-blob => "bar" { val containerARef = blobClient.getContainerReference(CONTAINER_A) val containerBRef = blobClient.getContainerReference(CONTAINER_B) containerARef.createIfNotExists() containerARef.getBlockBlobReference(TEST_BLOB).uploadText("foo") containerBRef.createIfNotExists() containerBRef.getBlockBlobReference(TEST_BLOB).uploadText("bar") } val pathA = new Path(s"wasbs://$CONTAINER_A@$ACCOUNT.blob.core.windows.net/$TEST_BLOB") val pathB = new Path(s"wasbs://$CONTAINER_B@$ACCOUNT.blob.core.windows.net/$TEST_BLOB") for { // Creates a file system associated with "container-a". fs <- managed { val conf = new Configuration conf.set("fs.wasbs.impl", classOf[NativeAzureFileSystem].getName) conf.set(s"fs.azure.sas.$CONTAINER_A.$ACCOUNT.blob.core.windows.net", sasKeyForContainerA) pathA.getFileSystem(conf) } // Opens a reader pointing to "container-a/test-blob". We expect to get the string "foo" written // to this blob previously. 
readerA <- managed(new BufferedReader(new InputStreamReader(fs open pathA))) // Opens a reader pointing to "container-b/test-blob". We expect to get an exception since the SAS // key used to create the `FileSystem` instance is restricted to "container-a". readerB <- managed(new BufferedReader(new InputStreamReader(fs open pathB))) } { // Should get "foo" assert(readerA.readLine() == "foo") // Should catch an exception ... intercept[AzureException] { // ... but instead, we get string "foo" here, which indicates that the readerB was reading from // "container-a" instead of "container-b". val contents = readerB.readLine() println(s"Should not reach here but we got $contents") } } {code} was: {{NativeAzureFileSystem}} instances are associated with the blob container used to initialize the file system. Assuming that a file system instance {{fs}} is associated with a container {{A}}, when trying to access a blob inside another container {{B}}, {{fs}} still tries to find the blob inside container {{A}}. If there happens to be two blobs with the same name inside both containers, the user may get a wrong result because {{fs}} reads the contents from the blob inside container {{A}} instead of container {{B}}. The following self-contained Scala code snippet illustrates this issue. You may reproduce it by running the script inside the [Ammonite REPL|http://ammonite.io/]. 
{code} import $ivy.`com.jsuereth::scala-arm:2.0` import $ivy.`com.microsoft.azure:azure-storage:5.2.0` import $ivy.`org.apache.hadoop:hadoop-azure:3.0.0-alpha4` import $ivy.`org.apache.hadoop:hadoop-common:3.0.0-alpha4` import $ivy.`org.scalatest::scalatest:3.0.3` import java.io.{BufferedReader, InputStreamReader} import java.net.URI import java.time.{Duration, Instant} import java.util.{Date, EnumSet} import com.microsoft.azure.storage.{CloudStorageAccount, StorageCredentialsAccountAndKey} import com.microsoft.azure.storage.blob.{SharedAccessBlobPermissions, SharedAccessBlobPolicy} import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.{FileSystem, Path} import org.apache.hadoop.fs.azure.{AzureException, NativeAzureFileSystem} import org.scalatest.Assertions._ import resource._ // Utility implicit conversion for auto resource management. implicit def `Closable->Resource`[T <: { def close() }]: Resource[T] = new Resource[T] { override def close(closable: T): Unit = closable.close() } // Credentials information val ACCOUNT = "** REDACTED **" val ACCESS_KEY = "** REDACTED **" // We'll create two different containers, both contain a blob named "test-blob" but with different // contents. val CONTAINER_A = "container-a" val CONTAINER_B = "container-b" val TEST_BLOB = "test-blob" val blobClient = { val credentials = new StorageCredentialsAccountAndKey(ACCOUNT, ACCESS_KEY) val account = new CloudStorageAccount(credentials, /* useHttps */ true) account.createCloudBlobClient() } // Generates a read-only SAS key restricted within "container-a". 
val sasKeyForContainerA = { val since = Instant.now() minus Duration.ofMinutes(10) val duration = Duration.ofHours(1) val policy = new SharedAccessBlobPolicy() policy.setSharedAccessStartTime(Date.from(since)) policy.setSharedAccessExpiryTime(Date.from(since plus duration)) policy.setPermissions(EnumSet.of( SharedAccessBlobPermissions.READ, SharedAccessBlobPermissions.LIST )) blobClient .getContainerReference(CONTAINER_A) .generateSharedAccessSignature(policy, null) } // Sets up testing containers and blobs using the Azure storage SDK: // // container-a/test-blob => "foo" // container-b/test-blob => "bar" { val containerARef = blobClient.getContainerReference(CONTAINER_A) val containerBRef = blobClient.getContainerReference(CONTAINER_B) containerARef.createIfNotExists() containerARef.getBlockBlobReference(TEST_BLOB).uploadText("foo") containerBRef.createIfNotExists() containerBRef.getBlockBlobReference(TEST_BLOB).uploadText("bar") } val pathA = new Path(s"wasbs://$CONTAINER_A@$ACCOUNT.blob.core.windows.net/$TEST_BLOB") val pathB = new Path(s"wasbs://$CONTAINER_B@$ACCOUNT.blob.core.windows.net/$TEST_BLOB") for { // Creates a file system associated with "container-a". fs <- managed { val conf = new Configuration conf.set("fs.wasbs.impl", classOf[NativeAzureFileSystem].getName) conf.set(s"fs.azure.sas.$CONTAINER_A.$ACCOUNT.blob.core.windows.net", sasKeyForContainerA) pathA.getFileSystem(conf) } // Opens a reader pointing to "container-a/test-blob". We expect to get the string "foo" written // to this blob previously. readerA <- managed(new BufferedReader(new InputStreamReader(fs open pathA))) // Opens a reader pointing to "container-b/test-blob". We expect to get an exception since the SAS // key used to create the `FileSystem` instance is restricted to "container-a". readerB <- managed(new BufferedReader(new InputStreamReader(fs open pathB))) } { // Should get "foo" assert(readerA.readLine() == "foo") // Should catch an exception ... 
intercept[AzureException] { // ... but instead, we get string "foo" here, which indicates that the readerB was reading from // "container-a" instead of "container-b". val contents = readerB.readLine() println(s"Should not reach here but we got $contents") } } {code} > NativeAzureFileSystem.open() ignores blob container name > -------------------------------------------------------- > > Key: HADOOP-14700 > URL: https://issues.apache.org/jira/browse/HADOOP-14700 > Project: Hadoop Common > Issue Type: Sub-task > Components: fs > Affects Versions: 3.0.0-beta1, 3.0.0-alpha4 > Reporter: Cheng Lian > > {{NativeAzureFileSystem}} instances are associated with the blob container > used to initialize the file system. Assuming that a file system instance > {{fs}} is associated with a container {{A}}, when trying to access a blob > inside another container {{B}}, {{fs}} still tries to find the blob inside > container {{A}}. If there happens to be two blobs with the same name inside > both containers, the user may get a wrong result because {{fs}} reads the > contents from the blob inside container {{A}} instead of container {{B}}. > The following self-contained Scala code snippet illustrates this issue. You > may reproduce it by running the script inside the [Ammonite > REPL|http://ammonite.io/]. 
> {code} > import $ivy.`com.jsuereth::scala-arm:2.0` > import $ivy.`com.microsoft.azure:azure-storage:5.2.0` > import $ivy.`org.apache.hadoop:hadoop-azure:3.0.0-alpha4` > import $ivy.`org.apache.hadoop:hadoop-common:3.0.0-alpha4` > import $ivy.`org.scalatest::scalatest:3.0.3` > import java.io.{BufferedReader, InputStreamReader} > import java.net.URI > import java.time.{Duration, Instant} > import java.util.{Date, EnumSet} > import com.microsoft.azure.storage.{CloudStorageAccount, > StorageCredentialsAccountAndKey} > import com.microsoft.azure.storage.blob.{SharedAccessBlobPermissions, > SharedAccessBlobPolicy} > import org.apache.hadoop.conf.Configuration > import org.apache.hadoop.fs.{FileSystem, Path} > import org.apache.hadoop.fs.azure.{AzureException, NativeAzureFileSystem} > import org.scalatest.Assertions._ > import resource._ > // Utility implicit conversion for auto resource management. > implicit def `Closable->Resource`[T <: { def close() }]: Resource[T] = new > Resource[T] { > override def close(closable: T): Unit = closable.close() > } > // Credentials information > val ACCOUNT = "** REDACTED **" > val ACCESS_KEY = "** REDACTED **" > // We'll create two different containers, both contain a blob named > "test-blob" but with different > // contents. > val CONTAINER_A = "container-a" > val CONTAINER_B = "container-b" > val TEST_BLOB = "test-blob" > val blobClient = { > val credentials = new StorageCredentialsAccountAndKey(ACCOUNT, ACCESS_KEY) > val account = new CloudStorageAccount(credentials, /* useHttps */ true) > account.createCloudBlobClient() > } > // Generates a read-only SAS key restricted within "container-a". 
> val sasKeyForContainerA = { > val since = Instant.now() minus Duration.ofMinutes(10) > val duration = Duration.ofHours(1) > val policy = new SharedAccessBlobPolicy() > policy.setSharedAccessStartTime(Date.from(since)) > policy.setSharedAccessExpiryTime(Date.from(since plus duration)) > policy.setPermissions(EnumSet.of( > SharedAccessBlobPermissions.READ, > SharedAccessBlobPermissions.LIST > )) > blobClient > .getContainerReference(CONTAINER_A) > .generateSharedAccessSignature(policy, null) > } > // Sets up testing containers and blobs using the Azure storage SDK: > // > // container-a/test-blob => "foo" > // container-b/test-blob => "bar" > { > val containerARef = blobClient.getContainerReference(CONTAINER_A) > val containerBRef = blobClient.getContainerReference(CONTAINER_B) > containerARef.createIfNotExists() > containerARef.getBlockBlobReference(TEST_BLOB).uploadText("foo") > containerBRef.createIfNotExists() > containerBRef.getBlockBlobReference(TEST_BLOB).uploadText("bar") > } > val pathA = new > Path(s"wasbs://$CONTAINER_A@$ACCOUNT.blob.core.windows.net/$TEST_BLOB") > val pathB = new > Path(s"wasbs://$CONTAINER_B@$ACCOUNT.blob.core.windows.net/$TEST_BLOB") > for { > // Creates a file system associated with "container-a". > fs <- managed { > val conf = new Configuration > conf.set("fs.wasbs.impl", classOf[NativeAzureFileSystem].getName) > conf.set(s"fs.azure.sas.$CONTAINER_A.$ACCOUNT.blob.core.windows.net", > sasKeyForContainerA) > pathA.getFileSystem(conf) > } > // Opens a reader pointing to "container-a/test-blob". We expect to get the > string "foo" written > // to this blob previously. > readerA <- managed(new BufferedReader(new InputStreamReader(fs open pathA))) > // Opens a reader pointing to "container-b/test-blob". We expect to get an > exception since the SAS > // key used to create the `FileSystem` instance is restricted to > "container-a". 
> readerB <- managed(new BufferedReader(new InputStreamReader(fs open pathB))) > } { > // Should get "foo" > assert(readerA.readLine() == "foo") > // Should catch an exception ... > intercept[AzureException] { > // ... but instead, we get string "foo" here, which indicates that the > readerB was reading from > // "container-a" instead of "container-b". > val contents = readerB.readLine() > println(s"Should not reach here but we got $contents") > } > } > {code} -- This message was sent by Atlassian JIRA (v6.4.14#64029) --------------------------------------------------------------------- To unsubscribe, e-mail: common-issues-unsubscr...@hadoop.apache.org For additional commands, e-mail: common-issues-h...@hadoop.apache.org