[
https://issues.apache.org/jira/browse/HADOOP-19795?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=18055105#comment-18055105
]
ASF GitHub Bot commented on HADOOP-19795:
-----------------------------------------
anmolanmol1234 commented on code in PR #8212:
URL: https://github.com/apache/hadoop/pull/8212#discussion_r2740967662
##########
hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java:
##########
@@ -890,66 +905,79 @@ public AbfsInputStream openFileForRead(Path path,
FileStatus fileStatus = parameters.map(OpenFileParameters::getStatus)
.orElse(null);
String relativePath = getRelativePath(path);
- String resourceType, eTag;
- long contentLength;
+ String resourceType, eTag = EMPTY_STRING;
+ long contentLength = 0;
ContextEncryptionAdapter contextEncryptionAdapter =
NoContextEncryptionAdapter.getInstance();
/*
* GetPathStatus API has to be called in case of:
- * 1. fileStatus is null or not an object of VersionedFileStatus: as
eTag
- * would not be there in the fileStatus object.
+ * 1. restrictGpsOnOpenFile config is disabled AND fileStatus is null
or not
+ * an object of VersionedFileStatus: as eTag would not be there in
the fileStatus object.
* 2. fileStatus is an object of VersionedFileStatus and the object
doesn't
* have encryptionContext field when client's encryptionType is
* ENCRYPTION_CONTEXT.
*/
if ((fileStatus instanceof VersionedFileStatus) && (
- getClient().getEncryptionType() != EncryptionType.ENCRYPTION_CONTEXT
- || ((VersionedFileStatus) fileStatus).getEncryptionContext()
- != null)) {
+ getClient().getEncryptionType() !=
EncryptionType.ENCRYPTION_CONTEXT
+ || ((VersionedFileStatus)
fileStatus).getEncryptionContext()
+ != null)) {
path = path.makeQualified(this.uri, path);
Preconditions.checkArgument(fileStatus.getPath().equals(path),
- String.format(
- "Filestatus path [%s] does not match with given path [%s]",
- fileStatus.getPath(), path));
+ String.format(
+ "Filestatus path [%s] does not match with given path
[%s]",
+ fileStatus.getPath(), path));
resourceType = fileStatus.isFile() ? FILE : DIRECTORY;
contentLength = fileStatus.getLen();
eTag = ((VersionedFileStatus) fileStatus).getVersion();
final String encryptionContext
- = ((VersionedFileStatus) fileStatus).getEncryptionContext();
+ = ((VersionedFileStatus) fileStatus).getEncryptionContext();
if (getClient().getEncryptionType() ==
EncryptionType.ENCRYPTION_CONTEXT) {
contextEncryptionAdapter = new ContextProviderEncryptionAdapter(
- getClient().getEncryptionContextProvider(),
getRelativePath(path),
- encryptionContext.getBytes(StandardCharsets.UTF_8));
+ getClient().getEncryptionContextProvider(),
getRelativePath(path),
+ encryptionContext.getBytes(StandardCharsets.UTF_8));
}
- } else {
+ if (parseIsDirectory(resourceType)) {
+ throw openFileForReadDirectoryException();
+ }
+ }
+ /*
+ * If file created with ENCRYPTION_CONTEXT, irrespective of whether
isRestrictGpsOnOpenFile config is enabled or not,
+ * GetPathStatus API has to be called to get the encryptionContext from
the response header
+ */
+ else if (getClient().getEncryptionType() ==
EncryptionType.ENCRYPTION_CONTEXT
+ || !getAbfsConfiguration().shouldRestrictGpsOnOpenFile()) {
+
AbfsHttpOperation op = getClient().getPathStatus(relativePath, false,
- tracingContext, null).getResult();
- resourceType = getClient().checkIsDir(op) ? DIRECTORY : FILE;
- contentLength = extractContentLength(op);
- eTag = op.getResponseHeader(HttpHeaderConfigurations.ETAG);
+ tracingContext, null).getResult();
/*
* For file created with ENCRYPTION_CONTEXT, client shall receive
* encryptionContext from header field: X_MS_ENCRYPTION_CONTEXT.
*/
if (getClient().getEncryptionType() ==
EncryptionType.ENCRYPTION_CONTEXT) {
final String fileEncryptionContext = op.getResponseHeader(
- HttpHeaderConfigurations.X_MS_ENCRYPTION_CONTEXT);
+ X_MS_ENCRYPTION_CONTEXT);
if (fileEncryptionContext == null) {
LOG.debug("EncryptionContext missing in GetPathStatus response");
throw new PathIOException(path.toString(),
- "EncryptionContext not present in GetPathStatus response
headers");
+ "EncryptionContext not present in GetPathStatus response
headers");
}
contextEncryptionAdapter = new ContextProviderEncryptionAdapter(
- getClient().getEncryptionContextProvider(),
getRelativePath(path),
- fileEncryptionContext.getBytes(StandardCharsets.UTF_8));
+ getClient().getEncryptionContextProvider(),
getRelativePath(path),
+ fileEncryptionContext.getBytes(StandardCharsets.UTF_8));
}
- }
+ resourceType = getClient().checkIsDir(op) ? DIRECTORY : FILE;
+ contentLength = extractContentLength(op);
+ eTag = op.getResponseHeader(HttpHeaderConfigurations.ETAG);
- if (parseIsDirectory(resourceType)) {
- throw new AbfsRestOperationException(
- AzureServiceErrorCode.PATH_NOT_FOUND.getStatusCode(),
- AzureServiceErrorCode.PATH_NOT_FOUND.getErrorCode(),
- "openFileForRead must be used with files and not directories",
- null);
+ if (parseIsDirectory(resourceType)) {
+ throw openFileForReadDirectoryException();
+ }
+ }
+ /* The only remaining case is:
+ * - restrictGpsOnOpenFile config is enabled with null FileStatus and
encryptionType not as ENCRYPTION_CONTEXT
+ * In this case, we don't need to call GetPathStatus API.
+ */
+ else {
Review Comment:
Won't this lead to going ahead and opening the stream without checks? Do we
fail later for this case ?
> ABFS: GetPathStatus Optimization on OpenFileForRead
> ---------------------------------------------------
>
> Key: HADOOP-19795
> URL: https://issues.apache.org/jira/browse/HADOOP-19795
> Project: Hadoop Common
> Issue Type: Task
> Components: fs/azure
> Affects Versions: 3.4.1, 3.4.2
> Reporter: Manika Joshi
> Assignee: Manika Joshi
> Priority: Major
> Labels: pull-request-available
>
> We do a getPathStatus call during file open for read. This call is primarily
> used to fetch the file’s metadata properties before the actual read begins.
> We are now introducing an optional, config-driven read flow that avoids the
> getPathStatus call during open and instead derives required metadata from the
> read response itself.
--
This message was sent by Atlassian Jira
(v8.20.10#820010)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]