anujmodi2021 commented on code in PR #7777: URL: https://github.com/apache/hadoop/pull/7777#discussion_r2192701461
########## hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsBlobClient.java: ########## @@ -1914,7 +1924,9 @@ private List<AbfsHttpHeader> getMetadataHeadersList(final Hashtable<String, Stri // AzureBlobFileSystem supports only ASCII Characters in property values. if (isPureASCII(value)) { try { - value = encodeMetadataAttribute(value); + if (!XML_TAG_HDI_PERMISSION.equalsIgnoreCase(entry.getKey())) { Review Comment: Seems like we want to encode all headers except XML_TAG_HDI_PERMISSION. Can we add a comment around this explaining the reason? ########## hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsBlobClient.java: ########## @@ -1097,8 +1105,10 @@ public AbfsRestOperation flush(byte[] buffer, AbfsRestOperation op1 = getPathStatus(path, true, tracingContext, contextEncryptionAdapter); String metadataMd5 = op1.getResult().getResponseHeader(CONTENT_MD5); - if (!md5Hash.equals(metadataMd5)) { - throw ex; + if (blobMd5 != null) { Review Comment: Nit: combine the if statements using && ########## hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsBlobBlock.java: ########## @@ -42,23 +42,40 @@ public class AbfsBlobBlock extends AbfsBlock { * @param offset Used to generate blockId based on offset. * @throws IOException exception is thrown. */ - AbfsBlobBlock(AbfsOutputStream outputStream, long offset) throws IOException { + AbfsBlobBlock(AbfsOutputStream outputStream, long offset, int blockIdLength, long blockIndex) throws IOException { super(outputStream, offset); - this.blockId = generateBlockId(offset); + this.blockIndex = blockIndex; + String streamId = getOutputStream().getStreamID(); Review Comment: Nit: outputStream is passed as a parameter to this constructor. I think it's better to use that parameter directly. Calling a getter for this gives the impression that some other output stream is being referenced. Besides that, the getter is not used anywhere else. 
########## hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsBlobBlock.java: ########## @@ -42,23 +42,40 @@ public class AbfsBlobBlock extends AbfsBlock { * @param offset Used to generate blockId based on offset. * @throws IOException exception is thrown. */ - AbfsBlobBlock(AbfsOutputStream outputStream, long offset) throws IOException { + AbfsBlobBlock(AbfsOutputStream outputStream, long offset, int blockIdLength, long blockIndex) throws IOException { super(outputStream, offset); - this.blockId = generateBlockId(offset); + this.blockIndex = blockIndex; + String streamId = getOutputStream().getStreamID(); + UUID streamIdGuid = UUID.nameUUIDFromBytes(streamId.getBytes(StandardCharsets.UTF_8)); Review Comment: Nit: streamId and streamIdGuid are local variables used only for generating block IDs. They should be moved back inside that method, as they were previously. ########## hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsBlobBlock.java: ########## @@ -42,23 +42,40 @@ public class AbfsBlobBlock extends AbfsBlock { * @param offset Used to generate blockId based on offset. * @throws IOException exception is thrown. */ - AbfsBlobBlock(AbfsOutputStream outputStream, long offset) throws IOException { + AbfsBlobBlock(AbfsOutputStream outputStream, long offset, int blockIdLength, long blockIndex) throws IOException { super(outputStream, offset); - this.blockId = generateBlockId(offset); + this.blockIndex = blockIndex; + String streamId = getOutputStream().getStreamID(); + UUID streamIdGuid = UUID.nameUUIDFromBytes(streamId.getBytes(StandardCharsets.UTF_8)); + this.blockId = generateBlockId(streamIdGuid, blockIdLength); } /** - * Helper method that generates blockId. - * @param position The offset needed to generate blockId. - * @return String representing the block ID generated. + * Generates a Base64-encoded block ID string based on the given position, stream ID, and desired raw length. 
Review Comment: How did we arrive at this logic? Is there some server-side recommendation to follow here? ########## hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsBlobClient.java: ########## @@ -982,6 +985,11 @@ public AbfsRestOperation appendBlock(final String path, if (requestParameters.getLeaseId() != null) { requestHeaders.add(new AbfsHttpHeader(X_MS_LEASE_ID, requestParameters.getLeaseId())); } + if (isChecksumValidationEnabled()) { + if (requestParameters.getMd5() != null) { Review Comment: This check on the request parameters and the adding of headers is repeated. Maybe we can retain the original method and update its implementation as needed now. That method wasn't used anywhere else as of now anyway. ########## hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsBlobBlock.java: ########## @@ -42,23 +42,40 @@ public class AbfsBlobBlock extends AbfsBlock { * @param offset Used to generate blockId based on offset. * @throws IOException exception is thrown. */ - AbfsBlobBlock(AbfsOutputStream outputStream, long offset) throws IOException { + AbfsBlobBlock(AbfsOutputStream outputStream, long offset, int blockIdLength, long blockIndex) throws IOException { super(outputStream, offset); - this.blockId = generateBlockId(offset); + this.blockIndex = blockIndex; + String streamId = getOutputStream().getStreamID(); + UUID streamIdGuid = UUID.nameUUIDFromBytes(streamId.getBytes(StandardCharsets.UTF_8)); + this.blockId = generateBlockId(streamIdGuid, blockIdLength); } /** - * Helper method that generates blockId. - * @param position The offset needed to generate blockId. - * @return String representing the block ID generated. + * Generates a Base64-encoded block ID string based on the given position, stream ID, and desired raw length. + * The block ID is composed using the stream UUID and the block index, which is derived from + * the given position divided by the output stream's buffer size. 
The resulting string is + * optionally adjusted to match the specified raw length, padded or trimmed as needed, and + * then Base64-encoded. + * + * @param streamId The UUID representing the stream, used as a prefix in the block ID. + * @param rawLength The desired length of the raw block ID string before Base64 encoding. + * If 0, no length adjustment is made. + * @return A Base64-encoded block ID string suitable for use in block-based storage APIs. */ - private String generateBlockId(long position) { - String streamId = getOutputStream().getStreamID(); - String streamIdHash = Integer.toString(streamId.hashCode()); - String blockId = String.format("%d_%s", position, streamIdHash); - byte[] blockIdByteArray = new byte[BLOCK_ID_LENGTH]; - System.arraycopy(blockId.getBytes(StandardCharsets.UTF_8), 0, blockIdByteArray, 0, Math.min(BLOCK_ID_LENGTH, blockId.length())); - return new String(Base64.encodeBase64(blockIdByteArray), StandardCharsets.UTF_8); + private String generateBlockId(UUID streamId, int rawLength) { + String rawBlockId = String.format("%s-%06d", streamId, blockIndex); + + if (rawLength != 0) { + // Adjust to match expected decoded length + if (rawBlockId.length() < rawLength) { + rawBlockId = String.format("%-" + rawLength + "s", rawBlockId) Review Comment: Nit: use constants everywhere there is a hardcoded string. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: common-issues-unsubscribe@hadoop.apache.org For queries about this service, please contact Infrastructure at: users@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: common-issues-unsubscribe@hadoop.apache.org For additional commands, e-mail: common-issues-help@hadoop.apache.org