anmolanmol1234 commented on code in PR #7777: URL: https://github.com/apache/hadoop/pull/7777#discussion_r2194738434
########## hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsBlobBlock.java: ########## @@ -42,23 +42,40 @@ public class AbfsBlobBlock extends AbfsBlock { * @param offset Used to generate blockId based on offset. * @throws IOException exception is thrown. */ - AbfsBlobBlock(AbfsOutputStream outputStream, long offset) throws IOException { + AbfsBlobBlock(AbfsOutputStream outputStream, long offset, int blockIdLength, long blockIndex) throws IOException { super(outputStream, offset); - this.blockId = generateBlockId(offset); + this.blockIndex = blockIndex; + String streamId = getOutputStream().getStreamID(); + UUID streamIdGuid = UUID.nameUUIDFromBytes(streamId.getBytes(StandardCharsets.UTF_8)); + this.blockId = generateBlockId(streamIdGuid, blockIdLength); } /** - * Helper method that generates blockId. - * @param position The offset needed to generate blockId. - * @return String representing the block ID generated. + * Generates a Base64-encoded block ID string based on the given position, stream ID, and desired raw length. + * The block ID is composed using the stream UUID and the block index, which is derived from + * the given position divided by the output stream's buffer size. The resulting string is + * optionally adjusted to match the specified raw length, padded or trimmed as needed, and + * then Base64-encoded. + * + * @param streamId The UUID representing the stream, used as a prefix in the block ID. + * @param rawLength The desired length of the raw block ID string before Base64 encoding. + * If 0, no length adjustment is made. + * @return A Base64-encoded block ID string suitable for use in block-based storage APIs. 
*/ - private String generateBlockId(long position) { - String streamId = getOutputStream().getStreamID(); - String streamIdHash = Integer.toString(streamId.hashCode()); - String blockId = String.format("%d_%s", position, streamIdHash); - byte[] blockIdByteArray = new byte[BLOCK_ID_LENGTH]; - System.arraycopy(blockId.getBytes(StandardCharsets.UTF_8), 0, blockIdByteArray, 0, Math.min(BLOCK_ID_LENGTH, blockId.length())); - return new String(Base64.encodeBase64(blockIdByteArray), StandardCharsets.UTF_8); + private String generateBlockId(UUID streamId, int rawLength) { + String rawBlockId = String.format("%s-%06d", streamId, blockIndex); + + if (rawLength != 0) { + // Adjust to match expected decoded length + if (rawBlockId.length() < rawLength) { + rawBlockId = String.format("%-" + rawLength + "s", rawBlockId) + .replace(' ', '_'); + } else if (rawBlockId.length() > rawLength) { + rawBlockId = rawBlockId.substring(0, rawLength); Review Comment: That would make the code a bit harder to read. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: common-issues-unsubscr...@hadoop.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: common-issues-unsubscr...@hadoop.apache.org For additional commands, e-mail: common-issues-h...@hadoop.apache.org