This is an automated email from the ASF dual-hosted git repository. heliangjun pushed a commit to branch branch-2.5 in repository https://gitbox.apache.org/repos/asf/hbase.git
The following commit(s) were added to refs/heads/branch-2.5 by this push: new 00b1d0761d1 HBASE-28625 ExportSnapshot should verify checksums for the source file and the target file (#5950) 00b1d0761d1 is described below commit 00b1d0761d1bd8e82fa21ff247aff5165a7d25a7 Author: Liangjun He <heliang...@apache.org> AuthorDate: Thu Jun 6 11:08:11 2024 +0800 HBASE-28625 ExportSnapshot should verify checksums for the source file and the target file (#5950) --- .../hadoop/hbase/snapshot/ExportSnapshot.java | 79 +++++++++++++++------- 1 file changed, 53 insertions(+), 26 deletions(-) diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/snapshot/ExportSnapshot.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/snapshot/ExportSnapshot.java index 2e50762357e..b0cf6480846 100644 --- a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/snapshot/ExportSnapshot.java +++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/snapshot/ExportSnapshot.java @@ -315,24 +315,38 @@ public class ExportSnapshot extends AbstractHBaseTool implements Tool { in = new ThrottledInputStream(new BufferedInputStream(in), bandwidthMB * 1024 * 1024L); } + Path inputPath = inputStat.getPath(); try { context.getCounter(Counter.BYTES_EXPECTED).increment(inputStat.getLen()); // Ensure that the output folder is there and copy the file createOutputPath(outputPath.getParent()); FSDataOutputStream out = outputFs.create(outputPath, true); - try { - copyData(context, inputStat.getPath(), in, outputPath, out, inputStat.getLen()); - } finally { - out.close(); - } + + long stime = EnvironmentEdgeManager.currentTime(); + long totalBytesWritten = + copyData(context, inputPath, in, outputPath, out, inputStat.getLen()); + + // Verify the file length and checksum + verifyCopyResult(inputStat, outputFs.getFileStatus(outputPath)); + + long etime = EnvironmentEdgeManager.currentTime(); + LOG.info("copy completed for input=" + inputPath + " output=" + outputPath); + LOG + .info("size=" + totalBytesWritten + " (" + StringUtils.humanReadableInt(totalBytesWritten) + + ")" + " time=" + StringUtils.formatTimeDiff(etime, stime) + String + .format(" %.3fM/sec", (totalBytesWritten / ((etime - stime) / 1000.0)) / 1048576.0)); + context.getCounter(Counter.FILES_COPIED).increment(1); // Try to Preserve attributes if (!preserveAttributes(outputPath, inputStat)) { LOG.warn("You may have to run manually chown on: " + outputPath); } + } catch (IOException e) { + LOG.error("Error copying " + inputPath + " to " + outputPath, e); + context.getCounter(Counter.COPY_FAILED).increment(1); + throw e; } finally { - in.close(); injectTestFailure(context, inputInfo); } } @@ -409,7 +423,7 @@ public class ExportSnapshot extends AbstractHBaseTool implements Tool { return str != null && str.length() > 0; } - private void copyData(final Context context, final Path inputPath, final InputStream in, + private long copyData(final Context context, final Path inputPath, final InputStream in, final Path outputPath, final FSDataOutputStream out, final long inputFileSize) throws IOException { final String statusMessage = @@ -421,7 +435,6 @@ public class ExportSnapshot extends AbstractHBaseTool implements Tool { int reportBytes = 0; int bytesRead; - long stime = EnvironmentEdgeManager.currentTime(); while ((bytesRead = in.read(buffer)) > 0) { out.write(buffer, 0, bytesRead); totalBytesWritten += bytesRead; @@ -436,7 +449,6 @@ public class ExportSnapshot extends AbstractHBaseTool implements Tool { reportBytes = 0; } } - long etime = EnvironmentEdgeManager.currentTime(); context.getCounter(Counter.BYTES_COPIED).increment(reportBytes); context @@ -444,23 +456,10 @@ public class ExportSnapshot extends AbstractHBaseTool implements Tool { (totalBytesWritten / (float) inputFileSize) * 100.0f) + " from " + inputPath + " to " + outputPath); - // Verify that the written size match - if (totalBytesWritten != inputFileSize) { - String msg = "number of bytes copied not matching copied=" + totalBytesWritten - + " expected=" + inputFileSize + " for file=" + inputPath; - throw new IOException(msg); - } - - LOG.info("copy completed for input=" + inputPath + " output=" + outputPath); - LOG - .info("size=" + totalBytesWritten + " (" + StringUtils.humanReadableInt(totalBytesWritten) - + ")" + " time=" + StringUtils.formatTimeDiff(etime, stime) + String - .format(" %.3fM/sec", (totalBytesWritten / ((etime - stime) / 1000.0)) / 1048576.0)); - context.getCounter(Counter.FILES_COPIED).increment(1); - } catch (IOException e) { - LOG.error("Error copying " + inputPath + " to " + outputPath, e); - context.getCounter(Counter.COPY_FAILED).increment(1); - throw e; + return totalBytesWritten; + } finally { + out.close(); + in.close(); } } @@ -540,6 +539,34 @@ public class ExportSnapshot extends AbstractHBaseTool implements Tool { } } + private void verifyCopyResult(final FileStatus inputStat, final FileStatus outputStat) + throws IOException { + long inputLen = inputStat.getLen(); + long outputLen = outputStat.getLen(); + Path inputPath = inputStat.getPath(); + Path outputPath = outputStat.getPath(); + + if (inputLen != outputLen) { + throw new IOException("Mismatch in length of input:" + inputPath + " (" + inputLen + + ") and output:" + outputPath + " (" + outputLen + ")"); + } + + // If length==0, we will skip checksum + if (inputLen != 0 && verifyChecksum) { + FileChecksum inChecksum = getFileChecksum(inputFs, inputPath); + if (inChecksum == null) { + LOG.warn("Input file " + inputPath + " checksums are not available"); + } + FileChecksum outChecksum = getFileChecksum(outputFs, outputPath); + if (outChecksum == null) { + LOG.warn("Output file " + outputPath + " checksums are not available"); + } + if (inChecksum != null && outChecksum != null && !inChecksum.equals(outChecksum)) { + throw new IOException("Checksum mismatch between " + inputPath + " and " + outputPath); + } + } + } + /** * Check if the two files are equal by looking at the file length, and at the checksum (if user * has specified the verifyChecksum flag).