Author: tgraves Date: Tue Aug 21 22:38:46 2012 New Revision: 1375834 URL: http://svn.apache.org/viewvc?rev=1375834&view=rev Log: HADOOP-8239. Add subclasses of MD5MD5CRC32FileChecksum to support file checksum with CRC32C. (Kihwal Lee via szetszwo)
Added: hadoop/common/branches/branch-0.23/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/MD5MD5CRC32CastagnoliFileChecksum.java hadoop/common/branches/branch-0.23/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/MD5MD5CRC32GzipFileChecksum.java Modified: hadoop/common/branches/branch-0.23/hadoop-common-project/hadoop-common/CHANGES.txt hadoop/common/branches/branch-0.23/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/MD5MD5CRC32FileChecksum.java hadoop/common/branches/branch-0.23/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/DataChecksum.java Modified: hadoop/common/branches/branch-0.23/hadoop-common-project/hadoop-common/CHANGES.txt URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.23/hadoop-common-project/hadoop-common/CHANGES.txt?rev=1375834&r1=1375833&r2=1375834&view=diff ============================================================================== --- hadoop/common/branches/branch-0.23/hadoop-common-project/hadoop-common/CHANGES.txt (original) +++ hadoop/common/branches/branch-0.23/hadoop-common-project/hadoop-common/CHANGES.txt Tue Aug 21 22:38:46 2012 @@ -36,6 +36,9 @@ Release 0.23.3 - UNRELEASED HADOOP-8240. Add a new API to allow users to specify a checksum type on FileSystem.create(..). (Kihwal Lee via szetszwo) + HADOOP-8239. Add subclasses of MD5MD5CRC32FileChecksum to support file + checksum with CRC32C. (Kihwal Lee via szetszwo) + OPTIMIZATIONS BUG FIXES Added: hadoop/common/branches/branch-0.23/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/MD5MD5CRC32CastagnoliFileChecksum.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.23/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/MD5MD5CRC32CastagnoliFileChecksum.java?rev=1375834&view=auto ============================================================================== --- hadoop/common/branches/branch-0.23/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/MD5MD5CRC32CastagnoliFileChecksum.java (added) +++ hadoop/common/branches/branch-0.23/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/MD5MD5CRC32CastagnoliFileChecksum.java Tue Aug 21 22:38:46 2012 @@ -0,0 +1,41 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs; + +import org.apache.hadoop.io.MD5Hash; +import org.apache.hadoop.util.DataChecksum; + +/** For CRC32 with the Castagnoli polynomial */ +public class MD5MD5CRC32CastagnoliFileChecksum extends MD5MD5CRC32FileChecksum { + /** Same as this(0, 0, null) */ + public MD5MD5CRC32CastagnoliFileChecksum() { + this(0, 0, null); + } + + /** Create a MD5FileChecksum */ + public MD5MD5CRC32CastagnoliFileChecksum(int bytesPerCRC, long crcPerBlock, MD5Hash md5) { + super(bytesPerCRC, crcPerBlock, md5); + } + + @Override + public DataChecksum.Type getCrcType() { + // default to the one that is understood by all releases. + return DataChecksum.Type.CRC32C; + } +} Modified: hadoop/common/branches/branch-0.23/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/MD5MD5CRC32FileChecksum.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.23/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/MD5MD5CRC32FileChecksum.java?rev=1375834&r1=1375833&r2=1375834&view=diff ============================================================================== --- hadoop/common/branches/branch-0.23/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/MD5MD5CRC32FileChecksum.java (original) +++ hadoop/common/branches/branch-0.23/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/MD5MD5CRC32FileChecksum.java Tue Aug 21 22:38:46 2012 @@ -23,12 +23,17 @@ import java.io.IOException; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.fs.Options.ChecksumOpt; import org.apache.hadoop.io.MD5Hash; import org.apache.hadoop.io.WritableUtils; +import org.apache.hadoop.util.DataChecksum; import org.xml.sax.Attributes; import org.xml.sax.SAXException; import org.znerd.xmlenc.XMLOutputter; +import org.apache.hadoop.fs.MD5MD5CRC32CastagnoliFileChecksum; +import org.apache.hadoop.fs.MD5MD5CRC32GzipFileChecksum; + /** MD5 of MD5 of CRC32. */ @InterfaceAudience.LimitedPrivate({"HDFS"}) @InterfaceStability.Unstable @@ -54,7 +59,19 @@ public class MD5MD5CRC32FileChecksum ext /** {@inheritDoc} */ public String getAlgorithmName() { - return "MD5-of-" + crcPerBlock + "MD5-of-" + bytesPerCRC + "CRC32"; + return "MD5-of-" + crcPerBlock + "MD5-of-" + bytesPerCRC + + getCrcType().name(); + } + + public static DataChecksum.Type getCrcTypeFromAlgorithmName(String algorithm) + throws IOException { + if (algorithm.endsWith(DataChecksum.Type.CRC32.name())) { + return DataChecksum.Type.CRC32; + } else if (algorithm.endsWith(DataChecksum.Type.CRC32C.name())) { + return DataChecksum.Type.CRC32C; + } + + throw new IOException("Unknown checksum type in " + algorithm); } /** {@inheritDoc} */ @@ -65,6 +82,16 @@ public class MD5MD5CRC32FileChecksum ext return WritableUtils.toByteArray(this); } + /** returns the CRC type */ + public DataChecksum.Type getCrcType() { + // default to the one that is understood by all releases. + return DataChecksum.Type.CRC32; + } + + public ChecksumOpt getChecksumOpt() { + return new ChecksumOpt(getCrcType(), bytesPerCRC); + } + /** {@inheritDoc} */ public void readFields(DataInput in) throws IOException { bytesPerCRC = in.readInt(); @@ -86,6 +113,7 @@ public class MD5MD5CRC32FileChecksum ext if (that != null) { xml.attribute("bytesPerCRC", "" + that.bytesPerCRC); xml.attribute("crcPerBlock", "" + that.crcPerBlock); + xml.attribute("crcType", ""+ that.getCrcType().name()); xml.attribute("md5", "" + that.md5); } xml.endTag(); @@ -97,16 +125,40 @@ public class MD5MD5CRC32FileChecksum ext final String bytesPerCRC = attrs.getValue("bytesPerCRC"); final String crcPerBlock = attrs.getValue("crcPerBlock"); final String md5 = attrs.getValue("md5"); + String crcType = attrs.getValue("crcType"); + DataChecksum.Type finalCrcType; if (bytesPerCRC == null || crcPerBlock == null || md5 == null) { return null; } try { - return new MD5MD5CRC32FileChecksum(Integer.valueOf(bytesPerCRC), - Integer.valueOf(crcPerBlock), new MD5Hash(md5)); - } catch(Exception e) { + // old versions don't support crcType. + if (crcType == null || crcType == "") { + finalCrcType = DataChecksum.Type.CRC32; + } else { + finalCrcType = DataChecksum.Type.valueOf(crcType); + } + + switch (finalCrcType) { + case CRC32: + return new MD5MD5CRC32GzipFileChecksum( + Integer.valueOf(bytesPerCRC), + Integer.valueOf(crcPerBlock), + new MD5Hash(md5)); + case CRC32C: + return new MD5MD5CRC32CastagnoliFileChecksum( + Integer.valueOf(bytesPerCRC), + Integer.valueOf(crcPerBlock), + new MD5Hash(md5)); + default: + // we should never get here since finalCrcType will + // hold a valid type or we should have got an exception. + return null; + } + } catch (Exception e) { throw new SAXException("Invalid attributes: bytesPerCRC=" + bytesPerCRC - + ", crcPerBlock=" + crcPerBlock + ", md5=" + md5, e); + + ", crcPerBlock=" + crcPerBlock + ", crcType=" + crcType + + ", md5=" + md5, e); } } @@ -114,4 +166,4 @@ public class MD5MD5CRC32FileChecksum ext public String toString() { return getAlgorithmName() + ":" + md5; } -} \ No newline at end of file +} Added: hadoop/common/branches/branch-0.23/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/MD5MD5CRC32GzipFileChecksum.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.23/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/MD5MD5CRC32GzipFileChecksum.java?rev=1375834&view=auto ============================================================================== --- hadoop/common/branches/branch-0.23/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/MD5MD5CRC32GzipFileChecksum.java (added) +++ hadoop/common/branches/branch-0.23/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/MD5MD5CRC32GzipFileChecksum.java Tue Aug 21 22:38:46 2012 @@ -0,0 +1,40 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs; + +import org.apache.hadoop.io.MD5Hash; +import org.apache.hadoop.util.DataChecksum; + +/** For CRC32 with the Gzip polynomial */ +public class MD5MD5CRC32GzipFileChecksum extends MD5MD5CRC32FileChecksum { + /** Same as this(0, 0, null) */ + public MD5MD5CRC32GzipFileChecksum() { + this(0, 0, null); + } + + /** Create a MD5FileChecksum */ + public MD5MD5CRC32GzipFileChecksum(int bytesPerCRC, long crcPerBlock, MD5Hash md5) { + super(bytesPerCRC, crcPerBlock, md5); + } + @Override + public DataChecksum.Type getCrcType() { + // default to the one that is understood by all releases. + return DataChecksum.Type.CRC32; + } +} Modified: hadoop/common/branches/branch-0.23/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/DataChecksum.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.23/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/DataChecksum.java?rev=1375834&r1=1375833&r2=1375834&view=diff ============================================================================== --- hadoop/common/branches/branch-0.23/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/DataChecksum.java (original) +++ hadoop/common/branches/branch-0.23/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/DataChecksum.java Tue Aug 21 22:38:46 2012 @@ -44,13 +44,15 @@ public class DataChecksum implements Che public static final int CHECKSUM_CRC32 = 1; public static final int CHECKSUM_CRC32C = 2; public static final int CHECKSUM_DEFAULT = 3; + public static final int CHECKSUM_MIXED = 4; /** The checksum types */ public static enum Type { NULL (CHECKSUM_NULL, 0), CRC32 (CHECKSUM_CRC32, 4), CRC32C(CHECKSUM_CRC32C, 4), - DEFAULT(CHECKSUM_DEFAULT, 0); // This cannot be used to create DataChecksum + DEFAULT(CHECKSUM_DEFAULT, 0), // This cannot be used to create DataChecksum + MIXED (CHECKSUM_MIXED, 0); // This cannot be used to create DataChecksum public final int id; public final int size;