Author: eli
Date: Wed Sep  5 22:19:40 2012
New Revision: 1381419

URL: http://svn.apache.org/viewvc?rev=1381419&view=rev
Log:
HADOOP-8648. libhadoop: native CRC32 validation crashes when 
io.bytes.per.checksum=1. Contributed by Colin Patrick McCabe

Added:
    
hadoop/common/trunk/hadoop-common-project/hadoop-common/src/main/native/src/test/
    
hadoop/common/trunk/hadoop-common-project/hadoop-common/src/main/native/src/test/org/
    
hadoop/common/trunk/hadoop-common-project/hadoop-common/src/main/native/src/test/org/apache/
    
hadoop/common/trunk/hadoop-common-project/hadoop-common/src/main/native/src/test/org/apache/hadoop/
    
hadoop/common/trunk/hadoop-common-project/hadoop-common/src/main/native/src/test/org/apache/hadoop/util/
    
hadoop/common/trunk/hadoop-common-project/hadoop-common/src/main/native/src/test/org/apache/hadoop/util/test_bulk_crc32.c
Modified:
    hadoop/common/trunk/hadoop-common-project/hadoop-common/CHANGES.txt
    hadoop/common/trunk/hadoop-common-project/hadoop-common/pom.xml
    hadoop/common/trunk/hadoop-common-project/hadoop-common/src/CMakeLists.txt
    
hadoop/common/trunk/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/util/bulk_crc32.c
    
hadoop/common/trunk/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/util/bulk_crc32.h

Modified: hadoop/common/trunk/hadoop-common-project/hadoop-common/CHANGES.txt
URL: 
http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-common-project/hadoop-common/CHANGES.txt?rev=1381419&r1=1381418&r2=1381419&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-common-project/hadoop-common/CHANGES.txt 
(original)
+++ hadoop/common/trunk/hadoop-common-project/hadoop-common/CHANGES.txt Wed Sep 
 5 22:19:40 2012
@@ -467,6 +467,9 @@ Branch-2 ( Unreleased changes )
 
     HADOOP-8770. NN should not RPC to self to find trash defaults. (eli)
 
+    HADOOP-8648. libhadoop: native CRC32 validation crashes when
+    io.bytes.per.checksum=1. (Colin Patrick McCabe via eli)
+
   BREAKDOWN OF HDFS-3042 SUBTASKS
 
     HADOOP-8220. ZKFailoverController doesn't handle failure to become active

Modified: hadoop/common/trunk/hadoop-common-project/hadoop-common/pom.xml
URL: 
http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-common-project/hadoop-common/pom.xml?rev=1381419&r1=1381418&r2=1381419&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-common-project/hadoop-common/pom.xml (original)
+++ hadoop/common/trunk/hadoop-common-project/hadoop-common/pom.xml Wed Sep  5 
22:19:40 2012
@@ -535,6 +535,20 @@
                   </target>
                 </configuration>
               </execution>
+              <execution>
+                <id>native_tests</id>
+                <phase>test</phase>
+                <goals><goal>run</goal></goals>
+                <configuration>
+                  <target>
+                    <exec executable="sh" failonerror="true" 
dir="${project.build.directory}/native">
+                      <arg value="-c"/>
+                      <arg value="[ x$SKIPTESTS = xtrue ] || 
${project.build.directory}/native/test_bulk_crc32"/>
+                      <env key="SKIPTESTS" value="${skipTests}"/>
+                    </exec>
+                  </target>
+                </configuration>
+              </execution>
             </executions>
           </plugin>
         </plugins>

Modified: 
hadoop/common/trunk/hadoop-common-project/hadoop-common/src/CMakeLists.txt
URL: 
http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-common-project/hadoop-common/src/CMakeLists.txt?rev=1381419&r1=1381418&r2=1381419&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-common-project/hadoop-common/src/CMakeLists.txt 
(original)
+++ hadoop/common/trunk/hadoop-common-project/hadoop-common/src/CMakeLists.txt 
Wed Sep  5 22:19:40 2012
@@ -60,6 +60,7 @@ find_package(ZLIB REQUIRED)
 set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -g -Wall -O2")
 set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -D_REENTRANT -D_FILE_OFFSET_BITS=64")
 set(D main/native/src/org/apache/hadoop)
+set(T main/native/src/test/org/apache/hadoop)
 
 GET_FILENAME_COMPONENT(HADOOP_ZLIB_LIBRARY ${ZLIB_LIBRARIES} NAME)
 
@@ -98,9 +99,16 @@ include_directories(
     ${JNI_INCLUDE_DIRS}
     ${ZLIB_INCLUDE_DIRS}
     ${SNAPPY_INCLUDE_DIR}
+    ${D}/util
 )
 CONFIGURE_FILE(${CMAKE_SOURCE_DIR}/config.h.cmake ${CMAKE_BINARY_DIR}/config.h)
 
+add_executable(test_bulk_crc32
+    ${D}/util/bulk_crc32.c
+    ${T}/util/test_bulk_crc32.c
+)
+set_property(SOURCE main.cpp PROPERTY INCLUDE_DIRECTORIES "\"-Werror\" 
\"-Wall\"")
+
 add_dual_library(hadoop
     ${D}/io/compress/lz4/Lz4Compressor.c
     ${D}/io/compress/lz4/Lz4Decompressor.c

Modified: 
hadoop/common/trunk/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/util/bulk_crc32.c
URL: 
http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/util/bulk_crc32.c?rev=1381419&r1=1381418&r2=1381419&view=diff
==============================================================================
--- 
hadoop/common/trunk/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/util/bulk_crc32.c
 (original)
+++ 
hadoop/common/trunk/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/util/bulk_crc32.c
 Wed Sep  5 22:19:40 2012
@@ -23,6 +23,7 @@
  */
 #include <assert.h>
 #include <arpa/inet.h>
+#include <errno.h>
 #include <stdint.h>
 #include <unistd.h>
 
@@ -33,9 +34,10 @@
 
 #define USE_PIPELINED
 
+#define CRC_INITIAL_VAL 0xffffffff
+
 typedef uint32_t (*crc_update_func_t)(uint32_t, const uint8_t *, size_t);
-static uint32_t crc_init();
-static uint32_t crc_val(uint32_t crc);
+static inline uint32_t crc_val(uint32_t crc);
 static uint32_t crc32_zlib_sb8(uint32_t crc, const uint8_t *buf, size_t 
length);
 static uint32_t crc32c_sb8(uint32_t crc, const uint8_t *buf, size_t length);
 
@@ -45,6 +47,35 @@ static void pipelined_crc32c(uint32_t *c
 static int cached_cpu_supports_crc32; // initialized by constructor below
 static uint32_t crc32c_hardware(uint32_t crc, const uint8_t* data, size_t 
length);
 
+int bulk_calculate_crc(const uint8_t *data, size_t data_len,
+                    uint32_t *sums, int checksum_type,
+                    int bytes_per_checksum) {
+  uint32_t crc;
+  crc_update_func_t crc_update_func;
+
+  switch (checksum_type) {
+    case CRC32_ZLIB_POLYNOMIAL:
+      crc_update_func = crc32_zlib_sb8;
+      break;
+    case CRC32C_POLYNOMIAL:
+      crc_update_func = crc32c_sb8;
+      break;
+    default:
+      return -EINVAL;
+      break;
+  }
+  while (likely(data_len > 0)) {
+    int len = likely(data_len >= bytes_per_checksum) ? bytes_per_checksum : 
data_len;
+    crc = CRC_INITIAL_VAL;
+    crc = crc_update_func(crc, data, len);
+    *sums = ntohl(crc_val(crc));
+    data += len;
+    data_len -= len;
+    sums++;
+  }
+  return 0;
+}
+
 int bulk_verify_crc(const uint8_t *data, size_t data_len,
                     const uint32_t *sums, int checksum_type,
                     int bytes_per_checksum,
@@ -80,7 +111,7 @@ int bulk_verify_crc(const uint8_t *data,
   if (do_pipelined) {
     /* Process three blocks at a time */
     while (likely(n_blocks >= 3)) {
-      crc1 = crc2 = crc3 = crc_init();  
+      crc1 = crc2 = crc3 = CRC_INITIAL_VAL;
       pipelined_crc32c(&crc1, &crc2, &crc3, data, bytes_per_checksum, 3);
 
       crc = ntohl(crc_val(crc1));
@@ -101,7 +132,7 @@ int bulk_verify_crc(const uint8_t *data,
 
     /* One or two blocks */
     if (n_blocks) {
-      crc1 = crc2 = crc_init();
+      crc1 = crc2 = crc3 = CRC_INITIAL_VAL;
       pipelined_crc32c(&crc1, &crc2, &crc3, data, bytes_per_checksum, 
n_blocks);
 
       if ((crc = ntohl(crc_val(crc1))) != *sums)
@@ -118,7 +149,7 @@ int bulk_verify_crc(const uint8_t *data,
  
     /* For something smaller than a block */
     if (remainder) {
-      crc1 = crc_init();
+      crc1 = crc2 = crc3 = CRC_INITIAL_VAL;
       pipelined_crc32c(&crc1, &crc2, &crc3, data, remainder, 1);
 
       if ((crc = ntohl(crc_val(crc1))) != *sums)
@@ -130,7 +161,7 @@ int bulk_verify_crc(const uint8_t *data,
 
   while (likely(data_len > 0)) {
     int len = likely(data_len >= bytes_per_checksum) ? bytes_per_checksum : 
data_len;
-    crc = crc_init();
+    crc = CRC_INITIAL_VAL;
     crc = crc_update_func(crc, data, len);
     crc = ntohl(crc_val(crc));
     if (unlikely(crc != *sums)) {
@@ -151,18 +182,10 @@ return_crc_error:
   return INVALID_CHECKSUM_DETECTED;
 }
 
-
-/**
- * Initialize a CRC
- */
-static uint32_t crc_init() {
-  return 0xffffffff;
-}
-
 /**
  * Extract the final result of a CRC
  */
-static uint32_t crc_val(uint32_t crc) {
+static inline uint32_t crc_val(uint32_t crc) {
   return ~crc;
 }
 
@@ -398,7 +421,7 @@ static void pipelined_crc32c(uint32_t *c
         counter--;
       }
 
-      /* Take care of the remainder. They are only up to three bytes,
+      /* Take care of the remainder. They are only up to seven bytes,
        * so performing byte-level crc32 won't take much time.
        */
       bdata = (uint8_t*)data;
@@ -433,7 +456,7 @@ static void pipelined_crc32c(uint32_t *c
         "crc32b (%5), %0;\n\t"
         "crc32b (%5,%4,1), %1;\n\t"
          : "=r"(c1), "=r"(c2) 
-         : "r"(c1), "r"(c2), "r"(c3), "r"(block_size), "r"(bdata)
+         : "r"(c1), "r"(c2), "r"(block_size), "r"(bdata)
         );
         bdata++;
         remainder--;
@@ -593,7 +616,7 @@ static void pipelined_crc32c(uint32_t *c
         "crc32b (%5), %0;\n\t"
         "crc32b (%5,%4,1), %1;\n\t"
          : "=r"(c1), "=r"(c2) 
-         : "r"(c1), "r"(c2), "r"(c3), "r"(block_size), "r"(bdata)
+         : "r"(c1), "r"(c2), "r"(block_size), "r"(bdata)
         );
         bdata++;
         remainder--;

Modified: 
hadoop/common/trunk/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/util/bulk_crc32.h
URL: 
http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/util/bulk_crc32.h?rev=1381419&r1=1381418&r2=1381419&view=diff
==============================================================================
--- 
hadoop/common/trunk/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/util/bulk_crc32.h
 (original)
+++ 
hadoop/common/trunk/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/util/bulk_crc32.h
 Wed Sep  5 22:19:40 2012
@@ -19,6 +19,7 @@
 #define BULK_CRC32_H_INCLUDED
 
 #include <stdint.h>
+#include <unistd.h> /* for size_t */
 
 // Constants for different CRC algorithms
 #define CRC32C_POLYNOMIAL 1
@@ -42,16 +43,45 @@ typedef struct crc32_error {
  * of bytes_per_checksum bytes. The checksums are each 32 bits
  * and are stored in sequential indexes of the 'sums' array.
  *
- *  checksum_type - one of the CRC32 constants defined above
- *  error_info - if non-NULL, will be filled in if an error
- *               is detected
+ * @param data                  The data to checksum
+ * @param dataLen               Length of the data buffer
+ * @param sums                  (out param) buffer to write checksums into.
+ *                              It must contain at least dataLen * 4 bytes.
+ * @param checksum_type         One of the CRC32 algorithm constants defined 
+ *                              above
+ * @param bytes_per_checksum    How many bytes of data to process per checksum.
+ * @param error_info            If non-NULL, will be filled in if an error
+ *                              is detected
  *
- * Returns: 0 for success, non-zero for an error, result codes
- *          for which are defined above
+ * @return                      0 for success, non-zero for an error, result 
codes
+ *                              for which are defined above
  */
 extern int bulk_verify_crc(const uint8_t *data, size_t data_len,
     const uint32_t *sums, int checksum_type,
     int bytes_per_checksum,
     crc32_error_t *error_info);
 
+/**
+ * Calculate checksums for some data.
+ *
+ * The checksums are each 32 bits and are stored in sequential indexes of the
+ * 'sums' array.
+ *
+ * This function is not (yet) optimized.  It is provided for testing purposes
+ * only.
+ *
+ * @param data                  The data to checksum
+ * @param dataLen               Length of the data buffer
+ * @param sums                  (out param) buffer to write checksums into.
+ *                              It must contain at least dataLen * 4 bytes.
+ * @param checksum_type         One of the CRC32 algorithm constants defined 
+ *                              above
+ * @param bytesPerChecksum      How many bytes of data to process per checksum.
+ *
+ * @return                      0 for success, non-zero for an error
+ */
+int bulk_calculate_crc(const uint8_t *data, size_t data_len,
+                    uint32_t *sums, int checksum_type,
+                    int bytes_per_checksum);
+
 #endif

Added: 
hadoop/common/trunk/hadoop-common-project/hadoop-common/src/main/native/src/test/org/apache/hadoop/util/test_bulk_crc32.c
URL: 
http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-common-project/hadoop-common/src/main/native/src/test/org/apache/hadoop/util/test_bulk_crc32.c?rev=1381419&view=auto
==============================================================================
--- 
hadoop/common/trunk/hadoop-common-project/hadoop-common/src/main/native/src/test/org/apache/hadoop/util/test_bulk_crc32.c
 (added)
+++ 
hadoop/common/trunk/hadoop-common-project/hadoop-common/src/main/native/src/test/org/apache/hadoop/util/test_bulk_crc32.c
 Wed Sep  5 22:19:40 2012
@@ -0,0 +1,77 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "bulk_crc32.h"
+
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#define EXPECT_ZERO(x) \
+    do { \
+        int __my_ret__ = x; \
+        if (__my_ret__) { \
+            fprintf(stderr, "TEST_ERROR: failed on line %d with return " \
+              "code %d: got nonzero from %s\n", __LINE__, __my_ret__, #x); \
+            return __my_ret__; \
+        } \
+    } while (0);
+
+static int testBulkVerifyCrc(int dataLen, int crcType, int bytesPerChecksum)
+{
+  int i;
+  uint8_t *data;
+  uint32_t *sums;
+  crc32_error_t errorData;
+
+  data = malloc(dataLen);
+  for (i = 0; i < dataLen; i++) {
+    data[i] = (i % 16) + 1;
+  }
+  sums = calloc(sizeof(uint32_t),
+                (dataLen + bytesPerChecksum - 1) / bytesPerChecksum);
+
+  EXPECT_ZERO(bulk_calculate_crc(data, dataLen, sums, crcType,
+                                 bytesPerChecksum));
+  EXPECT_ZERO(bulk_verify_crc(data, dataLen, sums, crcType,
+                            bytesPerChecksum, &errorData));
+  free(data);
+  free(sums);
+  return 0;
+}
+
+int main(int argc, char **argv)
+{
+  /* Test running bulk_calculate_crc with some different algorithms and
+   * bytePerChecksum values. */
+  EXPECT_ZERO(testBulkVerifyCrc(4096, CRC32C_POLYNOMIAL, 512));
+  EXPECT_ZERO(testBulkVerifyCrc(4096, CRC32_ZLIB_POLYNOMIAL, 512));
+  EXPECT_ZERO(testBulkVerifyCrc(256, CRC32C_POLYNOMIAL, 1));
+  EXPECT_ZERO(testBulkVerifyCrc(256, CRC32_ZLIB_POLYNOMIAL, 1));
+  EXPECT_ZERO(testBulkVerifyCrc(1, CRC32C_POLYNOMIAL, 1));
+  EXPECT_ZERO(testBulkVerifyCrc(1, CRC32_ZLIB_POLYNOMIAL, 1));
+  EXPECT_ZERO(testBulkVerifyCrc(2, CRC32C_POLYNOMIAL, 1));
+  EXPECT_ZERO(testBulkVerifyCrc(17, CRC32C_POLYNOMIAL, 1));
+  EXPECT_ZERO(testBulkVerifyCrc(17, CRC32C_POLYNOMIAL, 2));
+  EXPECT_ZERO(testBulkVerifyCrc(17, CRC32_ZLIB_POLYNOMIAL, 2));
+  EXPECT_ZERO(testBulkVerifyCrc(17, CRC32C_POLYNOMIAL, 4));
+  EXPECT_ZERO(testBulkVerifyCrc(17, CRC32_ZLIB_POLYNOMIAL, 4));
+
+  fprintf(stderr, "%s: SUCCESS.\n", argv[0]);
+  return EXIT_SUCCESS;
+}


Reply via email to