Repository: hadoop Updated Branches: refs/heads/yarn-2877 566712927 -> 855d52927
HADOOP-11887. Introduce Intel ISA-L erasure coding library for native erasure encoding support (Kai Zheng via Colin P. McCabe) Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/482e35c5 Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/482e35c5 Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/482e35c5 Branch: refs/heads/yarn-2877 Commit: 482e35c55a4bec27fa62b29d9e5f125816f1d8bd Parents: efc73d7 Author: Colin Patrick Mccabe <cmcc...@cloudera.com> Authored: Thu Nov 5 10:31:01 2015 -0800 Committer: Colin Patrick Mccabe <cmcc...@cloudera.com> Committed: Thu Nov 5 10:44:47 2015 -0800 ---------------------------------------------------------------------- BUILDING.txt | 25 ++ hadoop-common-project/hadoop-common/CHANGES.txt | 3 + hadoop-common-project/hadoop-common/pom.xml | 21 +- .../hadoop-common/src/CMakeLists.txt | 25 ++ .../hadoop-common/src/config.h.cmake | 1 + .../io/erasurecode/ErasureCodeNative.java | 86 +++++ .../apache/hadoop/util/NativeCodeLoader.java | 11 +- .../hadoop/util/NativeLibraryChecker.java | 20 +- .../src/main/native/native.vcxproj | 22 +- .../io/erasurecode/coder/erasure_code_native.c | 49 +++ ...he_hadoop_io_erasurecode_ErasureCodeNative.h | 29 ++ .../apache/hadoop/io/erasurecode/erasure_code.c | 271 ++++++++++++++++ .../io/erasurecode/include/erasure_code.h | 125 ++++++++ .../hadoop/io/erasurecode/include/gf_util.h | 111 +++++++ .../org/apache/hadoop/util/NativeCodeLoader.c | 10 + .../hadoop/io/erasurecode/erasure_code_test.c | 310 +++++++++++++++++++ hadoop-project-dist/pom.xml | 13 +- hadoop-project/pom.xml | 1 + 18 files changed, 1122 insertions(+), 11 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hadoop/blob/482e35c5/BUILDING.txt ---------------------------------------------------------------------- diff --git a/BUILDING.txt b/BUILDING.txt index efd93d1..c7be7a3 100644 --- 
a/BUILDING.txt +++ b/BUILDING.txt @@ -74,6 +74,8 @@ Optional packages: * Snappy compression $ sudo apt-get install snappy libsnappy-dev +* Intel ISA-L library for erasure coding + Please refer to https://01.org/intel%C2%AE-storage-acceleration-library-open-source-version * Bzip2 $ sudo apt-get install bzip2 libbz2-dev * Jansson (C Library for JSON) @@ -179,6 +181,29 @@ Maven build goals: * -Dtest.exclude=<TESTCLASSNAME> * -Dtest.exclude.pattern=**/<TESTCLASSNAME1>.java,**/<TESTCLASSNAME2>.java + Intel ISA-L build options: + + Intel ISA-L is an erasure coding library that can be utilized by the native code. + It is currently an optional component, meaning that Hadoop can be built with + or without this dependency. Note that the library is used as a dynamically loaded module. Please + refer to the official site for the library details. + https://01.org/intel%C2%AE-storage-acceleration-library-open-source-version + + * Use -Drequire.isal to fail the build if libisal.so is not found. + If this option is not specified and the isal library is missing, + we silently build a version of libhadoop.so that cannot make use of ISA-L and + the native raw erasure coders. + This option is recommended if you plan on making use of native raw erasure + coders and want to get more repeatable builds. + * Use -Disal.prefix to specify a nonstandard location for the libisal + library files. You do not need this option if you have installed ISA-L to the + system library path. + * Use -Disal.lib to specify a nonstandard location for the libisal library + files. + * Use -Dbundle.isal to copy the contents of the isal.lib directory into + the final tar file. This option requires that -Disal.lib is also given, + and it ignores the -Disal.prefix option. 
+ ---------------------------------------------------------------------------------- Building components separately http://git-wip-us.apache.org/repos/asf/hadoop/blob/482e35c5/hadoop-common-project/hadoop-common/CHANGES.txt ---------------------------------------------------------------------- diff --git a/hadoop-common-project/hadoop-common/CHANGES.txt b/hadoop-common-project/hadoop-common/CHANGES.txt index dd70947..85df389 100644 --- a/hadoop-common-project/hadoop-common/CHANGES.txt +++ b/hadoop-common-project/hadoop-common/CHANGES.txt @@ -948,6 +948,9 @@ Release 2.8.0 - UNRELEASED HADOOP-12040. Adjust inputs order for the decode API in raw erasure coder. (Kai Zheng via yliu) + HADOOP-11887. Introduce Intel ISA-L erasure coding library for native + erasure encoding support (Kai Zheng via Colin P. McCabe) + OPTIMIZATIONS HADOOP-11785. Reduce the number of listStatus operation in distcp http://git-wip-us.apache.org/repos/asf/hadoop/blob/482e35c5/hadoop-common-project/hadoop-common/pom.xml ---------------------------------------------------------------------- diff --git a/hadoop-common-project/hadoop-common/pom.xml b/hadoop-common-project/hadoop-common/pom.xml index 4735c6b..502bbbf 100644 --- a/hadoop-common-project/hadoop-common/pom.xml +++ b/hadoop-common-project/hadoop-common/pom.xml @@ -567,6 +567,9 @@ <openssl.prefix></openssl.prefix> <openssl.lib></openssl.lib> <openssl.include></openssl.include> + <require.isal>false</require.isal> + <isal.prefix></isal.prefix> + <isal.lib></isal.lib> <require.openssl>false</require.openssl> <runningWithNative>true</runningWithNative> <bundle.openssl.in.bin>false</bundle.openssl.in.bin> @@ -620,6 +623,7 @@ <javahClassName>org.apache.hadoop.io.compress.snappy.SnappyDecompressor</javahClassName> <javahClassName>org.apache.hadoop.io.compress.lz4.Lz4Compressor</javahClassName> <javahClassName>org.apache.hadoop.io.compress.lz4.Lz4Decompressor</javahClassName> + 
<javahClassName>org.apache.hadoop.io.erasurecode.ErasureCodeNative</javahClassName> <javahClassName>org.apache.hadoop.crypto.OpensslCipher</javahClassName> <javahClassName>org.apache.hadoop.crypto.random.OpensslSecureRandom</javahClassName> <javahClassName>org.apache.hadoop.util.NativeCrc32</javahClassName> @@ -642,7 +646,7 @@ <configuration> <target> <exec executable="cmake" dir="${project.build.directory}/native" failonerror="true"> - <arg line="${basedir}/src/ -DGENERATED_JAVAH=${project.build.directory}/native/javah -DJVM_ARCH_DATA_MODEL=${sun.arch.data.model} -DREQUIRE_BZIP2=${require.bzip2} -DREQUIRE_SNAPPY=${require.snappy} -DCUSTOM_SNAPPY_PREFIX=${snappy.prefix} -DCUSTOM_SNAPPY_LIB=${snappy.lib} -DCUSTOM_SNAPPY_INCLUDE=${snappy.include} -DREQUIRE_OPENSSL=${require.openssl} -DCUSTOM_OPENSSL_PREFIX=${openssl.prefix} -DCUSTOM_OPENSSL_LIB=${openssl.lib} -DCUSTOM_OPENSSL_INCLUDE=${openssl.include} -DEXTRA_LIBHADOOP_RPATH=${extra.libhadoop.rpath}"/> + <arg line="${basedir}/src/ -DGENERATED_JAVAH=${project.build.directory}/native/javah -DJVM_ARCH_DATA_MODEL=${sun.arch.data.model} -DREQUIRE_BZIP2=${require.bzip2} -DREQUIRE_SNAPPY=${require.snappy} -DCUSTOM_SNAPPY_PREFIX=${snappy.prefix} -DCUSTOM_SNAPPY_LIB=${snappy.lib} -DCUSTOM_SNAPPY_INCLUDE=${snappy.include} -DREQUIRE_ISAL=${require.isal} -DCUSTOM_ISAL_PREFIX=${isal.prefix} -DCUSTOM_ISAL_LIB=${isal.lib} -DREQUIRE_OPENSSL=${require.openssl} -DCUSTOM_OPENSSL_PREFIX=${openssl.prefix} -DCUSTOM_OPENSSL_LIB=${openssl.lib} -DCUSTOM_OPENSSL_INCLUDE=${openssl.include} -DEXTRA_LIBHADOOP_RPATH=${extra.libhadoop.rpath}"/> </exec> <exec executable="make" dir="${project.build.directory}/native" failonerror="true"> <arg line="VERBOSE=1"/> @@ -664,7 +668,13 @@ <arg value="[ x$SKIPTESTS = xtrue ] || ${project.build.directory}/native/test_bulk_crc32"/> <env key="SKIPTESTS" value="${skipTests}"/> </exec> - </target> + <exec executable="${shell-executable}" failonerror="true" dir="${project.build.directory}/native"> + <arg 
value="-c"/> + <arg value="[ ! -f ${project.build.directory}/native/erasure_code_test ] || ${project.build.directory}/native/erasure_code_test"/> + <env key="SKIPTESTS" value="${skipTests}"/> + <env key="LD_LIBRARY_PATH" value="${LD_LIBRARY_PATH}:${isal.lib}:${isal.prefix}"/> + </exec> + </target> </configuration> </execution> </executions> @@ -684,6 +694,9 @@ <snappy.prefix></snappy.prefix> <snappy.lib></snappy.lib> <snappy.include></snappy.include> + <require.isal>false</require.isal> + <isal.prefix></isal.prefix> + <isal.lib></isal.lib> <require.snappy>false</require.snappy> <bundle.snappy.in.bin>true</bundle.snappy.in.bin> <openssl.prefix></openssl.prefix> @@ -737,6 +750,7 @@ <javahClassName>org.apache.hadoop.io.compress.snappy.SnappyDecompressor</javahClassName> <javahClassName>org.apache.hadoop.io.compress.lz4.Lz4Compressor</javahClassName> <javahClassName>org.apache.hadoop.io.compress.lz4.Lz4Decompressor</javahClassName> + <javahClassName>org.apache.hadoop.io.erasurecode.ErasureCodeNative</javahClassName> <javahClassName>org.apache.hadoop.crypto.OpensslCipher</javahClassName> <javahClassName>org.apache.hadoop.crypto.random.OpensslSecureRandom</javahClassName> <javahClassName>org.apache.hadoop.util.NativeCrc32</javahClassName> @@ -790,6 +804,9 @@ <argument>/p:CustomOpensslLib=${openssl.lib}</argument> <argument>/p:CustomOpensslInclude=${openssl.include}</argument> <argument>/p:RequireOpenssl=${require.openssl}</argument> + <argument>/p:RequireIsal=${require.isal}</argument> + <argument>/p:CustomIsalPrefix=${isal.prefix}</argument> + <argument>/p:CustomIsalLib=${isal.lib}</argument> </arguments> </configuration> </execution> http://git-wip-us.apache.org/repos/asf/hadoop/blob/482e35c5/hadoop-common-project/hadoop-common/src/CMakeLists.txt ---------------------------------------------------------------------- diff --git a/hadoop-common-project/hadoop-common/src/CMakeLists.txt b/hadoop-common-project/hadoop-common/src/CMakeLists.txt index c93bfe7..63bb773 100644 
--- a/hadoop-common-project/hadoop-common/src/CMakeLists.txt +++ b/hadoop-common-project/hadoop-common/src/CMakeLists.txt @@ -94,6 +94,29 @@ else() endif() endif() +set(STORED_CMAKE_FIND_LIBRARY_SUFFIXES ${CMAKE_FIND_LIBRARY_SUFFIXES}) +hadoop_set_find_shared_library_version("2") +find_library(ISAL_LIBRARY + NAMES isal + PATHS ${CUSTOM_ISAL_PREFIX} ${CUSTOM_ISAL_PREFIX}/lib + ${CUSTOM_ISAL_PREFIX}/lib64 ${CUSTOM_ISAL_LIB}) +set(CMAKE_FIND_LIBRARY_SUFFIXES ${STORED_CMAKE_FIND_LIBRARY_SUFFIXES}) +if (ISAL_LIBRARY) + GET_FILENAME_COMPONENT(HADOOP_ISAL_LIBRARY ${ISAL_LIBRARY} NAME) + set(ISAL_INCLUDE_DIR ${SRC}/io/erasurecode/include) + set(ISAL_SOURCE_FILES + ${SRC}/io/erasurecode/erasure_code.c) + add_executable(erasure_code_test + ${SRC}/io/erasurecode/erasure_code.c + ${TST}/io/erasurecode/erasure_code_test.c + ) + target_link_libraries(erasure_code_test ${CMAKE_DL_LIBS}) +else (ISAL_LIBRARY) + IF(REQUIRE_ISAL) + MESSAGE(FATAL_ERROR "Required ISA-L library could not be found. ISAL_LIBRARY=${ISAL_LIBRARY}, CUSTOM_ISAL_PREFIX=${CUSTOM_ISAL_PREFIX}") + ENDIF(REQUIRE_ISAL) +endif (ISAL_LIBRARY) + # Build hardware CRC32 acceleration, if supported on the platform. 
if(CMAKE_SYSTEM_PROCESSOR MATCHES "^i.86$" OR CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" OR CMAKE_SYSTEM_PROCESSOR STREQUAL "amd64") set(BULK_CRC_ARCH_SOURCE_FIlE "${SRC}/util/bulk_crc32_x86.c") @@ -169,6 +192,7 @@ include_directories( ${ZLIB_INCLUDE_DIRS} ${BZIP2_INCLUDE_DIR} ${SNAPPY_INCLUDE_DIR} + ${ISAL_INCLUDE_DIR} ${OPENSSL_INCLUDE_DIR} ${SRC}/util ) @@ -181,6 +205,7 @@ hadoop_add_dual_library(hadoop ${SRC}/io/compress/lz4/Lz4Decompressor.c ${SRC}/io/compress/lz4/lz4.c ${SRC}/io/compress/lz4/lz4hc.c + ${ISAL_SOURCE_FILES} ${SNAPPY_SOURCE_FILES} ${OPENSSL_SOURCE_FILES} ${SRC}/io/compress/zlib/ZlibCompressor.c http://git-wip-us.apache.org/repos/asf/hadoop/blob/482e35c5/hadoop-common-project/hadoop-common/src/config.h.cmake ---------------------------------------------------------------------- diff --git a/hadoop-common-project/hadoop-common/src/config.h.cmake b/hadoop-common-project/hadoop-common/src/config.h.cmake index d71271d..445cc33 100644 --- a/hadoop-common-project/hadoop-common/src/config.h.cmake +++ b/hadoop-common-project/hadoop-common/src/config.h.cmake @@ -22,6 +22,7 @@ #cmakedefine HADOOP_BZIP2_LIBRARY "@HADOOP_BZIP2_LIBRARY@" #cmakedefine HADOOP_SNAPPY_LIBRARY "@HADOOP_SNAPPY_LIBRARY@" #cmakedefine HADOOP_OPENSSL_LIBRARY "@HADOOP_OPENSSL_LIBRARY@" +#cmakedefine HADOOP_ISAL_LIBRARY "@HADOOP_ISAL_LIBRARY@" #cmakedefine HAVE_SYNC_FILE_RANGE #cmakedefine HAVE_POSIX_FADVISE http://git-wip-us.apache.org/repos/asf/hadoop/blob/482e35c5/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/ErasureCodeNative.java ---------------------------------------------------------------------- diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/ErasureCodeNative.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/ErasureCodeNative.java new file mode 100644 index 0000000..1c98f3c --- /dev/null +++ 
b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/ErasureCodeNative.java @@ -0,0 +1,86 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.io.erasurecode; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.util.NativeCodeLoader; + +/** + * Erasure code native libraries (for now, Intel ISA-L) related utilities. + */ +public final class ErasureCodeNative { + + private static final Log LOG = + LogFactory.getLog(ErasureCodeNative.class.getName()); + + /** + * The reason why ISA-L library is not available, or null if it is available. 
+ */ + private static final String LOADING_FAILURE_REASON; + + static { + if (!NativeCodeLoader.isNativeCodeLoaded()) { + LOADING_FAILURE_REASON = "hadoop native library cannot be loaded."; + } else if (!NativeCodeLoader.buildSupportsIsal()) { + LOADING_FAILURE_REASON = "libhadoop was built without ISA-L support"; + } else { + String problem = null; + try { + loadLibrary(); + } catch (Throwable t) { + problem = "Loading ISA-L failed: " + t.getMessage(); + LOG.error("Loading ISA-L failed", t); + } + LOADING_FAILURE_REASON = problem; + } + } + + private ErasureCodeNative() {} + + /** + * Are native libraries loaded? + */ + public static boolean isNativeCodeLoaded() { + return LOADING_FAILURE_REASON == null; + } + + /** + * Is the native ISA-L library loaded and initialized? Throw exception if not. + */ + public static void checkNativeCodeLoaded() { + if (LOADING_FAILURE_REASON != null) { + throw new RuntimeException(LOADING_FAILURE_REASON); + } + } + + /** + * Load native library available or supported. + */ + public static native void loadLibrary(); + + /** + * Get the native library name that's available or supported. 
+ */ + public static native String getLibraryName(); + + public static String getLoadingFailureReason() { + return LOADING_FAILURE_REASON; + } +} http://git-wip-us.apache.org/repos/asf/hadoop/blob/482e35c5/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/NativeCodeLoader.java ---------------------------------------------------------------------- diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/NativeCodeLoader.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/NativeCodeLoader.java index 79d4c0c..dd04a19 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/NativeCodeLoader.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/NativeCodeLoader.java @@ -31,7 +31,7 @@ import org.apache.hadoop.classification.InterfaceStability; */ @InterfaceAudience.Private @InterfaceStability.Unstable -public class NativeCodeLoader { +public final class NativeCodeLoader { private static final Log LOG = LogFactory.getLog(NativeCodeLoader.class); @@ -62,6 +62,8 @@ public class NativeCodeLoader { } } + private NativeCodeLoader() {} + /** * Check if native-hadoop code is loaded for this platform. * @@ -76,7 +78,12 @@ public class NativeCodeLoader { * Returns true only if this build was compiled with support for snappy. */ public static native boolean buildSupportsSnappy(); - + + /** + * Returns true only if this build was compiled with support for ISA-L. + */ + public static native boolean buildSupportsIsal(); + /** * Returns true only if this build was compiled with support for openssl. 
*/ http://git-wip-us.apache.org/repos/asf/hadoop/blob/482e35c5/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/NativeLibraryChecker.java ---------------------------------------------------------------------- diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/NativeLibraryChecker.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/NativeLibraryChecker.java index d8c6899..c31f85d 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/NativeLibraryChecker.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/NativeLibraryChecker.java @@ -18,6 +18,7 @@ package org.apache.hadoop.util; +import org.apache.hadoop.io.erasurecode.ErasureCodeNative; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.crypto.OpensslCipher; import org.apache.hadoop.io.compress.Lz4Codec; @@ -65,6 +66,7 @@ public class NativeLibraryChecker { boolean nativeHadoopLoaded = NativeCodeLoader.isNativeCodeLoaded(); boolean zlibLoaded = false; boolean snappyLoaded = false; + boolean isalLoaded = false; // lz4 is linked within libhadoop boolean lz4Loaded = nativeHadoopLoaded; boolean bzip2Loaded = Bzip2Factory.isNativeBzip2Loaded(conf); @@ -75,6 +77,7 @@ public class NativeLibraryChecker { String hadoopLibraryName = ""; String zlibLibraryName = ""; String snappyLibraryName = ""; + String isalDetail = ""; String lz4LibraryName = ""; String bzip2LibraryName = ""; String winutilsPath = null; @@ -85,18 +88,29 @@ public class NativeLibraryChecker { if (zlibLoaded) { zlibLibraryName = ZlibFactory.getLibraryName(); } + snappyLoaded = NativeCodeLoader.buildSupportsSnappy() && SnappyCodec.isNativeCodeLoaded(); if (snappyLoaded && NativeCodeLoader.buildSupportsSnappy()) { snappyLibraryName = SnappyCodec.getLibraryName(); } - if (OpensslCipher.getLoadingFailureReason() != null) { - openSslDetail = OpensslCipher.getLoadingFailureReason(); + + isalDetail 
= ErasureCodeNative.getLoadingFailureReason(); + if (isalDetail != null) { + isalLoaded = false; + } else { + isalDetail = ErasureCodeNative.getLibraryName(); + isalLoaded = true; + } + + openSslDetail = OpensslCipher.getLoadingFailureReason(); + if (openSslDetail != null) { openSslLoaded = false; } else { openSslDetail = OpensslCipher.getLibraryName(); openSslLoaded = true; } + if (lz4Loaded) { lz4LibraryName = Lz4Codec.getLibraryName(); } @@ -125,6 +139,8 @@ public class NativeLibraryChecker { System.out.printf("lz4: %b %s%n", lz4Loaded, lz4LibraryName); System.out.printf("bzip2: %b %s%n", bzip2Loaded, bzip2LibraryName); System.out.printf("openssl: %b %s%n", openSslLoaded, openSslDetail); + System.out.printf("ISA-L: %b %s%n", isalLoaded, isalDetail); + if (Shell.WINDOWS) { System.out.printf("winutils: %b %s%n", winutilsExists, winutilsPath); } http://git-wip-us.apache.org/repos/asf/hadoop/blob/482e35c5/hadoop-common-project/hadoop-common/src/main/native/native.vcxproj ---------------------------------------------------------------------- diff --git a/hadoop-common-project/hadoop-common/src/main/native/native.vcxproj b/hadoop-common-project/hadoop-common/src/main/native/native.vcxproj index 0912c6a..17149f7 100644 --- a/hadoop-common-project/hadoop-common/src/main/native/native.vcxproj +++ b/hadoop-common-project/hadoop-common/src/main/native/native.vcxproj @@ -17,7 +17,7 @@ limitations under the License. 
--> -<Project DefaultTargets="CheckRequireSnappy;Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003"> +<Project DefaultTargets="CheckRequireSnappy;CheckRequireIsal;Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003"> <ItemGroup Label="ProjectConfigurations"> <ProjectConfiguration Include="Release|Win32"> <Configuration>Release</Configuration> @@ -79,11 +79,22 @@ <IncludePath Condition="'$(SnappyEnabled)' == 'true'">$(SnappyInclude);$(IncludePath)</IncludePath> <IncludePath Condition="Exists('$(ZLIB_HOME)')">$(ZLIB_HOME);$(IncludePath)</IncludePath> </PropertyGroup> + <PropertyGroup> + <IsalLib Condition="Exists('$(CustomIsalPrefix)\isa-l.dll')">$(CustomIsalPrefix)</IsalLib> + <IsalLib Condition="Exists('$(CustomIsalPrefix)\lib\isa-l.dll') And '$(IsalLib)' == ''">$(CustomIsalPrefix)\lib</IsalLib> + <IsalLib Condition="Exists('$(CustomIsalLib)') And '$(IsalLib)' == ''">$(CustomIsalLib)</IsalLib> + <IsalEnabled Condition="'$(IsalLib)' != ''">true</IsalEnabled> + </PropertyGroup> <Target Name="CheckRequireSnappy"> <Error Text="Required snappy library could not be found. SnappyLibrary=$(SnappyLibrary), SnappyInclude=$(SnappyInclude), CustomSnappyLib=$(CustomSnappyLib), CustomSnappyInclude=$(CustomSnappyInclude), CustomSnappyPrefix=$(CustomSnappyPrefix)" Condition="'$(RequireSnappy)' == 'true' And '$(SnappyEnabled)' != 'true'" /> </Target> + <Target Name="CheckRequireIsal"> + <Error + Text="Required ISA-L library could not be found. 
CustomIsalLib=$(CustomIsalLib), CustomIsalPrefix=$(CustomIsalPrefix)" + Condition="'$(RequireIsal)' == 'true' And '$(IsalEnabled)' != 'true'" /> + </Target> <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'"> <ClCompile> <WarningLevel>Level3</WarningLevel> @@ -92,7 +103,7 @@ <FunctionLevelLinking>true</FunctionLevelLinking> <IntrinsicFunctions>true</IntrinsicFunctions> <PreprocessorDefinitions>WIN32;NDEBUG;_WINDOWS;_USRDLL;NATIVE_EXPORTS;%(PreprocessorDefinitions)</PreprocessorDefinitions> - <AdditionalIncludeDirectories>..\winutils\include;..\..\..\target\native\javah;%JAVA_HOME%\include;%JAVA_HOME%\include\win32;.\src;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + <AdditionalIncludeDirectories>..\winutils\include;..\native\src\org\apache\hadoop\io\erasurecode\include;..\..\..\target\native\javah;%JAVA_HOME%\include;%JAVA_HOME%\include\win32;.\src;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> <CompileAs>CompileAsC</CompileAs> <DisableSpecificWarnings>4244</DisableSpecificWarnings> </ClCompile> @@ -113,7 +124,7 @@ <FunctionLevelLinking>true</FunctionLevelLinking> <IntrinsicFunctions>true</IntrinsicFunctions> <PreprocessorDefinitions>WIN32;NDEBUG;_WINDOWS;_USRDLL;NATIVE_EXPORTS;%(PreprocessorDefinitions)</PreprocessorDefinitions> - <AdditionalIncludeDirectories>..\winutils\include;..\..\..\target\native\javah;%JAVA_HOME%\include;%JAVA_HOME%\include\win32;.\src;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + <AdditionalIncludeDirectories>..\winutils\include;..\native\src\org\apache\hadoop\io\erasurecode\include;..\..\..\target\native\javah;%JAVA_HOME%\include;%JAVA_HOME%\include\win32;.\src;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> <CompileAs>CompileAsC</CompileAs> <DisableSpecificWarnings>4244</DisableSpecificWarnings> </ClCompile> @@ -145,11 +156,16 @@ <ClCompile Include="src\org\apache\hadoop\util\bulk_crc32.c" /> <ClCompile 
Include="src\org\apache\hadoop\util\NativeCodeLoader.c"> <AdditionalOptions Condition="'$(SnappyEnabled)' == 'true'">/D HADOOP_SNAPPY_LIBRARY=L\"snappy.dll\"</AdditionalOptions> + <AdditionalOptions Condition="'$(IsalEnabled)' == 'true'">/D HADOOP_ISAL_LIBRARY=\"isa-l.dll\"</AdditionalOptions> </ClCompile> <ClCompile Include="src\org\apache\hadoop\util\NativeCrc32.c" /> <ClCompile Include="src\org\apache\hadoop\yarn\server\nodemanager\windows_secure_container_executor.c"> <AdditionalIncludeDirectories>src\org\apache\hadoop\io\nativeio;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> </ClCompile> + <ClCompile Include="src\org\apache\hadoop\io\erasurecode\erasure_code.c" Condition="'$(IsalEnabled)' == 'true'"> + <AdditionalOptions>/D HADOOP_ISAL_LIBRARY=\"isa-l.dll\"</AdditionalOptions> + </ClCompile> + <ClCompile Include="src\org\apache\hadoop\io\erasurecode\coder\erasure_code_native.c" Condition="'$(IsalEnabled)' == 'true'"/> </ItemGroup> <ItemGroup> <ClInclude Include="..\src\org\apache\hadoop\util\crc32c_tables.h" /> http://git-wip-us.apache.org/repos/asf/hadoop/blob/482e35c5/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/erasurecode/coder/erasure_code_native.c ---------------------------------------------------------------------- diff --git a/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/erasurecode/coder/erasure_code_native.c b/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/erasurecode/coder/erasure_code_native.c new file mode 100644 index 0000000..e84df9a --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/erasurecode/coder/erasure_code_native.c @@ -0,0 +1,49 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "org_apache_hadoop.h" +#include "../include/erasure_code.h" +#include "org_apache_hadoop_io_erasurecode_ErasureCodeNative.h" + +#ifdef UNIX +#include "config.h" +#endif + +JNIEXPORT void JNICALL +Java_org_apache_hadoop_io_erasurecode_ErasureCodeNative_loadLibrary +(JNIEnv *env, jclass myclass) { + char errMsg[1024]; + load_erasurecode_lib(errMsg, sizeof(errMsg)); + if (strlen(errMsg) > 0) { + THROW(env, "java/lang/UnsatisfiedLinkError", errMsg); + } +} + +JNIEXPORT jstring JNICALL +Java_org_apache_hadoop_io_erasurecode_ErasureCodeNative_getLibraryName +(JNIEnv *env, jclass myclass) { + char* libName = get_library_name(); + if (libName == NULL) { + libName = "Unavailable"; + } + return (*env)->NewStringUTF(env, libName); +} http://git-wip-us.apache.org/repos/asf/hadoop/blob/482e35c5/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/erasurecode/coder/org_apache_hadoop_io_erasurecode_ErasureCodeNative.h ---------------------------------------------------------------------- diff --git a/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/erasurecode/coder/org_apache_hadoop_io_erasurecode_ErasureCodeNative.h b/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/erasurecode/coder/org_apache_hadoop_io_erasurecode_ErasureCodeNative.h new file 
mode 100644 index 0000000..d8ff3a0 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/erasurecode/coder/org_apache_hadoop_io_erasurecode_ErasureCodeNative.h @@ -0,0 +1,29 @@ +/* DO NOT EDIT THIS FILE - it is machine generated */ +#include <jni.h> +/* Header for class org_apache_hadoop_io_erasurecode_ErasureCodeNative */ + +#ifndef _Included_org_apache_hadoop_io_erasurecode_ErasureCodeNative +#define _Included_org_apache_hadoop_io_erasurecode_ErasureCodeNative +#ifdef __cplusplus +extern "C" { +#endif +/* + * Class: org_apache_hadoop_io_erasurecode_ErasureCodeNative + * Method: loadLibrary + * Signature: ()V + */ +JNIEXPORT void JNICALL Java_org_apache_hadoop_io_erasurecode_ErasureCodeNative_loadLibrary + (JNIEnv *, jclass); + +/* + * Class: org_apache_hadoop_io_erasurecode_ErasureCodeNative + * Method: getLibraryName + * Signature: ()Ljava/lang/String; + */ +JNIEXPORT jstring JNICALL Java_org_apache_hadoop_io_erasurecode_ErasureCodeNative_getLibraryName + (JNIEnv *, jclass); + +#ifdef __cplusplus +} +#endif +#endif http://git-wip-us.apache.org/repos/asf/hadoop/blob/482e35c5/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/erasurecode/erasure_code.c ---------------------------------------------------------------------- diff --git a/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/erasurecode/erasure_code.c b/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/erasurecode/erasure_code.c new file mode 100644 index 0000000..a6c099a --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/erasurecode/erasure_code.c @@ -0,0 +1,271 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <errno.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "org_apache_hadoop.h" +#include "../include/gf_util.h" +#include "../include/erasure_code.h" + +#ifdef UNIX +#include <sys/time.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <dlfcn.h> + +#include "config.h" +#endif + +#ifdef WINDOWS +#include <Windows.h> +#endif + +/** + * erasure_code.c + * Implementation of erasure code utilities based on the erasure_code.so library. + * Building this code does not rely on any ISA-L source code, but running + * it relies on the dynamic library being loaded successfully. + * + */ + +/** + * The loaded library handle. + */ +static void* libec = NULL; + +/** + * A helper function to dlsym a 'symbol' from a given library-handle. + */ + +#ifdef UNIX + +static __attribute__ ((unused)) +void *my_dlsym(void *handle, const char *symbol) { + void *func_ptr = dlsym(handle, symbol); + return func_ptr; +} + +/* A helper macro to dlsym the requisite dynamic symbol in NON-JNI env. 
*/ +#define EC_LOAD_DYNAMIC_SYMBOL(func_ptr, handle, symbol) \ + if ((func_ptr = my_dlsym(handle, symbol)) == NULL) { \ + return "Failed to load symbol" symbol; \ + } + +#endif + +#ifdef WINDOWS + + + +static FARPROC WINAPI my_dlsym(HMODULE handle, LPCSTR symbol) { + FARPROC func_ptr = GetProcAddress(handle, symbol); + return func_ptr; +} + +/* A helper macro to dlsym the requisite dynamic symbol in NON-JNI env. */ +#define EC_LOAD_DYNAMIC_SYMBOL(func_type, func_ptr, handle, symbol) \ + if ((func_ptr = (func_type)my_dlsym(handle, symbol)) == NULL) { \ + return "Failed to load symbol" symbol; \ + } + +#endif + + +#ifdef UNIX +// For gf_util.h +static unsigned char (*d_gf_mul)(unsigned char, unsigned char); +static unsigned char (*d_gf_inv)(unsigned char); +static void (*d_gf_gen_rs_matrix)(unsigned char *, int, int); +static void (*d_gf_gen_cauchy_matrix)(unsigned char *, int, int); +static int (*d_gf_invert_matrix)(unsigned char *, unsigned char *, const int); +static int (*d_gf_vect_mul)(int, unsigned char *, void *, void *); + +// For erasure_code.h +static void (*d_ec_init_tables)(int, int, unsigned char*, unsigned char*); +static void (*d_ec_encode_data)(int, int, int, unsigned char*, + unsigned char**, unsigned char**); +static void (*d_ec_encode_data_update)(int, int, int, int, unsigned char*, + unsigned char*, unsigned char**); +#endif + +#ifdef WINDOWS +// For erasure_code.h +typedef unsigned char (__cdecl *__d_gf_mul)(unsigned char, unsigned char); +static __d_gf_mul d_gf_mul; +typedef unsigned char (__cdecl *__d_gf_inv)(unsigned char); +static __d_gf_inv d_gf_inv; +typedef void (__cdecl *__d_gf_gen_rs_matrix)(unsigned char *, int, int); +static __d_gf_gen_rs_matrix d_gf_gen_rs_matrix; +typedef void (__cdecl *__d_gf_gen_cauchy_matrix)(unsigned char *, int, int); +static __d_gf_gen_cauchy_matrix d_gf_gen_cauchy_matrix; +typedef int (__cdecl *__d_gf_invert_matrix)(unsigned char *, + unsigned char *, const int); +static __d_gf_invert_matrix 
d_gf_invert_matrix; +typedef int (__cdecl *__d_gf_vect_mul)(int, unsigned char *, void *, void *); +static __d_gf_vect_mul d_gf_vect_mul; + +// For erasure_code.h +typedef void (__cdecl *__d_ec_init_tables)(int, int, + unsigned char*, unsigned char*); +static __d_ec_init_tables d_ec_init_tables; +typedef void (__cdecl *__d_ec_encode_data)(int, int, int, unsigned char*, + unsigned char**, unsigned char**); +static __d_ec_encode_data d_ec_encode_data; +typedef void (__cdecl *__d_ec_encode_data_update)(int, int, int, int, unsigned char*, + unsigned char*, unsigned char**); +static __d_ec_encode_data_update d_ec_encode_data_update; +#endif + +static const char* load_functions(void* libec) { +#ifdef UNIX + EC_LOAD_DYNAMIC_SYMBOL(d_gf_mul, libec, "gf_mul"); + EC_LOAD_DYNAMIC_SYMBOL(d_gf_inv, libec, "gf_inv"); + EC_LOAD_DYNAMIC_SYMBOL(d_gf_gen_rs_matrix, libec, "gf_gen_rs_matrix"); + EC_LOAD_DYNAMIC_SYMBOL(d_gf_gen_cauchy_matrix, libec, "gf_gen_cauchy1_matrix"); + EC_LOAD_DYNAMIC_SYMBOL(d_gf_invert_matrix, libec, "gf_invert_matrix"); + EC_LOAD_DYNAMIC_SYMBOL(d_gf_vect_mul, libec, "gf_vect_mul"); + + EC_LOAD_DYNAMIC_SYMBOL(d_ec_init_tables, libec, "ec_init_tables"); + EC_LOAD_DYNAMIC_SYMBOL(d_ec_encode_data, libec, "ec_encode_data"); + EC_LOAD_DYNAMIC_SYMBOL(d_ec_encode_data_update, libec, "ec_encode_data_update"); +#endif + +#ifdef WINDOWS + EC_LOAD_DYNAMIC_SYMBOL(__d_gf_mul, d_gf_mul, libec, "gf_mul"); + EC_LOAD_DYNAMIC_SYMBOL(__d_gf_inv, d_gf_inv, libec, "gf_inv"); + EC_LOAD_DYNAMIC_SYMBOL(__d_gf_gen_rs_matrix, d_gf_gen_rs_matrix, libec, "gf_gen_rs_matrix"); + EC_LOAD_DYNAMIC_SYMBOL(__d_gf_gen_cauchy_matrix, d_gf_gen_cauchy_matrix, libec, "gf_gen_cauchy1_matrix"); + EC_LOAD_DYNAMIC_SYMBOL(__d_gf_invert_matrix, d_gf_invert_matrix, libec, "gf_invert_matrix"); + EC_LOAD_DYNAMIC_SYMBOL(__d_gf_vect_mul, d_gf_vect_mul, libec, "gf_vect_mul"); + + EC_LOAD_DYNAMIC_SYMBOL(__d_ec_init_tables, d_ec_init_tables, libec, "ec_init_tables"); + EC_LOAD_DYNAMIC_SYMBOL(__d_ec_encode_data, 
d_ec_encode_data, libec, "ec_encode_data"); + EC_LOAD_DYNAMIC_SYMBOL(__d_ec_encode_data_update, d_ec_encode_data_update, libec, "ec_encode_data_update"); +#endif + + return NULL; +} + +void load_erasurecode_lib(char* err, size_t err_len) { + const char* errMsg; + + err[0] = '\0'; + + if (libec != NULL) { + return; + } + + // Load Intel ISA-L + #ifdef UNIX + libec = dlopen(HADOOP_ISAL_LIBRARY, RTLD_LAZY | RTLD_GLOBAL); + if (libec == NULL) { + snprintf(err, err_len, "Failed to load %s (%s)", + HADOOP_ISAL_LIBRARY, dlerror()); + return; + } + // Clear any existing error + dlerror(); + #endif + + #ifdef WINDOWS + libec = LoadLibrary(HADOOP_ISAL_LIBRARY); + if (libec == NULL) { + snprintf(err, err_len, "Failed to load %s", HADOOP_ISAL_LIBRARY); + return; + } + #endif + + errMsg = load_functions(libec); + if (errMsg != NULL) { + snprintf(err, err_len, "Loading functions from ISA-L failed: %s", errMsg); + } +} + +int build_support_erasurecode() { +#ifdef HADOOP_ISAL_LIBRARY + return 1; +#else + return 0; +#endif +} + +const char* get_library_name() { +#ifdef UNIX + Dl_info dl_info; + + if (d_ec_encode_data == NULL) { + return HADOOP_ISAL_LIBRARY; + } + + if(dladdr(d_ec_encode_data, &dl_info)) { + return dl_info.dli_fname; + } +#else + LPTSTR filename = NULL; + + if (libec == NULL) { + return HADOOP_ISAL_LIBRARY; + } + + if (GetModuleFileName(libec, filename, 256) > 0) { + return filename; + } +#endif + + return NULL; +} + +unsigned char h_gf_mul(unsigned char a, unsigned char b) { + return d_gf_mul(a, b); +} + +unsigned char h_gf_inv(unsigned char a) { + return d_gf_inv(a); +} + +void h_gf_gen_rs_matrix(unsigned char *a, int m, int k) { + d_gf_gen_rs_matrix(a, m, k); +} + +void h_gf_gen_cauchy_matrix(unsigned char *a, int m, int k) { + d_gf_gen_cauchy_matrix(a, m, k); +} + +int h_gf_invert_matrix(unsigned char *in, unsigned char *out, const int n) { + return d_gf_invert_matrix(in, out, n); +} + +int h_gf_vect_mul(int len, unsigned char *gftbl, void *src, void *dest) { + 
return d_gf_vect_mul(len, gftbl, src, dest); +} + +void h_ec_init_tables(int k, int rows, unsigned char* a, unsigned char* gftbls) { + d_ec_init_tables(k, rows, a, gftbls); +} + +void h_ec_encode_data(int len, int k, int rows, unsigned char *gftbls, + unsigned char **data, unsigned char **coding) { + d_ec_encode_data(len, k, rows, gftbls, data, coding); +} + +void h_ec_encode_data_update(int len, int k, int rows, int vec_i, + unsigned char *gftbls, unsigned char *data, unsigned char **coding) { + d_ec_encode_data_update(len, k, rows, vec_i, gftbls, data, coding); +} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/hadoop/blob/482e35c5/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/erasurecode/include/erasure_code.h ---------------------------------------------------------------------- diff --git a/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/erasurecode/include/erasure_code.h b/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/erasurecode/include/erasure_code.h new file mode 100644 index 0000000..123085e --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/erasurecode/include/erasure_code.h @@ -0,0 +1,125 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef _ERASURE_CODE_H_ +#define _ERASURE_CODE_H_ + +#include <stddef.h> + +/** + * Interface to functions supporting erasure code encode and decode. + * + * This file defines the interface to optimized functions used in erasure + * codes. Encode and decode of erasures in GF(2^8) are made by calculating the + * dot product of the symbols (bytes in GF(2^8)) across a set of buffers and a + * set of coefficients. Values for the coefficients are determined by the type + * of erasure code. Using a general dot product means that any sequence of + * coefficients may be used including erasure codes based on random + * coefficients. + * Multiple versions of dot product are supplied to calculate 1-6 output + * vectors in one pass. + * Base GF multiply and divide functions can be sped up by defining + * GF_LARGE_TABLES at the expense of memory size. + * + */ + +/** + * Return 0 if not support, 1 otherwise. + */ +int build_support_erasurecode(); + +/** + * Get the library name possibly of full path. + */ +const char* get_library_name(); + +/** + * Initialize and load erasure code library, returning error message if any. + * + * @param err The err message buffer. + * @param err_len The length of the message buffer. + */ +void load_erasurecode_lib(char* err, size_t err_len); + +/** + * Initialize tables for fast Erasure Code encode and decode. + * + * Generates the expanded tables needed for fast encode or decode for erasure + * codes on blocks of data. 32bytes is generated for each input coefficient. + * + * @param k The number of vector sources or rows in the generator matrix + * for coding. + * @param rows The number of output vectors to concurrently encode/decode. + * @param a Pointer to sets of arrays of input coefficients used to encode + * or decode data. 
+ * @param gftbls Pointer to start of space for concatenated output tables + * generated from input coefficients. Must be of size 32*k*rows. + * @returns none + */ +void h_ec_init_tables(int k, int rows, unsigned char* a, unsigned char* gftbls); + +/** + * Generate or decode erasure codes on blocks of data, runs appropriate version. + * + * Given a list of source data blocks, generate one or multiple blocks of + * encoded data as specified by a matrix of GF(2^8) coefficients. When given a + * suitable set of coefficients, this function will perform the fast generation + * or decoding of Reed-Solomon type erasure codes. + * + * This function determines what instruction sets are enabled and + * selects the appropriate version at runtime. + * + * @param len Length of each block of data (vector) of source or dest data. + * @param k The number of vector sources or rows in the generator matrix + * for coding. + * @param rows The number of output vectors to concurrently encode/decode. + * @param gftbls Pointer to array of input tables generated from coding + * coefficients in ec_init_tables(). Must be of size 32*k*rows + * @param data Array of pointers to source input buffers. + * @param coding Array of pointers to coded output buffers. + * @returns none + */ +void h_ec_encode_data(int len, int k, int rows, unsigned char *gftbls, + unsigned char **data, unsigned char **coding); + +/** + * @brief Generate update for encode or decode of erasure codes from single + * source, runs appropriate version. + * + * Given one source data block, update one or multiple blocks of encoded data as + * specified by a matrix of GF(2^8) coefficients. When given a suitable set of + * coefficients, this function will perform the fast generation or decoding of + * Reed-Solomon type erasure codes from one input source at a time. + * + * This function determines what instruction sets are enabled and selects the + * appropriate version at runtime. 
+ * + * @param len Length of each block of data (vector) of source or dest data. + * @param k The number of vector sources or rows in the generator matrix + * for coding. + * @param rows The number of output vectors to concurrently encode/decode. + * @param vec_i The vector index corresponding to the single input source. + * @param gftbls Pointer to array of input tables generated from coding + * coefficients in ec_init_tables(). Must be of size 32*k*rows + * @param data Pointer to single input source used to update output parity. + * @param coding Array of pointers to coded output buffers. + * @returns none + */ +void h_ec_encode_data_update(int len, int k, int rows, int vec_i, + unsigned char *gftbls, unsigned char *data, unsigned char **coding); + +#endif //_ERASURE_CODE_H_ http://git-wip-us.apache.org/repos/asf/hadoop/blob/482e35c5/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/erasurecode/include/gf_util.h ---------------------------------------------------------------------- diff --git a/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/erasurecode/include/gf_util.h b/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/erasurecode/include/gf_util.h new file mode 100644 index 0000000..2be8328 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/erasurecode/include/gf_util.h @@ -0,0 +1,111 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef _GF_UTIL_H +#define _GF_UTIL_H + +/** + * gf_util.h + * Interface to functions for vector (block) multiplication in GF(2^8). + * + * This file defines the interface to routines used in fast RAID rebuild and + * erasure codes. + */ + + +/** + * Single element GF(2^8) multiply. + * + * @param a Multiplicand a + * @param b Multiplicand b + * @returns Product of a and b in GF(2^8) + */ +unsigned char h_gf_mul(unsigned char a, unsigned char b); + +/** + * Single element GF(2^8) inverse. + * + * @param a Input element + * @returns Field element b such that a x b = {1} + */ +unsigned char h_gf_inv(unsigned char a); + +/** + * Generate a matrix of coefficients to be used for encoding. + * + * Vandermonde matrix example of encoding coefficients where high portion of + * matrix is identity matrix I and lower portion is constructed as 2^{i*(j-k+1)} + * i:{0,k-1} j:{k,m-1}. Commonly used method for choosing coefficients in + * erasure encoding but does not guarantee invertable for every sub matrix. For + * large k it is possible to find cases where the decode matrix chosen from + * sources and parity not in erasure are not invertable. Users may want to + * adjust for k > 5. + * + * @param a [mxk] array to hold coefficients + * @param m number of rows in matrix corresponding to srcs + parity. + * @param k number of columns in matrix corresponding to srcs. + * @returns none + */ +void h_gf_gen_rs_matrix(unsigned char *a, int m, int k); + +/** + * Generate a Cauchy matrix of coefficients to be used for encoding. 
+ * + * Cauchy matrix example of encoding coefficients where high portion of matrix + * is identity matrix I and lower portion is constructed as 1/(i + j) | i != j, + * i:{0,k-1} j:{k,m-1}. Any sub-matrix of a Cauchy matrix should be invertable. + * + * @param a [mxk] array to hold coefficients + * @param m number of rows in matrix corresponding to srcs + parity. + * @param k number of columns in matrix corresponding to srcs. + * @returns none + */ +void h_gf_gen_cauchy_matrix(unsigned char *a, int m, int k); + +/** + * Invert a matrix in GF(2^8) + * + * @param in input matrix + * @param out output matrix such that [in] x [out] = [I] - identity matrix + * @param n size of matrix [nxn] + * @returns 0 successful, other fail on singular input matrix + */ +int h_gf_invert_matrix(unsigned char *in, unsigned char *out, const int n); + +/** + * GF(2^8) vector multiply by constant, runs appropriate version. + * + * Does a GF(2^8) vector multiply b = Ca where a and b are arrays and C + * is a single field element in GF(2^8). Can be used for RAID6 rebuild + * and partial write functions. Function requires pre-calculation of a + * 32-element constant array based on constant C. gftbl(C) = {C{00}, + * C{01}, C{02}, ... , C{0f} }, {C{00}, C{10}, C{20}, ... , C{f0} }. + * Len and src must be aligned to 32B. + * + * This function determines what instruction sets are enabled + * and selects the appropriate version at runtime. + * + * @param len Length of vector in bytes. Must be aligned to 32B. + * @param gftbl Pointer to 32-byte array of pre-calculated constants based on C. + * @param src Pointer to src data array. Must be aligned to 32B. + * @param dest Pointer to destination data array. Must be aligned to 32B. 
+ * @returns 0 pass, other fail + */ +int h_gf_vect_mul(int len, unsigned char *gftbl, void *src, void *dest); + + +#endif //_GF_UTIL_H http://git-wip-us.apache.org/repos/asf/hadoop/blob/482e35c5/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/util/NativeCodeLoader.c ---------------------------------------------------------------------- diff --git a/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/util/NativeCodeLoader.c b/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/util/NativeCodeLoader.c index 3625112..ae8263a 100644 --- a/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/util/NativeCodeLoader.c +++ b/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/util/NativeCodeLoader.c @@ -49,6 +49,16 @@ JNIEXPORT jboolean JNICALL Java_org_apache_hadoop_util_NativeCodeLoader_buildSup #endif } +JNIEXPORT jboolean JNICALL Java_org_apache_hadoop_util_NativeCodeLoader_buildSupportsIsal + (JNIEnv *env, jclass clazz) +{ +#ifdef HADOOP_ISAL_LIBRARY + return JNI_TRUE; +#else + return JNI_FALSE; +#endif +} + JNIEXPORT jstring JNICALL Java_org_apache_hadoop_util_NativeCodeLoader_getLibraryName (JNIEnv *env, jclass clazz) { http://git-wip-us.apache.org/repos/asf/hadoop/blob/482e35c5/hadoop-common-project/hadoop-common/src/main/native/src/test/org/apache/hadoop/io/erasurecode/erasure_code_test.c ---------------------------------------------------------------------- diff --git a/hadoop-common-project/hadoop-common/src/main/native/src/test/org/apache/hadoop/io/erasurecode/erasure_code_test.c b/hadoop-common-project/hadoop-common/src/main/native/src/test/org/apache/hadoop/io/erasurecode/erasure_code_test.c new file mode 100644 index 0000000..9817a76 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/native/src/test/org/apache/hadoop/io/erasurecode/erasure_code_test.c @@ -0,0 +1,310 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under 
one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * This is a lightweight version of the same file in Intel ISA-L library to test + * and verify the basic functions of ISA-L integration. Note it's not serving as + * a complete ISA-L library test nor as any sample to write an erasure coder + * using the library. A sample is to be written and provided separately. 
+ */
+
+#include "org_apache_hadoop.h"
+#include "erasure_code.h"
+#include "gf_util.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#define TEST_LEN 8192
+#define TEST_SOURCES 127
+#define MMAX TEST_SOURCES
+#define KMAX TEST_SOURCES
+#define TEST_SEED 11
+
+// Print the first 'len' bytes of buf as hex, 32 values per output line.
+static void dump(unsigned char *buf, int len)
+{
+  int i;
+  for (i = 0; i < len;) {
+    printf(" %2x", 0xff & buf[i++]);
+    if (i % 32 == 0)
+      printf("\n");
+  }
+  printf("\n");
+}
+
+// Print a k x m matrix stored as an array of k row pointers.
+static void dump_matrix(unsigned char **s, int k, int m)
+{
+  int i, j;
+  for (i = 0; i < k; i++) {
+    for (j = 0; j < m; j++) {
+      printf(" %2x", s[i][j]);
+    }
+    printf("\n");
+  }
+  printf("\n");
+}
+
+// Print a k x m matrix stored contiguously in row-major order.
+static void dump_u8xu8(unsigned char *s, int k, int m)
+{
+  int i, j;
+  for (i = 0; i < k; i++) {
+    for (j = 0; j < m; j++) {
+      printf(" %2x", 0xff & s[j + (i * m)]);
+    }
+    printf("\n");
+  }
+  printf("\n");
+}
+
+// Generate Random errors
+//
+// Marks each of the m buffers as erased with probability 1/2 until m - k
+// erasures have been chosen, and forces one erasure if none was picked.
+//   src_err_list  out: indices of the erased buffers, in increasing order
+//   src_in_err    out: per-buffer flag, 1 if erased (caller zeroes it first)
+//   pnerrs        out: total number of erasures
+//   pnsrcerrs     out: number of erasures with index < k (source buffers)
+static void gen_err_list(unsigned char *src_err_list,
+                         unsigned char *src_in_err, int *pnerrs, int *pnsrcerrs, int k, int m)
+{
+  int i, err;
+  int nerrs = 0, nsrcerrs = 0;
+
+  for (i = 0, nerrs = 0, nsrcerrs = 0; i < m && nerrs < m - k; i++) {
+    err = 1 & rand();
+    src_in_err[i] = err;
+    if (err) {
+      src_err_list[nerrs++] = i;
+      if (i < k) {
+        nsrcerrs++;
+      }
+    }
+  }
+  if (nerrs == 0) { // should have at least one error
+    // Draw until the index falls inside the m buffers in use.
+    while ((err = (rand() % KMAX)) >= m) ;
+    src_err_list[nerrs++] = err;
+    src_in_err[err] = 1;
+    if (err < k)
+      nsrcerrs = 1;
+  }
+  *pnerrs = nerrs;
+  *pnsrcerrs = nsrcerrs;
+  return;
+}
+
+#define NO_INVERT_MATRIX -2
+// Generate decode matrix from encode matrix
+//
+// Builds a k x k matrix b from the first k non-erased rows of encode_matrix,
+// inverts it into invert_matrix, and derives one decode_matrix row per
+// erased buffer. decode_index[i] receives the encode_matrix row used for
+// row i of b, which is also the order in which the caller must supply the
+// surviving buffers.
+// Returns 0 on success, -1 on allocation failure, NO_INVERT_MATRIX when no
+// invertible matrix could be formed.
+static int gf_gen_decode_matrix(unsigned char *encode_matrix,
+                                unsigned char *decode_matrix,
+                                unsigned char *invert_matrix,
+                                unsigned int *decode_index,
+                                unsigned char *src_err_list,
+                                unsigned char *src_in_err,
+                                int nerrs, int nsrcerrs, int k, int m)
+{
+  int i, j, p;
+  int r;
+  unsigned char *backup, *b, s;
+  int incr = 0;
+
+  b = malloc(MMAX * KMAX);
+  backup = malloc(MMAX * KMAX);
+
+  if (b == NULL || backup == NULL) {
+    printf("Test failure! Error with malloc\n");
+    free(b);
+    free(backup);
+    return -1;
+  }
+  // Construct matrix b by removing error rows
+  for (i = 0, r = 0; i < k; i++, r++) {
+    while (src_in_err[r])
+      r++;
+    for (j = 0; j < k; j++) {
+      b[k * i + j] = encode_matrix[k * r + j];
+      backup[k * i + j] = encode_matrix[k * r + j];
+    }
+    decode_index[i] = r;
+  }
+  incr = 0;
+  // While inversion fails, swap the last row of b for a later row of
+  // encode_matrix and try again; b is restored from backup each round.
+  while (h_gf_invert_matrix(b, invert_matrix, k) < 0) {
+    if (nerrs == (m - k)) {
+      // Every spare row is already erased, nothing left to substitute.
+      free(b);
+      free(backup);
+      printf("BAD MATRIX\n");
+      return NO_INVERT_MATRIX;
+    }
+    incr++;
+    memcpy(b, backup, MMAX * KMAX);
+    for (i = nsrcerrs; i < nerrs - nsrcerrs; i++) {
+      if (src_err_list[i] == (decode_index[k - 1] + incr)) {
+        // skip the erased parity line
+        incr++;
+        continue;
+      }
+    }
+    if (decode_index[k - 1] + incr >= m) {
+      free(b);
+      free(backup);
+      printf("BAD MATRIX\n");
+      return NO_INVERT_MATRIX;
+    }
+    decode_index[k - 1] += incr;
+    for (j = 0; j < k; j++)
+      b[k * (k - 1) + j] = encode_matrix[k * decode_index[k - 1] + j];
+
+  };
+
+  // Erased source buffers: the decode row is the matching row of the inverse.
+  for (i = 0; i < nsrcerrs; i++) {
+    for (j = 0; j < k; j++) {
+      decode_matrix[k * i + j] = invert_matrix[k * src_err_list[i] + j];
+    }
+  }
+  /* src_err_list from encode_matrix * invert of b for parity decoding */
+  for (p = nsrcerrs; p < nerrs; p++) {
+    for (i = 0; i < k; i++) {
+      s = 0;
+      for (j = 0; j < k; j++)
+        s ^= h_gf_mul(invert_matrix[j * k + i],
+                      encode_matrix[k * src_err_list[p] + j]);
+
+      decode_matrix[k * p + i] = s;
+    }
+  }
+  free(b);
+  free(backup);
+  return 0;
+}
+
+// Encode k = 5 random source buffers into m = 9 buffers with a Cauchy
+// matrix, erase a random subset, recover the erased buffers and compare them
+// with the originals. Returns 0 on success or when the build has no ISA-L
+// support, -1 on any failure (the reason is printed to stdout).
+// Buffers are not freed; the process exits right after the test.
+int main(int argc, char *argv[])
+{
+  char err[256];
+  size_t err_len = sizeof(err);
+  int re, i, j, p, m, k;
+  int nerrs, nsrcerrs;
+  unsigned int decode_index[MMAX];
+  unsigned char *temp_buffs[TEST_SOURCES], *buffs[TEST_SOURCES];
+  unsigned char *encode_matrix, *decode_matrix, *invert_matrix, *g_tbls;
+  unsigned char src_in_err[TEST_SOURCES], src_err_list[TEST_SOURCES];
+  unsigned char *recov[TEST_SOURCES];
+
+  if (0 == build_support_erasurecode()) {
+    printf("The native library isn't available, skipping this test\n");
+    return 0; // Normal, not an error
+  }
+
+  load_erasurecode_lib(err, err_len);
+  if (strlen(err) > 0) {
+    printf("Loading erasurecode library failed: %s\n", err);
+    return -1;
+  }
+
+  printf("Performing erasure code test\n");
+  srand(TEST_SEED);
+
+  // Allocate the arrays
+  for (i = 0; i < TEST_SOURCES; i++) {
+    buffs[i] = malloc(TEST_LEN);
+  }
+
+  for (i = 0; i < TEST_SOURCES; i++) {
+    temp_buffs[i] = malloc(TEST_LEN);
+  }
+
+  // The buffers are written to below, so fail early on a NULL allocation.
+  for (i = 0; i < TEST_SOURCES; i++) {
+    if (buffs[i] == NULL || temp_buffs[i] == NULL) {
+      printf("Test failure! Error with malloc\n");
+      return -1;
+    }
+  }
+
+  // Test erasure code by encode and recovery
+
+  encode_matrix = malloc(MMAX * KMAX);
+  decode_matrix = malloc(MMAX * KMAX);
+  invert_matrix = malloc(MMAX * KMAX);
+  g_tbls = malloc(KMAX * TEST_SOURCES * 32);
+  if (encode_matrix == NULL || decode_matrix == NULL
+      || invert_matrix == NULL || g_tbls == NULL) {
+    printf("%s\n", "allocating test matrix buffers error");
+    return -1;
+  }
+
+  m = 9;
+  k = 5;
+  if (m > MMAX || k > KMAX)
+    return -1;
+
+  // Make random data
+  for (i = 0; i < k; i++)
+    for (j = 0; j < TEST_LEN; j++)
+      buffs[i][j] = rand();
+
+  // The matrix generated by gf_gen_cauchy1_matrix
+  // is always invertible.
+  h_gf_gen_cauchy_matrix(encode_matrix, m, k);
+
+  // Generate g_tbls from encode matrix encode_matrix
+  h_ec_init_tables(k, m - k, &encode_matrix[k * k], g_tbls);
+
+  // Perform matrix dot_prod for EC encoding
+  // using g_tbls from encode matrix encode_matrix
+  h_ec_encode_data(TEST_LEN, k, m - k, g_tbls, buffs, &buffs[k]);
+
+  // Choose random buffers to be in erasure
+  memset(src_in_err, 0, TEST_SOURCES);
+  gen_err_list(src_err_list, src_in_err, &nerrs, &nsrcerrs, k, m);
+
+  // Generate decode matrix
+  re = gf_gen_decode_matrix(encode_matrix, decode_matrix,
+                            invert_matrix, decode_index, src_err_list, src_in_err,
+                            nerrs, nsrcerrs, k, m);
+  if (re != 0) {
+    printf("%s\n", "gf_gen_decode_matrix failed");
+    return -1;
+  }
+  // Pack recovery array as list of valid sources
+  // Its order must be the same as the order
+  // to generate matrix b in gf_gen_decode_matrix
+  for (i = 0; i < k; i++) {
+    recov[i] = buffs[decode_index[i]];
+  }
+
+  // Recover data
+  h_ec_init_tables(k, nerrs, decode_matrix, g_tbls);
+  h_ec_encode_data(TEST_LEN, k, nerrs, g_tbls, recov, &temp_buffs[k]);
+  for (i = 0; i < nerrs; i++) {
+    if (0 != memcmp(temp_buffs[k + i], buffs[src_err_list[i]], TEST_LEN)) {
+      printf("Fail error recovery (%d, %d, %d)\n", m, k, nerrs);
+
+      printf(" - erase list = ");
+      for (j = 0; j < nerrs; j++) {
+        printf(" %d", src_err_list[j]);
+      }
+
+      printf(" - Index = ");
+      for (p = 0; p < k; p++) {
+        printf(" %d", decode_index[p]);
+      }
+
+      printf("\nencode_matrix:\n");
+      dump_u8xu8((unsigned char *) encode_matrix, m, k);
+      printf("inv b:\n");
+      dump_u8xu8((unsigned char *) invert_matrix, k, k);
+      printf("\ndecode_matrix:\n");
+      // Only the first nerrs rows of decode_matrix were filled in.
+      dump_u8xu8((unsigned char *) decode_matrix, nerrs, k);
+      printf("recov %d:", src_err_list[i]);
+      dump(temp_buffs[k + i], 25);
+      printf("orig :");
+      dump(buffs[src_err_list[i]], 25);
+
+      return -1;
+    }
+  }
+
+  printf("done EC tests: Pass\n");
+  return 0;
+}
http://git-wip-us.apache.org/repos/asf/hadoop/blob/482e35c5/hadoop-project-dist/pom.xml ---------------------------------------------------------------------- diff --git a/hadoop-project-dist/pom.xml b/hadoop-project-dist/pom.xml index 8df266f..81773d6 100644 --- a/hadoop-project-dist/pom.xml +++ b/hadoop-project-dist/pom.xml @@ -41,6 +41,7 @@ <hadoop.component>UNDEF</hadoop.component> <bundle.snappy>false</bundle.snappy> <bundle.snappy.in.bin>false</bundle.snappy.in.bin> + <bundle.isal>true</bundle.isal> <bundle.openssl>false</bundle.openssl> <bundle.openssl.in.bin>false</bundle.openssl.in.bin> </properties> @@ -332,14 +333,22 @@ mkdir -p $${TARGET_DIR} cd $${LIB_DIR} $$TAR lib* | (cd $${TARGET_DIR}/; $$UNTAR) - if [ "${bundle.snappy}" = "true" ] ; then + if [ "X${bundle.snappy}" = "Xtrue" ] ; then cd "${snappy.lib}" $$TAR *snappy* | (cd $${TARGET_DIR}/; $$UNTAR) fi - if [ "${bundle.openssl}" = "true" ] ; then + if [ "X${bundle.openssl}" = "Xtrue" ] ; then cd "${openssl.lib}" $$TAR *crypto* | (cd $${TARGET_DIR}/; $$UNTAR) fi + if [ "X${bundle.isal}" = "Xtrue" ] ; then + if [ "X${isal.lib}" != "X" ]; then + cd "${isal.lib}" + $$TAR *isa* | (cd $${TARGET_DIR}/; $$UNTAR) + else + echo "The required option isal.lib isn't given, bundling ISA-L skipped" + fi + fi fi BIN_DIR="${BUILD_DIR}/bin" if [ -d $${BIN_DIR} ] ; then http://git-wip-us.apache.org/repos/asf/hadoop/blob/482e35c5/hadoop-project/pom.xml ---------------------------------------------------------------------- diff --git a/hadoop-project/pom.xml b/hadoop-project/pom.xml index efc3a7d..7a8425d 100644 --- a/hadoop-project/pom.xml +++ b/hadoop-project/pom.xml @@ -1245,6 +1245,7 @@ <!-- Specify where to look for the native DLL on Windows --> <PATH>${env.PATH};${hadoop.common.build.dir}/bin;${snappy.lib}</PATH> <PATH>${env.PATH};${hadoop.common.build.dir}/bin;${openssl.lib}</PATH> + <PATH>${env.PATH};${hadoop.common.build.dir}/bin;${isal.lib}</PATH> </environmentVariables> </configuration> </plugin>