This is the first draft of a Linux implementation of the Skein hash
algorithm, which was recently mentioned as a prominent submission to
NIST's SHA-3 competition.

Website:        http://www.schneier.com/skein.html

It still needs more work, linux-ifying, testing, and reviewing.

One note I forgot to mention in the commit itself, but should be
considered when reviewing this:

Skein permits the output digest size to be specified by the user.
"Skein-256" means 256 bits of internal state, NOT 256 bits of output
digest.  The output digest size is specified to Skein at init time.

In my implementation below, I attempted to follow the Principle of Least
Surprise, by hardcoding output digest size == internal state size.
Thus, in my implementation, "skein256" really does mean 256 output bits.

I am currently pushing this work to the 'skein' branch of
git://git.kernel.org/pub/scm/linux/kernel/git/jgarzik/misc-2.6.git skein

Comments welcome!

---
 crypto/Kconfig             |   12 +
 crypto/Makefile            |    3 +
 crypto/skein.h             |  265 ++++++++++++++++++++++
 crypto/skein1024_generic.c |  518 ++++++++++++++++++++++++++++++++++++++++++++
 crypto/skein256_generic.c  |  367 +++++++++++++++++++++++++++++++
 crypto/skein512_generic.c  |  417 +++++++++++++++++++++++++++++++++++
 6 files changed, 1582 insertions(+), 0 deletions(-)
 create mode 100644 crypto/skein.h
 create mode 100644 crypto/skein1024_generic.c
 create mode 100644 crypto/skein256_generic.c
 create mode 100644 crypto/skein512_generic.c

Jeff Garzik (1):
   [CRYPTO] Add Skein hash algorithm, 256-, 512-, and 1024-bit variants
   
   Import the public domain reference implementation of the Skein hash
   algorithm into the Linux Crypto API.  This is a prominent submission
   to NIST's competition for SHA-3.
   
   See Skein website for more info: http://www.schneier.com/skein.html
   
   This is just a rough import, and still needs more cleaning and Linux-ifying.
   
   Signed-off-by: Jeff Garzik <[EMAIL PROTECTED]>


diff --git a/crypto/Kconfig b/crypto/Kconfig
index 39dbd8e..f18868f 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -352,6 +352,18 @@ config CRYPTO_SHA512
          This code also includes SHA-384, a 384 bit hash with 192 bits
          of security against collision attacks.
 
+config CRYPTO_SKEIN256
+       tristate "Skein-256(256) digest algorithm"
+       select CRYPTO_ALGAPI
+
+config CRYPTO_SKEIN512
+       tristate "Skein-512(512) digest algorithm"
+       select CRYPTO_ALGAPI
+
+config CRYPTO_SKEIN1024
+       tristate "Skein-1024(1024) digest algorithm"
+       select CRYPTO_ALGAPI
+
 config CRYPTO_TGR192
        tristate "Tiger digest algorithms"
        select CRYPTO_ALGAPI
diff --git a/crypto/Makefile b/crypto/Makefile
index 5862b80..10c3ca8 100644
--- a/crypto/Makefile
+++ b/crypto/Makefile
@@ -39,6 +39,9 @@ obj-$(CONFIG_CRYPTO_RMD320) += rmd320.o
 obj-$(CONFIG_CRYPTO_SHA1) += sha1_generic.o
 obj-$(CONFIG_CRYPTO_SHA256) += sha256_generic.o
 obj-$(CONFIG_CRYPTO_SHA512) += sha512_generic.o
+obj-$(CONFIG_CRYPTO_SKEIN256) += skein256_generic.o
+obj-$(CONFIG_CRYPTO_SKEIN512) += skein512_generic.o
+obj-$(CONFIG_CRYPTO_SKEIN1024) += skein1024_generic.o
 obj-$(CONFIG_CRYPTO_WP512) += wp512.o
 obj-$(CONFIG_CRYPTO_TGR192) += tgr192.o
 obj-$(CONFIG_CRYPTO_GF128MUL) += gf128mul.o
diff --git a/crypto/skein.h b/crypto/skein.h
new file mode 100644
index 0000000..2753b55
--- /dev/null
+++ b/crypto/skein.h
@@ -0,0 +1,265 @@
+#ifndef _SKEIN_H_
+#define _SKEIN_H_     1
+/**************************************************************************
+**
+** Interface declarations and internal definitions for Skein hashing.
+**
+** Source code author: Doug Whiting, 2008.
+**
+** This algorithm and source code is released to the public domain.
+**
+***************************************************************************
+** 
+** The following compile-time switches may be defined to control some
+** tradeoffs between speed, code size, error checking, and security.
+**
+** The "default" note explains what happens when the switch is not defined.
+**
+**  SKEIN_DEBUG            -- make callouts from inside Skein code
+**                            to examine/display intermediate values.
+**                            [default: no callouts (no overhead)]
+**
+**  SKEIN_ERR_CHECK        -- how error checking is handled inside Skein
+**                            code. If not defined, most error checking
+**                            is disabled (for performance). Otherwise,
+**                            caller errors (Skein_Assert) are flagged by:
+**                                SKEIN_ASSERT defined:   a failed assertion
+**                                SKEIN_ASSERT undefined: returning an error code
+**
+***************************************************************************/
+
+#include <linux/types.h>
+
+enum {                         /* return codes from Skein calls */
+       SKEIN_SUCCESS = 0,      /* call completed normally */
+       SKEIN_FAIL = 1,         /* generic failure, e.g. a context that fails its sanity check */
+       SKEIN_BAD_HASHLEN = 2   /* requested hash bit length was rejected */
+};
+
+#define  SKEIN_MODIFIER_WORDS  ( 2)    /* number of modifier (tweak) words */
+
+#define  SKEIN_256_STATE_WORDS ( 4)    /* state size of each variant, in 64-bit words */
+#define  SKEIN_512_STATE_WORDS ( 8)
+#define  SKEIN1024_STATE_WORDS (16)
+#define  SKEIN_MAX_STATE_WORDS (16)    /* largest of the three word counts above */
+
+#define  SKEIN_256_STATE_BYTES ( 8*SKEIN_256_STATE_WORDS)      /* state size in bytes (8 per word) */
+#define  SKEIN_512_STATE_BYTES ( 8*SKEIN_512_STATE_WORDS)
+#define  SKEIN1024_STATE_BYTES ( 8*SKEIN1024_STATE_WORDS)
+
+#define  SKEIN_256_STATE_BITS  (64*SKEIN_256_STATE_WORDS)      /* state size in bits (64 per word) */
+#define  SKEIN_512_STATE_BITS  (64*SKEIN_512_STATE_WORDS)
+#define  SKEIN1024_STATE_BITS  (64*SKEIN1024_STATE_WORDS)
+
+#define  SKEIN_256_BLOCK_BYTES ( 8*SKEIN_256_STATE_WORDS)      /* input block size in bytes; same as the state size */
+#define  SKEIN_512_BLOCK_BYTES ( 8*SKEIN_512_STATE_WORDS)
+#define  SKEIN1024_BLOCK_BYTES ( 8*SKEIN1024_STATE_WORDS)
+
+typedef struct {               /* header embedded as member h of every Skein context structure */
+       size_t hashBitLen;      /* size of hash result, in bits */
+       size_t bCnt;            /* current byte count in buffer b[] */
+       u64 T[SKEIN_MODIFIER_WORDS];    /* tweak words: T[0]=byte cnt, T[1]=flags */
+} Skein_Ctxt_Hdr_t;
+
+struct skein256_ctx {          /*  256-bit Skein hash context structure */
+       Skein_Ctxt_Hdr_t h;     /* common header context variables */
+       u64 X[SKEIN_256_STATE_WORDS];   /* chaining variables */
+       u8 b[SKEIN_256_BLOCK_BYTES];    /* partial block buffer (8-byte aligned) */
+};
+
+struct skein512_ctx {          /*  512-bit Skein hash context structure */
+       Skein_Ctxt_Hdr_t h;     /* common header context variables */
+       u64 X[SKEIN_512_STATE_WORDS];   /* chaining variables */
+       u8 b[SKEIN_512_BLOCK_BYTES];    /* partial block buffer (8-byte aligned) */
+};
+
+struct skein1024_ctx {         /* 1024-bit Skein hash context structure */
+       Skein_Ctxt_Hdr_t h;     /* common header context variables */
+       u64 X[SKEIN1024_STATE_WORDS];   /* chaining variables, rewritten after every processed block */
+       u8 b[SKEIN1024_BLOCK_BYTES];    /* partial block buffer (8-byte aligned) */
+};
+
+/*****************************************************************
+** "Internal" Skein definitions
+**    -- not needed for sequential hashing API, but will be 
+**           helpful for other uses of Skein (e.g., tree hash mode).
+**    -- included here so that they can be shared between
+**           reference and optimized code.
+******************************************************************/
+
+/* tweak word T[1]: bit field starting positions (bit numbers count across the 128-bit T[0]:T[1] pair) */
+#define SKEIN_T1_BIT(BIT)       ((BIT) - 64)   /* offset 64 because it's the second word  */
+
+#define SKEIN_T1_POS_TREE_LVL   SKEIN_T1_BIT(112)      /* bits 112..118: level in hash tree       */
+#define SKEIN_T1_POS_BIT_PAD    SKEIN_T1_BIT(119)      /* bit  119     : partial final input byte */
+#define SKEIN_T1_POS_BLK_TYPE   SKEIN_T1_BIT(120)      /* bits 120..125: type field               */
+#define SKEIN_T1_POS_FIRST      SKEIN_T1_BIT(126)      /* bit  126     : first block flag         */
+#define SKEIN_T1_POS_FINAL      SKEIN_T1_BIT(127)      /* bit  127     : final block flag         */
+
+/* tweak word T[1]: flag bit definition(s), already shifted into place */
+#define SKEIN_T1_FLAG_FIRST     (((u64)  1 ) << SKEIN_T1_POS_FIRST)
+#define SKEIN_T1_FLAG_FINAL     (((u64)  1 ) << SKEIN_T1_POS_FINAL)
+#define SKEIN_T1_FLAG_BIT_PAD   (((u64)  1 ) << SKEIN_T1_POS_BIT_PAD)
+
+/* tweak word T[1]: tree level bit field mask (7 bits wide) and value-to-field helper */
+#define SKEIN_T1_TREE_LVL_MASK  (((u64)0x7F) << SKEIN_T1_POS_TREE_LVL)
+#define        SKEIN_T1_TREE_LEVEL(n)  (((u64) (n)) << SKEIN_T1_POS_TREE_LVL)
+
+/* tweak word T[1]: block type field (raw 6-bit values, not yet shifted into T[1]) */
+#define SKEIN_BLK_TYPE_KEY      ( 0)   /* key, for MAC and KDF */
+#define SKEIN_BLK_TYPE_CFG      ( 4)   /* configuration block */
+#define SKEIN_BLK_TYPE_PERS     ( 8)   /* personalization string */
+#define SKEIN_BLK_TYPE_PK       (12)   /* public key (for digital signature hashing) */
+#define SKEIN_BLK_TYPE_KDF      (16)   /* key identifier for KDF */
+#define SKEIN_BLK_TYPE_NONCE    (20)   /* nonce for PRNG */
+#define SKEIN_BLK_TYPE_MSG      (48)   /* message processing */
+#define SKEIN_BLK_TYPE_OUT      (63)   /* output stage */
+#define SKEIN_BLK_TYPE_MASK     (63)   /* bit field mask */
+
+#define SKEIN_T1_BLK_TYPE(T)   (((u64) (SKEIN_BLK_TYPE_##T)) << SKEIN_T1_POS_BLK_TYPE)
+#define SKEIN_T1_BLK_TYPE_KEY   SKEIN_T1_BLK_TYPE(KEY) /* key, for MAC and KDF */
+#define SKEIN_T1_BLK_TYPE_CFG   SKEIN_T1_BLK_TYPE(CFG) /* configuration block */
+#define SKEIN_T1_BLK_TYPE_PERS  SKEIN_T1_BLK_TYPE(PERS)        /* personalization string */
+#define SKEIN_T1_BLK_TYPE_PK    SKEIN_T1_BLK_TYPE(PK)  /* public key (for digital signature hashing) */
+#define SKEIN_T1_BLK_TYPE_KDF   SKEIN_T1_BLK_TYPE(KDF) /* key identifier for KDF */
+#define SKEIN_T1_BLK_TYPE_NONCE SKEIN_T1_BLK_TYPE(NONCE)       /* nonce for PRNG */
+#define SKEIN_T1_BLK_TYPE_MSG   SKEIN_T1_BLK_TYPE(MSG) /* message processing */
+#define SKEIN_T1_BLK_TYPE_OUT   SKEIN_T1_BLK_TYPE(OUT) /* output stage */
+#define SKEIN_T1_BLK_TYPE_MASK  SKEIN_T1_BLK_TYPE(MASK)        /* field bit mask */
+
+#define SKEIN_T1_BLK_TYPE_CFG_FINAL       (SKEIN_T1_BLK_TYPE_CFG | SKEIN_T1_FLAG_FINAL)
+#define SKEIN_T1_BLK_TYPE_OUT_FINAL       (SKEIN_T1_BLK_TYPE_OUT | SKEIN_T1_FLAG_FINAL)
+
+#define SKEIN_VERSION           (1)    /* becomes the high 32 bits of SKEIN_SCHEMA_VER */
+
+#ifndef SKEIN_ID_STRING_LE     /* allow compile-time personalization */
+#define SKEIN_ID_STRING_LE      (0x33414853)   /* "SHA3" (little-endian) */
+#endif
+
+#define SKEIN_MK_64(hi32,lo32)  ((lo32) + (((u64) (hi32)) << 32))      /* build a 64-bit value from two 32-bit halves */
+#define SKEIN_SCHEMA_VER        SKEIN_MK_64(SKEIN_VERSION,SKEIN_ID_STRING_LE)  /* stored in word 0 of the config block */
+#define SKEIN_KS_PARITY         SKEIN_MK_64(0x55555555,0x55555555)     /* seed of the extra key-schedule parity word */
+
+/* bit field definitions in config block treeInfo word: three 8-bit fields at bits 0, 8 and 16 */
+#define SKEIN_CFG_TREE_LEAF_SIZE_POS  ( 0)
+#define SKEIN_CFG_TREE_NODE_SIZE_POS  ( 8)
+#define SKEIN_CFG_TREE_MAX_LEVEL_POS  (16)
+
+#define SKEIN_CFG_TREE_LEAF_SIZE_MSK  (((u64) 0xFF) << SKEIN_CFG_TREE_LEAF_SIZE_POS)
+#define SKEIN_CFG_TREE_NODE_SIZE_MSK  (((u64) 0xFF) << SKEIN_CFG_TREE_NODE_SIZE_POS)
+#define SKEIN_CFG_TREE_MAX_LEVEL_MSK  (((u64) 0xFF) << SKEIN_CFG_TREE_MAX_LEVEL_POS)
+
+#define SKEIN_CFG_TREE_INFO_SEQUENTIAL (0)     /* use as treeInfo in InitExt() call for sequential processing */
+#define SKEIN_CFG_TREE_INFO(leaf,node,maxLevel) ((((u64) (leaf)) << SKEIN_CFG_TREE_LEAF_SIZE_POS) | (((u64) (node)) << SKEIN_CFG_TREE_NODE_SIZE_POS) | (((u64) (maxLevel)) << SKEIN_CFG_TREE_MAX_LEVEL_POS))
+
+/*
+**   Skein macros for getting/setting tweak words, etc.
+**   These are useful for partial input bytes, hash tree init/update, etc.
+**/
+#define Skein_Get_Tweak(ctxPtr,TWK_NUM)         ((ctxPtr)->h.T[TWK_NUM])
+#define Skein_Set_Tweak(ctxPtr,TWK_NUM,tVal)    {(ctxPtr)->h.T[TWK_NUM] = (tVal);}
+
+#define Skein_Get_T0(ctxPtr)    Skein_Get_Tweak(ctxPtr,0)
+#define Skein_Get_T1(ctxPtr)    Skein_Get_Tweak(ctxPtr,1)
+#define Skein_Set_T0(ctxPtr,T0) Skein_Set_Tweak(ctxPtr,0,T0)
+#define Skein_Set_T1(ctxPtr,T1) Skein_Set_Tweak(ctxPtr,1,T1)
+
+/* set both tweak words at once */
+#define Skein_Set_T0_T1(ctxPtr,T0,T1)           \
+    {                                           \
+    Skein_Set_T0(ctxPtr,(T0));                  \
+    Skein_Set_T1(ctxPtr,(T1));                  \
+    }
+
+#define Skein_Set_Type(ctxPtr,BLK_TYPE)         \
+    Skein_Set_T1(ctxPtr,SKEIN_T1_BLK_TYPE_##BLK_TYPE)
+
+/* set up for starting with a new type: h.T[0]=0; h.T[1] = FIRST flag | NEW_TYPE; h.bCnt=0; */
+#define Skein_Start_New_Type(ctxPtr,BLK_TYPE)   \
+    { Skein_Set_T0_T1(ctxPtr,0,SKEIN_T1_FLAG_FIRST | SKEIN_T1_BLK_TYPE_##BLK_TYPE); (ctxPtr)->h.bCnt=0; }
+
+#define Skein_Clear_First_Flag(hdr)         { (hdr).T[1] &= ~SKEIN_T1_FLAG_FIRST;       }
+#define Skein_Set_Bit_Pad_Flag(hdr)      { (hdr).T[1] |=  SKEIN_T1_FLAG_BIT_PAD;     }
+
+#define Skein_Set_Tree_Level(hdr,height) { (hdr).T[1] |= SKEIN_T1_TREE_LEVEL(height);}
+
+/*****************************************************************
+** "Internal" Skein definitions for debugging and error checking
+******************************************************************/
+#ifdef  SKEIN_DEBUG            /* examine/display intermediate values? */
+#include "skein_debug.h"       /* NOTE(review): this header is not among the files this patch creates */
+#else /* default is no callouts: every Skein_Show_* macro expands to nothing */
+#define Skein_Show_Block(bits,ctx,X,blkPtr,wPtr,ksEvenPtr,ksOddPtr)
+#define Skein_Show_Round(bits,ctx,r,X)
+#define Skein_Show_R_Ptr(bits,ctx,r,X_ptr)
+#define Skein_Show_Final(bits,ctx,cnt,outPtr)
+#define Skein_Show_Key(bits,ctx,key,keyBytes)
+#endif
+
+#ifndef SKEIN_ERR_CHECK                /* run-time checks (e.g., bad params, 
uninitialized context)? */
+#define Skein_Assert(x,retCode)        /* default: ignore all Asserts, for 
performance */
+#define Skein_assert(x)
+#elif   defined(SKEIN_ASSERT)
+#include <assert.h>
+#define Skein_Assert(x,retCode) assert(x)
+#define Skein_assert(x)         assert(x)
+#else
+#include <assert.h>
+#define Skein_Assert(x,retCode) { if (!(x)) return retCode; }  /*  caller  
error */
+#define Skein_assert(x)         assert(x)      /* internal error */
+#endif
+
+/*****************************************************************
+** Skein block function constants (shared across Ref and Opt code)
+******************************************************************/
+enum {
+       /* Skein_256 round rotation constants */
+       R_256_0_0 = 5, R_256_0_1 = 56,
+       R_256_1_0 = 36, R_256_1_1 = 28,
+       R_256_2_0 = 13, R_256_2_1 = 46,
+       R_256_3_0 = 58, R_256_3_1 = 44,
+       R_256_4_0 = 26, R_256_4_1 = 20,
+       R_256_5_0 = 53, R_256_5_1 = 35,
+       R_256_6_0 = 11, R_256_6_1 = 42,
+       R_256_7_0 = 59, R_256_7_1 = 50,
+
+       /* Skein_512 round rotation constants */
+       R_512_0_0 = 38, R_512_0_1 = 30, R_512_0_2 = 50, R_512_0_3 = 53,
+       R_512_1_0 = 48, R_512_1_1 = 20, R_512_1_2 = 43, R_512_1_3 = 31,
+       R_512_2_0 = 34, R_512_2_1 = 14, R_512_2_2 = 15, R_512_2_3 = 27,
+       R_512_3_0 = 26, R_512_3_1 = 12, R_512_3_2 = 58, R_512_3_3 = 7,
+       R_512_4_0 = 33, R_512_4_1 = 49, R_512_4_2 = 8, R_512_4_3 = 42,
+       R_512_5_0 = 39, R_512_5_1 = 27, R_512_5_2 = 41, R_512_5_3 = 14,
+       R_512_6_0 = 29, R_512_6_1 = 26, R_512_6_2 = 11, R_512_6_3 = 9,
+       R_512_7_0 = 33, R_512_7_1 = 51, R_512_7_2 = 39, R_512_7_3 = 35,
+
+       /* Skein1024 round rotation constants: R1024_<d>_<j> rotates the j-th word pair in round d of each 8-round group */
+       R1024_0_0 = 55, R1024_0_1 = 43, R1024_0_2 = 37, R1024_0_3 =
+           40, R1024_0_4 = 16, R1024_0_5 = 22, R1024_0_6 = 38, R1024_0_7 = 12,
+       R1024_1_0 = 25, R1024_1_1 = 25, R1024_1_2 = 46, R1024_1_3 =
+           13, R1024_1_4 = 14, R1024_1_5 = 13, R1024_1_6 = 52, R1024_1_7 = 57,
+       R1024_2_0 = 33, R1024_2_1 = 8, R1024_2_2 = 18, R1024_2_3 =
+           57, R1024_2_4 = 21, R1024_2_5 = 12, R1024_2_6 = 32, R1024_2_7 = 54,
+       R1024_3_0 = 34, R1024_3_1 = 43, R1024_3_2 = 25, R1024_3_3 =
+           60, R1024_3_4 = 44, R1024_3_5 = 9, R1024_3_6 = 59, R1024_3_7 = 34,
+       R1024_4_0 = 28, R1024_4_1 = 7, R1024_4_2 = 47, R1024_4_3 =
+           48, R1024_4_4 = 51, R1024_4_5 = 9, R1024_4_6 = 35, R1024_4_7 = 41,
+       R1024_5_0 = 17, R1024_5_1 = 6, R1024_5_2 = 18, R1024_5_3 =
+           25, R1024_5_4 = 43, R1024_5_5 = 42, R1024_5_6 = 40, R1024_5_7 = 15,
+       R1024_6_0 = 58, R1024_6_1 = 7, R1024_6_2 = 32, R1024_6_3 =
+           45, R1024_6_4 = 19, R1024_6_5 = 18, R1024_6_6 = 2, R1024_6_7 = 56,
+       R1024_7_0 = 47, R1024_7_1 = 49, R1024_7_2 = 27, R1024_7_3 =
+           58, R1024_7_4 = 37, R1024_7_5 = 48, R1024_7_6 = 53, R1024_7_7 = 56
+};
+
+#ifndef SKEIN_ROUNDS
+#define SKEIN_256_ROUNDS_TOTAL (72)    /* number of rounds for the different block sizes */
+#define SKEIN_512_ROUNDS_TOTAL (72)
+#define SKEIN1024_ROUNDS_TOTAL (80)
+#else /* allow command-line define in range 8*(5..14); a digit d of SKEIN_ROUNDS yields 8*(((d+5)%10)+5) rounds */
+#define SKEIN_256_ROUNDS_TOTAL (8*((((SKEIN_ROUNDS/100) + 5) % 10) + 5))       /* driven by SKEIN_ROUNDS/100 */
+#define SKEIN_512_ROUNDS_TOTAL (8*((((SKEIN_ROUNDS/ 10) + 5) % 10) + 5))       /* driven by the tens digit */
+#define SKEIN1024_ROUNDS_TOTAL (8*((((SKEIN_ROUNDS    ) + 5) % 10) + 5))       /* driven by the ones digit */
+#endif
+
+#endif /* ifndef _SKEIN_H_ */
diff --git a/crypto/skein1024_generic.c b/crypto/skein1024_generic.c
new file mode 100644
index 0000000..45cf122
--- /dev/null
+++ b/crypto/skein1024_generic.c
@@ -0,0 +1,518 @@
+/***********************************************************************
+**
+** Implementation of the Skein hash function.
+**
+** Source code author: Doug Whiting, 2008.
+**
+** This algorithm and source code is released to the public domain.
+** 
+************************************************************************/
+
+#include <linux/string.h>
+#include <linux/crypto.h>
+#include "skein.h"             /* get the Skein API definitions   */
+
+#define SKEIN1024_BLOCK_SIZE   SKEIN1024_BLOCK_BYTES   /* input block size, in bytes */
+#define SKEIN1024_DIGEST_BITS  1024    /* output digest width, in bits */
+#define SKEIN1024_DIGEST_SIZE  (SKEIN1024_DIGEST_BITS / 8)     /* output digest size, in bytes */
+
+static int Skein1024_Init(struct skein1024_ctx * ctx, size_t hashBitLen);      /* init the context for a straight hashing operation */
+static int Skein1024_Update(struct skein1024_ctx * ctx, const u8 * msg, size_t msgByteCnt);    /* process the input bytes */
+static int Skein1024_Final(struct skein1024_ctx * ctx, u8 * hashVal);
+
+/*****************************************************************/
+/*     Portable (i.e., slow) endianness conversion functions     */
+static u64 Skein_Swap64(u64 w64)
+{      /* return w64 unchanged on a little-endian host, byte-reversed on a big-endian one */
+       static const u64 probe = 1;     /* its lowest-addressed byte is 1 only on little-endian */
+       const u8 *first = (const u8 *) &probe;
+       u64 swapped = 0;
+       unsigned int k;
+
+       if (first[0] == 1)              /* little-endian: nothing to do */
+               return w64;
+
+       /* big-endian: move byte k (counted from the LSB) to byte position 7-k */
+       for (k = 0; k < 8; k++)
+               swapped |= ((w64 >> (8 * k)) & 0xFF) << (8 * (7 - k));
+
+       return swapped;
+}
+
+static void Skein_Put64_LSB_First(u8 * dst, const u64 * src, size_t bCnt)
+{      /* write the first bCnt bytes of src[] to dst[], each word least-significant byte first; works on any host byte order */
+       size_t pos;
+       /* output byte pos lives in word pos/8, at bit offset 8*(pos%8) */
+       for (pos = 0; pos < bCnt; pos++)
+               dst[pos] = (u8) (src[pos / 8] >> (8 * (pos % 8)));
+}
+
+static void Skein_Get64_LSB_First(u64 * dst, const u8 * src, size_t wCnt)
+{      /* build wCnt 64-bit words in dst[] from 8*wCnt bytes of src[], least-significant byte first; works on any host byte order */
+       size_t word, byte;
+
+       for (word = 0; word < wCnt; word++) {
+               const u8 *in = src + 8 * word;  /* the eight input bytes of this word */
+               u64 acc = 0;
+               /* in[0] supplies bits 0..7, in[7] supplies bits 56..63 */
+               for (byte = 0; byte < 8; byte++)
+                       acc |= ((u64) in[byte]) << (8 * byte);
+               dst[word] = acc;
+       }
+}
+
+static u64 RotL_64(u64 x, unsigned int N)
+{      /* 64-bit rotate left by N bits; both shift counts are masked to 0..63, so N == 0 returns x */
+       const unsigned int up = N & 63, down = (64 - N) & 63;
+       return (x << up) | (x >> down);
+}
+
+#define BLK_BITS    (WCNT*64)
+
+/* macro to perform a key injection (same for all block sizes) */
+#define InjectKey(r)                                                \
+    for (i=0;i < WCNT;i++)                                          \
+         X[i] += ks[((r)+i) % (WCNT+1)];                            \
+    X[WCNT-3] += ts[((r)+0) % 3];                                   \
+    X[WCNT-2] += ts[((r)+1) % 3];                                   \
+    X[WCNT-1] += (r);                    /* avoid slide attacks */  \
+    Skein_Show_Round(BLK_BITS,&ctx->h,SKEIN_RND_KEY_INJECT,X);
+
+static void Skein1024_Process_Block(struct skein1024_ctx *ctx, const u8 *blkPtr,
+                                   size_t blkCnt, size_t byteCntAdd)
+{                              /* fold blkCnt consecutive SKEIN1024_BLOCK_BYTES-sized blocks at blkPtr into ctx->X; byteCntAdd is added to T[0] before each block */
+       enum {
+               WCNT = SKEIN1024_STATE_WORDS
+       };
+
+       size_t i, r;
+       u64 ts[3];              /* key schedule: tweak */
+       u64 ks[WCNT + 1];       /* key schedule: chaining vars */
+       u64 X[WCNT];            /* local copy of vars */
+       u64 w[WCNT];            /* local copy of input block */
+
+       Skein_assert(blkCnt != 0);      /* never call with blkCnt == 0! (the do/while body runs before blkCnt is tested) */
+       do {
+               /* this implementation only supports 2**64 input bytes (no carry out here) */
+               ctx->h.T[0] += byteCntAdd;      /* update processed length */
+
+               /* precompute the key schedule for this block */
+               ks[WCNT] = SKEIN_KS_PARITY;
+               for (i = 0; i < WCNT; i++) {
+                       ks[i] = ctx->X[i];
+                       ks[WCNT] ^= ctx->X[i];  /* compute overall parity */
+               }
+               ts[0] = ctx->h.T[0];
+               ts[1] = ctx->h.T[1];
+               ts[2] = ts[0] ^ ts[1];
+
+               Skein_Get64_LSB_First(w, blkPtr, WCNT); /* get input block in little-endian format */
+               Skein_Show_Block(BLK_BITS, &ctx->h, ctx->X, blkPtr, w, ks, ts);
+               for (i = 0; i < WCNT; i++) {    /* do the first full key injection */
+                       X[i] = w[i] + ks[i];
+               }
+               X[WCNT - 3] += ts[0];
+               X[WCNT - 2] += ts[1];
+
+               Skein_Show_Round(BLK_BITS, &ctx->h, SKEIN_RND_KEY_INITIAL, X);  /* show starting state values */
+               for (r = 1; r <= SKEIN1024_ROUNDS_TOTAL / 8; r++) {     /* unroll 8 rounds; each step below is add, rotate, xor on one word pair */
+                       X[0] += X[1];
+                       X[1] = RotL_64(X[1], R1024_0_0);
+                       X[1] ^= X[0];
+                       X[2] += X[3];
+                       X[3] = RotL_64(X[3], R1024_0_1);
+                       X[3] ^= X[2];
+                       X[4] += X[5];
+                       X[5] = RotL_64(X[5], R1024_0_2);
+                       X[5] ^= X[4];
+                       X[6] += X[7];
+                       X[7] = RotL_64(X[7], R1024_0_3);
+                       X[7] ^= X[6];
+                       X[8] += X[9];
+                       X[9] = RotL_64(X[9], R1024_0_4);
+                       X[9] ^= X[8];
+                       X[10] += X[11];
+                       X[11] = RotL_64(X[11], R1024_0_5);
+                       X[11] ^= X[10];
+                       X[12] += X[13];
+                       X[13] = RotL_64(X[13], R1024_0_6);
+                       X[13] ^= X[12];
+                       X[14] += X[15];
+                       X[15] = RotL_64(X[15], R1024_0_7);
+                       X[15] ^= X[14];
+                       Skein_Show_Round(BLK_BITS, &ctx->h, 8 * r - 7, X);
+
+                       X[0] += X[9];
+                       X[9] = RotL_64(X[9], R1024_1_0);
+                       X[9] ^= X[0];
+                       X[2] += X[13];
+                       X[13] = RotL_64(X[13], R1024_1_1);
+                       X[13] ^= X[2];
+                       X[6] += X[11];
+                       X[11] = RotL_64(X[11], R1024_1_2);
+                       X[11] ^= X[6];
+                       X[4] += X[15];
+                       X[15] = RotL_64(X[15], R1024_1_3);
+                       X[15] ^= X[4];
+                       X[10] += X[7];
+                       X[7] = RotL_64(X[7], R1024_1_4);
+                       X[7] ^= X[10];
+                       X[12] += X[3];
+                       X[3] = RotL_64(X[3], R1024_1_5);
+                       X[3] ^= X[12];
+                       X[14] += X[5];
+                       X[5] = RotL_64(X[5], R1024_1_6);
+                       X[5] ^= X[14];
+                       X[8] += X[1];
+                       X[1] = RotL_64(X[1], R1024_1_7);
+                       X[1] ^= X[8];
+                       Skein_Show_Round(BLK_BITS, &ctx->h, 8 * r - 6, X);
+
+                       X[0] += X[7];
+                       X[7] = RotL_64(X[7], R1024_2_0);
+                       X[7] ^= X[0];
+                       X[2] += X[5];
+                       X[5] = RotL_64(X[5], R1024_2_1);
+                       X[5] ^= X[2];
+                       X[4] += X[3];
+                       X[3] = RotL_64(X[3], R1024_2_2);
+                       X[3] ^= X[4];
+                       X[6] += X[1];
+                       X[1] = RotL_64(X[1], R1024_2_3);
+                       X[1] ^= X[6];
+                       X[12] += X[15];
+                       X[15] = RotL_64(X[15], R1024_2_4);
+                       X[15] ^= X[12];
+                       X[14] += X[13];
+                       X[13] = RotL_64(X[13], R1024_2_5);
+                       X[13] ^= X[14];
+                       X[8] += X[11];
+                       X[11] = RotL_64(X[11], R1024_2_6);
+                       X[11] ^= X[8];
+                       X[10] += X[9];
+                       X[9] = RotL_64(X[9], R1024_2_7);
+                       X[9] ^= X[10];
+                       Skein_Show_Round(BLK_BITS, &ctx->h, 8 * r - 5, X);
+
+                       X[0] += X[15];
+                       X[15] = RotL_64(X[15], R1024_3_0);
+                       X[15] ^= X[0];
+                       X[2] += X[11];
+                       X[11] = RotL_64(X[11], R1024_3_1);
+                       X[11] ^= X[2];
+                       X[6] += X[13];
+                       X[13] = RotL_64(X[13], R1024_3_2);
+                       X[13] ^= X[6];
+                       X[4] += X[9];
+                       X[9] = RotL_64(X[9], R1024_3_3);
+                       X[9] ^= X[4];
+                       X[14] += X[1];
+                       X[1] = RotL_64(X[1], R1024_3_4);
+                       X[1] ^= X[14];
+                       X[8] += X[5];
+                       X[5] = RotL_64(X[5], R1024_3_5);
+                       X[5] ^= X[8];
+                       X[10] += X[3];
+                       X[3] = RotL_64(X[3], R1024_3_6);
+                       X[3] ^= X[10];
+                       X[12] += X[7];
+                       X[7] = RotL_64(X[7], R1024_3_7);
+                       X[7] ^= X[12];
+                       Skein_Show_Round(BLK_BITS, &ctx->h, 8 * r - 4, X);
+                       InjectKey(2 * r - 1);   /* key injection after the first four rounds of this pass */
+
+                       X[0] += X[1];
+                       X[1] = RotL_64(X[1], R1024_4_0);
+                       X[1] ^= X[0];
+                       X[2] += X[3];
+                       X[3] = RotL_64(X[3], R1024_4_1);
+                       X[3] ^= X[2];
+                       X[4] += X[5];
+                       X[5] = RotL_64(X[5], R1024_4_2);
+                       X[5] ^= X[4];
+                       X[6] += X[7];
+                       X[7] = RotL_64(X[7], R1024_4_3);
+                       X[7] ^= X[6];
+                       X[8] += X[9];
+                       X[9] = RotL_64(X[9], R1024_4_4);
+                       X[9] ^= X[8];
+                       X[10] += X[11];
+                       X[11] = RotL_64(X[11], R1024_4_5);
+                       X[11] ^= X[10];
+                       X[12] += X[13];
+                       X[13] = RotL_64(X[13], R1024_4_6);
+                       X[13] ^= X[12];
+                       X[14] += X[15];
+                       X[15] = RotL_64(X[15], R1024_4_7);
+                       X[15] ^= X[14];
+                       Skein_Show_Round(BLK_BITS, &ctx->h, 8 * r - 3, X);
+
+                       X[0] += X[9];
+                       X[9] = RotL_64(X[9], R1024_5_0);
+                       X[9] ^= X[0];
+                       X[2] += X[13];
+                       X[13] = RotL_64(X[13], R1024_5_1);
+                       X[13] ^= X[2];
+                       X[6] += X[11];
+                       X[11] = RotL_64(X[11], R1024_5_2);
+                       X[11] ^= X[6];
+                       X[4] += X[15];
+                       X[15] = RotL_64(X[15], R1024_5_3);
+                       X[15] ^= X[4];
+                       X[10] += X[7];
+                       X[7] = RotL_64(X[7], R1024_5_4);
+                       X[7] ^= X[10];
+                       X[12] += X[3];
+                       X[3] = RotL_64(X[3], R1024_5_5);
+                       X[3] ^= X[12];
+                       X[14] += X[5];
+                       X[5] = RotL_64(X[5], R1024_5_6);
+                       X[5] ^= X[14];
+                       X[8] += X[1];
+                       X[1] = RotL_64(X[1], R1024_5_7);
+                       X[1] ^= X[8];
+                       Skein_Show_Round(BLK_BITS, &ctx->h, 8 * r - 2, X);
+
+                       X[0] += X[7];
+                       X[7] = RotL_64(X[7], R1024_6_0);
+                       X[7] ^= X[0];
+                       X[2] += X[5];
+                       X[5] = RotL_64(X[5], R1024_6_1);
+                       X[5] ^= X[2];
+                       X[4] += X[3];
+                       X[3] = RotL_64(X[3], R1024_6_2);
+                       X[3] ^= X[4];
+                       X[6] += X[1];
+                       X[1] = RotL_64(X[1], R1024_6_3);
+                       X[1] ^= X[6];
+                       X[12] += X[15];
+                       X[15] = RotL_64(X[15], R1024_6_4);
+                       X[15] ^= X[12];
+                       X[14] += X[13];
+                       X[13] = RotL_64(X[13], R1024_6_5);
+                       X[13] ^= X[14];
+                       X[8] += X[11];
+                       X[11] = RotL_64(X[11], R1024_6_6);
+                       X[11] ^= X[8];
+                       X[10] += X[9];
+                       X[9] = RotL_64(X[9], R1024_6_7);
+                       X[9] ^= X[10];
+                       Skein_Show_Round(BLK_BITS, &ctx->h, 8 * r - 1, X);
+
+                       X[0] += X[15];
+                       X[15] = RotL_64(X[15], R1024_7_0);
+                       X[15] ^= X[0];
+                       X[2] += X[11];
+                       X[11] = RotL_64(X[11], R1024_7_1);
+                       X[11] ^= X[2];
+                       X[6] += X[13];
+                       X[13] = RotL_64(X[13], R1024_7_2);
+                       X[13] ^= X[6];
+                       X[4] += X[9];
+                       X[9] = RotL_64(X[9], R1024_7_3);
+                       X[9] ^= X[4];
+                       X[14] += X[1];
+                       X[1] = RotL_64(X[1], R1024_7_4);
+                       X[1] ^= X[14];
+                       X[8] += X[5];
+                       X[5] = RotL_64(X[5], R1024_7_5);
+                       X[5] ^= X[8];
+                       X[10] += X[3];
+                       X[3] = RotL_64(X[3], R1024_7_6);
+                       X[3] ^= X[10];
+                       X[12] += X[7];
+                       X[7] = RotL_64(X[7], R1024_7_7);
+                       X[7] ^= X[12];
+                       Skein_Show_Round(BLK_BITS, &ctx->h, 8 * r, X);
+                       InjectKey(2 * r);       /* key injection after the last four rounds of this pass */
+               }
+               /* do the final "feedforward" xor, update context chaining vars */
+               for (i = 0; i < WCNT; i++)
+                       ctx->X[i] = X[i] ^ w[i];
+               Skein_Show_Round(BLK_BITS, &ctx->h, SKEIN_RND_FEED_FWD, ctx->X);
+
+               Skein_Clear_First_Flag(ctx->h); /* clear the start bit */
+               blkPtr += SKEIN1024_BLOCK_BYTES;        /* advance to the next input block */
+       }
+       while (--blkCnt);
+}
+
+/*****************************************************************/
+/*    1024-bit Skein                                             */
+/*****************************************************************/
+
+/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
+/* init the context for a straight hashing operation producing hashBitLen output bits */
+static int Skein1024_Init(struct skein1024_ctx *ctx, size_t hashBitLen)
+{
+       union {
+               u8 b[SKEIN1024_STATE_BYTES];
+               u64 w[SKEIN1024_STATE_WORDS];
+       } cfg;                  /* config block: filled in as words (w), processed as bytes (b) */
+
+       Skein_Assert(hashBitLen > 0, SKEIN_BAD_HASHLEN);        /* a zero-length digest is not allowed */
+
+       /* build/process config block for hashing */
+       ctx->h.hashBitLen = hashBitLen; /* output hash length in bits (read back by Skein1024_Final) */
+       Skein_Start_New_Type(ctx, CFG_FINAL);   /* set tweaks: T0=0; T1=CFG | FINAL */
+
+       memset(&cfg.w, 0, sizeof(cfg.w));       /* pre-pad cfg.w[] with zeroes */
+       cfg.w[0] = Skein_Swap64(SKEIN_SCHEMA_VER);      /* set the schema, version */
+       cfg.w[1] = Skein_Swap64(hashBitLen);    /* hash result length in bits */
+       cfg.w[2] = Skein_Swap64(SKEIN_CFG_TREE_INFO_SEQUENTIAL);
+
+       /* compute the initial chaining values from config block */
+       memset(ctx->X, 0, sizeof(ctx->X));      /* zero the chaining variables */
+       Skein1024_Process_Block(ctx, cfg.b, 1, sizeof(cfg));
+
+       /* The chaining vars ctx->X are now initialized for the given hashBitLen. */
+       /* Set up to process the data message portion of the hash (default) */
+       ctx->h.bCnt = 0;        /* buffer b[] starts out empty */
+       Skein_Start_New_Type(ctx, MSG); /* T0=0, T1= MSG type */
+
+       return SKEIN_SUCCESS;
+}
+
+/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
+/* process the input bytes: absorb msgByteCnt bytes of msg, keeping the unprocessed tail in ctx->b */
+static int Skein1024_Update(struct skein1024_ctx *ctx, const u8 * msg,
+                           size_t msgByteCnt)
+{
+       size_t n;
+
+       Skein_Assert(ctx->h.bCnt <= SKEIN1024_BLOCK_BYTES, SKEIN_FAIL); /* catch uninitialized context */
+
+       /* process full blocks, if any (only once more than one block of data is pending) */
+       if (msgByteCnt + ctx->h.bCnt > SKEIN1024_BLOCK_BYTES) {
+               if (ctx->h.bCnt) {      /* finish up any buffered message data */
+                       n = SKEIN1024_BLOCK_BYTES - ctx->h.bCnt;        /* # bytes free in buffer b[] */
+                       if (n) {
+                               Skein_assert(n < msgByteCnt);   /* check on our logic here */
+                               memcpy(&ctx->b[ctx->h.bCnt], msg, n);
+                               msgByteCnt -= n;
+                               msg += n;
+                               ctx->h.bCnt += n;
+                       }
+                       Skein_assert(ctx->h.bCnt == SKEIN1024_BLOCK_BYTES);
+                       Skein1024_Process_Block(ctx, ctx->b, 1,
+                                               SKEIN1024_BLOCK_BYTES);
+                       ctx->h.bCnt = 0;
+               }
+               /* now process any remaining full blocks, directly from input message data */
+               if (msgByteCnt > SKEIN1024_BLOCK_BYTES) {
+                       n = (msgByteCnt - 1) / SKEIN1024_BLOCK_BYTES;   /* number of full blocks to process; the -1 always leaves at least one byte behind */
+                       Skein1024_Process_Block(ctx, msg, n,
+                                               SKEIN1024_BLOCK_BYTES);
+                       msgByteCnt -= n * SKEIN1024_BLOCK_BYTES;
+                       msg += n * SKEIN1024_BLOCK_BYTES;
+               }
+               Skein_assert(ctx->h.bCnt == 0);
+       }
+
+       /* copy any remaining source message data bytes into b[] */
+       if (msgByteCnt) {
+               Skein_assert(msgByteCnt + ctx->h.bCnt <= SKEIN1024_BLOCK_BYTES);
+               memcpy(&ctx->b[ctx->h.bCnt], msg, msgByteCnt);
+               ctx->h.bCnt += msgByteCnt;
+       }
+
+       return SKEIN_SUCCESS;
+}
+
+/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
+/* finalize the hash computation and output the result: (hashBitLen + 7) / 8 bytes are written to hashVal */
+static int Skein1024_Final(struct skein1024_ctx *ctx, u8 * hashVal)
+{
+       size_t i, n, byteCnt;
+       u64 X[SKEIN1024_STATE_WORDS];
+       Skein_Assert(ctx->h.bCnt <= SKEIN1024_BLOCK_BYTES, SKEIN_FAIL); /* catch uninitialized context */
+
+       ctx->h.T[1] |= SKEIN_T1_FLAG_FINAL;     /* tag as the final block */
+       if (ctx->h.bCnt < SKEIN1024_BLOCK_BYTES)        /* zero pad b[] if necessary */
+               memset(&ctx->b[ctx->h.bCnt], 0,
+                      SKEIN1024_BLOCK_BYTES - ctx->h.bCnt);
+
+       Skein1024_Process_Block(ctx, ctx->b, 1, ctx->h.bCnt);   /* process the final block */
+
+       /* now output the result */
+       byteCnt = (ctx->h.hashBitLen + 7) >> 3; /* total number of output bytes */
+
+       /* run Threefish in "counter mode" to generate output */
+       memset(ctx->b, 0, sizeof(ctx->b));      /* zero out b[], so it can hold the counter */
+       memcpy(X, ctx->X, sizeof(X));   /* keep a local copy of counter mode "key" */
+       for (i = 0; i * SKEIN1024_BLOCK_BYTES < byteCnt; i++) {
+               ((u64 *) ctx->b)[0] = Skein_Swap64((u64) i);    /* build the counter block */
+               Skein_Start_New_Type(ctx, OUT_FINAL);
+               Skein1024_Process_Block(ctx, ctx->b, 1, sizeof(u64));   /* run "counter mode" */
+               n = byteCnt - i * SKEIN1024_BLOCK_BYTES;        /* number of output bytes left to go */
+               if (n >= SKEIN1024_BLOCK_BYTES)
+                       n = SKEIN1024_BLOCK_BYTES;
+               Skein_Put64_LSB_First(hashVal + i * SKEIN1024_BLOCK_BYTES, ctx->X, n);  /* "output" the ctr mode bytes */
+               Skein_Show_Final(1024, &ctx->h, n,
+                                hashVal + i * SKEIN1024_BLOCK_BYTES);
+               memcpy(ctx->X, X, sizeof(X));   /* restore the counter mode key for next time */
+       }
+       return SKEIN_SUCCESS;
+}
+
+/* Crypto API .dia_init hook: begin a new Skein-1024 hash whose output */
+/* length is fixed at SKEIN1024_DIGEST_BITS bits. */
+static void skein1024_init(struct crypto_tfm *tfm)
+{
+       Skein1024_Init(crypto_tfm_ctx(tfm), SKEIN1024_DIGEST_BITS);
+}
+
+/* Crypto API .dia_update hook: feed len bytes of data into the Skein-1024 */
+/* state held in the transform context. */
+static void skein1024_update(struct crypto_tfm *tfm, const u8 * data,
+                           unsigned int len)
+{
+       Skein1024_Update(crypto_tfm_ctx(tfm), data, len);
+}
+
+/* Crypto API .dia_final hook: finish the Skein-1024 hash and write the */
+/* digest bytes to out. */
+static void skein1024_final(struct crypto_tfm *tfm, u8 * out)
+{
+       Skein1024_Final(crypto_tfm_ctx(tfm), out);
+}
+
+static struct crypto_alg skein1024 = {         /* descriptor handed to crypto_register_alg() */
+       .cra_name = "skein1024",
+       .cra_driver_name = "skein1024-generic",
+       .cra_flags = CRYPTO_ALG_TYPE_DIGEST,
+       .cra_blocksize = SKEIN1024_BLOCK_SIZE,
+       .cra_ctxsize = sizeof(struct skein1024_ctx),    /* context the hooks fetch via crypto_tfm_ctx() */
+       .cra_module = THIS_MODULE,
+       .cra_alignmask = 3,
+       .cra_list = LIST_HEAD_INIT(skein1024.cra_list),
+       .cra_u = {.digest = {
+                            .dia_digestsize = SKEIN1024_DIGEST_SIZE,
+                            .dia_init = skein1024_init,        /* wraps Skein1024_Init */
+                            .dia_update = skein1024_update,    /* wraps Skein1024_Update */
+                            .dia_final = skein1024_final}}     /* wraps Skein1024_Final */
+};
+
+static int __init skein1024_generic_mod_init(void)
+{
+       int ret;
+
+       ret = crypto_register_alg(&skein1024);
+       if (ret < 0)
+               return ret;
+
+       return 0;
+}
+
+static void __exit skein1024_generic_mod_fini(void)
+{                              /* module unload hook, wired up through module_exit() */
+       crypto_unregister_alg(&skein1024);      /* drop the registration made in skein1024_generic_mod_init */
+}
+
+module_init(skein1024_generic_mod_init);
+module_exit(skein1024_generic_mod_fini);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Skein-1024 Secure Hash Algorithm");
+
+MODULE_ALIAS("skein1024");
diff --git a/crypto/skein256_generic.c b/crypto/skein256_generic.c
new file mode 100644
index 0000000..45adf6c
--- /dev/null
+++ b/crypto/skein256_generic.c
@@ -0,0 +1,367 @@
+/***********************************************************************
+**
+** Implementation of the Skein hash function.
+**
+** Source code author: Doug Whiting, 2008.
+**
+** This algorithm and source code is released to the public domain.
+** 
+************************************************************************/
+
+#include <linux/string.h>
+#include <linux/crypto.h>
+#include "skein.h"             /* get the Skein API definitions   */
+
+#define SKEIN256_BLOCK_SIZE    SKEIN_256_BLOCK_BYTES   /* reported as .cra_blocksize */
+#define SKEIN256_DIGEST_BITS   256     /* output length passed to Skein_256_Init */
+#define SKEIN256_DIGEST_SIZE   (SKEIN256_DIGEST_BITS / 8)      /* the same length in bytes */
+
+static int Skein_256_Init(struct skein256_ctx * ctx, size_t hashBitLen);
+static int Skein_256_Update(struct skein256_ctx * ctx, const u8 * msg, size_t msgByteCnt);
+static int Skein_256_Final(struct skein256_ctx * ctx, u8 * hashVal);
+
+/*****************************************************************/
+/*     Portable (i.e., slow) endianness conversion functions     */
+static u64 Skein_Swap64(u64 w64)
+{                              /* returns w64 unchanged on a little-endian CPU, byte-reversed on a big-endian one */
+       static const u64 ONE = 1;       /* use this to check endianness */
+
+       /* figure out endianness "on-the-fly": is the low byte of ONE stored first? */
+       if (1 == ((u8 *) & ONE)[0])
+               return w64;     /* little-endian is fast */
+       else
+               return ((w64 & 0xFF) << 56) |   /* big-endian is slow: reverse all eight bytes */
+                   (((w64 >> 8) & 0xFF) << 48) |
+                   (((w64 >> 16) & 0xFF) << 40) |
+                   (((w64 >> 24) & 0xFF) << 32) |
+                   (((w64 >> 32) & 0xFF) << 24) |
+                   (((w64 >> 40) & 0xFF) << 16) |
+                   (((w64 >> 48) & 0xFF) << 8) | (((w64 >> 56) & 0xFF));
+}
+
+static void Skein_Put64_LSB_First(u8 * dst, const u64 * src, size_t bCnt)
+{                              /* writes bCnt bytes of src[] to dst, low byte of each word first; fully portable (big-endian or little-endian), but slow */
+       size_t n;
+
+       for (n = 0; n < bCnt; n++)
+               dst[n] = (u8) (src[n >> 3] >> (8 * (n & 7)));   /* output byte n is byte (n & 7) of word (n >> 3) */
+}
+
+static void Skein_Get64_LSB_First(u64 * dst, const u8 * src, size_t wCnt)
+{                              /* reads wCnt 64-bit words from src, low byte first; fully portable (big-endian or little-endian), but slow */
+       size_t n;
+
+       for (n = 0; n < 8 * wCnt; n += 8)       /* n is a byte offset into src */
+               dst[n / 8] = (((u64) src[n])) +
+                   (((u64) src[n + 1]) << 8) +
+                   (((u64) src[n + 2]) << 16) +
+                   (((u64) src[n + 3]) << 24) +
+                   (((u64) src[n + 4]) << 32) +
+                   (((u64) src[n + 5]) << 40) +
+                   (((u64) src[n + 6]) << 48) + (((u64) src[n + 7]) << 56);
+}
+
+/* 64-bit rotate left by N bits; both shift counts are masked to 0..63, so N == 0 returns x */
+static u64 RotL_64(u64 x, unsigned int N)
+{
+       return (x << (N & 63)) | (x >> ((64 - N) & 63));
+}
+
+#define BLK_BITS    (WCNT*64)
+
+/* macro to perform a key injection (same for all block sizes) */
+#define InjectKey(r)                                                \
+    for (i=0;i < WCNT;i++)                                          \
+         X[i] += ks[((r)+i) % (WCNT+1)];                            \
+    X[WCNT-3] += ts[((r)+0) % 3];                                   \
+    X[WCNT-2] += ts[((r)+1) % 3];                                   \
+    X[WCNT-1] += (r);                    /* avoid slide attacks */  \
+    Skein_Show_Round(BLK_BITS,&ctx->h,SKEIN_RND_KEY_INJECT,X);
+
+static void Skein_256_Process_Block(struct skein256_ctx *ctx, const u8 *blkPtr,
+                                   size_t blkCnt, size_t byteCntAdd)
+{                              /* do it in C: fold blkCnt consecutive blocks at blkPtr into ctx->X */
+       enum {
+               WCNT = SKEIN_256_STATE_WORDS
+       };
+       size_t i, r;
+       u64 ts[3];              /* key schedule: tweak */
+       u64 ks[WCNT + 1];       /* key schedule: chaining vars */
+       u64 X[WCNT];            /* local copy of context vars */
+       u64 w[WCNT];            /* local copy of input block */
+
+       Skein_assert(blkCnt != 0);      /* never call with blkCnt == 0! */
+       do {
+               /* this implementation only supports 2**64 input bytes (no carry out here) */
+               ctx->h.T[0] += byteCntAdd;      /* update processed length (added once per block) */
+
+               /* precompute the key schedule for this block */
+               ks[WCNT] = SKEIN_KS_PARITY;
+               for (i = 0; i < WCNT; i++) {
+                       ks[i] = ctx->X[i];
+                       ks[WCNT] ^= ctx->X[i];  /* compute overall parity */
+               }
+               ts[0] = ctx->h.T[0];
+               ts[1] = ctx->h.T[1];
+               ts[2] = ts[0] ^ ts[1];
+
+               Skein_Get64_LSB_First(w, blkPtr, WCNT); /* get input block in little-endian format */
+               Skein_Show_Block(BLK_BITS, &ctx->h, ctx->X, blkPtr, w, ks, ts);
+               for (i = 0; i < WCNT; i++) {    /* do the first full key injection */
+                       X[i] = w[i] + ks[i];
+               }
+               X[WCNT - 3] += ts[0];
+               X[WCNT - 2] += ts[1];
+
+               Skein_Show_Round(BLK_BITS, &ctx->h, SKEIN_RND_KEY_INITIAL, X);  /* show starting state values */
+
+               for (r = 1; r <= SKEIN_256_ROUNDS_TOTAL / 8; r++) {     /* unroll 8 rounds */
+                       X[0] += X[1];
+                       X[1] = RotL_64(X[1], R_256_0_0);
+                       X[1] ^= X[0];
+                       X[2] += X[3];
+                       X[3] = RotL_64(X[3], R_256_0_1);
+                       X[3] ^= X[2];
+                       Skein_Show_Round(BLK_BITS, &ctx->h, 8 * r - 7, X);
+
+                       X[0] += X[3];
+                       X[3] = RotL_64(X[3], R_256_1_0);
+                       X[3] ^= X[0];
+                       X[2] += X[1];
+                       X[1] = RotL_64(X[1], R_256_1_1);
+                       X[1] ^= X[2];
+                       Skein_Show_Round(BLK_BITS, &ctx->h, 8 * r - 6, X);
+
+                       X[0] += X[1];
+                       X[1] = RotL_64(X[1], R_256_2_0);
+                       X[1] ^= X[0];
+                       X[2] += X[3];
+                       X[3] = RotL_64(X[3], R_256_2_1);
+                       X[3] ^= X[2];
+                       Skein_Show_Round(BLK_BITS, &ctx->h, 8 * r - 5, X);
+
+                       X[0] += X[3];
+                       X[3] = RotL_64(X[3], R_256_3_0);
+                       X[3] ^= X[0];
+                       X[2] += X[1];
+                       X[1] = RotL_64(X[1], R_256_3_1);
+                       X[1] ^= X[2];
+                       Skein_Show_Round(BLK_BITS, &ctx->h, 8 * r - 4, X);
+                       InjectKey(2 * r - 1);   /* subkey after rounds 8r-7 .. 8r-4 */
+
+                       X[0] += X[1];
+                       X[1] = RotL_64(X[1], R_256_4_0);
+                       X[1] ^= X[0];
+                       X[2] += X[3];
+                       X[3] = RotL_64(X[3], R_256_4_1);
+                       X[3] ^= X[2];
+                       Skein_Show_Round(BLK_BITS, &ctx->h, 8 * r - 3, X);
+
+                       X[0] += X[3];
+                       X[3] = RotL_64(X[3], R_256_5_0);
+                       X[3] ^= X[0];
+                       X[2] += X[1];
+                       X[1] = RotL_64(X[1], R_256_5_1);
+                       X[1] ^= X[2];
+                       Skein_Show_Round(BLK_BITS, &ctx->h, 8 * r - 2, X);
+
+                       X[0] += X[1];
+                       X[1] = RotL_64(X[1], R_256_6_0);
+                       X[1] ^= X[0];
+                       X[2] += X[3];
+                       X[3] = RotL_64(X[3], R_256_6_1);
+                       X[3] ^= X[2];
+                       Skein_Show_Round(BLK_BITS, &ctx->h, 8 * r - 1, X);
+
+                       X[0] += X[3];
+                       X[3] = RotL_64(X[3], R_256_7_0);
+                       X[3] ^= X[0];
+                       X[2] += X[1];
+                       X[1] = RotL_64(X[1], R_256_7_1);
+                       X[1] ^= X[2];
+                       Skein_Show_Round(BLK_BITS, &ctx->h, 8 * r, X);
+                       InjectKey(2 * r);       /* subkey after rounds 8r-3 .. 8r */
+               }
+               /* do the final "feedforward" xor, update context chaining vars */
+               for (i = 0; i < WCNT; i++)
+                       ctx->X[i] = X[i] ^ w[i];
+               Skein_Show_Round(BLK_BITS, &ctx->h, SKEIN_RND_FEED_FWD, ctx->X);
+
+               Skein_Clear_First_Flag(ctx->h); /* clear the start bit */
+               blkPtr += SKEIN_256_BLOCK_BYTES;
+       }
+       while (--blkCnt);
+}
+
+/*****************************************************************/
+/*     256-bit Skein                                             */
+/*****************************************************************/
+
+/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
+/* init the context for a straight hashing operation producing hashBitLen output bits */
+static int Skein_256_Init(struct skein256_ctx *ctx, size_t hashBitLen)
+{
+       union {
+               u8 b[SKEIN_256_STATE_BYTES];
+               u64 w[SKEIN_256_STATE_WORDS];
+       } cfg;                  /* config block: filled in as words (w), processed as bytes (b) */
+
+       Skein_Assert(hashBitLen > 0, SKEIN_BAD_HASHLEN);        /* a zero-length digest is not allowed */
+
+       /* build/process config block for hashing */
+       ctx->h.hashBitLen = hashBitLen; /* output hash length in bits (read back by Skein_256_Final) */
+       Skein_Start_New_Type(ctx, CFG_FINAL);   /* set tweaks: T0=0; T1=CFG | FINAL */
+
+       memset(&cfg.w, 0, sizeof(cfg.w));       /* pre-pad cfg.w[] with zeroes */
+       cfg.w[0] = Skein_Swap64(SKEIN_SCHEMA_VER);      /* set the schema, version */
+       cfg.w[1] = Skein_Swap64(hashBitLen);    /* hash result length in bits */
+       cfg.w[2] = Skein_Swap64(SKEIN_CFG_TREE_INFO_SEQUENTIAL);
+
+       /* compute the initial chaining values from config block */
+       memset(ctx->X, 0, sizeof(ctx->X));      /* zero the chaining variables */
+       Skein_256_Process_Block(ctx, cfg.b, 1, sizeof(cfg));    /* cfg.b is re-read byte-wise, hence the Skein_Swap64 calls above */
+
+       /* The chaining vars ctx->X are now initialized for the given hashBitLen. */
+       /* Set up to process the data message portion of the hash (default) */
+       ctx->h.bCnt = 0;        /* buffer b[] starts out empty */
+       Skein_Start_New_Type(ctx, MSG); /* T0=0, T1= MSG type */
+
+       return SKEIN_SUCCESS;
+}
+
+/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
+/* process the input bytes: absorb msgByteCnt bytes of msg, keeping the unprocessed tail in ctx->b */
+static int Skein_256_Update(struct skein256_ctx *ctx, const u8 * msg,
+                           size_t msgByteCnt)
+{
+       size_t n;
+
+       Skein_Assert(ctx->h.bCnt <= SKEIN_256_BLOCK_BYTES, SKEIN_FAIL); /* catch uninitialized context */
+
+       /* process full blocks, if any (only once more than one block of data is pending) */
+       if (msgByteCnt + ctx->h.bCnt > SKEIN_256_BLOCK_BYTES) {
+               if (ctx->h.bCnt) {      /* finish up any buffered message data */
+                       n = SKEIN_256_BLOCK_BYTES - ctx->h.bCnt;        /* # bytes free in buffer b[] */
+                       if (n) {
+                               Skein_assert(n < msgByteCnt);   /* check on our logic here */
+                               memcpy(&ctx->b[ctx->h.bCnt], msg, n);
+                               msgByteCnt -= n;
+                               msg += n;
+                               ctx->h.bCnt += n;
+                       }
+                       Skein_assert(ctx->h.bCnt == SKEIN_256_BLOCK_BYTES);
+                       Skein_256_Process_Block(ctx, ctx->b, 1,
+                                               SKEIN_256_BLOCK_BYTES);
+                       ctx->h.bCnt = 0;
+               }
+               /* now process any remaining full blocks, directly from input message data */
+               if (msgByteCnt > SKEIN_256_BLOCK_BYTES) {
+                       n = (msgByteCnt - 1) / SKEIN_256_BLOCK_BYTES;   /* number of full blocks to process; the -1 always leaves at least one byte behind */
+                       Skein_256_Process_Block(ctx, msg, n,
+                                               SKEIN_256_BLOCK_BYTES);
+                       msgByteCnt -= n * SKEIN_256_BLOCK_BYTES;
+                       msg += n * SKEIN_256_BLOCK_BYTES;
+               }
+               Skein_assert(ctx->h.bCnt == 0);
+       }
+
+       /* copy any remaining source message data bytes into b[] */
+       if (msgByteCnt) {
+               Skein_assert(msgByteCnt + ctx->h.bCnt <= SKEIN_256_BLOCK_BYTES);
+               memcpy(&ctx->b[ctx->h.bCnt], msg, msgByteCnt);
+               ctx->h.bCnt += msgByteCnt;
+       }
+
+       return SKEIN_SUCCESS;
+}
+
+/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
+/* finalize the hash computation and output the result: (hashBitLen + 7) / 8 bytes are written to hashVal */
+static int Skein_256_Final(struct skein256_ctx *ctx, u8 * hashVal)
+{
+       size_t i, n, byteCnt;
+       u64 X[SKEIN_256_STATE_WORDS];
+       Skein_Assert(ctx->h.bCnt <= SKEIN_256_BLOCK_BYTES, SKEIN_FAIL); /* catch uninitialized context */
+
+       ctx->h.T[1] |= SKEIN_T1_FLAG_FINAL;     /* tag as the final block */
+       if (ctx->h.bCnt < SKEIN_256_BLOCK_BYTES)        /* zero pad b[] if necessary */
+               memset(&ctx->b[ctx->h.bCnt], 0,
+                      SKEIN_256_BLOCK_BYTES - ctx->h.bCnt);
+       Skein_256_Process_Block(ctx, ctx->b, 1, ctx->h.bCnt);   /* process the final block; T[0] grows by the unpadded byte count */
+
+       /* now output the result */
+       byteCnt = (ctx->h.hashBitLen + 7) >> 3; /* total number of output bytes */
+
+       /* run Threefish in "counter mode" to generate output */
+       memset(ctx->b, 0, sizeof(ctx->b));      /* zero out b[], so it can hold the counter */
+       memcpy(X, ctx->X, sizeof(X));   /* keep a local copy of counter mode "key" */
+       for (i = 0; i * SKEIN_256_BLOCK_BYTES < byteCnt; i++) {
+               ((u64 *) ctx->b)[0] = Skein_Swap64((u64) i);    /* build the counter block (counter stored little-endian) */
+               Skein_Start_New_Type(ctx, OUT_FINAL);
+               Skein_256_Process_Block(ctx, ctx->b, 1, sizeof(u64));   /* run "counter mode" */
+               n = byteCnt - i * SKEIN_256_BLOCK_BYTES;        /* number of output bytes left to go */
+               if (n >= SKEIN_256_BLOCK_BYTES)
+                       n = SKEIN_256_BLOCK_BYTES;
+               Skein_Put64_LSB_First(hashVal + i * SKEIN_256_BLOCK_BYTES, ctx->X, n);  /* "output" the ctr mode bytes */
+               Skein_Show_Final(256, &ctx->h, n,
+                                hashVal + i * SKEIN_256_BLOCK_BYTES);
+               memcpy(ctx->X, X, sizeof(X));   /* restore the counter mode key for next time */
+       }
+       return SKEIN_SUCCESS;
+}
+
+/* Crypto API .dia_init hook: begin a new Skein-256 hash whose output */
+/* length is fixed at SKEIN256_DIGEST_BITS bits. */
+static void skein256_init(struct crypto_tfm *tfm)
+{
+       Skein_256_Init(crypto_tfm_ctx(tfm), SKEIN256_DIGEST_BITS);
+}
+
+/* Crypto API .dia_update hook: feed len bytes of data into the Skein-256 */
+/* state held in the transform context. */
+static void skein256_update(struct crypto_tfm *tfm, const u8 * data,
+                           unsigned int len)
+{
+       Skein_256_Update(crypto_tfm_ctx(tfm), data, len);
+}
+
+/* Crypto API .dia_final hook: finish the Skein-256 hash and write the */
+/* digest bytes to out. */
+static void skein256_final(struct crypto_tfm *tfm, u8 * out)
+{
+       Skein_256_Final(crypto_tfm_ctx(tfm), out);
+}
+
+static struct crypto_alg skein256 = {          /* descriptor handed to crypto_register_alg() */
+       .cra_name = "skein256",
+       .cra_driver_name = "skein256-generic",
+       .cra_flags = CRYPTO_ALG_TYPE_DIGEST,
+       .cra_blocksize = SKEIN256_BLOCK_SIZE,
+       .cra_ctxsize = sizeof(struct skein256_ctx),     /* context the hooks fetch via crypto_tfm_ctx() */
+       .cra_module = THIS_MODULE,
+       .cra_alignmask = 3,     /* NOTE(review): message bytes are read one at a time by Skein_Get64_LSB_First, so this may be stricter than needed - confirm */
+       .cra_list = LIST_HEAD_INIT(skein256.cra_list),
+       .cra_u = {.digest = {
+                            .dia_digestsize = SKEIN256_DIGEST_SIZE,    /* digest length in bytes (256 / 8) */
+                            .dia_init = skein256_init,         /* wraps Skein_256_Init */
+                            .dia_update = skein256_update,     /* wraps Skein_256_Update */
+                            .dia_final = skein256_final}}      /* wraps Skein_256_Final */
+};
+
+static int __init skein256_generic_mod_init(void)
+{                              /* module load hook, wired up through module_init() */
+       return crypto_register_alg(&skein256);  /* the registration result is returned as-is */
+}
+
+static void __exit skein256_generic_mod_fini(void)
+{                              /* module unload hook, wired up through module_exit() */
+       crypto_unregister_alg(&skein256);       /* drop the registration made in skein256_generic_mod_init */
+}
+
+module_init(skein256_generic_mod_init);
+module_exit(skein256_generic_mod_fini);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Skein-256 Secure Hash Algorithm");
+
+MODULE_ALIAS("skein256");
diff --git a/crypto/skein512_generic.c b/crypto/skein512_generic.c
new file mode 100644
index 0000000..7ea8e71
--- /dev/null
+++ b/crypto/skein512_generic.c
@@ -0,0 +1,417 @@
+/***********************************************************************
+**
+** Implementation of the Skein hash function.
+**
+** Source code author: Doug Whiting, 2008.
+**
+** This algorithm and source code is released to the public domain.
+** 
+************************************************************************/
+
+#include <linux/string.h>
+#include <linux/crypto.h>
+#include "skein.h"             /* get the Skein API definitions   */
+
+#define SKEIN512_BLOCK_SIZE    SKEIN_512_BLOCK_BYTES   /* block length in bytes */
+#define SKEIN512_DIGEST_BITS   512     /* output length in bits */
+#define SKEIN512_DIGEST_SIZE   (SKEIN512_DIGEST_BITS / 8)      /* the same length in bytes */
+
+static int Skein_512_Init(struct skein512_ctx * ctx, size_t hashBitLen);
+static int Skein_512_Update(struct skein512_ctx * ctx, const u8 * msg, size_t msgByteCnt);
+static int Skein_512_Final(struct skein512_ctx * ctx, u8 * hashVal);
+
+/*****************************************************************/
+/*     Portable (i.e., slow) endianness conversion functions     */
+static u64 Skein_Swap64(u64 w64)
+{                              /* returns w64 unchanged on a little-endian CPU, byte-reversed on a big-endian one */
+       static const u64 ONE = 1;       /* use this to check endianness */
+
+       /* figure out endianness "on-the-fly": is the low byte of ONE stored first? */
+       if (1 == ((u8 *) & ONE)[0])
+               return w64;     /* little-endian is fast */
+       else
+               return ((w64 & 0xFF) << 56) |   /* big-endian is slow: reverse all eight bytes */
+                   (((w64 >> 8) & 0xFF) << 48) |
+                   (((w64 >> 16) & 0xFF) << 40) |
+                   (((w64 >> 24) & 0xFF) << 32) |
+                   (((w64 >> 32) & 0xFF) << 24) |
+                   (((w64 >> 40) & 0xFF) << 16) |
+                   (((w64 >> 48) & 0xFF) << 8) | (((w64 >> 56) & 0xFF));
+}
+
+static void Skein_Put64_LSB_First(u8 * dst, const u64 * src, size_t bCnt)
+{                              /* writes bCnt bytes of src[] to dst, low byte of each word first; fully portable (big-endian or little-endian), but slow */
+       size_t n;
+
+       for (n = 0; n < bCnt; n++)
+               dst[n] = (u8) (src[n >> 3] >> (8 * (n & 7)));   /* output byte n is byte (n & 7) of word (n >> 3) */
+}
+
+static void Skein_Get64_LSB_First(u64 * dst, const u8 * src, size_t wCnt)
+{                              /* reads wCnt 64-bit words from src, low byte first; fully portable (big-endian or little-endian), but slow */
+       size_t n;
+
+       for (n = 0; n < 8 * wCnt; n += 8)       /* n is a byte offset into src */
+               dst[n / 8] = (((u64) src[n])) +
+                   (((u64) src[n + 1]) << 8) +
+                   (((u64) src[n + 2]) << 16) +
+                   (((u64) src[n + 3]) << 24) +
+                   (((u64) src[n + 4]) << 32) +
+                   (((u64) src[n + 5]) << 40) +
+                   (((u64) src[n + 6]) << 48) + (((u64) src[n + 7]) << 56);
+}
+
+/* 64-bit rotate left by N bits; both shift counts are masked to 0..63, so N == 0 returns x */
+static u64 RotL_64(u64 x, unsigned int N)
+{
+       return (x << (N & 63)) | (x >> ((64 - N) & 63));
+}
+
+#define BLK_BITS    (WCNT*64)
+
+/* macro to perform a key injection (same for all block sizes) */
+#define InjectKey(r)                                                \
+    for (i=0;i < WCNT;i++)                                          \
+         X[i] += ks[((r)+i) % (WCNT+1)];                            \
+    X[WCNT-3] += ts[((r)+0) % 3];                                   \
+    X[WCNT-2] += ts[((r)+1) % 3];                                   \
+    X[WCNT-1] += (r);                    /* avoid slide attacks */  \
+    Skein_Show_Round(BLK_BITS,&ctx->h,SKEIN_RND_KEY_INJECT,X);
+
+static void Skein_512_Process_Block(struct skein512_ctx *ctx, const u8 *blkPtr,
+                                   size_t blkCnt, size_t byteCntAdd)
+{                              /* do it in C */
+       enum {
+               WCNT = SKEIN_512_STATE_WORDS
+       };
+
+       size_t i, r;
+       u64 ts[3];              /* key schedule: tweak */
+       u64 ks[WCNT + 1];       /* key schedule: chaining vars */
+       u64 X[WCNT];            /* local copy of vars */
+       u64 w[WCNT];            /* local copy of input block */
+
+       Skein_assert(blkCnt != 0);      /* never call with blkCnt == 0! */
+       do {
+               /* this implementation only supports 2**64 input bytes (no carry out here) */
+               ctx->h.T[0] += byteCntAdd;      /* update processed length */
+
+               /* precompute the key schedule for this block */
+               ks[WCNT] = SKEIN_KS_PARITY;
+               for (i = 0; i < WCNT; i++) {
+                       ks[i] = ctx->X[i];
+                       ks[WCNT] ^= ctx->X[i];  /* compute overall parity */
+               }
+               ts[0] = ctx->h.T[0];
+               ts[1] = ctx->h.T[1];
+               ts[2] = ts[0] ^ ts[1];
+
+               Skein_Get64_LSB_First(w, blkPtr, WCNT); /* get input block in 
little-endian format */
+               Skein_Show_Block(BLK_BITS, &ctx->h, ctx->X, blkPtr, w, ks, ts);
+               for (i = 0; i < WCNT; i++) {    /* do the first full key 
injection */
+                       X[i] = w[i] + ks[i];
+               }
+               X[WCNT - 3] += ts[0];
+               X[WCNT - 2] += ts[1];
+
+               Skein_Show_Round(BLK_BITS, &ctx->h, SKEIN_RND_KEY_INITIAL, X);
+               for (r = 1; r <= SKEIN_512_ROUNDS_TOTAL / 8; r++) {     /* 
unroll 8 rounds */
+                       X[0] += X[1];
+                       X[1] = RotL_64(X[1], R_512_0_0);
+                       X[1] ^= X[0];
+                       X[2] += X[3];
+                       X[3] = RotL_64(X[3], R_512_0_1);
+                       X[3] ^= X[2];
+                       X[4] += X[5];
+                       X[5] = RotL_64(X[5], R_512_0_2);
+                       X[5] ^= X[4];
+                       X[6] += X[7];
+                       X[7] = RotL_64(X[7], R_512_0_3);
+                       X[7] ^= X[6];
+                       Skein_Show_Round(BLK_BITS, &ctx->h, 8 * r - 7, X);
+
+                       X[2] += X[1];
+                       X[1] = RotL_64(X[1], R_512_1_0);
+                       X[1] ^= X[2];
+                       X[4] += X[7];
+                       X[7] = RotL_64(X[7], R_512_1_1);
+                       X[7] ^= X[4];
+                       X[6] += X[5];
+                       X[5] = RotL_64(X[5], R_512_1_2);
+                       X[5] ^= X[6];
+                       X[0] += X[3];
+                       X[3] = RotL_64(X[3], R_512_1_3);
+                       X[3] ^= X[0];
+                       Skein_Show_Round(BLK_BITS, &ctx->h, 8 * r - 6, X);
+
+                       X[4] += X[1];
+                       X[1] = RotL_64(X[1], R_512_2_0);
+                       X[1] ^= X[4];
+                       X[6] += X[3];
+                       X[3] = RotL_64(X[3], R_512_2_1);
+                       X[3] ^= X[6];
+                       X[0] += X[5];
+                       X[5] = RotL_64(X[5], R_512_2_2);
+                       X[5] ^= X[0];
+                       X[2] += X[7];
+                       X[7] = RotL_64(X[7], R_512_2_3);
+                       X[7] ^= X[2];
+                       Skein_Show_Round(BLK_BITS, &ctx->h, 8 * r - 5, X);
+
+                       X[6] += X[1];
+                       X[1] = RotL_64(X[1], R_512_3_0);
+                       X[1] ^= X[6];
+                       X[0] += X[7];
+                       X[7] = RotL_64(X[7], R_512_3_1);
+                       X[7] ^= X[0];
+                       X[2] += X[5];
+                       X[5] = RotL_64(X[5], R_512_3_2);
+                       X[5] ^= X[2];
+                       X[4] += X[3];
+                       X[3] = RotL_64(X[3], R_512_3_3);
+                       X[3] ^= X[4];
+                       Skein_Show_Round(BLK_BITS, &ctx->h, 8 * r - 4, X);
+                       InjectKey(2 * r - 1);
+
+                       X[0] += X[1];
+                       X[1] = RotL_64(X[1], R_512_4_0);
+                       X[1] ^= X[0];
+                       X[2] += X[3];
+                       X[3] = RotL_64(X[3], R_512_4_1);
+                       X[3] ^= X[2];
+                       X[4] += X[5];
+                       X[5] = RotL_64(X[5], R_512_4_2);
+                       X[5] ^= X[4];
+                       X[6] += X[7];
+                       X[7] = RotL_64(X[7], R_512_4_3);
+                       X[7] ^= X[6];
+                       Skein_Show_Round(BLK_BITS, &ctx->h, 8 * r - 3, X);
+
+                       X[2] += X[1];
+                       X[1] = RotL_64(X[1], R_512_5_0);
+                       X[1] ^= X[2];
+                       X[4] += X[7];
+                       X[7] = RotL_64(X[7], R_512_5_1);
+                       X[7] ^= X[4];
+                       X[6] += X[5];
+                       X[5] = RotL_64(X[5], R_512_5_2);
+                       X[5] ^= X[6];
+                       X[0] += X[3];
+                       X[3] = RotL_64(X[3], R_512_5_3);
+                       X[3] ^= X[0];
+                       Skein_Show_Round(BLK_BITS, &ctx->h, 8 * r - 2, X);
+
+                       X[4] += X[1];
+                       X[1] = RotL_64(X[1], R_512_6_0);
+                       X[1] ^= X[4];
+                       X[6] += X[3];
+                       X[3] = RotL_64(X[3], R_512_6_1);
+                       X[3] ^= X[6];
+                       X[0] += X[5];
+                       X[5] = RotL_64(X[5], R_512_6_2);
+                       X[5] ^= X[0];
+                       X[2] += X[7];
+                       X[7] = RotL_64(X[7], R_512_6_3);
+                       X[7] ^= X[2];
+                       Skein_Show_Round(BLK_BITS, &ctx->h, 8 * r - 1, X);
+
+                       X[6] += X[1];
+                       X[1] = RotL_64(X[1], R_512_7_0);
+                       X[1] ^= X[6];
+                       X[0] += X[7];
+                       X[7] = RotL_64(X[7], R_512_7_1);
+                       X[7] ^= X[0];
+                       X[2] += X[5];
+                       X[5] = RotL_64(X[5], R_512_7_2);
+                       X[5] ^= X[2];
+                       X[4] += X[3];
+                       X[3] = RotL_64(X[3], R_512_7_3);
+                       X[3] ^= X[4];
+                       Skein_Show_Round(BLK_BITS, &ctx->h, 8 * r, X);
+                       InjectKey(2 * r);
+               }
+               /* do the final "feedforward" xor, update context chaining vars 
*/
+               for (i = 0; i < WCNT; i++)
+                       ctx->X[i] = X[i] ^ w[i];
+               Skein_Show_Round(BLK_BITS, &ctx->h, SKEIN_RND_FEED_FWD, ctx->X);
+
+               Skein_Clear_First_Flag(ctx->h); /* clear the start bit */
+               blkPtr += SKEIN_512_BLOCK_BYTES;
+       }
+       while (--blkCnt);
+}
+
+/*****************************************************************/
+/*     512-bit Skein                                             */
+/*****************************************************************/
+
+/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
+/*
+ * Skein_512_Init - set a context up for plain hashing
+ * @ctx:        context to initialise
+ * @hashBitLen: digest length in bits, checked to be non-zero
+ *
+ * A configuration block holding the schema/version word, the digest
+ * length and the "sequential" tree-info word is run through
+ * Skein_512_Process_Block() on top of an all-zero chaining state.  The
+ * outcome in ctx->X is the starting state for the message blocks; the
+ * staging buffer count is reset and the context is typed MSG.
+ *
+ * Returns SKEIN_SUCCESS.  The non-zero check goes through Skein_Assert()
+ * with SKEIN_BAD_HASHLEN (presumably an early return of that code).
+ *
+ * NOTE(review): the byte count handed to the block function is the size
+ * of the whole state-sized union, not only the three words filled in
+ * below -- confirm this is the config length the Skein spec expects.
+ */
+static int Skein_512_Init(struct skein512_ctx *ctx, size_t hashBitLen)
+{
+       /* configuration block, addressable as bytes or as 64-bit words */
+       union {
+               u8 b[SKEIN_512_STATE_BYTES];
+               u64 w[SKEIN_512_STATE_WORDS];
+       } conf;
+
+       Skein_Assert(hashBitLen > 0, SKEIN_BAD_HASHLEN);
+
+       /* record the digest length, then set tweaks: T0=0; T1=CFG | FINAL */
+       ctx->h.hashBitLen = hashBitLen;
+       Skein_Start_New_Type(ctx, CFG_FINAL);
+
+       /* every config word that is not assigned below stays zero */
+       memset(conf.w, 0, sizeof(conf.w));
+       conf.w[0] = Skein_Swap64(SKEIN_SCHEMA_VER);
+       conf.w[1] = Skein_Swap64(hashBitLen);
+       conf.w[2] = Skein_Swap64(SKEIN_CFG_TREE_INFO_SEQUENTIAL);
+
+       /* derive the initial chaining values from a zeroed state */
+       memset(ctx->X, 0, sizeof(ctx->X));
+       Skein_512_Process_Block(ctx, conf.b, 1, sizeof(conf));
+
+       /* ctx->X is ready; switch over to message processing */
+       ctx->h.bCnt = 0;
+       Skein_Start_New_Type(ctx, MSG);
+
+       return SKEIN_SUCCESS;
+}
+
+/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
+/*
+ * Skein_512_Update - feed message bytes into the hash
+ * @ctx:        context being updated
+ * @msg:        input bytes
+ * @msgByteCnt: number of bytes available at @msg
+ *
+ * Input is staged in ctx->b[].  A block is only pushed through
+ * Skein_512_Process_Block() when more input is known to follow it, so
+ * the most recent bytes (up to one full block) always remain staged in
+ * ctx->b[] on return instead of being processed here.
+ *
+ * Returns SKEIN_SUCCESS.  A staged byte count larger than one block is
+ * reported through Skein_Assert() with SKEIN_FAIL.
+ */
+static int Skein_512_Update(struct skein512_ctx *ctx, const u8 * msg,
+                           size_t msgByteCnt)
+{
+       size_t n;               /* bytes needed to top up ctx->b[] */
+       size_t nblocks;         /* whole blocks taken straight from msg */
+
+       /* catch uninitialized context */
+       Skein_Assert(ctx->h.bCnt <= SKEIN_512_BLOCK_BYTES, SKEIN_FAIL);
+
+       /* more than one block in total: some blocks can be processed now */
+       if (msgByteCnt + ctx->h.bCnt > SKEIN_512_BLOCK_BYTES) {
+               if (ctx->h.bCnt != 0) {
+                       /* complete the partially filled staging block */
+                       n = SKEIN_512_BLOCK_BYTES - ctx->h.bCnt;
+                       if (n != 0) {
+                               Skein_assert(n < msgByteCnt);
+                               memcpy(ctx->b + ctx->h.bCnt, msg, n);
+                               msg += n;
+                               msgByteCnt -= n;
+                               ctx->h.bCnt += n;
+                       }
+                       Skein_assert(ctx->h.bCnt == SKEIN_512_BLOCK_BYTES);
+                       Skein_512_Process_Block(ctx, ctx->b, 1,
+                                               SKEIN_512_BLOCK_BYTES);
+                       ctx->h.bCnt = 0;
+               }
+
+               /*
+                * Consume whole blocks directly from the caller's buffer.
+                * The "- 1" keeps at least one byte back for the copy below.
+                */
+               if (msgByteCnt > SKEIN_512_BLOCK_BYTES) {
+                       nblocks = (msgByteCnt - 1) / SKEIN_512_BLOCK_BYTES;
+                       Skein_512_Process_Block(ctx, msg, nblocks,
+                                               SKEIN_512_BLOCK_BYTES);
+                       msg += nblocks * SKEIN_512_BLOCK_BYTES;
+                       msgByteCnt -= nblocks * SKEIN_512_BLOCK_BYTES;
+               }
+               Skein_assert(ctx->h.bCnt == 0);
+       }
+
+       /* stage whatever is left over */
+       if (msgByteCnt != 0) {
+               Skein_assert(msgByteCnt + ctx->h.bCnt <= SKEIN_512_BLOCK_BYTES);
+               memcpy(ctx->b + ctx->h.bCnt, msg, msgByteCnt);
+               ctx->h.bCnt += msgByteCnt;
+       }
+
+       return SKEIN_SUCCESS;
+}
+
+/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
+/*
+ * Skein_512_Final - finish the hash computation and emit the digest
+ * @ctx:     context holding the chaining state and any staged input
+ * @hashVal: output buffer; receives (ctx->h.hashBitLen + 7) / 8 bytes
+ *
+ * The bytes still staged in ctx->b[] are zero padded and processed as
+ * the block flagged FINAL.  The chaining value that results is then
+ * used as a fixed key for the output stage: for every block of digest
+ * requested, a 64-bit counter is placed at the start of the otherwise
+ * zeroed ctx->b[], processed as an OUT_FINAL block, and the resulting
+ * state words are written to @hashVal least significant byte first.
+ * ctx->X is restored to the key after each output block.
+ *
+ * Returns SKEIN_SUCCESS.  A staged byte count larger than one block is
+ * reported through Skein_Assert() with SKEIN_FAIL.
+ */
+static int Skein_512_Final(struct skein512_ctx *ctx, u8 * hashVal)
+{
+       size_t i, n, byteCnt;
+       u64 X[SKEIN_512_STATE_WORDS];
+       u64 ctr;
+
+       /* catch uninitialized context */
+       Skein_Assert(ctx->h.bCnt <= SKEIN_512_BLOCK_BYTES, SKEIN_FAIL);
+
+       ctx->h.T[1] |= SKEIN_T1_FLAG_FINAL;     /* tag as the final block */
+       if (ctx->h.bCnt < SKEIN_512_BLOCK_BYTES)        /* zero pad b[] */
+               memset(&ctx->b[ctx->h.bCnt], 0,
+                      SKEIN_512_BLOCK_BYTES - ctx->h.bCnt);
+
+       /* process the final block */
+       Skein_512_Process_Block(ctx, ctx->b, 1, ctx->h.bCnt);
+
+       /* now output the result */
+       byteCnt = (ctx->h.hashBitLen + 7) >> 3; /* total output bytes */
+
+       /* run Threefish in "counter mode" to generate the output */
+       memset(ctx->b, 0, sizeof(ctx->b));      /* b[] will hold the counter */
+       memcpy(X, ctx->X, sizeof(X));   /* local copy of counter mode "key" */
+       for (i = 0; i * SKEIN_512_BLOCK_BYTES < byteCnt; i++) {
+               /*
+                * Build the counter block.  The word is copied in rather
+                * than stored through a (u64 *) cast of the byte buffer, so
+                * the write does not depend on the alignment of ctx->b[] or
+                * on the compiler tolerating the type pun.
+                */
+               ctr = Skein_Swap64((u64) i);
+               memcpy(ctx->b, &ctr, sizeof(ctr));
+
+               Skein_Start_New_Type(ctx, OUT_FINAL);
+               /* run "counter mode" */
+               Skein_512_Process_Block(ctx, ctx->b, 1, sizeof(u64));
+
+               /* number of output bytes left to go, capped at one block */
+               n = byteCnt - i * SKEIN_512_BLOCK_BYTES;
+               if (n >= SKEIN_512_BLOCK_BYTES)
+                       n = SKEIN_512_BLOCK_BYTES;
+
+               /* "output" the ctr mode bytes */
+               Skein_Put64_LSB_First(hashVal + i * SKEIN_512_BLOCK_BYTES,
+                                     ctx->X, n);
+               Skein_Show_Final(512, &ctx->h, n,
+                                hashVal + i * SKEIN_512_BLOCK_BYTES);
+
+               /* restore the counter mode key for next time */
+               memcpy(ctx->X, X, sizeof(X));
+       }
+
+       return SKEIN_SUCCESS;
+}
+
+/*
+ * Digest "init" hook: start a new Skein-512 computation in the
+ * transform's context with an output length of SKEIN512_DIGEST_BITS.
+ * The status returned by Skein_512_Init() is not passed on, since this
+ * hook returns void.
+ */
+static void skein512_init(struct crypto_tfm *tfm)
+{
+       Skein_512_Init(crypto_tfm_ctx(tfm), SKEIN512_DIGEST_BITS);
+}
+
+/*
+ * Digest "update" hook: hash @len bytes starting at @data into the
+ * transform's Skein-512 context.  The status returned by
+ * Skein_512_Update() is not passed on, since this hook returns void.
+ */
+static void skein512_update(struct crypto_tfm *tfm, const u8 * data,
+                           unsigned int len)
+{
+       Skein_512_Update(crypto_tfm_ctx(tfm), data, len);
+}
+
+/*
+ * Digest "final" hook: complete the Skein-512 computation held in the
+ * transform's context and write the digest to @out.  The status
+ * returned by Skein_512_Final() is not passed on, since this hook
+ * returns void.
+ */
+static void skein512_final(struct crypto_tfm *tfm, u8 * out)
+{
+       Skein_512_Final(crypto_tfm_ctx(tfm), out);
+}
+
+/*
+ * Algorithm descriptor for "skein512": a digest-type algorithm whose
+ * per-transform context is a struct skein512_ctx and whose three digest
+ * hooks are the skein512_init/update/final wrappers in this file.
+ *
+ * NOTE(review): cra_alignmask is 3 although the algorithm works on u64
+ * words -- confirm this is the intended alignment requirement.
+ */
+static struct crypto_alg skein512 = {
+       .cra_name        = "skein512",
+       .cra_driver_name = "skein512-generic",
+       .cra_flags       = CRYPTO_ALG_TYPE_DIGEST,
+       .cra_blocksize   = SKEIN512_BLOCK_SIZE,
+       .cra_ctxsize     = sizeof(struct skein512_ctx),
+       .cra_module      = THIS_MODULE,
+       .cra_alignmask   = 3,
+       .cra_list        = LIST_HEAD_INIT(skein512.cra_list),
+       .cra_u           = {
+               .digest = {
+                       .dia_digestsize = SKEIN512_DIGEST_SIZE,
+                       .dia_init       = skein512_init,
+                       .dia_update     = skein512_update,
+                       .dia_final      = skein512_final,
+               },
+       },
+};
+
+static int __init skein512_generic_mod_init(void)
+{      /* Module init: register the skein512 algorithm descriptor and hand
+        * the registration status straight back to the caller. */
+       return crypto_register_alg(&skein512);
+}
+
+static void __exit skein512_generic_mod_fini(void)
+{      /* Module exit: undo the registration made by
+        * skein512_generic_mod_init(). */
+       crypto_unregister_alg(&skein512);
+}
+
+module_init(skein512_generic_mod_init);        /* entry point used at module load */
+module_exit(skein512_generic_mod_fini);        /* entry point used at module unload */
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Skein-512 Secure Hash Algorithm");
+
+MODULE_ALIAS("skein512");      /* same string as .cra_name in the descriptor */
--
To unsubscribe from this list: send the line "unsubscribe linux-crypto" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to