Hey Mike,

Thanks again for putting this together. Lots of fun to play with. In
case you're curious, I've attached here my "respin" of your
implementation, making it suitable for kernelspace. The "#ifdef
__SIZEOF_INT128__" might be of note for your WBITS to-do comment.
Sending this to you in case its of interest to see an example of how
it might be used.

Jason
/* Copyright 2015 Jason A. Donenfeld <ja...@zx2c4.com>. All Rights Reserved.
 * Copyright 2015 Cryptography Research, Inc.
 */

#include "curve448.h"
#include <linux/string.h>
#include <linux/random.h>
#include <crypto/algapi.h>


#ifdef __SIZEOF_INT128__
#define WBITS 64
typedef uint64_t decaf_word_t;
typedef int64_t decaf_sword_t;
typedef __uint128_t decaf_dword_t;
typedef __int128_t decaf_sdword_t;
#else
#define WBITS 32
typedef uint32_t decaf_word_t;
typedef int32_t decaf_sword_t;
typedef uint64_t decaf_dword_t;
typedef int64_t decaf_sdword_t;
#endif

#define LBITS (WBITS * 7 / 8)
#define CURVE448_LIMBS (448 / LBITS)

typedef struct {
	decaf_word_t limb[CURVE448_LIMBS];
} gf_s, gf[1];

static const unsigned char CURVE448_BASE_POINT[CURVE448_POINT_SIZE] = { 5 };

static const gf ZERO = {{{0}}}, ONE = {{{1}}};

#define LMASK ((((decaf_word_t)1) << LBITS) - 1)
#if WBITS == 64
static const gf P = {{{LMASK, LMASK, LMASK, LMASK, LMASK - 1, LMASK, LMASK, LMASK}}};
#else
static const gf P = {{{LMASK, LMASK, LMASK, LMASK, LMASK, LMASK, LMASK, LMASK, LMASK - 1, LMASK, LMASK, LMASK, LMASK, LMASK, LMASK, LMASK}}};
#endif
static const int EDWARDS_D = -39081;

#if CURVE448_LIMBS == 8
#define FOR_LIMB_U(i, op) { unsigned int i = 0; \
       op;i++; op;i++; op;i++; op;i++; op;i++; op;i++; op;i++; op;i++; \
    }
#elif CURVE448_LIMBS == 16
#define FOR_LIMB_U(i, op) { unsigned int i = 0; \
       op;i++; op;i++; op;i++; op;i++; op;i++; op;i++; op;i++; op;i++; \
       op;i++; op;i++; op;i++; op;i++; op;i++; op;i++; op;i++; op;i++; \
    }
#else
#define FOR_LIMB_U(i, op) { unsigned int i = 0; for (i = 0; i < CURVE448_LIMBS; i++)  { op; }}
#endif

#define FOR_LIMB(i, op) { unsigned int i = 0; for (i = 0; i < CURVE448_LIMBS; i++)  { op; }}

/** Copy x = y */
static void gf_cpy(gf x, const gf y)
{
	FOR_LIMB_U(i, x->limb[i] = y->limb[i]);
}

/** Mostly-unoptimized multiply (PERF), but at least it's unrolled. */
static void gf_mul(gf c, const gf a, const gf b)
{
	decaf_dword_t accum[CURVE448_LIMBS] = { 0 };
	gf aa;
	gf_cpy(aa, a);

	FOR_LIMB_U(i, {
		   FOR_LIMB_U(j, {
			      accum[(i + j) % CURVE448_LIMBS] +=
			      (decaf_dword_t) b->limb[i] * aa->limb[j];
			      });
		   aa->limb[(CURVE448_LIMBS - 1 - i) ^ (CURVE448_LIMBS / 2)] +=
		   aa->limb[CURVE448_LIMBS - 1 - i];});

	accum[CURVE448_LIMBS - 1] += accum[CURVE448_LIMBS - 2] >> LBITS;
	accum[CURVE448_LIMBS - 2] &= LMASK;
	accum[CURVE448_LIMBS / 2] += accum[CURVE448_LIMBS - 1] >> LBITS;
	FOR_LIMB_U(j, {
		   accum[j] += accum[(j - 1) % CURVE448_LIMBS] >> LBITS;
		   accum[(j - 1) % CURVE448_LIMBS] &= LMASK;});
	FOR_LIMB_U(j, c->limb[j] = accum[j]);
}

/** No dedicated square (PERF) */
#define gf_sqr(c,a) gf_mul(c,a,a)

/** Inverse square root using addition chain. */
static void gf_isqrt(gf y, const gf x)
{
	int i;
#define STEP(s,m,n) gf_mul(s,m,c); gf_cpy(c,s); for (i=0;i<n;i++) gf_sqr(c,c);
	gf a, b, c;
	gf_sqr(c, x);
	STEP(b, x, 1);
	STEP(b, x, 3);
	STEP(a, b, 3);
	STEP(a, b, 9);
	STEP(b, a, 1);
	STEP(a, x, 18);
	STEP(a, b, 37);
	STEP(b, a, 37);
	STEP(b, a, 111);
	STEP(a, b, 1);
	STEP(b, x, 223);
	gf_mul(y, a, c);
#undef STEP
}

static void gf_inv(gf y, const gf x)
{
	gf z, w;
	gf_sqr(z, x);		/* x^2 */
	gf_isqrt(w, z);		/* +- 1/sqrt(x^2) = +- 1/x */
	gf_sqr(z, w);		/* 1/x^2 */
	gf_mul(w, x, z);	/* 1/x */
	gf_cpy(y, w);
}

/** Weak reduce mod p. */
static void gf_reduce(gf x)
{
	x->limb[CURVE448_LIMBS / 2] += x->limb[CURVE448_LIMBS - 1] >> LBITS;
	FOR_LIMB_U(j, {
		   x->limb[j] += x->limb[(j - 1) % CURVE448_LIMBS] >> LBITS;
		   x->limb[(j - 1) % CURVE448_LIMBS] &= LMASK;
	});
}

/** Add mod p.  Conservatively always weak-reduce. (PERF) */
static void gf_add(gf x, const gf y, const gf z)
{
	FOR_LIMB_U(i, x->limb[i] = y->limb[i] + z->limb[i]);
	gf_reduce(x);
}

/** Subtract mod p.  Conservatively always weak-reduce. (PERF) */
static void gf_sub(gf x, const gf y, const gf z)
{
	FOR_LIMB_U(i, x->limb[i] = y->limb[i] - z->limb[i] + 2 * P->limb[i]);
	gf_reduce(x);
}

/** Constant time, if (swap) (x,y) = (y,x); */
static void cond_swap(gf x, gf_s * __restrict__ y, decaf_word_t swap)
{
	FOR_LIMB_U(i, {
		   decaf_word_t s = (x->limb[i] ^ y->limb[i]) & swap;
		   x->limb[i] ^= s; y->limb[i] ^= s;
	});
}

/**
 * Mul by signed int.  Not constant-time WRT the sign of that int.
 * Just uses a full mul (PERF)
 */
static inline void gf_mlw(gf a, const gf b, int w)
{
	if (w > 0) {
		gf ww = {{{w}}};
		gf_mul(a, b, ww);
	} else {
		gf ww = {{{-w}}};
		gf_mul(a, b, ww);
		gf_sub(a, ZERO, a);
	}
}

/** Canonicalize */
static void gf_canon(gf a)
{
	decaf_word_t addback;
	decaf_sdword_t carry = 0;
	gf_reduce(a);

	/* subtract p with borrow */
	FOR_LIMB(i, {
		 carry = carry + a->limb[i] - P->limb[i];
		 a->limb[i] = carry & LMASK; carry >>= LBITS;
	});

	addback = carry;
	carry = 0;

	/* add it back */
	FOR_LIMB(i, {
		 carry = carry + a->limb[i] + (P->limb[i] & addback);
		 a->limb[i] = carry & LMASK; carry >>= LBITS;
	});
}

/* Deserialize */
static decaf_word_t gf_deser(gf s, const unsigned char ser[CURVE448_POINT_SIZE])
{
	decaf_sdword_t accum;
	unsigned int i, k = 0, bits = 0;
	decaf_dword_t buf = 0;
	for (i = 0; i < CURVE448_POINT_SIZE; i++) {
		buf |= (decaf_dword_t) ser[i] << bits;
		for (bits += 8;
		     (bits >= LBITS || i == CURVE448_POINT_SIZE - 1) && k < CURVE448_LIMBS;
		     bits -= LBITS, buf >>= LBITS) {
			s->limb[k++] = buf & LMASK;
		}
	}

	accum = 0;
	FOR_LIMB(i, accum = (accum + s->limb[i] - P->limb[i]) >> WBITS);
	return accum;
}

/* Serialize */
static void gf_ser(uint8_t ser[CURVE448_POINT_SIZE], gf a)
{
	int k = 0, bits = 0;
	decaf_dword_t buf = 0;
	gf_canon(a);
	FOR_LIMB(i, {
		 buf |= (decaf_dword_t) a->limb[i] << bits;
		 for (bits += LBITS;
		      (bits >= 8 || i == CURVE448_LIMBS - 1) && k < CURVE448_POINT_SIZE;
		      bits -= 8, buf >>= 8) {
			ser[k++] = buf;
		}
	});
}

bool curve448(uint8_t out[CURVE448_POINT_SIZE], const uint8_t secret[CURVE448_POINT_SIZE], const uint8_t basepoint[CURVE448_POINT_SIZE])
{
	gf x1, x2, z2, x3, z3, t1, t2;
	int t;
	decaf_word_t swap = 0;
	decaf_sword_t nz;

	gf_deser(x1, basepoint);
	gf_cpy(x2, ONE);
	gf_cpy(z2, ZERO);
	gf_cpy(x3, x1);
	gf_cpy(z3, ONE);

	for (t = 448 - 1; t >= 0; t--) {
		uint8_t sb = secret[t / 8];
		decaf_word_t k_t;

		/* Scalar conditioning */
		if (t / 8 == 0)
			sb &= 0xFC;
		else if (t / 8 == CURVE448_POINT_SIZE - 1)
			sb |= 0x80;

		k_t = (sb >> (t % 8)) & 1;
		k_t = -k_t;	/* set to all 0s or all 1s */

		swap ^= k_t;
		cond_swap(x2, x3, swap);
		cond_swap(z2, z3, swap);
		swap = k_t;

		gf_add(t1, x2, z2);	/* A = x2 + z2 */
		gf_sub(t2, x2, z2);	/* B = x2 - z2 */
		gf_sub(z2, x3, z3);	/* D = x3 - z3 */
		gf_mul(x2, t1, z2);	/* DA */
		gf_add(z2, z3, x3);	/* C = x3 + z3 */
		gf_mul(x3, t2, z2);	/* CB */
		gf_sub(z3, x2, x3);	/* DA-CB */
		gf_sqr(z2, z3);	/* (DA-CB)^2 */
		gf_mul(z3, x1, z2);	/* z3 = x1(DA-CB)^2 */
		gf_add(z2, x2, x3);	/* (DA+CB) */
		gf_sqr(x3, z2);	/* x3 = (DA+CB)^2 */

		gf_sqr(z2, t1);	/* AA = A^2 */
		gf_sqr(t1, t2);	/* BB = B^2 */
		gf_mul(x2, z2, t1);	/* x2 = AA*BB */
		gf_sub(t2, z2, t1);	/* E = AA-BB */

		gf_mlw(t1, t2, -EDWARDS_D);	/* E*-d = a24*E */
		gf_add(t1, t1, z2);	/* AA + a24*E */
		gf_mul(z2, t2, t1);	/* z2 = E(AA+a24*E) */
	}

	/* Finish */
	cond_swap(x2, x3, swap);
	cond_swap(z2, z3, swap);
	gf_inv(z2, z2);
	gf_mul(x1, x2, z2);
	gf_ser(out, x1);

	nz = 0;
	for (t = 0; t < CURVE448_POINT_SIZE; t++) {
		nz |= out[t];
	}
	nz = (nz - 1) >> 8;	/* 0 = succ, -1 = fail */

	/* return value: 0 = succ, -1 = fail */
	return nz == 0;
}

void curve448_generate_public(uint8_t pub[CURVE448_POINT_SIZE], const uint8_t secret[CURVE448_POINT_SIZE])
{
	curve448(pub, secret, CURVE448_BASE_POINT);
}

void curve448_generate_secret(uint8_t secret[CURVE448_POINT_SIZE])
{
	get_random_bytes(secret, CURVE448_POINT_SIZE);
	/* TODO: do we have to normalize the secret, like in 25519? */
}
/* Copyright 2015 Jason A. Donenfeld <ja...@zx2c4.com>. All Rights Reserved.
 * Copyright 2015 Cryptography Research, Inc.
 */

#ifndef CURVE448_H
#define CURVE448_H

#include <linux/types.h>

enum curve448_lengths {
	CURVE448_POINT_SIZE = (448 / 8)
};

bool curve448(uint8_t out[CURVE448_POINT_SIZE], const uint8_t secret[CURVE448_POINT_SIZE], const uint8_t basepoint[CURVE448_POINT_SIZE]);
void curve448_generate_secret(uint8_t secret[CURVE448_POINT_SIZE]);
void curve448_generate_public(uint8_t pub[CURVE448_POINT_SIZE], const uint8_t secret[CURVE448_POINT_SIZE]);

#endif
_______________________________________________
Curves mailing list
Curves@moderncrypto.org
https://moderncrypto.org/mailman/listinfo/curves

Reply via email to