Hi there,

I've just finished whipping up a "curiosity" implementation of an ENGINE
that performs RSA private key operations using the GMP library. The
results rather surprised me.

With a number of quick hacks to get this going and very little
consideration to optimisation, my AMD-based laptop went from 100 private
key ops/sec to 139. The only "optimisations" I used were;

 - first time an RSA key is used, cache a table of GMP mpz_t values
corresponding to the RSA key elements and attach it to the RSA key. This
prevents having to allocate/initialise/convert the key data for every
operation.

 - temporary working variables for each operation are also attached to the
RSA key to prevent having to reallocate/initialise them inside each RSA
operation.

On the other hand, a number of possible optimisations were ignored;

 - I am using the default build of GMP, which uses shared-libraries (and
thus potentially slower PIC code)

 - <cough> I am using *text* output and input functions in both openssl
and GMP to convert input and output variables to/from GMP format for each
operation!!! This is undoubtedly dumb. Especially as the string data is
allocated dynamically.

 - if GMP is using montgomery forms (quite likely), it must be deriving
the montgomery forms for each operation rather than caching them as
openssl does.

 - I simply reworded the CRT implementation in rsa_eay.c into GMP-speak,
without giving any thought to how GMP could perhaps prefer to do things
differently??

I've not yet looked at public key ops (RSA_METHOD's internal organisation
would require I duplicate some of rsa_eay.c's higher-level code to
properly hook this whereas private key ops are easy). I suspect public key
operations are likely to show a less dramatic difference. Likewise, I
haven't touched DH, DSA, etc either.

However, what seems interesting to me about this is that if we get a 40%
speed up on the most time-critical operation in OpenSSL on one of the most
commonly used CPUs, what kind of speed up might we get on other chips for
which OpenSSL has had much less development? (and/or with using non-RSA
algorithms?). GMP has clearly put quite a bit of work into optimisation on
a number of architectures. Does anyone else want to look into this? It
would be very interesting to see results on any alphas, sparcs, powerpcs,
mips, etc.

I've attached a working (hack) ENGINE implementation for those wanting to
play, it's assuming a recent snapshot from the HEAD of CVS (not
0.9.7-stable, though that might work too). You'd need to;

(1) dump the attached hw_gmp.c into crypto/engine/
(2) add the obvious "L GMP hw_gmp_err.h hw_gmp_err.c" entry to
    crypto/engine/hw.ec
(3) in crypto/engine/Makefile.ssl, add hw_gmp.c to LIBSRC and hw_gmp.o to
    LIBOBJ
(4) declare "void ENGINE_load_gmp(void);" in crypto/engine/engine.h like
    all the other engines.
(5) add a call to "ENGINE_load_gmp();" in crypto/engine/eng_all.c.
(6) run "make update"
(7) ensure GMP is installed in a system-visible location and run
       "./config -lgmp"
    *OR* if GMP is built/installed elsewhere, run
       "./config -I<p1> -L<p2> -lgmp"
    where <p1>=path to the GMP include files, and <p2>=path to GMP libs.
(8) run "make" to build an OpenSSL with the GMP engine built in.

Then compare openssl and GMP speeds by repeating the following a few
times;

# openssl speed rsa1024
# openssl speed -engine gmp rsa1024

(also worth looking at rsa512, rsa2048, etc)

Any/all feedback is welcome. Patches too. :-)

Cheers,
Geoff
#include <stdio.h>
#include <string.h>
#include <openssl/crypto.h>
#include "cryptlib.h"
#include <openssl/engine.h>

#ifndef OPENSSL_NO_HW
#ifndef OPENSSL_NO_HW_GMP

#include <gmp.h>

#define E_GMP_LIB_NAME "gmp engine"
#include "hw_gmp_err.c"

static int e_gmp_destroy(ENGINE *e);
static int e_gmp_init(ENGINE *e);
static int e_gmp_finish(ENGINE *e);
static int e_gmp_ctrl(ENGINE *e, int cmd, long i, void *p, void (*f)()); 

#ifndef OPENSSL_NO_RSA
/* RSA stuff */
static int e_gmp_rsa_mod_exp(BIGNUM *r, const BIGNUM *I, RSA *rsa);
static int e_gmp_rsa_finish(RSA *r);
#endif

/* The definitions for control commands specific to this engine */
/* #define E_GMP_CMD_SO_PATH		ENGINE_CMD_BASE */
static const ENGINE_CMD_DEFN e_gmp_cmd_defns[] = {
#if 0
	{E_GMP_CMD_SO_PATH,
		"SO_PATH",
		"Specifies the path to the 'e_gmp' shared library",
		ENGINE_CMD_FLAG_STRING},
#endif
	{0, NULL, NULL, 0}
	};

#ifndef OPENSSL_NO_RSA
/* Our internal RSA_METHOD that we provide pointers to */
static RSA_METHOD e_gmp_rsa =
	{
	"GMP RSA method",
	NULL,
	NULL,
	NULL,
	NULL,
	e_gmp_rsa_mod_exp,
	NULL,
	NULL,
	e_gmp_rsa_finish,
	/* These flags initialise montgomery crud that GMP ignores, however it
	 * makes sure the public key ops (which are done in openssl) don't seem
	 * *slower* than usual :-) */
	RSA_FLAG_CACHE_PUBLIC|RSA_FLAG_CACHE_PRIVATE,
	NULL,
	NULL,
	NULL
	};
#endif

/* Constants used when creating the ENGINE */
static const char *engine_e_gmp_id = "gmp";
static const char *engine_e_gmp_name = "GMP engine support";

/* This internal function is used by ENGINE_gmp() and possibly by the
 * "dynamic" ENGINE support too */
static int bind_helper(ENGINE *e)
	{
#ifndef OPENSSL_NO_RSA
	const RSA_METHOD *meth1;
#endif
	if(!ENGINE_set_id(e, engine_e_gmp_id) ||
			!ENGINE_set_name(e, engine_e_gmp_name) ||
#ifndef OPENSSL_NO_RSA
			!ENGINE_set_RSA(e, &e_gmp_rsa) ||
#endif
			!ENGINE_set_destroy_function(e, e_gmp_destroy) ||
			!ENGINE_set_init_function(e, e_gmp_init) ||
			!ENGINE_set_finish_function(e, e_gmp_finish) ||
			!ENGINE_set_ctrl_function(e, e_gmp_ctrl) ||
			!ENGINE_set_cmd_defns(e, e_gmp_cmd_defns))
		return 0;

#ifndef OPENSSL_NO_RSA
	/* We know that the "PKCS1_SSLeay()" functions hook properly
	 * to the cswift-specific mod_exp and mod_exp_crt so we use
	 * those functions. NB: We don't use ENGINE_openssl() or
	 * anything "more generic" because something like the RSAref
	 * code may not hook properly, and if you own one of these
	 * cards then you have the right to do RSA operations on it
	 * anyway! */ 
	meth1 = RSA_PKCS1_SSLeay();
	e_gmp_rsa.rsa_pub_enc = meth1->rsa_pub_enc;
	e_gmp_rsa.rsa_pub_dec = meth1->rsa_pub_dec;
	e_gmp_rsa.rsa_priv_enc = meth1->rsa_priv_enc;
	e_gmp_rsa.rsa_priv_dec = meth1->rsa_priv_dec;
	e_gmp_rsa.bn_mod_exp = meth1->bn_mod_exp;
#endif

	/* Ensure the e_gmp error handling is set up */
	ERR_load_GMP_strings();
	return 1;
	}

static ENGINE *engine_gmp(void)
	{
	ENGINE *ret = ENGINE_new();
	if(!ret)
		return NULL;
	if(!bind_helper(ret))
		{
		ENGINE_free(ret);
		return NULL;
		}
	return ret;
	}

void ENGINE_load_gmp(void)
	{
	/* Copied from eng_[openssl|dyn].c */
	ENGINE *toadd = engine_gmp();
	if(!toadd) return;
	ENGINE_add(toadd);
	ENGINE_free(toadd);
	ERR_clear_error();
	}

#ifndef OPENSSL_NO_RSA
/* Used to attach our own key-data to an RSA structure */
static int hndidx_rsa = -1;
#endif

static int e_gmp_destroy(ENGINE *e)
	{
	ERR_unload_GMP_strings();
	return 1;
	}

/* (de)initialisation functions. */
static int e_gmp_init(ENGINE *e)
	{
#ifndef OPENSSL_NO_RSA
	if (hndidx_rsa == -1)
		hndidx_rsa = RSA_get_ex_new_index(0,
			"GMP-based RSA key handle",
			NULL, NULL, NULL);
#endif
	if (hndidx_rsa == -1)
		return 0;
	return 1;
	}

static int e_gmp_finish(ENGINE *e)
	{
	return 1;
	}

static int e_gmp_ctrl(ENGINE *e, int cmd, long i, void *p, void (*f)())
	{
	int to_return = 1;

	switch(cmd)
		{
#if 0
	case E_GMP_CMD_SO_PATH:
		/* ... */
#endif
	/* The command isn't understood by this engine */
	default:
		GMPerr(GMP_F_E_GMP_CTRL,
			GMP_R_CTRL_COMMAND_NOT_IMPLEMENTED);
		to_return = 0;
		break;
		}

	return to_return;
	}

/* HACK - use text I/O functions in openssl and GMP to handle conversions. This
 * is vile. */
static int bn2gmp(const BIGNUM *bn, mpz_t g)
	{
	int toret;
	char *tmpchar = BN_bn2hex(bn);
	if(!tmpchar) return 0;
	toret = (mpz_set_str(g, tmpchar, 16) == 0 ? 1 : 0);
	OPENSSL_free(tmpchar);
	return toret;
	}

static int gmp2bn(mpz_t g, BIGNUM *bn)
	{
	int toret;
	char *tmpchar = OPENSSL_malloc(mpz_sizeinbase(g, 16) + 10);
	if(!tmpchar) return 0;
	mpz_get_str(tmpchar, 16, g);
	toret = BN_hex2bn(&bn, tmpchar);
	OPENSSL_free(tmpchar);
	return toret;
	}

#ifndef OPENSSL_NO_RSA 
typedef struct st_e_gmp_rsa_ctx
	{
	int public_only;
	mpz_t n;
	mpz_t d;
	mpz_t e;
	mpz_t p;
	mpz_t q;
	mpz_t dmp1;
	mpz_t dmq1;
	mpz_t iqmp;
	mpz_t r0, r1, I0, m1;
	} E_GMP_RSA_CTX;

static E_GMP_RSA_CTX *e_gmp_get_rsa(RSA *rsa)
	{
	E_GMP_RSA_CTX *hptr = RSA_get_ex_data(rsa, hndidx_rsa);
	if(hptr) return hptr;
	hptr = OPENSSL_malloc(sizeof(E_GMP_RSA_CTX));
	if(!hptr) return NULL;
	/* These inits could probably be replaced by more intelligent
	 * mpz_init2() versions, to reduce malloc-thrashing. */
	mpz_init(hptr->n);
	mpz_init(hptr->d);
	mpz_init(hptr->e);
	mpz_init(hptr->p);
	mpz_init(hptr->q);
	mpz_init(hptr->dmp1);
	mpz_init(hptr->dmq1);
	mpz_init(hptr->iqmp);
	mpz_init(hptr->r0);
	mpz_init(hptr->r1);
	mpz_init(hptr->I0);
	mpz_init(hptr->m1);
	if(!bn2gmp(rsa->n, hptr->n) || !bn2gmp(rsa->e, hptr->e))
		goto err;
	if(!rsa->p || !rsa->q || !rsa->d || !rsa->dmp1 || !rsa->dmq1 || !rsa->iqmp)
		{
		hptr->public_only = 1;
		return hptr;
		}
	if(!bn2gmp(rsa->d, hptr->d) || !bn2gmp(rsa->p, hptr->p) ||
			!bn2gmp(rsa->q, hptr->q) || !bn2gmp(rsa->dmp1, hptr->dmp1) ||
			!bn2gmp(rsa->dmq1, hptr->dmq1) || !bn2gmp(rsa->iqmp, hptr->iqmp))
		goto err;
	hptr->public_only = 0;
	RSA_set_ex_data(rsa, hndidx_rsa, hptr);
	return hptr;
err:
	mpz_clear(hptr->n);
	mpz_clear(hptr->d);
	mpz_clear(hptr->e);
	mpz_clear(hptr->p);
	mpz_clear(hptr->q);
	mpz_clear(hptr->dmp1);
	mpz_clear(hptr->dmq1);
	mpz_clear(hptr->iqmp);
	mpz_clear(hptr->r0);
	mpz_clear(hptr->r1);
	mpz_clear(hptr->I0);
	mpz_clear(hptr->m1);
	OPENSSL_free(hptr);
	return NULL;
	}

static int e_gmp_rsa_finish(RSA *rsa)
	{
	E_GMP_RSA_CTX *hptr = RSA_get_ex_data(rsa, hndidx_rsa);
	if(!hptr) return 0;
	mpz_clear(hptr->n);
	mpz_clear(hptr->d);
	mpz_clear(hptr->e);
	mpz_clear(hptr->p);
	mpz_clear(hptr->q);
	mpz_clear(hptr->dmp1);
	mpz_clear(hptr->dmq1);
	mpz_clear(hptr->iqmp);
	mpz_clear(hptr->r0);
	mpz_clear(hptr->r1);
	mpz_clear(hptr->I0);
	mpz_clear(hptr->m1);
	OPENSSL_free(hptr);
	RSA_set_ex_data(rsa, hndidx_rsa, NULL);
	return 1;
	}

static int e_gmp_rsa_mod_exp(BIGNUM *r, const BIGNUM *I, RSA *rsa)
	{
	E_GMP_RSA_CTX *hptr;
	int to_return = 0;

	hptr = e_gmp_get_rsa(rsa);
	if(!hptr)
		{
		GMPerr(GMP_F_E_GMP_RSA_MOD_EXP,
				GMP_R_KEY_CONTEXT_ERROR);
		return 0;
		}
	if(hptr->public_only)
		{
		GMPerr(GMP_F_E_GMP_RSA_MOD_EXP,
				GMP_R_MISSING_KEY_COMPONENTS);
		return 0;
		}

	/* ugh!!! */
	if(!bn2gmp(I, hptr->I0))
		return 0;

	/* This is basically the CRT logic in crypto/rsa/rsa_eay.c reworded into
	 * GMP-speak. It may be that GMP's API facilitates cleaner formulations
	 * of this stuff, eg. better handling of negatives, or functions that
	 * combine operations. */

	mpz_mod(hptr->r1, hptr->I0, hptr->q);
	mpz_powm(hptr->m1, hptr->r1, hptr->dmq1, hptr->q);

	mpz_mod(hptr->r1, hptr->I0, hptr->p);
	mpz_powm(hptr->r0, hptr->r1, hptr->dmp1, hptr->p);

	mpz_sub(hptr->r0, hptr->r0, hptr->m1);

	if(mpz_sgn(hptr->r0) < 0)
		mpz_add(hptr->r0, hptr->r0, hptr->p);
	mpz_mul(hptr->r1, hptr->r0, hptr->iqmp);
	mpz_mod(hptr->r0, hptr->r1, hptr->p);

	if(mpz_sgn(hptr->r0) < 0)
		mpz_add(hptr->r0, hptr->r0, hptr->p);
	mpz_mul(hptr->r1, hptr->r0, hptr->q);
	mpz_add(hptr->r0, hptr->r1, hptr->m1);

	/* ugh!!! */
	if(gmp2bn(hptr->r0, r))
		to_return = 1;

	return 1;
	}
#endif

/* This stuff is needed if this ENGINE is being compiled into a self-contained
 * shared-library. */	   
#ifdef ENGINE_DYNAMIC_SUPPORT
static int bind_fn(ENGINE *e, const char *id)
	{
	if(id && (strcmp(id, engine_e_gmp_id) != 0))
		return 0;
	if(!bind_helper(e))
		return 0;
	return 1;
	}       
IMPLEMENT_DYNAMIC_CHECK_FN()
IMPLEMENT_DYNAMIC_BIND_FN(bind_fn)
#endif /* ENGINE_DYNAMIC_SUPPORT */

#endif /* !OPENSSL_NO_HW_GMP */
#endif /* !OPENSSL_NO_HW */

Reply via email to