Re: relayd SSL/TLS keep RSA private keys in separate process

2014-04-17 Thread Reyk Floeter
On Fri, Apr 11, 2014 at 08:15:27PM -0600, Bob Beck wrote:
> On Fri, Apr 11, 2014 at 6:09 PM, Reyk Floeter  wrote:
> 
> 
> >
> > I did some testing with apache bench (ab) and it shows a negative
> > performance impact when running with multiple preforked relays and
> > concurrent requests.  But this is expected because all processes have
> > to wait for the single "ca" process to complete the RSA operations.
> >
> > Is this desirable?
> 
> Ahh - as compared to what - relayd running with each process itself
> doing the RSA ops (acting as an SSL accelerator)
> in front of it?  then that would be expected.  I wouldn't call it
> "desirable" - let relayd have more than one "CA" process.

This updated diff creates one ca process per relay and gives much
better performance results (a little decrease is still there but
that's because of the imsg passing.  That can also be optimized
later).

Currently, relayd creates 5 relay processe by default unless you
change it with the "prefork" option.  I changed the default to 3
because I think 5 is a little bit too much for most "normal" use cases
and I didn't measure big differences with normal load testing.  But
these 3 relays will turn into 6 processes now, each one with a "CA".

Creating a more flexible N:M mapping (eg. 5 relays and 3 CAs) would
make the code more complex and doesn't seem to work very well, so I
want to stick on the "1 CA per relay".

Any more testers?  Thoughts?

Reyk

Index: Makefile
===
RCS file: /cvs/src/usr.sbin/relayd/Makefile,v
retrieving revision 1.25
diff -u -p -u -p -r1.25 Makefile
--- Makefile14 Apr 2014 12:58:04 -  1.25
+++ Makefile17 Apr 2014 19:26:59 -
@@ -1,11 +1,12 @@
 #  $OpenBSD: Makefile,v 1.25 2014/04/14 12:58:04 blambert Exp $
 
 PROG=  relayd
-SRCS=  parse.y log.c control.c ssl.c ssl_privsep.c \
-   relayd.c pfe.c pfe_filter.c pfe_route.c hce.c relay.c \
-   relay_http.c relay_udp.c carp.c check_icmp.c check_tcp.c \
-   check_script.c name2id.c snmp.c shuffle.c proc.c config.c \
-   agentx.c
+SRCS=  parse.y
+SRCS+= agentx.c ca.c carp.c check_icmp.c check_script.c \
+   check_tcp.c config.c control.c hce.c log.c name2id.c \
+   pfe.c pfe_filter.c pfe_route.c proc.c \
+   relay.c relay_http.c relay_udp.c relayd.c \
+   shuffle.c snmp.c ssl.c ssl_privsep.c
 MAN=   relayd.8 relayd.conf.5
 
 LDADD= -levent -lssl -lcrypto -lutil
Index: ca.c
===
RCS file: ca.c
diff -N ca.c
--- /dev/null   1 Jan 1970 00:00:00 -
+++ ca.c17 Apr 2014 19:26:59 -
@@ -0,0 +1,431 @@
+/* $OpenBSD$   */
+
+/*
+ * Copyright (c) 2014 Reyk Floeter 
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include 
+#include 
+#include 
+#include 
+
+#include 
+#include 
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include 
+#include 
+#include 
+#include 
+
+#include "relayd.h"
+
+voidca_init(struct privsep *, struct privsep_proc *p, void *);
+voidca_launch(void);
+
+int ca_dispatch_parent(int, struct privsep_proc *, struct imsg *);
+int ca_dispatch_relay(int, struct privsep_proc *, struct imsg *);
+
+int rsae_pub_enc(int, const u_char *, u_char *, RSA *, int);
+int rsae_pub_dec(int,const u_char *, u_char *, RSA *, int);
+int rsae_priv_enc(int, const u_char *, u_char *, RSA *, int);
+int rsae_priv_dec(int, const u_char *, u_char *, RSA *, int);
+int rsae_mod_exp(BIGNUM *, const BIGNUM *, RSA *, BN_CTX *);
+int rsae_bn_mod_exp(BIGNUM *, const BIGNUM *, const BIGNUM *,
+   const BIGNUM *, BN_CTX *, BN_MONT_CTX *);
+int rsae_init(RSA *);
+int rsae_finish(RSA *);
+int rsae_sign(int, const u_char *, u_int,
+   u_char *, u_int *, const RSA *);
+int rsae_verify(int dtype, const u_char *m, u_int,
+   const u_char *, u_int, const RSA *);
+int rsae_keygen(RSA *, int, BIGNUM *, BN_GENCB *);
+
+static struct relayd *env = NULL;
+extern int  proc_id;
+
+static struct privsep_proc procs[] = {
+   { "parent", PROC_PARENT,ca_dispatch_parent },
+

Re: relayd SSL/TLS keep RSA private keys in separate process

2014-04-11 Thread Bob Beck
On Fri, Apr 11, 2014 at 6:09 PM, Reyk Floeter  wrote:


>
> I did some testing with apache bench (ab) and it shows a negative
> performance impact when running with multiple preforked relays and
> concurrent requests.  But this is expected because all processes have
> to wait for the single "ca" process to complete the RSA operations.
>
> Is this desirable?

Ahh - as compared to what - relayd running with each process itself
doing the RSA ops (acting as an SSL accelerator)
in front of it?  then that would be expected.  I wouldn't call it
"desirable" - let relayd have more than one "CA" process.



Re: relayd SSL/TLS keep RSA private keys in separate process

2014-04-11 Thread Reyk Floeter
On Wed, Apr 09, 2014 at 04:20:23PM +0200, Reyk Floeter wrote:
> relayd uses privsep to mitigate the risk of potential attacks.
> OpenSSL's SSL code wasn't designed with privsep in mind.  We already
> have a hack to load the keys and certificates in the parent process
> and to send them via imsg to the chroot'ed relays; OpenSSL normally
> wants to load them from files.
> 
> This diff goes a bit further by moving the private keys to a new
> separate process instead of copying them to the relays.  A custom RSA
> engine is used by the SSL/TLS code of the relay processes to send RSA
> private key encryption/decryption requests to the new "ca" process (I
> used the name "ca" because something similar exists in iked) instead
> of operating on the private key directly.
> 
> The diff is still experimental, misses support for the SSL inspection,
> probably client mode and hasn't been tested very well.  The following
> configuration has been tested:
> 
> relay wwwssl {
> listen on $ext_addr port 443 ssl
> forward to 127.0.0.1 port 80
> }
> 
> It may impact performance because it adds a little for the RSA
> communication between the processes, but SSL only does it once to get
> the session keys and RSA is already slow.
> 
> I would like to get some first thoughts / feedback.
> 

I attached an updated diff with the following changes:

- don't keep text versions of the keys/certs in memory
- add purge_keys() to explicit_bzero() the text version of keys and certs
- plug memleaks (OpenSSL's confusing free/deref of objects is sometimes 
annoying)
- as a plus, the diff reduces CPU load

TODO: ca / ssl inspection

I did some testing with apache bench (ab) and it shows a negative
performance impact when running with multiple preforked relays and
concurrent requests.  But this is expected because all processes have
to wait for the single "ca" process to complete the RSA operations. 

Is this desirable?

Reyk

Index: Makefile
===
RCS file: /cvs/src/usr.sbin/relayd/Makefile,v
retrieving revision 1.24
diff -u -p -u -p -r1.24 Makefile
--- Makefile18 Jan 2014 05:54:51 -  1.24
+++ Makefile12 Apr 2014 00:09:01 -
@@ -1,10 +1,12 @@
 #  $OpenBSD: Makefile,v 1.24 2014/01/18 05:54:51 martynas Exp $
 
 PROG=  relayd
-SRCS=  parse.y log.c control.c ssl.c ssl_privsep.c \
-   relayd.c pfe.c pfe_filter.c pfe_route.c hce.c relay.c \
-   relay_http.c relay_udp.c carp.c check_icmp.c check_tcp.c \
-   check_script.c name2id.c snmp.c shuffle.c proc.c config.c
+SRCS=  parse.y
+SRCS+= ca.c carp.c check_icmp.c check_script.c check_tcp.c \
+   config.c control.c hce.c log.c name2id.c \
+   pfe.c pfe_filter.c pfe_route.c proc.c \
+   relay.c relay_http.c relay_udp.c relayd.c \
+   shuffle.c snmp.c ssl.c ssl_privsep.c
 MAN=   relayd.8 relayd.conf.5
 
 LDADD= -levent -lssl -lcrypto -lutil
Index: ca.c
===
RCS file: ca.c
diff -N ca.c
--- /dev/null   1 Jan 1970 00:00:00 -
+++ ca.c12 Apr 2014 00:09:01 -
@@ -0,0 +1,430 @@
+/* $OpenBSD$   */
+
+/*
+ * Copyright (c) 2014 Reyk Floeter 
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include 
+#include 
+#include 
+#include 
+
+#include 
+#include 
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include 
+#include 
+#include 
+#include 
+
+#include "relayd.h"
+
+voidca_init(struct privsep *, struct privsep_proc *p, void *);
+voidca_launch(void);
+
+int ca_dispatch_parent(int, struct privsep_proc *, struct imsg *);
+int ca_dispatch_relay(int, struct privsep_proc *, struct imsg *);
+
+int rsae_pub_enc(int, const u_char *, u_char *, RSA *, int);
+int rsae_pub_dec(int,const u_char *, u_char *, RSA *, int);
+int rsae_priv_enc(int, const u_char *, u_char *, RSA *, int);
+int rsae_priv_dec(int, const u_char *, u_char *, RSA *, int);
+int rsae_mod_exp(BIGNUM *, const BIGNUM *, RSA *, BN_CTX *);
+int rsae_bn_mod_exp(BIGNUM *, const BIGNUM *, const BIGNUM *,
+   const BIGNUM *, BN_CTX *, BN_MONT_CTX *);
+int rsae_init(RSA *);
+int  

relayd SSL/TLS keep RSA private keys in separate process

2014-04-09 Thread Reyk Floeter
Hi,

relayd uses privsep to mitigate the risk of potential attacks.
OpenSSL's SSL code wasn't designed with privsep in mind.  We already
have a hack to load the keys and certificates in the parent process
and to send them via imsg to the chroot'ed relays; OpenSSL normally
wants to load them from files.

This diff goes a bit further by moving the private keys to a new
separate process instead of copying them to the relays.  A custom RSA
engine is used by the SSL/TLS code of the relay processes to send RSA
private key encryption/decryption requests to the new "ca" process (I
used the name "ca" because something similar exists in iked) instead
of operating on the private key directly.

The diff is still experimental, misses support for the SSL inspection,
probably client mode and hasn't been tested very well.  The following
configuration has been tested:

relay wwwssl {
listen on $ext_addr port 443 ssl
forward to 127.0.0.1 port 80
}

It may impact performance because it adds a little for the RSA
communication between the processes, but SSL only does it once to get
the session keys and RSA is already slow.

I would like to get some first thoughts / feedback.

Reyk

Index: Makefile
===
RCS file: /cvs/src/usr.sbin/relayd/Makefile,v
retrieving revision 1.24
diff -u -p -u -p -r1.24 Makefile
--- Makefile18 Jan 2014 05:54:51 -  1.24
+++ Makefile9 Apr 2014 14:02:37 -
@@ -1,10 +1,12 @@
 #  $OpenBSD: Makefile,v 1.24 2014/01/18 05:54:51 martynas Exp $
 
 PROG=  relayd
-SRCS=  parse.y log.c control.c ssl.c ssl_privsep.c \
-   relayd.c pfe.c pfe_filter.c pfe_route.c hce.c relay.c \
-   relay_http.c relay_udp.c carp.c check_icmp.c check_tcp.c \
-   check_script.c name2id.c snmp.c shuffle.c proc.c config.c
+SRCS=  parse.y
+SRCS+= ca.c carp.c check_icmp.c check_script.c check_tcp.c \
+   config.c control.c hce.c log.c name2id.c \
+   pfe.c pfe_filter.c pfe_route.c proc.c \
+   relay.c relay_http.c relay_udp.c relayd.c \
+   shuffle.c snmp.c ssl.c ssl_privsep.c
 MAN=   relayd.8 relayd.conf.5
 
 LDADD= -levent -lssl -lcrypto -lutil
Index: ca.c
===
RCS file: ca.c
diff -N ca.c
--- /dev/null   1 Jan 1970 00:00:00 -
+++ ca.c9 Apr 2014 14:02:37 -
@@ -0,0 +1,414 @@
+/* $OpenBSD$   */
+
+/*
+ * Copyright (c) 2014 Reyk Floeter 
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include 
+#include 
+#include 
+#include 
+
+#include 
+#include 
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include 
+#include 
+#include 
+#include 
+
+#include "relayd.h"
+
+voidca_init(struct privsep *, struct privsep_proc *p, void *);
+
+int ca_dispatch_parent(int, struct privsep_proc *, struct imsg *);
+int ca_dispatch_relay(int, struct privsep_proc *, struct imsg *);
+
+int rsae_pub_enc(int, const u_char *, u_char *, RSA *, int);
+int rsae_pub_dec(int,const u_char *, u_char *, RSA *, int);
+int rsae_priv_enc(int, const u_char *, u_char *, RSA *, int);
+int rsae_priv_dec(int, const u_char *, u_char *, RSA *, int);
+int rsae_mod_exp(BIGNUM *, const BIGNUM *, RSA *, BN_CTX *);
+int rsae_bn_mod_exp(BIGNUM *, const BIGNUM *, const BIGNUM *,
+   const BIGNUM *, BN_CTX *, BN_MONT_CTX *);
+int rsae_init(RSA *);
+int rsae_finish(RSA *);
+int rsae_sign(int, const u_char *, u_int,
+   u_char *, u_int *, const RSA *);
+int rsae_verify(int dtype, const u_char *m, u_int,
+   const u_char *, u_int, const RSA *);
+int rsae_keygen(RSA *, int, BIGNUM *, BN_GENCB *);
+
+static struct relayd *env = NULL;
+extern int  proc_id;
+
+static struct privsep_proc procs[] = {
+   { "parent", PROC_PARENT,ca_dispatch_parent },
+   { "relay",  PROC_RELAY, ca_dispatch_relay },
+};
+
+pid_t
+ca(struct privsep *ps, struct privsep_proc *p)
+{
+   env = ps->ps_env;
+
+   return (proc_run(ps, p, procs, nitems(procs), ca_init, NULL));
+}
+
+void
+ca_init(struct privsep *ps, struct privsep_proc *p, void *arg)
+{
+   if (confi