On Thu, Apr 14, 2016 at 08:55:47AM +0200, Lukas Tribus wrote:
> Let me put it this way:
>
> frontend haproxy_test
>  bind-process 1-8
>  bind :12345 process 1
>  bind :12345 process 2
>  bind :12345 process 3
>  bind :12345 process 4
>
>
> Leads to 8 processes, and the master process binds the socket 4 times (PID
> 16509):
>
(...)
> lukas@ubuntuvm:~/haproxy-1.5$ sudo netstat -tlp | grep hap
> tcp        0      0 *:12345                 *:* LISTEN      16509/haproxy
> tcp        0      0 *:12345                 *:* LISTEN      16509/haproxy
> tcp        0      0 *:12345                 *:* LISTEN      16509/haproxy
> tcp        0      0 *:12345                 *:* LISTEN      16509/haproxy
> lukas@ubuntuvm:~/haproxy-1.5$

OK so it's netstat which gives a wrong report, I get the same here. I checked
/proc/$PID/fd/ and properly saw the FDs there. Next, "ss -anp" also shows the
full list of processes :

  LISTEN     0      128                       *:12345                    *:*    
  
users:(("haproxy",25360,7),("haproxy",25359,7),("haproxy",25358,7),("haproxy",25357,7),("haproxy",25356,7),("haproxy",25355,7),("haproxy",25354,7),("haproxy",25353,7))
  LISTEN     0      128                       *:12345                    *:*    
  
users:(("haproxy",25360,6),("haproxy",25359,6),("haproxy",25358,6),("haproxy",25357,6),("haproxy",25356,6),("haproxy",25355,6),("haproxy",25354,6),("haproxy",25353,6))
  LISTEN     0      128                       *:12345                    *:*    
  
users:(("haproxy",25360,5),("haproxy",25359,5),("haproxy",25358,5),("haproxy",25357,5),("haproxy",25356,5),("haproxy",25355,5),("haproxy",25354,5),("haproxy",25353,5))
  LISTEN     0      128                       *:12345                    *:*    
  
users:(("haproxy",25360,4),("haproxy",25359,4),("haproxy",25358,4),("haproxy",25357,4),("haproxy",25356,4),("haproxy",25355,4),("haproxy",25354,4),("haproxy",25353,4))

A performance test also shows a fair distribution of the load :

  25353 willy     20   0 21872 4216 1668 S   26  0.1   0:04.54 haproxy
  25374 willy     20   0  7456  108    0 S   25  0.0   0:02.26 injectl464
  25376 willy     20   0  7456  108    0 S   25  0.0   0:02.27 injectl464
  25377 willy     20   0  7456  108    0 S   25  0.0   0:02.26 injectl464
  25375 willy     20   0  7456  108    0 S   24  0.0   0:02.26 injectl464
  25354 willy     20   0 21872 4168 1620 R   22  0.1   0:04.51 haproxy
  25356 willy     20   0 21872 4216 1668 R   22  0.1   0:04.21 haproxy
  25355 willy     20   0 21872 4168 1620 S   21  0.1   0:04.38 haproxy

However, as you can see these sockets are still bound to all processes and
that's not a good idea in the multi-queue mode.

I have added a few debug lines in enable_listener() like this :

$ git diff
diff --git a/src/listener.c b/src/listener.c
index 5abeb80..59c51a1 100644
--- a/src/listener.c
+++ b/src/listener.c
@@ -49,6 +49,7 @@ static struct bind_kw_list bind_keywords = {
  */
 void enable_listener(struct listener *listener)
 {
+       fddebug("%d: enabling fd %d\n", getpid(), listener->fd);
        if (listener->state == LI_LISTEN) {
                if ((global.mode & (MODE_DAEMON | MODE_SYSTEMD)) &&
                    listener->bind_conf->bind_proc &&
@@ -57,6 +58,7 @@ void enable_listener(struct listener *listener)
                         * want any fd event to reach it.
                         */
                        fd_stop_recv(listener->fd);
+                       fddebug("%d: pausing fd %d\n", getpid(), listener->fd);
                        listener->state = LI_PAUSED;
                }
                else if (listener->nbconn < listener->maxconn) {

And we're seeing this upon startup for processes 25746..25755 :

As you can see in the traces below, the FDs are properly enabled, and then
paused in the processes they are not bound to.

willy@wtap:haproxy$ grep 4294967295 log | grep 25746
25746 write(4294967295, "25746: enabling fd 4\n", 21 <unfinished ...>
25746 write(4294967295, "25746: enabling fd 5\n", 21 <unfinished ...>
25746 write(4294967295, "25746: pausing fd 5\n", 20) = -1 EBADF (Bad file descriptor)
25746 write(4294967295, "25746: enabling fd 6\n", 21) = -1 EBADF (Bad file descriptor)
25746 write(4294967295, "25746: pausing fd 6\n", 20) = -1 EBADF (Bad file descriptor)
25746 write(4294967295, "25746: enabling fd 7\n", 21 <unfinished ...>
25746 write(4294967295, "25746: pausing fd 7\n", 20 <unfinished ...>
willy@wtap:haproxy$ grep 4294967295 log | grep 25747
25747 write(4294967295, "25747: enabling fd 4\n", 21 <unfinished ...>
25747 write(4294967295, "25747: pausing fd 4\n", 20 <unfinished ...>
25747 write(4294967295, "25747: enabling fd 5\n", 21 <unfinished ...>
25747 write(4294967295, "25747: enabling fd 6\n", 21 <unfinished ...>
25747 write(4294967295, "25747: pausing fd 6\n", 20 <unfinished ...>
25747 write(4294967295, "25747: enabling fd 7\n", 21 <unfinished ...>
25747 write(4294967295, "25747: pausing fd 7\n", 20 <unfinished ...>
willy@wtap:haproxy$ grep 4294967295 log | grep 25748
25748 write(4294967295, "25748: enabling fd 4\n", 21 <unfinished ...>
25748 write(4294967295, "25748: pausing fd 4\n", 20 <unfinished ...>
25748 write(4294967295, "25748: enabling fd 5\n", 21 <unfinished ...>
25748 write(4294967295, "25748: pausing fd 5\n", 20 <unfinished ...>
25748 write(4294967295, "25748: enabling fd 6\n", 21 <unfinished ...>
25748 write(4294967295, "25748: enabling fd 7\n", 21 <unfinished ...>
25748 write(4294967295, "25748: pausing fd 7\n", 20 <unfinished ...>
willy@wtap:haproxy$ grep 4294967295 log | grep 25749
25749 write(4294967295, "25749: enabling fd 4\n", 21 <unfinished ...>
25749 write(4294967295, "25749: pausing fd 4\n", 20 <unfinished ...>
25749 write(4294967295, "25749: enabling fd 5\n", 21 <unfinished ...>
25749 write(4294967295, "25749: pausing fd 5\n", 20 <unfinished ...>
25749 write(4294967295, "25749: enabling fd 6\n", 21 <unfinished ...>
25749 write(4294967295, "25749: pausing fd 6\n", 20 <unfinished ...>
25749 write(4294967295, "25749: enabling fd 7\n", 21 <unfinished ...>
willy@wtap:haproxy$ grep 4294967295 log | grep 25750
25750 write(4294967295, "25750: enabling fd 4\n", 21 <unfinished ...>
25750 write(4294967295, "25750: pausing fd 4\n", 20 <unfinished ...>
25750 write(4294967295, "25750: enabling fd 5\n", 21 <unfinished ...>
25750 write(4294967295, "25750: pausing fd 5\n", 20 <unfinished ...>
25750 write(4294967295, "25750: enabling fd 6\n", 21 <unfinished ...>
25750 write(4294967295, "25750: pausing fd 6\n", 20 <unfinished ...>
25750 write(4294967295, "25750: enabling fd 7\n", 21 <unfinished ...>
25750 write(4294967295, "25750: pausing fd 7\n", 20 <unfinished ...>


Now with the following patch to completely unbind such listeners :

diff --git a/src/listener.c b/src/listener.c
index 5abeb80..0296d50 100644
--- a/src/listener.c
+++ b/src/listener.c
@@ -56,8 +57,7 @@ void enable_listener(struct listener *listener)
                        /* we don't want to enable this listener and don't
                         * want any fd event to reach it.
                         */
-                       fd_stop_recv(listener->fd);
-                       listener->state = LI_PAUSED;
+                       unbind_listener(listener);
                }
                else if (listener->nbconn < listener->maxconn) {
                        fd_want_recv(listener->fd);


I get this which is much cleaner :

LISTEN     0      128                       *:12345                    *:*      
users:(("haproxy",25949,7))
LISTEN     0      128                       *:12345                    *:*      
users:(("haproxy",25948,6))
LISTEN     0      128                       *:12345                    *:*      
users:(("haproxy",25947,5))
LISTEN     0      128                       *:12345                    *:*      
users:(("haproxy",25946,4))

So I guess that indeed, if some of the processes a frontend is bound to do
not have a corresponding bind line, this can cause connection issues, as some
incoming connections will be distributed to queues that nobody listens to.

I'm willing to commit this patch to make things cleaner and more reliable.
Here I'm getting the exact same performance with and without. Christian you
may want to apply it by hand to test if it improves the behaviour for you.

Please also note that you'll get a build warning that first needs another
fix on listen_accept() which doesn't have the same prototype between the
.c and the .h (!). I'll handle it as well.

Cheers,
Willy

Reply via email to