At Thu,  7 Jun 2012 17:29:36 +0800,
Liu Yuan wrote:
> 
> From: Liu Yuan <tailai...@taobao.com>
> 
> The cache pool is a TCP connection, so we don't need a timeout on it. If
> the peer doesn't return data to us, it means it is really busy preparing
> the response or doing other work, but it will send the data to us eventually.
> If the node fails without sending back a response, poll will return -1 for
> us.
> 
> The 5s timeout really causes trouble in our observation: we see a lot of
> timeout failures when the cluster is doing heavy IO.

5 seconds is actually too short, but is it really good to remove the
timeout completely?  Without a timeout, how long do send/recv/poll
block when a network error happens, and how long do guest OSes wait for
read/write/flush to return?

Thanks,

Kazutaka

> 
> Signed-off-by: Liu Yuan <tailai...@taobao.com>
> ---
>  include/net.h   |    2 --
>  lib/net.c       |   23 -----------------------
>  sheep/gateway.c |    7 +------
>  sheep/sdnet.c   |    7 -------
>  4 files changed, 1 insertion(+), 38 deletions(-)
> 
> diff --git a/include/net.h b/include/net.h
> index d97984e..83da12a 100644
> --- a/include/net.h
> +++ b/include/net.h
> @@ -6,8 +6,6 @@
>  
>  #include "sheepdog_proto.h"
>  
> -#define DEFAULT_SOCKET_TIMEOUT 5 /* seconds */
> -
>  enum conn_state {
>       C_IO_HEADER = 0,
>       C_IO_DATA_INIT,
> diff --git a/lib/net.c b/lib/net.c
> index bebc108..c4a96ac 100644
> --- a/lib/net.c
> +++ b/lib/net.c
> @@ -420,29 +420,6 @@ int set_nodelay(int fd)
>       return ret;
>  }
>  
> -int set_timeout(int fd)
> -{
> -     int ret;
> -     const struct timeval tv = {
> -             .tv_sec = DEFAULT_SOCKET_TIMEOUT,
> -             .tv_usec = 0,
> -     };
> -
> -     ret = setsockopt(fd, SOL_SOCKET, SO_SNDTIMEO, &tv, sizeof(tv));
> -     if (ret) {
> -             eprintf("failed to set send timeout\n");
> -             return ret;
> -     }
> -
> -     ret = setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv));
> -     if (ret) {
> -             eprintf("failed to set recv timeout\n");
> -             return ret;
> -     }
> -
> -     return 0;
> -}
> -
>  int get_local_addr(uint8_t *bytes)
>  {
>       struct ifaddrs *ifaddr, *ifa;
> diff --git a/sheep/gateway.c b/sheep/gateway.c
> index debe569..e92f3ed 100644
> --- a/sheep/gateway.c
> +++ b/sheep/gateway.c
> @@ -157,18 +157,13 @@ int forward_write_obj_req(struct request *req)
>  
>       ret = SD_RES_SUCCESS;
>  again:
> -     pollret = poll(pfds, nr_fds, DEFAULT_SOCKET_TIMEOUT * 1000);
> +     pollret = poll(pfds, nr_fds, -1);
>       if (pollret < 0) {
>               if (errno == EINTR)
>                       goto again;
>  
>               ret = SD_RES_NETWORK_ERROR;
>               goto err;
> -     } else if (pollret == 0) {
> -             /* poll time out */
> -             eprintf("timeout\n");
> -             ret = SD_RES_NETWORK_ERROR;
> -             goto err;
>       }
>  
>       for (i = 0; i < nr_fds; i++) {
> diff --git a/sheep/sdnet.c b/sheep/sdnet.c
> index 6323ee3..bd09217 100644
> --- a/sheep/sdnet.c
> +++ b/sheep/sdnet.c
> @@ -870,13 +870,6 @@ int get_sheep_fd(uint8_t *addr, uint16_t port, int node_idx, uint32_t epoch)
>       if (fd < 0)
>               return -1;
>  
> -     ret = set_timeout(fd);
> -     if (ret) {
> -             eprintf("%m\n");
> -             close(fd);
> -             return -1;
> -     }
> -
>       ret = set_nodelay(fd);
>       if (ret) {
>               eprintf("%m\n");
> -- 
> 1.7.10.2
> 
> -- 
> sheepdog mailing list
> sheepdog@lists.wpkg.org
> http://lists.wpkg.org/mailman/listinfo/sheepdog
-- 
sheepdog mailing list
sheepdog@lists.wpkg.org
http://lists.wpkg.org/mailman/listinfo/sheepdog

Reply via email to