On Mon, Feb 22, 2010 at 01:45:53PM +0100, Michael Hanselmann wrote:
> If too many clients try to connect to the master at the same time, some of
> them might fail if the master doesn't accept the connections fast enough.
>
> Signed-off-by: Michael Hanselmann <[email protected]>
> ---
> lib/luxi.py | 27 +++++++++++++++++++--------
> 1 files changed, 19 insertions(+), 8 deletions(-)
>
> diff --git a/lib/luxi.py b/lib/luxi.py
> index f062816..97333dc 100644
> --- a/lib/luxi.py
> +++ b/lib/luxi.py
> @@ -37,6 +37,7 @@ import errno
> from ganeti import serializer
> from ganeti import constants
> from ganeti import errors
> +from ganeti import utils
>
>
> KEY_METHOD = 'method'
> @@ -156,15 +157,25 @@ class Transport:
>
> try:
> self.socket = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
> - self.socket.settimeout(self._ctimeout)
> +
> + def _Connect():
Could this helper be defined at class level rather than nested inside this method?
> + self.socket.settimeout(self._ctimeout)
> + try:
> + self.socket.connect(address)
> + except socket.timeout, err:
> + raise TimeoutError("Connect timed out: %s" % str(err))
> + except socket.error, err:
> + if err.args[0] in (errno.ENOENT, errno.ECONNREFUSED):
> + raise NoMasterError(address)
> + if err.args[0] == errno.EAGAIN:
> + raise utils.RetryAgain()
> + raise
> +
> try:
> - self.socket.connect(address)
> - except socket.timeout, err:
> - raise TimeoutError("Connect timed out: %s" % str(err))
> - except socket.error, err:
> - if err.args[0] in (errno.ENOENT, errno.ECONNREFUSED):
> - raise NoMasterError(address)
> - raise
> + utils.Retry(_Connect, 1.0, self._ctimeout)
> + except utils.RetryTimeout:
> + raise TimeoutError("Connect timed out")
Hmm... is the 1.0 delay hardcoded? LGTM, but if any other functions deal with
EAGAIN, we might want a global constant for the EAGAIN retry delay.
iustin