By delaying the agent key query until after the fork, we prevent the
problem of simultaneous access to the agent.

Tested against 80 hosts in parallel without error; the current version
already breaks at 20 hosts.
---
 tools/ganeti-listrunner |   25 ++++++++++++++-----------
 1 files changed, 14 insertions(+), 11 deletions(-)

diff --git a/tools/ganeti-listrunner b/tools/ganeti-listrunner
index fba0b75..1f63c1d 100755
--- a/tools/ganeti-listrunner
+++ b/tools/ganeti-listrunner
@@ -1,7 +1,7 @@
 #!/usr/bin/python
 #
 
-# Copyright (C) 2006, 2007, 2010 Google Inc.
+# Copyright (C) 2006, 2007, 2010, 2011 Google Inc.
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
@@ -203,7 +203,7 @@ def GetAgentKeys():
     return []
 
 
-def SetupSshConnection(host, username, password, keys, logfile):
+def SetupSshConnection(host, username, password, use_agent, logfile):
   """Setup the ssh connection used for all later steps.
 
   This function sets up the ssh connection that will be used both
@@ -219,6 +219,10 @@ def SetupSshConnection(host, username, password, keys, logfile):
     print "  - ERROR: host not reachable on 22/tcp"
     return False
 
+  if use_agent:
+    keys = GetAgentKeys()
+  else:
+    keys = []
   all_kwargs = [{"pkey": k} for k in keys]
   all_desc = ["key %d" % d for d in range(len(keys))]
   if password is not None:
@@ -346,7 +350,7 @@ def RunRemoteCommand(connection, command, logfile):
   return True
 
 
-def HostWorker(logdir, username, password, keys, hostname,
+def HostWorker(logdir, username, password, use_agent, hostname,
                executable, command, filelist):
   """Per-host worker.
 
@@ -357,7 +361,7 @@ def HostWorker(logdir, username, password, keys, hostname,
   @param logdir: the directory where the logfiles must be created
   @param username: SSH username
   @param password: SSH password
-  @param keys: SSH keys
+  @param use_agent: whether we should instead use an agent
   @param hostname: the hostname to connect to
   @param executable: the executable to upload, if not None
   @param command: the command to run
@@ -370,7 +374,7 @@ def HostWorker(logdir, username, password, keys, hostname,
   result = 0  # optimism, I know
   try:
     connection = SetupSshConnection(hostname, username,
-                                    password, keys, logfile)
+                                    password, use_agent, logfile)
     if connection is not False:
       if executable is not None:
         print "  %s: uploading files" % hostname
@@ -412,7 +416,7 @@ def HostWorker(logdir, username, password, keys, hostname,
   sys.exit(result)
 
 
-def LaunchWorker(child_pids, logdir, username, password, keys, hostname,
+def LaunchWorker(child_pids, logdir, username, password, use_agent, hostname,
                  executable, command, filelist):
   """Launch the per-host worker.
 
@@ -426,7 +430,7 @@ def LaunchWorker(child_pids, logdir, username, password, keys, hostname,
     # controller just record the pids
     child_pids[pid] = hostname
   else:
-    HostWorker(logdir, username, password, keys, hostname,
+    HostWorker(logdir, username, password, use_agent, hostname,
                executable, command, filelist)
 
 
@@ -489,9 +493,8 @@ def main():
     print "ERROR: cannot create logfiles in dir %s, aborting" % logdir
     sys.exit(1)
 
-  keys = []
   if use_agent:
-    keys = GetAgentKeys()
+    pass
   elif password:
     try:
       fh = file(password)
@@ -523,7 +526,7 @@ def main():
   hosts = hosts[batch_size:]
   child_pids = {}
   for hostname in batch:
-    LaunchWorker(child_pids, logdir, username, password, keys, hostname,
+    LaunchWorker(child_pids, logdir, username, password, use_agent, hostname,
                  executable, command, filelist)
 
   while child_pids:
@@ -535,7 +538,7 @@ def main():
     else:
       failures += 1
     if hosts:
-      LaunchWorker(child_pids, logdir, username, password, keys,
+      LaunchWorker(child_pids, logdir, username, password, use_agent,
                    hosts.pop(0), executable, command, filelist)
 
   print
-- 
1.7.3.1

Reply via email to