On Mon, Feb 24, 2014 at 12:47 PM, Petr Pudlák <[email protected]> wrote:

>
>
>
> On Tue, Feb 18, 2014 at 3:39 PM, Hrvoje Ribicic <[email protected]> wrote:
>
>> This patch adds a QA utility function that acquires a set of locks, and
>> attempts to run a given function with the locks in place. Should the
>> given function block, this function does not detect this - later
>> patches will address the issue.
>>
>> An example of its use is provided by having the move-instance test
>> modified to use it.
>>
>> Signed-off-by: Hrvoje Ribicic <[email protected]>
>> ---
>>  Makefile.am        |   1 +
>>  qa/qa_job_utils.py | 149
>> +++++++++++++++++++++++++++++++++++++++++++++++++++++
>>  qa/qa_rapi.py      |  18 +++++--
>>  3 files changed, 163 insertions(+), 5 deletions(-)
>>  create mode 100644 qa/qa_job_utils.py
>>
>> diff --git a/Makefile.am b/Makefile.am
>> index 8cc443e..4009eae 100644
>> --- a/Makefile.am
>> +++ b/Makefile.am
>> @@ -1010,6 +1010,7 @@ qa_scripts = \
>>         qa/qa_instance.py \
>>         qa/qa_instance_utils.py \
>>         qa/qa_job.py \
>> +       qa/qa_job_utils.py \
>>         qa/qa_monitoring.py \
>>         qa/qa_node.py \
>>         qa/qa_os.py \
>> diff --git a/qa/qa_job_utils.py b/qa/qa_job_utils.py
>> new file mode 100644
>> index 0000000..bc4733c
>> --- /dev/null
>> +++ b/qa/qa_job_utils.py
>> @@ -0,0 +1,149 @@
>> +#
>> +#
>> +
>> +# Copyright (C) 2014 Google Inc.
>> +#
>> +# This program is free software; you can redistribute it and/or modify
>> +# it under the terms of the GNU General Public License as published by
>> +# the Free Software Foundation; either version 2 of the License, or
>> +# (at your option) any later version.
>> +#
>> +# This program is distributed in the hope that it will be useful, but
>> +# WITHOUT ANY WARRANTY; without even the implied warranty of
>> +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
>> +# General Public License for more details.
>> +#
>> +# You should have received a copy of the GNU General Public License
>> +# along with this program; if not, write to the Free Software
>> +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
>> +# 02110-1301, USA.
>> +
>> +
>> +"""QA utility functions for testing jobs
>> +
>> +"""
>> +
>> +import re
>> +
>> +from ganeti import constants
>> +from ganeti import locking
>> +from ganeti import utils
>> +
>> +import qa_config
>> +import qa_error
>> +
>> +from qa_utils import AssertCommand, GetCommandOutput, GetObjectInfo
>> +
>> +
>> +AVAILABLE_LOCKS = [locking.LEVEL_NODE, ]
>> +
>> +
>> +def _GetOutputFromMaster(cmd):
>> +  """ Gets the output of a command executed on master.
>> +
>> +  """
>> +  if isinstance(cmd, basestring):
>> +    cmdstr = cmd
>> +  else:
>> +    cmdstr = utils.ShellQuoteArgs(cmd)
>> +
>> +  # Necessary due to the stderr stream not being captured properly on the
>> +  # buildbot
>> +  cmdstr += " 2>&1"
>> +
>> +  return GetCommandOutput(qa_config.GetMasterNode().primary, cmdstr)
>> +
>> +
>> +def ExecuteJobProducingCommand(cmd):
>> +  """ Executes a --submit-using command producing a job, and returns an
>> id.
>>
> This is somewhat confusing, I'd suggest to reword it to something like "a
> command using --submit".
>
>

Oh, good point, will change this.
I will also reword the line to indicate the job has to be present.


> +
>> +  @type cmd: list of string
>> +  @param cmd: The command to execute, broken into constituent components.
>> +
>> +  """
>> +  job_id_output = _GetOutputFromMaster(cmd)
>> +
>> +  possible_job_ids = re.findall("JobID: ([0-9]+)", job_id_output)
>> +  if len(possible_job_ids) != 1:
>> +    raise qa_error.Error("Cannot parse command output to find job id:
>> output "
>> +                         "is %s" % job_id_output)
>> +
>> +  return int(possible_job_ids[0])
>> +
>> +
>> +def _StartDelayFunction(locks, timeout):
>> +  """ Starts the gnt-debug delay option with the given locks and timeout.
>> +
>> +  """
>> +  # The interruptible switch must be used
>> +  cmd = ["gnt-debug", "delay", "-i", "--submit", "--no-master"]
>> +
>> +  for node in locks.get(locking.LEVEL_NODE, []):
>> +    cmd.append("-n%s" % node)
>> +
>> +  cmd.append(str(timeout))
>> +
>> +  job_id = ExecuteJobProducingCommand(cmd)
>> +  job_info = GetObjectInfo(["gnt-job", "info", str(job_id)])
>> +  execution_logs = job_info[0]["Opcodes"][0]["Execution log"]
>> +
>> +  is_termination_info_fn = \
>> +    lambda e: e["Content"][1] == constants.ELOG_DELAY_TEST
>> +  filtered_logs = filter(is_termination_info_fn, execution_logs)
>> +
>> +  if len(filtered_logs) != 1:
>> +    raise qa_error.Error("Failure when trying to retrieve delay
>> termination "
>> +                         "information")
>> +
>> +  _, _, (socket_path, ) = filtered_logs[0]["Content"]
>> +
>> +  return socket_path
>> +
>> +
>> +def _TerminateDelayFunction(termination_socket):
>> +  """ Terminates the delay function by communicating with the domain
>> socket.
>> +
>> +  """
>> +  AssertCommand("echo a | socat -u stdin UNIX-CLIENT:%s" %
>> termination_socket)
>>
>
> For the QA it's probably completely fine to call an external command. You
> might also want to have a look at Transport in lib/rpc/transport.py, which
> is used in Luxi and WConfd to send a EOM-terminated messages (0x03) over UD
> sockets.
>
>

I have looked at the class, but using it here confers no significant
benefits, while introducing a more complex syntax and tying the QA to the
LUXI protocol. Maybe it's best to keep this line as-is.



> +
>> +
>> +# TODO: Can this be done as a decorator? Implement as needed.
>>
>
> IDK if it makes sense here, but perhaps creating a ContextManager could be
> one option:
> http://docs.python.org/2/reference/compound_stmts.html#the-with-statement/
>

As discussed offline, due to the need to run the code within the block in
another thread, it is probably not an option, unfortunately.


>
>
>
>> +def RunWithLocks(fn, locks, timeout, *args, **kwargs):
>> +  """ Runs the given function, acquiring a set of locks beforehand.
>> +
>> +  @type fn: function
>> +  @param fn: The function to invoke.
>> +  @type locks: dict of string to list of string
>> +  @param locks: The locks to acquire, per lock category.
>> +  @type timeout: number
>> +  @param timeout: The number of seconds the locks should be held before
>> +                  expiring.
>> +
>> +  This function allows a set of locks to be acquired in preparation for
>> a QA
>> +  test, to try and see if the function can run in parallel with other
>> +  operations.
>> +
>> +  The current version simply creates the locks, which expire after a
>> given
>> +  timeout, and attempts to invoke the provided function.
>> +
>> +  This will probably block the QA, and future versions will address this.
>> +
>> +  A default timeout is not provided by design - the test creator must
>> make a
>> +  good conservative estimate.
>> +
>> +  """
>> +  if filter(lambda l_type: l_type not in AVAILABLE_LOCKS, locks):
>> +    raise qa_error.Error("Attempted to acquire locks that cannot yet be "
>> +                         "acquired in the course of a QA test.")
>> +
>> +  # The watcher may interfere by issuing its own jobs - therefore pause
>> it
>> +  AssertCommand(["gnt-cluster", "watcher", "pause", "12h"])
>> +
>> +  termination_socket = _StartDelayFunction(locks, timeout)
>> +
>> +  fn(*args, **kwargs)
>> +
>> +  _TerminateDelayFunction(termination_socket)
>> +
>> +  # Revive the watcher
>> +  AssertCommand(["gnt-cluster", "watcher", "continue"])
>> diff --git a/qa/qa_rapi.py b/qa/qa_rapi.py
>> index 3164cb5..9ad8afb 100644
>> --- a/qa/qa_rapi.py
>> +++ b/qa/qa_rapi.py
>> @@ -52,6 +52,7 @@ import qa_utils
>>  from qa_instance import IsDiskReplacingSupported
>>  from qa_instance import IsFailoverSupported
>>  from qa_instance import IsMigrationSupported
>> +from qa_job_utils import RunWithLocks
>>  from qa_utils import (AssertEqual, AssertIn, AssertMatch,
>> StartLocalCommand)
>>  from qa_utils import InstanceCheck, INST_DOWN, INST_UP, FIRST_ARG
>>
>> @@ -917,7 +918,10 @@ def _InvokeMoveInstance(current_dest_inst,
>> current_src_inst, rapi_pw_filename,
>>        "--dest-secondary-node=%s" % snode,
>>        ])
>>    else:
>> -    cmd.append("--iallocator=%s" % constants.IALLOC_HAIL)
>> +    cmd.extend([
>> +      "--iallocator=%s" % constants.IALLOC_HAIL,
>> +      "--opportunistic-tries=1",
>> +      ])
>>
>>    cmd.extend([
>>      "--net=0:mac=%s" % constants.VALUE_GENERATE,
>> @@ -959,10 +963,14 @@ def TestInterClusterInstanceMove(src_instance,
>> dest_instance,
>>      snode = tnode
>>    pnode = inodes[0]
>>
>> -  # pnode:snode are the *current* nodes, so we move it first to
>> tnode:pnode
>> -  _InvokeMoveInstance(dest_instance.name, src_instance.name,
>> rapi_pw_file.name,
>> -                      master.primary, perform_checks,
>> -                      target_nodes=(tnode.primary, pnode.primary))
>> +  # pnode:snode are the *current* nodes, and the first move is an
>> +  # iallocator-guided move outside of pnode. The node lock for the pnode
>> +  # assures that this happens, and while we cannot be sure where the
>> instance
>> +  # will land, it is a real move.
>> +  locks = {locking.LEVEL_NODE: [pnode.primary]}
>> +  RunWithLocks(_InvokeMoveInstance, locks, 600.0,
>> +               dest_instance.name, src_instance.name, rapi_pw_file.name,
>> +               master.primary, perform_checks)
>>
>>    # And then back to pnode:snode
>>    _InvokeMoveInstance(src_instance.name, dest_instance.name,
>> rapi_pw_file.name,
>> --
>> 1.9.0.rc1.175.g0b1dcb5
>>
>>
> LGTM, thanks.
>
>
Thanks for the review, micro-interdiff is:

diff --git a/qa/qa_job_utils.py b/qa/qa_job_utils.py
index bc4733c..56e4b2e 100644
--- a/qa/qa_job_utils.py
+++ b/qa/qa_job_utils.py
@@ -55,7 +55,7 @@ def _GetOutputFromMaster(cmd):


 def ExecuteJobProducingCommand(cmd):
-  """ Executes a --submit-using command producing a job, and returns an id.
+  """ Executes a command that contains the --submit flag, and returns a
job id.

   @type cmd: list of string
   @param cmd: The command to execute, broken into constituent components.

Reply via email to