Hi, On 25/02/14 10:09, Stefan Hajnoczi wrote: > The nbd-fault-injector.py script is a special kind of NBD server. It > throws away all writes and produces zeroes for reads. Given a list of > fault injection rules, it can simulate NBD protocol errors and is useful > for testing NBD client error handling code paths. > > See the patch for documentation. This scripts is modelled after Kevin > Wolf <kw...@redhat.com>'s blkdebug block driver. > > Signed-off-by: Stefan Hajnoczi <stefa...@redhat.com> > --- > tests/qemu-iotests/nbd-fault-injector.py | 231 > +++++++++++++++++++++++++++++++ > 1 file changed, 231 insertions(+) > create mode 100755 tests/qemu-iotests/nbd-fault-injector.py > > diff --git a/tests/qemu-iotests/nbd-fault-injector.py > b/tests/qemu-iotests/nbd-fault-injector.py > new file mode 100755 > index 0000000..b4011f4 > --- /dev/null > +++ b/tests/qemu-iotests/nbd-fault-injector.py > @@ -0,0 +1,231 @@ > +#!/usr/bin/env python > +# NBD server - fault injection utility > +# > +# Configuration file syntax: > +# [inject-error "disconnect-neg1"] > +# event=neg1 > +# io=readwrite > +# when=before > +# > +# Note that Python's ConfigParser squashes together all sections with the > same > +# name, so give each [inject-error] a unique name. > +# > +# inject-error options: > +# event - name of the trigger event > +# "neg1" - first part of negotiation struct > +# "export" - export struct > +# "neg2" - second part of negotiation struct > +# "request" - NBD request struct > +# "reply" - NBD reply struct > +# "data" - request/reply data > +# io - I/O direction that triggers this rule: > +# "read", "write", or "readwrite" > +# default: readwrite > +# when - when to inject the fault relative to the I/O operation > +# "before" or "after" > +# default: before > +# > +# Currently the only error injection action is to terminate the server > process. > +# This resets the TCP connection and thus forces the client to handle > +# unexpected connection termination. 
> +# > +# Other error injection actions could be added in the future. > +# > +# Copyright Red Hat, Inc. 2014 > +# > +# Authors: > +# Stefan Hajnoczi <stefa...@redhat.com> > +# > +# This work is licensed under the terms of the GNU GPL, version 2 or later. > +# See the COPYING file in the top-level directory. > + > +import sys > +import socket > +import struct > +import collections > +import ConfigParser > + > +# Protocol constants > +NBD_CMD_READ = 0 > +NBD_CMD_WRITE = 1 > +NBD_CMD_DISC = 2 > +NBD_REQUEST_MAGIC = 0x25609513 > +NBD_REPLY_MAGIC = 0x67446698 > +NBD_PASSWD = 0x4e42444d41474943 > +NBD_OPTS_MAGIC = 0x49484156454F5054 > +NBD_OPT_EXPORT_NAME = 1 << 0 > + > +# Protocol structs > +neg1_struct = struct.Struct('>QQH') > +export_tuple = collections.namedtuple('Export', 'reserved magic opt len') > +export_struct = struct.Struct('>IQII') > +neg2_struct = struct.Struct('>QH124x') > +request_tuple = collections.namedtuple('Request', 'magic type handle from_ > len') > +request_struct = struct.Struct('>IIQQI') > +reply_struct = struct.Struct('>IIQ') > + > +def err(msg): > + sys.stderr.write(msg + '\n') > + sys.exit(1) > + > +def recvall(sock, bufsize): > + received = 0 > + chunks = [] > + while received < bufsize: > + chunk = sock.recv(bufsize - received) > + if len(chunk) == 0: > + raise Exception('unexpected disconnect') > + chunks.append(chunk) > + received += len(chunk) > + return ''.join(chunks) > + > +class Rule(object): > + def __init__(self, name, event, io, when): > + self.name = name > + self.event = event > + self.io = io > + self.when = when > + > + def match(self, event, io, when): > + if when != self.when: > + return False > + if event != self.event: > + return False > + if io != self.io and self.io != 'readwrite': > + return False > + return True > + > +class FaultInjectionSocket(object): > + def __init__(self, sock, rules): > + self.sock = sock > + self.rules = rules > + > + def check(self, event, io, when): > + for rule in self.rules: > + if 
rule.match(event, io, when): > + print 'Closing connection on rule match %s' % rule.name > + sys.exit(0) > + > + def send(self, buf, event): > + self.check(event, 'write', 'before') > + self.sock.sendall(buf) > + self.check(event, 'write', 'after') > + > + def recv(self, bufsize, event): > + self.check(event, 'read', 'before') > + data = recvall(self.sock, bufsize) > + self.check(event, 'read', 'after') > + return data
There's a class of error I recently encountered in our out-of-tree proxy component that only shows up if a read or write is interrupted partway through. Perhaps you could add a "during" event here that fires after bufsize/2 bytes have been read or written? I've not looked at qemu's block/nbd code recently, so I don't know whether that would exercise a particular failure path. > + def close(self): > + self.sock.close() > + > +def negotiate(conn): > + '''Negotiate export with client''' > + # Send negotiation part 1 > + buf = neg1_struct.pack(NBD_PASSWD, NBD_OPTS_MAGIC, 0) > + conn.send(buf, event='neg1') > + > + # Receive export option > + buf = conn.recv(export_struct.size, event='export') > + export = export_tuple._make(export_struct.unpack(buf)) > + assert export.magic == NBD_OPTS_MAGIC > + assert export.opt == NBD_OPT_EXPORT_NAME > + name = conn.recv(export.len, event='export-name') > + > + # Send negotiation part 2 > + buf = neg2_struct.pack(8 * 1024 * 1024 * 1024, 0) # 8 GB capacity > + conn.send(buf, event='neg2') Is it worth exercising the old-style negotiation too? 
> +def read_request(conn): > + '''Parse NBD request from client''' > + buf = conn.recv(request_struct.size, event='request') > + req = request_tuple._make(request_struct.unpack(buf)) > + assert req.magic == NBD_REQUEST_MAGIC > + return req > + > +def write_reply(conn, error, handle): > + buf = reply_struct.pack(NBD_REPLY_MAGIC, error, handle) > + conn.send(buf, event='reply') > + > +def handle_connection(conn): > + negotiate(conn) > + while True: > + req = read_request(conn) > + if req.type == NBD_CMD_READ: > + write_reply(conn, 0, req.handle) > + conn.send('\0' * req.len, event='data') > + elif req.type == NBD_CMD_WRITE: > + _ = conn.recv(req.len, event='data') > + write_reply(conn, 0, req.handle) > + elif req.type == NBD_CMD_DISC: > + break > + else: > + print 'unrecognized command type %#02x' % req.type > + break > + conn.close() > + > +def run_server(sock, rules): > + while True: > + conn, _ = sock.accept() > + handle_connection(FaultInjectionSocket(conn, rules)) > + > +def parse_inject_error(name, options): > + if 'event' not in options: > + err('missing \"event\" option in %s' % name) > + event = options['event'] > + if event not in ('neg1', 'export', 'neg2', 'request', 'reply', 'data'): > + err('invalid \"event\" option value \"%s\" in %s' % (event, name)) > + io = options.get('io', 'readwrite') > + if io not in ('read', 'write', 'readwrite'): > + err('invalid \"io\" option value \"%s\" in %s' % (io, name)) > + when = options.get('when', 'before') > + if when not in ('before', 'after'): > + err('invalid \"when\" option value \"%s\" in %s' % (when, name)) > + return Rule(name, event, io, when) > + > +def parse_config(config): > + rules = [] > + for name in config.sections(): > + if name.startswith('inject-error'): > + options = dict(config.items(name)) > + rules.append(parse_inject_error(name, options)) > + else: > + err('invalid config section name: %s' % name) > + return rules > + > +def load_rules(filename): > + config = ConfigParser.RawConfigParser() > + 
with open(filename, 'rt') as f: > + config.readfp(f, filename) > + return parse_config(config) > + > +def open_socket(path): > + '''Open a TCP or UNIX domain listen socket''' > + if ':' in path: > + host, port = path.split(':', 1) > + sock = socket.socket() > + sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) > + sock.bind((host, int(port))) > + else: > + sock = socket.socket(socket.AF_UNIX) > + sock.bind(path) > + sock.listen(0) > + print 'Listening on %s' % path > + return sock > + > +def usage(args): > + sys.stderr.write('usage: %s <tcp-port>|<unix-path> <config-file>\n' % > args[0]) > + sys.stderr.write('Run an fault injector NBD server with rules defined in > a config file.\n') > + sys.exit(1) > + > +def main(args): > + if len(args) != 3: > + usage(args) > + sock = open_socket(args[1]) > + rules = load_rules(args[2]) > + run_server(sock, rules) > + return 0 > + > +if __name__ == '__main__': > + sys.exit(main(sys.argv)) >