On 02/13/22 20:56, Nir Soffer wrote: > Output modules can specify now request_size to override the default > request size in nbdcopy. > > The rhv-upload plugin is translating every NBD command to HTTP request, > translated back to NBD command on imageio server. The HTTP client and > server, and the NBD client on the imageio server side are synchronous > and implemented in python, so they have high overhead per request. To > get good performance we need to use larger request size. > > Testing shows that request size of 4 MiB speeds up the copy disk phase > from 14.7 seconds to 7.9 seconds (1.8x times faster). Request size of 8 > MiB is a little bit faster but is not compatible with VDDK input. > > Here are stats extracted from imageio log when importing Fedora 35 image > with 3 GiB of random data. For each copy, we have 4 connection stats. > > Before: > > connection 1 ops, 14.767843 s > dispatch 4023 ops, 11.427662 s > zero 38 ops, 0.053840 s, 327.91 MiB, 5.95 GiB/s > write 3981 ops, 8.975877 s, 988.61 MiB, 110.14 MiB/s > flush 4 ops, 0.001023 s > > connection 1 ops, 14.770026 s > dispatch 4006 ops, 11.408732 s > zero 37 ops, 0.057205 s, 633.21 MiB, 10.81 GiB/s > write 3965 ops, 8.907420 s, 986.65 MiB, 110.77 MiB/s > flush 4 ops, 0.000280 s > > connection 1 ops, 14.768180 s > dispatch 4057 ops, 11.430712 s > zero 42 ops, 0.030011 s, 470.47 MiB, 15.31 GiB/s > write 4011 ops, 9.002055 s, 996.98 MiB, 110.75 MiB/s > flush 4 ops, 0.000261 s > > connection 1 ops, 14.770744 s > dispatch 4037 ops, 11.462050 s > zero 45 ops, 0.026668 s, 750.82 MiB, 27.49 GiB/s > write 3988 ops, 9.002721 s, 989.36 MiB, 109.90 MiB/s > flush 4 ops, 0.000282 s > > After: > > connection 1 ops, 7.940377 s > dispatch 323 ops, 6.695582 s > zero 37 ops, 0.079958 s, 641.12 MiB, 7.83 GiB/s > write 282 ops, 6.300242 s, 1.01 GiB, 164.54 MiB/s > flush 4 ops, 0.000537 s > > connection 1 ops, 7.908156 s > dispatch 305 ops, 6.643475 s > zero 36 ops, 0.144166 s, 509.43 MiB, 3.45 GiB/s > write 265 ops, 6.179187 s, 941.23 MiB, 152.32 MiB/s > flush 4 ops, 0.000324 s > > connection 1 ops, 7.942349 s > dispatch 325 ops, 6.744800 s > zero 45 ops, 0.185335 s, 622.19 MiB, 3.28 GiB/s > write 276 ops, 6.236819 s, 995.45 MiB, 159.61 MiB/s > flush 4 ops, 0.000369 s > > connection 1 ops, 7.955572 s > dispatch 317 ops, 6.721212 s > zero 43 ops, 0.135771 s, 409.68 MiB, 2.95 GiB/s > write 270 ops, 6.326366 s, 988.26 MiB, 156.21 MiB/s > flush 4 ops, 0.001439 s > --- > > Changes since v1: > > - Decrease request size to 4 MiB for compatibility with VDDK input. > (Richard) > - Reimplement in a nicer way based on > > https://github.com/libguestfs/virt-v2v/commit/08e764959ec9dadd71a95d22d3d88d647a18d165 > (Richard) > > v1 was here: > https://listman.redhat.com/archives/libguestfs/2022-February/msg00183.html > > output/output.ml | 1 + > output/output.mli | 2 ++ > output/output_disk.ml | 2 ++ > output/output_glance.ml | 2 ++ > output/output_json.ml | 2 ++ > output/output_libvirt.ml | 2 ++ > output/output_null.ml | 2 ++ > output/output_openstack.ml | 2 +- > output/output_qemu.ml | 2 ++ > output/output_rhv.ml | 2 ++ > output/output_rhv_upload.ml | 7 +++++++ > output/output_vdsm.ml | 2 ++ > v2v/v2v.ml | 11 ++++++++--- > 13 files changed, 35 insertions(+), 4 deletions(-) > > diff --git a/output/output.ml b/output/output.ml > index 786ee5d5..7256b547 100644 > --- a/output/output.ml > +++ b/output/output.ml > @@ -39,20 +39,21 @@ type options = { > module type OUTPUT = sig > type poptions > type t > val to_string : options -> string > val query_output_options : unit -> unit > val parse_options : options -> Types.source -> poptions > val setup : string -> poptions -> Types.source -> t > val finalize : string -> poptions -> t -> > Types.source -> Types.inspect -> Types.target_meta -> > unit > + val request_size : int option > end > > let error_option_cannot_be_used_in_output_mode mode opt = > error (f_"-o %s: %s option cannot be used in this output mode") mode opt > > let get_disks dir = > let rec loop acc i = > let socket = sprintf "%s/in%d" dir i in > if Sys.file_exists socket then ( > let size = Utils.with_nbd_connect_unix ~socket NBD.get_size in > diff --git a/output/output.mli b/output/output.mli > index eed204ed..8e3efd8e 100644 > --- a/output/output.mli > +++ b/output/output.mli > @@ -52,20 +52,22 @@ module type OUTPUT = sig > > Set up the output mode. Sets up a disk pipeline > [dir // "outX"] for each output disk. *) > > val finalize : string -> poptions -> t -> > Types.source -> Types.inspect -> Types.target_meta -> > unit > (** [finalize dir poptions t inspect target_meta] > > Finalizes the conversion and writes metadata. *) > + > + val request_size : int option > end > > (** Helper functions for output modes. *) > > val error_option_cannot_be_used_in_output_mode : string -> string -> unit > (** [error_option_cannot_be_used_in_output_mode mode option] > prints error message that option cannot be used in this output mode. *) > > val get_disks : string -> (int * int64) list > (** Examines the v2v directory and opens each input socket (in0 etc), > diff --git a/output/output_disk.ml b/output/output_disk.ml > index a05be559..bc5b4e1c 100644 > --- a/output/output_disk.ml > +++ b/output/output_disk.ml > @@ -105,11 +105,13 @@ module Disk = struct > output_format output_name in > > let file = output_storage // output_name ^ ".xml" in > with_open_out file (fun chan -> DOM.doc_to_chan chan doc); > > if verbose () then ( > eprintf "resulting local libvirt XML:\n"; > DOM.doc_to_chan Stdlib.stderr doc; > eprintf "\n%!"; > ) > + > + let request_size = None > end > diff --git a/output/output_glance.ml b/output/output_glance.ml > index 9ca406e8..e6a7f789 100644 > --- a/output/output_glance.ml > +++ b/output/output_glance.ml > @@ -131,11 +131,13 @@ module Glance = struct > properties in > if run_command cmd <> 0 then > error (f_"glance: image upload to glance failed, see earlier > errors"); > > (* Unlink the temporary files as soon as glance has got them. *) > try unlink disk with Unix_error _ -> () > ) source.s_disks; > > (* Remove the temporary directory for the large files. *) > (try rmdir tmpdir with Unix_error _ -> ()) > + > + let request_size = None > end > diff --git a/output/output_json.ml b/output/output_json.ml > index 97883217..6e81b639 100644 > --- a/output/output_json.ml > +++ b/output/output_json.ml > @@ -141,11 +141,13 @@ module Json = struct > output_string Stdlib.stderr doc_string; > eprintf "\n\n%!"; > ); > > let file = output_storage // output_name ^ ".json" in > with_open_out file ( > fun chan -> > output_string chan doc_string; > output_char chan '\n' > ) > + > + let request_size = None > end > diff --git a/output/output_libvirt.ml b/output/output_libvirt.ml > index 2236573f..e0d3432d 100644 > --- a/output/output_libvirt.ml > +++ b/output/output_libvirt.ml > @@ -210,11 +210,13 @@ module Libvirt_ = struct > warning (f_"the target hypervisor does not support a %s KVM guest") > arch; > [] > ) else ( > let node (* first matching <guest> *) = Xml.xpathobj_node obj 0 in > Xml.xpathctx_set_current_context xpathctx node; > > (* Get guest/features/* nodes. *) > let features = xpath_get_nodes xpathctx "features/*" in > List.map Xml.node_name features > ) > + > + let request_size = None > end > diff --git a/output/output_null.ml b/output/output_null.ml > index eeb15653..86d81eaa 100644 > --- a/output/output_null.ml > +++ b/output/output_null.ml > @@ -81,11 +81,13 @@ module Null = struct > List.iter ( > fun (i, _) -> > if i > 0 then ( > let output = sprintf "%s/out%d" dir i in > link socket output > ) > ) disks > > let finalize dir () () source inspect target_meta = > () (* nothing to do *) > + > + let request_size = None > end > diff --git a/output/output_openstack.ml b/output/output_openstack.ml > index 3d7a7882..d0af2ac7 100644 > --- a/output/output_openstack.ml > +++ b/output/output_openstack.ml > @@ -475,12 +475,12 @@ The os-* parameters and environment variables are > optional. > ) volume_ids > > (* UTC conversion time. *) > and iso_time = > let time = time () in > let tm = gmtime time in > sprintf "%04d/%02d/%02d %02d:%02d:%02d" > (tm.tm_year + 1900) (tm.tm_mon + 1) tm.tm_mday > tm.tm_hour tm.tm_min tm.tm_sec > > - > + let request_size = None > end > diff --git a/output/output_qemu.ml b/output/output_qemu.ml > index 873a63b7..f8d2e171 100644 > --- a/output/output_qemu.ml > +++ b/output/output_qemu.ml > @@ -320,11 +320,13 @@ module QEMU = struct > Qemuopts.to_chan cmd chan > ); > > Unix.chmod file 0o755; > > (* If -oo qemu-boot option was specified then we should boot the guest. > *) > if qemu_boot then ( > let cmd = sprintf "%s &" (quote file) in > ignore (shell_command cmd) > ) > + > + let request_size = None > end > diff --git a/output/output_rhv.ml b/output/output_rhv.ml > index 022d96e3..119207fd 100644 > --- a/output/output_rhv.ml > +++ b/output/output_rhv.ml > @@ -275,11 +275,13 @@ module RHV = struct > Create_ovf.create_ovf source inspect target_meta sizes > output_alloc output_format output_name esd_uuid image_uuids vol_uuids > ~need_actual_sizes:true dir vm_uuid > Create_ovf.RHVExportStorageDomain in > > (* Write it to the metadata file. *) > let dir = esd_mp // esd_uuid // "master" // "vms" // vm_uuid in > Changeuid.mkdir changeuid_t dir 0o755; > let file = dir // vm_uuid ^ ".ovf" in > Changeuid.output changeuid_t file (fun chan -> DOM.doc_to_chan chan ovf) > + > + let request_size = None > end > diff --git a/output/output_rhv_upload.ml b/output/output_rhv_upload.ml > index 49f13099..7c2434bd 100644 > --- a/output/output_rhv_upload.ml > +++ b/output/output_rhv_upload.ml > @@ -477,11 +477,18 @@ e command line has to match the number of guest disk > images (for this guest: %d) > let json_params = > match rhv_cluster_uuid with > | None -> assert false > | Some uuid -> ("rhv_cluster_uuid", JSON.String uuid) :: json_params in > > let ovf_file = dir // "vm.ovf" in > with_open_out ovf_file (fun chan -> output_string chan ovf); > if Python_script.run_command createvm_script json_params [ovf_file] <> 0 > then > error (f_"failed to create virtual machine, see earlier errors") > + > + (* The imageio server has high overhead per request. Using 4 MiB > + * request size is 1.8x times faster compared with nbdcopy default > + * request size (256k). Request size of 8 MiB is a little bit faster > + * but is is not compatible with VDDK input. > + *) > + let request_size = Some (4*1024*1024) > end > diff --git a/output/output_vdsm.ml b/output/output_vdsm.ml > index 14cbb961..a1e8c246 100644 > --- a/output/output_vdsm.ml > +++ b/output/output_vdsm.ml > @@ -218,11 +218,13 @@ For each disk you must supply one of each of these > options: > output_alloc output_format output_name dd_uuid > image_uuids > vol_uuids > dir > vm_uuid > ovf_flavour in > > (* Write it to the metadata file. *) > let file = ovf_output // vm_uuid ^ ".ovf" in > with_open_out file (fun chan -> DOM.doc_to_chan chan ovf) > + > + let request_size = None > end > diff --git a/v2v/v2v.ml b/v2v/v2v.ml > index fddb0742..cadf864d 100644 > --- a/v2v/v2v.ml > +++ b/v2v/v2v.ml > @@ -583,32 +583,33 @@ read the man page virt-v2v(1). > List.rev acc > in > let disks = loop [] 0 in > let nr_disks = List.length disks in > > (* Copy the disks. *) > List.iter ( > fun (i, input_socket, output_socket) -> > message (f_"Copying disk %d/%d") (i+1) nr_disks; > > - let input_uri = nbd_uri_of_socket input_socket > + let request_size = Output_module.request_size > + and input_uri = nbd_uri_of_socket input_socket > and output_uri = nbd_uri_of_socket output_socket in > > (* In verbose mode print some information about each > * side of the pipeline. > *) > if verbose () then ( > nbdinfo ~content:true input_uri; > nbdinfo ~content:false output_uri > ); > > - nbdcopy output_alloc input_uri output_uri > + nbdcopy ?request_size output_alloc input_uri output_uri > ) disks; > > (* End of copying phase. *) > unlink (tmpdir // "copy"); > > (* Do the finalization step. *) > message (f_"Creating output metadata"); > Output_module.finalize tmpdir output_poptions output_t > source inspect target_meta; > > @@ -627,26 +628,30 @@ read the man page virt-v2v(1). > * appliance may be created there. (RHBZ#1316479, RHBZ#2051394) > *) > and check_host_free_space () = > let free_space = StatVFS.free_space (StatVFS.statvfs large_tmpdir) in > debug "check_host_free_space: large_tmpdir=%s free_space=%Ld" > large_tmpdir free_space; > if free_space < 1_073_741_824L then > error (f_"insufficient free space in the conversion server temporary > directory %s (%s).\n\nEither free up space in that directory, or set the > LIBGUESTFS_CACHEDIR environment variable to point to another directory with > more than 1GB of free space.\n\nSee also the virt-v2v(1) manual, section > \"Minimum free space check in the host\".") > large_tmpdir (human_size free_space) > > -and nbdcopy output_alloc input_uri output_uri = > +and nbdcopy ?request_size output_alloc input_uri output_uri = > (* XXX It's possible that some output modes know whether > * --target-is-zero which would be a useful optimization. > *) > let cmd = ref [] in > List.push_back_list cmd [ "nbdcopy"; input_uri; output_uri ]; > + (match request_size with > + | None -> () > + | Some size -> List.push_back cmd (sprintf "--request-size=%d" size) > + ); > List.push_back cmd "--flush"; > (*List.push_back cmd "--verbose";*) > if not (quiet ()) then List.push_back cmd "--progress"; > if output_alloc = Types.Preallocated then List.push_back cmd "--allocated"; > let cmd = !cmd in > > if run_command cmd <> 0 then > error (f_"nbdcopy command failed, see earlier error messages") > > (* Run nbdinfo on a URI and dump the information to stderr. >
Acked-by: Laszlo Ersek <ler...@redhat.com> _______________________________________________ Libguestfs mailing list Libguestfs@redhat.com https://listman.redhat.com/mailman/listinfo/libguestfs