Alright, with a D06 aarch64 machine I was able to reproduce it after 8 attempts. I'll debug it today and provide feedback on my findings.
(gdb) bt full #0 0x0000ffffb0b2181c in __GI_ppoll (fds=0xaaaace5ab770, nfds=4, timeout=<optimized out>, timeout@entry=0x0, sigmask=sigmask@entry=0x0) at ../sysdeps/unix/sysv/linux/ppoll.c:39 _x3tmp = 0 _x0tmp = 187650583213936 _x0 = 187650583213936 _x3 = 0 _x4tmp = 8 _x1tmp = 4 _x1 = 4 _x4 = 8 _x2tmp = <optimized out> _x2 = 0 _x8 = 73 _sys_result = <optimized out> _sys_result = <optimized out> sc_cancel_oldtype = 0 sc_ret = <optimized out> tval = {tv_sec = 0, tv_nsec = 187650583137792} #1 0x0000aaaacd2a773c in ppoll (__ss=0x0, __timeout=0x0, __nfds=<optimized out>, __fds=<optimized out>) at /usr/include/aarch64-linux-gnu/bits/poll2.h:77 No locals. #2 qemu_poll_ns (fds=<optimized out>, nfds=<optimized out>, timeout=timeout@entry=-1) at ./util/qemu-timer.c:322 No locals. #3 0x0000aaaacd2a8764 in os_host_main_loop_wait (timeout=-1) at ./util/main-loop.c:233 context = 0xaaaace599d90 ret = <optimized out> context = <optimized out> ret = <optimized out> #4 main_loop_wait (nonblocking=<optimized out>) at ./util/main-loop.c:497 ret = <optimized out> timeout = 4294967295 timeout_ns = <optimized out> #5 0x0000aaaacd1df454 in convert_do_copy (s=0xfffff9b2b1d8) at ./qemu-img.c:1981 ret = <optimized out> i = <optimized out> n = <optimized out> sector_num = <optimized out> ret = <optimized out> i = <optimized out> n = <optimized out> sector_num = <optimized out> #6 img_convert (argc=<optimized out>, argv=<optimized out>) at ./qemu-img.c:2457 c = <optimized out> bs_i = <optimized out> flags = 16898 src_flags = 0 fmt = 0xfffff9b2bad1 "qcow2" out_fmt = <optimized out> cache = 0xaaaacd2cb1c8 "unsafe" src_cache = 0xaaaacd2ca9c0 "writeback" out_baseimg = <optimized out> out_filename = <optimized out> out_baseimg_param = <optimized out> snapshot_name = 0x0 drv = <optimized out> proto_drv = <optimized out> bdi = {cluster_size = 65536, vm_state_offset = 32212254720, is_dirty = false, unallocated_blocks_are_zero = true, needs_compressed_writes = false} out_bs = <optimized out> opts = 
0xaaaace5ab390 sn_opts = 0x0 create_opts = 0xaaaace5ab0c0 open_opts = <optimized out> options = 0x0 local_err = 0x0 writethrough = false src_writethrough = false quiet = <optimized out> image_opts = false skip_create = false progress = <optimized out> tgt_image_opts = false ret = <optimized out> force_share = false explict_min_sparse = false s = {src = 0xaaaace577240, src_sectors = 0xaaaace577300, src_num = 1, total_sectors = 62914560, allocated_sectors = 9572096, allocated_done = 6541440, sector_num = 8863744, wr_offs = 8859776, status = BLK_DATA, sector_next_status = 8863744, target = 0xaaaace5bd2a0, has_zero_init = true, compressed = false, unallocated_blocks_are_zero = true, target_has_backing = false, target_backing_sectors = -1, wr_in_order = true, copy_range = false, min_sparse = 8, alignment = 8, cluster_sectors = 128, buf_sectors = 4096, num_coroutines = 8, running_coroutines = 8, co = {0xaaaace5ceda0, 0xaaaace5cef50, 0xaaaace5cf100, 0xaaaace5cf2b0, 0xaaaace5cf460, 0xaaaace5cf610, 0xaaaace5cf7c0, 0xaaaace5cf970, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, wait_sector_num = {-1, 8859904, 8860928, 8863360, 8861952, 8862976, 8862592, 8861440, 0, 0, 0, 0, 0, 0, 0, 0}, lock = {locked = 0, ctx = 0x0, from_push = {slh_first = 0x0}, to_pop = {slh_first = 0x0}, handoff = 0, sequence = 0, holder = 0x0}, ret = -115} __PRETTY_FUNCTION__ = "img_convert" #7 0x0000aaaacd1d8400 in main (argc=7, argv=<optimized out>) at ./qemu-img.c:4976 cmd = 0xaaaacd34ad78 <img_cmds+80> cmdname = <optimized out> local_error = 0x0 trace_file = 0x0 c = <optimized out> long_options = {{name = 0xaaaacd2cbbb0 "help", has_arg = 0, flag = 0x0, val = 104}, {name = 0xaaaacd2cbc78 "version", has_arg = 0, flag = 0x0, val = 86}, {name = 0xaaaacd2cbc80 "trace", has_arg = 1, flag = 0x0, val = 84}, {name = 0x0, has_arg = 0, flag = 0x0, val = 0}} -- You received this bug notification because you are a member of qemu-devel-ml, which is subscribed to QEMU. 
https://bugs.launchpad.net/bugs/1805256 Title: qemu-img hangs on high core count ARM system Status in QEMU: Confirmed Status in qemu package in Ubuntu: In Progress Bug description: On the HiSilicon D06 system - a 96 core NUMA arm64 box - qemu-img frequently hangs (~50% of the time) with this command: qemu-img convert -f qcow2 -O qcow2 /tmp/cloudimg /tmp/cloudimg2 Where "cloudimg" is a standard qcow2 Ubuntu cloud image. This qcow2->qcow2 conversion happens to be something uvtool does every time it fetches images. Once hung, attaching gdb gives the following backtrace: (gdb) bt #0 0x0000ffffae4f8154 in __GI_ppoll (fds=0xaaaae8a67dc0, nfds=187650274213760, timeout=<optimized out>, timeout@entry=0x0, sigmask=0xffffc123b950) at ../sysdeps/unix/sysv/linux/ppoll.c:39 #1 0x0000aaaabbefaf00 in ppoll (__ss=0x0, __timeout=0x0, __nfds=<optimized out>, __fds=<optimized out>) at /usr/include/aarch64-linux-gnu/bits/poll2.h:77 #2 qemu_poll_ns (fds=<optimized out>, nfds=<optimized out>, timeout=timeout@entry=-1) at util/qemu-timer.c:322 #3 0x0000aaaabbefbf80 in os_host_main_loop_wait (timeout=-1) at util/main-loop.c:233 #4 main_loop_wait (nonblocking=<optimized out>) at util/main-loop.c:497 #5 0x0000aaaabbe2aa30 in convert_do_copy (s=0xffffc123bb58) at qemu-img.c:1980 #6 img_convert (argc=<optimized out>, argv=<optimized out>) at qemu-img.c:2456 #7 0x0000aaaabbe2333c in main (argc=7, argv=<optimized out>) at qemu-img.c:4975 Reproduced w/ latest QEMU git (@ 53744e0a182) To manage notifications about this bug go to: https://bugs.launchpad.net/qemu/+bug/1805256/+subscriptions