Re: [Qemu-devel] [kvm-unit-tests PATCH v8 2/3] arm: pmu: Check cycle count increases
On Tue, Nov 08, 2016 at 12:17:14PM -0600, Wei Huang wrote: > From: Christopher Covington > > Ensure that reads of the PMCCNTR_EL0 are monotonically increasing, > even for the smallest delta of two subsequent reads. > > Signed-off-by: Christopher Covington > Signed-off-by: Wei Huang > --- > arm/pmu.c | 98 > +++ > 1 file changed, 98 insertions(+) > > diff --git a/arm/pmu.c b/arm/pmu.c > index 0b29088..d5e3ac3 100644 > --- a/arm/pmu.c > +++ b/arm/pmu.c > @@ -14,6 +14,7 @@ > */ > #include "libcflat.h" > > +#define PMU_PMCR_E (1 << 0) > #define PMU_PMCR_N_SHIFT 11 > #define PMU_PMCR_N_MASK0x1f > #define PMU_PMCR_ID_SHIFT 16 > @@ -21,6 +22,10 @@ > #define PMU_PMCR_IMP_SHIFT 24 > #define PMU_PMCR_IMP_MASK 0xff > > +#define PMU_CYCLE_IDX 31 > + > +#define NR_SAMPLES 10 > + > #if defined(__arm__) > static inline uint32_t pmcr_read(void) > { > @@ -29,6 +34,47 @@ static inline uint32_t pmcr_read(void) > asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r" (ret)); > return ret; > } > + > +static inline void pmcr_write(uint32_t value) > +{ > + asm volatile("mcr p15, 0, %0, c9, c12, 0" : : "r" (value)); > +} > + > +static inline void pmselr_write(uint32_t value) > +{ > + asm volatile("mcr p15, 0, %0, c9, c12, 5" : : "r" (value)); > +} > + > +static inline void pmxevtyper_write(uint32_t value) > +{ > + asm volatile("mcr p15, 0, %0, c9, c13, 1" : : "r" (value)); > +} > + > +/* > + * While PMCCNTR can be accessed as a 64 bit coprocessor register, returning > 64 > + * bits doesn't seem worth the trouble when differential usage of the result > is > + * expected (with differences that can easily fit in 32 bits). So just return > + * the lower 32 bits of the cycle count in AArch32. Like I said in the last review, I'd rather we not do this. We should return the full value and then the test case should confirm the upper 32 bits are zero. > + */ > +static inline uint32_t pmccntr_read(void) > +{ > + uint32_t cycles; > + > + asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r" (cycles)); > + return cycles; > +} > + > +static inline void pmcntenset_write(uint32_t value) > +{ > + asm volatile("mcr p15, 0, %0, c9, c12, 1" : : "r" (value)); > +} > + > +/* PMCCFILTR is an obsolete name for PMXEVTYPER31 in ARMv7 */ > +static inline void pmccfiltr_write(uint32_t value) > +{ > + pmselr_write(PMU_CYCLE_IDX); > + pmxevtyper_write(value); > +} > #elif defined(__aarch64__) > static inline uint32_t pmcr_read(void) > { > @@ -37,6 +83,29 @@ static inline uint32_t pmcr_read(void) > asm volatile("mrs %0, pmcr_el0" : "=r" (ret)); > return ret; > } > + > +static inline void pmcr_write(uint32_t value) > +{ > + asm volatile("msr pmcr_el0, %0" : : "r" (value)); > +} > + > +static inline uint32_t pmccntr_read(void) > +{ > + uint32_t cycles; > + > + asm volatile("mrs %0, pmccntr_el0" : "=r" (cycles)); > + return cycles; > +} > + > +static inline void pmcntenset_write(uint32_t value) > +{ > + asm volatile("msr pmcntenset_el0, %0" : : "r" (value)); > +} > + > +static inline void pmccfiltr_write(uint32_t value) > +{ > + asm volatile("msr pmccfiltr_el0, %0" : : "r" (value)); > +} > #endif > > /* > @@ -63,11 +132,40 @@ static bool check_pmcr(void) > return ((pmcr >> PMU_PMCR_IMP_SHIFT) & PMU_PMCR_IMP_MASK) != 0; > } > > +/* > + * Ensure that the cycle counter progresses between back-to-back reads. > + */ > +static bool check_cycles_increase(void) > +{ > + pmcr_write(pmcr_read() | PMU_PMCR_E); > + > + for (int i = 0; i < NR_SAMPLES; i++) { > + unsigned long a, b; > + > + a = pmccntr_read(); > + b = pmccntr_read(); > + > + if (a >= b) { > + printf("Read %ld then %ld.\n", a, b); > + return false; > + } > + } > + > + pmcr_write(pmcr_read() & ~PMU_PMCR_E); > + > + return true; > +} > + > int main(void) > { > report_prefix_push("pmu"); > > + /* init for PMU event access, right now only care about cycle count */ > + pmcntenset_write(1 << PMU_CYCLE_IDX); > + pmccfiltr_write(0); /* count cycles in EL0, EL1, but not EL2 */ > + > report("Control register", check_pmcr()); > + report("Monotonically increasing cycle count", check_cycles_increase()); > > return report_summary(); > } > -- > 1.8.3.1 Besides needing to use u64's for registers that return u64's, it looks good to me. drew
Re: [Qemu-devel] [kvm-unit-tests PATCH v8 1/3] arm: Add PMU test
On Tue, Nov 08, 2016 at 12:17:13PM -0600, Wei Huang wrote: > From: Christopher Covington > > Beginning with a simple sanity check of the control register, add > a unit test for the ARM Performance Monitors Unit (PMU). > > Signed-off-by: Christopher Covington > Signed-off-by: Wei Huang > --- > arm/Makefile.common | 3 ++- > arm/pmu.c | 73 > + > arm/unittests.cfg | 5 > 3 files changed, 80 insertions(+), 1 deletion(-) > create mode 100644 arm/pmu.c Reviewed-by: Andrew Jones
Re: [Qemu-devel] [PATCH v2] target-m68k: add rol/ror/roxl/roxr instructions
On 11/10/2016 11:51 PM, Laurent Vivier wrote: +/* Result of rotate_x() is valid if 0 < shift < (size + 1) < 32 */ +static TCGv rotate_x(TCGv dest, TCGv src, TCGv shift, int left, int size) +{ +TCGv X, shl, shr, shx; + +shr = tcg_temp_new(); +shl = tcg_temp_new(); +shx = tcg_temp_new(); +if (left) { +tcg_gen_mov_i32(shl, shift); /* shl = shift */ +tcg_gen_movi_i32(shr, size + 1); +tcg_gen_sub_i32(shr, shr, shift); /* shr = size + 1 - shift */ +tcg_gen_subi_i32(shx, shift, 1); /* shx = shift - 1 */ You're failing to bound shx properly. If shift == 0, you produce -1 here which is illegal. You could mask this to 31, but it's even better to produce size, as I said before, because then you don't have to have +tcg_gen_movcond_i32(TCG_COND_EQ, X, +t1, zero, +QREG_CC_X, X); +tcg_gen_movcond_i32(TCG_COND_EQ, DREG(insn, 0), +t1, zero, +DREG(insn, 0), res); this at the end. A zero shift will simply leave everything where it belongs, and you extract all the parts as normal. +DISAS_INSN(rotate_mem) +{ +TCGv src; +TCGv addr; +TCGv shift; +int left = (insn & 0x100); + +SRC_EA(env, src, OS_WORD, 0, &addr); + +shift = tcg_const_i32(1); +if (insn & 8) { It's bit 9 for memory rotate. r~
Re: [Qemu-devel] [PULL 40/47] megasas: undo the overwrites of msi user configuration
On 11/10/2016 05:14 PM, Michael S. Tsirkin wrote: > From: Cao jin > > Commit afea4e14 seems forgetting to undo the overwrites, which is > unsuitable. > > CC: Hannes Reinecke > CC: Paolo Bonzini > CC: Markus Armbruster > CC: Marcel Apfelbaum > CC: Michael S. Tsirkin > > Reviewed-by: Markus Armbruster > Signed-off-by: Cao jin > Acked-by: Marcel Apfelbaum > Reviewed-by: Michael S. Tsirkin > Signed-off-by: Michael S. Tsirkin > --- > hw/scsi/megasas.c | 7 +++ > 1 file changed, 3 insertions(+), 4 deletions(-) > Reviewed-by: Hannes Reinecke Cheers, Hannes -- Dr. Hannes ReineckeTeamlead Storage & Networking h...@suse.de +49 911 74053 688 SUSE LINUX GmbH, Maxfeldstr. 5, 90409 Nürnberg GF: F. Imendörffer, J. Smithard, J. Guild, D. Upmanyu, G. Norton HRB 21284 (AG Nürnberg)
Re: [Qemu-devel] [PULL 39/47] megasas: remove unnecessary megasas_use_msix()
On 11/10/2016 05:14 PM, Michael S. Tsirkin wrote: > From: Cao jin > > Also move certain hunk above, to place msix init related code together. > > CC: Hannes Reinecke > CC: Paolo Bonzini > CC: Markus Armbruster > CC: Marcel Apfelbaum > CC: Michael S. Tsirkin > > Reviewed-by: Markus Armbruster > Signed-off-by: Cao jin > Acked-by: Marcel Apfelbaum > Reviewed-by: Michael S. Tsirkin > Signed-off-by: Michael S. Tsirkin > --- > hw/scsi/megasas.c | 19 ++- > 1 file changed, 6 insertions(+), 13 deletions(-) > Reviewed-by: Hannes Reinecke Cheers, Hannes -- Dr. Hannes ReineckeTeamlead Storage & Networking h...@suse.de +49 911 74053 688 SUSE LINUX GmbH, Maxfeldstr. 5, 90409 Nürnberg GF: F. Imendörffer, J. Smithard, J. Guild, D. Upmanyu, G. Norton HRB 21284 (AG Nürnberg)
Re: [Qemu-devel] [PULL 37/47] megasas: change behaviour of msix switch
On 11/10/2016 05:14 PM, Michael S. Tsirkin wrote: > From: Cao jin > > Resolve the TODO, msix=auto means msix on; if user specify msix=on, > then device creation fail on msix_init failure. > Also undo the overwrites of user configuration of msix. > > CC: Michael S. Tsirkin > CC: Hannes Reinecke > CC: Paolo Bonzini > CC: Markus Armbruster > CC: Marcel Apfelbaum > > Reviewed-by: Markus Armbruster > Signed-off-by: Cao jin > Acked-by: Marcel Apfelbaum > Reviewed-by: Michael S. Tsirkin > Signed-off-by: Michael S. Tsirkin > --- > hw/scsi/megasas.c | 28 > 1 file changed, 20 insertions(+), 8 deletions(-) > Reviewed-by: Hannes Reinecke Cheers, Hannes -- Dr. Hannes ReineckeTeamlead Storage & Networking h...@suse.de +49 911 74053 688 SUSE LINUX GmbH, Maxfeldstr. 5, 90409 Nürnberg GF: F. Imendörffer, J. Smithard, J. Guild, D. Upmanyu, G. Norton HRB 21284 (AG Nürnberg)
Re: [Qemu-devel] [PULL 36/47] pci: Convert msix_init() to Error and fix callers to check it
On 11/10/2016 05:14 PM, Michael S. Tsirkin wrote: > From: Cao jin > > msix_init() reports errors with error_report(), which is wrong when > it's used in realize(). The same issue was fixed for msi_init() in > commit 1108b2f. > > For some devices(like e1000e, vmxnet3) who won't fail because of > msix_init's failure, suppress the error report by passing NULL error object. > > Bonus: add comment for msix_init. > > CC: Jiri Pirko > CC: Gerd Hoffmann > CC: Dmitry Fleytman > CC: Jason Wang > CC: Michael S. Tsirkin > CC: Hannes Reinecke > CC: Paolo Bonzini > CC: Alex Williamson > CC: Markus Armbruster > CC: Marcel Apfelbaum > > Reviewed-by: Markus Armbruster > Signed-off-by: Cao jin > Acked-by: Marcel Apfelbaum > Reviewed-by: Michael S. Tsirkin > Signed-off-by: Michael S. Tsirkin > --- > include/hw/pci/msix.h | 5 +++-- > hw/block/nvme.c| 5 - > hw/misc/ivshmem.c | 8 > hw/net/e1000e.c| 6 +- > hw/net/rocker/rocker.c | 7 ++- > hw/net/vmxnet3.c | 8 ++-- > hw/pci/msix.c | 34 +- > hw/scsi/megasas.c | 5 - > hw/usb/hcd-xhci.c | 13 - > hw/vfio/pci.c | 8 ++-- > hw/virtio/virtio-pci.c | 11 +-- > 11 files changed, 80 insertions(+), 30 deletions(-) > Reviewed-by: Hannes Reinecke Cheers, Hannes -- Dr. Hannes ReineckeTeamlead Storage & Networking h...@suse.de +49 911 74053 688 SUSE LINUX GmbH, Maxfeldstr. 5, 90409 Nürnberg GF: F. Imendörffer, J. Smithard, J. Guild, D. Upmanyu, G. Norton HRB 21284 (AG Nürnberg)
Re: [Qemu-devel] alpha platform is missing files after initrd load
Am 09.11.2016 um 07:34 schrieb Dennis Luehring: im using cross-linux-from-scratch manual/script for building the non-x86_64 platforms fyi after using the crosscompilers from the gcc-5-crossports packages (http://packages.ubuntu.com/source/xenial/gcc-5-cross-ports) alpha-linux-gnu-gcc-5 sparc64-linux-gnu-gcc-5 mips64-linux-gnuabi64-gcc-5 powerpc64-linux-gnu-gcc-5 sadly no sparc, arm gccs :) kompiling the kernels using my standard configs and init-programm alpha - my small initrd cpio boots without "junk in compressed archive" message mips64 - does not compile ppc64 - does not boot sparc64 - my small initrd cpio boots without "junk in compressed archive" message so i only get these "junk in compressed archive" messages with my own gcc builds and whats also interesting is that the excutables are 10 times smaller (6MB:59MB alpha kernel...) using the prebuild compilers so i think the warning that it is not easy to build working cross compilers for nearly all the qemu platforms from source seems to be true and i need to check my gcc compilation settings against the prebuild ones - never thought it would be that hard to build linux kernels based on self compiled gccs... in such limited spare time :) but thx for all your help
[Qemu-devel] ARM documentation
I was wondering if there is a list somewhere of all the ARM boards QEMU supports. I want to add a section to the ARM wiki page that lists at least a few of them.
[Qemu-devel] More platforms for the wiki
http://fossboss.com/2016/08/13/use-qemu-test-operating-systems- distributions/ On this page I found a huge list of QEMU emulators. I haven't heard of most of them, but some of them are not on the platforms page. Do you think we should add the missing ones? qemu-system-aarch64 qemu-system-alpha qemu-system-arm qemu-system-cris qemu-system-i386 qemu-system-lm32 qemu-system-m68k qemu-system-microblaze qemu-system-microblazeel qemu-system-mips qemu-system-mips64 qemu-system-mips64el qemu-system-mipsel qemu-system-moxie qemu-system-or32 qemu-system-ppc qemu-system-ppc64 qemu-system-ppc64le qemu-system-ppcemb qemu-system-sh4 qemu-system-sh4eb qemu-system-sparc qemu-system-sparc64 qemu-system-tricore qemu-system-unicore32 qemu-system-x86_64 qemu-system-x86_64-spice qemu-system-xtensa qemu-system-xtensaeb
Re: [Qemu-devel] [PATCH V2 07/11] virtio-pci: address space translation service (ATS) support
On 2016年11月11日 11:49, Michael S. Tsirkin wrote: On Fri, Nov 11, 2016 at 11:26:12AM +0800, Jason Wang wrote: > > >On 2016年11月11日 01:32, Michael S. Tsirkin wrote: > >On Fri, Nov 04, 2016 at 02:48:20PM +0800, Jason Wang wrote: > > > > > >On 2016年11月04日 03:49, Michael S. Tsirkin wrote: > > > >On Thu, Nov 03, 2016 at 05:27:19PM +0800, Jason Wang wrote: > > > > > >This patches enable the Address Translation Service support for virtio > > > > > >pci devices. This is needed for a guest visible Device IOTLB > > > > > >implementation and will be required by vhost device IOTLB API > > > > > >implementation for intel IOMMU. > > > > > > > > > > > >Cc: Michael S. Tsirkin > > > > > >Signed-off-by: Jason Wang > > > >I'd like to understand why do you think this is strictly required. > > > >Won't setting CM bit in the IOMMU do the trick. > > >ATS was chosen for performance. Since there're many problems for CM: > > > > > >- CM was slow (10%-20% slower on real hardware for things like netperf) > > >because of each transition between non-present and present mapping needs an > > >explicit invalidation. It may slow down the whole VM. > > >- Without ATS/Device IOTLB, IOMMU becomes a bottleneck because of contending > > >of IOTLB entries. (What we can do in this case is in fact userspace IOTLB > > >snooping, this could be done even without CM). > > >It was natural to think of ATS when designing interface between IOMMU and > > >device/remote IOTLBs. Do you see any drawbacks on ATS here? > > > > > >Thanks > >In fact at this point I'm confused. Any mapping needs to be programmed > >in the IOMMU. We need to implement this correctly. > >Once we do why do we need ATS? > >I think what you need is map/unmap notifiers that Aviv is working on. > >No? > >Let me clarify, device IOTLB API can work without ATS or CM. So there're >three ways to do: > >1) without ATS or CM support, the function could be implemented through: >1.1: asking for qemu help if there's an IOTLB miss in vhost >1.2: snooping the userspace IOTLB invalidation (present to non-present >mapping) and update device IOTLB > >2) with CM enabled, the only thing we can add is snooping the non-present to >present mapping and update the device IOTLB. This is not a requirement since >we still can get this through asking qemu's(1.2) help. > >3) with ATS enabled, guest knows the existence of device IOTLB, and device >IOTLB entires needs to be flushed explicitly by guest. In this case there's >no need to snoop the ordinary IOTLB invalidation in 1.2. We just need to >snoop the device IOTLB specific invalidation request from guest. > >All the above 3 methods work very well, but let's have a look at performance >impact: > >- Method 1 (without CM or ATS), the performance is not the best since guest >does not know about the existence of remote IOTLB, this means the flush of >device IOTLB entry could not be done on demand. One example is some IOMMU >driver (e.g intel) tends to optimize the IOTLB invalidations by issuing a >global invalidation periodically. We need to flush the device IOTLB too in >this case. Thus we can notice some jitter (because of IOTLB miss). > >- Method 2 (with CM but without ATS) seems to be the worst case. It has not >only all problems above a but also a new one: each transition needs to >notify the device explicitly. Even if dpdk use static mappings, all other >devices in the VM use dynamic ones which slows down the whole the system. >According to the test, CM is about 10%-20% slower in real hardware. > >- Method 3 (ATS) can give the best performance, all the problems have gone >since guest can flush the device IOTLB entry on demand. It was defined by >spec and was designed to solve the issues just like what we meet here, and >was supported by modern IOMMUs. > >And what's even better, implementing ATS turns out less than 100 lines of >codes. And it was much more easier to be enabled on other IOMMU (AMD IOMMU >only needs 20 lines of codes). All other ways (I started and have codes for >method 1 for intel IOMMU) need lots of work specific to each kind of IOMMU. method 1 is basically what Aviv implemented except you don't need map notifiers, only unmap. > >Consider so much advantages by just adding so small lines of codes. I don't >see why we don't need ATS (for the IOOMUs that supports it). > >Thanks I am concerned that not all IOMMUs and guests support ATS. For IOMMUs that does not support ATS, we can used method 1. For legacy guests, it does not even support VIRTIO_F_IOMMU_PLATFORM. So probably not an issue. Thanks
Re: [Qemu-devel] [PATCH v6 1/3] IOMMU: add option to enable VTD_CAP_CM to vIOMMU capility exposoed to guest
On 2016年11月11日 11:39, Michael S. Tsirkin wrote: On Fri, Nov 11, 2016 at 10:32:42AM +0800, Jason Wang wrote: On 2016年11月10日 06:00, Michael S. Tsirkin wrote: On Wed, Nov 09, 2016 at 03:28:02PM +0800, Jason Wang wrote: On 2016年11月08日 19:04, Aviv B.D wrote: From: "Aviv Ben-David" This capability asks the guest to invalidate cache before each map operation. We can use this invalidation to trap map operations in the hypervisor. Hi: Like I've asked twice in the past, I want to know why don't you cache translation faults as what spec required (especially this is a guest visible behavior)? Btw, please cc me on posting future versions. Thanks Caching isn't guest visible. Seems not, if one fault mapping were cached by IOTLB. Guest can notice this behavior. Sorry, I don't get what you are saying. Spec just says you*can* cache, not that you must. Yes, but what did in this patch is "don't". What I suggest is just a "can", since anyway the IOTLB entries were limited and could be replaced by other. Thanks Have trouble understanding this. Can you given an example of a guest visible difference? I guess this may do the detection: 1) map iova A to be non-present. 2) invalidate iova A 3) map iova A to addr B 4) access iova A A correct implemented CM may meet fault in step 4, but with this patch, we never.
Re: [Qemu-devel] [PATCH 07/13] virtio-scsi: always use dataplane path if ioeventfd is active
On Fri, 21 Oct 2016 22:48:10 +0200 Paolo Bonzini wrote: > Override start_ioeventfd and stop_ioeventfd to start/stop the > whole dataplane logic. This has some positive side effects: > > - no need anymore for virtio_add_queue_aio (i.e. a revert of > commit 1c627137c10ee2dcf59e0383ade8a9abfa2d4355) > > - no need anymore to switch from generic ioeventfd handlers to > dataplane > > It detects some errors better: > > $ qemu-system-x86_64 -object iothread,id=io \ > -device virtio-scsi-pci,ioeventfd=off,iothread=io > qemu-system-x86_64: -device virtio-scsi-pci,ioeventfd=off,iothread=io: > ioeventfd is required for iothread > > while previously it would have started just fine. > > Reviewed-by: Cornelia Huck > Signed-off-by: Paolo Bonzini > --- > hw/scsi/virtio-scsi-dataplane.c | 56 > + > hw/scsi/virtio-scsi.c | 24 -- > include/hw/virtio/virtio-scsi.h | 6 ++--- > 3 files changed, 48 insertions(+), 38 deletions(-) Looks like this added a more subtle regression with MST's previous pull request, I have an OVMF/440FX/virtio-scsi/virtio-net/win8.1 VM, with this patch it fails to shutdown. QEMU just spins at 400% load. (ps from libvirt launched vm, linebreaks added) /usr/local/bin/qemu-system-x86_64 -name guest=Steam,debug-threads=on -S \ -object secret,id=masterKey0,format=raw,file=/var/lib/libvirt/qemu/domain-54-Steam/master-key.aes \ -machine pc-i440fx-2.8,accel=kvm,usb=off,vmport=off \ -cpu host,hv_time,hv_relaxed,hv_vapic,hv_spinlocks=0x1fff,hv_vendor_id=KeenlyKVM,kvm=off \ -drive file=/usr/share/edk2.git/ovmf-x64/OVMF_CODE-pure-efi.fd,if=pflash,format=raw,unit=0,readonly=on \ -drive file=/var/lib/libvirt/qemu/nvram/Steam_VARS.fd,if=pflash,format=raw,unit=1 \ -m 4096 -mem-prealloc -mem-path /dev/hugepages/libvirt/qemu -realtime mlock=off \ -smp 4,sockets=1,cores=4,threads=1 -uuid 79f39860-d659-426b-89e8-90cb46ee57c6 \ -display none -no-user-config -nodefaults \ -chardev socket,id=charmonitor,path=/var/lib/libvirt/qemu/domain-54-Steam/monitor.sock,server,nowait \ -mon chardev=charmonitor,id=monitor,mode=control -rtc base=localtime,driftfix=slew \ -global kvm-pit.lost_tick_policy=discard -no-hpet -no-shutdown -boot menu=on,strict=on \ -device ich9-usb-ehci1,id=usb,bus=pci.0,addr=0x8.0x7 \ -device ich9-usb-uhci1,masterbus=usb.0,firstport=0,bus=pci.0,multifunction=on,addr=0x8 \ -device ich9-usb-uhci2,masterbus=usb.0,firstport=2,bus=pci.0,addr=0x8.0x1 \ -device ich9-usb-uhci3,masterbus=usb.0,firstport=4,bus=pci.0,addr=0x8.0x2 \ -device virtio-scsi-pci,id=scsi0,num_queues=4,bus=pci.0,addr=0x5 \ -drive file=/mnt/store/vm/Steam.img,format=raw,if=none,id=drive-scsi0-0-0-0,cache=none \ -device scsi-hd,bus=scsi0.0,channel=0,scsi-id=0,lun=0,drive=drive-scsi0-0-0-0,id=scsi0-0-0-0,bootindex=2 \ -netdev tap,fd=28,id=hostnet0,vhost=on,vhostfd=30 \ -device virtio-net-pci,netdev=hostnet0,id=net0,mac=52:54:00:60:ef:ac,bus=pci.0,addr=0x3 \ -device vfio-pci,host=01:00.0,id=hostdev0,bus=pci.0,addr=0x2 \ -device vfio-pci,host=01:00.1,id=hostdev1,bus=pci.0,addr=0x6,rombar=0 \ -device vfio-pci,host=02:00.0,id=hostdev2,bus=pci.0,addr=0x7 -snapshot -msg timestamp=on Fails: ad07cd6 virtio-scsi: always use dataplane path if ioeventfd is active Works (but I'm not using virtio-blk): 9ffe337 virtio-blk: always use dataplane path if ioeventfd is active Thanks, Alex
Re: [Qemu-devel] [PATCH 4/4] block: Cater to iscsi with non-power-of-2 discard
On 11/08/2016 05:03 AM, Peter Lieven wrote: > Am 25.10.2016 um 18:12 schrieb Eric Blake: >> On 10/25/2016 09:36 AM, Paolo Bonzini wrote: >>> >>> On 25/10/2016 16:35, Eric Blake wrote: So your argument is that we should always pass down every unaligned less-than-optimum discard request all the way to the hardware, rather than dropping it higher in the stack, even though discard requests are already advisory, in order to leave the hardware as the ultimate decision on whether to ignore the unaligned request? >>> Yes, I agree with Peter as to this. >> Okay, I'll work on patches. I think it counts as bug fix, so appropriate >> even if I miss soft freeze (I'd still like to get NBD write zero support >> into 2.8, since it already missed 2.7, but that one is still awaiting >> review with not much time left). >> > > Hi Eric, > > have you had time to look at this? > If you need help, let me know. Patches posted, but testing help would be appreciated since you have the actual hardware that exhibits the issue. I'm also trying to write a patch to extend the blkdebug driver to share this "feature" of a 15M page, and write a qemu-iotest to make it harder to regress in the future. -- Eric Blake eblake redhat com+1-919-301-3266 Libvirt virtualization library http://libvirt.org signature.asc Description: OpenPGP digital signature
Re: [Qemu-devel] [PATCH V2 07/11] virtio-pci: address space translation service (ATS) support
On Fri, Nov 11, 2016 at 11:26:12AM +0800, Jason Wang wrote: > > > On 2016年11月11日 01:32, Michael S. Tsirkin wrote: > > On Fri, Nov 04, 2016 at 02:48:20PM +0800, Jason Wang wrote: > > > > > > On 2016年11月04日 03:49, Michael S. Tsirkin wrote: > > > > On Thu, Nov 03, 2016 at 05:27:19PM +0800, Jason Wang wrote: > > > > > > This patches enable the Address Translation Service support for > > > > > > virtio > > > > > > pci devices. This is needed for a guest visible Device IOTLB > > > > > > implementation and will be required by vhost device IOTLB API > > > > > > implementation for intel IOMMU. > > > > > > > > > > > > Cc: Michael S. Tsirkin > > > > > > Signed-off-by: Jason Wang > > > > I'd like to understand why do you think this is strictly required. > > > > Won't setting CM bit in the IOMMU do the trick. > > > ATS was chosen for performance. Since there're many problems for CM: > > > > > > - CM was slow (10%-20% slower on real hardware for things like netperf) > > > because of each transition between non-present and present mapping needs > > > an > > > explicit invalidation. It may slow down the whole VM. > > > - Without ATS/Device IOTLB, IOMMU becomes a bottleneck because of > > > contending > > > of IOTLB entries. (What we can do in this case is in fact userspace IOTLB > > > snooping, this could be done even without CM). > > > It was natural to think of ATS when designing interface between IOMMU and > > > device/remote IOTLBs. Do you see any drawbacks on ATS here? > > > > > > Thanks > > In fact at this point I'm confused. Any mapping needs to be programmed > > in the IOMMU. We need to implement this correctly. > > Once we do why do we need ATS? > > I think what you need is map/unmap notifiers that Aviv is working on. > > No? > > Let me clarify, device IOTLB API can work without ATS or CM. So there're > three ways to do: > > 1) without ATS or CM support, the function could be implemented through: > 1.1: asking for qemu help if there's an IOTLB miss in vhost > 1.2: snooping the userspace IOTLB invalidation (present to non-present > mapping) and update device IOTLB > > 2) with CM enabled, the only thing we can add is snooping the non-present to > present mapping and update the device IOTLB. This is not a requirement since > we still can get this through asking qemu's(1.2) help. > > 3) with ATS enabled, guest knows the existence of device IOTLB, and device > IOTLB entires needs to be flushed explicitly by guest. In this case there's > no need to snoop the ordinary IOTLB invalidation in 1.2. We just need to > snoop the device IOTLB specific invalidation request from guest. > > All the above 3 methods work very well, but let's have a look at performance > impact: > > - Method 1 (without CM or ATS), the performance is not the best since guest > does not know about the existence of remote IOTLB, this means the flush of > device IOTLB entry could not be done on demand. One example is some IOMMU > driver (e.g intel) tends to optimize the IOTLB invalidations by issuing a > global invalidation periodically. We need to flush the device IOTLB too in > this case. Thus we can notice some jitter (because of IOTLB miss). > > - Method 2 (with CM but without ATS) seems to be the worst case. It has not > only all problems above a but also a new one: each transition needs to > notify the device explicitly. Even if dpdk use static mappings, all other > devices in the VM use dynamic ones which slows down the whole the system. > According to the test, CM is about 10%-20% slower in real hardware. > > - Method 3 (ATS) can give the best performance, all the problems have gone > since guest can flush the device IOTLB entry on demand. It was defined by > spec and was designed to solve the issues just like what we meet here, and > was supported by modern IOMMUs. > > And what's even better, implementing ATS turns out less than 100 lines of > codes. And it was much more easier to be enabled on other IOMMU (AMD IOMMU > only needs 20 lines of codes). All other ways (I started and have codes for > method 1 for intel IOMMU) need lots of work specific to each kind of IOMMU. method 1 is basically what Aviv implemented except you don't need map notifiers, only unmap. > > Consider so much advantages by just adding so small lines of codes. I don't > see why we don't need ATS (for the IOOMUs that supports it). > > Thanks I am concerned that not all IOMMUs and guests support ATS. > > > > > > > > Also, could you remind me pls - can guests just disable ATS? > > > > > > > > What happens then? > > > > > > > >
Re: [Qemu-devel] [PULL 00/47] virtio, vhost, pc, pci: tests, documentation, fixes and cleanups
On Fri, Nov 11, 2016 at 11:51:34AM +0800, Cao jin wrote: > > > On 11/11/2016 11:32 AM, Michael S. Tsirkin wrote: > > On Fri, Nov 11, 2016 at 10:46:29AM +0800, Cao jin wrote: > > > > > > > > > On 11/11/2016 06:51 AM, Michael S. Tsirkin wrote: > > > > On Thu, Nov 10, 2016 at 03:48:28PM -0700, Alex Williamson wrote: > > > > > > > > > > > > I think I'll drop this, this patchset was borderline useful anyway. > > > > > > > > > > Really sorry for the issue, I moved usb_xhci_init() too far from its > > > original place, results in the segmentation fault. > > > > > > Could I send new version of this patch to fix it right now? > > > > > > > > > > > OK but I'd like to know which other patches > > in the patchset are untested. You really must inform people > > when you post untested patches. > > > > I see, really sorry for my mistake. My patches just pass make check, didn't > test for each patch. > -- > Yours Sincerely, > > Cao jin > I think it's best to drop them for now in this case. -- MST
Re: [Qemu-devel] [PULL 00/47] virtio, vhost, pc, pci: tests, documentation, fixes and cleanups
On 11/11/2016 11:32 AM, Michael S. Tsirkin wrote: On Fri, Nov 11, 2016 at 10:46:29AM +0800, Cao jin wrote: On 11/11/2016 06:51 AM, Michael S. Tsirkin wrote: On Thu, Nov 10, 2016 at 03:48:28PM -0700, Alex Williamson wrote: I think I'll drop this, this patchset was borderline useful anyway. Really sorry for the issue, I moved usb_xhci_init() too far from its original place, results in the segmentation fault. Could I send new version of this patch to fix it right now? OK but I'd like to know which other patches in the patchset are untested. You really must inform people when you post untested patches. I see, really sorry for my mistake. My patches just pass make check, didn't test for each patch. -- Yours Sincerely, Cao jin
Re: [Qemu-devel] [PATCH v6 1/3] IOMMU: add option to enable VTD_CAP_CM to vIOMMU capility exposoed to guest
On Fri, Nov 11, 2016 at 10:32:42AM +0800, Jason Wang wrote: > > > On 2016年11月10日 06:00, Michael S. Tsirkin wrote: > > On Wed, Nov 09, 2016 at 03:28:02PM +0800, Jason Wang wrote: > > > > > > > > > > > >On 2016年11月08日 19:04, Aviv B.D wrote: > > > > > >From: "Aviv Ben-David" > > > > > > > > > > > >This capability asks the guest to invalidate cache before each map > > > > > >operation. > > > > > >We can use this invalidation to trap map operations in the > > > > > >hypervisor. > > > > > > > >Hi: > > > > > > > >Like I've asked twice in the past, I want to know why don't you cache > > > >translation faults as what spec required (especially this is a guest > > > >visible > > > >behavior)? > > > > > > > >Btw, please cc me on posting future versions. > > > > > > > >Thanks > > Caching isn't guest visible. > > Seems not, if one fault mapping were cached by IOTLB. Guest can notice this > behavior. Sorry, I don't get what you are saying. > > Spec just says you*can* cache, > > not that you must. > > > > Yes, but what did in this patch is "don't". What I suggest is just a "can", > since anyway the IOTLB entries were limited and could be replaced by other. > > Thanks Have trouble understanding this. Can you given an example of a guest visible difference?
Re: [Qemu-devel] [PULL 00/47] virtio, vhost, pc, pci: tests, documentation, fixes and cleanups
On Fri, Nov 11, 2016 at 10:46:29AM +0800, Cao jin wrote: > > > On 11/11/2016 06:51 AM, Michael S. Tsirkin wrote: > > On Thu, Nov 10, 2016 at 03:48:28PM -0700, Alex Williamson wrote: > > > > > > So I merge this tag to try to resolve it, now I get qemu segfaulting > > > > > bisected to: > > > > > > > > > > commit a6d8372bc6764ee279b473d13ff4ecc8acb7a978 > > > > > Author: Cao jin > > > > > Date: Sat Nov 5 10:07:21 2016 +0800 > > > > > > > > > > hcd-xhci: check & correct param before using it > > > > > > Here's the backtrace from this one: > > > > > > #0 0x55a99d90 in xhci_running (xhci=0x0) at hw/usb/hcd-xhci.c:824 > > > #1 0x55a9f092 in xhci_port_notify (port=0x7fffbfb39330, > > > bits=131072) > > > at hw/usb/hcd-xhci.c:2870 > > > #2 0x55a9f221 in xhci_port_update (port=0x7fffbfb39330, > > > is_detach=0) > > > at hw/usb/hcd-xhci.c:2905 > > > #3 0x55a9f435 in xhci_reset (dev=0x7fffbfb38010) > > > at hw/usb/hcd-xhci.c:2961 > > > #4 0x55984a2e in device_reset (dev=0x7fffbfb38010) > > > at hw/core/qdev.c:1145 > > > #5 0x559828db in qdev_reset_one (dev=0x7fffbfb38010, opaque=0x0) > > > at hw/core/qdev.c:295 > > > #6 0x559834df in qdev_walk_children (dev=0x7fffbfb38010, > > > pre_devfn=0x0, pre_busfn=0x0, post_devfn=0x559828bf , > > > post_busfn=0x559828e2 , opaque=0x0) at > > > hw/core/qdev.c:610 > > > #7 0x5598763b in qbus_walk_children (bus=0x56a40f30, > > > pre_devfn=0x0, pre_busfn=0x0, post_devfn=0x559828bf , > > > post_busfn=0x559828e2 , opaque=0x0) at > > > hw/core/bus.c:59 > > > #8 0x559834a3 in qdev_walk_children (dev=0x56a3f220, > > > pre_devfn=0x0, pre_busfn=0x0, post_devfn=0x559828bf , > > > post_busfn=0x559828e2 , opaque=0x0) at > > > hw/core/qdev.c:602 > > > #9 0x5598763b in qbus_walk_children (bus=0x568296c0, > > > pre_devfn=0x0, pre_busfn=0x0, post_devfn=0x559828bf , > > > post_busfn=0x559828e2 , opaque=0x0) at > > > hw/core/bus.c:59 > > > #10 0x559829f5 in qbus_reset_all (bus=0x568296c0) > > > at hw/core/qdev.c:321 > > > #11 0x55982a18 in qbus_reset_all_fn (opaque=0x568296c0) > > > at hw/core/qdev.c:327 > > > #12 0x558e76f0 in qemu_devices_reset () at vl.c:1765 > > > #13 0x558367ce in pc_machine_reset () > > > at /net/gimli/home/alwillia/Work/qemu.git/hw/i386/pc.c:2181 > > > #14 0x558e778d in qemu_system_reset (report=false) at vl.c:1778 > > > #15 0x558ef44b in main (argc=50, argv=0x7fffdf48, > > > envp=0x7fffe0e0) at vl.c:4656 > > > > > > Here's a commandline sufficient to trigger it: > > > > > > qemu-system-x86_64 -m 1G -nodefaults -no-user-config -display none > > > -monitor stdio -device nec-usb-xhci > > > > > > So apparently that never got tested or something got mangled in the > > > commit. Thanks, > > > > > > Alex > > > > I think I'll drop this, this patchset was borderline useful anyway. > > > > Really sorry for the issue, I moved usb_xhci_init() too far from its > original place, results in the segmentation fault. > > Could I send new version of this patch to fix it right now? > > -- > Yours Sincerely, > > Cao jin > OK but I'd like to know which other patches in the patchset are untested. You really must inform people when you post untested patches. -- MST
Re: [Qemu-devel] [PATCH V2 07/11] virtio-pci: address space translation service (ATS) support
On 2016年11月11日 01:32, Michael S. Tsirkin wrote: On Fri, Nov 04, 2016 at 02:48:20PM +0800, Jason Wang wrote: On 2016年11月04日 03:49, Michael S. Tsirkin wrote: On Thu, Nov 03, 2016 at 05:27:19PM +0800, Jason Wang wrote: This patches enable the Address Translation Service support for virtio pci devices. This is needed for a guest visible Device IOTLB implementation and will be required by vhost device IOTLB API implementation for intel IOMMU. Cc: Michael S. Tsirkin Signed-off-by: Jason Wang I'd like to understand why do you think this is strictly required. Won't setting CM bit in the IOMMU do the trick. ATS was chosen for performance. Since there're many problems for CM: - CM was slow (10%-20% slower on real hardware for things like netperf) because of each transition between non-present and present mapping needs an explicit invalidation. It may slow down the whole VM. - Without ATS/Device IOTLB, IOMMU becomes a bottleneck because of contending of IOTLB entries. (What we can do in this case is in fact userspace IOTLB snooping, this could be done even without CM). It was natural to think of ATS when designing interface between IOMMU and device/remote IOTLBs. Do you see any drawbacks on ATS here? Thanks In fact at this point I'm confused. Any mapping needs to be programmed in the IOMMU. We need to implement this correctly. Once we do why do we need ATS? I think what you need is map/unmap notifiers that Aviv is working on. No? Let me clarify, device IOTLB API can work without ATS or CM. So there're three ways to do: 1) without ATS or CM support, the function could be implemented through: 1.1: asking for qemu help if there's an IOTLB miss in vhost 1.2: snooping the userspace IOTLB invalidation (present to non-present mapping) and update device IOTLB 2) with CM enabled, the only thing we can add is snooping the non-present to present mapping and update the device IOTLB. This is not a requirement since we still can get this through asking qemu's(1.2) help. 3) with ATS enabled, guest knows the existence of device IOTLB, and device IOTLB entires needs to be flushed explicitly by guest. In this case there's no need to snoop the ordinary IOTLB invalidation in 1.2. We just need to snoop the device IOTLB specific invalidation request from guest. All the above 3 methods work very well, but let's have a look at performance impact: - Method 1 (without CM or ATS), the performance is not the best since guest does not know about the existence of remote IOTLB, this means the flush of device IOTLB entry could not be done on demand. One example is some IOMMU driver (e.g intel) tends to optimize the IOTLB invalidations by issuing a global invalidation periodically. We need to flush the device IOTLB too in this case. Thus we can notice some jitter (because of IOTLB miss). - Method 2 (with CM but without ATS) seems to be the worst case. It has not only all problems above a but also a new one: each transition needs to notify the device explicitly. Even if dpdk use static mappings, all other devices in the VM use dynamic ones which slows down the whole the system. According to the test, CM is about 10%-20% slower in real hardware. - Method 3 (ATS) can give the best performance, all the problems have gone since guest can flush the device IOTLB entry on demand. It was defined by spec and was designed to solve the issues just like what we meet here, and was supported by modern IOMMUs. And what's even better, implementing ATS turns out less than 100 lines of codes. And it was much more easier to be enabled on other IOMMU (AMD IOMMU only needs 20 lines of codes). All other ways (I started and have codes for method 1 for intel IOMMU) need lots of work specific to each kind of IOMMU. Consider so much advantages by just adding so small lines of codes. I don't see why we don't need ATS (for the IOOMUs that supports it). Thanks Also, could you remind me pls - can guests just disable ATS? What happens then?
[Qemu-devel] [PATCH 2/2] pcie: fix typo in comments
Signed-off-by: Cao jin --- hw/pci/pcie.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c index 99cfb45..39b10b8 100644 --- a/hw/pci/pcie.c +++ b/hw/pci/pcie.c @@ -656,7 +656,7 @@ static void pcie_ext_cap_set_next(PCIDevice *dev, uint16_t pos, uint16_t next) } /* - * caller must supply valid (offset, size) * such that the range shouldn't + * Caller must supply valid (offset, size) such that the range wouldn't * overlap with other capability or other registers. * This function doesn't check it. */ -- 2.1.0
[Qemu-devel] [PATCH 1/2] vfio: remove a duplicated word in comments
Signed-off-by: Cao jin --- hw/vfio/pci.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c index 31aaecb..c94987c 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c @@ -1881,8 +1881,8 @@ static void vfio_add_ext_cap(VFIOPCIDevice *vdev) * 0 is reserved for this since absence of capabilities is indicated by * 0 for the ID, version, AND next pointer. However, pcie_add_capability() * uses ID 0 as reserved for list management and will incorrectly match and - * assert if we attempt to pre-load the head of the chain with with this - * ID. Use ID 0x temporarily since it is also seems to be reserved in + * assert if we attempt to pre-load the head of the chain with this ID. + * Use ID 0x temporarily since it is also seems to be reserved in * part for identifying absence of capabilities in a root complex register * block. If the ID still exists after adding capabilities, switch back to * zero. We'll mark this entire first dword as emulated for this purpose. -- 2.1.0
Re: [Qemu-devel] [PULL 00/47] virtio, vhost, pc, pci: tests, documentation, fixes and cleanups
On 11/11/2016 06:51 AM, Michael S. Tsirkin wrote: On Thu, Nov 10, 2016 at 03:48:28PM -0700, Alex Williamson wrote: So I merge this tag to try to resolve it, now I get qemu segfaulting bisected to: commit a6d8372bc6764ee279b473d13ff4ecc8acb7a978 Author: Cao jin Date: Sat Nov 5 10:07:21 2016 +0800 hcd-xhci: check & correct param before using it Here's the backtrace from this one: #0 0x55a99d90 in xhci_running (xhci=0x0) at hw/usb/hcd-xhci.c:824 #1 0x55a9f092 in xhci_port_notify (port=0x7fffbfb39330, bits=131072) at hw/usb/hcd-xhci.c:2870 #2 0x55a9f221 in xhci_port_update (port=0x7fffbfb39330, is_detach=0) at hw/usb/hcd-xhci.c:2905 #3 0x55a9f435 in xhci_reset (dev=0x7fffbfb38010) at hw/usb/hcd-xhci.c:2961 #4 0x55984a2e in device_reset (dev=0x7fffbfb38010) at hw/core/qdev.c:1145 #5 0x559828db in qdev_reset_one (dev=0x7fffbfb38010, opaque=0x0) at hw/core/qdev.c:295 #6 0x559834df in qdev_walk_children (dev=0x7fffbfb38010, pre_devfn=0x0, pre_busfn=0x0, post_devfn=0x559828bf , post_busfn=0x559828e2 , opaque=0x0) at hw/core/qdev.c:610 #7 0x5598763b in qbus_walk_children (bus=0x56a40f30, pre_devfn=0x0, pre_busfn=0x0, post_devfn=0x559828bf , post_busfn=0x559828e2 , opaque=0x0) at hw/core/bus.c:59 #8 0x559834a3 in qdev_walk_children (dev=0x56a3f220, pre_devfn=0x0, pre_busfn=0x0, post_devfn=0x559828bf , post_busfn=0x559828e2 , opaque=0x0) at hw/core/qdev.c:602 #9 0x5598763b in qbus_walk_children (bus=0x568296c0, pre_devfn=0x0, pre_busfn=0x0, post_devfn=0x559828bf , post_busfn=0x559828e2 , opaque=0x0) at hw/core/bus.c:59 #10 0x559829f5 in qbus_reset_all (bus=0x568296c0) at hw/core/qdev.c:321 #11 0x55982a18 in qbus_reset_all_fn (opaque=0x568296c0) at hw/core/qdev.c:327 #12 0x558e76f0 in qemu_devices_reset () at vl.c:1765 #13 0x558367ce in pc_machine_reset () at /net/gimli/home/alwillia/Work/qemu.git/hw/i386/pc.c:2181 #14 0x558e778d in qemu_system_reset (report=false) at vl.c:1778 #15 0x558ef44b in main (argc=50, argv=0x7fffdf48, envp=0x7fffe0e0) at vl.c:4656 Here's a commandline sufficient to trigger it: qemu-system-x86_64 -m 1G -nodefaults -no-user-config -display none -monitor stdio -device nec-usb-xhci So apparently that never got tested or something got mangled in the commit. Thanks, Alex I think I'll drop this, this patchset was borderline useful anyway. Really sorry for the issue, I moved usb_xhci_init() too far from its original place, results in the segmentation fault. Could I send new version of this patch to fix it right now? -- Yours Sincerely, Cao jin
Re: [Qemu-devel] [RFC PATCH 7/8] quorum: Implement .bdrv_co_preadv/pwritev()
On 11/10/2016 11:19 AM, Kevin Wolf wrote: > This enables byte granularity requests on quorum nodes. > > Note that the QMP events emitted by the driver are an external API that > we were careless enough to define as sector based. The offset and length > of requests reported in events are rounded down therefore. Would it be better to round offset down and length up? A length of 0 looks odd. Should we add more fields to the two affected events (QUORUM_FAILURE and QUORUM_REPORT_BAD)? We have to keep the existing fields for back-compat, but we could add new fields that give byte-based locations for management apps smart enough to use the new instead of the old (particularly since the old fields are named 'sector-num' and 'sectors-count'). > > Signed-off-by: Kevin Wolf > --- > block/quorum.c | 75 > ++ > 1 file changed, 33 insertions(+), 42 deletions(-) > > @@ -198,14 +198,17 @@ static void quorum_report_bad(QuorumOpType type, > uint64_t sector_num, > } > > qapi_event_send_quorum_report_bad(type, !!msg, msg, node_name, > - sector_num, nb_sectors, &error_abort); > + offset / BDRV_SECTOR_SIZE, > + bytes / BDRV_SECTOR_SIZE, > &error_abort); Rounding the offset down makes sense, but rounding the bytes down can lead to weird messages. Blindly rounding it up to a sector boundary can also be wrong (example: writing 2 bytes at offset 511 really affects 1024 bytes when you consider that two sectors had to undergo read-modify-write). Don't we have a helper routine for determining the end boundary when we have to convert an offset and length to a courser alignment? > @@ -462,8 +461,8 @@ static void GCC_FMT_ATTR(2, 3) quorum_err(QuorumAIOCB > *acb, > va_list ap; > > va_start(ap, fmt); > -fprintf(stderr, "quorum: sector_num=%" PRId64 " nb_sectors=%d ", > -acb->sector_num, acb->nb_sectors); > +fprintf(stderr, "quorum: offset=%" PRIu64 " bytes=%" PRIu64 " ", > +acb->offset, acb->bytes); Might be worth a separate patch to get rid of fprintf and use correct error reporting. But not the work for this patch. Overall, I like that you are getting rid of more sector-based cruft. -- Eric Blake eblake redhat com+1-919-301-3266 Libvirt virtualization library http://libvirt.org signature.asc Description: OpenPGP digital signature
Re: [Qemu-devel] [PATCH v6 1/3] IOMMU: add option to enable VTD_CAP_CM to vIOMMU capility exposoed to guest
On 2016年11月10日 06:00, Michael S. Tsirkin wrote: On Wed, Nov 09, 2016 at 03:28:02PM +0800, Jason Wang wrote: > > >On 2016年11月08日 19:04, Aviv B.D wrote: > >From: "Aviv Ben-David" > > > >This capability asks the guest to invalidate cache before each map operation. > >We can use this invalidation to trap map operations in the hypervisor. > >Hi: > >Like I've asked twice in the past, I want to know why don't you cache >translation faults as what spec required (especially this is a guest visible >behavior)? > >Btw, please cc me on posting future versions. > >Thanks Caching isn't guest visible. Seems not, if one fault mapping were cached by IOTLB. Guest can notice this behavior. Spec just says you*can* cache, not that you must. Yes, but what did in this patch is "don't". What I suggest is just a "can", since anyway the IOTLB entries were limited and could be replaced by other. Thanks
Re: [Qemu-devel] [RFC PATCH 6/8] quorum: Avoid bdrv_aio_writev() for rewrites
On 11/10/2016 11:19 AM, Kevin Wolf wrote: > Replacing it with bdrv_co_pwritev() prepares us for byte granularity > requests and gets us rid of the last bdrv_aio_*() user in quorum. > > Signed-off-by: Kevin Wolf > --- > block/quorum.c | 52 +--- > 1 file changed, 33 insertions(+), 19 deletions(-) > > diff --git a/block/quorum.c b/block/quorum.c > index b2bb3af..1426115 100644 > --- a/block/quorum.c > +++ b/block/quorum.c > @@ -134,6 +134,11 @@ struct QuorumAIOCB { > int children_read; /* how many children have been read from */ > }; > > +typedef struct QuorumCo { > +QuorumAIOCB *acb; > +int i; > +} QuorumCo; > + > @@ -586,11 +605,6 @@ free_exit: > return rewrite; > } > > -typedef struct QuorumCo { > -QuorumAIOCB *acb; > -int i; > -} QuorumCo; Code motion of a struct added earlier in this series; do you want to declare it up front to begin with to minimize churn? -- Eric Blake eblake redhat com+1-919-301-3266 Libvirt virtualization library http://libvirt.org signature.asc Description: OpenPGP digital signature
Re: [Qemu-devel] [RFC PATCH 4/8] quorum: Do cleanup in caller coroutine
On 11/10/2016 11:19 AM, Kevin Wolf wrote: > Instead of calling quorum_aio_finalize() deeply nested in what used > to be an AIO callback, do it in the same functions that allocated the > AIOCB. > > Signed-off-by: Kevin Wolf > --- > block/quorum.c | 15 +-- > 1 file changed, 9 insertions(+), 6 deletions(-) > Looks clean, but again I'm weak enough on coroutines that I won't give R-b to an RFC -- Eric Blake eblake redhat com+1-919-301-3266 Libvirt virtualization library http://libvirt.org signature.asc Description: OpenPGP digital signature
Re: [Qemu-devel] [RFC PATCH 3/8] quorum: Implement .bdrv_co_readv/writev
On 11/10/2016 11:19 AM, Kevin Wolf wrote: > Signed-off-by: Kevin Wolf > --- > block/quorum.c | 194 > ++--- > 1 file changed, 117 insertions(+), 77 deletions(-) > > + > +static void read_quorum_children_entry(void *opaque) > +{ > +QuorumCo *co = opaque; > +QuorumAIOCB *acb = co->acb; > +BDRVQuorumState *s = acb->bs->opaque; > +int i = co->i; > +int ret; > +co = NULL; /* Not valid after the first yield */ Why bother to invalidate co... > + > +acb->qcrs[i].bs = s->children[i]->bs; > +ret = bdrv_co_preadv(s->children[i], acb->sector_num * BDRV_SECTOR_SIZE, > + acb->nb_sectors * BDRV_SECTOR_SIZE, > + &acb->qcrs[i].qiov, 0); > +quorum_aio_cb(&acb->qcrs[i], ret); > +} ...when it isn't used later? Is it just for future-proofing edits made in later patches? > +static void write_quorum_entry(void *opaque) > +{ > +QuorumCo *co = opaque; > +QuorumAIOCB *acb = co->acb; > +BDRVQuorumState *s = acb->bs->opaque; > +int i = co->i; > +int ret; > +co = NULL; /* Not valid after the first yield */ > + > +acb->qcrs[i].bs = s->children[i]->bs; > +ret = bdrv_co_pwritev(s->children[i], acb->sector_num * BDRV_SECTOR_SIZE, > + acb->nb_sectors * BDRV_SECTOR_SIZE, acb->qiov, 0); > +quorum_aio_cb(&acb->qcrs[i], ret); > } and again Otherwise, the conversion looks sane to me, but I'm just weak enough on coroutines that I won't give R-b while this is still in RFC -- Eric Blake eblake redhat com+1-919-301-3266 Libvirt virtualization library http://libvirt.org signature.asc Description: OpenPGP digital signature
Re: [Qemu-devel] [PATCH] pci: fix some typos
On Fri, 11 Nov 2016 09:37:58 +0800 Cao jin wrote: > Signed-off-by: Cao jin > --- > hw/pci/pcie.c | 2 +- > hw/vfio/pci.c | 4 ++-- > 2 files changed, 3 insertions(+), 3 deletions(-) Send separate patches. Thanks, Alex > > diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c > index 99cfb45..39b10b8 100644 > --- a/hw/pci/pcie.c > +++ b/hw/pci/pcie.c > @@ -656,7 +656,7 @@ static void pcie_ext_cap_set_next(PCIDevice *dev, > uint16_t pos, uint16_t next) > } > > /* > - * caller must supply valid (offset, size) * such that the range shouldn't > + * Caller must supply valid (offset, size) such that the range wouldn't > * overlap with other capability or other registers. > * This function doesn't check it. > */ > diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c > index 31aaecb..c94987c 100644 > --- a/hw/vfio/pci.c > +++ b/hw/vfio/pci.c > @@ -1881,8 +1881,8 @@ static void vfio_add_ext_cap(VFIOPCIDevice *vdev) > * 0 is reserved for this since absence of capabilities is indicated by > * 0 for the ID, version, AND next pointer. However, > pcie_add_capability() > * uses ID 0 as reserved for list management and will incorrectly match > and > - * assert if we attempt to pre-load the head of the chain with with this > - * ID. Use ID 0x temporarily since it is also seems to be reserved > in > + * assert if we attempt to pre-load the head of the chain with this ID. > + * Use ID 0x temporarily since it is also seems to be reserved in > * part for identifying absence of capabilities in a root complex > register > * block. If the ID still exists after adding capabilities, switch back > to > * zero. We'll mark this entire first dword as emulated for this > purpose.
Re: [Qemu-devel] [virtio-dev] Re: [PATCH v12 0/2] virtio-crypto: virtio crypto device specification
> From: virtio-...@lists.oasis-open.org [mailto:virtio-...@lists.oasis-open.org] > On Behalf Of Michael S. Tsirkin > Subject: Re: [virtio-dev] Re: [PATCH v12 0/2] virtio-crypto: virtio crypto > device > specification > > On Sat, Nov 05, 2016 at 09:15:58AM +, Gonglei (Arei) wrote: > > > > > > Subject: [virtio-dev] Re: [PATCH v12 0/2] virtio-crypto: virtio crypto > > > device > > > specification > > > > > > On Mon, Oct 24, 2016 at 06:51:52AM +, Gonglei (Arei) wrote: > > > > Ping > > > > > > > > And the corresponding source code v9 on QEMU side had been posted: > > > > > > > > [PATCH v9 00/12] virtio-crypto: introduce framework and device emulation > > > > https://lists.gnu.org/archive/html/qemu-devel/2016-10/msg04755.html > > > > > > > > Regards, > > > > -Gonglei > > > > > > If there are no comments and this is ready to get votes now, > > > pls open the jira issue that you created. > > > I can then start the ballot. > > > > > Hi Michael, > > > > I just updated and opened the jira issue. > > Please address it, thanks :) > > > > Regards, > > -Gonglei > > Looks like there are still minor comments, pls address, > wait a bit then ping me again. > Thanks! > OK, thanks :) Regards, -Gonglei > -- > MST > > - > To unsubscribe, e-mail: virtio-dev-unsubscr...@lists.oasis-open.org > For additional commands, e-mail: virtio-dev-h...@lists.oasis-open.org
[Qemu-devel] [PATCH] pci: fix some typos
Signed-off-by: Cao jin --- hw/pci/pcie.c | 2 +- hw/vfio/pci.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c index 99cfb45..39b10b8 100644 --- a/hw/pci/pcie.c +++ b/hw/pci/pcie.c @@ -656,7 +656,7 @@ static void pcie_ext_cap_set_next(PCIDevice *dev, uint16_t pos, uint16_t next) } /* - * caller must supply valid (offset, size) * such that the range shouldn't + * Caller must supply valid (offset, size) such that the range wouldn't * overlap with other capability or other registers. * This function doesn't check it. */ diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c index 31aaecb..c94987c 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c @@ -1881,8 +1881,8 @@ static void vfio_add_ext_cap(VFIOPCIDevice *vdev) * 0 is reserved for this since absence of capabilities is indicated by * 0 for the ID, version, AND next pointer. However, pcie_add_capability() * uses ID 0 as reserved for list management and will incorrectly match and - * assert if we attempt to pre-load the head of the chain with with this - * ID. Use ID 0x temporarily since it is also seems to be reserved in + * assert if we attempt to pre-load the head of the chain with this ID. + * Use ID 0x temporarily since it is also seems to be reserved in * part for identifying absence of capabilities in a root complex register * block. If the ID still exists after adding capabilities, switch back to * zero. We'll mark this entire first dword as emulated for this purpose. -- 2.1.0
Re: [Qemu-devel] [PATCH v13 1/2] virtio-crypto: Add virtio crypto device specification
Hi Halil, > > > On Thu, Nov 10, 2016 at 09:37:40AM +, Gonglei (Arei) wrote: > >> > Hi, > >> > > >> > I attach a diff for next version in order to review more convenient, with > >> > > >> > - Drop the all gap stuff; > >> > - Drop all structures undefined in virtio_crypto.h > >> > - re-describe per request for per crypto service avoid confusion > >> > > >> > Pls review, thanks! > >> > > >> > > >> > diff --git a/virtio-crypto.tex b/virtio-crypto.tex > >> > index 448296e..ab17e7b 100644 > >> > --- a/virtio-crypto.tex > >> > +++ b/virtio-crypto.tex > >> > @@ -69,13 +69,13 @@ The following services are defined: > >> > > >> > \begin{lstlisting} > >> > /* CIPHER service */ > >> > -#define VIRTIO_CRYPTO_SERVICE_CIPHER (0) > >> > +#define VIRTIO_CRYPTO_SERVICE_CIPHER 0 > >> > /* HASH service */ > >> > -#define VIRTIO_CRYPTO_SERVICE_HASH (1) > >> > +#define VIRTIO_CRYPTO_SERVICE_HASH 1 > >> > /* MAC (Message Authentication Codes) service */ > >> > -#define VIRTIO_CRYPTO_SERVICE_MAC(2) > >> > +#define VIRTIO_CRYPTO_SERVICE_MAC2 > >> > /* AEAD (Authenticated Encryption with Associated Data) service */ > >> > -#define VIRTIO_CRYPTO_SERVICE_AEAD (3) > >> > +#define VIRTIO_CRYPTO_SERVICE_AEAD 3 > >> > \end{lstlisting} > >> > > >> > The last driver-read-only fields specify detailed algorithms masks > >> > @@ -210,7 +210,7 @@ data that should be utilized in operations, and > input data is equal to > >> > The general header for controlq is as follows: > >> > > >> > \begin{lstlisting} > >> > -#define VIRTIO_CRYPTO_OPCODE(service, op) ((service << 8) | (op)) > >> > +#define VIRTIO_CRYPTO_OPCODE(service, op) (((service) << 8) | (op)) > >> > > >> > struct virtio_crypto_ctrl_header { > >> > #define VIRTIO_CRYPTO_CIPHER_CREATE_SESSION \ > >> > @@ -380,20 +380,18 @@ struct virtio_crypto_mac_session_para { > >> > le32 auth_key_len; > >> > le32 padding; > >> > }; > >> > -struct virtio_crypto_mac_session_output { > >> > -le64 auth_key_addr; /* guest key physical address */ > >> > -}; > >> > > >> > struct virtio_crypto_mac_create_session_req { > >> > /* Device-readable part */ > >> > struct virtio_crypto_mac_session_para para; > >> > -struct virtio_crypto_mac_session_output out; > >> > +/* The authenticated key buffer */ > >> > +/* output data here */ > >> > + > >> > /* Device-writable part */ > >> > struct virtio_crypto_session_input input; > >> > }; > >> > \end{lstlisting} > >> > > >> > -The output data are > >> > \subparagraph{Session operation: Symmetric algorithms > session}\label{sec:Device Types / Crypto Device / Device > >> > Operation / Control Virtqueue / Session operation / Session operation: > Symmetric algorithms session} > >> > > >> > @@ -413,13 +411,13 @@ struct virtio_crypto_cipher_session_para { > >> > le32 padding; > >> > }; > >> > > >> > -struct virtio_crypto_cipher_session_output { > >> > -le64 key_addr; /* guest key physical address */ > >> > -}; > >> > - > >> > struct virtio_crypto_cipher_session_req { > >> > +/* Device-readable part */ > >> > struct virtio_crypto_cipher_session_para para; > >> > -struct virtio_crypto_cipher_session_output out; > >> > +/* The cipher key buffer */ > >> > +/* Output data here */ > >> > + > >> > +/* Device-writable part */ > >> > struct virtio_crypto_session_input input; > >> > }; > >> > \end{lstlisting} > >> > @@ -448,18 +446,20 @@ struct virtio_crypto_alg_chain_session_para { > >> > le32 padding; > >> > }; > >> > > >> > -struct virtio_crypto_alg_chain_session_output { > >> > -struct virtio_crypto_cipher_session_output cipher; > >> > -struct virtio_crypto_mac_session_output mac; > >> > -}; > >> > - > >> > struct virtio_crypto_alg_chain_session_req { > >> > +/* Device-readable part */ > >> > struct virtio_crypto_alg_chain_session_para para; > >> > -struct virtio_crypto_alg_chain_session_output out; > >> > +/* The cipher key buffer */ > >> > +/* The authenticated key buffer */ > >> > +/* Output data here */ > >> > + > >> > +/* Device-writable part */ > >> > struct virtio_crypto_session_input input; > >> > }; > >> > \end{lstlisting} > >> > > >> > +The output data includs both cipher key buffer and authenticated key > buffer. > > .. includes both the cipher key buffer and the uthenticated key buffer. > > > >> > + > >> > The packet for symmetric algorithm is as follows: > >> > > >> > \begin{lstlisting} > >> > @@ -503,13 +503,13 @@ struct virtio_crypto_aead_session_para { > >> > le32 padding; > >> > }; > >> > > >> > -struct virtio_crypto_aead_session_output { > >> > -le64 key_addr; /* guest key physical address */ > >> > -}; > >> > - > >> > struct virtio_crypto_aead_create_session_req { > >> > +/* Device-readable part */ > >> > struct virtio_crypto_aead_session_para para; > >> > -struct virtio_crypto_aead_session_output out; > >> > +/* The cipher key buffer */ > >> > +/* O
Re: [Qemu-devel] [virtio-dev] Re: [PATCH v13 1/2] virtio-crypto: Add virtio crypto device specification
Hi guys, > From: virtio-...@lists.oasis-open.org [mailto:virtio-...@lists.oasis-open.org] > On Behalf Of Michael S. Tsirkin > Sent: Friday, November 11, 2016 1:04 AM > Subject: [virtio-dev] Re: [Qemu-devel] [PATCH v13 1/2] virtio-crypto: Add > virtio > crypto device specification > > On Thu, Nov 10, 2016 at 05:47:54PM +0100, Halil Pasic wrote: > > the typos > > That's something that's avoidable btw, please use > a spell and grammar checker. I hear http://www.languagetool.org/ > is useful to some people. > Thanks for your suggestion :) Regards, -Gonglei
Re: [Qemu-devel] [PATCH] spapr-vty: Fix bad assert() statement
On Thu, Nov 10, 2016 at 10:06:37AM +0100, Thomas Huth wrote: > When using the serial console in the GTK interface of QEMU (and > QEMU has been compiled with CONFIG_VTE), it is possible to trigger > the assert() statement in vty_receive() in spapr_vty.c by pasting > a chunk of text with length > 16 into the QEMU window. > Most of the other serial backends seem to simply drop characters > that they can not handle, so I think we should also do the same in > spapr-vty to fix this issue. And since it is quite ugly when pasted > text is chopped after 16 bytes, we also increase the size of the > input buffer here so that we can at least handle a couple of text > lines. Adding Paolo to CC, as qemu-char maintainer. So, vastly increasing the buffer like this doesn't seem right - the plain 16550 serial driver doesn't maintain a buffer bigger than its small internal FIFO (32 characters IIRC) - or a single byte if the FIFO is disabled. I thought the other side of the char driver was supposed to call can_receive() first and not deliver more bytes than we can handle - hence the assert. That said, I think vty_can_receive() has a bug - looking at other drivers, I think it's supposed to return the amount of buffer space available, but we're just returning 0 or 1. Although AFAICT that should still work, just inefficiently. If you use a serial port with the same GTK interface, do you get the same problem with pastes of >16 characters being truncated? > > Buglink: https://bugs.launchpad.net/qemu/+bug/1639322 > Signed-off-by: Thomas Huth > --- > hw/char/spapr_vty.c | 13 +++-- > 1 file changed, 11 insertions(+), 2 deletions(-) > > diff --git a/hw/char/spapr_vty.c b/hw/char/spapr_vty.c > index 31822fe..bee6c34 100644 > --- a/hw/char/spapr_vty.c > +++ b/hw/char/spapr_vty.c > @@ -1,4 +1,5 @@ > #include "qemu/osdep.h" > +#include "qemu/error-report.h" > #include "qapi/error.h" > #include "qemu-common.h" > #include "cpu.h" > @@ -7,7 +8,7 @@ > #include "hw/ppc/spapr.h" > #include "hw/ppc/spapr_vio.h" > > -#define VTERM_BUFSIZE 16 > +#define VTERM_BUFSIZE 2048 > > typedef struct VIOsPAPRVTYDevice { > VIOsPAPRDevice sdev; > @@ -37,7 +38,15 @@ static void vty_receive(void *opaque, const uint8_t *buf, > int size) > qemu_irq_pulse(spapr_vio_qirq(&dev->sdev)); > } > for (i = 0; i < size; i++) { > -assert((dev->in - dev->out) < VTERM_BUFSIZE); > +if (dev->in - dev->out >= VTERM_BUFSIZE) { > +static bool reported; > +if (!reported) { > +error_report("VTY input buffer exhausted - characters > dropped." > + " (input size = %i)", size); > +reported = true; > +} > +break; > +} > dev->buf[dev->in++ % VTERM_BUFSIZE] = buf[i]; > } > } -- David Gibson| I'll have my music baroque, and my code david AT gibson.dropbear.id.au | minimalist, thank you. NOT _the_ _other_ | _way_ _around_! http://www.ozlabs.org/~dgibson signature.asc Description: PGP signature
Re: [Qemu-devel] [PATCH v2] FU exceptions should carry a cause (IC)
On Thu, Nov 10, 2016 at 03:37:31PM +1100, Balbir Singh wrote: > > > As per the ISA we need a cause and executing a tabort r9 in libc > for example causes a EXCP_FU exception, we don't wire up the > IC (cause) when we post the exception. The cause is required > for the kernel to do the right thing. The fix applies only to 64 > bit ppc targets. > > Signed-off-by: Balbir singh Applied to ppc-for-2.8. > --- > Changelog v2: >Make the code 64 bit specific > > target-ppc/excp_helper.c | 3 +++ > 1 file changed, 3 insertions(+) > > diff --git a/target-ppc/excp_helper.c b/target-ppc/excp_helper.c > index 808760b..93369d4 100644 > --- a/target-ppc/excp_helper.c > +++ b/target-ppc/excp_helper.c > @@ -427,6 +427,9 @@ static inline void powerpc_excp(PowerPCCPU *cpu, int > excp_model, int excp) > case POWERPC_EXCP_VPU: /* Vector unavailable exception > */ > case POWERPC_EXCP_VSXU: /* VSX unavailable exception > */ > case POWERPC_EXCP_FU: /* Facility unavailable exception > */ > +#ifdef TARGET_PPC64 > +env->spr[SPR_FSCR] |= ((target_ulong)env->error_code << 56); > +#endif > break; > case POWERPC_EXCP_PIT: /* Programmable interval timer interrupt > */ > LOG_EXCP("PIT exception\n"); -- David Gibson| I'll have my music baroque, and my code david AT gibson.dropbear.id.au | minimalist, thank you. NOT _the_ _other_ | _way_ _around_! http://www.ozlabs.org/~dgibson signature.asc Description: PGP signature
Re: [Qemu-devel] [PATCH] tests: add XSCOM tests for the PowerNV machine
On Thu, Nov 10, 2016 at 01:17:10PM +0100, Cédric Le Goater wrote: > Add a couple of tests on the XSCOM bus of the PowerNV machine for the > the POWER8 and POWER9 CPUs. The first tests reads the CFAM identifier > of the chip. The second test goes further in the XSCOM address space > and reaches the cores to read their DTS registers. This last one is > disabled on P9 for the moment as the EQ/EX/EC XSCOM mapping needs more > work to be functional. > > Signed-off-by: Cédric Le Goater > --- > tests/Makefile.include | 1 + > tests/pnv-xscom-test.c | 138 > + > 2 files changed, 139 insertions(+) > create mode 100644 tests/pnv-xscom-test.c > > diff --git a/tests/Makefile.include b/tests/Makefile.include > index de516341fd44..90c9ad9ac6e1 100644 > --- a/tests/Makefile.include > +++ b/tests/Makefile.include > @@ -270,6 +270,7 @@ gcov-files-ppc64-y = ppc64-softmmu/hw/ppc/spapr_pci.c > check-qtest-ppc64-y += tests/endianness-test$(EXESUF) > check-qtest-ppc64-y += tests/boot-order-test$(EXESUF) > check-qtest-ppc64-y += tests/prom-env-test$(EXESUF) > +check-qtest-ppc64-y += tests/pnv-xscom-test$(EXESUF) > check-qtest-ppc64-y += tests/drive_del-test$(EXESUF) > check-qtest-ppc64-y += tests/postcopy-test$(EXESUF) > check-qtest-ppc64-y += tests/boot-serial-test$(EXESUF) > diff --git a/tests/pnv-xscom-test.c b/tests/pnv-xscom-test.c > new file mode 100644 > index ..994dd015c425 > --- /dev/null > +++ b/tests/pnv-xscom-test.c > @@ -0,0 +1,138 @@ > +/* > + * QTest testcase for PowerNV XSCOM bus > + * > + * Copyright (c) 2016, IBM Corporation. > + * > + * This work is licensed under the terms of the GNU GPL, version 2 or > + * later. See the COPYING file in the top-level directory. > + */ > +#include "qemu/osdep.h" > + > +#include "libqtest.h" > + > +typedef enum PnvChipType { > +PNV_CHIP_POWER8E, /* AKA Murano (default) */ > +PNV_CHIP_POWER8, /* AKA Venice */ > +PNV_CHIP_POWER8NVL, /* AKA Naples */ > +PNV_CHIP_POWER9, /* AKA Nimbus */ > +} PnvChipType; > + > +#define PNV_XSCOM_EX_BASE 0x1000 > +#define PNV_XSCOM_EX_CORE_BASE(i) (PNV_XSCOM_EX_BASE | (((uint64_t)i) << 24)) > + > +static const struct pnv_chip { > +PnvChipType chip_type; > +const char *cpu_model; > +uint64_txscom_base; > +uint64_tcfam_id; > +uint32_tfirst_core; > +} pnv_chips[] = { > +{ > +.chip_type = PNV_CHIP_POWER8, > +.cpu_model = "POWER8", > +.xscom_base = 0x003fc00ull, > +.cfam_id= 0x220ea0498000ull, > +.first_core = 0x1, > +}, > +{ > +.chip_type = PNV_CHIP_POWER8NVL, > +.cpu_model = "POWER8NVL", > +.xscom_base = 0x003fc00ull, > +.cfam_id= 0x120d30498000ull, > +.first_core = 0x1, > +}, > +{ > +.chip_type = PNV_CHIP_POWER9, > +.cpu_model = "POWER9", > +.xscom_base = 0x00603fcull, > +.cfam_id= 0x100d10498000ull, > +.first_core = 0x20, > +}, > +}; > + > +static uint64_t pnv_xscom_addr(const struct pnv_chip *chip, uint32_t pcba) > +{ > +uint64_t addr = chip->xscom_base; > + > +if (chip->chip_type == PNV_CHIP_POWER9) { > +return addr | ((uint64_t) pcba << 3); > +} else { > +return addr | (((uint64_t) pcba << 4) & ~0xfful) | > +(((uint64_t) pcba << 3) & 0x78); > +} > +} To make writing future tests which use XSCOM easier, I think it would be better to actually have xscom_read / xscom_write helper functions. > +static void test_xscom_cfam_id(const struct pnv_chip *chip) > +{ > +uint64_t f000f = readq(pnv_xscom_addr(chip, 0xf000f)); > + > +g_assert_cmphex(f000f, ==, chip->cfam_id); > +} > + > +static void test_cfam_id(const void *data) > +{ > +char *args; > +const struct pnv_chip *chip = data; > + > +args = g_strdup_printf("-M powernv,accel=tcg -cpu %s", chip->cpu_model); > + > +qtest_start(args); > +test_xscom_cfam_id(chip); > +qtest_quit(global_qtest); > + > +g_free(args); > +} > + > +static void test_xscom_core(const struct pnv_chip *chip) > +{ > +uint32_t first_core_dts0 = > +PNV_XSCOM_EX_CORE_BASE(chip->first_core) | 0x5; > +uint64_t dts0 = readq(pnv_xscom_addr(chip, first_core_dts0)); > + > +g_assert_cmphex(dts0, ==, 0x26f024f023full); > +} > + > +static void test_core(const void *data) > +{ > +char *args; > +const struct pnv_chip *chip = data; > + > +args = g_strdup_printf("-M powernv,accel=tcg -cpu %s", chip->cpu_model); > + > +qtest_start(args); > +test_xscom_core(chip); > +qtest_quit(global_qtest); > + > +g_free(args); > +} > + > +int main(int argc, char **argv) > +{ > +int i; > + > +g_test_init(&argc, &argv, NULL); > + > +for (i = 0; i < ARRAY_SIZE(pnv_chips); i++) { > +char *name = g_strdup_printf("pnv-xscom/cfam_id/%s", > +
Re: [Qemu-devel] [PATCH v2 for-2.8] spapr-vty: Fix bad assert() statement
On Thu, Nov 10, 2016 at 09:16:01PM +0100, Thomas Huth wrote: > When using the serial console in the GTK interface of QEMU (and > QEMU has been compiled with CONFIG_VTE), it is possible to trigger > the assert() statement in vty_receive() in spapr_vty.c by pasting > a chunk of text with length > 16 into the QEMU window. > Most of the other serial backends seem to simply drop characters > that they can not handle, so I think we should also do the same in > spapr-vty to fix this issue. > > Buglink: https://bugs.launchpad.net/qemu/+bug/1639322 > Signed-off-by: Thomas Huth > --- > v2: > - Do not increase the buffer size since this breaks migration. >(We should figure out a better solution for the size of the > buffer once 2.8 is released) Applied to ppc-for-2.8. since actually hitting the assert is clearly bad. I still think this smells like a bug on the other side of the chardev, feeding in more characters than the can_receive() hook says can be supplied. > > hw/char/spapr_vty.c | 11 ++- > 1 file changed, 10 insertions(+), 1 deletion(-) > > diff --git a/hw/char/spapr_vty.c b/hw/char/spapr_vty.c > index 31822fe..06b9b39 100644 > --- a/hw/char/spapr_vty.c > +++ b/hw/char/spapr_vty.c > @@ -1,4 +1,5 @@ > #include "qemu/osdep.h" > +#include "qemu/error-report.h" > #include "qapi/error.h" > #include "qemu-common.h" > #include "cpu.h" > @@ -37,7 +38,15 @@ static void vty_receive(void *opaque, const uint8_t *buf, > int size) > qemu_irq_pulse(spapr_vio_qirq(&dev->sdev)); > } > for (i = 0; i < size; i++) { > -assert((dev->in - dev->out) < VTERM_BUFSIZE); > +if (dev->in - dev->out >= VTERM_BUFSIZE) { > +static bool reported; > +if (!reported) { > +error_report("VTY input buffer exhausted - characters > dropped." > + " (input size = %i)", size); > +reported = true; > +} > +break; > +} > dev->buf[dev->in++ % VTERM_BUFSIZE] = buf[i]; > } > } -- David Gibson| I'll have my music baroque, and my code david AT gibson.dropbear.id.au | minimalist, thank you. NOT _the_ _other_ | _way_ _around_! http://www.ozlabs.org/~dgibson signature.asc Description: PGP signature
Re: [Qemu-devel] [RFC 10/17] pseries: Rewrite CAS PVR compatibility logic
On Thu, Nov 10, 2016 at 11:54:13AM -0600, Michael Roth wrote: > Quoting David Gibson (2016-10-30 06:12:01) > > During boot, PAPR guests negotiate CPU model support with the > > ibm,client-architecture-support mechanism. The logic to implement this in > > qemu is very convoluted. This cleans it up to be cleaner, using the new > > ppc_check_compat() call. > > > > The new logic for choosing a compatibility mode is: > > 1. If the guest lists the CPU's real PVR as supported *AND* no > >maximum compatibility mode has been requested on the command line > >then we use "raw" mode - the CPU acts with full capabilities. > > 2. Otherwise, we pick the most recent compatibility mode which is > >both supported by the CPU, and is advertised as supported by the > >guest. > > I think the original code approximated the same thing, but it's hard to be > > sure, and I think it had some weird edge cases. > > > > Signed-off-by: David Gibson > > --- > > hw/ppc/spapr_hcall.c | 107 > > +-- > > hw/ppc/trace-events | 2 +- > > 2 files changed, 37 insertions(+), 72 deletions(-) > > > > diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c > > index d93f580..3bd6d06 100644 > > --- a/hw/ppc/spapr_hcall.c > > +++ b/hw/ppc/spapr_hcall.c > > @@ -895,98 +895,63 @@ static void do_set_compat(CPUState *cs, void *arg) > > ppc_set_compat(cpu, s->compat_pvr, &s->err); > > } > > > > -#define get_compat_level(cpuver) ( \ > > -((cpuver) == CPU_POWERPC_LOGICAL_2_05) ? 2050 : \ > > -((cpuver) == CPU_POWERPC_LOGICAL_2_06) ? 2060 : \ > > -((cpuver) == CPU_POWERPC_LOGICAL_2_06_PLUS) ? 2061 : \ > > -((cpuver) == CPU_POWERPC_LOGICAL_2_07) ? 2070 : 0) > > - > > -static void cas_handle_compat_cpu(PowerPCCPUClass *pcc, uint32_t pvr, > > - unsigned max_lvl, unsigned *compat_lvl, > > - unsigned *compat_pvr) > > -{ > > -unsigned lvl = get_compat_level(pvr); > > -bool is205, is206, is207; > > - > > -if (!lvl) { > > -return; > > -} > > - > > -/* If it is a logical PVR, try to determine the highest level */ > > -is205 = (pcc->pcr_supported & PCR_COMPAT_2_05) && > > -(lvl == get_compat_level(CPU_POWERPC_LOGICAL_2_05)); > > -is206 = (pcc->pcr_supported & PCR_COMPAT_2_06) && > > -((lvl == get_compat_level(CPU_POWERPC_LOGICAL_2_06)) || > > - (lvl == get_compat_level(CPU_POWERPC_LOGICAL_2_06_PLUS))); > > -is207 = (pcc->pcr_supported & PCR_COMPAT_2_07) && > > -(lvl == get_compat_level(CPU_POWERPC_LOGICAL_2_07)); > > - > > -if (is205 || is206 || is207) { > > -if (!max_lvl) { > > -/* User did not set the level, choose the highest */ > > -if (*compat_lvl <= lvl) { > > -*compat_lvl = lvl; > > -*compat_pvr = pvr; > > -} > > -} else if (max_lvl >= lvl) { > > -/* User chose the level, don't set higher than this */ > > -*compat_lvl = lvl; > > -*compat_pvr = pvr; > > -} > > -} > > -} > > - > > -static target_ulong h_client_architecture_support(PowerPCCPU *cpu_, > > +static target_ulong h_client_architecture_support(PowerPCCPU *cpu, > >sPAPRMachineState *spapr, > >target_ulong opcode, > >target_ulong *args) > > { > > target_ulong list = ppc64_phys_to_real(args[0]); > > target_ulong ov_table; > > -PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cpu_); > > CPUState *cs; > > -bool cpu_match = false; > > -unsigned old_compat_pvr = cpu_->compat_pvr; > > -unsigned compat_lvl = 0, compat_pvr = 0; > > -unsigned max_lvl = get_compat_level(cpu_->max_compat); > > -int counter; > > +bool explicit_match = false; /* Matched the CPU's real PVR */ > > +uint32_t max_compat = cpu->max_compat; > > +uint32_t best_compat = 0; > > +int i; > > sPAPROptionVector *ov5_guest, *ov5_cas_old, *ov5_updates; > > > > -/* Parse PVR list */ > > -for (counter = 0; counter < 512; ++counter) { > > +/* > > + * We scan the supplied table of PVRs looking for two things > > + * 1. Is our real CPU PVR in the list? > > + * 2. What's the "best" listed logical PVR > > + */ > > +for (i = 0; i < 512; ++i) { > > uint32_t pvr, pvr_mask; > > > > -pvr_mask = ldl_be_phys(&address_space_memory, list); > > -list += 4; > > pvr = ldl_be_phys(&address_space_memory, list); > > -list += 4; > > - > > -trace_spapr_cas_pvr_try(pvr); > > -if (!max_lvl && > > -((cpu_->env.spr[SPR_PVR] & pvr_mask) == (pvr & pvr_mask))) { > > -cpu_match = true; > > -compat_pvr = 0; > > -} else if (pvr == cp
Re: [Qemu-devel] [virtio-dev] Re: [PATCH v13 1/2] virtio-crypto: Add virtio crypto device specification
> From: virtio-...@lists.oasis-open.org [mailto:virtio-...@lists.oasis-open.org] > On Behalf Of Michael S. Tsirkin > Subject: [virtio-dev] Re: [Qemu-devel] [PATCH v13 1/2] virtio-crypto: Add > virtio > crypto device specification > > On Thu, Nov 10, 2016 at 09:37:40AM +, Gonglei (Arei) wrote: > > Hi, > > > > I attach a diff for next version in order to review more convenient, with > > > > - Drop the all gap stuff; > > - Drop all structures undefined in virtio_crypto.h > > - re-describe per request for per crypto service avoid confusion > > > > Pls review, thanks! > > > > > > diff --git a/virtio-crypto.tex b/virtio-crypto.tex > > index 448296e..ab17e7b 100644 > > --- a/virtio-crypto.tex > > +++ b/virtio-crypto.tex > > @@ -69,13 +69,13 @@ The following services are defined: > > > > \begin{lstlisting} > > /* CIPHER service */ > > -#define VIRTIO_CRYPTO_SERVICE_CIPHER (0) > > +#define VIRTIO_CRYPTO_SERVICE_CIPHER 0 > > /* HASH service */ > > -#define VIRTIO_CRYPTO_SERVICE_HASH (1) > > +#define VIRTIO_CRYPTO_SERVICE_HASH 1 > > /* MAC (Message Authentication Codes) service */ > > -#define VIRTIO_CRYPTO_SERVICE_MAC(2) > > +#define VIRTIO_CRYPTO_SERVICE_MAC2 > > /* AEAD (Authenticated Encryption with Associated Data) service */ > > -#define VIRTIO_CRYPTO_SERVICE_AEAD (3) > > +#define VIRTIO_CRYPTO_SERVICE_AEAD 3 > > \end{lstlisting} > > > > The last driver-read-only fields specify detailed algorithms masks > > @@ -210,7 +210,7 @@ data that should be utilized in operations, and input > data is equal to > > The general header for controlq is as follows: > > > > \begin{lstlisting} > > -#define VIRTIO_CRYPTO_OPCODE(service, op) ((service << 8) | (op)) > > +#define VIRTIO_CRYPTO_OPCODE(service, op) (((service) << 8) | (op)) > > > > struct virtio_crypto_ctrl_header { > > #define VIRTIO_CRYPTO_CIPHER_CREATE_SESSION \ > > @@ -380,20 +380,18 @@ struct virtio_crypto_mac_session_para { > > le32 auth_key_len; > > le32 padding; > > }; > > -struct virtio_crypto_mac_session_output { > > -le64 auth_key_addr; /* guest key physical address */ > > -}; > > > > struct virtio_crypto_mac_create_session_req { > > /* Device-readable part */ > > struct virtio_crypto_mac_session_para para; > > -struct virtio_crypto_mac_session_output out; > > +/* The authenticated key buffer */ > > +/* output data here */ > > + > > /* Device-writable part */ > > struct virtio_crypto_session_input input; > > }; > > \end{lstlisting} > > > > -The output data are > > \subparagraph{Session operation: Symmetric algorithms > session}\label{sec:Device Types / Crypto Device / Device > > Operation / Control Virtqueue / Session operation / Session operation: > Symmetric algorithms session} > > > > @@ -413,13 +411,13 @@ struct virtio_crypto_cipher_session_para { > > le32 padding; > > }; > > > > -struct virtio_crypto_cipher_session_output { > > -le64 key_addr; /* guest key physical address */ > > -}; > > - > > struct virtio_crypto_cipher_session_req { > > +/* Device-readable part */ > > struct virtio_crypto_cipher_session_para para; > > -struct virtio_crypto_cipher_session_output out; > > +/* The cipher key buffer */ > > +/* Output data here */ > > + > > +/* Device-writable part */ > > struct virtio_crypto_session_input input; > > }; > > \end{lstlisting} > > @@ -448,18 +446,20 @@ struct virtio_crypto_alg_chain_session_para { > > le32 padding; > > }; > > > > -struct virtio_crypto_alg_chain_session_output { > > -struct virtio_crypto_cipher_session_output cipher; > > -struct virtio_crypto_mac_session_output mac; > > -}; > > - > > struct virtio_crypto_alg_chain_session_req { > > +/* Device-readable part */ > > struct virtio_crypto_alg_chain_session_para para; > > -struct virtio_crypto_alg_chain_session_output out; > > +/* The cipher key buffer */ > > +/* The authenticated key buffer */ > > +/* Output data here */ > > + > > +/* Device-writable part */ > > struct virtio_crypto_session_input input; > > }; > > \end{lstlisting} > > > > +The output data includs both cipher key buffer and authenticated key > > buffer. > > .. includes both the cipher key buffer and the uthenticated key buffer. > OK. > > + > > The packet for symmetric algorithm is as follows: > > > > \begin{lstlisting} > > @@ -503,13 +503,13 @@ struct virtio_crypto_aead_session_para { > > le32 padding; > > }; > > > > -struct virtio_crypto_aead_session_output { > > -le64 key_addr; /* guest key physical address */ > > -}; > > - > > struct virtio_crypto_aead_create_session_req { > > +/* Device-readable part */ > > struct virtio_crypto_aead_session_para para; > > -struct virtio_crypto_aead_session_output out; > > +/* The cipher key buffer */ > > +/* Output data here */ > > + > > +/* Device-writeable part */ > > struct virtio_crypto_session_input input; > > }; > > \end{lstlisting} > > @@ -5
Re: [Qemu-devel] [virtio-dev] Re: [PATCH v13 1/2] virtio-crypto: Add virtio crypto device specification
> From: virtio-...@lists.oasis-open.org [mailto:virtio-...@lists.oasis-open.org] > On Behalf Of Michael S. Tsirkin > Sent: Thursday, November 10, 2016 9:03 PM > brian.a.keat...@intel.com; Claudio Fontana; mike.cara...@nxp.com; Wubin > (H) > Subject: Re: [virtio-dev] Re: [Qemu-devel] [PATCH v13 1/2] virtio-crypto: Add > virtio crypto device specification > > On Thu, Nov 10, 2016 at 02:25:44AM +, Gonglei (Arei) wrote: > > > > > > > > Subject: [virtio-dev] Re: [Qemu-devel] [PATCH v13 1/2] virtio-crypto: Add > virtio > > > crypto device specification > > > > > > On Wed, Nov 09, 2016 at 01:11:20AM +, Gonglei (Arei) wrote: > > > > Nope, Actually I kept those description here is because I wanted to > represent > > > each packet > > > > Intuitionally, otherwise I don't know how to explain them only occupy > > > > one > > > entry in desc table > > > > by indirect table method. > > > > > > whether to use the indirect method should not be up to device. > > > I don't see where does the spec require indirect method, > > > ANY_LAYOUT is implied so spec should not dictate that IMO. > > > > > Yes, I reread the virtio spec. And > > > > 2.4.4 says: > > " The framing of messages with descriptors is independent of the contents > of the buffers." > > > > 2.4.4.2 says: > > "The driver SHOULD NOT use an excessive number of descriptors to > describe a buffer." > > > > For virtio-crypto, I think I can just add a NOTE to remind people using > > indirect > method > > if they want to get better performance in production environment, can I? > > > > > > Thanks, > > -Gonglei > > I'm not sure why frankly. Is there a reason crypto is special here? > I just think each virtio crypto device maybe use many entries for one request. But you are right, whether to use the indirect method should not be up to device. The device shouldn't touch this. Thanks, -Gonglei
Re: [Qemu-devel] [PULL 00/47] virtio, vhost, pc, pci: tests, documentation, fixes and cleanups
On 16-11-10 03:44 PM, Alex Williamson wrote: > On Fri, 11 Nov 2016 01:09:05 +0200 > "Michael S. Tsirkin" wrote: > >> On Thu, Nov 10, 2016 at 03:48:28PM -0700, Alex Williamson wrote: >>> On Fri, 11 Nov 2016 00:33:17 +0200 >>> "Michael S. Tsirkin" wrote: >>> On Thu, Nov 10, 2016 at 03:29:21PM -0700, Alex Williamson wrote: > On Thu, 10 Nov 2016 18:12:20 +0200 > "Michael S. Tsirkin" wrote: > >> The following changes since commit >> 6bbcb76301a72dc80c8d29af13d40bb9a759c9c6: >> >> MAINTAINERS: Remove obsolete stable branches (2016-11-10 15:29:59 >> +) >> >> are available in the git repository at: >> >> git://git.kernel.org/pub/scm/virt/kvm/mst/qemu.git tags/for_upstream >> >> for you to fetch changes up to 8038753b86f4cb1e79d4225a799395c4dae96b17: >> >> docs: add PCIe devices placement guidelines (2016-11-10 18:08:06 +0200) >> >> >> virtio, vhost, pc, pci: tests, documentation, fixes and cleanups >> >> Lots of fixes all over the place. I allowed some cleanups in even >> though they >> are not strictly bugfixes, they might prevent bugs and seem very safe. >> >> Most importantly, this fixes a regression with vhost introduced >> by the last pull. > > I think I'm hitting this previous regression, I see this in my libvirt > log: > > kvm_mem_ioeventfd_add: error adding ioeventfd: File exists > > And a backtrace as seen here > https://paste.fedoraproject.org/477562/88144131/ > > So I merge this tag to try to resolve it, now I get qemu segfaulting > bisected to: > > commit a6d8372bc6764ee279b473d13ff4ecc8acb7a978 > Author: Cao jin > Date: Sat Nov 5 10:07:21 2016 +0800 > > hcd-xhci: check & correct param before using it >>> >>> Here's the backtrace from this one: >>> >>> #0 0x55a99d90 in xhci_running (xhci=0x0) at hw/usb/hcd-xhci.c:824 >>> #1 0x55a9f092 in xhci_port_notify (port=0x7fffbfb39330, >>> bits=131072) >>> at hw/usb/hcd-xhci.c:2870 >>> #2 0x55a9f221 in xhci_port_update (port=0x7fffbfb39330, >>> is_detach=0) >>> at hw/usb/hcd-xhci.c:2905 >>> #3 0x55a9f435 in xhci_reset (dev=0x7fffbfb38010) >>> at hw/usb/hcd-xhci.c:2961 >>> #4 0x55984a2e in device_reset (dev=0x7fffbfb38010) >>> at hw/core/qdev.c:1145 >>> #5 0x559828db in qdev_reset_one (dev=0x7fffbfb38010, opaque=0x0) >>> at hw/core/qdev.c:295 >>> #6 0x559834df in qdev_walk_children (dev=0x7fffbfb38010, >>> pre_devfn=0x0, pre_busfn=0x0, post_devfn=0x559828bf , >>> post_busfn=0x559828e2 , opaque=0x0) at >>> hw/core/qdev.c:610 >>> #7 0x5598763b in qbus_walk_children (bus=0x56a40f30, >>> pre_devfn=0x0, pre_busfn=0x0, post_devfn=0x559828bf , >>> post_busfn=0x559828e2 , opaque=0x0) at hw/core/bus.c:59 >>> #8 0x559834a3 in qdev_walk_children (dev=0x56a3f220, >>> pre_devfn=0x0, pre_busfn=0x0, post_devfn=0x559828bf , >>> post_busfn=0x559828e2 , opaque=0x0) at >>> hw/core/qdev.c:602 >>> #9 0x5598763b in qbus_walk_children (bus=0x568296c0, >>> pre_devfn=0x0, pre_busfn=0x0, post_devfn=0x559828bf , >>> post_busfn=0x559828e2 , opaque=0x0) at hw/core/bus.c:59 >>> #10 0x559829f5 in qbus_reset_all (bus=0x568296c0) >>> at hw/core/qdev.c:321 >>> #11 0x55982a18 in qbus_reset_all_fn (opaque=0x568296c0) >>> at hw/core/qdev.c:327 >>> #12 0x558e76f0 in qemu_devices_reset () at vl.c:1765 >>> #13 0x558367ce in pc_machine_reset () >>> at /net/gimli/home/alwillia/Work/qemu.git/hw/i386/pc.c:2181 >>> #14 0x558e778d in qemu_system_reset (report=false) at vl.c:1778 >>> #15 0x558ef44b in main (argc=50, argv=0x7fffdf48, >>> envp=0x7fffe0e0) at vl.c:4656 >>> >>> Here's a commandline sufficient to trigger it: >>> >>> qemu-system-x86_64 -m 1G -nodefaults -no-user-config -display none -monitor >>> stdio -device nec-usb-xhci >>> >>> So apparently that never got tested or something got mangled in the >>> commit. Thanks, >>> >>> Alex >> >> On the original tree (without this mangled pull), does one of >> [PATCH] vhost-scsi: Update 'ioeventfd_started' with host notifiers >> and >> [PATCH v2] vhost: Update 'ioeventfd_started' with host notifiers > > The latter I already have from your tag as f99d0d394758, adding the > former on top of your tag does not resolve the assert (I simply removed > the xhci device from the vm config to avoid the segfault). Thanks, > > Alex > hmm with those patches it doesn't even get past boot for me. qemu-system-x86_64: /home/john/git/qemu/memory.c:2012: memory_region_del_eventfd: Assertion `i != mr->ioeventfd_nb' failed. Aborted (core dumped) I need to run out for a bit but can look at it more tomorrow if needed. .John
Re: [Qemu-devel] [RFC 12/17] ppc: Migrate compatibility mode
Quoting David Gibson (2016-11-09 19:59:37) > On Tue, Nov 08, 2016 at 04:51:10PM +1100, Alexey Kardashevskiy wrote: > > On 08/11/16 16:19, David Gibson wrote: > > > On Fri, Nov 04, 2016 at 04:58:47PM +1100, Alexey Kardashevskiy wrote: > > >> On 30/10/16 22:12, David Gibson wrote: > > >>> Server-class POWER CPUs can be put into several compatibility modes. > > >>> These > > >>> can be specified on the command line, or negotiated by the guest during > > >>> boot. > > >>> > > >>> Currently we don't migrate the compatibility mode, which means after a > > >>> migration the guest will revert to running with whatever compatibility > > >>> mode (or none) specified on the command line. > > >>> > > >>> With the limited range of CPUs currently used, this doesn't usually > > >>> cause > > >>> a problem, but it could. Fix this by adding the compatibility mode (if > > >>> set) to the migration stream. > > >>> > > >>> Signed-off-by: David Gibson > > >>> --- > > >>> target-ppc/machine.c | 34 ++ > > >>> 1 file changed, 34 insertions(+) > > >>> > > >>> diff --git a/target-ppc/machine.c b/target-ppc/machine.c > > >>> index 4820f22..5d87ff6 100644 > > >>> --- a/target-ppc/machine.c > > >>> +++ b/target-ppc/machine.c > > >>> @@ -9,6 +9,7 @@ > > >>> #include "mmu-hash64.h" > > >>> #include "migration/cpu.h" > > >>> #include "exec/exec-all.h" > > >>> +#include "qapi/error.h" > > >>> > > >>> static int cpu_load_old(QEMUFile *f, void *opaque, int version_id) > > >>> { > > >>> @@ -176,6 +177,20 @@ static int cpu_post_load(void *opaque, int > > >>> version_id) > > >>> * software has to take care of running QEMU in a compatible mode. > > >>> */ > > >>> env->spr[SPR_PVR] = env->spr_cb[SPR_PVR].default_value; > > >>> + > > >>> +#if defined(TARGET_PPC64) > > >>> +if (cpu->compat_pvr) { > > >>> +Error *local_err = NULL; > > >>> + > > >>> +ppc_set_compat(cpu, cpu->compat_pvr, &local_err); > > >>> +if (local_err) { > > >>> +error_report_err(local_err); > > >>> +error_free(local_err); > > >>> +return -1; > > >>> +} > > >>> +} > > >>> +#endif > > >>> + > > >>> env->lr = env->spr[SPR_LR]; > > >>> env->ctr = env->spr[SPR_CTR]; > > >>> cpu_write_xer(env, env->spr[SPR_XER]); > > >>> @@ -528,6 +543,24 @@ static const VMStateDescription vmstate_tlbmas = { > > >>> } > > >>> }; > > >>> > > >>> +static bool compat_needed(void *opaque) > > >>> +{ > > >>> +PowerPCCPU *cpu = opaque; > > >>> + > > >>> +return cpu->compat_pvr != 0; > > >> > > >> > > >> Finally got to trying how this affects migration :) > > >> > > >> This breaks migration to QEMU <=2.7, and it should not at least when both > > >> source and destination are running with -cpu host,compat=power7. > > > > > > IIUC, we don't generally try to maintain backwards migration, even for > > > old machine types. > > > > > > I thought the opposite - we generally try to maintain it, this is pretty > > much why we use these subsections in cases like this; otherwise you could > > just add a new field and bump the vmstate_ppc_cpu.version. > > Hm, I guess that's true. We do at least try to allow backwards > migration, we just don't insist on it. The example here partially > suceeds - it will allow backwards migration for CPUs in raw mode. What if we changed the condition to cpu->compat_pvr != cpu->max_compat? Assuming each end sets compat=powerX (which seems like a reasonable requirement for migration), it seems like in most cases the guest would end up negotiating max_compat anyway. It's only in cases where it doesn't that we end up in an ambiguous state. Newer QEMU would guard against this by migrating it's compat_pvr accordingly (which will probably become more important with p9 guests potentially running older kernels), but otherwise it seems like we could maintain backwards migration in most cases. Although with the proposed version bump for vmstate_spapr_pci I guess the discussion is somewhat moot unless we decide we rely want to maintain backward migration... > > I'll look at just bumping the version instead. > > > > > > > > > > >> > > >> > > >>> +} > > >>> + > > >>> +static const VMStateDescription vmstate_compat = { > > >>> +.name = "cpu/compat", > > >>> +.version_id = 1, > > >>> +.minimum_version_id = 1, > > >>> +.needed = compat_needed, > > >>> +.fields = (VMStateField[]) { > > >>> +VMSTATE_UINT32(compat_pvr, PowerPCCPU), > > >>> +VMSTATE_END_OF_LIST() > > >>> +} > > >>> +}; > > >>> + > > >>> const VMStateDescription vmstate_ppc_cpu = { > > >>> .name = "cpu", > > >>> .version_id = 5, > > >>> @@ -580,6 +613,7 @@ const VMStateDescription vmstate_ppc_cpu = { > > >>> &vmstate_tlb6xx, > > >>> &vmstate_tlbemb, > > >>> &vmstate_tlbmas, > > >>> +&vmstate_compat, > > >>> NULL > > >>> } > > >>> }; > > >>> > > >> > > >> > >
Re: [Qemu-devel] [RFC PATCH 2/8] quorum: Remove s from quorum_aio_get() arguments
On 11/10/2016 11:19 AM, Kevin Wolf wrote: > There is no point in passing the value of bs->opaque in order to > overwrite it with itself. > > Signed-off-by: Kevin Wolf > --- > block/quorum.c | 9 - > 1 file changed, 4 insertions(+), 5 deletions(-) > Reviewed-by: Eric Blake -- Eric Blake eblake redhat com+1-919-301-3266 Libvirt virtualization library http://libvirt.org signature.asc Description: OpenPGP digital signature
Re: [Qemu-devel] [RFC PATCH 1/8] coroutine: Introduce qemu_coroutine_enter_if_inactive()
On 11/10/2016 11:19 AM, Kevin Wolf wrote: > In the context of asynchronous work, if we have a worker coroutine that > didn't yield, the parent coroutine cannot be reentered because it hasn't > yielded yet. In this case we don't even have to reenter the parent > because it will see that the work is already done and won't even yield. > > Signed-off-by: Kevin Wolf > --- > include/qemu/coroutine.h | 6 ++ > util/qemu-coroutine.c| 8 > 2 files changed, 14 insertions(+) > > +++ b/util/qemu-coroutine.c > @@ -19,6 +19,7 @@ > #include "qemu/atomic.h" > #include "qemu/coroutine.h" > #include "qemu/coroutine_int.h" > +#include "block/aio.h" Why do you need this include? > > enum { > POOL_BATCH_SIZE = 64, > @@ -131,6 +132,13 @@ void qemu_coroutine_enter(Coroutine *co) > } > } > > +void qemu_coroutine_enter_if_inactive(Coroutine *co) > +{ > +if (!qemu_coroutine_entered(co)) { > +qemu_coroutine_enter(co); > +} > +} > + > void coroutine_fn qemu_coroutine_yield(void) > { > Coroutine *self = qemu_coroutine_self(); > Otherwise: Reviewed-by: Eric Blake -- Eric Blake eblake redhat com+1-919-301-3266 Libvirt virtualization library http://libvirt.org signature.asc Description: OpenPGP digital signature
Re: [Qemu-devel] [PULL 00/47] virtio, vhost, pc, pci: tests, documentation, fixes and cleanups
On Fri, 11 Nov 2016 01:09:05 +0200 "Michael S. Tsirkin" wrote: > On Thu, Nov 10, 2016 at 03:48:28PM -0700, Alex Williamson wrote: > > On Fri, 11 Nov 2016 00:33:17 +0200 > > "Michael S. Tsirkin" wrote: > > > > > On Thu, Nov 10, 2016 at 03:29:21PM -0700, Alex Williamson wrote: > > > > On Thu, 10 Nov 2016 18:12:20 +0200 > > > > "Michael S. Tsirkin" wrote: > > > > > > > > > The following changes since commit > > > > > 6bbcb76301a72dc80c8d29af13d40bb9a759c9c6: > > > > > > > > > > MAINTAINERS: Remove obsolete stable branches (2016-11-10 15:29:59 > > > > > +) > > > > > > > > > > are available in the git repository at: > > > > > > > > > > git://git.kernel.org/pub/scm/virt/kvm/mst/qemu.git tags/for_upstream > > > > > > > > > > for you to fetch changes up to > > > > > 8038753b86f4cb1e79d4225a799395c4dae96b17: > > > > > > > > > > docs: add PCIe devices placement guidelines (2016-11-10 18:08:06 > > > > > +0200) > > > > > > > > > > > > > > > virtio, vhost, pc, pci: tests, documentation, fixes and cleanups > > > > > > > > > > Lots of fixes all over the place. I allowed some cleanups in even > > > > > though they > > > > > are not strictly bugfixes, they might prevent bugs and seem very safe. > > > > > > > > > > Most importantly, this fixes a regression with vhost introduced > > > > > by the last pull. > > > > > > > > I think I'm hitting this previous regression, I see this in my libvirt > > > > log: > > > > > > > > kvm_mem_ioeventfd_add: error adding ioeventfd: File exists > > > > > > > > And a backtrace as seen here > > > > https://paste.fedoraproject.org/477562/88144131/ > > > > > > > > So I merge this tag to try to resolve it, now I get qemu segfaulting > > > > bisected to: > > > > > > > > commit a6d8372bc6764ee279b473d13ff4ecc8acb7a978 > > > > Author: Cao jin > > > > Date: Sat Nov 5 10:07:21 2016 +0800 > > > > > > > > hcd-xhci: check & correct param before using it > > > > Here's the backtrace from this one: > > > > #0 0x55a99d90 in xhci_running (xhci=0x0) at hw/usb/hcd-xhci.c:824 > > #1 0x55a9f092 in xhci_port_notify (port=0x7fffbfb39330, > > bits=131072) > > at hw/usb/hcd-xhci.c:2870 > > #2 0x55a9f221 in xhci_port_update (port=0x7fffbfb39330, > > is_detach=0) > > at hw/usb/hcd-xhci.c:2905 > > #3 0x55a9f435 in xhci_reset (dev=0x7fffbfb38010) > > at hw/usb/hcd-xhci.c:2961 > > #4 0x55984a2e in device_reset (dev=0x7fffbfb38010) > > at hw/core/qdev.c:1145 > > #5 0x559828db in qdev_reset_one (dev=0x7fffbfb38010, opaque=0x0) > > at hw/core/qdev.c:295 > > #6 0x559834df in qdev_walk_children (dev=0x7fffbfb38010, > > pre_devfn=0x0, pre_busfn=0x0, post_devfn=0x559828bf , > > post_busfn=0x559828e2 , opaque=0x0) at > > hw/core/qdev.c:610 > > #7 0x5598763b in qbus_walk_children (bus=0x56a40f30, > > pre_devfn=0x0, pre_busfn=0x0, post_devfn=0x559828bf , > > post_busfn=0x559828e2 , opaque=0x0) at hw/core/bus.c:59 > > #8 0x559834a3 in qdev_walk_children (dev=0x56a3f220, > > pre_devfn=0x0, pre_busfn=0x0, post_devfn=0x559828bf , > > post_busfn=0x559828e2 , opaque=0x0) at > > hw/core/qdev.c:602 > > #9 0x5598763b in qbus_walk_children (bus=0x568296c0, > > pre_devfn=0x0, pre_busfn=0x0, post_devfn=0x559828bf , > > post_busfn=0x559828e2 , opaque=0x0) at hw/core/bus.c:59 > > #10 0x559829f5 in qbus_reset_all (bus=0x568296c0) > > at hw/core/qdev.c:321 > > #11 0x55982a18 in qbus_reset_all_fn (opaque=0x568296c0) > > at hw/core/qdev.c:327 > > #12 0x558e76f0 in qemu_devices_reset () at vl.c:1765 > > #13 0x558367ce in pc_machine_reset () > > at /net/gimli/home/alwillia/Work/qemu.git/hw/i386/pc.c:2181 > > #14 0x558e778d in qemu_system_reset (report=false) at vl.c:1778 > > #15 0x558ef44b in main (argc=50, argv=0x7fffdf48, > > envp=0x7fffe0e0) at vl.c:4656 > > > > Here's a commandline sufficient to trigger it: > > > > qemu-system-x86_64 -m 1G -nodefaults -no-user-config -display none -monitor > > stdio -device nec-usb-xhci > > > > So apparently that never got tested or something got mangled in the > > commit. Thanks, > > > > Alex > > On the original tree (without this mangled pull), does one of > [PATCH] vhost-scsi: Update 'ioeventfd_started' with host notifiers > and > [PATCH v2] vhost: Update 'ioeventfd_started' with host notifiers The latter I already have from your tag as f99d0d394758, adding the former on top of your tag does not resolve the assert (I simply removed the xhci device from the vm config to avoid the segfault). Thanks, Alex
[Qemu-devel] [PATCH 2/2] block: Pass unaligned discard requests to drivers
Discard is advisory, so rounding the requests to alignment boundaries is never semantically wrong from the data that the guest sees. But at least the Dell Equallogic iSCSI SANs has an interesting property that its advertised discard alignment is 15M, yet documents that discarding a sequence of 1M slices will eventually result in the 15M page being marked as discarded, and it is possible to observe which pages have been discarded. Between commits 9f1963b and b8d0a980, we converted the block layer to a byte-based interface that ultimately ignores any unaligned head or tail based on the driver's advertised discard granularity, which means that qemu 2.7 refuses to pass any discard request smaller than 15M down to the Dell Equallogic hardware. This is a slight regression in behavior compared to earlier qemu, where a guest executing discards in power-of-2 chunks used to be able to get every page discarded, but is now left with various pages still allocated because the guest requests did not align with the hardware's 15M pages. Since the SCSI specification says nothing about a minimum discard granularity, and only documents the preferred alignment, it is best if the block layer gives the driver every bit of information about discard requests, rather than rounding it to alignment boundaries early. Rework the block layer discard algorithm to mirror the write zero algorithm: always peel off any unaligned head or tail and manage that in isolation, then do the bulk of the request on an aligned boundary. The fallback when the driver returns -ENOTSUP for an unaligned request is to silently ignore that portion of the discard request; but for devices that can pass the partial request all the way down to hardware, this can result in the hardware coalescing requests and discarding aligned pages after all. Reported by: Peter Lieven Signed-off-by: Eric Blake CC: qemu-sta...@nongnu.org --- block/io.c | 36 +++- 1 file changed, 23 insertions(+), 13 deletions(-) diff --git a/block/io.c b/block/io.c index aa532a5..76c3030 100644 --- a/block/io.c +++ b/block/io.c @@ -2421,7 +2421,7 @@ int coroutine_fn bdrv_co_pdiscard(BlockDriverState *bs, int64_t offset, { BdrvTrackedRequest req; int max_pdiscard, ret; -int head, align; +int head, tail, align; if (!bs->drv) { return -ENOMEDIUM; @@ -2444,19 +2444,15 @@ int coroutine_fn bdrv_co_pdiscard(BlockDriverState *bs, int64_t offset, return 0; } -/* Discard is advisory, so ignore any unaligned head or tail */ +/* Discard is advisory, but some devices track and coalesce + * unaligned requests, so we must pass everything down rather than + * round here. Still, most devices will just silently ignore + * unaligned requests (by returning -ENOTSUP), so we must fragment + * the request accordingly. */ align = MAX(bs->bl.pdiscard_alignment, bs->bl.request_alignment); assert(align % bs->bl.request_alignment == 0); head = offset % align; -if (head) { -head = MIN(count, align - head); -count -= head; -offset += head; -} -count = QEMU_ALIGN_DOWN(count, align); -if (!count) { -return 0; -} +tail = (offset + count) % align; bdrv_inc_in_flight(bs); tracked_request_begin(&req, bs, offset, count, BDRV_TRACKED_DISCARD); @@ -2468,11 +2464,25 @@ int coroutine_fn bdrv_co_pdiscard(BlockDriverState *bs, int64_t offset, max_pdiscard = QEMU_ALIGN_DOWN(MIN_NON_ZERO(bs->bl.max_pdiscard, INT_MAX), align); -assert(max_pdiscard); +assert(max_pdiscard >= bs->bl.request_alignment); while (count > 0) { int ret; -int num = MIN(count, max_pdiscard); +int num = count; + +if (head) { +/* Make a small request up to the first aligned sector. */ +num = MIN(count, align - head); +head = (head + num) % align; +assert(num < max_pdiscard); +} else if (tail && num > align) { +/* Shorten the request to the last aligned sector. */ +num -= tail; +} +/* limit request size */ +if (num > max_pdiscard) { +num = max_pdiscard; +} if (bs->drv->bdrv_co_pdiscard) { ret = bs->drv->bdrv_co_pdiscard(bs, offset, num); -- 2.7.4
[Qemu-devel] [PATCH for-2.8 0/2] pass partial discard requests all the way down
Peter reported a mild regression in qemu 2.7 when targetting the Dell Equallogic iSCSI, which advertizes a preferred and maximum unmap alignment at 15M. In qemu 2.6, trims not aligned to those boundaries still made it to the device, but in 2.7, the block layer is ignoring unaligned portions of guest requests, resulting in fewer blocks being actually trimmed. Since discard is advisary, it is borderline if this counts as a bug fix worthy for inclusion in 2.8, or if it should be deferred to 2.9. At any rate, while I was able to test that the patch didn't misbehave for me when I tweaked an NBD setup to force 15M alignment, I am unable to test that it makes an actual difference for Peter's hardware, and that needs to happen before anyone even thinks of applying this. Eric Blake (2): block: Return -ENOTSUP rather than assert on unaligned discards block: Pass unaligned discard requests to drivers block/io.c | 36 +++- block/iscsi.c| 4 +++- block/qcow2.c| 4 block/sheepdog.c | 5 +++-- 4 files changed, 33 insertions(+), 16 deletions(-) -- 2.7.4
[Qemu-devel] [PATCH 1/2] block: Return -ENOTSUP rather than assert on unaligned discards
Right now, the block layer rounds discard requests, so that individual drivers are able to assert that discard requests will never be unaligned. But there are some ISCSI devices that track and coalesce multiple unaligned requests, turning it into an actual discard if the requests eventually cover an entire page, which implies that it is better to always pass discard requests as low down the stack as possible. In isolation, this patch has no semantic effect, since the block layer currently never passes an unaligned request down. But the block layer already has code that silently ignores drivers that return -ENOTSUP for a discard request that cannot be honored (as well as drivers that return 0 even when nothing was done). So the next patch will update the block layer to fragment discard requests, so that clients are guaranteed that they are either dealing with an unaligned head or tail, or an aligned body, of the overall request, making it similar to the block layer semantics of write zero fragmentation. Signed-off-by: Eric Blake CC: qemu-sta...@nongnu.org --- block/iscsi.c| 4 +++- block/qcow2.c| 4 block/sheepdog.c | 5 +++-- 3 files changed, 10 insertions(+), 3 deletions(-) diff --git a/block/iscsi.c b/block/iscsi.c index 71bd523..0960929 100644 --- a/block/iscsi.c +++ b/block/iscsi.c @@ -1083,7 +1083,9 @@ coroutine_fn iscsi_co_pdiscard(BlockDriverState *bs, int64_t offset, int count) struct IscsiTask iTask; struct unmap_list list; -assert(is_byte_request_lun_aligned(offset, count, iscsilun)); +if (!is_byte_request_lun_aligned(offset, count, iscsilun)) { +return -ENOTSUP; +} if (!iscsilun->lbp.lbpu) { /* UNMAP is not supported by the target */ diff --git a/block/qcow2.c b/block/qcow2.c index 6d5689a..9af00fd 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -2490,6 +2490,10 @@ static coroutine_fn int qcow2_co_pdiscard(BlockDriverState *bs, int ret; BDRVQcow2State *s = bs->opaque; +if (!QEMU_IS_ALIGNED(offset | count, s->cluster_size)) { +return -ENOTSUP; +} + qemu_co_mutex_lock(&s->lock); ret = qcow2_discard_clusters(bs, offset, count >> BDRV_SECTOR_BITS, QCOW2_DISCARD_REQUEST, false); diff --git a/block/sheepdog.c b/block/sheepdog.c index 1fb9173..4c9af89 100644 --- a/block/sheepdog.c +++ b/block/sheepdog.c @@ -2829,8 +2829,9 @@ static coroutine_fn int sd_co_pdiscard(BlockDriverState *bs, int64_t offset, iov.iov_len = sizeof(zero); discard_iov.iov = &iov; discard_iov.niov = 1; -assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0); -assert((count & (BDRV_SECTOR_SIZE - 1)) == 0); +if (!QEMU_IS_ALIGNED(offset | count, BDRV_SECTOR_SIZE)) { +return -ENOTSUP; +} acb = sd_aio_setup(bs, &discard_iov, offset >> BDRV_SECTOR_BITS, count >> BDRV_SECTOR_BITS); acb->aiocb_type = AIOCB_DISCARD_OBJ; -- 2.7.4
Re: [Qemu-devel] [PULL 00/47] virtio, vhost, pc, pci: tests, documentation, fixes and cleanups
On Thu, Nov 10, 2016 at 03:48:28PM -0700, Alex Williamson wrote: > On Fri, 11 Nov 2016 00:33:17 +0200 > "Michael S. Tsirkin" wrote: > > > On Thu, Nov 10, 2016 at 03:29:21PM -0700, Alex Williamson wrote: > > > On Thu, 10 Nov 2016 18:12:20 +0200 > > > "Michael S. Tsirkin" wrote: > > > > > > > The following changes since commit > > > > 6bbcb76301a72dc80c8d29af13d40bb9a759c9c6: > > > > > > > > MAINTAINERS: Remove obsolete stable branches (2016-11-10 15:29:59 > > > > +) > > > > > > > > are available in the git repository at: > > > > > > > > git://git.kernel.org/pub/scm/virt/kvm/mst/qemu.git tags/for_upstream > > > > > > > > for you to fetch changes up to 8038753b86f4cb1e79d4225a799395c4dae96b17: > > > > > > > > docs: add PCIe devices placement guidelines (2016-11-10 18:08:06 > > > > +0200) > > > > > > > > > > > > virtio, vhost, pc, pci: tests, documentation, fixes and cleanups > > > > > > > > Lots of fixes all over the place. I allowed some cleanups in even > > > > though they > > > > are not strictly bugfixes, they might prevent bugs and seem very safe. > > > > > > > > Most importantly, this fixes a regression with vhost introduced > > > > by the last pull. > > > > > > I think I'm hitting this previous regression, I see this in my libvirt > > > log: > > > > > > kvm_mem_ioeventfd_add: error adding ioeventfd: File exists > > > > > > And a backtrace as seen here > > > https://paste.fedoraproject.org/477562/88144131/ > > > > > > So I merge this tag to try to resolve it, now I get qemu segfaulting > > > bisected to: > > > > > > commit a6d8372bc6764ee279b473d13ff4ecc8acb7a978 > > > Author: Cao jin > > > Date: Sat Nov 5 10:07:21 2016 +0800 > > > > > > hcd-xhci: check & correct param before using it > > Here's the backtrace from this one: > > #0 0x55a99d90 in xhci_running (xhci=0x0) at hw/usb/hcd-xhci.c:824 > #1 0x55a9f092 in xhci_port_notify (port=0x7fffbfb39330, bits=131072) > at hw/usb/hcd-xhci.c:2870 > #2 0x55a9f221 in xhci_port_update (port=0x7fffbfb39330, is_detach=0) > at hw/usb/hcd-xhci.c:2905 > #3 0x55a9f435 in xhci_reset (dev=0x7fffbfb38010) > at hw/usb/hcd-xhci.c:2961 > #4 0x55984a2e in device_reset (dev=0x7fffbfb38010) > at hw/core/qdev.c:1145 > #5 0x559828db in qdev_reset_one (dev=0x7fffbfb38010, opaque=0x0) > at hw/core/qdev.c:295 > #6 0x559834df in qdev_walk_children (dev=0x7fffbfb38010, > pre_devfn=0x0, pre_busfn=0x0, post_devfn=0x559828bf , > post_busfn=0x559828e2 , opaque=0x0) at hw/core/qdev.c:610 > #7 0x5598763b in qbus_walk_children (bus=0x56a40f30, > pre_devfn=0x0, pre_busfn=0x0, post_devfn=0x559828bf , > post_busfn=0x559828e2 , opaque=0x0) at hw/core/bus.c:59 > #8 0x559834a3 in qdev_walk_children (dev=0x56a3f220, > pre_devfn=0x0, pre_busfn=0x0, post_devfn=0x559828bf , > post_busfn=0x559828e2 , opaque=0x0) at hw/core/qdev.c:602 > #9 0x5598763b in qbus_walk_children (bus=0x568296c0, > pre_devfn=0x0, pre_busfn=0x0, post_devfn=0x559828bf , > post_busfn=0x559828e2 , opaque=0x0) at hw/core/bus.c:59 > #10 0x559829f5 in qbus_reset_all (bus=0x568296c0) > at hw/core/qdev.c:321 > #11 0x55982a18 in qbus_reset_all_fn (opaque=0x568296c0) > at hw/core/qdev.c:327 > #12 0x558e76f0 in qemu_devices_reset () at vl.c:1765 > #13 0x558367ce in pc_machine_reset () > at /net/gimli/home/alwillia/Work/qemu.git/hw/i386/pc.c:2181 > #14 0x558e778d in qemu_system_reset (report=false) at vl.c:1778 > #15 0x558ef44b in main (argc=50, argv=0x7fffdf48, > envp=0x7fffe0e0) at vl.c:4656 > > Here's a commandline sufficient to trigger it: > > qemu-system-x86_64 -m 1G -nodefaults -no-user-config -display none -monitor > stdio -device nec-usb-xhci > > So apparently that never got tested or something got mangled in the > commit. Thanks, > > Alex On the original tree (without this mangled pull), does one of [PATCH] vhost-scsi: Update 'ioeventfd_started' with host notifiers and [PATCH v2] vhost: Update 'ioeventfd_started' with host notifiers help? -- MST
Re: [Qemu-devel] qemu virtio_net abort bisect
On Thu, Nov 10, 2016 at 03:02:43PM -0800, John Fastabend wrote: > Hi Michael, > > I'm getting a qemu crash from a load/unload of virtio_net.ko in guest > kernel with the following, > > > ./x86_64-softmmu/qemu-system-x86_64 -hda > > /var/lib/libvirt/images/Fedora-test0.img \ >-m 4096 -enable-kvm -smp 4 -netdev tap,id=hn0,queues=4,vhost=on \ >-device virtio-net-pci,netdev=hn0,mq=on,vectors=9 > > in the host I do a simple > > # rmmod virtio_net > # modprobe virtio_net > > And bisecting points at this merge which just from the commit message > looks like it could be the culprit, > > > Merge: 4eb28ab 5300063 > > Author: Stefan Hajnoczi > > Date: Thu Nov 3 14:41:53 2016 + > > > > Merge remote-tracking branch 'remotes/mst/tags/for_upstream' into staging > > > > virtio, pc: fixes and features > > > > nvdimm hotplug support > > virtio migration and ioeventfd rework<--- ioeventfd bits? > > virtio crypto device > > ipmi fixes > > > > Signed-off-by: Michael S. Tsirkin > > Running without the above and everything works no issues. I started to > debug but admittedly got a bit lost in the notifier hooks maybe someone > will have a better idea. Here is the error I get, > > > kvm_io_ioeventfd_add: error adding ioeventfd: File exists > > Aborted (core dumped) > > I had a gdb backtrace at one point and can post that if its helpful. > > Thanks, > John Thanks! Unfortunately it looks like a recent change broke this for a lot of people. If you have the time, it might be helpful to try both [PATCH] vhost-scsi: Update 'ioeventfd_started' with host notifiers and [PATCH v2] vhost: Update 'ioeventfd_started' with host notifiers -- MST
[Qemu-devel] qemu virtio_net abort bisect
Hi Michael, I'm getting a qemu crash from a load/unload of virtio_net.ko in guest kernel with the following, > ./x86_64-softmmu/qemu-system-x86_64 -hda > /var/lib/libvirt/images/Fedora-test0.img \ -m 4096 -enable-kvm -smp 4 -netdev tap,id=hn0,queues=4,vhost=on \ -device virtio-net-pci,netdev=hn0,mq=on,vectors=9 in the host I do a simple # rmmod virtio_net # modprobe virtio_net And bisecting points at this merge which just from the commit message looks like it could be the culprit, > Merge: 4eb28ab 5300063 > Author: Stefan Hajnoczi > Date: Thu Nov 3 14:41:53 2016 + > > Merge remote-tracking branch 'remotes/mst/tags/for_upstream' into staging > > virtio, pc: fixes and features > > nvdimm hotplug support > virtio migration and ioeventfd rework<--- ioeventfd bits? > virtio crypto device > ipmi fixes > > Signed-off-by: Michael S. Tsirkin Running without the above and everything works no issues. I started to debug but admittedly got a bit lost in the notifier hooks maybe someone will have a better idea. Here is the error I get, > kvm_io_ioeventfd_add: error adding ioeventfd: File exists > Aborted (core dumped) I had a gdb backtrace at one point and can post that if its helpful. Thanks, John
[Qemu-devel] [PATCH v2] target-m68k: add rol/ror/roxl/roxr instructions
Signed-off-by: Laurent Vivier --- v2: - use shift to do rotate_x() for 8 and 16bit value - rotate_x()/rotate32_x() are a no-op when shift % (size + 1) == 0 - add some missing tcg_temp_free() target-m68k/translate.c | 414 1 file changed, 414 insertions(+) diff --git a/target-m68k/translate.c b/target-m68k/translate.c index 96af672..0a85dc4 100644 --- a/target-m68k/translate.c +++ b/target-m68k/translate.c @@ -3080,6 +3080,413 @@ DISAS_INSN(shift_mem) set_cc_op(s, CC_OP_FLAGS); } +static void rotate(TCGv reg, TCGv shift, int left, int size) +{ +switch (size) { +case 8: +/* Replicate the 8-bit input so that a 32-bit rotate works. */ +tcg_gen_ext8u_i32(reg, reg); +tcg_gen_muli_i32(reg, reg, 0x01010101); +goto do_long; +case 16: +/* Replicate the 16-bit input so that a 32-bit rotate works. */ +tcg_gen_deposit_i32(reg, reg, reg, 16, 16); +goto do_long; +do_long: +default: +if (left) { +tcg_gen_rotl_i32(reg, reg, shift); +} else { +tcg_gen_rotr_i32(reg, reg, shift); +} +} + +/* compute flags */ + +switch (size) { +case 8: +tcg_gen_ext8s_i32(reg, reg); +break; +case 16: +tcg_gen_ext16s_i32(reg, reg); +break; +default: +break; +} + +/* QREG_CC_X is not affected */ + +tcg_gen_mov_i32(QREG_CC_N, reg); +tcg_gen_mov_i32(QREG_CC_Z, reg); + +if (left) { +tcg_gen_andi_i32(QREG_CC_C, reg, 1); +} else { +tcg_gen_shri_i32(QREG_CC_C, reg, 31); +} + +tcg_gen_movi_i32(QREG_CC_V, 0); /* always cleared */ +} + +static void rotate_x_flags(TCGv reg, TCGv X, int size) +{ +switch (size) { +case 8: +tcg_gen_ext8s_i32(reg, reg); +break; +case 16: +tcg_gen_ext16s_i32(reg, reg); +break; +default: +break; +} +tcg_gen_mov_i32(QREG_CC_N, reg); +tcg_gen_mov_i32(QREG_CC_Z, reg); +tcg_gen_mov_i32(QREG_CC_X, X); +tcg_gen_mov_i32(QREG_CC_C, X); +tcg_gen_movi_i32(QREG_CC_V, 0); +} + +/* Result of rotate_x() is valid if 0 < shift < (size + 1) < 32 */ +static TCGv rotate_x(TCGv dest, TCGv src, TCGv shift, int left, int size) +{ +TCGv X, shl, shr, shx; + +shr = tcg_temp_new(); +shl = tcg_temp_new(); +shx = tcg_temp_new(); +if (left) { +tcg_gen_mov_i32(shl, shift); /* shl = shift */ +tcg_gen_movi_i32(shr, size + 1); +tcg_gen_sub_i32(shr, shr, shift); /* shr = size + 1 - shift */ +tcg_gen_subi_i32(shx, shift, 1); /* shx = shift - 1 */ +} else { +tcg_gen_mov_i32(shr, shift); /* shr = shift */ +tcg_gen_movi_i32(shl, size + 1); +tcg_gen_sub_i32(shl, shl, shift); /* shl = size + 1 - shift */ +tcg_gen_movi_i32(shx, size); +tcg_gen_sub_i32(shx, shx, shift); /* shx = size - shift */ +} + +/* dest = (src << shl) | (src >> shr) | (x << shx); */ + +tcg_gen_shl_i32(shl, src, shl); +tcg_gen_shr_i32(shr, src, shr); +tcg_gen_or_i32(dest, shl, shr); +tcg_temp_free(shl); +tcg_temp_free(shr); +tcg_gen_shl_i32(shx, QREG_CC_X, shx); +tcg_gen_or_i32(dest, dest, shx); +tcg_temp_free(shx); + +/* X = (dest >> size) & 1 */ + +X = tcg_temp_new(); +tcg_gen_shri_i32(X, dest, size); +tcg_gen_andi_i32(X, X, 1); + +return X; +} + +/* Result of rotate32_x() is valid if 0 < shift < 33 */ +static TCGv rotate32_x(TCGv dest, TCGv src, TCGv shift, int left) +{ +TCGv_i64 t0, shift64; +TCGv X, lo, hi; + +shift64 = tcg_temp_new_i64(); +tcg_gen_extu_i32_i64(shift64, shift); + +t0 = tcg_temp_new_i64(); + +X = tcg_temp_new(); +lo = tcg_temp_new(); +hi = tcg_temp_new(); + +if (left) { +/* create [src:X:..] */ + +tcg_gen_shli_i32(lo, QREG_CC_X, 31); +tcg_gen_concat_i32_i64(t0, lo, src); + +/* rotate */ + +tcg_gen_rotl_i64(t0, t0, shift64); +tcg_temp_free_i64(shift64); + +/* result is [src:..:src:X] */ + +tcg_gen_extr_i64_i32(lo, hi, t0); +tcg_gen_andi_i32(X, lo, 1); + +tcg_gen_shri_i32(lo, lo, 1); +} else { +/* create [..:X:src] */ + +tcg_gen_concat_i32_i64(t0, src, QREG_CC_X); + +tcg_gen_rotr_i64(t0, t0, shift64); +tcg_temp_free_i64(shift64); + +/* result is value: [X:src:..:src] */ + +tcg_gen_extr_i64_i32(lo, hi, t0); + +/* extract X */ + +tcg_gen_shri_i32(X, hi, 31); + +/* extract result */ + +tcg_gen_shli_i32(hi, hi, 1); +} +tcg_gen_or_i32(dest, lo, hi); +tcg_temp_free(hi); +tcg_temp_free(lo); +tcg_temp_free_i64(t0); + +return X; +} + +DISAS_INSN(rotate_im) +{ +TCGv shift; +int tmp; +int left = (insn & 0x100); + +tmp = (insn >> 9) & 7; +if (tmp == 0) { +tmp = 8; +} + +shift = tcg_const_i32(tmp); +
Re: [Qemu-devel] [PULL 00/47] virtio, vhost, pc, pci: tests, documentation, fixes and cleanups
On Thu, Nov 10, 2016 at 03:48:28PM -0700, Alex Williamson wrote: > On Fri, 11 Nov 2016 00:33:17 +0200 > "Michael S. Tsirkin" wrote: > > > On Thu, Nov 10, 2016 at 03:29:21PM -0700, Alex Williamson wrote: > > > On Thu, 10 Nov 2016 18:12:20 +0200 > > > "Michael S. Tsirkin" wrote: > > > > > > > The following changes since commit > > > > 6bbcb76301a72dc80c8d29af13d40bb9a759c9c6: > > > > > > > > MAINTAINERS: Remove obsolete stable branches (2016-11-10 15:29:59 > > > > +) > > > > > > > > are available in the git repository at: > > > > > > > > git://git.kernel.org/pub/scm/virt/kvm/mst/qemu.git tags/for_upstream > > > > > > > > for you to fetch changes up to 8038753b86f4cb1e79d4225a799395c4dae96b17: > > > > > > > > docs: add PCIe devices placement guidelines (2016-11-10 18:08:06 > > > > +0200) > > > > > > > > > > > > virtio, vhost, pc, pci: tests, documentation, fixes and cleanups > > > > > > > > Lots of fixes all over the place. I allowed some cleanups in even > > > > though they > > > > are not strictly bugfixes, they might prevent bugs and seem very safe. > > > > > > > > Most importantly, this fixes a regression with vhost introduced > > > > by the last pull. > > > > > > I think I'm hitting this previous regression, I see this in my libvirt > > > log: > > > > > > kvm_mem_ioeventfd_add: error adding ioeventfd: File exists > > > > > > And a backtrace as seen here > > > https://paste.fedoraproject.org/477562/88144131/ > > > > > > So I merge this tag to try to resolve it, now I get qemu segfaulting > > > bisected to: > > > > > > commit a6d8372bc6764ee279b473d13ff4ecc8acb7a978 > > > Author: Cao jin > > > Date: Sat Nov 5 10:07:21 2016 +0800 > > > > > > hcd-xhci: check & correct param before using it > > Here's the backtrace from this one: > > #0 0x55a99d90 in xhci_running (xhci=0x0) at hw/usb/hcd-xhci.c:824 > #1 0x55a9f092 in xhci_port_notify (port=0x7fffbfb39330, bits=131072) > at hw/usb/hcd-xhci.c:2870 > #2 0x55a9f221 in xhci_port_update (port=0x7fffbfb39330, is_detach=0) > at hw/usb/hcd-xhci.c:2905 > #3 0x55a9f435 in xhci_reset (dev=0x7fffbfb38010) > at hw/usb/hcd-xhci.c:2961 > #4 0x55984a2e in device_reset (dev=0x7fffbfb38010) > at hw/core/qdev.c:1145 > #5 0x559828db in qdev_reset_one (dev=0x7fffbfb38010, opaque=0x0) > at hw/core/qdev.c:295 > #6 0x559834df in qdev_walk_children (dev=0x7fffbfb38010, > pre_devfn=0x0, pre_busfn=0x0, post_devfn=0x559828bf , > post_busfn=0x559828e2 , opaque=0x0) at hw/core/qdev.c:610 > #7 0x5598763b in qbus_walk_children (bus=0x56a40f30, > pre_devfn=0x0, pre_busfn=0x0, post_devfn=0x559828bf , > post_busfn=0x559828e2 , opaque=0x0) at hw/core/bus.c:59 > #8 0x559834a3 in qdev_walk_children (dev=0x56a3f220, > pre_devfn=0x0, pre_busfn=0x0, post_devfn=0x559828bf , > post_busfn=0x559828e2 , opaque=0x0) at hw/core/qdev.c:602 > #9 0x5598763b in qbus_walk_children (bus=0x568296c0, > pre_devfn=0x0, pre_busfn=0x0, post_devfn=0x559828bf , > post_busfn=0x559828e2 , opaque=0x0) at hw/core/bus.c:59 > #10 0x559829f5 in qbus_reset_all (bus=0x568296c0) > at hw/core/qdev.c:321 > #11 0x55982a18 in qbus_reset_all_fn (opaque=0x568296c0) > at hw/core/qdev.c:327 > #12 0x558e76f0 in qemu_devices_reset () at vl.c:1765 > #13 0x558367ce in pc_machine_reset () > at /net/gimli/home/alwillia/Work/qemu.git/hw/i386/pc.c:2181 > #14 0x558e778d in qemu_system_reset (report=false) at vl.c:1778 > #15 0x558ef44b in main (argc=50, argv=0x7fffdf48, > envp=0x7fffe0e0) at vl.c:4656 > > Here's a commandline sufficient to trigger it: > > qemu-system-x86_64 -m 1G -nodefaults -no-user-config -display none -monitor > stdio -device nec-usb-xhci > > So apparently that never got tested or something got mangled in the > commit. Thanks, > > Alex I think I'll drop this, this patchset was borderline useful anyway.
Re: [Qemu-devel] [PATCH 2/3] qemu: Implement virtio-pstore device
On Fri, Sep 16, 2016 at 07:05:47PM +0900, Namhyung Kim wrote: > On Tue, Sep 13, 2016 at 06:57:10PM +0300, Michael S. Tsirkin wrote: > > On Sat, Aug 20, 2016 at 05:07:43PM +0900, Namhyung Kim wrote: > > > Add virtio pstore device to allow kernel log files saved on the host. > > > It will save the log files on the directory given by pstore device > > > option. > > > > > > $ qemu-system-x86_64 -device virtio-pstore,directory=dir-xx ... > > > > > > (guest) # echo c > /proc/sysrq-trigger > > > > > > $ ls dir-xx > > > dmesg-1.enc.z dmesg-2.enc.z > > > > > > The log files are usually compressed using zlib. Users can see the log > > > messages directly on the host or on the guest (using pstore filesystem). > > > > > > The 'directory' property is required for virtio-pstore device to work. > > > It also adds 'bufsize' property to set size of pstore bufer. > > > > > > Cc: Paolo Bonzini > > > Cc: Radim Krčmář > > > Cc: "Michael S. Tsirkin" > > > Cc: Anthony Liguori > > > Cc: Anton Vorontsov > > > Cc: Colin Cross > > > Cc: Kees Cook > > > Cc: Tony Luck > > > Cc: Steven Rostedt > > > Cc: Ingo Molnar > > > Cc: Minchan Kim > > > Cc: Daniel P. Berrange > > > Cc: k...@vger.kernel.org > > > Cc: qemu-devel@nongnu.org > > > Cc: virtualizat...@lists.linux-foundation.org > > > Signed-off-by: Namhyung Kim > > > --- > > > hw/virtio/Makefile.objs| 2 +- > > > hw/virtio/virtio-pci.c | 52 ++ > > > hw/virtio/virtio-pci.h | 14 + > > > hw/virtio/virtio-pstore.c | 699 > > > + > > > include/hw/pci/pci.h | 1 + > > > include/hw/virtio/virtio-pstore.h | 36 ++ > > > include/standard-headers/linux/virtio_ids.h| 1 + > > > include/standard-headers/linux/virtio_pstore.h | 76 +++ > > > qdev-monitor.c | 1 + > > > 9 files changed, 881 insertions(+), 1 deletion(-) > > > create mode 100644 hw/virtio/virtio-pstore.c > > > create mode 100644 include/hw/virtio/virtio-pstore.h > > > create mode 100644 include/standard-headers/linux/virtio_pstore.h > > > > > > diff --git a/hw/virtio/Makefile.objs b/hw/virtio/Makefile.objs > > > index 3e2b175..aae7082 100644 > > > --- a/hw/virtio/Makefile.objs > > > +++ b/hw/virtio/Makefile.objs > > > @@ -4,4 +4,4 @@ common-obj-y += virtio-bus.o > > > common-obj-y += virtio-mmio.o > > > > > > obj-y += virtio.o virtio-balloon.o > > > -obj-$(CONFIG_LINUX) += vhost.o vhost-backend.o vhost-user.o > > > +obj-$(CONFIG_LINUX) += vhost.o vhost-backend.o vhost-user.o > > > virtio-pstore.o > > > diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c > > > index 755f921..c184823 100644 > > > --- a/hw/virtio/virtio-pci.c > > > +++ b/hw/virtio/virtio-pci.c > > > @@ -2416,6 +2416,57 @@ static const TypeInfo virtio_host_pci_info = { > > > }; > > > #endif > > > > > > +/* virtio-pstore-pci */ > > > + > > > +static void virtio_pstore_pci_realize(VirtIOPCIProxy *vpci_dev, Error > > > **errp) > > > +{ > > > +VirtIOPstorePCI *vps = VIRTIO_PSTORE_PCI(vpci_dev); > > > +DeviceState *vdev = DEVICE(&vps->vdev); > > > +Error *err = NULL; > > > + > > > +qdev_set_parent_bus(vdev, BUS(&vpci_dev->bus)); > > > +object_property_set_bool(OBJECT(vdev), true, "realized", &err); > > > +if (err) { > > > +error_propagate(errp, err); > > > +return; > > > +} > > > +} > > > + > > > +static void virtio_pstore_pci_class_init(ObjectClass *klass, void *data) > > > +{ > > > +DeviceClass *dc = DEVICE_CLASS(klass); > > > +VirtioPCIClass *k = VIRTIO_PCI_CLASS(klass); > > > +PCIDeviceClass *pcidev_k = PCI_DEVICE_CLASS(klass); > > > + > > > +k->realize = virtio_pstore_pci_realize; > > > +set_bit(DEVICE_CATEGORY_MISC, dc->categories); > > > + > > > +pcidev_k->vendor_id = PCI_VENDOR_ID_REDHAT_QUMRANET; > > > +pcidev_k->device_id = PCI_DEVICE_ID_VIRTIO_PSTORE; > > > +pcidev_k->revision = VIRTIO_PCI_ABI_VERSION; > > > +pcidev_k->class_id = PCI_CLASS_OTHERS; > > > +} > > > + > > > +static void virtio_pstore_pci_instance_init(Object *obj) > > > +{ > > > +VirtIOPstorePCI *dev = VIRTIO_PSTORE_PCI(obj); > > > + > > > +virtio_instance_init_common(obj, &dev->vdev, sizeof(dev->vdev), > > > +TYPE_VIRTIO_PSTORE); > > > +object_property_add_alias(obj, "directory", OBJECT(&dev->vdev), > > > + "directory", &error_abort); > > > +object_property_add_alias(obj, "bufsize", OBJECT(&dev->vdev), > > > + "bufsize", &error_abort); > > > +} > > > + > > > +static const TypeInfo virtio_pstore_pci_info = { > > > +.name = TYPE_VIRTIO_PSTORE_PCI, > > > +.parent= TYPE_VIRTIO_PCI, > > > +.instance_size = sizeof(VirtIOPstorePCI), > > > +.instance_init = virtio_pstore_pci_instance_init, > > > +.class_init= virtio_pstore_pci_clas
Re: [Qemu-devel] [PULL 00/47] virtio, vhost, pc, pci: tests, documentation, fixes and cleanups
On Fri, 11 Nov 2016 00:33:17 +0200 "Michael S. Tsirkin" wrote: > On Thu, Nov 10, 2016 at 03:29:21PM -0700, Alex Williamson wrote: > > On Thu, 10 Nov 2016 18:12:20 +0200 > > "Michael S. Tsirkin" wrote: > > > > > The following changes since commit > > > 6bbcb76301a72dc80c8d29af13d40bb9a759c9c6: > > > > > > MAINTAINERS: Remove obsolete stable branches (2016-11-10 15:29:59 +) > > > > > > are available in the git repository at: > > > > > > git://git.kernel.org/pub/scm/virt/kvm/mst/qemu.git tags/for_upstream > > > > > > for you to fetch changes up to 8038753b86f4cb1e79d4225a799395c4dae96b17: > > > > > > docs: add PCIe devices placement guidelines (2016-11-10 18:08:06 +0200) > > > > > > > > > virtio, vhost, pc, pci: tests, documentation, fixes and cleanups > > > > > > Lots of fixes all over the place. I allowed some cleanups in even though > > > they > > > are not strictly bugfixes, they might prevent bugs and seem very safe. > > > > > > Most importantly, this fixes a regression with vhost introduced > > > by the last pull. > > > > I think I'm hitting this previous regression, I see this in my libvirt > > log: > > > > kvm_mem_ioeventfd_add: error adding ioeventfd: File exists > > > > And a backtrace as seen here > > https://paste.fedoraproject.org/477562/88144131/ > > > > So I merge this tag to try to resolve it, now I get qemu segfaulting > > bisected to: > > > > commit a6d8372bc6764ee279b473d13ff4ecc8acb7a978 > > Author: Cao jin > > Date: Sat Nov 5 10:07:21 2016 +0800 > > > > hcd-xhci: check & correct param before using it Here's the backtrace from this one: #0 0x55a99d90 in xhci_running (xhci=0x0) at hw/usb/hcd-xhci.c:824 #1 0x55a9f092 in xhci_port_notify (port=0x7fffbfb39330, bits=131072) at hw/usb/hcd-xhci.c:2870 #2 0x55a9f221 in xhci_port_update (port=0x7fffbfb39330, is_detach=0) at hw/usb/hcd-xhci.c:2905 #3 0x55a9f435 in xhci_reset (dev=0x7fffbfb38010) at hw/usb/hcd-xhci.c:2961 #4 0x55984a2e in device_reset (dev=0x7fffbfb38010) at hw/core/qdev.c:1145 #5 0x559828db in qdev_reset_one (dev=0x7fffbfb38010, opaque=0x0) at hw/core/qdev.c:295 #6 0x559834df in qdev_walk_children (dev=0x7fffbfb38010, pre_devfn=0x0, pre_busfn=0x0, post_devfn=0x559828bf , post_busfn=0x559828e2 , opaque=0x0) at hw/core/qdev.c:610 #7 0x5598763b in qbus_walk_children (bus=0x56a40f30, pre_devfn=0x0, pre_busfn=0x0, post_devfn=0x559828bf , post_busfn=0x559828e2 , opaque=0x0) at hw/core/bus.c:59 #8 0x559834a3 in qdev_walk_children (dev=0x56a3f220, pre_devfn=0x0, pre_busfn=0x0, post_devfn=0x559828bf , post_busfn=0x559828e2 , opaque=0x0) at hw/core/qdev.c:602 #9 0x5598763b in qbus_walk_children (bus=0x568296c0, pre_devfn=0x0, pre_busfn=0x0, post_devfn=0x559828bf , post_busfn=0x559828e2 , opaque=0x0) at hw/core/bus.c:59 #10 0x559829f5 in qbus_reset_all (bus=0x568296c0) at hw/core/qdev.c:321 #11 0x55982a18 in qbus_reset_all_fn (opaque=0x568296c0) at hw/core/qdev.c:327 #12 0x558e76f0 in qemu_devices_reset () at vl.c:1765 #13 0x558367ce in pc_machine_reset () at /net/gimli/home/alwillia/Work/qemu.git/hw/i386/pc.c:2181 #14 0x558e778d in qemu_system_reset (report=false) at vl.c:1778 #15 0x558ef44b in main (argc=50, argv=0x7fffdf48, envp=0x7fffe0e0) at vl.c:4656 Here's a commandline sufficient to trigger it: qemu-system-x86_64 -m 1G -nodefaults -no-user-config -display none -monitor stdio -device nec-usb-xhci So apparently that never got tested or something got mangled in the commit. Thanks, Alex
Re: [Qemu-devel] [PULL 00/47] virtio, vhost, pc, pci: tests, documentation, fixes and cleanups
On Thu, Nov 10, 2016 at 03:29:21PM -0700, Alex Williamson wrote: > On Thu, 10 Nov 2016 18:12:20 +0200 > "Michael S. Tsirkin" wrote: > > > The following changes since commit 6bbcb76301a72dc80c8d29af13d40bb9a759c9c6: > > > > MAINTAINERS: Remove obsolete stable branches (2016-11-10 15:29:59 +) > > > > are available in the git repository at: > > > > git://git.kernel.org/pub/scm/virt/kvm/mst/qemu.git tags/for_upstream > > > > for you to fetch changes up to 8038753b86f4cb1e79d4225a799395c4dae96b17: > > > > docs: add PCIe devices placement guidelines (2016-11-10 18:08:06 +0200) > > > > > > virtio, vhost, pc, pci: tests, documentation, fixes and cleanups > > > > Lots of fixes all over the place. I allowed some cleanups in even though > > they > > are not strictly bugfixes, they might prevent bugs and seem very safe. > > > > Most importantly, this fixes a regression with vhost introduced > > by the last pull. > > I think I'm hitting this previous regression, I see this in my libvirt > log: > > kvm_mem_ioeventfd_add: error adding ioeventfd: File exists > > And a backtrace as seen here > https://paste.fedoraproject.org/477562/88144131/ > > So I merge this tag to try to resolve it, now I get qemu segfaulting > bisected to: > > commit a6d8372bc6764ee279b473d13ff4ecc8acb7a978 > Author: Cao jin > Date: Sat Nov 5 10:07:21 2016 +0800 > > hcd-xhci: check & correct param before using it > > And if I revert that, vhost still fails with: > > qemu-system-x86_64: /net/gimli/home/alwillia/Work/qemu.git/memory.c:2012: > memory_region_del_eventfd: Assertion `i != mr->ioeventfd_nb' failed. > > So I think this does not fix the previous regression and adds a new > one :-\ > > Thanks, > Alex Thanks! Stefan, pls defer merging while we investigate. Cc a bunch of relevant people. -- MST
Re: [Qemu-devel] [PULL 00/47] virtio, vhost, pc, pci: tests, documentation, fixes and cleanups
On Thu, 10 Nov 2016 18:12:20 +0200 "Michael S. Tsirkin" wrote: > The following changes since commit 6bbcb76301a72dc80c8d29af13d40bb9a759c9c6: > > MAINTAINERS: Remove obsolete stable branches (2016-11-10 15:29:59 +) > > are available in the git repository at: > > git://git.kernel.org/pub/scm/virt/kvm/mst/qemu.git tags/for_upstream > > for you to fetch changes up to 8038753b86f4cb1e79d4225a799395c4dae96b17: > > docs: add PCIe devices placement guidelines (2016-11-10 18:08:06 +0200) > > > virtio, vhost, pc, pci: tests, documentation, fixes and cleanups > > Lots of fixes all over the place. I allowed some cleanups in even though they > are not strictly bugfixes, they might prevent bugs and seem very safe. > > Most importantly, this fixes a regression with vhost introduced > by the last pull. I think I'm hitting this previous regression, I see this in my libvirt log: kvm_mem_ioeventfd_add: error adding ioeventfd: File exists And a backtrace as seen here https://paste.fedoraproject.org/477562/88144131/ So I merge this tag to try to resolve it, now I get qemu segfaulting bisected to: commit a6d8372bc6764ee279b473d13ff4ecc8acb7a978 Author: Cao jin Date: Sat Nov 5 10:07:21 2016 +0800 hcd-xhci: check & correct param before using it And if I revert that, vhost still fails with: qemu-system-x86_64: /net/gimli/home/alwillia/Work/qemu.git/memory.c:2012: memory_region_del_eventfd: Assertion `i != mr->ioeventfd_nb' failed. So I think this does not fix the previous regression and adds a new one :-\ Thanks, Alex
Re: [Qemu-devel] [Qemu-ppc] [QEMU PATCH v12 2/4] migration: migrate QTAILQ
On 11/10/2016 11:13 AM, Halil Pasic wrote: > > > On 11/10/2016 07:00 PM, Jianjun Duan wrote: >> >> >> On 11/10/2016 04:04 AM, Halil Pasic wrote: >>> >>> >>> On 11/08/2016 01:06 AM, Jianjun Duan wrote: Currently we cannot directly transfer a QTAILQ instance because of the limitation in the migration code. Here we introduce an approach to transfer such structures. We created VMStateInfo vmstate_info_qtailq for QTAILQ. Similar VMStateInfo can be created for other data structures such as list. When a QTAILQ is migrated from source to target, it is appended to the corresponding QTAILQ structure, which is assumed to have been properly initialized. This approach will be used to transfer pending_events and ccs_list in spapr state. We also create some macros in qemu/queue.h to access a QTAILQ using pointer arithmetic. This ensures that we do not depend on the implementation details about QTAILQ in the migration code. Signed-off-by: Jianjun Duan --- include/migration/vmstate.h | 20 + include/qemu/queue.h| 60 +++ migration/trace-events | 4 +++ migration/vmstate.c | 69 + 4 files changed, 153 insertions(+) diff --git a/include/migration/vmstate.h b/include/migration/vmstate.h index eafc8f2..6289327 100644 --- a/include/migration/vmstate.h +++ b/include/migration/vmstate.h @@ -253,6 +253,7 @@ extern const VMStateInfo vmstate_info_timer; extern const VMStateInfo vmstate_info_buffer; extern const VMStateInfo vmstate_info_unused_buffer; extern const VMStateInfo vmstate_info_bitmap; +extern const VMStateInfo vmstate_info_qtailq; #define type_check_2darray(t1,t2,n,m) ((t1(*)[n][m])0 - (t2*)0) #define type_check_array(t1,t2,n) ((t1(*)[n])0 - (t2*)0) @@ -664,6 +665,25 @@ extern const VMStateInfo vmstate_info_bitmap; .offset = offsetof(_state, _field),\ } +/* For QTAILQ that need customized handling. >>> >>> What do you mean by customized handling? How does non-customized >>> handling look like? >>> >> Customized handling means a user has to implement put/get to get the job >> done while normally one expects to just specify a VMSD to get the job done. > > I would drop the customized since. The user does not have t > o implement put/get since the (not user supplied) .info > that takes care of the stuff. Now assumed I'm a random guy > who wants his QTAILQ migrated, if I read 'For QTAILQ that > need customized handling' I ask myself does _my_ qtailq > need any customized handling. I have a pretty plain qtailq > so it should not need any customized handling. It may be > only my limited English skills though. I'm not anywhere near > of being a native speaker. > > IMHO if you want to indicate that stuff is custom because you > have both vmsd and info I would place a comment next to the > info. I think from language perspective that should be completely > OK assuming t least c99 -- or at least I hope so. The comment > above the macro has an apidoc feel to me so indicating it there > feels wrong. > I guess we mean the same thing. Since QTAILQ requires a get/put and cannot be migrated by just specifying a vmsd, I considered it "need customized handling". I will drop "customized". >>> I would also add something like works only for movable >>> elements/nodes to make it clear that scenarios like Juan has >>> pointed out previously are not currently supported. >>> >> Maybe you mean the backward compatibility regarding the bit stream? >> It is not supported. Will document it. > > I mean this: > https://lists.nongnu.org/archive/html/qemu-devel/2016-11/msg00314.html > > If you look at the code the VirtQueue instances sit in a array > and are organized into lists depending on to which vector do > these belong to (disclaimer: my understanding of the problem formulated > very sloppily). > > I missed the discussion on backward compatibility of the bitstream. > Could you give me a pointer? > It was touched in earlier discussions. http://lists.nongnu.org/archive/html/qemu-ppc/2016-10/msg00150.html + * Target QTAILQ needs be properly initialized. >>> >>> Could this restriction be avoided by doing (1)? >>> + * _type: type of QTAILQ element + * _next: name of QTAILQ entry field in QTAILQ element + * _vmsd: VMSD for QTAILQ element + * size: size of QTAILQ element + * start: offset of QTAILQ entry in QTAILQ element + */ +#define VMSTATE_QTAILQ_V(_field, _state, _version, _vmsd, _type, _next) \ +{\ +.name = (stringify(_field)), \ +.version_id = (_version),
Re: [Qemu-devel] [PATCH v2 3/3] net: virtio-net discards TX data after link down
On Thu, Nov 10, 2016 at 3:54 PM, Michael S. Tsirkin wrote: > On Thu, Nov 10, 2016 at 01:56:05AM +0200, Yuri Benditovich wrote: > > > > > > On Wed, Nov 9, 2016 at 10:28 PM, Michael S. Tsirkin > wrote: > > > > On Wed, Nov 09, 2016 at 05:22:02PM +0200, > yuri.benditov...@daynix.com > > wrote: > > > From: Yuri Benditovich > > > > > > https://bugzilla.redhat.com/show_bug.cgi?id=1295637 > > > Upon set_link monitor command or upon netdev deletion > > > virtio-net sends link down indication to the guest > > > and stops vhost if one is used. > > > Guest driver can still submit data for TX until it > > > recognizes link loss. If these packets not returned by > > > the host, the Windows guest will never be able to finish > > > disable/removal/shutdown. > > > Now each packet sent by guest after NIC indicated link > > > down will be completed immediately. > > > > > > Signed-off-by: Yuri Benditovich > > > --- > > > hw/net/virtio-net.c | 28 > > > 1 file changed, 28 insertions(+) > > > > > > diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c > > > index 06bfe4b..ab4e18a 100644 > > > --- a/hw/net/virtio-net.c > > > +++ b/hw/net/virtio-net.c > > > @@ -218,6 +218,16 @@ static void virtio_net_vnet_endian_status( > VirtIONet > > *n, uint8_t status) > > > } > > > } > > > > > > +static void virtio_net_drop_tx_queue_data(VirtIODevice *vdev, > VirtQueue > > *vq) > > > +{ > > > +VirtQueueElement *elem; > > > +while ((elem = virtqueue_pop(vq, sizeof(VirtQueueElement { > > > +virtqueue_push(vq, elem, 0); > > > +virtio_notify(vdev, vq); > > > +g_free(elem); > > > +} > > > +} > > > + > > > static void virtio_net_set_status(struct VirtIODevice *vdev, > uint8_t > > status) > > > { > > > VirtIONet *n = VIRTIO_NET(vdev); > > > > I don't like this part. This does too much queue parsing, > > I would like to just copy head from avail to used ring. > > > > For example, people want to support rings >1K in size. > > Let's add bool virtqueue_drop(vq) and be done with it. > > > > > > Please note that this code works only when link is down. > > For me this was too complicated to write simpler procedure > > with the same result. > > Yes - it's somewhat problematic and risky that we process > the ring in qemu, but I don't see an easy way around that. > But at least let's limit the processing and assumptions we > make. > > > > > > > > > @@ -262,6 +272,14 @@ static void virtio_net_set_status(struct > > VirtIODevice *vdev, uint8_t status) > > > } else { > > > qemu_bh_cancel(q->tx_bh); > > > } > > > +if ((n->status & VIRTIO_NET_S_LINK_UP) == 0 && > > > +(queue_status & VIRTIO_CONFIG_S_DRIVER_OK)) { > > >> +/* if tx is waiting we are likely have some packets > in > > ... we likely have some ... > > > tx queue > > > + * and disabled notification */ > > what does this refer to? > virtio-net.c processes tx by tic-tac scheme, for example: handle_tx_bh sets tx_waiting, disables queue notification, schedules bh. then tx_bh enables queue notification, flushes tx, clears tx_waiting. when queue notification disabled, tx completion will not raise host interrupt So, when we discard bh upon link down between 'tic' and 'tac', if is good to enable notification back, drop packets and move to 'waiting for tx' state. Similar 'tic-tac' cycle works in case of timer. > > > > +q->tx_waiting = 0; > > > +virtio_queue_set_notification(q->tx_vq, 1); > > > +virtio_net_drop_tx_queue_data(vdev, q->tx_vq); > > > +} > > > } > > > } > > > } > > > > OK but what if guest keeps sending packets? What will drop them? > > > > > > This code fixes following problem in original code (example): > > We are in vhost=off and receive kick ->virtio_net_handle_tx_timer > > -> tx_waiting=1, notification disabled, timer set > > Now we receive link loss, cancel the timer and stay with packets in the > queue > > and with > > disabled notification. Nobody will return them. (easy to reproduce with > timer > > set to 5ms) > > > > Added code drops packets we already have and ensure we will report them > > as completed to guest. If guest keeps sending packets, they will be > dropped > > in virtio_net_handle_tx_timer and in virtio_net_handle_tx_bh (in > procedures > > just below) > > as we already with link down. > > Yes I get that. I'm just not 100% sure all paths have > us listen on the ioeventfd and handle kicks without races - > this was previously assumed not to matter. > > > > > > > > > @@ -1319,6 +1337,11 @@ static void virtio_net_handle_tx_timer( > > VirtIODevice *
Re: [Qemu-devel] [kvm-unit-tests PATCH v5 10/11] arm/arm64: gicv3: add an IPI test
On Thu, Nov 10, 2016 at 07:53:58PM +, Alex Bennée wrote: [...] > > +struct gic gicv2 = { > > + .ipi = { > > + .enable = gicv2_enable_defaults, > > + .send_self = gicv2_ipi_send_self, > > + .send_tlist = gicv2_ipi_send_tlist, > > + .send_broadcast = gicv2_ipi_send_broadcast, > > + }, > > + .read_iar = gicv2_read_iar, > > + .irqnr = gicv2_irqnr, > > + .write_eoi = gicv2_write_eoi, > > +}; > > + > > +struct gic gicv3 = { > > + .ipi = { > > + .enable = gicv3_enable_defaults, > > + .send_self = gicv3_ipi_send_self, > > + .send_tlist = gicv3_ipi_send_tlist, > > + .send_broadcast = gicv3_ipi_send_broadcast, > > + }, > > + .read_iar = gicv3_read_iar, > > + .irqnr = gicv3_irqnr, > > + .write_eoi = gicv3_write_eoir, > > +}; > > + > > So I was re-basing my kvm-unit-tests against your GIC rework and found > myself copy and pasting a bunch of this into my tests that fire IRQs. > That makes me think the abstraction should be in the library code so > other tests can fiddle with sending IRQs. > > What do you think? > I guess you mean moving the above two structs and their corresponding functions (all which aren't already common) to lib/arm/ ? Or do you just mean the one non-trivial function gicv3_ipi_send_tlist? I think agree with gicv3_ipi_send_tlist getting shared, but the others are mostly one-liners, so I'm not sure. I guess I'd have to see how you're using them first. Thanks, drew
[Qemu-devel] [PATCH v2 for-2.8] spapr-vty: Fix bad assert() statement
When using the serial console in the GTK interface of QEMU (and QEMU has been compiled with CONFIG_VTE), it is possible to trigger the assert() statement in vty_receive() in spapr_vty.c by pasting a chunk of text with length > 16 into the QEMU window. Most of the other serial backends seem to simply drop characters that they can not handle, so I think we should also do the same in spapr-vty to fix this issue. Buglink: https://bugs.launchpad.net/qemu/+bug/1639322 Signed-off-by: Thomas Huth --- v2: - Do not increase the buffer size since this breaks migration. (We should figure out a better solution for the size of the buffer once 2.8 is released) hw/char/spapr_vty.c | 11 ++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/hw/char/spapr_vty.c b/hw/char/spapr_vty.c index 31822fe..06b9b39 100644 --- a/hw/char/spapr_vty.c +++ b/hw/char/spapr_vty.c @@ -1,4 +1,5 @@ #include "qemu/osdep.h" +#include "qemu/error-report.h" #include "qapi/error.h" #include "qemu-common.h" #include "cpu.h" @@ -37,7 +38,15 @@ static void vty_receive(void *opaque, const uint8_t *buf, int size) qemu_irq_pulse(spapr_vio_qirq(&dev->sdev)); } for (i = 0; i < size; i++) { -assert((dev->in - dev->out) < VTERM_BUFSIZE); +if (dev->in - dev->out >= VTERM_BUFSIZE) { +static bool reported; +if (!reported) { +error_report("VTY input buffer exhausted - characters dropped." + " (input size = %i)", size); +reported = true; +} +break; +} dev->buf[dev->in++ % VTERM_BUFSIZE] = buf[i]; } } -- 1.8.3.1
Re: [Qemu-devel] [kvm-unit-tests PATCH v5 10/11] arm/arm64: gicv3: add an IPI test
Andrew Jones writes: > Signed-off-by: Andrew Jones > > --- > v5: > - fix copy+paste error in gicv3_write_eoir [drew] > - use modern register names [Andre] > v4: > - heavily comment gicv3_ipi_send_tlist() [Eric] > - changes needed for gicv2 iar/irqstat fix to other patch > v2: > - use IRM for gicv3 broadcast > --- > arm/gic.c | 195 > ++--- > arm/unittests.cfg | 6 ++ > lib/arm/asm/arch_gicv3.h | 23 ++ > lib/arm64/asm/arch_gicv3.h | 22 + > 4 files changed, 236 insertions(+), 10 deletions(-) > > diff --git a/arm/gic.c b/arm/gic.c > index 06092aec7c08..ca68f8ad1cb9 100644 > --- a/arm/gic.c > +++ b/arm/gic.c > @@ -3,6 +3,8 @@ > * > * GICv2 > * + test sending/receiving IPIs > + * GICv3 > + * + test sending/receiving IPIs > * > * Copyright (C) 2016, Red Hat Inc, Andrew Jones > * > @@ -16,6 +18,19 @@ > #include > #include > > +struct gic { > + struct { > + void (*enable)(void); > + void (*send_self)(void); > + void (*send_tlist)(cpumask_t *); > + void (*send_broadcast)(void); > + } ipi; > + u32 (*read_iar)(void); > + u32 (*irqnr)(u32 iar); > + void (*write_eoi)(u32); > +}; > + > +static struct gic *gic; > static int gic_version; > static int acked[NR_CPUS], spurious[NR_CPUS]; > static cpumask_t ready; > @@ -69,13 +84,33 @@ static void check_acked(cpumask_t *mask) > false, missing, extra, unexpected); > } > > +static u32 gicv2_read_iar(void) > +{ > + return readl(gicv2_cpu_base() + GICC_IAR); > +} > + > +static u32 gicv2_irqnr(u32 iar) > +{ > + return iar & GICC_IAR_INT_ID_MASK; > +} > + > +static void gicv2_write_eoi(u32 irqstat) > +{ > + writel(irqstat, gicv2_cpu_base() + GICC_EOIR); > +} > + > +static u32 gicv3_irqnr(u32 iar) > +{ > + return iar; > +} > + > static void ipi_handler(struct pt_regs *regs __unused) > { > - u32 irqstat = readl(gicv2_cpu_base() + GICC_IAR); > - u32 irqnr = irqstat & GICC_IAR_INT_ID_MASK; > + u32 irqstat = gic->read_iar(); > + u32 irqnr = gic->irqnr(irqstat); > > if (irqnr != GICC_INT_SPURIOUS) { > - writel(irqstat, gicv2_cpu_base() + GICC_EOIR); > + gic->write_eoi(irqstat); > smp_rmb(); /* pairs with wmb in ipi_test functions */ > ++acked[smp_processor_id()]; > smp_wmb(); /* pairs with rmb in check_acked */ > @@ -85,6 +120,112 @@ static void ipi_handler(struct pt_regs *regs __unused) > } > } > > +static void gicv2_ipi_send_self(void) > +{ > + writel(2 << 24, gicv2_dist_base() + GICD_SGIR); > +} > + > +static void gicv2_ipi_send_tlist(cpumask_t *mask) > +{ > + u8 tlist = (u8)cpumask_bits(mask)[0]; > + > + writel(tlist << 16, gicv2_dist_base() + GICD_SGIR); > +} > + > +static void gicv2_ipi_send_broadcast(void) > +{ > + writel(1 << 24, gicv2_dist_base() + GICD_SGIR); > +} > + > +#define ICC_SGI1R_AFFINITY_1_SHIFT 16 > +#define ICC_SGI1R_AFFINITY_2_SHIFT 32 > +#define ICC_SGI1R_AFFINITY_3_SHIFT 48 > +#define MPIDR_TO_SGI_AFFINITY(cluster_id, level) \ > + (MPIDR_AFFINITY_LEVEL(cluster_id, level) << ICC_SGI1R_AFFINITY_## level > ## _SHIFT) > + > +static void gicv3_ipi_send_tlist(cpumask_t *mask) > +{ > + u16 tlist; > + int cpu; > + > + /* > + * For each cpu in the mask collect its peers, which are also in > + * the mask, in order to form target lists. > + */ > + for_each_cpu(cpu, mask) { > + u64 mpidr = cpus[cpu], sgi1r; > + u64 cluster_id; > + > + /* > + * GICv3 can send IPIs to up 16 peer cpus with a single > + * write to ICC_SGI1R_EL1 (using the target list). Peers > + * are cpus that have nearly identical MPIDRs, the only > + * difference being Aff0. The matching upper affinity > + * levels form the cluster ID. > + */ > + cluster_id = mpidr & ~0xffUL; > + tlist = 0; > + > + /* > + * Sort of open code for_each_cpu in order to have a > + * nested for_each_cpu loop. > + */ > + while (cpu < nr_cpus) { > + if ((mpidr & 0xff) >= 16) { > + printf("cpu%d MPIDR:aff0 is %d (>= 16)!\n", > + cpu, (int)(mpidr & 0xff)); > + break; > + } > + > + tlist |= 1 << (mpidr & 0xf); > + > + cpu = cpumask_next(cpu, mask); > + if (cpu >= nr_cpus) > + break; > + > + mpidr = cpus[cpu]; > + > + if (cluster_id != (mpidr & ~0xffUL)) { > + /* > + * The next cpu isn't in our cluster. Roll > + * back the cpu index allo
Re: [Qemu-devel] [PATCH v6 0/3] IOMMU: intel_iommu support map and unmap notifications
On Thu, 10 Nov 2016 21:20:36 +0200 "Michael S. Tsirkin" wrote: > On Thu, Nov 10, 2016 at 09:04:13AM -0700, Alex Williamson wrote: > > On Thu, 10 Nov 2016 17:54:35 +0200 > > "Michael S. Tsirkin" wrote: > > > > > On Thu, Nov 10, 2016 at 08:30:21AM -0700, Alex Williamson wrote: > > > > On Thu, 10 Nov 2016 17:14:24 +0200 > > > > "Michael S. Tsirkin" wrote: > > > > > > > > > On Tue, Nov 08, 2016 at 01:04:21PM +0200, Aviv B.D wrote: > > > > > > From: "Aviv Ben-David" > > > > > > > > > > > > * Advertize Cache Mode capability in iommu cap register. > > > > > > This capability is controlled by "cache-mode" property of > > > > > > intel-iommu device. > > > > > > To enable this option call QEMU with "-device > > > > > > intel-iommu,cache-mode=true". > > > > > > > > > > > > * On page cache invalidation in intel vIOMMU, check if the domain > > > > > > belong to > > > > > > registered notifier, and notify accordingly. > > > > > > > > > > This looks sane I think. Alex, care to comment? > > > > > Merging will have to wait until after the release. > > > > > Pls remember to re-test and re-ping then. > > > > > > > > I don't think it's suitable for upstream until there's a reasonable > > > > replay mechanism > > > > > > Could you pls clarify what do you mean by replay? > > > Is this when you attach a device by hotplug to > > > a running system? > > > > > > If yes this can maybe be addressed by disabling hotplug temporarily. > > > > No, hotplug is not required, moving a device between existing domains > > requires replay, ie. actually using it for nested device assignment. > > Good point, that one is a correctness thing. Aviv, > could you add this in TODO list in a cover letter pls? > > > > > and we straighten out whether it's expected to get > > > > multiple notifies and the notif-ee is responsible for filtering > > > > them or if the notif-er should do filtering. > > > > > > OK this is a documentation thing. > > > > Well no, it needs to be decided and if necessary implemented. > > Let's assume it's the notif-ee for now. Less is more and all that. I think this is opposite of the approach dwg suggested. > > > > Without those, this is > > > > effectively just an RFC. > > > > > > It's infrastructure without users so it doesn't break things, > > > I'm more interested in seeing whether it's broken in > > > some way than whether it's complete. > > > > If it allows use with vfio but doesn't fully implement the complete set > > of interfaces, it does break things. We currently prevent viommu usage > > with vfio because it is incomplete. > > Right - that bit is still in as far as I can see. Nope, 3/3 changes vtd_iommu_notify_flag_changed() to allow use with vfio even though it's still incomplete. We would at least need something like a replay callback for VT-d that triggers an abort if you still want to accept it incomplete. Thanks, Alex > > > The patchset spent out of tree too long and I'd like to see > > > us make progress towards device assignment working with > > > vIOMMU sooner rather than later, so if it's broken I won't > > > merge it but if it's incomplete I will. > > > > So long as it's incomplete and still prevents vfio usage, I'm ok with > > merging it, but I don't want to enable vfio usage until it's complete. > > Thanks, > > > > Alex > > > > > > > > Currently this patch still doesn't enabling VFIO devices support > > > > > > with vIOMMU > > > > > > present. Current problems: > > > > > > * vfio_iommu_map_notify is not aware about memory range belong to > > > > > > specific > > > > > > VFIOGuestIOMMU. > > > > > > * memory_region_iommu_replay hangs QEMU on start up while it > > > > > > itterate over > > > > > > 64bit address space. Commenting out the call to this function > > > > > > enables > > > > > > workable VFIO device while vIOMMU present. > > > > > > * vfio_iommu_map_notify should check if address space range is > > > > > > suitable for > > > > > > current notifier. > > > > > > > > > > > > Changes from v1 to v2: > > > > > > * remove assumption that the cache do not clears > > > > > > * fix lockup on high load. > > > > > > > > > > > > Changes from v2 to v3: > > > > > > * remove debug leftovers > > > > > > * split to sepearate commits > > > > > > * change is_write to flags in vtd_do_iommu_translate, add > > > > > > IOMMU_NO_FAIL > > > > > > to suppress error propagating to guest. > > > > > > > > > > > > Changes from v3 to v4: > > > > > > * Add property to intel_iommu device to control the CM capability, > > > > > > default to False. > > > > > > * Use s->iommu_ops.notify_flag_changed to register notifiers. > > > > > > > > > > > > Changes from v4 to v4 RESEND: > > > > > > * Fix codding style pointed by checkpatch.pl script. > > > > > > > > > > > > Changes from v4 to v5: > > > > > > * Reduce the number of changes in patch 2 and make flags real > > > > > > bitfield. > > > > > > * Revert deleted debug
Re: [Qemu-devel] [PATCH v6 19/19] tcg: enable MTTCG by default for ARM on x86 hosts
On 11/10/2016 07:13 PM, Alex Bennée wrote: Richard Henderson writes: On 11/09/2016 03:57 PM, Alex Bennée wrote: This enables the multi-threaded system emulation by default for ARMv7 and ARMv8 guests using the x86_64 TCG backend. This means: - The x86_64 TCG backend supports cmpxchg based atomic ops - The x86_64 TCG backend emits barriers for barrier ops What tcg backend doesn't support what we need? For a weakly ordered target, any of our hosts should work. True, but this comes with certification that I've tested it. But you are right adding this to configure is fugly. Should I just drop the backend config symbol requirement totally? I was thinking that a good backend config symbol would somehow indicate the memory ordering strength of the host. Preferably in such a way that we can tell that host >= guest. I dunno if we assign ordinal numbers in some arbitrary way, or try something more complex such as x86_64) HOST_MTTCG_MO='TCG_MO_ALL & ~TCG_MO_LD_ST' Or maybe put this in tcg/*/tcg-target.h in preference to configure. Then enable mttcg if the host memory-order is a superset of the guest, (GUEST_MTTCG_MO & ~HOST_MTTCG_MO) == 0 r~
Re: [Qemu-devel] [PATCH v6 0/3] IOMMU: intel_iommu support map and unmap notifications
On Thu, Nov 10, 2016 at 09:04:13AM -0700, Alex Williamson wrote: > On Thu, 10 Nov 2016 17:54:35 +0200 > "Michael S. Tsirkin" wrote: > > > On Thu, Nov 10, 2016 at 08:30:21AM -0700, Alex Williamson wrote: > > > On Thu, 10 Nov 2016 17:14:24 +0200 > > > "Michael S. Tsirkin" wrote: > > > > > > > On Tue, Nov 08, 2016 at 01:04:21PM +0200, Aviv B.D wrote: > > > > > From: "Aviv Ben-David" > > > > > > > > > > * Advertize Cache Mode capability in iommu cap register. > > > > > This capability is controlled by "cache-mode" property of > > > > > intel-iommu device. > > > > > To enable this option call QEMU with "-device > > > > > intel-iommu,cache-mode=true". > > > > > > > > > > * On page cache invalidation in intel vIOMMU, check if the domain > > > > > belong to > > > > > registered notifier, and notify accordingly. > > > > > > > > This looks sane I think. Alex, care to comment? > > > > Merging will have to wait until after the release. > > > > Pls remember to re-test and re-ping then. > > > > > > I don't think it's suitable for upstream until there's a reasonable > > > replay mechanism > > > > Could you pls clarify what do you mean by replay? > > Is this when you attach a device by hotplug to > > a running system? > > > > If yes this can maybe be addressed by disabling hotplug temporarily. > > No, hotplug is not required, moving a device between existing domains > requires replay, ie. actually using it for nested device assignment. Good point, that one is a correctness thing. Aviv, could you add this in TODO list in a cover letter pls? > > > and we straighten out whether it's expected to get > > > multiple notifies and the notif-ee is responsible for filtering > > > them or if the notif-er should do filtering. > > > > OK this is a documentation thing. > > Well no, it needs to be decided and if necessary implemented. Let's assume it's the notif-ee for now. Less is more and all that. > > > Without those, this is > > > effectively just an RFC. > > > > It's infrastructure without users so it doesn't break things, > > I'm more interested in seeing whether it's broken in > > some way than whether it's complete. > > If it allows use with vfio but doesn't fully implement the complete set > of interfaces, it does break things. We currently prevent viommu usage > with vfio because it is incomplete. Right - that bit is still in as far as I can see. > > The patchset spent out of tree too long and I'd like to see > > us make progress towards device assignment working with > > vIOMMU sooner rather than later, so if it's broken I won't > > merge it but if it's incomplete I will. > > So long as it's incomplete and still prevents vfio usage, I'm ok with > merging it, but I don't want to enable vfio usage until it's complete. > Thanks, > > Alex > > > > > > Currently this patch still doesn't enabling VFIO devices support with > > > > > vIOMMU > > > > > present. Current problems: > > > > > * vfio_iommu_map_notify is not aware about memory range belong to > > > > > specific > > > > > VFIOGuestIOMMU. > > > > > * memory_region_iommu_replay hangs QEMU on start up while it itterate > > > > > over > > > > > 64bit address space. Commenting out the call to this function > > > > > enables > > > > > workable VFIO device while vIOMMU present. > > > > > * vfio_iommu_map_notify should check if address space range is > > > > > suitable for > > > > > current notifier. > > > > > > > > > > Changes from v1 to v2: > > > > > * remove assumption that the cache do not clears > > > > > * fix lockup on high load. > > > > > > > > > > Changes from v2 to v3: > > > > > * remove debug leftovers > > > > > * split to sepearate commits > > > > > * change is_write to flags in vtd_do_iommu_translate, add > > > > > IOMMU_NO_FAIL > > > > > to suppress error propagating to guest. > > > > > > > > > > Changes from v3 to v4: > > > > > * Add property to intel_iommu device to control the CM capability, > > > > > default to False. > > > > > * Use s->iommu_ops.notify_flag_changed to register notifiers. > > > > > > > > > > Changes from v4 to v4 RESEND: > > > > > * Fix codding style pointed by checkpatch.pl script. > > > > > > > > > > Changes from v4 to v5: > > > > > * Reduce the number of changes in patch 2 and make flags real > > > > > bitfield. > > > > > * Revert deleted debug prints. > > > > > * Fix memory leak in patch 3. > > > > > > > > > > Changes from v5 to v6: > > > > > * fix prototype of iommu_translate function for more IOMMU types. > > > > > * VFIO will be notified only on the difference, without unmap > > > > > before change to maps. > > > > > > > > > > Aviv Ben-David (3): > > > > > IOMMU: add option to enable VTD_CAP_CM to vIOMMU capility exposoed > > > > > to > > > > > guest > > > > > IOMMU: change iommu_op->translate's is_write to flags, add support > > > > > to > > > > > NO_FAIL flag mode > > > > > IOMMU: enable intel_iommu map and unmap notifiers > > > > > > >
Re: [Qemu-devel] [PATCH v6 15/19] target-arm/cpu: don't reset TLB structures, use cputlb to do it
Richard Henderson writes: > On 11/09/2016 03:57 PM, Alex Bennée wrote: >> +#ifdef CONFIG_SOFTMMU >> +memset(env, 0, offsetof(CPUARMState, tlb_table)); >> +tlb_flush(s, 0); >> +#else >> memset(env, 0, offsetof(CPUARMState, features)); >> +#endif > > I'd really prefer to see the tlb_flush be moved into parent_reset, so that we > handle it identically for all targets. Yeah I'll prepare a series to do that separate from MTTCG. > > As for the memset, do we really need to distinguish softmmu? I don't like you > picking out a variable name within CPU_COMMON. Better to use empty struct > markers, like the > >struct {} start_init_save; > > that x86 uses. OK fair enough. -- Alex Bennée
Re: [Qemu-devel] [Qemu-ppc] [QEMU PATCH v12 2/4] migration: migrate QTAILQ
On 11/10/2016 07:00 PM, Jianjun Duan wrote: > > > On 11/10/2016 04:04 AM, Halil Pasic wrote: >> >> >> On 11/08/2016 01:06 AM, Jianjun Duan wrote: >>> Currently we cannot directly transfer a QTAILQ instance because of the >>> limitation in the migration code. Here we introduce an approach to >>> transfer such structures. We created VMStateInfo vmstate_info_qtailq >>> for QTAILQ. Similar VMStateInfo can be created for other data structures >>> such as list. >>> >>> When a QTAILQ is migrated from source to target, it is appended to the >>> corresponding QTAILQ structure, which is assumed to have been properly >>> initialized. >>> >>> This approach will be used to transfer pending_events and ccs_list in spapr >>> state. >>> >>> We also create some macros in qemu/queue.h to access a QTAILQ using pointer >>> arithmetic. This ensures that we do not depend on the implementation >>> details about QTAILQ in the migration code. >>> >>> Signed-off-by: Jianjun Duan >>> --- >>> include/migration/vmstate.h | 20 + >>> include/qemu/queue.h| 60 +++ >>> migration/trace-events | 4 +++ >>> migration/vmstate.c | 69 >>> + >>> 4 files changed, 153 insertions(+) >>> >>> diff --git a/include/migration/vmstate.h b/include/migration/vmstate.h >>> index eafc8f2..6289327 100644 >>> --- a/include/migration/vmstate.h >>> +++ b/include/migration/vmstate.h >>> @@ -253,6 +253,7 @@ extern const VMStateInfo vmstate_info_timer; >>> extern const VMStateInfo vmstate_info_buffer; >>> extern const VMStateInfo vmstate_info_unused_buffer; >>> extern const VMStateInfo vmstate_info_bitmap; >>> +extern const VMStateInfo vmstate_info_qtailq; >>> >>> #define type_check_2darray(t1,t2,n,m) ((t1(*)[n][m])0 - (t2*)0) >>> #define type_check_array(t1,t2,n) ((t1(*)[n])0 - (t2*)0) >>> @@ -664,6 +665,25 @@ extern const VMStateInfo vmstate_info_bitmap; >>> .offset = offsetof(_state, _field),\ >>> } >>> >>> +/* For QTAILQ that need customized handling. >> >> What do you mean by customized handling? How does non-customized >> handling look like? >> > Customized handling means a user has to implement put/get to get the job > done while normally one expects to just specify a VMSD to get the job done. I would drop the customized since. The user does not have t o implement put/get since the (not user supplied) .info that takes care of the stuff. Now assumed I'm a random guy who wants his QTAILQ migrated, if I read 'For QTAILQ that need customized handling' I ask myself does _my_ qtailq need any customized handling. I have a pretty plain qtailq so it should not need any customized handling. It may be only my limited English skills though. I'm not anywhere near of being a native speaker. IMHO if you want to indicate that stuff is custom because you have both vmsd and info I would place a comment next to the info. I think from language perspective that should be completely OK assuming t least c99 -- or at least I hope so. The comment above the macro has an apidoc feel to me so indicating it there feels wrong. >> I would also add something like works only for movable >> elements/nodes to make it clear that scenarios like Juan has >> pointed out previously are not currently supported. >> > Maybe you mean the backward compatibility regarding the bit stream? > It is not supported. Will document it. I mean this: https://lists.nongnu.org/archive/html/qemu-devel/2016-11/msg00314.html If you look at the code the VirtQueue instances sit in a array and are organized into lists depending on to which vector do these belong to (disclaimer: my understanding of the problem formulated very sloppily). I missed the discussion on backward compatibility of the bitstream. Could you give me a pointer? >>> + * Target QTAILQ needs be properly initialized. >> >> Could this restriction be avoided by doing (1)? >> >>> + * _type: type of QTAILQ element >>> + * _next: name of QTAILQ entry field in QTAILQ element >>> + * _vmsd: VMSD for QTAILQ element >>> + * size: size of QTAILQ element >>> + * start: offset of QTAILQ entry in QTAILQ element >>> + */ >>> +#define VMSTATE_QTAILQ_V(_field, _state, _version, _vmsd, _type, _next) \ >>> +{\ >>> +.name = (stringify(_field)), \ >>> +.version_id = (_version), \ >>> +.vmsd = &(_vmsd),\ >>> +.size = sizeof(_type), \ >>> +.info = &vmstate_info_qtailq,\ >>> +.offset = offsetof(_state, _field),\ >>> +.start= offsetof(_type, _next), \ >>> +} >>> + >>> /* _f : field name >>> _f_n : num of elements field
Re: [Qemu-devel] [PATCH v6 13/19] cputlb: atomically update tlb fields used by tlb_reset_dirty
Richard Henderson writes: > On 11/09/2016 03:57 PM, Alex Bennée wrote: >> +/* We currently can't handle more than 16 bits in the MMUIDX bitmask. >> + */ >> +QEMU_BUILD_BUG_ON(NB_MMU_MODES > 16); > > We already assert <= 12 in exec/cpu_ldst.h. Although really any such assert > belongs in exec/cpu-defs.h, where we define CPU_TLB_BITS et al. > > That said, what's the technical restriction here? Really we just need to ensure that we don't run out of bits to convert the MMUIDX var args into the bottom bit of a page aligned address. We already have: QEMU_BUILD_BUG_ON(NB_MMU_MODES > TARGET_PAGE_BITS_MIN); So I guess I can drop the other one. -- Alex Bennée
Re: [Qemu-devel] [PATCH v6 19/19] tcg: enable MTTCG by default for ARM on x86 hosts
On 11/09/2016 03:57 PM, Alex Bennée wrote: This enables the multi-threaded system emulation by default for ARMv7 and ARMv8 guests using the x86_64 TCG backend. This means: - The x86_64 TCG backend supports cmpxchg based atomic ops - The x86_64 TCG backend emits barriers for barrier ops What tcg backend doesn't support what we need? For a weakly ordered target, any of our hosts should work. r~
Re: [Qemu-devel] [PATCH v6 17/19] target-arm: helpers which may affect global state need the BQL
On 11/09/2016 03:57 PM, Alex Bennée wrote: As the arm_call_el_change_hook may affect global state (for example with updating the global GIC state) we need to assert/take the BQL. Signed-off-by: Alex Bennée --- target-arm/helper.c| 6 ++ target-arm/op_helper.c | 4 2 files changed, 10 insertions(+) Reviewed-by: Richard Henderson r~
Re: [Qemu-devel] [Qemu-ppc] [QEMU PATCH v12 2/4] migration: migrate QTAILQ
On 11/10/2016 04:04 AM, Halil Pasic wrote: > > > On 11/08/2016 01:06 AM, Jianjun Duan wrote: >> Currently we cannot directly transfer a QTAILQ instance because of the >> limitation in the migration code. Here we introduce an approach to >> transfer such structures. We created VMStateInfo vmstate_info_qtailq >> for QTAILQ. Similar VMStateInfo can be created for other data structures >> such as list. >> >> When a QTAILQ is migrated from source to target, it is appended to the >> corresponding QTAILQ structure, which is assumed to have been properly >> initialized. >> >> This approach will be used to transfer pending_events and ccs_list in spapr >> state. >> >> We also create some macros in qemu/queue.h to access a QTAILQ using pointer >> arithmetic. This ensures that we do not depend on the implementation >> details about QTAILQ in the migration code. >> >> Signed-off-by: Jianjun Duan >> --- >> include/migration/vmstate.h | 20 + >> include/qemu/queue.h| 60 +++ >> migration/trace-events | 4 +++ >> migration/vmstate.c | 69 >> + >> 4 files changed, 153 insertions(+) >> >> diff --git a/include/migration/vmstate.h b/include/migration/vmstate.h >> index eafc8f2..6289327 100644 >> --- a/include/migration/vmstate.h >> +++ b/include/migration/vmstate.h >> @@ -253,6 +253,7 @@ extern const VMStateInfo vmstate_info_timer; >> extern const VMStateInfo vmstate_info_buffer; >> extern const VMStateInfo vmstate_info_unused_buffer; >> extern const VMStateInfo vmstate_info_bitmap; >> +extern const VMStateInfo vmstate_info_qtailq; >> >> #define type_check_2darray(t1,t2,n,m) ((t1(*)[n][m])0 - (t2*)0) >> #define type_check_array(t1,t2,n) ((t1(*)[n])0 - (t2*)0) >> @@ -664,6 +665,25 @@ extern const VMStateInfo vmstate_info_bitmap; >> .offset = offsetof(_state, _field),\ >> } >> >> +/* For QTAILQ that need customized handling. > > What do you mean by customized handling? How does non-customized > handling look like? > Customized handling means a user has to implement put/get to get the job done while normally one expects to just specify a VMSD to get the job done. > I would also add something like works only for movable > elements/nodes to make it clear that scenarios like Juan has > pointed out previously are not currently supported. > Maybe you mean the backward compatibility regarding the bit stream? It is not supported. Will document it. >> + * Target QTAILQ needs be properly initialized. > > Could this restriction be avoided by doing (1)? > >> + * _type: type of QTAILQ element >> + * _next: name of QTAILQ entry field in QTAILQ element >> + * _vmsd: VMSD for QTAILQ element >> + * size: size of QTAILQ element >> + * start: offset of QTAILQ entry in QTAILQ element >> + */ >> +#define VMSTATE_QTAILQ_V(_field, _state, _version, _vmsd, _type, _next) \ >> +{\ >> +.name = (stringify(_field)), \ >> +.version_id = (_version), \ >> +.vmsd = &(_vmsd),\ >> +.size = sizeof(_type), \ >> +.info = &vmstate_info_qtailq,\ >> +.offset = offsetof(_state, _field),\ >> +.start= offsetof(_type, _next), \ >> +} >> + >> /* _f : field name >> _f_n : num of elements field_name >> _n : num of elements >> diff --git a/include/qemu/queue.h b/include/qemu/queue.h >> index 342073f..75616e1 100644 >> --- a/include/qemu/queue.h >> +++ b/include/qemu/queue.h >> @@ -438,4 +438,64 @@ struct { >> \ >> #define QTAILQ_PREV(elm, headname, field) \ >> (*(((struct headname *)((elm)->field.tqe_prev))->tqh_last)) >> >> +#define field_at_offset(base, offset, type) >>\ >> +((type) (((char *) (base)) + (offset))) >> + >> +typedef struct DUMMY_Q_ENTRY DUMMY_Q_ENTRY; >> +typedef struct DUMMY_Q DUMMY_Q; >> + >> +struct DUMMY_Q_ENTRY { >> +QTAILQ_ENTRY(DUMMY_Q_ENTRY) next; >> +}; >> + >> +struct DUMMY_Q { >> +QTAILQ_HEAD(DUMMY_Q_HEAD, DUMMY_Q_ENTRY) head; >> +}; >> + >> +#define dummy_q ((DUMMY_Q *) 0) >> +#define dummy_qe ((DUMMY_Q_ENTRY *) 0) >> + >> +/* >> + * Offsets of layout of a tail queue head. >> + */ >> +#define QTAILQ_FIRST_OFFSET (offsetof(typeof(dummy_q->head), tqh_first)) >> +#define QTAILQ_LAST_OFFSET (offsetof(typeof(dummy_q->head), tqh_last)) >> +/* >> + * Raw access of elements of a tail queue >> + */ >> +#define QTAILQ_RAW_FIRST(head) >>\ >> +(*field_at_offset(head, QTAILQ_FIRST_OFFSET, void **)) >> +#define Q
Re: [Qemu-devel] [RFC 10/17] pseries: Rewrite CAS PVR compatibility logic
Quoting David Gibson (2016-10-30 06:12:01) > During boot, PAPR guests negotiate CPU model support with the > ibm,client-architecture-support mechanism. The logic to implement this in > qemu is very convoluted. This cleans it up to be cleaner, using the new > ppc_check_compat() call. > > The new logic for choosing a compatibility mode is: > 1. If the guest lists the CPU's real PVR as supported *AND* no >maximum compatibility mode has been requested on the command line >then we use "raw" mode - the CPU acts with full capabilities. > 2. Otherwise, we pick the most recent compatibility mode which is >both supported by the CPU, and is advertised as supported by the >guest. > I think the original code approximated the same thing, but it's hard to be > sure, and I think it had some weird edge cases. > > Signed-off-by: David Gibson > --- > hw/ppc/spapr_hcall.c | 107 > +-- > hw/ppc/trace-events | 2 +- > 2 files changed, 37 insertions(+), 72 deletions(-) > > diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c > index d93f580..3bd6d06 100644 > --- a/hw/ppc/spapr_hcall.c > +++ b/hw/ppc/spapr_hcall.c > @@ -895,98 +895,63 @@ static void do_set_compat(CPUState *cs, void *arg) > ppc_set_compat(cpu, s->compat_pvr, &s->err); > } > > -#define get_compat_level(cpuver) ( \ > -((cpuver) == CPU_POWERPC_LOGICAL_2_05) ? 2050 : \ > -((cpuver) == CPU_POWERPC_LOGICAL_2_06) ? 2060 : \ > -((cpuver) == CPU_POWERPC_LOGICAL_2_06_PLUS) ? 2061 : \ > -((cpuver) == CPU_POWERPC_LOGICAL_2_07) ? 2070 : 0) > - > -static void cas_handle_compat_cpu(PowerPCCPUClass *pcc, uint32_t pvr, > - unsigned max_lvl, unsigned *compat_lvl, > - unsigned *compat_pvr) > -{ > -unsigned lvl = get_compat_level(pvr); > -bool is205, is206, is207; > - > -if (!lvl) { > -return; > -} > - > -/* If it is a logical PVR, try to determine the highest level */ > -is205 = (pcc->pcr_supported & PCR_COMPAT_2_05) && > -(lvl == get_compat_level(CPU_POWERPC_LOGICAL_2_05)); > -is206 = (pcc->pcr_supported & PCR_COMPAT_2_06) && > -((lvl == get_compat_level(CPU_POWERPC_LOGICAL_2_06)) || > - (lvl == get_compat_level(CPU_POWERPC_LOGICAL_2_06_PLUS))); > -is207 = (pcc->pcr_supported & PCR_COMPAT_2_07) && > -(lvl == get_compat_level(CPU_POWERPC_LOGICAL_2_07)); > - > -if (is205 || is206 || is207) { > -if (!max_lvl) { > -/* User did not set the level, choose the highest */ > -if (*compat_lvl <= lvl) { > -*compat_lvl = lvl; > -*compat_pvr = pvr; > -} > -} else if (max_lvl >= lvl) { > -/* User chose the level, don't set higher than this */ > -*compat_lvl = lvl; > -*compat_pvr = pvr; > -} > -} > -} > - > -static target_ulong h_client_architecture_support(PowerPCCPU *cpu_, > +static target_ulong h_client_architecture_support(PowerPCCPU *cpu, >sPAPRMachineState *spapr, >target_ulong opcode, >target_ulong *args) > { > target_ulong list = ppc64_phys_to_real(args[0]); > target_ulong ov_table; > -PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cpu_); > CPUState *cs; > -bool cpu_match = false; > -unsigned old_compat_pvr = cpu_->compat_pvr; > -unsigned compat_lvl = 0, compat_pvr = 0; > -unsigned max_lvl = get_compat_level(cpu_->max_compat); > -int counter; > +bool explicit_match = false; /* Matched the CPU's real PVR */ > +uint32_t max_compat = cpu->max_compat; > +uint32_t best_compat = 0; > +int i; > sPAPROptionVector *ov5_guest, *ov5_cas_old, *ov5_updates; > > -/* Parse PVR list */ > -for (counter = 0; counter < 512; ++counter) { > +/* > + * We scan the supplied table of PVRs looking for two things > + * 1. Is our real CPU PVR in the list? > + * 2. What's the "best" listed logical PVR > + */ > +for (i = 0; i < 512; ++i) { > uint32_t pvr, pvr_mask; > > -pvr_mask = ldl_be_phys(&address_space_memory, list); > -list += 4; > pvr = ldl_be_phys(&address_space_memory, list); > -list += 4; > - > -trace_spapr_cas_pvr_try(pvr); > -if (!max_lvl && > -((cpu_->env.spr[SPR_PVR] & pvr_mask) == (pvr & pvr_mask))) { > -cpu_match = true; > -compat_pvr = 0; > -} else if (pvr == cpu_->compat_pvr) { > -cpu_match = true; > -compat_pvr = cpu_->compat_pvr; > -} else if (!cpu_match) { > -cas_handle_compat_cpu(pcc, pvr, max_lvl, &compat_lvl, > &compat_pvr); > -} > -/* Terminator record */ > +pvr_mask = l
Re: [Qemu-devel] [PATCH v6 18/19] target-arm: don't generate WFE/YIELD calls for MTTCG
On 11/09/2016 03:57 PM, Alex Bennée wrote: The WFE and YIELD instructions are really only hints and in TCG's case they were useful to move the scheduling on from one vCPU to the next. In the parallel context (MTTCG) this just causes an unnecessary cpu_exit and contention of the BQL. Signed-off-by: Alex Bennée --- target-arm/op_helper.c | 7 +++ target-arm/translate-a64.c | 8 ++-- target-arm/translate.c | 20 3 files changed, 29 insertions(+), 6 deletions(-) Reviewed-by: Richard Henderson r~
[Qemu-devel] [PULL 6/6] nbd: Don't inf-loop on early EOF
From: Eric Blake Commit 7d3123e converted a single read_sync() into a while loop that assumed that read_sync() would either make progress or give an error. But when the server hangs up early, the client sees EOF (a read_sync() of 0) and never makes progress, which in turn caused qemu-iotest './check -nbd 83' to go into an infinite loop. Rework the loop to accomodate reads cut short by EOF. Reported-by: Max Reitz Signed-off-by: Eric Blake Message-Id: <1478551093-32757-1-git-send-email-ebl...@redhat.com> Signed-off-by: Paolo Bonzini --- nbd/client.c | 13 +++-- 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/nbd/client.c b/nbd/client.c index 7db4301..ffb0743 100644 --- a/nbd/client.c +++ b/nbd/client.c @@ -90,20 +90,21 @@ static QTAILQ_HEAD(, NBDExport) exports = QTAILQ_HEAD_INITIALIZER(exports); * the amount of bytes consumed. */ static ssize_t drop_sync(QIOChannel *ioc, size_t size) { -ssize_t ret, dropped = size; +ssize_t ret = 0; char small[1024]; char *buffer; buffer = sizeof(small) < size ? small : g_malloc(MIN(65536, size)); while (size > 0) { -ret = read_sync(ioc, buffer, MIN(65536, size)); -if (ret < 0) { +ssize_t count = read_sync(ioc, buffer, MIN(65536, size)); + +if (count <= 0) { goto cleanup; } -assert(ret <= size); -size -= ret; +assert(count <= size); +size -= count; +ret += count; } -ret = dropped; cleanup: if (buffer != small) { -- 1.8.3.1
Re: [Qemu-devel] [QEMU PATCH v2] kvmclock: advance clock by time window between vm_stop and pre_save
On 10/11/2016 12:48, Marcelo Tosatti wrote: > Destination has to run the following logic: > > If (source has KVM_CAP_ADVANCE_CLOCK) > use KVM_GET_CLOCK value > Else >read pvclock from guest > > To support migration from older QEMU versions which do not have > KVM_CAP_ADVANCE_CLOCK (or new QEMU versions running on old > hosts without KVM_CAP_ADVANCE_CLOCK). > > I don't see any clean way to give that information, except changing > the migration format to pass "host: kvm_cap_advance_clock=true/false" > information. If you make it only affect new machine types, you could transmit a dummy clock value such as -1 if the source does not have KVM_CLOCK_TSC_STABLE. Paolo
Re: [Qemu-devel] [PATCH v6 14/19] target-arm/powerctl: defer cpu reset work to CPU context
On 11/09/2016 03:57 PM, Alex Bennée wrote: +/* We are requested to boot in AArch32 mode */ +static uint32_t mode_for_el[] = { 0, + ARM_CPU_MODE_SVC, + ARM_CPU_MODE_HYP, + ARM_CPU_MODE_SVC }; I know you're just moving most of this code, but, const. Otherwise, Reviewed-by: Richard Henderson r~
Re: [Qemu-devel] [PATCH v6 16/19] target-arm: ensure BQL taken for ARM_CP_IO register access
On 11/09/2016 03:57 PM, Alex Bennée wrote: Most ARMCPRegInfo structures just allow updating of the CPU field. However some have more complex operations that *may* be have cross vCPU effects therefor need to be serialised. The most obvious examples at the moment are things that affect the GICv3 IRQ controller. To avoid applying this requirement to all registers with custom access functions we check for if the type is marked ARM_CP_IO. By default all MMIO access to devices already takes the BQL to serialise hardware emulation. Signed-off-by: Alex Bennée --- hw/intc/arm_gicv3_cpuif.c | 3 +++ target-arm/op_helper.c| 39 +++ 2 files changed, 38 insertions(+), 4 deletions(-) Reviewed-by: Richard Henderson r~
Re: [Qemu-devel] [PATCH v6 11/19] cputlb: introduce tlb_flush_* async work.
Richard Henderson writes: > On 11/09/2016 03:57 PM, Alex Bennée wrote: >> +void tlb_flush_page_all(target_ulong addr) > > It's a nit, but when I read this I think all pages, not all cpus. > Can we rename this tlb_fluch_page_all_cpus? So to properly support ARM TLB flush semantics I want to move some of the looping in the helpers into cputlb.c so I'm thinking we'll have: tlb_flush_page_all_cpus tlb_flush_by_mmuidx_all_cpus tlb_flush_page_by_mmuidx_all_cpus Which will have the initial parameters of at least CPUState *src, bool sync Where src is the source vCPU of the flush request and sync will cause the source vCPU to schedule its work as safe work and do a cpu_loop_exit. This will allow the helpers to ensure TLB flushes are in a known state after executing the helper. In fact for ARM we'll be able to put off the reckoning until a DMB instruction comes along and we can force synchronisation at that point but I'm assuming there must be other architectures with stricter requirements. > > Otherwise, > > Reviewed-by: Richard Henderson > > > r~ -- Alex Bennée
[Qemu-devel] [PULL 5/6] target-i386: document how x86 gdb_num_core_regs is computed.
From: Doug Evans It helps when reading the code to see how the number is arrived at. Signed-off-by: Doug Evans Message-Id: <94eb2c187eda43dba005406c8...@google.com> Signed-off-by: Paolo Bonzini --- target-i386/cpu.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/target-i386/cpu.c b/target-i386/cpu.c index 14c5186..6eec5dc 100644 --- a/target-i386/cpu.c +++ b/target-i386/cpu.c @@ -3721,6 +3721,9 @@ static void x86_cpu_common_class_init(ObjectClass *oc, void *data) cc->write_elf32_qemunote = x86_cpu_write_elf32_qemunote; cc->vmsd = &vmstate_x86_cpu; #endif +/* CPU_NB_REGS * 2 = general regs + xmm regs + * 25 = eip, eflags, 6 seg regs, st[0-7], fctrl,...,fop, mxcsr. + */ cc->gdb_num_core_regs = CPU_NB_REGS * 2 + 25; #ifndef CONFIG_USER_ONLY cc->debug_excp_handler = breakpoint_handler; -- 1.8.3.1
Re: [Qemu-devel] [PATCH v6 13/19] cputlb: atomically update tlb fields used by tlb_reset_dirty
On 11/10/2016 07:00 PM, Alex Bennée wrote: I should probably expand that to default to false in the case of (sizeof target_ulong > sizeof void *) when we don't have CONFIG_ATOMIC64. Then if the user does force mttcg on they will quickly get an assert although maybe we want to report that in a nicer way? While forcing mttcg is good for testing, small hosts will definitely fail, so there's not point in even trying. We should report it in a nicer way. We shouldn't be checking sizeof(void*), but checking TCG_TARGET_REG_BITS. That says how wide the host registers actually are, as opposed to the memory model in effect -- think x86_64 in x32 mode and the like. If the host register size is smaller than the guest register size, we should force disable mttcg, regardles of CONFIG_ATOMIC64, because e.g. normal 64 bit loads and stores won't be atomic. r~
Re: [Qemu-devel] [PATCH V2 07/11] virtio-pci: address space translation service (ATS) support
On Fri, Nov 04, 2016 at 02:48:20PM +0800, Jason Wang wrote: > > > On 2016年11月04日 03:49, Michael S. Tsirkin wrote: > > On Thu, Nov 03, 2016 at 05:27:19PM +0800, Jason Wang wrote: > > > >This patches enable the Address Translation Service support for virtio > > > >pci devices. This is needed for a guest visible Device IOTLB > > > >implementation and will be required by vhost device IOTLB API > > > >implementation for intel IOMMU. > > > > > > > >Cc: Michael S. Tsirkin > > > >Signed-off-by: Jason Wang > > I'd like to understand why do you think this is strictly required. > > Won't setting CM bit in the IOMMU do the trick. > > ATS was chosen for performance. Since there're many problems for CM: > > - CM was slow (10%-20% slower on real hardware for things like netperf) > because of each transition between non-present and present mapping needs an > explicit invalidation. It may slow down the whole VM. > - Without ATS/Device IOTLB, IOMMU becomes a bottleneck because of contending > of IOTLB entries. (What we can do in this case is in fact userspace IOTLB > snooping, this could be done even without CM). > It was natural to think of ATS when designing interface between IOMMU and > device/remote IOTLBs. Do you see any drawbacks on ATS here? > > Thanks In fact at this point I'm confused. Any mapping needs to be programmed in the IOMMU. We need to implement this correctly. Once we do why do we need ATS? I think what you need is map/unmap notifiers that Aviv is working on. No? > > > > Also, could you remind me pls - can guests just disable ATS? > > > > What happens then? > > > >
[Qemu-devel] [PULL 4/6] qdev: fix use-after-free regression from becdfa00cfa
From: Marc-André Lureau Spotted by Coverity, CID 1365383. Signed-off-by: Marc-André Lureau Message-Id: <20161107095922.31676-1-marcandre.lur...@redhat.com> Signed-off-by: Paolo Bonzini --- hw/core/qdev-properties-system.c | 8 ++-- 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/hw/core/qdev-properties-system.c b/hw/core/qdev-properties-system.c index c35f0f5..1b7ea50 100644 --- a/hw/core/qdev-properties-system.c +++ b/hw/core/qdev-properties-system.c @@ -200,18 +200,14 @@ static void set_chr(Object *obj, Visitor *v, const char *name, void *opaque, } s = qemu_chr_find(str); -g_free(str); if (s == NULL) { error_setg(errp, "Property '%s.%s' can't find value '%s'", object_get_typename(obj), prop->name, str); -return; -} - -if (!qemu_chr_fe_init(be, s, errp)) { +} else if (!qemu_chr_fe_init(be, s, errp)) { error_prepend(errp, "Property '%s.%s' can't take value '%s': ", object_get_typename(obj), prop->name, str); -return; } +g_free(str); } static void release_chr(Object *obj, const char *name, void *opaque) -- 1.8.3.1
Re: [Qemu-devel] [PATCH] smbios: Add 1 terminator if there is any string field defined in given table.
On 11/10/16 18:18, Laszlo Ersek wrote: > On 11/10/16 16:09, Michael S. Tsirkin wrote: >> On Tue, Sep 06, 2016 at 04:28:33PM +0800, Lin Ma wrote: >>> If user specifies binary file on command line to load smbios entries, then >>> will get error messages while decoding them in guest. >>> >>> Reproducer: >>> 1. dump a smbios table to a binary file from host or guest.(says table 1) >>> 2. load the binary file through command line: 'qemu -smbios file=...'. >>> 3. perform 'dmidecode' or 'dmidecode -t 1' in guest. >>> >>> It reports 'Invalid entry length...' because qemu doesn't add terminator(s) >>> for >>> the table correctly. >>> For smbios tables which have string field provided, qemu should add 1 >>> terminator. >>> For smbios tables which dont have string field provided, qemu should add 2. >>> >>> This patch fixed the issue. >>> >>> Signed-off-by: Lin Ma >> >> Seems to make sense superficially >> >> Acked-by: Michael S. Tsirkin >> >> Fam, would you like to take this? > > If we're not in a mortal hurry to enable QEMU to consume dmidecode > output directly, can we please think about enabling dmidecode to dump > binarily-correct tables? > > The commit message doesn't mention, but in the dmidecode manual, I see > "--dump-bin" and "--from-dump". Hm... The manual says, > >>--dump-bin FILE >> Do not decode the entries, instead dump the DMI data >> to a file in binary form. The generated file is suit- >> able to pass to --from-dump later. >> >>--from-dump FILE >> Read the DMI data from a binary file previously gener- >> ated using --dump-bin. >> [...] >> >> BINARY DUMP FILE FORMAT >>The binary dump files generated by --dump-bin and read using >>--from-dump are formatted as follows: >> >>* The SMBIOS or DMI entry point is located at offset 0x00. >> It is crafted to hard-code the table address at offset >> 0x20. >> >>* The DMI table is located at offset 0x20. > > Is this how the tables were dumped originally, in step 1? > > Actually, the manual also says, > >>Options --string, --type and --dump-bin determine the output >>format and are mutually exclusive. > > Since step 1 mentions "say[] table 1", I think --dump-bin was not used. > In that case however, dmidecode can only produce textual output, for > example, hexadecimal output, with "--dump". > > This means that some helper utility must have been used to turn the > hexadecimal output into binary. Since the dmidecode output has to be > post-processed anyway, I wonder if we should keep this data munging out > of QEMU. > > Anyway, I have some comments for the patch as well: > > >>> --- >>> hw/smbios/smbios.c | 90 >>> ++ >>> include/hw/smbios/smbios.h | 44 +++ >>> 2 files changed, 134 insertions(+) >>> >>> diff --git a/hw/smbios/smbios.c b/hw/smbios/smbios.c >>> index 74c7102..6293bc5 100644 >>> --- a/hw/smbios/smbios.c >>> +++ b/hw/smbios/smbios.c >>> @@ -885,6 +885,9 @@ void smbios_entry_add(QemuOpts *opts) >>> { >>> const char *val; >>> >>> +int i, terminator_count = 2, table_str_field_count = 0; >>> +int *tables_str_field_offset = NULL; >>> + >>> assert(!smbios_immutable); >>> >>> val = qemu_opt_get(opts, "file"); >>> @@ -926,7 +929,94 @@ void smbios_entry_add(QemuOpts *opts) >>> smbios_type4_count++; >>> } >>> >>> +switch (header->type) { >>> +case 0: >>> +tables_str_field_offset = g_malloc0(sizeof(int) * \ >>> +TYPE_0_STR_FIELD_COUNT); >>> +tables_str_field_offset = (int []){\ >>> +TYPE_0_STR_FIELD_OFFSET_VENDOR, \ >>> +TYPE_0_STR_FIELD_OFFSET_BIOS_VERSION, \ >>> + >>> TYPE_0_STR_FIELD_OFFSET_BIOS_RELEASE_DATE}; > > This assignment doesn't do what you think it does. > "tables_str_field_offset" is a pointer, and the result of the > > (int []){...} > > compound literal is an unnamed array object with automatic storage > duration. The lifetime of the unnamed array object is limited to the > scope of the enclosing block, which means the "switch" statement here. > > The assignment does not copy the contents of the array into the > dynamically allocated area; instead, the unnamed array object is > converted to a pointer to its first element, and the > "tables_str_field_offset" pointer is set to that value. The original > dynamic allocation is leaked. > >>> +table_str_field_count = sizeof(tables_str_field_offset) / \ >>> +sizeof(tables_str_field_offset[0]); > > This is wrong again; the dividend is the size of the pointer, not that > of the pointed-to-array. The size of the array is not available through > the poin
[Qemu-devel] [PULL 1/6] target-i386: fix typo
The impact is small because kvm_get_vcpu_events fixes env->hflags, but it is wrong and could cause INITs to be delayed arbitrarily with -machine kernel_irqchip=off. Reported-by: Achille Fouilleul Reviewed-by: Richard Henderson Signed-off-by: Paolo Bonzini --- target-i386/kvm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/target-i386/kvm.c b/target-i386/kvm.c index 1c0864e..f62264a 100644 --- a/target-i386/kvm.c +++ b/target-i386/kvm.c @@ -2855,7 +2855,7 @@ MemTxAttrs kvm_arch_post_run(CPUState *cpu, struct kvm_run *run) if (run->flags & KVM_RUN_X86_SMM) { env->hflags |= HF_SMM_MASK; } else { -env->hflags &= HF_SMM_MASK; +env->hflags &= ~HF_SMM_MASK; } if (run->if_flag) { env->eflags |= IF_MASK; -- 1.8.3.1
[Qemu-devel] [kvm-unit-tests PATCH v5 10/11] arm/arm64: gicv3: add an IPI test
Signed-off-by: Andrew Jones --- v5: - fix copy+paste error in gicv3_write_eoir [drew] - use modern register names [Andre] v4: - heavily comment gicv3_ipi_send_tlist() [Eric] - changes needed for gicv2 iar/irqstat fix to other patch v2: - use IRM for gicv3 broadcast --- arm/gic.c | 195 ++--- arm/unittests.cfg | 6 ++ lib/arm/asm/arch_gicv3.h | 23 ++ lib/arm64/asm/arch_gicv3.h | 22 + 4 files changed, 236 insertions(+), 10 deletions(-) diff --git a/arm/gic.c b/arm/gic.c index 06092aec7c08..ca68f8ad1cb9 100644 --- a/arm/gic.c +++ b/arm/gic.c @@ -3,6 +3,8 @@ * * GICv2 * + test sending/receiving IPIs + * GICv3 + * + test sending/receiving IPIs * * Copyright (C) 2016, Red Hat Inc, Andrew Jones * @@ -16,6 +18,19 @@ #include #include +struct gic { + struct { + void (*enable)(void); + void (*send_self)(void); + void (*send_tlist)(cpumask_t *); + void (*send_broadcast)(void); + } ipi; + u32 (*read_iar)(void); + u32 (*irqnr)(u32 iar); + void (*write_eoi)(u32); +}; + +static struct gic *gic; static int gic_version; static int acked[NR_CPUS], spurious[NR_CPUS]; static cpumask_t ready; @@ -69,13 +84,33 @@ static void check_acked(cpumask_t *mask) false, missing, extra, unexpected); } +static u32 gicv2_read_iar(void) +{ + return readl(gicv2_cpu_base() + GICC_IAR); +} + +static u32 gicv2_irqnr(u32 iar) +{ + return iar & GICC_IAR_INT_ID_MASK; +} + +static void gicv2_write_eoi(u32 irqstat) +{ + writel(irqstat, gicv2_cpu_base() + GICC_EOIR); +} + +static u32 gicv3_irqnr(u32 iar) +{ + return iar; +} + static void ipi_handler(struct pt_regs *regs __unused) { - u32 irqstat = readl(gicv2_cpu_base() + GICC_IAR); - u32 irqnr = irqstat & GICC_IAR_INT_ID_MASK; + u32 irqstat = gic->read_iar(); + u32 irqnr = gic->irqnr(irqstat); if (irqnr != GICC_INT_SPURIOUS) { - writel(irqstat, gicv2_cpu_base() + GICC_EOIR); + gic->write_eoi(irqstat); smp_rmb(); /* pairs with wmb in ipi_test functions */ ++acked[smp_processor_id()]; smp_wmb(); /* pairs with rmb in check_acked */ @@ -85,6 +120,112 @@ static void ipi_handler(struct pt_regs *regs __unused) } } +static void gicv2_ipi_send_self(void) +{ + writel(2 << 24, gicv2_dist_base() + GICD_SGIR); +} + +static void gicv2_ipi_send_tlist(cpumask_t *mask) +{ + u8 tlist = (u8)cpumask_bits(mask)[0]; + + writel(tlist << 16, gicv2_dist_base() + GICD_SGIR); +} + +static void gicv2_ipi_send_broadcast(void) +{ + writel(1 << 24, gicv2_dist_base() + GICD_SGIR); +} + +#define ICC_SGI1R_AFFINITY_1_SHIFT 16 +#define ICC_SGI1R_AFFINITY_2_SHIFT 32 +#define ICC_SGI1R_AFFINITY_3_SHIFT 48 +#define MPIDR_TO_SGI_AFFINITY(cluster_id, level) \ + (MPIDR_AFFINITY_LEVEL(cluster_id, level) << ICC_SGI1R_AFFINITY_## level ## _SHIFT) + +static void gicv3_ipi_send_tlist(cpumask_t *mask) +{ + u16 tlist; + int cpu; + + /* +* For each cpu in the mask collect its peers, which are also in +* the mask, in order to form target lists. +*/ + for_each_cpu(cpu, mask) { + u64 mpidr = cpus[cpu], sgi1r; + u64 cluster_id; + + /* +* GICv3 can send IPIs to up 16 peer cpus with a single +* write to ICC_SGI1R_EL1 (using the target list). Peers +* are cpus that have nearly identical MPIDRs, the only +* difference being Aff0. The matching upper affinity +* levels form the cluster ID. +*/ + cluster_id = mpidr & ~0xffUL; + tlist = 0; + + /* +* Sort of open code for_each_cpu in order to have a +* nested for_each_cpu loop. +*/ + while (cpu < nr_cpus) { + if ((mpidr & 0xff) >= 16) { + printf("cpu%d MPIDR:aff0 is %d (>= 16)!\n", + cpu, (int)(mpidr & 0xff)); + break; + } + + tlist |= 1 << (mpidr & 0xf); + + cpu = cpumask_next(cpu, mask); + if (cpu >= nr_cpus) + break; + + mpidr = cpus[cpu]; + + if (cluster_id != (mpidr & ~0xffUL)) { + /* +* The next cpu isn't in our cluster. Roll +* back the cpu index allowing the outer +* for_each_cpu to find it again with +* cpumask_next +*/ +
Re: [Qemu-devel] [PATCH v6 19/19] tcg: enable MTTCG by default for ARM on x86 hosts
Richard Henderson writes: > On 11/09/2016 03:57 PM, Alex Bennée wrote: >> This enables the multi-threaded system emulation by default for ARMv7 >> and ARMv8 guests using the x86_64 TCG backend. This means: >> >> - The x86_64 TCG backend supports cmpxchg based atomic ops >> - The x86_64 TCG backend emits barriers for barrier ops > > What tcg backend doesn't support what we need? For a weakly ordered target, > any of our hosts should work. True, but this comes with certification that I've tested it. But you are right adding this to configure is fugly. Should I just drop the backend config symbol requirement totally? -- Alex Bennée
[Qemu-devel] Call for help with QEMU's documentation
QEMU's documentation is in good need for improving. Currently there is an effort to add more pages to the QEMU wiki site. Here is the list of pages that are or will exist one day: From: http://wiki.qemu.org/Documentation/Platforms Alpha ARM CRIS i386/x86-64 LatticeMico32 68K Microblaze MIPS Moxie OpenRISC Power PowerPC SH4 Sparc s390x TileGX Tricore Unicore32 Xtensa If you know something that should be added to any of these pages, please do so. We would all benefit. Not everyone has write access to these pages, so if you would like me to upload your suggestions, I will be glad to do so. Suggestions: - pictures - example QEMU command-lines - links that would be useful to others - directions on how to accomplish something - whatever you feel should be added Thank you all who have or will contribute.
Re: [Qemu-devel] [PATCH v6 15/19] target-arm/cpu: don't reset TLB structures, use cputlb to do it
On 11/09/2016 03:57 PM, Alex Bennée wrote: +#ifdef CONFIG_SOFTMMU +memset(env, 0, offsetof(CPUARMState, tlb_table)); +tlb_flush(s, 0); +#else memset(env, 0, offsetof(CPUARMState, features)); +#endif I'd really prefer to see the tlb_flush be moved into parent_reset, so that we handle it identically for all targets. As for the memset, do we really need to distinguish softmmu? I don't like you picking out a variable name within CPU_COMMON. Better to use empty struct markers, like the struct {} start_init_save; that x86 uses. r~
[Qemu-devel] [kvm-unit-tests PATCH v5 08/11] libcflat: add IS_ALIGNED() macro, and page sizes
From: Peter Xu These macros will be useful to do page alignment checks. Reviewed-by: Andre Przywara Signed-off-by: Peter Xu [drew: also added SZ_64K] Signed-off-by: Andrew Jones --- lib/libcflat.h | 6 ++ 1 file changed, 6 insertions(+) diff --git a/lib/libcflat.h b/lib/libcflat.h index 82005f5d014f..143fc53061fe 100644 --- a/lib/libcflat.h +++ b/lib/libcflat.h @@ -33,6 +33,12 @@ #define __ALIGN_MASK(x, mask) (((x) + (mask)) & ~(mask)) #define __ALIGN(x, a) __ALIGN_MASK(x, (typeof(x))(a) - 1) #define ALIGN(x, a)__ALIGN((x), (a)) +#define IS_ALIGNED(x, a) (((x) & ((typeof(x))(a) - 1)) == 0) + +#define SZ_4K (0x1000) +#define SZ_64K (0x1) +#define SZ_2M (0x20) +#define SZ_1G (0x4000) typedef uint8_tu8; typedef int8_t s8; -- 2.7.4
Re: [Qemu-devel] [Qemu-ppc] [PATCH] spapr-vty: Fix bad assert() statement
On 10/11/2016 18:41, Thomas Huth wrote: > On 10.11.2016 15:41, Paolo Bonzini wrote: >> >> >> On 10/11/2016 10:06, Thomas Huth wrote: >>> When using the serial console in the GTK interface of QEMU (and >>> QEMU has been compiled with CONFIG_VTE), it is possible to trigger >>> the assert() statement in vty_receive() in spapr_vty.c by pasting >>> a chunk of text with length > 16 into the QEMU window. >>> Most of the other serial backends seem to simply drop characters >>> that they can not handle, so I think we should also do the same in >>> spapr-vty to fix this issue. And since it is quite ugly when pasted >>> text is chopped after 16 bytes, we also increase the size of the >>> input buffer here so that we can at least handle a couple of text >>> lines. >>> >>> Buglink: https://bugs.launchpad.net/qemu/+bug/1639322 >>> Signed-off-by: Thomas Huth >>> --- >>> hw/char/spapr_vty.c | 13 +++-- >>> 1 file changed, 11 insertions(+), 2 deletions(-) >>> >>> diff --git a/hw/char/spapr_vty.c b/hw/char/spapr_vty.c >>> index 31822fe..bee6c34 100644 >>> --- a/hw/char/spapr_vty.c >>> +++ b/hw/char/spapr_vty.c >>> @@ -1,4 +1,5 @@ >>> #include "qemu/osdep.h" >>> +#include "qemu/error-report.h" >>> #include "qapi/error.h" >>> #include "qemu-common.h" >>> #include "cpu.h" >>> @@ -7,7 +8,7 @@ >>> #include "hw/ppc/spapr.h" >>> #include "hw/ppc/spapr_vio.h" >>> >>> -#define VTERM_BUFSIZE 16 >>> +#define VTERM_BUFSIZE 2048 >> >> This change causes a change in the migration protocol. > > Bummer! You're right, the buffer is listed in the vmstate_spapr_vty > description ... so please ignore this patch, I've got to think about a > better solution here... Well, the assert change is still good and should be in 2.8. Paolo
Re: [Qemu-devel] [Qemu-ppc] [PATCH] spapr-vty: Fix bad assert() statement
On 10.11.2016 15:41, Paolo Bonzini wrote: > > > On 10/11/2016 10:06, Thomas Huth wrote: >> When using the serial console in the GTK interface of QEMU (and >> QEMU has been compiled with CONFIG_VTE), it is possible to trigger >> the assert() statement in vty_receive() in spapr_vty.c by pasting >> a chunk of text with length > 16 into the QEMU window. >> Most of the other serial backends seem to simply drop characters >> that they can not handle, so I think we should also do the same in >> spapr-vty to fix this issue. And since it is quite ugly when pasted >> text is chopped after 16 bytes, we also increase the size of the >> input buffer here so that we can at least handle a couple of text >> lines. >> >> Buglink: https://bugs.launchpad.net/qemu/+bug/1639322 >> Signed-off-by: Thomas Huth >> --- >> hw/char/spapr_vty.c | 13 +++-- >> 1 file changed, 11 insertions(+), 2 deletions(-) >> >> diff --git a/hw/char/spapr_vty.c b/hw/char/spapr_vty.c >> index 31822fe..bee6c34 100644 >> --- a/hw/char/spapr_vty.c >> +++ b/hw/char/spapr_vty.c >> @@ -1,4 +1,5 @@ >> #include "qemu/osdep.h" >> +#include "qemu/error-report.h" >> #include "qapi/error.h" >> #include "qemu-common.h" >> #include "cpu.h" >> @@ -7,7 +8,7 @@ >> #include "hw/ppc/spapr.h" >> #include "hw/ppc/spapr_vio.h" >> >> -#define VTERM_BUFSIZE 16 >> +#define VTERM_BUFSIZE 2048 > > This change causes a change in the migration protocol. Bummer! You're right, the buffer is listed in the vmstate_spapr_vty description ... so please ignore this patch, I've got to think about a better solution here... Thomas
[Qemu-devel] [kvm-unit-tests PATCH v5 07/11] arm/arm64: gicv2: add an IPI test
Reviewed-by: Eric Auger Signed-off-by: Andrew Jones --- v5: use modern registers [Andre] v4: properly mask irqnr in ipi_handler v2: add more details in the output if a test fails, report spurious interrupts if we get them --- arm/Makefile.common | 6 +- arm/gic.c| 195 +++ arm/unittests.cfg| 7 ++ lib/arm/asm/gic-v2.h | 2 + lib/arm/asm/gic.h| 4 ++ 5 files changed, 211 insertions(+), 3 deletions(-) create mode 100644 arm/gic.c diff --git a/arm/Makefile.common b/arm/Makefile.common index 41239c37e092..bc38183ab86e 100644 --- a/arm/Makefile.common +++ b/arm/Makefile.common @@ -9,9 +9,9 @@ ifeq ($(LOADADDR),) LOADADDR = 0x4000 endif -tests-common = \ - $(TEST_DIR)/selftest.flat \ - $(TEST_DIR)/spinlock-test.flat +tests-common = $(TEST_DIR)/selftest.flat +tests-common += $(TEST_DIR)/spinlock-test.flat +tests-common += $(TEST_DIR)/gic.flat all: test_cases diff --git a/arm/gic.c b/arm/gic.c new file mode 100644 index ..06092aec7c08 --- /dev/null +++ b/arm/gic.c @@ -0,0 +1,195 @@ +/* + * GIC tests + * + * GICv2 + * + test sending/receiving IPIs + * + * Copyright (C) 2016, Red Hat Inc, Andrew Jones + * + * This work is licensed under the terms of the GNU LGPL, version 2. + */ +#include +#include +#include +#include +#include +#include +#include + +static int gic_version; +static int acked[NR_CPUS], spurious[NR_CPUS]; +static cpumask_t ready; + +static void nr_cpu_check(int nr) +{ + if (nr_cpus < nr) + report_abort("At least %d cpus required", nr); +} + +static void wait_on_ready(void) +{ + cpumask_set_cpu(smp_processor_id(), &ready); + while (!cpumask_full(&ready)) + cpu_relax(); +} + +static void check_acked(cpumask_t *mask) +{ + int missing = 0, extra = 0, unexpected = 0; + int nr_pass, cpu, i; + + /* Wait up to 5s for all interrupts to be delivered */ + for (i = 0; i < 50; ++i) { + mdelay(100); + nr_pass = 0; + for_each_present_cpu(cpu) { + smp_rmb(); + nr_pass += cpumask_test_cpu(cpu, mask) ? + acked[cpu] == 1 : acked[cpu] == 0; + } + if (nr_pass == nr_cpus) { + report("Completed in %d ms", true, ++i * 100); + return; + } + } + + for_each_present_cpu(cpu) { + if (cpumask_test_cpu(cpu, mask)) { + if (!acked[cpu]) + ++missing; + else if (acked[cpu] > 1) + ++extra; + } else { + if (acked[cpu]) + ++unexpected; + } + } + + report("Timed-out (5s). ACKS: missing=%d extra=%d unexpected=%d", + false, missing, extra, unexpected); +} + +static void ipi_handler(struct pt_regs *regs __unused) +{ + u32 irqstat = readl(gicv2_cpu_base() + GICC_IAR); + u32 irqnr = irqstat & GICC_IAR_INT_ID_MASK; + + if (irqnr != GICC_INT_SPURIOUS) { + writel(irqstat, gicv2_cpu_base() + GICC_EOIR); + smp_rmb(); /* pairs with wmb in ipi_test functions */ + ++acked[smp_processor_id()]; + smp_wmb(); /* pairs with rmb in check_acked */ + } else { + ++spurious[smp_processor_id()]; + smp_wmb(); + } +} + +static void ipi_test_self(void) +{ + cpumask_t mask; + + report_prefix_push("self"); + memset(acked, 0, sizeof(acked)); + smp_wmb(); + cpumask_clear(&mask); + cpumask_set_cpu(0, &mask); + writel(2 << 24, gicv2_dist_base() + GICD_SGIR); + check_acked(&mask); + report_prefix_pop(); +} + +static void ipi_test_smp(void) +{ + cpumask_t mask; + unsigned long tlist; + + report_prefix_push("target-list"); + memset(acked, 0, sizeof(acked)); + smp_wmb(); + tlist = cpumask_bits(&cpu_present_mask)[0] & 0xaa; + cpumask_bits(&mask)[0] = tlist; + writel((u8)tlist << 16, gicv2_dist_base() + GICD_SGIR); + check_acked(&mask); + report_prefix_pop(); + + report_prefix_push("broadcast"); + memset(acked, 0, sizeof(acked)); + smp_wmb(); + cpumask_copy(&mask, &cpu_present_mask); + cpumask_clear_cpu(0, &mask); + writel(1 << 24, gicv2_dist_base() + GICD_SGIR); + check_acked(&mask); + report_prefix_pop(); +} + +static void ipi_enable(void) +{ + gicv2_enable_defaults(); +#ifdef __arm__ + install_exception_handler(EXCPTN_IRQ, ipi_handler); +#else + install_irq_handler(EL1H_IRQ, ipi_handler); +#endif + local_irq_enable(); +} + +static void ipi_recv(void) +{ + ipi_enable(); + cpumask_set_cpu(smp_processor_id(), &ready); + while (1)
Re: [Qemu-devel] [PATCH v6 11/19] cputlb: introduce tlb_flush_* async work.
On 11/10/2016 06:34 PM, Alex Bennée wrote: So to properly support ARM TLB flush semantics I want to move some of the looping in the helpers into cputlb.c so I'm thinking we'll have: tlb_flush_page_all_cpus tlb_flush_by_mmuidx_all_cpus tlb_flush_page_by_mmuidx_all_cpus Sounds good, thanks. In fact for ARM we'll be able to put off the reckoning until a DMB instruction comes along and we can force synchronisation at that point but I'm assuming there must be other architectures with stricter requirements. Yes, I can think of at least one arch for which the cross-cpu flush must finish before the source cpu continues. r~
Re: [Qemu-devel] [PATCH v6 13/19] cputlb: atomically update tlb fields used by tlb_reset_dirty
On 11/10/2016 05:14 PM, Alex Bennée wrote: Even worse than that we trip up the atomic.h QEMU_BUILD_BUG_ON with the atomic_cmpxchg. Now I believe we can use atomic_cmpxchg__nocheck without too much issue on x86 but we'll need to #ifdef it on detection of wide atomics. You've already got CONFIG_ATOMIC64. And what's the fallback? We ought not be enabling mttcg for 32-bit host and 64-bit guest at all. But that doesn't help much here, where we're otherwise guest width agnostic. r~
Re: [Qemu-devel] [PATCH v6 13/19] cputlb: atomically update tlb fields used by tlb_reset_dirty
Richard Henderson writes: > On 11/10/2016 05:14 PM, Alex Bennée wrote: >> Even worse than that we trip up the atomic.h QEMU_BUILD_BUG_ON with the >> atomic_cmpxchg. Now I believe we can use atomic_cmpxchg__nocheck without >> too much issue on x86 but we'll need to #ifdef it on detection of wide >> atomics. > > You've already got CONFIG_ATOMIC64. And what's the fallback? I'm going to re-factor cputlb a bit so all the TLB read and write's can be done in helper functions so I don't scatter stuff around too much. I was thinking something like: #ifdef CONFIG_ATOMIC64 .. as usual .. #else assert(!parallel_cpus) .. non atomic update .. #endif > We ought not be enabling mttcg for 32-bit host and 64-bit guest at all. But > that doesn't help much here, where we're otherwise guest width > agnostic. Hmm well the most common case (any guest on x86) should work. Currently the default mttcg code in cpus.c works when: #if defined(CONFIG_MTTCG_TARGET) && defined(CONFIG_MTTCG_HOST) I should probably expand that to default to false in the case of (sizeof target_ulong > sizeof void *) when we don't have CONFIG_ATOMIC64. Then if the user does force mttcg on they will quickly get an assert although maybe we want to report that in a nicer way? > > > r~ -- Alex Bennée
[Qemu-devel] [kvm-unit-tests PATCH v5 04/11] arm/arm64: add some delay routines
Allow a thread to wait some specified amount of time. Can specify in cycles, usecs, and msecs. Reviewed-by: Alex Bennée Reviewed-by: Eric Auger Signed-off-by: Andrew Jones --- lib/arm/asm/processor.h | 19 +++ lib/arm/processor.c | 15 +++ lib/arm64/asm/processor.h | 19 +++ lib/arm64/processor.c | 15 +++ 4 files changed, 68 insertions(+) diff --git a/lib/arm/asm/processor.h b/lib/arm/asm/processor.h index ecf5bbe1824a..bc46d1f980ee 100644 --- a/lib/arm/asm/processor.h +++ b/lib/arm/asm/processor.h @@ -5,7 +5,9 @@ * * This work is licensed under the terms of the GNU LGPL, version 2. */ +#include #include +#include enum vector { EXCPTN_RST, @@ -51,4 +53,21 @@ extern int mpidr_to_cpu(unsigned long mpidr); extern void start_usr(void (*func)(void *arg), void *arg, unsigned long sp_usr); extern bool is_user(void); +static inline u64 get_cntvct(void) +{ + u64 vct; + isb(); + asm volatile("mrrc p15, 1, %Q0, %R0, c14" : "=r" (vct)); + return vct; +} + +extern void delay(u64 cycles); +extern void udelay(unsigned long usecs); + +static inline void mdelay(unsigned long msecs) +{ + while (msecs--) + udelay(1000); +} + #endif /* _ASMARM_PROCESSOR_H_ */ diff --git a/lib/arm/processor.c b/lib/arm/processor.c index 54fdb87ef019..c2ee360df688 100644 --- a/lib/arm/processor.c +++ b/lib/arm/processor.c @@ -9,6 +9,7 @@ #include #include #include +#include static const char *processor_modes[] = { "USER_26", "FIQ_26" , "IRQ_26" , "SVC_26" , @@ -141,3 +142,17 @@ bool is_user(void) { return current_thread_info()->flags & TIF_USER_MODE; } + +void delay(u64 cycles) +{ + u64 start = get_cntvct(); + while ((get_cntvct() - start) < cycles) + cpu_relax(); +} + +void udelay(unsigned long usec) +{ + unsigned int frq; + asm volatile("mrc p15, 0, %0, c14, c0, 0" : "=r" (frq)); + delay((u64)usec * frq / 100); +} diff --git a/lib/arm64/asm/processor.h b/lib/arm64/asm/processor.h index 7e448dc81a6a..94f7ce35b65c 100644 --- a/lib/arm64/asm/processor.h +++ b/lib/arm64/asm/processor.h @@ -17,8 +17,10 @@ #define SCTLR_EL1_M(1 << 0) #ifndef __ASSEMBLY__ +#include #include #include +#include enum vector { EL1T_SYNC, @@ -89,5 +91,22 @@ extern int mpidr_to_cpu(unsigned long mpidr); extern void start_usr(void (*func)(void *arg), void *arg, unsigned long sp_usr); extern bool is_user(void); +static inline u64 get_cntvct(void) +{ + u64 vct; + isb(); + asm volatile("mrs %0, cntvct_el0" : "=r" (vct)); + return vct; +} + +extern void delay(u64 cycles); +extern void udelay(unsigned long usecs); + +static inline void mdelay(unsigned long msecs) +{ + while (msecs--) + udelay(1000); +} + #endif /* !__ASSEMBLY__ */ #endif /* _ASMARM64_PROCESSOR_H_ */ diff --git a/lib/arm64/processor.c b/lib/arm64/processor.c index deeab4ec9c8a..50fa835c6f1e 100644 --- a/lib/arm64/processor.c +++ b/lib/arm64/processor.c @@ -9,6 +9,7 @@ #include #include #include +#include static const char *vector_names[] = { "el1t_sync", @@ -253,3 +254,17 @@ bool is_user(void) { return current_thread_info()->flags & TIF_USER_MODE; } + +void delay(u64 cycles) +{ + u64 start = get_cntvct(); + while ((get_cntvct() - start) < cycles) + cpu_relax(); +} + +void udelay(unsigned long usec) +{ + unsigned int frq; + asm volatile("mrs %0, cntfrq_el0" : "=r" (frq)); + delay((u64)usec * frq / 100); +} -- 2.7.4
[Qemu-devel] [PULL 3/6] target-i386/machine: fix migrate faile because of Hyper-V HV_X64_MSR_VP_RUNTIME
From: ZhuangYanying Hyper-V HV_X64_MSR_VP_RUNTIME was introduced in linux-4.4 + qemu-2.5. As long as the KVM module supports, qemu will save / load the vmstate_msr_hyperv_runtime register during the migration. Regardless of whether the hyperv_runtime configuration of x86_cpu_properties is enabled. The qemu-2.3 does not support this feature, of course, failed to migrate. linux-BGSfqC:/home/qemu # ./x86_64-softmmu/qemu-system-x86_64 --enable-kvm \ -nodefaults -machine pc-i440fx-2.3,accel=kvm,usb=off -smp 4 -m 4096 -drive \ file=/work/suse/sles11sp3.img.bak,format=raw,if=none,id=drive-virtio-disk0,cache=none \ -device virtio-blk-pci,scsi=off,bus=pci.0,addr=0x5,drive=drive-virtio-disk0,id=virtio-disk0 \ -vnc :99 -device cirrus-vga,id=video0,vgamem_mb=8,bus=pci.0,addr=0x2 -monitor vc save_section_header:se->section_id=3,se->idstr:ram,se->instance_id=0,se->version_id=4 save_section_header:se->section_id=0,se->idstr:timer,se->instance_id=0,se->version_id=2 save_section_header:se->section_id=4,se->idstr:cpu_common,se->instance_id=0,se->version_id=1 save_section_header:se->section_id=5,se->idstr:cpu,se->instance_id=0,se->version_id=12 vmstate_subsection_save:vmsd->name:cpu/async_pf_msr hyperv_runtime_enable_needed:env->msr_hv_runtime=128902811 vmstate_subsection_save:vmsd->name:cpu/msr_hyperv_runtime Since hyperv_runtime is false, vm will not use hv->runtime_offset, then vmstate_msr_hyperv_runtime is no need to transfer while migrating. Signed-off-by: ann.zhuangyany...@huawei.com Message-Id: <1478247398-5016-1-git-send-email-ann.zhuangyany...@huawei.com> Signed-off-by: Paolo Bonzini --- target-i386/machine.c | 4 1 file changed, 4 insertions(+) diff --git a/target-i386/machine.c b/target-i386/machine.c index 48037f1..760f82b 100644 --- a/target-i386/machine.c +++ b/target-i386/machine.c @@ -709,6 +709,10 @@ static bool hyperv_runtime_enable_needed(void *opaque) X86CPU *cpu = opaque; CPUX86State *env = &cpu->env; +if (!cpu->hyperv_runtime) { +return false; +} + return env->msr_hv_runtime != 0; } -- 1.8.3.1
Re: [Qemu-devel] [PATCH v6 13/19] cputlb: atomically update tlb fields used by tlb_reset_dirty
On 11/09/2016 03:57 PM, Alex Bennée wrote: +/* We currently can't handle more than 16 bits in the MMUIDX bitmask. + */ +QEMU_BUILD_BUG_ON(NB_MMU_MODES > 16); We already assert <= 12 in exec/cpu_ldst.h. Although really any such assert belongs in exec/cpu-defs.h, where we define CPU_TLB_BITS et al. That said, what's the technical restriction here? r~
[Qemu-devel] [Bug 616769] Re: interrupt problem x86_64
OK, thanks a lot for your response ... so let's close this bug now. ** Changed in: qemu Status: Incomplete => Invalid -- You received this bug notification because you are a member of qemu- devel-ml, which is subscribed to QEMU. https://bugs.launchpad.net/bugs/616769 Title: interrupt problem x86_64 Status in QEMU: Invalid Bug description: Hello. I'm studing the x86_64 arch to port colinux to this new architecture. For who does not know, colinux is a port of linux that runs on windows NATIVELY. Colinux is 1) a small windows driver 2) a kernel patched 3) some windows user-space application that talk with linux kernel (network, console, ...) I have written the more internal part of colinux, that is the code that switch between windows and linux. This is done saving and restore the machine state : 1) CR3 2) IDT 3) registers The problem I see is that after the switch I see the reboot of my virtual machine. I would say that the new IDT and/or CR3 is not flushed. My code is written in asm/C so I can follow the code step by step. I can say exactly the instruction that is broken. All my code runs nicely on bochs. I don't have an x86_64 real PC. If someone wants to repair this bug I'm here. Paolo Minazzi To manage notifications about this bug go to: https://bugs.launchpad.net/qemu/+bug/616769/+subscriptions
[Qemu-devel] [kvm-unit-tests PATCH v5 11/11] arm/arm64: gic: don't just use zero
Allow user to select who sends ipis and with which irq, rather than just always sending irq=0 from cpu0. Signed-off-by: Andrew Jones --- v4: improve structure and make sure spurious checking is done even when the sender isn't cpu0 v2: actually check that the irq received was the irq sent, and (for gicv2) that the sender is the expected one. --- arm/gic.c | 99 ++- 1 file changed, 73 insertions(+), 26 deletions(-) diff --git a/arm/gic.c b/arm/gic.c index ca68f8ad1cb9..af9e745e2e5c 100644 --- a/arm/gic.c +++ b/arm/gic.c @@ -11,6 +11,7 @@ * This work is licensed under the terms of the GNU LGPL, version 2. */ #include +#include #include #include #include @@ -34,6 +35,8 @@ static struct gic *gic; static int gic_version; static int acked[NR_CPUS], spurious[NR_CPUS]; static cpumask_t ready; +static int sender; +static u32 irq; static void nr_cpu_check(int nr) { @@ -86,7 +89,16 @@ static void check_acked(cpumask_t *mask) static u32 gicv2_read_iar(void) { - return readl(gicv2_cpu_base() + GICC_IAR); + u32 iar = readl(gicv2_cpu_base() + GICC_IAR); + int src = (iar >> 10) & 7; + + if (src != sender) { + report("cpu%d received IPI from unexpected source cpu%d " + "(expected cpu%d)", + false, smp_processor_id(), src, sender); + } + + return iar; } static u32 gicv2_irqnr(u32 iar) @@ -111,9 +123,15 @@ static void ipi_handler(struct pt_regs *regs __unused) if (irqnr != GICC_INT_SPURIOUS) { gic->write_eoi(irqstat); - smp_rmb(); /* pairs with wmb in ipi_test functions */ - ++acked[smp_processor_id()]; - smp_wmb(); /* pairs with rmb in check_acked */ + if (irqnr == irq) { + smp_rmb(); /* pairs with wmb in ipi_test functions */ + ++acked[smp_processor_id()]; + smp_wmb(); /* pairs with rmb in check_acked */ + } else { + report("cpu%d received unexpected irq %u " + "(expected %u)", + false, smp_processor_id(), irqnr, irq); + } } else { ++spurious[smp_processor_id()]; smp_wmb(); @@ -122,19 +140,19 @@ static void ipi_handler(struct pt_regs *regs __unused) static void gicv2_ipi_send_self(void) { - writel(2 << 24, gicv2_dist_base() + GICD_SGIR); + writel(2 << 24 | irq, gicv2_dist_base() + GICD_SGIR); } static void gicv2_ipi_send_tlist(cpumask_t *mask) { u8 tlist = (u8)cpumask_bits(mask)[0]; - writel(tlist << 16, gicv2_dist_base() + GICD_SGIR); + writel(tlist << 16 | irq, gicv2_dist_base() + GICD_SGIR); } static void gicv2_ipi_send_broadcast(void) { - writel(1 << 24, gicv2_dist_base() + GICD_SGIR); + writel(1 << 24 | irq, gicv2_dist_base() + GICD_SGIR); } #define ICC_SGI1R_AFFINITY_1_SHIFT 16 @@ -200,7 +218,7 @@ static void gicv3_ipi_send_tlist(cpumask_t *mask) /* Send the IPIs for the target list of this cluster */ sgi1r = (MPIDR_TO_SGI_AFFINITY(cluster_id, 3) | MPIDR_TO_SGI_AFFINITY(cluster_id, 2) | -/* irq << 24 | */ +irq << 24 | MPIDR_TO_SGI_AFFINITY(cluster_id, 1) | tlist); @@ -222,7 +240,7 @@ static void gicv3_ipi_send_self(void) static void gicv3_ipi_send_broadcast(void) { - gicv3_write_sgi1r(1ULL << 40); + gicv3_write_sgi1r(1ULL << 40 | irq << 24); isb(); } @@ -234,7 +252,7 @@ static void ipi_test_self(void) memset(acked, 0, sizeof(acked)); smp_wmb(); cpumask_clear(&mask); - cpumask_set_cpu(0, &mask); + cpumask_set_cpu(smp_processor_id(), &mask); gic->ipi.send_self(); check_acked(&mask); report_prefix_pop(); @@ -249,7 +267,7 @@ static void ipi_test_smp(void) memset(acked, 0, sizeof(acked)); smp_wmb(); cpumask_copy(&mask, &cpu_present_mask); - for (i = 0; i < nr_cpus; i += 2) + for (i = smp_processor_id() & 1; i < nr_cpus; i += 2) cpumask_clear_cpu(i, &mask); gic->ipi.send_tlist(&mask); check_acked(&mask); @@ -259,7 +277,7 @@ static void ipi_test_smp(void) memset(acked, 0, sizeof(acked)); smp_wmb(); cpumask_copy(&mask, &cpu_present_mask); - cpumask_clear_cpu(0, &mask); + cpumask_clear_cpu(smp_processor_id(), &mask); gic->ipi.send_broadcast(); check_acked(&mask); report_prefix_pop(); @@ -276,6 +294,27 @@ static void ipi_enable(void) local_irq_enable(); } +static void ipi_send(void) +{ + int cpu; + + ipi_enable(); + wait_on_read