date:20181105

Re: [Qemu-devel] [Qemu-trivial] [PATCH 1/1] virtio-blk: fix comment for virtio_blk_rw_complete as nalloc is initially -1

2018-11-05 Thread Laurent Vivier

On 06/11/2018 05:52, Dongli Zhang wrote:
> The initial value of nalloc is -1, not 1.
> 
> Signed-off-by: Dongli Zhang 
> ---
> This is based on git://git.kernel.org/pub/scm/virt/kvm/mst/qemu.git 
> tags/for_upstream
> 
>  hw/block/virtio-blk.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c
> index 83cf5c0..30999c3 100644
> --- a/hw/block/virtio-blk.c
> +++ b/hw/block/virtio-blk.c
> @@ -96,7 +96,7 @@ static void virtio_blk_rw_complete(void *opaque, int ret)
>  trace_virtio_blk_rw_complete(vdev, req, ret);
>  
>  if (req->qiov.nalloc != -1) {
> -/* If nalloc is != 1 req->qiov is a local copy of the original
> +/* If nalloc is != -1 req->qiov is a local copy of the original
>   * external iovec. It was allocated in submit_requests to be
>   * able to merge requests. */
>  qemu_iovec_destroy(&req->qiov);
> 

Reviewed-by: Laurent Vivier

Re: [Qemu-devel] [PATCH v2 01/10] pci/pcie: rename hotplug handler callbacks

2018-11-05 Thread David Gibson

On Mon, Nov 05, 2018 at 11:20:35AM +0100, David Hildenbrand wrote:
> The callbacks are also called for cold plugged devices. Drop the "hot"
> to better match the actual callback names.
> 
> While at it, also rename  pcie_cap_slot_hotplug_common() to
> pcie_cap_slot_check_common().

Uh.. this part of the message doesn't appear to be accurate any more.

> 
> Signed-off-by: David Hildenbrand 

Apart from that,

Reviewed-by: David Gibson 

> ---
>  hw/pci/pcie.c | 17 -
>  hw/pci/pcie_port.c|  4 ++--
>  include/hw/pci/pcie.h |  8 
>  3 files changed, 14 insertions(+), 15 deletions(-)
> 
> diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c
> index 6c91bd44a0..44737cc1cd 100644
> --- a/hw/pci/pcie.c
> +++ b/hw/pci/pcie.c
> @@ -315,9 +315,8 @@ static void pcie_cap_slot_event(PCIDevice *dev, 
> PCIExpressHotPlugEvent event)
>  hotplug_event_notify(dev);
>  }
>  
> -static void pcie_cap_slot_hotplug_common(PCIDevice *hotplug_dev,
> - DeviceState *dev,
> - uint8_t **exp_cap, Error **errp)
> +static void pcie_cap_slot_plug_common(PCIDevice *hotplug_dev, DeviceState 
> *dev,
> +  uint8_t **exp_cap, Error **errp)
>  {
>  *exp_cap = hotplug_dev->config + hotplug_dev->exp.exp_cap;
>  uint16_t sltsta = pci_get_word(*exp_cap + PCI_EXP_SLTSTA);
> @@ -331,13 +330,13 @@ static void pcie_cap_slot_hotplug_common(PCIDevice 
> *hotplug_dev,
>  }
>  }
>  
> -void pcie_cap_slot_hotplug_cb(HotplugHandler *hotplug_dev, DeviceState *dev,
> -  Error **errp)
> +void pcie_cap_slot_plug_cb(HotplugHandler *hotplug_dev, DeviceState *dev,
> +   Error **errp)
>  {
>  uint8_t *exp_cap;
>  PCIDevice *pci_dev = PCI_DEVICE(dev);
>  
> -pcie_cap_slot_hotplug_common(PCI_DEVICE(hotplug_dev), dev, &exp_cap, 
> errp);
> +pcie_cap_slot_plug_common(PCI_DEVICE(hotplug_dev), dev, &exp_cap, errp);
>  
>  /* Don't send event when device is enabled during qemu machine creation:
>   * it is present on boot, no hotplug event is necessary. We do send an
> @@ -365,14 +364,14 @@ static void pcie_unplug_device(PCIBus *bus, PCIDevice 
> *dev, void *opaque)
>  object_unparent(OBJECT(dev));
>  }
>  
> -void pcie_cap_slot_hot_unplug_request_cb(HotplugHandler *hotplug_dev,
> - DeviceState *dev, Error **errp)
> +void pcie_cap_slot_unplug_request_cb(HotplugHandler *hotplug_dev,
> + DeviceState *dev, Error **errp)
>  {
>  uint8_t *exp_cap;
>  PCIDevice *pci_dev = PCI_DEVICE(dev);
>  PCIBus *bus = pci_get_bus(pci_dev);
>  
> -pcie_cap_slot_hotplug_common(PCI_DEVICE(hotplug_dev), dev, &exp_cap, 
> errp);
> +pcie_cap_slot_plug_common(PCI_DEVICE(hotplug_dev), dev, &exp_cap, errp);
>  
>  /* In case user cancel the operation of multi-function hot-add,
>   * remove the function that is unexposed to guest individually,
> diff --git a/hw/pci/pcie_port.c b/hw/pci/pcie_port.c
> index 6432b9ac1f..73e81e5847 100644
> --- a/hw/pci/pcie_port.c
> +++ b/hw/pci/pcie_port.c
> @@ -154,8 +154,8 @@ static void pcie_slot_class_init(ObjectClass *oc, void 
> *data)
>  HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(oc);
>  
>  dc->props = pcie_slot_props;
> -hc->plug = pcie_cap_slot_hotplug_cb;
> -hc->unplug_request = pcie_cap_slot_hot_unplug_request_cb;
> +hc->plug = pcie_cap_slot_plug_cb;
> +hc->unplug_request = pcie_cap_slot_unplug_request_cb;
>  }
>  
>  static const TypeInfo pcie_slot_type_info = {
> diff --git a/include/hw/pci/pcie.h b/include/hw/pci/pcie.h
> index b71e369703..735f8e8154 100644
> --- a/include/hw/pci/pcie.h
> +++ b/include/hw/pci/pcie.h
> @@ -131,8 +131,8 @@ void pcie_ari_init(PCIDevice *dev, uint16_t offset, 
> uint16_t nextfn);
>  void pcie_dev_ser_num_init(PCIDevice *dev, uint16_t offset, uint64_t 
> ser_num);
>  void pcie_ats_init(PCIDevice *dev, uint16_t offset);
>  
> -void pcie_cap_slot_hotplug_cb(HotplugHandler *hotplug_dev, DeviceState *dev,
> -  Error **errp);
> -void pcie_cap_slot_hot_unplug_request_cb(HotplugHandler *hotplug_dev,
> - DeviceState *dev, Error **errp);
> +void pcie_cap_slot_plug_cb(HotplugHandler *hotplug_dev, DeviceState *dev,
> +   Error **errp);
> +void pcie_cap_slot_unplug_request_cb(HotplugHandler *hotplug_dev,
> + DeviceState *dev, Error **errp);
>  #endif /* QEMU_PCIE_H */

-- 
David Gibson| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson


signature.asc
Description: PGP signature

[Qemu-devel] [PATCH v2 0/2] x86/cpu: Enable direct stores cpu features

2018-11-05 Thread Liu Jingqi

Enable direct stores cpu features including MOVDIRI and MOVDIR64B.

MOVDIRI moves doubleword or quadword from register to memory through
direct store.
MOVDIR64B moves 64-bytes as direct-store with 64-bytes write atomicity.

Changelog:
v2:
Separated from the series 
http://lists.nongnu.org/archive/html/qemu-devel/2018-07/msg02330.html
since umonitor/umwait/tpause cpu features are not ready yet.
v1:
Sent out with umonitor/umwait/tpause cpu features.

Liu Jingqi (2):
  x86/cpu: Enable MOVDIRI cpu feature
  x86/cpu: Enable MOVDIR64B cpu feature

 target/i386/cpu.c | 4 ++--
 target/i386/cpu.h | 2 ++
 2 files changed, 4 insertions(+), 2 deletions(-)

-- 
2.7.4

[Qemu-devel] [PATCH v2 1/2] x86/cpu: Enable MOVDIRI cpu feature

2018-11-05 Thread Liu Jingqi

MOVDIRI moves doubleword or quadword from register to memory through
direct store which is implemented by using write combining (WC) for
writing data directly into memory without caching the data.

The bit definition:
CPUID.(EAX=7,ECX=0):ECX[bit 27] MOVDIRI

For details, refer to the release document at the link below:
https://software.intel.com/sites/default/files/managed/c5/15/\
architecture-instruction-set-extensions-programming-reference.pdf

Cc: Xu Tao 
Signed-off-by: Liu Jingqi 
---
 target/i386/cpu.c | 2 +-
 target/i386/cpu.h | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index af7e9f0..d9ab68c 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -1023,7 +1023,7 @@ static FeatureWordInfo feature_word_info[FEATURE_WORDS] = 
{
 "avx512bitalg", NULL, "avx512-vpopcntdq", NULL,
 "la57", NULL, NULL, NULL,
 NULL, NULL, "rdpid", NULL,
-NULL, "cldemote", NULL, NULL,
+NULL, "cldemote", NULL, "movdiri",
 NULL, NULL, NULL, NULL,
 },
 .cpuid = {
diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index ad0e0b4..3debba3 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -687,6 +687,7 @@ typedef uint32_t FeatureWordArray[FEATURE_WORDS];
 #define CPUID_7_0_ECX_LA57 (1U << 16)
 #define CPUID_7_0_ECX_RDPID(1U << 22)
 #define CPUID_7_0_ECX_CLDEMOTE (1U << 25)  /* CLDEMOTE Instruction */
+#define CPUID_7_0_ECX_MOVDIRI  (1U << 27)  /* MOVDIRI Instruction */
 
 #define CPUID_7_0_EDX_AVX512_4VNNIW (1U << 2) /* AVX512 Neural Network 
Instructions */
 #define CPUID_7_0_EDX_AVX512_4FMAPS (1U << 3) /* AVX512 Multiply Accumulation 
Single Precision */
-- 
2.7.4

[Qemu-devel] [PATCH v2 2/2] x86/cpu: Enable MOVDIR64B cpu feature

2018-11-05 Thread Liu Jingqi

MOVDIR64B moves 64-bytes as direct-store with 64-bytes write atomicity.
Direct store is implemented by using write combining (WC) for writing
data directly into memory without caching the data.

The bit definition:
CPUID.(EAX=7,ECX=0):ECX[bit 28] MOVDIR64B

For details, refer to the release document at the link below:
https://software.intel.com/sites/default/files/managed/c5/15/\
architecture-instruction-set-extensions-programming-reference.pdf

Cc: Xu Tao 
Signed-off-by: Liu Jingqi 
---
 target/i386/cpu.c | 2 +-
 target/i386/cpu.h | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index d9ab68c..32e1551 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -1024,7 +1024,7 @@ static FeatureWordInfo feature_word_info[FEATURE_WORDS] = 
{
 "la57", NULL, NULL, NULL,
 NULL, NULL, "rdpid", NULL,
 NULL, "cldemote", NULL, "movdiri",
-NULL, NULL, NULL, NULL,
+"movdir64b", NULL, NULL, NULL,
 },
 .cpuid = {
 .eax = 7,
diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index 3debba3..937a3a2 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -688,6 +688,7 @@ typedef uint32_t FeatureWordArray[FEATURE_WORDS];
 #define CPUID_7_0_ECX_RDPID(1U << 22)
 #define CPUID_7_0_ECX_CLDEMOTE (1U << 25)  /* CLDEMOTE Instruction */
 #define CPUID_7_0_ECX_MOVDIRI  (1U << 27)  /* MOVDIRI Instruction */
+#define CPUID_7_0_ECX_MOVDIR64B (1U << 28) /* MOVDIR64B Instruction */
 
 #define CPUID_7_0_EDX_AVX512_4VNNIW (1U << 2) /* AVX512 Neural Network 
Instructions */
 #define CPUID_7_0_EDX_AVX512_4FMAPS (1U << 3) /* AVX512 Multiply Accumulation 
Single Precision */
-- 
2.7.4

Re: [Qemu-devel] [PATCH RFC v7 9/9] qemu_thread_create: propagate the error to callers to handle

2018-11-05 Thread Fei Li





On 11/05/2018 09:53 PM, Juan Quintela wrote:

Fei Li  wrote:

Make qemu_thread_create() return a Boolean to indicate if it succeeds
rather than failing with an error. And add an Error parameter to hold
the error message and let the callers handle it.

Nice work, thanks.



Signed-off-by: Fei Li 
---
  cpus.c  | 45 -
  dump.c  |  6 +++--
  hw/misc/edu.c   |  6 +++--
  hw/ppc/spapr_hcall.c| 10 +++--
  hw/rdma/rdma_backend.c  |  4 +++-
  hw/usb/ccid-card-emulated.c | 15 +
  include/qemu/thread.h   |  4 ++--
  io/task.c   |  3 ++-
  iothread.c  | 16 +-
  migration/migration.c   | 54 +
  migration/postcopy-ram.c| 14 ++--
  migration/ram.c | 41 +-
  migration/savevm.c  | 11 ++---
  tests/atomic_add-bench.c|  3 ++-
  tests/iothread.c|  2 +-
  tests/qht-bench.c   |  3 ++-
  tests/rcutorture.c  |  3 ++-
  tests/test-aio.c|  2 +-
  tests/test-rcu-list.c   |  3 ++-
  ui/vnc-jobs.c   | 17 +-
  ui/vnc-jobs.h   |  2 +-
  ui/vnc.c|  4 +++-
  util/compatfd.c | 12 --
  util/oslib-posix.c  | 17 ++
  util/qemu-thread-posix.c| 24 +---
  util/qemu-thread-win32.c| 16 ++
  util/rcu.c  |  3 ++-
  util/thread-pool.c  |  4 +++-
  28 files changed, 243 insertions(+), 101 deletions(-)

diff --git a/cpus.c b/cpus.c
index ed71618e1f..0510f90e06 100644
--- a/cpus.c
+++ b/cpus.c
@@ -1949,15 +1949,20 @@ static void qemu_tcg_init_vcpu(CPUState *cpu, Error 
**errp)
  snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/TCG",
   cpu->cpu_index);
  
-qemu_thread_create(cpu->thread, thread_name, qemu_tcg_cpu_thread_fn,

-   cpu, QEMU_THREAD_JOINABLE);
+if (!qemu_thread_create(cpu->thread, thread_name,
+qemu_tcg_cpu_thread_fn, cpu,
+QEMU_THREAD_JOINABLE, errp)) {

I think that in these cases where you are not handling the error, you
should use an exit() here.  We can't continue.
Not sure whether I understand correctly, but we cannot use exit() here,
as there is more than one caller and each caller has its own way of
handling the error.
Instead we pass the errp to further callers to let them handle. Take the
qemu_xxx_init_vcpu() for example, there are two callers:
- the main thread to create vcpu while starting the guest[1]:
  pc_cpus_init() {
   pc_new_cpu(, &error_fatal);
  }
- using hmp to hot-plug one cpu:
  hmp_cpu_add() {
      Error *err = NULL;
  qmp_cpu_add(, &err) {
           pc_hot_add_cpu(cpuid, &local_err) {
       pc_new_cpu(, &local_err);
   }
  }
  }

For the first case, if there's an error, qemu will exit when 
error_handle_fatal(&error_fatal, )

is called by error_propagate().
For the second case, hmp_handle_error() will handle the error.

I am not saying that you need to fix all the places that call
qemu_thread_create() to handle the error gracefully, but in the places
where you don't, you should just exit.

I.e. this patch should be split in something that does:

-   qemu_thread_create(, errp);
+   if (!qemu_thread_create(..., errp))  {
+  error_report_err(errp);
+  exit(1);
+   }

So, we can fix any caller independently from here.
Otherwise, we are ignoring an important error.

What do you think?

Later, Juan.
BTW, where a fatal error has only one caller, e.g.
qio_task_run_in_thread(), I just pass &error_abort to
qemu_thread_create(). :)

Have a nice day, thanks
Fei


[1]
(gdb) bt
#0  0x5584b333 in qemu_init_vcpu (cpu=0x56927db0, 
errp=0x7fffda40)

    at /build/gitcode/qemu-build/cpus.c:2071
#1  0x55969861 in x86_cpu_realizefn (dev=0x56927db0, 
errp=0x7fffda40)

    at /build/gitcode/qemu-build/target/i386/cpu.c:5115
#2  0x55a9fed6 in device_set_realized (obj=0x56927db0, 
value=true, errp=0x7fffdc18) at hw/core/qdev.c:826

#3  0x55ce91b1 in property_set_bool (obj=0x56927db0, v=
    0x5693f380, name=0x55f4f1a0 "realized", 
opaque=0x569046c0, errp=0x7fffdc18) at qom/object.c:1991

#4  0x55ce707d in object_property_set (obj=0x56927db0, v=
    0x5693f380, name=0x55f4f1a0 "realized", errp=0x7fffdc18)
    at qom/object.c:1183
#5  0x55cea893 in object_property_set_qobject 
(obj=0x56927db0, value=0x569439b0, name=0x55f4f1a0 
"realized", errp=0x7fffdc18) at qom/qom-qobject.c:27
#6  0x55ce7416 in object_property_set_bool (obj=0x56927db0, 
value=true, name=0x55f4f1a0 "realized", errp=0x7fffdc18) at 
qom/object.c:1249
#7  0

Re: [Qemu-devel] [PATCH v4 00/13] arm: nRF51 Devices and Microbit Support

2018-11-05 Thread no-reply

Hi,

This series failed docker-quick@centos7 build test. Please find the testing 
commands and
their output below. If you have Docker installed, you can probably reproduce it
locally.

Type: series
Message-id: 20181102170730.12432-1-cont...@steffen-goertz.de
Subject: [Qemu-devel] [PATCH v4 00/13] arm: nRF51 Devices and Microbit Support

=== TEST SCRIPT BEGIN ===
#!/bin/bash
time make docker-test-quick@centos7 SHOW_ENV=1 J=8
=== TEST SCRIPT END ===

Updating 3c8cf5a9c21ff8782164d1def7f44bd888713384
Switched to a new branch 'test'
6bd6c20c05 arm: Add Clock peripheral stub to NRF51 SOC
a1ee75f008 arm: Instantiate NRF51 Timers
405c15c546 hw/timer/nrf51_timer: Add nRF51 Timer peripheral
64718f8a15 tests/microbit-test: Add Tests for nRF51 GPIO
96d96de605 arm: Instantiate NRF51 general purpose I/O
9ac11075d8 hw/gpio/nrf51_gpio: Add nRF51 GPIO peripheral
db23eec21f tests: Add bbc:microbit / nRF51 test suite
961ed7a01f arm: Instantiate NRF51 special NVM's and NVMC
192913352d hw/nvram/nrf51_nvm: Add nRF51 non-volatile memories
09a63139d5 arm: Instantiate NRF51 random number generator
919d4fabab hw/misc/nrf51_rng: Add NRF51 random number generator peripheral
f14fd1a1f8 arm: Add header to host common definition for nRF51 SOC peripherals
0d51318cba qtest: Add set_irq_in command to set IRQ/GPIO level

=== OUTPUT BEGIN ===
  BUILD   centos7
make[1]: Entering directory '/var/tmp/patchew-tester-tmp-_5r9zv2e/src'
  GEN 
/var/tmp/patchew-tester-tmp-_5r9zv2e/src/docker-src.2018-11-06-01.41.32.6018/qemu.tar
Cloning into 
'/var/tmp/patchew-tester-tmp-_5r9zv2e/src/docker-src.2018-11-06-01.41.32.6018/qemu.tar.vroot'...
done.
Checking out files:  47% (3077/6455)   
Checking out files:  48% (3099/6455)   
Checking out files:  49% (3163/6455)   
Checking out files:  50% (3228/6455)   
Checking out files:  51% (3293/6455)   
Checking out files:  52% (3357/6455)   
Checking out files:  53% (3422/6455)   
Checking out files:  54% (3486/6455)   
Checking out files:  55% (3551/6455)   
Checking out files:  56% (3615/6455)   
Checking out files:  57% (3680/6455)   
Checking out files:  58% (3744/6455)   
Checking out files:  59% (3809/6455)   
Checking out files:  60% (3873/6455)   
Checking out files:  61% (3938/6455)   
Checking out files:  62% (4003/6455)   
Checking out files:  63% (4067/6455)   
Checking out files:  64% (4132/6455)   
Checking out files:  65% (4196/6455)   
Checking out files:  66% (4261/6455)   
Checking out files:  67% (4325/6455)   
Checking out files:  68% (4390/6455)   
Checking out files:  69% (4454/6455)   
Checking out files:  70% (4519/6455)   
Checking out files:  71% (4584/6455)   
Checking out files:  72% (4648/6455)   
Checking out files:  73% (4713/6455)   
Checking out files:  74% (4777/6455)   
Checking out files:  75% (4842/6455)   
Checking out files:  76% (4906/6455)   
Checking out files:  77% (4971/6455)   
Checking out files:  78% (5035/6455)   
Checking out files:  79% (5100/6455)   
Checking out files:  80% (5164/6455)   
Checking out files:  81% (5229/6455)   
Checking out files:  82% (5294/6455)   
Checking out files:  83% (5358/6455)   
Checking out files:  84% (5423/6455)   
Checking out files:  85% (5487/6455)   
Checking out files:  86% (5552/6455)   
Checking out files:  87% (5616/6455)   
Checking out files:  88% (5681/6455)   
Checking out files:  89% (5745/6455)   
Checking out files:  90% (5810/6455)   
Checking out files:  91% (5875/6455)   
Checking out files:  92% (5939/6455)   
Checking out files:  93% (6004/6455)   
Checking out files:  94% (6068/6455)   
Checking out files:  95% (6133/6455)   
Checking out files:  96% (6197/6455)   
Checking out files:  97% (6262/6455)   
Checking out files:  98% (6326/6455)   
Checking out files:  99% (6391/6455)   
Checking out files: 100% (6455/6455)   
Checking out files: 100% (6455/6455), done.
Your branch is up-to-date with 'origin/test'.
Submodule 'dtc' (git://git.qemu-project.org/dtc.git) registered for path 'dtc'
Cloning into 
'/var/tmp/patchew-tester-tmp-_5r9zv2e/src/docker-src.2018-11-06-01.41.32.6018/qemu.tar.vroot/dtc'...
Submodule path 'dtc': checked out '88f18909db731a627456f26d779445f84e449536'
Submodule 'ui/keycodemapdb' (git://git.qemu.org/keycodemapdb.git) registered 
for path 'ui/keycodemapdb'
Cloning into 
'/var/tmp/patchew-tester-tmp-_5r9zv2e/src/docker-src.2018-11-06-01.41.32.6018/qemu.tar.vroot/ui/keycodemapdb'...
Submodule path 'ui/keycodemapdb': checked out 
'6b3d716e2b6472eb7189d3220552280ef3d832ce'
  COPYRUNNER
RUN test-quick in qemu:centos7 
Packages installed:
SDL-devel-1.2.15-14.el7.x86_64
bison-3.0.4-1.el7.x86_64
bzip2-1.0.6-13.el7.x86_64
bzip2-devel-1.0.6-13.el7.x86_64
ccache-3.3.4-1.el7.x86_64
csnappy-devel-0-6.20150729gitd7bc683.el7.x86_64
flex-2.5.37-3.el7.x86_64
gcc-4.8.5-28.el7_5.1.x86_64
gettext-0.19.8.1-2.el7.x86_64
git-1.8.3.1-14.el7_5.x86_64
glib2-devel-2.54.2-2.el7.x86_64
libaio-devel-0.3.109-13.el7.x86_64
libepoxy-devel-1.3.1-2.el7_5.x86_64
libfdt-devel-1.4.6-1.el7.x86_64
lzo-devel-2.06-8

Re: [Qemu-devel] [PATCH 1/4] add QemuSupportState

2018-11-05 Thread Gerd Hoffmann

On Mon, Nov 05, 2018 at 11:49:40AM -0200, Eduardo Habkost wrote:
> On Mon, Nov 05, 2018 at 08:30:28AM +0100, Gerd Hoffmann wrote:
> >   Hi,
> > 
> > > > - Maintainers can deprecate stuff
> > > > - Orphan code can become Supported
> > > > - Once scheduled for removal, there is no way back
> > > > - 'Unknown' seems pretty similar to 'Orphan'.
> > > 
> > > I'm still worried that the supported/unsupported distinction may
> > > cause unnecessary hassle for every downstream distributor of
> > > QEMU.  Do we really have a need to differentiate supported vs
> > > unsupported device types at runtime?
> > 
> > How do you suggest to handle cirrus then?
> > 
> > Trying to deprecate it outright didn't work very well, kind of
> > understandable given that it has been the default for a long time.
> > 
> > So I think we have to mark cirrus as "obsolete", printing a message for
> > the users that they should switch to another display device (stdvga for
> > example), but keep it working for a while.
> > 
> > There are also a bunch of devices where I suspect they are not used much
> > if at all.  All those isa sound cards for example.  Playing old DOS
> > games is pretty much the only use case I can think of.  But I'm
> > wondering whether people actually use qemu for that.  There are
> > alternatives which probably handle that use case better, i.e. dosbox.
> > 
> > A similar case is bluetooth emulation.  I can't remember having seen any
> > message or patch for years.
> > 
> > Tagging such devices as "obsolete", with a message asking people to
> > report their use cases, might help figuring if and why those devices are
> > used in the wild.
> 
> Thanks for the more detailed description of the use case you have
> in mind.  It makes sense to me.
> 
> Now, I have two questions:
> 
> 1) What's more important: telling the user they are relying on an
>obsolete feature, or that they are relying on an unsupported
>feature?  What exactly is the difference?

Maybe we should pick up the suggestion (by danp I think) to have two
states, one describing the support level, and one for usage hints (i.e
"you should not use this for performance reasons, unless you run a guest
older than a decade").

> 2) Do we really need to differentiate between "obsolete" and
>"deprecated" features?  What exactly is the difference?

"deprecated" - is on deprecation schedule according to qemu deprecation
   policy (remove after two releases).
"obsolete"   - not (yet) on deprecation schedule.

> In either case, I believe a simple supported/obsolete (or
> supported/unsupported) distinction is probably going to be more
> useful than a detailed
> supported/maintained/odd-fixes/orphan/obsolete model.
> 
> Reviewing a list of obsolete devices downstream (to decide if
> they should be still compiled in downstream) sounds doable.
> Fixing up detailed supported/maintained/odd-fixes/orphan data
> downstream sounds like unnecessary extra work.
> 
> 
> > 
> > > I'd prefer to make support/unsupported differentiation to be a
> > > build system feature (e.g. a CONFIG_UNSUPPORTED build-time
> > > option) instead of a QMP/runtime feature.
> > 
> > That would be nice too, but I think we need kbuild first, otherwise
> > it'll be pretty messy.
> 
> Agreed.
> 
> -- 
> Eduardo

Re: [Qemu-devel] [PATCH V2] migration/colo.c: Fix compilation issue when disable replication

2018-11-05 Thread Zhang Chen

On Mon, Nov 5, 2018 at 6:02 PM Thomas Huth  wrote:

> On 2018-11-01 03:12, Zhang Chen wrote:
> > This compilation issue occurs when the user configures QEMU with
> > --disable-replication.
> >
> > Reported-by: Thomas Huth 
> > Signed-off-by: Zhang Chen 
> > ---
> >  migration/colo.c | 28 +++-
> >  1 file changed, 27 insertions(+), 1 deletion(-)
>
> Thanks, this fixes the compilation problem with --disable-replication
> for me:
>
> Tested-by: Thomas Huth 
>
> BTW, if colo can't be used without CONFIG_REPLICATION, would it make
> sense to disable the related objects (for the "--object" parameter) in
> an additional patch, too? Something like this (untested):
>

Hi Thomas,

Yes, you are right. The current COLO project needs to work with replication,
but in the future we will develop a COLO shared disk mode.
In this mode COLO-compare may not depend on replication, so I think we
can keep the two modules independent.

Thanks
Zhang Chen


>
> diff --git a/net/Makefile.objs b/net/Makefile.objs
> --- a/net/Makefile.objs
> +++ b/net/Makefile.objs
> @@ -10,7 +10,7 @@ common-obj-$(CONFIG_NETMAP) += netmap.o
>  common-obj-y += filter.o
>  common-obj-y += filter-buffer.o
>  common-obj-y += filter-mirror.o
> -common-obj-y += colo-compare.o
> +common-obj-$(CONFIG_REPLICATION) += colo-compare.o
>  common-obj-y += colo.o
>  common-obj-y += filter-rewriter.o
>  common-obj-y += filter-replay.o
>
>   Thomas
>

Re: [Qemu-devel] [PATCH v6 2/5] hw/riscv/virt: Connect the gpex PCIe

2018-11-05 Thread Bin Meng

Hi Alistair,

On Tue, Nov 6, 2018 at 3:47 AM Alistair Francis  wrote:
>
> On Mon, Nov 5, 2018 at 5:24 AM Bin Meng  wrote:
> >
> > Hi,
> >
> > On Wed, Oct 31, 2018 at 6:22 AM Alistair Francis
> >  wrote:
> > >
> > > Connect the gpex PCIe device based on the device tree included in the
> > > HiFive Unleashed ROM.
> > >
> > > Signed-off-by: Alistair Francis 
> > > ---
> > >  default-configs/riscv32-softmmu.mak |   6 +-
> > >  default-configs/riscv64-softmmu.mak |   6 +-
> > >  hw/riscv/virt.c | 111 
> > >  include/hw/riscv/virt.h |   8 +-
> > >  4 files changed, 127 insertions(+), 4 deletions(-)
> > >
> > > diff --git a/default-configs/riscv32-softmmu.mak 
> > > b/default-configs/riscv32-softmmu.mak
> > > index 7937c69e22..3e3d195f37 100644
> > > --- a/default-configs/riscv32-softmmu.mak
> > > +++ b/default-configs/riscv32-softmmu.mak
> > > @@ -1,7 +1,11 @@
> > >  # Default configuration for riscv-softmmu
> > >
> > > +include pci.mak
> > > +
> > >  CONFIG_SERIAL=y
> > >  CONFIG_VIRTIO_MMIO=y
> > > -include virtio.mak
> > >
> > >  CONFIG_CADENCE=y
> > > +
> > > +CONFIG_PCI_GENERIC=y
> > > +CONFIG_PCI_XILINX=y
> > > diff --git a/default-configs/riscv64-softmmu.mak 
> > > b/default-configs/riscv64-softmmu.mak
> > > index 7937c69e22..3e3d195f37 100644
> > > --- a/default-configs/riscv64-softmmu.mak
> > > +++ b/default-configs/riscv64-softmmu.mak
> > > @@ -1,7 +1,11 @@
> > >  # Default configuration for riscv-softmmu
> > >
> > > +include pci.mak
> > > +
> > >  CONFIG_SERIAL=y
> > >  CONFIG_VIRTIO_MMIO=y
> > > -include virtio.mak
> > >
> > >  CONFIG_CADENCE=y
> > > +
> > > +CONFIG_PCI_GENERIC=y
> > > +CONFIG_PCI_XILINX=y
> > > diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c
> > > index 4a137a503c..2fbe58ba4b 100644
> > > --- a/hw/riscv/virt.c
> > > +++ b/hw/riscv/virt.c
> > > @@ -39,6 +39,8 @@
> > >  #include "sysemu/arch_init.h"
> > >  #include "sysemu/device_tree.h"
> > >  #include "exec/address-spaces.h"
> > > +#include "hw/pci/pci.h"
> > > +#include "hw/pci-host/gpex.h"
> > >  #include "elf.h"
> > >
> > >  #include 
> > > @@ -55,6 +57,10 @@ static const struct MemmapEntry {
> > >  [VIRT_UART0] ={ 0x1000,  0x100 },
> > >  [VIRT_VIRTIO] =   { 0x10001000, 0x1000 },
> > >  [VIRT_DRAM] = { 0x8000,0x0 },
> > > +[VIRT_PCIE_MMIO] = { 0x20, 0x400 },
> >
> > Does this work with RV32?
>
> That's a good point, probably not. This is based on the HiFive
> unleashed values to be as similar as possible.
>

Please specify a 32-bit address to make it work for both 32-bit and 64-bit.

> >
> > > +[VIRT_PCIE_PIO] = { 0x201, 0x4000 },
> > > +[VIRT_PCIE_ECAM] = { 0x4000, 0x2000 },

Forgot to mention: the maximum size of ECAM is 0x1000 by spec.

> > > +
> > >  };
> > >
> > >  static uint64_t load_kernel(const char *kernel_filename)
> > > @@ -98,6 +104,37 @@ static hwaddr load_initrd(const char *filename, 
> > > uint64_t mem_size,
> > >  return *start + size;
> > >  }
> > >
> > > +#define INTERREUPT_MAP_WIDTH 7
> > > +
> > > +static void create_pcie_irq_map(void *fdt, char *nodename,
> > > +uint32_t plic_phandle)
> > > +{
> > > +int pin;
> > > +uint32_t full_irq_map[GPEX_NUM_IRQS * INTERREUPT_MAP_WIDTH] = { 0 };
> > > +uint32_t *irq_map = full_irq_map;
> > > +
> > > +for (pin = 0; pin < GPEX_NUM_IRQS; pin++) {
> > > +int irq_nr = PCIE_IRQ + (pin % PCI_NUM_PINS);
> > > +int i;
> > > +
> > > +uint32_t map[] = {
> > > +0, 0, 0,
> > > +pin + 1, plic_phandle, 0, irq_nr};
> > > +
> > > +/* Convert map to big endian */
> > > +for (i = 0; i < INTERREUPT_MAP_WIDTH; i++) {
> > > +irq_map[i] = cpu_to_be32(map[i]);
> > > +}
> > > +irq_map += INTERREUPT_MAP_WIDTH;
> > > +}
> > > +
> > > +qemu_fdt_setprop(fdt, nodename, "interrupt-map",
> > > + full_irq_map, sizeof(full_irq_map));
> > > +
> > > +qemu_fdt_setprop_cells(fdt, nodename, "interrupt-map-mask",
> > > +   0, 0, 0, 0x7);
> > > +}
> > > +
> > >  static void *create_fdt(RISCVVirtState *s, const struct MemmapEntry 
> > > *memmap,
> > >  uint64_t mem_size, const char *cmdline)
> > >  {
> > > @@ -233,6 +270,31 @@ static void *create_fdt(RISCVVirtState *s, const 
> > > struct MemmapEntry *memmap,
> > >  g_free(nodename);
> > >  }
> > >
> > > +nodename = g_strdup_printf("/pci@%lx",
> > > +(long) memmap[VIRT_PCIE_MMIO].base);
> > > +qemu_fdt_add_subnode(fdt, nodename);
> > > +qemu_fdt_setprop_cells(fdt, nodename, "#address-cells", 0x3);
> > > +qemu_fdt_setprop_cells(fdt, nodename, "#interrupt-cells", 0x1);
> > > +qemu_fdt_setprop_cells(fdt, nodename, "#size-cells", 0x2);
> > > +qemu_fdt_setprop_string(fdt, nodename, "compatible",
> > > +

Re: [Qemu-devel] [PATCH v4 00/13] arm: nRF51 Devices and Microbit Support

2018-11-05 Thread no-reply

Hi,

This series failed docker-mingw@fedora build test. Please find the testing 
commands and
their output below. If you have Docker installed, you can probably reproduce it
locally.

Type: series
Message-id: 20181102170730.12432-1-cont...@steffen-goertz.de
Subject: [Qemu-devel] [PATCH v4 00/13] arm: nRF51 Devices and Microbit Support

=== TEST SCRIPT BEGIN ===
#!/bin/bash
time make docker-test-mingw@fedora SHOW_ENV=1 J=8
=== TEST SCRIPT END ===

Updating 3c8cf5a9c21ff8782164d1def7f44bd888713384
Switched to a new branch 'test'
6bd6c20c05 arm: Add Clock peripheral stub to NRF51 SOC
a1ee75f008 arm: Instantiate NRF51 Timers
405c15c546 hw/timer/nrf51_timer: Add nRF51 Timer peripheral
64718f8a15 tests/microbit-test: Add Tests for nRF51 GPIO
96d96de605 arm: Instantiate NRF51 general purpose I/O
9ac11075d8 hw/gpio/nrf51_gpio: Add nRF51 GPIO peripheral
db23eec21f tests: Add bbc:microbit / nRF51 test suite
961ed7a01f arm: Instantiate NRF51 special NVM's and NVMC
192913352d hw/nvram/nrf51_nvm: Add nRF51 non-volatile memories
09a63139d5 arm: Instantiate NRF51 random number generator
919d4fabab hw/misc/nrf51_rng: Add NRF51 random number generator peripheral
f14fd1a1f8 arm: Add header to host common definition for nRF51 SOC peripherals
0d51318cba qtest: Add set_irq_in command to set IRQ/GPIO level

=== OUTPUT BEGIN ===
  BUILD   fedora
make[1]: Entering directory '/var/tmp/patchew-tester-tmp-ibmgvkmc/src'
  GEN 
/var/tmp/patchew-tester-tmp-ibmgvkmc/src/docker-src.2018-11-06-01.40.44.5367/qemu.tar
Cloning into 
'/var/tmp/patchew-tester-tmp-ibmgvkmc/src/docker-src.2018-11-06-01.40.44.5367/qemu.tar.vroot'...
done.
Your branch is up-to-date with 'origin/test'.
Submodule 'dtc' (git://git.qemu-project.org/dtc.git) registered for path 'dtc'
Cloning into 
'/var/tmp/patchew-tester-tmp-ibmgvkmc/src/docker-src.2018-11-06-01.40.44.5367/qemu.tar.vroot/dtc'...
Submodule path 'dtc': checked out '88f18909db731a627456f26d779445f84e449536'
Submodule 'ui/keycodemapdb' (git://git.qemu.org/keycodemapdb.git) registered 
for path 'ui/keycodemapdb'
Cloning into 
'/var/tmp/patchew-tester-tmp-ibmgvkmc/src/docker-src.2018-11-06-01.40.44.5367/qemu.tar.vroot/ui/keycodemapdb'...
Submodule path 'ui/keycodemapdb': checked out 
'6b3d716e2b6472eb7189d3220552280ef3d832ce'
  COPYRUNNER
RUN test-mingw in qemu:fedora 
Packages installed:
SDL2-devel-2.0.8-5.fc28.x86_64
bc-1.07.1-5.fc28.x86_64
bison-3.0.4-9.fc28.x86_64
bluez-libs-devel-5.50-1.fc28.x86_64
brlapi-devel-0.6.7-19.fc28.x86_64
bzip2-1.0.6-26.fc28.x86_64
bzip2-devel-1.0.6-26.fc28.x86_64
ccache-3.4.2-2.fc28.x86_64
clang-6.0.1-1.fc28.x86_64
device-mapper-multipath-devel-0.7.4-3.git07e7bd5.fc28.x86_64
findutils-4.6.0-19.fc28.x86_64
flex-2.6.1-7.fc28.x86_64
gcc-8.1.1-5.fc28.x86_64
gcc-c++-8.1.1-5.fc28.x86_64
gettext-0.19.8.1-14.fc28.x86_64
git-2.17.1-3.fc28.x86_64
glib2-devel-2.56.1-4.fc28.x86_64
glusterfs-api-devel-4.1.2-2.fc28.x86_64
gnutls-devel-3.6.3-3.fc28.x86_64
gtk3-devel-3.22.30-1.fc28.x86_64
hostname-3.20-3.fc28.x86_64
libaio-devel-0.3.110-11.fc28.x86_64
libasan-8.1.1-5.fc28.x86_64
libattr-devel-2.4.48-3.fc28.x86_64
libcap-devel-2.25-9.fc28.x86_64
libcap-ng-devel-0.7.9-4.fc28.x86_64
libcurl-devel-7.59.0-6.fc28.x86_64
libfdt-devel-1.4.6-5.fc28.x86_64
libpng-devel-1.6.34-6.fc28.x86_64
librbd-devel-12.2.7-1.fc28.x86_64
libssh2-devel-1.8.0-7.fc28.x86_64
libubsan-8.1.1-5.fc28.x86_64
libusbx-devel-1.0.22-1.fc28.x86_64
libxml2-devel-2.9.8-4.fc28.x86_64
llvm-6.0.1-6.fc28.x86_64
lzo-devel-2.08-12.fc28.x86_64
make-4.2.1-6.fc28.x86_64
mingw32-SDL2-2.0.5-3.fc27.noarch
mingw32-bzip2-1.0.6-9.fc27.noarch
mingw32-curl-7.57.0-1.fc28.noarch
mingw32-glib2-2.56.1-1.fc28.noarch
mingw32-gmp-6.1.2-2.fc27.noarch
mingw32-gnutls-3.6.2-1.fc28.noarch
mingw32-gtk3-3.22.30-1.fc28.noarch
mingw32-libjpeg-turbo-1.5.1-3.fc27.noarch
mingw32-libpng-1.6.29-2.fc27.noarch
mingw32-libssh2-1.8.0-3.fc27.noarch
mingw32-libtasn1-4.13-1.fc28.noarch
mingw32-nettle-3.4-1.fc28.noarch
mingw32-pixman-0.34.0-3.fc27.noarch
mingw32-pkg-config-0.28-9.fc27.x86_64
mingw64-SDL2-2.0.5-3.fc27.noarch
mingw64-bzip2-1.0.6-9.fc27.noarch
mingw64-curl-7.57.0-1.fc28.noarch
mingw64-glib2-2.56.1-1.fc28.noarch
mingw64-gmp-6.1.2-2.fc27.noarch
mingw64-gnutls-3.6.2-1.fc28.noarch
mingw64-gtk3-3.22.30-1.fc28.noarch
mingw64-libjpeg-turbo-1.5.1-3.fc27.noarch
mingw64-libpng-1.6.29-2.fc27.noarch
mingw64-libssh2-1.8.0-3.fc27.noarch
mingw64-libtasn1-4.13-1.fc28.noarch
mingw64-nettle-3.4-1.fc28.noarch
mingw64-pixman-0.34.0-3.fc27.noarch
mingw64-pkg-config-0.28-9.fc27.x86_64
ncurses-devel-6.1-5.20180224.fc28.x86_64
nettle-devel-3.4-2.fc28.x86_64
nss-devel-3.38.0-1.0.fc28.x86_64
numactl-devel-2.0.11-8.fc28.x86_64
package PyYAML is not installed
package libjpeg-devel is not installed
perl-5.26.2-413.fc28.x86_64
pixman-devel-0.34.0-8.fc28.x86_64
python3-3.6.5-1.fc28.x86_64
snappy-devel-1.1.7-5.fc28.x86_64
sparse-0.5.2-1.fc28.x86_64
spice-server-devel-0.14.0-4.fc28.x86_64
systemtap-sdt-devel-3.3-1.fc28.x86_64
tar-1.30-3.fc28.x86_64
usbredir-devel-0.8.0-1.fc28.x86_64

Re: [Qemu-devel] [RFC PATCH spice v2 1/2] QXL interface: add functions to identify monitors in the guest

2018-11-05 Thread Gerd Hoffmann

  Hi,

> > I can see that it simplifies the logic in spice-server if we have a
> > single function call instead of two.  So we could deprecate
> > spice_qxl_set_max_monitors() in favour of your
> > spice_qxl_set_device_info() variant.
> > 
> > spice_qxl_set_max_monitors() would then basically do this:
> > 
> > spice_qxl_set_max_monitors(qxl, max)
> > {
> > spice_qxl_set_device_info(qxl, NULL, 0, max);
> > }
> 
> I can't actually do this, it does the wrong thing for the one-channel-
> per-head (virtio-gpu) case. For that case it would send all
> device_display_ids to 0 on all interfaces, but they need to be
> different numbers.

Well, qemu calls spice_qxl_set_max_monitors() only in case there are
multiple monitors in one channel, i.e. qxl.

cheers,
  Gerd

Re: [Qemu-devel] How to emulate block I/O timeout on qemu side?

2018-11-05 Thread Dongli Zhang




On 11/06/2018 01:49 AM, Eric Blake wrote:
> On 11/2/18 3:11 AM, Dongli Zhang wrote:
>> Hi,
>>
>> Is there any way to emulate I/O timeout on qemu side (not fault injection in 
>> VM
>> kernel) without modifying qemu source code?
> 
> You may be interested in Rich's work on nbdkit.  If you don't mind the 
> overhead
> of the host connecting through NBD, then you can use nbdkit's delay and
> fault-injection filters for inserting delays or even run-time-controllable
> failures to investigate how the guest reacts to those situations

Thank you all very much for the suggestions. I will take a look at nbdkit.

So far I am reproducing the issue with NFS (by deliberately shutting down the
link to the NFS server where the image is placed), but it did not work well.

> 
>>
>> For instance, I would like to observe/study/debug the I/O timeout handling of
>> nvme, scsi, virtio-blk (not supported) of VM kernel.
>>
>> Is there a way to trigger this on purpose on qemu side?
>>
>> Thank you very much!
>>
>> Dongli Zhang
>>
>>
> 

Dongli Zhang

Re: [Qemu-devel] [PATCH] target/ppc: fix mtmsr instruction for icount

2018-11-05 Thread Pavel Dovgalyuk

> From: David Gibson [mailto:da...@gibson.dropbear.id.au]
> On Tue, Oct 30, 2018 at 12:30:31PM +0300, Pavel Dovgalyuk wrote:
> > This patch fixes processing of mtmsr instructions in icount mode.
> > In this mode writing to interrupt/peripheral state is controlled
> > by can_do_io flag. This flag must be set explicitly before helper
> > function invocation.
> >
> > Signed-off-by: Maria Klimushenkova 
> > Signed-off-by: Pavel Dovgalyuk 
> 
> Applied to ppc-for-3.1, thanks.

Thanks. What about this one
https://patchew.org/QEMU/20181030122134.11055.15711.stgit@pasha-VirtualBox/
There is a mess with the subject, but the code is ok :)


Pavel Dovgalyuk

Re: [Qemu-devel] [PATCH RFC v7 6/9] migration: fix the multifd code when receiving less channels

2018-11-05 Thread Fei Li





On 11/02/2018 10:46 AM, Peter Xu wrote:

On Thu, Nov 01, 2018 at 06:17:12PM +0800, Fei Li wrote:

[...]


@@ -1339,7 +1339,7 @@ bool multifd_recv_all_channels_created(void)
  }
  
  /* Return true if multifd is ready for the migration, otherwise false */

-bool multifd_recv_new_channel(QIOChannel *ioc)
+bool multifd_recv_new_channel(QIOChannel *ioc, Error **errp)
  {
  MultiFDRecvParams *p;
  Error *local_err = NULL;
@@ -1347,6 +1347,9 @@ bool multifd_recv_new_channel(QIOChannel *ioc)
  
  id = multifd_recv_initial_packet(ioc, &local_err);

  if (id < 0) {
+error_propagate_prepend(errp, local_err,
+"failed to receive packet via multifd channel %x: ",

I'd use either %d or 0x%x, and some indent issue.

Otherwise looks good to me.  Thanks,


Thanks, fixed as:
 id = multifd_recv_initial_packet(ioc, &local_err);
 if (id < 0) {
+    error_propagate_prepend(errp, local_err,
+    "failed to receive packet"
+    " via multifd channel %d: ",
+    multifd_recv_state->count);

Have a nice day :)
Fei

[Qemu-devel] [PATCH 1/2] tpm: check localities index

2018-11-05 Thread P J P

From: Prasad J Pandit 

While performing mmio device r/w operations, guest could set 'addr'
parameter such that 'locty' index exceeds TPM_TIS_NUM_LOCALITIES=5.
Add check to avoid OOB access.

Reported-by: Cheng Feng 
Signed-off-by: Prasad J Pandit 
---
 hw/tpm/tpm_tis.c | 10 --
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/hw/tpm/tpm_tis.c b/hw/tpm/tpm_tis.c
index 12f5c9a759..20126dd838 100644
--- a/hw/tpm/tpm_tis.c
+++ b/hw/tpm/tpm_tis.c
@@ -293,6 +293,10 @@ static void tpm_tis_request_completed(TPMIf *ti, int ret)
 uint8_t locty = s->cmd.locty;
 uint8_t l;
 
+if (locty >= TPM_TIS_NUM_LOCALITIES) {
+return;
+}
+
 if (s->cmd.selftest_done) {
 for (l = 0; l < TPM_TIS_NUM_LOCALITIES; l++) {
 s->loc[locty].sts |= TPM_TIS_STS_SELFTEST_DONE;
@@ -401,7 +405,8 @@ static uint64_t tpm_tis_mmio_read(void *opaque, hwaddr addr,
 uint32_t avail;
 uint8_t v;
 
-if (tpm_backend_had_startup_error(s->be_driver)) {
+if (tpm_backend_had_startup_error(s->be_driver)
+|| locty >= TPM_TIS_NUM_LOCALITIES) {
 return 0;
 }
 
@@ -530,7 +535,8 @@ static void tpm_tis_mmio_write(void *opaque, hwaddr addr,
 return;
 }
 
-if (tpm_backend_had_startup_error(s->be_driver)) {
+if (tpm_backend_had_startup_error(s->be_driver)
+|| locty >= TPM_TIS_NUM_LOCALITIES) {
 return;
 }
 
-- 
2.17.2

[Qemu-devel] [PATCH 2/2] tpm: use loop iterator to set sts data field

2018-11-05 Thread P J P

From: Prasad J Pandit 

When TIS request is done, set 'sts' data field across all localities.

Signed-off-by: Prasad J Pandit 
---
 hw/tpm/tpm_tis.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hw/tpm/tpm_tis.c b/hw/tpm/tpm_tis.c
index 20126dd838..58d90645bc 100644
--- a/hw/tpm/tpm_tis.c
+++ b/hw/tpm/tpm_tis.c
@@ -299,7 +299,7 @@ static void tpm_tis_request_completed(TPMIf *ti, int ret)
 
 if (s->cmd.selftest_done) {
 for (l = 0; l < TPM_TIS_NUM_LOCALITIES; l++) {
-s->loc[locty].sts |= TPM_TIS_STS_SELFTEST_DONE;
+s->loc[l].sts |= TPM_TIS_STS_SELFTEST_DONE;
 }
 }
 
-- 
2.17.2

Re: [Qemu-devel] [PATCH RFC v7 1/9] Fix segmentation fault when qemu_signal_init fails

2018-11-05 Thread Fei Li


Hi,


On 11/05/2018 09:32 PM, Juan Quintela wrote:

Fei Li  wrote:

When qemu_signal_init() fails in qemu_init_main_loop(), we return
without setting an error.  Its callers crash then when they try to
report the error with error_report_err().

To avoid such segmentation fault, add a new Error parameter to make
the call trace to propagate the err to the final caller.

Hi

I agree that there is a bug here.  But I think that the patch
is not 100% correct.  What is the guarantee that when we call
qemu_signal_init() errp is not *already* assigned?

I think that we need to use here the same code that in the call to
aio_context_new() ...

i.e.


instead of this


  init_clocks(qemu_timer_notify_cb);
  
-ret = qemu_signal_init();

+ret = qemu_signal_init(errp);
  if (ret) {
  return ret;
  }

 init_clocks(qemu_timer_notify_cb);

-ret = qemu_signal_init();
+ret = qemu_signal_init(&local_error);
 if (ret) {
+ error_propagate(errp, local_error);
  return ret;
 }

This way it works correctly if errp is NULL, errp is already assigned,
etc, etc,

Or I am missing something?

Later, Juan.
We have discussed this in the first round of this patch series, just as 
Daniel

and Fam said, we only need the local_err & error_propagate() when functions
like object_new_with_propv() returns void, in that way we need the 
&local_err to

check whether that function succeeds.
But in qemu_signal_init, we have the "if (ret) {...}" to judge whether 
it succeeds.

For more details, the following threads can be referred:

09/04/2018 07:26 PM
Re: [Qemu-devel] [PATCH 1/5] Fix segmentation fault when 
qemu_signal_init fails


BTW, if qemu_signalfd() fails, we use an "error_setg_errno()" to handle:
- for NULL errp, we just set the error message to errp;
- for not-NULL errp, besides the error_setv() we have the 
error_handle_fatal(errp, err).
  If the passed errp is &error_fatal/&error_abort, qemu will exit(1) 
right here.


Have a nice day, thanks :)
Fei

Re: [Qemu-devel] [PULL 03/48] qemu-timer: introduce timer attributes

2018-11-05 Thread Artem Pisarenko

> hw/core/ptimer.o: In function `timer_new_tl':
> /home/eblake/qemu/include/qemu/timer.h:536: undefined reference to
> `timer_init_tl'
> collect2: error: ld returned 1 exit status
> make: *** [/home/eblake/qemu/rules.mak:124: tests/ptimer-test] Error 1
> make: *** Waiting for unfinished jobs

I wasn't able to reproduce that on
commit 89a603a0c80ae3d6a8711571550b2ae9a01ea909 (is it that commit you
point to ?).
Neither 'make check' fails, nor "include/qemu/timer.h:536" points to
meaningful lines of code, nor full project text search shows anything like
'timer_new_tl' or 'timer_init_tl'. Same for merge
commit b312532fd03413d0e6ae6767ec793a3e30f487b8.
Looks like 'make' just failed to rebuild dependencies correctly and needs
clean/distclean.

Re: [Qemu-devel] [PATCH RFC v7 7/9] migration: remove unused &local_err parameter in migrate_set_error

2018-11-05 Thread Fei Li





On 11/05/2018 09:59 PM, Juan Quintela wrote:

Fei Li  wrote:

Always call migrate_set_error() to set the error state without relying
on whether multifd_save_cleanup() succeeds. As the passed &local_err
is never used in multifd_save_cleanup(), remove it.

Error is not used, you are right.

But then just change the prototype to:

void multifd_save_cleanup(void);

??

With that change,

Reviewed-by: Juan Quintela 

Thanks for the review!
Have changed that function to be void, besides, correct the previous 
erroneous

function name in the patch title:
s/migrate_set_error/multifd_save_cleanup/g.

[Qemu-devel] [PATCH 1/1] virtio-blk: fix comment for virtio_blk_rw_complete as nalloc is initially -1

2018-11-05 Thread Dongli Zhang

The initial value of nalloc is -1, but not 1.

Signed-off-by: Dongli Zhang 
---
This is based on git://git.kernel.org/pub/scm/virt/kvm/mst/qemu.git 
tags/for_upstream

 hw/block/virtio-blk.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c
index 83cf5c0..30999c3 100644
--- a/hw/block/virtio-blk.c
+++ b/hw/block/virtio-blk.c
@@ -96,7 +96,7 @@ static void virtio_blk_rw_complete(void *opaque, int ret)
 trace_virtio_blk_rw_complete(vdev, req, ret);
 
 if (req->qiov.nalloc != -1) {
-/* If nalloc is != 1 req->qiov is a local copy of the original
+/* If nalloc is != -1 req->qiov is a local copy of the original
  * external iovec. It was allocated in submit_requests to be
  * able to merge requests. */
 qemu_iovec_destroy(&req->qiov);
-- 
2.7.4

Re: [Qemu-devel] [RFC 0/2] vhost-vfio: introduce mdev based HW vhost backend

2018-11-05 Thread Jason Wang




On 2018/10/16 下午9:23, Xiao Wang wrote:

What's this
===
Following the patch (vhost: introduce mdev based hardware vhost backend)
https://lwn.net/Articles/750770/, which defines a generic mdev device for
vhost data path acceleration (aliased as vDPA mdev below), this patch set
introduces a new net client type: vhost-vfio.



Thanks a lot for a such interesting series. Some generic questions:


If we consider using a software backend (e.g. vhost-kernel or a relay of 
virtio-vhost-user or other cases) as well in the future, maybe 
vhost-mdev is better, which means it is not tied to VFIO anyway.





Currently we have 2 types of vhost backends in QEMU: vhost kernel (tap)
and vhost-user (e.g. DPDK vhost), in order to have a kernel space HW vhost
acceleration framework, the vDPA mdev device works as a generic configuring
channel.



Does "generic" configuring channel mean dpdk will also go this way? 
E.g. will it have a vhost mdev pmd?




  It exposes to user space a non-vendor-specific configuration
interface for setting up a vhost HW accelerator,



Or even a software translation layer on top of existing hardware.



based on this, this patch
set introduces a third vhost backend called vhost-vfio.

How does it work

The vDPA mdev defines 2 BAR regions, BAR0 and BAR1. BAR0 is the main
device interface, vhost messages can be written to or read from this
region following below format. All the regular vhost messages about vring
addr, negotiated features, etc., are written to this region directly.



If I understand this correctly, the mdev is not passed through 
to the guest directly. So what's the reason for inventing a PCI-like device 
here? I'm asking since:


- the vhost protocol is transport independent, so we should consider supporting 
transports other than PCI. I know we can even do it with the existing design, 
but it looks rather odd if we do e.g. a ccw device with a PCI-like mediated 
device.


- can we try to reuse the vhost-kernel ioctl? Less API means fewer bugs and 
more code reuse. E.g. virtio-user can benefit from the vhost kernel ioctl 
API almost with no changes, I believe.





struct vhost_vfio_op {
__u64 request;
__u32 flags;
/* Flag values: */
#define VHOST_VFIO_NEED_REPLY 0x1 /* Whether need reply */
__u32 size;
union {
__u64 u64;
struct vhost_vring_state state;
struct vhost_vring_addr addr;
struct vhost_memory memory;
} payload;
};

BAR1 is defined to be a region of doorbells, QEMU can use this region as
host notifier for virtio. To optimize virtio notify, vhost-vfio tries to
mmap the corresponding page on BAR1 for each queue and leverage EPT to let
guest virtio driver kick vDPA device doorbell directly. For virtio 0.95
case in which we cannot set host notifier memory region, QEMU will help to
relay the notify to vDPA device.

Note: EPT mapping requires each queue's notify address locates at the
beginning of a separate page, parameter "page-per-vq=on" could help.



I think qemu should prepare a fallback for this if page-per-vq is off.




For interrupt setting, vDPA mdev device leverages existing VFIO API to
enable interrupt config in user space. In this way, KVM's irqfd for virtio
can be set to mdev device by QEMU using ioctl().

vhost-vfio net client will set up a vDPA mdev device which is specified
by a "sysfsdev" parameter, during the net client init, the device will be
opened and parsed using VFIO API, the VFIO device fd and device BAR region
offset will be kept in a VhostVFIO structure, this initialization provides
a channel to configure vhost information to the vDPA device driver.

To do later
===
1. The net client initialization uses raw VFIO API to open vDPA mdev
device, it's better to provide a set of helpers in hw/vfio/common.c
to help vhost-vfio initialize device easily.

2. For device DMA mapping, QEMU passes memory region info to mdev device
and let kernel parent device driver program IOMMU. This is a temporary
implementation, for future when IOMMU driver supports mdev bus, we
can use VFIO API to program IOMMU directly for parent device.
Refer to the patch (vfio/mdev: IOMMU aware mediated device):
https://lkml.org/lkml/2018/10/12/225



As Steve mentioned at the KVM Forum, it's better to have at least one 
sample driver, e.g. virtio-net itself.


Then it would be more convenient for the reviewer to evaluate the whole 
stack.


Thanks




Vhost-vfio usage

# Query the number of available mdev instances
$ cat 
/sys/class/mdev_bus/:84:00.3/mdev_supported_types/ifcvf_vdpa-vdpa_virtio/available_instances

# Create a mdev instance
$ echo $UUID > 
/sys/class/mdev_bus/:84:00.3/mdev_supported_types/ifcvf_vdpa-vdpa_virtio/create

# Launch QEMU with a virtio-net device
 qemu-system-x86_64 -cpu host -enable-kvm \
 
 -mem-prealloc \
 -netdev type=vhost-vfio,sysfsdev=/sys/bus/mdev/devices/$UUID,id=mynet\
 -device virtio-net-pc

Re: [Qemu-devel] [PULL 05/33] virtio-blk: fix comment for virtio_blk_rw_complete

2018-11-05 Thread Michael S. Tsirkin

On Tue, Nov 06, 2018 at 11:17:03AM +0800, Dongli Zhang wrote:
> 
> 
> On 11/06/2018 02:15 AM, Michael S. Tsirkin wrote:
> > From: Yaowei Bai 
> > 
> > Here should be submit_requests, there is no submit_merged_requests
> > function.
> > 
> > Signed-off-by: Yaowei Bai 
> > Reviewed-by: Michael S. Tsirkin 
> > Signed-off-by: Michael S. Tsirkin 
> > ---
> >  hw/block/virtio-blk.c | 4 ++--
> >  1 file changed, 2 insertions(+), 2 deletions(-)
> > 
> > diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c
> > index 225fe44b7a..83cf5c01f9 100644
> > --- a/hw/block/virtio-blk.c
> > +++ b/hw/block/virtio-blk.c
> > @@ -97,8 +97,8 @@ static void virtio_blk_rw_complete(void *opaque, int ret)
> >  
> >  if (req->qiov.nalloc != -1) {
> >  /* If nalloc is != 1 req->qiov is a local copy of the original
> 
> Should it be "If nalloc is != -1" in the comment? Seems the initial state is 
> -1.

Makes sense. Patch?

> > - * external iovec. It was allocated in submit_merged_requests
> > - * to be able to merge requests. */
> > + * external iovec. It was allocated in submit_requests to be
> > + * able to merge requests. */
> >  qemu_iovec_destroy(&req->qiov);
> >  }
> >  
> > 
> 
> Dongli Zhang

Re: [Qemu-devel] [PATCH for 3.2 v2 0/7] hw/arm/bcm2835: Add basic support for cprman (clock subsystem)

2018-11-05 Thread no-reply

Hi,

This series failed docker-mingw@fedora build test. Please find the testing 
commands and
their output below. If you have Docker installed, you can probably reproduce it
locally.

Type: series
Message-id: 20181102001303.32640-1-f4...@amsat.org
Subject: [Qemu-devel] [PATCH for 3.2 v2 0/7] hw/arm/bcm2835: Add basic support 
for cprman (clock subsystem)

=== TEST SCRIPT BEGIN ===
#!/bin/bash
time make docker-test-mingw@fedora SHOW_ENV=1 J=8
=== TEST SCRIPT END ===

Updating 3c8cf5a9c21ff8782164d1def7f44bd888713384
Switched to a new branch 'test'
e88244ce63 MAINTAINERS: Volunteer to review Raspi patches
ccd97451e2 hw/arm/bcm2835: Add basic support for cprman (clock subsystem)
c040413670 hw/arm/bcm2835: Add various unimplemented peripherals
8738d4cc1d hw/arm/bcm2835: Rename some definitions
5cad3a8d3f hw/arm/bcm2835: Use 0x prefix for hex numbers
d5b6b933ad hw/misc/bcm2835_property: Handle the 'domain state' property
14f6bd3c7f MAINTAINERS: Add an entry for the Raspberry Pi machines

=== OUTPUT BEGIN ===
  BUILD   fedora
make[1]: Entering directory '/var/tmp/patchew-tester-tmp-pvww0as4/src'
  GEN 
/var/tmp/patchew-tester-tmp-pvww0as4/src/docker-src.2018-11-05-22.26.31.3469/qemu.tar
Cloning into 
'/var/tmp/patchew-tester-tmp-pvww0as4/src/docker-src.2018-11-05-22.26.31.3469/qemu.tar.vroot'...
done.
Checking out files:  20% (1319/6453)   
Checking out files:  21% (1356/6453)   
Checking out files:  22% (1420/6453)   
Checking out files:  23% (1485/6453)   
Checking out files:  24% (1549/6453)   
Checking out files:  25% (1614/6453)   
Checking out files:  26% (1678/6453)   
Checking out files:  27% (1743/6453)   
Checking out files:  28% (1807/6453)   
Checking out files:  29% (1872/6453)   
Checking out files:  30% (1936/6453)   
Checking out files:  31% (2001/6453)   
Checking out files:  32% (2065/6453)   
Checking out files:  33% (2130/6453)   
Checking out files:  34% (2195/6453)   
Checking out files:  35% (2259/6453)   
Checking out files:  36% (2324/6453)   
Checking out files:  37% (2388/6453)   
Checking out files:  38% (2453/6453)   
Checking out files:  39% (2517/6453)   
Checking out files:  40% (2582/6453)   
Checking out files:  41% (2646/6453)   
Checking out files:  42% (2711/6453)   
Checking out files:  43% (2775/6453)   
Checking out files:  44% (2840/6453)   
Checking out files:  45% (2904/6453)   
Checking out files:  46% (2969/6453)   
Checking out files:  47% (3033/6453)   
Checking out files:  48% (3098/6453)   
Checking out files:  49% (3162/6453)   
Checking out files:  50% (3227/6453)   
Checking out files:  51% (3292/6453)   
Checking out files:  52% (3356/6453)   
Checking out files:  53% (3421/6453)   
Checking out files:  53% (3423/6453)   
Checking out files:  54% (3485/6453)   
Checking out files:  55% (3550/6453)   
Checking out files:  56% (3614/6453)   
Checking out files:  57% (3679/6453)   
Checking out files:  58% (3743/6453)   
Checking out files:  59% (3808/6453)   
Checking out files:  60% (3872/6453)   
Checking out files:  61% (3937/6453)   
Checking out files:  62% (4001/6453)   
Checking out files:  63% (4066/6453)   
Checking out files:  64% (4130/6453)   
Checking out files:  65% (4195/6453)   
Checking out files:  66% (4259/6453)   
Checking out files:  67% (4324/6453)   
Checking out files:  68% (4389/6453)   
Checking out files:  69% (4453/6453)   
Checking out files:  70% (4518/6453)   
Checking out files:  71% (4582/6453)   
Checking out files:  72% (4647/6453)   
Checking out files:  73% (4711/6453)   
Checking out files:  74% (4776/6453)   
Checking out files:  75% (4840/6453)   
Checking out files:  76% (4905/6453)   
Checking out files:  77% (4969/6453)   
Checking out files:  78% (5034/6453)   
Checking out files:  79% (5098/6453)   
Checking out files:  80% (5163/6453)   
Checking out files:  81% (5227/6453)   
Checking out files:  82% (5292/6453)   
Checking out files:  82% (5324/6453)   
Checking out files:  83% (5356/6453)   
Checking out files:  84% (5421/6453)   
Checking out files:  85% (5486/6453)   
Checking out files:  86% (5550/6453)   
Checking out files:  86% (5566/6453)   
Checking out files:  87% (5615/6453)   
Checking out files:  88% (5679/6453)   
Checking out files:  89% (5744/6453)   
Checking out files:  90% (5808/6453)   
Checking out files:  91% (5873/6453)   
Checking out files:  92% (5937/6453)   
Checking out files:  93% (6002/6453)   
Checking out files:  94% (6066/6453)   
Checking out files:  95% (6131/6453)   
Checking out files:  96% (6195/6453)   
Checking out files:  97% (6260/6453)   
Checking out files:  98% (6324/6453)   
Checking out files:  99% (6389/6453)   
Checking out files: 100% (6453/6453)   
Checking out files: 100% (6453/6453), done.
Your branch is up-to-date with 'origin/test'.
Submodule 'dtc' (git://git.qemu-project.org/dtc.git) registered for path 'dtc'
Cloning into 
'/var/tmp/patchew-tester-tmp-pvww0as4/src/docker-src.2018-11-05-22.26.31.3469/qemu.tar.vroot/dtc'...
Submodule path 'dtc': checked out

Re: [Qemu-devel] [PULL 05/33] virtio-blk: fix comment for virtio_blk_rw_complete

2018-11-05 Thread Dongli Zhang




On 11/06/2018 02:15 AM, Michael S. Tsirkin wrote:
> From: Yaowei Bai 
> 
> Here should be submit_requests, there is no submit_merged_requests
> function.
> 
> Signed-off-by: Yaowei Bai 
> Reviewed-by: Michael S. Tsirkin 
> Signed-off-by: Michael S. Tsirkin 
> ---
>  hw/block/virtio-blk.c | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
> 
> diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c
> index 225fe44b7a..83cf5c01f9 100644
> --- a/hw/block/virtio-blk.c
> +++ b/hw/block/virtio-blk.c
> @@ -97,8 +97,8 @@ static void virtio_blk_rw_complete(void *opaque, int ret)
>  
>  if (req->qiov.nalloc != -1) {
>  /* If nalloc is != 1 req->qiov is a local copy of the original

Should it be "If nalloc is != -1" in the comment? Seems the initial state is -1.

> - * external iovec. It was allocated in submit_merged_requests
> - * to be able to merge requests. */
> + * external iovec. It was allocated in submit_requests to be
> + * able to merge requests. */
>  qemu_iovec_destroy(&req->qiov);
>  }
>  
> 

Dongli Zhang

Re: [Qemu-devel] [PATCH 3/4] scsi-generic: avoid invalid access to struct when emulating block limits

2018-11-05 Thread Max Reitz

On 29.10.18 18:34, Paolo Bonzini wrote:
> Emulation of the block limits VPD page called back into scsi-disk.c,
> which however expected the request to be for a SCSIDiskState and
> accessed a scsi-generic device outside the bounds of its struct
> (namely to retrieve s->max_unmap_size and s->max_io_size).
> 
> To avoid this, move the emulation code to a separate function that
> takes a new SCSIBlockLimits struct and marshals it into the VPD
> response format.
> 
> Reported-by: Max Reitz 
> Signed-off-by: Paolo Bonzini 
> ---
>  hw/scsi/Makefile.objs   |  2 +-
>  hw/scsi/emulation.c | 42 +
>  hw/scsi/scsi-disk.c | 92 -
>  hw/scsi/scsi-generic.c  | 22 +++--
>  include/hw/scsi/emulation.h | 16 +++
>  include/hw/scsi/scsi.h  |  1 -
>  6 files changed, 98 insertions(+), 77 deletions(-)
>  create mode 100644 hw/scsi/emulation.c
>  create mode 100644 include/hw/scsi/emulation.h
> 

[...]

> diff --git a/hw/scsi/emulation.c b/hw/scsi/emulation.c
> new file mode 100644
> index 00..94c2254bb4
> --- /dev/null
> +++ b/hw/scsi/emulation.c
> @@ -0,0 +1,42 @@
> +#include "qemu/osdep.h"
> +#include "qemu/units.h"
> +#include "qemu/bswap.h"
> +#include "hw/scsi/emulation.h"
> +
> +int scsi_emulate_block_limits(uint8_t *outbuf, SCSIBlockLimits *bl)

I'd make @bl a const *, but it's not like qemu is the kind of code base
to complain about strict const-ness.

> +{
> +/* required VPD size with unmap support */
> +memset(outbuf, 0, 0x3C);

Upper case hex here...

> +
> +outbuf[0] = bl->wsnz; /* wsnz */
> +
> +if (bl->max_io_sectors) {
> +/* optimal transfer length granularity.  This field and the optimal
> + * transfer length can't be greater than maximum transfer length.
> + */
> +stw_be_p(outbuf + 2, MIN(bl->min_io_size, bl->max_io_sectors));
> +
> +/* maximum transfer length */
> +stl_be_p(outbuf + 4, bl->max_io_sectors);
> +
> +/* optimal transfer length */
> +stl_be_p(outbuf + 8, MIN(bl->opt_io_size, bl->max_io_sectors));
> +} else {
> +stw_be_p(outbuf + 2, bl->min_io_size);
> +stl_be_p(outbuf + 8, bl->opt_io_size);
> +}
> +
> +/* max unmap LBA count */
> +stl_be_p(outbuf + 16, bl->max_unmap_sectors);
> +
> +/* max unmap descriptors */
> +stl_be_p(outbuf + 20, bl->max_unmap_descr);
> +
> +/* optimal unmap granularity; alignment is zero */
> +stl_be_p(outbuf + 24, bl->unmap_sectors);
> +
> +/* max write same size, make it the same as maximum transfer length */
> +stl_be_p(outbuf + 36, bl->max_io_sectors);
> +
> +return 0x3c;

...and lower case here?  (Sorry, I couldn't just bear it.)

> +}

[...]

> diff --git a/hw/scsi/scsi-generic.c b/hw/scsi/scsi-generic.c
> index c5497bbea8..8fc74ef0bd 100644
> --- a/hw/scsi/scsi-generic.c
> +++ b/hw/scsi/scsi-generic.c
> @@ -16,6 +16,7 @@
>  #include "qemu-common.h"
>  #include "qemu/error-report.h"
>  #include "hw/scsi/scsi.h"
> +#include "hw/scsi/emulation.h"
>  #include "sysemu/block-backend.h"
>  
>  #ifdef __linux__
> @@ -209,9 +210,24 @@ static void scsi_handle_inquiry_reply(SCSIGenericReq *r, 
> SCSIDevice *s)
>  }
>  }
>  
> -static int scsi_emulate_block_limits(SCSIGenericReq *r)
> +static int scsi_generic_emulate_block_limits(SCSIGenericReq *r, SCSIDevice 
> *s)
>  {
> -r->buflen = scsi_disk_emulate_vpd_page(&r->req, r->buf);
> +int len, buflen;

buflen is unused, so this does not compile for me.

> +uint8_t buf[64];
> +
> +SCSIBlockLimits bl = {
> +.max_io_sectors = blk_get_max_transfer(s->conf.blk) / s->blocksize
> +};
> +
> +memset(r->buf, 0, r->buflen);
> +stb_p(buf, s->type);
> +stb_p(buf + 1, 0xb0);
> +len = scsi_emulate_block_limits(buf + 4, &bl);
> +assert(len <= sizeof(buf) - 4);

Let's hope our stack grows downwards, otherwise we'll never get back
here if there was an overflow.  Maybe it would be better to pass the
buffer length to scsi_emulate_block_limits() and then move the assertion
there.

Or if you know that qemu does not support any architecture ABIs where
the stack can grow up, that's OK, too.


With buflen dropped, and you taking full responsibility for any future
bugs on ABIs with upwards stacks when someone extended
scsi_emulate_block_limits(), forgetting to adjust the buffer size here
(:-)):

Reviewed-by: Max Reitz 

> +stw_be_p(buf + 2, len);
> +
> +memcpy(r->buf, buf, MIN(r->buflen, len + 4));
> +
>  r->io_header.sb_len_wr = 0;
>  
>  /*



signature.asc
Description: OpenPGP digital signature

Re: [Qemu-devel] [PATCH 4/4] scsi-generic: do not do VPD emulation for sense other than ILLEGAL_REQUEST

2018-11-05 Thread Max Reitz

On 29.10.18 18:34, Paolo Bonzini wrote:
> Pass other sense, such as UNIT_ATTENTION or BUSY, directly to the
> guest.
> 
> Reported-by: Max Reitz 
> Signed-off-by: Paolo Bonzini 
> ---
>  hw/scsi/scsi-generic.c | 8 +---
>  1 file changed, 5 insertions(+), 3 deletions(-)

[...]

> @@ -269,12 +268,15 @@ static void scsi_read_complete(void * opaque, int ret)
>   * resulted in sense error but would need emulation.
>   * In this case, emulate a valid VPD response.
>   */
> -if (s->needs_vpd_bl_emulation) {
> +if (ret == 0 &&
> +(r->io_header.driver_status & SG_ERR_DRIVER_SENSE) &&
> +scsi_parse_sense_buf(r->req.sense, r->io_header.sb_len_wr).key == 
> ILLEGAL_REQUEST &&

As noted by Patchew, this line needs to be split.  With that done:

Reviewed-by: Max Reitz 


Thanks for the series!

Max



signature.asc
Description: OpenPGP digital signature

Re: [Qemu-devel] [PATCH 1/4] scsi-generic: keep VPD page list sorted

2018-11-05 Thread Max Reitz

On 29.10.18 18:34, Paolo Bonzini wrote:
> Block limits emulation is just placing 0xb0 as the final byte of the
> VPD pages list.  However, VPD page numbers must be sorted, so change
> that to an in-place insert.  Since I couldn't find any disk that triggered
> the loop more than once, this was tested by adding manually 0xb1
> at the end of the list and checking that 0xb0 was added before.
> 
> Reported-by: Max Reitz 
> Signed-off-by: Paolo Bonzini 
> ---
>  hw/scsi/scsi-generic.c | 19 +++
>  1 file changed, 15 insertions(+), 4 deletions(-)

Reviewed-by: Max Reitz 



signature.asc
Description: OpenPGP digital signature

Re: [Qemu-devel] [PATCH 2/4] scsi-generic: avoid out-of-bounds access to VPD page list

2018-11-05 Thread Max Reitz

On 29.10.18 18:34, Paolo Bonzini wrote:
> A device can report an excessive number of VPD pages when asked for a
> list; this can cause an out-of-bounds access to buf in
> scsi_generic_set_vpd_bl_emulation.  It should not happen, but
> it is technically not incorrect so handle it: do not check any byte
> past the allocation length that was sent to the INQUIRY command.

(Minor note: Since the list must be kept in ascending order, it's fully
correct to only check the first sizeof(buf) - 4 == 0xf6 bytes, because
it actually has to be at index 0xb0 or before, if the device supports it.)

> Reported-by: Max Reitz 
> Signed-off-by: Paolo Bonzini 
> ---
>  hw/scsi/scsi-generic.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/hw/scsi/scsi-generic.c b/hw/scsi/scsi-generic.c
> index aebb7cdd82..c5497bbea8 100644
> --- a/hw/scsi/scsi-generic.c
> +++ b/hw/scsi/scsi-generic.c
> @@ -538,7 +538,7 @@ static void scsi_generic_set_vpd_bl_emulation(SCSIDevice 
> *s)
>  }
>  
>  page_len = buf[3];

You lost your enthusiasm for fixing the accesses to be proper big-endian
16-bit accesses so quickly? :-)

> -for (i = 4; i < page_len + 4; i++) {
> +for (i = 4; i < MIN(sizeof(buf), page_len + 4); i++) {

Reviewed-by: Max Reitz 

>  if (buf[i] == 0xb0) {
>  s->needs_vpd_bl_emulation = false;
>  return;
> 




signature.asc
Description: OpenPGP digital signature

Re: [Qemu-devel] [PATCH] block: Make more block drivers compile-time configurable

2018-11-05 Thread Max Reitz

On 05.11.18 16:25, Markus Armbruster wrote:
> Max Reitz  writes:
> 
>> On 19.10.18 13:34, Markus Armbruster wrote:
>>> From: Jeff Cody 
>>>
>>> This adds configure options to control the following block drivers:
>>>
>>> * Bochs
>>> * Cloop
>>> * Dmg
>>> * Qcow (V1)
>>> * Vdi
>>> * Vvfat
>>> * qed
>>> * parallels
>>> * sheepdog
>>>
>>> Each of these defaults to being enabled.
>>>
>>> Signed-off-by: Jeff Cody 
>>> Signed-off-by: Markus Armbruster 
>>> ---
>>>
>>> Hmm, we got quite a few --enable-BLOCK-DRIVER now.  Perhaps a single
>>> list-valued option similar --target-list would be better.  Could be
>>> done on top.
>>>
>>>  block/Makefile.objs | 22 ---
>>>  configure   | 91 +
>>>  2 files changed, 107 insertions(+), 6 deletions(-)
>>>
>>> diff --git a/block/Makefile.objs b/block/Makefile.objs
>>> index c8337bf186..1cad9fc4f1 100644
>>> --- a/block/Makefile.objs
>>> +++ b/block/Makefile.objs
>@@ -1,10 +1,18 @@
>-block-obj-y += raw-format.o qcow.o vdi.o vmdk.o cloop.o bochs.o vpc.o 
> vvfat.o dmg.o
>+block-obj-y += raw-format.o vmdk.o vpc.o
>+block-obj-$(CONFIG_QCOW1) += qcow.o
>+block-obj-$(CONFIG_VDI) += vdi.o
>+block-obj-$(CONFIG_CLOOP) += cloop.o
>+block-obj-$(CONFIG_BOCHS) += bochs.o
>+block-obj-$(CONFIG_VVFAT) += vvfat.o
>+block-obj-$(CONFIG_DMG) += dmg.o
>+
> block-obj-y += qcow2.o qcow2-refcount.o qcow2-cluster.o qcow2-snapshot.o 
> qcow2-cache.o qcow2-bitmap.o
>>
>> [...]
>>
>>> @@ -45,7 +54,8 @@ gluster.o-libs := $(GLUSTERFS_LIBS)
>>>  vxhs.o-libs:= $(VXHS_LIBS)
>>>  ssh.o-cflags   := $(LIBSSH2_CFLAGS)
>>>  ssh.o-libs := $(LIBSSH2_LIBS)
>>> -block-obj-$(if $(CONFIG_BZIP2),m,n) += dmg-bz2.o
>>> +block-obj-dmg-bz2$(if $(CONFIG_BZIP2),m,n) += dmg-bz2.o
>>> +block-obj-$(CONFIG_DMG) += $(block-obj-dmg-bz2-y)
>>
>> This defines "block-obj-dmg-bz2m" or "block-obj-dmg-bz2n", so
>> "block-obj-dmg-bz2-y" is never defined (note both the missing hyphen and
>> the "m" vs. "y").
>>
>> How about:
>>
>> block-obj-dmg-bz2-$(CONFIG_BZIP2) += dmg-bz2.o
> 
> As far as I can tell, CONFIG_BZIP2 is either undefined or "y".  Thus,
> block-obj-dmg-bz2-y is either left undefined or set to dmg-bz2.o.

Yes.

> Perhaps the '+=' should be ':=', but we seem to use '+=' pretty
> indiscriminately.

Yep.  I don't know.  Whatever works, and both do, so...

>> block-obj-$(if $(CONFIG_DMG),m,n) += $(block-obj-dmg-bz2-y)
> 
> As far as I can tell, CONFIG_DMG is also either undefined or "y".  So,
> this adds dmg-bz2.o to block-obj-m if both CONFIG_BZIP2 and CONFIG_DMG
> are enabled.

Yes.

> Shouldn't it be added to block-obj-y, like dmg.o, or am I confused?

The behavior before this patch was to add it to block-obj-m.
27685a8dd08c051fa6d641fe46106fc0dfa51073 has the explanation: We want
the bz2 part to be a module so you can launch qemu even without libbz2
around.  Only when you use dmg will it load that module.

(And if you dig deeper, it was 88d88798b7efe that (intentionally) broke
 that intended behavior, until it was restored by the above commit.)

>> Bonus point: The "+=" are naturally aligned!
> 
> Woot!

:-)

Max

>> (Fun fact on the side: I tried downloading some dmg image, but qemu
>> refused to open that.  ("sector count 409600 for chunk 4 is larger than
>> max (131072)" -- yeah, yeah, I know that I'm not the largest guy) -- but
>> you can test it just by replacing "dmg-bz2.o" by "does-not-exist.o", and
>> then make complains normally, but stops complaining with --disable-dmg
>> or --disable-bzip2.)
> 
> Thanks!
> 




signature.asc
Description: OpenPGP digital signature

Re: [Qemu-devel] [PATCH v2 0/5] Various option help readability improvement suggestions

2018-11-05 Thread Max Reitz

On 05.11.18 15:18, Kevin Wolf wrote:
> Am 19.10.2018 um 18:49 hat Max Reitz geschrieben:
>> I noticed that with the (more or less) recent series from Marc-André the
>> output of qemu-img amend -f qcow2 -o help changed to this:
>>
>> $ ./qemu-img amend -f qcow2 -o help
>> Creation options for 'qcow2':
>> qcow2-create-opts.backing_file=str - File name of a base image
>> qcow2-create-opts.backing_fmt=str - Image format of the base image
>> qcow2-create-opts.cluster_size=size - qcow2 cluster size
>> qcow2-create-opts.compat=str - Compatibility level (0.10 or 1.1)
>> [...]
>>
>> The types are a nice addition, but I didn't like having the list name
>> printed in every single line (in fact, the list name does not make any
>> sense here at all, because there already is a caption which reads
>> "Creation options for 'qcow2'"), and I did not like the use of '=' for
>> types.
>>
>> In general, I don't like the robot-y appearance, which is even worse in
>> things like -device virtio-blk,help, which gives you this (among
>> other lines):
>>
>>> virtio-blk-pci.iothread=link<iothread>
>>
>> Sadly, there isn't much we can do about the "link<iothread>", so this
>> series doesn't improve on that point.
>>
>> What this series does do, however, is it changes these lists not to
>> print the list name on every single line, but only as a caption (and for
>> option lists, this caption is optional, because the caller may want to
>> print a custom caption that is more expressive -- as is the case for
>> qemu-img amend -o help).
>>
>> Consequentially, all list items are indented by two spaces to make clear
>> they belong to the caption.  I can already see that some people might
>> disagree on having this indentation, but I like it, so I have it in this
>> series.
>>
>> Furthermore, types are now enclosed by angle brackets, and the alignment
>> we originally had for descriptions is restored (although now after 24
>> instead of 16 characters, because every option name is now accompanied
>> by indentation and a type).
>>
>>
>> Thus, after this series, the amend output looks like this:
>>
>> $ ./qemu-img amend -f qcow2 -o help
>> Creation options for 'qcow2':
>>   backing_file=<str>     - File name of a base image
>>   backing_fmt=<str>      - Image format of the base image
>>   cluster_size=<size>    - qcow2 cluster size
>>   compat=<str>           - Compatibility level (0.10 or 1.1)
>> [...]
>>
>>
>> virtio-blk's list presents itself like so:
>>
>> $ x86_64-softmmu/qemu-system-x86_64 -device virtio-blk,help
>> virtio-blk-pci options:
>>   iothread=<link<iothread>>
>>   request-merging=<bool> - on/off
>>   secs=<uint32>
>> [...]
>>
>>
>> And now we even print something when there are no options:
>>
>> $ x86_64-softmmu/qemu-system-x86_64 -object can-bus,help
>> There are no options for can-bus.
>>
>> (Before this series, there just is no output.)
>>
>>
>> As a side effect, patch 1 fixes iotest 082.
> 
> Thanks, applied to the block branch.

Thank you both for the quick pong. :-)

Max



signature.asc
Description: OpenPGP digital signature

Re: [Qemu-devel] [PULL 03/48] qemu-timer: introduce timer attributes

2018-11-05 Thread Eric Blake


On 10/18/18 3:31 PM, Paolo Bonzini wrote:

From: Artem Pisarenko 

Attributes are simple flags, associated with individual timers for their
whole lifetime.  They are intended to be used to mark individual timers for
special handling when they fire.

New/init functions family in timer interface updated and refactored (new
'attribute' argument added, timer_list replaced with timer_list_group+type
combinations, comments improved to avoid info duplication).  Also existing
aio interface extended with attribute-enabled variants of functions,
which create/initialize timers.

Signed-off-by: Artem Pisarenko 
Message-Id: 

Signed-off-by: Paolo Bonzini 
---


git bisect points to this patch as the reason that 'make check' is 
failing for me with:


  LINKtests/ptimer-test
  LINKtests/test-qapi-util
hw/core/ptimer.o: In function `timer_new_tl':
/home/eblake/qemu/include/qemu/timer.h:536: undefined reference to 
`timer_init_tl'

collect2: error: ld returned 1 exit status
make: *** [/home/eblake/qemu/rules.mak:124: tests/ptimer-test] Error 1
make: *** Waiting for unfinished jobs

--
Eric Blake, Principal Software Engineer
Red Hat, Inc.   +1-919-301-3266
Virtualization:  qemu.org | libvirt.org

Re: [Qemu-devel] [PATCH 1/2] contrib/elf2dmp: use GLib file mapping

2018-11-05 Thread Eric Blake


On 11/1/18 7:28 PM, Viktor Prutyanov wrote:

Replace POSIX mmap with GLib g_mapped_file_new to make elf2dmp
cross-paltform. After this patch there are no direct POSIX calls.


s/paltform/platform/



Signed-off-by: Viktor Prutyanov 
---
  Makefile  |  2 +-



--
Eric Blake, Principal Software Engineer
Red Hat, Inc.   +1-919-301-3266
Virtualization:  qemu.org | libvirt.org

Re: [Qemu-devel] [PATCH] qapi: misc: change the 'pc' to unsinged 64 in CpuInfo

2018-11-05 Thread Eric Blake


On 11/2/18 6:01 AM, Li Qiang wrote:

When triggering a 'query-cpus' QMP command, the pc is a signed value like
the following:
{"arch": "x86", ...  "pc": -1732653994, "halted": true,...}
It is strange. Change it to uint64_t.

Signed-off-by: Li Qiang 
---
  qapi/misc.json | 12 ++--
  1 file changed, 6 insertions(+), 6 deletions(-)


I don't see this as causing any major backwards-incompatible behavior to 
clients that can parse full 64-bit unsigned numbers (note that not all 
JSON parsers do so - here's frowning at you, jansson - but libvirt is okay).


Reviewed-by: Eric Blake 

--
Eric Blake, Principal Software Engineer
Red Hat, Inc.   +1-919-301-3266
Virtualization:  qemu.org | libvirt.org

Re: [Qemu-devel] [PATCH] include: Add a comment to explain the origin of sizes' lookup table

2018-11-05 Thread no-reply

Hi,

This series seems to have some coding style problems. See output below for
more information:

Type: series
Message-id: 20181103015821.30074-1-lbl...@janustech.com
Subject: [Qemu-devel] [PATCH] include: Add a comment to explain the origin of 
sizes' lookup table

=== TEST SCRIPT BEGIN ===
#!/bin/bash

BASE=base
n=1
total=$(git log --oneline $BASE.. | wc -l)
failed=0

git config --local diff.renamelimit 0
git config --local diff.renames True
git config --local diff.algorithm histogram

commits="$(git log --format=%H --reverse $BASE..)"
for c in $commits; do
echo "Checking PATCH $n/$total: $(git log -n 1 --format=%s $c)..."
if ! git show $c --format=email | ./scripts/checkpatch.pl --mailback -; then
failed=1
echo
fi
n=$((n+1))
done

exit $failed
=== TEST SCRIPT END ===

Updating 3c8cf5a9c21ff8782164d1def7f44bd888713384
Switched to a new branch 'test'
4b786d4cd6 include: Add a comment to explain the origin of sizes' lookup table

=== OUTPUT BEGIN ===
Checking PATCH 1/1: include: Add a comment to explain the origin of sizes' 
lookup table...
ERROR: code indent should never use tabs
#43: FILE: include/qemu/units.h:27:
+ *  ^Isuffix="KMGTPE";$

ERROR: code indent should never use tabs
#44: FILE: include/qemu/units.h:28:
+ *  ^Ifor(i=10; i<64; i++) {$

ERROR: code indent should never use tabs
#45: FILE: include/qemu/units.h:29:
+ *  ^I^Ival=2**i;$

ERROR: code indent should never use tabs
#46: FILE: include/qemu/units.h:30:
+ *  ^I^Is=substr(suffix, int(i/10), 1);$

ERROR: code indent should never use tabs
#47: FILE: include/qemu/units.h:31:
+ *  ^I^In=2**(i%10);$

ERROR: code indent should never use tabs
#48: FILE: include/qemu/units.h:32:
+ *  ^I^Ipad=21-int(log(n)/log(10));$

ERROR: code indent should never use tabs
#49: FILE: include/qemu/units.h:33:
+ *  ^I^Iprintf("#define S_%d%siB %*d\n", n, s, pad, val);$

ERROR: code indent should never use tabs
#50: FILE: include/qemu/units.h:34:
+ *  ^I}$

total: 8 errors, 0 warnings, 24 lines checked

Your patch has style problems, please review.  If any of these errors
are false positives report them to the maintainer, see
CHECKPATCH in MAINTAINERS.

=== OUTPUT END ===

Test command exited with code: 1


---
Email generated automatically by Patchew [http://patchew.org/].
Please send your feedback to patchew-de...@redhat.com

Re: [Qemu-devel] [PATCH RFC 4/4] net/slirp: add ipv6-hostfwd option for user netdev type

2018-11-05 Thread Eric Blake


On 10/25/18 7:03 PM, Maxim Samoylov wrote:

This allows forwarding TCP6 and UDP6 connections down to
netdev=user connected guests.

Signed-off-by: Maxim Samoylov 
---
  hmp-commands.hx |  31 
  include/net/slirp.h |   2 +
  net/slirp.c | 214 
  qapi/net.json   |   3 +-
  4 files changed, 249 insertions(+), 1 deletion(-)



+++ b/qapi/net.json
@@ -201,7 +201,8 @@
  '*smbserver': 'str',
  '*hostfwd':   ['String'],
  '*guestfwd':  ['String'],
-'*tftp-server-name': 'str' } }
+'*tftp-server-name': 'str',
+'*ipv6-hostfwd': ['String']} }


Missing documentation of the new member.  Don't forget a '(since 3.1)' 
comment (if this is still appropriate for the current release; which may 
be doubtful since we are in soft freeze, in which case it will be 3.2 or 
4.0, depending on what the next release is numbered).


--
Eric Blake, Principal Software Engineer
Red Hat, Inc.   +1-919-301-3266
Virtualization:  qemu.org | libvirt.org

Re: [Qemu-devel] [PATCH v3 1/6] qemu-nbd: add support for authorization of TLS clients

2018-11-05 Thread Eric Blake


On 10/9/18 8:23 AM, Daniel P. Berrangé wrote:

From: "Daniel P. Berrange" 

Currently any client which can complete the TLS handshake is able to use
the NBD server. The server admin can turn on the 'verify-peer' option
for the x509 creds to require the client to provide a x509 certificate.
This means the client will have to acquire a certificate from the CA
before they are permitted to use the NBD server. This is still a fairly
low bar to cross.

This adds a '--tls-authz OBJECT-ID' option to the qemu-nbd command which
takes the ID of a previously added 'QAuthZ' object instance. This will
be used to validate the client's x509 distinguished name. Clients
failing the authorization check will not be permitted to use the NBD
server.

For example to setup authorization that only allows connection from a client
whose x509 certificate distinguished name is

CN=laptop.example.com,O=Example Org,L=London,ST=London,C=GB

use:

   qemu-nbd --object tls-creds-x509,id=tls0,dir=/home/berrange/qemutls,\
 endpoint=server,verify-peer=yes \
--object authz-simple,id=auth0,identity=CN=laptop.example.com,,\
 O=Example Org,,L=London,,ST=London,,C=GB \


Missing shell quoting around the space in 'Example Org'. It's also 
fairly obvious that actual shell commands can't have leading space 
between \-newline line continuations.



--tls-creds tls0 \
--tls-authz authz0
   other qemu-nbd args...

Signed-off-by: Daniel P. Berrange 
---
  include/block/nbd.h |  2 +-
  nbd/server.c| 10 +-
  qemu-nbd.c  | 13 -
  qemu-nbd.texi   |  4 
  4 files changed, 22 insertions(+), 7 deletions(-)




+++ b/qemu-nbd.c
@@ -52,6 +52,7 @@
  #define QEMU_NBD_OPT_TLSCREDS  261
  #define QEMU_NBD_OPT_IMAGE_OPTS262
  #define QEMU_NBD_OPT_FORK  263
+#define QEMU_NBD_OPT_TLSAUTHZ  264
  



@@ -532,6 +534,7 @@ int main(int argc, char **argv)
  { "image-opts", no_argument, NULL, QEMU_NBD_OPT_IMAGE_OPTS },
  { "trace", required_argument, NULL, 'T' },
  { "fork", no_argument, NULL, QEMU_NBD_OPT_FORK },
+{ "tls-authz", no_argument, NULL, QEMU_NBD_OPT_TLSAUTHZ },
  { NULL, 0, NULL, 0 }
  };


Missing a change to qemu-nbd --help to describe the new option.

--
Eric Blake, Principal Software Engineer
Red Hat, Inc.   +1-919-301-3266
Virtualization:  qemu.org | libvirt.org

Re: [Qemu-devel] [PATCH v3] oslib-posix: Use MAP_STACK in qemu_alloc_stack() on OpenBSD

2018-11-05 Thread Brad Smith


ping.

On 10/19/2018 8:52 AM, Brad Smith wrote:

Use MAP_STACK in qemu_alloc_stack() on OpenBSD.

Added to our 6.4 release.

MAP_STACK  Indicate that the mapping is used as a stack.  This
flag must be used in combination with MAP_ANON and
MAP_PRIVATE.

Implement MAP_STACK option for mmap().  Synchronous faults (pagefault and
syscall) confirm the stack register points at MAP_STACK memory, otherwise
SIGSEGV is delivered. sigaltstack() and pthread_attr_setstack() are modified
to create a MAP_STACK sub-region which satisfies alignment requirements.
Observe that MAP_STACK can only be set/cleared by mmap(), which zeroes the
contents of the region -- there is no mprotect() equivalent operation, so
there is no MAP_STACK-adding gadget.


Signed-off-by: Brad Smith 
Reviewed-by: Kamil Rytarowski 
Reviewed-by: Peter Maydell 

diff --git a/util/oslib-posix.c b/util/oslib-posix.c
index fbd0dc8c57..c1bee2a581 100644
--- a/util/oslib-posix.c
+++ b/util/oslib-posix.c
@@ -596,6 +596,7 @@ pid_t qemu_fork(Error **errp)
  void *qemu_alloc_stack(size_t *sz)
  {
  void *ptr, *guardpage;
+int flags;
  #ifdef CONFIG_DEBUG_STACK_USAGE
  void *ptr2;
  #endif
@@ -610,8 +611,18 @@ void *qemu_alloc_stack(size_t *sz)
  /* allocate one extra page for the guard page */
  *sz += pagesz;
  
-ptr = mmap(NULL, *sz, PROT_READ | PROT_WRITE,

-   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+flags = MAP_PRIVATE | MAP_ANONYMOUS;
+#if defined(MAP_STACK) && defined(__OpenBSD__)
+/* Only enable MAP_STACK on OpenBSD. Other OS's such as
+ * Linux/FreeBSD/NetBSD have a flag with the same name
+ * but have differing functionality. OpenBSD will SEGV
+ * if it spots execution with a stack pointer pointing
+ * at memory that was not allocated with MAP_STACK.
+ */
+flags |= MAP_STACK;
+#endif
+
+ptr = mmap(NULL, *sz, PROT_READ | PROT_WRITE, flags, -1, 0);
  if (ptr == MAP_FAILED) {
  perror("failed to allocate memory for stack");
  abort();

[Qemu-devel] [PATCH v5 0/5] off-by-one and NULL pointer accesses detected by static analysis

2018-11-05 Thread Liam Merwick

Below are a number of fixes to some off-by-one, read outside array bounds, and
NULL pointer accesses detected by an internal Oracle static analysis tool 
(Parfait).
https://labs.oracle.com/pls/apex/f?p=labs:49:P49_PROJECT_ID:13

v1 -> v2
Based on feedback from Eric Blake:
patch2: reworded commit message to clarify issue
patch6: Reverted common qlist routines and added assert to qlist_dump instead
patch7: Fixed incorrect logic
patch8: Added QEMU_BUILD_BUG_ON to catch future instance at compile-time

v2 -> v3
Based on feedback from Eric Blake:
patch6: removed double space from commit message
patch8: removed unnecessary comment and updated QEMU_BUILD_BUG_ON to use 
ARRAY_SIZE
Added Eric's R-b to patches 6,7,8

v3 -> v4
Based on feedback from Max Reitz:
patch2: Added R-b from John Snow
patch3: fixed blk_get_attached_dev_id() instead of checking return value
patch4: switched to assert()
patch5: numerous changes based on feedback from Max
patch6: updated commit message
patch7: (was patch8): Added Max's R-b
patch8: (new): patch fixing NULL pointer dereference in kvm_arch_init_vcpu()

v4 -> v5
Based on further feedback from Max Reitz:
Dropped v4 patch1 (configure --disable-avx2) as Thomas Huth already pulled it. 
Dropped v4 patch6 (dump_qlist) as it was just an unnecessary assert
Dropped v4 patch8 'patch fixing NULL pointer dereference in 
kvm_arch_init_vcpu()'
  so as to limit this series to block changes (will send in a separate series).
patch1: no change (v4 patch2)
patch2: Switched to using ?: in return (v4 patch3)
patch3: Added Max's R-b (v4 patch4)
patch4: couple of changes based on feedback from Max (v4 patch5)
patch5: no change (v4 patch7)

Liam Merwick (5):
  job: Fix off-by-one assert checks for JobSTT and JobVerbTable
  block: Null pointer dereference in blk_root_get_parent_desc()
  qemu-img: assert block_job_get() does not return NULL in img_commit()
  block: Fix potential Null pointer dereferences in vvfat.c
  qcow2: Read outside array bounds in qcow2_pre_write_overlap_check()

 block/block-backend.c  |  3 ++-
 block/qcow2-refcount.c | 18 ++
 block/vvfat.c  | 49 +
 job.c  |  4 ++--
 qemu-img.c |  1 +
 5 files changed, 48 insertions(+), 27 deletions(-)

-- 
1.8.3.1

[Qemu-devel] [PATCH v5 3/5] qemu-img: assert block_job_get() does not return NULL in img_commit()

2018-11-05 Thread Liam Merwick

Although the function block_job_get() can return NULL, it would be a
serious bug if it did so (because the job yields before executing anything
(if it started successfully); but otherwise, commit_active_start() would
have returned an error).  However, as a precaution, before dereferencing
the 'job' pointer in img_commit() assert it is not NULL.

Signed-off-by: Liam Merwick 
Reviewed-by: Max Reitz 
---
 qemu-img.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/qemu-img.c b/qemu-img.c
index b12f4cd19b0a..457aa152296b 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -1029,6 +1029,7 @@ static int img_commit(int argc, char **argv)
 }
 
 job = block_job_get("commit");
+assert(job);
 run_block_job(job, &local_err);
 if (local_err) {
 goto unref_backing;
-- 
1.8.3.1

[Qemu-devel] [PATCH v5 4/5] block: Fix potential Null pointer dereferences in vvfat.c

2018-11-05 Thread Liam Merwick

The calls to find_mapping_for_cluster() may return NULL but it
isn't always checked for before dereferencing the value returned.
Additionally, add some asserts to cover cases where NULL can't
be returned but which might not be obvious at first glance.

Signed-off-by: Liam Merwick 
---
 block/vvfat.c | 50 ++
 1 file changed, 34 insertions(+), 16 deletions(-)

diff --git a/block/vvfat.c b/block/vvfat.c
index fc41841a5c3c..263274d9739a 100644
--- a/block/vvfat.c
+++ b/block/vvfat.c
@@ -100,30 +100,26 @@ static inline void array_free(array_t* array)
 /* does not automatically grow */
 static inline void* array_get(array_t* array,unsigned int index) {
 assert(index < array->next);
+assert(array->pointer);
 return array->pointer + index * array->item_size;
 }
 
-static inline int array_ensure_allocated(array_t* array, int index)
+static inline void array_ensure_allocated(array_t *array, int index)
 {
 if((index + 1) * array->item_size > array->size) {
 int new_size = (index + 32) * array->item_size;
 array->pointer = g_realloc(array->pointer, new_size);
-if (!array->pointer)
-return -1;
+assert(array->pointer);
 memset(array->pointer + array->size, 0, new_size - array->size);
 array->size = new_size;
 array->next = index + 1;
 }
-
-return 0;
 }
 
 static inline void* array_get_next(array_t* array) {
 unsigned int next = array->next;
 
-if (array_ensure_allocated(array, next) < 0)
-return NULL;
-
+array_ensure_allocated(array, next);
 array->next = next + 1;
 return array_get(array, next);
 }
@@ -2428,16 +2424,13 @@ static int commit_direntries(BDRVVVFATState* s,
 direntry_t* direntry = array_get(&(s->directory), dir_index);
 uint32_t first_cluster = dir_index == 0 ? 0 : begin_of_direntry(direntry);
 mapping_t* mapping = find_mapping_for_cluster(s, first_cluster);
-
 int factor = 0x10 * s->sectors_per_cluster;
 int old_cluster_count, new_cluster_count;
-int current_dir_index = mapping->info.dir.first_dir_index;
-int first_dir_index = current_dir_index;
+int current_dir_index;
+int first_dir_index;
 int ret, i;
 uint32_t c;
 
-DLOG(fprintf(stderr, "commit_direntries for %s, parent_mapping_index %d\n", 
mapping->path, parent_mapping_index));
-
 assert(direntry);
 assert(mapping);
 assert(mapping->begin == first_cluster);
@@ -2445,6 +2438,15 @@ DLOG(fprintf(stderr, "commit_direntries for %s, 
parent_mapping_index %d\n", mapp
 assert(mapping->mode & MODE_DIRECTORY);
 assert(dir_index == 0 || is_directory(direntry));
 
+if (mapping == NULL) {
+return -1;
+}
+
+DLOG(fprintf(stderr, "commit_direntries for %s, parent_mapping_index %d\n",
+mapping->path, parent_mapping_index));
+
+current_dir_index = mapping->info.dir.first_dir_index;
+first_dir_index = current_dir_index;
 mapping->info.dir.parent_mapping_index = parent_mapping_index;
 
 if (first_cluster == 0) {
@@ -2494,6 +2496,9 @@ DLOG(fprintf(stderr, "commit_direntries for %s, 
parent_mapping_index %d\n", mapp
 direntry = array_get(&(s->directory), first_dir_index + i);
 if (is_directory(direntry) && !is_dot(direntry)) {
 mapping = find_mapping_for_cluster(s, first_cluster);
+if (mapping == NULL) {
+return -1;
+}
 assert(mapping->mode & MODE_DIRECTORY);
 ret = commit_direntries(s, first_dir_index + i,
 array_index(&(s->mapping), mapping));
@@ -2522,6 +2527,10 @@ static int commit_one_file(BDRVVVFATState* s,
 assert(offset < size);
 assert((offset % s->cluster_size) == 0);
 
+if (mapping == NULL) {
+return -1;
+}
+
 for (i = s->cluster_size; i < offset; i += s->cluster_size)
 c = modified_fat_get(s, c);
 
@@ -2668,8 +2677,12 @@ static int handle_renames_and_mkdirs(BDRVVVFATState* s)
 if (commit->action == ACTION_RENAME) {
 mapping_t* mapping = find_mapping_for_cluster(s,
 commit->param.rename.cluster);
-char* old_path = mapping->path;
+char *old_path;
 
+if (mapping == NULL) {
+return -1;
+}
+old_path = mapping->path;
 assert(commit->path);
 mapping->path = commit->path;
 if (rename(old_path, mapping->path))
@@ -2690,10 +2703,15 @@ static int handle_renames_and_mkdirs(BDRVVVFATState* s)
 direntry_t* d = direntry + i;
 
 if (is_file(d) || (is_directory(d) && !is_dot(d))) {
+int l;
+char *new_path;
 mapping_t* m = find_mapping_for_cluster(s,
 begin_of_direntry(d));
-int l = strlen(m->path);
-c

[Qemu-devel] [PATCH v5 5/5] qcow2: Read outside array bounds in qcow2_pre_write_overlap_check()

2018-11-05 Thread Liam Merwick

The commit for 0e4e4318eaa5 increments QCOW2_OL_MAX_BITNR but does not
add an array entry for QCOW2_OL_BITMAP_DIRECTORY_BITNR to metadata_ol_names[].
As a result, an array dereference of metadata_ol_names[8] in
qcow2_pre_write_overlap_check() could result in a read outside of the array 
bounds.

Fixes: 0e4e4318eaa5 ('qcow2: add overlap check for bitmap directory')

Cc: Vladimir Sementsov-Ogievskiy 
Signed-off-by: Liam Merwick 
Reviewed-by: Eric Blake 
Reviewed-by: Max Reitz 
---
 block/qcow2-refcount.c | 18 ++
 1 file changed, 10 insertions(+), 8 deletions(-)

diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
index 3c539f02e5ec..46082aeac1d6 100644
--- a/block/qcow2-refcount.c
+++ b/block/qcow2-refcount.c
@@ -2719,15 +2719,17 @@ int qcow2_check_metadata_overlap(BlockDriverState *bs, 
int ign, int64_t offset,
 }
 
 static const char *metadata_ol_names[] = {
-[QCOW2_OL_MAIN_HEADER_BITNR]= "qcow2_header",
-[QCOW2_OL_ACTIVE_L1_BITNR]  = "active L1 table",
-[QCOW2_OL_ACTIVE_L2_BITNR]  = "active L2 table",
-[QCOW2_OL_REFCOUNT_TABLE_BITNR] = "refcount table",
-[QCOW2_OL_REFCOUNT_BLOCK_BITNR] = "refcount block",
-[QCOW2_OL_SNAPSHOT_TABLE_BITNR] = "snapshot table",
-[QCOW2_OL_INACTIVE_L1_BITNR]= "inactive L1 table",
-[QCOW2_OL_INACTIVE_L2_BITNR]= "inactive L2 table",
+[QCOW2_OL_MAIN_HEADER_BITNR]= "qcow2_header",
+[QCOW2_OL_ACTIVE_L1_BITNR]  = "active L1 table",
+[QCOW2_OL_ACTIVE_L2_BITNR]  = "active L2 table",
+[QCOW2_OL_REFCOUNT_TABLE_BITNR] = "refcount table",
+[QCOW2_OL_REFCOUNT_BLOCK_BITNR] = "refcount block",
+[QCOW2_OL_SNAPSHOT_TABLE_BITNR] = "snapshot table",
+[QCOW2_OL_INACTIVE_L1_BITNR]= "inactive L1 table",
+[QCOW2_OL_INACTIVE_L2_BITNR]= "inactive L2 table",
+[QCOW2_OL_BITMAP_DIRECTORY_BITNR]   = "bitmap directory",
 };
+QEMU_BUILD_BUG_ON(QCOW2_OL_MAX_BITNR != ARRAY_SIZE(metadata_ol_names));
 
 /*
  * First performs a check for metadata overlaps (through
-- 
1.8.3.1

[Qemu-devel] [PATCH v5 2/5] block: Null pointer dereference in blk_root_get_parent_desc()

2018-11-05 Thread Liam Merwick

The dev_id returned by the call to blk_get_attached_dev_id() in
blk_root_get_parent_desc() can be NULL (an internal call to
object_get_canonical_path may have returned NULL).

Instead of just checking this case before dereferencing,
adjust blk_get_attached_dev_id() to return the empty string if no
object path can be found (similar to the case when blk->dev is NULL
and an empty string is returned).

Signed-off-by: Liam Merwick 
---
 block/block-backend.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/block/block-backend.c b/block/block-backend.c
index dc0cd5772413..a2061a565024 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -918,7 +918,8 @@ char *blk_get_attached_dev_id(BlockBackend *blk)
 } else if (dev->id) {
 return g_strdup(dev->id);
 }
-return object_get_canonical_path(OBJECT(dev));
+
+return object_get_canonical_path(OBJECT(dev)) ?: g_strdup("");
 }
 
 /*
-- 
1.8.3.1

[Qemu-devel] [PATCH v5 1/5] job: Fix off-by-one assert checks for JobSTT and JobVerbTable

2018-11-05 Thread Liam Merwick

In the assert checking the array dereference of JobVerbTable[verb]
in job_apply_verb() the check of the index, verb, allows an overrun
because an index equal to the array size is permitted.

Similarly, in the assert check of JobSTT[s0][s1] with index s1
in job_state_transition(), an off-by-one overrun is not flagged
either.

This is not a run-time issue as there are no callers actually
passing in the max value.

Signed-off-by: Liam Merwick 
Reviewed-by: Darren Kenny 
Reviewed-by: Mark Kanda 
Reviewed-by: Eric Blake 
Reviewed-by: John Snow 
---
 job.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/job.c b/job.c
index c65e01bbfa34..da8e4b7bf2f3 100644
--- a/job.c
+++ b/job.c
@@ -159,7 +159,7 @@ bool job_is_internal(Job *job)
 static void job_state_transition(Job *job, JobStatus s1)
 {
 JobStatus s0 = job->status;
-assert(s1 >= 0 && s1 <= JOB_STATUS__MAX);
+assert(s1 >= 0 && s1 < JOB_STATUS__MAX);
 trace_job_state_transition(job, job->ret,
JobSTT[s0][s1] ? "allowed" : "disallowed",
JobStatus_str(s0), JobStatus_str(s1));
@@ -174,7 +174,7 @@ static void job_state_transition(Job *job, JobStatus s1)
 int job_apply_verb(Job *job, JobVerb verb, Error **errp)
 {
 JobStatus s0 = job->status;
-assert(verb >= 0 && verb <= JOB_VERB__MAX);
+assert(verb >= 0 && verb < JOB_VERB__MAX);
 trace_job_apply_verb(job, JobStatus_str(s0), JobVerb_str(verb),
  JobVerbTable[verb][s0] ? "allowed" : "prohibited");
 if (JobVerbTable[verb][s0]) {
-- 
1.8.3.1

Re: [Qemu-devel] [PATCH v4 5/8] block: Fix potential Null pointer dereferences in vvfat.c

2018-11-05 Thread Liam Merwick





On 05/11/18 00:19, Max Reitz wrote:

On 19.10.18 22:39, Liam Merwick wrote:

The calls to find_mapping_for_cluster() may return NULL but it
isn't always checked for before dereferencing the value returned.
Additionally, add some asserts to cover cases where NULL can't
be returned but which might not be obvious at first glance.

Signed-off-by: Liam Merwick 
---
  block/vvfat.c | 33 -
  1 file changed, 28 insertions(+), 5 deletions(-)

diff --git a/block/vvfat.c b/block/vvfat.c
index fc41841a5c3c..19f6725054a0 100644
--- a/block/vvfat.c
+++ b/block/vvfat.c
@@ -100,6 +100,7 @@ static inline void array_free(array_t* array)
  /* does not automatically grow */
  static inline void* array_get(array_t* array,unsigned int index) {
  assert(index < array->next);
+assert(array->pointer);
  return array->pointer + index * array->item_size;
  }
  
@@ -108,8 +109,7 @@ static inline int array_ensure_allocated(array_t* array, int index)

  if((index + 1) * array->item_size > array->size) {
  int new_size = (index + 32) * array->item_size;
  array->pointer = g_realloc(array->pointer, new_size);
-if (!array->pointer)
-return -1;
+assert(array->pointer);


It would make sense to make this function not return any value (because
it just always returns 0 now), but I fully understand if you don't want
to mess around with vvfat more than you have to.  (Neither do I.)


It had occurred to me too but I wasn't sure if it'd be preferred to roll 
that change in. 3 of the 4 callers ignored the return value already, so 
I bit the bullet and made the change.






  memset(array->pointer + array->size, 0, new_size - array->size);
  array->size = new_size;
  array->next = index + 1;
@@ -2261,6 +2261,9 @@ static mapping_t* insert_mapping(BDRVVVFATState* s,
  }
  if (index >= s->mapping.next || mapping->begin > begin) {
  mapping = array_insert(&(s->mapping), index, 1);
+if (mapping == NULL) {
+return NULL;
+}


array_insert() will never return NULL.



Removed.




  mapping->path = NULL;
  adjust_mapping_indices(s, index, +1);
  }
@@ -2428,6 +2431,9 @@ static int commit_direntries(BDRVVVFATState* s,
  direntry_t* direntry = array_get(&(s->directory), dir_index);
  uint32_t first_cluster = dir_index == 0 ? 0 : begin_of_direntry(direntry);
  mapping_t* mapping = find_mapping_for_cluster(s, first_cluster);
+if (mapping == NULL) {
+return -1;
+}


This should be moved below the declarations that still follow here.


Done. (It resulted in a bit more code rearranging and I had to fix two 
checkpatch warnings in existing code)




  
  int factor = 0x10 * s->sectors_per_cluster;

  int old_cluster_count, new_cluster_count;


[...]


@@ -3193,6 +3215,7 @@ static int enable_write_target(BlockDriverState *bs, 
Error **errp)
  
  backing = bdrv_new_open_driver(&vvfat_write_target, NULL, BDRV_O_ALLOW_RDWR,

 &error_abort);
+assert(backing);
  *(void**) backing->opaque = s;


I personally wouldn't use an assert() here because it's clear that the
value is dereferenced immediately, so that is the assertion that it is
non-NULL, but I won't give too much of a fight.

The thing is, I believe we should write code for humans, not machines.
Fixing machines to understand what we produce is possible -- fixing
humans is more difficult.

On top of that, it would be a bug if NULL is returned and it would be
good if a static analyzer could catch that case.  Just fully silencing
it with assert() is not ideal.  Ideal would be if it would know that
setting &error_abort to any value crashes the program, and could thus
infer whether this function will actually ever get to return NULL when
&error_abort has been passed to it.



I'm investigating if the tool's config file syntax can describe that 
error_handle_fatal() exits when particular error_xxx parameters are passed.


I'll drop that assert in any case.

Regards,
Liam



Max

  
  bdrv_set_backing_hd(s->bs, backing, &error_abort);

Re: [Qemu-devel] [PATCH v4 6/8] block: dump_qlist() may dereference a Null pointer

2018-11-05 Thread Liam Merwick





On 05/11/18 00:07, Max Reitz wrote:

On 19.10.18 22:39, Liam Merwick wrote:

A NULL 'list' passed into function dump_qlist() isn't correctly
validated and can be passed to qlist_first() where it is dereferenced.

Given that dump_qlist() is static, and callers already do the right
thing, just add an assert to catch future potential bugs (plus the
added benefit of suppressing a warning from a static analysis tool
and removing this noise will help us better find real issues).


But can't you fix the tool? 


I don't have access to the tool source but have been filing bugs against 
it as I run it on the QEMU codebase and discover false positives.



My opinion is still that large parts of our
code do not assert that some parameter is not NULL, and I think it isn't
a good idea to make them assert that.  


Yeah, that can be a slippery slope


I don't know what makes this
function special, and I wonder why it is special to your tool -- as I've
said in the last version, dump_qdict() is basically the same in this
regard.  I wonder why your tool doesn't mind that.



I had gone through the code paths to try to see how the tool was happy 
with one and not the other - the implementation differed slightly w.r.t 
macro usage but I couldn't see any obvious reason.



Can you not whitelist something as false positives?  I know we have a
lot of those in Coverity, and we just mark them as such, and that's it.


Yeah, I can flag this as a FP and have it fall off my list.

I will drop this patch in v5

Regards,
Liam



Finally, one could argue that the nonnull GCC function attribute would
be a better fit, actually.

But overall, I just don't think it's a good idea to start changing the
code to accommodate for false positives in static analyzers, because in
my experience the number of false positives only rises with time.

Max


Signed-off-by: Liam Merwick 
Reviewed-by: Eric Blake 
---
  block/qapi.c | 2 ++
  1 file changed, 2 insertions(+)

diff --git a/block/qapi.c b/block/qapi.c
index c66f949db839..e81be604217c 100644
--- a/block/qapi.c
+++ b/block/qapi.c
@@ -740,6 +740,8 @@ static void dump_qlist(fprintf_function func_fprintf, void 
*f, int indentation,
  const QListEntry *entry;
  int i = 0;
  
+assert(list);

+
  for (entry = qlist_first(list); entry; entry = qlist_next(entry), i++) {
  QType type = qobject_type(entry->value);
  bool composite = (type == QTYPE_QDICT || type == QTYPE_QLIST);

Re: [Qemu-devel] [PATCH v4 3/8] block: Null pointer dereference in blk_root_get_parent_desc()

2018-11-05 Thread Liam Merwick





On 04/11/18 23:57, Max Reitz wrote:

On 19.10.18 22:39, Liam Merwick wrote:

The dev_id returned by the call to blk_get_attached_dev_id() in
blk_root_get_parent_desc() can be NULL (an internal call to
object_get_canonical_path may have returned NULL).

Instead of just checking this case before dereferencing,
adjust blk_get_attached_dev_id() to return the empty string if no
object path can be found (similar to the case when blk->dev is NULL
and an empty string is returned).

Signed-off-by: Liam Merwick 
---
  block/block-backend.c | 6 +-
  dtc   | 2 +-
  2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/block/block-backend.c b/block/block-backend.c
index dc0cd5772413..e628920f3cd8 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -909,6 +909,7 @@ void *blk_get_attached_dev(BlockBackend *blk)
  char *blk_get_attached_dev_id(BlockBackend *blk)
  {
  DeviceState *dev;
+char *dev_id;
  
  assert(!blk->legacy_dev);

  dev = blk->dev;
@@ -918,7 +919,10 @@ char *blk_get_attached_dev_id(BlockBackend *blk)
  } else if (dev->id) {
  return g_strdup(dev->id);
  }
-return object_get_canonical_path(OBJECT(dev));
+
+dev_id = object_get_canonical_path(OBJECT(dev));
+
+return dev_id ? dev_id : g_strdup("");
  }
  
  /*


Looks good, but since you'll have to respin anyway because of the hunk
below, you may want to consider

 return object_get_canonical_path(OBJECT(dev)) ?: g_strdup("");

instead.  (We have several instances of binary "?:" in the code already,
so it's fine to use it.  Of course you don't have to, though, if you
don't like it.)



I like it. Will make that change in v5



diff --git a/dtc b/dtc
index 88f18909db73..e54388015af1 16
--- a/dtc
+++ b/dtc
@@ -1 +1 @@
-Subproject commit 88f18909db731a627456f26d779445f84e449536
+Subproject commit e54388015af1fb4bf04d0bca99caba1074d9cc42


I don't think this hunk belongs here.



Indeed.

Regards,
Liam

[Qemu-devel] [PATCH 1/1 V2] Add vhost-pci-blk driver

2018-11-05 Thread Vitaly Mayatskikh

This driver uses kernel-mode acceleration for virtio-blk and
achieves near bare-metal disk performance inside a VM.

Signed-off-by: Vitaly Mayatskikh 
---
 configure |  10 +
 default-configs/virtio.mak|   1 +
 hw/block/Makefile.objs|   1 +
 hw/block/vhost-blk.c  | 429 ++
 hw/virtio/virtio-pci.c|  60 +
 hw/virtio/virtio-pci.h|  19 ++
 include/hw/virtio/vhost-blk.h |  43 
 7 files changed, 563 insertions(+)
 create mode 100644 hw/block/vhost-blk.c
 create mode 100644 include/hw/virtio/vhost-blk.h

diff --git a/configure b/configure
index 46ae1e8c76..787bc780da 100755
--- a/configure
+++ b/configure
@@ -371,6 +371,7 @@ vhost_crypto="no"
 vhost_scsi="no"
 vhost_vsock="no"
 vhost_user=""
+vhost_blk=""
 kvm="no"
 hax="no"
 hvf="no"
@@ -869,6 +870,7 @@ Linux)
   vhost_crypto="yes"
   vhost_scsi="yes"
   vhost_vsock="yes"
+  vhost_blk="yes"
   QEMU_INCLUDES="-I\$(SRC_PATH)/linux-headers -I$(pwd)/linux-headers 
$QEMU_INCLUDES"
   supported_os="yes"
   libudev="yes"
@@ -1263,6 +1265,10 @@ for opt do
   ;;
   --enable-vhost-vsock) vhost_vsock="yes"
   ;;
+  --disable-vhost-blk) vhost_blk="no"
+  ;;
+  --enable-vhost-blk) vhost_blk="yes"
+  ;;
   --disable-opengl) opengl="no"
   ;;
   --enable-opengl) opengl="yes"
@@ -6000,6 +6006,7 @@ echo "vhost-crypto support $vhost_crypto"
 echo "vhost-scsi support $vhost_scsi"
 echo "vhost-vsock support $vhost_vsock"
 echo "vhost-user support $vhost_user"
+echo "vhost-blk support $vhost_blk"
 echo "Trace backends$trace_backends"
 if have_backend "simple"; then
 echo "Trace output file $trace_file-"
@@ -6461,6 +6468,9 @@ fi
 if test "$vhost_user" = "yes" ; then
   echo "CONFIG_VHOST_USER=y" >> $config_host_mak
 fi
+if test "$vhost_blk" = "yes" ; then
+  echo "CONFIG_VHOST_BLK=y" >> $config_host_mak
+fi
 if test "$blobs" = "yes" ; then
   echo "INSTALL_BLOBS=yes" >> $config_host_mak
 fi
diff --git a/default-configs/virtio.mak b/default-configs/virtio.mak
index 1304849018..765c0a2a04 100644
--- a/default-configs/virtio.mak
+++ b/default-configs/virtio.mak
@@ -1,5 +1,6 @@
 CONFIG_VHOST_USER_SCSI=$(call land,$(CONFIG_VHOST_USER),$(CONFIG_LINUX))
 CONFIG_VHOST_USER_BLK=$(call land,$(CONFIG_VHOST_USER),$(CONFIG_LINUX))
+CONFIG_VHOST_BLK=$(CONFIG_LINUX)
 CONFIG_VIRTIO=y
 CONFIG_VIRTIO_9P=y
 CONFIG_VIRTIO_BALLOON=y
diff --git a/hw/block/Makefile.objs b/hw/block/Makefile.objs
index 53ce5751ae..857ce823fc 100644
--- a/hw/block/Makefile.objs
+++ b/hw/block/Makefile.objs
@@ -14,3 +14,4 @@ obj-$(CONFIG_SH4) += tc58128.o
 obj-$(CONFIG_VIRTIO_BLK) += virtio-blk.o
 obj-$(CONFIG_VIRTIO_BLK) += dataplane/
 obj-$(CONFIG_VHOST_USER_BLK) += vhost-user-blk.o
+obj-$(CONFIG_VHOST_BLK) += vhost-blk.o
diff --git a/hw/block/vhost-blk.c b/hw/block/vhost-blk.c
new file mode 100644
index 00..4ca8040ee7
--- /dev/null
+++ b/hw/block/vhost-blk.c
@@ -0,0 +1,429 @@
+/*
+ * vhost-blk host device
+ *
+ * Copyright(C) 2018 IBM Corporation
+ *
+ * Authors:
+ *  Vitaly Mayatskikh 
+ *
+ * Largely based on the "vhost-user-blk.c" implemented by:
+ * Changpeng Liu 
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "qemu/error-report.h"
+#include "qemu/cutils.h"
+#include "qom/object.h"
+#include "hw/qdev-core.h"
+#include "hw/virtio/vhost.h"
+#include "hw/virtio/vhost-blk.h"
+#include "hw/virtio/virtio.h"
+#include "hw/virtio/virtio-bus.h"
+#include "hw/virtio/virtio-access.h"
+#include 
+#include 
+
+static const int feature_bits[] = {
+VIRTIO_BLK_F_SIZE_MAX,
+VIRTIO_BLK_F_SEG_MAX,
+VIRTIO_BLK_F_BLK_SIZE,
+VIRTIO_BLK_F_TOPOLOGY,
+VIRTIO_BLK_F_MQ,
+VIRTIO_BLK_F_RO,
+VIRTIO_BLK_F_FLUSH,
+VIRTIO_BLK_F_CONFIG_WCE,
+VIRTIO_F_VERSION_1,
+VIRTIO_RING_F_INDIRECT_DESC,
+VIRTIO_RING_F_EVENT_IDX,
+VIRTIO_F_NOTIFY_ON_EMPTY,
+VHOST_INVALID_FEATURE_BIT
+};
+
+static void vhost_blk_get_config(VirtIODevice *vdev, uint8_t *config)
+{
+VHostBlk *s = VHOST_BLK(vdev);
+memcpy(config, &s->blkcfg, sizeof(struct virtio_blk_config));
+}
+
+static void vhost_blk_set_config(VirtIODevice *vdev, const uint8_t *config)
+{
+VHostBlk *s = VHOST_BLK(vdev);
+struct virtio_blk_config *blkcfg = (struct virtio_blk_config *)config;
+int ret;
+
+if (blkcfg->wce == s->blkcfg.wce) {
+return;
+}
+
+ret = vhost_dev_set_config(&s->dev, &blkcfg->wce,
+   offsetof(struct virtio_blk_config, wce),
+   sizeof(blkcfg->wce),
+   VHOST_SET_CONFIG_TYPE_MASTER);
+if (ret) {
+error_report("set device config space failed");
+return;
+}
+
+s->blkcfg.wce = blkcfg->wce;
+}
+
+static int vhost_blk_handle_config_change(struct vhost_dev *dev)
+{
+int ret;
+struct virtio_blk_config blkcfg;
+VHost

[Qemu-devel] [PATCH 0/1 V2] Add vhost-pci-blk driver

2018-11-05 Thread Vitaly Mayatskikh

V2 changes:
- checkpatch style fixes
- correct size detection of disk image placed on a file system

This driver moves virtio-blk host-side processing to kernel (via new
vhost_blk kernel driver). It accelerates virtual disk performance
close to bare-metal levels, especially for parallel loads.

For example, fio numjobs=16 gets 101k randread IOPS using virtio-blk
and 1202k IOPS using vhost-blk, close to 1480k of raw disk performance.

See the IOPS numbers below.

The kernel part if you want to try:
- vhost_blk: https://lkml.org/lkml/2018/11/2/648
- vhost num-queues scalability fix: https://lkml.org/lkml/2018/11/2/550

# fio num-jobs
# A: bare metal over block
# B: bare metal over file
# C: virtio-blk over block
# D: virtio-blk over file
# E: vhost-blk over block
# F: vhost-blk over file
#
#  A B CDE F

1  171k  151k  148k 151k 187k  175k
2  328k  302k  249k 241k 334k  296k
3  479k  437k  179k 174k 464k  404k
4  622k  568k  143k 183k 580k  492k
5  755k  697k  136k 128k 693k  579k
6  887k  808k  131k 120k 782k  640k
7  1004k 926k  126k 131k 863k  693k
8  1099k 1015k 117k 115k 931k  712k
9  1194k 1119k 115k 111k 991k  711k
10 1278k 1207k 109k 114k 1046k 695k
11 1345k 1280k 110k 108k 1091k 663k
12 1411k 1356k 104k 106k 1142k 629k
13 1466k 1423k 106k 106k 1170k 607k
14 1517k 1486k 103k 106k 1179k 589k
15 1552k 1543k 102k 102k 1191k 571k
16 1480k 1506k 101k 102k 1202k 566k

Vitaly Mayatskikh (1):
  Add vhost-pci-blk driver

 configure |  10 +
 default-configs/virtio.mak|   1 +
 hw/block/Makefile.objs|   1 +
 hw/block/vhost-blk.c  | 429 ++
 hw/virtio/virtio-pci.c|  60 +
 hw/virtio/virtio-pci.h|  19 ++
 include/hw/virtio/vhost-blk.h |  43 
 7 files changed, 563 insertions(+)
 create mode 100644 hw/block/vhost-blk.c
 create mode 100644 include/hw/virtio/vhost-blk.h

-- 
2.17.1

Re: [Qemu-devel] [PATCH v3 1/7] qapi: use qemu_strtoi64() in parse_str

2018-11-05 Thread Markus Armbruster

David Hildenbrand  writes:

> On 05.11.18 16:37, Markus Armbruster wrote:
>> David Hildenbrand  writes:
>> 
>>> On 31.10.18 18:55, Markus Armbruster wrote:
 David Hildenbrand  writes:

> On 31.10.18 15:40, Markus Armbruster wrote:
>> David Hildenbrand  writes:
>>
>>> The qemu api claims to be easier to use, and the resulting code seems to
>>> agree.
 [...]
>>> @@ -60,9 +61,7 @@ static int parse_str(StringInputVisitor *siv, const 
>>> char *name, Error **errp)
>>>  }
>>>  
>>>  do {
>>> -errno = 0;
>>> -start = strtoll(str, &endptr, 0);
>>> -if (errno == 0 && endptr > str) {
>>> +if (!qemu_strtoi64(str, &endptr, 0, &start)) {
>>>  if (*endptr == '\0') {
>>>  cur = g_malloc0(sizeof(*cur));
>>>  range_set_bounds(cur, start, start);
>>> @@ -71,11 +70,7 @@ static int parse_str(StringInputVisitor *siv, const 
>>> char *name, Error **errp)
>>>  str = NULL;
>>>  } else if (*endptr == '-') {
>>>  str = endptr + 1;
>>> -errno = 0;
>>> -end = strtoll(str, &endptr, 0);
>>> -if (errno == 0 && endptr > str && start <= end &&
>>> -(start > INT64_MAX - 65536 ||
>>> - end < start + 65536)) {
>>> +if (!qemu_strtoi64(str, &endptr, 0, &end) && start < 
>>> end) {
>>
>> You deleted (start > INT64_MAX - 65536 || end < start + 65536).  Can you
>> explain that to me?  I'm feeling particularly dense today...
>
> qemu_strtoi64 performs all different kinds of error handling completely
> internally. This old code here was an attempt to filter out -EWHATEVER
> from the response. No longer needed as errors and the actual value are
> reported via different ways.

 I understand why errno == 0 && endptr > str go away.  They also do in
 the previous hunk.

 The deletion of (start > INT64_MAX - 65536 || end < start + 65536) is
 unobvious.  What does it do before the patch?

 The condition goes back to commit 659268ffbff, which predates my watch
 as maintainer.  Its commit message is of no particular help.  Its code
 is... all right, the less I say about that, the better.

 We're parsing a range here.  We already parsed its lower bound into
 @start (and guarded against errors), and its upper bound into @end (and
 guarded against errors).

 If the condition you delete is false, we goto error.  So the condition
 is about range validity.  I figure it's an attempt to require valid
 ranges to be no "wider" than 65535.  The second part end < start + 65536
 checks exactly that, except shit happens when start + 65536 overflows.
 The first part attempts to guard against that, but

 (1) INT64_MAX is *wrong*, because we compute in long long, and

 (2) it rejects even small ranges like INT64_MAX - 2 .. INT64_MAX - 1.

 WTF?!?

 Unless I'm mistaken, the condition is not about handling any of the
 errors that qemu_strtoi64() handles for us.

 The easiest way for you out of this morass is probably to keep the
 condition exactly as it was, then use the "my patch doesn't make things
 any worse" get-out-of-jail-free card.

>>>
>>> Looking at the code in qapi/string-output-visitor.c related to range and
>>> list handling I feel like using the get-out-of-jail-free card to get out
>>> of qapi code now :) Too much magic in that code and too little time for
>>> me to understand it all.
>>>
>>> Thanks for your time and review anyway. My time is better invested in
>>> other parts of QEMU. I will drop both patches from this series.
>> 
>> Understand.
>> 
>> When I first looked at the ranges stuff in the string input visitor, I
>> felt the urge to clean it up, then sat on my hands until it passed.
>> 
>> The rest is reasonable once you understand how it works.  The learning
>> curve is less than pleasant, though.
>> 
>
> Maybe I'll pick this up again when I have more time to invest.
>
> The general concept
>
> 1. of having an input visitor that is able to parse different types
> (expected by e.g. a property) sounds sane to me.
>
> 2. of having a list of *something*, assuming it is int64_t, and assuming
> it is to be parsed into a list of ranges sounds completely broken to me.

Starting point: the string visitors can only do scalars.  We have a need
for lists of integers (see below).  The general solution would be
generalizing these visitors to lists (and maybe objects while we're at
it).  YAGNI.  So we put in a quick hack that can do just lists of
integers.

Except applying YAGNI to stable interfaces is *bonkers*.

> I was not even able to find an example QEMU command line for 2. Is this
> maybe some very old code that nobody actually uses anymore? (wh

Re: [Qemu-devel] [PULL v2 00/28] pci, pc, virtio: fixes, features

2018-11-05 Thread Michael S. Tsirkin

On Mon, Nov 05, 2018 at 03:05:27PM +, Peter Maydell wrote:
> On 25 October 2018 at 01:52, Michael S. Tsirkin  wrote:
> > The following changes since commit 13399aad4fa87b2878c49d02a5d3bafa6c966ba3:
> >
> >   Merge remote-tracking branch 'remotes/armbru/tags/pull-error-2018-10-22' 
> > into staging (2018-10-23 17:20:23 +0100)
> >
> > are available in the Git repository at:
> >
> >   git://git.kernel.org/pub/scm/virt/kvm/mst/qemu.git tags/for_upstream
> >
> > for you to fetch changes up to 6a9fb4e1ba5594cde7739068617ad88e6117db93:
> >
> >   vhost-scsi: prevent using uninitialized vqs (2018-10-24 20:50:13 -0400)
> >
> > 
> > pci, pc, virtio: fixes, features
> >
> > AMD IOMMU VAPIC support + fixes all over the place.
> >
> > Signed-off-by: Michael S. Tsirkin 
> >
> 
> Hi Michael -- just a reminder that you need to submit a respin
> of this by 1200 UTC tomorrow (with my symlink-of-data-files
> patchset) if you want it to get into rc0.
> 
> thanks
> -- PMM

Resent - hope it's fine now.

-- 
MST

Re: [Qemu-devel] [PATCH 0/1 resend] Add vhost-pci-blk driver

2018-11-05 Thread Vitaly Mayatskih

On Mon, Nov 5, 2018 at 12:45 PM Michael S. Tsirkin  wrote:

> I think you should Cc more widely to get meaningful
> review. At least virtio-blk and block layer core people.

Thanks, it turns out I missed the existence of qemu/scripts directory
completely.

-- 
wbr, Vitaly

Re: [Qemu-devel] [PATCH] target/mips: Add initrd support for Boston board

2018-11-05 Thread Paul Burton

Hi Aleksandar,

On Tue, Oct 23, 2018 at 03:12:14PM +0200, Aleksandar Markovic wrote:
> From: Aleksandar Rikalo 
> 
> Add support for initial ramdisk loading for the Mips Boston board.
> 
> Reviewed-by: Philippe Mathieu-Daudé 
> Signed-off-by: Aleksandar Rikalo 
> Signed-off-by: Aleksandar Markovic 
> ---
> v2->v3:
>   - a comment was reformatted
>   - rebased to the latest QEMU code
> v1->v2:
>   - 'long initrd_size' is changed to 'target_ulong initrd_size',
> as it should be
>   - error_report() is used instead of fprintf()
> ---
>  hw/mips/boston.c | 55 ++-
>  1 file changed, 46 insertions(+), 9 deletions(-)
> 
> diff --git a/hw/mips/boston.c b/hw/mips/boston.c
> index 6c9c20a..788bf69 100644
> --- a/hw/mips/boston.c
> +++ b/hw/mips/boston.c
> @@ -31,6 +31,7 @@
>  #include "hw/loader-fit.h"
>  #include "hw/mips/cps.h"
>  #include "hw/mips/cpudevs.h"
> +#include "hw/mips/mips.h"
>  #include "hw/pci-host/xilinx-pcie.h"
>  #include "qapi/error.h"
>  #include "qemu/error-report.h"
> @@ -333,10 +334,12 @@ static const void *boston_fdt_filter(void *opaque, 
> const void *fdt_orig,
>  {
>  BostonState *s = BOSTON(opaque);
>  MachineState *machine = s->mach;
> -const char *cmdline;
> +GString *cmdline;
>  int err;
>  void *fdt;
>  size_t fdt_sz, ram_low_sz, ram_high_sz;
> +target_ulong initrd_size;
> +ram_addr_t initrd_offset;
>  
>  fdt_sz = fdt_totalsize(fdt_orig) * 2;
>  fdt = g_malloc0(fdt_sz);
> @@ -347,20 +350,54 @@ static const void *boston_fdt_filter(void *opaque, 
> const void *fdt_orig,
>  return NULL;
>  }
>  
> -cmdline = (machine->kernel_cmdline && machine->kernel_cmdline[0])
> -? machine->kernel_cmdline : " ";

Just a bit of background (which I probably ought to have written as a
comment here): the string consisting of a single space character there
is a workaround for a Linux bug wherein if the DT doesn't contain a
bootargs property on its /chosen node, or it does but it's of length
zero, then any kernel command line arguments that are built-in using
CONFIG_CMDLINE get duplicated. That is, the final command line ends up
being CONFIG_CMDLINE concatenated with itself (with a space character in
the middle).

That can be problematic for some arguments, including the earlycon
argument which is commonly part of CONFIG_CMDLINE - when given twice the
kernel tries to register the same early console twice & shows a scary
warning about it.

This problem should be fixed from Linux v4.16 onwards by commit
8ce355cf2e38 ("MIPS: Setup boot_command_line before plat_mem_setup") and
later commit 951d223c6c16 ("MIPS: Fix CONFIG_CMDLINE handling").

> -err = qemu_fdt_setprop_string(fdt, "/chosen", "bootargs", cmdline);
> -if (err < 0) {
> -fprintf(stderr, "couldn't set /chosen/bootargs\n");
> -return NULL;
> -}
> -
>  ram_low_sz = MIN(256 * MiB, machine->ram_size);
>  ram_high_sz = machine->ram_size - ram_low_sz;
>  qemu_fdt_setprop_sized_cells(fdt, "/memory@0", "reg",
>   1, 0x, 1, ram_low_sz,
>   1, 0x9000, 1, ram_high_sz);
>  
> +cmdline = g_string_new(machine->kernel_cmdline);

As such, I suspect this may be problematic for Linux earlier than v4.16
since it appears to remove that workaround. If you want to remove the
workaround anyway then fair enough, but it seems a little too soon to
me.

> +
> +/* load initrd */
> +initrd_offset = 0;
> +if (machine->initrd_filename) {
> +initrd_size = get_image_size(machine->initrd_filename);
> +if (initrd_size != (target_ulong) -1) {
> +/*
> + * The kernel allocates the bootmap memory in the low memory 
> after
> + * the initrd. It takes at most 128kiB for 2GB RAM and 4kiB 
> pages.
> + */

Just FYI this may have changed from Linux v4.20 where we switch from the
old bootmem allocator to memblock. But if the kernel tries to use memory
containing the initrd then I'd say that's a kernel bug anyway & we
should fix it there. So catering for the old bootmem case probably makes
most sense, to deal with pre-v4.20 kernels.

There's also conceptually no reason Boston has to be limited to 2GB RAM
- it could in theory have more.

> +initrd_offset = (ram_low_sz - initrd_size - 131072
> + - ~INITRD_PAGE_MASK) & INITRD_PAGE_MASK;

As such I wonder if the magic number 131072 would be best changed to
something like this:

  uint32_t min_page = 4 * KiB;

  machine->ram_size / min_page / BITS_PER_BYTE

Although the result of that for 2GB RAM is 64KB for the bitmap - if it
really should be 128KB then it'd probably be worth commenting why &
changing the arithmetic accordingly.

Or given that we constrain Boston to 2GB RAM at the moment anyway
perhaps it'd be better to just go with the hardcoded number & deal with
it if & when we remove the 2

Re: [Qemu-devel] [PATCH v6 2/5] hw/riscv/virt: Connect the gpex PCIe

2018-11-05 Thread Alistair Francis

On Mon, Nov 5, 2018 at 5:24 AM Bin Meng  wrote:
>
> Hi,
>
> On Wed, Oct 31, 2018 at 6:22 AM Alistair Francis
>  wrote:
> >
> > Connect the gpex PCIe device based on the device tree included in the
> > HiFive Unleashed ROM.
> >
> > Signed-off-by: Alistair Francis 
> > ---
> >  default-configs/riscv32-softmmu.mak |   6 +-
> >  default-configs/riscv64-softmmu.mak |   6 +-
> >  hw/riscv/virt.c | 111 
> >  include/hw/riscv/virt.h |   8 +-
> >  4 files changed, 127 insertions(+), 4 deletions(-)
> >
> > diff --git a/default-configs/riscv32-softmmu.mak 
> > b/default-configs/riscv32-softmmu.mak
> > index 7937c69e22..3e3d195f37 100644
> > --- a/default-configs/riscv32-softmmu.mak
> > +++ b/default-configs/riscv32-softmmu.mak
> > @@ -1,7 +1,11 @@
> >  # Default configuration for riscv-softmmu
> >
> > +include pci.mak
> > +
> >  CONFIG_SERIAL=y
> >  CONFIG_VIRTIO_MMIO=y
> > -include virtio.mak
> >
> >  CONFIG_CADENCE=y
> > +
> > +CONFIG_PCI_GENERIC=y
> > +CONFIG_PCI_XILINX=y
> > diff --git a/default-configs/riscv64-softmmu.mak 
> > b/default-configs/riscv64-softmmu.mak
> > index 7937c69e22..3e3d195f37 100644
> > --- a/default-configs/riscv64-softmmu.mak
> > +++ b/default-configs/riscv64-softmmu.mak
> > @@ -1,7 +1,11 @@
> >  # Default configuration for riscv-softmmu
> >
> > +include pci.mak
> > +
> >  CONFIG_SERIAL=y
> >  CONFIG_VIRTIO_MMIO=y
> > -include virtio.mak
> >
> >  CONFIG_CADENCE=y
> > +
> > +CONFIG_PCI_GENERIC=y
> > +CONFIG_PCI_XILINX=y
> > diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c
> > index 4a137a503c..2fbe58ba4b 100644
> > --- a/hw/riscv/virt.c
> > +++ b/hw/riscv/virt.c
> > @@ -39,6 +39,8 @@
> >  #include "sysemu/arch_init.h"
> >  #include "sysemu/device_tree.h"
> >  #include "exec/address-spaces.h"
> > +#include "hw/pci/pci.h"
> > +#include "hw/pci-host/gpex.h"
> >  #include "elf.h"
> >
> >  #include 
> > @@ -55,6 +57,10 @@ static const struct MemmapEntry {
> >  [VIRT_UART0] ={ 0x1000,  0x100 },
> >  [VIRT_VIRTIO] =   { 0x10001000, 0x1000 },
> >  [VIRT_DRAM] = { 0x8000,0x0 },
> > +[VIRT_PCIE_MMIO] = { 0x20, 0x400 },
>
> Does this work with RV32?

That's a good point, probably not. This is based on the HiFive
unleashed values to be as similar as possible.

>
> > +[VIRT_PCIE_PIO] = { 0x201, 0x4000 },
> > +[VIRT_PCIE_ECAM] = { 0x4000, 0x2000 },
> > +
> >  };
> >
> >  static uint64_t load_kernel(const char *kernel_filename)
> > @@ -98,6 +104,37 @@ static hwaddr load_initrd(const char *filename, 
> > uint64_t mem_size,
> >  return *start + size;
> >  }
> >
> > +#define INTERREUPT_MAP_WIDTH 7
> > +
> > +static void create_pcie_irq_map(void *fdt, char *nodename,
> > +uint32_t plic_phandle)
> > +{
> > +int pin;
> > +uint32_t full_irq_map[GPEX_NUM_IRQS * INTERREUPT_MAP_WIDTH] = { 0 };
> > +uint32_t *irq_map = full_irq_map;
> > +
> > +for (pin = 0; pin < GPEX_NUM_IRQS; pin++) {
> > +int irq_nr = PCIE_IRQ + (pin % PCI_NUM_PINS);
> > +int i;
> > +
> > +uint32_t map[] = {
> > +0, 0, 0,
> > +pin + 1, plic_phandle, 0, irq_nr};
> > +
> > +/* Convert map to big endian */
> > +for (i = 0; i < INTERREUPT_MAP_WIDTH; i++) {
> > +irq_map[i] = cpu_to_be32(map[i]);
> > +}
> > +irq_map += INTERREUPT_MAP_WIDTH;
> > +}
> > +
> > +qemu_fdt_setprop(fdt, nodename, "interrupt-map",
> > + full_irq_map, sizeof(full_irq_map));
> > +
> > +qemu_fdt_setprop_cells(fdt, nodename, "interrupt-map-mask",
> > +   0, 0, 0, 0x7);
> > +}
> > +
> >  static void *create_fdt(RISCVVirtState *s, const struct MemmapEntry 
> > *memmap,
> >  uint64_t mem_size, const char *cmdline)
> >  {
> > @@ -233,6 +270,31 @@ static void *create_fdt(RISCVVirtState *s, const 
> > struct MemmapEntry *memmap,
> >  g_free(nodename);
> >  }
> >
> > +nodename = g_strdup_printf("/pci@%lx",
> > +(long) memmap[VIRT_PCIE_MMIO].base);
> > +qemu_fdt_add_subnode(fdt, nodename);
> > +qemu_fdt_setprop_cells(fdt, nodename, "#address-cells", 0x3);
> > +qemu_fdt_setprop_cells(fdt, nodename, "#interrupt-cells", 0x1);
> > +qemu_fdt_setprop_cells(fdt, nodename, "#size-cells", 0x2);
> > +qemu_fdt_setprop_string(fdt, nodename, "compatible",
> > +"pci-host-ecam-generic");
> > +qemu_fdt_setprop_string(fdt, nodename, "device_type", "pci");
> > +qemu_fdt_setprop_cell(fdt, nodename, "linux,pci-domain", 0);
> > +qemu_fdt_setprop_cells(fdt, nodename, "bus-range", 0,
> > +   memmap[VIRT_PCIE_ECAM].base /
> > +   PCIE_MMCFG_SIZE_MIN - 1);
> > +qemu_fdt_setprop(fdt, nodename, "dma-coherent", NULL, 0);
> > +qemu_fdt_setprop_cells(fdt, nodename, "reg", 0x20, 0,
> > +

[Qemu-devel] [PATCH v1 1/1] riscv: spike: Fix memory leak in the board init

2018-11-05 Thread Alistair Francis

Coverity caught a malloc() call that was never freed. This patch ensures
that we free the memory but also updates the allocation to use
g_strdup_printf() instead of malloc().

Signed-off-by: Alistair Francis 
Suggested-by: Peter Maydell 
---
 hw/riscv/spike.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/hw/riscv/spike.c b/hw/riscv/spike.c
index 8a712ed490..268df04c3c 100644
--- a/hw/riscv/spike.c
+++ b/hw/riscv/spike.c
@@ -316,9 +316,7 @@ static void spike_v1_09_1_board_init(MachineState *machine)
 
 /* build config string with supplied memory size */
 char *isa = riscv_isa_string(&s->soc.harts[0]);
-size_t config_string_size = strlen(config_string_tmpl) + 48;
-char *config_string = malloc(config_string_size);
-snprintf(config_string, config_string_size, config_string_tmpl,
+char *config_string = g_strdup_printf(config_string_tmpl,
 (uint64_t)memmap[SPIKE_CLINT].base + SIFIVE_TIME_BASE,
 (uint64_t)memmap[SPIKE_DRAM].base,
 (uint64_t)ram_size, isa,
@@ -345,6 +343,8 @@ static void spike_v1_09_1_board_init(MachineState *machine)
 /* Core Local Interruptor (timer and IPI) */
 sifive_clint_create(memmap[SPIKE_CLINT].base, memmap[SPIKE_CLINT].size,
 smp_cpus, SIFIVE_SIP_BASE, SIFIVE_TIMECMP_BASE, SIFIVE_TIME_BASE);
+
+g_free(config_string);
 }
 
 static void spike_v1_09_1_machine_init(MachineClass *mc)
-- 
2.19.1

Re: [Qemu-devel] [Qemu-arm] [PATCH 2/2] target/arm: Fix ATS1Hx instructions

2018-11-05 Thread Edgar E. Iglesias

On Tue, Oct 16, 2018 at 10:37:03AM +0100, Peter Maydell wrote:
> ATS1HR and ATS1HW (which allow AArch32 EL2 to do address translations
> on the EL2 translation regime) were implemented in commit 14db7fe09a2c8.
> However, we got them wrong: these should do stage 1 address translations
> as defined for NS-EL2, which is ARMMMUIdx_S1E2. We were incorrectly
> making them perform stage 2 translations.
> 
> A few years later in commit 1313e2d7e2cd we forgot entirely that
> we'd implemented ATS1Hx, and added a comment that ATS1Hx were
> "not supported yet". Remove the comment; there is no extra code
> needed to handle these operations in do_ats_write(), because
> arm_s1_regime_using_lpae_format() returns true for ARMMMUIdx_S1E2,
> which forces 64-bit PAR format.
> 
> Signed-off-by: Peter Maydell 

Oops, yes:
Reviewed-by: Edgar E. Iglesias 



> ---
>  target/arm/helper.c | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
> 
> diff --git a/target/arm/helper.c b/target/arm/helper.c
> index dc849b09893..903a832f1fa 100644
> --- a/target/arm/helper.c
> +++ b/target/arm/helper.c
> @@ -2316,7 +2316,7 @@ static uint64_t do_ats_write(CPUARMState *env, uint64_t 
> value,
>   *
>   * (Note that HCR.DC makes HCR.VM behave as if it is 1.)
>   *
> - * ATS1Hx always uses the 64bit format (not supported yet).
> + * ATS1Hx always uses the 64bit format.
>   */
>  format64 = arm_s1_regime_using_lpae_format(env, mmu_idx);
>  
> @@ -2441,7 +2441,7 @@ static void ats1h_write(CPUARMState *env, const 
> ARMCPRegInfo *ri,
>  MMUAccessType access_type = ri->opc2 & 1 ? MMU_DATA_STORE : 
> MMU_DATA_LOAD;
>  uint64_t par64;
>  
> -par64 = do_ats_write(env, value, access_type, ARMMMUIdx_S2NS);
> +par64 = do_ats_write(env, value, access_type, ARMMMUIdx_S1E2);
>  
>  A32_BANKED_CURRENT_REG_SET(env, par, par64);
>  }
> -- 
> 2.19.0
> 
>

Re: [Qemu-devel] [PULL 00/36] Block layer patches

2018-11-05 Thread Peter Maydell

On 5 November 2018 at 16:37, Kevin Wolf  wrote:
> The following changes since commit b2f7a038bb4c4fc5ce6b8486e8513dfd97665e2a:
>
>   Merge remote-tracking branch 'remotes/rth/tags/pull-softfloat-20181104' 
> into staging (2018-11-05 10:32:49 +)
>
> are available in the Git repository at:
>
>   git://repo.or.cz/qemu/kevin.git tags/for-upstream
>
> for you to fetch changes up to 1240ac558d348f6c7a5752b1a57c1da58e4efe3e:
>
>   include: Add a comment to explain the origin of sizes' lookup table 
> (2018-11-05 15:29:59 +0100)
>
> 
> Block layer patches:
>
> - auto-read-only option to fix commit job when used with -blockdev
> - Fix help text related qemu-iotests failure (by improving the help text
>   and updating the reference output)
> - quorum: Add missing checks when adding/removing child nodes
> - Don't take address of fields in packed structs
> - vvfat: Fix crash when reporting error about too many files in directory
>
> 

Applied, thanks.

-- PMM

[Qemu-devel] [PATCH v7 11/12] target/arm: Implement PMSWINC

2018-11-05 Thread Aaron Lindsay

Signed-off-by: Aaron Lindsay 
Reviewed-by: Richard Henderson 
---
 target/arm/helper.c | 39 +--
 1 file changed, 37 insertions(+), 2 deletions(-)

diff --git a/target/arm/helper.c b/target/arm/helper.c
index 11eb62bdda..cff3a5a562 100644
--- a/target/arm/helper.c
+++ b/target/arm/helper.c
@@ -1027,6 +1027,15 @@ static bool event_always_supported(CPUARMState *env)
 return true;
 }
 
+static uint64_t swinc_get_count(CPUARMState *env)
+{
+/*
+ * SW_INCR events are written directly to the pmevcntr's by writes to
+ * PMSWINC, so there is no underlying count maintained by the PMU itself
+ */
+return 0;
+}
+
 /*
  * Return the underlying cycle count for the PMU cycle counters. If we're in
  * usermode, simply return 0.
@@ -1054,6 +1063,10 @@ static uint64_t instructions_get_count(CPUARMState *env)
 #endif
 
 static const pm_event pm_events[] = {
+{ .number = 0x000, /* SW_INCR */
+  .supported = event_always_supported,
+  .get_count = swinc_get_count,
+},
 #ifndef CONFIG_USER_ONLY
 { .number = 0x008, /* INST_RETIRED, Instruction architecturally executed */
   .supported = instructions_supported,
@@ -1378,6 +1391,24 @@ static void pmcr_write(CPUARMState *env, const 
ARMCPRegInfo *ri,
 pmu_op_finish(env);
 }
 
+static void pmswinc_write(CPUARMState *env, const ARMCPRegInfo *ri,
+  uint64_t value)
+{
+unsigned int i;
+for (i = 0; i < pmu_num_counters(env); i++) {
+/* Increment a counter's count iff: */
+if ((value & (1 << i)) && /* counter's bit is set */
+/* counter is enabled and not filtered */
+pmu_counter_enabled(env, i) &&
+/* counter is SW_INCR */
+(env->cp15.c14_pmevtyper[i] & PMXEVTYPER_EVTCOUNT) == 0x0) {
+pmevcntr_op_start(env, i);
+env->cp15.c14_pmevcntr[i]++;
+pmevcntr_op_finish(env, i);
+}
+}
+}
+
 static uint64_t pmccntr_read(CPUARMState *env, const ARMCPRegInfo *ri)
 {
 uint64_t ret;
@@ -1798,9 +1829,13 @@ static const ARMCPRegInfo v7_cp_reginfo[] = {
   .fieldoffset = offsetof(CPUARMState, cp15.c9_pmovsr),
   .writefn = pmovsr_write,
   .raw_writefn = raw_write },
-/* Unimplemented so WI. */
 { .name = "PMSWINC", .cp = 15, .crn = 9, .crm = 12, .opc1 = 0, .opc2 = 4,
-  .access = PL0_W, .accessfn = pmreg_access_swinc, .type = ARM_CP_NOP },
+  .access = PL0_W, .accessfn = pmreg_access_swinc, .type = ARM_CP_NO_RAW,
+  .writefn = pmswinc_write },
+{ .name = "PMSWINC_EL0", .state = ARM_CP_STATE_AA64,
+  .opc0 = 3, .opc1 = 3, .crn = 9, .crm = 12, .opc2 = 4,
+  .access = PL0_W, .accessfn = pmreg_access_swinc, .type = ARM_CP_NO_RAW,
+  .writefn = pmswinc_write },
 { .name = "PMSELR", .cp = 15, .crn = 9, .crm = 12, .opc1 = 0, .opc2 = 5,
   .access = PL0_RW, .type = ARM_CP_ALIAS,
   .fieldoffset = offsetoflow32(CPUARMState, cp15.c9_pmselr),
-- 
2.19.1

[Qemu-devel] [PATCH v7 10/12] target/arm: PMU: Set PMCR.N to 4

2018-11-05 Thread Aaron Lindsay

This both advertises that we support four counters and enables them,
because pmu_num_counters() reads this value from PMCR.

Signed-off-by: Aaron Lindsay 
Signed-off-by: Aaron Lindsay 
---
 target/arm/helper.c | 10 +-
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/target/arm/helper.c b/target/arm/helper.c
index e3ec36490c..11eb62bdda 100644
--- a/target/arm/helper.c
+++ b/target/arm/helper.c
@@ -1753,7 +1753,7 @@ static const ARMCPRegInfo v7_cp_reginfo[] = {
   .access = PL1_W, .type = ARM_CP_NOP },
 /* Performance monitors are implementation defined in v7,
  * but with an ARM recommended set of registers, which we
- * follow (although we don't actually implement any counters)
+ * follow.
  *
  * Performance registers fall into three categories:
  *  (a) always UNDEF in PL0, RW in PL1 (PMINTENSET, PMINTENCLR)
@@ -5508,10 +5508,10 @@ void register_cp_regs_for_features(ARMCPU *cpu)
 }
 if (arm_feature(env, ARM_FEATURE_V7)) {
 /* v7 performance monitor control register: same implementor
- * field as main ID register, and we implement only the cycle
- * count register.
+ * field as main ID register, and we implement four counters in
+ * addition to the cycle count register.
  */
-unsigned int i, pmcrn = 0;
+unsigned int i, pmcrn = 4;
 ARMCPRegInfo pmcr = {
 .name = "PMCR", .cp = 15, .crn = 9, .crm = 12, .opc1 = 0, .opc2 = 
0,
 .access = PL0_RW,
@@ -5526,7 +5526,7 @@ void register_cp_regs_for_features(ARMCPU *cpu)
 .access = PL0_RW, .accessfn = pmreg_access,
 .type = ARM_CP_IO,
 .fieldoffset = offsetof(CPUARMState, cp15.c9_pmcr),
-.resetvalue = cpu->midr & 0xff00,
+.resetvalue = (cpu->midr & 0xff00) | (pmcrn << PMCRN_SHIFT),
 .writefn = pmcr_write, .raw_writefn = raw_write,
 };
 define_one_arm_cp_reg(cpu, &pmcr);
-- 
2.19.1

[Qemu-devel] [PATCH v7 12/12] target/arm: Send interrupts on PMU counter overflow

2018-11-05 Thread Aaron Lindsay

Set up a QEMUTimer to get a callback when we expect counters to next
overflow and trigger an interrupt at that time.

Signed-off-by: Aaron Lindsay 
---
 target/arm/cpu.c|  11 
 target/arm/cpu.h|   7 +++
 target/arm/helper.c | 126 +---
 3 files changed, 138 insertions(+), 6 deletions(-)

diff --git a/target/arm/cpu.c b/target/arm/cpu.c
index d1c766d180..7cb6a76afb 100644
--- a/target/arm/cpu.c
+++ b/target/arm/cpu.c
@@ -764,6 +764,12 @@ static void arm_cpu_finalizefn(Object *obj)
 QLIST_REMOVE(hook, node);
 g_free(hook);
 }
+#ifndef CONFIG_USER_ONLY
+if (arm_feature(&cpu->env, ARM_FEATURE_PMU)) {
+timer_deinit(cpu->pmu_timer);
+timer_free(cpu->pmu_timer);
+}
+#endif
 }
 
 static void arm_cpu_realizefn(DeviceState *dev, Error **errp)
@@ -967,6 +973,11 @@ static void arm_cpu_realizefn(DeviceState *dev, Error 
**errp)
 arm_register_pre_el_change_hook(cpu, &pmu_pre_el_change, 0);
 arm_register_el_change_hook(cpu, &pmu_post_el_change, 0);
 }
+
+#ifndef CONFIG_USER_ONLY
+cpu->pmu_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, arm_pmu_timer_cb,
+cpu);
+#endif
 } else {
 cpu->pmceid0 = 0x;
 cpu->pmceid1 = 0x;
diff --git a/target/arm/cpu.h b/target/arm/cpu.h
index 067f6efdb6..fa49dc4c47 100644
--- a/target/arm/cpu.h
+++ b/target/arm/cpu.h
@@ -730,6 +730,8 @@ struct ARMCPU {
 
 /* Timers used by the generic (architected) timer */
 QEMUTimer *gt_timer[NUM_GTIMERS];
+/* Timer used by the PMU */
+QEMUTimer *pmu_timer;
 /* GPIO outputs for generic timer */
 qemu_irq gt_timer_outputs[NUM_GTIMERS];
 /* GPIO output for GICv3 maintenance interrupt signal */
@@ -988,6 +990,11 @@ void pmccntr_op_finish(CPUARMState *env);
 void pmu_op_start(CPUARMState *env);
 void pmu_op_finish(CPUARMState *env);
 
+/**
+ * Called when a PMU counter is due to overflow
+ */
+void arm_pmu_timer_cb(void *opaque);
+
 /**
  * Functions to register as EL change hooks for PMU mode filtering
  */
diff --git a/target/arm/helper.c b/target/arm/helper.c
index cff3a5a562..6c3f997b0e 100644
--- a/target/arm/helper.c
+++ b/target/arm/helper.c
@@ -977,6 +977,7 @@ static const ARMCPRegInfo v6_cp_reginfo[] = {
 /* Definitions for the PMU registers */
 #define PMCRN_MASK  0xf800
 #define PMCRN_SHIFT 11
+#define PMCRLC  0x40
 #define PMCRDP  0x10
 #define PMCRD   0x8
 #define PMCRC   0x4
@@ -996,6 +997,8 @@ static const ARMCPRegInfo v6_cp_reginfo[] = {
PMXEVTYPER_M | PMXEVTYPER_MT | \
PMXEVTYPER_EVTCOUNT)
 
+#define PMEVCNTR_OVERFLOW_MASK ((uint64_t)1 << 31)
+
 #define PMCCFILTR 0xf800
 #define PMCCFILTR_M   PMXEVTYPER_M
 #define PMCCFILTR_EL0 (PMCCFILTR | PMCCFILTR_M)
@@ -1020,6 +1023,11 @@ typedef struct pm_event {
  * counters hold a difference from the return value from this function
  */
 uint64_t (*get_count)(CPUARMState *);
+/* Return how many nanoseconds it will take (at a minimum) for count events
+ * to occur. A negative value indicates the counter will never overflow, or
+ * that the counter has otherwise arranged for the overflow bit to be set
+ * and the PMU interrupt to be raised on overflow. */
+int64_t (*ns_per_count)(uint64_t);
 } pm_event;
 
 static bool event_always_supported(CPUARMState *env)
@@ -1036,6 +1044,11 @@ static uint64_t swinc_get_count(CPUARMState *env)
 return 0;
 }
 
+static int64_t swinc_ns_per(uint64_t ignored)
+{
+return -1;
+}
+
 /*
  * Return the underlying cycle count for the PMU cycle counters. If we're in
  * usermode, simply return 0.
@@ -1051,6 +1064,11 @@ static uint64_t cycles_get_count(CPUARMState *env)
 }
 
 #ifndef CONFIG_USER_ONLY
+static int64_t cycles_ns_per(uint64_t cycles)
+{
+return (ARM_CPU_FREQ / NANOSECONDS_PER_SECOND) * cycles;
+}
+
 static bool instructions_supported(CPUARMState *env)
 {
 return use_icount == 1 /* Precise instruction counting */;
@@ -1060,21 +1078,29 @@ static uint64_t instructions_get_count(CPUARMState *env)
 {
 return (uint64_t)cpu_get_icount_raw();
 }
+
+static int64_t instructions_ns_per(uint64_t icount)
+{
+return cpu_icount_to_ns((int64_t)icount);
+}
 #endif
 
 static const pm_event pm_events[] = {
 { .number = 0x000, /* SW_INCR */
   .supported = event_always_supported,
   .get_count = swinc_get_count,
+  .ns_per_count = swinc_ns_per,
 },
 #ifndef CONFIG_USER_ONLY
 { .number = 0x008, /* INST_RETIRED, Instruction architecturally executed */
   .supported = instructions_supported,
   .get_count = instructions_get_count,
+  .ns_per_count = instructions_ns_per,
 },
 { .number = 0x011, /* CPU_CYCLES, Cycle */
   .supported = event_always_supported,
   .get_count = cycles_get_count,
+  .ns_per_count = cycles_ns_per,
 }
 #endif
 };
@@ -1273,6 +1299,13 @@ static bool pmu_coun

Re: [Qemu-devel] [PATCH 0/2] target/mips: Fix decoding mechanisms of R5900 M{F, T}{HI, LO}1 and DIV[U]1

2018-11-05 Thread Aleksandar Markovic

Hello, Fredrik.

I appreciate your response and efforts!

>
> From: Fredrik Noring 
>
> Subject: Re: [PATCH 0/2] target/mips: Fix decoding mechanisms of R5900 
> M{F,T}{HI,LO}1 and DIV[U]1
>
> Thank you for your review, Aleksandar,
>
> > For LL, SC, LLD and SCD instructions, there is a need to properly insulate
> > their R5900 versions too, similar to this:
> >
> > case OPC_SC:
> > if(ctx->insn_flags & INSN_R5900) {
> >  check_insn_opc_user_only(ctx, INSN_R5900);
> > } else {
> > check_insn(ctx, ISA_MIPS2);
> > }
> > gen_st_cond(ctx, op, rt, rs, imm);
> > break;
>
> Would you accept the simplification to omit the else clause? Like this:
>
> case OPC_SC:
> if (ctx->insn_flags & INSN_R5900) {
> check_insn_opc_user_only(ctx, INSN_R5900);
> }
> check_insn(ctx, ISA_MIPS2);
> check_insn_opc_removed(ctx, ISA_MIPS32R6);
> gen_st_cond(ctx, op, rt, rs, imm);
> break;
>

I think the following code would be even better:

case OPC_SC:
check_insn(ctx, ISA_MIPS2);
check_insn_opc_removed(ctx, ISA_MIPS32R6);
if (ctx->insn_flags & INSN_R5900) {
check_insn_opc_user_only(ctx, INSN_R5900);
}
gen_st_cond(ctx, op, rt, rs, imm);
break;

> The code will, of course, expand into a double-check of INSN_R5900:
>
> if (ctx->insn_flags & INSN_R5900) {
> #ifndef CONFIG_USER_ONLY
> if (unlikely(ctx->insn_flags & INSN_R5900)) {
> generate_exception_end(ctx, EXCP_RI);
> }
> #endif
> }
>

I don't mind. Later on, we can drop the second argument of 
check_insn_opc_user_only() altogether, and the code would expand to the minimal 
and clear form:

if (ctx->insn_flags & INSN_R5900) {
#ifndef CONFIG_USER_ONLY
generate_exception_end(ctx, EXCP_RI);
#endif
}

but at this moment this is not a source of concern to me at all.

> > (the code above is just a form of pseudocode illustrating the idea; I
> > don't guarantee the correctness for build purposes, or if this is the best
> > code organization)
> >
> > Non-R5900 code (for the time being) should never invoke
> > check_insn_opc_user_only(). *The only way* of distinguishing R5900 code
> > paths from the other CPUs code paths should be by using
> > "if(ctx->insn_flags & INSN_R5900)"!
>
> OK.
>
> > The changes in decode_opc_special_legacy() shouldn't be there; instead,
> > there should be a separate function decode_opc_special_tx59() or so.
>
> Sure, I will copy the 82 line function then...
>

No, no, you don't need to copy 82 lines. First, you can assume that you are 
already in the INSN_R5900 case - no need for frequent check_insn()s. Further, 
you can omit everything that is not needed for R5900 (for example, the entire 
OPC_MOVCI case).

> ..., and patch the following:
>
> --- a/target/mips/translate.c
> +++ b/target/mips/translate.c
> @@ -23904,7 +23904,7 @@ static void decode_opc_special_legacy(CPUMIPSState 
> *env, DisasContext *ctx)
>  case OPC_MOVN: /* Conditional move */
>  case OPC_MOVZ:
>  check_insn(ctx, ISA_MIPS4 | ISA_MIPS32 |
> -   INSN_LOONGSON2E | INSN_LOONGSON2F | INSN_R5900);
> +   INSN_LOONGSON2E | INSN_LOONGSON2F);
>  gen_cond_move(ctx, op1, rd, rs, rt);
>  break;
>  case OPC_MFHI:  /* Move from HI/LO */
> @@ -23931,8 +23931,6 @@ static void decode_opc_special_legacy(CPUMIPSState 
> *env, DisasContext *ctx)
>  check_insn(ctx, INSN_VR54XX);
>  op1 = MASK_MUL_VR54XX(ctx->opcode);
>  gen_mul_vr54xx(ctx, op1, rd, rs, rt);
> -} else if (ctx->insn_flags & INSN_R5900) {
> -gen_mul_txx9(ctx, op1, rd, rs, rt);
>  } else {
>  gen_muldiv(ctx, op1, rd & 3, rs, rt);
>  }
> @@ -23947,7 +23945,6 @@ static void decode_opc_special_legacy(CPUMIPSState 
> *env, DisasContext *ctx)
>  case OPC_DDIV:
>  case OPC_DDIVU:
>  check_insn(ctx, ISA_MIPS3);
> -check_insn_opc_user_only(ctx, INSN_R5900);
>  check_mips_64(ctx);
>  gen_muldiv(ctx, op1, 0, rs, rt);
>  break;
>

Exactly!

> Fredrik
>

Aleksandar


From: Fredrik Noring 
Sent: Monday, November 5, 2018 7:12:42 PM
To: Aleksandar Markovic
Cc: Aurelien Jarno; Philippe Mathieu-Daudé; Jürgen Urban; Maciej W. Rozycki; 
qemu-devel@nongnu.org
Subject: Re: [PATCH 0/2] target/mips: Fix decoding mechanisms of R5900 
M{F,T}{HI,LO}1 and DIV[U]1

Thank you for your review, Aleksandar,

> For LL, SC, LLD and SCD instructions, there is a need to properly insulate
> their R5900 versions too, similar to this:
>
> case OPC_SC:
> if(ctx->insn_flags & INSN_R5900) {
>  check_insn_opc_user_only(ctx, INSN_R5900);
> } else {
> check_insn(ctx, ISA_MIPS2);
> }
> gen_st_cond(ctx, op, rt, rs, imm);
>

[Qemu-devel] [PATCH v7 09/12] target/arm: PMU: Add instruction and cycle events

2018-11-05 Thread Aaron Lindsay

The instruction event is only enabled when icount is used; cycles are
always supported. Always defining cycles_get_count (but altering its
behavior depending on CONFIG_USER_ONLY) allows us to remove some
CONFIG_USER_ONLY #ifdefs throughout the rest of the code.

Signed-off-by: Aaron Lindsay 
Signed-off-by: Aaron Lindsay 
Reviewed-by: Peter Maydell 
---
 target/arm/helper.c | 90 ++---
 1 file changed, 44 insertions(+), 46 deletions(-)

diff --git a/target/arm/helper.c b/target/arm/helper.c
index b7297d72a8..e3ec36490c 100644
--- a/target/arm/helper.c
+++ b/target/arm/helper.c
@@ -15,6 +15,7 @@
 #include "arm_ldst.h"
 #include  /* For crc32 */
 #include "exec/semihost.h"
+#include "sysemu/cpus.h"
 #include "sysemu/kvm.h"
 #include "fpu/softfloat.h"
 #include "qemu/range.h"
@@ -1021,9 +1022,50 @@ typedef struct pm_event {
 uint64_t (*get_count)(CPUARMState *);
 } pm_event;
 
+static bool event_always_supported(CPUARMState *env)
+{
+return true;
+}
+
+/*
+ * Return the underlying cycle count for the PMU cycle counters. If we're in
+ * usermode, simply return 0.
+ */
+static uint64_t cycles_get_count(CPUARMState *env)
+{
+#ifndef CONFIG_USER_ONLY
+return muldiv64(qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL),
+   ARM_CPU_FREQ, NANOSECONDS_PER_SECOND);
+#else
+return cpu_get_host_ticks();
+#endif
+}
+
+#ifndef CONFIG_USER_ONLY
+static bool instructions_supported(CPUARMState *env)
+{
+return use_icount == 1 /* Precise instruction counting */;
+}
+
+static uint64_t instructions_get_count(CPUARMState *env)
+{
+return (uint64_t)cpu_get_icount_raw();
+}
+#endif
+
 static const pm_event pm_events[] = {
+#ifndef CONFIG_USER_ONLY
+{ .number = 0x008, /* INST_RETIRED, Instruction architecturally executed */
+  .supported = instructions_supported,
+  .get_count = instructions_get_count,
+},
+{ .number = 0x011, /* CPU_CYCLES, Cycle */
+  .supported = event_always_supported,
+  .get_count = cycles_get_count,
+}
+#endif
 };
-#define MAX_EVENT_ID 0x0
+#define MAX_EVENT_ID 0x11
 #define UNSUPPORTED_EVENT UINT16_MAX
 static uint16_t supported_event_map[MAX_EVENT_ID + 1];
 
@@ -1116,8 +1158,6 @@ static CPAccessResult pmreg_access_swinc(CPUARMState *env,
 return pmreg_access(env, ri, isread);
 }
 
-#ifndef CONFIG_USER_ONLY
-
 static CPAccessResult pmreg_access_selr(CPUARMState *env,
 const ARMCPRegInfo *ri,
 bool isread)
@@ -1228,9 +1268,7 @@ static bool pmu_counter_enabled(CPUARMState *env, uint8_t 
counter)
  */
 void pmccntr_op_start(CPUARMState *env)
 {
-uint64_t cycles = 0;
-cycles = muldiv64(qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL),
-  ARM_CPU_FREQ, NANOSECONDS_PER_SECOND);
+uint64_t cycles = cycles_get_count(env);
 
 if (pmu_counter_enabled(env, 31)) {
 uint64_t eff_cycles = cycles;
@@ -1376,42 +1414,6 @@ static void pmccntr_write32(CPUARMState *env, const 
ARMCPRegInfo *ri,
 pmccntr_write(env, ri, deposit64(cur_val, 0, 32, value));
 }
 
-#else /* CONFIG_USER_ONLY */
-
-void pmccntr_op_start(CPUARMState *env)
-{
-}
-
-void pmccntr_op_finish(CPUARMState *env)
-{
-}
-
-void pmevcntr_op_start(CPUARMState *env, uint8_t i)
-{
-}
-
-void pmevcntr_op_finish(CPUARMState *env, uint8_t i)
-{
-}
-
-void pmu_op_start(CPUARMState *env)
-{
-}
-
-void pmu_op_finish(CPUARMState *env)
-{
-}
-
-void pmu_pre_el_change(ARMCPU *cpu, void *ignored)
-{
-}
-
-void pmu_post_el_change(ARMCPU *cpu, void *ignored)
-{
-}
-
-#endif
-
 static void pmccfiltr_write(CPUARMState *env, const ARMCPRegInfo *ri,
 uint64_t value)
 {
@@ -1799,7 +1801,6 @@ static const ARMCPRegInfo v7_cp_reginfo[] = {
 /* Unimplemented so WI. */
 { .name = "PMSWINC", .cp = 15, .crn = 9, .crm = 12, .opc1 = 0, .opc2 = 4,
   .access = PL0_W, .accessfn = pmreg_access_swinc, .type = ARM_CP_NOP },
-#ifndef CONFIG_USER_ONLY
 { .name = "PMSELR", .cp = 15, .crn = 9, .crm = 12, .opc1 = 0, .opc2 = 5,
   .access = PL0_RW, .type = ARM_CP_ALIAS,
   .fieldoffset = offsetoflow32(CPUARMState, cp15.c9_pmselr),
@@ -1821,7 +1822,6 @@ static const ARMCPRegInfo v7_cp_reginfo[] = {
   .fieldoffset = offsetof(CPUARMState, cp15.c15_ccnt),
   .readfn = pmccntr_read, .writefn = pmccntr_write,
   .raw_readfn = raw_read, .raw_writefn = raw_write, },
-#endif
 { .name = "PMCCFILTR", .cp = 15, .opc1 = 0, .crn = 14, .crm = 15, .opc2 = 
7,
   .writefn = pmccfiltr_write_a32, .readfn = pmccfiltr_read_a32,
   .access = PL0_RW, .accessfn = pmreg_access,
@@ -5512,7 +5512,6 @@ void register_cp_regs_for_features(ARMCPU *cpu)
  * count register.
  */
 unsigned int i, pmcrn = 0;
-#ifndef CONFIG_USER_ONLY
 ARMCPRegInfo pmcr = {
 .name = "PMCR", .cp = 15, .crn = 9, .crm = 12, .opc1 = 0, .opc2 = 
0,
 .access = PL0_RW,
@@ -5569,7 +5568,6 @@ void register_cp_regs_for

[Qemu-devel] [PATCH v7 07/12] target/arm: Add array for supported PMU events, generate PMCEID[01]

2018-11-05 Thread Aaron Lindsay

This commit doesn't add any supported events, but provides the framework
for adding them. We store the pm_event structs in a simple array, and
provide the mapping from the event numbers to array indexes in the
supported_event_map array. Because the value of PMCEID[01] depends upon
which events are supported at runtime, generate it dynamically.

Signed-off-by: Aaron Lindsay 
---
 target/arm/cpu.c| 20 +---
 target/arm/cpu.h| 10 ++
 target/arm/cpu64.c  |  4 
 target/arm/helper.c | 42 ++
 4 files changed, 65 insertions(+), 11 deletions(-)

diff --git a/target/arm/cpu.c b/target/arm/cpu.c
index 9e54c56379..d1c766d180 100644
--- a/target/arm/cpu.c
+++ b/target/arm/cpu.c
@@ -957,9 +957,19 @@ static void arm_cpu_realizefn(DeviceState *dev, Error 
**errp)
 if (!cpu->has_pmu) {
 unset_feature(env, ARM_FEATURE_PMU);
 cpu->id_aa64dfr0 &= ~0xf00;
-} else if (!kvm_enabled()) {
-arm_register_pre_el_change_hook(cpu, &pmu_pre_el_change, 0);
-arm_register_el_change_hook(cpu, &pmu_post_el_change, 0);
+}
+if (arm_feature(env, ARM_FEATURE_PMU)) {
+uint64_t pmceid = get_pmceid(&cpu->env);
+cpu->pmceid0 = extract64(pmceid, 0, 32);
+cpu->pmceid1 = extract64(pmceid, 32, 32);
+
+if (!kvm_enabled()) {
+arm_register_pre_el_change_hook(cpu, &pmu_pre_el_change, 0);
+arm_register_el_change_hook(cpu, &pmu_post_el_change, 0);
+}
+} else {
+cpu->pmceid0 = 0x;
+cpu->pmceid1 = 0x;
 }
 
 if (!arm_feature(env, ARM_FEATURE_EL2)) {
@@ -1601,8 +1611,6 @@ static void cortex_a7_initfn(Object *obj)
 cpu->id_pfr0 = 0x1131;
 cpu->id_pfr1 = 0x00011011;
 cpu->id_dfr0 = 0x02010555;
-cpu->pmceid0 = 0x;
-cpu->pmceid1 = 0x;
 cpu->id_afr0 = 0x;
 cpu->id_mmfr0 = 0x10101105;
 cpu->id_mmfr1 = 0x4000;
@@ -1647,8 +1655,6 @@ static void cortex_a15_initfn(Object *obj)
 cpu->id_pfr0 = 0x1131;
 cpu->id_pfr1 = 0x00011011;
 cpu->id_dfr0 = 0x02010555;
-cpu->pmceid0 = 0x000;
-cpu->pmceid1 = 0x;
 cpu->id_afr0 = 0x;
 cpu->id_mmfr0 = 0x10201105;
 cpu->id_mmfr1 = 0x2000;
diff --git a/target/arm/cpu.h b/target/arm/cpu.h
index 92282cd976..f991ff370e 100644
--- a/target/arm/cpu.h
+++ b/target/arm/cpu.h
@@ -991,6 +991,16 @@ void pmu_op_finish(CPUARMState *env);
 void pmu_pre_el_change(ARMCPU *cpu, void *ignored);
 void pmu_post_el_change(ARMCPU *cpu, void *ignored);
 
+/*
+ * get_pmceid
+ * @env: CPUARMState
+ *
+ * Return the PMCEID[01] register values corresponding to the counters which
+ * are supported given the current configuration (0 is low 32, 1 is high 32
+ * bits)
+ */
+uint64_t get_pmceid(CPUARMState *env);
+
 /* SCTLR bit meanings. Several bits have been reused in newer
  * versions of the architecture; in that case we define constants
  * for both old and new bit meanings. Code which tests against those
diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c
index 873f059bf2..a1aad772fa 100644
--- a/target/arm/cpu64.c
+++ b/target/arm/cpu64.c
@@ -138,8 +138,6 @@ static void aarch64_a57_initfn(Object *obj)
 cpu->isar.id_isar6 = 0;
 cpu->isar.id_aa64pfr0 = 0x;
 cpu->id_aa64dfr0 = 0x10305106;
-cpu->pmceid0 = 0x;
-cpu->pmceid1 = 0x;
 cpu->isar.id_aa64isar0 = 0x00011120;
 cpu->id_aa64mmfr0 = 0x1124;
 cpu->dbgdidr = 0x3516d000;
@@ -246,8 +244,6 @@ static void aarch64_a72_initfn(Object *obj)
 cpu->isar.id_isar5 = 0x00011121;
 cpu->isar.id_aa64pfr0 = 0x;
 cpu->id_aa64dfr0 = 0x10305106;
-cpu->pmceid0 = 0x;
-cpu->pmceid1 = 0x;
 cpu->isar.id_aa64isar0 = 0x00011120;
 cpu->id_aa64mmfr0 = 0x1124;
 cpu->dbgdidr = 0x3516d000;
diff --git a/target/arm/helper.c b/target/arm/helper.c
index 6724d97346..b9d8441497 100644
--- a/target/arm/helper.c
+++ b/target/arm/helper.c
@@ -1009,6 +1009,48 @@ static inline uint64_t pmu_counter_mask(CPUARMState *env)
   return (1 << 31) | ((1 << pmu_num_counters(env)) - 1);
 }
 
+typedef struct pm_event {
+uint16_t number; /* PMEVTYPER.evtCount is 16 bits wide */
+/* If the event is supported on this CPU (used to generate PMCEID[01]) */
+bool (*supported)(CPUARMState *);
+/*
+ * Retrieve the current count of the underlying event. The programmed
+ * counters hold a difference from the return value from this function
+ */
+uint64_t (*get_count)(CPUARMState *);
+} pm_event;
+
+static const pm_event pm_events[] = {
+};
+#define MAX_EVENT_ID 0x0
+#define UNSUPPORTED_EVENT UINT16_MAX
+static uint16_t supported_event_map[MAX_EVENT_ID + 1];
+
+/*
+ * Called upon initialization to build PMCEID0 (low 32 bits) and PMCEID1 (high
+ * 32). We also use it to build a map of ARM event numbers to indices in
+ * our pm_events array.
+ */
+uint64_t get_pmceid(CPUARMState *env)

[Qemu-devel] [PATCH v7 08/12] target/arm: Finish implementation of PM[X]EVCNTR and PM[X]EVTYPER

2018-11-05 Thread Aaron Lindsay

Add arrays to hold the registers, the definitions themselves, access
functions, and logic to reset counters when PMCR.P is set. Update
filtering code to support counters other than PMCCNTR. Support migration
with raw read/write functions.

Signed-off-by: Aaron Lindsay 
Signed-off-by: Aaron Lindsay 
Reviewed-by: Richard Henderson 
---
 target/arm/cpu.h|   3 +
 target/arm/helper.c | 296 +---
 2 files changed, 282 insertions(+), 17 deletions(-)

diff --git a/target/arm/cpu.h b/target/arm/cpu.h
index f991ff370e..067f6efdb6 100644
--- a/target/arm/cpu.h
+++ b/target/arm/cpu.h
@@ -482,6 +482,9 @@ typedef struct CPUARMState {
  * pmccntr_op_finish.
  */
 uint64_t c15_ccnt_delta;
+uint64_t c14_pmevcntr[31];
+uint64_t c14_pmevcntr_delta[31];
+uint64_t c14_pmevtyper[31];
 uint64_t pmccfiltr_el0; /* Performance Monitor Filter Register */
 uint64_t vpidr_el2; /* Virtualization Processor ID Register */
 uint64_t vmpidr_el2; /* Virtualization Multiprocessor ID Register */
diff --git a/target/arm/helper.c b/target/arm/helper.c
index b9d8441497..b7297d72a8 100644
--- a/target/arm/helper.c
+++ b/target/arm/helper.c
@@ -979,6 +979,7 @@ static const ARMCPRegInfo v6_cp_reginfo[] = {
 #define PMCRDP  0x10
 #define PMCRD   0x8
 #define PMCRC   0x4
+#define PMCRP   0x2
 #define PMCRE   0x1
 
 #define PMXEVTYPER_P  0x8000
@@ -1051,6 +1052,17 @@ uint64_t get_pmceid(CPUARMState *env)
 return pmceid;
 }
 
+/*
+ * Check at runtime whether a PMU event is supported for the current machine
+ */
+static bool event_supported(uint16_t number)
+{
+if (number > MAX_EVENT_ID) {
+return false;
+}
+return supported_event_map[number] != UNSUPPORTED_EVENT;
+}
+
 static CPAccessResult pmreg_access(CPUARMState *env, const ARMCPRegInfo *ri,
bool isread)
 {
@@ -1170,9 +1182,11 @@ static bool pmu_counter_enabled(CPUARMState *env, 
uint8_t counter)
 prohibited = env->cp15.c9_pmcr & PMCRDP;
 }
 
-/* TODO Remove assert, set filter to correct PMEVTYPER */
-assert(counter == 31);
-filter = env->cp15.pmccfiltr_el0;
+if (counter == 31) {
+filter = env->cp15.pmccfiltr_el0;
+} else {
+filter = env->cp15.c14_pmevtyper[counter];
+}
 
 p   = filter & PMXEVTYPER_P;
 u   = filter & PMXEVTYPER_U;
@@ -1192,6 +1206,17 @@ static bool pmu_counter_enabled(CPUARMState *env, 
uint8_t counter)
 filtered = m != p;
 }
 
+if (counter != 31) {
+/*
+ * If not checking PMCCNTR, ensure the counter is setup to an event we
+ * support
+ */
+uint16_t event = filter & PMXEVTYPER_EVTCOUNT;
+if (!event_supported(event)) {
+return false;
+}
+}
+
 return enabled && !prohibited && !filtered;
 }
 
@@ -1238,14 +1263,47 @@ void pmccntr_op_finish(CPUARMState *env)
 }
 }
 
+static void pmevcntr_op_start(CPUARMState *env, uint8_t counter)
+{
+
+uint16_t event = env->cp15.c14_pmevtyper[counter] & PMXEVTYPER_EVTCOUNT;
+uint64_t count = 0;
+if (event_supported(event)) {
+uint16_t event_idx = supported_event_map[event];
+count = pm_events[event_idx].get_count(env);
+}
+
+if (pmu_counter_enabled(env, counter)) {
+env->cp15.c14_pmevcntr[counter] =
+count - env->cp15.c14_pmevcntr_delta[counter];
+}
+env->cp15.c14_pmevcntr_delta[counter] = count;
+}
+
+static void pmevcntr_op_finish(CPUARMState *env, uint8_t counter)
+{
+if (pmu_counter_enabled(env, counter)) {
+env->cp15.c14_pmevcntr_delta[counter] -=
+env->cp15.c14_pmevcntr[counter];
+}
+}
+
 void pmu_op_start(CPUARMState *env)
 {
+unsigned int i;
 pmccntr_op_start(env);
+for (i = 0; i < pmu_num_counters(env); i++) {
+pmevcntr_op_start(env, i);
+}
 }
 
 void pmu_op_finish(CPUARMState *env)
 {
+unsigned int i;
 pmccntr_op_finish(env);
+for (i = 0; i < pmu_num_counters(env); i++) {
+pmevcntr_op_finish(env, i);
+}
 }
 
 void pmu_pre_el_change(ARMCPU *cpu, void *ignored)
@@ -1268,6 +1326,13 @@ static void pmcr_write(CPUARMState *env, const 
ARMCPRegInfo *ri,
 env->cp15.c15_ccnt = 0;
 }
 
+if (value & PMCRP) {
+unsigned int i;
+for (i = 0; i < pmu_num_counters(env); i++) {
+env->cp15.c14_pmevcntr[i] = 0;
+}
+}
+
 /* only the DP, X, D and E bits are writable */
 env->cp15.c9_pmcr &= ~0x39;
 env->cp15.c9_pmcr |= (value & 0x39);
@@ -1321,6 +1386,14 @@ void pmccntr_op_finish(CPUARMState *env)
 {
 }
 
+void pmevcntr_op_start(CPUARMState *env, uint8_t i)
+{
+}
+
+void pmevcntr_op_finish(CPUARMState *env, uint8_t i)
+{
+}
+
 void pmu_op_start(CPUARMState *env)
 {
 }
@@ -1391,30 +1464,174 @@ static void pmovsset_write(CPUARMState *env, const 
ARMCPRegInfo *ri,
 env->cp15.c9_pmovsr |= value;
 }
 
-static void pmxevtyper

[Qemu-devel] [PATCH v7 02/12] target/arm: Reorganize PMCCNTR accesses

2018-11-05 Thread Aaron Lindsay

pmccntr_read and pmccntr_write contained duplicate code that was already
being handled by pmccntr_sync. Consolidate the duplicated code into two
functions: pmccntr_op_start and pmccntr_op_finish. Add a companion to
c15_ccnt in CPUARMState so that we can simultaneously save both the
architectural register value and the last underlying cycle count - this
ensures time isn't lost and will also allow us to access the 'old'
architectural register value in order to detect overflows in later
patches.

Signed-off-by: Aaron Lindsay 
Signed-off-by: Aaron Lindsay 
---
 target/arm/cpu.h|  38 +++
 target/arm/helper.c | 114 +++-
 2 files changed, 99 insertions(+), 53 deletions(-)

diff --git a/target/arm/cpu.h b/target/arm/cpu.h
index b5eff79f73..50a0862c84 100644
--- a/target/arm/cpu.h
+++ b/target/arm/cpu.h
@@ -468,10 +468,20 @@ typedef struct CPUARMState {
 uint64_t oslsr_el1; /* OS Lock Status */
 uint64_t mdcr_el2;
 uint64_t mdcr_el3;
-/* If the counter is enabled, this stores the last time the counter
- * was reset. Otherwise it stores the counter value
+/* Stores the architectural value of the counter *the last time it was
+ * updated* by pmccntr_op_start. Accesses should always be surrounded
+ * by pmccntr_op_start/pmccntr_op_finish to guarantee the latest
+ * architecturally-correct value is being read/set.
  */
 uint64_t c15_ccnt;
+/* Stores the delta between the architectural value and the underlying
+ * cycle count during normal operation. It is used to update c15_ccnt
+ * to be the correct architectural value before accesses. During
+ * accesses, c15_ccnt_delta contains the underlying count being used
+ * for the access, after which it reverts to the delta value in
+ * pmccntr_op_finish.
+ */
+uint64_t c15_ccnt_delta;
 uint64_t pmccfiltr_el0; /* Performance Monitor Filter Register */
 uint64_t vpidr_el2; /* Virtualization Processor ID Register */
 uint64_t vmpidr_el2; /* Virtualization Multiprocessor ID Register */
@@ -953,15 +963,27 @@ int cpu_arm_signal_handler(int host_signum, void *pinfo,
void *puc);
 
 /**
- * pmccntr_sync
+ * pmccntr_op_start/finish
+ * @env: CPUARMState
+ *
+ * Convert the counter in the PMCCNTR between its delta form (the typical mode
+ * when it's enabled) and the guest-visible value. These two calls must always
+ * surround any action which might affect the counter.
+ */
+void pmccntr_op_start(CPUARMState *env);
+void pmccntr_op_finish(CPUARMState *env);
+
+/**
+ * pmu_op_start/finish
  * @env: CPUARMState
  *
- * Synchronises the counter in the PMCCNTR. This must always be called twice,
- * once before any action that might affect the timer and again afterwards.
- * The function is used to swap the state of the register if required.
- * This only happens when not in user mode (!CONFIG_USER_ONLY)
+ * Convert all PMU counters between their delta form (the typical mode when
+ * they are enabled) and the guest-visible values. These two calls must
+ * always surround any action which might affect the counters; both take
+ * only @env and return nothing.
  */
-void pmccntr_sync(CPUARMState *env);
+void pmu_op_start(CPUARMState *env);
+void pmu_op_finish(CPUARMState *env);
 
 /* SCTLR bit meanings. Several bits have been reused in newer
  * versions of the architecture; in that case we define constants
diff --git a/target/arm/helper.c b/target/arm/helper.c
index 0ea95b0815..281bcff1da 100644
--- a/target/arm/helper.c
+++ b/target/arm/helper.c
@@ -1085,28 +1085,63 @@ static inline bool arm_ccnt_enabled(CPUARMState *env)
 
 return true;
 }
-
-void pmccntr_sync(CPUARMState *env)
+/*
+ * Ensure c15_ccnt is the guest-visible count so that operations such as
+ * enabling/disabling the counter or filtering, modifying the count itself,
+ * etc. can be done logically. This is essentially a no-op if the counter is
+ * not enabled at the time of the call.
+ */
+void pmccntr_op_start(CPUARMState *env)
 {
-uint64_t temp_ticks;
-
-temp_ticks = muldiv64(qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL),
+uint64_t cycles = 0;
+cycles = muldiv64(qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL),
   ARM_CPU_FREQ, NANOSECONDS_PER_SECOND);
 
-if (env->cp15.c9_pmcr & PMCRD) {
-/* Increment once every 64 processor clock cycles */
-temp_ticks /= 64;
+if (arm_ccnt_enabled(env)) {
+uint64_t eff_cycles = cycles;
+if (env->cp15.c9_pmcr & PMCRD) {
+/* Increment once every 64 processor clock cycles */
+eff_cycles /= 64;
+}
+
+env->cp15.c15_ccnt = eff_cycles - env->cp15.c15_ccnt_delta;
 }
+env->cp15.c15_ccnt_delta = cycles;
+}
 
+/*
+ * If PMCCNTR is enabled, recalculate the delta between the clock and the

[Qemu-devel] [PATCH v7 06/12] target/arm: Implement PMOVSSET

2018-11-05 Thread Aaron Lindsay

Add an array for PMOVSSET so we only define it for v7ve+ platforms

Signed-off-by: Aaron Lindsay 
Reviewed-by: Richard Henderson 
---
 target/arm/helper.c | 28 
 1 file changed, 28 insertions(+)

diff --git a/target/arm/helper.c b/target/arm/helper.c
index 0522a606a4..6724d97346 100644
--- a/target/arm/helper.c
+++ b/target/arm/helper.c
@@ -1342,6 +1342,13 @@ static void pmovsr_write(CPUARMState *env, const 
ARMCPRegInfo *ri,
 env->cp15.c9_pmovsr &= ~value;
 }
 
+static void pmovsset_write(CPUARMState *env, const ARMCPRegInfo *ri,
+ uint64_t value)
+{
+value &= pmu_counter_mask(env);
+env->cp15.c9_pmovsr |= value;
+}
+
 static void pmxevtyper_write(CPUARMState *env, const ARMCPRegInfo *ri,
  uint64_t value)
 {
@@ -1709,6 +1716,24 @@ static const ARMCPRegInfo v7mp_cp_reginfo[] = {
 REGINFO_SENTINEL
 };
 
+static const ARMCPRegInfo pmovsset_cp_reginfo[] = {
+/* PMOVSSET is not implemented in v7 before v7ve */
+{ .name = "PMOVSSET", .cp = 15, .opc1 = 0, .crn = 9, .crm = 14, .opc2 = 3,
+  .access = PL0_RW, .accessfn = pmreg_access,
+  .type = ARM_CP_ALIAS,
+  .fieldoffset = offsetoflow32(CPUARMState, cp15.c9_pmovsr),
+  .writefn = pmovsset_write,
+  .raw_writefn = raw_write },
+{ .name = "PMOVSSET_EL0", .state = ARM_CP_STATE_AA64,
+  .opc0 = 3, .opc1 = 3, .crn = 9, .crm = 14, .opc2 = 3,
+  .access = PL0_RW, .accessfn = pmreg_access,
+  .type = ARM_CP_ALIAS,
+  .fieldoffset = offsetof(CPUARMState, cp15.c9_pmovsr),
+  .writefn = pmovsset_write,
+  .raw_writefn = raw_write },
+REGINFO_SENTINEL
+};
+
 static void teecr_write(CPUARMState *env, const ARMCPRegInfo *ri,
 uint64_t value)
 {
@@ -5212,6 +5237,9 @@ void register_cp_regs_for_features(ARMCPU *cpu)
 !arm_feature(env, ARM_FEATURE_PMSA)) {
 define_arm_cp_regs(cpu, v7mp_cp_reginfo);
 }
+if (arm_feature(env, ARM_FEATURE_V7VE)) {
+define_arm_cp_regs(cpu, pmovsset_cp_reginfo);
+}
 if (arm_feature(env, ARM_FEATURE_V7)) {
 /* v7 performance monitor control register: same implementor
  * field as main ID register, and we implement only the cycle
-- 
2.19.1

[Qemu-devel] [PATCH v7 03/12] target/arm: Swap PMU values before/after migrations

2018-11-05 Thread Aaron Lindsay

Because of the PMU's design, many register accesses have side effects
which are inter-related, meaning that the normal method of saving CP
registers can result in inconsistent state. These side-effects are
largely handled in pmu_op_start/finish functions which can be called
before and after the state is saved/restored. By doing this and adding
raw read/write functions for the affected registers, we avoid
migration-related inconsistencies.

Signed-off-by: Aaron Lindsay 
---
 target/arm/helper.c  |  6 --
 target/arm/machine.c | 20 
 2 files changed, 24 insertions(+), 2 deletions(-)

diff --git a/target/arm/helper.c b/target/arm/helper.c
index 281bcff1da..5deff3d11f 100644
--- a/target/arm/helper.c
+++ b/target/arm/helper.c
@@ -1450,11 +1450,13 @@ static const ARMCPRegInfo v7_cp_reginfo[] = {
   .opc0 = 3, .opc1 = 3, .crn = 9, .crm = 13, .opc2 = 0,
   .access = PL0_RW, .accessfn = pmreg_access_ccntr,
   .type = ARM_CP_IO,
-  .readfn = pmccntr_read, .writefn = pmccntr_write, },
+  .fieldoffset = offsetof(CPUARMState, cp15.c15_ccnt),
+  .readfn = pmccntr_read, .writefn = pmccntr_write,
+  .raw_readfn = raw_read, .raw_writefn = raw_write, },
 #endif
 { .name = "PMCCFILTR_EL0", .state = ARM_CP_STATE_AA64,
   .opc0 = 3, .opc1 = 3, .crn = 14, .crm = 15, .opc2 = 7,
-  .writefn = pmccfiltr_write,
+  .writefn = pmccfiltr_write, .raw_writefn = raw_write,
   .access = PL0_RW, .accessfn = pmreg_access,
   .type = ARM_CP_IO,
   .fieldoffset = offsetof(CPUARMState, cp15.pmccfiltr_el0),
diff --git a/target/arm/machine.c b/target/arm/machine.c
index 239fe4e84d..6d14b08e0c 100644
--- a/target/arm/machine.c
+++ b/target/arm/machine.c
@@ -604,6 +604,8 @@ static int cpu_pre_save(void *opaque)
 {
 ARMCPU *cpu = opaque;
 
+pmu_op_start(&cpu->env);
+
 if (kvm_enabled()) {
 if (!write_kvmstate_to_list(cpu)) {
 /* This should never fail */
@@ -625,6 +627,20 @@ static int cpu_pre_save(void *opaque)
 return 0;
 }
 
+static int cpu_post_save(void *opaque)
+{
+ARMCPU *cpu = opaque;
+pmu_op_finish(&cpu->env);
+return 0;
+}
+
+static int cpu_pre_load(void *opaque)
+{
+ARMCPU *cpu = opaque;
+pmu_op_start(&cpu->env);
+return 0;
+}
+
 static int cpu_post_load(void *opaque, int version_id)
 {
 ARMCPU *cpu = opaque;
@@ -672,6 +688,8 @@ static int cpu_post_load(void *opaque, int version_id)
 hw_breakpoint_update_all(cpu);
 hw_watchpoint_update_all(cpu);
 
+pmu_op_finish(&cpu->env);
+
 return 0;
 }
 
@@ -680,6 +698,8 @@ const VMStateDescription vmstate_arm_cpu = {
 .version_id = 22,
 .minimum_version_id = 22,
 .pre_save = cpu_pre_save,
+.post_save = cpu_post_save,
+.pre_load = cpu_pre_load,
 .post_load = cpu_post_load,
 .fields = (VMStateField[]) {
 VMSTATE_UINT32_ARRAY(env.regs, ARMCPU, 16),
-- 
2.19.1

[Qemu-devel] [PATCH v7 05/12] target/arm: Allow AArch32 access for PMCCFILTR

2018-11-05 Thread Aaron Lindsay

Signed-off-by: Aaron Lindsay 
Reviewed-by: Peter Maydell 
Reviewed-by: Richard Henderson 
---
 target/arm/helper.c | 27 ++-
 1 file changed, 26 insertions(+), 1 deletion(-)

diff --git a/target/arm/helper.c b/target/arm/helper.c
index 63d4e993f4..0522a606a4 100644
--- a/target/arm/helper.c
+++ b/target/arm/helper.c
@@ -994,6 +994,10 @@ static const ARMCPRegInfo v6_cp_reginfo[] = {
PMXEVTYPER_M | PMXEVTYPER_MT | \
PMXEVTYPER_EVTCOUNT)
 
+#define PMCCFILTR 0xf800
+#define PMCCFILTR_M   PMXEVTYPER_M
+#define PMCCFILTR_EL0 (PMCCFILTR | PMCCFILTR_M)
+
 static inline uint32_t pmu_num_counters(CPUARMState *env)
 {
   return (env->cp15.c9_pmcr & PMCRN_MASK) >> PMCRN_SHIFT;
@@ -1297,10 +1301,26 @@ static void pmccfiltr_write(CPUARMState *env, const 
ARMCPRegInfo *ri,
 uint64_t value)
 {
 pmccntr_op_start(env);
-env->cp15.pmccfiltr_el0 = value & 0xfc00;
+env->cp15.pmccfiltr_el0 = value & PMCCFILTR_EL0;
+pmccntr_op_finish(env);
+}
+
+static void pmccfiltr_write_a32(CPUARMState *env, const ARMCPRegInfo *ri,
+uint64_t value)
+{
+pmccntr_op_start(env);
+/* M is not accessible from AArch32 */
+env->cp15.pmccfiltr_el0 = (env->cp15.pmccfiltr_el0 & PMCCFILTR_M) |
+(value & PMCCFILTR);
 pmccntr_op_finish(env);
 }
 
+static uint64_t pmccfiltr_read_a32(CPUARMState *env, const ARMCPRegInfo *ri)
+{
+/* M is not visible in AArch32 */
+return env->cp15.pmccfiltr_el0 & PMCCFILTR;
+}
+
 static void pmcntenset_write(CPUARMState *env, const ARMCPRegInfo *ri,
 uint64_t value)
 {
@@ -1536,6 +1556,11 @@ static const ARMCPRegInfo v7_cp_reginfo[] = {
   .readfn = pmccntr_read, .writefn = pmccntr_write,
   .raw_readfn = raw_read, .raw_writefn = raw_write, },
 #endif
+{ .name = "PMCCFILTR", .cp = 15, .opc1 = 0, .crn = 14, .crm = 15, .opc2 = 
7,
+  .writefn = pmccfiltr_write_a32, .readfn = pmccfiltr_read_a32,
+  .access = PL0_RW, .accessfn = pmreg_access,
+  .type = ARM_CP_ALIAS | ARM_CP_IO,
+  .resetvalue = 0, },
 { .name = "PMCCFILTR_EL0", .state = ARM_CP_STATE_AA64,
   .opc0 = 3, .opc1 = 3, .crn = 14, .crm = 15, .opc2 = 7,
   .writefn = pmccfiltr_write, .raw_writefn = raw_write,
-- 
2.19.1

[Qemu-devel] [PATCH v7 04/12] target/arm: Filter cycle counter based on PMCCFILTR_EL0

2018-11-05 Thread Aaron Lindsay

Rename arm_ccnt_enabled to pmu_counter_enabled, and add logic to only
return 'true' if the specified counter is enabled and neither prohibited
nor filtered.

Signed-off-by: Aaron Lindsay 
Signed-off-by: Aaron Lindsay 
Reviewed-by: Peter Maydell 
Reviewed-by: Richard Henderson 
---
 target/arm/cpu.c|  3 ++
 target/arm/cpu.h| 10 -
 target/arm/helper.c | 96 +
 3 files changed, 101 insertions(+), 8 deletions(-)

diff --git a/target/arm/cpu.c b/target/arm/cpu.c
index 784a4c2dfc..9e54c56379 100644
--- a/target/arm/cpu.c
+++ b/target/arm/cpu.c
@@ -957,6 +957,9 @@ static void arm_cpu_realizefn(DeviceState *dev, Error 
**errp)
 if (!cpu->has_pmu) {
 unset_feature(env, ARM_FEATURE_PMU);
 cpu->id_aa64dfr0 &= ~0xf00;
+} else if (!kvm_enabled()) {
+arm_register_pre_el_change_hook(cpu, &pmu_pre_el_change, 0);
+arm_register_el_change_hook(cpu, &pmu_post_el_change, 0);
 }
 
 if (!arm_feature(env, ARM_FEATURE_EL2)) {
diff --git a/target/arm/cpu.h b/target/arm/cpu.h
index 50a0862c84..92282cd976 100644
--- a/target/arm/cpu.h
+++ b/target/arm/cpu.h
@@ -985,6 +985,12 @@ void pmccntr_op_finish(CPUARMState *env);
 void pmu_op_start(CPUARMState *env);
 void pmu_op_finish(CPUARMState *env);
 
+/**
+ * Functions to register as EL change hooks for PMU mode filtering
+ */
+void pmu_pre_el_change(ARMCPU *cpu, void *ignored);
+void pmu_post_el_change(ARMCPU *cpu, void *ignored);
+
 /* SCTLR bit meanings. Several bits have been reused in newer
  * versions of the architecture; in that case we define constants
  * for both old and new bit meanings. Code which tests against those
@@ -1046,7 +1052,8 @@ void pmu_op_finish(CPUARMState *env);
 
 #define MDCR_EPMAD(1U << 21)
 #define MDCR_EDAD (1U << 20)
-#define MDCR_SPME (1U << 17)
+#define MDCR_SPME (1U << 17)  /* MDCR_EL3 */
+#define MDCR_HPMD (1U << 17)  /* MDCR_EL2 */
 #define MDCR_SDD  (1U << 16)
 #define MDCR_SPD  (3U << 14)
 #define MDCR_TDRA (1U << 11)
@@ -1056,6 +1063,7 @@ void pmu_op_finish(CPUARMState *env);
 #define MDCR_HPME (1U << 7)
 #define MDCR_TPM  (1U << 6)
 #define MDCR_TPMCR(1U << 5)
+#define MDCR_HPMN (0x1fU)
 
 /* Not all of the MDCR_EL3 bits are present in the 32-bit SDCR */
 #define SDCR_VALID_MASK (MDCR_EPMAD | MDCR_EDAD | MDCR_SPME | MDCR_SPD)
diff --git a/target/arm/helper.c b/target/arm/helper.c
index 5deff3d11f..63d4e993f4 100644
--- a/target/arm/helper.c
+++ b/target/arm/helper.c
@@ -976,10 +976,24 @@ static const ARMCPRegInfo v6_cp_reginfo[] = {
 /* Definitions for the PMU registers */
 #define PMCRN_MASK  0xf800
 #define PMCRN_SHIFT 11
+#define PMCRDP  0x10
 #define PMCRD   0x8
 #define PMCRC   0x4
 #define PMCRE   0x1
 
+#define PMXEVTYPER_P  0x8000
+#define PMXEVTYPER_U  0x4000
+#define PMXEVTYPER_NSK0x2000
+#define PMXEVTYPER_NSU0x1000
+#define PMXEVTYPER_NSH0x0800
+#define PMXEVTYPER_M  0x0400
+#define PMXEVTYPER_MT 0x0200
+#define PMXEVTYPER_EVTCOUNT   0x
+#define PMXEVTYPER_MASK   (PMXEVTYPER_P | PMXEVTYPER_U | PMXEVTYPER_NSK | \
+   PMXEVTYPER_NSU | PMXEVTYPER_NSH | \
+   PMXEVTYPER_M | PMXEVTYPER_MT | \
+   PMXEVTYPER_EVTCOUNT)
+
 static inline uint32_t pmu_num_counters(CPUARMState *env)
 {
   return (env->cp15.c9_pmcr & PMCRN_MASK) >> PMCRN_SHIFT;
@@ -1075,16 +1089,66 @@ static CPAccessResult pmreg_access_ccntr(CPUARMState 
*env,
 return pmreg_access(env, ri, isread);
 }
 
-static inline bool arm_ccnt_enabled(CPUARMState *env)
+/* Returns true if the counter (pass 31 for PMCCNTR) should count events using
+ * the current EL, security state, and register configuration.
+ */
+static bool pmu_counter_enabled(CPUARMState *env, uint8_t counter)
 {
-/* This does not support checking PMCCFILTR_EL0 register */
+uint64_t filter;
+bool e, p, u, nsk, nsu, nsh, m;
+bool enabled, prohibited, filtered;
+bool secure = arm_is_secure(env);
+int el = arm_current_el(env);
+uint8_t hpmn = env->cp15.mdcr_el2 & MDCR_HPMN;
 
-if (!(env->cp15.c9_pmcr & PMCRE) || !(env->cp15.c9_pmcnten & (1 << 31))) {
-return false;
+if (!arm_feature(env, ARM_FEATURE_EL2) ||
+(counter < hpmn || counter == 31)) {
+e = env->cp15.c9_pmcr & PMCRE;
+} else {
+e = env->cp15.mdcr_el2 & MDCR_HPME;
 }
+enabled = e && (env->cp15.c9_pmcnten & (1 << counter));
 
-return true;
+if (!secure) {
+if (el == 2 && (counter < hpmn || counter == 31)) {
+prohibited = env->cp15.mdcr_el2 & MDCR_HPMD;
+} else {
+prohibited = false;
+}
+} else {
+prohibited = arm_feature(env, ARM_FEATURE_EL3) &&
+   (env->cp15.mdcr_el3 & MDCR_SPME);
+}
+
+if (prohibited && counter == 31) {
+prohibited = env->cp15.c9_pmcr & PMCRDP;
+

[Qemu-devel] [PATCH v7 01/12] migration: Add post_save function to VMStateDescription

2018-11-05 Thread Aaron Lindsay

In some cases it may be helpful to modify state before saving it for
migration, and then modify the state back after it has been saved. The
existing pre_save function provides half of this functionality. This
patch adds a post_save function to provide the second half.

Signed-off-by: Aaron Lindsay 
Cc: Dr. David Alan Gilbert 
---
 docs/devel/migration.rst|  9 +++--
 include/migration/vmstate.h |  1 +
 migration/vmstate.c | 13 -
 3 files changed, 20 insertions(+), 3 deletions(-)

diff --git a/docs/devel/migration.rst b/docs/devel/migration.rst
index 687570754d..92fb521ad2 100644
--- a/docs/devel/migration.rst
+++ b/docs/devel/migration.rst
@@ -419,8 +419,13 @@ The functions to do that are inside a vmstate definition, 
and are called:
 
   This function is called before we save the state of one device.
 
-Example: You can look at hpet.c, that uses the three function to
-massage the state that is transferred.
+- ``int (*post_save)(void *opaque);``
+
+  This function is called after we save the state of one device
+  (even upon failure, unless the call to pre_save returned an error).
+
+Example: You can look at hpet.c, that uses the first three functions
+to massage the state that is transferred.
 
 The ``VMSTATE_WITH_TMP`` macro may be useful when the migration
 data doesn't match the stored device data well; it allows an
diff --git a/include/migration/vmstate.h b/include/migration/vmstate.h
index 2b501d0466..9355d83056 100644
--- a/include/migration/vmstate.h
+++ b/include/migration/vmstate.h
@@ -185,6 +185,7 @@ struct VMStateDescription {
 int (*pre_load)(void *opaque);
 int (*post_load)(void *opaque, int version_id);
 int (*pre_save)(void *opaque);
+int (*post_save)(void *opaque);
 bool (*needed)(void *opaque);
 VMStateField *fields;
 const VMStateDescription **subsections;
diff --git a/migration/vmstate.c b/migration/vmstate.c
index 0bc240a317..c15d75260a 100644
--- a/migration/vmstate.c
+++ b/migration/vmstate.c
@@ -387,6 +387,9 @@ int vmstate_save_state_v(QEMUFile *f, const 
VMStateDescription *vmsd,
 if (ret) {
 error_report("Save of field %s/%s failed",
  vmsd->name, field->name);
+if (vmsd->post_save) {
+vmsd->post_save(opaque);
+}
 return ret;
 }
 
@@ -412,7 +415,15 @@ int vmstate_save_state_v(QEMUFile *f, const 
VMStateDescription *vmsd,
 json_end_array(vmdesc);
 }
 
-return vmstate_subsection_save(f, vmsd, opaque, vmdesc);
+ret = vmstate_subsection_save(f, vmsd, opaque, vmdesc);
+
+if (vmsd->post_save) {
+int ps_ret = vmsd->post_save(opaque);
+if (!ret) {
+ret = ps_ret;
+}
+}
+return ret;
 }
 
 static const VMStateDescription *
-- 
2.19.1

[Qemu-devel] [PATCH v7 00/12] More fully implement ARM PMUv3

2018-11-05 Thread Aaron Lindsay

The ARM PMU implementation currently contains a basic cycle counter, but
it is often useful to gather counts of other events, filter them based
on execution mode, and/or be notified on counter overflow. These patches
flesh out the implementations of various PMU registers including
PM[X]EVCNTR and PM[X]EVTYPER, add a struct definition to represent
arbitrary counter types, implement mode filtering, send interrupts on
counter overflow, and add instruction, cycle, and software increment
events.

Since v6 [1] I have made the following changes:
* Use cpu_get_host_ticks() for the cycle counter value for user mode
* Re-staged "PMU: Set PMCR.N to 4" so that the value of the pmcrn local
  variable matches the architectural value of PMCR.N
* Re-ordered "Reorganize PMCCNTR accesses" to come first to eliminate
  the churn of *_op_start/finish function names and definitions
* Use extract64 and ARRAY_SIZE macros where applicable
* Add a return value to the post_save migration function

[1] - https://lists.gnu.org/archive/html/qemu-devel/2018-10/msg02036.html

Aaron Lindsay (12):
  migration: Add post_save function to VMStateDescription
  target/arm: Reorganize PMCCNTR accesses
  target/arm: Swap PMU values before/after migrations
  target/arm: Filter cycle counter based on PMCCFILTR_EL0
  target/arm: Allow AArch32 access for PMCCFILTR
  target/arm: Implement PMOVSSET
  target/arm: Add array for supported PMU events, generate PMCEID[01]
  target/arm: Finish implementation of PM[X]EVCNTR and PM[X]EVTYPER
  target/arm: PMU: Add instruction and cycle events
  target/arm: PMU: Set PMCR.N to 4
  target/arm: Implement PMSWINC
  target/arm: Send interrupts on PMU counter overflow

 docs/devel/migration.rst|   9 +-
 include/migration/vmstate.h |   1 +
 migration/vmstate.c |  13 +-
 target/arm/cpu.c|  28 +-
 target/arm/cpu.h|  68 +++-
 target/arm/cpu64.c  |   4 -
 target/arm/helper.c | 774 
 target/arm/machine.c|  20 +
 8 files changed, 816 insertions(+), 101 deletions(-)

-- 
2.19.1

Re: [Qemu-devel] [PATCH 0/1 resend] Add vhost-pci-blk driver

2018-11-05 Thread no-reply

Hi,

This series seems to have some coding style problems. See output below for
more information:

Type: series
Message-id: 20181105140327.8363-1-v.mayats...@gmail.com
Subject: [Qemu-devel] [PATCH 0/1 resend] Add vhost-pci-blk driver

=== TEST SCRIPT BEGIN ===
#!/bin/bash

BASE=base
n=1
total=$(git log --oneline $BASE.. | wc -l)
failed=0

git config --local diff.renamelimit 0
git config --local diff.renames True
git config --local diff.algorithm histogram

commits="$(git log --format=%H --reverse $BASE..)"
for c in $commits; do
echo "Checking PATCH $n/$total: $(git log -n 1 --format=%s $c)..."
if ! git show $c --format=email | ./scripts/checkpatch.pl --mailback -; then
failed=1
echo
fi
n=$((n+1))
done

exit $failed
=== TEST SCRIPT END ===

Updating 3c8cf5a9c21ff8782164d1def7f44bd888713384
Switched to a new branch 'test'
19803cc4ae Add vhost-pci-blk driver

=== OUTPUT BEGIN ===
Checking PATCH 1/1: Add vhost-pci-blk driver...
WARNING: added, moved or deleted file(s), does MAINTAINERS need updating?
#82: 
new file mode 100644

ERROR: Error messages should not contain newlines
#229: FILE: hw/block/vhost-blk.c:143:
+error_report("Error opening backing store: %d\n", -errno);

ERROR: braces {} are necessary for all arms of this statement
#241: FILE: hw/block/vhost-blk.c:155:
+if (s->bs_fd > 0)
[...]

ERROR: space prohibited after that '-' (ctx:WxW)
#327: FILE: hw/block/vhost-blk.c:241:
+int fd = - 1;
  ^

WARNING: line over 80 characters
#331: FILE: hw/block/vhost-blk.c:245:
+   error_report("Can't open device %s: %d\n", 
blk_bs(s->blk)->filename, errno);

ERROR: code indent should never use tabs
#331: FILE: hw/block/vhost-blk.c:245:
+^Ierror_report("Can't open device %s: %d\n", blk_bs(s->blk)->filename, 
errno);$

ERROR: Error messages should not contain newlines
#331: FILE: hw/block/vhost-blk.c:245:
+   error_report("Can't open device %s: %d\n", 
blk_bs(s->blk)->filename, errno);

ERROR: code indent should never use tabs
#332: FILE: hw/block/vhost-blk.c:246:
+^Igoto out;$

ERROR: code indent should never use tabs
#336: FILE: hw/block/vhost-blk.c:250:
+^Iret = ioctl(fd, BLKGETSIZE, &var);$

ERROR: code indent should never use tabs
#337: FILE: hw/block/vhost-blk.c:251:
+^Ivar64 = var;$

ERROR: code indent should never use tabs
#340: FILE: hw/block/vhost-blk.c:254:
+^Ierror_report("Can't get drive size: %d\n", errno);$

ERROR: Error messages should not contain newlines
#340: FILE: hw/block/vhost-blk.c:254:
+   error_report("Can't get drive size: %d\n", errno);

ERROR: code indent should never use tabs
#341: FILE: hw/block/vhost-blk.c:255:
+^Igoto out;$

ERROR: line over 90 characters
#347: FILE: hw/block/vhost-blk.c:261:
+   error_report("Can't get drive logical sector size, assuming 512: 
%d\n", errno);

ERROR: code indent should never use tabs
#347: FILE: hw/block/vhost-blk.c:261:
+^Ierror_report("Can't get drive logical sector size, assuming 512: %d\n", 
errno);$

ERROR: Error messages should not contain newlines
#347: FILE: hw/block/vhost-blk.c:261:
+   error_report("Can't get drive logical sector size, assuming 512: 
%d\n", errno);

ERROR: code indent should never use tabs
#348: FILE: hw/block/vhost-blk.c:262:
+^Ivar = 512;$

ERROR: braces {} are necessary for all arms of this statement
#360: FILE: hw/block/vhost-blk.c:274:
+if (fd > 0)
[...]

ERROR: code indent should never use tabs
#361: FILE: hw/block/vhost-blk.c:275:
+^Iclose(fd);$

ERROR: code indent should never use tabs
#410: FILE: hw/block/vhost-blk.c:324:
+^Igoto virtio_err;$

ERROR: line over 90 characters
#413: FILE: hw/block/vhost-blk.c:327:
+ret = vhost_dev_init(&s->dev, (void *)(uintptr_t)s->vhostfd, 
VHOST_BACKEND_TYPE_KERNEL, 0);

total: 19 errors, 2 warnings, 613 lines checked

Your patch has style problems, please review.  If any of these errors
are false positives report them to the maintainer, see
CHECKPATCH in MAINTAINERS.

=== OUTPUT END ===

Test command exited with code: 1


---
Email generated automatically by Patchew [http://patchew.org/].
Please send your feedback to patchew-de...@redhat.com

Re: [Qemu-devel] [PATCH RFC v7 0/9] qemu_thread_create: propagate errors to callers to check

2018-11-05 Thread no-reply

Hi,

This series failed the docker-quick@centos7 build test. Please find the testing
commands and their output below. If you have Docker installed, you can probably
reproduce it locally.

Type: series
Message-id: 20181101101715.9443-1-...@suse.com
Subject: [Qemu-devel] [PATCH RFC v7 0/9] qemu_thread_create: propagate errors 
to callers to check

=== TEST SCRIPT BEGIN ===
#!/bin/bash
time make docker-test-quick@centos7 SHOW_ENV=1 J=8
=== TEST SCRIPT END ===

Updating 3c8cf5a9c21ff8782164d1def7f44bd888713384
Switched to a new branch 'test'
3e277b70f1 qemu_thread_create: propagate the error to callers to handle
39551a72c7 migration: add more error handling for postcopy_ram_enable_notify
733f7a07cb migration: remove unused &local_err parameter in migrate_set_error
46e34c2c06 migration: fix the multifd code when receiving less channels
d2c4705d34 migration: fix the multifd code when sending less channels
8ce0f380f1 migration: fix some segmentation faults when using multifd
37dbd330d4 qemu_thread_join: fix segmentation fault
b10333fc35 qemu_init_vcpu: add a new Error parameter to propagate
3626dde437 Fix segmentation fault when qemu_signal_init fails

=== OUTPUT BEGIN ===
  BUILD   centos7
make[1]: Entering directory '/var/tmp/patchew-tester-tmp-skht4o_b/src'
  GEN 
/var/tmp/patchew-tester-tmp-skht4o_b/src/docker-src.2018-11-05-13.16.56.15391/qemu.tar
Cloning into 
'/var/tmp/patchew-tester-tmp-skht4o_b/src/docker-src.2018-11-05-13.16.56.15391/qemu.tar.vroot'...
done.
Checking out files:   9% (632/6451)   
Checking out files:  10% (646/6451)   
Checking out files:  11% (710/6451)   
Checking out files:  12% (775/6451)   
Checking out files:  13% (839/6451)   
Checking out files:  14% (904/6451)   
Checking out files:  14% (932/6451)   
Checking out files:  15% (968/6451)   
Checking out files:  16% (1033/6451)   
Checking out files:  17% (1097/6451)   
Checking out files:  17% (1135/6451)   
Checking out files:  18% (1162/6451)   
Checking out files:  19% (1226/6451)   
Checking out files:  20% (1291/6451)   
Checking out files:  21% (1355/6451)   
Checking out files:  22% (1420/6451)   
Checking out files:  23% (1484/6451)   
Checking out files:  24% (1549/6451)   
Checking out files:  25% (1613/6451)   
Checking out files:  25% (1629/6451)   
Checking out files:  26% (1678/6451)   
Checking out files:  27% (1742/6451)   
Checking out files:  28% (1807/6451)   
Checking out files:  29% (1871/6451)   
Checking out files:  30% (1936/6451)   
Checking out files:  31% (2000/6451)   
Checking out files:  32% (2065/6451)   
Checking out files:  33% (2129/6451)   
Checking out files:  34% (2194/6451)   
Checking out files:  35% (2258/6451)   
Checking out files:  36% (2323/6451)   
Checking out files:  37% (2387/6451)   
Checking out files:  38% (2452/6451)   
Checking out files:  39% (2516/6451)   
Checking out files:  40% (2581/6451)   
Checking out files:  41% (2645/6451)   
Checking out files:  42% (2710/6451)   
Checking out files:  43% (2774/6451)   
Checking out files:  44% (2839/6451)   
Checking out files:  45% (2903/6451)   
Checking out files:  46% (2968/6451)   
Checking out files:  47% (3032/6451)   
Checking out files:  48% (3097/6451)   
Checking out files:  49% (3161/6451)   
Checking out files:  50% (3226/6451)   
Checking out files:  51% (3291/6451)   
Checking out files:  52% (3355/6451)   
Checking out files:  53% (3420/6451)   
Checking out files:  54% (3484/6451)   
Checking out files:  55% (3549/6451)   
Checking out files:  56% (3613/6451)   
Checking out files:  57% (3678/6451)   
Checking out files:  58% (3742/6451)   
Checking out files:  59% (3807/6451)   
Checking out files:  60% (3871/6451)   
Checking out files:  61% (3936/6451)   
Checking out files:  62% (4000/6451)   
Checking out files:  63% (4065/6451)   
Checking out files:  64% (4129/6451)   
Checking out files:  65% (4194/6451)   
Checking out files:  66% (4258/6451)   
Checking out files:  67% (4323/6451)   
Checking out files:  68% (4387/6451)   
Checking out files:  69% (4452/6451)   
Checking out files:  70% (4516/6451)   
Checking out files:  71% (4581/6451)   
Checking out files:  72% (4645/6451)   
Checking out files:  73% (4710/6451)   
Checking out files:  74% (4774/6451)   
Checking out files:  75% (4839/6451)   
Checking out files:  75% (4894/6451)   
Checking out files:  76% (4903/6451)   
Checking out files:  77% (4968/6451)   
Checking out files:  78% (5032/6451)   
Checking out files:  79% (5097/6451)   
Checking out files:  80% (5161/6451)   
Checking out files:  81% (5226/6451)   
Checking out files:  82% (5290/6451)   
Checking out files:  83% (5355/6451)   
Checking out files:  84% (5419/6451)   
Checking out files:  85% (5484/6451)   
Checking out files:  86% (5548/6451)   
Checking out files:  87% (5613/6451)   
Checking out files:  88% (5677/6451)   
Checking out files:  89% (5742/6451)   
Checking out files:  90% (5806/6451)   
Checking out files:  91% (5871/6451)   
Checking out files:  92% (5935/6451)

[Qemu-devel] [PULL 29/33] pci_bridge: fix typo in comment

2018-11-05 Thread Michael S. Tsirkin

From: Mao Zhongyi 

Signed-off-by: Mao Zhongyi 
Reviewed-by: Philippe Mathieu-Daudé 
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
---
 hw/pci/pci_bridge.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hw/pci/pci_bridge.c b/hw/pci/pci_bridge.c
index 08b7e44e2e..ee9dff2d3a 100644
--- a/hw/pci/pci_bridge.c
+++ b/hw/pci/pci_bridge.c
@@ -399,7 +399,7 @@ void pci_bridge_exitfn(PCIDevice *pci_dev)
 
 /*
  * before qdev initialization(qdev_init()), this function sets bus_name and
- * map_irq callback which are necessry for pci_bridge_initfn() to
+ * map_irq callback which are necessary for pci_bridge_initfn() to
  * initialize bus.
  */
 void pci_bridge_map_irq(PCIBridge *br, const char* bus_name,
-- 
MST

[Qemu-devel] [PULL 26/33] hw/pci-bridge/xio3130: Remove unused functions

2018-11-05 Thread Michael S. Tsirkin

From: Philippe Mathieu-Daudé 

Introduced in 48ebf2f90f8 and faf1e708d5b, these functions
were never used. Remove them.

Signed-off-by: Philippe Mathieu-Daudé 
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
---
 hw/pci-bridge/xio3130_downstream.h | 11 ---
 hw/pci-bridge/xio3130_upstream.h   | 10 --
 hw/pci-bridge/xio3130_downstream.c | 28 +---
 hw/pci-bridge/xio3130_upstream.c   | 24 +---
 4 files changed, 2 insertions(+), 71 deletions(-)
 delete mode 100644 hw/pci-bridge/xio3130_downstream.h
 delete mode 100644 hw/pci-bridge/xio3130_upstream.h

diff --git a/hw/pci-bridge/xio3130_downstream.h 
b/hw/pci-bridge/xio3130_downstream.h
deleted file mode 100644
index 8426d9ffa6..00
--- a/hw/pci-bridge/xio3130_downstream.h
+++ /dev/null
@@ -1,11 +0,0 @@
-#ifndef QEMU_XIO3130_DOWNSTREAM_H
-#define QEMU_XIO3130_DOWNSTREAM_H
-
-#include "hw/pci/pcie_port.h"
-
-PCIESlot *xio3130_downstream_init(PCIBus *bus, int devfn, bool multifunction,
-  const char *bus_name, pci_map_irq_fn map_irq,
-  uint8_t port, uint8_t chassis,
-  uint16_t slot);
-
-#endif /* QEMU_XIO3130_DOWNSTREAM_H */
diff --git a/hw/pci-bridge/xio3130_upstream.h b/hw/pci-bridge/xio3130_upstream.h
deleted file mode 100644
index d0ab7577e2..00
--- a/hw/pci-bridge/xio3130_upstream.h
+++ /dev/null
@@ -1,10 +0,0 @@
-#ifndef QEMU_XIO3130_UPSTREAM_H
-#define QEMU_XIO3130_UPSTREAM_H
-
-#include "hw/pci/pcie_port.h"
-
-PCIEPort *xio3130_upstream_init(PCIBus *bus, int devfn, bool multifunction,
-const char *bus_name, pci_map_irq_fn map_irq,
-uint8_t port);
-
-#endif /* QEMU_XIO3130_UPSTREAM_H */
diff --git a/hw/pci-bridge/xio3130_downstream.c 
b/hw/pci-bridge/xio3130_downstream.c
index b202657954..467bbabe4c 100644
--- a/hw/pci-bridge/xio3130_downstream.c
+++ b/hw/pci-bridge/xio3130_downstream.c
@@ -23,7 +23,7 @@
 #include "hw/pci/pci_ids.h"
 #include "hw/pci/msi.h"
 #include "hw/pci/pcie.h"
-#include "xio3130_downstream.h"
+#include "hw/pci/pcie_port.h"
 #include "qapi/error.h"
 
 #define PCI_DEVICE_ID_TI_XIO3130D   0x8233  /* downstream port */
@@ -127,32 +127,6 @@ static void xio3130_downstream_exitfn(PCIDevice *d)
 pci_bridge_exitfn(d);
 }
 
-PCIESlot *xio3130_downstream_init(PCIBus *bus, int devfn, bool multifunction,
-  const char *bus_name, pci_map_irq_fn map_irq,
-  uint8_t port, uint8_t chassis,
-  uint16_t slot)
-{
-PCIDevice *d;
-PCIBridge *br;
-DeviceState *qdev;
-
-d = pci_create_multifunction(bus, devfn, multifunction,
- "xio3130-downstream");
-if (!d) {
-return NULL;
-}
-br = PCI_BRIDGE(d);
-
-qdev = DEVICE(d);
-pci_bridge_map_irq(br, bus_name, map_irq);
-qdev_prop_set_uint8(qdev, "port", port);
-qdev_prop_set_uint8(qdev, "chassis", chassis);
-qdev_prop_set_uint16(qdev, "slot", slot);
-qdev_init_nofail(qdev);
-
-return PCIE_SLOT(d);
-}
-
 static Property xio3130_downstream_props[] = {
 DEFINE_PROP_BIT(COMPAT_PROP_PCP, PCIDevice, cap_present,
 QEMU_PCIE_SLTCAP_PCP_BITNR, true),
diff --git a/hw/pci-bridge/xio3130_upstream.c b/hw/pci-bridge/xio3130_upstream.c
index bca2f9a5ea..b524908cf1 100644
--- a/hw/pci-bridge/xio3130_upstream.c
+++ b/hw/pci-bridge/xio3130_upstream.c
@@ -23,7 +23,7 @@
 #include "hw/pci/pci_ids.h"
 #include "hw/pci/msi.h"
 #include "hw/pci/pcie.h"
-#include "xio3130_upstream.h"
+#include "hw/pci/pcie_port.h"
 
 #define PCI_DEVICE_ID_TI_XIO3130U   0x8232  /* upstream port */
 #define XIO3130_REVISION0x2
@@ -108,28 +108,6 @@ static void xio3130_upstream_exitfn(PCIDevice *d)
 pci_bridge_exitfn(d);
 }
 
-PCIEPort *xio3130_upstream_init(PCIBus *bus, int devfn, bool multifunction,
- const char *bus_name, pci_map_irq_fn map_irq,
- uint8_t port)
-{
-PCIDevice *d;
-PCIBridge *br;
-DeviceState *qdev;
-
-d = pci_create_multifunction(bus, devfn, multifunction, "x3130-upstream");
-if (!d) {
-return NULL;
-}
-br = PCI_BRIDGE(d);
-
-qdev = DEVICE(d);
-pci_bridge_map_irq(br, bus_name, map_irq);
-qdev_prop_set_uint8(qdev, "port", port);
-qdev_init_nofail(qdev);
-
-return PCIE_PORT(d);
-}
-
 static const VMStateDescription vmstate_xio3130_upstream = {
 .name = "xio3130-express-upstream-port",
 .priority = MIG_PRI_PCI_BUS,
-- 
MST

[Qemu-devel] [PULL 22/33] hw/pci-host/x86: extract get_pci_hole64_start_value() helpers

2018-11-05 Thread Michael S. Tsirkin

From: Laszlo Ersek 

Expose the calculated "hole64 start" GPAs as plain uint64_t values,
extracting the internals of the current property getters.

This patch doesn't change behavior.

Cc: "Michael S. Tsirkin" 
Cc: Alex Williamson 
Cc: Gerd Hoffmann 
Cc: Igor Mammedov 
Cc: Marcel Apfelbaum 
Signed-off-by: Laszlo Ersek 
Reviewed-by: Marcel Apfelbaum 
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
---
 hw/pci-host/piix.c | 15 +++
 hw/pci-host/q35.c  | 15 +++
 2 files changed, 22 insertions(+), 8 deletions(-)

diff --git a/hw/pci-host/piix.c b/hw/pci-host/piix.c
index 47293a3915..ce271da9b0 100644
--- a/hw/pci-host/piix.c
+++ b/hw/pci-host/piix.c
@@ -249,9 +249,7 @@ static void i440fx_pcihost_get_pci_hole_end(Object *obj, 
Visitor *v,
  * the 64bit PCI hole will start after "over 4G RAM" and the
  * reserved space for memory hotplug if any.
  */
-static void i440fx_pcihost_get_pci_hole64_start(Object *obj, Visitor *v,
-const char *name,
-void *opaque, Error **errp)
+static uint64_t i440fx_pcihost_get_pci_hole64_start_value(Object *obj)
 {
 PCIHostState *h = PCI_HOST_BRIDGE(obj);
 I440FXState *s = I440FX_PCI_HOST_BRIDGE(obj);
@@ -263,7 +261,16 @@ static void i440fx_pcihost_get_pci_hole64_start(Object 
*obj, Visitor *v,
 if (!value && s->pci_hole64_fix) {
 value = pc_pci_hole64_start();
 }
-visit_type_uint64(v, name, &value, errp);
+return value;
+}
+
+static void i440fx_pcihost_get_pci_hole64_start(Object *obj, Visitor *v,
+const char *name,
+void *opaque, Error **errp)
+{
+uint64_t hole64_start = i440fx_pcihost_get_pci_hole64_start_value(obj);
+
+visit_type_uint64(v, name, &hole64_start, errp);
 }
 
 /*
diff --git a/hw/pci-host/q35.c b/hw/pci-host/q35.c
index 966a7cf92d..ceb00f7706 100644
--- a/hw/pci-host/q35.c
+++ b/hw/pci-host/q35.c
@@ -113,9 +113,7 @@ static void q35_host_get_pci_hole_end(Object *obj, Visitor 
*v,
  * the 64bit PCI hole will start after "over 4G RAM" and the
  * reserved space for memory hotplug if any.
  */
-static void q35_host_get_pci_hole64_start(Object *obj, Visitor *v,
-  const char *name, void *opaque,
-  Error **errp)
+static uint64_t q35_host_get_pci_hole64_start_value(Object *obj)
 {
 PCIHostState *h = PCI_HOST_BRIDGE(obj);
 Q35PCIHost *s = Q35_HOST_DEVICE(obj);
@@ -127,7 +125,16 @@ static void q35_host_get_pci_hole64_start(Object *obj, 
Visitor *v,
 if (!value && s->pci_hole64_fix) {
 value = pc_pci_hole64_start();
 }
-visit_type_uint64(v, name, &value, errp);
+return value;
+}
+
+static void q35_host_get_pci_hole64_start(Object *obj, Visitor *v,
+  const char *name, void *opaque,
+  Error **errp)
+{
+uint64_t hole64_start = q35_host_get_pci_hole64_start_value(obj);
+
+visit_type_uint64(v, name, &hole64_start, errp);
 }
 
 /*
-- 
MST

[Qemu-devel] [PULL 19/33] x86_iommu/amd: Enable Guest virtual APIC support

2018-11-05 Thread Michael S. Tsirkin

From: "Singh, Brijesh" 

Now that amd-iommu supports interrupt remapping, enable GASup in the IVRS
table and GASup in the extended feature register to indicate that the IOMMU
supports guest virtual APIC mode. GASup gives the guest OS the option to
make use of the 128-bit IRTE.

Note that the GAMSup is set to zero to indicate that amd-iommu does not
support guest virtual APIC mode (aka AVIC) which would be used for the
nested VMs.

See Table 21 from IOMMU spec for interrupt virtualization controls

Signed-off-by: Brijesh Singh 
Reviewed-by: Peter Xu 
Cc: Peter Xu 
Cc: "Michael S. Tsirkin" 
Cc: Paolo Bonzini 
Cc: Richard Henderson 
Cc: Eduardo Habkost 
Cc: Marcel Apfelbaum 
Cc: Tom Lendacky 
Cc: Suravee Suthikulpanit 
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
---
 hw/i386/amd_iommu.h  | 12 ++--
 hw/i386/acpi-build.c |  3 ++-
 2 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/hw/i386/amd_iommu.h b/hw/i386/amd_iommu.h
index 8061e9c49c..c52886f3ed 100644
--- a/hw/i386/amd_iommu.h
+++ b/hw/i386/amd_iommu.h
@@ -176,7 +176,7 @@
 /* extended feature support */
 #define AMDVI_EXT_FEATURES (AMDVI_FEATURE_PREFETCH | AMDVI_FEATURE_PPR | \
 AMDVI_FEATURE_IA | AMDVI_FEATURE_GT | AMDVI_FEATURE_HE | \
-AMDVI_GATS_MODE | AMDVI_HATS_MODE)
+AMDVI_GATS_MODE | AMDVI_HATS_MODE | AMDVI_FEATURE_GA)
 
 /* capabilities header */
 #define AMDVI_CAPAB_FEATURES (AMDVI_CAPAB_FLAT_EXT | \
@@ -242,11 +242,11 @@
 #define AMDVI_IOAPIC_INT_TYPE_EINT   0x7
 
 /* Pass through interrupt */
-#define AMDVI_DEV_INT_PASS_MASK (1UL << 56)
-#define AMDVI_DEV_EINT_PASS_MASK(1UL << 57)
-#define AMDVI_DEV_NMI_PASS_MASK (1UL << 58)
-#define AMDVI_DEV_LINT0_PASS_MASK   (1UL << 62)
-#define AMDVI_DEV_LINT1_PASS_MASK   (1UL << 63)
+#define AMDVI_DEV_INT_PASS_MASK (1ULL << 56)
+#define AMDVI_DEV_EINT_PASS_MASK(1ULL << 57)
+#define AMDVI_DEV_NMI_PASS_MASK (1ULL << 58)
+#define AMDVI_DEV_LINT0_PASS_MASK   (1ULL << 62)
+#define AMDVI_DEV_LINT1_PASS_MASK   (1ULL << 63)
 
 /* Interrupt remapping table fields (Guest VAPIC not enabled) */
 union irte {
diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
index 1ef396ddbb..236a20eaa8 100644
--- a/hw/i386/acpi-build.c
+++ b/hw/i386/acpi-build.c
@@ -2518,7 +2518,8 @@ build_amd_iommu(GArray *table_data, BIOSLinker *linker)
 build_append_int_noprefix(table_data,
  (48UL << 30) | /* HATS   */
  (48UL << 28) | /* GATS   */
- (1UL << 2),/* GTSup  */
+ (1UL << 2)   | /* GTSup  */
+ (1UL << 6),/* GASup  */
  4);
 /*
  *   Type 1 device entry reporting all devices
-- 
MST

[Qemu-devel] [PULL 32/33] piix_pci: fix i440fx data sheet link

2018-11-05 Thread Michael S. Tsirkin

From: Li Qiang 

It seems that the Intel link is no longer available, so change it to point
to the QEMU site.

Signed-off-by: Li Qiang 
Reviewed-by: Philippe Mathieu-Daudé 
Reviewed-by: Marcel Apfelbaum 
Acked-by: Michael S. Tsirkin 
Reviewed-by: Michael S. Tsirkin 
---
 hw/pci-host/piix.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hw/pci-host/piix.c b/hw/pci-host/piix.c
index 761a8158d7..d9c70f7ce6 100644
--- a/hw/pci-host/piix.c
+++ b/hw/pci-host/piix.c
@@ -40,7 +40,7 @@
 
 /*
  * I440FX chipset data sheet.
- * http://download.intel.com/design/chipsets/datashts/29054901.pdf
+ * https://wiki.qemu.org/File:29054901.pdf
  */
 
 #define I440FX_PCI_HOST_BRIDGE(obj) \
-- 
MST

[Qemu-devel] [PULL 14/33] x86_iommu/amd: make the address space naming consistent with intel-iommu

2018-11-05 Thread Michael S. Tsirkin

From: "Singh, Brijesh" 

To be consistent with intel-iommu:

- rename the address space to use '_' instead of '-'
- update the memory region relationships

Signed-off-by: Brijesh Singh 
Reviewed-by: Peter Xu 
Cc: Peter Xu 
Cc: "Michael S. Tsirkin" 
Cc: Paolo Bonzini 
Cc: Richard Henderson 
Cc: Eduardo Habkost 
Cc: Marcel Apfelbaum 
Cc: Tom Lendacky 
Cc: Suravee Suthikulpanit 
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
---
 hw/i386/amd_iommu.c | 34 +++---
 1 file changed, 27 insertions(+), 7 deletions(-)

diff --git a/hw/i386/amd_iommu.c b/hw/i386/amd_iommu.c
index 7206bb09c2..4bec1c6688 100644
--- a/hw/i386/amd_iommu.c
+++ b/hw/i386/amd_iommu.c
@@ -55,6 +55,7 @@ struct AMDVIAddressSpace {
 uint8_t bus_num;/* bus number   */
 uint8_t devfn;  /* device function  */
 AMDVIState *iommu_state;/* AMDVI - one per machine  */
+MemoryRegion root;  /* AMDVI Root memory map region */
 IOMMUMemoryRegion iommu;/* Device's address translation region  */
 MemoryRegion iommu_ir;  /* Device's interrupt remapping region  */
 AddressSpace as;/* device's corresponding address space */
@@ -1032,8 +1033,9 @@ static IOMMUTLBEntry amdvi_translate(IOMMUMemoryRegion 
*iommu, hwaddr addr,
 
 static AddressSpace *amdvi_host_dma_iommu(PCIBus *bus, void *opaque, int devfn)
 {
+char name[128];
 AMDVIState *s = opaque;
-AMDVIAddressSpace **iommu_as;
+AMDVIAddressSpace **iommu_as, *amdvi_dev_as;
 int bus_num = pci_bus_num(bus);
 
 iommu_as = s->address_spaces[bus_num];
@@ -1046,19 +1048,37 @@ static AddressSpace *amdvi_host_dma_iommu(PCIBus *bus, 
void *opaque, int devfn)
 
 /* set up AMD-Vi region */
 if (!iommu_as[devfn]) {
+snprintf(name, sizeof(name), "amd_iommu_devfn_%d", devfn);
+
 iommu_as[devfn] = g_malloc0(sizeof(AMDVIAddressSpace));
 iommu_as[devfn]->bus_num = (uint8_t)bus_num;
 iommu_as[devfn]->devfn = (uint8_t)devfn;
 iommu_as[devfn]->iommu_state = s;
 
-memory_region_init_iommu(&iommu_as[devfn]->iommu,
- sizeof(iommu_as[devfn]->iommu),
+amdvi_dev_as = iommu_as[devfn];
+
+/*
+ * Memory region relationships looks like (Address range shows
+ * only lower 32 bits to make it short in length...):
+ *
+ * |-+---+--|
+ * | Name| Address range | Priority |
+ * |-+---+--+
+ * | amdvi_root  | - |0 |
+ * |  amdvi_iommu| - |1 |
+ * |-+---+--|
+ */
+memory_region_init_iommu(&amdvi_dev_as->iommu,
+ sizeof(amdvi_dev_as->iommu),
  TYPE_AMD_IOMMU_MEMORY_REGION,
  OBJECT(s),
- "amd-iommu", UINT64_MAX);
-address_space_init(&iommu_as[devfn]->as,
-   MEMORY_REGION(&iommu_as[devfn]->iommu),
-   "amd-iommu");
+ "amd_iommu", UINT64_MAX);
+memory_region_init(&amdvi_dev_as->root, OBJECT(s),
+   "amdvi_root", UINT64_MAX);
+address_space_init(&amdvi_dev_as->as, &amdvi_dev_as->root, name);
+memory_region_add_subregion_overlap(&amdvi_dev_as->root, 0,
+
MEMORY_REGION(&amdvi_dev_as->iommu),
+1);
 }
 return &iommu_as[devfn]->as;
 }
-- 
MST

[Qemu-devel] [PULL 30/33] i440fx: use ARRAY_SIZE for pam_regions

2018-11-05 Thread Michael S. Tsirkin

From: Li Qiang 

Cc: qemu-triv...@nongnu.org

Signed-off-by: Li Qiang 
Reviewed-by: Philippe Mathieu-Daudé 
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
---
 hw/pci-host/piix.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/hw/pci-host/piix.c b/hw/pci-host/piix.c
index ec5441583a..b5ded1a806 100644
--- a/hw/pci-host/piix.c
+++ b/hw/pci-host/piix.c
@@ -142,7 +142,7 @@ static void i440fx_update_memory_mappings(PCII440FXState *d)
 PCIDevice *pd = PCI_DEVICE(d);
 
 memory_region_transaction_begin();
-for (i = 0; i < 13; i++) {
+for (i = 0; i < ARRAY_SIZE(d->pam_regions); i++) {
 pam_update(&d->pam_regions[i], i,
pd->config[I440FX_PAM + DIV_ROUND_UP(i, 2)]);
 }
@@ -412,7 +412,7 @@ PCIBus *i440fx_init(const char *host_type, const char 
*pci_type,
 
 init_pam(dev, f->ram_memory, f->system_memory, f->pci_address_space,
  &f->pam_regions[0], PAM_BIOS_BASE, PAM_BIOS_SIZE);
-for (i = 0; i < 12; ++i) {
+for (i = 0; i < ARRAY_SIZE(f->pam_regions) - 1; ++i) {
 init_pam(dev, f->ram_memory, f->system_memory, f->pci_address_space,
  &f->pam_regions[i+1], PAM_EXPAN_BASE + i * PAM_EXPAN_SIZE,
  PAM_EXPAN_SIZE);
-- 
MST

[Qemu-devel] [PULL 20/33] MAINTAINERS: list "tests/acpi-test-data" files in ACPI/SMBIOS section

2018-11-05 Thread Michael S. Tsirkin

From: Laszlo Ersek 

The "tests/acpi-test-data" files are currently not covered by any section
in MAINTAINERS, and "scripts/checkpatch.pl" complains when new data files
are added.

Cc: "Michael S. Tsirkin" 
Cc: Alex Williamson 
Cc: Gerd Hoffmann 
Cc: Igor Mammedov 
Cc: Marcel Apfelbaum 
Signed-off-by: Laszlo Ersek 
Reviewed-by: Marcel Apfelbaum 
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
---
 MAINTAINERS | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index cfabc14b59..c121a99fc0 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1116,6 +1116,8 @@ F: hw/i386/acpi-build.[hc]
 F: hw/arm/virt-acpi-build.c
 F: tests/bios-tables-test.c
 F: tests/acpi-utils.[hc]
+F: tests/acpi-test-data/*
+F: tests/acpi-test-data/*/*
 
 ppc4xx
 M: Alexander Graf 
-- 
MST

[Qemu-devel] [PULL 28/33] hw/pci: Add missing include

2018-11-05 Thread Michael S. Tsirkin

From: Philippe Mathieu-Daudé 

Noted while refactoring:

  CC  mips-softmmu/hw/mips/gt64xxx_pci.o
In file included from include/hw/pci-host/gt64xxx.h:2,
 from hw/mips/gt64xxx_pci.c:30:
include/hw/pci/pci_bus.h:23:5: error: unknown type name ‘PCIIOMMUFunc’
 PCIIOMMUFunc iommu_fn;
 ^~~~
include/hw/pci/pci_bus.h:27:5: error: unknown type name ‘pci_set_irq_fn’
 pci_set_irq_fn set_irq;
 ^~
include/hw/pci/pci_bus.h:28:5: error: unknown type name ‘pci_map_irq_fn’
 pci_map_irq_fn map_irq;
 ^~
include/hw/pci/pci_bus.h:29:5: error: unknown type name ‘pci_route_irq_fn’
 pci_route_irq_fn route_intx_to_irq;
 ^~~~
include/hw/pci/pci_bus.h:31:24: error: ‘PCI_SLOT_MAX’ undeclared here (not 
in a function)
 PCIDevice *devices[PCI_SLOT_MAX * PCI_FUNC_MAX];
^~~~
include/hw/pci/pci_bus.h:31:39: error: ‘PCI_FUNC_MAX’ undeclared here (not 
in a function)
 PCIDevice *devices[PCI_SLOT_MAX * PCI_FUNC_MAX];
   ^~~~
make[1]: *** [rules.mak:69: hw/mips/gt64xxx_pci.o] Error 1
make: *** [Makefile:482: subdir-mips-softmmu] Error 2

Signed-off-by: Philippe Mathieu-Daudé 
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
---
 include/hw/pci/pci_bus.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/include/hw/pci/pci_bus.h b/include/hw/pci/pci_bus.h
index b7da8f555b..dfb75752cb 100644
--- a/include/hw/pci/pci_bus.h
+++ b/include/hw/pci/pci_bus.h
@@ -1,6 +1,8 @@
 #ifndef QEMU_PCI_BUS_H
 #define QEMU_PCI_BUS_H
 
+#include "hw/pci/pci.h"
+
 /*
  * PCI Bus datastructures.
  *
-- 
MST

[Qemu-devel] [PULL 13/33] x86_iommu/amd: remove V=1 check from amdvi_validate_dte()

2018-11-05 Thread Michael S. Tsirkin

From: "Singh, Brijesh" 

Currently, the amdvi_validate_dte() assumes that a valid DTE will
always have V=1. This is not true. The V=1 means that bit[127:1] are
valid. A valid DTE can have IV=1 and V=0 (i.e address translation
disabled and interrupt remapping enabled)

Remove the V=1 check from amdvi_validate_dte(), make the caller
responsible to check for V or IV bits.

This also fixes a bug in the existing code: when an error is
detected during the translation, we now fail the translation
instead of assuming passthrough mode.

Signed-off-by: Brijesh Singh 
Reviewed-by: Peter Xu 
Cc: Peter Xu 
Cc: "Michael S. Tsirkin" 
Cc: Paolo Bonzini 
Cc: Richard Henderson 
Cc: Eduardo Habkost 
Cc: Marcel Apfelbaum 
Cc: Tom Lendacky 
Cc: Suravee Suthikulpanit 
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
---
 hw/i386/amd_iommu.c | 10 +++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/hw/i386/amd_iommu.c b/hw/i386/amd_iommu.c
index 1fd669fef8..7206bb09c2 100644
--- a/hw/i386/amd_iommu.c
+++ b/hw/i386/amd_iommu.c
@@ -807,7 +807,7 @@ static inline uint64_t amdvi_get_perms(uint64_t entry)
AMDVI_DEV_PERM_SHIFT;
 }
 
-/* a valid entry should have V = 1 and reserved bits honoured */
+/* validate that reserved bits are honoured */
 static bool amdvi_validate_dte(AMDVIState *s, uint16_t devid,
uint64_t *dte)
 {
@@ -820,7 +820,7 @@ static bool amdvi_validate_dte(AMDVIState *s, uint16_t 
devid,
 return false;
 }
 
-return dte[0] & AMDVI_DEV_VALID;
+return true;
 }
 
 /* get a device table entry given the devid */
@@ -966,8 +966,12 @@ static void amdvi_do_translate(AMDVIAddressSpace *as, 
hwaddr addr,
 return;
 }
 
-/* devices with V = 0 are not translated */
 if (!amdvi_get_dte(s, devid, entry)) {
+return;
+}
+
+/* devices with V = 0 are not translated */
+if (!(entry[0] & AMDVI_DEV_VALID)) {
 goto out;
 }
 
-- 
MST

[Qemu-devel] [PULL 24/33] bios-tables-test: prepare expected files for mmio64

2018-11-05 Thread Michael S. Tsirkin

test will be added by follow-up patch.

Signed-off-by: Michael S. Tsirkin 
---
 tests/data/acpi/q35/DSDT.mmio64 | Bin 0 -> 8947 bytes
 tests/data/acpi/q35/SRAT.mmio64 | Bin 0 -> 224 bytes
 2 files changed, 0 insertions(+), 0 deletions(-)
 create mode 100644 tests/data/acpi/q35/DSDT.mmio64
 create mode 100644 tests/data/acpi/q35/SRAT.mmio64

diff --git a/tests/data/acpi/q35/DSDT.mmio64 b/tests/data/acpi/q35/DSDT.mmio64
new file mode 100644
index 
..a058ff2ee31a22a55b5b198bc1531c7f20b243f6
GIT binary patch
literal 8947
zcmb7KTW=f38J*=#t05&VrL`MZVWfQbOALxMpqxlJWYk&d;`r6l^h@YaKZ)QiHSyDi(2XemIbG~oR
z?ChN7uKcdw`*w{n>)+KKuhA`4ueUuPeHLSk+Vt%-GdEcO(6819Rz8)n`gil9jgPAB
zKI2!f)vdqogosUE7X2foN>3n|cX7u4F;Vnj>cWyXTzK9FEkXI)mSOBY=3Je5j)p_ihUv>SyOn{MDhyy1gK0NAyD1*>LjSOZGnnFsJk`H@j@r!I
z2`Xu3;j4F++P39Y@zHAI!?}9)`LwD?)%r)zc6uI)%5#>-
zIiiuS@H2HQVi9vvYs{)!2kd~Qa__PJ(F|8q`E-WR9HqWjxhk!3Z%Ly>U+!Z4{me;0htJimchm2tZF}ud
z6slz~C+kX4){0kMoQ5Xkv@{RW57NwDb3~d=N^>cZhNskUXiiVflX{Rs9vgY=cGKHr
zZef6LE83&T`O|9_odU=Euee;=onCu!>+QqEdt13-o{V5BXht&C;1TLNvxd&BU@B-N<0V&YWN>XwKA$
zP}ezQ=$tWh&X_t8>N;&hr)}u8O`QmJo!B9?nVvOt&YC(A>N>fhlN&m@sS}~DGjHh3
z8#?o*PK3Hn>{QzMEEqZqrcQ*q&N)NpoS}2h)QM2nIdABkH+0UMIuYtR7Yv;XhRy|3
zCqiAPW9W1YosOv!p{}!N=qwsKi>6M5y3R#I=c1u=(bS1h*STcqTrzYnnK}{bI$cAj
zYv^=Mod|WEXAGTZ44r38od|WE%ZAQnL+7%o6QQp2tfBL)q4TV%6QQni#n8E8=v*;%
zBGh%B6U<7y6P^>yYP{KSv6{|8Z}qVnh0f08wOg@
z47A>Do){?FC>$Fo0+f|O8i6Vb1C=;oNfz#R1W-9TCkBcTLl;;WsKmw*G1cUffl8=gl7S*rI$@v^E0{1)4N3+o
zp@K;Uicsl$|V8TE(C>f}P3MLsSLZuT1DzSnI
z1J$5ppb{#WWS|I@P8g`f3MLFxgOY(ts9=(TB2+qIpb{&XFi;Ij1}dR~Nd}5g>4bqw
ztYE@GH7FUVgbF4ZC_<$Z1}d?F2?N!jWS|l%m}H;`l};F_#0n-1RD+U%N~mCxfg)5o
zVW1K#m@rTcN(L&Sf=LF7Q0at$N~~bQKs6{CsDuh887M-f69y`=f(ZlFpk$yDDwt%T
z2$fD4sKg2;3{-=Xfl8=gl7S*rI$@v^E0{1)4N3+op@K;UicslAsK$hWYD^fY
z#v}vPm}H(JJRC7xPiqL27
z`2j4HjS55YV|7eFNbl$9tJ1&wi|5nyE0vxV(9;O3-Px_cad)fq;n9b0GdvsFtx@5EYq*G?&4i_32&%V!^<{H9iZQ9ik#!TZhceureGEDXdAu*L(G3uzytlePf$={C6AuVd(G{x<#mErEU)%**(@GLWZgN-NwAv^|tNPWLV*smHaTon~X4G$)uR!Q9UyuNSp~w
zR}zlNx1x!W0UcT4{jJQXF_}u7&*`andNLQ_$TeMPFzfAfdxg`4PCZ`}gN5Q~e~PY%
zr;@z{eIwb4DVRFC{)aEos=@_^KP*tJEPG7r3u`c_^on_IGyI&V8$EX(@BVsu62tcP
z-@qk$Jz(wyEaG~*HmH>^;WUXCRJ5S!++yF(YS&@VXv?<*>#)%dIu?G1^OgQl<3)J|A+y}g6Pf*H5R_iO{fB+Jxb48-
z?%-AFbK2CS}Z~By_Lkaf3UW9nz5SP7HU~eJf~rTg$pLj!gSZrk!^9rBpHm+l
zKcLtWokh`U-8t!@R*R0cM*hr@y0Lyb8<&c!{Nl&3o`;f2D#;{?Lf*zD&XIlqO=!-8
z8#k9Rpg}~H>2O5J>0m_Tq(ku;BHBsw%4PRSti!-co`8r}r#h*j`(rGuUS8@d&mQxczv3w<@f3%Ye+GYGN09~Brcdn+a

literal 0
HcmV?d1

diff --git a/tests/data/acpi/q35/SRAT.mmio64 b/tests/data/acpi/q35/SRAT.mmio64
new file mode 100644
index 
..ac35f3dac4f47b86e41c7f35ee40bac14174b37e
GIT binary patch
literal 224
zcmWFzatwLEz`($0?d0$55v<@85#SsQ6k`O6f!H7#gyBE{mCvXFmw__4-~!0{5bA&i
cfWZfLm_qF8V6xb0gn4lH?0~6chB1IN0P8^t0RR91

literal 0
HcmV?d1

-- 
MST

[Qemu-devel] [PULL 27/33] hw/pci-bridge/ioh3420: Remove unuseful header

2018-11-05 Thread Michael S. Tsirkin

From: Philippe Mathieu-Daudé 

Signed-off-by: Philippe Mathieu-Daudé 
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
---
 hw/pci-bridge/ioh3420.h | 6 --
 hw/pci-bridge/ioh3420.c | 2 +-
 2 files changed, 1 insertion(+), 7 deletions(-)
 delete mode 100644 hw/pci-bridge/ioh3420.h

diff --git a/hw/pci-bridge/ioh3420.h b/hw/pci-bridge/ioh3420.h
deleted file mode 100644
index ea423cb991..00
--- a/hw/pci-bridge/ioh3420.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef QEMU_IOH3420_H
-#define QEMU_IOH3420_H
-
-#include "hw/pci/pcie_port.h"
-
-#endif /* QEMU_IOH3420_H */
diff --git a/hw/pci-bridge/ioh3420.c b/hw/pci-bridge/ioh3420.c
index a451d74ee6..81f2de6f07 100644
--- a/hw/pci-bridge/ioh3420.c
+++ b/hw/pci-bridge/ioh3420.c
@@ -24,7 +24,7 @@
 #include "hw/pci/pci_ids.h"
 #include "hw/pci/msi.h"
 #include "hw/pci/pcie.h"
-#include "ioh3420.h"
+#include "hw/pci/pcie_port.h"
 
 #define PCI_DEVICE_ID_IOH_EPORT 0x3420  /* D0:F0 express mode */
 #define PCI_DEVICE_ID_IOH_REV   0x2
-- 
MST

[Qemu-devel] [PULL 10/33] vhost-user-blk: start vhost when guest kicks

2018-11-05 Thread Michael S. Tsirkin

From: Yongji Xie 

Some old guests (before commit 7a11370e5: "virtio_blk: enable VQs early")
kick the virtqueue before setting VIRTIO_CONFIG_S_DRIVER_OK. This violates
the virtio spec, but virtio 1.0 transitional devices support this behaviour,
so we should start vhost when the guest kicks in this case.

Signed-off-by: Yongji Xie 
Signed-off-by: Chai Wen 
Signed-off-by: Ni Xun 
Reviewed-by: Stefan Hajnoczi 
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
---
 hw/block/vhost-user-blk.c | 25 +
 1 file changed, 25 insertions(+)

diff --git a/hw/block/vhost-user-blk.c b/hw/block/vhost-user-blk.c
index d755223643..1451940845 100644
--- a/hw/block/vhost-user-blk.c
+++ b/hw/block/vhost-user-blk.c
@@ -217,7 +217,32 @@ static uint64_t vhost_user_blk_get_features(VirtIODevice 
*vdev,
 
 static void vhost_user_blk_handle_output(VirtIODevice *vdev, VirtQueue *vq)
 {
+VHostUserBlk *s = VHOST_USER_BLK(vdev);
+int i;
 
+if (!(virtio_host_has_feature(vdev, VIRTIO_F_VERSION_1) &&
+!virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1))) {
+return;
+}
+
+if (s->dev.started) {
+return;
+}
+
+/* Some guests kick before setting VIRTIO_CONFIG_S_DRIVER_OK so start
+ * vhost here instead of waiting for .set_status().
+ */
+vhost_user_blk_start(vdev);
+
+/* Kick right away to begin processing requests already in vring */
+for (i = 0; i < s->dev.nvqs; i++) {
+VirtQueue *kick_vq = virtio_get_queue(vdev, i);
+
+if (!virtio_queue_get_desc_addr(vdev, i)) {
+continue;
+}
+event_notifier_set(virtio_queue_get_host_notifier(kick_vq));
+}
 }
 
 static void vhost_user_blk_device_realize(DeviceState *dev, Error **errp)
-- 
MST

[Qemu-devel] [PULL 15/33] x86_iommu/amd: Prepare for interrupt remap support

2018-11-05 Thread Michael S. Tsirkin

From: "Singh, Brijesh" 

Register the interrupt remapping callback and read/write ops for the
amd-iommu-ir memory region.

amd-iommu-ir is set to higher priority to ensure that this region won't
be masked out by other memory regions.

Signed-off-by: Brijesh Singh 
Cc: Peter Xu 
Cc: "Michael S. Tsirkin" 
Cc: Paolo Bonzini 
Cc: Richard Henderson 
Cc: Eduardo Habkost 
Cc: Marcel Apfelbaum 
Cc: Tom Lendacky 
Cc: Suravee Suthikulpanit 
Reviewed-by: Peter Xu 
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
---
 hw/i386/amd_iommu.h  |  14 +-
 hw/i386/amd_iommu.c  | 106 +++
 hw/i386/trace-events |   5 ++
 3 files changed, 123 insertions(+), 2 deletions(-)

diff --git a/hw/i386/amd_iommu.h b/hw/i386/amd_iommu.h
index 874030582d..4e7cc271c4 100644
--- a/hw/i386/amd_iommu.h
+++ b/hw/i386/amd_iommu.h
@@ -206,8 +206,18 @@
 
 #define AMDVI_COMMAND_SIZE   16
 
-#define AMDVI_INT_ADDR_FIRST 0xfee0
-#define AMDVI_INT_ADDR_LAST  0xfeef
+#define AMDVI_INT_ADDR_FIRST0xfee0
+#define AMDVI_INT_ADDR_LAST 0xfeef
+#define AMDVI_INT_ADDR_SIZE (AMDVI_INT_ADDR_LAST - AMDVI_INT_ADDR_FIRST + 
1)
+#define AMDVI_MSI_ADDR_HI_MASK  (0xULL)
+#define AMDVI_MSI_ADDR_LO_MASK  (0xULL)
+
+/* SB IOAPIC is always on this device in AMD systems */
+#define AMDVI_IOAPIC_SB_DEVID   PCI_BUILD_BDF(0, PCI_DEVFN(0x14, 0))
+
+/* Interrupt remapping errors */
+#define AMDVI_IR_ERR0x1
+
 
 #define TYPE_AMD_IOMMU_DEVICE "amd-iommu"
 #define AMD_IOMMU_DEVICE(obj)\
diff --git a/hw/i386/amd_iommu.c b/hw/i386/amd_iommu.c
index 4bec1c6688..9118a75530 100644
--- a/hw/i386/amd_iommu.c
+++ b/hw/i386/amd_iommu.c
@@ -26,6 +26,7 @@
 #include "amd_iommu.h"
 #include "qapi/error.h"
 #include "qemu/error-report.h"
+#include "hw/i386/apic_internal.h"
 #include "trace.h"
 
 /* used AMD-Vi MMIO registers */
@@ -1031,6 +1032,99 @@ static IOMMUTLBEntry amdvi_translate(IOMMUMemoryRegion 
*iommu, hwaddr addr,
 return ret;
 }
 
+/* Interrupt remapping for MSI/MSI-X entry */
+static int amdvi_int_remap_msi(AMDVIState *iommu,
+   MSIMessage *origin,
+   MSIMessage *translated,
+   uint16_t sid)
+{
+assert(origin && translated);
+
+trace_amdvi_ir_remap_msi_req(origin->address, origin->data, sid);
+
+if (!iommu || !X86_IOMMU_DEVICE(iommu)->intr_supported) {
+memcpy(translated, origin, sizeof(*origin));
+goto out;
+}
+
+if (origin->address & AMDVI_MSI_ADDR_HI_MASK) {
+trace_amdvi_err("MSI address high 32 bits non-zero when "
+"Interrupt Remapping enabled.");
+return -AMDVI_IR_ERR;
+}
+
+if ((origin->address & AMDVI_MSI_ADDR_LO_MASK) != APIC_DEFAULT_ADDRESS) {
+trace_amdvi_err("MSI is not from IOAPIC.");
+return -AMDVI_IR_ERR;
+}
+
+out:
+trace_amdvi_ir_remap_msi(origin->address, origin->data,
+ translated->address, translated->data);
+return 0;
+}
+
+static int amdvi_int_remap(X86IOMMUState *iommu,
+   MSIMessage *origin,
+   MSIMessage *translated,
+   uint16_t sid)
+{
+return amdvi_int_remap_msi(AMD_IOMMU_DEVICE(iommu), origin,
+   translated, sid);
+}
+
+static MemTxResult amdvi_mem_ir_write(void *opaque, hwaddr addr,
+  uint64_t value, unsigned size,
+  MemTxAttrs attrs)
+{
+int ret;
+MSIMessage from = { 0, 0 }, to = { 0, 0 };
+uint16_t sid = AMDVI_IOAPIC_SB_DEVID;
+
+from.address = (uint64_t) addr + AMDVI_INT_ADDR_FIRST;
+from.data = (uint32_t) value;
+
+trace_amdvi_mem_ir_write_req(addr, value, size);
+
+if (!attrs.unspecified) {
+/* We have explicit Source ID */
+sid = attrs.requester_id;
+}
+
+ret = amdvi_int_remap_msi(opaque, &from, &to, sid);
+if (ret < 0) {
+/* TODO: log the event using IOMMU log event interface */
+error_report_once("failed to remap interrupt from devid 0x%x", sid);
+return MEMTX_ERROR;
+}
+
+apic_get_class()->send_msi(&to);
+
+trace_amdvi_mem_ir_write(to.address, to.data);
+return MEMTX_OK;
+}
+
+static MemTxResult amdvi_mem_ir_read(void *opaque, hwaddr addr,
+ uint64_t *data, unsigned size,
+ MemTxAttrs attrs)
+{
+return MEMTX_OK;
+}
+
+static const MemoryRegionOps amdvi_ir_ops = {
+.read_with_attrs = amdvi_mem_ir_read,
+.write_with_attrs = amdvi_mem_ir_write,
+.endianness = DEVICE_LITTLE_ENDIAN,
+.impl = {
+.min_access_size = 4,
+.max_access_size = 4,
+},
+.valid = {
+.min_access_size = 4,
+.max_access_size = 4,
+}
+};
+
 static AddressSpace *amdvi_host_dma_iommu(PCIBus *bus, void *opaque, int dev

[Qemu-devel] [PULL 33/33] vhost-scsi: prevent using uninitialized vqs

2018-11-05 Thread Michael S. Tsirkin

From: yuchenlin 

There are 3 virtqueues (ctrl, event and cmd) for virtio scsi device,
but seabios will only set the physical address for the 3rd one (cmd).
Then in vhost_virtqueue_start(), virtio_queue_get_desc_addr()
will be 0 for ctrl and event vq.

In this case, ctrl and event vq are not initialized.
vhost_verify_ring_mappings may use uninitialized vhost_virtqueue
such that vhost_verify_ring_part_mapping returns ENOMEM.

When we encountered this problem, we got the following logs:

qemu-system-x86_64: Unable to map available ring for ring 0
qemu-system-x86_64: Verify ring failure on region 0

Signed-off-by: Forrest Liu 
Signed-off-by: yuchenlin 
Reviewed-by: Philippe Mathieu-Daudé 
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
---
 hw/scsi/vhost-scsi.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hw/scsi/vhost-scsi.c b/hw/scsi/vhost-scsi.c
index becf550085..7f21b4f9d6 100644
--- a/hw/scsi/vhost-scsi.c
+++ b/hw/scsi/vhost-scsi.c
@@ -183,7 +183,7 @@ static void vhost_scsi_realize(DeviceState *dev, Error 
**errp)
 }
 
 vsc->dev.nvqs = VHOST_SCSI_VQ_NUM_FIXED + vs->conf.num_queues;
-vsc->dev.vqs = g_new(struct vhost_virtqueue, vsc->dev.nvqs);
+vsc->dev.vqs = g_new0(struct vhost_virtqueue, vsc->dev.nvqs);
 vsc->dev.vq_index = 0;
 vsc->dev.backend_features = 0;
 
-- 
MST

[Qemu-devel] [PULL 09/33] intel_iommu: handle invalid ce for shadow sync

2018-11-05 Thread Michael S. Tsirkin

From: Peter Xu 

We should handle VTD_FR_CONTEXT_ENTRY_P properly when synchronizing
shadow page tables.  Having invalid context entry there is perfectly
valid when we move a device out of an existing domain.  When that
happens, instead of posting an error we invalidate the whole region.

Without this patch, QEMU will crash if we do these steps:

(1) start QEMU with VT-d IOMMU and two 10G NICs (ixgbe)
(2) bind the NICs with vfio-pci in the guest
(3) start testpmd with the NICs applied
(4) stop testpmd
(5) rebind the NIC back to ixgbe kernel driver

The patch should fix it.

Reported-by: Pei Zhang 
Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=1627272
Signed-off-by: Peter Xu 
Reviewed-by: Eric Auger 
Reviewed-by: Maxime Coquelin 
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
---
 hw/i386/intel_iommu.c | 17 +
 1 file changed, 17 insertions(+)

diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index 25e54671f4..f24ebfca1c 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -38,6 +38,7 @@
 #include "trace.h"
 
 static void vtd_address_space_refresh_all(IntelIOMMUState *s);
+static void vtd_address_space_unmap(VTDAddressSpace *as, IOMMUNotifier *n);
 
 static void vtd_define_quad(IntelIOMMUState *s, hwaddr addr, uint64_t val,
 uint64_t wmask, uint64_t w1cmask)
@@ -1066,11 +1067,27 @@ static int vtd_sync_shadow_page_table(VTDAddressSpace 
*vtd_as)
 {
 int ret;
 VTDContextEntry ce;
+IOMMUNotifier *n;
 
 ret = vtd_dev_to_context_entry(vtd_as->iommu_state,
pci_bus_num(vtd_as->bus),
vtd_as->devfn, &ce);
 if (ret) {
+if (ret == -VTD_FR_CONTEXT_ENTRY_P) {
+/*
+ * It's a valid scenario to have a context entry that is
+ * not present.  For example, when a device is removed
+ * from an existing domain then the context entry will be
+ * zeroed by the guest before it was put into another
+ * domain.  When this happens, instead of synchronizing
+ * the shadow pages we should invalidate all existing
+ * mappings and notify the backends.
+ */
+IOMMU_NOTIFIER_FOREACH(n, &vtd_as->iommu) {
+vtd_address_space_unmap(vtd_as, n);
+}
+ret = 0;
+}
 return ret;
 }
 
-- 
MST

[Qemu-devel] [PULL 25/33] tests/bios-tables-test: add 64-bit PCI MMIO aperture round-up test on Q35

2018-11-05 Thread Michael S. Tsirkin

From: Laszlo Ersek 

In commit 9fa99d2519cb ("hw/pci-host: Fix x86 Host Bridges 64bit PCI
hole", 2017-11-16), we meant to expose such a 64-bit PCI MMIO aperture in
the ACPI DSDT that would be at least as large as the new "pci-hole64-size"
property (2GB on i440fx, 32GB on q35). The goal was to offer "enough"
64-bit MMIO aperture to the guest OS for hotplug purposes.

Previous patch fixed the issue that the aperture is extended relative to
a possibly incorrect base.  This may result in an aperture size that is
smaller than the intent of commit 9fa99d2519cb.

This patch adds a test to make sure it won't happen again.

In the test case being added:
- use 128 MB initial RAM size,
- ask for one DIMM hotplug slot,
- ask for 2 GB maximum RAM size,
- use a pci-testdev with a 64-bit BAR of 2 GB size.

Consequences:

(1) In pc_memory_init() [hw/i386/pc.c], the DIMM hotplug area size is
initially set to 2048-128 = 1920 MB. (Maximum RAM size minus initial
RAM size.)

(2) The DIMM area base is set to 4096 MB (because the initial RAM is only
128 MB -- there is no initial "high RAM").

(3) Due to commit 085f8e88ba73 ("pc: count in 1Gb hugepage alignment when
sizing hotplug-memory container", 2014-11-24), we add 1 GB for the one
DIMM hotplug slot that was specified. This sets the DIMM area size to
1920+1024 = 2944 MB.

(4) The reserved-memory-end address (exclusive) is set to 4096 + 2944 =
7040 MB (DIMM area base plus DIMM area size).

(5) The reserved-memory-end address is rounded up to GB alignment,
yielding 7 GB (7168 MB).

(6) Given the 2 GB BAR size of pci-testdev, SeaBIOS allocates said 64-bit
BAR in 64-bit address space.

(7) Because reserved-memory-end is at 7 GB, it is unaligned for the 2 GB
BAR. Therefore SeaBIOS allocates the BAR at 8 GB. QEMU then
(correctly) assigns the root bridge aperture base this BAR address, to
be exposed in \_SB.PCI0._CRS.

(8) The intent of commit 9fa99d2519cb dictates that QEMU extend the
aperture size to 32 GB, implying a 40 GB end address. However, QEMU
performs the extension relative to reserved-memory-end (7 GB), not
relative to the bridge aperture base that was correctly deduced from
SeaBIOS's BAR programming (8 GB). Therefore we see 39 GB as the
aperture end address in \_SB.PCI0._CRS:

> QWordMemory (ResourceProducer, PosDecode, MinFixed, MaxFixed, Cacheable, 
> ReadWrite,
> 0x0000000000000000, // Granularity
> 0x0000000200000000, // Range Minimum
> 0x00000009BFFFFFFF, // Range Maximum
> 0x0000000000000000, // Translation Offset
> 0x00000007C0000000, // Length
> ,, , AddressRangeMemory, TypeStatic)

Cc: "Michael S. Tsirkin" 
Cc: Alex Williamson 
Cc: Gerd Hoffmann 
Cc: Igor Mammedov 
Cc: Marcel Apfelbaum 
Signed-off-by: Laszlo Ersek 
Reviewed-by: Marcel Apfelbaum 
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
---
 tests/bios-tables-test.c | 16 
 1 file changed, 16 insertions(+)

diff --git a/tests/bios-tables-test.c b/tests/bios-tables-test.c
index b14c8eaa17..d661d9be62 100644
--- a/tests/bios-tables-test.c
+++ b/tests/bios-tables-test.c
@@ -708,6 +708,21 @@ static void test_acpi_q35_tcg_bridge(void)
 free_test_data(&data);
 }
 
+static void test_acpi_q35_tcg_mmio64(void)
+{
+test_data data = {
+.machine = MACHINE_Q35,
+.variant = ".mmio64",
+.required_struct_types = base_required_struct_types,
+.required_struct_types_len = ARRAY_SIZE(base_required_struct_types)
+};
+
+test_acpi_one("-m 128M,slots=1,maxmem=2G "
+  "-device pci-testdev,membar=2G",
+  &data);
+free_test_data(&data);
+}
+
 static void test_acpi_piix4_tcg_cphp(void)
 {
 test_data data;
@@ -875,6 +890,7 @@ int main(int argc, char *argv[])
 qtest_add_func("acpi/piix4/bridge", test_acpi_piix4_tcg_bridge);
 qtest_add_func("acpi/q35", test_acpi_q35_tcg);
 qtest_add_func("acpi/q35/bridge", test_acpi_q35_tcg_bridge);
+qtest_add_func("acpi/q35/mmio64", test_acpi_q35_tcg_mmio64);
 qtest_add_func("acpi/piix4/ipmi", test_acpi_piix4_tcg_ipmi);
 qtest_add_func("acpi/q35/ipmi", test_acpi_q35_tcg_ipmi);
 qtest_add_func("acpi/piix4/cpuhp", test_acpi_piix4_tcg_cphp);
-- 
MST

[Qemu-devel] [PULL 31/33] piix: use TYPE_FOO constants rather than string constants

2018-11-05 Thread Michael S. Tsirkin

From: Li Qiang 

Make them more QOMConventional.
Cc:qemu-triv...@nongnu.org

Signed-off-by: Li Qiang 
Reviewed-by: Philippe Mathieu-Daudé 
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
---
 hw/pci-host/piix.c | 11 +++
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/hw/pci-host/piix.c b/hw/pci-host/piix.c
index b5ded1a806..761a8158d7 100644
--- a/hw/pci-host/piix.c
+++ b/hw/pci-host/piix.c
@@ -95,6 +95,9 @@ typedef struct PIIX3State {
 #define I440FX_PCI_DEVICE(obj) \
 OBJECT_CHECK(PCII440FXState, (obj), TYPE_I440FX_PCI_DEVICE)
 
+#define TYPE_PIIX3_DEVICE "PIIX3"
+#define TYPE_PIIX3_XEN_DEVICE "PIIX3-xen"
+
 struct PCII440FXState {
 /*< private >*/
 PCIDevice parent_obj;
@@ -424,13 +427,13 @@ PCIBus *i440fx_init(const char *host_type, const char 
*pci_type,
  * These additional routes can be discovered through ACPI. */
 if (xen_enabled()) {
 PCIDevice *pci_dev = pci_create_simple_multifunction(b,
- -1, true, "PIIX3-xen");
+ -1, true, TYPE_PIIX3_XEN_DEVICE);
 piix3 = PIIX3_PCI_DEVICE(pci_dev);
 pci_bus_irqs(b, xen_piix3_set_irq, xen_pci_slot_get_pirq,
 piix3, XEN_PIIX_NUM_PIRQS);
 } else {
 PCIDevice *pci_dev = pci_create_simple_multifunction(b,
- -1, true, "PIIX3");
+ -1, true, TYPE_PIIX3_DEVICE);
 piix3 = PIIX3_PCI_DEVICE(pci_dev);
 pci_bus_irqs(b, piix3_set_irq, pci_slot_get_pirq, piix3,
 PIIX_NUM_PIRQS);
@@ -748,7 +751,7 @@ static void piix3_class_init(ObjectClass *klass, void *data)
 }
 
 static const TypeInfo piix3_info = {
-.name  = "PIIX3",
+.name  = TYPE_PIIX3_DEVICE,
 .parent= TYPE_PIIX3_PCI_DEVICE,
 .class_init= piix3_class_init,
 };
@@ -761,7 +764,7 @@ static void piix3_xen_class_init(ObjectClass *klass, void 
*data)
 };
 
 static const TypeInfo piix3_xen_info = {
-.name  = "PIIX3-xen",
+.name  = TYPE_PIIX3_XEN_DEVICE,
 .parent= TYPE_PIIX3_PCI_DEVICE,
 .class_init= piix3_xen_class_init,
 };
-- 
MST

[Qemu-devel] [PULL 12/33] x86_iommu: move vtd_generate_msi_message in common file

2018-11-05 Thread Michael S. Tsirkin

From: "Singh, Brijesh" 

The vtd_generate_msi_message() in intel-iommu is used to construct an MSI
message from an IRQ. A similar function will be needed when we add interrupt
remapping support in amd-iommu. Move the function to a common file to
avoid code duplication, and rename it to x86_iommu_irq_to_msi_message().
There are no logic changes in the code flow.

Signed-off-by: Brijesh Singh 
Suggested-by: Peter Xu 
Reviewed-by: Eduardo Habkost 
Cc: Peter Xu 
Cc: "Michael S. Tsirkin" 
Cc: Paolo Bonzini 
Cc: Richard Henderson 
Cc: Eduardo Habkost 
Cc: Marcel Apfelbaum 
Cc: Tom Lendacky 
Cc: Suravee Suthikulpanit 
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
---
 include/hw/i386/intel_iommu.h | 59 ---
 include/hw/i386/x86-iommu.h   | 66 +++
 hw/i386/intel_iommu.c | 32 +++--
 hw/i386/x86-iommu.c   | 24 +
 4 files changed, 94 insertions(+), 87 deletions(-)

diff --git a/include/hw/i386/intel_iommu.h b/include/hw/i386/intel_iommu.h
index fbfedcb1c0..ed4e758273 100644
--- a/include/hw/i386/intel_iommu.h
+++ b/include/hw/i386/intel_iommu.h
@@ -66,8 +66,6 @@ typedef struct VTDIOTLBEntry VTDIOTLBEntry;
 typedef struct VTDBus VTDBus;
 typedef union VTD_IR_TableEntry VTD_IR_TableEntry;
 typedef union VTD_IR_MSIAddress VTD_IR_MSIAddress;
-typedef struct VTDIrq VTDIrq;
-typedef struct VTD_MSIMessage VTD_MSIMessage;
 
 /* Context-Entry */
 struct VTDContextEntry {
@@ -197,63 +195,6 @@ union VTD_IR_MSIAddress {
 uint32_t data;
 };
 
-/* Generic IRQ entry information */
-struct VTDIrq {
-/* Used by both IOAPIC/MSI interrupt remapping */
-uint8_t trigger_mode;
-uint8_t vector;
-uint8_t delivery_mode;
-uint32_t dest;
-uint8_t dest_mode;
-
-/* only used by MSI interrupt remapping */
-uint8_t redir_hint;
-uint8_t msi_addr_last_bits;
-};
-
-struct VTD_MSIMessage {
-union {
-struct {
-#ifdef HOST_WORDS_BIGENDIAN
-uint32_t __addr_head:12; /* 0xfee */
-uint32_t dest:8;
-uint32_t __reserved:8;
-uint32_t redir_hint:1;
-uint32_t dest_mode:1;
-uint32_t __not_used:2;
-#else
-uint32_t __not_used:2;
-uint32_t dest_mode:1;
-uint32_t redir_hint:1;
-uint32_t __reserved:8;
-uint32_t dest:8;
-uint32_t __addr_head:12; /* 0xfee */
-#endif
-uint32_t __addr_hi;
-} QEMU_PACKED;
-uint64_t msi_addr;
-};
-union {
-struct {
-#ifdef HOST_WORDS_BIGENDIAN
-uint16_t trigger_mode:1;
-uint16_t level:1;
-uint16_t __resved:3;
-uint16_t delivery_mode:3;
-uint16_t vector:8;
-#else
-uint16_t vector:8;
-uint16_t delivery_mode:3;
-uint16_t __resved:3;
-uint16_t level:1;
-uint16_t trigger_mode:1;
-#endif
-uint16_t __resved1;
-} QEMU_PACKED;
-uint32_t msi_data;
-};
-};
-
 /* When IR is enabled, all MSI/MSI-X data bits should be zero */
 #define VTD_IR_MSI_DATA  (0)
 
diff --git a/include/hw/i386/x86-iommu.h b/include/hw/i386/x86-iommu.h
index 7c71fc7470..2b22a579a3 100644
--- a/include/hw/i386/x86-iommu.h
+++ b/include/hw/i386/x86-iommu.h
@@ -22,6 +22,7 @@
 
 #include "hw/sysbus.h"
 #include "hw/pci/pci.h"
+#include "hw/pci/msi.h"
 
 #define  TYPE_X86_IOMMU_DEVICE  ("x86-iommu")
 #define  X86_IOMMU_DEVICE(obj) \
@@ -35,6 +36,8 @@
 
 typedef struct X86IOMMUState X86IOMMUState;
 typedef struct X86IOMMUClass X86IOMMUClass;
+typedef struct X86IOMMUIrq X86IOMMUIrq;
+typedef struct X86IOMMU_MSIMessage X86IOMMU_MSIMessage;
 
 typedef enum IommuType {
 TYPE_INTEL,
@@ -78,6 +81,63 @@ struct X86IOMMUState {
 QLIST_HEAD(, IEC_Notifier) iec_notifiers; /* IEC notify list */
 };
 
+/* Generic IRQ entry information when interrupt remapping is enabled */
+struct X86IOMMUIrq {
+/* Used by both IOAPIC/MSI interrupt remapping */
+uint8_t trigger_mode;
+uint8_t vector;
+uint8_t delivery_mode;
+uint32_t dest;
+uint8_t dest_mode;
+
+/* only used by MSI interrupt remapping */
+uint8_t redir_hint;
+uint8_t msi_addr_last_bits;
+};
+
+struct X86IOMMU_MSIMessage {
+union {
+struct {
+#ifdef HOST_WORDS_BIGENDIAN
+uint32_t __addr_head:12; /* 0xfee */
+uint32_t dest:8;
+uint32_t __reserved:8;
+uint32_t redir_hint:1;
+uint32_t dest_mode:1;
+uint32_t __not_used:2;
+#else
+uint32_t __not_used:2;
+uint32_t dest_mode:1;
+uint32_t redir_hint:1;
+uint32_t __reserved:8;
+uint32_t dest:8;
+uint32_t __addr_head:12; /* 0xfee */
+#endif
+uint32_t __addr_hi;
+} QEMU_PACKED;
+uint64_t msi_addr;
+};
+union {
+struct {
+#ifdef HOST_WORDS_BIGENDIAN
+uin

[Qemu-devel] [PULL 17/33] i386: acpi: add IVHD device entry for IOAPIC

2018-11-05 Thread Michael S. Tsirkin

From: "Singh, Brijesh" 

When interrupt remapping is enabled, add a special IVHD device
(type IOAPIC).

Signed-off-by: Brijesh Singh 
Acked-by: Peter Xu 
Cc: Peter Xu 
Cc: "Michael S. Tsirkin" 
Cc: Paolo Bonzini 
Cc: Richard Henderson 
Cc: Eduardo Habkost 
Cc: Marcel Apfelbaum 
Cc: Tom Lendacky 
Cc: Suravee Suthikulpanit 
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
---
 hw/i386/acpi-build.c | 28 +++-
 1 file changed, 27 insertions(+), 1 deletion(-)

diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
index 1599caa7c5..1ef396ddbb 100644
--- a/hw/i386/acpi-build.c
+++ b/hw/i386/acpi-build.c
@@ -2467,9 +2467,12 @@ build_dmar_q35(GArray *table_data, BIOSLinker *linker)
  *   IVRS table as specified in AMD IOMMU Specification v2.62, Section 5.2
  *   accessible here http://support.amd.com/TechDocs/48882_IOMMU.pdf
  */
+#define IOAPIC_SB_DEVID   (uint64_t)PCI_BUILD_BDF(0, PCI_DEVFN(0x14, 0))
+
 static void
 build_amd_iommu(GArray *table_data, BIOSLinker *linker)
 {
+int ivhd_table_len = 28;
 int iommu_start = table_data->len;
 AMDVIState *s = AMD_IOMMU_DEVICE(x86_iommu_get_default());
 
@@ -2491,8 +2494,16 @@ build_amd_iommu(GArray *table_data, BIOSLinker *linker)
  (1UL << 6) | /* PrefSup  */
  (1UL << 7),  /* PPRSup   */
  1);
+
+/*
+ * When interrupt remapping is supported, we add a special IVHD device
+ * for type IO-APIC.
+ */
+if (x86_iommu_get_default()->intr_supported) {
+ivhd_table_len += 8;
+}
 /* IVHD length */
-build_append_int_noprefix(table_data, 28, 2);
+build_append_int_noprefix(table_data, ivhd_table_len, 2);
 /* DeviceID */
 build_append_int_noprefix(table_data, s->devid, 2);
 /* Capability offset */
@@ -2516,6 +2527,21 @@ build_amd_iommu(GArray *table_data, BIOSLinker *linker)
  */
 build_append_int_noprefix(table_data, 0x001, 4);
 
+/*
+ * Add a special IVHD device type.
+ * Refer to spec - Table 95: IVHD device entry type codes
+ *
+ * Linux IOMMU driver checks for the special IVHD device (type IO-APIC).
+ * See Linux kernel commit 'c2ff5cf5294bcbd7fa50f7d860e90a66db7e5059'
+ */
+if (x86_iommu_get_default()->intr_supported) {
+build_append_int_noprefix(table_data,
+ (0x1ull << 56) |   /* type IOAPIC */
+ (IOAPIC_SB_DEVID << 40) |  /* IOAPIC devid */
+ 0x48,  /* special device 
*/
+ 8);
+}
+
 build_header(linker, table_data, (void *)(table_data->data + iommu_start),
  "IVRS", table_data->len - iommu_start, 1, NULL, NULL);
 }
-- 
MST

[Qemu-devel] [PULL 08/33] intel_iommu: move ce fetching out when sync shadow

2018-11-05 Thread Michael S. Tsirkin

From: Peter Xu 

There are two callers for vtd_sync_shadow_page_table_range(): one
provided a valid context entry and one not.  Move that fetching
operation into the caller vtd_sync_shadow_page_table() where we need to
fetch the context entry.

Meanwhile, remove the error_report_once() directly since we're already
tracing all the error cases in the previous call.  Instead, return error
number back to caller.  This will not change anything functional since
callers are dropping it after all.

We do this move mainly because we want to do something more later in
vtd_sync_shadow_page_table().

Signed-off-by: Peter Xu 
Reviewed-by: Eric Auger 
Reviewed-by: Maxime Coquelin 
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
---
 hw/i386/intel_iommu.c | 41 +
 1 file changed, 13 insertions(+), 28 deletions(-)

diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index 306708eb3b..25e54671f4 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -1045,7 +1045,6 @@ static int vtd_sync_shadow_page_hook(IOMMUTLBEntry *entry,
 return 0;
 }
 
-/* If context entry is NULL, we'll try to fetch it on our own. */
 static int vtd_sync_shadow_page_table_range(VTDAddressSpace *vtd_as,
 VTDContextEntry *ce,
 hwaddr addr, hwaddr size)
@@ -1057,39 +1056,25 @@ static int 
vtd_sync_shadow_page_table_range(VTDAddressSpace *vtd_as,
 .notify_unmap = true,
 .aw = s->aw_bits,
 .as = vtd_as,
+.domain_id = VTD_CONTEXT_ENTRY_DID(ce->hi),
 };
-VTDContextEntry ce_cache;
-int ret;
 
-if (ce) {
-/* If the caller provided context entry, use it */
-ce_cache = *ce;
-} else {
-/* If the caller didn't provide ce, try to fetch */
-ret = vtd_dev_to_context_entry(s, pci_bus_num(vtd_as->bus),
-   vtd_as->devfn, &ce_cache);
-if (ret) {
-/*
- * This should not really happen, but in case it happens,
- * we just skip the sync for this time.  After all we even
- * don't have the root table pointer!
- */
-error_report_once("%s: invalid context entry for bus 0x%x"
-  " devfn 0x%x",
-  __func__, pci_bus_num(vtd_as->bus),
-  vtd_as->devfn);
-return 0;
-}
-}
-
-info.domain_id = VTD_CONTEXT_ENTRY_DID(ce_cache.hi);
-
-return vtd_page_walk(&ce_cache, addr, addr + size, &info);
+return vtd_page_walk(ce, addr, addr + size, &info);
 }
 
 static int vtd_sync_shadow_page_table(VTDAddressSpace *vtd_as)
 {
-return vtd_sync_shadow_page_table_range(vtd_as, NULL, 0, UINT64_MAX);
+int ret;
+VTDContextEntry ce;
+
+ret = vtd_dev_to_context_entry(vtd_as->iommu_state,
+   pci_bus_num(vtd_as->bus),
+   vtd_as->devfn, &ce);
+if (ret) {
+return ret;
+}
+
+return vtd_sync_shadow_page_table_range(vtd_as, &ce, 0, UINT64_MAX);
 }
 
 /*
-- 
MST

[Qemu-devel] [PULL 23/33] hw/pci-host/x86: extend the 64-bit PCI hole relative to the fw-assigned base

2018-11-05 Thread Michael S. Tsirkin

From: Laszlo Ersek 

In commit 9fa99d2519cb ("hw/pci-host: Fix x86 Host Bridges 64bit PCI
hole", 2017-11-16), we meant to expose such a 64-bit PCI MMIO aperture in
the ACPI DSDT that would be at least as large as the new "pci-hole64-size"
property (2GB on i440fx, 32GB on q35). The goal was to offer "enough"
64-bit MMIO aperture to the guest OS for hotplug purposes.

In that commit, we added or modified five functions:

- pc_pci_hole64_start(): shared between i440fx and q35. Provides a default
  64-bit base, which starts beyond the cold-plugged 64-bit RAM, and skips
  the DIMM hotplug area too (if any).

- i440fx_pcihost_get_pci_hole64_start(), q35_host_get_pci_hole64_start():
  board-specific 64-bit base property getters called abstractly by the
  ACPI generator. Both of these fall back to pc_pci_hole64_start() if the
  firmware didn't program any 64-bit hole (i.e. if the firmware didn't
  assign a 64-bit GPA to any MMIO BAR on any device). Otherwise, they
  honor the firmware's BAR assignments (i.e., they treat the lowest 64-bit
  GPA programmed by the firmware as the base address for the aperture).

- i440fx_pcihost_get_pci_hole64_end(), q35_host_get_pci_hole64_end():
  these were intended to extend the aperture to our size recommendation,
  calculated relative to the base of the aperture.

Despite the original intent, i440fx_pcihost_get_pci_hole64_end() and
q35_host_get_pci_hole64_end() currently only extend the aperture relative
to the default base (pc_pci_hole64_start()), ignoring any programming done
by the firmware. This means that our size recommendation may not be met.
Fix it by honoring the firmware's address assignments.

The strange extension sizes were spotted by Alex, in the log of a guest
kernel running on top of OVMF (which prefers to assign 64-bit GPAs to
64-bit BARs).

This change only affects DSDT generation, therefore no new compat property
is being introduced.

Using an i440fx OVMF guest with 5GB RAM, an example _CRS change is:

> @@ -881,9 +881,9 @@
>  QWordMemory (ResourceProducer, PosDecode, MinFixed, MaxFixed, 
> Cacheable, ReadWrite,
>  0x, // Granularity
>  0x0008, // Range Minimum
> -0x00080001C0FF, // Range Maximum
> +0x00087FFF, // Range Maximum
>  0x, // Translation Offset
> -0x0001C100, // Length
> +0x8000, // Length
>  ,, , AddressRangeMemory, TypeStatic)
>  })
>  Device (GPE0)

(On i440fx, the low RAM split is at 3GB, in this case. Therefore, with 5GB
guest RAM and no DIMM hotplug range, pc_pci_hole64_start() returns 4 +
(5-3) = 6 GB. Adding the 2GB extension to that yields 8GB, which is below
the firmware-programmed base of 32GB, before the patch. Therefore, before
the patch, the extension is ineffective. After the patch, we add the 2GB
extension to the firmware-programmed base, namely 32GB.)

Using a q35 OVMF guest with 5GB RAM, an example _CRS change is:

> @@ -3162,9 +3162,9 @@
>  QWordMemory (ResourceProducer, PosDecode, MinFixed, MaxFixed, 
> Cacheable, ReadWrite,
>  0x, // Granularity
>  0x0008, // Range Minimum
> -0x0009BFFF, // Range Maximum
> +0x000F, // Range Maximum
>  0x, // Translation Offset
> -0x0001C000, // Length
> +0x0008, // Length
>  ,, , AddressRangeMemory, TypeStatic)
>  })
>  Device (GPE0)

(On Q35, the low RAM split is at 2GB. Therefore, with 5GB guest RAM and no
DIMM hotplug range, pc_pci_hole64_start() returns 4 + (5-2) = 7 GB. Adding
the 32GB extension to that yields 39GB (0x_0009_BFFF_ + 1), before
the patch. After the patch, we add the 32GB extension to the
firmware-programmed base, namely 32GB.)

The ACPI test data for the bios-tables-test case that we added earlier in
this series are corrected too, as follows:

> @@ -3339,9 +3339,9 @@
>  QWordMemory (ResourceProducer, PosDecode, MinFixed, MaxFixed, 
> Cacheable, ReadWrite,
>  0x, // Granularity
>  0x0002, // Range Minimum
> -0x0009BFFF, // Range Maximum
> +0x0009, // Range Maximum
>  0x, // Translation Offset
> -0x0007C000, // Length
> +0x0008, // Length
>  ,, , AddressRangeMemory, TypeStatic)
>  })
>  Device (GPE0)

Cc: "Michael S. Tsirkin" 
Cc: Alex Williamson 
Cc: Gerd Hoffmann 
Cc: Igor Mammedov 
Cc: Marcel Apfelbaum 
Fixes: 9fa99d2519cbf71f871e46871df12cb446dc1c3e
Signed-off-by: Laszlo Ersek 
Reviewed-by: Marcel Apfelbaum 
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsir

[Qemu-devel] [PULL 21/33] pci-testdev: add optional memory bar

2018-11-05 Thread Michael S. Tsirkin

From: Gerd Hoffmann 

Add memory bar to pci-testdev.  Size is configurable using the membar
property.  Setting the size to zero (default) turns it off.  Can be used
to check whether guests handle large pci bars correctly.

Reviewed-by: Marc-André Lureau 
Reviewed-by: Laszlo Ersek 
Tested-by: Laszlo Ersek 
Signed-off-by: Gerd Hoffmann 
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
---
 docs/specs/pci-testdev.txt | 15 ++-
 hw/misc/pci-testdev.c  | 19 +++
 2 files changed, 29 insertions(+), 5 deletions(-)

diff --git a/docs/specs/pci-testdev.txt b/docs/specs/pci-testdev.txt
index 128ae222ef..4280a1e73c 100644
--- a/docs/specs/pci-testdev.txt
+++ b/docs/specs/pci-testdev.txt
@@ -1,11 +1,11 @@
 pci-test is a device used for testing low level IO
 
-device implements up to two BARs: BAR0 and BAR1.
-Each BAR can be memory or IO. Guests must detect
-BAR type and act accordingly.
+device implements up to three BARs: BAR0, BAR1 and BAR2.
+Each of BAR 0+1 can be memory or IO. Guests must detect
+BAR types and act accordingly.
 
-Each BAR size is up to 4K bytes.
-Each BAR starts with the following header:
+BAR 0+1 size is up to 4K bytes each.
+BAR 0+1 starts with the following header:
 
 typedef struct PCITestDevHdr {
 uint8_t test;  <- write-only, starts a given test number
@@ -24,3 +24,8 @@ All registers are little endian.
 device is expected to always implement tests 0 to N on each BAR, and to add new
 tests with higher numbers.  In this way a guest can scan test numbers until it
 detects an access type that it does not support on this BAR, then stop.
+
+BAR2 is a 64bit memory bar, without backing storage.  It is disabled
+by default and can be enabled using the membar= property.  This
+can be used to test whether guests handle pci bars of a specific
+(possibly quite large) size correctly.
diff --git a/hw/misc/pci-testdev.c b/hw/misc/pci-testdev.c
index 32041f535f..1282d151cb 100644
--- a/hw/misc/pci-testdev.c
+++ b/hw/misc/pci-testdev.c
@@ -85,6 +85,9 @@ typedef struct PCITestDevState {
 MemoryRegion portio;
 IOTest *tests;
 int current;
+
+uint64_t membar_size;
+MemoryRegion membar;
 } PCITestDevState;
 
 #define TYPE_PCI_TEST_DEV "pci-testdev"
@@ -253,6 +256,16 @@ static void pci_testdev_realize(PCIDevice *pci_dev, Error 
**errp)
 pci_register_bar(pci_dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY, &d->mmio);
 pci_register_bar(pci_dev, 1, PCI_BASE_ADDRESS_SPACE_IO, &d->portio);
 
+if (d->membar_size) {
+memory_region_init(&d->membar, OBJECT(d), "pci-testdev-membar",
+   d->membar_size);
+pci_register_bar(pci_dev, 2,
+ PCI_BASE_ADDRESS_SPACE_MEMORY |
+ PCI_BASE_ADDRESS_MEM_PREFETCH |
+ PCI_BASE_ADDRESS_MEM_TYPE_64,
+ &d->membar);
+}
+
 d->current = -1;
 d->tests = g_malloc0(IOTEST_MAX * sizeof *d->tests);
 for (i = 0; i < IOTEST_MAX; ++i) {
@@ -305,6 +318,11 @@ static void qdev_pci_testdev_reset(DeviceState *dev)
 pci_testdev_reset(d);
 }
 
+static Property pci_testdev_properties[] = {
+DEFINE_PROP_SIZE("membar", PCITestDevState, membar_size, 0),
+DEFINE_PROP_END_OF_LIST(),
+};
+
 static void pci_testdev_class_init(ObjectClass *klass, void *data)
 {
 DeviceClass *dc = DEVICE_CLASS(klass);
@@ -319,6 +337,7 @@ static void pci_testdev_class_init(ObjectClass *klass, void 
*data)
 dc->desc = "PCI Test Device";
 set_bit(DEVICE_CATEGORY_MISC, dc->categories);
 dc->reset = qdev_pci_testdev_reset;
+dc->props = pci_testdev_properties;
 }
 
 static const TypeInfo pci_testdev_info = {
-- 
MST

[Qemu-devel] [PULL 16/33] x86_iommu/amd: Add interrupt remap support when VAPIC is not enabled

2018-11-05 Thread Michael S. Tsirkin

From: "Singh, Brijesh" 

Emulate the interrupt remapping support when guest virtual APIC is
not enabled.

For more information, refer to the AMD IOMMU spec Rev 3.0, section 2.2.5.1.

When VAPIC is not enabled, it uses interrupt remapping as defined in
Table 20 and Figure 15 from IOMMU spec.

Signed-off-by: Brijesh Singh 
Cc: Peter Xu 
Cc: "Michael S. Tsirkin" 
Cc: Paolo Bonzini 
Cc: Richard Henderson 
Cc: Eduardo Habkost 
Cc: Marcel Apfelbaum 
Cc: Tom Lendacky 
Cc: Suravee Suthikulpanit 
Reviewed-by: Peter Xu 
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
---
 hw/i386/amd_iommu.h  |  44 ++
 hw/i386/amd_iommu.c  | 199 ++-
 hw/i386/trace-events |   7 ++
 3 files changed, 249 insertions(+), 1 deletion(-)

diff --git a/hw/i386/amd_iommu.h b/hw/i386/amd_iommu.h
index 4e7cc271c4..f73be48fca 100644
--- a/hw/i386/amd_iommu.h
+++ b/hw/i386/amd_iommu.h
@@ -217,7 +217,51 @@
 
 /* Interrupt remapping errors */
 #define AMDVI_IR_ERR0x1
+#define AMDVI_IR_GET_IRTE   0x2
+#define AMDVI_IR_TARGET_ABORT   0x3
 
+/* Interrupt remapping */
+#define AMDVI_IR_REMAP_ENABLE   1ULL
+#define AMDVI_IR_INTCTL_SHIFT   60
+#define AMDVI_IR_INTCTL_ABORT   0
+#define AMDVI_IR_INTCTL_PASS1
+#define AMDVI_IR_INTCTL_REMAP   2
+
+#define AMDVI_IR_PHYS_ADDR_MASK (((1ULL << 45) - 1) << 6)
+
+/* MSI data 10:0 bits (section 2.2.5.1 Fig 14) */
+#define AMDVI_IRTE_OFFSET   0x7ff
+
+/* Delivery mode of MSI data (same as IOAPIC delivery mode encoding) */
+#define AMDVI_IOAPIC_INT_TYPE_FIXED  0x0
+#define AMDVI_IOAPIC_INT_TYPE_ARBITRATED 0x1
+#define AMDVI_IOAPIC_INT_TYPE_SMI0x2
+#define AMDVI_IOAPIC_INT_TYPE_NMI0x4
+#define AMDVI_IOAPIC_INT_TYPE_INIT   0x5
+#define AMDVI_IOAPIC_INT_TYPE_EINT   0x7
+
+/* Pass through interrupt */
+#define AMDVI_DEV_INT_PASS_MASK (1UL << 56)
+#define AMDVI_DEV_EINT_PASS_MASK(1UL << 57)
+#define AMDVI_DEV_NMI_PASS_MASK (1UL << 58)
+#define AMDVI_DEV_LINT0_PASS_MASK   (1UL << 62)
+#define AMDVI_DEV_LINT1_PASS_MASK   (1UL << 63)
+
+/* Interrupt remapping table fields (Guest VAPIC not enabled) */
+union irte {
+uint32_t val;
+struct {
+uint32_t valid:1,
+ no_fault:1,
+ int_type:3,
+ rq_eoi:1,
+ dm:1,
+ guest_mode:1,
+ destination:8,
+ vector:8,
+ rsvd:8;
+} fields;
+};
 
 #define TYPE_AMD_IOMMU_DEVICE "amd-iommu"
 #define AMD_IOMMU_DEVICE(obj)\
diff --git a/hw/i386/amd_iommu.c b/hw/i386/amd_iommu.c
index 9118a75530..8e2f13c029 100644
--- a/hw/i386/amd_iommu.c
+++ b/hw/i386/amd_iommu.c
@@ -28,6 +28,7 @@
 #include "qemu/error-report.h"
 #include "hw/i386/apic_internal.h"
 #include "trace.h"
+#include "hw/i386/apic-msidef.h"
 
 /* used AMD-Vi MMIO registers */
 const char *amdvi_mmio_low[] = {
@@ -1032,21 +1033,146 @@ static IOMMUTLBEntry amdvi_translate(IOMMUMemoryRegion 
*iommu, hwaddr addr,
 return ret;
 }
 
+static int amdvi_get_irte(AMDVIState *s, MSIMessage *origin, uint64_t *dte,
+  union irte *irte, uint16_t devid)
+{
+uint64_t irte_root, offset;
+
+irte_root = dte[2] & AMDVI_IR_PHYS_ADDR_MASK;
+offset = (origin->data & AMDVI_IRTE_OFFSET) << 2;
+
+trace_amdvi_ir_irte(irte_root, offset);
+
+if (dma_memory_read(&address_space_memory, irte_root + offset,
+irte, sizeof(*irte))) {
+trace_amdvi_ir_err("failed to get irte");
+return -AMDVI_IR_GET_IRTE;
+}
+
+trace_amdvi_ir_irte_val(irte->val);
+
+return 0;
+}
+
+static int amdvi_int_remap_legacy(AMDVIState *iommu,
+  MSIMessage *origin,
+  MSIMessage *translated,
+  uint64_t *dte,
+  X86IOMMUIrq *irq,
+  uint16_t sid)
+{
+int ret;
+union irte irte;
+
+/* get interrupt remapping table */
+ret = amdvi_get_irte(iommu, origin, dte, &irte, sid);
+if (ret < 0) {
+return ret;
+}
+
+if (!irte.fields.valid) {
+trace_amdvi_ir_target_abort("RemapEn is disabled");
+return -AMDVI_IR_TARGET_ABORT;
+}
+
+if (irte.fields.guest_mode) {
+error_report_once("guest mode is not zero");
+return -AMDVI_IR_ERR;
+}
+
+if (irte.fields.int_type > AMDVI_IOAPIC_INT_TYPE_ARBITRATED) {
+error_report_once("reserved int_type");
+return -AMDVI_IR_ERR;
+}
+
+irq->delivery_mode = irte.fields.int_type;
+irq->vector = irte.fields.vector;
+irq->dest_mode = irte.fields.dm;
+irq->redir_hint = irte.fields.rq_eoi;
+irq->dest = irte.fields.destination;
+
+return 0;
+}
+
+static int __amdvi_int_remap_msi(AMDVIState *iommu,
+ MSIMessage *origin,
+

[Qemu-devel] [PULL 07/33] intel_iommu: better handling of dmar state switch

2018-11-05 Thread Michael S. Tsirkin

From: Peter Xu 

QEMU is not handling the global DMAR switch well, especially when
switching from "on" to "off".

Let's first take the example of system reset.

Assume that a guest has the IOMMU enabled.  When it reboots, we will drop
all the existing DMAR mappings to handle the system reset; however, we'll
still keep the existing memory layouts, which have the IOMMU memory region
enabled.  So after the reboot and before the kernel reloads again, there
will be no mapping at all for the host device.  That's problematic since
any software (for example, SeaBIOS) that runs earlier than the kernel
after the reboot will assume the IOMMU is disabled, so any DMA from the
software will fail.

For example, a guest that boots on an assigned NVMe device might fail to
find the boot device after a system reboot/reset and we'll be able to
observe SeaBIOS errors if we capture the debugging log:

  WARNING - Timeout at nvme_wait:144!

Meanwhile, we should see DMAR errors on the host of that NVMe device.
It's the DMA fault that caused a NVMe driver timeout.

The correct fix should be that we do proper switching of device DMA
address spaces when the system resets, which will set up the correct memory
regions and notify the backend of the devices.  This might not affect
non-assigned devices much since QEMU VT-d emulation will assume a
default passthrough mapping if DMAR is not enabled in the GCMD
register (please refer to vtd_iommu_translate).  However, that's required
for assigned devices, since that'll rebuild the correct GPA to HPA
mapping that is needed for any DMA operation during guest bootstrap.

Besides the system reset, we have some other places that might change
the global DMAR status and we'd better do the same thing there.  For
example, when we change the state of GCMD register, or the DMAR root
pointer.  Do the same refresh for all these places.  For these two
places we'll also need to explicitly invalidate the context entry cache
and iotlb cache.

Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=1625173
CC: QEMU Stable 
Reported-by: Cong Li 
Signed-off-by: Peter Xu 
--
v2:
- do the same for GCMD write, or root pointer update [Alex]
- test is carried out by me this time, by observing the
  vtd_switch_address_space tracepoint after system reboot
v3:
- rewrite commit message as suggested by Alex
Signed-off-by: Peter Xu 
Reviewed-by: Eric Auger 
Reviewed-by: Jason Wang 
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
---
 hw/i386/intel_iommu.c | 21 ++---
 1 file changed, 14 insertions(+), 7 deletions(-)

diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index 1137861a9d..306708eb3b 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -37,6 +37,8 @@
 #include "kvm_i386.h"
 #include "trace.h"
 
+static void vtd_address_space_refresh_all(IntelIOMMUState *s);
+
 static void vtd_define_quad(IntelIOMMUState *s, hwaddr addr, uint64_t val,
 uint64_t wmask, uint64_t w1cmask)
 {
@@ -1436,7 +1438,7 @@ static void vtd_context_global_invalidate(IntelIOMMUState 
*s)
 vtd_reset_context_cache_locked(s);
 }
 vtd_iommu_unlock(s);
-vtd_switch_address_space_all(s);
+vtd_address_space_refresh_all(s);
 /*
  * From VT-d spec 6.5.2.1, a global context entry invalidation
  * should be followed by a IOTLB global invalidation, so we should
@@ -1727,6 +1729,8 @@ static void vtd_handle_gcmd_srtp(IntelIOMMUState *s)
 vtd_root_table_setup(s);
 /* Ok - report back to driver */
 vtd_set_clear_mask_long(s, DMAR_GSTS_REG, 0, VTD_GSTS_RTPS);
+vtd_reset_caches(s);
+vtd_address_space_refresh_all(s);
 }
 
 /* Set Interrupt Remap Table Pointer */
@@ -1759,7 +1763,8 @@ static void vtd_handle_gcmd_te(IntelIOMMUState *s, bool 
en)
 vtd_set_clear_mask_long(s, DMAR_GSTS_REG, VTD_GSTS_TES, 0);
 }
 
-vtd_switch_address_space_all(s);
+vtd_reset_caches(s);
+vtd_address_space_refresh_all(s);
 }
 
 /* Handle Interrupt Remap Enable/Disable */
@@ -3059,6 +3064,12 @@ static void vtd_address_space_unmap_all(IntelIOMMUState 
*s)
 }
 }
 
+static void vtd_address_space_refresh_all(IntelIOMMUState *s)
+{
+vtd_address_space_unmap_all(s);
+vtd_switch_address_space_all(s);
+}
+
 static int vtd_replay_hook(IOMMUTLBEntry *entry, void *private)
 {
 memory_region_notify_one((IOMMUNotifier *)private, entry);
@@ -3231,11 +3242,7 @@ static void vtd_reset(DeviceState *dev)
 IntelIOMMUState *s = INTEL_IOMMU_DEVICE(dev);
 
 vtd_init(s);
-
-/*
- * When device reset, throw away all mappings and external caches
- */
-vtd_address_space_unmap_all(s);
+vtd_address_space_refresh_all(s);
 }
 
 static AddressSpace *vtd_host_dma_iommu(PCIBus *bus, void *opaque, int devfn)
-- 
MST

[Qemu-devel] [PULL 18/33] x86_iommu/amd: Add interrupt remap support when VAPIC is enabled

2018-11-05 Thread Michael S. Tsirkin

From: "Singh, Brijesh" 

Emulate the interrupt remapping support when guest virtual APIC is
enabled.

For more information, refer to the IOMMU spec rev 3.0 (section 2.2.5.2).

When VAPIC is enabled, it uses interrupt remapping as defined in
Table 22 and Figure 17 from IOMMU spec.

Signed-off-by: Brijesh Singh 
Reviewed-by: Peter Xu 
Cc: Peter Xu 
Cc: "Michael S. Tsirkin" 
Cc: Paolo Bonzini 
Cc: Richard Henderson 
Cc: Eduardo Habkost 
Cc: Marcel Apfelbaum 
Cc: Tom Lendacky 
Cc: Suravee Suthikulpanit 
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
---
 hw/i386/amd_iommu.h  | 36 +++
 hw/i386/amd_iommu.c  | 69 +++-
 hw/i386/trace-events |  2 ++
 3 files changed, 106 insertions(+), 1 deletion(-)

diff --git a/hw/i386/amd_iommu.h b/hw/i386/amd_iommu.h
index f73be48fca..8061e9c49c 100644
--- a/hw/i386/amd_iommu.h
+++ b/hw/i386/amd_iommu.h
@@ -103,6 +103,7 @@
 #define AMDVI_MMIO_CONTROL_EVENTINTEN (1ULL << 3)
 #define AMDVI_MMIO_CONTROL_COMWAITINTEN   (1ULL << 4)
 #define AMDVI_MMIO_CONTROL_CMDBUFLEN  (1ULL << 12)
+#define AMDVI_MMIO_CONTROL_GAEN   (1ULL << 17)
 
 /* MMIO status register bits */
 #define AMDVI_MMIO_STATUS_CMDBUF_RUN  (1 << 4)
@@ -263,6 +264,38 @@ union irte {
 } fields;
 };
 
+/* Interrupt remapping table fields (Guest VAPIC is enabled) */
+union irte_ga_lo {
+  uint64_t val;
+
+  /* For int remapping */
+  struct {
+  uint64_t  valid:1,
+no_fault:1,
+/* -- */
+int_type:3,
+rq_eoi:1,
+dm:1,
+/* -- */
+guest_mode:1,
+destination:8,
+rsvd_1:48;
+  } fields_remap;
+};
+
+union irte_ga_hi {
+  uint64_t val;
+  struct {
+  uint64_t  vector:8,
+rsvd_2:56;
+  } fields;
+};
+
+struct irte_ga {
+  union irte_ga_lo lo;
+  union irte_ga_hi hi;
+};
+
 #define TYPE_AMD_IOMMU_DEVICE "amd-iommu"
 #define AMD_IOMMU_DEVICE(obj)\
 OBJECT_CHECK(AMDVIState, (obj), TYPE_AMD_IOMMU_DEVICE)
@@ -332,6 +365,9 @@ typedef struct AMDVIState {
 
 /* IOTLB */
 GHashTable *iotlb;
+
+/* Interrupt remapping */
+bool ga_enabled;
 } AMDVIState;
 
 #endif
diff --git a/hw/i386/amd_iommu.c b/hw/i386/amd_iommu.c
index 8e2f13c029..353a810e6b 100644
--- a/hw/i386/amd_iommu.c
+++ b/hw/i386/amd_iommu.c
@@ -608,6 +608,7 @@ static void amdvi_handle_control_write(AMDVIState *s)
 s->completion_wait_intr = !!(control & AMDVI_MMIO_CONTROL_COMWAITINTEN);
 s->cmdbuf_enabled = s->enabled && !!(control &
 AMDVI_MMIO_CONTROL_CMDBUFLEN);
+s->ga_enabled = !!(control & AMDVI_MMIO_CONTROL_GAEN);
 
 /* update the flags depending on the control register */
 if (s->cmdbuf_enabled) {
@@ -1094,6 +1095,65 @@ static int amdvi_int_remap_legacy(AMDVIState *iommu,
 return 0;
 }
 
+static int amdvi_get_irte_ga(AMDVIState *s, MSIMessage *origin, uint64_t *dte,
+ struct irte_ga *irte, uint16_t devid)
+{
+uint64_t irte_root, offset;
+
+irte_root = dte[2] & AMDVI_IR_PHYS_ADDR_MASK;
+offset = (origin->data & AMDVI_IRTE_OFFSET) << 4;
+trace_amdvi_ir_irte(irte_root, offset);
+
+if (dma_memory_read(&address_space_memory, irte_root + offset,
+irte, sizeof(*irte))) {
+trace_amdvi_ir_err("failed to get irte_ga");
+return -AMDVI_IR_GET_IRTE;
+}
+
+trace_amdvi_ir_irte_ga_val(irte->hi.val, irte->lo.val);
+return 0;
+}
+
+static int amdvi_int_remap_ga(AMDVIState *iommu,
+  MSIMessage *origin,
+  MSIMessage *translated,
+  uint64_t *dte,
+  X86IOMMUIrq *irq,
+  uint16_t sid)
+{
+int ret;
+struct irte_ga irte;
+
+/* get interrupt remapping table */
+ret = amdvi_get_irte_ga(iommu, origin, dte, &irte, sid);
+if (ret < 0) {
+return ret;
+}
+
+if (!irte.lo.fields_remap.valid) {
+trace_amdvi_ir_target_abort("RemapEn is disabled");
+return -AMDVI_IR_TARGET_ABORT;
+}
+
+if (irte.lo.fields_remap.guest_mode) {
+error_report_once("guest mode is not zero");
+return -AMDVI_IR_ERR;
+}
+
+if (irte.lo.fields_remap.int_type > AMDVI_IOAPIC_INT_TYPE_ARBITRATED) {
+error_report_once("reserved int_type is set");
+return -AMDVI_IR_ERR;
+}
+
+irq->delivery_mode = irte.lo.fields_remap.int_type;
+irq->vector = irte.hi.fields.vector;
+irq->dest_mode = irte.lo.fields_remap.dm;
+irq->redir_hint = irte.lo.fields_remap.rq_eoi;
+irq->dest = irte.lo.fields_remap.destination;
+
+return 0;
+}
+
 static int __amdvi_int_remap_msi(AMDVIState *iommu,
  MSIMessage *origin,
  MSIMessage *translated,
@@ -1101,6 +1161,7 @@ static int __amdvi_int_remap_msi(AMDVIState *i

[Qemu-devel] [PULL 11/33] x86_iommu: move the kernel-irqchip check in common code

2018-11-05 Thread Michael S. Tsirkin

From: "Singh, Brijesh" 

Interrupt remapping needs kernel-irqchip={off|split} on both Intel and AMD
platforms. Move the check to a common place.

Signed-off-by: Brijesh Singh 
Reviewed-by: Peter Xu 
Cc: Peter Xu 
Cc: "Michael S. Tsirkin" 
Cc: Paolo Bonzini 
Cc: Richard Henderson 
Cc: Eduardo Habkost 
Cc: Marcel Apfelbaum 
Cc: Tom Lendacky 
Cc: Suravee Suthikulpanit 
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
---
 hw/i386/intel_iommu.c | 7 ---
 hw/i386/x86-iommu.c   | 9 +
 2 files changed, 9 insertions(+), 7 deletions(-)

diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index f24ebfca1c..015a6fc492 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -3262,13 +3262,6 @@ static bool vtd_decide_config(IntelIOMMUState *s, Error **errp)
 {
 X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(s);
 
-/* Currently Intel IOMMU IR only support "kernel-irqchip={off|split}" */
-if (x86_iommu->intr_supported && kvm_irqchip_in_kernel() &&
-!kvm_irqchip_is_split()) {
-error_setg(errp, "Intel Interrupt Remapping cannot work with "
- "kernel-irqchip=on, please use 'split|off'.");
-return false;
-}
 if (s->intr_eim == ON_OFF_AUTO_ON && !x86_iommu->intr_supported) {
 error_setg(errp, "eim=on cannot be selected without intremap=on");
 return false;
diff --git a/hw/i386/x86-iommu.c b/hw/i386/x86-iommu.c
index 8a01a2dd25..7440cb8d60 100644
--- a/hw/i386/x86-iommu.c
+++ b/hw/i386/x86-iommu.c
@@ -25,6 +25,7 @@
 #include "qapi/error.h"
 #include "qemu/error-report.h"
 #include "trace.h"
+#include "sysemu/kvm.h"
 
 void x86_iommu_iec_register_notifier(X86IOMMUState *iommu,
  iec_notify_fn fn, void *data)
@@ -94,6 +95,14 @@ static void x86_iommu_realize(DeviceState *dev, Error **errp)
 return;
 }
 
+/* Both Intel and AMD IOMMU IR only support "kernel-irqchip={off|split}" */
+if (x86_iommu->intr_supported && kvm_irqchip_in_kernel() &&
+!kvm_irqchip_is_split()) {
+error_setg(errp, "Interrupt Remapping cannot work with "
+ "kernel-irqchip=on, please use 'split|off'.");
+return;
+}
+
 if (x86_class->realize) {
 x86_class->realize(dev, errp);
 }
-- 
MST

1 2 3 4 >

1 - 100 of 366 matches

Mail list logo