[PATCH 1/2] configure: add configure option avx512f_opt

2020-02-12 Thread Robert Hoo
Like the previous avx2_opt option, config-host.mak will have CONFIG_AVX512F_OPT
defined if the compiling host supports it.

The AVX512F instruction set has been available since Intel Skylake.
More info:
https://software.intel.com/sites/default/files/managed/c5/15/architecture-instruction-set-extensions-programming-reference.pdf
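As a rough illustration (not part of this patch; the helper name below is made
up), code gated by the new option would mirror the existing CONFIG_AVX2_OPT
pattern:

#ifdef CONFIG_AVX512F_OPT
#pragma GCC push_options
#pragma GCC target("avx512f")
#include <stdbool.h>
#include <immintrin.h>

/* Illustrative only: an AVX512F-compiled helper, selected at runtime after
 * a cpuid/xgetbv check; 'buf' must provide at least 64 readable bytes. */
static bool any_bit_set_avx512(const void *buf)
{
    __m512i v = _mm512_loadu_si512(buf);
    return _mm512_test_epi64_mask(v, v) != 0;
}
#pragma GCC pop_options
#endif /* CONFIG_AVX512F_OPT */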

Signed-off-by: Robert Hoo 
---
 configure | 39 +++
 1 file changed, 39 insertions(+)

diff --git a/configure b/configure
index 115dc38..9bf8de0 100755
--- a/configure
+++ b/configure
@@ -1382,6 +1382,11 @@ for opt do
   ;;
   --enable-avx2) avx2_opt="yes"
   ;;
+  --disable-avx512f) avx512f_opt="no"
+  ;;
+  --enable-avx512f) avx512f_opt="yes"
+  ;;
+
   --enable-glusterfs) glusterfs="yes"
   ;;
   --disable-virtio-blk-data-plane|--enable-virtio-blk-data-plane)
@@ -1811,6 +1816,7 @@ disabled with --disable-FEATURE, default is enabled if 
available:
   tcmalloc        tcmalloc support
   jemalloc        jemalloc support
   avx2            AVX2 optimization support
+  avx512f         AVX512F optimization support
   replication     replication support
   opengl          opengl support
   virglrenderer   virgl rendering support
@@ -5481,6 +5487,34 @@ EOF
   fi
 fi
 
+##
+# avx512f optimization requirement check
+#
+# There is no point enabling this if cpuid.h is not usable,
+# since we won't be able to select the new routines.
+
+if test "$cpuid_h" = "yes" && test "$avx512f_opt" != "no"; then
+  cat > $TMPC << EOF
+#pragma GCC push_options
+#pragma GCC target("avx512f")
+#include <cpuid.h>
+#include <immintrin.h>
+static int bar(void *a) {
+    __m512i x = *(__m512i *)a;
+    return _mm512_test_epi64_mask(x, x);
+}
+int main(int argc, char *argv[])
+{
+    return bar(argv[0]);
+}
+EOF
+  if compile_object "" ; then
+    avx512f_opt="yes"
+  else
+    avx512f_opt="no"
+  fi
+fi
+
 
 # check if __[u]int128_t is usable.
 
@@ -6605,6 +6639,7 @@ echo "libxml2   $libxml2"
 echo "tcmalloc support  $tcmalloc"
 echo "jemalloc support  $jemalloc"
 echo "avx2 optimization $avx2_opt"
+echo "avx512f optimization $avx512f_opt"
 echo "replication support $replication"
 echo "VxHS block device $vxhs"
 echo "bochs support $bochs"
@@ -7152,6 +7187,10 @@ if test "$avx2_opt" = "yes" ; then
   echo "CONFIG_AVX2_OPT=y" >> $config_host_mak
 fi
 
+if test "$avx512f_opt" = "yes" ; then
+  echo "CONFIG_AVX512F_OPT=y" >> $config_host_mak
+fi
+
 if test "$lzo" = "yes" ; then
   echo "CONFIG_LZO=y" >> $config_host_mak
 fi
-- 
1.8.3.1




[PATCH 2/2] util: add util function buffer_zero_avx512()

2020-02-12 Thread Robert Hoo
And initialize buffer_is_zero() with it, when Intel AVX512F is
available on the host.

This function utilizes Intel AVX-512 Foundation (AVX512F) instructions,
which outperform the previous AVX2 instructions.
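For reference, callers never pick an accelerator themselves; they go through
the generic entry point declared in qemu/cutils.h (a minimal usage sketch, not
taken from the patch):

#include "qemu/osdep.h"
#include "qemu/cutils.h"

/* Returns true when the whole page is zero; buffer_is_zero() dispatches to
 * the AVX512F routine at runtime when the host CPU supports it. */
static bool page_is_blank(const void *page, size_t page_size)
{
    return buffer_is_zero(page, page_size);
}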

Signed-off-by: Robert Hoo 
---
 include/qemu/cpuid.h |  3 +++
 util/bufferiszero.c  | 56 +---
 2 files changed, 56 insertions(+), 3 deletions(-)

diff --git a/include/qemu/cpuid.h b/include/qemu/cpuid.h
index 6930170..09fc245 100644
--- a/include/qemu/cpuid.h
+++ b/include/qemu/cpuid.h
@@ -45,6 +45,9 @@
 #ifndef bit_AVX2
 #define bit_AVX2        (1 << 5)
 #endif
+#ifndef bit_AVX512F
+#define bit_AVX512F     (1 << 16)
+#endif
 #ifndef bit_BMI2
 #define bit_BMI2        (1 << 8)
 #endif
diff --git a/util/bufferiszero.c b/util/bufferiszero.c
index bfb2605..cbb854a 100644
--- a/util/bufferiszero.c
+++ b/util/bufferiszero.c
@@ -187,12 +187,54 @@ buffer_zero_avx2(const void *buf, size_t len)
 #pragma GCC pop_options
 #endif /* CONFIG_AVX2_OPT */
 
+#ifdef CONFIG_AVX512F_OPT
+#pragma GCC push_options
+#pragma GCC target("avx512f")
+#include <immintrin.h>
+
+static bool
+buffer_zero_avx512(const void *buf, size_t len)
+{
+    __m512i t;
+    __m512i *p, *e;
+
+    if (unlikely(len < 64)) { /* buffer less than 512 bits, unlikely */
+        return buffer_zero_int(buf, len);
+    }
+    /* Begin with an unaligned head of 64 bytes.  */
+    t = _mm512_loadu_si512(buf);
+    p = (__m512i *)(((uintptr_t)buf + 5 * 64) & -64);
+    e = (__m512i *)(((uintptr_t)buf + len) & -64);
+
+    /* Loop over 64-byte aligned blocks of 256.  */
+    while (p < e) {
+        __builtin_prefetch(p);
+        if (unlikely(_mm512_test_epi64_mask(t, t))) {
+            return false;
+        }
+        t = p[-4] | p[-3] | p[-2] | p[-1];
+        p += 4;
+    }
+
+    t |= _mm512_loadu_si512(buf + len - 4 * 64);
+    t |= _mm512_loadu_si512(buf + len - 3 * 64);
+    t |= _mm512_loadu_si512(buf + len - 2 * 64);
+    t |= _mm512_loadu_si512(buf + len - 1 * 64);
+
+    return !_mm512_test_epi64_mask(t, t);
+
+}
+#pragma GCC pop_options
+#endif
+
+
 /* Note that for test_buffer_is_zero_next_accel, the most preferred
  * ISA must have the least significant bit.
  */
-#define CACHE_AVX2    1
-#define CACHE_SSE4    2
-#define CACHE_SSE2    4
+#define CACHE_AVX512F 1
+#define CACHE_AVX2    2
+#define CACHE_SSE4    4
+#define CACHE_SSE2    6
 
 /* Make sure that these variables are appropriately initialized when
  * SSE2 is enabled on the compiler command-line, but the compiler is
@@ -226,6 +268,11 @@ static void init_accel(unsigned cache)
         fn = buffer_zero_avx2;
     }
 #endif
+#ifdef CONFIG_AVX512F_OPT
+    if (cache & CACHE_AVX512F) {
+        fn = buffer_zero_avx512;
+    }
+#endif
     buffer_accel = fn;
 }
 
@@ -255,6 +302,9 @@ static void __attribute__((constructor)) 
init_cpuid_cache(void)
             if ((bv & 6) == 6 && (b & bit_AVX2)) {
                 cache |= CACHE_AVX2;
             }
+            if ((bv & 6) == 6 && (b & bit_AVX512F)) {
+                cache |= CACHE_AVX512F;
+            }
 }
 }
 cpuid_cache = cache;
-- 
1.8.3.1




[PATCH 0/2] Add AVX512F optimization option and buffer_zero_avx512()

2020-02-12 Thread Robert Hoo
1. Add the avx512f_opt configure option and enable it when the host has the ability

2. Implement a new buffer_zero_avx512() with AVX512F instructions

Robert Hoo (2):
  configure: add configure option avx512f_opt
  util: add function buffer_zero_avx512()

 configure| 39 
 include/qemu/cpuid.h |  3 +++
 util/bufferiszero.c  | 56 +---
 3 files changed, 95 insertions(+), 3 deletions(-)

-- 
1.8.3.1




Re: Question about (and problem with) pflash data access

2020-02-12 Thread Alexey Kardashevskiy



On 13/02/2020 10:50, Philippe Mathieu-Daudé wrote:
> Cc'ing Paolo and Alexey.
> 
> On 2/13/20 12:09 AM, Guenter Roeck wrote:
>> On Wed, Feb 12, 2020 at 10:39:30PM +0100, Philippe Mathieu-Daudé wrote:
>>> Cc'ing Jean-Christophe and Peter.
>>>
>>> On 2/12/20 7:46 PM, Guenter Roeck wrote:
 Hi,

 I have been playing with pflash recently. For the most part it works,
 but I do have an odd problem when trying to instantiate pflash on sx1.

 My data file looks as follows.

 0000000 0001 0000 0000 0000 0000 0000 0000 0000
 0000020 0000 0000 0000 0000 0000 0000 0000 0000
 *
 0002000 0002 0000 0000 0000 0000 0000 0000 0000
 0002020 0000 0000 0000 0000 0000 0000 0000 0000
 *
 0004000 0003 0000 0000 0000 0000 0000 0000 0000
 0004020 0000 0000 0000 0000 0000 0000 0000 0000
 ...

 In the sx1 machine, this becomes:

 0000000 6001 0000 0000 0000 0000 0000 0000 0000
 0000020 0000 0000 0000 0000 0000 0000 0000 0000
 *
 0002000 6002 0000 0000 0000 0000 0000 0000 0000
 0002020 0000 0000 0000 0000 0000 0000 0000 0000
 *
 0004000 6003 0000 0000 0000 0000 0000 0000 0000
 0004020 0000 0000 0000 0000 0000 0000 0000 0000
 *
 ...

 pflash is instantiated with "-drive
 file=flash.32M.test,format=raw,if=pflash".

 I don't have much success with pflash tracing - data accesses don't
 show up there.

 I did find a number of problems with the sx1 emulation, but I have
 no clue
 what is going on with pflash. As far as I can see pflash works fine on
 other machines. Can someone give me a hint what to look out for ?
>>>
>>> This is specific to the SX1, introduced in commit 997641a84ff:
>>>
>>>   64 static uint64_t static_read(void *opaque, hwaddr offset,
>>>   65 unsigned size)
>>>   66 {
>>>   67 uint32_t *val = (uint32_t *) opaque;
>>>   68 uint32_t mask = (4 / size) - 1;
>>>   69
>>>   70 return *val >> ((offset & mask) << 3);
>>>   71 }
>>>
>>> Only guessing, this looks like some hw parity, and I imagine you need to
>>> write the parity bits in your flash.32M file before starting QEMU,
>>> then it
>>> would appear "normal" within the guest.
>>>
>> I thought this might be related, but that is not the case. I added log
>> messages, and even ran the code in gdb. static_read() and static_write()
>> are not executed.
>>
>> Also,
>>
>>  memory_region_init_io(&cs[0], NULL, &static_ops, &cs0val,
>>    "sx1.cs0", OMAP_CS0_SIZE - flash_size);
>>   ^^
>>  memory_region_add_subregion(address_space,
>>  OMAP_CS0_BASE + flash_size, &cs[0]);
>>  ^^
>>
>> suggests that the code is only executed for memory accesses _after_
>> the actual flash. The memory tree is:
>>
>> memory-region: system
>>   0000000000000000-ffffffffffffffff (prio 0, i/o): system
>>     0000000000000000-0000000001ffffff (prio 0, romd): omap_sx1.flash0-1
>>     0000000000000000-0000000001ffffff (prio 0, rom): omap_sx1.flash0-0
> 
> Eh two memory regions with same size and same priority... Is this legal?


I'd say yes if used with memory_region_set_enabled() to make sure only
one is enabled. Having both enabled is weird and we should print a
warning. Thanks,
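
For illustration, the pattern would look roughly like this (hypothetical
helper, not taken from the SX1 code):

#include "qemu/osdep.h"
#include "exec/memory.h"

/*
 * Hypothetical helper: map two alternative views of the same flash range
 * and keep exactly one of them enabled at any time.
 */
static void map_flash_views(MemoryRegion *sysmem, hwaddr base,
                            MemoryRegion *romd_view, MemoryRegion *rom_view,
                            bool use_romd)
{
    memory_region_add_subregion_overlap(sysmem, base, romd_view, 0);
    memory_region_add_subregion_overlap(sysmem, base, rom_view, 0);

    /* Only the enabled view takes part in address resolution. */
    memory_region_set_enabled(romd_view, use_romd);
    memory_region_set_enabled(rom_view, !use_romd);
}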



> 
> (qemu) info mtree -f -d
> FlatView #0
>  AS "memory", root: system
>  AS "cpu-memory-0", root: system
>  Root memory region: system
>   0000000000000000-0000000001ffffff (prio 0, romd): omap_sx1.flash0-1
>   0000000002000000-0000000003ffffff (prio 0, i/o): sx1.cs0
>   0000000004000000-0000000007ffffff (prio 0, i/o): sx1.cs1
>   0000000008000000-000000000bffffff (prio 0, i/o): sx1.cs3
>   0000000010000000-0000000011ffffff (prio 0, ram): omap1.dram
>   0000000020000000-000000002002ffff (prio 0, ram): omap1.sram
>   ...
>   Dispatch
>     Physical sections
>   #0 @0000000000000000..ffffffffffffffff (noname) [unassigned]
>   #1 @0000000000000000..0000000001ffffff omap_sx1.flash0-1 [not dirty]
>   #2 @0000000002000000..0000000003ffffff sx1.cs0 [ROM]
>   #3 @0000000004000000..0000000007ffffff sx1.cs1 [watch]
>   #4 @0000000008000000..000000000bffffff sx1.cs3
>   #5 @0000000010000000..0000000011ffffff omap1.dram
>   #6 @0000000020000000..000000002002ffff omap1.sram
>   ...
>     Nodes (9 bits per level, 6 levels) ptr=[3] skip=4
>   [0]
>   0   skip=3  ptr=[3]
>   1..511  skip=1  ptr=NIL
>   [1]
>   0   skip=2  ptr=[3]
>   1..511  skip=1  ptr=NIL
>   [2]
>   0   skip=1  ptr=[3]
>   1..511  skip=1  ptr=NIL
>   [3]
>   0   skip=1  ptr=[4]
>   1   skip=1  ptr=[5]
>   2   skip=2  ptr=[7]
>   3..13   skip=1  ptr=NIL
>  14   skip=2  ptr=[9]
>  15   skip=2  ptr=[11]
>  16..511  skip=1  ptr=NIL
> 

[PATCH v2 2/4] acpi/cppc: Add ACPI CPPC registers

2020-02-12 Thread Ying Fang
The Continuous Performance Control Package is used to
describe the ACPI CPPC registers.

Signed-off-by: Heyi Guo 
Signed-off-by: Ying Fang 
---
 hw/arm/virt-acpi-build.c| 74 -
 hw/arm/virt.c   |  1 +
 include/hw/acpi/acpi-defs.h | 32 
 include/hw/arm/virt.h   |  1 +
 4 files changed, 106 insertions(+), 2 deletions(-)

diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c
index bd5f771e9b..1f1a0ed324 100644
--- a/hw/arm/virt-acpi-build.c
+++ b/hw/arm/virt-acpi-build.c
@@ -41,6 +41,7 @@
 #include "hw/acpi/pci.h"
 #include "hw/acpi/memory_hotplug.h"
 #include "hw/acpi/generic_event_device.h"
+#include "hw/acpi/acpi-defs.h"
 #include "hw/pci/pcie_host.h"
 #include "hw/pci/pci.h"
 #include "hw/arm/virt.h"
@@ -51,7 +52,70 @@
 
 #define ARM_SPI_BASE 32
 
-static void acpi_dsdt_add_cpus(Aml *scope, int smp_cpus)
+
+static void acpi_dsdt_add_psd(Aml *dev, int cpus)
+{
+Aml *pkg;
+Aml *sub;
+
+sub = aml_package(5);
+aml_append(sub, aml_int(5));
+aml_append(sub, aml_int(0));
+/* Assume all vCPUs belong to the same domain */
+aml_append(sub, aml_int(0));
+/* SW_ANY: OSPM coordinate, initiate on any processor */
+aml_append(sub, aml_int(0xFD));
+aml_append(sub, aml_int(cpus));
+
+pkg = aml_package(1);
+aml_append(pkg, sub);
+
+aml_append(dev, aml_name_decl("_PSD", pkg));
+}
+
+static void acpi_dsdt_add_cppc(Aml *dev, uint64_t cpu_base)
+{
+Aml *cpc;
+int i;
+
+/* ACPI 6.3 8.4.7.1, version 3 of the CPPC table is used */
+cpc = aml_package(23);
+aml_append(cpc, aml_int(23));
+aml_append(cpc, aml_int(3));
+
+for (i = 0; i < CPPC_REG_COUNT; i++) {
+Aml *res;
+uint8_t reg_width;
+uint8_t acc_type;
+uint64_t addr;
+/* Only some necessary registers are emulated */
+if ((i >= MIN_PERF && i < REFERENCE_CTR) ||
+(i >= ENABLE && i < LOWEST_FREQ)) {
+reg_width = 0;
+acc_type = AML_ANY_ACC;
+addr = 0;
+} else {
+addr = cpu_base + i * 4;
+if (i == REFERENCE_CTR || i == DELIVERED_CTR) {
+reg_width = 64;
+acc_type = AML_QWORD_ACC;
+} else {
+reg_width = 32;
+acc_type = AML_DWORD_ACC;
+}
+}
+
+res = aml_resource_template();
+aml_append(res, aml_generic_register(AML_SYSTEM_MEMORY, reg_width, 0,
+ acc_type, addr));
+aml_append(cpc, res);
+}
+
+aml_append(dev, aml_name_decl("_CPC", cpc));
+}
+
+static void acpi_dsdt_add_cpus(Aml *scope, int smp_cpus,
+   const MemMapEntry *cppc_memmap)
 {
 uint16_t i;
 
@@ -59,6 +123,12 @@ static void acpi_dsdt_add_cpus(Aml *scope, int smp_cpus)
 Aml *dev = aml_device("C%.03X", i);
 aml_append(dev, aml_name_decl("_HID", aml_string("ACPI0007")));
 aml_append(dev, aml_name_decl("_UID", aml_int(i)));
+/*
+ * Append _CPC and _PSD to show CPU frequency
+ */
+acpi_dsdt_add_cppc(dev,
+   cppc_memmap->base + i * CPPC_REG_PER_CPU_STRIDE);
+acpi_dsdt_add_psd(dev, smp_cpus);
 aml_append(scope, dev);
 }
 }
@@ -736,7 +806,7 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, 
VirtMachineState *vms)
  * the RTC ACPI device at all when using UEFI.
  */
 scope = aml_scope("\\_SB");
-    acpi_dsdt_add_cpus(scope, vms->smp_cpus);
+    acpi_dsdt_add_cpus(scope, vms->smp_cpus, &memmap[VIRT_CPUFREQ]);
     acpi_dsdt_add_uart(scope, &memmap[VIRT_UART],
                        (irqmap[VIRT_UART] + ARM_SPI_BASE));
     acpi_dsdt_add_flash(scope, &memmap[VIRT_FLASH]);
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index f788fe27d6..ed9dc38b60 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -144,6 +144,7 @@ static const MemMapEntry base_memmap[] = {
     [VIRT_PCDIMM_ACPI] =        { 0x09070000, MEMORY_HOTPLUG_IO_LEN },
     [VIRT_ACPI_GED] =           { 0x09080000, ACPI_GED_EVT_SEL_LEN },
     [VIRT_MMIO] =               { 0x0a000000, 0x00000200 },
+    [VIRT_CPUFREQ] =            { 0x0b000000, 0x00010000 },
     /* ...repeating for a total of NUM_VIRTIO_TRANSPORTS, each of that size */
     [VIRT_PLATFORM_BUS] =       { 0x0c000000, 0x02000000 },
     [VIRT_SECURE_MEM] =         { 0x0e000000, 0x01000000 },
diff --git a/include/hw/acpi/acpi-defs.h b/include/hw/acpi/acpi-defs.h
index 57a3f58b0c..3a33f7220d 100644
--- a/include/hw/acpi/acpi-defs.h
+++ b/include/hw/acpi/acpi-defs.h
@@ -634,4 +634,36 @@ struct AcpiIortRC {
 } QEMU_PACKED;
 typedef struct AcpiIortRC AcpiIortRC;
 
+/*
+ * CPPC register definition from kernel header
+ * include/acpi/cppc_acpi.h
+ * The last element is newly added for easy use
+ */
+enum cppc_regs {
+HIGHEST_PERF,
+NOMINAL_PERF,
+LOW_NON_LINEAR_PERF,
+LOWEST_PERF,
+GUARANTEED_PERF,
+DESIRED_PERF,
+

[PATCH v2 3/4] arm: Add the cpufreq device model

2020-02-12 Thread Ying Fang
On the ARM64 platform, CPU frequency is retrieved via ACPI CPPC,
so here we create a virtual cpufreq device to present the CPPC
registers and ACPI _CPC objects.

The default frequency is set to the host CPU nominal frequency, which
is obtained from the host CPPC sysfs. Other performance data are set
to the same value, since we don't support guest performance scaling.

Performance counters are not emulated either; they simply return 1
when read, and the guest should fall back to using the desired
performance value as the current performance.
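
The host-side lookup mentioned above boils down to reading one sysfs file; a
rough, self-contained sketch of that step (error handling reduced, units left
as the kernel reports them):

#include <stdio.h>
#include <stdint.h>

#define NOMINAL_FREQ_FILE "/sys/devices/system/cpu/cpu0/acpi_cppc/nominal_freq"

/* Returns the host nominal frequency as exposed by the kernel's CPPC sysfs
 * node, or 0 if the file cannot be read. */
static uint64_t read_host_nominal_freq(void)
{
    FILE *fp = fopen(NOMINAL_FREQ_FILE, "r");
    unsigned long long val = 0;

    if (fp) {
        if (fscanf(fp, "%llu", &val) != 1) {
            val = 0;
        }
        fclose(fp);
    }
    return (uint64_t)val;
}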

Signed-off-by: Heyi Guo 
Signed-off-by: Ying Fang 
---
 hw/acpi/Makefile.objs |   1 +
 hw/acpi/cpufreq.c | 249 ++
 2 files changed, 250 insertions(+)
 create mode 100644 hw/acpi/cpufreq.c

diff --git a/hw/acpi/Makefile.objs b/hw/acpi/Makefile.objs
index 777da07f4d..61530675d4 100644
--- a/hw/acpi/Makefile.objs
+++ b/hw/acpi/Makefile.objs
@@ -16,6 +16,7 @@ common-obj-y += bios-linker-loader.o
 common-obj-y += aml-build.o utils.o
 common-obj-$(CONFIG_ACPI_PCI) += pci.o
 common-obj-$(CONFIG_TPM) += tpm.o
+common-obj-$(CONFIG_CPUFREQ) += cpufreq.o
 
 common-obj-$(CONFIG_IPMI) += ipmi.o
 common-obj-$(call lnot,$(CONFIG_IPMI)) += ipmi-stub.o
diff --git a/hw/acpi/cpufreq.c b/hw/acpi/cpufreq.c
new file mode 100644
index 00..57fa35a29e
--- /dev/null
+++ b/hw/acpi/cpufreq.c
@@ -0,0 +1,249 @@
+/*
+ * ACPI CPPC register device
+ *
+ * Support for showing CPU frequency in guest OS.
+ *
+ * Copyright (c) 2020 HUAWEI TECHNOLOGIES CO.,LTD.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "qemu/osdep.h"
+#include "hw/sysbus.h"
+#include "chardev/char.h"
+#include "qemu/log.h"
+#include "trace.h"
+#include "qemu/option.h"
+#include "sysemu/sysemu.h"
+#include "hw/acpi/acpi-defs.h"
+#include "qemu/cutils.h"
+#include "qemu/error-report.h"
+#include "hw/boards.h"
+
+#define TYPE_CPUFREQ "cpufreq"
+#define CPUFREQ(obj) OBJECT_CHECK(CpufreqState, (obj), TYPE_CPUFREQ)
+#define NOMINAL_FREQ_FILE "/sys/devices/system/cpu/cpu0/acpi_cppc/nominal_freq"
+#define CPU_MAX_FREQ_FILE 
"/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq"
+#define HZ_MAX_LENGTH 1024
+#define MAX_SUPPORT_SPACE 0x10000
+
+typedef struct CpufreqState {
+SysBusDevice parent_obj;
+
+MemoryRegion iomem;
+uint32_t HighestPerformance;
+uint32_t NominalPerformance;
+uint32_t LowestNonlinearPerformance;
+uint32_t LowestPerformance;
+uint32_t GuaranteedPerformance;
+uint32_t DesiredPerformance;
+uint64_t ReferencePerformanceCounter;
+uint64_t DeliveredPerformanceCounter;
+uint32_t PerformanceLimited;
+uint32_t LowestFreq;
+uint32_t NominalFreq;
+uint32_t reg_size;
+} CpufreqState;
+
+
+static uint64_t cpufreq_read(void *opaque, hwaddr offset,
+ unsigned size)
+{
+CpufreqState *s = (CpufreqState *)opaque;
+uint64_t r;
+uint64_t n;
+
+MachineState *ms = MACHINE(qdev_get_machine());
+unsigned int smp_cpus = ms->smp.cpus;
+
+if (offset >= smp_cpus * CPPC_REG_PER_CPU_STRIDE) {
+warn_report("cpufreq_read: offset 0x%" HWADDR_PRIx " out of range",
+offset);
+return 0;
+}
+
+n = offset % CPPC_REG_PER_CPU_STRIDE;
+switch (n) {
+case 0:
+r = s->HighestPerformance;
+break;
+case 4:
+r = s->NominalPerformance;
+break;
+case 8:
+r = s->LowestNonlinearPerformance;
+break;
+case 12:
+r = s->LowestPerformance;
+break;
+case 16:
+r = s->GuaranteedPerformance;
+break;
+case 20:
+r = s->DesiredPerformance;
+break;
+/*
+ * We don't have real counters and it is hard to emulate, so always set the
+ * counter value to 1 to rely on Linux to use the DesiredPerformance value
+ * directly.
+ */
+case 24:
+r = s->ReferencePerformanceCounter;
+break;
+/*
+ * Guest may still access the register by 32bit; add the process to
+ * eliminate unnecessary warnings
+ */
+case 28:
+r = s->ReferencePerformanceCounter >> 32;
+break;
+case 32:
+r = s->DeliveredPerformanceCounter;
+break;
+case 36:
+r = s->DeliveredPerformanceCounter >> 32;
+break;
+
+case 40:
+r = s->PerformanceLimited;
+break;
+case 44:
+

[PATCH v2 4/4] arm: Create the cpufreq device

2020-02-12 Thread Ying Fang
Add the cpufreq device to arm64 virt machine

Signed-off-by: Heyi Guo 
Signed-off-by: Ying Fang 
---
 default-configs/aarch64-softmmu.mak |  1 +
 hw/acpi/Kconfig |  4 
 hw/arm/virt.c   | 13 +
 3 files changed, 18 insertions(+)

diff --git a/default-configs/aarch64-softmmu.mak 
b/default-configs/aarch64-softmmu.mak
index 958b1e08e4..0a030e853f 100644
--- a/default-configs/aarch64-softmmu.mak
+++ b/default-configs/aarch64-softmmu.mak
@@ -6,3 +6,4 @@ include arm-softmmu.mak
 CONFIG_XLNX_ZYNQMP_ARM=y
 CONFIG_XLNX_VERSAL=y
 CONFIG_SBSA_REF=y
+CONFIG_CPUFREQ=y
diff --git a/hw/acpi/Kconfig b/hw/acpi/Kconfig
index 54209c6f2f..7d8aa58492 100644
--- a/hw/acpi/Kconfig
+++ b/hw/acpi/Kconfig
@@ -38,3 +38,7 @@ config ACPI_VMGENID
 depends on PC
 
 config ACPI_HW_REDUCED
+
+config CPUFREQ
+    bool
+    default y
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index ed9dc38b60..53638f9557 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -764,6 +764,17 @@ static void create_uart(const VirtMachineState *vms, int 
uart,
 g_free(nodename);
 }
 
+static void create_cpufreq(const VirtMachineState *vms, MemoryRegion *mem)
+{
+    hwaddr base = vms->memmap[VIRT_CPUFREQ].base;
+    DeviceState *dev = qdev_create(NULL, "cpufreq");
+    SysBusDevice *s = SYS_BUS_DEVICE(dev);
+
+    qdev_init_nofail(dev);
+    memory_region_add_subregion(mem, base, sysbus_mmio_get_region(s, 0));
+}
+
+
 static void create_rtc(const VirtMachineState *vms)
 {
 char *nodename;
@@ -1723,6 +1734,8 @@ static void machvirt_init(MachineState *machine)
 
     create_uart(vms, VIRT_UART, sysmem, serial_hd(0));
 
+    create_cpufreq(vms, sysmem);
+
     if (vms->secure) {
         create_secure_ram(vms, secure_sysmem);
         create_uart(vms, VIRT_SECURE_UART, secure_sysmem, serial_hd(1));
-- 
2.19.1





[PATCH v2 1/4] acpi: Add aml_generic_register

2020-02-12 Thread Ying Fang
The generic register descriptor describes the location of a
fixed-width register within any of the ACPI-defined address spaces.

This is needed to declare the ACPI CPPC registers.
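
A typical call site wraps the descriptor in a resource template, as the CPPC
patch in this series does; a minimal sketch (the width, access size and
address below are placeholders):

#include "qemu/osdep.h"
#include "hw/acpi/aml-build.h"

/* Sketch: describe one 32-bit, DWORD-access register in system memory. */
static Aml *build_one_cppc_reg(uint64_t addr)
{
    Aml *res = aml_resource_template();

    aml_append(res, aml_generic_register(AML_SYSTEM_MEMORY, 32, 0,
                                         AML_DWORD_ACC, addr));
    return res;
}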

Signed-off-by: Heyi Guo 
Signed-off-by: Ying Fang 
---
 hw/acpi/aml-build.c | 22 ++
 include/hw/acpi/aml-build.h |  3 +++
 2 files changed, 25 insertions(+)

diff --git a/hw/acpi/aml-build.c b/hw/acpi/aml-build.c
index 2c3702b882..79b1431f07 100644
--- a/hw/acpi/aml-build.c
+++ b/hw/acpi/aml-build.c
@@ -1370,6 +1370,28 @@ Aml *aml_sleep(uint64_t msec)
 return var;
 }
 
+/* ACPI 5.0b: 6.4.3.7 Generic Register Descriptor */
+Aml *aml_generic_register(AmlRegionSpace rs, uint8_t reg_width,
+  uint8_t reg_offset, AmlAccessType type, uint64_t 
addr)
+{
+    int i;
+    Aml *var = aml_alloc();
+    build_append_byte(var->buf, 0x82); /* Generic Register Descriptor */
+    build_append_byte(var->buf, 0x0C); /* Length, bits[7:0] value = 0x0C */
+    build_append_byte(var->buf, 0);    /* Length, bits[15:8] value = 0 */
+    build_append_byte(var->buf, rs);   /* Address Space ID */
+    build_append_byte(var->buf, reg_width);  /* Register Bit Width */
+    build_append_byte(var->buf, reg_offset); /* Register Bit Offset */
+    build_append_byte(var->buf, type);       /* Access Size */
+
+    /* Register address */
+    for (i = 0; i < 8; i++) {
+        build_append_byte(var->buf, extract64(addr, i * 8, 8));
+    }
+
+    return var;
+}
+
 static uint8_t Hex2Byte(const char *src)
 {
 int hi, lo;
diff --git a/include/hw/acpi/aml-build.h b/include/hw/acpi/aml-build.h
index de4a406568..37a047b156 100644
--- a/include/hw/acpi/aml-build.h
+++ b/include/hw/acpi/aml-build.h
@@ -364,6 +364,9 @@ Aml *aml_qword_memory(AmlDecode dec, AmlMinFixed min_fixed,
 Aml *aml_dma(AmlDmaType typ, AmlDmaBusMaster bm, AmlTransferSize sz,
  uint8_t channel);
 Aml *aml_sleep(uint64_t msec);
+Aml *aml_generic_register(AmlRegionSpace rs, uint8_t reg_width,
+  uint8_t reg_offset, AmlAccessType type,
+  uint64_t addr);
 Aml *aml_i2c_serial_bus_device(uint16_t address, const char *resource_source);
 
 /* Block AML object primitives */
-- 
2.19.1





[PATCH v2 0/4] arm64: Add the cpufreq device to show cpufreq info to guest

2020-02-12 Thread Ying Fang
On the ARM64 platform, CPU frequency is retrieved via ACPI CPPC.
A virtual cpufreq device based on ACPI CPPC is created to
present CPU frequency info to the guest.

The default frequency is set to the host CPU nominal frequency,
which is obtained from the host CPPC sysfs. Other performance
data are set to the same value, since we don't support guest
performance scaling here.

Performance counters are also not emulated; they simply
return 1 if read, and the guest should fall back to using the
desired performance value as the current performance.

Guest kernel version above 4.18 is required to make it work.

Ying Fang (4):
  acpi: Add aml_generic_register
  acpi/cppc: Add ACPI CPPC registers
  arm: Add the cpufreq device model
  arm: Create the cpufreq device

 default-configs/aarch64-softmmu.mak |   1 +
 hw/acpi/Kconfig |   4 +
 hw/acpi/Makefile.objs   |   1 +
 hw/acpi/aml-build.c |  22 +++
 hw/acpi/cpufreq.c   | 249 
 hw/arm/virt-acpi-build.c|  74 -
 hw/arm/virt.c   |  14 ++
 include/hw/acpi/acpi-defs.h |  32 
 include/hw/acpi/aml-build.h |   3 +
 include/hw/arm/virt.h   |   1 +
 10 files changed, 399 insertions(+), 2 deletions(-)
 create mode 100644 hw/acpi/cpufreq.c

-- 
2.19.1





Re: [PATCH v3 4/5] a stand-alone tool to directly share disk image file via vhost-user protocol

2020-02-12 Thread Coiby Xu
I forgot to put backends/vhost-user-blk-server.o as dependency for
qemu-vu target in Makefile,

 qemu-img$(EXESUF): qemu-img.o $(authz-obj-y) $(block-obj-y)
$(crypto-obj-y) $(io-obj-y) $(qom-obj-y) $(COMMON_LDADDS)
 qemu-nbd$(EXESUF): qemu-nbd.o $(authz-obj-y) $(block-obj-y)
$(crypto-obj-y) $(io-obj-y) $(qom-obj-y) $(COMMON_LDADDS)
+
+ifdef CONFIG_LINUX
+qemu-vu$(EXESUF): qemu-vu.o backends/vhost-user-blk-server.o
$(authz-obj-y) $(block-obj-y) $(crypto-obj-y) $(io-obj-y) $(qom-obj-y)
$(COMMON_LDADDS) libvhost-user.a
+endif
 qemu-io$(EXESUF): qemu-io.o $(authz-obj-y) $(block-obj-y)
$(crypto-obj-y) $(io-obj-y) $(qom-obj-y) $(COMMON_LDADDS)

I also noticed in the latest version of QEMU, `make check-qtest`
somehow doesn't run qos-test. If you need to run vhost-user-blk
testsuite, please execute the following command after applying the
above fix and the 5th patch,

  MALLOC_PERTURB_=${MALLOC_PERTURB_:-$(( ${RANDOM:-0} % 255 + 1))}
QTEST_QEMU_BINARY=x86_64-softmmu/qemu-system-x86_64
QTEST_QEMU_IMG=./qemu-img QTEST_QEMU_VU_BINARY=./qemu-vu
tests/qos-test -m=quick -k --tap < /dev/null | ./scripts/tap-driver.pl
--test-name="qos-test"


On Wed, Feb 12, 2020 at 5:52 PM Coiby Xu  wrote:
>
> vhost-user-blk could have played the role of the vhost-user backend, but it
> only supports raw files and doesn't support VIRTIO_BLK_T_DISCARD and
> VIRTIO_BLK_T_WRITE_ZEROES operations on raw files (ioctl(fd, BLKDISCARD)
> is only valid for a real block device).
>
> In the future Kevin's qemu-storage-daemon will be used to replace this
> tool.
>
> Signed-off-by: Coiby Xu 
> ---
>  Makefile  |   4 +
>  configure |   3 +
>  qemu-vu.c | 252 ++
>  3 files changed, 259 insertions(+)
>  create mode 100644 qemu-vu.c
>
> diff --git a/Makefile b/Makefile
> index f0e1a2fc1d..0bfd2f1ddd 100644
> --- a/Makefile
> +++ b/Makefile
> @@ -572,6 +572,10 @@ qemu-img.o: qemu-img-cmds.h
>
>  qemu-img$(EXESUF): qemu-img.o $(authz-obj-y) $(block-obj-y) $(crypto-obj-y) 
> $(io-obj-y) $(qom-obj-y) $(COMMON_LDADDS)
>  qemu-nbd$(EXESUF): qemu-nbd.o $(authz-obj-y) $(block-obj-y) $(crypto-obj-y) 
> $(io-obj-y) $(qom-obj-y) $(COMMON_LDADDS)
> +
> +ifdef CONFIG_LINUX
> +qemu-vu$(EXESUF): qemu-vu.o $(authz-obj-y) $(block-obj-y) $(crypto-obj-y) 
> $(io-obj-y) $(qom-obj-y) $(COMMON_LDADDS) libvhost-user.a
> +endif
>  qemu-io$(EXESUF): qemu-io.o $(authz-obj-y) $(block-obj-y) $(crypto-obj-y) 
> $(io-obj-y) $(qom-obj-y) $(COMMON_LDADDS)
>
>  qemu-bridge-helper$(EXESUF): qemu-bridge-helper.o $(COMMON_LDADDS)
> diff --git a/configure b/configure
> index 115dc38085..e87c9a5587 100755
> --- a/configure
> +++ b/configure
> @@ -6217,6 +6217,9 @@ if test "$want_tools" = "yes" ; then
>if [ "$linux" = "yes" -o "$bsd" = "yes" -o "$solaris" = "yes" ] ; then
>  tools="qemu-nbd\$(EXESUF) $tools"
>fi
> +  if [ "$linux" = "yes" ] ; then
> +tools="qemu-vu\$(EXESUF) $tools"
> +  fi
>if [ "$ivshmem" = "yes" ]; then
>  tools="ivshmem-client\$(EXESUF) ivshmem-server\$(EXESUF) $tools"
>fi
> diff --git a/qemu-vu.c b/qemu-vu.c
> new file mode 100644
> index 00..dd1032b205
> --- /dev/null
> +++ b/qemu-vu.c
> @@ -0,0 +1,252 @@
> +/*
> + *  Copyright (C) 2020  Coiby Xu 
> + *
> + *  stand-alone vhost-user-blk device server backend
> + *
> + *  This program is free software; you can redistribute it and/or modify
> + *  it under the terms of the GNU General Public License as published by
> + *  the Free Software Foundation; under version 2 of the License.
> + *
> + *  This program is distributed in the hope that it will be useful,
> + *  but WITHOUT ANY WARRANTY; without even the implied warranty of
> + *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + *  GNU General Public License for more details.
> + *
> + *  You should have received a copy of the GNU General Public License
> + *  along with this program; if not, see <http://www.gnu.org/licenses/>.
> + */
> +
> +#include "qemu/osdep.h"
> +#include 
> +#include 
> +#include "backends/vhost-user-blk-server.h"
> +#include "block/block_int.h"
> +#include "io/net-listener.h"
> +#include "qapi/error.h"
> +#include "qapi/qmp/qdict.h"
> +#include "qapi/qmp/qstring.h"
> +#include "qemu/config-file.h"
> +#include "qemu/cutils.h"
> +#include "qemu/main-loop.h"
> +#include "qemu/module.h"
> +#include "qemu/option.h"
> +#include "qemu-common.h"
> +#include "qemu-version.h"
> +#include "qom/object_interfaces.h"
> +#include "sysemu/block-backend.h"
> +#define QEMU_VU_OPT_CACHE 256
> +#define QEMU_VU_OPT_AIO   257
> +#define QEMU_VU_OBJ_ID   "vu_disk"
> +static QemuOptsList qemu_object_opts = {
> +.name = "object",
> +.implied_opt_name = "qom-type",
> +.head = QTAILQ_HEAD_INITIALIZER(qemu_object_opts.head),
> +.desc = {
> +{ }
> +},
> +};
> +static char *srcpath;
> +
> +static void usage(const char *name)
> +{
> +(printf) (
> +"Usage: %s [OPTIONS] FILE\n"
> +"  or:  %s -L [OPTIONS]\n"
> +"QEMU Vhost-user Server 

Re: [PATCH v3 4/4] target/i386: Add notes for versioned CPU models

2020-02-12 Thread Tao Xu

On 2/12/2020 5:00 PM, Igor Mammedov wrote:

On Wed, 12 Feb 2020 16:13:28 +0800
Tao Xu  wrote:


Add which features are added or removed in this version. Remove the
changed model-id in versioned CPU models, to keep the model name
unchanged at /proc/cpuinfo inside the VM.

Signed-off-by: Tao Xu 
---

Changes in v2:
 - correct the note of Cascadelake v3 (Xiaoyao)
---
  target/i386/cpu.c | 54 ++-
  1 file changed, 25 insertions(+), 29 deletions(-)

diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index 81a039beb6..739ef4ce91 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c

[...]

@@ -3142,6 +3130,7 @@ static X86CPUDefinition builtin_x86_defs[] = {
  .versions = (X86CPUVersionDefinition[]) {
  { .version = 1 },
  { .version = 2,
+  .note = "ARCH_CAPABILITIES",


what's ARCH_CAPABILITIES?



These are some features exposed by MSR_IA32_ARCH_CAPABILITIES. For
Cascadelake, these are "rdctl-no", "ibrs-all", "skip-l1dfl-vmentry" and "mds-no".
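
Concretely, the versioned model carries them as extra feature properties,
roughly like this (a paraphrased fragment modeled on the Cascadelake-Server v2
entry in target/i386/cpu.c, not an exact quote):

{ .version = 2,
  .note = "ARCH_CAPABILITIES",
  .props = (PropValue[]) {
      { "arch-capabilities", "on" },
      { "rdctl-no", "on" },
      { "ibrs-all", "on" },
      { "skip-l1dfl-vmentry", "on" },
      { "mds-no", "on" },
      { /* end of list */ }
  },
},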




Re: [RESEND RFC PATCH v2 1/2] target/arm: Allow to inject SError interrupt

2020-02-12 Thread Richard Henderson
On 2/12/20 7:49 PM, Gavin Shan wrote:
> On 2/12/20 10:34 PM, Peter Maydell wrote:
>> Yeah, this is on my list to look at; Richard Henderson also could
>> have a look at it. From a quick scan I suspect you may be missing
>> handling for AArch32.
>>
> 
> [Thanks for copying Richard Henderson]
> 
> Yes, the functionality is only supported on aarch64 currently by intention
> because the next patch enables it on "max" and "host" CPU models and both
> of them are running in aarch64 mode.

We shouldn't leave the aarch32 exception entry paths unimplemented though.  C.f.

AArch32.TakePhysicalSErrorException()
AArch32.TakeVirtualSErrorException()

It really shouldn't be more than a couple of lines, just like
arm_cpu_do_interrupt_aarch64.  Remember both arm_cpu_do_interrupt_aarch32 and
arm_cpu_do_interrupt_aarch32_hyp.

> However, it seems there is a long list of aarch32 CPU models, defined
> in target/arm/cpu.c::arm_cpus. so which CPU models you prefer to see with
> this supported? I think we might choose one or two popular CPU models if
> you agree.

Even qemu-system-aarch64 -cpu max can exercise this path when EL1 is running in
aarch32 mode.  Admittedly it would be easier if we had the rest of the plumbing
so that -cpu max,aarch64=off worked.

FWIW, the rest of the patch looks good.


r~



Re: [PATCH qemu v6 1/6] ppc: Start CPU in the default mode which is big-endian 32bit

2020-02-12 Thread David Gibson
On Thu, Feb 13, 2020 at 02:09:17PM +1100, Alexey Kardashevskiy wrote:
> 
> 
> On 12/02/2020 16:43, David Gibson wrote:
> > On Mon, Feb 03, 2020 at 02:29:38PM +1100, Alexey Kardashevskiy wrote:
> >> At the moment we enforce 64bit mode on a CPU when reset. This does not
> >> make difference as SLOF or Linux set the desired mode straight away.
> >> However if we ever boot something other than these two,
> >> this might not work as, for example, GRUB expects the default MSR state
> >> and does not work properly.
> >>
> >> This removes setting MSR_SF from the PPC CPU reset.
> > 
> > Hrm.  This is in the core cpu model so it doesn't just affect pseries,
> > but powernv (and theoretically others) as well.  Generally the cpu
> > model should have the bare metal behaviour, and we can override it in
> > the pseries machine if necessary.
> > 
> > So for a bare metal POWER system, what mode do we start in?  I'm
> > guessing it probably doesn't matter in practice, since the skiboot
> > firmware also probably does a mode set on entry, but it'd be nice to
> > get this right in theory.
> 
> 
> Huh. "Figure 65.  MSR setting due to interrupt" of PowerISA 3.0 says
> "The SF bit is set to 1" so after all the existing behavior is correct
> and my patch is just wrong. Cool.

Well, I guess SF after interrupt isn't *necessarily* the same as SF at
reset, but it's a good place to start.

-- 
David Gibson| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson




Re: [PATCH v3 0/5] linux-user: Implement x86_64 vsyscalls

2020-02-12 Thread no-reply
Patchew URL: 
https://patchew.org/QEMU/20200213032223.14643-1-richard.hender...@linaro.org/



Hi,

This series seems to have some coding style problems. See output below for
more information:

Subject: [PATCH v3 0/5] linux-user: Implement x86_64 vsyscalls
Message-id: 20200213032223.14643-1-richard.hender...@linaro.org
Type: series

=== TEST SCRIPT BEGIN ===
#!/bin/bash
git rev-parse base > /dev/null || exit 0
git config --local diff.renamelimit 0
git config --local diff.renames True
git config --local diff.algorithm histogram
./scripts/checkpatch.pl --mailback base..
=== TEST SCRIPT END ===

Updating 3c8cf5a9c21ff8782164d1def7f44bd888713384
Switched to a new branch 'test'
4f8c326 linux-user: Flush out implementation of gettimeofday
0337695 linux-user: Add x86_64 vsyscall page to /proc/self/maps
1035d94 linux-user/i386: Emulate x86_64 vsyscalls
a06e82b linux-user/i386: Split out gen_signal
70797ff target/i386: Renumber EXCP_SYSCALL

=== OUTPUT BEGIN ===
1/5 Checking commit 70797ffa7857 (target/i386: Renumber EXCP_SYSCALL)
2/5 Checking commit a06e82b1d3e0 (linux-user/i386: Split out gen_signal)
3/5 Checking commit 1035d94360a8 (linux-user/i386: Emulate x86_64 vsyscalls)
4/5 Checking commit 0337695b9a91 (linux-user: Add x86_64 vsyscall page to 
/proc/self/maps)
ERROR: trailing whitespace
#30: FILE: linux-user/syscall.c:7020:
+dprintf(fd, TARGET_FMT_lx "-" TARGET_FMT_lx $

total: 1 errors, 0 warnings, 16 lines checked

Patch 4/5 has style problems, please review.  If any of these errors
are false positives report them to the maintainer, see
CHECKPATCH in MAINTAINERS.

5/5 Checking commit 4f8c32688f82 (linux-user: Flush out implementation of 
gettimeofday)
=== OUTPUT END ===

Test command exited with code: 1


The full log is available at
http://patchew.org/logs/20200213032223.14643-1-richard.hender...@linaro.org/testing.checkpatch/?type=message.
---
Email generated automatically by Patchew [https://patchew.org/].
Please send your feedback to patchew-de...@redhat.com

Re: [RESEND RFC PATCH v2 1/2] target/arm: Allow to inject SError interrupt

2020-02-12 Thread Gavin Shan

On 2/12/20 10:34 PM, Peter Maydell wrote:

On Wed, 12 Feb 2020 at 06:39, Gavin Shan  wrote:

On 2/5/20 10:05 PM, Gavin Shan wrote:

This allows to inject SError interrupt, which will be used on receiving
QMP/HMP "nmi" command in next patch.

Signed-off-by: Gavin Shan 
---
   target/arm/cpu.c| 11 +++
   target/arm/cpu.h| 12 +---
   target/arm/helper.c |  4 
   3 files changed, 24 insertions(+), 3 deletions(-)



Hi Peter, could you please take a look when you get a chance? I'm not sure
the implementation is good enough to inject SError. If there are somebody
else who can help to review, please let me know so that I can copy her/him
either.


Yeah, this is on my list to look at; Richard Henderson also could
have a look at it. From a quick scan I suspect you may be missing
handling for AArch32.



[Thanks for copying Richard Henderson]

Yes, the functionality is only supported on aarch64 currently by intention
because the next patch enables it on "max" and "host" CPU models and both
of them are running in aarch64 mode.

https://patchwork.kernel.org/patch/11366119/

If you really want to get this supported for aarch32 either, I can do
it. However, it seems there is a long list of aarch32 CPU models, defined
in target/arm/cpu.c::arm_cpus. so which CPU models you prefer to see with
this supported? I think we might choose one or two popular CPU models if
you agree.

Thanks,
Gavin




Re: [virtio-dev] Re: [PATCH v2 4/5] virtio-mmio: add MSI interrupt feature support

2020-02-12 Thread Jason Wang



On 2020/2/12 5:16 PM, Michael S. Tsirkin wrote:

Thanks for the advice.:)

Actually when looking into pci, the queue_msix_vector/msix_config is the
msi vector index, which is the same as the mmio register MsiVecSel
(0x0d0).

So we don't introduce two extra registers for mapping even in sharing
mode.

What do you think?


I'm not sure I get the point, but I still prefer the separate vector_sel
from queue_msix_vector.

Btw, Michael proposed per-vq registers, which could also work.

Thanks


Right and I'd even ask a question: do we need shared MSI at all?



I guess it is still needed at least for the current virtio code.
Technically we may have thousands of queues.


Thanks



Is it somehow better than legacy interrupt? And why?
Performance numbers please.






Re: [PATCH v2 1/5] virtio-mmio: add notify feature for per-queue

2020-02-12 Thread Jason Wang



On 2020/2/12 5:55 PM, Michael S. Tsirkin wrote:

On Wed, Feb 12, 2020 at 05:33:06PM +0800, Jason Wang wrote:

On 2020/2/12 4:53 PM, Jason Wang wrote:

On 2020/2/12 4:18 PM, Michael S. Tsirkin wrote:

On Wed, Feb 12, 2020 at 11:39:54AM +0800, Jason Wang wrote:

On 2020/2/11 7:33 PM, Michael S. Tsirkin wrote:

On Mon, Feb 10, 2020 at 05:05:17PM +0800, Zha Bin wrote:

From: Liu Jiang

The standard virtio-mmio devices use notification register to signal
backend. This will cause vmexits and slow down the
performance when we
passthrough the virtio-mmio devices to guest virtual machines.
We proposed to update virtio over MMIO spec to add the per-queue
notify feature VIRTIO_F_MMIO_NOTIFICATION[1]. It can allow the VMM to
configure notify location for each queue.

[1]https://lkml.org/lkml/2020/1/21/31

Signed-off-by: Liu Jiang
Co-developed-by: Zha Bin
Signed-off-by: Zha Bin
Co-developed-by: Jing Liu
Signed-off-by: Jing Liu
Co-developed-by: Chao Peng
Signed-off-by: Chao Peng

Hmm. Any way to make this static so we don't need
base and multiplier?

E.g page per vq?

Thanks

Problem is, is page size well defined enough?
Are there cases where guest and host page sizes differ?
I suspect there might be.


Right, so it looks better to keep base and multiplier, e.g for vDPA.



But I also think this whole patch is unproven. Is someone actually
working on QEMU code to support pass-trough of virtio-pci
as virtio-mmio for nested guests? What's the performance
gain like?


I don't know.

Thanks


Btw, I think there's no need for a nested environment to test. Current
eventfd hook to MSIX should still work for MMIO.

Thanks


Oh yes it's the wildcard thingy but how much extra performance does one get
from it with MMIO? A couple % might not be worth the trouble for MMIO.



The cover letter has some numbers, but I'm not sure whether they were
measured with vhost or something else, which needs some clarification.


Thanks









[PATCH v3 5/5] linux-user: Flush out implementation of gettimeofday

2020-02-12 Thread Richard Henderson
The first argument, timeval, is allowed to be NULL.

The second argument, timezone, was missing.  While its use is
deprecated, it is still present in the syscall.
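
For reference, the guest-side call being emulated may pass either pointer as
NULL (a tiny sketch of the contract, not QEMU code):

#include <stddef.h>
#include <sys/time.h>

int main(void)
{
    struct timeval tv;
    struct timezone tz;

    /* Either argument may be NULL; the syscall has to tolerate both. */
    if (gettimeofday(&tv, &tz) != 0) {
        return 1;
    }
    return 0;
}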

Reviewed-by: Alex Bennée 
Reviewed-by: Philippe Mathieu-Daudé 
Signed-off-by: Richard Henderson 
---
 linux-user/syscall.c | 27 +--
 1 file changed, 25 insertions(+), 2 deletions(-)

diff --git a/linux-user/syscall.c b/linux-user/syscall.c
index b684727b83..a006e53088 100644
--- a/linux-user/syscall.c
+++ b/linux-user/syscall.c
@@ -1224,6 +1224,23 @@ static inline abi_long 
host_to_target_timespec64(abi_ulong target_addr,
 return 0;
 }
 
+static inline abi_long copy_to_user_timezone(abi_ulong target_tz_addr,
+                                             struct timezone *tz)
+{
+    struct target_timezone *target_tz;
+
+    if (!lock_user_struct(VERIFY_WRITE, target_tz, target_tz_addr, 1)) {
+        return -TARGET_EFAULT;
+    }
+
+    __put_user(tz->tz_minuteswest, &target_tz->tz_minuteswest);
+    __put_user(tz->tz_dsttime, &target_tz->tz_dsttime);
+
+    unlock_user_struct(target_tz, target_tz_addr, 1);
+
+    return 0;
+}
+
 static inline abi_long copy_from_user_timezone(struct timezone *tz,
abi_ulong target_tz_addr)
 {
@@ -8575,10 +8592,16 @@ static abi_long do_syscall1(void *cpu_env, int num, 
abi_long arg1,
     case TARGET_NR_gettimeofday:
         {
             struct timeval tv;
-            ret = get_errno(gettimeofday(&tv, NULL));
+            struct timezone tz;
+
+            ret = get_errno(gettimeofday(&tv, &tz));
             if (!is_error(ret)) {
-                if (copy_to_user_timeval(arg1, &tv))
+                if (arg1 && copy_to_user_timeval(arg1, &tv)) {
                     return -TARGET_EFAULT;
+                }
+                if (arg2 && copy_to_user_timezone(arg2, &tz)) {
+                    return -TARGET_EFAULT;
+                }
             }
         }
         return ret;
-- 
2.20.1




[PATCH v3 3/5] linux-user/i386: Emulate x86_64 vsyscalls

2020-02-12 Thread Richard Henderson
Notice the magic page during translate, much like we already
do for the arm32 commpage.  At runtime, raise an exception to
return to cpu_loop for emulation.
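
The target/i386/translate.c hunk is truncated below; the idea is roughly the
following check at decode time (a sketch only; exact placement, helper names
and return value may differ from the real patch):

#ifdef TARGET_VSYSCALL_PAGE
    /*
     * Detect entry into the vsyscall page and raise an exception so that
     * cpu_loop() can emulate the call.
     */
    if ((s->pc & TARGET_PAGE_MASK) == TARGET_VSYSCALL_PAGE) {
        gen_exception(s, EXCP_VSYSCALL, s->pc_start);
        return s->pc;
    }
#endif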

Reviewed-by: Paolo Bonzini 
Signed-off-by: Richard Henderson 
---
v3: Adjust the gotos.  Define TARGET_VSYSCALL_PAGE.
---
 target/i386/cpu.h  |   7 +++
 linux-user/i386/cpu_loop.c | 108 +
 target/i386/translate.c|  14 -
 3 files changed, 128 insertions(+), 1 deletion(-)

diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index 08b4422f36..39be555db3 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -1001,6 +1001,7 @@ typedef uint64_t FeatureWordArray[FEATURE_WORDS];
 
 #define EXCP_VMEXIT 0x100 /* only for system emulation */
 #define EXCP_SYSCALL    0x101 /* only for user emulation */
+#define EXCP_VSYSCALL   0x102 /* only for user emulation */
 
 /* i386-specific interrupt pending bits.  */
 #define CPU_INTERRUPT_POLL  CPU_INTERRUPT_TGT_EXT_1
@@ -2215,4 +2216,10 @@ static inline bool hyperv_feat_enabled(X86CPU *cpu, int 
feat)
 return !!(cpu->hyperv_features & BIT(feat));
 }
 
+#if defined(TARGET_X86_64) && \
+    defined(CONFIG_USER_ONLY) && \
+    defined(CONFIG_LINUX)
+# define TARGET_VSYSCALL_PAGE  (UINT64_C(-10) << 20)
+#endif
+
 #endif /* I386_CPU_H */
diff --git a/linux-user/i386/cpu_loop.c b/linux-user/i386/cpu_loop.c
index e217cca5ee..70cde417e6 100644
--- a/linux-user/i386/cpu_loop.c
+++ b/linux-user/i386/cpu_loop.c
@@ -92,6 +92,109 @@ static void gen_signal(CPUX86State *env, int sig, int code, 
abi_ptr addr)
     queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info);
 }
 
+#ifdef TARGET_X86_64
+static bool write_ok_or_segv(CPUX86State *env, abi_ptr addr, size_t len)
+{
+/*
+ * For all the vsyscalls, NULL means "don't write anything" not
+ * "write it at address 0".
+ */
+if (addr == 0 || access_ok(VERIFY_WRITE, addr, len)) {
+return true;
+}
+
+env->error_code = PG_ERROR_W_MASK | PG_ERROR_U_MASK;
+gen_signal(env, TARGET_SIGSEGV, TARGET_SEGV_MAPERR, addr);
+return false;
+}
+
+/*
+ * Since v3.1, the kernel traps and emulates the vsyscall page.
+ * Entry points other than the official generate SIGSEGV.
+ */
+static void emulate_vsyscall(CPUX86State *env)
+{
+int syscall;
+abi_ulong ret;
+uint64_t caller;
+
+/*
+ * Validate the entry point.  We have already validated the page
+ * during translation to get here; now verify the offset.
+ */
+switch (env->eip & ~TARGET_PAGE_MASK) {
+case 0x000:
+syscall = TARGET_NR_gettimeofday;
+break;
+case 0x400:
+syscall = TARGET_NR_time;
+break;
+case 0x800:
+syscall = TARGET_NR_getcpu;
+break;
+default:
+goto sigsegv;
+}
+
+/*
+ * Validate the return address.
+ * Note that the kernel treats this the same as an invalid entry point.
+ */
+if (get_user_u64(caller, env->regs[R_ESP])) {
+goto sigsegv;
+}
+
+/*
+ * Validate the pointer arguments.
+ */
+switch (syscall) {
+case TARGET_NR_gettimeofday:
+if (!write_ok_or_segv(env, env->regs[R_EDI],
+  sizeof(struct target_timeval)) ||
+!write_ok_or_segv(env, env->regs[R_ESI],
+  sizeof(struct target_timezone))) {
+return;
+}
+break;
+case TARGET_NR_time:
+if (!write_ok_or_segv(env, env->regs[R_EDI], sizeof(abi_long))) {
+return;
+}
+break;
+case TARGET_NR_getcpu:
+if (!write_ok_or_segv(env, env->regs[R_EDI], sizeof(uint32_t)) ||
+!write_ok_or_segv(env, env->regs[R_ESI], sizeof(uint32_t))) {
+return;
+}
+break;
+default:
+g_assert_not_reached();
+}
+
+/*
+ * Perform the syscall.  None of the vsyscalls should need restarting.
+ */
+ret = do_syscall(env, syscall, env->regs[R_EDI], env->regs[R_ESI],
+ env->regs[R_EDX], env->regs[10], env->regs[8],
+ env->regs[9], 0, 0);
+g_assert(ret != -TARGET_ERESTARTSYS);
+g_assert(ret != -TARGET_QEMU_ESIGRETURN);
+if (ret == -TARGET_EFAULT) {
+goto sigsegv;
+}
+env->regs[R_EAX] = ret;
+
+/* Emulate a ret instruction to leave the vsyscall page.  */
+env->eip = caller;
+env->regs[R_ESP] += 8;
+return;
+
+ sigsegv:
+/* Like force_sig(SIGSEGV).  */
+gen_signal(env, TARGET_SIGSEGV, TARGET_SI_KERNEL, 0);
+}
+#endif
+
 void cpu_loop(CPUX86State *env)
 {
 CPUState *cs = env_cpu(env);
@@ -141,6 +244,11 @@ void cpu_loop(CPUX86State *env)
 env->regs[R_EAX] = ret;
 }
 break;
+#endif
+#ifdef TARGET_X86_64
+case EXCP_VSYSCALL:
+emulate_vsyscall(env);
+break;
 #endif
 case EXCP0B_NOSEG:
 case EXCP0C_STACK:
diff --git a/target/i386/translate.c b/target/i386/translate.c

[PATCH v3 2/5] linux-user/i386: Split out gen_signal

2020-02-12 Thread Richard Henderson
This is a bit tidier than open-coding the 5 lines necessary
to initialize the target_siginfo_t.  In addition, this zeros
the remaining bytes of the target_siginfo_t, rather than
passing in garbage.

Reviewed-by: Paolo Bonzini 
Reviewed-by: Alex Bennée 
Reviewed-by: Philippe Mathieu-Daudé 
Signed-off-by: Richard Henderson 
---
 linux-user/i386/cpu_loop.c | 93 ++
 1 file changed, 33 insertions(+), 60 deletions(-)

diff --git a/linux-user/i386/cpu_loop.c b/linux-user/i386/cpu_loop.c
index 024b6f4d58..e217cca5ee 100644
--- a/linux-user/i386/cpu_loop.c
+++ b/linux-user/i386/cpu_loop.c
@@ -81,13 +81,23 @@ static void set_idt(int n, unsigned int dpl)
 }
 #endif
 
+static void gen_signal(CPUX86State *env, int sig, int code, abi_ptr addr)
+{
+    target_siginfo_t info = {
+        .si_signo = sig,
+        .si_code = code,
+        ._sifields._sigfault._addr = addr
+    };
+
+    queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info);
+}
+
 void cpu_loop(CPUX86State *env)
 {
 CPUState *cs = env_cpu(env);
 int trapnr;
 abi_ulong pc;
 abi_ulong ret;
-target_siginfo_t info;
 
 for(;;) {
 cpu_exec_start(cs);
@@ -134,70 +144,45 @@ void cpu_loop(CPUX86State *env)
 #endif
 case EXCP0B_NOSEG:
 case EXCP0C_STACK:
-info.si_signo = TARGET_SIGBUS;
-info.si_errno = 0;
-info.si_code = TARGET_SI_KERNEL;
-info._sifields._sigfault._addr = 0;
-queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info);
+gen_signal(env, TARGET_SIGBUS, TARGET_SI_KERNEL, 0);
 break;
 case EXCP0D_GPF:
 /* XXX: potential problem if ABI32 */
 #ifndef TARGET_X86_64
 if (env->eflags & VM_MASK) {
 handle_vm86_fault(env);
-} else
-#endif
-{
-info.si_signo = TARGET_SIGSEGV;
-info.si_errno = 0;
-info.si_code = TARGET_SI_KERNEL;
-info._sifields._sigfault._addr = 0;
-queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info);
+break;
 }
+#endif
+gen_signal(env, TARGET_SIGSEGV, TARGET_SI_KERNEL, 0);
 break;
 case EXCP0E_PAGE:
-info.si_signo = TARGET_SIGSEGV;
-info.si_errno = 0;
-if (!(env->error_code & 1))
-info.si_code = TARGET_SEGV_MAPERR;
-else
-info.si_code = TARGET_SEGV_ACCERR;
-info._sifields._sigfault._addr = env->cr[2];
-queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info);
+gen_signal(env, TARGET_SIGSEGV,
+   (env->error_code & 1 ?
+TARGET_SEGV_ACCERR : TARGET_SEGV_MAPERR),
+   env->cr[2]);
 break;
 case EXCP00_DIVZ:
 #ifndef TARGET_X86_64
 if (env->eflags & VM_MASK) {
 handle_vm86_trap(env, trapnr);
-} else
-#endif
-{
-/* division by zero */
-info.si_signo = TARGET_SIGFPE;
-info.si_errno = 0;
-info.si_code = TARGET_FPE_INTDIV;
-info._sifields._sigfault._addr = env->eip;
-queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info);
+break;
 }
+#endif
+gen_signal(env, TARGET_SIGFPE, TARGET_FPE_INTDIV, env->eip);
 break;
 case EXCP01_DB:
 case EXCP03_INT3:
 #ifndef TARGET_X86_64
 if (env->eflags & VM_MASK) {
 handle_vm86_trap(env, trapnr);
-} else
+break;
+}
 #endif
-{
-info.si_signo = TARGET_SIGTRAP;
-info.si_errno = 0;
-if (trapnr == EXCP01_DB) {
-info.si_code = TARGET_TRAP_BRKPT;
-info._sifields._sigfault._addr = env->eip;
-} else {
-info.si_code = TARGET_SI_KERNEL;
-info._sifields._sigfault._addr = 0;
-}
-queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info);
+if (trapnr == EXCP01_DB) {
+gen_signal(env, TARGET_SIGTRAP, TARGET_TRAP_BRKPT, env->eip);
+} else {
+gen_signal(env, TARGET_SIGTRAP, TARGET_SI_KERNEL, 0);
 }
 break;
 case EXCP04_INTO:
@@ -205,31 +190,19 @@ void cpu_loop(CPUX86State *env)
 #ifndef TARGET_X86_64
 if (env->eflags & VM_MASK) {
 handle_vm86_trap(env, trapnr);
-} else
-#endif
-{
-info.si_signo = TARGET_SIGSEGV;
-info.si_errno = 0;
-info.si_code = TARGET_SI_KERNEL;
-info._sifields._sigfault._addr = 0;
-queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info);
+break;
 }
+#endif
+gen_signal(env, 

[PATCH v3 1/5] target/i386: Renumber EXCP_SYSCALL

2020-02-12 Thread Richard Henderson
We are not short of numbers for EXCP_*.  There is no need to confuse things
by having EXCP_VMEXIT and EXCP_SYSCALL overlap, even though the former is
only used for system mode and the latter is only used for user mode.

Reviewed-by: Paolo Bonzini 
Reviewed-by: Alex Bennée 
Reviewed-by: Philippe Mathieu-Daudé 
Signed-off-by: Richard Henderson 
---
 target/i386/cpu.h | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index 576f309bbf..08b4422f36 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -999,9 +999,8 @@ typedef uint64_t FeatureWordArray[FEATURE_WORDS];
 #define EXCP11_ALGN    17
 #define EXCP12_MCHK    18
 
-#define EXCP_SYSCALL    0x100 /* only happens in user only emulation
-                                 for syscall instruction */
-#define EXCP_VMEXIT     0x100
+#define EXCP_VMEXIT     0x100 /* only for system emulation */
+#define EXCP_SYSCALL    0x101 /* only for user emulation */
 
 /* i386-specific interrupt pending bits.  */
 #define CPU_INTERRUPT_POLL  CPU_INTERRUPT_TGT_EXT_1
-- 
2.20.1




[PATCH v3 4/5] linux-user: Add x86_64 vsyscall page to /proc/self/maps

2020-02-12 Thread Richard Henderson
The page isn't (necessarily) present in the host /proc/self/maps,
and even if it might be it isn't present in page_flags, and even
if it was it might not have the same set of page permissions.

The easiest thing to do, particularly when it comes to the
"[vsyscall]" note at the end of line, is to special case it.

Signed-off-by: Richard Henderson 
---
v3: Use TARGET_VSYSCALL_PAGE.
---
 linux-user/syscall.c | 10 ++
 1 file changed, 10 insertions(+)

diff --git a/linux-user/syscall.c b/linux-user/syscall.c
index d60142f069..b684727b83 100644
--- a/linux-user/syscall.c
+++ b/linux-user/syscall.c
@@ -7012,6 +7012,16 @@ static int open_self_maps(void *cpu_env, int fd)
 }
 }
 
+#ifdef TARGET_VSYSCALL_PAGE
+/*
+ * We only support execution from the vsyscall page.
+ * This is as if CONFIG_LEGACY_VSYSCALL_XONLY=y from v5.3.
+ */
+dprintf(fd, TARGET_FMT_lx "-" TARGET_FMT_lx 
+" --xp  00:00 0 [vsyscall]\n",
+TARGET_VSYSCALL_PAGE, TARGET_VSYSCALL_PAGE + TARGET_PAGE_SIZE);
+#endif
+
 free(line);
 fclose(fp);
 
-- 
2.20.1




[PATCH v3 0/5] linux-user: Implement x86_64 vsyscalls

2020-02-12 Thread Richard Henderson
Changes for v3:

  * Add TARGET_VSYSCALL_PAGE define.
  * Move the sigsegv goto around.

v2: https://lists.gnu.org/archive/html/qemu-devel/2020-01/msg03474.html
v1: https://lists.gnu.org/archive/html/qemu-devel/2020-01/msg02841.html


r~


Richard Henderson (5):
  target/i386: Renumber EXCP_SYSCALL
  linux-user/i386: Split out gen_signal
  linux-user/i386: Emulate x86_64 vsyscalls
  linux-user: Add x86_64 vsyscall page to /proc/self/maps
  linux-user: Flush out implementation of gettimeofday

 target/i386/cpu.h  |  12 ++-
 linux-user/i386/cpu_loop.c | 201 ++---
 linux-user/syscall.c   |  37 ++-
 target/i386/translate.c|  14 ++-
 4 files changed, 198 insertions(+), 66 deletions(-)

-- 
2.20.1




Re: Restrictions of libnet (was: Re: VW ELF loader)

2020-02-12 Thread David Gibson
On Mon, Feb 10, 2020 at 10:39:52AM +0100, Michal Suchánek wrote:
> On Mon, Feb 10, 2020 at 06:55:16PM +1100, David Gibson wrote:
> > On Wed, Feb 05, 2020 at 07:24:04AM +0100, Thomas Huth wrote:
> > > On 05/02/2020 06.30, David Gibson wrote:
> > > > On Tue, Feb 04, 2020 at 10:20:14AM +0100, Thomas Huth wrote:
> > > >> On 04/02/2020 09.54, Cornelia Huck wrote:
> > > >>> On Tue, 4 Feb 2020 07:16:46 +0100
> > > >>> Thomas Huth  wrote:
> > > >>>
> > >  On 04/02/2020 00.26, Paolo Bonzini wrote:
> > > >
> > > >
> > > > Il mar 4 feb 2020, 00:20 Alexey Kardashevskiy  > > > > ha scritto:
> > > >
> > > > Speaking seriously, what would I put into the guest?
> > > >
> > > > Only things that would be considered drivers. Ignore the partitions
> > > > issue for now so that you can just pass the device tree services to 
> > > > QEMU
> > > > with hypercalls.
> > > >
> > > > Netboot's dhcp/tftp/ip/ipv6 client? It is going to be another 
> > > > SLOF,
> > > > smaller but adhoc with only a couple of people knowing it.
> > > >
> > > >
> > > > You can generalize and reuse the s390 code. All you have to write 
> > > > is the
> > > > PCI scan and virtio-pci setup.  
> > > 
> > >  Well, for netbooting, the s390-ccw bios uses the libnet code from 
> > >  SLOF,
> > >  so re-using this for a slim netboot client on ppc64 would certainly 
> > >  be
> > >  feasible (especially since there are also already virtio drivers in 
> > >  SLOF
> > >  that are written in C), but I think it is not very future proof. The
> > >  libnet from SLOF only supports UDP, and no TCP. So for advanced boot
> > >  scenarios like booting from HTTP or even HTTPS, you need something 
> > >  else
> > >  (i.e. maybe grub is the better option, indeed).
> > > >>>
> > > >>> That makes me wonder what that means for s390: We're inheriting
> > > >>> libnet's limitations, but we don't have grub -- do we need to come up
> > > >>> with something different? Or improve libnet?
> > > >>
> > > >> I don't think that it makes sense to re-invent the wheel yet another
> > > >> time and write yet another TCP implementation (which is likely quite a
> > > >> bit of work, too, especially if you also want to do secure HTTPS in the
> > > >> end). So yes, in the long run (as soon as somebody seriously asks for
> > > >> HTTP booting on s390x) we need something different here.
> > > >>
> > > >> Now looking at our standard s390x bootloader zipl - this has been 
> > > >> giving
> > > >> us a headache a couple of times in the past, too (from a distro point 
> > > >> of
> > > >> view since s390x is the only major platform left that does not use 
> > > >> grub,
> > > >> but also from a s390-ccw bios point of view, see e.g.
> > > >> https://lists.gnu.org/archive/html/qemu-devel/2019-12/msg03046.html and
> > > >> related discussions).
> > > >>
> > > >> So IMHO the s390x world should move towards grub2, too. We could e.g.
> > > >> link it initially into the s390-ccw bios bios ... and if that works out
> > > >> well, later also use it as normal bootloader instead of zipl (not sure
> > > >> if that works in all cases, though, IIRC there were some size
> > > >> constraints and stuff like that).
> > > > 
> > > > petitboot would be another reasonable thing to consider here.  Since
> > > > it's Linux based, you have all the drivers you have there.  It's not
> > > > quite grub, but it does at least parse the same configuration files.
> > > > 
> > > > You do need kexec() of course, I don't know if you have that already
> > > > for s390 or not.
> > > 
> > > AFAIK we have kexec on s390. So yes, petitboot would be another option
> > > for replacing the s390-ccw bios. But when it comes to LPARs and z/VMs, I
> > > don't think it's really feasible to replace the zipl bootloader there
> > > with petitboot, so in that case grub2 still sounds like the better
> > > option to me.
> > 
> > Actually, between that and Paolo's suggestions, I thought of another
> > idea that could be helpful for both s390 and power.  Could we load
> > non-kexec() things (legacy kernels, non-Linux OSes) from Petitboot by
> > having it kexec() into a shim mini-kernel that just sets up the boot
> > environment for the other thing.
> > 
> > What I'm imagining is that petitboot loads everything that will be
> > needed for the other OS into RAM - probably as (or part of) the
> > "initrd" image.  That means the shim doesn't need to have drivers or
> > a network stack to load that in.  It just needs to construct
> > environment and jump into the real kernel.
> 
> How does that differ from what kexec normally does?
> 
> It does support the kernel format that is usually booted on the
> architecture.

It's not a question of format, but of environment.

By the time a kexec() occurs there won't be any OF client interface
available, whether or not there ever was one.  So, kexec() won't 

Re: [PATCH qemu v6 1/6] ppc: Start CPU in the default mode which is big-endian 32bit

2020-02-12 Thread Alexey Kardashevskiy



On 12/02/2020 16:43, David Gibson wrote:
> On Mon, Feb 03, 2020 at 02:29:38PM +1100, Alexey Kardashevskiy wrote:
>> At the moment we enforce 64bit mode on a CPU at reset. This does not
>> make a difference as SLOF or Linux set the desired mode straight away.
>> However, if we ever boot something other than these two, this might not
>> work: for example, GRUB expects the default MSR state and does not work
>> properly.
>>
>> This removes setting MSR_SF from the PPC CPU reset.
> 
> Hrm.  This is in the core cpu model so it doesn't just affect pseries,
> but powernv (and theoretically others) as well.  Generally the cpu
> model should have the bare metal behaviour, and we can override it in
> the pseries machine if necessary.
> 
> So for a bare metal POWER system, what mode do we start in?  I'm
> guessing it probably doesn't matter in practice, since the skiboot
> firmware also probably does a mode set on entry, but it'd be nice to
> get this right in theory.


Huh. "Figure 65.  MSR setting due to interrupt" of PowerISA 3.0 says
"The SF bit is set to 1" so after all the existing behavior is correct
and my patch is just wrong. Cool.



> 
>>
>> Signed-off-by: Alexey Kardashevskiy 
>> ---
>>  target/ppc/translate_init.inc.c | 6 --
>>  1 file changed, 6 deletions(-)
>>
>> diff --git a/target/ppc/translate_init.inc.c 
>> b/target/ppc/translate_init.inc.c
>> index 53995f62eab2..f6a676cf55e8 100644
>> --- a/target/ppc/translate_init.inc.c
>> +++ b/target/ppc/translate_init.inc.c
>> @@ -10710,12 +10710,6 @@ static void ppc_cpu_reset(CPUState *s)
>>  #endif
>>  #endif
>>  
>> -#if defined(TARGET_PPC64)
>> -if (env->mmu_model & POWERPC_MMU_64) {
>> -msr |= (1ULL << MSR_SF);
>> -}
>> -#endif
>> -
>>  hreg_store_msr(env, msr, 1);
>>  
>>  #if !defined(CONFIG_USER_ONLY)
> 

-- 
Alexey



Re: [PATCH 0/4] arm64: Add the cpufreq device to show cpufreq info to guest

2020-02-12 Thread no-reply
Patchew URL: https://patchew.org/QEMU/20200213023532.902-1-fangyi...@huawei.com/



Hi,

This series failed the docker-mingw@fedora build test. Please find the testing 
commands and
their output below. If you have Docker installed, you can probably reproduce it
locally.

=== TEST SCRIPT BEGIN ===
#! /bin/bash
export ARCH=x86_64
make docker-image-fedora V=1 NETWORK=1
time make docker-test-mingw@fedora J=14 NETWORK=1
=== TEST SCRIPT END ===

  CC  hw/acpi/cpufreq.o
  CC  hw/acpi/ipmi.o
/tmp/qemu-test/src/hw/acpi/cpufreq.c: In function 'cpufreq_read':
/tmp/qemu-test/src/hw/acpi/cpufreq.c:71:47: error: format '%lx' expects 
argument of type 'long unsigned int', but argument 2 has type 'hwaddr' {aka 
'long long unsigned int'} [-Werror=format=]
 warn_report("cpufreq_read: offset 0x%lx out of range", offset);
 ~~^~~
 %llx
/tmp/qemu-test/src/hw/acpi/cpufreq.c:127:52: error: format '%lx' expects 
argument of type 'long unsigned int', but argument 2 has type 'hwaddr' {aka 
'long long unsigned int'} [-Werror=format=]
 error_printf("cpufreq_read: Bad offset 0x%lx\n", offset);
  ~~^ ~~
  %llx
/tmp/qemu-test/src/hw/acpi/cpufreq.c: In function 'cpufreq_write':
/tmp/qemu-test/src/hw/acpi/cpufreq.c:143:49: error: format '%lx' expects 
argument of type 'long unsigned int', but argument 2 has type 'hwaddr' {aka 
'long long unsigned int'} [-Werror=format=]
 error_printf("cpufreq_write: offset 0x%lx out of range", offset);
   ~~^~~
   %llx
/tmp/qemu-test/src/hw/acpi/cpufreq.c:153:53: error: format '%lx' expects 
argument of type 'long unsigned int', but argument 2 has type 'hwaddr' {aka 
'long long unsigned int'} [-Werror=format=]
 error_printf("cpufreq_write: Bad offset 0x%lx\n", offset);
   ~~^ ~~
   %llx
/tmp/qemu-test/src/hw/acpi/cpufreq.c: In function 'CPPC_Read':
/tmp/qemu-test/src/hw/acpi/cpufreq.c:176:44: error: passing argument 4 of 
'qemu_strtoul' from incompatible pointer type 
[-Werror=incompatible-pointer-types]
 ret = qemu_strtoul(buffer, , 0, );
^~~
In file included from /tmp/qemu-test/src/hw/acpi/cpufreq.c:30:
---
  unsigned long *result);
  ~~~^~
cc1: all warnings being treated as errors
make: *** [/tmp/qemu-test/src/rules.mak:69: hw/acpi/cpufreq.o] Error 1
make: *** Waiting for unfinished jobs
Traceback (most recent call last):
  File "./tests/docker/docker.py", line 664, in 
---
raise CalledProcessError(retcode, cmd)
subprocess.CalledProcessError: Command '['sudo', '-n', 'docker', 'run', 
'--label', 'com.qemu.instance.uuid=9698cb6880c44cb0984a842b2be0fc70', '-u', 
'1001', '--security-opt', 'seccomp=unconfined', '--rm', '-e', 'TARGET_LIST=', 
'-e', 'EXTRA_CONFIGURE_OPTS=', '-e', 'V=', '-e', 'J=14', '-e', 'DEBUG=', '-e', 
'SHOW_ENV=', '-e', 'CCACHE_DIR=/var/tmp/ccache', '-v', 
'/home/patchew/.cache/qemu-docker-ccache:/var/tmp/ccache:z', '-v', 
'/var/tmp/patchew-tester-tmp-_sia_b3d/src/docker-src.2020-02-12-22.07.28.16853:/var/tmp/qemu:z,ro',
 'qemu:fedora', '/var/tmp/qemu/run', 'test-mingw']' returned non-zero exit 
status 2.
filter=--filter=label=com.qemu.instance.uuid=9698cb6880c44cb0984a842b2be0fc70
make[1]: *** [docker-run] Error 1
make[1]: Leaving directory `/var/tmp/patchew-tester-tmp-_sia_b3d/src'
make: *** [docker-run-test-mingw@fedora] Error 2

real    2m6.980s
user    0m8.259s


The full log is available at
http://patchew.org/logs/20200213023532.902-1-fangyi...@huawei.com/testing.docker-mingw@fedora/?type=message.
---
Email generated automatically by Patchew [https://patchew.org/].
Please send your feedback to patchew-de...@redhat.com
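
For reference, the two kinds of errors above are a hwaddr format-string mismatch and a pointer-type mismatch in the qemu_strtoul() call; an illustrative fix (variable names are guesses, this is not part of the posted series) would be along these lines:

    /* hwaddr is always 64 bits wide, so print it with HWADDR_PRIx rather than %lx */
    warn_report("cpufreq_read: offset 0x%" HWADDR_PRIx " out of range", offset);

    /* qemu_strtoul() expects an unsigned long result, not a uint32_t */
    unsigned long freq;
    ret = qemu_strtoul(buffer, &endptr, 0, &freq);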

Re: [PATCH qemu v6 5/6] spapr: Allow changing offset for -kernel image

2020-02-12 Thread David Gibson
On Mon, Feb 03, 2020 at 02:29:42PM +1100, Alexey Kardashevskiy wrote:
> This allows moving the kernel in the guest memory. The option is useful
> for step debugging (as Linux is linked at 0x0); it also allows loading
> grub which is normally linked to run at 0x2.
> 
> This uses the existing kernel address by default.
> 
> Signed-off-by: Alexey Kardashevskiy 

Applied to ppc-for-5.0, since I think it makes sense even without the
rest of the series.

> ---
>  include/hw/ppc/spapr.h |  1 +
>  hw/ppc/spapr.c | 38 +++---
>  2 files changed, 32 insertions(+), 7 deletions(-)
> 
> diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
> index 3b50f36c338a..32e831a395ae 100644
> --- a/include/hw/ppc/spapr.h
> +++ b/include/hw/ppc/spapr.h
> @@ -164,6 +164,7 @@ struct SpaprMachineState {
>  void *fdt_blob;
>  long kernel_size;
>  bool kernel_le;
> +uint64_t kernel_addr;
>  uint32_t initrd_base;
>  long initrd_size;
>  uint64_t rtc_offset; /* Now used only during incoming migration */
> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> index 60153bf0b771..b59e9dc360fe 100644
> --- a/hw/ppc/spapr.c
> +++ b/hw/ppc/spapr.c
> @@ -1054,7 +1054,7 @@ static void spapr_dt_chosen(SpaprMachineState *spapr, 
> void *fdt)
>  }
>  
>  if (spapr->kernel_size) {
> -uint64_t kprop[2] = { cpu_to_be64(KERNEL_LOAD_ADDR),
> +uint64_t kprop[2] = { cpu_to_be64(spapr->kernel_addr),
>cpu_to_be64(spapr->kernel_size) };
>  
>  _FDT(fdt_setprop(fdt, chosen, "qemu,boot-kernel",
> @@ -1242,7 +1242,8 @@ void *spapr_build_fdt(SpaprMachineState *spapr, bool 
> reset, size_t space)
>  /* Build memory reserve map */
>  if (reset) {
>  if (spapr->kernel_size) {
> -_FDT((fdt_add_mem_rsv(fdt, KERNEL_LOAD_ADDR, 
> spapr->kernel_size)));
> +_FDT((fdt_add_mem_rsv(fdt, spapr->kernel_addr,
> +  spapr->kernel_size)));
>  }
>  if (spapr->initrd_size) {
>  _FDT((fdt_add_mem_rsv(fdt, spapr->initrd_base,
> @@ -1270,7 +1271,9 @@ void *spapr_build_fdt(SpaprMachineState *spapr, bool 
> reset, size_t space)
>  
>  static uint64_t translate_kernel_address(void *opaque, uint64_t addr)
>  {
> -return (addr & 0x0fff) + KERNEL_LOAD_ADDR;
> +SpaprMachineState *spapr = opaque;
> +
> +return (addr & 0x0fff) + spapr->kernel_addr;
>  }
>  
>  static void emulate_spapr_hypercall(PPCVirtualHypervisor *vhyp,
> @@ -2947,14 +2950,15 @@ static void spapr_machine_init(MachineState *machine)
>  uint64_t lowaddr = 0;
>  
>  spapr->kernel_size = load_elf(kernel_filename, NULL,
> -  translate_kernel_address, NULL,
> +  translate_kernel_address, spapr,
>NULL, , NULL, NULL, 1,
>PPC_ELF_MACHINE, 0, 0);
>  if (spapr->kernel_size == ELF_LOAD_WRONG_ENDIAN) {
>  spapr->kernel_size = load_elf(kernel_filename, NULL,
> -  translate_kernel_address, NULL, 
> NULL,
> +  translate_kernel_address, spapr, 
> NULL,
>, NULL, NULL, 0,
> -  PPC_ELF_MACHINE, 0, 0);
> +  PPC_ELF_MACHINE,
> +  0, 0);
>  spapr->kernel_le = spapr->kernel_size > 0;
>  }
>  if (spapr->kernel_size < 0) {
> @@ -2968,7 +2972,7 @@ static void spapr_machine_init(MachineState *machine)
>  /* Try to locate the initrd in the gap between the kernel
>   * and the firmware. Add a bit of space just in case
>   */
> -spapr->initrd_base = (KERNEL_LOAD_ADDR + spapr->kernel_size
> +spapr->initrd_base = (spapr->kernel_addr + spapr->kernel_size
>+ 0x1) & ~0x;
>  spapr->initrd_size = load_image_targphys(initrd_filename,
>   spapr->initrd_base,
> @@ -3214,6 +3218,18 @@ static void spapr_set_vsmt(Object *obj, Visitor *v, 
> const char *name,
>  visit_type_uint32(v, name, (uint32_t *)opaque, errp);
>  }
>  
> +static void spapr_get_kernel_addr(Object *obj, Visitor *v, const char *name,
> +  void *opaque, Error **errp)
> +{
> +visit_type_uint64(v, name, (uint64_t *)opaque, errp);
> +}
> +
> +static void spapr_set_kernel_addr(Object *obj, Visitor *v, const char *name,
> +  void *opaque, Error **errp)
> +{
> +visit_type_uint64(v, name, (uint64_t *)opaque, errp);
> +}
> +
>  static char *spapr_get_ic_mode(Object *obj, Error **errp)
>  {
>  SpaprMachineState *spapr = SPAPR_MACHINE(obj);
> @@ -3319,6 +3335,14 

Re: The issues about architecture of the COLO checkpoint

2020-02-12 Thread Daniel Cho
Hi Hailiang,

1.
OK, we will try the patch
“0001-COLO-Optimize-memory-back-up-process.patch”,
and thanks for your help.

2.
We understand the reason for comparing the PVM's and SVM's packets. However,
the SVM's packet queue can be empty in two situations: while the COLO feature
is being set up, and when the SVM is broken.

In situation 1 (setting up the COLO feature):
We could force a checkpoint once the COLO feature setup finishes; that would
protect the state of the PVM and SVM, as Zhang Chen said.

In situation 2 (SVM broken):
COLO will fail over to the PVM, so it should not cause any problem on the PVM.

However, these are only our views, so there might be a big difference between
reality and our understanding. If any of our views or opinions are wrong,
please let us know and correct us.
Thanks.

Best regards,
Daniel Cho

Zhang, Chen wrote on Thursday, February 13, 2020 at 10:17 AM:

> Add cc Jason Wang, he is a network expert.
>
> In case some network things goes wrong.
>
>
>
> Thanks
>
> Zhang Chen
>
>
>
> *From:* Zhang, Chen
> *Sent:* Thursday, February 13, 2020 10:10 AM
> *To:* 'Zhanghailiang' ; Daniel Cho <
> daniel...@qnap.com>
> *Cc:* Dr. David Alan Gilbert ; qemu-devel@nongnu.org
> *Subject:* RE: The issues about architecture of the COLO checkpoint
>
>
>
> For the issue 2:
>
>
>
> COLO need use the network packets to confirm PVM and SVM in the same state,
>
> Generally speaking, we can’t send PVM packets without compared with SVM
> packets.
>
> But to prevent jamming, I think COLO can do force checkpoint and send the
> PVM packets in this case.
>
>
>
> Thanks
>
> Zhang Chen
>
>
>
> *From:* Zhanghailiang 
> *Sent:* Thursday, February 13, 2020 9:45 AM
> *To:* Daniel Cho 
> *Cc:* Dr. David Alan Gilbert ; qemu-devel@nongnu.org;
> Zhang, Chen 
> *Subject:* RE: The issues about architecture of the COLO checkpoint
>
>
>
> Hi,
>
>
>
> 1.   After re-walked through the codes, yes, you are right, actually,
> after the first migration, we will keep dirty log on in primary side,
>
> And only send the dirty pages in PVM to SVM. The ram cache in secondary
> side is always a backup of PVM, so we don’t have to
>
> Re-send the none-dirtied pages.
>
> The reason why the first checkpoint takes longer time is we have to backup
> the whole VM’s ram into ram cache, that is colo_init_ram_cache().
>
> It is time consuming, but I have optimized in the second patch
> “0001-COLO-Optimize-memory-back-up-process.patch” which you can find in my
> previous reply.
>
>
>
> Besides, I found that, In my previous reply “We can only copy the pages
> that dirtied by PVM and SVM in last checkpoint.”,
>
> We have done this optimization in current upstream codes.
>
>
>
> 2.I don’t quite understand this question. For COLO, we always need both
> network packets of PVM’s and SVM’s to compare before send this packets to
> client.
>
> It depends on this to decide whether or not PVM and SVM are in same state.
>
>
>
> Thanks,
>
> hailiang
>
>
>
> *From:* Daniel Cho [mailto:daniel...@qnap.com ]
> *Sent:* Wednesday, February 12, 2020 4:37 PM
> *To:* Zhang, Chen 
> *Cc:* Zhanghailiang ; Dr. David Alan
> Gilbert ; qemu-devel@nongnu.org
> *Subject:* Re: The issues about architecture of the COLO checkpoint
>
>
>
> Hi Hailiang,
>
>
>
> Thanks for your replaying and explain in detail.
>
> We will try to use the attachments to enhance memory copy.
>
>
>
> However, we have some questions for your replying.
>
>
>
> 1.  As you said, "for each checkpoint, we have to send the whole PVM's
> pages To SVM", why the only first checkpoint will takes more pause time?
>
> In our observing, the first checkpoint will take more time for pausing,
> then other checkpoints will takes a few time for pausing. Does it means
> only the first checkpoint will send the whole pages to SVM, and the other
> checkpoints send the dirty pages to SVM for reloading?
>
>
>
> 2. We notice the COLO-COMPARE component will stuck the packet until
> receive packets from PVM and SVM, as this rule, when we add the
> COLO-COMPARE to PVM, its network will stuck until SVM start. So it is an
> other issue to make PVM stuck while setting COLO feature. With this issue,
> could we let colo-compare to pass the PVM's packet when the SVM's packet
> queue is empty? Then, the PVM's network won't stock, and "if PVM runs
> firstly, it still need to wait for The network packets from SVM to
> compare before send it to client side" won't happened either.
>
>
>
> Best regard,
>
> Daniel Cho
>
>
>
> Zhang, Chen wrote on Wednesday, February 12, 2020 at 1:45 PM:
>
>
>
> > -Original Message-
> > From: Zhanghailiang 
> > Sent: Wednesday, February 12, 2020 11:18 AM
> > To: Dr. David Alan Gilbert ; Daniel Cho
> > ; Zhang, Chen 
> > Cc: qemu-devel@nongnu.org
> > Subject: RE: The issues about architecture of the COLO checkpoint
> >
> > Hi,
> >
> > Thank you Dave,
> >
> > I'll reply here directly.
> >
> > -Original Message-
> > From: Dr. David Alan Gilbert [mailto:dgilb...@redhat.com]
> > Sent: Wednesday, February 12, 2020 1:48 AM
> > To: Daniel Cho ; chen.zh...@intel.com;
> > 

RE: [RFC v3 16/25] intel_iommu: add PASID cache management infrastructure

2020-02-12 Thread Liu, Yi L
> From: Peter Xu 
> Sent: Wednesday, February 12, 2020 11:26 PM
> To: Liu, Yi L 
> Subject: Re: [RFC v3 16/25] intel_iommu: add PASID cache management
> infrastructure
> 
> On Wed, Feb 12, 2020 at 08:37:30AM +, Liu, Yi L wrote:
> > > From: Peter Xu 
> > > Sent: Wednesday, February 12, 2020 7:36 AM
> > > To: Liu, Yi L 
> > > Subject: Re: [RFC v3 16/25] intel_iommu: add PASID cache management
> > > infrastructure
> > >
> > > On Wed, Jan 29, 2020 at 04:16:47AM -0800, Liu, Yi L wrote:
> > > > From: Liu Yi L 
> > > >
> > > > This patch adds a PASID cache management infrastructure based on
> > > > new added structure VTDPASIDAddressSpace, which is used to track
> > > > the PASID usage and future PASID tagged DMA address translation
> > > > support in vIOMMU.
> > > >
> > > > struct VTDPASIDAddressSpace {
> > > > VTDBus *vtd_bus;
> > > > uint8_t devfn;
> > > > AddressSpace as;
> > > > uint32_t pasid;
> > > > IntelIOMMUState *iommu_state;
> > > > VTDContextCacheEntry context_cache_entry;
> > > > QLIST_ENTRY(VTDPASIDAddressSpace) next;
> > > > VTDPASIDCacheEntry pasid_cache_entry;
> > > > };
> > > >
> > > > Ideally, a VTDPASIDAddressSpace instance is created when a PASID
> > > > is bound with a DMA AddressSpace. Intel VT-d spec requires guest
> > > > software to issue pasid cache invalidation when bind or unbind a
> > > > pasid with an address space under caching-mode. However, as
> > > > VTDPASIDAddressSpace instances also act as pasid cache in this
> > > > implementation, its creation also happens during vIOMMU PASID
> > > > tagged DMA translation. The creation in this path will not be
> > > > added in this patch since no PASID-capable emulated devices for
> > > > now.
> > > >
> > > > The implementation in this patch manages VTDPASIDAddressSpace
> > > > instances per PASID+BDF (lookup and insert will use PASID and
> > > > BDF) since Intel VT-d spec allows per-BDF PASID Table. When a
> > > > guest binds a PASID with an AddressSpace, QEMU will capture the
> > > > guest's PASID-selective pasid cache invalidation, and allocate or
> > > > remove a VTDPASIDAddressSpace instance according to the invalidation
> > > > reason:
> > > >
> > > > *) a present pasid entry moved to non-present
> > > > *) a present pasid entry to be a present entry
> > > > *) a non-present pasid entry moved to present
> > > >
> > > > vIOMMU emulator could figure out the reason by fetching latest
> > > > guest pasid entry.
> > > >
> > > > Cc: Kevin Tian 
> > > > Cc: Jacob Pan 
> > > > Cc: Peter Xu 
> > > > Cc: Yi Sun 
> > > > Cc: Paolo Bonzini 
> > > > Cc: Richard Henderson 
> > > > Cc: Eduardo Habkost 
> > > > Signed-off-by: Liu Yi L 
> > > > ---
> > > >  hw/i386/intel_iommu.c  | 367
> > > +
> > > >  hw/i386/intel_iommu_internal.h |  14 ++
> > > >  hw/i386/trace-events   |   1 +
> > > >  include/hw/i386/intel_iommu.h  |  36 +++-
> > > >  4 files changed, 417 insertions(+), 1 deletion(-)
> > > >
> > > > diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c index
> > > > 58e7213..c75cb7b 100644
> > > > --- a/hw/i386/intel_iommu.c
> > > > +++ b/hw/i386/intel_iommu.c
> > > > @@ -40,6 +40,7 @@
> > > >  #include "kvm_i386.h"
> > > >  #include "migration/vmstate.h"
> > > >  #include "trace.h"
> > > > +#include "qemu/jhash.h"
> > > >
> > > >  /* context entry operations */
> > > >  #define VTD_CE_GET_RID2PASID(ce) \ @@ -65,6 +66,8 @@  static void
> > > > vtd_address_space_refresh_all(IntelIOMMUState *s);  static void
> > > > vtd_address_space_unmap(VTDAddressSpace *as, IOMMUNotifier
> > > *n);
> > > >
> > > > +static void vtd_pasid_cache_reset(IntelIOMMUState *s);
[...]
> > > > +
> > > > +/**
> > > > + * This function is used to clear pasid_cache_gen of cached pasid
> > > > + * entry in vtd_pasid_as instances. Caller of this function
> > > > +should
> > > > + * hold iommu_lock.
> > > > + */
> > > > +static gboolean vtd_flush_pasid(gpointer key, gpointer value,
> > > > +gpointer user_data) {
> > > > +VTDPASIDCacheInfo *pc_info = user_data;
> > > > +VTDPASIDAddressSpace *vtd_pasid_as = value;
> > > > +IntelIOMMUState *s = vtd_pasid_as->iommu_state;
> > > > +VTDPASIDCacheEntry *pc_entry = _pasid_as->pasid_cache_entry;
> > > > +VTDBus *vtd_bus = vtd_pasid_as->vtd_bus;
> > > > +VTDPASIDEntry pe;
> > > > +uint16_t did;
> > > > +uint32_t pasid;
> > > > +uint16_t devfn;
> > > > +int ret;
> > > > +
> > > > +did = vtd_pe_get_domain_id(_entry->pasid_entry);
> > > > +pasid = vtd_pasid_as->pasid;
> > > > +devfn = vtd_pasid_as->devfn;
> > > > +
> > > > +if (!(pc_entry->pasid_cache_gen == s->pasid_cache_gen)) {
> > > > +return false;
> > > > +}
> > > > +
> > > > +switch (pc_info->flags & VTD_PASID_CACHE_INFO_MASK) {
> > > > +case VTD_PASID_CACHE_PASIDSI:
> > > > +if (pc_info->pasid != pasid) {
> > > > +return 

[PATCH v2] hw/char/exynos4210_uart: Fix memleaks in exynos4210_uart_init

2020-02-12 Thread kuhn.chenqun
From: Chen Qun 

It's easy to reproduce as follows:
virsh qemu-monitor-command vm1 --pretty '{"execute": "device-list-properties",
"arguments":{"typename":"exynos4210.uart"}}'

ASAN shows memory leak stack:
  #1 0xfffd896d71cb in g_malloc0 (/lib64/libglib-2.0.so.0+0x571cb)
  #2 0xaaad270beee3 in timer_new_full /qemu/include/qemu/timer.h:530
  #3 0xaaad270beee3 in timer_new /qemu/include/qemu/timer.h:551
  #4 0xaaad270beee3 in timer_new_ns /qemu/include/qemu/timer.h:569
  #5 0xaaad270beee3 in exynos4210_uart_init /qemu/hw/char/exynos4210_uart.c:677
  #6 0xaaad275c8f4f in object_initialize_with_type /qemu/qom/object.c:516
  #7 0xaaad275c91bb in object_new_with_type /qemu/qom/object.c:684
  #8 0xaaad2755df2f in qmp_device_list_properties /qemu/qom/qom-qmp-cmds.c:152

Reported-by: Euler Robot 
Signed-off-by: Chen Qun 
---
Changes from v1 to v2:
-Keep s->wordtime in exynos4210_uart_init (based on Eduardo's and Philippe's
comments).
---
 hw/char/exynos4210_uart.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/hw/char/exynos4210_uart.c b/hw/char/exynos4210_uart.c
index 25d6588e41..96d5180e3e 100644
--- a/hw/char/exynos4210_uart.c
+++ b/hw/char/exynos4210_uart.c
@@ -674,8 +674,6 @@ static void exynos4210_uart_init(Object *obj)
 SysBusDevice *dev = SYS_BUS_DEVICE(obj);
 Exynos4210UartState *s = EXYNOS4210_UART(dev);
 
-s->fifo_timeout_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
- exynos4210_uart_timeout_int, s);
 s->wordtime = NANOSECONDS_PER_SECOND * 10 / 9600;
 
 /* memory mapping */
@@ -691,6 +689,9 @@ static void exynos4210_uart_realize(DeviceState *dev, Error 
**errp)
 {
 Exynos4210UartState *s = EXYNOS4210_UART(dev);
 
+s->fifo_timeout_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
+ exynos4210_uart_timeout_int, s);
+
 qemu_chr_fe_set_handlers(>chr, exynos4210_uart_can_receive,
  exynos4210_uart_receive, exynos4210_uart_event,
  NULL, s, NULL, true);
-- 
2.23.0
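
The leak shows up because QMP commands such as device-list-properties only run instance_init and then finalize the object without ever realizing it, so anything allocated in init() and only torn down at unrealize time leaks. Moving the timer allocation to realize(), as this v2 does, avoids that; for comparison, a hypothetical alternative (not what this patch does) would be to keep the allocation in init() and free it in an instance_finalize hook:

static void exynos4210_uart_finalize(Object *obj)
{
    Exynos4210UartState *s = EXYNOS4210_UART(obj);

    /* Undo what instance_init allocated, so never-realized objects don't leak */
    timer_del(s->fifo_timeout_timer);
    timer_free(s->fifo_timeout_timer);
}

/* ... hooked up via .instance_finalize in the TypeInfo. */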





RE: [RFC v3 03/25] hw/iommu: introduce IOMMUContext

2020-02-12 Thread Liu, Yi L
> From: Peter Xu 
> Sent: Thursday, February 13, 2020 12:00 AM
> To: Liu, Yi L 
> Subject: Re: [RFC v3 03/25] hw/iommu: introduce IOMMUContext
> 
> On Wed, Feb 12, 2020 at 07:15:13AM +, Liu, Yi L wrote:
> 
> [...]
> 
> > While considering your suggestion on dropping one of the two abstract
> > layers. I came up a new proposal as below:
> >
> > We may drop the IOMMUContext in this series, and rename
> > DualStageIOMMUObject to HostIOMMUContext, which is per-vfio-container.
> > Add an interface in PCI layer(e.g. an callback in  PCIDevice) to let vIOMMU 
> > get
> HostIOMMUContext.
> > I think this could cover the requirement of providing explicit method
> > for vIOMMU to call into VFIO and then program host IOMMU.
> >
> > While for the requirement of VFIO to vIOMMU callings (e.g. PRQ), I
> > think it could be done via PCI layer by adding an operation in PCIIOMMUOps.
> Thoughts?
> 
> Hmm sounds good. :)
> 
> The thing is for the calls to the other direction (e.g. VFIO injecting faults 
> to
> vIOMMU), that's neither per-container nor per-device, but per-vIOMMU.
> PCIIOMMUOps suites for that job I'd say, which is per-vIOMMU.
> 
> Let's see how it goes.

Thanks, let me get a new version out by the end of this week.

Regards,
Yi Liu
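
To make the shape of the proposal above concrete, here is a rough sketch; every name and field below is hypothetical, meant only to illustrate the direction being discussed, not the actual next version:

/* Hypothetical: per-vfio-container context the vIOMMU uses to program the
 * host IOMMU (nested translation setup, PASID table binding, ...). */
typedef struct HostIOMMUContext HostIOMMUContext;

/* Hypothetical PCI-layer hooks, per-vIOMMU rather than per-device. */
typedef struct PCIIOMMUOps {
    /* vIOMMU -> VFIO direction: fetch the host context behind a device */
    HostIOMMUContext *(*get_host_iommu_context)(PCIBus *bus, void *opaque,
                                                int devfn);
    /* VFIO -> vIOMMU direction: e.g. deliver a page request (PRQ) event */
    int (*iommu_fault_notify)(PCIBus *bus, void *opaque, int devfn,
                              void *fault_info);
} PCIIOMMUOps;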



RE: [RFC v3 13/25] intel_iommu: modify x-scalable-mode to be string option

2020-02-12 Thread Liu, Yi L
> From: Peter Xu 
> Sent: Thursday, February 13, 2020 12:06 AM
> To: Liu, Yi L 
> Subject: Re: [RFC v3 13/25] intel_iommu: modify x-scalable-mode to be string
> option
> 
> On Wed, Feb 12, 2020 at 07:28:24AM +, Liu, Yi L wrote:
> > > From: Peter Xu 
> > > Sent: Wednesday, February 12, 2020 3:44 AM
> > > To: Liu, Yi L 
> > > Subject: Re: [RFC v3 13/25] intel_iommu: modify x-scalable-mode to
> > > be string option
> > >
> > > On Wed, Jan 29, 2020 at 04:16:44AM -0800, Liu, Yi L wrote:
> > > > From: Liu Yi L 
> > > >
> > > > Intel VT-d 3.0 introduces scalable mode, and it has a bunch of
> > > > capabilities related to scalable mode translation, thus there are
> > > > multiple
> > > combinations.
> > > > While this vIOMMU implementation wants simplify it for user by
> > > > providing typical combinations. User could config it by
> > > > "x-scalable-mode" option. The usage is as below:
> > > >
> > > > "-device intel-iommu,x-scalable-mode=["legacy"|"modern"]"
> > >
> > > Maybe also "off" when someone wants to explicitly disable it?
> >
> > emmm, I  think x-scalable-mode should be disabled by default. It is
> > enabled only when "legacy" or "modern" is configured. I'm fine to add
> > "off" as an explicit way to turn it off if you think it is necessary.
> > :-)
> 
> It's not necessary.  It'll be necessary when we remove "x-" and change the
> default value.  However it'll always be good to provide all options 
> explicitly in the
> parameter starting from when we design it, imho.  It's still experimental, 
> so...
> Your call. :)

Got it. Let me add it in the next version.
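
For illustration only, handling the three values could look roughly like this (the field names here are invented, the actual patch may differ):

/* "-device intel-iommu,x-scalable-mode=off|legacy|modern" */
if (!s->scalable_mode_str || !strcmp(s->scalable_mode_str, "off")) {
    s->scalable_mode = false;            /* default: scalable mode disabled */
} else if (!strcmp(s->scalable_mode_str, "legacy")) {
    s->scalable_mode = true;             /* second-level translation only */
} else if (!strcmp(s->scalable_mode_str, "modern")) {
    s->scalable_mode = true;
    s->scalable_modern = true;           /* adds PASID / first-level support */
} else {
    error_setg(errp, "Invalid x-scalable-mode value '%s' "
               "(expected: off, legacy or modern)", s->scalable_mode_str);
}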

Regards,
Yi Liu


RE: [RFC v3 14/25] intel_iommu: add virtual command capability support

2020-02-12 Thread Liu, Yi L
> From: Peter Xu 
> Sent: Wednesday, February 12, 2020 5:57 AM
> To: Liu, Yi L 
> Subject: Re: [RFC v3 14/25] intel_iommu: add virtual command capability 
> support
> 
> On Wed, Jan 29, 2020 at 04:16:45AM -0800, Liu, Yi L wrote:
> > +/*
> > + * The basic idea is to let hypervisor to set a range for available
> > + * PASIDs for VMs. One of the reasons is PASID #0 is reserved by
> > + * RID_PASID usage. We have no idea how many reserved PASIDs in future,
> > + * so here just an evaluated value. Honestly, set it as "1" is enough
> > + * at current stage.
> > + */
> > +#define VTD_MIN_HPASID  1
> > +#define VTD_MAX_HPASID  0xF
> 
> One more question: I see that PASID is defined as 20bits long.  It's
> fine.  However I start to get confused on how the Scalable Mode PASID
> Directory could service that much of PASID entries.
> 
> I'm looking at spec 3.4.3, Figure 3-8.
> 
> Firstly, we only have two levels for a PASID table.  The context entry
> of a device stores a pointer to the "Scalable Mode PASID Directory"
> page. I see that there're 2^14 entries in "Scalable Mode PASID
> Directory" page, each is a "Scalable Mode PASID Table".
> However... how do we fit in the 4K page if each entry is a pointer of
> x86_64 (8 bytes) while there're 2^14 entries?  A simple math gives me
> 4K/8 = 512, which means the "Scalable Mode PASID Directory" page can
> only have 512 entries, then how the 2^14 come from?  Hmm??

I checked with Kevin. The spec doesn't say the directory table is 4K; it says
4K only for the pasid table. Also, if you look at section 9.4, the
scalable-mode context entry includes a PDTS field to specify the actual size
of the directory table.
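
For reference, the sizing works out as follows under the scalable-mode layout (assuming 8-byte directory entries and 64-byte PASID-table entries, as in the 3.0 spec):

    PASID[19:6] indexes the PASID directory -> 2^14 = 16384 directory entries
    PASID[5:0]  indexes a PASID table       -> 64 entries * 64 B = 4 KiB per table
    full directory = 16384 entries * 8 B    = 128 KiB (32 pages, not a single 4 KiB page)

so a full 20-bit PASID space needs a 128 KiB directory, and PDTS says how much of it is actually implemented.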

> Apart of this: also I just noticed (when reading the latter part of
> the series) that the time that a pasid table walk can consume will
> depend on this value too.  I'd suggest to make this as small as we
> can, as long as it satisfies the usage.  We can even bump it in the
> future.

I see. This looks to be an optimization, right? Instead of modifying the value
of this macro, I think we can do this optimization by tracking the allocated
PASIDs in QEMU. That way, the pasid table walk would be more efficient and
would no longer depend on VTD_MAX_HPASID. Does it make sense to you? :-)
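
A minimal sketch of that bookkeeping idea (purely illustrative, names invented, not from this series): keep a set of PASIDs the guest has bound and iterate over that set instead of walking 0..VTD_MAX_HPASID:

/* Assumed: a GHashTable created with g_hash_table_new(NULL, NULL) and kept
 * in IntelIOMMUState as s->allocated_pasids (hypothetical field name). */

static void vtd_pasid_track(IntelIOMMUState *s, uint32_t pasid)
{
    g_hash_table_add(s->allocated_pasids, GUINT_TO_POINTER(pasid));
}

static void vtd_for_each_allocated_pasid(IntelIOMMUState *s,
                                         void (*fn)(uint32_t pasid, void *opaque),
                                         void *opaque)
{
    GHashTableIter iter;
    gpointer key;

    g_hash_table_iter_init(&iter, s->allocated_pasids);
    while (g_hash_table_iter_next(&iter, &key, NULL)) {
        /* Walk only the PASIDs actually in use, however large VTD_MAX_HPASID is */
        fn(GPOINTER_TO_UINT(key), opaque);
    }
}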

Regards,
Yi Liu


[PATCH 2/4] acpi/cppc: Add ACPI CPPC registers

2020-02-12 Thread Ying Fang
The Continuous Performance Control Package is used to
describe the ACPI CPPC registers.

Signed-off-by: Heyi Guo 
Signed-off-by: Ying Fang 
---
 hw/arm/virt-acpi-build.c| 74 -
 hw/arm/virt.c   |  1 +
 include/hw/acpi/acpi-defs.h | 32 
 include/hw/arm/virt.h   |  1 +
 4 files changed, 106 insertions(+), 2 deletions(-)

diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c
index bd5f771e9b..d133bad738 100644
--- a/hw/arm/virt-acpi-build.c
+++ b/hw/arm/virt-acpi-build.c
@@ -41,6 +41,7 @@
 #include "hw/acpi/pci.h"
 #include "hw/acpi/memory_hotplug.h"
 #include "hw/acpi/generic_event_device.h"
+#include "hw/acpi/acpi-defs.h"
 #include "hw/pci/pcie_host.h"
 #include "hw/pci/pci.h"
 #include "hw/arm/virt.h"
@@ -51,7 +52,70 @@
 
 #define ARM_SPI_BASE 32
 
-static void acpi_dsdt_add_cpus(Aml *scope, int smp_cpus)
+
+static void acpi_dsdt_add_psd(Aml *dev, int cpus)
+{
+Aml *pkg;
+Aml *sub;
+
+sub = aml_package(5);
+aml_append(sub, aml_int(5));
+aml_append(sub, aml_int(0));
+/* Assume all vCPUs belong to the same domain */
+aml_append(sub, aml_int(0));
+/* SW_ANY: OSPM coordinate, initiate on any processor */
+aml_append(sub, aml_int(0xFD));
+aml_append(sub, aml_int(cpus));
+
+pkg = aml_package(1);
+aml_append(pkg, sub);
+
+aml_append(dev, aml_name_decl("_PSD", pkg));
+}
+
+static void acpi_dsdt_add_cppc(Aml *dev, uint64_t cpu_base)
+{
+Aml *cpc;
+int i;
+
+/* ACPI 6.3 8.4.7.1, version 3 of the CPPC table is used */
+cpc = aml_package(23);
+aml_append(cpc, aml_int(23));
+aml_append(cpc, aml_int(3));
+
+for (i = 0; i < CPPC_REG_COUNT; i++) {
+Aml *res;
+uint8_t reg_width;
+uint8_t acc_type;
+uint64_t addr;
+/* Only some necessary registers are emulated */
+if ((i >= MIN_PERF && i < REFERENCE_CTR) ||
+(i >= ENABLE && i < LOWEST_FREQ)) {
+reg_width = 0;
+acc_type = AML_ANY_ACC;
+addr = 0;
+} else {
+addr = cpu_base + i * 4;
+if (i == REFERENCE_CTR || i == DELIVERED_CTR) {
+reg_width = 64;
+acc_type = AML_QWORD_ACC;
+} else {
+reg_width = 32;
+acc_type = AML_DWORD_ACC;
+}
+}
+
+res = aml_resource_template();
+aml_append(res, aml_generic_register(AML_SYSTEM_MEMORY, reg_width, 0,
+ acc_type, addr));
+aml_append(cpc, res);
+}
+
+aml_append(dev, aml_name_decl("_CPC", cpc));
+}
+
+static void acpi_dsdt_add_cpus(Aml *scope, int smp_cpus,
+   const MemMapEntry *cppc_memmap)
 {
 uint16_t i;
 
@@ -60,6 +124,12 @@ static void acpi_dsdt_add_cpus(Aml *scope, int smp_cpus)
 aml_append(dev, aml_name_decl("_HID", aml_string("ACPI0007")));
 aml_append(dev, aml_name_decl("_UID", aml_int(i)));
 aml_append(scope, dev);
+/*
+ * Append _CPC and _PSD to show CPU frequency
+ */
+acpi_dsdt_add_cppc(dev,
+   cppc_memmap->base + i * CPPC_REG_PER_CPU_STRIDE);
+acpi_dsdt_add_psd(dev, smp_cpus);
 }
 }
 
@@ -736,7 +806,7 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, 
VirtMachineState *vms)
  * the RTC ACPI device at all when using UEFI.
  */
 scope = aml_scope("\\_SB");
-acpi_dsdt_add_cpus(scope, vms->smp_cpus);
+acpi_dsdt_add_cpus(scope, vms->smp_cpus, [VIRT_CPUFREQ]);
 acpi_dsdt_add_uart(scope, [VIRT_UART],
(irqmap[VIRT_UART] + ARM_SPI_BASE));
 acpi_dsdt_add_flash(scope, [VIRT_FLASH]);
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index f788fe27d6..ed9dc38b60 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -144,6 +144,7 @@ static const MemMapEntry base_memmap[] = {
 [VIRT_PCDIMM_ACPI] ={ 0x0907, MEMORY_HOTPLUG_IO_LEN },
 [VIRT_ACPI_GED] =   { 0x0908, ACPI_GED_EVT_SEL_LEN },
 [VIRT_MMIO] =   { 0x0a00, 0x0200 },
+[VIRT_CPUFREQ] ={ 0x0b00, 0x0001 },
 /* ...repeating for a total of NUM_VIRTIO_TRANSPORTS, each of that size */
 [VIRT_PLATFORM_BUS] =   { 0x0c00, 0x0200 },
 [VIRT_SECURE_MEM] = { 0x0e00, 0x0100 },
diff --git a/include/hw/acpi/acpi-defs.h b/include/hw/acpi/acpi-defs.h
index 57a3f58b0c..3a33f7220d 100644
--- a/include/hw/acpi/acpi-defs.h
+++ b/include/hw/acpi/acpi-defs.h
@@ -634,4 +634,36 @@ struct AcpiIortRC {
 } QEMU_PACKED;
 typedef struct AcpiIortRC AcpiIortRC;
 
+/*
+ * CPPC register definition from kernel header
+ * include/acpi/cppc_acpi.h
+ * The last element is newly added for easy use
+ */
+enum cppc_regs {
+HIGHEST_PERF,
+NOMINAL_PERF,
+LOW_NON_LINEAR_PERF,
+LOWEST_PERF,
+GUARANTEED_PERF,
+DESIRED_PERF,
+MIN_PERF,
+MAX_PERF,
+

[PATCH 3/4] arm: Add the cpufreq device model

2020-02-12 Thread Ying Fang
On the ARM64 platform, CPU frequency is retrieved via ACPI CPPC, so here we
create a virtual cpufreq device to present the CPPC registers and the ACPI
_CPC objects.

The default frequency is set to the host CPU nominal frequency, which is
obtained from the host CPPC sysfs. The other performance data are set to the
same value, since we don't support guest performance scaling.

Performance counters are not emulated either; they simply return 1 when read,
so the guest should fall back to using the desired performance value as the
current performance.
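
For context, the guest side (the Linux cppc_cpufreq driver) derives the current performance from the two feedback counters roughly as sketched below (a paraphrase, not the kernel code); with both counters pinned at 1 the deltas are zero, so it falls back to the desired performance value:

/* Paraphrased guest-side fallback logic; names are illustrative. */
static uint64_t guest_current_perf(uint64_t ref_prev, uint64_t ref_now,
                                   uint64_t del_prev, uint64_t del_now,
                                   uint64_t reference_perf,
                                   uint64_t desired_perf)
{
    uint64_t delta_ref = ref_now - ref_prev;
    uint64_t delta_del = del_now - del_prev;

    if (delta_ref == 0 || delta_del == 0) {
        /* Counters did not move (QEMU always returns 1): use desired perf */
        return desired_perf;
    }
    return reference_perf * delta_del / delta_ref;
}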

Signed-off-by: Heyi Guo 
Signed-off-by: Ying Fang 
---
 hw/acpi/Makefile.objs |   1 +
 hw/acpi/cpufreq.c | 247 ++
 2 files changed, 248 insertions(+)
 create mode 100644 hw/acpi/cpufreq.c

diff --git a/hw/acpi/Makefile.objs b/hw/acpi/Makefile.objs
index 777da07f4d..61530675d4 100644
--- a/hw/acpi/Makefile.objs
+++ b/hw/acpi/Makefile.objs
@@ -16,6 +16,7 @@ common-obj-y += bios-linker-loader.o
 common-obj-y += aml-build.o utils.o
 common-obj-$(CONFIG_ACPI_PCI) += pci.o
 common-obj-$(CONFIG_TPM) += tpm.o
+common-obj-$(CONFIG_CPUFREQ) += cpufreq.o
 
 common-obj-$(CONFIG_IPMI) += ipmi.o
 common-obj-$(call lnot,$(CONFIG_IPMI)) += ipmi-stub.o
diff --git a/hw/acpi/cpufreq.c b/hw/acpi/cpufreq.c
new file mode 100644
index 00..f38087884a
--- /dev/null
+++ b/hw/acpi/cpufreq.c
@@ -0,0 +1,247 @@
+/*
+ * ACPI CPPC register device
+ *
+ * Support for showing CPU frequency in guest OS.
+ *
+ * Copyright (c) 2019 HUAWEI TECHNOLOGIES CO.,LTD.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see .
+ */
+
+#include "qemu/osdep.h"
+#include "hw/sysbus.h"
+#include "chardev/char.h"
+#include "qemu/log.h"
+#include "trace.h"
+#include "qemu/option.h"
+#include "sysemu/sysemu.h"
+#include "hw/acpi/acpi-defs.h"
+#include "qemu/cutils.h"
+#include "qemu/error-report.h"
+#include "hw/boards.h"
+
+#define TYPE_CPUFREQ "cpufreq"
+#define CPUFREQ(obj) OBJECT_CHECK(CpufreqState, (obj), TYPE_CPUFREQ)
+#define NOMINAL_FREQ_FILE "/sys/devices/system/cpu/cpu0/acpi_cppc/nominal_freq"
+#define CPU_MAX_FREQ_FILE 
"/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq"
+#define HZ_MAX_LENGTH 1024
+#define MAX_SUPPORT_SPACE 0x1
+
+typedef struct CpufreqState {
+SysBusDevice parent_obj;
+
+MemoryRegion iomem;
+uint32_t HighestPerformance;
+uint32_t NominalPerformance;
+uint32_t LowestNonlinearPerformance;
+uint32_t LowestPerformance;
+uint32_t GuaranteedPerformance;
+uint32_t DesiredPerformance;
+uint64_t ReferencePerformanceCounter;
+uint64_t DeliveredPerformanceCounter;
+uint32_t PerformanceLimited;
+uint32_t LowestFreq;
+uint32_t NominalFreq;
+uint32_t reg_size;
+} CpufreqState;
+
+
+static uint64_t cpufreq_read(void *opaque, hwaddr offset,
+   unsigned size)
+{
+CpufreqState *s = (CpufreqState *)opaque;
+uint64_t r;
+uint64_t n;
+
+MachineState *ms = MACHINE(qdev_get_machine());
+unsigned int smp_cpus = ms->smp.cpus;
+
+if (offset >= smp_cpus * CPPC_REG_PER_CPU_STRIDE) {
+warn_report("cpufreq_read: offset 0x%lx out of range", offset);
+return 0;
+}
+
+n = offset % CPPC_REG_PER_CPU_STRIDE;
+switch (n) {
+case 0:
+r = s->HighestPerformance;
+break;
+case 4:
+r = s->NominalPerformance;
+break;
+case 8:
+r = s->LowestNonlinearPerformance;
+break;
+case 12:
+r = s->LowestPerformance;
+break;
+case 16:
+r = s->GuaranteedPerformance;
+break;
+case 20:
+r = s->DesiredPerformance;
+break;
+/*
+ * We don't have real counters and it is hard to emulate, so always set the
+ * counter value to 1 to rely on Linux to use the DesiredPerformance value
+ * directly.
+ */
+case 24:
+r = s->ReferencePerformanceCounter;
+break;
+/*
+ * Guest may still access the register by 32bit; add the process to
+ * eliminate unnecessary warnings
+ */
+case 28:
+r = s->ReferencePerformanceCounter >> 32;
+break;
+case 32:
+r = s->DeliveredPerformanceCounter;
+break;
+case 36:
+r = s->DeliveredPerformanceCounter >> 32;
+break;
+
+case 40:
+r = s->PerformanceLimited;
+break;
+case 44:
+r = s->LowestFreq;
+break;

Re: [PATCH 0/4] arm64: Add the cpufreq device to show cpufreq info to guest

2020-02-12 Thread no-reply
Patchew URL: https://patchew.org/QEMU/20200213020712.447-1-fangyi...@huawei.com/



Hi,

This series failed the docker-mingw@fedora build test. Please find the testing 
commands and
their output below. If you have Docker installed, you can probably reproduce it
locally.

=== TEST SCRIPT BEGIN ===
#! /bin/bash
export ARCH=x86_64
make docker-image-fedora V=1 NETWORK=1
time make docker-test-mingw@fedora J=14 NETWORK=1
=== TEST SCRIPT END ===

  CC  hw/acpi/cpufreq.o
  CC  hw/acpi/ipmi.o
/tmp/qemu-test/src/hw/acpi/cpufreq.c: In function 'cpufreq_read':
/tmp/qemu-test/src/hw/acpi/cpufreq.c:71:47: error: format '%lx' expects 
argument of type 'long unsigned int', but argument 2 has type 'hwaddr' {aka 
'long long unsigned int'} [-Werror=format=]
 warn_report("cpufreq_read: offset 0x%lx out of range", offset);
 ~~^~~
 %llx
/tmp/qemu-test/src/hw/acpi/cpufreq.c:127:52: error: format '%lx' expects 
argument of type 'long unsigned int', but argument 2 has type 'hwaddr' {aka 
'long long unsigned int'} [-Werror=format=]
 error_printf("cpufreq_read: Bad offset 0x%lx\n", offset);
  ~~^ ~~
  %llx
/tmp/qemu-test/src/hw/acpi/cpufreq.c: In function 'cpufreq_write':
/tmp/qemu-test/src/hw/acpi/cpufreq.c:143:49: error: format '%lx' expects 
argument of type 'long unsigned int', but argument 2 has type 'hwaddr' {aka 
'long long unsigned int'} [-Werror=format=]
 error_printf("cpufreq_write: offset 0x%lx out of range", offset);
   ~~^~~
   %llx
/tmp/qemu-test/src/hw/acpi/cpufreq.c:153:53: error: format '%lx' expects 
argument of type 'long unsigned int', but argument 2 has type 'hwaddr' {aka 
'long long unsigned int'} [-Werror=format=]
 error_printf("cpufreq_write: Bad offset 0x%lx\n", offset);
   ~~^ ~~
   %llx
/tmp/qemu-test/src/hw/acpi/cpufreq.c: In function 'CPPC_Read':
/tmp/qemu-test/src/hw/acpi/cpufreq.c:176:44: error: passing argument 4 of 
'qemu_strtoul' from incompatible pointer type 
[-Werror=incompatible-pointer-types]
 ret = qemu_strtoul(buffer, , 0, );
^~~
In file included from /tmp/qemu-test/src/hw/acpi/cpufreq.c:30:
---
  unsigned long *result);
  ~~~^~
cc1: all warnings being treated as errors
make: *** [/tmp/qemu-test/src/rules.mak:69: hw/acpi/cpufreq.o] Error 1
make: *** Waiting for unfinished jobs
Traceback (most recent call last):
  File "./tests/docker/docker.py", line 664, in 
---
raise CalledProcessError(retcode, cmd)
subprocess.CalledProcessError: Command '['sudo', '-n', 'docker', 'run', 
'--label', 'com.qemu.instance.uuid=00c1cc0b2f9f4bd19358be85a8852117', '-u', 
'1003', '--security-opt', 'seccomp=unconfined', '--rm', '-e', 'TARGET_LIST=', 
'-e', 'EXTRA_CONFIGURE_OPTS=', '-e', 'V=', '-e', 'J=14', '-e', 'DEBUG=', '-e', 
'SHOW_ENV=', '-e', 'CCACHE_DIR=/var/tmp/ccache', '-v', 
'/home/patchew2/.cache/qemu-docker-ccache:/var/tmp/ccache:z', '-v', 
'/var/tmp/patchew-tester-tmp-qo5ngnib/src/docker-src.2020-02-12-21.34.24.4890:/var/tmp/qemu:z,ro',
 'qemu:fedora', '/var/tmp/qemu/run', 'test-mingw']' returned non-zero exit 
status 2.
filter=--filter=label=com.qemu.instance.uuid=00c1cc0b2f9f4bd19358be85a8852117
make[1]: *** [docker-run] Error 1
make[1]: Leaving directory `/var/tmp/patchew-tester-tmp-qo5ngnib/src'
make: *** [docker-run-test-mingw@fedora] Error 2

real    2m12.645s
user    0m8.479s


The full log is available at
http://patchew.org/logs/20200213020712.447-1-fangyi...@huawei.com/testing.docker-mingw@fedora/?type=message.
---
Email generated automatically by Patchew [https://patchew.org/].
Please send your feedback to patchew-de...@redhat.com

[PATCH 1/4] acpi: Add aml_generic_register

2020-02-12 Thread Ying Fang
The generic register descriptor describes the location of a fixed-width
register within any of the ACPI-defined address spaces.

This is needed to declare the ACPI CPPC registers.
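
As used later in this series (patch 2/4), the new helper is wrapped in a resource template and appended to the _CPC package (here 'cpc' is that package and 'addr' the per-register address):

    Aml *res = aml_resource_template();
    /* 32-bit register in system memory at 'addr', DWord access */
    aml_append(res, aml_generic_register(AML_SYSTEM_MEMORY, 32, 0,
                                         AML_DWORD_ACC, addr));
    aml_append(cpc, res);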

Signed-off-by: Heyi Guo 
Signed-off-by: Ying Fang 
---
 hw/acpi/aml-build.c | 22 ++
 include/hw/acpi/aml-build.h |  3 +++
 2 files changed, 25 insertions(+)

diff --git a/hw/acpi/aml-build.c b/hw/acpi/aml-build.c
index 2c3702b882..79b1431f07 100644
--- a/hw/acpi/aml-build.c
+++ b/hw/acpi/aml-build.c
@@ -1370,6 +1370,28 @@ Aml *aml_sleep(uint64_t msec)
 return var;
 }
 
+/* ACPI 5.0b: 6.4.3.7 Generic Register Descriptor */
+Aml *aml_generic_register(AmlRegionSpace rs, uint8_t reg_width,
+  uint8_t reg_offset, AmlAccessType type, uint64_t 
addr)
+{
+int i;
+Aml *var = aml_alloc();
+build_append_byte(var->buf, 0x82); /* Generic Register Descriptor */
+build_append_byte(var->buf, 0x0C); /* Length, bits[7:0] value = 0x0C */
+build_append_byte(var->buf, 0);/* Length, bits[15:8] value = 0 */
+build_append_byte(var->buf, rs);   /* Address Space ID */
+build_append_byte(var->buf, reg_width);   /* Register Bit Width */
+build_append_byte(var->buf, reg_offset);  /* Register Bit Offset */
+build_append_byte(var->buf, type);/* Access Size */
+
+/* Register address */
+for (i = 0; i < 8; i++) {
+build_append_byte(var->buf, extract64(addr, i * 8, 8));
+}
+
+return var;
+}
+
 static uint8_t Hex2Byte(const char *src)
 {
 int hi, lo;
diff --git a/include/hw/acpi/aml-build.h b/include/hw/acpi/aml-build.h
index de4a406568..37a047b156 100644
--- a/include/hw/acpi/aml-build.h
+++ b/include/hw/acpi/aml-build.h
@@ -364,6 +364,9 @@ Aml *aml_qword_memory(AmlDecode dec, AmlMinFixed min_fixed,
 Aml *aml_dma(AmlDmaType typ, AmlDmaBusMaster bm, AmlTransferSize sz,
  uint8_t channel);
 Aml *aml_sleep(uint64_t msec);
+Aml *aml_generic_register(AmlRegionSpace rs, uint8_t reg_width,
+  uint8_t reg_offset, AmlAccessType type,
+  uint64_t addr);
 Aml *aml_i2c_serial_bus_device(uint16_t address, const char *resource_source);
 
 /* Block AML object primitives */
-- 
2.19.1





[PATCH 0/4] arm64: Add the cpufreq device to show cpufreq info to guest

2020-02-12 Thread Ying Fang
On the ARM64 platform, CPU frequency is retrieved via ACPI CPPC.
A virtual cpufreq device based on ACPI CPPC is created to
present CPU frequency info to the guest.

The default frequency is set to the host CPU nominal frequency,
which is obtained from the host CPPC sysfs. The other performance
data are set to the same value, since we don't support guest
performance scaling here.

Performance counters are not emulated either; they simply return 1
when read, so the guest should fall back to using the desired
performance value as the current performance.

Guest kernel version above 4.18 is required to make it work.

Ying Fang (4):
  acpi: add aml_generic_register
  acpi/cppc: add ACPI CPPC registers
  arm_virt: add the cpufreq device model
  arm_virt: create the cpufreq device

 default-configs/aarch64-softmmu.mak |   1 +
 hw/acpi/Kconfig |   4 +
 hw/acpi/Makefile.objs   |   1 +
 hw/acpi/aml-build.c |  22 +++
 hw/acpi/cpufreq.c   | 247 
 hw/arm/virt-acpi-build.c|  74 -
 hw/arm/virt.c   |  14 ++
 include/hw/acpi/acpi-defs.h |  32 
 include/hw/acpi/aml-build.h |   3 +
 include/hw/arm/virt.h   |   1 +
 10 files changed, 397 insertions(+), 2 deletions(-)
 create mode 100644 hw/acpi/cpufreq.c

-- 
2.19.1





[PATCH 4/4] arm: Create the cpufreq device

2020-02-12 Thread Ying Fang
Add the cpufreq device to arm64 virt machine

Signed-off-by: Heyi Guo 
Signed-off-by: Ying Fang 
---
 default-configs/aarch64-softmmu.mak |  1 +
 hw/acpi/Kconfig |  4 
 hw/arm/virt.c   | 13 +
 3 files changed, 18 insertions(+)

diff --git a/default-configs/aarch64-softmmu.mak 
b/default-configs/aarch64-softmmu.mak
index 958b1e08e4..0a030e853f 100644
--- a/default-configs/aarch64-softmmu.mak
+++ b/default-configs/aarch64-softmmu.mak
@@ -6,3 +6,4 @@ include arm-softmmu.mak
 CONFIG_XLNX_ZYNQMP_ARM=y
 CONFIG_XLNX_VERSAL=y
 CONFIG_SBSA_REF=y
+CONFIG_CPUFREQ=y
diff --git a/hw/acpi/Kconfig b/hw/acpi/Kconfig
index 54209c6f2f..7d8aa58492 100644
--- a/hw/acpi/Kconfig
+++ b/hw/acpi/Kconfig
@@ -38,3 +38,7 @@ config ACPI_VMGENID
 depends on PC
 
 config ACPI_HW_REDUCED
+
+config CPUFREQ
+bool
+default y
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index ed9dc38b60..53638f9557 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -764,6 +764,17 @@ static void create_uart(const VirtMachineState *vms, int 
uart,
 g_free(nodename);
 }
 
+static void create_cpufreq(const VirtMachineState *vms, MemoryRegion *mem)
+{
+hwaddr base = vms->memmap[VIRT_CPUFREQ].base;
+DeviceState *dev = qdev_create(NULL, "cpufreq");
+SysBusDevice *s = SYS_BUS_DEVICE(dev);
+
+qdev_init_nofail(dev);
+memory_region_add_subregion(mem, base, sysbus_mmio_get_region(s, 0));
+}
+
+
 static void create_rtc(const VirtMachineState *vms)
 {
 char *nodename;
@@ -1723,6 +1734,8 @@ static void machvirt_init(MachineState *machine)
 
 create_uart(vms, VIRT_UART, sysmem, serial_hd(0));
 
+create_cpufreq(vms, sysmem);
+
 if (vms->secure) {
 create_secure_ram(vms, secure_sysmem);
 create_uart(vms, VIRT_SECURE_UART, secure_sysmem, serial_hd(1));
-- 
2.19.1





RE: The issues about architecture of the COLO checkpoint

2020-02-12 Thread Zhang, Chen
Adding Jason Wang on cc; he is a network expert, in case something goes wrong
on the network side.

Thanks
Zhang Chen

From: Zhang, Chen
Sent: Thursday, February 13, 2020 10:10 AM
To: 'Zhanghailiang' ; Daniel Cho 

Cc: Dr. David Alan Gilbert ; qemu-devel@nongnu.org
Subject: RE: The issues about architecture of the COLO checkpoint

For the issue 2:

COLO needs the network packets to confirm that the PVM and SVM are in the same
state. Generally speaking, we can't send PVM packets without comparing them
with SVM packets. But to prevent jamming, I think COLO can force a checkpoint
and send the PVM packets in this case.

Thanks
Zhang Chen

From: Zhanghailiang <zhang.zhanghaili...@huawei.com>
Sent: Thursday, February 13, 2020 9:45 AM
To: Daniel Cho <daniel...@qnap.com>
Cc: Dr. David Alan Gilbert <dgilb...@redhat.com>; qemu-devel@nongnu.org; Zhang, Chen <chen.zh...@intel.com>
Subject: RE: The issues about architecture of the COLO checkpoint

Hi,


1. After re-walking through the code, yes, you are right. Actually, after the
first migration we keep dirty logging enabled on the primary side, and only
send the pages dirtied in the PVM to the SVM. The ram cache on the secondary
side is always a backup of the PVM, so we don't have to re-send the pages that
were not dirtied.

The reason the first checkpoint takes longer is that we have to back up the
whole VM's RAM into the ram cache, that is, colo_init_ram_cache(). It is time
consuming, but I have optimized it in the second patch,
“0001-COLO-Optimize-memory-back-up-process.patch”, which you can find in my
previous reply.

Besides, I found that the optimization I mentioned in my previous reply (“We
can only copy the pages dirtied by PVM and SVM in the last checkpoint.”) has
already been done in the current upstream code.

2. I don't quite understand this question. For COLO, we always need both the
PVM's and the SVM's network packets to compare before sending a packet to the
client. This is how COLO decides whether or not the PVM and SVM are in the
same state.

Thanks,
hailiang

From: Daniel Cho <daniel...@qnap.com>
Sent: Wednesday, February 12, 2020 4:37 PM
To: Zhang, Chen <chen.zh...@intel.com>
Cc: Zhanghailiang <zhang.zhanghaili...@huawei.com>; Dr. David Alan Gilbert <dgilb...@redhat.com>; qemu-devel@nongnu.org
Subject: Re: The issues about architecture of the COLO checkpoint

Hi Hailiang,

Thanks for your reply and the detailed explanation.
We will try to use the attachments to improve the memory copy.

However, we have some questions about your reply.

1. You said that “for each checkpoint, we have to send the whole PVM's pages
To SVM”, so why does only the first checkpoint take a longer pause? In our
observation, the first checkpoint pauses for much longer, while the later
checkpoints pause only briefly. Does that mean only the first checkpoint sends
the whole set of pages to the SVM, and the later checkpoints send just the
dirty pages to the SVM for reloading?

2. We notice that the COLO-COMPARE component holds a packet until it has
received packets from both the PVM and the SVM. Under this rule, when we add
COLO-COMPARE to the PVM, its network is stuck until the SVM starts, so making
the PVM stall while setting up the COLO feature is another issue. Given this,
could we let colo-compare pass the PVM's packet through when the SVM's packet
queue is empty? Then the PVM's network would not get stuck, and "if PVM runs
firstly, it still need to wait for The network packets from SVM to compare
before send it to client side" would not happen either.

Best regards,
Daniel Cho

Zhang, Chen <chen.zh...@intel.com> wrote on Wednesday, February 12, 2020 at 1:45 PM:


> -Original Message-
> From: Zhanghailiang 
> mailto:zhang.zhanghaili...@huawei.com>>
> Sent: Wednesday, February 12, 2020 11:18 AM
> To: Dr. David Alan Gilbert mailto:dgilb...@redhat.com>>; 
> Daniel Cho
> mailto:daniel...@qnap.com>>; Zhang, Chen 
> mailto:chen.zh...@intel.com>>
> Cc: qemu-devel@nongnu.org
> Subject: RE: The issues about architecture of the COLO checkpoint
>
> Hi,
>
> Thank you Dave,
>
> I'll reply here directly.
>
> -Original Message-
> From: Dr. David Alan Gilbert 
> [mailto:dgilb...@redhat.com]
> Sent: Wednesday, February 12, 2020 1:48 AM
> To: Daniel Cho mailto:daniel...@qnap.com>>; 
> chen.zh...@intel.com;
> Zhanghailiang 
> mailto:zhang.zhanghaili...@huawei.com>>
> Cc: qemu-devel@nongnu.org
> Subject: Re: The issues about architecture of the COLO checkpoint
>
>
> cc'ing in COLO people:
>
>
> * Daniel Cho (daniel...@qnap.com) wrote:
> > Hi everyone,
> >  We have some issues about setting COLO feature. Hope somebody
> > could give us some advice.
> >
> > Issue 1:
> >  We dynamic to set COLO feature for PVM(2 core, 16G memory),  but
> > the Primary VM will pause a long time(based on memory size) for
> > waiting SVM start. Does it have any idea 

RE: [PATCH] hw/char/exynos4210_uart: Fix memleaks in exynos4210_uart_init

2020-02-12 Thread Chenqun (kuhn)
>-Original Message-
>From: Eduardo Habkost [mailto:ehabk...@redhat.com]
>Sent: Thursday, February 13, 2020 12:20 AM
>To: Philippe Mathieu-Daudé 
>Cc: Chenqun (kuhn) ; qemu-
>de...@nongnu.org; i.mitsya...@gmail.com; peter.mayd...@linaro.org;
>qemu-triv...@nongnu.org; Zhanghailiang
>; Markus Armbruster
>
>Subject: Re: [PATCH] hw/char/exynos4210_uart: Fix memleaks in
>exynos4210_uart_init
>
>On Wed, Feb 12, 2020 at 08:39:55AM +0100, Philippe Mathieu-Daudé wrote:
>> Cc'ing Eduardo & Markus.
>>
>> On 2/12/20 7:44 AM, Chenqun (kuhn) wrote:
>> > > -Original Message-
>> > > From: Philippe Mathieu-Daudé [mailto:phi...@redhat.com]
>> > > Sent: Wednesday, February 12, 2020 2:16 PM
>> > > To: Chenqun (kuhn) ; qemu-
>> > > de...@nongnu.org; i.mitsya...@gmail.com; peter.mayd...@linaro.org
>> > > Cc: qemu-triv...@nongnu.org; Zhanghailiang
>> > > 
>> > > Subject: Re: [PATCH] hw/char/exynos4210_uart: Fix memleaks in
>> > > exynos4210_uart_init
>> > >
>> > > On 2/12/20 4:36 AM, kuhn.chen...@huawei.com wrote:
>> > > > From: Chen Qun 
>> > > >
>> > > > It's easy to reproduce as follow:
>> > > > virsh qemu-monitor-command vm1 --pretty '{"execute":
>> > > > "device-list-properties",
>"arguments":{"typename":"exynos4210.uart"}}'
>> > > >
>> > > > ASAN shows memory leak stack:
>> > > > #1 0xfffd896d71cb in g_malloc0 (/lib64/libglib-2.0.so.0+0x571cb)
>> > > > #2 0xaaad270beee3 in timer_new_full
>/qemu/include/qemu/timer.h:530
>> > > > #3 0xaaad270beee3 in timer_new /qemu/include/qemu/timer.h:551
>> > > > #4 0xaaad270beee3 in timer_new_ns
>/qemu/include/qemu/timer.h:569
>> > > > #5 0xaaad270beee3 in exynos4210_uart_init
>> > > /qemu/hw/char/exynos4210_uart.c:677
>> > > > #6 0xaaad275c8f4f in object_initialize_with_type
>/qemu/qom/object.c:516
>> > > > #7 0xaaad275c91bb in object_new_with_type
>/qemu/qom/object.c:684
>> > > > #8 0xaaad2755df2f in qmp_device_list_properties
>> > > > /qemu/qom/qom-qmp-cmds.c:152
>> > > >
>> > > > Reported-by: Euler Robot 
>> > > > Signed-off-by: Chen Qun 
>> > > > ---
>> > > >hw/char/exynos4210_uart.c | 8 
>> > > >1 file changed, 4 insertions(+), 4 deletions(-)
>> > > >
>> > > > diff --git a/hw/char/exynos4210_uart.c
>> > > > b/hw/char/exynos4210_uart.c index 25d6588e41..5048db5410 100644
>> > > > --- a/hw/char/exynos4210_uart.c
>> > > > +++ b/hw/char/exynos4210_uart.c
>> > > > @@ -674,10 +674,6 @@ static void exynos4210_uart_init(Object *obj)
>> > > >SysBusDevice *dev = SYS_BUS_DEVICE(obj);
>> > > >Exynos4210UartState *s = EXYNOS4210_UART(dev);
>> > > >
>> > > > -s->fifo_timeout_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
>> > > > - exynos4210_uart_timeout_int, 
>> > > > s);
>> > > > -s->wordtime = NANOSECONDS_PER_SECOND * 10 / 9600;
>> > >
>> > > Why are you moving s->wordtime from init() to realize()?
>> > >
>> > Hi  Philippe,  thanks for your reply!
>> >
>> > Because I found the variable wordtime is usually used with
>fifo_timeout_timer.
>> > Eg, they are used together in the exynos4210_uart_rx_timeout_set
>function.
>> >
>> > I didn't find anything wrong with wordtime in the realize().
>> > Does it have any other effects?
>>
>> IIUC when we use both init() and realize(), realize() should only
>> contains on code that consumes the object properties... But maybe the
>> design is not clear. Then why not move all the init() code to realize()?
>
>Normally I would recommend the opposite: delay as much as possible to
>realize(), to avoid unwanted side effects when (e.g.) running qom-list-
>properties.
>
>But as s->wordtime is a simple struct field (that we could even decide to
>expose to the outside as a read-only QOM property), it doesn't really matter.
>Personally, I would keep it where it is just to avoid churn.
>
OK, let's keep s->wordtime in init().
I will change it in the next version.

Thanks.
>--
>Eduardo



RE: The issues about architecture of the COLO checkpoint

2020-02-12 Thread Zhang, Chen
For the issue 2:

COLO needs the network packets to confirm that the PVM and SVM are in the same
state. Generally speaking, we can't send PVM packets without comparing them
with SVM packets. But to prevent jamming, I think COLO can force a checkpoint
and send the PVM packets in this case.
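
In sketch form, that idea could look like this inside colo-compare (all names below are made up to illustrate the flow, this is not actual code):

/* Illustrative only: what to do with a primary packet when the secondary
 * side has nothing queued to compare against. */
static void handle_primary_packet(Packet *pkt, PacketQueue *svm_queue)
{
    if (packet_queue_empty(svm_queue)) {
        /* Don't hold the packet and stall the primary's network: trigger a
         * forced checkpoint and release the packet to the client. */
        notify_force_checkpoint();
        release_to_client(pkt);
        return;
    }
    compare_and_release(pkt, svm_queue);    /* normal COLO compare path */
}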

Thanks
Zhang Chen

From: Zhanghailiang 
Sent: Thursday, February 13, 2020 9:45 AM
To: Daniel Cho 
Cc: Dr. David Alan Gilbert ; qemu-devel@nongnu.org; Zhang, 
Chen 
Subject: RE: The issues about architecture of the COLO checkpoint

Hi,


1. After re-walking through the code, yes, you are right. Actually, after the
first migration we keep dirty logging enabled on the primary side, and only
send the pages dirtied in the PVM to the SVM. The ram cache on the secondary
side is always a backup of the PVM, so we don't have to re-send the pages that
were not dirtied.

The reason the first checkpoint takes longer is that we have to back up the
whole VM's RAM into the ram cache, that is, colo_init_ram_cache(). It is time
consuming, but I have optimized it in the second patch,
“0001-COLO-Optimize-memory-back-up-process.patch”, which you can find in my
previous reply.
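
The optimization in that patch boils down to copying only the dirtied pages into the ram cache instead of the whole guest RAM; a rough sketch of the idea (illustrative names, not the actual patch):

/* Copy only pages marked dirty since the last checkpoint into the ram cache. */
static void sync_ram_cache(uint8_t *guest_ram, uint8_t *ram_cache,
                           unsigned long *dirty_bitmap, size_t npages)
{
    for (size_t i = 0; i < npages; i++) {
        if (test_and_clear_bit(i, dirty_bitmap)) {
            memcpy(ram_cache + i * TARGET_PAGE_SIZE,
                   guest_ram + i * TARGET_PAGE_SIZE,
                   TARGET_PAGE_SIZE);
        }
    }
}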



Besides, I found that the optimization I mentioned in my previous reply (“We
can only copy the pages dirtied by PVM and SVM in the last checkpoint.”) has
already been done in the current upstream code.

2. I don't quite understand this question. For COLO, we always need both the
PVM's and the SVM's network packets to compare before sending a packet to the
client. This is how COLO decides whether or not the PVM and SVM are in the
same state.

Thanks,
hailiang

From: Daniel Cho <daniel...@qnap.com>
Sent: Wednesday, February 12, 2020 4:37 PM
To: Zhang, Chen <chen.zh...@intel.com>
Cc: Zhanghailiang <zhang.zhanghaili...@huawei.com>; Dr. David Alan Gilbert <dgilb...@redhat.com>; qemu-devel@nongnu.org
Subject: Re: The issues about architecture of the COLO checkpoint

Hi Hailiang,

Thanks for your reply and the detailed explanation.
We will try to use the attachments to improve the memory copy.

However, we have some questions about your reply.

1. You said that "for each checkpoint, we have to send the whole PVM's pages
To SVM", so why does only the first checkpoint take a longer pause? In our
observation, the first checkpoint pauses for much longer, while the later
checkpoints pause only briefly. Does that mean only the first checkpoint sends
the whole set of pages to the SVM, and the later checkpoints send just the
dirty pages to the SVM for reloading?

2. We notice that the COLO-COMPARE component holds a packet until it has
received packets from both the PVM and the SVM. Under this rule, when we add
COLO-COMPARE to the PVM, its network is stuck until the SVM starts, so making
the PVM stall while setting up the COLO feature is another issue. Given this,
could we let colo-compare pass the PVM's packet through when the SVM's packet
queue is empty? Then the PVM's network would not get stuck, and "if PVM runs
firstly, it still need to wait for The network packets from SVM to compare
before send it to client side" would not happen either.

Best regards,
Daniel Cho

Zhang, Chen <chen.zh...@intel.com> wrote on Wednesday, February 12, 2020 at 1:45 PM:


> -----Original Message-----
> From: Zhanghailiang <zhang.zhanghaili...@huawei.com>
> Sent: Wednesday, February 12, 2020 11:18 AM
> To: Dr. David Alan Gilbert <dgilb...@redhat.com>; Daniel Cho
> <daniel...@qnap.com>; Zhang, Chen <chen.zh...@intel.com>
> Cc: qemu-devel@nongnu.org
> Subject: RE: The issues about architecture of the COLO checkpoint
>
> Hi,
>
> Thank you Dave,
>
> I'll reply here directly.
>
> -----Original Message-----
> From: Dr. David Alan Gilbert [mailto:dgilb...@redhat.com]
> Sent: Wednesday, February 12, 2020 1:48 AM
> To: Daniel Cho <daniel...@qnap.com>; chen.zh...@intel.com;
> Zhanghailiang <zhang.zhanghaili...@huawei.com>
> Cc: qemu-devel@nongnu.org
> Subject: Re: The issues about architecture of the COLO checkpoint
>
>
> cc'ing in COLO people:
>
>
> * Daniel Cho (daniel...@qnap.com) wrote:
> > Hi everyone,
> >  We have some issues about setting COLO feature. Hope somebody
> > could give us some advice.
> >
> > Issue 1:
> >  We dynamic to set COLO feature for PVM(2 core, 16G memory),  but
> > the Primary VM will pause a long time(based on memory size) for
> > waiting SVM start. Does it have any idea to reduce the pause time?
> >
>
> Yes, we do have some ideas to optimize this downtime.
>
> The main problem for current version is, for each checkpoint, we have to
> send the whole PVM's pages
> To SVM, and then copy the whole VM's state into SVM from ram cache, in
> this process, we need both of them be paused.
> Just as you said, the downtime is based on memory size.
>
> So firstly, we need to reduce the sending data while do checkpoint, actually,
> we can migrate 

[PATCH 3/4] arm: Add the cpufreq device model

2020-02-12 Thread fangying1
From: Ying Fang 

On ARM64 platform, CPU frequency is retrieved by ACPI CPPC,
so here we create the virtual cpufreq device to present
the CPPC registers and ACPI _CPC objects.

The default frequency is set to the host CPU nominal frequency, which
is obtained from the host CPPC sysfs. Other performance data
are set to the same value, since we don't support guest performance scaling.

Performance counters are also not emulated; they simply return 1
when read, and the guest should fall back to using the desired performance
value as the current performance.
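
A standalone sketch of that fallback calculation (all values below are assumed; this
is not guest kernel code):

```
#include <stdint.h>
#include <stdio.h>

int main(void)
{
    uint32_t nominal_perf = 100;      /* as exposed through the CPPC registers */
    uint32_t desired_perf = 100;
    uint32_t nominal_freq_mhz = 2600; /* assumed host nominal frequency */

    /* With the counters pinned to 1 the delta-based calculation is useless,
     * so derive the current frequency from the desired performance level:
     * current_freq = nominal_freq * desired_perf / nominal_perf */
    uint32_t cur_mhz = (uint32_t)((uint64_t)nominal_freq_mhz * desired_perf /
                                  nominal_perf);
    printf("guest-visible frequency: %u MHz\n", cur_mhz);
    return 0;
}
```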

Signed-off-by: Heyi Guo 
Signed-off-by: Ying Fang 
---
 hw/acpi/Makefile.objs |   1 +
 hw/acpi/cpufreq.c | 247 ++
 2 files changed, 248 insertions(+)
 create mode 100644 hw/acpi/cpufreq.c

diff --git a/hw/acpi/Makefile.objs b/hw/acpi/Makefile.objs
index 777da07f4d..61530675d4 100644
--- a/hw/acpi/Makefile.objs
+++ b/hw/acpi/Makefile.objs
@@ -16,6 +16,7 @@ common-obj-y += bios-linker-loader.o
 common-obj-y += aml-build.o utils.o
 common-obj-$(CONFIG_ACPI_PCI) += pci.o
 common-obj-$(CONFIG_TPM) += tpm.o
+common-obj-$(CONFIG_CPUFREQ) += cpufreq.o
 
 common-obj-$(CONFIG_IPMI) += ipmi.o
 common-obj-$(call lnot,$(CONFIG_IPMI)) += ipmi-stub.o
diff --git a/hw/acpi/cpufreq.c b/hw/acpi/cpufreq.c
new file mode 100644
index 00..f38087884a
--- /dev/null
+++ b/hw/acpi/cpufreq.c
@@ -0,0 +1,247 @@
+/*
+ * ACPI CPPC register device
+ *
+ * Support for showing CPU frequency in guest OS.
+ *
+ * Copyright (c) 2019 HUAWEI TECHNOLOGIES CO.,LTD.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see .
+ */
+
+#include "qemu/osdep.h"
+#include "hw/sysbus.h"
+#include "chardev/char.h"
+#include "qemu/log.h"
+#include "trace.h"
+#include "qemu/option.h"
+#include "sysemu/sysemu.h"
+#include "hw/acpi/acpi-defs.h"
+#include "qemu/cutils.h"
+#include "qemu/error-report.h"
+#include "hw/boards.h"
+
+#define TYPE_CPUFREQ "cpufreq"
+#define CPUFREQ(obj) OBJECT_CHECK(CpufreqState, (obj), TYPE_CPUFREQ)
+#define NOMINAL_FREQ_FILE "/sys/devices/system/cpu/cpu0/acpi_cppc/nominal_freq"
+#define CPU_MAX_FREQ_FILE 
"/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq"
+#define HZ_MAX_LENGTH 1024
+#define MAX_SUPPORT_SPACE 0x1
+
+typedef struct CpufreqState {
+SysBusDevice parent_obj;
+
+MemoryRegion iomem;
+uint32_t HighestPerformance;
+uint32_t NominalPerformance;
+uint32_t LowestNonlinearPerformance;
+uint32_t LowestPerformance;
+uint32_t GuaranteedPerformance;
+uint32_t DesiredPerformance;
+uint64_t ReferencePerformanceCounter;
+uint64_t DeliveredPerformanceCounter;
+uint32_t PerformanceLimited;
+uint32_t LowestFreq;
+uint32_t NominalFreq;
+uint32_t reg_size;
+} CpufreqState;
+
+
+static uint64_t cpufreq_read(void *opaque, hwaddr offset,
+   unsigned size)
+{
+CpufreqState *s = (CpufreqState *)opaque;
+uint64_t r;
+uint64_t n;
+
+MachineState *ms = MACHINE(qdev_get_machine());
+unsigned int smp_cpus = ms->smp.cpus;
+
+if (offset >= smp_cpus * CPPC_REG_PER_CPU_STRIDE) {
+warn_report("cpufreq_read: offset 0x%lx out of range", offset);
+return 0;
+}
+
+n = offset % CPPC_REG_PER_CPU_STRIDE;
+switch (n) {
+case 0:
+r = s->HighestPerformance;
+break;
+case 4:
+r = s->NominalPerformance;
+break;
+case 8:
+r = s->LowestNonlinearPerformance;
+break;
+case 12:
+r = s->LowestPerformance;
+break;
+case 16:
+r = s->GuaranteedPerformance;
+break;
+case 20:
+r = s->DesiredPerformance;
+break;
+/*
+ * We don't have real counters and it is hard to emulate, so always set the
+ * counter value to 1 to rely on Linux to use the DesiredPerformance value
+ * directly.
+ */
+case 24:
+r = s->ReferencePerformanceCounter;
+break;
+/*
+ * Guest may still access the register by 32bit; add the process to
+ * eliminate unnecessary warnings
+ */
+case 28:
+r = s->ReferencePerformanceCounter >> 32;
+break;
+case 32:
+r = s->DeliveredPerformanceCounter;
+break;
+case 36:
+r = s->DeliveredPerformanceCounter >> 32;
+break;
+
+case 40:
+r = s->PerformanceLimited;
+break;
+case 44:
+r = 

[PATCH 0/4] arm64: Add the cpufreq device to show cpufreq info to guest

2020-02-12 Thread fangying1
From: Ying Fang 

On ARM64 platform, cpu frequency is retrieved via ACPI CPPC.
A virtual cpufreq device based on ACPI CPPC is created to
present cpu frequency info to the guest.

The default frequency is set to the host CPU nominal frequency,
which is obtained from the host CPPC sysfs. Other performance
data are set to the same value, since we don't support guest
performance scaling here.

Performance counters are also not emulated; they simply
return 1 when read, and the guest should fall back to using the
desired performance value as the current performance.

Guest kernel version above 4.18 is required to make it work.
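
For reference, a minimal standalone sketch of reading that host sysfs value (the same
node the device model uses for its default; it assumes the node exists and is readable):

```
#include <stdio.h>

int main(void)
{
    const char *path = "/sys/devices/system/cpu/cpu0/acpi_cppc/nominal_freq";
    unsigned long freq = 0;
    FILE *fp = fopen(path, "r");

    if (!fp || fscanf(fp, "%lu", &freq) != 1) {
        perror(path);
        if (fp) {
            fclose(fp);
        }
        return 1;
    }
    fclose(fp);
    /* The CPPC nominal frequency is reported in MHz (assumption based on the
     * ACPI CPPC definition); this is the value used as the default here. */
    printf("host nominal frequency: %lu MHz\n", freq);
    return 0;
}
```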

Ying Fang (4):
  acpi: add aml_generic_register
  acpi/cppc: add ACPI CPPC registers
  arm_virt: add the cpufreq device model
  arm_virt: create the cpufreq device

 default-configs/aarch64-softmmu.mak |   1 +
 hw/acpi/Kconfig |   4 +
 hw/acpi/Makefile.objs   |   1 +
 hw/acpi/aml-build.c |  22 +++
 hw/acpi/cpufreq.c   | 247 
 hw/arm/virt-acpi-build.c|  74 -
 hw/arm/virt.c   |  14 ++
 include/hw/acpi/acpi-defs.h |  32 
 include/hw/acpi/aml-build.h |   3 +
 include/hw/arm/virt.h   |   1 +
 10 files changed, 397 insertions(+), 2 deletions(-)
 create mode 100644 hw/acpi/cpufreq.c

-- 
2.19.1





[PATCH 2/4] acpi/cppc: Add ACPI CPPC registers

2020-02-12 Thread fangying1
From: Ying Fang 

The Continuous Performance Control Package is used to
describe the ACPI CPPC registers.

Signed-off-by: Heyi Guo 
Signed-off-by: Ying Fang 
---
 hw/arm/virt-acpi-build.c| 74 -
 hw/arm/virt.c   |  1 +
 include/hw/acpi/acpi-defs.h | 32 
 include/hw/arm/virt.h   |  1 +
 4 files changed, 106 insertions(+), 2 deletions(-)

diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c
index bd5f771e9b..d133bad738 100644
--- a/hw/arm/virt-acpi-build.c
+++ b/hw/arm/virt-acpi-build.c
@@ -41,6 +41,7 @@
 #include "hw/acpi/pci.h"
 #include "hw/acpi/memory_hotplug.h"
 #include "hw/acpi/generic_event_device.h"
+#include "hw/acpi/acpi-defs.h"
 #include "hw/pci/pcie_host.h"
 #include "hw/pci/pci.h"
 #include "hw/arm/virt.h"
@@ -51,7 +52,70 @@
 
 #define ARM_SPI_BASE 32
 
-static void acpi_dsdt_add_cpus(Aml *scope, int smp_cpus)
+
+static void acpi_dsdt_add_psd(Aml *dev, int cpus)
+{
+Aml *pkg;
+Aml *sub;
+
+sub = aml_package(5);
+aml_append(sub, aml_int(5));
+aml_append(sub, aml_int(0));
+/* Assume all vCPUs belong to the same domain */
+aml_append(sub, aml_int(0));
+/* SW_ANY: OSPM coordinate, initiate on any processor */
+aml_append(sub, aml_int(0xFD));
+aml_append(sub, aml_int(cpus));
+
+pkg = aml_package(1);
+aml_append(pkg, sub);
+
+aml_append(dev, aml_name_decl("_PSD", pkg));
+}
+
+static void acpi_dsdt_add_cppc(Aml *dev, uint64_t cpu_base)
+{
+Aml *cpc;
+int i;
+
+/* ACPI 6.3 8.4.7.1, version 3 of the CPPC table is used */
+cpc = aml_package(23);
+aml_append(cpc, aml_int(23));
+aml_append(cpc, aml_int(3));
+
+for (i = 0; i < CPPC_REG_COUNT; i++) {
+Aml *res;
+uint8_t reg_width;
+uint8_t acc_type;
+uint64_t addr;
+/* Only some necessary registers are emulated */
+if ((i >= MIN_PERF && i < REFERENCE_CTR) ||
+(i >= ENABLE && i < LOWEST_FREQ)) {
+reg_width = 0;
+acc_type = AML_ANY_ACC;
+addr = 0;
+} else {
+addr = cpu_base + i * 4;
+if (i == REFERENCE_CTR || i == DELIVERED_CTR) {
+reg_width = 64;
+acc_type = AML_QWORD_ACC;
+} else {
+reg_width = 32;
+acc_type = AML_DWORD_ACC;
+}
+}
+
+res = aml_resource_template();
+aml_append(res, aml_generic_register(AML_SYSTEM_MEMORY, reg_width, 0,
+ acc_type, addr));
+aml_append(cpc, res);
+}
+
+aml_append(dev, aml_name_decl("_CPC", cpc));
+}
+
+static void acpi_dsdt_add_cpus(Aml *scope, int smp_cpus,
+   const MemMapEntry *cppc_memmap)
 {
 uint16_t i;
 
@@ -60,6 +124,12 @@ static void acpi_dsdt_add_cpus(Aml *scope, int smp_cpus)
 aml_append(dev, aml_name_decl("_HID", aml_string("ACPI0007")));
 aml_append(dev, aml_name_decl("_UID", aml_int(i)));
 aml_append(scope, dev);
+/*
+ * Append _CPC and _PSD to show CPU frequency
+ */
+acpi_dsdt_add_cppc(dev,
+   cppc_memmap->base + i * CPPC_REG_PER_CPU_STRIDE);
+acpi_dsdt_add_psd(dev, smp_cpus);
 }
 }
 
@@ -736,7 +806,7 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, 
VirtMachineState *vms)
  * the RTC ACPI device at all when using UEFI.
  */
 scope = aml_scope("\\_SB");
-acpi_dsdt_add_cpus(scope, vms->smp_cpus);
+acpi_dsdt_add_cpus(scope, vms->smp_cpus, &memmap[VIRT_CPUFREQ]);
acpi_dsdt_add_uart(scope, &memmap[VIRT_UART],
(irqmap[VIRT_UART] + ARM_SPI_BASE));
acpi_dsdt_add_flash(scope, &memmap[VIRT_FLASH]);
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index f788fe27d6..ed9dc38b60 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -144,6 +144,7 @@ static const MemMapEntry base_memmap[] = {
 [VIRT_PCDIMM_ACPI] ={ 0x0907, MEMORY_HOTPLUG_IO_LEN },
 [VIRT_ACPI_GED] =   { 0x0908, ACPI_GED_EVT_SEL_LEN },
 [VIRT_MMIO] =   { 0x0a00, 0x0200 },
+[VIRT_CPUFREQ] ={ 0x0b00, 0x0001 },
 /* ...repeating for a total of NUM_VIRTIO_TRANSPORTS, each of that size */
 [VIRT_PLATFORM_BUS] =   { 0x0c00, 0x0200 },
 [VIRT_SECURE_MEM] = { 0x0e00, 0x0100 },
diff --git a/include/hw/acpi/acpi-defs.h b/include/hw/acpi/acpi-defs.h
index 57a3f58b0c..3a33f7220d 100644
--- a/include/hw/acpi/acpi-defs.h
+++ b/include/hw/acpi/acpi-defs.h
@@ -634,4 +634,36 @@ struct AcpiIortRC {
 } QEMU_PACKED;
 typedef struct AcpiIortRC AcpiIortRC;
 
+/*
+ * CPPC register definition from kernel header
+ * include/acpi/cppc_acpi.h
+ * The last element is newly added for easy use
+ */
+enum cppc_regs {
+HIGHEST_PERF,
+NOMINAL_PERF,
+LOW_NON_LINEAR_PERF,
+LOWEST_PERF,
+GUARANTEED_PERF,
+DESIRED_PERF,
+MIN_PERF,
+MAX_PERF,
+

[PATCH 1/4] acpi: Add aml_generic_register

2020-02-12 Thread fangying1
From: Ying Fang 

The Generic Register Descriptor describes the location of a
fixed-width register within any of the ACPI-defined address spaces.

This is needed to declare the ACPI CPPC registers.
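
For reference, a standalone sketch of the descriptor bytes the helper emits (the space
ID, access size and address below are example values):

```
#include <stdint.h>
#include <stdio.h>

static void build_generic_register(uint8_t out[15], uint8_t space_id,
                                   uint8_t bit_width, uint8_t bit_offset,
                                   uint8_t access_size, uint64_t addr)
{
    int i;

    out[0] = 0x82;         /* Generic Register Descriptor tag      */
    out[1] = 0x0C;         /* length, bits[7:0] (12 bytes follow)  */
    out[2] = 0x00;         /* length, bits[15:8]                   */
    out[3] = space_id;     /* address space ID                     */
    out[4] = bit_width;    /* register bit width                   */
    out[5] = bit_offset;   /* register bit offset                  */
    out[6] = access_size;  /* access size                          */
    for (i = 0; i < 8; i++) {
        out[7 + i] = (uint8_t)(addr >> (i * 8));   /* little-endian address */
    }
}

int main(void)
{
    uint8_t buf[15];
    int i;

    /* example: a 32-bit system-memory (space ID 0) register, DWORD access (3),
     * at an assumed address */
    build_generic_register(buf, 0x00, 32, 0, 3, 0x0b000000);
    for (i = 0; i < 15; i++) {
        printf("%02x ", buf[i]);
    }
    printf("\n");
    return 0;
}
```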

Signed-off-by: Heyi Guo 
Signed-off-by: Ying Fang 
---
 hw/acpi/aml-build.c | 22 ++
 include/hw/acpi/aml-build.h |  3 +++
 2 files changed, 25 insertions(+)

diff --git a/hw/acpi/aml-build.c b/hw/acpi/aml-build.c
index 2c3702b882..79b1431f07 100644
--- a/hw/acpi/aml-build.c
+++ b/hw/acpi/aml-build.c
@@ -1370,6 +1370,28 @@ Aml *aml_sleep(uint64_t msec)
 return var;
 }
 
+/* ACPI 5.0b: 6.4.3.7 Generic Register Descriptor */
+Aml *aml_generic_register(AmlRegionSpace rs, uint8_t reg_width,
+  uint8_t reg_offset, AmlAccessType type, uint64_t 
addr)
+{
+int i;
+Aml *var = aml_alloc();
+build_append_byte(var->buf, 0x82); /* Generic Register Descriptor */
+build_append_byte(var->buf, 0x0C); /* Length, bits[7:0] value = 0x0C */
+build_append_byte(var->buf, 0);/* Length, bits[15:8] value = 0 */
+build_append_byte(var->buf, rs);   /* Address Space ID */
+build_append_byte(var->buf, reg_width);   /* Register Bit Width */
+build_append_byte(var->buf, reg_offset);  /* Register Bit Offset */
+build_append_byte(var->buf, type);/* Access Size */
+
+/* Register address */
+for (i = 0; i < 8; i++) {
+build_append_byte(var->buf, extract64(addr, i * 8, 8));
+}
+
+return var;
+}
+
 static uint8_t Hex2Byte(const char *src)
 {
 int hi, lo;
diff --git a/include/hw/acpi/aml-build.h b/include/hw/acpi/aml-build.h
index de4a406568..37a047b156 100644
--- a/include/hw/acpi/aml-build.h
+++ b/include/hw/acpi/aml-build.h
@@ -364,6 +364,9 @@ Aml *aml_qword_memory(AmlDecode dec, AmlMinFixed min_fixed,
 Aml *aml_dma(AmlDmaType typ, AmlDmaBusMaster bm, AmlTransferSize sz,
  uint8_t channel);
 Aml *aml_sleep(uint64_t msec);
+Aml *aml_generic_register(AmlRegionSpace rs, uint8_t reg_width,
+  uint8_t reg_offset, AmlAccessType type,
+  uint64_t addr);
 Aml *aml_i2c_serial_bus_device(uint16_t address, const char *resource_source);
 
 /* Block AML object primitives */
-- 
2.19.1





[PATCH 4/4] arm: Create the cpufreq device

2020-02-12 Thread fangying1
From: Ying Fang 

Signed-off-by: Heyi Guo 
Signed-off-by: Ying Fang 
---
 default-configs/aarch64-softmmu.mak |  1 +
 hw/acpi/Kconfig |  4 
 hw/arm/virt.c   | 13 +
 3 files changed, 18 insertions(+)

diff --git a/default-configs/aarch64-softmmu.mak 
b/default-configs/aarch64-softmmu.mak
index 958b1e08e4..0a030e853f 100644
--- a/default-configs/aarch64-softmmu.mak
+++ b/default-configs/aarch64-softmmu.mak
@@ -6,3 +6,4 @@ include arm-softmmu.mak
 CONFIG_XLNX_ZYNQMP_ARM=y
 CONFIG_XLNX_VERSAL=y
 CONFIG_SBSA_REF=y
+CONFIG_CPUFREQ=y
diff --git a/hw/acpi/Kconfig b/hw/acpi/Kconfig
index 54209c6f2f..7d8aa58492 100644
--- a/hw/acpi/Kconfig
+++ b/hw/acpi/Kconfig
@@ -38,3 +38,7 @@ config ACPI_VMGENID
 depends on PC
 
 config ACPI_HW_REDUCED
+
+config CPUFREQ
+bool
+default y
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index ed9dc38b60..53638f9557 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -764,6 +764,17 @@ static void create_uart(const VirtMachineState *vms, int 
uart,
 g_free(nodename);
 }
 
+static void create_cpufreq(const VirtMachineState *vms, MemoryRegion *mem)
+{
+hwaddr base = vms->memmap[VIRT_CPUFREQ].base;
+DeviceState *dev = qdev_create(NULL, "cpufreq");
+SysBusDevice *s = SYS_BUS_DEVICE(dev);
+
+qdev_init_nofail(dev);
+memory_region_add_subregion(mem, base, sysbus_mmio_get_region(s, 0));
+}
+
+
 static void create_rtc(const VirtMachineState *vms)
 {
 char *nodename;
@@ -1723,6 +1734,8 @@ static void machvirt_init(MachineState *machine)
 
 create_uart(vms, VIRT_UART, sysmem, serial_hd(0));
 
+create_cpufreq(vms, sysmem);
+
 if (vms->secure) {
 create_secure_ram(vms, secure_sysmem);
 create_uart(vms, VIRT_SECURE_UART, secure_sysmem, serial_hd(1));
-- 
2.19.1





[Bug 1863025] [NEW] Use-after-free after flush in TCG accelerator

2020-02-12 Thread Yifan
Public bug reported:

I believe I found a UAF in TCG that can lead to a guest VM escape. The
security list informed me "This can not be treated as a security issue."
and to post it here. I am looking at the 4.2.0 source code. The issue
requires a race and I will try to describe it in terms of three
concurrent threads.

Thread A:

A1. qemu_tcg_cpu_thread_fn runs work loop
A2. qemu_wait_io_event => qemu_wait_io_event_common => process_queued_cpu_work
A3. start_exclusive critical section entered
A4. do_tb_flush is called, TB memory freed/re-allocated
A5. end_exclusive exits critical section

Thread B:

B1. qemu_tcg_cpu_thread_fn runs work loop
B2. tcg_cpu_exec => cpu_exec => tb_find => tb_gen_code
B3. tcg_tb_alloc obtains a new TB

Thread C:

C1. qemu_tcg_cpu_thread_fn runs work loop
C2. cpu_exec_step_atomic executes
C3. TB obtained with tb_lookup__cpu_state or tb_gen_code
C4. start_exclusive critical section entered
C5. cpu_tb_exec executes the TB code
C6. end_exclusive exits critical section

Consider the following sequence of events:
  B2 => B3 => C3 (same TB as B2) => A3 => A4 (TB freed) => A5 => B2 =>
  B3 (re-allocates TB from B2) => C4 => C5 (freed/reused TB now executing) => C6

In short, because thread C uses the TB in the critical section, there is
no guarantee that the pointer has not been "freed" (rather the memory is
marked as re-usable) and therefore a use-after-free occurs.

Since the TCG generated code can be in the same memory as the TB data
structure, it is possible for an attacker to overwrite the UAF pointer
with code generated from TCG. This can overwrite key pointer values and
could lead to code execution on the host outside of the TCG sandbox.
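
A standalone model of the window (hypothetical names; this is not the QEMU source):

```
#include <stdio.h>
#include <stdlib.h>

typedef struct { int opcode; } Block;

static Block *current_block;            /* shared, TB-like object */

static void flush_blocks(void)          /* stands in for do_tb_flush() */
{
    free(current_block);                /* old block memory is released...   */
    current_block = malloc(sizeof(*current_block));   /* ...and may be reused */
    current_block->opcode = 42;
}

int main(void)
{
    current_block = malloc(sizeof(*current_block));
    current_block->opcode = 7;

    Block *tb = current_block;   /* C3: lookup happens before the exclusive section */
    flush_blocks();              /* A3-A5: flush runs on another vCPU               */
    /* C4/C5: executing 'tb' now would touch freed (possibly reused) memory;
     * doing the lookup only after entering the exclusive section closes the window. */
    printf("stale pointer %p, live block %p\n", (void *)tb, (void *)current_block);

    free(current_block);
    return 0;
}
```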

** Affects: qemu
 Importance: Undecided
 Status: New

** Description changed:

- I believe I found a UAF in TCG that can lead to a guest VM escape. The 
security 
- list informed me "This can not be treated as a security issue." and to post 
it 
- here. I am looking at the 4.2.0 source code. The issue requires a race and I 
- will try to describe it in terms of three concurrent threads.
- 
- I am looking 
- at the 4.2.0 source code. The issue requires a race and I will try to 
describe 
- it in terms of three concurrent threads.
+ I believe I found a UAF in TCG that can lead to a guest VM escape. The
+ security list informed me "This can not be treated as a security issue."
+ and to post it here. I am looking at the 4.2.0 source code. The issue
+ requires a race and I will try to describe it in terms of three
+ concurrent threads.
  
  Thread A:
  
  A1. qemu_tcg_cpu_thread_fn runs work loop
  A2. qemu_wait_io_event => qemu_wait_io_event_common => process_queued_cpu_work
  A3. start_exclusive critical section entered
  A4. do_tb_flush is called, TB memory freed/re-allocated
  A5. end_exclusive exits critical section
  
  Thread B:
  
  B1. qemu_tcg_cpu_thread_fn runs work loop
  B2. tcg_cpu_exec => cpu_exec => tb_find => tb_gen_code
  B3. tcg_tb_alloc obtains a new TB
  
  Thread C:
  
  C1. qemu_tcg_cpu_thread_fn runs work loop
  C2. cpu_exec_step_atomic executes
  C3. TB obtained with tb_lookup__cpu_state or tb_gen_code
  C4. start_exclusive critical section entered
  C5. cpu_tb_exec executes the TB code
  C6. end_exclusive exits critical section
  
  Consider the following sequence of events:
-   B2 => B3 => C3 (same TB as B2) => A3 => A4 (TB freed) => A5 => B2 => 
-   B3 (re-allocates TB from B2) => C4 => C5 (freed/reused TB now executing) => 
C6
+   B2 => B3 => C3 (same TB as B2) => A3 => A4 (TB freed) => A5 => B2 =>
+   B3 (re-allocates TB from B2) => C4 => C5 (freed/reused TB now executing) => 
C6
  
- In short, because thread C uses the TB in the critical section, there is no 
- guarantee that the pointer has not been "freed" (rather the memory is marked 
as 
- re-usable) and therefore a use-after-free occurs.
+ In short, because thread C uses the TB in the critical section, there is
+ no guarantee that the pointer has not been "freed" (rather the memory is
+ marked as re-usable) and therefore a use-after-free occurs.
  
- Since the TCG generated code can be in the same memory as the TB data 
structure,
- it is possible for an attacker to overwrite the UAF pointer with code 
generated
- from TCG. This can overwrite key pointer values and could lead to code 
- execution on the host outside of the TCG sandbox.
+ Since the TCG generated code can be in the same memory as the TB data
+ structure, it is possible for an attacker to overwrite the UAF pointer
+ with code generated from TCG. This can overwrite key pointer values and
+ could lead to code execution on the host outside of the TCG sandbox.

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1863025

Title:
  Use-after-free after flush in TCG accelerator

Status in QEMU:
  New

Bug description:
  I believe I found a UAF in TCG that can lead to a guest VM escape. The
  security list 

[Bug 1863023] [NEW] Deadlock in QXL

2020-02-12 Thread Yifan
Public bug reported:

This is on qemu 4.2.0 OSX host, running fresh Windows 7 with SPICE guest
tools just installed.

Command line: `qemu-system-x86_64 -qmp tcp:localhost:,server,nowait
-smp cpus=2 -boot order=d -m 2048 -soundhw hda -drive
file=hda.img,if=ide,media=disk -spice port=5930,addr=127.0.0.1,disable-
ticketing,image-compression=off,playback-compression=off,streaming-
video=off -vga qxl -device rtl8139,netdev=net0 -netdev user,id=net0`

After the Windows logo, the screen is black. I dump the two vCPU
threads:

```
* thread #16
  * frame #0: 0x7fff523b8ce6 libsystem_kernel.dylib`__psynch_cvwait + 10
frame #1: 0x7fff52467185 libsystem_pthread.dylib`_pthread_cond_wait + 
701
frame #2: 0x000110bf88bd 
qemu-system-x86_64`qemu_cond_wait_impl(cond=0x00011121e8d0, 
mutex=0x00011120ba48, file="cpus-common.c", line=144) at 
qemu-thread-posix.c:173:11 [opt]
frame #3: 0x000110926a59 
qemu-system-x86_64`do_run_on_cpu(cpu=, func=, 
data=, mutex=0x00011120ba48) at cpus-common.c:144:9 [opt]
frame #4: 0x00011080c50a 
qemu-system-x86_64`memory_region_snapshot_and_clear_dirty at memory.c:2595:5 
[opt]
frame #5: 0x00011080c4d7 
qemu-system-x86_64`memory_region_snapshot_and_clear_dirty(mr=, 
addr=0, size=2359296, client=) at memory.c:2107 [opt]
frame #6: 0x000110849fe1 qemu-system-x86_64`vga_update_display 
[inlined] vga_draw_graphic(s=, full_update=0) at vga.c:1661:16 
[opt]
frame #7: 0x00011084996a 
qemu-system-x86_64`vga_update_display(opaque=) at vga.c:1785 [opt]
frame #8: 0x0001109b261d 
qemu-system-x86_64`qxl_hard_reset(d=0x7f84f873, loadvm=0) at 
qxl.c:1285:5 [opt]
frame #9: 0x00011080ac97 
qemu-system-x86_64`memory_region_write_accessor(mr=0x7f84f8741fb0, addr=5, 
value=, size=1, shift=, mask=, 
attrs=MemTxAttrs @ 0x7786d890) at memory.c:483:5 [opt]
frame #10: 0x00011080ab31 
qemu-system-x86_64`memory_region_dispatch_write [inlined] 
access_with_adjusted_size(addr=, value=0x015c6100, 
size=, access_size_min=, 
access_size_max=, access_fn=, mr=, 
attrs=) at memory.c:544:18 [opt]
frame #11: 0x00011080aafd 
qemu-system-x86_64`memory_region_dispatch_write(mr=, 
addr=, data=22831360, op=32644, attrs=MemTxAttrs @ 
0x7786d8c0) at memory.c:1475 [opt]
frame #12: 0x0001107b080d 
qemu-system-x86_64`address_space_stb(as=, addr=, 
val=22831360, attrs=MemTxAttrs @ r12, result=0x) at 
memory_ldst.inc.c:378:13 [opt]
frame #13: 0x000118570230

* thread #18
  * frame #0: 0x7fff523b8ce6 libsystem_kernel.dylib`__psynch_cvwait + 10
frame #1: 0x7fff52467185 libsystem_pthread.dylib`_pthread_cond_wait + 
701
frame #2: 0x000110bf88bd 
qemu-system-x86_64`qemu_cond_wait_impl(cond=0x00011121e860, 
mutex=0x00011121e818, file="cpus-common.c", line=196) at 
qemu-thread-posix.c:173:11 [opt]
frame #3: 0x000110926c44 qemu-system-x86_64`start_exclusive at 
cpus-common.c:196:9 [opt]
frame #4: 0x000110837c35 
qemu-system-x86_64`cpu_exec_step_atomic(cpu=0x7f851829) at 
cpu-exec.c:265:9 [opt]
frame #5: 0x0001107fcf95 
qemu-system-x86_64`qemu_tcg_cpu_thread_fn(arg=0x7f851829) at 
cpus.c:1799:17 [opt]
frame #6: 0x000110bf911e 
qemu-system-x86_64`qemu_thread_start(args=) at 
qemu-thread-posix.c:519:9 [opt]
frame #7: 0x7fff52466e65 libsystem_pthread.dylib`_pthread_start + 148
frame #8: 0x7fff5246283b libsystem_pthread.dylib`thread_start + 15
```

It seems like thread #16 had a STB to the QXL MMIO registers, which caused it to
call `qxl_hard_reset` and eventually made its way to `do_run_on_cpu`,
which waits for `qemu_work_cond`. The only way `qemu_work_cond` gets signalled
is if one of the two vCPUs executes the queued work at the end of the TCG
execution. Thread #16 is stuck waiting, so what about thread #18? Thread
#18 is waiting for `exclusive_cond`, which is set once all the other CPUs
are done running (but thread #16 is still waiting). So it is a classic deadlock.

** Affects: qemu
 Importance: Undecided
 Status: New

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1863023

Title:
  Deadlock in QXL

Status in QEMU:
  New

Bug description:
  This is on qemu 4.2.0 OSX host, running fresh Windows 7 with SPICE
  guest tools just installed.

  Command line: `qemu-system-x86_64 -qmp
  tcp:localhost:,server,nowait -smp cpus=2 -boot order=d -m 2048
  -soundhw hda -drive file=hda.img,if=ide,media=disk -spice
  port=5930,addr=127.0.0.1,disable-ticketing,image-compression=off
  ,playback-compression=off,streaming-video=off -vga qxl -device
  rtl8139,netdev=net0 -netdev user,id=net0`

  After the Windows logo, the screen is black. I dump the two vCPU
  threads:

  ```
  * thread #16
* frame #0: 0x7fff523b8ce6 libsystem_kernel.dylib`__psynch_cvwait + 10
  frame #1: 0x7fff52467185 

[PATCH v2 7/9] linux-user: x86_64: Update syscall numbers to kernel 5.5 level

2020-02-12 Thread Aleksandar Markovic
From: Aleksandar Markovic 

Update x86_64 syscall numbers based on Linux kernel v5.5.

CC: Paolo Bonzini 
CC: Richard Henderson 
CC: Eduardo Habkost 
Signed-off-by: Aleksandar Markovic 
Reviewed-by: Laurent Vivier 
---
 linux-user/x86_64/syscall_nr.h | 24 
 1 file changed, 24 insertions(+)

diff --git a/linux-user/x86_64/syscall_nr.h b/linux-user/x86_64/syscall_nr.h
index 9b6981e..e5d14ec 100644
--- a/linux-user/x86_64/syscall_nr.h
+++ b/linux-user/x86_64/syscall_nr.h
@@ -328,5 +328,29 @@
#define TARGET_NR_membarrier        324
#define TARGET_NR_mlock2            325
 #define TARGET_NR_copy_file_range   326
+#define TARGET_NR_preadv2   327
+#define TARGET_NR_pwritev2  328
+#define TARGET_NR_pkey_mprotect 329
+#define TARGET_NR_pkey_alloc        330
+#define TARGET_NR_pkey_free 331
+#define TARGET_NR_statx 332
+#define TARGET_NR_io_pgetevents 333
+#define TARGET_NR_rseq  334
+/*
+ * don't use numbers 387 through 423, add new calls after the last
+ * 'common' entry
+ */
+#define TARGET_NR_pidfd_send_signal 424
+#define TARGET_NR_io_uring_setup    425
+#define TARGET_NR_io_uring_enter    426
+#define TARGET_NR_io_uring_register 427
+#define TARGET_NR_open_tree         428
+#define TARGET_NR_move_mount        429
+#define TARGET_NR_fsopen            430
+#define TARGET_NR_fsconfig          431
+#define TARGET_NR_fsmount           432
+#define TARGET_NR_fspick            433
+#define TARGET_NR_pidfd_open        434
+#define TARGET_NR_clone3            435
 
 #endif
-- 
2.7.4




[PATCH v2 9/9] linux-user: xtensa: Remove unused constant TARGET_NR_syscall_count

2020-02-12 Thread Aleksandar Markovic
From: Aleksandar Markovic 

Currently, there is no usage of TARGET_NR_syscall_count for the xtensa
target, and there is no obvious indication that any usage is planned in the
future.

CC: Max Filippov 
Acked-by: Max Filippov 
Signed-off-by: Aleksandar Markovic 
Reviewed-by: Laurent Vivier 
---
 linux-user/xtensa/syscall_nr.h | 2 --
 1 file changed, 2 deletions(-)

diff --git a/linux-user/xtensa/syscall_nr.h b/linux-user/xtensa/syscall_nr.h
index 3d19d0c..39bff65 100644
--- a/linux-user/xtensa/syscall_nr.h
+++ b/linux-user/xtensa/syscall_nr.h
@@ -466,6 +466,4 @@
 #define TARGET_NR_pidfd_open 434
 #define TARGET_NR_clone3 435
 
-#define TARGET_NR_syscall_count  436
-
 #endif /* XTENSA_SYSCALL_NR_H */
-- 
2.7.4




[PATCH v2 8/9] linux-user: xtensa: Update syscall numbers to kernel 5.5 level

2020-02-12 Thread Aleksandar Markovic
From: Aleksandar Markovic 

Update xtensa syscall numbers based on Linux kernel v5.5.

CC: Max Filippov 
Acked-by: Max Filippov 
Signed-off-by: Aleksandar Markovic 
Reviewed-by: Laurent Vivier 
---
 linux-user/xtensa/syscall_nr.h | 38 --
 1 file changed, 36 insertions(+), 2 deletions(-)

diff --git a/linux-user/xtensa/syscall_nr.h b/linux-user/xtensa/syscall_nr.h
index 27645be..3d19d0c 100644
--- a/linux-user/xtensa/syscall_nr.h
+++ b/linux-user/xtensa/syscall_nr.h
@@ -431,7 +431,41 @@
 #define TARGET_NR_pkey_free  350
 
 #define TARGET_NR_statx  351
-
-#define TARGET_NR_syscall_count  352
+#define TARGET_NR_rseq   352
+/* 353 through 402 are unassigned to sync up with generic numbers */
+#define TARGET_NR_clock_gettime64            403
+#define TARGET_NR_clock_settime64            404
+#define TARGET_NR_clock_adjtime64            405
+#define TARGET_NR_clock_getres_time64        406
+#define TARGET_NR_clock_nanosleep_time64     407
+#define TARGET_NR_timer_gettime64            408
+#define TARGET_NR_timer_settime64            409
+#define TARGET_NR_timerfd_gettime64          410
+#define TARGET_NR_timerfd_settime64          411
+#define TARGET_NR_utimensat_time64           412
+#define TARGET_NR_pselect6_time64            413
+#define TARGET_NR_ppoll_time64               414
+#define TARGET_NR_io_pgetevents_time64       416
+#define TARGET_NR_recvmmsg_time64            417
+#define TARGET_NR_mq_timedsend_time64        418
+#define TARGET_NR_mq_timedreceive_time64 419
+#define TARGET_NR_semtimedop_time64  420
+#define TARGET_NR_rt_sigtimedwait_time64 421
+#define TARGET_NR_futex_time64   422
+#define TARGET_NR_sched_rr_get_interval_time64   423
+#define TARGET_NR_pidfd_send_signal  424
+#define TARGET_NR_io_uring_setup 425
+#define TARGET_NR_io_uring_enter 426
+#define TARGET_NR_io_uring_register  427
+#define TARGET_NR_open_tree  428
+#define TARGET_NR_move_mount 429
+#define TARGET_NR_fsopen 430
+#define TARGET_NR_fsconfig   431
+#define TARGET_NR_fsmount432
+#define TARGET_NR_fspick 433
+#define TARGET_NR_pidfd_open 434
+#define TARGET_NR_clone3 435
+
+#define TARGET_NR_syscall_count  436
 
 #endif /* XTENSA_SYSCALL_NR_H */
-- 
2.7.4




[PATCH v2 2/9] linux-user: arm: Update syscall numbers to kernel 5.5 level

2020-02-12 Thread Aleksandar Markovic
From: Aleksandar Markovic 

Update arm syscall numbers based on Linux kernel v5.5.

CC: Peter Maydell 
Signed-off-by: Aleksandar Markovic 
Reviewed-by: Laurent Vivier 
---
 linux-user/arm/syscall_nr.h | 44 
 1 file changed, 44 insertions(+)

diff --git a/linux-user/arm/syscall_nr.h b/linux-user/arm/syscall_nr.h
index e7eda0d..6db9235 100644
--- a/linux-user/arm/syscall_nr.h
+++ b/linux-user/arm/syscall_nr.h
@@ -399,5 +399,49 @@
 #define TARGET_NR_userfaultfd  (388)
 #define TARGET_NR_membarrier   (389)
 #define TARGET_NR_mlock2   (390)
+#define TARGET_NR_copy_file_range  (391)
+#define TARGET_NR_preadv2  (392)
+#define TARGET_NR_pwritev2 (393)
+#define TARGET_NR_pkey_mprotect        (394)
+#define TARGET_NR_pkey_alloc           (395)
+#define TARGET_NR_pkey_free            (396)
+#define TARGET_NR_statx                (397)
+#define TARGET_NR_rseq                 (398)
+#define TARGET_NR_io_pgetevents        (399)
+#define TARGET_NR_migrate_pages        (400)
+#define TARGET_NR_kexec_file_load      (401)
+/* 402 is unused */
+#define TARGET_NR_clock_gettime64  (403)
+#define TARGET_NR_clock_settime64  (404)
+#define TARGET_NR_clock_adjtime64  (405)
+#define TARGET_NR_clock_getres_time64  (406)
+#define TARGET_NR_clock_nanosleep_time64   (407)
+#define TARGET_NR_timer_gettime64  (408)
+#define TARGET_NR_timer_settime64  (409)
+#define TARGET_NR_timerfd_gettime64    (410)
+#define TARGET_NR_timerfd_settime64    (411)
+#define TARGET_NR_utimensat_time64 (412)
+#define TARGET_NR_pselect6_time64  (413)
+#define TARGET_NR_ppoll_time64 (414)
+#define TARGET_NR_io_pgetevents_time64 (416)
+#define TARGET_NR_recvmmsg_time64  (417)
+#define TARGET_NR_mq_timedsend_time64  (418)
+#define TARGET_NR_mq_timedreceive_time64   (419)
+#define TARGET_NR_semtimedop_time64(420)
+#define TARGET_NR_rt_sigtimedwait_time64   (421)
+#define TARGET_NR_futex_time64 (422)
+#define TARGET_NR_sched_rr_get_interval_time64 (423)
+#define TARGET_NR_pidfd_send_signal    (424)
+#define TARGET_NR_io_uring_setup       (425)
+#define TARGET_NR_io_uring_enter       (426)
+#define TARGET_NR_io_uring_register    (427)
+#define TARGET_NR_open_tree            (428)
+#define TARGET_NR_move_mount   (429)
+#define TARGET_NR_fsopen   (430)
+#define TARGET_NR_fsconfig (431)
+#define TARGET_NR_fsmount  (432)
+#define TARGET_NR_fspick   (433)
+#define TARGET_NR_pidfd_open   (434)
+#define TARGET_NR_clone3   (435)
 
 #endif
-- 
2.7.4




[PATCH v2 3/9] linux-user: m68k: Update syscall numbers to kernel 5.5 level

2020-02-12 Thread Aleksandar Markovic
From: Aleksandar Markovic 

Update m68k syscall numbers based on Linux kernel v5.5.

CC: Laurent Vivier 
Signed-off-by: Aleksandar Markovic 
Reviewed-by: Laurent Vivier 
---
 linux-user/m68k/syscall_nr.h | 50 +++-
 1 file changed, 49 insertions(+), 1 deletion(-)

diff --git a/linux-user/m68k/syscall_nr.h b/linux-user/m68k/syscall_nr.h
index d33d8e9..01aee34 100644
--- a/linux-user/m68k/syscall_nr.h
+++ b/linux-user/m68k/syscall_nr.h
@@ -382,5 +382,53 @@
 #define TARGET_NR_copy_file_range   376
 #define TARGET_NR_preadv2   377
 #define TARGET_NR_pwritev2  378
-
+#define TARGET_NR_statx 379
+#define TARGET_NR_seccomp   380
+#define TARGET_NR_pkey_mprotect 381
+#define TARGET_NR_pkey_alloc        382
+#define TARGET_NR_pkey_free 383
+#define TARGET_NR_rseq  384
+/* room for arch specific calls */
+#define TARGET_NR_semget            393
+#define TARGET_NR_semctl            394
+#define TARGET_NR_shmget            395
+#define TARGET_NR_shmctl            396
+#define TARGET_NR_shmat             397
+#define TARGET_NR_shmdt             398
+#define TARGET_NR_msgget            399
+#define TARGET_NR_msgsnd            400
+#define TARGET_NR_msgrcv            401
+#define TARGET_NR_msgctl            402
+#define TARGET_NR_clock_gettime64   403
+#define TARGET_NR_clock_settime64   404
+#define TARGET_NR_clock_adjtime64   405
+#define TARGET_NR_clock_getres_time64   406
+#define TARGET_NR_clock_nanosleep_time64 407
+#define TARGET_NR_timer_gettime64   408
+#define TARGET_NR_timer_settime64   409
+#define TARGET_NR_timerfd_gettime64 410
+#define TARGET_NR_timerfd_settime64 411
+#define TARGET_NR_utimensat_time64  412
+#define TARGET_NR_pselect6_time64   413
+#define TARGET_NR_ppoll_time64  414
+#define TARGET_NR_io_pgetevents_time64  416
+#define TARGET_NR_recvmmsg_time64   417
+#define TARGET_NR_mq_timedsend_time64   418
+#define TARGET_NR_mq_timedreceive_time64 419
+#define TARGET_NR_semtimedop_time64 420
+#define TARGET_NR_rt_sigtimedwait_time64 421
+#define TARGET_NR_futex_time64  422
+#define TARGET_NR_sched_rr_get_interval_time64 423
+#define TARGET_NR_pidfd_send_signal 424
+#define TARGET_NR_io_uring_setup    425
+#define TARGET_NR_io_uring_enter    426
+#define TARGET_NR_io_uring_register 427
+#define TARGET_NR_open_tree         428
+#define TARGET_NR_move_mount        429
+#define TARGET_NR_fsopen            430
+#define TARGET_NR_fsconfig          431
+#define TARGET_NR_fsmount           432
+#define TARGET_NR_fspick            433
+#define TARGET_NR_pidfd_open        434
+/* 435 reserved for clone3 */
 #endif
-- 
2.7.4




[PATCH v2 5/9] linux-user: mips: Update syscall numbers to kernel 5.5 level

2020-02-12 Thread Aleksandar Markovic
From: Aleksandar Markovic 

Update mips syscall numbers based on Linux kernel tag v5.5.

CC: Aurelien Jarno 
CC: Aleksandar Rikalo 
Signed-off-by: Aleksandar Markovic 
---
 linux-user/mips/syscall_nr.h   | 45 +++
 linux-user/mips64/syscall_nr.h | 50 -
 linux-user/mips/cpu_loop.c | 83 +-
 3 files changed, 175 insertions(+), 3 deletions(-)

diff --git a/linux-user/mips/syscall_nr.h b/linux-user/mips/syscall_nr.h
index 7fa7fa5..0be3af1 100644
--- a/linux-user/mips/syscall_nr.h
+++ b/linux-user/mips/syscall_nr.h
@@ -376,5 +376,50 @@
 #define TARGET_NR_statx (TARGET_NR_Linux + 366)
 #define TARGET_NR_rseq  (TARGET_NR_Linux + 367)
 #define TARGET_NR_io_pgetevents (TARGET_NR_Linux + 368)
+/* room for arch specific calls */
+#define TARGET_NR_semget                (TARGET_NR_Linux + 393)
+#define TARGET_NR_semctl                (TARGET_NR_Linux + 394)
+#define TARGET_NR_shmget                (TARGET_NR_Linux + 395)
+#define TARGET_NR_shmctl                (TARGET_NR_Linux + 396)
+#define TARGET_NR_shmat                 (TARGET_NR_Linux + 397)
+#define TARGET_NR_shmdt                 (TARGET_NR_Linux + 398)
+#define TARGET_NR_msgget                (TARGET_NR_Linux + 399)
+#define TARGET_NR_msgsnd                (TARGET_NR_Linux + 400)
+#define TARGET_NR_msgrcv                (TARGET_NR_Linux + 401)
+#define TARGET_NR_msgctl                (TARGET_NR_Linux + 402)
+/* 403-423 common for 32-bit archs */
+#define TARGET_NR_clock_gettime64  (TARGET_NR_Linux + 403)
+#define TARGET_NR_clock_settime64  (TARGET_NR_Linux + 404)
+#define TARGET_NR_clock_adjtime64  (TARGET_NR_Linux + 405)
+#define TARGET_NR_clock_getres_time64  (TARGET_NR_Linux + 406)
+#define TARGET_NR_clock_nanosleep_time64   (TARGET_NR_Linux + 407)
+#define TARGET_NR_timer_gettime64  (TARGET_NR_Linux + 408)
+#define TARGET_NR_timer_settime64  (TARGET_NR_Linux + 409)
+#define TARGET_NR_timerfd_gettime64(TARGET_NR_Linux + 410)
+#define TARGET_NR_timerfd_settime64(TARGET_NR_Linux + 411)
+#define TARGET_NR_utimensat_time64 (TARGET_NR_Linux + 412)
+#define TARGET_NR_pselect6_time64  (TARGET_NR_Linux + 413)
+#define TARGET_NR_ppoll_time64 (TARGET_NR_Linux + 414)
+#define TARGET_NR_io_pgetevents_time64 (TARGET_NR_Linux + 416)
+#define TARGET_NR_recvmmsg_time64  (TARGET_NR_Linux + 417)
+#define TARGET_NR_mq_timedsend_time64  (TARGET_NR_Linux + 418)
+#define TARGET_NR_mq_timedreceive_time64   (TARGET_NR_Linux + 419)
+#define TARGET_NR_semtimedop_time64(TARGET_NR_Linux + 420)
+#define TARGET_NR_rt_sigtimedwait_time64   (TARGET_NR_Linux + 421)
+#define TARGET_NR_futex_time64 (TARGET_NR_Linux + 422)
+#define TARGET_NR_sched_rr_get_interval_time64 (TARGET_NR_Linux + 423)
+/* 424 onwards common for all archs */
+#define TARGET_NR_pidfd_send_signal    (TARGET_NR_Linux + 424)
+#define TARGET_NR_io_uring_setup       (TARGET_NR_Linux + 425)
+#define TARGET_NR_io_uring_enter       (TARGET_NR_Linux + 426)
+#define TARGET_NR_io_uring_register    (TARGET_NR_Linux + 427)
+#define TARGET_NR_open_tree            (TARGET_NR_Linux + 428)
+#define TARGET_NR_move_mount   (TARGET_NR_Linux + 429)
+#define TARGET_NR_fsopen   (TARGET_NR_Linux + 430)
+#define TARGET_NR_fsconfig (TARGET_NR_Linux + 431)
+#define TARGET_NR_fsmount  (TARGET_NR_Linux + 432)
+#define TARGET_NR_fspick   (TARGET_NR_Linux + 433)
+#define TARGET_NR_pidfd_open   (TARGET_NR_Linux + 434)
+#define TARGET_NR_clone3   (TARGET_NR_Linux + 435)
 
 #endif
diff --git a/linux-user/mips64/syscall_nr.h b/linux-user/mips64/syscall_nr.h
index db40f69..6e23e9f 100644
--- a/linux-user/mips64/syscall_nr.h
+++ b/linux-user/mips64/syscall_nr.h
@@ -339,6 +339,39 @@
 #define TARGET_NR_statx (TARGET_NR_Linux + 330)
 #define TARGET_NR_rseq  (TARGET_NR_Linux + 331)
 #define TARGET_NR_io_pgetevents (TARGET_NR_Linux + 332)
+/* 333 through 402 are unassigned to sync up with generic numbers */
+#define TARGET_NR_clock_gettime64   (TARGET_NR_Linux + 403)
+#define TARGET_NR_clock_settime64   (TARGET_NR_Linux + 404)
+#define TARGET_NR_clock_adjtime64   (TARGET_NR_Linux + 405)
+#define TARGET_NR_clock_getres_time64   (TARGET_NR_Linux + 406)
+#define TARGET_NR_clock_nanosleep_time64 (TARGET_NR_Linux + 407)
+#define TARGET_NR_timer_gettime64   (TARGET_NR_Linux + 408)
+#define TARGET_NR_timer_settime64   (TARGET_NR_Linux + 409)
+#define TARGET_NR_timerfd_gettime64 (TARGET_NR_Linux + 410)
+#define TARGET_NR_timerfd_settime64 (TARGET_NR_Linux + 411)
+#define TARGET_NR_utimensat_time64  

[PATCH v2 0/9] linux-user: Update syscall numbers to kernel 5.5 level

2020-02-12 Thread Aleksandar Markovic
From: Aleksandar Markovic 

v1->v2:

  - corrected mips parts based on Laurent's review

This series is a spin-off of another larger linux-user series
that become too large to handle, hence these patches related to
syscall numbers are now in this, separate, series.

This series covers updating syscall numbers defined in the following
files:

  - linux-user/alpha/syscall_nr.h
  - linux-user/arm/syscall_nr.h
  - linux-user/m68k/syscall_nr.h
  - linux-user/microblaze/syscall_nr.h
  - linux-user/mips/cpu_loop.c
  - linux-user/mips/syscall_nr.h
  - linux-user/mips64/syscall_nr.h
  - linux-user/sh4/syscall_nr.h
  - linux-user/x86_64/syscall_nr.h
  - linux-user/xtensa/syscall_nr.h

--

Aleksandar Markovic (9):
  linux-user: alpha: Update syscall numbers to kernel 5.5 level
  linux-user: arm: Update syscall numbers to kernel 5.5 level
  linux-user: m68k: Update syscall numbers to kernel 5.5 level
  linux-user: microblaze: Update syscall numbers to kernel 5.5 level
  linux-user: mips: Update syscall numbers to kernel 5.5 level
  linux-user: sh4: Update syscall numbers to kernel 5.5 level
  linux-user: x86_64: Update syscall numbers to kernel 5.5 level
  linux-user: xtensa: Update syscall numbers to kernel 5.5 level
  linux-user: xtensa: Remove unused constant TARGET_NR_syscall_count

 linux-user/alpha/syscall_nr.h  | 35 
 linux-user/arm/syscall_nr.h| 44 
 linux-user/m68k/syscall_nr.h   | 50 ++-
 linux-user/microblaze/syscall_nr.h | 45 +
 linux-user/mips/syscall_nr.h   | 45 +
 linux-user/mips64/syscall_nr.h | 50 ++-
 linux-user/sh4/syscall_nr.h| 48 ++
 linux-user/x86_64/syscall_nr.h | 24 +++
 linux-user/xtensa/syscall_nr.h | 36 -
 linux-user/mips/cpu_loop.c | 83 +-
 10 files changed, 454 insertions(+), 6 deletions(-)

-- 
2.7.4




[PATCH v2 1/9] linux-user: alpha: Update syscall numbers to kernel 5.5 level

2020-02-12 Thread Aleksandar Markovic
From: Aleksandar Markovic 

Update alpha syscall numbers based on Linux kernel v5.5.

CC: Richard Henderson 
Signed-off-by: Aleksandar Markovic 
Reviewed-by: Laurent Vivier 
---
 linux-user/alpha/syscall_nr.h | 35 +++
 1 file changed, 35 insertions(+)

diff --git a/linux-user/alpha/syscall_nr.h b/linux-user/alpha/syscall_nr.h
index 2e5541b..c29fc17 100644
--- a/linux-user/alpha/syscall_nr.h
+++ b/linux-user/alpha/syscall_nr.h
@@ -453,5 +453,40 @@
 #define TARGET_NR_getrandom 511
 #define TARGET_NR_memfd_create  512
 #define TARGET_NR_execveat  513
+#define TARGET_NR_seccomp   514
+#define TARGET_NR_bpf   515
+#define TARGET_NR_userfaultfd   516
+#define TARGET_NR_membarrier        517
+#define TARGET_NR_mlock2            518
+#define TARGET_NR_copy_file_range   519
+#define TARGET_NR_preadv2   520
+#define TARGET_NR_pwritev2  521
+#define TARGET_NR_statx 522
+#define TARGET_NR_io_pgetevents 523
+#define TARGET_NR_pkey_mprotect 524
+#define TARGET_NR_pkey_alloc        525
+#define TARGET_NR_pkey_free 526
+#define TARGET_NR_rseq  527
+#define TARGET_NR_statfs64  528
+#define TARGET_NR_fstatfs64 529
+#define TARGET_NR_getegid   530
+#define TARGET_NR_geteuid   531
+#define TARGET_NR_getppid   532
+/*
+ * all other architectures have common numbers for new syscall, alpha
+ * is the exception.
+ */
+#define TARGET_NR_pidfd_send_signal 534
+#define TARGET_NR_io_uring_setup    535
+#define TARGET_NR_io_uring_enter    536
+#define TARGET_NR_io_uring_register 537
+#define TARGET_NR_open_tree         538
+#define TARGET_NR_move_mount        539
+#define TARGET_NR_fsopen            540
+#define TARGET_NR_fsconfig          541
+#define TARGET_NR_fsmount           542
+#define TARGET_NR_fspick            543
+#define TARGET_NR_pidfd_open        544
+/* 545 reserved for clone3 */
 
 #endif
-- 
2.7.4




[PATCH v2 4/9] linux-user: microblaze: Update syscall numbers to kernel 5.5 level

2020-02-12 Thread Aleksandar Markovic
From: Aleksandar Markovic 

Update microblaze syscall numbers based on Linux kernel v5.5.

CC: Edgar E. Iglesias 
Signed-off-by: Aleksandar Markovic 
Reviewed-by: Laurent Vivier 
---
 linux-user/microblaze/syscall_nr.h | 45 ++
 1 file changed, 45 insertions(+)

diff --git a/linux-user/microblaze/syscall_nr.h 
b/linux-user/microblaze/syscall_nr.h
index aa2eb93..ec1758e 100644
--- a/linux-user/microblaze/syscall_nr.h
+++ b/linux-user/microblaze/syscall_nr.h
@@ -393,5 +393,50 @@
 #define TARGET_NR_memfd_create  386
 #define TARGET_NR_bpf   387
 #define TARGET_NR_execveat  388
+#define TARGET_NR_userfaultfd   389
+#define TARGET_NR_membarrier        390
+#define TARGET_NR_mlock2            391
+#define TARGET_NR_copy_file_range   392
+#define TARGET_NR_preadv2   393
+#define TARGET_NR_pwritev2  394
+#define TARGET_NR_pkey_mprotect 395
+#define TARGET_NR_pkey_alloc        396
+#define TARGET_NR_pkey_free 397
+#define TARGET_NR_statx 398
+#define TARGET_NR_io_pgetevents 399
+#define TARGET_NR_rseq  400
+/* 401 and 402 are unused */
+#define TARGET_NR_clock_gettime64   403
+#define TARGET_NR_clock_settime64   404
+#define TARGET_NR_clock_adjtime64   405
+#define TARGET_NR_clock_getres_time64   406
+#define TARGET_NR_clock_nanosleep_time64 407
+#define TARGET_NR_timer_gettime64   408
+#define TARGET_NR_timer_settime64   409
+#define TARGET_NR_timerfd_gettime64 410
+#define TARGET_NR_timerfd_settime64 411
+#define TARGET_NR_utimensat_time64  412
+#define TARGET_NR_pselect6_time64   413
+#define TARGET_NR_ppoll_time64  414
+#define TARGET_NR_io_pgetevents_time64  416
+#define TARGET_NR_recvmmsg_time64   417
+#define TARGET_NR_mq_timedsend_time64   418
+#define TARGET_NR_mq_timedreceive_time64 419
+#define TARGET_NR_semtimedop_time64 420
+#define TARGET_NR_rt_sigtimedwait_time64 421
+#define TARGET_NR_futex_time64  422
+#define TARGET_NR_sched_rr_get_interval_time64 423
+#define TARGET_NR_pidfd_send_signal 424
+#define TARGET_NR_io_uring_setup    425
+#define TARGET_NR_io_uring_enter    426
+#define TARGET_NR_io_uring_register 427
+#define TARGET_NR_open_tree         428
+#define TARGET_NR_move_mount        429
+#define TARGET_NR_fsopen            430
+#define TARGET_NR_fsconfig          431
+#define TARGET_NR_fsmount           432
+#define TARGET_NR_fspick            433
+#define TARGET_NR_pidfd_open        434
+#define TARGET_NR_clone3            435
 
 #endif
-- 
2.7.4




[PATCH v2 6/9] linux-user: sh4: Update syscall numbers to kernel 5.5 level

2020-02-12 Thread Aleksandar Markovic
From: Aleksandar Markovic 

Update sh4 syscall numbers based on Linux kernel v5.5.

CC: Aurelien Jarno 
Signed-off-by: Aleksandar Markovic 
Reviewed-by: Laurent Vivier 
---
 linux-user/sh4/syscall_nr.h | 48 +
 1 file changed, 48 insertions(+)

diff --git a/linux-user/sh4/syscall_nr.h b/linux-user/sh4/syscall_nr.h
index d53a2a0..8c21fcf 100644
--- a/linux-user/sh4/syscall_nr.h
+++ b/linux-user/sh4/syscall_nr.h
@@ -389,5 +389,53 @@
 #define TARGET_NR_copy_file_range   380
 #define TARGET_NR_preadv2   381
 #define TARGET_NR_pwritev2  382
+#define TARGET_NR_statx 383
+#define TARGET_NR_pkey_mprotect 384
+#define TARGET_NR_pkey_alloc        385
+#define TARGET_NR_pkey_free 386
+#define TARGET_NR_rseq  387
+/* room for arch specific syscalls */
+#define TARGET_NR_semget 393
+#define TARGET_NR_semctl 394
+#define TARGET_NR_shmget 395
+#define TARGET_NR_shmctl 396
+#define TARGET_NR_shmat  397
+#define TARGET_NR_shmdt  398
+#define TARGET_NR_msgget 399
+#define TARGET_NR_msgsnd 400
+#define TARGET_NR_msgrcv 401
+#define TARGET_NR_msgctl 402
+#define TARGET_NR_clock_gettime64            403
+#define TARGET_NR_clock_settime64            404
+#define TARGET_NR_clock_adjtime64            405
+#define TARGET_NR_clock_getres_time64        406
+#define TARGET_NR_clock_nanosleep_time64     407
+#define TARGET_NR_timer_gettime64            408
+#define TARGET_NR_timer_settime64            409
+#define TARGET_NR_timerfd_gettime64          410
+#define TARGET_NR_timerfd_settime64          411
+#define TARGET_NR_utimensat_time64           412
+#define TARGET_NR_pselect6_time64            413
+#define TARGET_NR_ppoll_time64               414
+#define TARGET_NR_io_pgetevents_time64       416
+#define TARGET_NR_recvmmsg_time64            417
+#define TARGET_NR_mq_timedsend_time64        418
+#define TARGET_NR_mq_timedreceive_time64 419
+#define TARGET_NR_semtimedop_time64  420
+#define TARGET_NR_rt_sigtimedwait_time64 421
+#define TARGET_NR_futex_time64   422
+#define TARGET_NR_sched_rr_get_interval_time64   423
+#define TARGET_NR_pidfd_send_signal  424
+#define TARGET_NR_io_uring_setup 425
+#define TARGET_NR_io_uring_enter 426
+#define TARGET_NR_io_uring_register  427
+#define TARGET_NR_open_tree  428
+#define TARGET_NR_move_mount 429
+#define TARGET_NR_fsopen 430
+#define TARGET_NR_fsconfig   431
+#define TARGET_NR_fsmount            432
+#define TARGET_NR_fspick 433
+#define TARGET_NR_pidfd_open 434
+/* 435 reserved for clone3 */
 
 #endif
-- 
2.7.4




RE: The issues about architecture of the COLO checkpoint

2020-02-12 Thread Zhanghailiang
Hi,


1. After re-walking through the code: yes, you are right. Actually, after the
first migration we keep dirty logging on in the primary side and only send the
pages dirtied in the PVM to the SVM. The ram cache in the secondary side is
always a backup of the PVM, so we don’t have to re-send the non-dirtied pages.

The reason why the first checkpoint takes a longer time is that we have to back
up the whole VM’s ram into the ram cache, that is, colo_init_ram_cache(). It is
time consuming, but I have optimized it in the second patch,
“0001-COLO-Optimize-memory-back-up-process.patch”, which you can find in my
previous reply.

Besides, I found that the optimization from my previous reply, “We can only
copy the pages that dirtied by PVM and SVM in last checkpoint.”, has already
been done in the current upstream code.

2. I don’t quite understand this question. For COLO, we always need both the
PVM’s and the SVM’s network packets to compare before sending the packets to
the client. COLO depends on this comparison to decide whether or not the PVM
and the SVM are in the same state.

Thanks,
hailiang
From: Daniel Cho [mailto:daniel...@qnap.com]
Sent: Wednesday, February 12, 2020 4:37 PM
To: Zhang, Chen 
Cc: Zhanghailiang ; Dr. David Alan Gilbert 
; qemu-devel@nongnu.org
Subject: Re: The issues about architecture of the COLO checkpoint

Hi Hailiang,

Thanks for your reply and the detailed explanation.
We will try to use the attachments to enhance the memory copy.

However, we have some questions about your reply.

1. As you said, "for each checkpoint, we have to send the whole PVM's pages to
SVM", why does only the first checkpoint take a longer pause time?
In our observation, the first checkpoint takes more time for pausing, while the
other checkpoints pause only briefly. Does this mean that only the first
checkpoint sends the whole set of pages to the SVM, and the other checkpoints
send just the dirty pages to the SVM for reloading?

2. We notice that the COLO-COMPARE component holds a packet until it receives
packets from both the PVM and the SVM. Under this rule, when we add
COLO-COMPARE to the PVM, its network is stuck until the SVM starts, so making
the PVM stall while setting up the COLO feature is another issue. Given this,
could we let colo-compare pass the PVM's packets through when the SVM's packet
queue is empty? Then the PVM's network won't stall, and "if PVM runs firstly,
it still need to wait for The network packets from SVM to compare before send
it to client side" won't happen either.

Best regard,
Daniel Cho

Zhang, Chen <chen.zh...@intel.com> wrote on Wednesday, February 12, 2020 at 1:45 PM:


> -----Original Message-----
> From: Zhanghailiang <zhang.zhanghaili...@huawei.com>
> Sent: Wednesday, February 12, 2020 11:18 AM
> To: Dr. David Alan Gilbert <dgilb...@redhat.com>; Daniel Cho
> <daniel...@qnap.com>; Zhang, Chen <chen.zh...@intel.com>
> Cc: qemu-devel@nongnu.org
> Subject: RE: The issues about architecture of the COLO checkpoint
>
> Hi,
>
> Thank you Dave,
>
> I'll reply here directly.
>
> -----Original Message-----
> From: Dr. David Alan Gilbert [mailto:dgilb...@redhat.com]
> Sent: Wednesday, February 12, 2020 1:48 AM
> To: Daniel Cho <daniel...@qnap.com>; chen.zh...@intel.com;
> Zhanghailiang <zhang.zhanghaili...@huawei.com>
> Cc: qemu-devel@nongnu.org
> Subject: Re: The issues about architecture of the COLO checkpoint
>
>
> cc'ing in COLO people:
>
>
> * Daniel Cho (daniel...@qnap.com) wrote:
> > Hi everyone,
> >  We have some issues about setting COLO feature. Hope somebody
> > could give us some advice.
> >
> > Issue 1:
> >  We dynamic to set COLO feature for PVM(2 core, 16G memory),  but
> > the Primary VM will pause a long time(based on memory size) for
> > waiting SVM start. Does it have any idea to reduce the pause time?
> >
>
> Yes, we do have some ideas to optimize this downtime.
>
> The main problem for current version is, for each checkpoint, we have to
> send the whole PVM's pages
> To SVM, and then copy the whole VM's state into SVM from ram cache, in
> this process, we need both of them be paused.
> Just as you said, the downtime is based on memory size.
>
> So firstly, we need to reduce the sending data while do checkpoint, actually,
> we can migrate parts of PVM's dirty pages in background
> While both of VMs are running. And then we load these pages into ram
> cache (backup memory) in SVM temporarily. While do checkpoint,
> We just send the last dirty pages of PVM to slave side and then copy the ram
> cache into SVM. Further on, we don't have
> To send the whole PVM's dirty pages, we can only send the pages that
> dirtied by PVM or SVM during two checkpoints. (Because
> If one page is not dirtied by both PVM and SVM, the data of this pages will
> keep same in SVM, PVM, backup memory). This method can reduce
> the time that consumed in sending data.
>
> For the second problem, 

Re: VW ELF loader

2020-02-12 Thread Alexey Kardashevskiy



On 10/02/2020 11:31, Alexey Kardashevskiy wrote:
> 
> 
> On 07/02/2020 10:46, Paolo Bonzini wrote:
>> On 07/02/20 00:23, Alexey Kardashevskiy wrote:
 Right, not unlike what you get with vof=on. :)  I'm not against at all
 that idea.  I just don't understand what you refer to below as (2).
 Does petitboot not have the problem because it kexecs the new kernel?
>>>
>>> Petitboot does not have this problem *if* it runs without SLOF, i.e.
>>> directly via -kernel and -initrd and uses OF CI (cut down version, about
>>> v3-v4 of my patchset, without block devices and grub lookup). In this
>>> case there is one device tree instance, fully synchronized with the
>>> machine state.
>>>
>>> If there is still SLOF and (2) is happening, then petitboot is screwed
>>> as any other kernel.
>>
>> Ok, so "minimal pseudo-OpenFirmware in QEMU" is doable and can get
>> everything right;
> 
> I am not convinced that ditching drivers is not right; I am moving elf +
> mbr + gpt + grub loading to the guest though, so the 20-byte blob becomes an
> FDT-less firmware, a few kbytes big.


Ok. So, I have made a small firmware which does OF CI, loads GRUB and
instantiates RTAS:
https://github.com/aik/of1275
It is quite raw, but it gives the idea.

It does not contain drivers and still relies on QEMU to hook an OF path
to a backend. Is this a showstopper, i.e. is it a no-go without drivers? Thanks,



>> it's just work to set up PCI and do all that other
>> do_driver_stuff(), so you can either do it yourself or use
>> Linux+petitboot.  Is this correct?
> 
> Except using the "just" word, yes, correct ;)
> 
>> Also, can a normal distro kernel run via -kernel/-initrd + the minimal
>> firmware in QEMU?
> 
> Yes.


-- 
Alexey



[PATCH 0/2] delete virtio queues in vhost-user-blk-unrealize

2020-02-12 Thread pannengyuan
From: Pan Nengyuan 

This patch series fixes memory leaks when detaching a vhost-user-blk device.
1. Use the old virtio_del_queue to fix the memleaks; this is easier for stable branches to
merge.
   See the discussion in
https://lists.nongnu.org/archive/html/qemu-devel/2020-01/msg02903.html

2. Convert virtio_del_queue to the new interface (virtio_delete_queue).
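
A standalone mock of the pairing the series enforces (stub types and functions standing
in for the virtio helpers; not the real implementations):

```
#include <stdlib.h>

typedef struct VirtQueue { int index; } VirtQueue;

static VirtQueue *virtio_add_queue_mock(int index)
{
    VirtQueue *vq = calloc(1, sizeof(*vq));
    vq->index = index;
    return vq;
}

static void virtio_delete_queue_mock(VirtQueue *vq)
{
    free(vq);
}

int main(void)
{
    enum { NUM_QUEUES = 4 };
    VirtQueue *vqs[NUM_QUEUES];

    /* realize: create the queues and remember the pointers */
    for (int i = 0; i < NUM_QUEUES; i++) {
        vqs[i] = virtio_add_queue_mock(i);
    }

    /* unrealize (and the realize error path): delete every queue again */
    for (int i = 0; i < NUM_QUEUES; i++) {
        virtio_delete_queue_mock(vqs[i]);
    }
    return 0;
}
```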

Pan Nengyuan (2):
  vhost-user-blk: delete virtioqueues in unrealize to fix memleaks
  vhost-use-blk: convert to new virtio_delete_queue

 hw/block/vhost-user-blk.c  | 15 +--
 include/hw/virtio/vhost-user-blk.h |  1 +
 2 files changed, 14 insertions(+), 2 deletions(-)

-- 
2.21.0.windows.1





[PATCH 1/2] vhost-user-blk: delete virtioqueues in unrealize to fix memleaks

2020-02-12 Thread pannengyuan
From: Pan Nengyuan 

The virtio queues were not deleted in unrealize, nor in the error path in
realize. This patch fixes these memleaks; the leak stack is as follows:

Direct leak of 114688 byte(s) in 16 object(s) allocated from:
#0 0x7f24024fdbf0 in calloc (/lib64/libasan.so.3+0xcabf0)
#1 0x7f2401642015 in g_malloc0 (/lib64/libglib-2.0.so.0+0x50015)
#2 0x55ad175a6447 in virtio_add_queue /mnt/sdb/qemu/hw/virtio/virtio.c:2327
#3 0x55ad17570cf9 in vhost_user_blk_device_realize 
/mnt/sdb/qemu/hw/block/vhost-user-blk.c:419
#4 0x55ad175a3707 in virtio_device_realize 
/mnt/sdb/qemu/hw/virtio/virtio.c:3509
#5 0x55ad176ad0d1 in device_set_realized /mnt/sdb/qemu/hw/core/qdev.c:876
#6 0x55ad1781ff9d in property_set_bool /mnt/sdb/qemu/qom/object.c:2080
#7 0x55ad178245ae in object_property_set_qobject 
/mnt/sdb/qemu/qom/qom-qobject.c:26
#8 0x55ad17821eb4 in object_property_set_bool 
/mnt/sdb/qemu/qom/object.c:1338
#9 0x55ad177aeed7 in virtio_pci_realize 
/mnt/sdb/qemu/hw/virtio/virtio-pci.c:1801

Reported-by: Euler Robot 
Signed-off-by: Pan Nengyuan 
---
 hw/block/vhost-user-blk.c | 8 
 1 file changed, 8 insertions(+)

diff --git a/hw/block/vhost-user-blk.c b/hw/block/vhost-user-blk.c
index d8c459c575..2eba8b9db0 100644
--- a/hw/block/vhost-user-blk.c
+++ b/hw/block/vhost-user-blk.c
@@ -460,6 +460,9 @@ reconnect:
 virtio_err:
 g_free(s->vqs);
 g_free(s->inflight);
+for (i = 0; i < s->num_queues; i++) {
+virtio_del_queue(vdev, i);
+}
 virtio_cleanup(vdev);
vhost_user_cleanup(&s->vhost_user);
 }
@@ -468,6 +471,7 @@ static void vhost_user_blk_device_unrealize(DeviceState 
*dev, Error **errp)
 {
 VirtIODevice *vdev = VIRTIO_DEVICE(dev);
 VHostUserBlk *s = VHOST_USER_BLK(dev);
+int i;
 
 virtio_set_status(vdev, 0);
 qemu_chr_fe_set_handlers(&s->chardev,  NULL, NULL, NULL,
@@ -476,6 +480,10 @@ static void vhost_user_blk_device_unrealize(DeviceState 
*dev, Error **errp)
 vhost_dev_free_inflight(s->inflight);
 g_free(s->vqs);
 g_free(s->inflight);
+
+for (i = 0; i < s->num_queues; i++) {
+virtio_del_queue(vdev, i);
+}
 virtio_cleanup(vdev);
 vhost_user_cleanup(&s->vhost_user);
 }
-- 
2.21.0.windows.1





[PATCH 2/2] vhost-use-blk: convert to new virtio_delete_queue

2020-02-12 Thread pannengyuan
From: Pan Nengyuan 

Use the new virtio_delete_queue() function to clean up.

Signed-off-by: Pan Nengyuan 
---
 hw/block/vhost-user-blk.c  | 11 +++
 include/hw/virtio/vhost-user-blk.h |  1 +
 2 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/hw/block/vhost-user-blk.c b/hw/block/vhost-user-blk.c
index 2eba8b9db0..ed6a5cc03b 100644
--- a/hw/block/vhost-user-blk.c
+++ b/hw/block/vhost-user-blk.c
@@ -420,9 +420,10 @@ static void vhost_user_blk_device_realize(DeviceState 
*dev, Error **errp)
 virtio_init(vdev, "virtio-blk", VIRTIO_ID_BLOCK,
 sizeof(struct virtio_blk_config));
 
+s->virtqs = g_new0(VirtQueue *, s->num_queues);
 for (i = 0; i < s->num_queues; i++) {
-virtio_add_queue(vdev, s->queue_size,
- vhost_user_blk_handle_output);
+s->virtqs[i] = virtio_add_queue(vdev, s->queue_size,
+vhost_user_blk_handle_output);
 }
 
 s->inflight = g_new0(struct vhost_inflight, 1);
@@ -461,8 +462,9 @@ virtio_err:
 g_free(s->vqs);
 g_free(s->inflight);
 for (i = 0; i < s->num_queues; i++) {
-virtio_del_queue(vdev, i);
+virtio_delete_queue(s->virtqs[i]);
 }
+g_free(s->virtqs);
 virtio_cleanup(vdev);
 vhost_user_cleanup(&s->vhost_user);
 }
@@ -482,8 +484,9 @@ static void vhost_user_blk_device_unrealize(DeviceState 
*dev, Error **errp)
 g_free(s->inflight);
 
 for (i = 0; i < s->num_queues; i++) {
-virtio_del_queue(vdev, i);
+virtio_delete_queue(s->virtqs[i]);
 }
+g_free(s->virtqs);
 virtio_cleanup(vdev);
 vhost_user_cleanup(&s->vhost_user);
 }
diff --git a/include/hw/virtio/vhost-user-blk.h 
b/include/hw/virtio/vhost-user-blk.h
index 108bfadeeb..f68911f6f0 100644
--- a/include/hw/virtio/vhost-user-blk.h
+++ b/include/hw/virtio/vhost-user-blk.h
@@ -37,6 +37,7 @@ typedef struct VHostUserBlk {
 struct vhost_inflight *inflight;
 VhostUserState vhost_user;
 struct vhost_virtqueue *vqs;
+VirtQueue **virtqs;
 guint watch;
 bool connected;
 } VHostUserBlk;
-- 
2.21.0.windows.1





Re: [PATCH 5/9] linux-user: mips: Update syscall numbers to kernel 5.5 level

2020-02-12 Thread Aleksandar Markovic
On Wed, Feb 12, 2020 at 10:10 PM Laurent Vivier  wrote:
>
> Le 04/02/2020 à 13:07, Aleksandar Markovic a écrit :
> > From: Aleksandar Markovic 
> >
> > Update mips syscall numbers based on Linux kernel tag v5.5.
> >
> > CC: Aurelien Jarno 
> > CC: Aleksandar Rikalo 
> > Signed-off-by: Aleksandar Markovic 
> > ---
> >  linux-user/mips/cpu_loop.c | 78 
> > +-
> >  linux-user/mips/syscall_nr.h   | 45 
> >  linux-user/mips64/syscall_nr.h | 13 +++
> >  3 files changed, 135 insertions(+), 1 deletion(-)
> >
> > diff --git a/linux-user/mips/cpu_loop.c b/linux-user/mips/cpu_loop.c
> > index 39915b3..b81479b 100644
> > --- a/linux-user/mips/cpu_loop.c
> > +++ b/linux-user/mips/cpu_loop.c
> > @@ -25,8 +25,9 @@
> >  #include "internal.h"
> >
> >  # ifdef TARGET_ABI_MIPSO32
> > +#  define MIPS_SYSCALL_NUMBER_UNUSED -1
> >  #  define MIPS_SYS(name, args) args,
> > -static const uint8_t mips_syscall_args[] = {
> > +static const int8_t mips_syscall_args[] = {
> >  MIPS_SYS(sys_syscall, 8)/* 4000 */
> >  MIPS_SYS(sys_exit   , 1)
> >  MIPS_SYS(sys_fork   , 0)
> > @@ -390,6 +391,75 @@ static const uint8_t mips_syscall_args[] = {
> >  MIPS_SYS(sys_copy_file_range, 6) /* 360 */
> >  MIPS_SYS(sys_preadv2, 6)
> >  MIPS_SYS(sys_pwritev2, 6)
> > +MIPS_SYS(sys_pkey_mprotect, 4)
> > +MIPS_SYS(sys_pkey_alloc, 2)
> > +MIPS_SYS(sys_pkey_free, 1) /* 365 */
> > +MIPS_SYS(sys_statx, 5)
> > +MIPS_SYS(sys_rseq, 4)
> > +MIPS_SYS(sys_io_pgetevents, 6)
> > +MIPS_SYSCALL_NUMBER_UNUSED,
> > +MIPS_SYSCALL_NUMBER_UNUSED,/* 370 */
> > +MIPS_SYSCALL_NUMBER_UNUSED,
> > +MIPS_SYSCALL_NUMBER_UNUSED,
> > +MIPS_SYSCALL_NUMBER_UNUSED,
> > +MIPS_SYSCALL_NUMBER_UNUSED,
> > +MIPS_SYSCALL_NUMBER_UNUSED,/* 375 */
> > +MIPS_SYSCALL_NUMBER_UNUSED,
> > +MIPS_SYSCALL_NUMBER_UNUSED,
> > +MIPS_SYSCALL_NUMBER_UNUSED,
> > +MIPS_SYSCALL_NUMBER_UNUSED,
> > +MIPS_SYSCALL_NUMBER_UNUSED,/* 380 */
> > +MIPS_SYSCALL_NUMBER_UNUSED,
> > +MIPS_SYSCALL_NUMBER_UNUSED,
> > +MIPS_SYSCALL_NUMBER_UNUSED,
> > +MIPS_SYSCALL_NUMBER_UNUSED,
> > +MIPS_SYSCALL_NUMBER_UNUSED,/* 385 */
> > +MIPS_SYSCALL_NUMBER_UNUSED,
> > +MIPS_SYSCALL_NUMBER_UNUSED,
> > +MIPS_SYSCALL_NUMBER_UNUSED,
> > +MIPS_SYSCALL_NUMBER_UNUSED,
> > +MIPS_SYSCALL_NUMBER_UNUSED,/* 390 */
> > +MIPS_SYSCALL_NUMBER_UNUSED,
> > +MIPS_SYSCALL_NUMBER_UNUSED,
> > +MIPS_SYS(sys_semget, 3)
> > +MIPS_SYS(sys_semctl, 4)
> > +MIPS_SYS(sys_shmget, 3)/* 395 */
> > +MIPS_SYS(sys_shmctl, 3)
> > +MIPS_SYS(sys_shmat, 3)
> > +MIPS_SYS(sys_shmdt, 1)
> > +MIPS_SYS(sys_msgget, 2)
> > +MIPS_SYS(sys_msgsnd, 4)/* 400 */
> > +MIPS_SYS(sys_msgrcv, 5)
> > +MIPS_SYS(sys_msgctl, 3)
> > +MIPS_SYS(sys_timer_gettime64, 2)
> > +MIPS_SYS(sys_timer_settime64, 4)
> > +MIPS_SYS(sys_timerfd_gettime64, 2) /* 410 */
> > +MIPS_SYS(sys_timerfd_settime64, 4)
>
> These have different names in the kernel:
>
> clock_gettime64  403
> clock_settime64  404
> clock_adjtime64  405
> clock_getres_time64  406
> clock_nanosleep_time64   407
>

In fact, the whole range 403-407 was missing; the names are good for
408-411 and all that follow them.

> > +MIPS_SYS(sys_utimensat_time64, 4)
> > +MIPS_SYS(sys_pselect6_time64, 6)
> > +MIPS_SYS(sys_ppoll_time64, 5)
> > +MIPS_SYSCALL_NUMBER_UNUSED,/* 415 */
> > +MIPS_SYS(sys_io_pgetevents_time64, 6)
> > +MIPS_SYS(sys_recvmmsg_time64, 5)
> > +MIPS_SYS(sys_mq_timedsend_time64, 5)
> > +MIPS_SYS(sys_mq_timedreceive_time64, 5)
> > +MIPS_SYS(sys_semtimedop_time64, 4) /* 420 */
> > +MIPS_SYS(sys_rt_sigtimedwait_time64, 4)
> > +MIPS_SYS(sys_futex_time64, 6)
> > +MIPS_SYS(sys_sched_rr_get_interval_time64, 2)
> > +MIPS_SYS(sys_pidfd_send_signal, 4)
> > +MIPS_SYS(sys_io_uring_setup, 2)/* 425 */
> > +MIPS_SYS(sys_io_uring_enter, 6)
> > +MIPS_SYS(sys_io_uring_register, 4)
> > +MIPS_SYS(sys_open_tree, 3)
> > +MIPS_SYS(sys_move_mount, 5)
> > +MIPS_SYS(sys_fsopen, 2)/* 430 */
> > +MIPS_SYS(sys_fsconfig, 5)
> > +MIPS_SYS(sys_fsmount, 3)
> > +MIPS_SYS(sys_fspick, 3)
> > +MIPS_SYS(sys_pidfd_open, 2)
> > +MIPS_SYS(sys_clone3, 2)/* 435 */
>
> This comes from arch/mips/kernel/syscalls/syscall_o32.tbl for the
> 

Re: [PATCH v2 3/5] a stand-alone tool to directly share disk image file via vhost-user protocol

2020-02-12 Thread Coiby Xu
> > > Yes, I think at least for the moment it should work fine this way.
> > > Eventually, I'd like to integrate it with --export (and associated QMP
> > > commands, which are still to be created), too. Maybe at that point we
> > > want to make the QOM object not user creatable any more.
> >
> > Does it mean the TYPE_USER_CREATABLE interface in QOM will become
> > deprecated in the future? I'm curious what the reasons are for making the
> > QOM object not user creatable, because we may still need to start the
> > vhost-user block device backend through HMP or QMP instead of starting
> > it as a stand-alone daemon.

> Not in general, but if we have something like a block-export-add QMP
> command, the QOM interface would be redundant. We could still leave it
> there and have both a low-level and a high-level interface, but whether
> we would want to is something we still have to decide.

I see. So the QOM interface will still be used as a low-level interface.
In the draft version, vhost-user-blk-server was started using the specific
commands vhost_user_server_start/vhost_user_server_stop, which provide
interfaces that are easier for implementing a block-export-add QMP command. But
in later versions, only the object_add/object_del syntax is supported to
start/stop vhost-user-blk-server. I'll keep an eye on how the storage
daemon develops and adapt my code accordingly.


On Sun, Feb 2, 2020 at 5:33 PM Kevin Wolf  wrote:
>
> Am 31.01.2020 um 17:42 hat Coiby Xu geschrieben:
> > > Yes, I think at least for the moment it should work fine this way.
> > > Eventually, I'd like to integrate it with --export (and associated QMP
> > > commands, which are still to be created), too. Maybe at that point we
> > > want to make the QOM object not user creatable any more.
> >
> > Does it mean TYPE_USER_CREATABLE interface in QOM will become
> > deprecated in the future? I'm curious what are the reasons for making
> > QOM object no user creatable? Because we may still need to start
> > vhost-user block device backend through HMP or QMP instead of stating
> > it as a standalone-alone daemon.
>
> Not in general, but if we have something like a block-export-add QMP
> command, the QOM interface would be redundant. We could still leave it
> there and have both a low-level and a high-level interface, but whether
> we would want to is something we still have to decide.
>
> > > As for test cases, do you think it would be hard to just modify the
> > > tests to send an explicit 'quit' command to the daemon?
> >
> > Accroding to 
> > https://patchew.org/QEMU/20191017130204.16131-1-kw...@redhat.com/20191017130204.16131-10-kw...@redhat.com/,
> >
> > > +static bool exit_requested = false;
> > > +
> > > +void qemu_system_killed(int signal, pid_t pid)
> > > +{
> > > +exit_requested = true;
> > > +}
> >
> > If exit_requested is true, qemu-storage-daemon will exit the main loop
> > and then quit. So is calling qemu_system_killed() what you mean by "to
> > send an explicit 'quit' command to the daemon"?
>
> qemu_system_killed() is called in the signal handlers for, amongst others,
> SIGTERM and SIGINT. This is one way to stop the storage daemon (for
> manual use, sending SIGINT with Ctrl-C is probably the easiest way).
>
> What I actually meant is the 'quit' QMP command which will cause
> qmp_quit() to be run, which contains the same code. But if sending a
> signal is more convenient, that's just as good.
>
> Kevin
>
> > On Fri, Jan 17, 2020 at 6:12 PM Kevin Wolf  wrote:
> > >
> > > Am 17.01.2020 um 09:12 hat Coiby Xu geschrieben:
> > > > Excellent! I will add an option (or object property) for
> > > > vhost-user-blk server oject which will tell the daemon process to exit
> > > > when the client disconnects, thus "make check-qtest" will not get held
> > > > by this daemon process. After that since Kevin's qemu-storage-daemon
> > > > support "-object" option
> > > > (https://patchew.org/QEMU/20191017130204.16131-1-kw...@redhat.com/20191017130204.16131-3-kw...@redhat.com/)
> > > > and vhost-user-server is a user-creatable QOM object, it will work out
> > > > of the box.
> > >
> > > Yes, I think at least for the moment it should work fine this way.
> > > Eventually, I'd like to integrate it with --export (and associated QMP
> > > commands, which are still to be created), too. Maybe at that point we
> > > want to make the QOM object not user creatable any more.
> > >
> > > Would it make sense to prefix the object type name with "x-" so we can
> > > later retire it from the external user interface without a deprecation
> > > period?
> > >
> > > As for test cases, do you think it would be hard to just modify the
> > > tests to send an explicit 'quit' command to the daemon?
> > >
> > > > I'm curious when the formal version of qemu-storage-daemon will be
> > > > finished so I can take advantage of it? Or should I apply the RFC
> > > > PATCHes to my working branch directly and submit them together with
> > > > the patches on vhost-user-blk server feature when posting v3?
> > >
> > > It's the 

[PATCH v2 1/2] spapr: Disable legacy virtio devices for pseries-5.0 and later

2020-02-12 Thread David Gibson
PAPR specifies a kind of odd, paravirtualized PCI bus, which looks to
the guess mostly like classic PCI, even if some of the individual
devices on the bus are PCI Express.  One consequence of that is that
virtio-pci devices still default to being in transitional mode, though
legacy mode is now disabled by default on current q35 x86 machine
types.

Legacy mode virtio devices aren't really necessary any more, and are
causing some problems for future changes.  Therefore, for the
pseries-5.0 machine type (and onwards), switch to modern-only
virtio-pci devices by default.

This does mean we no longer support guest kernels prior to 4.0, unless
they have modern virtio support backported (which some distro kernels
like that in RHEL7 do).

Signed-off-by: David Gibson 
---
 hw/ppc/spapr.c | 14 +-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index cb220fde45..6e1e467cc6 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -65,6 +65,7 @@
 
 #include "hw/pci/pci.h"
 #include "hw/scsi/scsi.h"
+#include "hw/virtio/virtio-pci.h"
 #include "hw/virtio/virtio-scsi.h"
 #include "hw/virtio/vhost-scsi-common.h"
 
@@ -4567,7 +4568,14 @@ static void 
spapr_machine_latest_class_options(MachineClass *mc)
  */
 static void spapr_machine_5_0_class_options(MachineClass *mc)
 {
-/* Defaults for the latest behaviour inherited from the base class */
+/* Most defaults for the latest behaviour are inherited from the
+ * base class, but we need to override the (non ppc specific)
+ * default behaviour for virtio */
+static GlobalProperty compat[] = {
+{ TYPE_VIRTIO_PCI, "disable-legacy", "on", },
+};
+
+compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat));
 }
 
 DEFINE_SPAPR_MACHINE(5_0, "5.0", true);
@@ -4578,12 +4586,16 @@ DEFINE_SPAPR_MACHINE(5_0, "5.0", true);
 static void spapr_machine_4_2_class_options(MachineClass *mc)
 {
 SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc);
+static GlobalProperty compat[] = {
+{ TYPE_VIRTIO_PCI, "disable-legacy", "auto" },
+};
 
 spapr_machine_5_0_class_options(mc);
 compat_props_add(mc->compat_props, hw_compat_4_2, hw_compat_4_2_len);
 smc->default_caps.caps[SPAPR_CAP_CCF_ASSIST] = SPAPR_CAP_OFF;
 smc->default_caps.caps[SPAPR_CAP_FWNMI_MCE] = SPAPR_CAP_OFF;
 mc->nvdimm_supported = false;
+compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat));
 }
 
 DEFINE_SPAPR_MACHINE(4_2, "4.2", false);
-- 
2.24.1




[PATCH v2 0/2] spapr: Use vIOMMU translation for virtio by default

2020-02-12 Thread David Gibson
Upcoming Secure VM support for pSeries machines introduces some
complications for virtio, since the transfer buffers need to be
explicitly shared so that the hypervisor can access them.

While it's not strictly speaking dependent on it, the fact that virtio
devices bypass normal platform IOMMU translation complicates the issue
on the guest side.  Since there are some significant downsides to
bypassing the vIOMMU anyway, let's just disable that.

There's already a flag to do this in virtio, just turn it on by
default for forthcoming pseries machine types.

Any opinions on whether dropping support for the older guest kernels
is acceptable at this point?

Changes since v1:
 * Added information on which guest kernel versions will no longer
   work with these changes
 * Use Michael Tsirkin's suggested better way of handling the machine
   type change

David Gibson (2):
  spapr: Disable legacy virtio devices for pseries-5.0 and later
  spapr: Enable virtio iommu_platform=on by default

 hw/ppc/spapr.c | 16 +++-
 1 file changed, 15 insertions(+), 1 deletion(-)

-- 
2.24.1




[PATCH v2 2/2] spapr: Enable virtio iommu_platform=on by default

2020-02-12 Thread David Gibson
Traditionally, virtio devices don't do DMA by the usual path on the
guest platform.  In particular they usually bypass any virtual IOMMU
the guest has, using hypervisor magic to access untranslated guest
physical addresses.

There's now the optional iommu_platform flag which can tell virtio
devices to use the platform's normal DMA path, including any IOMMUs.
That flag was motivated by the case of hardware virtio
implementations, but there are other reasons to want it.

Specifically, the fact that the virtio device doesn't use vIOMMU
translation means that virtio devices are unsafe to pass to nested
guests, or to use with VFIO userspace drivers inside the guest.  This
is particularly noticeable on the pseries platform which *always* has
a guest-visible vIOMMU.

Not using the normal DMA path also causes difficulties for the guest
side driver when using the upcoming POWER Secure VMs (a.k.a. PEF).
While it's theoretically possible to handle this on the guest side,
it's really fiddly.  Given the other problems with the non-translated
virtio device, let's just enable vIOMMU translation for virtio devices
by default in the pseries-5.0 (and later) machine types.

This does mean the new machine type will no longer support guest
kernels older than 4.8, unless they have support for the virtio
IOMMU_PLATFORM flag backported (which some distro kernels like RHEL7
do).

Signed-off-by: David Gibson 
---
 hw/ppc/spapr.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 6e1e467cc6..d4f3dcdda5 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -4573,6 +4573,7 @@ static void spapr_machine_5_0_class_options(MachineClass 
*mc)
  * default behaviour for virtio */
 static GlobalProperty compat[] = {
 { TYPE_VIRTIO_PCI, "disable-legacy", "on", },
+{ TYPE_VIRTIO_DEVICE, "iommu_platform", "on", },
 };
 
 compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat));
@@ -4588,6 +4589,7 @@ static void spapr_machine_4_2_class_options(MachineClass 
*mc)
 SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc);
 static GlobalProperty compat[] = {
 { TYPE_VIRTIO_PCI, "disable-legacy", "auto" },
+{ TYPE_VIRTIO_DEVICE, "iommu_platform", "off", },
 };
 
 spapr_machine_5_0_class_options(mc);
-- 
2.24.1




RE: The issues about architecture of the COLO checkpoint

2020-02-12 Thread Zhanghailiang
Hi Zhang Chen,

> -Original Message-
> From: Zhang, Chen [mailto:chen.zh...@intel.com]
> Sent: Wednesday, February 12, 2020 1:45 PM
> To: Zhanghailiang ; Dr. David Alan
> Gilbert ; Daniel Cho 
> Cc: qemu-devel@nongnu.org
> Subject: RE: The issues about architecture of the COLO checkpoint
> 
> 
> 
> > -Original Message-
> > From: Zhanghailiang 
> > Sent: Wednesday, February 12, 2020 11:18 AM
> > To: Dr. David Alan Gilbert ; Daniel Cho
> > ; Zhang, Chen 
> > Cc: qemu-devel@nongnu.org
> > Subject: RE: The issues about architecture of the COLO checkpoint
> >
> > Hi,
> >
> > Thank you Dave,
> >
> > I'll reply here directly.
> >
> > -Original Message-
> > From: Dr. David Alan Gilbert [mailto:dgilb...@redhat.com]
> > Sent: Wednesday, February 12, 2020 1:48 AM
> > To: Daniel Cho ; chen.zh...@intel.com;
> > Zhanghailiang 
> > Cc: qemu-devel@nongnu.org
> > Subject: Re: The issues about architecture of the COLO checkpoint
> >
> >
> > cc'ing in COLO people:
> >
> >
> > * Daniel Cho (daniel...@qnap.com) wrote:
> > > Hi everyone,
> > >  We have some issues about setting COLO feature. Hope somebody
> > > could give us some advice.
> > >
> > > Issue 1:
> > >  We dynamically set the COLO feature for a PVM (2 cores, 16G memory),
> > > but the Primary VM will pause for a long time (based on memory size) while
> > > waiting for the SVM to start. Is there any way to reduce the pause time?
> > >
> >
> > Yes, we do have some ideas to optimize this downtime.
> >
> > The main problem in the current version is that, for each checkpoint, we
> > have to send all of the PVM's pages to the SVM, and then copy the whole
> > VM state into the SVM from the ram cache; during this process both of
> > them need to be paused.
> > Just as you said, the downtime is based on memory size.
> >
> > So firstly, we need to reduce the amount of data sent while doing a
> > checkpoint. We can migrate part of the PVM's dirty pages in the background
> > while both VMs are running, and load these pages into the ram cache
> > (backup memory) in the SVM temporarily. While doing a checkpoint, we then
> > only send the last dirty pages of the PVM to the slave side and copy the
> > ram cache into the SVM. Further on, we don't have to send all of the PVM's
> > dirty pages; we can send only the pages dirtied by the PVM or the SVM
> > between two checkpoints. (If a page is dirtied by neither the PVM nor the
> > SVM, its data stays the same in the SVM, the PVM and the backup memory.)
> > This method reduces the time consumed in sending data.
> >
> > For the second problem, we can reduce the memory copy in two ways. First,
> > we don't have to copy all pages in the ram cache; we can copy only the
> > pages dirtied by the PVM and the SVM since the last checkpoint. Second, we
> > can use the userfault missing function to reduce the time consumed in the
> > memory copy. (With the second method, in theory, we can reduce the time
> > consumed in the memory copy to the ms level.)
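
For illustration only (made-up helper names, not actual COLO code), the
per-checkpoint send set described above is simply the union of the two dirty
bitmaps:

    /* Sketch: resend only pages dirtied by either side since the previous
     * checkpoint; every other page is already identical in the PVM, the SVM
     * and the backup memory. */
    for (i = 0; i < nr_pages; i++) {
        if (test_bit(i, pvm_dirty_bitmap) || test_bit(i, svm_dirty_bitmap)) {
            send_page_to_svm(i);   /* made-up helper */
        }
    }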
> >
> > You can find the first optimization in the attachment; it is based on an
> > old qemu version (qemu-2.6), and it should not be difficult to rebase it
> > onto master or your version. And please feel free to send the new
> > version to the community if you want ;)
> >
> >
> 
> Thanks Hailiang!
> By the way, Do you have time to push the patches to upstream?
> I think this is a better and faster option.
> 

Yes, I can do this. For the second optimization, we need time to implement
and test it.

Thanks

> Thanks
> Zhang Chen
> 
> > >
> > > Issue 2:
> > >  In
> > > https://github.com/qemu/qemu/blob/master/migration/colo.c#L503,
> > > could we move start_vm() before Line 488? Because at first
> > > checkpoint the PVM will wait for the SVM's reply, which causes the PVM to stop for a while.
> > >
> >
> > No, that makes no sense, because if the PVM runs first, it still needs to
> > wait for the network packets from the SVM to compare before sending them to
> > the client side.
> >
> >
> > Thanks,
> > Hailiang
> >
> > >  We set the COLO feature on a running VM, so we hope the running
> > > VM can provide continuous service for users.
> > > Do you have any suggestions for those issues?
> > >
> > > Best regards,
> > > Daniel Cho
> > --
> > Dr. David Alan Gilbert / dgilb...@redhat.com / Manchester, UK




Re: [PATCH] MAINTAINERS: Orphan MIPS KVM CPUs

2020-02-12 Thread Aleksandar Markovic
On Wed, Feb 12, 2020 at 7:44 PM Philippe Mathieu-Daudé
 wrote:
>
> Cc'ing qemu-trivial@ & Paolo.
>

We are in the process of handling this within the company, and this
patch should go via the MIPS tree, not the trivial tree. It will be updated when
the opinions have crystallized and all consultations with others are
done. There is no rush.

Thanks,
Aleksandar


> On Sat, Dec 21, 2019 at 5:42 PM James Hogan  wrote:
> >
> > I haven't been active for 18 months, and don't have the hardware set up
> > to test KVM for MIPS, so mark it as orphaned and remove myself as
> > maintainer. Hopefully somebody from MIPS can pick this up.
> >
> > Signed-off-by: James Hogan 
> > Cc: Aleksandar Rikalo 
> > Cc: Aurelien Jarno 
> > Cc: Aleksandar Markovic 
> > Cc: qemu-devel@nongnu.org
> > ---
> >  MAINTAINERS | 3 +--
> >  1 file changed, 1 insertion(+), 2 deletions(-)
> >
> > diff --git a/MAINTAINERS b/MAINTAINERS
> > index 740401bcbb86..a798ad2b0b8a 100644
> > --- a/MAINTAINERS
> > +++ b/MAINTAINERS
> > @@ -363,9 +363,8 @@ S: Maintained
> >  F: target/arm/kvm.c
> >
> >  MIPS KVM CPUs
> > -M: James Hogan 
> >  R: Aleksandar Rikalo 
> > -S: Maintained
> > +S: Orphan
> >  F: target/mips/kvm.c
> >
> >  PPC KVM CPUs
> > --
> > 2.24.0
> >
> >
>



Re: [PATCH 0/9] linux-user: Update syscall numbers to kernel 5.5 level

2020-02-12 Thread Laurent Vivier
Le 12/02/2020 à 22:34, Laurent Vivier a écrit :
> Le 04/02/2020 à 13:07, Aleksandar Markovic a écrit :
>> From: Aleksandar Markovic 
>>
>> Hello, folks!
>>
>> This series is a spin-off of another larger linux-user series
>> that become too large to handle, hence these patches related to
>> syscall numbers are now in this, separate, series.
>>
>> Now that kernel 5.5 is released few days ago, it is time to
>> reflect what can be updated in linux-user for upcomming QEMU 5.0.
>>
>> An obvoius kernel change we should take into account are new
>> system calls, and the foirst step is updating syscall numbers
>> mainteined within QEMU linux-user.
>>
>> Regarding kernel syscal numbers there is good news and bad news.
>>
>> The good news is that kernel developers decided to make future
>> syscall numbers much more synchonized accross architectures than
>> before. They already reached that state.
>>
>> The bad news is that we in QEMU did not reach that state yet, since
>> we lag after kernel significantly.
>>
>> The good news again is that we will reach that state if we update
>> syscall numbers accross the board now. This is the main intention
>> and motivation of this series.
>>
>> The bad news again is that in order to update syscall numebrs we
>> need to be very careful at this moment. There are a number of new
>> syscalls added to the kernel recently that QEMU doesn't know about
>> at all. Significant number of new syscalls deal with 32/64-bit
>> compatibility, traditionally a problematic area in kernel, and in
>> QEMU as well. Moreover, some of the new syscalls are applicable to
>> 32-bit architectures only.
>>
>> This series covers updating syscall numbers defined in the following
>> files:
>>
>>   - linux-user/alpha/syscall_nr.h
>>   - linux-user/arm/syscall_nr.h
>>   - linux-user/m68k/syscall_nr.h
>>   - linux-user/microblaze/syscall_nr.h
>>   - linux-user/mips/cpu_loop.c
>>   - linux-user/mips/syscall_nr.h
>>   - linux-user/mips64/syscall_nr.h
>>   - linux-user/sh4/syscall_nr.h
>>   - linux-user/x86_64/syscall_nr.h
>>   - linux-user/xtensa/syscall_nr.h
>>
>> This series doesn't cover the following files (since they use certain
>> proprietary rules for mapping between kernel source and qemu source,
>> I don't feel quite comfortable changing them - therefore I am asking
>> corresponding target maintainers or Laurent to update them, if
>> possible, before our 5.0 release):
>>
>>   - linux-user/aarch64/syscall_nr.h
>>   - linux-user/i386/syscall_nr.h
>>   - linux-user/nios2/syscall_nr.h
>>   - linux-user/ppc/syscall_nr.h
>>   - linux-user/riscv/syscall_nr.h
>>   - linux-user/s390x/syscall_nr.h
>>   - linux-user/sparc/syscall_nr.h
>>   - linux-user/sparc64/syscall_nr.h
>>
>> CC: Peter Maydell 
>> CC: Paolo Bonzini 
>> CC: Richard Henderson 
>> CC: Eduardo Habkost 
>> CC: Chris Wulff 
>> CC: Marek Vasut 
>> CC: David Gibson 
>> CC: Palmer Dabbelt 
>> CC: Alistair Francis 
>> CC: Sagar Karandikar 
>> CC: Bastian Koppelmann 
>> CC: David Hildenbrand 
>> CC: Mark Cave-Ayland 
>> CC: Artyom Tarasenko 
>>
>> This series also doesn't cover the following files (since I can't find
>> corresponding kernel code - and I am also asking corresponding
>> target maintainers or Laurent to update them, if possible, before
>> our 5.0 release):
>>
>>   - linux-user/cris/cpu_loop.c
>>   - linux-user/hppa/syscall_nr.h
>>   - linux-user/openrisc/syscall_nr.h
>>   - linux-user/tilegx/syscall_nr.h
>>
>> CC: Edgar E. Iglesias 
>> CC: Richard Henderson 
>> CC: Stafford Horne 
>>
>> Again, I don't plan (I am really running out of time resources) to
>> work in a significant way on this issue any more, and I am asking
>> you, the other maintainers, to help finish updating the syscall numbers
>> before QEMU 5.0 release.
>>
>> Once we do that, updating syscall numbers in QEMU should and will
>> be significantly easier.
>>
> 
> I think we should copy the syscall.tbl and syscallhdr.sh scripts from
> the linux tree and update syscallhdr.sh to generate our syscall_nr.h

I made a draft here:

https://github.com/vivier/qemu/commits/linux-user-syscalltbl
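
For reference, the kernel's syscall.tbl files are plain whitespace-separated
tables of the form <number> <abi> <name> <entry point>, one line per syscall,
for example (an illustrative line, not copied verbatim from the kernel):

    435     common  clone3                          sys_clone3

so the per-architecture syscall_nr.h headers could be generated from them
instead of being maintained by hand.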

Thanks,
Laurent




[PULL 2/3] tcg: Add support for a helper with 7 arguments

2020-02-12 Thread Richard Henderson
From: Taylor Simpson 

Currently, helpers can only take up to 6 arguments.  This patch adds the
capability for up to 7 arguments.  I have tested it with the Hexagon port
that I am preparing for submission.
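
As a usage sketch (hypothetical helper, not part of this patch), a target would
declare the helper in its helper.h and call the generated wrapper from its
translator, assuming MAX_OPC_PARAM_IARGS allows 7 integer arguments as noted in
helper-head.h:

    /* helper.h */
    DEF_HELPER_7(do_op, void, env, i32, i32, i32, i32, i32, i32)

    /* op_helper.c */
    void HELPER(do_op)(CPUArchState *env, uint32_t a, uint32_t b, uint32_t c,
                       uint32_t d, uint32_t e, uint32_t f)
    {
        /* ... perform the operation on env ... */
    }

    /* translate.c */
    gen_helper_do_op(cpu_env, t0, t1, t2, t3, t4, t5);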

Signed-off-by: Taylor Simpson 
Message-Id: <1580942510-2820-1-git-send-email-tsimp...@quicinc.com>
Signed-off-by: Richard Henderson 
---
 include/exec/helper-gen.h   | 13 +
 include/exec/helper-head.h  |  2 ++
 include/exec/helper-proto.h |  6 ++
 include/exec/helper-tcg.h   |  7 +++
 4 files changed, 28 insertions(+)

diff --git a/include/exec/helper-gen.h b/include/exec/helper-gen.h
index 236ff40524..29c02f85dc 100644
--- a/include/exec/helper-gen.h
+++ b/include/exec/helper-gen.h
@@ -66,6 +66,18 @@ static inline void glue(gen_helper_, 
name)(dh_retvar_decl(ret)  \
   tcg_gen_callN(HELPER(name), dh_retvar(ret), 6, args); \
 }
 
+#define DEF_HELPER_FLAGS_7(name, flags, ret, t1, t2, t3, t4, t5, t6, t7)\
+static inline void glue(gen_helper_, name)(dh_retvar_decl(ret)  \
+dh_arg_decl(t1, 1),  dh_arg_decl(t2, 2), dh_arg_decl(t3, 3),\
+dh_arg_decl(t4, 4), dh_arg_decl(t5, 5), dh_arg_decl(t6, 6), \
+dh_arg_decl(t7, 7)) \
+{   \
+  TCGTemp *args[7] = { dh_arg(t1, 1), dh_arg(t2, 2), dh_arg(t3, 3), \
+ dh_arg(t4, 4), dh_arg(t5, 5), dh_arg(t6, 6),   \
+ dh_arg(t7, 7) };   \
+  tcg_gen_callN(HELPER(name), dh_retvar(ret), 7, args); \
+}
+
 #include "helper.h"
 #include "trace/generated-helpers.h"
 #include "trace/generated-helpers-wrappers.h"
@@ -79,6 +91,7 @@ static inline void glue(gen_helper_, 
name)(dh_retvar_decl(ret)  \
 #undef DEF_HELPER_FLAGS_4
 #undef DEF_HELPER_FLAGS_5
 #undef DEF_HELPER_FLAGS_6
+#undef DEF_HELPER_FLAGS_7
 #undef GEN_HELPER
 
 #endif /* HELPER_GEN_H */
diff --git a/include/exec/helper-head.h b/include/exec/helper-head.h
index f2519c9741..3094c7946d 100644
--- a/include/exec/helper-head.h
+++ b/include/exec/helper-head.h
@@ -148,6 +148,8 @@
 DEF_HELPER_FLAGS_5(name, 0, ret, t1, t2, t3, t4, t5)
 #define DEF_HELPER_6(name, ret, t1, t2, t3, t4, t5, t6) \
 DEF_HELPER_FLAGS_6(name, 0, ret, t1, t2, t3, t4, t5, t6)
+#define DEF_HELPER_7(name, ret, t1, t2, t3, t4, t5, t6, t7) \
+DEF_HELPER_FLAGS_7(name, 0, ret, t1, t2, t3, t4, t5, t6, t7)
 
 /* MAX_OPC_PARAM_IARGS must be set to n if last entry is DEF_HELPER_FLAGS_n. */
 
diff --git a/include/exec/helper-proto.h b/include/exec/helper-proto.h
index 1c4ba9bc78..a0a8d9aa46 100644
--- a/include/exec/helper-proto.h
+++ b/include/exec/helper-proto.h
@@ -30,6 +30,11 @@ dh_ctype(ret) HELPER(name) (dh_ctype(t1), dh_ctype(t2), 
dh_ctype(t3), \
 dh_ctype(ret) HELPER(name) (dh_ctype(t1), dh_ctype(t2), dh_ctype(t3), \
 dh_ctype(t4), dh_ctype(t5), dh_ctype(t6));
 
+#define DEF_HELPER_FLAGS_7(name, flags, ret, t1, t2, t3, t4, t5, t6, t7) \
+dh_ctype(ret) HELPER(name) (dh_ctype(t1), dh_ctype(t2), dh_ctype(t3), \
+dh_ctype(t4), dh_ctype(t5), dh_ctype(t6), \
+dh_ctype(t7));
+
 #include "helper.h"
 #include "trace/generated-helpers.h"
 #include "tcg-runtime.h"
@@ -42,5 +47,6 @@ dh_ctype(ret) HELPER(name) (dh_ctype(t1), dh_ctype(t2), 
dh_ctype(t3), \
 #undef DEF_HELPER_FLAGS_4
 #undef DEF_HELPER_FLAGS_5
 #undef DEF_HELPER_FLAGS_6
+#undef DEF_HELPER_FLAGS_7
 
 #endif /* HELPER_PROTO_H */
diff --git a/include/exec/helper-tcg.h b/include/exec/helper-tcg.h
index 573c2ce2e9..27870509a2 100644
--- a/include/exec/helper-tcg.h
+++ b/include/exec/helper-tcg.h
@@ -52,6 +52,12 @@
 | dh_sizemask(t2, 2) | dh_sizemask(t3, 3) | dh_sizemask(t4, 4) \
 | dh_sizemask(t5, 5) | dh_sizemask(t6, 6) },
 
+#define DEF_HELPER_FLAGS_7(NAME, FLAGS, ret, t1, t2, t3, t4, t5, t6, t7) \
+  { .func = HELPER(NAME), .name = str(NAME), .flags = FLAGS, \
+.sizemask = dh_sizemask(ret, 0) | dh_sizemask(t1, 1) \
+| dh_sizemask(t2, 2) | dh_sizemask(t3, 3) | dh_sizemask(t4, 4) \
+| dh_sizemask(t5, 5) | dh_sizemask(t6, 6) | dh_sizemask(t7, 7) },
+
 #include "helper.h"
 #include "trace/generated-helpers.h"
 #include "tcg-runtime.h"
@@ -65,5 +71,6 @@
 #undef DEF_HELPER_FLAGS_4
 #undef DEF_HELPER_FLAGS_5
 #undef DEF_HELPER_FLAGS_6
+#undef DEF_HELPER_FLAGS_7
 
 #endif /* HELPER_TCG_H */
-- 
2.20.1




[PULL 1/3] exec: flush CPU TB cache in breakpoint_invalidate

2020-02-12 Thread Richard Henderson
From: Max Filippov 

When a breakpoint is inserted at a location for which there's currently no
virtual-to-physical translation, no action is taken on the CPU TB cache. If a
TB for that virtual address already exists but is not visible at the moment, the
breakpoint won't be hit the next time an instruction at that address is
executed.

Flush entire CPU TB cache in breakpoint_invalidate to force
re-translation of all TBs for the breakpoint address.

This change fixes the following scenario:
- linux user application is running
- a breakpoint is inserted from QEMU gdbstub for a user address that is
  not currently present in the target CPU TLB
- an instruction at that address is executed, but the external debugger
  doesn't get control.

Reviewed-by: Richard Henderson 
Signed-off-by: Max Filippov 
Message-Id: <20191127220602.10827-2-jcmvb...@gmail.com>
Signed-off-by: Richard Henderson 
---
 exec.c | 15 +++
 1 file changed, 7 insertions(+), 8 deletions(-)

diff --git a/exec.c b/exec.c
index 67e520d18e..7f4074f95e 100644
--- a/exec.c
+++ b/exec.c
@@ -1019,14 +1019,13 @@ void tb_invalidate_phys_addr(AddressSpace *as, hwaddr 
addr, MemTxAttrs attrs)
 
 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
 {
-MemTxAttrs attrs;
-hwaddr phys = cpu_get_phys_page_attrs_debug(cpu, pc, &attrs);
-int asidx = cpu_asidx_from_attrs(cpu, attrs);
-if (phys != -1) {
-/* Locks grabbed by tb_invalidate_phys_addr */
-tb_invalidate_phys_addr(cpu->cpu_ases[asidx].as,
-phys | (pc & ~TARGET_PAGE_MASK), attrs);
-}
+/*
+ * There may not be a virtual to physical translation for the pc
+ * right now, but there may exist cached TB for this pc.
+ * Flush the whole TB cache to force re-translation of such TBs.
+ * This is heavyweight, but we're debugging anyway.
+ */
+tb_flush(cpu);
 }
 #endif
 
-- 
2.20.1




[PULL 3/3] tcg: Add tcg_gen_gvec_5_ptr

2020-02-12 Thread Richard Henderson
Extend the vector generator infrastructure to handle
5 vector arguments.
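
A hypothetical caller (not part of this patch) pairs the new expander with a
helper taking five vector pointers plus an extra pointer and the descriptor;
note that such a helper has seven arguments, which is exactly what the previous
patch in this pull enables:

    /* helper.h */
    DEF_HELPER_FLAGS_7(gvec_example, TCG_CALL_NO_RWG,
                       void, ptr, ptr, ptr, ptr, ptr, ptr, i32)

    /* translate code */
    tcg_gen_gvec_5_ptr(dofs, aofs, bofs, cofs, eofs, cpu_env,
                       oprsz, maxsz, 0, gen_helper_gvec_example);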

Reviewed-by: Philippe Mathieu-Daudé 
Reviewed-by: Alex Bennée 
Reviewed-by: Taylor Simpson 
Signed-off-by: Richard Henderson 
---
 include/tcg/tcg-op-gvec.h |  7 +++
 tcg/tcg-op-gvec.c | 32 
 2 files changed, 39 insertions(+)

diff --git a/include/tcg/tcg-op-gvec.h b/include/tcg/tcg-op-gvec.h
index 830d68f697..74534e2480 100644
--- a/include/tcg/tcg-op-gvec.h
+++ b/include/tcg/tcg-op-gvec.h
@@ -83,6 +83,13 @@ void tcg_gen_gvec_4_ptr(uint32_t dofs, uint32_t aofs, 
uint32_t bofs,
 uint32_t maxsz, int32_t data,
 gen_helper_gvec_4_ptr *fn);
 
+typedef void gen_helper_gvec_5_ptr(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_ptr,
+   TCGv_ptr, TCGv_ptr, TCGv_i32);
+void tcg_gen_gvec_5_ptr(uint32_t dofs, uint32_t aofs, uint32_t bofs,
+uint32_t cofs, uint32_t eofs, TCGv_ptr ptr,
+uint32_t oprsz, uint32_t maxsz, int32_t data,
+gen_helper_gvec_5_ptr *fn);
+
 /* Expand a gvec operation.  Either inline or out-of-line depending on
the actual vector size and the operations supported by the host.  */
 typedef struct {
diff --git a/tcg/tcg-op-gvec.c b/tcg/tcg-op-gvec.c
index 41b4a3c661..327d9588e0 100644
--- a/tcg/tcg-op-gvec.c
+++ b/tcg/tcg-op-gvec.c
@@ -290,6 +290,38 @@ void tcg_gen_gvec_4_ptr(uint32_t dofs, uint32_t aofs, 
uint32_t bofs,
 tcg_temp_free_i32(desc);
 }
 
+/* Generate a call to a gvec-style helper with five vector operands
+   and an extra pointer operand.  */
+void tcg_gen_gvec_5_ptr(uint32_t dofs, uint32_t aofs, uint32_t bofs,
+uint32_t cofs, uint32_t eofs, TCGv_ptr ptr,
+uint32_t oprsz, uint32_t maxsz, int32_t data,
+gen_helper_gvec_5_ptr *fn)
+{
+TCGv_ptr a0, a1, a2, a3, a4;
+TCGv_i32 desc = tcg_const_i32(simd_desc(oprsz, maxsz, data));
+
+a0 = tcg_temp_new_ptr();
+a1 = tcg_temp_new_ptr();
+a2 = tcg_temp_new_ptr();
+a3 = tcg_temp_new_ptr();
+a4 = tcg_temp_new_ptr();
+
+tcg_gen_addi_ptr(a0, cpu_env, dofs);
+tcg_gen_addi_ptr(a1, cpu_env, aofs);
+tcg_gen_addi_ptr(a2, cpu_env, bofs);
+tcg_gen_addi_ptr(a3, cpu_env, cofs);
+tcg_gen_addi_ptr(a4, cpu_env, eofs);
+
+fn(a0, a1, a2, a3, a4, ptr, desc);
+
+tcg_temp_free_ptr(a0);
+tcg_temp_free_ptr(a1);
+tcg_temp_free_ptr(a2);
+tcg_temp_free_ptr(a3);
+tcg_temp_free_ptr(a4);
+tcg_temp_free_i32(desc);
+}
+
 /* Return true if we want to implement something of OPRSZ bytes
in units of LNSZ.  This limits the expansion of inline code.  */
 static inline bool check_size_impl(uint32_t oprsz, uint32_t lnsz)
-- 
2.20.1




[PULL 0/3] tcg patch queue

2020-02-12 Thread Richard Henderson
The following changes since commit e18e5501d8ac692d32657a3e1ef545b14e72b730:

  Merge remote-tracking branch 
'remotes/dgilbert-gitlab/tags/pull-virtiofs-20200210' into staging (2020-02-10 
18:09:14 +)

are available in the Git repository at:

  https://github.com/rth7680/qemu.git tags/pull-tcg-20200212

for you to fetch changes up to 2445971604c1cfd3ec484457159f4ac300fb04d2:

  tcg: Add tcg_gen_gvec_5_ptr (2020-02-12 14:58:36 -0800)


Fix breakpoint invalidation.
Add support for tcg helpers with 7 arguments.
Add support for gvec helpers with 5 arguments.


Max Filippov (1):
  exec: flush CPU TB cache in breakpoint_invalidate

Richard Henderson (1):
  tcg: Add tcg_gen_gvec_5_ptr

Taylor Simpson (1):
  tcg: Add support for a helper with 7 arguments

 include/exec/helper-gen.h   | 13 +
 include/exec/helper-head.h  |  2 ++
 include/exec/helper-proto.h |  6 ++
 include/exec/helper-tcg.h   |  7 +++
 include/tcg/tcg-op-gvec.h   |  7 +++
 exec.c  | 15 +++
 tcg/tcg-op-gvec.c   | 32 
 7 files changed, 74 insertions(+), 8 deletions(-)



[Bug 1862986] Re: qemu-s390x crashes when run on aarch64

2020-02-12 Thread Richard Henderson
You need to provide the test binary.

I can run a chroot of s390x ubuntu bionic on aarch64 just fine,
so it must be something specific to your test.

** Changed in: qemu
   Status: New => Incomplete

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1862986

Title:
  qemu-s390x crashes when run on aarch64

Status in QEMU:
  Incomplete

Bug description:
  All tested versions (2.11 and 4.2) qemu-s390x crashes with a segfault
  when run on an aarch64 odroid Ubuntu.


  Steps to reproduce:

  root@odroid:~/workspace/bitcoin-core# /usr/local/bin/qemu-s390x 
"/root/workspace/bitcoin-core/build/bitcoin-s390x-linux-gnu/src/test/test_bitcoin_orig"
  Segmentation fault (core dumped)
  root@odroid:~/workspace/bitcoin-core# /usr/local/bin/qemu-s390x --version
  qemu-s390x version 4.2.0
  Copyright (c) 2003-2019 Fabrice Bellard and the QEMU Project developers
  root@odroid:~/workspace/bitcoin-core# /usr/bin/qemu-s390x 
"/root/workspace/bitcoin-core/build/bitcoin-s390x-linux-gnu/src/test/test_bitcoin_orig"
  Segmentation fault (core dumped)
  root@odroid:~/workspace/bitcoin-core# /usr/bin/qemu-s390x --version
  qemu-s390x version 2.11.1(Debian 1:2.11+dfsg-1ubuntu7.22)
  Copyright (c) 2003-2017 Fabrice Bellard and the QEMU Project developers

  qemu-arm does work on the same machine:

  root@odroid:~/workspace/bitcoin-core# /usr/bin/qemu-arm 
bitcoin-0.19.0.1-armhf/bin/test_bitcoin -t amount_tests
  Running 4 test cases...

  *** No errors detected
  root@odroid:~/workspace/bitcoin-core# /usr/local/bin/qemu-arm 
bitcoin-0.19.0.1-armhf/bin/test_bitcoin -t amount_tests
  Running 4 test cases...

  *** No errors detected


  
  What kind of debug information would be helpful for this issue report?
  GDB for the self-compiled latest release is not particularly helpful:

  (gdb) run
  Starting program: /usr/local/bin/qemu-s390x 
/root/workspace/bitcoin-core/build/bitcoin-s390x-linux-gnu/src/test/test_bitcoin_orig
  [Thread debugging using libthread_db enabled]
  Using host libthread_db library "/lib/aarch64-linux-gnu/libthread_db.so.1".
  [New Thread 0x7fb7a2a140 (LWP 28264)]

  Thread 1 "qemu-s390x" received signal SIGSEGV, Segmentation fault.
  0x0096b218 in __bss_start__ ()
  (gdb) bt
  #0  0x0096b218 in __bss_start__ ()
  #1  0x006120a8 in ?? ()
  #2  0x0055579904b0 in ?? ()
  Backtrace stopped: previous frame inner to this frame (corrupt stack?)


  
  A bit more information is available in the version shipped by Ubuntu:

  (gdb) run
  Starting program: /usr/bin/qemu-s390x 
/root/workspace/bitcoin-core/build/bitcoin-s390x-linux-gnu/src/test/test_bitcoin_orig
  [Thread debugging using libthread_db enabled]
  Using host libthread_db library "/lib/aarch64-linux-gnu/libthread_db.so.1".
  [New Thread 0x7fb7a01180 (LWP 28271)]

  Thread 1 "qemu-s390x" received signal SIGSEGV, Segmentation fault.
  0x00738f98 in code_gen_buffer ()
  (gdb) bt
  #0  0x00738f98 in code_gen_buffer ()
  #1  0x005e96c8 in cpu_exec ()
  #2  0x005ee430 in cpu_loop ()
  #3  0x005c3328 in main ()

To manage notifications about this bug go to:
https://bugs.launchpad.net/qemu/+bug/1862986/+subscriptions



Re: [PATCH v5 5/6] hppa: Add emulation of Artist graphics

2020-02-12 Thread Philippe Mathieu-Daudé

Hi Sven,

On 12/20/19 10:15 PM, Sven Schnelle wrote:

This adds emulation of the Artist graphics card, good enough
to get a text console on both Linux and HP-UX. The
X11 server from HP-UX also works.

Signed-off-by: Sven Schnelle 
---
  hw/display/Kconfig   |4 +
  hw/display/Makefile.objs |1 +
  hw/display/artist.c  | 1450 ++
  hw/display/trace-events  |9 +
  hw/hppa/Kconfig  |1 +
  hw/hppa/hppa_hardware.h  |1 +
  hw/hppa/machine.c|9 +
  7 files changed, 1475 insertions(+)
  create mode 100644 hw/display/artist.c

diff --git a/hw/display/Kconfig b/hw/display/Kconfig
index c500d1fc6d..15d59e10dc 100644
--- a/hw/display/Kconfig
+++ b/hw/display/Kconfig
@@ -91,6 +91,10 @@ config TCX
  config CG3
  bool
  
+config ARTIST

+bool
+select FRAMEBUFFER
+
  config VGA
  bool
  
diff --git a/hw/display/Makefile.objs b/hw/display/Makefile.objs

index f2182e3bef..5f03dfdcc4 100644
--- a/hw/display/Makefile.objs
+++ b/hw/display/Makefile.objs
@@ -40,6 +40,7 @@ common-obj-$(CONFIG_SM501) += sm501.o
  common-obj-$(CONFIG_TCX) += tcx.o
  common-obj-$(CONFIG_CG3) += cg3.o
  common-obj-$(CONFIG_NEXTCUBE) += next-fb.o
+common-obj-$(CONFIG_ARTIST) += artist.o
  
  obj-$(CONFIG_VGA) += vga.o
  
diff --git a/hw/display/artist.c b/hw/display/artist.c

[...]

+static void draw_line_size(ARTISTState *s, bool update_start)
+{
+
+int startx = artist_get_x(s->vram_start);
+int starty = artist_get_y(s->vram_start);
+int endx = artist_get_x(s->line_size);
+int endy = artist_get_y(s->line_size);
+
+trace_artist_draw_line(startx, starty, endx, endy);
+draw_line(s, startx, starty, endx, endy, update_start, -1, -1);
+}
+
+static void draw_line_xy(ARTISTState *s, bool update_start)
+{
+
+int startx = artist_get_x(s->vram_start);
+int starty = artist_get_y(s->vram_start);
+int sizex = artist_get_x(s->blockmove_size);
+int sizey = artist_get_y(s->blockmove_size);
+int linexy = s->line_xy >> 16;
+int endx, endy;
+
+endx = startx;
+endy = starty;
+
+if (sizex > 0) {
+endx = startx + linexy;
+}
+
+if (sizex < 0) {
+endx = startx;
+startx -= linexy;
+}
+
+if (sizey > 0) {
+endy = starty + linexy;
+}
+
+if (sizey < 0) {
+endy = starty;
+starty -= linexy;
+}
+
+if (startx < 0) {
+startx = 0;
+}
+
+if (endx < 0) {
+endx = 0;


If negative, set to zero.


+}
+
+if (starty < 0) {
+starty = 0;
+}
+
+if (endy < 0) {
+endy = 0;


Ditto.


+}
+
+
+if (endx < 0) {
+return;


Here Coverity points out:

>>> CID 1419388:  Control flow issues  (DEADCODE)
>>> Execution cannot reach this statement: "return;".

(endx was already clamped to zero just above, so it can never be negative here.)


+}
+
+if (endy < 0) {
+return;


Here again (same reasoning: endy was already clamped to zero above).


+}
+
+trace_artist_draw_line(startx, starty, endx, endy);
+draw_line(s, startx, starty, endx, endy, false, -1, -1);
+}
+
+static void draw_line_end(ARTISTState *s, bool update_start)
+{
+
+int startx = artist_get_x(s->vram_start);
+int starty = artist_get_y(s->vram_start);
+int endx = artist_get_x(s->line_end);
+int endy = artist_get_y(s->line_end);
+
+trace_artist_draw_line(startx, starty, endx, endy);
+draw_line(s, startx, starty, endx, endy, update_start, -1, -1);
+}





Re: [PATCH] target/ppc/cpu.h: Remove duplicate includes

2020-02-12 Thread David Gibson
On Wed, Feb 12, 2020 at 11:26:14PM +0100, BALATON Zoltan wrote:
> Commit 74433bf083b added some includes but added them twice. Since
> these are guarded against multiple inclusion including them once is
> enough.
> 
> Signed-off-by: BALATON Zoltan 

Applied to ppc-for-5.0, thanks.

> ---
>  target/ppc/cpu.h | 2 --
>  1 file changed, 2 deletions(-)
> 
> diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h
> index 3a1eb76004..07dd2b4da7 100644
> --- a/target/ppc/cpu.h
> +++ b/target/ppc/cpu.h
> @@ -23,8 +23,6 @@
>  #include "qemu/int128.h"
>  #include "exec/cpu-defs.h"
>  #include "cpu-qom.h"
> -#include "exec/cpu-defs.h"
> -#include "cpu-qom.h"
>  
>  /* #define PPC_EMULATE_32BITS_HYPV */
>  

-- 
David Gibson| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson


signature.asc
Description: PGP signature


Re: [PATCH 0/3] pnv: Fix a couple of issues reported by Coverity

2020-02-12 Thread David Gibson
On Wed, Feb 12, 2020 at 07:53:54PM +0100, Greg Kurz wrote:
> This fixes some issues in the PHB3 and PHB4 code that was merged
> recently.

Applied to ppc-for-5.0, thanks.

-- 
David Gibson| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson


signature.asc
Description: PGP signature


Re: Question about (and problem with) pflash data access

2020-02-12 Thread Philippe Mathieu-Daudé

Cc'ing Paolo and Alexey.

On 2/13/20 12:09 AM, Guenter Roeck wrote:

On Wed, Feb 12, 2020 at 10:39:30PM +0100, Philippe Mathieu-Daudé wrote:

Cc'ing Jean-Christophe and Peter.

On 2/12/20 7:46 PM, Guenter Roeck wrote:

Hi,

I have been playing with pflash recently. For the most part it works,
but I do have an odd problem when trying to instantiate pflash on sx1.

My data file looks as follows.

0000000 0001 0000 0000 0000 0000 0000 0000 0000
0000020 0000 0000 0000 0000 0000 0000 0000 0000
*
0002000 0002 0000 0000 0000 0000 0000 0000 0000
0002020 0000 0000 0000 0000 0000 0000 0000 0000
*
0004000 0003 0000 0000 0000 0000 0000 0000 0000
0004020 0000 0000 0000 0000 0000 0000 0000 0000
...

In the sx1 machine, this becomes:

0000000 6001 0000 0000 0000 0000 0000 0000 0000
0000020 0000 0000 0000 0000 0000 0000 0000 0000
*
0002000 6002 0000 0000 0000 0000 0000 0000 0000
0002020 0000 0000 0000 0000 0000 0000 0000 0000
*
0004000 6003 0000 0000 0000 0000 0000 0000 0000
0004020 0000 0000 0000 0000 0000 0000 0000 0000
*
...

pflash is instantiated with "-drive file=flash.32M.test,format=raw,if=pflash".

I don't have much success with pflash tracing - data accesses don't
show up there.

I did find a number of problems with the sx1 emulation, but I have no clue
what is going on with pflash. As far as I can see pflash works fine on
other machines. Can someone give me a hint what to look out for ?


This is specific to the SX1, introduced in commit 997641a84ff:

  64 static uint64_t static_read(void *opaque, hwaddr offset,
  65 unsigned size)
  66 {
  67 uint32_t *val = (uint32_t *) opaque;
  68 uint32_t mask = (4 / size) - 1;
  69
  70 return *val >> ((offset & mask) << 3);
  71 }

Only guessing, this looks like some hw parity, and I imagine you need to
write the parity bits in your flash.32M file before starting QEMU, then it
would appear "normal" within the guest.


I thought this might be related, but that is not the case. I added log
messages, and even ran the code in gdb. static_read() and static_write()
are not executed.

Also,

 memory_region_init_io(&cs[0], NULL, &static_ops, &cs0val,
   "sx1.cs0", OMAP_CS0_SIZE - flash_size);
  ^^
 memory_region_add_subregion(address_space,
 OMAP_CS0_BASE + flash_size, &cs[0]);
 ^^

suggests that the code is only executed for memory accesses _after_
the actual flash. The memory tree is:

memory-region: system
  0000000000000000-ffffffffffffffff (prio 0, i/o): system
    0000000000000000-0000000001ffffff (prio 0, romd): omap_sx1.flash0-1
    0000000000000000-0000000001ffffff (prio 0, rom): omap_sx1.flash0-0


Eh, two memory regions with the same size and the same priority... Is this legal?

(qemu) info mtree -f -d
FlatView #0
 AS "memory", root: system
 AS "cpu-memory-0", root: system
 Root memory region: system
  0000000000000000-0000000001ffffff (prio 0, romd): omap_sx1.flash0-1
  0000000002000000-0000000003ffffff (prio 0, i/o): sx1.cs0
  0000000004000000-0000000007ffffff (prio 0, i/o): sx1.cs1
  0000000008000000-000000000bffffff (prio 0, i/o): sx1.cs3
  0000000010000000-0000000011ffffff (prio 0, ram): omap1.dram
  0000000020000000-000000002002ffff (prio 0, ram): omap1.sram
  ...
  Dispatch
Physical sections
  #0 @0000000000000000..ffffffffffffffff (noname) [unassigned]
  #1 @0000000000000000..0000000001ffffff omap_sx1.flash0-1 [not dirty]
  #2 @0000000002000000..0000000003ffffff sx1.cs0 [ROM]
  #3 @0000000004000000..0000000007ffffff sx1.cs1 [watch]
  #4 @0000000008000000..000000000bffffff sx1.cs3
  #5 @0000000010000000..0000000011ffffff omap1.dram
  #6 @0000000020000000..000000002002ffff omap1.sram
  ...
Nodes (9 bits per level, 6 levels) ptr=[3] skip=4
  [0]
  0   skip=3  ptr=[3]
  1..511  skip=1  ptr=NIL
  [1]
  0   skip=2  ptr=[3]
  1..511  skip=1  ptr=NIL
  [2]
  0   skip=1  ptr=[3]
  1..511  skip=1  ptr=NIL
  [3]
  0   skip=1  ptr=[4]
  1   skip=1  ptr=[5]
  2   skip=2  ptr=[7]
  3..13   skip=1  ptr=NIL
 14   skip=2  ptr=[9]
 15   skip=2  ptr=[11]
 16..511  skip=1  ptr=NIL
  [4]
  0..63   skip=0  ptr=#1
 64..127  skip=0  ptr=#2
128..255  skip=0  ptr=#3
256..383  skip=0  ptr=#4
384..511  skip=1  ptr=NIL

So the romd wins.


 0200-03ff (prio 0, i/o): sx1.cs0

I thought that the dual memory assignment (omap_sx1.flash0-1 and
omap_sx1.flash0-0) might play a role, but removing that didn't make
a difference either (not that I have any idea what it is supposed
to be used for).

Thanks,
Guenter






Re: [PATCH v5 1/6] hw/hppa/dino.c: Improve emulation of Dino PCI chip

2020-02-12 Thread Philippe Mathieu-Daudé

Hi Sven, Helge.

On 12/20/19 10:15 PM, Sven Schnelle wrote:

From: Helge Deller 

The tests of the dino chip with the Online-diagnostics CD
("ODE DINOTEST") now succeeds.
Additionally add some qemu trace events.

Signed-off-by: Helge Deller 
Signed-off-by: Sven Schnelle 
Reviewed-by: Philippe Mathieu-Daudé 
---
  MAINTAINERS  |  2 +-
  hw/hppa/dino.c   | 97 +---
  hw/hppa/trace-events |  5 +++
  3 files changed, 89 insertions(+), 15 deletions(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index 387879aebc..e333bc67a4 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -876,7 +876,7 @@ F: hw/*/etraxfs_*.c
  
  HP-PARISC Machines

  --
-Dino
+HP B160L
  M: Richard Henderson 
  R: Helge Deller 
  S: Odd Fixes
diff --git a/hw/hppa/dino.c b/hw/hppa/dino.c
index ab6969b45f..9797a7f0d9 100644
--- a/hw/hppa/dino.c
+++ b/hw/hppa/dino.c
@@ -1,7 +1,7 @@
  /*
- * HP-PARISC Dino PCI chipset emulation.
+ * HP-PARISC Dino PCI chipset emulation, as in B160L and similiar machines
   *
- * (C) 2017 by Helge Deller 
+ * (C) 2017-2019 by Helge Deller 
   *
   * This work is licensed under the GNU GPL license version 2 or later.
   *
@@ -21,6 +21,7 @@
  #include "migration/vmstate.h"
  #include "hppa_sys.h"
  #include "exec/address-spaces.h"
+#include "trace.h"
  
  
  #define TYPE_DINO_PCI_HOST_BRIDGE "dino-pcihost"

@@ -82,11 +83,28 @@
  #define DINO_PCI_HOST_BRIDGE(obj) \
  OBJECT_CHECK(DinoState, (obj), TYPE_DINO_PCI_HOST_BRIDGE)
  
+#define DINO800_REGS ((DINO_TLTIM - DINO_GMASK) / 4)


Coverity noticed (CID 1419394) this should be '(1 + (DINO_TLTIM - 
DINO_GMASK) / 4)' (13 registers).



+static const uint32_t reg800_keep_bits[DINO800_REGS] = {
+MAKE_64BIT_MASK(0, 1),
+MAKE_64BIT_MASK(0, 7),
+MAKE_64BIT_MASK(0, 7),
+MAKE_64BIT_MASK(0, 8),
+MAKE_64BIT_MASK(0, 7),
+MAKE_64BIT_MASK(0, 9),
+MAKE_64BIT_MASK(0, 32),


Looking at the datasheet pp. 30, this register is 'Undefined'.
We should report GUEST_ERROR if it is accessed.


+MAKE_64BIT_MASK(0, 8),
+MAKE_64BIT_MASK(0, 30),
+MAKE_64BIT_MASK(0, 25),


Still looking at the datasheet (pp. 37) PCIROR is 24-bit (not 25).


+MAKE_64BIT_MASK(0, 22),


Here you are missing register 0x82c...


+MAKE_64BIT_MASK(0, 9),
+};
+


Altogether:

static const uint32_t reg800_keep_bits[DINO800_REGS] = {
MAKE_64BIT_MASK(0, 1),  /* GMASK */
MAKE_64BIT_MASK(0, 7),  /* PAMR */
MAKE_64BIT_MASK(0, 7),  /* PAPR */
MAKE_64BIT_MASK(0, 8),  /* DAMODE */
MAKE_64BIT_MASK(0, 7),  /* PCICMD */
MAKE_64BIT_MASK(0, 9),  /* PCISTS */
MAKE_64BIT_MASK(0, 0),  /* Undefined */
MAKE_64BIT_MASK(0, 8),  /* MLTIM */
MAKE_64BIT_MASK(0, 30), /* BRDG_FEAT */
MAKE_64BIT_MASK(0, 24), /* PCIROR */
MAKE_64BIT_MASK(0, 22), /* PCIWOR */
MAKE_64BIT_MASK(0, 0),  /* Undocumented */
MAKE_64BIT_MASK(0, 9),  /* TLTIM */
};


  typedef struct DinoState {
  PCIHostState parent_obj;
  
  /* PCI_CONFIG_ADDR is parent_obj.config_reg, via pci_host_conf_be_ops,

 so that we can map PCI_CONFIG_DATA to pci_host_data_be_ops.  */
+uint32_t config_reg_dino; /* keep original copy, including 2 lowest bits */
  
  uint32_t iar0;

  uint32_t iar1;
@@ -94,8 +112,12 @@ typedef struct DinoState {
  uint32_t ipr;
  uint32_t icr;
  uint32_t ilr;
+uint32_t io_fbb_en;
  uint32_t io_addr_en;
  uint32_t io_control;
+uint32_t toc_addr;
+
+uint32_t reg800[DINO800_REGS];
  
  MemoryRegion this_mem;

  MemoryRegion pci_mem;
@@ -106,8 +128,6 @@ typedef struct DinoState {
  MemoryRegion bm_ram_alias;
  MemoryRegion bm_pci_alias;
  MemoryRegion bm_cpu_alias;
-
-MemoryRegion cpu0_eir_mem;
  } DinoState;
  
  /*

@@ -122,6 +142,8 @@ static void gsc_to_pci_forwarding(DinoState *s)
  tmp = extract32(s->io_control, 7, 2);
  enabled = (tmp == 0x01);
  io_addr_en = s->io_addr_en;
+/* Mask out first (=firmware) and last (=Dino) areas. */
+io_addr_en &= ~(BIT(31) | BIT(0));
  
  memory_region_transaction_begin();

  for (i = 1; i < 31; i++) {
@@ -142,6 +164,8 @@ static bool dino_chip_mem_valid(void *opaque, hwaddr addr,
  unsigned size, bool is_write,
  MemTxAttrs attrs)
  {
+bool ret = false;
+
  switch (addr) {
  case DINO_IAR0:
  case DINO_IAR1:
@@ -152,16 +176,22 @@ static bool dino_chip_mem_valid(void *opaque, hwaddr addr,
  case DINO_ICR:
  case DINO_ILR:
  case DINO_IO_CONTROL:
+case DINO_IO_FBB_EN:
  case DINO_IO_ADDR_EN:
  case DINO_PCI_IO_DATA:
-return true;
+case DINO_TOC_ADDR:
+case DINO_GMASK ... DINO_TLTIM:
+ret = true;
+break;
  case DINO_PCI_IO_DATA + 2:
-return size <= 2;
+ret = (size <= 2);
+break;
  case DINO_PCI_IO_DATA + 

Re: [PATCH v12 Kernel 4/7] vfio iommu: Implementation of ioctl to for dirty pages tracking.

2020-02-12 Thread Alex Williamson
On Thu, 13 Feb 2020 02:26:23 +0530
Kirti Wankhede  wrote:

> On 2/10/2020 10:55 PM, Alex Williamson wrote:
> > On Sat, 8 Feb 2020 01:12:31 +0530
> > Kirti Wankhede  wrote:
> >   
> >> VFIO_IOMMU_DIRTY_PAGES ioctl performs three operations:
> >> - Start pinned and unpinned pages tracking while migration is active
> >> - Stop pinned and unpinned dirty pages tracking. This is also used to
> >>stop dirty pages tracking if migration failed or cancelled.
> >> - Get dirty pages bitmap. This ioctl returns bitmap of dirty pages, its
> >>user space application responsibility to copy content of dirty pages
> >>from source to destination during migration.
> >>
> >> To prevent DoS attack, memory for bitmap is allocated per vfio_dma
> >> structure. Bitmap size is calculated considering smallest supported page
> >> size. Bitmap is allocated when dirty logging is enabled for those
> >> vfio_dmas whose vpfn list is not empty or whole range is mapped, in
> >> case of pass-through device.
> >>
> >> There could be multiple options as to when the bitmap should be populated:
> >> * Populate bitmap for already pinned pages when bitmap is allocated for
> >>a vfio_dma with the smallest supported page size. Updates bitmap from
> >>page pinning and unpinning functions. When user application queries
> >>bitmap, check if requested page size is same as page size used to
> >>populated bitmap. If it is equal, copy bitmap. But if not equal,
> >>re-populated bitmap according to requested page size and then copy to
> >>user.
> >>Pros: Bitmap gets populated on the fly after dirty tracking has
> >>  started.
> >>Cons: If requested page size is different than smallest supported
> >>  page size, then bitmap has to be re-populated again, with
> >>  additional overhead of allocating bitmap memory again for
> >>  re-population of bitmap.  
> > 
> > No memory needs to be allocated to re-populate the bitmap.  The bitmap
> > is clear-on-read and by tracking the bitmap in the smallest supported
> > page size we can guarantee that we can fit the user requested bitmap
> > size within the space occupied by that minimal page size range of the
> > bitmap.  Therefore we'd destructively translate the requested region of
> > the bitmap to a different page size, write it out to the user, and
> > clear it.  Also we expect userspace to use the minimum page size almost
> > exclusively, which is optimized by this approach as dirty bit tracking
> > is spread out over each page pinning operation.
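
For illustration only (a sketch, not the actual vfio code), folding a bitmap
tracked at the minimum page size into a coarser user-requested granularity,
clearing it as it is read, could look roughly like this, where 'ratio' is the
requested page size divided by the minimum page size:

    static void fold_and_clear(unsigned long *min_bitmap,
                               unsigned long *user_bitmap,
                               unsigned long npages_min, unsigned long ratio)
    {
        unsigned long i;

        for (i = 0; i < npages_min; i++) {
            /* one user bit is set if any underlying min-size page is dirty */
            if (test_and_clear_bit(i, min_bitmap))
                set_bit(i / ratio, user_bitmap);
        }
    }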
> >   
> >>
> >> * Populate bitmap when bitmap is queried by user application.
> >>Pros: Bitmap is populated with requested page size. This eliminates
> >>  the need to re-populate bitmap if requested page size is
> >>  different than smallest supported pages size.
> >>Cons: There is one time processing time, when bitmap is queried.  
> > 
> > Another significant Con is that the vpfn list needs to track and manage
> > unpinned pages, which makes it more complex and intrusive.  The
> > previous option seems to have both time and complexity advantages,
> > especially in the case we expect to be most common of the user
> > accessing the bitmap with the minimum page size, ie. PAGE_SIZE.  It's
> > also not clear why we pre-allocate the bitmap at all with this approach.
> >   
> >> I prefer the later option, with its simple logic, to eliminate the overhead of
> >> bitmap repopulation in case of different page sizes. The later option is
> >> implemented in this patch.
> > 
> > Hmm, we'll see below, but I'm not convinced based on the above rationale.
> >   
> >> Signed-off-by: Kirti Wankhede 
> >> Reviewed-by: Neo Jia 
> >> ---
> >>   drivers/vfio/vfio_iommu_type1.c | 299 
> >> ++--
> >>   1 file changed, 287 insertions(+), 12 deletions(-)
> >>
> >> diff --git a/drivers/vfio/vfio_iommu_type1.c 
> >> b/drivers/vfio/vfio_iommu_type1.c
> >> index d386461e5d11..df358dc1c85b 100644
> >> --- a/drivers/vfio/vfio_iommu_type1.c
> >> +++ b/drivers/vfio/vfio_iommu_type1.c
> >> @@ -70,6 +70,7 @@ struct vfio_iommu {
> >>unsigned intdma_avail;
> >>boolv2;
> >>boolnesting;
> >> +  booldirty_page_tracking;
> >>   };
> >>   
> >>   struct vfio_domain {
> >> @@ -90,6 +91,7 @@ struct vfio_dma {
> >>boollock_cap;   /* capable(CAP_IPC_LOCK) */
> >>struct task_struct  *task;
> >>struct rb_root  pfn_list;   /* Ex-user pinned pfn list */
> >> +  unsigned long   *bitmap;
> >>   };
> >>   
> >>   struct vfio_group {
> >> @@ -125,6 +127,7 @@ struct vfio_regions {
> >>(!list_empty(&iommu->domain_list))
> >>   
> >>   static int put_pfn(unsigned long pfn, int prot);
> >> +static unsigned long vfio_pgsize_bitmap(struct vfio_iommu *iommu);
> >>   
> >>   /*
> >>* This code handles mapping and unmapping of user data buffers
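
An illustrative aside (not the actual patch code): to make the sizing rule
from the commit message concrete, a per-vfio_dma dirty bitmap sized by the
smallest supported IOMMU page size could be allocated roughly like this. The
helper name is made up, and rounding of partial pages is ignored.

    /* One bit per page of the smallest supported page size. */
    static int vfio_dma_bitmap_alloc(struct vfio_iommu *iommu,
                                     struct vfio_dma *dma)
    {
        unsigned long pgshift = __ffs(vfio_pgsize_bitmap(iommu));
        unsigned long npages = dma->size >> pgshift;

        dma->bitmap = kvzalloc(BITS_TO_LONGS(npages) * sizeof(unsigned long),
                               GFP_KERNEL);
        return dma->bitmap ? 0 : -ENOMEM;
    }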

Re: Question about (and problem with) pflash data access

2020-02-12 Thread Guenter Roeck
On Wed, Feb 12, 2020 at 10:39:30PM +0100, Philippe Mathieu-Daudé wrote:
> Cc'ing Jean-Christophe and Peter.
> 
> On 2/12/20 7:46 PM, Guenter Roeck wrote:
> > Hi,
> > 
> > I have been playing with pflash recently. For the most part it works,
> > but I do have an odd problem when trying to instantiate pflash on sx1.
> > 
> > My data file looks as follows.
> > 
> > 0000000 0001 0000 0000 0000 0000 0000 0000 0000
> > 0000020 0000 0000 0000 0000 0000 0000 0000 0000
> > *
> > 0002000 0002 0000 0000 0000 0000 0000 0000 0000
> > 0002020 0000 0000 0000 0000 0000 0000 0000 0000
> > *
> > 0004000 0003 0000 0000 0000 0000 0000 0000 0000
> > 0004020 0000 0000 0000 0000 0000 0000 0000 0000
> > ...
> > 
> > In the sx1 machine, this becomes:
> > 
> > 0000000 6001 0000 0000 0000 0000 0000 0000 0000
> > 0000020 0000 0000 0000 0000 0000 0000 0000 0000
> > *
> > 0002000 6002 0000 0000 0000 0000 0000 0000 0000
> > 0002020 0000 0000 0000 0000 0000 0000 0000 0000
> > *
> > 0004000 6003 0000 0000 0000 0000 0000 0000 0000
> > 0004020 0000 0000 0000 0000 0000 0000 0000 0000
> > *
> > ...
> > 
> > pflash is instantiated with "-drive 
> > file=flash.32M.test,format=raw,if=pflash".
> > 
> > I don't have much success with pflash tracing - data accesses don't
> > show up there.
> > 
> > I did find a number of problems with the sx1 emulation, but I have no clue
> > what is going on with pflash. As far as I can see pflash works fine on
> > other machines. Can someone give me a hint what to look out for ?
> 
> This is specific to the SX1, introduced in commit 997641a84ff:
> 
>  64 static uint64_t static_read(void *opaque, hwaddr offset,
>  65 unsigned size)
>  66 {
>  67 uint32_t *val = (uint32_t *) opaque;
>  68 uint32_t mask = (4 / size) - 1;
>  69
>  70 return *val >> ((offset & mask) << 3);
>  71 }
> 
> Only guessing, this looks like some hw parity, and I imagine you need to
> write the parity bits in your flash.32M file before starting QEMU, then it
> would appear "normal" within the guest.
> 
I thought this might be related, but that is not the case. I added log
messages, and even ran the code in gdb. static_read() and static_write()
are not executed.

Also,

memory_region_init_io(&cs[0], NULL, &static_ops, &cs0val,
                      "sx1.cs0", OMAP_CS0_SIZE - flash_size);
                                 ^^^^^^^^^^^^^^^^^^^^^^^^^
memory_region_add_subregion(address_space,
                            OMAP_CS0_BASE + flash_size, &cs[0]);
                            ^^^^^^^^^^^^^^^^^^^^^^^^^^

suggests that the code is only executed for memory accesses _after_
the actual flash. The memory tree is:

memory-region: system
  0000000000000000-ffffffffffffffff (prio 0, i/o): system
    0000000000000000-0000000001ffffff (prio 0, romd): omap_sx1.flash0-1
    0000000000000000-0000000001ffffff (prio 0, rom): omap_sx1.flash0-0
    0000000002000000-0000000003ffffff (prio 0, i/o): sx1.cs0

I thought that the dual memory assignment (omap_sx1.flash0-1 and
omap_sx1.flash0-0) might play a role, but removing that didn't make
a difference either (not that I have any idea what it is supposed
to be used for).

Thanks,
Guenter



[Bug 1857811] Re: qemu user static binary seems to lack support for network namespace.

2020-02-12 Thread crocket
qemu-4.0.0

> uname -a
Linux gentoo 4.19.97-gentoo #3 SMP PREEMPT Mon Feb 10 15:09:44 KST 2020 x86_64 
AMD FX(tm)-8300 Eight-Core Processor AuthenticAMD GNU/Linux

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1857811

Title:
  qemu user static binary seems to lack support for network namespace.

Status in QEMU:
  New

Bug description:
  Whenever I execute emerge in gentoo linux in qemu-aarch64 chroot, I
  see the following error message.

  Unable to configure loopback interface: Operation not supported

  If I disable emerge's network-sandbox which utilizes network
  namespace, the error disappears.

To manage notifications about this bug go to:
https://bugs.launchpad.net/qemu/+bug/1857811/+subscriptions



Re: [PATCH] target/ppc/cpu.h: Remove duplicate includes

2020-02-12 Thread Philippe Mathieu-Daudé

On 2/12/20 11:26 PM, BALATON Zoltan wrote:

Commit 74433bf083b added some includes but added them twice. Since
these are guarded against multiple inclusion including them once is
enough.

Signed-off-by: BALATON Zoltan 
---
  target/ppc/cpu.h | 2 --
  1 file changed, 2 deletions(-)

diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h
index 3a1eb76004..07dd2b4da7 100644
--- a/target/ppc/cpu.h
+++ b/target/ppc/cpu.h
@@ -23,8 +23,6 @@
  #include "qemu/int128.h"
  #include "exec/cpu-defs.h"
  #include "cpu-qom.h"
-#include "exec/cpu-defs.h"
-#include "cpu-qom.h"
  
  /* #define PPC_EMULATE_32BITS_HYPV */


Reviewed-by: Philippe Mathieu-Daudé 




Re: [PATCH v3 07/17] hw/arm/allwinner: add Security Identifier device

2020-02-12 Thread Philippe Mathieu-Daudé
On Wed, Feb 12, 2020 at 10:31 PM Niek Linnenbank
 wrote:
>
> Hi Corey,
>
> On Thu, Feb 6, 2020 at 10:09 PM Niek Linnenbank  
> wrote:
>>
>> Hi Corey,
>>
>> On Mon, Feb 3, 2020 at 2:10 PM Corey Minyard  wrote:
>>>
>>> On Sun, Feb 02, 2020 at 10:27:49PM +0100, Niek Linnenbank wrote:
>>> > Hi Corey,
>>> >
>>> > Thanks for reviewing!
>>> >
>>> > On Mon, Jan 20, 2020 at 6:59 PM Corey Minyard  wrote:
>>> >
>>> > > On Sat, Jan 18, 2020 at 04:25:08PM +0100, Philippe Mathieu-Daudé wrote:
>>> > > > Cc'ing Corey/David for good advices about using UUID.
>>> > >
>>> > > Is there any reason you didn't use the built-in qemu UUID for this?  It
>>> > > would simplify things in general.
>>> > >
>>> >
>>> > Currently the Allwinner SID device is using the QemuUUID type from
>>> > include/qemu/uuid.h.
>>> > Is that the build-in UUID you are referring to or should I use something
>>> > else?
>>>
>>> You are using the QemuUUID type, which is of course what you should do,
>>> but you aren't using the UUID generated by qemu (at least that I can find).
>>> That in include/sysemu/sysemu.h and is named qemu_uuid.  Whether you
>>> should use that or not depends on your needs.  If you just need some
>>> uuid, then that's what you should probably use.  If you need something
>>> the user can individually control for this device, for instance, then
>>> that probably won't do.
>>
>>
>> Ah I did not know about the qemu_uuid variable, thanks for pointing that out.
>> Basically the SID identifier is a number that is unique for each board that 
>> comes
>> out of the factory. It is currently used by U-Boot as input to generate a
>> MAC address.
>>
>> If I understand correctly, qemu_uuid is optional and by default zero.
>> However the SID value should not be zero, as otherwise U-Boot can't pick a 
>> MAC address
>> resulting in a non-working ethernet device.
>>
>> Currently the hw/arm/orangepi.c machine specifies a fixed SID to be used for 
>> the emulated board,
>> also containing a prefix (8100c002) that indicates the H3 chipset. One thing 
>> that I am strugling with is that

Suggestion while reading this, you might display a warning if the user
provided UUID doesn't start with 8100c002.

>> I'm not able to override the property using '-global', if hw/arm/orangepi.c 
>> initializes the property with qdev_prop_set_string:
>>
>> $ qemu-system-arm -M orangepi-pc -kernel u-boot -nographic -nic user \
>> -global allwinner-sid.identifier=8100c002-0001-0002-0003-44556688
>>
>> If I don't set the property in hw/arm/orangepi.c, I can set it with 
>> '-global'. Do you perhaps have a
>> recommendation how to improve that? Basically what is needed is that the 
>> machine sets the default
>> property including the chip prefix, and that the user can override it. 
>> Although it is not required for a
>> working emulated board, it would be a nice-to-have that the user can set it.
>
>
> FYI and possibly others who have a similar usecase, I figured out how to do 
> this. In the machine init function,
> after creating the new SoC object, simply check if the identifier has a value:
>
> +if (qemu_uuid_is_null(&s->h3->sid.identifier)) {
> +qdev_prop_set_string(DEVICE(s->h3), "identifier",
> + "8100c002-0001-0002-0003-44556677");

Similarly, display a warning "No UUID provided, using default one..."
(or generate one 8100c002-XXX)?

> +}
>
> That way, if the user passed -global to override it, the machine will not 
> overrule the user's value
> and by default the machine sets an identifier containing the H3 specific chip 
> prefix.
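
An illustrative aside, sketching the warning Philippe suggests above (names,
headers and placement are assumptions, not code from this series; it would sit
in the machine init function, using memcmp from <string.h> and warn_report()
from "qemu/error-report.h"):

    /* Warn when a user-supplied SID lacks the H3 chip prefix 8100c002. */
    static const uint8_t h3_sid_prefix[] = { 0x81, 0x00, 0xc0, 0x02 };

    if (qemu_uuid_is_null(&s->h3->sid.identifier)) {
        qdev_prop_set_string(DEVICE(s->h3), "identifier",
                             "8100c002-0001-0002-0003-44556677");
    } else if (memcmp(s->h3->sid.identifier.data, h3_sid_prefix,
                      sizeof(h3_sid_prefix)) != 0) {
        warn_report("SID identifier does not start with the H3 prefix 8100c002");
    }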
>
[...]
>>> > > > > --- /dev/null
>>> > > > > +++ b/hw/misc/allwinner-sid.c
>>> > > > > @@ -0,0 +1,170 @@
>>> > > > > +/*
>>> > > > > + * Allwinner Security ID emulation
>>> > > > > + *
>>> > > > > + * Copyright (C) 2019 Niek Linnenbank 
>>> > > > > + *
>>> > > > > + * This program is free software: you can redistribute it and/or
>>> > > modify
>>> > > > > + * it under the terms of the GNU General Public License as 
>>> > > > > published
>>> > > by
>>> > > > > + * the Free Software Foundation, either version 2 of the License, 
>>> > > > > or
>>> > > > > + * (at your option) any later version.
>>> > > > > + *
>>> > > > > + * This program is distributed in the hope that it will be useful,
>>> > > > > + * but WITHOUT ANY WARRANTY; without even the implied warranty of
>>> > > > > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
>>> > > > > + * GNU General Public License for more details.
>>> > > > > + *
>>> > > > > + * You should have received a copy of the GNU General Public 
>>> > > > > License
>>> > > > > + * along with this program.  If not, see <
>>> > > http://www.gnu.org/licenses/>.
>>> > > > > + */
>>> > > > > +
>>> > > > > +#include "qemu/osdep.h"
>>> > > > > +#include "qemu/units.h"
>>> > > > > +#include "hw/sysbus.h"
>>> > > > > +#include "migration/vmstate.h"
>>> > > > > +#include "qemu/log.h"
>>> > > > > +#include "qemu/module.h"
>>> > > > > +#include 

[Bug 1862887] Re: qemu does not load pulseaudio modules properly

2020-02-12 Thread Chris Hoy
EDIT: This is for Arch 2020.02.01

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1862887

Title:
  qemu does not load pulseaudio modules properly

Status in QEMU:
  New

Bug description:
  Hello,

  This is on Arch-linux(latest) and the qemu 4.2.0 version made from git clone 
https://github.com/spheenik/qemu.git
  with:
   ./configure --prefix=/opt/qemu-test --python=/usr/bin/python2 
--target-list=x86_64-softmmu 
  --audio-drv-list=pa --disable-werror
  added to the build.

  I've been workin on a passthrough Windows 10 vm this month and have been 
steadily seeing some promising progress. My block/issue at the moment is 
integrating the audio now that the GPU has been succesfully passed through. 
  I've been going back and forth between the audio options for Pulseaudio and 
cannot change the following issue:
  pulseaudio: pa_context_connect() failed
  pulseaudio: Reason: Connection refused
  pulseaudio: Failed to initialize PA context
  libusb: error [udev_hotplug_event] ignoring udev action bind
  I include my current operable build below, followed by some of the options
  that I have tried to correct this; the errors above did not change.

  This is my current operable build:

  #!/bin/bash

  vmname="windows10vm"

  if ps -ef | grep /opt/qemu-test/bin/qemu-system-x86_64 | grep -q 
multifunction=on; then
  echo "A passthrough VM is already running." &
  exit 1

  else

  /opt/qemu-test/bin/qemu-system-x86_64 \
  -m 12G \
  -drive id=disk0,if=virtio,cache=none,format=raw,file=.../win2.img \
  -drive file=.../Win10_1909_English_x64.iso,index=1,media=cdrom \
  -drive file=.../virtio-win-0.1.171.iso,index=2,media=cdrom \
  -boot order=dc \
  -bios /usr/share/ovmf/x64/OVMF_CODE.fd \
  -name $vmname,process=$vmname \
  -machine type=q35,accel=kvm,vmport=off \
  -cpu host,kvm=off \
  -smp 3,sockets=1,cores=3,threads=1 \
  -device virtio-balloon \
  -rtc clock=host,base=localtime \
  -vga none \
  -nographic \
  -serial none \
  -parallel none \
  -soundhw hda \
  -usb \
  -device usb-host,vendorid=...,productid=... \
  -device usb-host,vendorid=...,productid=... \
  -device usb-host,vendorid=...,productid=... \
  -device vfio-pci,host=...,multifunction=on \
  -device vfio-pci,host=... \
  -device e1000,netdev=net0 \
  -netdev user,id=net0,hostfwd=tcp::...-:22 \

  Here's a list of setting combinations I had tried to resolve this:

  #export QEMU_AUDIO_DRV=pa
  #QEMU_ALSA_DAC_BUFFER_SIZE=512 QEMU_ALSA_DAC_PERIOD_SIZE=170
  #export QEMU_PA_SAMPLES=8192 
  #export QEMU_AUDIO_TIMER_PERIOD=99
  #export QEMU_PA_SERVER=/run/user/1000/pulse/native
  #export 
QEMU_PA_SINK=alsa_output.usb-C-Media_Electronics_Inc._USB_Audio_Device-00.analog-stereo
  #export QEMU_PA_SOURCE=input

  -audiodev pa,id=pa1,server=server=/run/user/1000/pulse/native

  At best I have removed an XDG_RUNTIME_DIR error but other than that
  this build has no audio compatibility.

To manage notifications about this bug go to:
https://bugs.launchpad.net/qemu/+bug/1862887/+subscriptions



Re: [PULL 34/35] target/ppc: Use probe_write for DCBZ

2020-02-12 Thread Richard Henderson
On 2/12/20 10:48 AM, Greg Kurz wrote:
> On Mon,  3 Feb 2020 17:11:22 +1100
> David Gibson  wrote:
> 
>> From: Richard Henderson 
>>
>> Using probe_write instead of tlb_vaddr_to_host means that we
>> process watchpoints and notdirty pages more efficiently.
>>
>> Signed-off-by: Richard Henderson 
>> Message-Id: <20200129235040.24022-5-richard.hender...@linaro.org>
>> Tested-by: Howard Spoelstra 
>> Signed-off-by: David Gibson 
>> ---
>>  target/ppc/mem_helper.c | 2 +-
>>  1 file changed, 1 insertion(+), 1 deletion(-)
>>
>> diff --git a/target/ppc/mem_helper.c b/target/ppc/mem_helper.c
>> index 0cb78777e7..98f589552b 100644
>> --- a/target/ppc/mem_helper.c
>> +++ b/target/ppc/mem_helper.c
>> @@ -298,7 +298,7 @@ static void dcbz_common(CPUPPCState *env, target_ulong 
>> addr,
>>  }
>>  
>>  /* Try fast path translate */
>> -haddr = tlb_vaddr_to_host(env, addr, MMU_DATA_STORE, mmu_idx);
>> +haddr = probe_write(env, addr, dcbz_size, mmu_idx, retaddr);
> 
> Hi Richard,
> 
> This one is making coverity unhappy.
> 
> 
> ** CID 1419390:  Memory - corruptions  (OVERRUN)
> 
> 
> __
> *** CID 1419390:  Memory - corruptions  (OVERRUN)
> /target/ppc/mem_helper.c: 301 in dcbz_common()
> 295 /* Check reservation */
> 296 if ((env->reserve_addr & mask) == addr)  {
> 297 env->reserve_addr = (target_ulong)-1ULL;
> 298 }
> 299 
> 300 /* Try fast path translate */
 CID 1419390:  Memory - corruptions  (OVERRUN)
 Overrunning callee's array of size 9 by passing argument "mmu_idx" 
 (which evaluates to 9) in call to "probe_write".  
> 301 haddr = probe_write(env, addr, dcbz_size, mmu_idx, retaddr);
> 302 if (haddr) {
> 303 memset(haddr, 0, dcbz_size);
> 304 } else {
> 305 /* Slow path */
> 306 for (i = 0; i < dcbz_size; i += 8) {
> 
> 
> Can you have a look ?

That's a bit of a mystery, given

#define NB_MMU_MODES 10

So I wonder what array is supposed to be of size 9...

Ho hum.  False positive.  Expanding everything in the coverity gui shows it's
taking the definition from target/xtensa/cpu-param.h.


r~



Re: [PATCH] tracing: only allow -trace to override -D if set

2020-02-12 Thread Philippe Mathieu-Daudé

On 2/12/20 4:34 PM, Stefan Hajnoczi wrote:

On Tue, Feb 11, 2020 at 11:10:54AM +0000, Alex Bennée wrote:

Otherwise any -D settings the user may have made get ignored.

Signed-off-by: Alex Bennée 
---
  trace/control.c | 11 ---
  1 file changed, 8 insertions(+), 3 deletions(-)


Thanks, applied to my tracing tree:
https://github.com/stefanha/qemu/commits/tracing


If possible, please add 'Fixes: e144a605a'.




[PATCH] target/ppc/cpu.h: Remove duplicate includes

2020-02-12 Thread BALATON Zoltan
Commit 74433bf083b added some includes but added them twice. Since
these are guarded against multiple inclusion including them once is
enough.

Signed-off-by: BALATON Zoltan 
---
 target/ppc/cpu.h | 2 --
 1 file changed, 2 deletions(-)

diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h
index 3a1eb76004..07dd2b4da7 100644
--- a/target/ppc/cpu.h
+++ b/target/ppc/cpu.h
@@ -23,8 +23,6 @@
 #include "qemu/int128.h"
 #include "exec/cpu-defs.h"
 #include "cpu-qom.h"
-#include "exec/cpu-defs.h"
-#include "cpu-qom.h"
 
 /* #define PPC_EMULATE_32BITS_HYPV */
 
-- 
2.21.1




Re: [PATCH] acpi: cpuhp: document CPHP_GET_CPU_ID_CMD command

2020-02-12 Thread Laszlo Ersek
Michael,

On 01/29/20 15:06, Igor Mammedov wrote:
> Commit 3a61c8db9d25 introduced CPHP_GET_CPU_ID_CMD command but
> did not sufficiently describe it. Fix it by adding missing command
> documentation.
> 
> Fixes: 3a61c8db9d25 ("acpi: cpuhp: add CPHP_GET_CPU_ID_CMD command")
> Signed-off-by: Igor Mammedov 
> Reviewed-by: Laszlo Ersek 
> ---
>  docs/specs/acpi_cpu_hotplug.txt | 2 ++
>  1 file changed, 2 insertions(+)
> 
> diff --git a/docs/specs/acpi_cpu_hotplug.txt b/docs/specs/acpi_cpu_hotplug.txt
> index a8ce5e7..9bb22d1 100644
> --- a/docs/specs/acpi_cpu_hotplug.txt
> +++ b/docs/specs/acpi_cpu_hotplug.txt
> @@ -94,6 +94,8 @@ write access:
> register in QEMU
>  2: following writes to 'Command data' register set OST status
> register in QEMU
> +3: following reads from 'Command data' and 'Command data 2' 
> return
> +   architecture specific CPU ID value for currently selected CPU.
>  other values: reserved
>  [0x6-0x7] reserved
>  [0x8] Command data: (DWORD access)
> 

can you please merge this?

It's a docs patch, but 3a61c8db9d25 (noted in "Fixes:") had gone in
through your tree.

Thank you!
Laszlo




Re: [PATCH v4 04/20] hw/arm/allwinner-h3: add USB host controller

2020-02-12 Thread Philippe Mathieu-Daudé

On 2/2/20 8:33 PM, Niek Linnenbank wrote:
On Sun, Jan 19, 2020 at 7:44 PM Philippe Mathieu-Daudé 
mailto:phi...@redhat.com>> wrote:


On 1/19/20 7:37 PM, Philippe Mathieu-Daudé wrote:
 > On 1/19/20 1:50 AM, Niek Linnenbank wrote:
 >> The Allwinner H3 System on Chip contains multiple USB 2.0 bus
 >> connections which provide software access using the Enhanced
 >> Host Controller Interface (EHCI) and Open Host Controller
 >> Interface (OHCI) interfaces. This commit adds support for
 >> both interfaces in the Allwinner H3 System on Chip.
 >>
 >> Signed-off-by: Niek Linnenbank mailto:nieklinnenb...@gmail.com>>
 >> Reviewed-by: Gerd Hoffmann mailto:kra...@redhat.com>>
 >> ---
 >>   hw/usb/hcd-ehci.h |  1 +
 >>   include/hw/arm/allwinner-h3.h |  8 ++
 >>   hw/arm/allwinner-h3.c | 52
---
 >>   hw/usb/hcd-ehci-sysbus.c  | 17 
 >>   4 files changed, 74 insertions(+), 4 deletions(-)
 >>
 >> diff --git a/hw/usb/hcd-ehci.h b/hw/usb/hcd-ehci.h
 >> index 0298238f0b..edb59311c4 100644
 >> --- a/hw/usb/hcd-ehci.h
 >> +++ b/hw/usb/hcd-ehci.h
 >> @@ -342,6 +342,7 @@ typedef struct EHCIPCIState {
 >>   #define TYPE_SYS_BUS_EHCI "sysbus-ehci-usb"
 >>   #define TYPE_PLATFORM_EHCI "platform-ehci-usb"
 >>   #define TYPE_EXYNOS4210_EHCI "exynos4210-ehci-usb"
 >> +#define TYPE_AW_H3_EHCI "aw-h3-ehci-usb"
 >>   #define TYPE_TEGRA2_EHCI "tegra2-ehci-usb"
 >>   #define TYPE_PPC4xx_EHCI "ppc4xx-ehci-usb"
 >>   #define TYPE_FUSBH200_EHCI "fusbh200-ehci-usb"
 >> diff --git a/include/hw/arm/allwinner-h3.h
 >> b/include/hw/arm/allwinner-h3.h
 >> index abdc20871a..4f4dcbcd17 100644
 >> --- a/include/hw/arm/allwinner-h3.h
 >> +++ b/include/hw/arm/allwinner-h3.h
 >> @@ -56,6 +56,14 @@ enum {
 >>   AW_H3_SRAM_A1,
 >>   AW_H3_SRAM_A2,
 >>   AW_H3_SRAM_C,
 >> +    AW_H3_EHCI0,
 >> +    AW_H3_OHCI0,
 >> +    AW_H3_EHCI1,
 >> +    AW_H3_OHCI1,
 >> +    AW_H3_EHCI2,
 >> +    AW_H3_OHCI2,
 >> +    AW_H3_EHCI3,
 >> +    AW_H3_OHCI3,
 >>   AW_H3_CCU,
 >>   AW_H3_PIT,
 >>   AW_H3_UART0,
 >> diff --git a/hw/arm/allwinner-h3.c b/hw/arm/allwinner-h3.c
 >> index 8df8e3e05e..f360625ee9 100644
 >> --- a/hw/arm/allwinner-h3.c
 >> +++ b/hw/arm/allwinner-h3.c
 >> @@ -28,6 +28,7 @@
 >>   #include "hw/sysbus.h"
 >>   #include "hw/char/serial.h"
 >>   #include "hw/misc/unimp.h"
 >> +#include "hw/usb/hcd-ehci.h"
 >>   #include "sysemu/sysemu.h"
 >>   #include "hw/arm/allwinner-h3.h"
 >> @@ -36,6 +37,14 @@ const hwaddr allwinner_h3_memmap[] = {
 >>   [AW_H3_SRAM_A1]    = 0x00000000,
 >>   [AW_H3_SRAM_A2]    = 0x00044000,
 >>   [AW_H3_SRAM_C] = 0x00010000,
 >> +    [AW_H3_EHCI0]  = 0x01c1a000,
 >> +    [AW_H3_OHCI0]  = 0x01c1a400,
 >> +    [AW_H3_EHCI1]  = 0x01c1b000,
 >> +    [AW_H3_OHCI1]  = 0x01c1b400,
 >> +    [AW_H3_EHCI2]  = 0x01c1c000,
 >> +    [AW_H3_OHCI2]  = 0x01c1c400,
 >> +    [AW_H3_EHCI3]  = 0x01c1d000,
 >> +    [AW_H3_OHCI3]  = 0x01c1d400,
 >>   [AW_H3_CCU]    = 0x01c20000,
 >>   [AW_H3_PIT]    = 0x01c20c00,
 >>   [AW_H3_UART0]  = 0x01c28000,
 >> @@ -73,10 +82,10 @@ struct AwH3Unimplemented {
 >>   { "msgbox",    0x01c17000, 4 * KiB },
 >>   { "spinlock",  0x01c18000, 4 * KiB },
 >>   { "usb0-otg",  0x01c19000, 4 * KiB },
 >> -    { "usb0",  0x01c1a000, 4 * KiB },
 >> -    { "usb1",  0x01c1b000, 4 * KiB },
 >> -    { "usb2",  0x01c1c000, 4 * KiB },
 >> -    { "usb3",  0x01c1d000, 4 * KiB },
 >> +    { "usb0-phy",  0x01c1a000, 4 * KiB },
 >> +    { "usb1-phy",  0x01c1b000, 4 * KiB },
 >> +    { "usb2-phy",  0x01c1c000, 4 * KiB },
 >> +    { "usb3-phy",  0x01c1d000, 4 * KiB },
 >
 > As in v3 comment, this can be done in patch #1.
 >
 > Reviewed-by: Philippe Mathieu-Daudé mailto:phi...@redhat.com>>
 > Tested-by: Philippe Mathieu-Daudé mailto:phi...@redhat.com>>

Err, this patch is incomplete, when using ./configure
--without-default-devices:

$ qemu-system-arm -M orangepi-pc
qemu-system-arm: invalid accelerator kvm
qemu-system-arm: falling back to tcg
qemu-system-arm: Unknown device 'aw-h3-ehci-usb' for default sysbus
qemu-system-arm: Unknown device 'sysbus-ohci' for default sysbus
Aborted (core dumped)


Thanks for pointing this out, I was not aware at all that the 
--without-default-devices option existed.

It's not in the configure --help message either.


Indeed. This is https://bugs.launchpad.net/qemu/+bug/1836537


I tried to re-produce the error by running:
$ ./configure --target-list=arm-softmmu --without-default-devices; make -j5
$ 

Re: [PATCH v4 00/20] Add Allwinner H3 SoC and Orange Pi PC Machine

2020-02-12 Thread Philippe Mathieu-Daudé

On 2/12/20 10:47 PM, Niek Linnenbank wrote:

Hi all,

Short status update regarding this series.

Currently I am debugging booting NetBSD 9.0-RC2, which was recently
released [1]

The problem is that, apparently, RC2 can't mount the rootfs properly:

[   3.1871510] vfs_mountroot: can't open root device
[   3.2141370] cannot mount root, error = 6

This only happens using the RC2 filesystem image. However, the RC2 
kernel can
properly load & boot the RC1 filesystem image, and the RC1 kernel gives 
the same error
on the RC2 filesystem.  But I dont think its a NetBSD-RC2 issue, because 
on a real

Orange Pi PC hardware board, this problem does not show.


Linux shows:
  console: mmc0: Problem switching card into high-speed mode!

QEMU MMC support is not in good shape, and high-speed SD card support is 
limited. I keep procrastinating at reviewing your SDHCI patch hm.


So you added:

 * DMA transfers
 * Direct FIFO I/O
 * Short/Long format command responses

I'd start diffing the trace output of the following events:

allwinner_sdhost*
sdcard*

with both kernels.



I'm comparing traces and adding more low-level debug output to NetBSD 
and QEMU to find the issue.


Also I processed the review remarks which were sent so far.

Kind regards,
Niek

[1] https://www.netbsd.org/releases/formal-9/NetBSD-9.0.html





Re: Question: SiFive U SPI and SD

2020-02-12 Thread Nikita Ermakov
Hello, Bin, Alistair,

Thank you for the answers!

On Thu, 13 Feb 2020 at 00:16, Alistair Francis  wrote:

> I had a crack at this awhile ago and you can see the patches here (I
> just rebased them):
> https://github.com/alistair23/qemu/tree/mainline/alistair/sifive_spi.next
>
> Debugging failures was really hard in Linux, but now that U-Boot and
> Oreboot (https://github.com/oreboot/oreboot/pull/234) have SPI support
> it is probably easier to debug.
>
> If you wanted to help you could try to debug that branch and figure
> out why it isn't working.
>
Thanks! I will try that.

-- 
Thanks,
Nikita
B8 00 4C CD 21


Re: [PATCH v4 16/20] tests/boot_linux_console: Add a SLOW test booting Ubuntu on OrangePi PC

2020-02-12 Thread Philippe Mathieu-Daudé

On 2/6/20 10:21 PM, Niek Linnenbank wrote:

Hi Philippe,

On Sun, Jan 19, 2020 at 11:30 PM Philippe Mathieu-Daudé > wrote:


On 1/19/20 1:50 AM, Niek Linnenbank wrote:
 > From: Philippe Mathieu-Daudé mailto:f4...@amsat.org>>
 >
 > This test boots Ubuntu Bionic on a OrangePi PC board.
 >
 > As it requires 1GB of storage, and is slow, this test is disabled
 > on automatic CI testing.
 >
 > It is useful for workstation testing. Currently Avocado timeouts too
 > quickly, so we can't run userland commands.
 >
 > The kernel image and DeviceTree blob are built by the Armbian
 > project (based on Debian):
 > https://www.armbian.com/orange-pi-pc/
 >
 > The Ubuntu image is downloaded from:
 > https://dl.armbian.com/orangepipc/Bionic_current

I forgot the image is compressed with 7z, which is not
provided by avocado.utils.archive. This patch requires more checks, see
inlined...

 >
 > This test can be run using:
 >
 >   $ AVOCADO_ALLOW_LARGE_STORAGE=yes \
 >     avocado --show=app,console run -t machine:orangepi-pc \
 >       tests/acceptance/boot_linux_console.py
 >   console: U-Boot SPL 2019.04-armbian (Nov 18 2019 - 23:08:35 +0100)
 >   console: DRAM: 1024 MiB
 >   console: Failed to set core voltage! Can't set CPU frequency
 >   console: Trying to boot from MMC1
 >   console: U-Boot 2019.04-armbian (Nov 18 2019 - 23:08:35 +0100)
Allwinner Technology
 >   console: CPU:   Allwinner H3 (SUN8I )
 >   console: Model: Xunlong Orange Pi PC
 >   console: DRAM:  1 GiB
 >   console: MMC:   mmc@1c0f000: 0
 >   [...]
 >   console: Uncompressing Linux... done, booting the kernel.
 >   console: Booting Linux on physical CPU 0x0
 >   console: Linux version 5.3.9-sunxi (root@builder) (gcc version
8.3.0 (GNU Toolchain for the A-profile Architecture 8.3-2019.03
(arm-rel-8.36))) #19.11.3 SMP Mon Nov 18 18:49:43 CET 2019
 >   console: CPU: ARMv7 Processor [410fc075] revision 5 (ARMv7),
cr=50c5387d
 >   console: CPU: div instructions available: patching division code
 >   console: CPU: PIPT / VIPT nonaliasing data cache, VIPT aliasing
instruction cache
 >   console: OF: fdt: Machine model: Xunlong Orange Pi PC
 >   [...]
 >   console: EXT4-fs (mmcblk0p1): mounted filesystem with writeback
data mode. Opts: (null)
 >   console: done.
 >   console: Begin: Running /scripts/local-bottom ... done.
 >   console: Begin: Running /scripts/init-bottom ... done.
 >   console: systemd[1]: systemd 237 running in system mode. (...)
 >   console: systemd[1]: Detected architecture arm.
 >   console: Welcome to Ubuntu 18.04.3 LTS!
 >   console: systemd[1]: Set hostname to .
 >
 > Signed-off-by: Philippe Mathieu-Daudé mailto:f4...@amsat.org>>
 > [NL: rename in commit message Raspbian to Armbian, remove
vm.set_machine()]
 > [NL: changed test to boot from SD card via BootROM]
 > Signed-off-by: Niek Linnenbank mailto:nieklinnenb...@gmail.com>>
 > ---
 >  tests/acceptance/boot_linux_console.py | 41
++
 >  1 file changed, 41 insertions(+)
 >
 > diff --git a/tests/acceptance/boot_linux_console.py
b/tests/acceptance/boot_linux_console.py
 > index 50294e1675..399d5062db 100644
 > --- a/tests/acceptance/boot_linux_console.py
 > +++ b/tests/acceptance/boot_linux_console.py

This patch needs:

   from avocado.utils.path import find_command, CmdNotFoundError

   P7ZIP_AVAILABLE = True
   try:
       find_command('7z')
   except CmdNotFoundError:
       P7ZIP_AVAILABLE = False

 > @@ -591,6 +591,47 @@ class BootLinuxConsole(Test):
 >          exec_command_and_wait_for_pattern(self, 'reboot',
 >                                                  'reboot:
Restarting system')
 >
 > +    @skipUnless(os.getenv('AVOCADO_ALLOW_LARGE_STORAGE'),
'storage limited')

        @skipUnless(P7ZIP_AVAILABLE, '7z not installed')


Ah indeed, that makes sense. I'll add this check for v5.
Now that we come across this, I'm thinking that functionality like
decompression
might not be specific to the linux tests and perhaps should eventually 
come in a utility/helper .py file.


Yes, but it is quicker if you start this way where you use it, then when 
another uses it he'll refactor the helper. Ideally this should go in the 
avocado.archive package.




Regards,
Niek


 > +    def test_arm_orangepi_bionic(self):
 > +        """
 > +        :avocado: tags=arch:arm
 > +        :avocado: tags=machine:orangepi-pc
 > +        """
 > +
 > +        # This test downloads a 196MB compressed image and expands
it to 932MB...
 > +        image_url = ('https://dl.armbian.com/orangepipc/archive/'
 > +   
  

Re: [PATCH v4 00/20] Add Allwinner H3 SoC and Orange Pi PC Machine

2020-02-12 Thread Niek Linnenbank
Hi all,

Short status update regarding this series.

Currently I am debugging booting NetBSD 9.0-RC2, which was recently released
[1]
The problem is that, apparently, RC2 can't mount the rootfs properly:

[   3.1871510] vfs_mountroot: can't open root device
[   3.2141370] cannot mount root, error = 6

This only happens using the RC2 filesystem image. However, the RC2 kernel
can
properly load & boot the RC1 filesystem image, and the RC1 kernel gives the
same error
on the RC2 filesystem.  But I don't think it's a NetBSD-RC2 issue, because on
a real
Orange Pi PC hardware board, this problem does not show.

I'm comparing traces and adding more low-level debug output to NetBSD and
QEMU to find the issue.

Also I processed the review remarks which were sent so far.

Kind regards,
Niek

[1] https://www.netbsd.org/releases/formal-9/NetBSD-9.0.html

On Sun, Jan 19, 2020 at 1:51 AM Niek Linnenbank 
wrote:

> Dear QEMU developers,
>
> Hereby I would like to contribute the following set of patches to QEMU
> which add support for the Allwinner H3 System on Chip and the
> Orange Pi PC machine. The following features and devices are supported:
>
>  * SMP (Quad Core Cortex A7)
>  * Generic Interrupt Controller configuration
>  * SRAM mappings
>  * SDRAM controller
>  * Real Time Clock
>  * Timer device (re-used from Allwinner A10)
>  * UART
>  * SD/MMC storage controller
>  * EMAC ethernet connectivity
>  * USB 2.0 interfaces
>  * Clock Control Unit
>  * System Control module
>  * Security Identifier device
>
> Functionality related to graphical output such as HDMI, GPU,
> Display Engine and audio are not included. Recently released
> mainline Linux kernels (4.19 up to latest master), mainline U-Boot
> and NetBSD 9.0-RC1 are known to work.
>
> For full details on how to use the Orange Pi PC machine, see the file
> docs/orangepi.rst which is included as a patch in this series.
>
> The contents of this patch series is available on Github at:
>
>   https://github.com/nieklinnenbank/qemu/tree/allwinner-h3-v3
>
> The following are currently known issues in this series:
>
>   - NetBSD 9.0-RC1 reads out year 2050 from RTC, while Linux works fine
>  -> This is due to difference in base year defined by the
> corresponding drivers
>   - RTC date & time is not persistent
>   - boot0 custom Allwinner bootloader not yet working
>   - Watchdog not yet implemented, affects U-Boot 'reset' and
> shutdown/reboot
>  -> This is part of the existing A10 timer that needs to be
> generalized first
>
> Looking forward to your review comments. I will do my best
> to update the patches where needed.
>
> = CHANGELOG =
> v4:
>  * docs/orangepi.rst: correct SDRAM size in board description: 512MB ->
> 1GiB
>  * hw/arm/orangepi.c: correct SDRAM size in commit message: 512MB -> 1GiB
>  * hw/arm/orangepi.c: set .nb_cpus in the orangepi_binfo struct static
> initialisation
>  * hw/arm/orangepi.c: remove .board_id from orangepi_binfo struct
>  * hw/arm/orangepi.c: move BIOS check to top of orangepi_init()
>  * hw/arm/orangepi.c: change clk1-freq argument 2400 to 24 * 1000 *
> 1000 for readability
>  * hw/arm/orangepi.c: rephrase 1GiB check error message
>  * include/hw/arm/allwinner-h3.h: improved comments
>  * hw/arm/allwinner-h3.c: remove duplicate initialization and declaration
> of i variable
>  * hw/arm/allwinner-h3.c: use DEVICE(>cpus[i]) instead of qemu_get_cpu()
>  * hw/arm/allwinner-h3.c: use qdev API instead of object API in CPU
> initialization part
>  * hw/arm/allwinner-h3.c: add note that UARTs are connected to APB2_CLK,
> for future clocktree API
>  * hw/arm/allwinner-h3.c: extend commit message for Boot ROM with
> description for the 32KiB size
>  * hw/rtc/allwinner-rtc.c: correct usage of AwRtcClass->regmap_size for
> checking r/w offset
>  * hw/misc/allwinner-cpucfg.c: remove 64-bit counter, as it is unused by
> Linux/U-Boot/NetBSD
>  * hw/misc/allwinner-cpucfg.c: add CPU_EXCEPTION_LEVEL_ON_RESET constant
>  * hw/misc/allwinner-cpucfg.c: break instead of return after logging guest
> error, for tracing
>  * hw/misc/allwinner-cpucfg.c: reduce duplication in switch/case for
> REG_CPUX_RST_CTRL in write function
>  * include/hw/rtc/allwinner-rtc.h: increase AW_RTC_REGS_MAXADDR to 0x200
>  * include/hw/rtc/allwinner-rtc.h: change type of AwRtcClass->year_offset
> to int, to match struct tm
>  * tests/acceptance/boot_linux_console.py: remove calls to vm.set_machine()
>  * tests/acceptance/boot_linux_console.py: added NetBSD test by Philippe
>  * docs/orangepi.rst: removed some unneeded words/typos
>  * docs/orangepi.rst: remove usage of -j5 for calling make (not all users
> have >= 4 SMP cores)
>  * include/hw/*/allwinner*.h: moved #include "qemu/osdep.h" and unneeded
> #includes to .c file
>
> v3: https://lists.gnu.org/archive/html/qemu-devel/2020-01/msg01534.html
> https://github.com/nieklinnenbank/qemu/tree/allwinner-h3-v3
>
> v2: https://lists.gnu.org/archive/html/qemu-devel/2019-12/msg03265.html
> 

Re: [PATCH] block: make BlockConf.*_size properties 32-bit

2020-02-12 Thread Eric Blake

On 2/11/20 5:54 AM, Roman Kagan wrote:

Devices (virtio-blk, scsi, etc.) and the block layer are happy to use
32-bit for logical_block_size, physical_block_size, and min_io_size.
However, the properties in BlockConf are defined as uint16_t limiting
the values to 32768.

This appears unnecessarily tight, and we've seen bigger block sizes handy
at times.


What larger sizes?  I could see 64k or maybe even 1M block sizes,...



Make them 32 bit instead and lift the limitation.

Signed-off-by: Roman Kagan 
---
  hw/core/qdev-properties.c| 21 -
  include/hw/block/block.h |  8 
  include/hw/qdev-properties.h |  2 +-
  3 files changed, 17 insertions(+), 14 deletions(-)

diff --git a/hw/core/qdev-properties.c b/hw/core/qdev-properties.c
index 7f93bfeb88..5f84e4a3b8 100644
--- a/hw/core/qdev-properties.c
+++ b/hw/core/qdev-properties.c
@@ -716,30 +716,32 @@ const PropertyInfo qdev_prop_pci_devfn = {
  
  /* --- blocksize --- */
  
+#define MIN_BLOCK_SIZE 512

+#define MAX_BLOCK_SIZE 2147483648


...but 2G block sizes are going to have tremendous performance problems.

I'm not necessarily opposed to the widening to a 32-bit type, but think 
you need more justification or a smaller number for the max block size, 
particularly since qcow2 refuses to use cluster sizes larger than 2M and 
it makes no sense to allow a block size larger than a cluster size.
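
As a concrete illustration of that point (not a proposal made in this thread),
keeping the widened 32-bit property type while capping the maximum at qcow2's
largest cluster size would look roughly like this, using MiB from
"qemu/units.h":

    #define MIN_BLOCK_SIZE          512
    #define MAX_BLOCK_SIZE          (2 * MiB)   /* qcow2's largest cluster size */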


--
Eric Blake, Principal Software Engineer
Red Hat, Inc.   +1-919-301-3226
Virtualization:  qemu.org | libvirt.org




Re: [PULL 5/5] qemu_set_log_filename: filename argument may be NULL

2020-02-12 Thread Richard Henderson
On 2/12/20 2:45 AM, Alex Bennée wrote:
> 
> Richard Henderson  writes:
> 
>> On 1/30/20 1:38 PM, Stefan Hajnoczi wrote:
>>> From: Salvador Fandino 
>>>
>>> NULL is a valid log filename used to indicate we want to use stderr
>>> but qemu_set_log_filename (which is called by bsd-user/main.c) was not
>>> handling it correctly.
>>>
>>> That also made redundant a couple of NULL checks in calling code which
>>> have been removed.
>>>
>>> Signed-off-by: Salvador Fandino 
>>> Message-Id: <20200123193626.19956-1-salva...@qindel.com>
>>> Signed-off-by: Stefan Hajnoczi 
>>> ---
>>>  trace/control.c |  4 +---
>>>  util/log.c  | 28 
>>>  vl.c|  5 +
>>>  3 files changed, 18 insertions(+), 19 deletions(-)
>>
>> This patch has broken -D  for *-linux-user.
>> After e144a605a, all logging goes to stderr.
> 
> I posted:
> 
>   Subject: [PATCH] tracing: only allow -trace to override -D if set
>   Date: Tue, 11 Feb 2020 11:10:54 +0000
>   Message-Id: <20200211111054.27538-1-alex.ben...@linaro.org>
> 
> as a fix which partially reverted this.

Thanks, that fixes it for me.


r~



Re: Question about (and problem with) pflash data access

2020-02-12 Thread Philippe Mathieu-Daudé

Cc'ing Jean-Christophe and Peter.

On 2/12/20 7:46 PM, Guenter Roeck wrote:

Hi,

I have been playing with pflash recently. For the most part it works,
but I do have an odd problem when trying to instantiate pflash on sx1.

My data file looks as follows.

0000000 0001 0000 0000 0000 0000 0000 0000 0000
0000020 0000 0000 0000 0000 0000 0000 0000 0000
*
0002000 0002 0000 0000 0000 0000 0000 0000 0000
0002020 0000 0000 0000 0000 0000 0000 0000 0000
*
0004000 0003 0000 0000 0000 0000 0000 0000 0000
0004020 0000 0000 0000 0000 0000 0000 0000 0000
...

In the sx1 machine, this becomes:

0000000 6001 0000 0000 0000 0000 0000 0000 0000
0000020 0000 0000 0000 0000 0000 0000 0000 0000
*
0002000 6002 0000 0000 0000 0000 0000 0000 0000
0002020 0000 0000 0000 0000 0000 0000 0000 0000
*
0004000 6003 0000 0000 0000 0000 0000 0000 0000
0004020 0000 0000 0000 0000 0000 0000 0000 0000
*
...

pflash is instantiated with "-drive file=flash.32M.test,format=raw,if=pflash".

I don't have much success with pflash tracing - data accesses don't
show up there.

I did find a number of problems with the sx1 emulation, but I have no clue
what is going on with pflash. As far as I can see pflash works fine on
other machines. Can someone give me a hint what to look out for ?


This is specific to the SX1, introduced in commit 997641a84ff:

 64 static uint64_t static_read(void *opaque, hwaddr offset,
 65 unsigned size)
 66 {
 67 uint32_t *val = (uint32_t *) opaque;
 68 uint32_t mask = (4 / size) - 1;
 69
 70 return *val >> ((offset & mask) << 3);
 71 }

Only guessing, this looks like some hw parity, and I imagine you need to 
write the parity bits in your flash.32M file before starting QEMU, then 
it would appear "normal" within the guest.
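
For reference, a quick worked reading of that handler (an added illustration,
not part of the original mail), assuming the 32-bit backing value is
*val == 0x44332211:

    size == 4: mask = 0, so the full value 0x44332211 is returned;
    size == 2: mask = 1, so the shift is 8 * (offset & 1), i.e. 0 or 8 bits;
    size == 1: mask = 3, so the shift is 8 * (offset & 3); a byte read at
               offset 2 returns 0x44332211 >> 16, of which the caller keeps
               the low byte, 0x33.

In short, the handler returns the 32-bit backing value shifted right by
8 * (offset & mask) bits.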




