[PATCH] loader: support loading large files (>=2GB)

2022-04-27 Thread Peter Collingbourne
Currently the loader uses int as the return type for various APIs
that deal with file sizes, which leads to an error if the file
size is >=2GB, as it ends up being interpreted as a negative error
code. Furthermore, we do not tolerate short reads, which are possible
at least on Linux when attempting to read such large files in one
syscall.

Fix the first problem by switching to 64-bit types for file sizes,
and fix the second by introducing a loop around the read syscall.
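
For illustration, the short-read handling boils down to a standard retry
loop around read(2); a minimal standalone sketch, not the patch itself:

/* Keep calling read() until the buffer is full, EOF, or an error. */
#include <unistd.h>

static ssize_t read_full(int fd, void *buf, size_t len)
{
    size_t done = 0;
    while (done < len) {
        ssize_t n = read(fd, (char *)buf + done, len - done);
        if (n < 0) {
            return -1;           /* read error */
        }
        if (n == 0) {
            break;               /* unexpected EOF: return what we got */
        }
        done += n;
    }
    return (ssize_t)done;
}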

Signed-off-by: Peter Collingbourne 
---
 hw/core/generic-loader.c |  2 +-
 hw/core/loader.c | 44 
 include/hw/loader.h  | 13 ++--
 3 files changed, 34 insertions(+), 25 deletions(-)

diff --git a/hw/core/generic-loader.c b/hw/core/generic-loader.c
index c666545aa0..0891fa73c3 100644
--- a/hw/core/generic-loader.c
+++ b/hw/core/generic-loader.c
@@ -67,7 +67,7 @@ static void generic_loader_realize(DeviceState *dev, Error **errp)
 GenericLoaderState *s = GENERIC_LOADER(dev);
 hwaddr entry;
 int big_endian;
-int size = 0;
+int64_t size = 0;
 
 s->set_pc = false;
 
diff --git a/hw/core/loader.c b/hw/core/loader.c
index ca2f2431fb..d07c79c400 100644
--- a/hw/core/loader.c
+++ b/hw/core/loader.c
@@ -115,17 +115,17 @@ ssize_t read_targphys(const char *name,
 return did;
 }
 
-int load_image_targphys(const char *filename,
-hwaddr addr, uint64_t max_sz)
+int64_t load_image_targphys(const char *filename,
+hwaddr addr, uint64_t max_sz)
 {
 return load_image_targphys_as(filename, addr, max_sz, NULL);
 }
 
 /* return the size or -1 if error */
-int load_image_targphys_as(const char *filename,
-   hwaddr addr, uint64_t max_sz, AddressSpace *as)
+int64_t load_image_targphys_as(const char *filename,
+   hwaddr addr, uint64_t max_sz, AddressSpace *as)
 {
-int size;
+int64_t size;
 
 size = get_image_size(filename);
 if (size < 0 || size > max_sz) {
@@ -139,9 +139,9 @@ int load_image_targphys_as(const char *filename,
 return size;
 }
 
-int load_image_mr(const char *filename, MemoryRegion *mr)
+int64_t load_image_mr(const char *filename, MemoryRegion *mr)
 {
-int size;
+int64_t size;
 
 if (!memory_access_is_direct(mr, false)) {
 /* Can only load an image into RAM or ROM */
@@ -963,7 +963,8 @@ int rom_add_file(const char *file, const char *fw_dir,
 {
 MachineClass *mc = MACHINE_GET_CLASS(qdev_get_machine());
 Rom *rom;
-int rc, fd = -1;
+int fd = -1;
+size_t bytes_read = 0;
 char devpath[100];
 
 if (as && mr) {
@@ -1003,11 +1004,17 @@ int rom_add_file(const char *file, const char *fw_dir,
 rom->datasize = rom->romsize;
 rom->data = g_malloc0(rom->datasize);
 lseek(fd, 0, SEEK_SET);
-rc = read(fd, rom->data, rom->datasize);
-if (rc != rom->datasize) {
-fprintf(stderr, "rom: file %-20s: read error: rc=%d (expected %zd)\n",
-rom->name, rc, rom->datasize);
-goto err;
+while (bytes_read < rom->datasize) {
+ssize_t rc =
+read(fd, rom->data + bytes_read, rom->datasize - bytes_read);
+if (rc <= 0) {
+fprintf(stderr,
+"rom: file %-20s: read error: rc=%zd at position %zd "
+"(expected size %zd)\n",
+rom->name, rc, bytes_read, rom->datasize);
+goto err;
+}
+bytes_read += rc;
 }
 close(fd);
 rom_insert(rom);
@@ -1671,7 +1678,7 @@ typedef struct {
 HexLine line;
 uint8_t *bin_buf;
 hwaddr *start_addr;
-int total_size;
+int64_t total_size;
 uint32_t next_address_to_write;
 uint32_t current_address;
 uint32_t current_rom_index;
@@ -1767,8 +1774,8 @@ static int handle_record_type(HexParser *parser)
 }
 
 /* return size or -1 if error */
-static int parse_hex_blob(const char *filename, hwaddr *addr, uint8_t *hex_blob,
-  size_t hex_blob_size, AddressSpace *as)
+static int64_t parse_hex_blob(const char *filename, hwaddr *addr, uint8_t *hex_blob,
+  size_t hex_blob_size, AddressSpace *as)
 {
 bool in_process = false; /* avoid re-enter and
   * check whether record begin with ':' */
@@ -1832,11 +1839,12 @@ out:
 }
 
 /* return size or -1 if error */
-int load_targphys_hex_as(const char *filename, hwaddr *entry, AddressSpace *as)
+int64_t load_targphys_hex_as(const char *filename, hwaddr *entry,
+ AddressSpace *as)
 {
 gsize hex_blob_size;
 gchar *hex_blob;
-int total_size = 0;
+int64_t total_size = 0;
 
if (!g_file_get_contents(filename, &hex_blob, &hex_blob_size, NULL)) {
 return -1;
diff --git a/include/hw/loader.h b/include/hw/loader.h
index 5572108ba

[PATCH v2] target/arm: Implement MTE3

2021-06-16 Thread Peter Collingbourne
MTE3 introduces an asymmetric tag checking mode, in which loads are
checked synchronously and stores are checked asynchronously. Add
support for it.
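
Conceptually, the new mode just dispatches on the access direction; a
minimal standalone sketch of that rule (illustration only, not the QEMU
helper):

#include <stdbool.h>

/* MTE3 asymmetric mode, illustrated: loads take the synchronous
 * (precise) fault path, stores only set the asynchronous flag. */
typedef enum { TAG_FAIL_SYNC, TAG_FAIL_ASYNC } TagFailKind;

static TagFailKind mte3_fail_kind(bool is_store)
{
    return is_store ? TAG_FAIL_ASYNC : TAG_FAIL_SYNC;
}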

Signed-off-by: Peter Collingbourne 
Reviewed-by: Richard Henderson 
---
v2:
- remove unused argument

 target/arm/cpu64.c  |  2 +-
 target/arm/mte_helper.c | 82 +
 2 files changed, 52 insertions(+), 32 deletions(-)

diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c
index 1c23187d1a..c7a1626bec 100644
--- a/target/arm/cpu64.c
+++ b/target/arm/cpu64.c
@@ -683,7 +683,7 @@ static void aarch64_max_initfn(Object *obj)
  * during realize if the board provides no tag memory, much like
  * we do for EL2 with the virtualization=on property.
  */
-t = FIELD_DP64(t, ID_AA64PFR1, MTE, 2);
+t = FIELD_DP64(t, ID_AA64PFR1, MTE, 3);
 cpu->isar.id_aa64pfr1 = t;
 
 t = cpu->isar.id_aa64mmfr0;
diff --git a/target/arm/mte_helper.c b/target/arm/mte_helper.c
index 9e615cc513..724175210b 100644
--- a/target/arm/mte_helper.c
+++ b/target/arm/mte_helper.c
@@ -538,13 +538,50 @@ void HELPER(stzgm_tags)(CPUARMState *env, uint64_t ptr, uint64_t val)
 }
 }
 
+static void mte_sync_check_fail(CPUARMState *env, uint32_t desc,
+uint64_t dirty_ptr, uintptr_t ra)
+{
+int is_write, syn;
+
+env->exception.vaddress = dirty_ptr;
+
+is_write = FIELD_EX32(desc, MTEDESC, WRITE);
+syn = syn_data_abort_no_iss(arm_current_el(env) != 0, 0, 0, 0, 0, is_write,
+0x11);
+raise_exception_ra(env, EXCP_DATA_ABORT, syn, exception_target_el(env), ra);
+g_assert_not_reached();
+}
+
+static void mte_async_check_fail(CPUARMState *env, uint64_t dirty_ptr,
+ uintptr_t ra, ARMMMUIdx arm_mmu_idx, int el)
+{
+int select;
+
+if (regime_has_2_ranges(arm_mmu_idx)) {
+select = extract64(dirty_ptr, 55, 1);
+} else {
+select = 0;
+}
+env->cp15.tfsr_el[el] |= 1 << select;
+#ifdef CONFIG_USER_ONLY
+/*
+ * Stand in for a timer irq, setting _TIF_MTE_ASYNC_FAULT,
+ * which then sends a SIGSEGV when the thread is next scheduled.
+ * This cpu will return to the main loop at the end of the TB,
+ * which is rather sooner than "normal".  But the alternative
+ * is waiting until the next syscall.
+ */
+qemu_cpu_kick(env_cpu(env));
+#endif
+}
+
 /* Record a tag check failure.  */
 static void mte_check_fail(CPUARMState *env, uint32_t desc,
uint64_t dirty_ptr, uintptr_t ra)
 {
 int mmu_idx = FIELD_EX32(desc, MTEDESC, MIDX);
 ARMMMUIdx arm_mmu_idx = core_to_aa64_mmu_idx(mmu_idx);
-int el, reg_el, tcf, select, is_write, syn;
+int el, reg_el, tcf;
 uint64_t sctlr;
 
 reg_el = regime_el(env, arm_mmu_idx);
@@ -564,14 +601,8 @@ static void mte_check_fail(CPUARMState *env, uint32_t desc,
 switch (tcf) {
 case 1:
 /* Tag check fail causes a synchronous exception. */
-env->exception.vaddress = dirty_ptr;
-
-is_write = FIELD_EX32(desc, MTEDESC, WRITE);
-syn = syn_data_abort_no_iss(arm_current_el(env) != 0, 0, 0, 0, 0,
-is_write, 0x11);
-raise_exception_ra(env, EXCP_DATA_ABORT, syn,
-   exception_target_el(env), ra);
-/* noreturn, but fall through to the assert anyway */
+mte_sync_check_fail(env, desc, dirty_ptr, ra);
+break;
 
 case 0:
 /*
@@ -583,30 +614,19 @@ static void mte_check_fail(CPUARMState *env, uint32_t desc,
 
 case 2:
 /* Tag check fail causes asynchronous flag set.  */
-if (regime_has_2_ranges(arm_mmu_idx)) {
-select = extract64(dirty_ptr, 55, 1);
-} else {
-select = 0;
-}
-env->cp15.tfsr_el[el] |= 1 << select;
-#ifdef CONFIG_USER_ONLY
-/*
- * Stand in for a timer irq, setting _TIF_MTE_ASYNC_FAULT,
- * which then sends a SIGSEGV when the thread is next scheduled.
- * This cpu will return to the main loop at the end of the TB,
- * which is rather sooner than "normal".  But the alternative
- * is waiting until the next syscall.
- */
-qemu_cpu_kick(env_cpu(env));
-#endif
+mte_async_check_fail(env, dirty_ptr, ra, arm_mmu_idx, el);
 break;
 
-default:
-/* Case 3: Reserved. */
-qemu_log_mask(LOG_GUEST_ERROR,
-  "Tag check failure with SCTLR_EL%d.TCF%s "
-  "set to reserved value %d\n",
-  reg_el, el ? "" : "0", tcf);
+case 3:
+/*
+ * Tag check fail causes asynchronous flag set for stores, or
+ * a synchronous exception for loads.
+ */
+if (FIELD_EX32(desc, MTEDESC, WRITE))

[PATCH] target/arm: Implement MTE3

2021-06-11 Thread Peter Collingbourne
MTE3 introduces an asymmetric tag checking mode, in which loads are
checked synchronously and stores are checked asynchronously. Add
support for it.

Signed-off-by: Peter Collingbourne 
---
 target/arm/cpu64.c  |  2 +-
 target/arm/mte_helper.c | 83 ++---
 2 files changed, 53 insertions(+), 32 deletions(-)

diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c
index 1c23187d1a..c7a1626bec 100644
--- a/target/arm/cpu64.c
+++ b/target/arm/cpu64.c
@@ -683,7 +683,7 @@ static void aarch64_max_initfn(Object *obj)
  * during realize if the board provides no tag memory, much like
  * we do for EL2 with the virtualization=on property.
  */
-t = FIELD_DP64(t, ID_AA64PFR1, MTE, 2);
+t = FIELD_DP64(t, ID_AA64PFR1, MTE, 3);
 cpu->isar.id_aa64pfr1 = t;
 
 t = cpu->isar.id_aa64mmfr0;
diff --git a/target/arm/mte_helper.c b/target/arm/mte_helper.c
index 166b9d260f..7b76d871ff 100644
--- a/target/arm/mte_helper.c
+++ b/target/arm/mte_helper.c
@@ -538,13 +538,51 @@ void HELPER(stzgm_tags)(CPUARMState *env, uint64_t ptr, uint64_t val)
 }
 }
 
+static void mte_sync_check_fail(CPUARMState *env, uint32_t desc,
+uint64_t dirty_ptr, uintptr_t ra)
+{
+int is_write, syn;
+
+env->exception.vaddress = dirty_ptr;
+
+is_write = FIELD_EX32(desc, MTEDESC, WRITE);
+syn = syn_data_abort_no_iss(arm_current_el(env) != 0, 0, 0, 0, 0, is_write,
+0x11);
+raise_exception_ra(env, EXCP_DATA_ABORT, syn, exception_target_el(env), ra);
+g_assert_not_reached();
+}
+
+static void mte_async_check_fail(CPUARMState *env, uint32_t desc,
+ uint64_t dirty_ptr, uintptr_t ra,
+ ARMMMUIdx arm_mmu_idx, int el)
+{
+int select;
+
+if (regime_has_2_ranges(arm_mmu_idx)) {
+select = extract64(dirty_ptr, 55, 1);
+} else {
+select = 0;
+}
+env->cp15.tfsr_el[el] |= 1 << select;
+#ifdef CONFIG_USER_ONLY
+/*
+ * Stand in for a timer irq, setting _TIF_MTE_ASYNC_FAULT,
+ * which then sends a SIGSEGV when the thread is next scheduled.
+ * This cpu will return to the main loop at the end of the TB,
+ * which is rather sooner than "normal".  But the alternative
+ * is waiting until the next syscall.
+ */
+qemu_cpu_kick(env_cpu(env));
+#endif
+}
+
 /* Record a tag check failure.  */
 static void mte_check_fail(CPUARMState *env, uint32_t desc,
uint64_t dirty_ptr, uintptr_t ra)
 {
 int mmu_idx = FIELD_EX32(desc, MTEDESC, MIDX);
 ARMMMUIdx arm_mmu_idx = core_to_aa64_mmu_idx(mmu_idx);
-int el, reg_el, tcf, select, is_write, syn;
+int el, reg_el, tcf;
 uint64_t sctlr;
 
 reg_el = regime_el(env, arm_mmu_idx);
@@ -564,14 +602,8 @@ static void mte_check_fail(CPUARMState *env, uint32_t desc,
 switch (tcf) {
 case 1:
 /* Tag check fail causes a synchronous exception. */
-env->exception.vaddress = dirty_ptr;
-
-is_write = FIELD_EX32(desc, MTEDESC, WRITE);
-syn = syn_data_abort_no_iss(arm_current_el(env) != 0, 0, 0, 0, 0,
-is_write, 0x11);
-raise_exception_ra(env, EXCP_DATA_ABORT, syn,
-   exception_target_el(env), ra);
-/* noreturn, but fall through to the assert anyway */
+mte_sync_check_fail(env, desc, dirty_ptr, ra);
+break;
 
 case 0:
 /*
@@ -583,30 +615,19 @@ static void mte_check_fail(CPUARMState *env, uint32_t desc,
 
 case 2:
 /* Tag check fail causes asynchronous flag set.  */
-if (regime_has_2_ranges(arm_mmu_idx)) {
-select = extract64(dirty_ptr, 55, 1);
-} else {
-select = 0;
-}
-env->cp15.tfsr_el[el] |= 1 << select;
-#ifdef CONFIG_USER_ONLY
-/*
- * Stand in for a timer irq, setting _TIF_MTE_ASYNC_FAULT,
- * which then sends a SIGSEGV when the thread is next scheduled.
- * This cpu will return to the main loop at the end of the TB,
- * which is rather sooner than "normal".  But the alternative
- * is waiting until the next syscall.
- */
-qemu_cpu_kick(env_cpu(env));
-#endif
+mte_async_check_fail(env, desc, dirty_ptr, ra, arm_mmu_idx, el);
 break;
 
-default:
-/* Case 3: Reserved. */
-qemu_log_mask(LOG_GUEST_ERROR,
-  "Tag check failure with SCTLR_EL%d.TCF%s "
-  "set to reserved value %d\n",
-  reg_el, el ? "" : "0", tcf);
+case 3:
+/*
+ * Tag check fail causes asynchronous flag set for stores, or
+ * a synchronous exception for loads.
+ */
+if (FIELD_EX32(desc, MTEDESC, WRITE)) {
+  

[Bug 1921948] Re: MTE tags not checked properly for unaligned accesses at EL1

2021-06-09 Thread Peter Collingbourne
I happened to notice that you're moving your bug tracker to gitlab so I
refiled this issue over there: https://gitlab.com/qemu-project/qemu/-/issues/403

** Bug watch added: gitlab.com/qemu-project/qemu/-/issues #403
   https://gitlab.com/qemu-project/qemu/-/issues/403

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1921948

Title:
  MTE tags not checked properly for unaligned accesses at EL1

Status in QEMU:
  Confirmed

Bug description:
  For kernel memory accesses that span across two memory granules,
  QEMU's MTE implementation only checks the tag of the first granule but
  not of the second one.
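
  (Illustration only, not QEMU code: MTE granules are 16 bytes, so a
  correct checker has to cover every granule the access touches, e.g.:

      #include <stdbool.h>
      #include <stddef.h>
      #include <stdint.h>

      static bool spans_two_granules(uint64_t addr, size_t size)
      {
          uint64_t first = addr >> 4;              /* granule of first byte */
          uint64_t last  = (addr + size - 1) >> 4; /* granule of last byte */
          return first != last;  /* both granules' tags need checking */
      }

  In the reproducer below, the 8-byte load at offset 124 of the 128-byte
  allocation touches granules 7 and 8, and granule 8 lies outside the
  allocation.)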

  To reproduce this, build the Linux kernel with CONFIG_KASAN_HW_TAGS
  enabled, apply the patch below, and boot the kernel:

  diff --git a/sound/last.c b/sound/last.c
  index f0bb98780e70..04745cb30b74 100644
  --- a/sound/last.c
  +++ b/sound/last.c
  @@ -5,12 +5,18 @@
*/
   
   #include 
  +#include 
   #include 
   
   static int __init alsa_sound_last_init(void)
   {
  struct snd_card *card;
  int idx, ok = 0;
  +
  +   char *ptr = kmalloc(128, GFP_KERNEL);
  +   pr_err("KASAN report should follow:\n");
  +   *(volatile unsigned long *)(ptr + 124);
  +   kfree(ptr);
  
  printk(KERN_INFO "ALSA device list:\n");
  for (idx = 0; idx < SNDRV_CARDS; idx++) {

  KASAN tags the 128 allocated bytes with the same tag as the returned
  pointer. The memory granule that follows the 128 allocated bytes has a
  different tag (with 1/15 probability).

  Expected result: a tag fault is detected and a KASAN report is printed when 
accessing bytes [124, 130).
  Observed result: no tag fault is detected and no KASAN report is printed.

  Here are the flags that I use to run QEMU if they matter:

  qemu-system-aarch64 -s -machine virt,mte=on -cpu max -m 2G -smp 2 -net
  user,host=10.0.2.10,hostfwd=tcp:127.0.0.1:10021-:22 -net nic
  -nographic -kernel ./Image -append "console=ttyAMA0 root=/dev/vda
  earlyprintk=serial" -drive file=./fs.img,format=raw,if=virtio -no-
  shutdown -no-reboot

To manage notifications about this bug go to:
https://bugs.launchpad.net/qemu/+bug/1921948/+subscriptions



[Bug 1921948] Re: MTE tags not checked properly for unaligned accesses at EL1

2021-05-21 Thread Peter Collingbourne
(s/PR_MTE_TCF_ASYNC/PR_MTE_TCF_SYNC/g in the above program -- but the
actual constant is correct)




[Bug 1921948] Re: MTE tags not checked properly for unaligned accesses at EL1

2021-05-21 Thread Peter Collingbourne
It looks like there's still a bug here: I'm seeing false positive MTE
faults for unaligned accesses that touch multiple pages. This userspace
assembly program demonstrates the problem, but for some reason it only
reproduces some of the time for me:

.arch_extension memtag

.globl _start
_start:
mov x0, #0x37 // PR_SET_TAGGED_ADDR_CTRL
mov x1, #0x3 // PR_TAGGED_ADDR_ENABLE | PR_MTE_TCF_ASYNC
mov x2, #0
mov x3, #0
mov x4, #0
mov x8, #0xa7 // prctl
svc #0

mov x0, xzr
mov w1, #0x2000
mov w2, #0x23 // PROT_READ|PROT_WRITE|PROT_MTE
mov w3, #0x22 // MAP_PRIVATE|MAP_ANONYMOUS
mov w4, #0x
mov x5, xzr
mov x8, #0xde // mmap
svc #0

mov x1, #(1 << 56)
add x0, x0, x1
add x0, x0, #0xff0
stg x0, [x0]
stg x0, [x0, #16]
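// The 8-byte store below begins 4 bytes before the page boundary
// (offset 0xffc), so it spans the boundary and both granules tagged
// above; all tags match, so no tag check fault should be reported.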
str x1, [x0, #12]

mov x0, #0
mov x8, #0x5d // exit
svc #0

** Changed in: qemu
   Status: Fix Committed => Confirmed




[Bug 1921948] Re: MTE tags not checked properly for unaligned accesses at EL1

2021-03-30 Thread Peter Collingbourne
The flags that you need to pass to FVP to enable MTE are listed near the
end of the README here:

https://cs.android.com/android/platform/superproject/+/master:device/generic/goldfish/fvpbase/README.md




[PATCH] target/arm: Use TCF0 and TFSRE0 for unprivileged tag checks

2021-02-19 Thread Peter Collingbourne via
Section D6.7 of the ARM ARM states:

For the purpose of determining Tag Check Fault handling, unprivileged
load and store instructions are treated as if executed at EL0 when
executed at either:
- EL1, when the Effective value of PSTATE.UAO is 0.
- EL2, when both the Effective value of HCR_EL2.{E2H, TGE} is {1, 1}
  and the Effective value of PSTATE.UAO is 0.

ARM has confirmed a defect in the pseudocode function
AArch64.TagCheckFault that makes it inconsistent with the above
wording. The remedy is to adjust references to PSTATE.EL in that
function to instead refer to AArch64.AccessUsesEL(acctype), so
that unprivileged instructions use SCTLR_EL1.TCF0 and TFSRE0_EL1.
The exception type for synchronous tag check faults remains unchanged.

This patch implements the described change by partially reverting
commits 50244cc76abc and cc97b0019bb5.
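
Informally, the rule amounts to: use SCTLR_EL1.TCF0 (and TFSRE0_EL1)
whenever the access is effectively performed with EL0 privileges. A rough
standalone sketch of that field selection (placeholder name, not the QEMU
implementation):

#include <stdbool.h>
#include <stdint.h>

/* TCF is SCTLR_ELx[41:40], TCF0 is SCTLR_ELx[39:38]; pick the field
 * according to the EL the access is treated as executing at. */
static int tcf_for_access(uint64_t sctlr, bool access_uses_el0)
{
    return access_uses_el0 ? (int)((sctlr >> 38) & 3)
                           : (int)((sctlr >> 40) & 3);
}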

Signed-off-by: Peter Collingbourne 
---
 target/arm/helper.c |  2 +-
 target/arm/mte_helper.c | 13 +
 2 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/target/arm/helper.c b/target/arm/helper.c
index 0e1a3b9421..b0223bda4c 100644
--- a/target/arm/helper.c
+++ b/target/arm/helper.c
@@ -13133,7 +13133,7 @@ static uint32_t rebuild_hflags_a64(CPUARMState *env, int el, int fp_el,
 if (FIELD_EX32(flags, TBFLAG_A64, UNPRIV)
 && tbid
 && !(env->pstate & PSTATE_TCO)
-&& (sctlr & SCTLR_TCF)
+&& (sctlr & SCTLR_TCF0)
 && allocation_tag_access_enabled(env, 0, sctlr)) {
 flags = FIELD_DP32(flags, TBFLAG_A64, MTE0_ACTIVE, 1);
 }
diff --git a/target/arm/mte_helper.c b/target/arm/mte_helper.c
index 1c569336ea..0bbb9ec346 100644
--- a/target/arm/mte_helper.c
+++ b/target/arm/mte_helper.c
@@ -550,10 +550,14 @@ static void mte_check_fail(CPUARMState *env, uint32_t desc,
 reg_el = regime_el(env, arm_mmu_idx);
 sctlr = env->cp15.sctlr_el[reg_el];
 
-el = arm_current_el(env);
-if (el == 0) {
+switch (arm_mmu_idx) {
+case ARMMMUIdx_E10_0:
+case ARMMMUIdx_E20_0:
+el = 0;
 tcf = extract64(sctlr, 38, 2);
-} else {
+break;
+default:
+el = reg_el;
 tcf = extract64(sctlr, 40, 2);
 }
 
@@ -570,7 +574,8 @@ static void mte_check_fail(CPUARMState *env, uint32_t desc,
 env->exception.vaddress = dirty_ptr;
 
 is_write = FIELD_EX32(desc, MTEDESC, WRITE);
-syn = syn_data_abort_no_iss(el != 0, 0, 0, 0, 0, is_write, 0x11);
+syn = syn_data_abort_no_iss(arm_current_el(env) != 0, 0, 0, 0, 0,
+is_write, 0x11);
 raise_exception(env, EXCP_DATA_ABORT, syn, exception_target_el(env));
 /* noreturn, but fall through to the assert anyway */
 
-- 
2.30.0.617.g56c4b15f3c-goog




Re: [PATCH v6 09/11] arm/hvf: Add a WFI handler

2021-02-10 Thread Peter Collingbourne
On Thu, Jan 28, 2021 at 8:25 AM Peter Maydell  wrote:
>
> On Wed, 20 Jan 2021 at 22:44, Alexander Graf  wrote:
> >
> > From: Peter Collingbourne 
> >
> > Sleep on WFI until the VTIMER is due but allow ourselves to be woken
> > up on IPI.
> >
> > In this implementation IPI is blocked on the CPU thread at startup and
> > pselect() is used to atomically unblock the signal and begin sleeping.
> > The signal is sent unconditionally so there's no need to worry about
> > races between actually sleeping and the "we think we're sleeping"
> > state. It may lead to an extra wakeup but that's better than missing
> > it entirely.
> >
> > Signed-off-by: Peter Collingbourne 
> > [agraf: Remove unused 'set' variable, always advance PC on WFX trap]
> > Signed-off-by: Alexander Graf 
> > Acked-by: Roman Bolshakov 
> > ---
> >  accel/hvf/hvf-cpus.c |  5 ++--
> >  include/sysemu/hvf_int.h |  1 +
> >  target/arm/hvf/hvf.c | 56 
> >  3 files changed, 59 insertions(+), 3 deletions(-)
> >
> > diff --git a/accel/hvf/hvf-cpus.c b/accel/hvf/hvf-cpus.c
> > index 6d70ee742e..abef6a58f7 100644
> > --- a/accel/hvf/hvf-cpus.c
> > +++ b/accel/hvf/hvf-cpus.c
> > @@ -322,15 +322,14 @@ static int hvf_init_vcpu(CPUState *cpu)
> >  cpu->hvf = g_malloc0(sizeof(*cpu->hvf));
> >
> >  /* init cpu signals */
> > -sigset_t set;
> >  struct sigaction sigact;
> >
> >  memset(&sigact, 0, sizeof(sigact));
> >  sigact.sa_handler = dummy_signal;
> >  sigaction(SIG_IPI, &sigact, NULL);
> >
> > -pthread_sigmask(SIG_BLOCK, NULL, &set);
> > -sigdelset(&set, SIG_IPI);
> > +pthread_sigmask(SIG_BLOCK, NULL, &cpu->hvf->unblock_ipi_mask);
> > +sigdelset(&cpu->hvf->unblock_ipi_mask, SIG_IPI);
> >
> >  #ifdef __aarch64__
> >  r = hv_vcpu_create(&cpu->hvf->fd, (hv_vcpu_exit_t **)&cpu->hvf->exit, 
> > NULL);
> > diff --git a/include/sysemu/hvf_int.h b/include/sysemu/hvf_int.h
> > index c2ac6c8f97..7a397fe85a 100644
> > --- a/include/sysemu/hvf_int.h
> > +++ b/include/sysemu/hvf_int.h
> > @@ -51,6 +51,7 @@ extern HVFState *hvf_state;
> >  struct hvf_vcpu_state {
> >  uint64_t fd;
> >  void *exit;
> > +sigset_t unblock_ipi_mask;
> >  };
> >
> >  void assert_hvf_ok(hv_return_t ret);
> > diff --git a/target/arm/hvf/hvf.c b/target/arm/hvf/hvf.c
> > index 8f18efe856..f0850ab14a 100644
> > --- a/target/arm/hvf/hvf.c
> > +++ b/target/arm/hvf/hvf.c
> > @@ -2,6 +2,7 @@
> >   * QEMU Hypervisor.framework support for Apple Silicon
> >
> >   * Copyright 2020 Alexander Graf 
> > + * Copyright 2020 Google LLC
> >   *
> >   * This work is licensed under the terms of the GNU GPL, version 2 or 
> > later.
> >   * See the COPYING file in the top-level directory.
> > @@ -17,6 +18,8 @@
> >  #include "sysemu/hvf_int.h"
> >  #include "sysemu/hw_accel.h"
> >
> > +#include 
> > +
> >  #include "exec/address-spaces.h"
> >  #include "hw/irq.h"
> >  #include "qemu/main-loop.h"
> > @@ -411,6 +414,7 @@ int hvf_arch_init_vcpu(CPUState *cpu)
> >
> >  void hvf_kick_vcpu_thread(CPUState *cpu)
> >  {
> > +cpus_kick_thread(cpu);
> >  hv_vcpus_exit(&cpu->hvf->fd, 1);
> >  }
> >
> > @@ -466,6 +470,18 @@ static int hvf_inject_interrupts(CPUState *cpu)
> >  return 0;
> >  }
> >
> > +static void hvf_wait_for_ipi(CPUState *cpu, struct timespec *ts)
> > +{
> > +/*
> > + * Use pselect to sleep so that other threads can IPI us while we're
> > + * sleeping.
> > + */
> > +qatomic_mb_set(&cpu->thread_kicked, false);
> > +qemu_mutex_unlock_iothread();
> > +pselect(0, 0, 0, 0, ts, &cpu->hvf->unblock_ipi_mask);
> > +qemu_mutex_lock_iothread();
> > +}
>
> It seems a bit odd that this is specific to Arm hvf.
> Doesn't x86 hvf need "pause until interrupt" functionality ?

I'm not very familiar with x86 HVF (and I don't have an x86 Mac to
test with), but my reading of the x86 HVF code is that there appear to
be no exits that put the system to sleep (not even MWAIT which I think
is the x86 equivalent of WFI -- the code just appears to busy loop). I
think that implies that either we actually busy loop on x86 (seems
unlikely to me since I guess someone would have noticed by now) or
MWAIT does not actually result in a VM exit, and HVF itself goes to
sleep inside hv_vcpu_run

[Bug 1907137] Re: LDTR not properly emulated when MTE tag checks enabled at EL0

2020-12-21 Thread Peter Collingbourne
Thanks for the patch. I've verified that it fixes the assertion failure
with both TCF0=1 and TCF0=2.

You can disregard the kernel panic from comment #1 since it turns out
that it was from an older build of QEMU that did not have 50244cc76abc.

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1907137

Title:
  LDTR not properly emulated when MTE tag checks enabled at EL0

Status in QEMU:
  In Progress




[Bug 1907137] Re: LDTR not properly emulated when MTE tag checks enabled at EL0

2020-12-21 Thread Peter Collingbourne
Thanks, I will try that patch.

Apologies, the instructions assume some familiarity with building
Android. You will find instructions for downloading the "repo" tool and
the Android source tree here:

https://source.android.com/setup/build/downloading

This isn't the first time I've received this kind of feedback. I will
add a link to that page to the document.




[Bug 1907137] Re: LDTR not properly emulated when MTE tag checks enabled at EL0

2020-12-21 Thread Peter Collingbourne
This isn't a fork of qemu, it was an upstream checkout based on commit
944fdc5e27a5b5adbb765891e8e70e88ba9a00ec (well, okay, I did have the ARM
HVF series applied to this checkout, but I wouldn't expect that to
affect TCG). I verified that 50244cc76abc is present in my checkout.

Let me see if I can reproduce with the current upstream master.




[Bug 1907137] Re: LDTR not properly emulated when MTE tag checks enabled at EL0

2020-12-17 Thread Peter Collingbourne
The workaround patch above is insufficient if I change userspace to set
TCF0=1. With that I get a kernel panic:

[   13.336255][C0] Bad mode in Synchronous Abort handler detected on CPU0, 
code 0x9211 -- DABT (lower EL)
[   13.337437][C0] CPU: 0 PID: 1 Comm: init Not tainted 
5.10.0-rc7-mainline-00300-gf4328758abb6 #1
[   13.338086][C0] Hardware name: linux,dummy-virt (DT)
[   13.338948][C0] pstate: 2045 (nzCv daif +PAN -UAO -TCO BTYPE=--)
[   13.339951][C0] pc : __arch_copy_from_user+0x1e4/0x340
[   13.340483][C0] lr : _copy_from_user+0xbc/0x564
[   13.340930][C0] sp : ffc01000bda0
[   13.341385][C0] x29: ffc01000bda0
[   13.342295][C0] x28: ff804011c100
[   13.342951][C0]
[   13.343321][C0] x27: 
[   13.343759][C0] x26: 
[   13.344178][C0]
[   13.344513][C0] x25: 
[   13.344954][C0] x24: 
[   13.345382][C0]
[   13.345713][C0] x23: 037e18aca850
[   13.346153][C0] x22: 037e18aca860
[   13.346809][C0]
[   13.347144][C0] x21: ff8043d1ef80
[   13.347596][C0] x20: 037e18aca850
[   13.348023][C0]
[   13.348354][C0] x19: ff8043295000
[   13.348806][C0] x18: ff8040103c38
[   13.349232][C0]
[   13.349557][C0] x17: 0400
[   13.349998][C0] x16: 007f
[   13.350634][C0]
[   13.350965][C0] x15: 007f9fed34f8
[   13.351409][C0] x14: 006d65747379730c
[   13.351844][C0]
[   13.352167][C0] x13: 01ed
[   13.352610][C0] x12: 
[   13.353034][C0]
[   13.353358][C0] x11: 
[   13.353802][C0] x10: 
[   13.354232][C0]
[   13.354785][C0] x9 : 006d65747379730c
[   13.355236][C0] x8 : 
[   13.355673][C0]
[   13.355998][C0] x7 : 
[   13.356448][C0] x6 : ff8043295040
[   13.356874][C0]
[   13.357200][C0] x5 : ff8043296000
[   13.357646][C0] x4 : 
[   13.358077][C0]
[   13.358423][C0] x3 : 0001
[   13.359055][C0] x2 : 0f80
[   13.359497][C0]
[   13.359829][C0] x1 : 037e18aca8c0
[   13.360278][C0] x0 : ff8043295000
[   13.360705][C0]
[   13.362315][C0] Kernel panic - not syncing: bad mode
[   13.362377][C0] CPU: 0 PID: 1 Comm: init Not tainted 
5.10.0-rc7-mainline-00300-gf4328758abb6 #1
[   13.362410][C0] Hardware name: linux,dummy-virt (DT)
[   13.362442][C0] Call trace:
[   13.362474][C0]  dump_backtrace+0x0/0x1e0
[   13.362507][C0]  show_stack+0x1c/0x2c
[   13.362539][C0]  dump_stack+0xd0/0x154
[   13.362570][C0]  panic+0x158/0x370
[   13.362602][C0]  bad_el0_sync+0x0/0x5c
[   13.362634][C0]  el1_inv+0x3c/0x5c
[   13.362666][C0]  el1_sync_handler+0x64/0x8c
[   13.362698][C0]  el1_sync+0x84/0x140
[   13.362730][C0]  __arch_copy_from_user+0x1e4/0x340
[   13.362762][C0]  copy_mount_options+0x40/0x1d0
[   13.362794][C0]  __arm64_sys_mount+0x84/0x13c
[   13.362826][C0]  el0_svc_common+0xc0/0x1b4
[   13.362858][C0]  do_el0_svc+0x20/0x30
[   13.362890][C0]  el0_svc+0x14/0x24
[   13.362922][C0]  el0_sync_handler+0x88/0xec
[   13.362953][C0]  el0_sync+0x17c/0x180
[   13.363547][C0] Kernel Offset: 0x2abd80 from 0xffc01000
[   13.363580][C0] PHYS_OFFSET: 0x4000
[   13.363613][C0] CPU features: 0x27e0152,6180a230
[   13.363644][C0] Memory Limit: none

It looks like the tag check fault coming from the LDTR is reported using
the wrong EL.


[Bug 1907137] [NEW] LDTR not properly emulated when MTE tag checks enabled at EL0

2020-12-07 Thread Peter Collingbourne
Public bug reported:

I am trying to boot Android (just the non-GUI parts for now) under QEMU
with MTE enabled. This can be done by following the instructions here to
build the fvp-eng target with MTE support:

https://cs.android.com/android/platform/superproject/+/master:device/generic/goldfish/fvpbase/

and launching QEMU with the following command:

qemu-system-aarch64 -kernel $ANDROID_PRODUCT_OUT/kernel -initrd
$ANDROID_PRODUCT_OUT/combined-ramdisk.img -machine virt,mte=on -cpu max
-drive driver=raw,file=$ANDROID_PRODUCT_OUT/system-
qemu.img,if=none,id=system -device virtio-blk-device,drive=system
-append "console=ttyAMA0 earlyprintk=ttyAMA0
androidboot.hardware=fvpbase
androidboot.boot_devices=a003e00.virtio_mmio loglevel=9
printk.devkmsg=on buildvariant=eng" -m 512 -nographic -no-reboot

If I do this then QEMU crashes like so:

**
ERROR:../target/arm/mte_helper.c:558:mte_check_fail: code should not be reached
Bail out! ERROR:../target/arm/mte_helper.c:558:mte_check_fail: code should not be reached

The error is caused by an MTE tag check fault from an LDTR instruction
in __arch_copy_from_user. At this point TCF=0 and TCF0=2.

I have this patch that gets me past the error but it is unclear whether
this is the correct fix since there may be other confusion between TCF
and TCF0 elsewhere.

diff --git a/target/arm/mte_helper.c b/target/arm/mte_helper.c
index 153bd1e9df..aa5db4eac4 100644
--- a/target/arm/mte_helper.c
+++ b/target/arm/mte_helper.c
@@ -552,10 +552,8 @@ static void mte_check_fail(CPUARMState *env, uint32_t desc,
 case 0:
 /*
  * Tag check fail does not affect the PE.
- * We eliminate this case by not setting MTE_ACTIVE
- * in tb_flags, so that we never make this runtime call.
  */
-g_assert_not_reached();
+break;
 
 case 2:
 /* Tag check fail causes asynchronous flag set.  */

** Affects: qemu
 Importance: Undecided
 Status: New




Re: [PATCH 2/8] hvf: Move common code out

2020-12-03 Thread Peter Collingbourne
On Thu, Dec 3, 2020 at 1:41 AM Roman Bolshakov  wrote:
>
> On Mon, Nov 30, 2020 at 04:00:11PM -0800, Peter Collingbourne wrote:
> > On Mon, Nov 30, 2020 at 3:18 PM Alexander Graf  wrote:
> > >
> > >
> > > On 01.12.20 00:01, Peter Collingbourne wrote:
> > > > On Mon, Nov 30, 2020 at 1:40 PM Alexander Graf  wrote:
> > > >> Hi Peter,
> > > >>
> > > >> On 30.11.20 22:08, Peter Collingbourne wrote:
> > > >>> On Mon, Nov 30, 2020 at 12:56 PM Frank Yang  wrote:
> > > >>>>
> > > >>>> On Mon, Nov 30, 2020 at 12:34 PM Alexander Graf  
> > > >>>> wrote:
> > > >>>>> Hi Frank,
> > > >>>>>
> > > >>>>> Thanks for the update :). Your previous email nudged me into the 
> > > >>>>> right direction. I previously had implemented WFI through the 
> > > >>>>> internal timer framework which performed way worse.
> > > >>>> Cool, glad it's helping. Also, Peter found out that the main thing 
> > > >>>> keeping us from just using cntpct_el0 on the host directly and 
> > > >>>> compare with cval is that if we sleep, cval is going to be much < 
> > > >>>> cntpct_el0 by the sleep time. If we can get either the architecture 
> > > >>>> or macos to read out the sleep time then we might be able to not 
> > > >>>> have to use a poll interval either!
> > > >>>>> Along the way, I stumbled over a few issues though. For starters, 
> > > >>>>> the signal mask for SIG_IPI was not set correctly, so while 
> > > >>>>> pselect() would exit, the signal would never get delivered to the 
> > > >>>>> thread! For a fix, check out
> > > >>>>>
> > > >>>>> 
> > > >>>>> https://patchew.org/QEMU/20201130030723.78326-1-ag...@csgraf.de/20201130030723.78326-4-ag...@csgraf.de/
> > > >>>>>
> > > >>>> Thanks, we'll take a look :)
> > > >>>>
> > > >>>>> Please also have a look at my latest stab at WFI emulation. It 
> > > >>>>> doesn't handle WFE (that's only relevant in overcommitted 
> > > >>>>> scenarios). But it does handle WFI and even does something similar 
> > > >>>>> to hlt polling, albeit not with an adaptive threshold.
> > > >>> Sorry I'm not subscribed to qemu-devel (I'll subscribe in a bit) so
> > > >>> I'll reply to your patch here. You have:
> > > >>>
> > > >>> +/* Set cpu->hvf->sleeping so that we get a
> > > >>> SIG_IPI signal. */
> > > >>> +cpu->hvf->sleeping = true;
> > > >>> +smp_mb();
> > > >>> +
> > > >>> +/* Bail out if we received an IRQ meanwhile */
> > > >>> +if (cpu->thread_kicked || 
> > > >>> (cpu->interrupt_request &
> > > >>> +(CPU_INTERRUPT_HARD | CPU_INTERRUPT_FIQ))) {
> > > >>> +cpu->hvf->sleeping = false;
> > > >>> +break;
> > > >>> +}
> > > >>> +
> > > >>> +/* nanosleep returns on signal, so we wake up on 
> > > >>> kick. */
> > > >>> +nanosleep(ts, NULL);
> > > >>>
> > > >>> and then send the signal conditional on whether sleeping is true, but
> > > >>> I think this is racy. If the signal is sent after sleeping is set to
> > > >>> true but before entering nanosleep then I think it will be ignored and
> > > >>> we will miss the wakeup. That's why in my implementation I block IPI
> > > >>> on the CPU thread at startup and then use pselect to atomically
> > > >>> unblock and begin sleeping. The signal is sent unconditionally so
> > > >>> there's no need to worry about races between actually sleeping and the
> > > >>> "we think we're sleeping" state. It may lead to an extra wakeup but
> > > >>> that's better than missing it entirely.
> > > >>
> > > >> Thanks a bunch for the comme

Re: [PATCH] arm/hvf: Optimize and simplify WFI handling

2020-12-03 Thread Peter Collingbourne
On Thu, Dec 3, 2020 at 2:12 AM Roman Bolshakov  wrote:
>
> On Tue, Dec 01, 2020 at 10:59:50AM -0800, Peter Collingbourne wrote:
> > On Tue, Dec 1, 2020 at 3:16 AM Alexander Graf  wrote:
> > >
> > > Hi Peter,
> > >
> > > On 01.12.20 09:21, Peter Collingbourne wrote:
> > > > Sleep on WFx until the VTIMER is due but allow ourselves to be woken
> > > > up on IPI.
> > > >
> > > > Signed-off-by: Peter Collingbourne 
> > >
> > >
> > > Thanks a bunch!
> > >
> > >
> > > > ---
> > > > Alexander Graf wrote:
> > > >> I would love to take a patch from you here :). I'll still be stuck for 
> > > >> a
> > > >> while with the sysreg sync rework that Peter asked for before I can 
> > > >> look
> > > >> at WFI again.
> > > > Okay, here's a patch :) It's a relatively straightforward adaptation
> > > > of what we have in our fork, which can now boot Android to GUI while
> > > > remaining at around 4% CPU when idle.
> > > >
> > > > I'm not set up to boot a full Linux distribution at the moment so I
> > > > tested it on upstream QEMU by running a recent mainline Linux kernel
> > > > with a rootfs containing an init program that just does sleep(5)
> > > > and verified that the qemu process remains at low CPU usage during
> > > > the sleep. This was on top of your v2 plus the last patch of your v1
> > > > since it doesn't look like you have a replacement for that logic yet.
> > > >
> > > >   accel/hvf/hvf-cpus.c |  5 +--
> > > >   include/sysemu/hvf_int.h |  3 +-
> > > >   target/arm/hvf/hvf.c | 94 +++-
> > > >   3 files changed, 28 insertions(+), 74 deletions(-)
> > > >
> > > > diff --git a/accel/hvf/hvf-cpus.c b/accel/hvf/hvf-cpus.c
> > > > index 4360f64671..b2c8fb57f6 100644
> > > > --- a/accel/hvf/hvf-cpus.c
> > > > +++ b/accel/hvf/hvf-cpus.c
> > > > @@ -344,9 +344,8 @@ static int hvf_init_vcpu(CPUState *cpu)
> > > >   sigact.sa_handler = dummy_signal;
> > > >   sigaction(SIG_IPI, &sigact, NULL);
> > > >
> > > > -pthread_sigmask(SIG_BLOCK, NULL, &set);
> > > > -sigdelset(&set, SIG_IPI);
> > > > -pthread_sigmask(SIG_SETMASK, &set, NULL);
> > > > +pthread_sigmask(SIG_BLOCK, NULL, &cpu->hvf->unblock_ipi_mask);
> > > > +sigdelset(&cpu->hvf->unblock_ipi_mask, SIG_IPI);
> > >
> > >
> > > What will this do to the x86 hvf implementation? We're now not
> > > unblocking SIG_IPI again for that, right?
> >
> > Yes and that was the case before your patch series.
> >
> > > >
> > > >   #ifdef __aarch64__
> > > >   r = hv_vcpu_create(&cpu->hvf->fd, (hv_vcpu_exit_t 
> > > > **)&cpu->hvf->exit, NULL);
> > > > diff --git a/include/sysemu/hvf_int.h b/include/sysemu/hvf_int.h
> > > > index c56baa3ae8..13adf6ea77 100644
> > > > --- a/include/sysemu/hvf_int.h
> > > > +++ b/include/sysemu/hvf_int.h
> > > > @@ -62,8 +62,7 @@ extern HVFState *hvf_state;
> > > >   struct hvf_vcpu_state {
> > > >   uint64_t fd;
> > > >   void *exit;
> > > > -struct timespec ts;
> > > > -bool sleeping;
> > > > +sigset_t unblock_ipi_mask;
> > > >   };
> > > >
> > > >   void assert_hvf_ok(hv_return_t ret);
> > > > diff --git a/target/arm/hvf/hvf.c b/target/arm/hvf/hvf.c
> > > > index 8fe10966d2..60a361ff38 100644
> > > > --- a/target/arm/hvf/hvf.c
> > > > +++ b/target/arm/hvf/hvf.c
> > > > @@ -2,6 +2,7 @@
> > > >* QEMU Hypervisor.framework support for Apple Silicon
> > > >
> > > >* Copyright 2020 Alexander Graf 
> > > > + * Copyright 2020 Google LLC
> > > >*
> > > >* This work is licensed under the terms of the GNU GPL, version 2 or 
> > > > later.
> > > >* See the COPYING file in the top-level directory.
> > > > @@ -18,6 +19,7 @@
> > > >   #include "sysemu/hw_accel.h"
> > > >
> > > >   #include 
> > > > +#include 
> > > >
> > > >   #include "exec/address-spaces.h"
> > > >   #include "hw/irq.h"
> > > > @@ -320,18 +322,8 @@ int hvf_arch_init

Re: [PATCH v3 08/10] arm/hvf: Add a WFI handler

2020-12-03 Thread Peter Collingbourne
On Thu, Dec 3, 2020 at 2:39 AM Roman Bolshakov  wrote:
>
> On Wed, Dec 02, 2020 at 08:04:06PM +0100, Alexander Graf wrote:
> > From: Peter Collingbourne 
> >
> > Sleep on WFI until the VTIMER is due but allow ourselves to be woken
> > up on IPI.
> >
> > Signed-off-by: Peter Collingbourne 
> > [agraf: Remove unused 'set' variable, always advance PC on WFX trap]
> > Signed-off-by: Alexander Graf 
> > ---
> >  accel/hvf/hvf-cpus.c |  5 ++--
> >  include/sysemu/hvf_int.h |  1 +
> >  target/arm/hvf/hvf.c | 55 
> >  3 files changed, 58 insertions(+), 3 deletions(-)
> >
> > diff --git a/accel/hvf/hvf-cpus.c b/accel/hvf/hvf-cpus.c
> > index e613c22ad0..a981ccde70 100644
> > --- a/accel/hvf/hvf-cpus.c
> > +++ b/accel/hvf/hvf-cpus.c
> > @@ -337,15 +337,14 @@ static int hvf_init_vcpu(CPUState *cpu)
> >  cpu->hvf = g_malloc0(sizeof(*cpu->hvf));
> >
> >  /* init cpu signals */
> > -sigset_t set;
> >  struct sigaction sigact;
> >
> >  memset(&sigact, 0, sizeof(sigact));
> >  sigact.sa_handler = dummy_signal;
> >  sigaction(SIG_IPI, &sigact, NULL);
> >
> > -pthread_sigmask(SIG_BLOCK, NULL, &set);
> > -sigdelset(&set, SIG_IPI);
> > +pthread_sigmask(SIG_BLOCK, NULL, &cpu->hvf->unblock_ipi_mask);
> > +sigdelset(&cpu->hvf->unblock_ipi_mask, SIG_IPI);
> >
> >  #ifdef __aarch64__
> >  r = hv_vcpu_create(&cpu->hvf->fd, (hv_vcpu_exit_t **)&cpu->hvf->exit, 
> > NULL);
> > diff --git a/include/sysemu/hvf_int.h b/include/sysemu/hvf_int.h
> > index 5f15119184..13adf6ea77 100644
> > --- a/include/sysemu/hvf_int.h
> > +++ b/include/sysemu/hvf_int.h
> > @@ -62,6 +62,7 @@ extern HVFState *hvf_state;
> >  struct hvf_vcpu_state {
> >  uint64_t fd;
> >  void *exit;
> > +sigset_t unblock_ipi_mask;
> >  };
> >
> >  void assert_hvf_ok(hv_return_t ret);
> > diff --git a/target/arm/hvf/hvf.c b/target/arm/hvf/hvf.c
> > index 5ecce36d4a..79aeeb237b 100644
> > --- a/target/arm/hvf/hvf.c
> > +++ b/target/arm/hvf/hvf.c
> > @@ -2,6 +2,7 @@
> >   * QEMU Hypervisor.framework support for Apple Silicon
> >
> >   * Copyright 2020 Alexander Graf 
> > + * Copyright 2020 Google LLC
> >   *
> >   * This work is licensed under the terms of the GNU GPL, version 2 or 
> > later.
> >   * See the COPYING file in the top-level directory.
> > @@ -18,6 +19,7 @@
> >  #include "sysemu/hw_accel.h"
> >
> >  #include 
> > +#include 
> >
> >  #include "exec/address-spaces.h"
> >  #include "hw/irq.h"
> > @@ -413,6 +415,7 @@ int hvf_arch_init_vcpu(CPUState *cpu)
> >
> >  void hvf_kick_vcpu_thread(CPUState *cpu)
> >  {
> > +cpus_kick_thread(cpu);
> >  hv_vcpus_exit(&cpu->hvf->fd, 1);
> >  }
> >
> > @@ -468,6 +471,18 @@ static int hvf_inject_interrupts(CPUState *cpu)
> >  return 0;
> >  }
> >
> > +static void hvf_wait_for_ipi(CPUState *cpu, struct timespec *ts)
> > +{
> > +/*
> > + * Use pselect to sleep so that other threads can IPI us while we're
> > + * sleeping.
> > + */
> > +qatomic_mb_set(&cpu->thread_kicked, false);
> > +qemu_mutex_unlock_iothread();
>
> I raised a concern earlier, but I don't for sure if a kick could be lost
> right here. On x86 it could be lost.

If the signal is sent right before the pselect() it will be blocked
i.e. left pending. With the pselect() we get an atomic unblock of
SIG_IPI at the same time as we begin sleeping, which means that we
will receive the signal and leave the pselect() immediately.

I think at some point macOS had an incorrect implementation of
pselect() where the signal mask was set non-atomically in userspace,
which could lead to the signal being missed, but I checked the latest
XNU sources and it looks like the pselect() implementation has since
been moved into the kernel.
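
To make the ordering concrete, here is the pattern as a standalone sketch --
not the code from the patch; SIG_IPI stands in for QEMU's IPI signal and the
helper names are purely illustrative:

    #include <pthread.h>
    #include <signal.h>
    #include <sys/select.h>

    #define SIG_IPI SIGUSR1   /* stand-in for QEMU's SIG_IPI */

    static void dummy_signal(int sig)
    {
    }

    /* Per-vCPU-thread setup: install the handler, keep SIG_IPI blocked. */
    static void ipi_setup(sigset_t *unblock_ipi)
    {
        struct sigaction sigact = { .sa_handler = dummy_signal };
        sigaction(SIG_IPI, &sigact, NULL);

        sigset_t blocked;
        pthread_sigmask(SIG_BLOCK, NULL, &blocked);
        sigaddset(&blocked, SIG_IPI);
        pthread_sigmask(SIG_SETMASK, &blocked, NULL);

        *unblock_ipi = blocked;
        sigdelset(unblock_ipi, SIG_IPI);
    }

    /*
     * Sleep: pselect() swaps in *unblock_ipi and starts waiting atomically,
     * so a SIG_IPI sent at any point after ipi_setup() ran is either already
     * pending (pselect() returns immediately with EINTR) or delivered while
     * we sleep -- there is no window in which it can be lost.
     */
    static void ipi_wait(const struct timespec *ts, const sigset_t *unblock_ipi)
    {
        pselect(0, NULL, NULL, NULL, ts, unblock_ipi);
    }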

> > +pselect(0, 0, 0, 0, ts, &cpu->hvf->unblock_ipi_mask);
> > +qemu_mutex_lock_iothread();
> > +}
> > +
> >  int hvf_vcpu_exec(CPUState *cpu)
> >  {
> >  ARMCPU *arm_cpu = ARM_CPU(cpu);
> > @@ -579,6 +594,46 @@ int hvf_vcpu_exec(CPUState *cpu)
> >  }
> >  case EC_WFX_TRAP:
> >  advance_pc = true;
> > +if (!(syndrome & WFX_IS_WFE) && !(cpu->interrupt_request &
> > +(CPU_INTERRUPT_HARD | CPU_INTERRUPT_FIQ))) {
> > +
> > +uint64_t ctl;
> > +  

Re: [PATCH v1 1/1] hvf: arm: Properly sync guest time on migration

2020-12-02 Thread Peter Collingbourne
On Wed, Dec 2, 2020 at 3:26 PM Frank Yang  wrote:
>
>
>
> On Wed, Dec 2, 2020 at 2:57 PM Alexander Graf  wrote:
>>
>>
>> On 02.12.20 23:46, Frank Yang wrote:
>>
>>
>>
>> On Wed, Dec 2, 2020 at 2:28 PM Alexander Graf  wrote:
>>>
>>>
>>> On 02.12.20 23:19, Frank Yang wrote:
>>>
>>>
>>> From downstream: 
>>> https://android-review.googlesource.com/c/platform/external/qemu/+/1515002
>>>
>>> Based on v3 of Alexander Graf's patches
>>>
>>> https://patchew.org/QEMU/20201202190408.2041-1-ag...@csgraf.de
>>>
>>> We need to adjust CNTVOFF_EL2 so that time doesn't warp.  Even though we
>>> can set separate CNTVOFF_EL2 values per vCPU, it just is not worth the
>>> required effort to do that accurately---with individual values, even if
>>> they are a tiny bit off it can result in a lockup due to inconsistent
>>> time differences between vCPUs. So just use a global approximate value
>>> for now.
>>>
>>> Not tested in upstream yet, but Android emulator snapshots work without
>>> time warp now.
>>>
>>> Signed-off-by: Lingfeng Yang 
>>>
>>>
>>> If we just always make CNTV start at the same 0 as QEMU_CLOCK_VIRTUAL, we 
>>> should be able to just recover the offset after migration by looking at 
>>> QEMU_CLOCK_VIRTUAL to set CNTVOFF, right?
>>>
>>> That would end up much easier than this patch I hope.
>>>
>>>
>>
>> The virtual clock interfaces/implementations in QEMU seem complex to me 
>> relative to the fix needed here and they don't seem to compute ticks with 
>> mach_absolute_time() (which in this case we want since we want to compute in 
>> timer ticks instead of having to mess with ns / cycle conversions). I do 
>> agree this patch does seem more complicated on the surface though versus 
>> "just" setting cntvoff directly to some value. Maybe we should simplify the 
>> QEMU_CLOCK_VIRTUAL implementation first to maintain CNTVOFF_EL2/CNTV using 
>> mach_absolute_time()?
>>
>>
>> So QEMU_CLOCK_VIRTUAL calls cpu_get_clock() which just adds an offset to 
>> gettimeofday(). This offset is already part of the live migration stream[1]. 
>> So if you just configure CNTVOFF_EL2 based on QEMU_CLOCK_VIRTUAL adjusted by 
>> the clock frequency on vcpu init, you should have everything you need. You 
>> can do that on every CPU init even, as the virtual clock will just be 0 on 
>> start.
>>
>> The only thing we need to change then is to move the WFI from a direct call 
>> to mach_absolute_time() to also check the virtual clock instead. I would 
>> hope that gettimeofday() calls mach_absolute_time() in the background too to 
>> speed it up.
>>
> Sounds plausible, but I noticed that we also have cpu_ticks_offset as part of
> the migration stream, and I prefer mach_absolute_time() (ticks) instead of
> seconds in WFI as well, since it makes the calculation more accurate (ticks
> against ticks instead of conversion between ns and ticks).
>
> Should we look at integrating this with cpu_ticks_offset instead?

Seems plausible to me. As far as I can tell the intent is that
cpu_get_host_ticks() does not increment while the machine is asleep
(e.g. on x86 it uses RDTSC, which as far as I know stops during sleep),
so we could provide an implementation on Mac that calls
mach_absolute_time().
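
Something like the following would be enough -- sketch only, the exact
header and configure guard for a Darwin branch are not worked out here:

    /* Hypothetical Darwin implementation of cpu_get_host_ticks() */
    #include <mach/mach_time.h>

    static inline int64_t cpu_get_host_ticks(void)
    {
        /*
         * Like the RDTSC-based x86 version is assumed to above,
         * mach_absolute_time() does not advance while the machine sleeps.
         */
        return mach_absolute_time();
    }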

Peter



Re: [PATCH v1 1/1] hvf: arm: Properly sync guest time on migration

2020-12-02 Thread Peter Collingbourne
On Wed, Dec 2, 2020 at 2:57 PM Alexander Graf  wrote:
>
>
> On 02.12.20 23:46, Frank Yang wrote:
>
>
>
> On Wed, Dec 2, 2020 at 2:28 PM Alexander Graf  wrote:
>>
>>
>> On 02.12.20 23:19, Frank Yang wrote:
>>
>>
>> From downstream: 
>> https://android-review.googlesource.com/c/platform/external/qemu/+/1515002
>>
>> Based on v3 of Alexander Graf's patches
>>
>> https://patchew.org/QEMU/20201202190408.2041-1-ag...@csgraf.de
>>
>> We need to adjust CNTVOFF_EL2 so that time doesn't warp.  Even though we
>> can set separate CNTVOFF_EL2 values per vCPU, it just is not worth the
>> required effort to do that accurately---with individual values, even if
>> they are a tiny bit off it can result in a lockup due to inconsistent
>> time differences between vCPUs. So just use a global approximate value
>> for now.
>>
>> Not tested in upstream yet, but Android emulator snapshots work without
>> time warp now.
>>
>> Signed-off-by: Lingfeng Yang 
>>
>>
>> If we just always make CNTV start at the same 0 as QEMU_CLOCK_VIRTUAL, we 
>> should be able to just recover the offset after migration by looking at 
>> QEMU_CLOCK_VIRTUAL to set CNTVOFF, right?
>>
>> That would end up much easier than this patch I hope.
>>
>>
>
> The virtual clock interfaces/implementations in QEMU seem complex to me 
> relative to the fix needed here and they don't seem to compute ticks with 
> mach_absolute_time() (which in this case we want since we want to compute in 
> timer ticks instead of having to mess with ns / cycle conversions). I do 
> agree this patch does seem more complicated on the surface though versus 
> "just" setting cntvoff directly to some value. Maybe we should simplify the 
> QEMU_CLOCK_VIRTUAL implementation first to maintain CNTVOFF_EL2/CNTV using 
> mach_absolute_time()?
>
>
> So QEMU_CLOCK_VIRTUAL calls cpu_get_clock() which just adds an offset to 
> gettimeofday(). This offset is already part of the live migration stream[1]. 
> So if you just configure CNTVOFF_EL2 based on QEMU_CLOCK_VIRTUAL adjusted by 
> the clock frequency on vcpu init, you should have everything you need. You 
> can do that on every CPU init even, as the virtual clock will just be 0 on 
> start.
>
> The only thing we need to change then is to move the WFI from a direct call 
> to mach_absolute_time() to also check the virtual clock instead. I would hope 
> that gettimeofday() calls mach_absolute_time() in the background too to speed 
> it up.

I'm not sure that something based on gettimeofday() (or
clock_gettime(CLOCK_MONOTONIC) which it looks like cpu_get_clock() can
also call) will work. It will include time spent asleep so it won't
correspond to mach_absolute_time() aka guest CNTVCT_EL0.

Peter



Re: [PATCH v3 3/3] arm/hvf: Add a WFI handler

2020-12-02 Thread Peter Collingbourne
On Wed, Dec 2, 2020 at 10:49 AM Alexander Graf  wrote:
>
>
> On 02.12.20 05:44, Peter Collingbourne wrote:
> > Sleep on WFI until the VTIMER is due but allow ourselves to be woken
> > up on IPI.
> >
> > Signed-off-by: Peter Collingbourne 
> > ---
> > v3:
> > - move the simplified locking to a separate patch
> > - spin on sleep <2ms
> >
> > v2:
> > - simplify locking further
> > - wait indefinitely on disabled or masked timers
> >
> >   accel/hvf/hvf-cpus.c |  4 +--
> >   include/sysemu/hvf_int.h |  1 +
> >   target/arm/hvf/hvf.c | 56 
> >   3 files changed, 59 insertions(+), 2 deletions(-)
> >
> > diff --git a/accel/hvf/hvf-cpus.c b/accel/hvf/hvf-cpus.c
> > index e613c22ad0..b2c8fb57f6 100644
> > --- a/accel/hvf/hvf-cpus.c
> > +++ b/accel/hvf/hvf-cpus.c
> > @@ -344,8 +344,8 @@ static int hvf_init_vcpu(CPUState *cpu)
> >   sigact.sa_handler = dummy_signal;
> >   sigaction(SIG_IPI, &sigact, NULL);
> >
> > -pthread_sigmask(SIG_BLOCK, NULL, &set);
> > -sigdelset(&set, SIG_IPI);
> > +pthread_sigmask(SIG_BLOCK, NULL, &cpu->hvf->unblock_ipi_mask);
> > +sigdelset(&cpu->hvf->unblock_ipi_mask, SIG_IPI);
>
>
> That turns set into an unused variable, no? I'll fix it up while
> applying though. The rest looks great, I'll push it as part of my next
> patch set.

Yes, thanks for spotting that, your fixup looks good.
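
Concretely, the fixup is just dropping the now-unused local declaration, as
the applied version of this patch shows:

    -sigset_t set;
     struct sigaction sigact;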

Peter



Re: [PATCH] arm/hvf: Optimize and simplify WFI handling

2020-12-01 Thread Peter Collingbourne
On Tue, Dec 1, 2020 at 5:53 PM Alexander Graf  wrote:
>
>
> On 02.12.20 02:19, Peter Collingbourne wrote:
> > On Tue, Dec 1, 2020 at 2:04 PM Alexander Graf  wrote:
> >>
> >> On 01.12.20 19:59, Peter Collingbourne wrote:
> >>> On Tue, Dec 1, 2020 at 3:16 AM Alexander Graf  wrote:
> >>>> Hi Peter,
> >>>>
> >>>> On 01.12.20 09:21, Peter Collingbourne wrote:
> >>>>> Sleep on WFx until the VTIMER is due but allow ourselves to be woken
> >>>>> up on IPI.
> >>>>>
> >>>>> Signed-off-by: Peter Collingbourne 
> >>>> Thanks a bunch!
> >>>>
> >>>>
> >>>>> ---
> >>>>> Alexander Graf wrote:
> >>>>>> I would love to take a patch from you here :). I'll still be stuck for 
> >>>>>> a
> >>>>>> while with the sysreg sync rework that Peter asked for before I can 
> >>>>>> look
> >>>>>> at WFI again.
> >>>>> Okay, here's a patch :) It's a relatively straightforward adaptation
> >>>>> of what we have in our fork, which can now boot Android to GUI while
> >>>>> remaining at around 4% CPU when idle.
> >>>>>
> >>>>> I'm not set up to boot a full Linux distribution at the moment so I
> >>>>> tested it on upstream QEMU by running a recent mainline Linux kernel
> >>>>> with a rootfs containing an init program that just does sleep(5)
> >>>>> and verified that the qemu process remains at low CPU usage during
> >>>>> the sleep. This was on top of your v2 plus the last patch of your v1
> >>>>> since it doesn't look like you have a replacement for that logic yet.
> >>>>>
> >>>>> accel/hvf/hvf-cpus.c |  5 +--
> >>>>> include/sysemu/hvf_int.h |  3 +-
> >>>>> target/arm/hvf/hvf.c | 94 
> >>>>> +++-
> >>>>> 3 files changed, 28 insertions(+), 74 deletions(-)
> >>>>>
> >>>>> diff --git a/accel/hvf/hvf-cpus.c b/accel/hvf/hvf-cpus.c
> >>>>> index 4360f64671..b2c8fb57f6 100644
> >>>>> --- a/accel/hvf/hvf-cpus.c
> >>>>> +++ b/accel/hvf/hvf-cpus.c
> >>>>> @@ -344,9 +344,8 @@ static int hvf_init_vcpu(CPUState *cpu)
> >>>>> sigact.sa_handler = dummy_signal;
> >>>>> sigaction(SIG_IPI, , NULL);
> >>>>>
> >>>>> -pthread_sigmask(SIG_BLOCK, NULL, );
> >>>>> -sigdelset(, SIG_IPI);
> >>>>> -pthread_sigmask(SIG_SETMASK, , NULL);
> >>>>> +pthread_sigmask(SIG_BLOCK, NULL, >hvf->unblock_ipi_mask);
> >>>>> +sigdelset(>hvf->unblock_ipi_mask, SIG_IPI);
> >>>> What will this do to the x86 hvf implementation? We're now not
> >>>> unblocking SIG_IPI again for that, right?
> >>> Yes and that was the case before your patch series.
> >>
> >> The way I understand Roman, he wanted to unblock the IPI signal on x86:
> >>
> >> https://patchwork.kernel.org/project/qemu-devel/patch/20201126215017.41156-3-ag...@csgraf.de/#23807021
> >>
> >> I agree that at this point it's not a problem though to break it again.
> >> I'm not quite sure how to merge your patches within my patch set though,
> >> given they basically revert half of my previously introduced code...
> >>
> >>
> >>>>> #ifdef __aarch64__
> >>>>> r = hv_vcpu_create(>hvf->fd, (hv_vcpu_exit_t 
> >>>>> **)>hvf->exit, NULL);
> >>>>> diff --git a/include/sysemu/hvf_int.h b/include/sysemu/hvf_int.h
> >>>>> index c56baa3ae8..13adf6ea77 100644
> >>>>> --- a/include/sysemu/hvf_int.h
> >>>>> +++ b/include/sysemu/hvf_int.h
> >>>>> @@ -62,8 +62,7 @@ extern HVFState *hvf_state;
> >>>>> struct hvf_vcpu_state {
> >>>>> uint64_t fd;
> >>>>> void *exit;
> >>>>> -struct timespec ts;
> >>>>> -bool sleeping;
> >>>>> +sigset_t unblock_ipi_mask;
> >>>>> };
> >>>>>
> >>>>> void assert_hvf_ok(hv_return_t ret);
> >>>>> diff --git a/target/arm/hvf/hvf.c b/ta

[PATCH v3 3/3] arm/hvf: Add a WFI handler

2020-12-01 Thread Peter Collingbourne via
Sleep on WFI until the VTIMER is due but allow ourselves to be woken
up on IPI.

Signed-off-by: Peter Collingbourne 
---
v3:
- move the simplified locking to a separate patch
- spin on sleep <2ms

v2:
- simplify locking further
- wait indefinitely on disabled or masked timers

 accel/hvf/hvf-cpus.c |  4 +--
 include/sysemu/hvf_int.h |  1 +
 target/arm/hvf/hvf.c | 56 
 3 files changed, 59 insertions(+), 2 deletions(-)

diff --git a/accel/hvf/hvf-cpus.c b/accel/hvf/hvf-cpus.c
index e613c22ad0..b2c8fb57f6 100644
--- a/accel/hvf/hvf-cpus.c
+++ b/accel/hvf/hvf-cpus.c
@@ -344,8 +344,8 @@ static int hvf_init_vcpu(CPUState *cpu)
 sigact.sa_handler = dummy_signal;
 sigaction(SIG_IPI, &sigact, NULL);
 
-pthread_sigmask(SIG_BLOCK, NULL, &set);
-sigdelset(&set, SIG_IPI);
+pthread_sigmask(SIG_BLOCK, NULL, &cpu->hvf->unblock_ipi_mask);
+sigdelset(&cpu->hvf->unblock_ipi_mask, SIG_IPI);
 
 #ifdef __aarch64__
 r = hv_vcpu_create(&cpu->hvf->fd, (hv_vcpu_exit_t **)&cpu->hvf->exit, 
NULL);
diff --git a/include/sysemu/hvf_int.h b/include/sysemu/hvf_int.h
index 5f15119184..13adf6ea77 100644
--- a/include/sysemu/hvf_int.h
+++ b/include/sysemu/hvf_int.h
@@ -62,6 +62,7 @@ extern HVFState *hvf_state;
 struct hvf_vcpu_state {
 uint64_t fd;
 void *exit;
+sigset_t unblock_ipi_mask;
 };
 
 void assert_hvf_ok(hv_return_t ret);
diff --git a/target/arm/hvf/hvf.c b/target/arm/hvf/hvf.c
index 31db6fca68..f230193cf5 100644
--- a/target/arm/hvf/hvf.c
+++ b/target/arm/hvf/hvf.c
@@ -2,6 +2,7 @@
  * QEMU Hypervisor.framework support for Apple Silicon
 
  * Copyright 2020 Alexander Graf 
+ * Copyright 2020 Google LLC
  *
  * This work is licensed under the terms of the GNU GPL, version 2 or later.
  * See the COPYING file in the top-level directory.
@@ -18,6 +19,7 @@
 #include "sysemu/hw_accel.h"
 
 #include 
+#include 
 
 #include "exec/address-spaces.h"
 #include "hw/irq.h"
@@ -320,6 +322,7 @@ int hvf_arch_init_vcpu(CPUState *cpu)
 
 void hvf_kick_vcpu_thread(CPUState *cpu)
 {
+cpus_kick_thread(cpu);
 hv_vcpus_exit(&cpu->hvf->fd, 1);
 }
 
@@ -338,6 +341,18 @@ static int hvf_inject_interrupts(CPUState *cpu)
 return 0;
 }
 
+static void hvf_wait_for_ipi(CPUState *cpu, struct timespec *ts)
+{
+/*
+ * Use pselect to sleep so that other threads can IPI us while we're
+ * sleeping.
+ */
+qatomic_mb_set(&cpu->thread_kicked, false);
+qemu_mutex_unlock_iothread();
+pselect(0, 0, 0, 0, ts, &cpu->hvf->unblock_ipi_mask);
+qemu_mutex_lock_iothread();
+}
+
 int hvf_vcpu_exec(CPUState *cpu)
 {
 ARMCPU *arm_cpu = ARM_CPU(cpu);
@@ -466,6 +481,47 @@ int hvf_vcpu_exec(CPUState *cpu)
 break;
 }
 case EC_WFX_TRAP:
+if (!(syndrome & WFX_IS_WFE) && !(cpu->interrupt_request &
+(CPU_INTERRUPT_HARD | CPU_INTERRUPT_FIQ))) {
+advance_pc = true;
+
+uint64_t ctl;
+r = hv_vcpu_get_sys_reg(cpu->hvf->fd, HV_SYS_REG_CNTV_CTL_EL0,
+&ctl);
+assert_hvf_ok(r);
+
+if (!(ctl & 1) || (ctl & 2)) {
+/* Timer disabled or masked, just wait for an IPI. */
+hvf_wait_for_ipi(cpu, NULL);
+break;
+}
+
+uint64_t cval;
+r = hv_vcpu_get_sys_reg(cpu->hvf->fd, HV_SYS_REG_CNTV_CVAL_EL0,
+&cval);
+assert_hvf_ok(r);
+
+int64_t ticks_to_sleep = cval - mach_absolute_time();
+if (ticks_to_sleep < 0) {
+break;
+}
+
+uint64_t seconds = ticks_to_sleep / arm_cpu->gt_cntfrq_hz;
+uint64_t nanos =
+(ticks_to_sleep - arm_cpu->gt_cntfrq_hz * seconds) *
+1000000000 / arm_cpu->gt_cntfrq_hz;
+
+/*
+ * Don't sleep for less than 2ms. This is believed to improve
+ * latency of message passing workloads.
+ */
+if (!seconds && nanos < 2000000) {
+break;
+}
+
+struct timespec ts = { seconds, nanos };
+hvf_wait_for_ipi(cpu, &ts);
+}
 break;
 case EC_AA64_HVC:
 cpu_synchronize_state(cpu);
-- 
2.29.2.454.gaff20da3a2-goog
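
For a sense of the magnitudes in the EC_WFX_TRAP path above, with an
illustrative 24 MHz counter frequency: ticks_to_sleep = 36,000,000 gives
seconds = 36,000,000 / 24,000,000 = 1 and
nanos = (36,000,000 - 24,000,000 * 1) * 1,000,000,000 / 24,000,000 =
500,000,000, i.e. a { 1, 500000000 } timespec for a deadline 1.5 s away.
A request that works out to less than 2,000,000 ns in total is spun on
rather than slept on, per the comment in the hunk.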




[PATCH v3 1/3] Revert "hvf: Actually set SIG_IPI mask"

2020-12-01 Thread Peter Collingbourne via
From: Alexander Graf 

This reverts commit 926a35700f0c14d6b95cbf8c3c3cce55ec7ffc3e.

You can just drop patch 3 of your v2 instead of taking this commit.
---
 accel/hvf/hvf-cpus.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/accel/hvf/hvf-cpus.c b/accel/hvf/hvf-cpus.c
index 4360f64671..e613c22ad0 100644
--- a/accel/hvf/hvf-cpus.c
+++ b/accel/hvf/hvf-cpus.c
@@ -346,7 +346,6 @@ static int hvf_init_vcpu(CPUState *cpu)
 
 pthread_sigmask(SIG_BLOCK, NULL, &set);
 sigdelset(&set, SIG_IPI);
-pthread_sigmask(SIG_SETMASK, &set, NULL);
 
 #ifdef __aarch64__
 r = hv_vcpu_create(&cpu->hvf->fd, (hv_vcpu_exit_t **)&cpu->hvf->exit, 
NULL);
-- 
2.29.2.454.gaff20da3a2-goog




[PATCH v3 2/3] arm/hvf: Do some cleanups

2020-12-01 Thread Peter Collingbourne via
- Stop setting current_cpu
- Remove the previous WFx handler
- Simplify locking
- Remove the unused ret variable in hvf_vcpu_exec

Signed-off-by: Peter Collingbourne 
---
 include/sysemu/hvf_int.h |   2 -
 target/arm/hvf/hvf.c | 106 ++-
 2 files changed, 5 insertions(+), 103 deletions(-)

diff --git a/include/sysemu/hvf_int.h b/include/sysemu/hvf_int.h
index c56baa3ae8..5f15119184 100644
--- a/include/sysemu/hvf_int.h
+++ b/include/sysemu/hvf_int.h
@@ -62,8 +62,6 @@ extern HVFState *hvf_state;
 struct hvf_vcpu_state {
 uint64_t fd;
 void *exit;
-struct timespec ts;
-bool sleeping;
 };
 
 void assert_hvf_ok(hv_return_t ret);
diff --git a/target/arm/hvf/hvf.c b/target/arm/hvf/hvf.c
index 8fe10966d2..31db6fca68 100644
--- a/target/arm/hvf/hvf.c
+++ b/target/arm/hvf/hvf.c
@@ -320,18 +320,7 @@ int hvf_arch_init_vcpu(CPUState *cpu)
 
 void hvf_kick_vcpu_thread(CPUState *cpu)
 {
-if (cpu->hvf->sleeping) {
-/*
- * When sleeping, make sure we always send signals. Also, clear the
- * timespec, so that an IPI that arrives between setting hvf->sleeping
- * and the nanosleep syscall still aborts the sleep.
- */
-cpu->thread_kicked = false;
-cpu->hvf->ts = (struct timespec){ };
-cpus_kick_thread(cpu);
-} else {
-hv_vcpus_exit(&cpu->hvf->fd, 1);
-}
+hv_vcpus_exit(&cpu->hvf->fd, 1);
 }
 
 static int hvf_inject_interrupts(CPUState *cpu)
@@ -355,17 +344,11 @@ int hvf_vcpu_exec(CPUState *cpu)
 CPUARMState *env = &arm_cpu->env;
 hv_vcpu_exit_t *hvf_exit = cpu->hvf->exit;
 hv_return_t r;
-int ret = 0;
-
-qemu_mutex_unlock_iothread();
 
-do {
+while (1) {
 bool advance_pc = false;
 
-qemu_mutex_lock_iothread();
-current_cpu = cpu;
 qemu_wait_io_event_common(cpu);
-qemu_mutex_unlock_iothread();
 
 flush_cpu_state(cpu);
 
@@ -374,10 +357,10 @@ int hvf_vcpu_exec(CPUState *cpu)
 }
 
 if (cpu->halted) {
-qemu_mutex_lock_iothread();
 return EXCP_HLT;
 }
 
+qemu_mutex_unlock_iothread();
 assert_hvf_ok(hv_vcpu_run(cpu->hvf->fd));
 
 /* handle VMEXIT */
@@ -385,15 +368,13 @@ int hvf_vcpu_exec(CPUState *cpu)
 uint64_t syndrome = hvf_exit->exception.syndrome;
 uint32_t ec = syn_get_ec(syndrome);
 
+qemu_mutex_lock_iothread();
 switch (exit_reason) {
 case HV_EXIT_REASON_EXCEPTION:
 /* This is the main one, handle below. */
 break;
 case HV_EXIT_REASON_VTIMER_ACTIVATED:
-qemu_mutex_lock_iothread();
-current_cpu = cpu;
 qemu_set_irq(arm_cpu->gt_timer_outputs[GTIMER_VIRT], 1);
-qemu_mutex_unlock_iothread();
 continue;
 case HV_EXIT_REASON_CANCELED:
 /* we got kicked, no exit to process */
@@ -402,7 +383,6 @@ int hvf_vcpu_exec(CPUState *cpu)
 assert(0);
 }
 
-ret = 0;
 switch (ec) {
 case EC_DATAABORT: {
 bool isv = syndrome & ARM_EL_ISV;
@@ -413,9 +393,6 @@ int hvf_vcpu_exec(CPUState *cpu)
 uint32_t srt = (syndrome >> 16) & 0x1f;
 uint64_t val = 0;
 
-qemu_mutex_lock_iothread();
-current_cpu = cpu;
-
 DPRINTF("data abort: [pc=0x%llx va=0x%016llx pa=0x%016llx isv=%x "
 "iswrite=%x s1ptw=%x len=%d srt=%d]\n",
 env->pc, hvf_exit->exception.virtual_address,
@@ -446,8 +423,6 @@ int hvf_vcpu_exec(CPUState *cpu)
 hvf_set_reg(cpu, srt, val);
 }
 
-qemu_mutex_unlock_iothread();
-
 advance_pc = true;
 break;
 }
@@ -491,83 +466,18 @@ int hvf_vcpu_exec(CPUState *cpu)
 break;
 }
 case EC_WFX_TRAP:
-if (!(syndrome & WFX_IS_WFE) && !(cpu->interrupt_request &
-(CPU_INTERRUPT_HARD | CPU_INTERRUPT_FIQ))) {
-uint64_t cval, ctl, val, diff, now;
-
-/* Set up a local timer for vtimer if necessary ... */
-r = hv_vcpu_get_sys_reg(cpu->hvf->fd, HV_SYS_REG_CNTV_CTL_EL0, 
&ctl);
-assert_hvf_ok(r);
-r = hv_vcpu_get_sys_reg(cpu->hvf->fd, 
HV_SYS_REG_CNTV_CVAL_EL0, &cval);
-assert_hvf_ok(r);
-
-asm volatile("mrs %0, cntvct_el0" : "=r"(val));
-diff = cval - val;
-
-now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) /
-  gt_cntfrq_period_ns(arm_cpu);
-
-/* Timer disabled or masked, just wait for long */
-if (!(ctl & 1) || (ctl & 2)) {
-diff = (120 * NANOSECONDS_PER_SECOND) /
-   gt_cntfr

Re: [PATCH v2 1/2] arm/hvf: Optimize and simplify WFI handling

2020-12-01 Thread Peter Collingbourne
On Tue, Dec 1, 2020 at 5:54 PM Alexander Graf  wrote:
>
>
> On 02.12.20 02:52, Peter Collingbourne wrote:
> > On Tue, Dec 1, 2020 at 5:39 PM Alexander Graf  wrote:
> >>
> >> On 02.12.20 02:32, Peter Collingbourne wrote:
> >>> On Tue, Dec 1, 2020 at 3:24 PM Alexander Graf  wrote:
> >>>> On 01.12.20 22:00, Peter Collingbourne wrote:
> >>>>> Sleep on WFx until the VTIMER is due but allow ourselves to be woken
> >>>>> up on IPI.
> >>>>>
> >>>>> Signed-off-by: Peter Collingbourne 
> >>>>> ---
> >>>>> v2:
> >>>>> - simplify locking further
> >>>>> - wait indefinitely on disabled or masked timers
> >>>>>
> >>>>> accel/hvf/hvf-cpus.c |   5 +-
> >>>>> include/sysemu/hvf_int.h |   3 +-
> >>>>> target/arm/hvf/hvf.c | 116 
> >>>>> ++-
> >>>>> 3 files changed, 43 insertions(+), 81 deletions(-)
> >>>>>
> >>>>> diff --git a/accel/hvf/hvf-cpus.c b/accel/hvf/hvf-cpus.c
> >>>>> index 4360f64671..b2c8fb57f6 100644
> >>>>> --- a/accel/hvf/hvf-cpus.c
> >>>>> +++ b/accel/hvf/hvf-cpus.c
> >>>>> @@ -344,9 +344,8 @@ static int hvf_init_vcpu(CPUState *cpu)
> >>>>> sigact.sa_handler = dummy_signal;
> >>>>> sigaction(SIG_IPI, , NULL);
> >>>>>
> >>>>> -pthread_sigmask(SIG_BLOCK, NULL, );
> >>>>> -sigdelset(, SIG_IPI);
> >>>>> -pthread_sigmask(SIG_SETMASK, , NULL);
> >>>>> +pthread_sigmask(SIG_BLOCK, NULL, >hvf->unblock_ipi_mask);
> >>>>> +sigdelset(>hvf->unblock_ipi_mask, SIG_IPI);
> >>>>>
> >>>>> #ifdef __aarch64__
> >>>>> r = hv_vcpu_create(>hvf->fd, (hv_vcpu_exit_t 
> >>>>> **)>hvf->exit, NULL);
> >>>>> diff --git a/include/sysemu/hvf_int.h b/include/sysemu/hvf_int.h
> >>>>> index c56baa3ae8..13adf6ea77 100644
> >>>>> --- a/include/sysemu/hvf_int.h
> >>>>> +++ b/include/sysemu/hvf_int.h
> >>>>> @@ -62,8 +62,7 @@ extern HVFState *hvf_state;
> >>>>> struct hvf_vcpu_state {
> >>>>> uint64_t fd;
> >>>>> void *exit;
> >>>>> -struct timespec ts;
> >>>>> -bool sleeping;
> >>>>> +sigset_t unblock_ipi_mask;
> >>>>> };
> >>>>>
> >>>>> void assert_hvf_ok(hv_return_t ret);
> >>>>> diff --git a/target/arm/hvf/hvf.c b/target/arm/hvf/hvf.c
> >>>>> index 8fe10966d2..3321d48aa2 100644
> >>>>> --- a/target/arm/hvf/hvf.c
> >>>>> +++ b/target/arm/hvf/hvf.c
> >>>>> @@ -2,6 +2,7 @@
> >>>>>  * QEMU Hypervisor.framework support for Apple Silicon
> >>>>>
> >>>>>  * Copyright 2020 Alexander Graf 
> >>>>> + * Copyright 2020 Google LLC
> >>>>>  *
> >>>>>  * This work is licensed under the terms of the GNU GPL, version 2 
> >>>>> or later.
> >>>>>  * See the COPYING file in the top-level directory.
> >>>>> @@ -18,6 +19,7 @@
> >>>>> #include "sysemu/hw_accel.h"
> >>>>>
> >>>>> #include 
> >>>>> +#include 
> >>>>>
> >>>>> #include "exec/address-spaces.h"
> >>>>> #include "hw/irq.h"
> >>>>> @@ -320,18 +322,8 @@ int hvf_arch_init_vcpu(CPUState *cpu)
> >>>>>
> >>>>> void hvf_kick_vcpu_thread(CPUState *cpu)
> >>>>> {
> >>>>> -if (cpu->hvf->sleeping) {
> >>>>> -/*
> >>>>> - * When sleeping, make sure we always send signals. Also, 
> >>>>> clear the
> >>>>> - * timespec, so that an IPI that arrives between setting 
> >>>>> hvf->sleeping
> >>>>> - * and the nanosleep syscall still aborts the sleep.
> >>>>> - */
> >>>>> -cpu->thread_kicked = false;
> >>&g

Re: [PATCH v2 1/2] arm/hvf: Optimize and simplify WFI handling

2020-12-01 Thread Peter Collingbourne
On Tue, Dec 1, 2020 at 5:39 PM Alexander Graf  wrote:
>
>
> On 02.12.20 02:32, Peter Collingbourne wrote:
> > On Tue, Dec 1, 2020 at 3:24 PM Alexander Graf  wrote:
> >>
> >> On 01.12.20 22:00, Peter Collingbourne wrote:
> >>> Sleep on WFx until the VTIMER is due but allow ourselves to be woken
> >>> up on IPI.
> >>>
> >>> Signed-off-by: Peter Collingbourne 
> >>> ---
> >>> v2:
> >>> - simplify locking further
> >>> - wait indefinitely on disabled or masked timers
> >>>
> >>>accel/hvf/hvf-cpus.c |   5 +-
> >>>include/sysemu/hvf_int.h |   3 +-
> >>>target/arm/hvf/hvf.c | 116 ++-
> >>>3 files changed, 43 insertions(+), 81 deletions(-)
> >>>
> >>> diff --git a/accel/hvf/hvf-cpus.c b/accel/hvf/hvf-cpus.c
> >>> index 4360f64671..b2c8fb57f6 100644
> >>> --- a/accel/hvf/hvf-cpus.c
> >>> +++ b/accel/hvf/hvf-cpus.c
> >>> @@ -344,9 +344,8 @@ static int hvf_init_vcpu(CPUState *cpu)
> >>>sigact.sa_handler = dummy_signal;
> >>>sigaction(SIG_IPI, , NULL);
> >>>
> >>> -pthread_sigmask(SIG_BLOCK, NULL, );
> >>> -sigdelset(, SIG_IPI);
> >>> -pthread_sigmask(SIG_SETMASK, , NULL);
> >>> +pthread_sigmask(SIG_BLOCK, NULL, >hvf->unblock_ipi_mask);
> >>> +sigdelset(>hvf->unblock_ipi_mask, SIG_IPI);
> >>>
> >>>#ifdef __aarch64__
> >>>r = hv_vcpu_create(>hvf->fd, (hv_vcpu_exit_t 
> >>> **)>hvf->exit, NULL);
> >>> diff --git a/include/sysemu/hvf_int.h b/include/sysemu/hvf_int.h
> >>> index c56baa3ae8..13adf6ea77 100644
> >>> --- a/include/sysemu/hvf_int.h
> >>> +++ b/include/sysemu/hvf_int.h
> >>> @@ -62,8 +62,7 @@ extern HVFState *hvf_state;
> >>>struct hvf_vcpu_state {
> >>>uint64_t fd;
> >>>void *exit;
> >>> -struct timespec ts;
> >>> -bool sleeping;
> >>> +sigset_t unblock_ipi_mask;
> >>>};
> >>>
> >>>void assert_hvf_ok(hv_return_t ret);
> >>> diff --git a/target/arm/hvf/hvf.c b/target/arm/hvf/hvf.c
> >>> index 8fe10966d2..3321d48aa2 100644
> >>> --- a/target/arm/hvf/hvf.c
> >>> +++ b/target/arm/hvf/hvf.c
> >>> @@ -2,6 +2,7 @@
> >>> * QEMU Hypervisor.framework support for Apple Silicon
> >>>
> >>> * Copyright 2020 Alexander Graf 
> >>> + * Copyright 2020 Google LLC
> >>> *
> >>> * This work is licensed under the terms of the GNU GPL, version 2 or 
> >>> later.
> >>> * See the COPYING file in the top-level directory.
> >>> @@ -18,6 +19,7 @@
> >>>#include "sysemu/hw_accel.h"
> >>>
> >>>#include 
> >>> +#include 
> >>>
> >>>#include "exec/address-spaces.h"
> >>>#include "hw/irq.h"
> >>> @@ -320,18 +322,8 @@ int hvf_arch_init_vcpu(CPUState *cpu)
> >>>
> >>>void hvf_kick_vcpu_thread(CPUState *cpu)
> >>>{
> >>> -if (cpu->hvf->sleeping) {
> >>> -/*
> >>> - * When sleeping, make sure we always send signals. Also, clear 
> >>> the
> >>> - * timespec, so that an IPI that arrives between setting 
> >>> hvf->sleeping
> >>> - * and the nanosleep syscall still aborts the sleep.
> >>> - */
> >>> -cpu->thread_kicked = false;
> >>> -cpu->hvf->ts = (struct timespec){ };
> >>> -cpus_kick_thread(cpu);
> >>> -} else {
> >>> -hv_vcpus_exit(>hvf->fd, 1);
> >>> -}
> >>> +cpus_kick_thread(cpu);
> >>> +hv_vcpus_exit(>hvf->fd, 1);
> >>>}
> >>>
> >>>static int hvf_inject_interrupts(CPUState *cpu)
> >>> @@ -349,6 +341,18 @@ static int hvf_inject_interrupts(CPUState *cpu)
> >>>return 0;
> >>>}
> >>>
> >>> +static void hvf_wait_for_ipi(CPUState *cpu, struct timespec *ts)
> >>> +{
> >>> +/*
> >>> + * Use pselect to sleep so that other threads can IPI us while we're
> >>> + * sl

Re: [PATCH v2 1/2] arm/hvf: Optimize and simplify WFI handling

2020-12-01 Thread Peter Collingbourne
On Tue, Dec 1, 2020 at 3:24 PM Alexander Graf  wrote:
>
>
> On 01.12.20 22:00, Peter Collingbourne wrote:
> > Sleep on WFx until the VTIMER is due but allow ourselves to be woken
> > up on IPI.
> >
> > Signed-off-by: Peter Collingbourne 
> > ---
> > v2:
> > - simplify locking further
> > - wait indefinitely on disabled or masked timers
> >
> >   accel/hvf/hvf-cpus.c |   5 +-
> >   include/sysemu/hvf_int.h |   3 +-
> >   target/arm/hvf/hvf.c | 116 ++-
> >   3 files changed, 43 insertions(+), 81 deletions(-)
> >
> > diff --git a/accel/hvf/hvf-cpus.c b/accel/hvf/hvf-cpus.c
> > index 4360f64671..b2c8fb57f6 100644
> > --- a/accel/hvf/hvf-cpus.c
> > +++ b/accel/hvf/hvf-cpus.c
> > @@ -344,9 +344,8 @@ static int hvf_init_vcpu(CPUState *cpu)
> >   sigact.sa_handler = dummy_signal;
> >   sigaction(SIG_IPI, , NULL);
> >
> > -pthread_sigmask(SIG_BLOCK, NULL, );
> > -sigdelset(, SIG_IPI);
> > -pthread_sigmask(SIG_SETMASK, , NULL);
> > +pthread_sigmask(SIG_BLOCK, NULL, >hvf->unblock_ipi_mask);
> > +sigdelset(>hvf->unblock_ipi_mask, SIG_IPI);
> >
> >   #ifdef __aarch64__
> >   r = hv_vcpu_create(>hvf->fd, (hv_vcpu_exit_t **)>hvf->exit, 
> > NULL);
> > diff --git a/include/sysemu/hvf_int.h b/include/sysemu/hvf_int.h
> > index c56baa3ae8..13adf6ea77 100644
> > --- a/include/sysemu/hvf_int.h
> > +++ b/include/sysemu/hvf_int.h
> > @@ -62,8 +62,7 @@ extern HVFState *hvf_state;
> >   struct hvf_vcpu_state {
> >   uint64_t fd;
> >   void *exit;
> > -struct timespec ts;
> > -bool sleeping;
> > +sigset_t unblock_ipi_mask;
> >   };
> >
> >   void assert_hvf_ok(hv_return_t ret);
> > diff --git a/target/arm/hvf/hvf.c b/target/arm/hvf/hvf.c
> > index 8fe10966d2..3321d48aa2 100644
> > --- a/target/arm/hvf/hvf.c
> > +++ b/target/arm/hvf/hvf.c
> > @@ -2,6 +2,7 @@
> >* QEMU Hypervisor.framework support for Apple Silicon
> >
> >* Copyright 2020 Alexander Graf 
> > + * Copyright 2020 Google LLC
> >*
> >* This work is licensed under the terms of the GNU GPL, version 2 or 
> > later.
> >* See the COPYING file in the top-level directory.
> > @@ -18,6 +19,7 @@
> >   #include "sysemu/hw_accel.h"
> >
> >   #include 
> > +#include 
> >
> >   #include "exec/address-spaces.h"
> >   #include "hw/irq.h"
> > @@ -320,18 +322,8 @@ int hvf_arch_init_vcpu(CPUState *cpu)
> >
> >   void hvf_kick_vcpu_thread(CPUState *cpu)
> >   {
> > -if (cpu->hvf->sleeping) {
> > -/*
> > - * When sleeping, make sure we always send signals. Also, clear the
> > - * timespec, so that an IPI that arrives between setting 
> > hvf->sleeping
> > - * and the nanosleep syscall still aborts the sleep.
> > - */
> > -cpu->thread_kicked = false;
> > -cpu->hvf->ts = (struct timespec){ };
> > -cpus_kick_thread(cpu);
> > -} else {
> > -hv_vcpus_exit(>hvf->fd, 1);
> > -}
> > +cpus_kick_thread(cpu);
> > +hv_vcpus_exit(>hvf->fd, 1);
> >   }
> >
> >   static int hvf_inject_interrupts(CPUState *cpu)
> > @@ -349,6 +341,18 @@ static int hvf_inject_interrupts(CPUState *cpu)
> >   return 0;
> >   }
> >
> > +static void hvf_wait_for_ipi(CPUState *cpu, struct timespec *ts)
> > +{
> > +/*
> > + * Use pselect to sleep so that other threads can IPI us while we're
> > + * sleeping.
> > + */
> > +qatomic_mb_set(>thread_kicked, false);
> > +qemu_mutex_unlock_iothread();
> > +pselect(0, 0, 0, 0, ts, >hvf->unblock_ipi_mask);
> > +qemu_mutex_lock_iothread();
> > +}
> > +
> >   int hvf_vcpu_exec(CPUState *cpu)
> >   {
> >   ARMCPU *arm_cpu = ARM_CPU(cpu);
> > @@ -357,15 +361,11 @@ int hvf_vcpu_exec(CPUState *cpu)
> >   hv_return_t r;
> >   int ret = 0;
> >
> > -qemu_mutex_unlock_iothread();
> > -
> >   do {
> >   bool advance_pc = false;
> >
> > -qemu_mutex_lock_iothread();
> >   current_cpu = cpu;
> >   qemu_wait_io_event_common(cpu);
> > -qemu_mutex_unlock_iothread();
> >
> >   flush_cpu_state(cpu);
> >
> > @@ -374,10 +374,10 @@ int hvf_vcpu_exec(CPUState *cpu)
&

Re: [PATCH] arm/hvf: Optimize and simplify WFI handling

2020-12-01 Thread Peter Collingbourne
On Tue, Dec 1, 2020 at 2:04 PM Alexander Graf  wrote:
>
>
> On 01.12.20 19:59, Peter Collingbourne wrote:
> > On Tue, Dec 1, 2020 at 3:16 AM Alexander Graf  wrote:
> >> Hi Peter,
> >>
> >> On 01.12.20 09:21, Peter Collingbourne wrote:
> >>> Sleep on WFx until the VTIMER is due but allow ourselves to be woken
> >>> up on IPI.
> >>>
> >>> Signed-off-by: Peter Collingbourne 
> >>
> >> Thanks a bunch!
> >>
> >>
> >>> ---
> >>> Alexander Graf wrote:
> >>>> I would love to take a patch from you here :). I'll still be stuck for a
> >>>> while with the sysreg sync rework that Peter asked for before I can look
> >>>> at WFI again.
> >>> Okay, here's a patch :) It's a relatively straightforward adaptation
> >>> of what we have in our fork, which can now boot Android to GUI while
> >>> remaining at around 4% CPU when idle.
> >>>
> >>> I'm not set up to boot a full Linux distribution at the moment so I
> >>> tested it on upstream QEMU by running a recent mainline Linux kernel
> >>> with a rootfs containing an init program that just does sleep(5)
> >>> and verified that the qemu process remains at low CPU usage during
> >>> the sleep. This was on top of your v2 plus the last patch of your v1
> >>> since it doesn't look like you have a replacement for that logic yet.
> >>>
> >>>accel/hvf/hvf-cpus.c |  5 +--
> >>>include/sysemu/hvf_int.h |  3 +-
> >>>target/arm/hvf/hvf.c | 94 +++-
> >>>3 files changed, 28 insertions(+), 74 deletions(-)
> >>>
> >>> diff --git a/accel/hvf/hvf-cpus.c b/accel/hvf/hvf-cpus.c
> >>> index 4360f64671..b2c8fb57f6 100644
> >>> --- a/accel/hvf/hvf-cpus.c
> >>> +++ b/accel/hvf/hvf-cpus.c
> >>> @@ -344,9 +344,8 @@ static int hvf_init_vcpu(CPUState *cpu)
> >>>sigact.sa_handler = dummy_signal;
> >>>sigaction(SIG_IPI, , NULL);
> >>>
> >>> -pthread_sigmask(SIG_BLOCK, NULL, );
> >>> -sigdelset(, SIG_IPI);
> >>> -pthread_sigmask(SIG_SETMASK, , NULL);
> >>> +pthread_sigmask(SIG_BLOCK, NULL, >hvf->unblock_ipi_mask);
> >>> +sigdelset(>hvf->unblock_ipi_mask, SIG_IPI);
> >>
> >> What will this do to the x86 hvf implementation? We're now not
> >> unblocking SIG_IPI again for that, right?
> > Yes and that was the case before your patch series.
>
>
> The way I understand Roman, he wanted to unblock the IPI signal on x86:
>
> https://patchwork.kernel.org/project/qemu-devel/patch/20201126215017.41156-3-ag...@csgraf.de/#23807021
>
> I agree that at this point it's not a problem though to break it again.
> I'm not quite sure how to merge your patches within my patch set though,
> given they basically revert half of my previously introduced code...
>
>
> >
> >>>#ifdef __aarch64__
> >>>r = hv_vcpu_create(>hvf->fd, (hv_vcpu_exit_t 
> >>> **)>hvf->exit, NULL);
> >>> diff --git a/include/sysemu/hvf_int.h b/include/sysemu/hvf_int.h
> >>> index c56baa3ae8..13adf6ea77 100644
> >>> --- a/include/sysemu/hvf_int.h
> >>> +++ b/include/sysemu/hvf_int.h
> >>> @@ -62,8 +62,7 @@ extern HVFState *hvf_state;
> >>>struct hvf_vcpu_state {
> >>>uint64_t fd;
> >>>void *exit;
> >>> -struct timespec ts;
> >>> -bool sleeping;
> >>> +sigset_t unblock_ipi_mask;
> >>>};
> >>>
> >>>void assert_hvf_ok(hv_return_t ret);
> >>> diff --git a/target/arm/hvf/hvf.c b/target/arm/hvf/hvf.c
> >>> index 8fe10966d2..60a361ff38 100644
> >>> --- a/target/arm/hvf/hvf.c
> >>> +++ b/target/arm/hvf/hvf.c
> >>> @@ -2,6 +2,7 @@
> >>> * QEMU Hypervisor.framework support for Apple Silicon
> >>>
> >>> * Copyright 2020 Alexander Graf 
> >>> + * Copyright 2020 Google LLC
> >>> *
> >>> * This work is licensed under the terms of the GNU GPL, version 2 or 
> >>> later.
> >>> * See the COPYING file in the top-level directory.
> >>> @@ -18,6 +19,7 @@
> >>>#include "sysemu/hw_accel.h"
> >>>
> >>>#include 
> >>> +#include 
> >>>
> >>

Re: [PATCH] arm/hvf: Optimize and simplify WFI handling

2020-12-01 Thread Peter Collingbourne
On Tue, Dec 1, 2020 at 2:09 PM Alexander Graf  wrote:
>
>
> On 01.12.20 21:03, Peter Collingbourne wrote:
> > On Tue, Dec 1, 2020 at 8:26 AM Alexander Graf  wrote:
> >>
> >> On 01.12.20 09:21, Peter Collingbourne wrote:
> >>> Sleep on WFx until the VTIMER is due but allow ourselves to be woken
> >>> up on IPI.
> >>>
> >>> Signed-off-by: Peter Collingbourne 
> >>> ---
> >>> Alexander Graf wrote:
> >>>> I would love to take a patch from you here :). I'll still be stuck for a
> >>>> while with the sysreg sync rework that Peter asked for before I can look
> >>>> at WFI again.
> >>> Okay, here's a patch :) It's a relatively straightforward adaptation
> >>> of what we have in our fork, which can now boot Android to GUI while
> >>> remaining at around 4% CPU when idle.
> >>>
> >>> I'm not set up to boot a full Linux distribution at the moment so I
> >>> tested it on upstream QEMU by running a recent mainline Linux kernel
> >>> with a rootfs containing an init program that just does sleep(5)
> >>> and verified that the qemu process remains at low CPU usage during
> >>> the sleep. This was on top of your v2 plus the last patch of your v1
> >>> since it doesn't look like you have a replacement for that logic yet.
> >>
> >> How about something like this instead?
> >>
> >>
> >> Alex
> >>
> >>
> >> diff --git a/accel/hvf/hvf-cpus.c b/accel/hvf/hvf-cpus.c
> >> index 4360f64671..50384013ea 100644
> >> --- a/accel/hvf/hvf-cpus.c
> >> +++ b/accel/hvf/hvf-cpus.c
> >> @@ -337,16 +337,18 @@ static int hvf_init_vcpu(CPUState *cpu)
> >>cpu->hvf = g_malloc0(sizeof(*cpu->hvf));
> >>
> >>/* init cpu signals */
> >> -sigset_t set;
> >>struct sigaction sigact;
> >>
> >>memset(, 0, sizeof(sigact));
> >>sigact.sa_handler = dummy_signal;
> >>sigaction(SIG_IPI, , NULL);
> >>
> >> -pthread_sigmask(SIG_BLOCK, NULL, );
> >> -sigdelset(, SIG_IPI);
> >> -pthread_sigmask(SIG_SETMASK, , NULL);
> >> +pthread_sigmask(SIG_BLOCK, NULL, >hvf->sigmask);
> >> +sigdelset(>hvf->sigmask, SIG_IPI);
> >> +pthread_sigmask(SIG_SETMASK, >hvf->sigmask, NULL);
> >> +
> >> +pthread_sigmask(SIG_BLOCK, NULL, >hvf->sigmask_ipi);
> >> +sigaddset(>hvf->sigmask_ipi, SIG_IPI);
> > There's no reason to unblock SIG_IPI while not in pselect and it can
> > easily lead to missed wakeups. The whole point of pselect is so that
> > you can guarantee that only one part of your program sees signals
> > without a possibility of them being missed.
>
>
> Hm, I think I start to agree with you here :). We can probably just
> leave SIG_IPI masked at all times and only unmask on pselect. The worst
> thing that will happen is a premature wakeup if we did get an IPI
> incoming while hvf->sleeping is set, but were either not running
> pselect() yet and bailed out or already finished pselect() execution.

Ack.

> >
> >>#ifdef __aarch64__
> >>r = hv_vcpu_create(>hvf->fd, (hv_vcpu_exit_t
> >> **)>hvf->exit, NULL);
> >> diff --git a/include/sysemu/hvf_int.h b/include/sysemu/hvf_int.h
> >> index c56baa3ae8..6e237f2db0 100644
> >> --- a/include/sysemu/hvf_int.h
> >> +++ b/include/sysemu/hvf_int.h
> >> @@ -62,8 +62,9 @@ extern HVFState *hvf_state;
> >>struct hvf_vcpu_state {
> >>uint64_t fd;
> >>void *exit;
> >> -struct timespec ts;
> >>bool sleeping;
> >> +sigset_t sigmask;
> >> +sigset_t sigmask_ipi;
> >>};
> >>
> >>void assert_hvf_ok(hv_return_t ret);
> >> diff --git a/target/arm/hvf/hvf.c b/target/arm/hvf/hvf.c
> >> index 0c01a03725..350b845e6e 100644
> >> --- a/target/arm/hvf/hvf.c
> >> +++ b/target/arm/hvf/hvf.c
> >> @@ -320,20 +320,24 @@ int hvf_arch_init_vcpu(CPUState *cpu)
> >>
> >>void hvf_kick_vcpu_thread(CPUState *cpu)
> >>{
> >> -if (cpu->hvf->sleeping) {
> >> -/*
> >> - * When sleeping, make sure we always send signals. Also, clear 
> >> the
> >> - * timespec, so that an IPI that arrives between setting
> >> hvf->sleeping
> >> - * 

Re: [PATCH v2 2/2] arm/hvf: Stop setting current_cpu

2020-12-01 Thread Peter Collingbourne
On Tue, Dec 1, 2020 at 2:11 PM Alexander Graf  wrote:
>
>
> On 01.12.20 22:00, Peter Collingbourne wrote:
> > This variable is already being set by the generic HVF code and it's a
> > thread-local variable so I don't see how it can be overwritten.
> >
> > Signed-off-by: Peter Collingbourne 
>
>
> Yikes :). Yes, absolutely!
>
> Would you mind if I squash this straight into my patch?

Sure, please go ahead.

Peter

>
>
> Thanks,
>
> Alex
>
>
> > ---
> >   target/arm/hvf/hvf.c | 8 
> >   1 file changed, 8 deletions(-)
> >
> > diff --git a/target/arm/hvf/hvf.c b/target/arm/hvf/hvf.c
> > index 3321d48aa2..40984fcf4d 100644
> > --- a/target/arm/hvf/hvf.c
> > +++ b/target/arm/hvf/hvf.c
> > @@ -364,7 +364,6 @@ int hvf_vcpu_exec(CPUState *cpu)
> >   do {
> >   bool advance_pc = false;
> >
> > -current_cpu = cpu;
> >   qemu_wait_io_event_common(cpu);
> >
> >   flush_cpu_state(cpu);
> > @@ -391,7 +390,6 @@ int hvf_vcpu_exec(CPUState *cpu)
> >   /* This is the main one, handle below. */
> >   break;
> >   case HV_EXIT_REASON_VTIMER_ACTIVATED:
> > -current_cpu = cpu;
> >   qemu_set_irq(arm_cpu->gt_timer_outputs[GTIMER_VIRT], 1);
> >   continue;
> >   case HV_EXIT_REASON_CANCELED:
> > @@ -412,8 +410,6 @@ int hvf_vcpu_exec(CPUState *cpu)
> >   uint32_t srt = (syndrome >> 16) & 0x1f;
> >   uint64_t val = 0;
> >
> > -current_cpu = cpu;
> > -
> >   DPRINTF("data abort: [pc=0x%llx va=0x%016llx pa=0x%016llx 
> > isv=%x "
> >   "iswrite=%x s1ptw=%x len=%d srt=%d]\n",
> >   env->pc, hvf_exit->exception.virtual_address,
> > @@ -523,7 +519,6 @@ int hvf_vcpu_exec(CPUState *cpu)
> >   break;
> >   case EC_AA64_HVC:
> >   cpu_synchronize_state(cpu);
> > -current_cpu = cpu;
> >   if (arm_is_psci_call(arm_cpu, EXCP_HVC)) {
> >   arm_handle_psci_call(arm_cpu);
> >   } else {
> > @@ -533,7 +528,6 @@ int hvf_vcpu_exec(CPUState *cpu)
> >   break;
> >   case EC_AA64_SMC:
> >   cpu_synchronize_state(cpu);
> > -current_cpu = cpu;
> >   if (arm_is_psci_call(arm_cpu, EXCP_SMC)) {
> >   arm_handle_psci_call(arm_cpu);
> >   } else {
> > @@ -561,7 +555,5 @@ int hvf_vcpu_exec(CPUState *cpu)
> >   }
> >   } while (ret == 0);
> >
> > -current_cpu = cpu;
> > -
> >   return ret;
> >   }



[PATCH v2 2/2] arm/hvf: Stop setting current_cpu

2020-12-01 Thread Peter Collingbourne via
This variable is already being set by the generic HVF code and it's a
thread-local variable so I don't see how it can be overwritten.

Signed-off-by: Peter Collingbourne 
---
 target/arm/hvf/hvf.c | 8 
 1 file changed, 8 deletions(-)

diff --git a/target/arm/hvf/hvf.c b/target/arm/hvf/hvf.c
index 3321d48aa2..40984fcf4d 100644
--- a/target/arm/hvf/hvf.c
+++ b/target/arm/hvf/hvf.c
@@ -364,7 +364,6 @@ int hvf_vcpu_exec(CPUState *cpu)
 do {
 bool advance_pc = false;
 
-current_cpu = cpu;
 qemu_wait_io_event_common(cpu);
 
 flush_cpu_state(cpu);
@@ -391,7 +390,6 @@ int hvf_vcpu_exec(CPUState *cpu)
 /* This is the main one, handle below. */
 break;
 case HV_EXIT_REASON_VTIMER_ACTIVATED:
-current_cpu = cpu;
 qemu_set_irq(arm_cpu->gt_timer_outputs[GTIMER_VIRT], 1);
 continue;
 case HV_EXIT_REASON_CANCELED:
@@ -412,8 +410,6 @@ int hvf_vcpu_exec(CPUState *cpu)
 uint32_t srt = (syndrome >> 16) & 0x1f;
 uint64_t val = 0;
 
-current_cpu = cpu;
-
 DPRINTF("data abort: [pc=0x%llx va=0x%016llx pa=0x%016llx isv=%x "
 "iswrite=%x s1ptw=%x len=%d srt=%d]\n",
 env->pc, hvf_exit->exception.virtual_address,
@@ -523,7 +519,6 @@ int hvf_vcpu_exec(CPUState *cpu)
 break;
 case EC_AA64_HVC:
 cpu_synchronize_state(cpu);
-current_cpu = cpu;
 if (arm_is_psci_call(arm_cpu, EXCP_HVC)) {
 arm_handle_psci_call(arm_cpu);
 } else {
@@ -533,7 +528,6 @@ int hvf_vcpu_exec(CPUState *cpu)
 break;
 case EC_AA64_SMC:
 cpu_synchronize_state(cpu);
-current_cpu = cpu;
 if (arm_is_psci_call(arm_cpu, EXCP_SMC)) {
 arm_handle_psci_call(arm_cpu);
 } else {
@@ -561,7 +555,5 @@ int hvf_vcpu_exec(CPUState *cpu)
 }
 } while (ret == 0);
 
-current_cpu = cpu;
-
 return ret;
 }
-- 
2.29.2.454.gaff20da3a2-goog




[PATCH v2 1/2] arm/hvf: Optimize and simplify WFI handling

2020-12-01 Thread Peter Collingbourne via
Sleep on WFx until the VTIMER is due but allow ourselves to be woken
up on IPI.

Signed-off-by: Peter Collingbourne 
---
v2:
- simplify locking further
- wait indefinitely on disabled or masked timers

 accel/hvf/hvf-cpus.c |   5 +-
 include/sysemu/hvf_int.h |   3 +-
 target/arm/hvf/hvf.c | 116 ++-
 3 files changed, 43 insertions(+), 81 deletions(-)

diff --git a/accel/hvf/hvf-cpus.c b/accel/hvf/hvf-cpus.c
index 4360f64671..b2c8fb57f6 100644
--- a/accel/hvf/hvf-cpus.c
+++ b/accel/hvf/hvf-cpus.c
@@ -344,9 +344,8 @@ static int hvf_init_vcpu(CPUState *cpu)
 sigact.sa_handler = dummy_signal;
 sigaction(SIG_IPI, &sigact, NULL);
 
-pthread_sigmask(SIG_BLOCK, NULL, &set);
-sigdelset(&set, SIG_IPI);
-pthread_sigmask(SIG_SETMASK, &set, NULL);
+pthread_sigmask(SIG_BLOCK, NULL, &cpu->hvf->unblock_ipi_mask);
+sigdelset(&cpu->hvf->unblock_ipi_mask, SIG_IPI);
 
 #ifdef __aarch64__
 r = hv_vcpu_create(&cpu->hvf->fd, (hv_vcpu_exit_t **)&cpu->hvf->exit, 
NULL);
diff --git a/include/sysemu/hvf_int.h b/include/sysemu/hvf_int.h
index c56baa3ae8..13adf6ea77 100644
--- a/include/sysemu/hvf_int.h
+++ b/include/sysemu/hvf_int.h
@@ -62,8 +62,7 @@ extern HVFState *hvf_state;
 struct hvf_vcpu_state {
 uint64_t fd;
 void *exit;
-struct timespec ts;
-bool sleeping;
+sigset_t unblock_ipi_mask;
 };
 
 void assert_hvf_ok(hv_return_t ret);
diff --git a/target/arm/hvf/hvf.c b/target/arm/hvf/hvf.c
index 8fe10966d2..3321d48aa2 100644
--- a/target/arm/hvf/hvf.c
+++ b/target/arm/hvf/hvf.c
@@ -2,6 +2,7 @@
  * QEMU Hypervisor.framework support for Apple Silicon
 
  * Copyright 2020 Alexander Graf 
+ * Copyright 2020 Google LLC
  *
  * This work is licensed under the terms of the GNU GPL, version 2 or later.
  * See the COPYING file in the top-level directory.
@@ -18,6 +19,7 @@
 #include "sysemu/hw_accel.h"
 
 #include 
+#include 
 
 #include "exec/address-spaces.h"
 #include "hw/irq.h"
@@ -320,18 +322,8 @@ int hvf_arch_init_vcpu(CPUState *cpu)
 
 void hvf_kick_vcpu_thread(CPUState *cpu)
 {
-if (cpu->hvf->sleeping) {
-/*
- * When sleeping, make sure we always send signals. Also, clear the
- * timespec, so that an IPI that arrives between setting hvf->sleeping
- * and the nanosleep syscall still aborts the sleep.
- */
-cpu->thread_kicked = false;
-cpu->hvf->ts = (struct timespec){ };
-cpus_kick_thread(cpu);
-} else {
-hv_vcpus_exit(&cpu->hvf->fd, 1);
-}
+cpus_kick_thread(cpu);
+hv_vcpus_exit(&cpu->hvf->fd, 1);
 }
 
 static int hvf_inject_interrupts(CPUState *cpu)
@@ -349,6 +341,18 @@ static int hvf_inject_interrupts(CPUState *cpu)
 return 0;
 }
 
+static void hvf_wait_for_ipi(CPUState *cpu, struct timespec *ts)
+{
+/*
+ * Use pselect to sleep so that other threads can IPI us while we're
+ * sleeping.
+ */
+qatomic_mb_set(&cpu->thread_kicked, false);
+qemu_mutex_unlock_iothread();
+pselect(0, 0, 0, 0, ts, &cpu->hvf->unblock_ipi_mask);
+qemu_mutex_lock_iothread();
+}
+
 int hvf_vcpu_exec(CPUState *cpu)
 {
 ARMCPU *arm_cpu = ARM_CPU(cpu);
@@ -357,15 +361,11 @@ int hvf_vcpu_exec(CPUState *cpu)
 hv_return_t r;
 int ret = 0;
 
-qemu_mutex_unlock_iothread();
-
 do {
 bool advance_pc = false;
 
-qemu_mutex_lock_iothread();
 current_cpu = cpu;
 qemu_wait_io_event_common(cpu);
-qemu_mutex_unlock_iothread();
 
 flush_cpu_state(cpu);
 
@@ -374,10 +374,10 @@ int hvf_vcpu_exec(CPUState *cpu)
 }
 
 if (cpu->halted) {
-qemu_mutex_lock_iothread();
 return EXCP_HLT;
 }
 
+qemu_mutex_unlock_iothread();
 assert_hvf_ok(hv_vcpu_run(cpu->hvf->fd));
 
 /* handle VMEXIT */
@@ -385,15 +385,14 @@ int hvf_vcpu_exec(CPUState *cpu)
 uint64_t syndrome = hvf_exit->exception.syndrome;
 uint32_t ec = syn_get_ec(syndrome);
 
+qemu_mutex_lock_iothread();
 switch (exit_reason) {
 case HV_EXIT_REASON_EXCEPTION:
 /* This is the main one, handle below. */
 break;
 case HV_EXIT_REASON_VTIMER_ACTIVATED:
-qemu_mutex_lock_iothread();
 current_cpu = cpu;
 qemu_set_irq(arm_cpu->gt_timer_outputs[GTIMER_VIRT], 1);
-qemu_mutex_unlock_iothread();
 continue;
 case HV_EXIT_REASON_CANCELED:
 /* we got kicked, no exit to process */
@@ -413,7 +412,6 @@ int hvf_vcpu_exec(CPUState *cpu)
 uint32_t srt = (syndrome >> 16) & 0x1f;
 uint64_t val = 0;
 
-qemu_mutex_lock_iothread();
 current_cpu = cpu;
 
 DPRINTF("data abort: [pc=0x%llx va=0x%016llx pa=0x%016llx isv=%x "
@@ -446,8 +444,6 @@ int hvf_vcpu_exec(CPUState *cpu)
 hvf_set_reg(cpu, srt, val);
   

Re: [PATCH] arm/hvf: Optimize and simplify WFI handling

2020-12-01 Thread Peter Collingbourne
On Tue, Dec 1, 2020 at 8:26 AM Alexander Graf  wrote:
>
>
> On 01.12.20 09:21, Peter Collingbourne wrote:
> > Sleep on WFx until the VTIMER is due but allow ourselves to be woken
> > up on IPI.
> >
> > Signed-off-by: Peter Collingbourne 
> > ---
> > Alexander Graf wrote:
> >> I would love to take a patch from you here :). I'll still be stuck for a
> >> while with the sysreg sync rework that Peter asked for before I can look
> >> at WFI again.
> > Okay, here's a patch :) It's a relatively straightforward adaptation
> > of what we have in our fork, which can now boot Android to GUI while
> > remaining at around 4% CPU when idle.
> >
> > I'm not set up to boot a full Linux distribution at the moment so I
> > tested it on upstream QEMU by running a recent mainline Linux kernel
> > with a rootfs containing an init program that just does sleep(5)
> > and verified that the qemu process remains at low CPU usage during
> > the sleep. This was on top of your v2 plus the last patch of your v1
> > since it doesn't look like you have a replacement for that logic yet.
>
>
> How about something like this instead?
>
>
> Alex
>
>
> diff --git a/accel/hvf/hvf-cpus.c b/accel/hvf/hvf-cpus.c
> index 4360f64671..50384013ea 100644
> --- a/accel/hvf/hvf-cpus.c
> +++ b/accel/hvf/hvf-cpus.c
> @@ -337,16 +337,18 @@ static int hvf_init_vcpu(CPUState *cpu)
>   cpu->hvf = g_malloc0(sizeof(*cpu->hvf));
>
>   /* init cpu signals */
> -sigset_t set;
>   struct sigaction sigact;
>
>   memset(&sigact, 0, sizeof(sigact));
>   sigact.sa_handler = dummy_signal;
>   sigaction(SIG_IPI, &sigact, NULL);
>
> -pthread_sigmask(SIG_BLOCK, NULL, &set);
> -sigdelset(&set, SIG_IPI);
> -pthread_sigmask(SIG_SETMASK, &set, NULL);
> +pthread_sigmask(SIG_BLOCK, NULL, &cpu->hvf->sigmask);
> +sigdelset(&cpu->hvf->sigmask, SIG_IPI);
> +pthread_sigmask(SIG_SETMASK, &cpu->hvf->sigmask, NULL);
> +
> +pthread_sigmask(SIG_BLOCK, NULL, &cpu->hvf->sigmask_ipi);
> +sigaddset(&cpu->hvf->sigmask_ipi, SIG_IPI);

There's no reason to unblock SIG_IPI while not in pselect and it can
easily lead to missed wakeups. The whole point of pselect is so that
you can guarantee that only one part of your program sees signals
without a possibility of them being missed.
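
Spelled out, the interleaving being guarded against looks like this
(illustrative timeline only, not code from either version of the patch):

    /*
     *   vCPU thread (SIG_IPI unblocked)        kicker thread
     *   -------------------------------        ---------------------------
     *   reads timer state, decides to sleep
     *                                           cpu->thread_kicked = true;
     *                                           pthread_kill(..., SIG_IPI);
     *   dummy handler runs here; the signal
     *   is consumed, nothing is left pending
     *   pselect(0, ..., &unblock_ipi_mask);  <- sleeps for the full timeout
     *
     * If SIG_IPI is blocked everywhere except inside pselect(), the kick
     * instead stays pending across that window and pselect() returns
     * immediately.
     */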

>
>   #ifdef __aarch64__
>   r = hv_vcpu_create(&cpu->hvf->fd, (hv_vcpu_exit_t
> **)&cpu->hvf->exit, NULL);
> diff --git a/include/sysemu/hvf_int.h b/include/sysemu/hvf_int.h
> index c56baa3ae8..6e237f2db0 100644
> --- a/include/sysemu/hvf_int.h
> +++ b/include/sysemu/hvf_int.h
> @@ -62,8 +62,9 @@ extern HVFState *hvf_state;
>   struct hvf_vcpu_state {
>   uint64_t fd;
>   void *exit;
> -struct timespec ts;
>   bool sleeping;
> +sigset_t sigmask;
> +sigset_t sigmask_ipi;
>   };
>
>   void assert_hvf_ok(hv_return_t ret);
> diff --git a/target/arm/hvf/hvf.c b/target/arm/hvf/hvf.c
> index 0c01a03725..350b845e6e 100644
> --- a/target/arm/hvf/hvf.c
> +++ b/target/arm/hvf/hvf.c
> @@ -320,20 +320,24 @@ int hvf_arch_init_vcpu(CPUState *cpu)
>
>   void hvf_kick_vcpu_thread(CPUState *cpu)
>   {
> -if (cpu->hvf->sleeping) {
> -/*
> - * When sleeping, make sure we always send signals. Also, clear the
> - * timespec, so that an IPI that arrives between setting
> hvf->sleeping
> - * and the nanosleep syscall still aborts the sleep.
> - */
> -cpu->thread_kicked = false;
> -cpu->hvf->ts = (struct timespec){ };
> +if (qatomic_read(&cpu->hvf->sleeping)) {
> +/* When sleeping, send a signal to get out of pselect */
>   cpus_kick_thread(cpu);
>   } else {
>   hv_vcpus_exit(&cpu->hvf->fd, 1);
>   }
>   }
>
> +static void hvf_block_sig_ipi(CPUState *cpu)
> +{
> +pthread_sigmask(SIG_SETMASK, &cpu->hvf->sigmask_ipi, NULL);
> +}
> +
> +static void hvf_unblock_sig_ipi(CPUState *cpu)
> +{
> +pthread_sigmask(SIG_SETMASK, &cpu->hvf->sigmask, NULL);
> +}
> +
>   static int hvf_inject_interrupts(CPUState *cpu)
>   {
>   if (cpu->interrupt_request & CPU_INTERRUPT_FIQ) {
> @@ -354,6 +358,7 @@ int hvf_vcpu_exec(CPUState *cpu)
>   ARMCPU *arm_cpu = ARM_CPU(cpu);
>   CPUARMState *env = &arm_cpu->env;
>   hv_vcpu_exit_t *hvf_exit = cpu->hvf->exit;
> +const uint32_t irq_mask = CPU_INTERRUPT_HARD | CPU_INTERRUPT_FIQ;
>   hv_return_t r;
>   int ret = 0;
>
> @@ -491,8 +496,8 @@ int hvf_vcpu_exec(CPUState *cpu)
>   break;
>   }
>   case EC_WFX_TRAP:
> - 

Re: [PATCH] arm/hvf: Optimize and simplify WFI handling

2020-12-01 Thread Peter Collingbourne
On Tue, Dec 1, 2020 at 3:16 AM Alexander Graf  wrote:
>
> Hi Peter,
>
> On 01.12.20 09:21, Peter Collingbourne wrote:
> > Sleep on WFx until the VTIMER is due but allow ourselves to be woken
> > up on IPI.
> >
> > Signed-off-by: Peter Collingbourne 
>
>
> Thanks a bunch!
>
>
> > ---
> > Alexander Graf wrote:
> >> I would love to take a patch from you here :). I'll still be stuck for a
> >> while with the sysreg sync rework that Peter asked for before I can look
> >> at WFI again.
> > Okay, here's a patch :) It's a relatively straightforward adaptation
> > of what we have in our fork, which can now boot Android to GUI while
> > remaining at around 4% CPU when idle.
> >
> > I'm not set up to boot a full Linux distribution at the moment so I
> > tested it on upstream QEMU by running a recent mainline Linux kernel
> > with a rootfs containing an init program that just does sleep(5)
> > and verified that the qemu process remains at low CPU usage during
> > the sleep. This was on top of your v2 plus the last patch of your v1
> > since it doesn't look like you have a replacement for that logic yet.
> >
> >   accel/hvf/hvf-cpus.c |  5 +--
> >   include/sysemu/hvf_int.h |  3 +-
> >   target/arm/hvf/hvf.c | 94 +++-
> >   3 files changed, 28 insertions(+), 74 deletions(-)
> >
> > diff --git a/accel/hvf/hvf-cpus.c b/accel/hvf/hvf-cpus.c
> > index 4360f64671..b2c8fb57f6 100644
> > --- a/accel/hvf/hvf-cpus.c
> > +++ b/accel/hvf/hvf-cpus.c
> > @@ -344,9 +344,8 @@ static int hvf_init_vcpu(CPUState *cpu)
> >   sigact.sa_handler = dummy_signal;
> >   sigaction(SIG_IPI, , NULL);
> >
> > -pthread_sigmask(SIG_BLOCK, NULL, );
> > -sigdelset(, SIG_IPI);
> > -pthread_sigmask(SIG_SETMASK, , NULL);
> > +pthread_sigmask(SIG_BLOCK, NULL, >hvf->unblock_ipi_mask);
> > +sigdelset(>hvf->unblock_ipi_mask, SIG_IPI);
>
>
> What will this do to the x86 hvf implementation? We're now not
> unblocking SIG_IPI again for that, right?

Yes and that was the case before your patch series.

> >
> >   #ifdef __aarch64__
> >   r = hv_vcpu_create(>hvf->fd, (hv_vcpu_exit_t **)>hvf->exit, 
> > NULL);
> > diff --git a/include/sysemu/hvf_int.h b/include/sysemu/hvf_int.h
> > index c56baa3ae8..13adf6ea77 100644
> > --- a/include/sysemu/hvf_int.h
> > +++ b/include/sysemu/hvf_int.h
> > @@ -62,8 +62,7 @@ extern HVFState *hvf_state;
> >   struct hvf_vcpu_state {
> >   uint64_t fd;
> >   void *exit;
> > -struct timespec ts;
> > -bool sleeping;
> > +sigset_t unblock_ipi_mask;
> >   };
> >
> >   void assert_hvf_ok(hv_return_t ret);
> > diff --git a/target/arm/hvf/hvf.c b/target/arm/hvf/hvf.c
> > index 8fe10966d2..60a361ff38 100644
> > --- a/target/arm/hvf/hvf.c
> > +++ b/target/arm/hvf/hvf.c
> > @@ -2,6 +2,7 @@
> >* QEMU Hypervisor.framework support for Apple Silicon
> >
> >* Copyright 2020 Alexander Graf 
> > + * Copyright 2020 Google LLC
> >*
> >* This work is licensed under the terms of the GNU GPL, version 2 or 
> > later.
> >* See the COPYING file in the top-level directory.
> > @@ -18,6 +19,7 @@
> >   #include "sysemu/hw_accel.h"
> >
> >   #include 
> > +#include 
> >
> >   #include "exec/address-spaces.h"
> >   #include "hw/irq.h"
> > @@ -320,18 +322,8 @@ int hvf_arch_init_vcpu(CPUState *cpu)
> >
> >   void hvf_kick_vcpu_thread(CPUState *cpu)
> >   {
> > -if (cpu->hvf->sleeping) {
> > -/*
> > - * When sleeping, make sure we always send signals. Also, clear the
> > - * timespec, so that an IPI that arrives between setting 
> > hvf->sleeping
> > - * and the nanosleep syscall still aborts the sleep.
> > - */
> > -cpu->thread_kicked = false;
> > -cpu->hvf->ts = (struct timespec){ };
> > -cpus_kick_thread(cpu);
> > -} else {
> > -hv_vcpus_exit(>hvf->fd, 1);
> > -}
> > +cpus_kick_thread(cpu);
> > +hv_vcpus_exit(>hvf->fd, 1);
>
>
> This means your first WFI will almost always return immediately due to a
> pending signal, because there probably was an IRQ pending before on the
> same CPU, no?

That's right. Any approach involving the "sleeping" field would need
to be implemented carefully to avoid races that may result in missed
wakeups so fo

[PATCH] arm/hvf: Optimize and simplify WFI handling

2020-12-01 Thread Peter Collingbourne via
Sleep on WFx until the VTIMER is due but allow ourselves to be woken
up on IPI.

Signed-off-by: Peter Collingbourne 
---
Alexander Graf wrote:
> I would love to take a patch from you here :). I'll still be stuck for a
> while with the sysreg sync rework that Peter asked for before I can look
> at WFI again.

Okay, here's a patch :) It's a relatively straightforward adaptation
of what we have in our fork, which can now boot Android to GUI while
remaining at around 4% CPU when idle.

I'm not set up to boot a full Linux distribution at the moment so I
tested it on upstream QEMU by running a recent mainline Linux kernel
with a rootfs containing an init program that just does sleep(5)
and verified that the qemu process remains at low CPU usage during
the sleep. This was on top of your v2 plus the last patch of your v1
since it doesn't look like you have a replacement for that logic yet.

 accel/hvf/hvf-cpus.c |  5 +--
 include/sysemu/hvf_int.h |  3 +-
 target/arm/hvf/hvf.c | 94 +++-
 3 files changed, 28 insertions(+), 74 deletions(-)

diff --git a/accel/hvf/hvf-cpus.c b/accel/hvf/hvf-cpus.c
index 4360f64671..b2c8fb57f6 100644
--- a/accel/hvf/hvf-cpus.c
+++ b/accel/hvf/hvf-cpus.c
@@ -344,9 +344,8 @@ static int hvf_init_vcpu(CPUState *cpu)
 sigact.sa_handler = dummy_signal;
 sigaction(SIG_IPI, &sigact, NULL);
 
-pthread_sigmask(SIG_BLOCK, NULL, &set);
-sigdelset(&set, SIG_IPI);
-pthread_sigmask(SIG_SETMASK, &set, NULL);
+pthread_sigmask(SIG_BLOCK, NULL, &cpu->hvf->unblock_ipi_mask);
+sigdelset(&cpu->hvf->unblock_ipi_mask, SIG_IPI);
 
 #ifdef __aarch64__
 r = hv_vcpu_create(&cpu->hvf->fd, (hv_vcpu_exit_t **)&cpu->hvf->exit, NULL);
diff --git a/include/sysemu/hvf_int.h b/include/sysemu/hvf_int.h
index c56baa3ae8..13adf6ea77 100644
--- a/include/sysemu/hvf_int.h
+++ b/include/sysemu/hvf_int.h
@@ -62,8 +62,7 @@ extern HVFState *hvf_state;
 struct hvf_vcpu_state {
 uint64_t fd;
 void *exit;
-struct timespec ts;
-bool sleeping;
+sigset_t unblock_ipi_mask;
 };
 
 void assert_hvf_ok(hv_return_t ret);
diff --git a/target/arm/hvf/hvf.c b/target/arm/hvf/hvf.c
index 8fe10966d2..60a361ff38 100644
--- a/target/arm/hvf/hvf.c
+++ b/target/arm/hvf/hvf.c
@@ -2,6 +2,7 @@
  * QEMU Hypervisor.framework support for Apple Silicon
 
  * Copyright 2020 Alexander Graf 
+ * Copyright 2020 Google LLC
  *
  * This work is licensed under the terms of the GNU GPL, version 2 or later.
  * See the COPYING file in the top-level directory.
@@ -18,6 +19,7 @@
 #include "sysemu/hw_accel.h"
 
 #include 
+#include 
 
 #include "exec/address-spaces.h"
 #include "hw/irq.h"
@@ -320,18 +322,8 @@ int hvf_arch_init_vcpu(CPUState *cpu)
 
 void hvf_kick_vcpu_thread(CPUState *cpu)
 {
-if (cpu->hvf->sleeping) {
-/*
- * When sleeping, make sure we always send signals. Also, clear the
- * timespec, so that an IPI that arrives between setting hvf->sleeping
- * and the nanosleep syscall still aborts the sleep.
- */
-cpu->thread_kicked = false;
-cpu->hvf->ts = (struct timespec){ };
-cpus_kick_thread(cpu);
-} else {
-hv_vcpus_exit(&cpu->hvf->fd, 1);
-}
+cpus_kick_thread(cpu);
+hv_vcpus_exit(&cpu->hvf->fd, 1);
 }
 
 static int hvf_inject_interrupts(CPUState *cpu)
@@ -385,18 +377,19 @@ int hvf_vcpu_exec(CPUState *cpu)
 uint64_t syndrome = hvf_exit->exception.syndrome;
 uint32_t ec = syn_get_ec(syndrome);
 
+qemu_mutex_lock_iothread();
 switch (exit_reason) {
 case HV_EXIT_REASON_EXCEPTION:
 /* This is the main one, handle below. */
 break;
 case HV_EXIT_REASON_VTIMER_ACTIVATED:
-qemu_mutex_lock_iothread();
 current_cpu = cpu;
 qemu_set_irq(arm_cpu->gt_timer_outputs[GTIMER_VIRT], 1);
 qemu_mutex_unlock_iothread();
 continue;
 case HV_EXIT_REASON_CANCELED:
 /* we got kicked, no exit to process */
+qemu_mutex_unlock_iothread();
 continue;
 default:
 assert(0);
@@ -413,7 +406,6 @@ int hvf_vcpu_exec(CPUState *cpu)
 uint32_t srt = (syndrome >> 16) & 0x1f;
 uint64_t val = 0;
 
-qemu_mutex_lock_iothread();
 current_cpu = cpu;
 
 DPRINTF("data abort: [pc=0x%llx va=0x%016llx pa=0x%016llx isv=%x "
@@ -446,8 +438,6 @@ int hvf_vcpu_exec(CPUState *cpu)
 hvf_set_reg(cpu, srt, val);
 }
 
-qemu_mutex_unlock_iothread();
-
 advance_pc = true;
 break;
 }
@@ -493,68 +483,36 @@ int hvf_vcpu_exec(CPUState *cpu)
 case EC_WFX_TRAP:
 if (!(syndrome & WFX_IS_WFE) && !(cpu->interrupt_request &
 (CPU_INTERRUPT_HARD | CPU_INTERRUPT_FIQ))) {
-
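
(The archived hunk is cut off here. As a rough standalone illustration of the mechanism the patch uses, sleeping until a deadline expressed in mach_absolute_time() ticks while allowing an early wakeup on a kick signal, the following compiles on macOS. The deadline, the use of SIGUSR1 and the helper name sleep_until are assumptions made for the example; the real patch derives its deadline from the guest's virtual timer compare value.)

#include <mach/mach_time.h>
#include <signal.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <sys/select.h>
#include <time.h>

static void dummy_signal(int sig) { (void)sig; }

/*
 * Sleep until 'deadline' (in mach_absolute_time() ticks) or until a kick
 * signal arrives, whichever comes first. Returns 0 on timeout, -1 on kick.
 */
static int sleep_until(uint64_t deadline, const sigset_t *unblock_ipi_mask)
{
    uint64_t now = mach_absolute_time();
    if (deadline <= now) {
        return 0;                   /* deadline already passed: do not sleep */
    }

    mach_timebase_info_data_t tb;
    mach_timebase_info(&tb);        /* converts host ticks to nanoseconds */
    uint64_t ns = (deadline - now) * tb.numer / tb.denom;

    struct timespec ts = { .tv_sec = ns / 1000000000ull,
                           .tv_nsec = ns % 1000000000ull };
    /* Atomically unblock the kick signal and wait, as in the IPI discussion. */
    return pselect(0, NULL, NULL, NULL, &ts, unblock_ipi_mask);
}

int main(void)
{
    struct sigaction sa;
    memset(&sa, 0, sizeof(sa));
    sa.sa_handler = dummy_signal;
    sigemptyset(&sa.sa_mask);
    sigaction(SIGUSR1, &sa, NULL);  /* SIGUSR1 stands in for SIG_IPI */

    sigset_t block, unblock_ipi_mask;
    sigemptyset(&block);
    sigaddset(&block, SIGUSR1);
    sigprocmask(SIG_BLOCK, &block, NULL);
    sigprocmask(SIG_BLOCK, NULL, &unblock_ipi_mask);
    sigdelset(&unblock_ipi_mask, SIGUSR1);

    mach_timebase_info_data_t tb;
    mach_timebase_info(&tb);
    /* A deadline half a second out, in mach_absolute_time() ticks. */
    uint64_t deadline = mach_absolute_time() + 500000000ull * tb.denom / tb.numer;
    int r = sleep_until(deadline, &unblock_ipi_mask);
    printf("woke up: %s\n", r == 0 ? "deadline reached" : "kicked");
    return 0;
}

A zero return corresponds to the timer becoming due and -1/EINTR to being kicked; either way the caller simply resumes the vCPU loop.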

Re: [PATCH 2/8] hvf: Move common code out

2020-11-30 Thread Peter Collingbourne
On Mon, Nov 30, 2020 at 3:18 PM Alexander Graf  wrote:
>
>
> On 01.12.20 00:01, Peter Collingbourne wrote:
> > On Mon, Nov 30, 2020 at 1:40 PM Alexander Graf  wrote:
> >> Hi Peter,
> >>
> >> On 30.11.20 22:08, Peter Collingbourne wrote:
> >>> On Mon, Nov 30, 2020 at 12:56 PM Frank Yang  wrote:
> >>>>
> >>>> On Mon, Nov 30, 2020 at 12:34 PM Alexander Graf  wrote:
> >>>>> Hi Frank,
> >>>>>
> >>>>> Thanks for the update :). Your previous email nudged me into the right 
> >>>>> direction. I previously had implemented WFI through the internal timer 
> >>>>> framework which performed way worse.
> >>>> Cool, glad it's helping. Also, Peter found out that the main thing 
> >>>> keeping us from just using cntpct_el0 on the host directly and compare 
> >>>> with cval is that if we sleep, cval is going to be much < cntpct_el0 by 
> >>>> the sleep time. If we can get either the architecture or macos to read 
> >>>> out the sleep time then we might be able to not have to use a poll 
> >>>> interval either!
> >>>>> Along the way, I stumbled over a few issues though. For starters, the 
> >>>>> signal mask for SIG_IPI was not set correctly, so while pselect() would 
> >>>>> exit, the signal would never get delivered to the thread! For a fix, 
> >>>>> check out
> >>>>>
> >>>>> 
> >>>>> https://patchew.org/QEMU/20201130030723.78326-1-ag...@csgraf.de/20201130030723.78326-4-ag...@csgraf.de/
> >>>>>
> >>>> Thanks, we'll take a look :)
> >>>>
> >>>>> Please also have a look at my latest stab at WFI emulation. It doesn't 
> >>>>> handle WFE (that's only relevant in overcommitted scenarios). But it 
> >>>>> does handle WFI and even does something similar to hlt polling, albeit 
> >>>>> not with an adaptive threshold.
> >>> Sorry I'm not subscribed to qemu-devel (I'll subscribe in a bit) so
> >>> I'll reply to your patch here. You have:
> >>>
> >>> +/* Set cpu->hvf->sleeping so that we get a
> >>> SIG_IPI signal. */
> >>> +cpu->hvf->sleeping = true;
> >>> +smp_mb();
> >>> +
> >>> +/* Bail out if we received an IRQ meanwhile */
> >>> +if (cpu->thread_kicked || (cpu->interrupt_request &
> >>> +(CPU_INTERRUPT_HARD | CPU_INTERRUPT_FIQ))) {
> >>> +cpu->hvf->sleeping = false;
> >>> +break;
> >>> +}
> >>> +
> >>> +/* nanosleep returns on signal, so we wake up on 
> >>> kick. */
> >>> +nanosleep(ts, NULL);
> >>>
> >>> and then send the signal conditional on whether sleeping is true, but
> >>> I think this is racy. If the signal is sent after sleeping is set to
> >>> true but before entering nanosleep then I think it will be ignored and
> >>> we will miss the wakeup. That's why in my implementation I block IPI
> >>> on the CPU thread at startup and then use pselect to atomically
> >>> unblock and begin sleeping. The signal is sent unconditionally so
> >>> there's no need to worry about races between actually sleeping and the
> >>> "we think we're sleeping" state. It may lead to an extra wakeup but
> >>> that's better than missing it entirely.
> >>
> >> Thanks a bunch for the comment! So the trick I was using here is to
> >> modify the timespec from the kick function before sending the IPI
> >> signal. That way, we know that either we are inside the sleep (where the
> >> signal wakes it up) or we are outside the sleep (where timespec={} will
> >> make it return immediately).
> >>
> >> The only race I can think of is if nanosleep does calculations based on
> >> the timespec and we happen to send the signal right there and then.
> > Yes that's the race I was thinking of. Admittedly it's a small window
> > but it's theoretically possible and part of the reason why pselect was
> > created.
> >
> >> The problem with blocking IPIs is basically what Frank was describing
> >> earlier: How do you unset the IPI signal pending status? If the signal
> >> is never delivered, how can pselect differentiate "signal from last time
> >> is still pending" from "new signal because I got an IPI"?

Re: [PATCH 2/8] hvf: Move common code out

2020-11-30 Thread Peter Collingbourne
On Mon, Nov 30, 2020 at 1:40 PM Alexander Graf  wrote:
>
> Hi Peter,
>
> On 30.11.20 22:08, Peter Collingbourne wrote:
> > On Mon, Nov 30, 2020 at 12:56 PM Frank Yang  wrote:
> >>
> >>
> >> On Mon, Nov 30, 2020 at 12:34 PM Alexander Graf  wrote:
> >>> Hi Frank,
> >>>
> >>> Thanks for the update :). Your previous email nudged me into the right 
> >>> direction. I previously had implemented WFI through the internal timer 
> >>> framework which performed way worse.
> >> Cool, glad it's helping. Also, Peter found out that the main thing keeping 
> >> us from just using cntpct_el0 on the host directly and compare with cval 
> >> is that if we sleep, cval is going to be much < cntpct_el0 by the sleep 
> >> time. If we can get either the architecture or macos to read out the sleep 
> >> time then we might be able to not have to use a poll interval either!
> >>> Along the way, I stumbled over a few issues though. For starters, the 
> >>> signal mask for SIG_IPI was not set correctly, so while pselect() would 
> >>> exit, the signal would never get delivered to the thread! For a fix, 
> >>> check out
> >>>
> >>>
> >>> https://patchew.org/QEMU/20201130030723.78326-1-ag...@csgraf.de/20201130030723.78326-4-ag...@csgraf.de/
> >>>
> >> Thanks, we'll take a look :)
> >>
> >>> Please also have a look at my latest stab at WFI emulation. It doesn't 
> >>> handle WFE (that's only relevant in overcommitted scenarios). But it does 
> >>> handle WFI and even does something similar to hlt polling, albeit not 
> >>> with an adaptive threshold.
> > Sorry I'm not subscribed to qemu-devel (I'll subscribe in a bit) so
> > I'll reply to your patch here. You have:
> >
> > +/* Set cpu->hvf->sleeping so that we get a
> > SIG_IPI signal. */
> > +cpu->hvf->sleeping = true;
> > +smp_mb();
> > +
> > +/* Bail out if we received an IRQ meanwhile */
> > +if (cpu->thread_kicked || (cpu->interrupt_request &
> > +(CPU_INTERRUPT_HARD | CPU_INTERRUPT_FIQ))) {
> > +cpu->hvf->sleeping = false;
> > +break;
> > +}
> > +
> > +/* nanosleep returns on signal, so we wake up on kick. 
> > */
> > +nanosleep(ts, NULL);
> >
> > and then send the signal conditional on whether sleeping is true, but
> > I think this is racy. If the signal is sent after sleeping is set to
> > true but before entering nanosleep then I think it will be ignored and
> > we will miss the wakeup. That's why in my implementation I block IPI
> > on the CPU thread at startup and then use pselect to atomically
> > unblock and begin sleeping. The signal is sent unconditionally so
> > there's no need to worry about races between actually sleeping and the
> > "we think we're sleeping" state. It may lead to an extra wakeup but
> > that's better than missing it entirely.
>
>
> Thanks a bunch for the comment! So the trick I was using here is to
> modify the timespec from the kick function before sending the IPI
> signal. That way, we know that either we are inside the sleep (where the
> signal wakes it up) or we are outside the sleep (where timespec={} will
> make it return immediately).
>
> The only race I can think of is if nanosleep does calculations based on
> the timespec and we happen to send the signal right there and then.

Yes that's the race I was thinking of. Admittedly it's a small window
but it's theoretically possible and part of the reason why pselect was
created.

> The problem with blocking IPIs is basically what Frank was describing
> earlier: How do you unset the IPI signal pending status? If the signal
> is never delivered, how can pselect differentiate "signal from last time
> is still pending" from "new signal because I got an IPI"?

In this case we would take the additional wakeup which should be
harmless since we will take the WFx exit again and put us in the
correct state. But that's a lot better than busy looping.

I reckon that you could improve things a little by unblocking the
signal and then reblocking it before unlocking iothread (e.g. with a
pselect with zero time interval), which would flush any pending
signals. Since any such signal would correspond to a signal from last
time (because we still have the iothread lock) we know that any future
signals should correspond to new IPIs.

Peter
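
(A self-contained sketch of the zero-interval pselect() flush suggested above; SIGUSR1 again stands in for SIG_IPI, and the program is an illustration rather than code from the series.)

#include <errno.h>
#include <signal.h>
#include <stdio.h>
#include <string.h>
#include <sys/select.h>

static void dummy_signal(int sig) { (void)sig; }

int main(void)
{
    struct sigaction sa;
    memset(&sa, 0, sizeof(sa));
    sa.sa_handler = dummy_signal;
    sigemptyset(&sa.sa_mask);
    sigaction(SIGUSR1, &sa, NULL);

    sigset_t block, unblock, pending;
    sigemptyset(&block);
    sigaddset(&block, SIGUSR1);
    sigprocmask(SIG_BLOCK, &block, NULL);
    sigprocmask(SIG_BLOCK, NULL, &unblock);
    sigdelset(&unblock, SIGUSR1);

    raise(SIGUSR1);                         /* a stale "kick" left pending */
    sigpending(&pending);
    printf("pending before flush: %d\n", sigismember(&pending, SIGUSR1));

    /* Zero-interval pselect: briefly unblock SIGUSR1 so the stale signal is
     * delivered to the dummy handler and consumed, then return immediately. */
    struct timespec zero = { 0, 0 };
    if (pselect(0, NULL, NULL, NULL, &zero, &unblock) < 0 && errno == EINTR) {
        printf("flushed a stale kick\n");
    }

    sigpending(&pending);
    printf("pending after flush:  %d\n", sigismember(&pending, SIGUSR1));
    return 0;
}

Once the stale signal has been consumed this way, any later pending SIG_IPI must correspond to a new kick.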



Re: [PATCH 2/8] hvf: Move common code out

2020-11-30 Thread Peter Collingbourne
On Mon, Nov 30, 2020 at 12:56 PM Frank Yang  wrote:
>
>
>
> On Mon, Nov 30, 2020 at 12:34 PM Alexander Graf  wrote:
>>
>> Hi Frank,
>>
>> Thanks for the update :). Your previous email nudged me into the right 
>> direction. I previously had implemented WFI through the internal timer 
>> framework which performed way worse.
>
> Cool, glad it's helping. Also, Peter found out that the main thing keeping us 
> from just using cntpct_el0 on the host directly and compare with cval is that 
> if we sleep, cval is going to be much < cntpct_el0 by the sleep time. If we 
> can get either the architecture or macos to read out the sleep time then we 
> might be able to not have to use a poll interval either!

We tracked down the discrepancies between CNTPCT_EL0 on the guest vs
on the host to the fact that CNTPCT_EL0 on the guest does not
increment while the system is asleep and as such corresponds to
mach_absolute_time() on the host (if you read the XNU sources you will
see that mach_absolute_time() is implemented as CNTPCT_EL0 plus a
constant representing the time spent asleep) while CNTPCT_EL0 on the
host does increment while asleep. This patch switches the
implementation over to using mach_absolute_time() instead of reading
CNTPCT_EL0 directly:

https://android-review.googlesource.com/c/platform/external/qemu/+/1514870

Peter

>>
>> Along the way, I stumbled over a few issues though. For starters, the signal 
>> mask for SIG_IPI was not set correctly, so while pselect() would exit, the 
>> signal would never get delivered to the thread! For a fix, check out
>>
>>   
>> https://patchew.org/QEMU/20201130030723.78326-1-ag...@csgraf.de/20201130030723.78326-4-ag...@csgraf.de/
>>
>
> Thanks, we'll take a look :)
>
>>
>> Please also have a look at my latest stab at WFI emulation. It doesn't 
>> handle WFE (that's only relevant in overcommitted scenarios). But it does 
>> handle WFI and even does something similar to hlt polling, albeit not with 
>> an adaptive threshold.
>>
>> Also, is there a particular reason you're working on this super interesting 
>> and useful code in a random downstream fork of QEMU? Wouldn't it be more 
>> helpful to contribute to the upstream code base instead?
>
> We'd actually like to contribute upstream too :) We do want to maintain our 
> own downstream though; Android Emulator codebase needs to work solidly on 
> macos and windows which has made keeping up with upstream difficult, and 
> staying on a previous version (2.12) with known quirks easier. (theres also 
> some android related customization relating to Qt Ui + different set of 
> virtual devices and snapshot support (incl. snapshots of graphics devices 
> with OpenGLES state tracking), which we hope to separate into other 
> libraries/processes, but its not insignificant)
>>
>>
>> Alex
>>
>>
>> On 30.11.20 21:15, Frank Yang wrote:
>>
>> Update: We're not quite sure how to compare the CNTV_CVAL and CNTVCT. But 
>> the high CPU usage seems to be mitigated by having a poll interval (like KVM 
>> does) in handling WFI:
>>
>> https://android-review.googlesource.com/c/platform/external/qemu/+/1512501
>>
>> This is loosely inspired by 
>> https://elixir.bootlin.com/linux/v5.10-rc6/source/virt/kvm/kvm_main.c#L2766 
>> which does seem to specify a poll interval.
>>
>> It would be cool if we could have a lightweight way to enter sleep and 
>> restart the vcpus precisely when CVAL passes, though.
>>
>> Frank
>>
>>
>> On Fri, Nov 27, 2020 at 3:30 PM Frank Yang  wrote:
>>>
>>> Hi all,
>>>
>>> +Peter Collingbourne
>>>
>>> I'm a developer on the Android Emulator, which is in a fork of QEMU.
>>>
>>> Peter and I have been working on an HVF Apple Silicon backend with an eye 
>>> toward Android guests.
>>>
>>> We have gotten things to basically switch to Android userspace already 
>>> (logcat/shell and graphics available at least)
>>>
>>> Our strategy so far has been to import logic from the KVM implementation 
>>> and hook into QEMU's software devices that previously assumed to only work 
>>> with TCG, or have KVM-specific paths.
>>>
>>> Thanks to Alexander for the tip on the 36-bit address space limitation btw; 
>>> our way of addressing this is to still allow highmem but not put pci high 
>>> mmio so high.
>>>
>>> Also, note we have a sleep/signal based mechanism to deal with WFx, which 
>>> might be worth looking into in Alexander's implementation as well:
>>>
>>> 

Re: [PATCH 2/8] hvf: Move common code out

2020-11-30 Thread Peter Collingbourne
On Mon, Nov 30, 2020 at 12:56 PM Frank Yang  wrote:
>
>
>
> On Mon, Nov 30, 2020 at 12:34 PM Alexander Graf  wrote:
>>
>> Hi Frank,
>>
>> Thanks for the update :). Your previous email nudged me into the right 
>> direction. I previously had implemented WFI through the internal timer 
>> framework which performed way worse.
>
> Cool, glad it's helping. Also, Peter found out that the main thing keeping us 
> from just using cntpct_el0 on the host directly and compare with cval is that 
> if we sleep, cval is going to be much < cntpct_el0 by the sleep time. If we 
> can get either the architecture or macos to read out the sleep time then we 
> might be able to not have to use a poll interval either!
>>
>> Along the way, I stumbled over a few issues though. For starters, the signal 
>> mask for SIG_IPI was not set correctly, so while pselect() would exit, the 
>> signal would never get delivered to the thread! For a fix, check out
>>
>>   
>> https://patchew.org/QEMU/20201130030723.78326-1-ag...@csgraf.de/20201130030723.78326-4-ag...@csgraf.de/
>>
>
> Thanks, we'll take a look :)
>
>>
>> Please also have a look at my latest stab at WFI emulation. It doesn't 
>> handle WFE (that's only relevant in overcommitted scenarios). But it does 
>> handle WFI and even does something similar to hlt polling, albeit not with 
>> an adaptive threshold.

Sorry I'm not subscribed to qemu-devel (I'll subscribe in a bit) so
I'll reply to your patch here. You have:

+/* Set cpu->hvf->sleeping so that we get a
SIG_IPI signal. */
+cpu->hvf->sleeping = true;
+smp_mb();
+
+/* Bail out if we received an IRQ meanwhile */
+if (cpu->thread_kicked || (cpu->interrupt_request &
+(CPU_INTERRUPT_HARD | CPU_INTERRUPT_FIQ))) {
+cpu->hvf->sleeping = false;
+break;
+}
+
+/* nanosleep returns on signal, so we wake up on kick. */
+nanosleep(ts, NULL);

and then send the signal conditional on whether sleeping is true, but
I think this is racy. If the signal is sent after sleeping is set to
true but before entering nanosleep then I think it will be ignored and
we will miss the wakeup. That's why in my implementation I block IPI
on the CPU thread at startup and then use pselect to atomically
unblock and begin sleeping. The signal is sent unconditionally so
there's no need to worry about races between actually sleeping and the
"we think we're sleeping" state. It may lead to an extra wakeup but
that's better than missing it entirely.

Peter

>>
>> Also, is there a particular reason you're working on this super interesting 
>> and useful code in a random downstream fork of QEMU? Wouldn't it be more 
>> helpful to contribute to the upstream code base instead?
>
> We'd actually like to contribute upstream too :) We do want to maintain our 
> own downstream though; Android Emulator codebase needs to work solidly on 
> macos and windows which has made keeping up with upstream difficult, and 
> staying on a previous version (2.12) with known quirks easier. (theres also 
> some android related customization relating to Qt Ui + different set of 
> virtual devices and snapshot support (incl. snapshots of graphics devices 
> with OpenGLES state tracking), which we hope to separate into other 
> libraries/processes, but its not insignificant)
>>
>>
>> Alex
>>
>>
>> On 30.11.20 21:15, Frank Yang wrote:
>>
>> Update: We're not quite sure how to compare the CNTV_CVAL and CNTVCT. But 
>> the high CPU usage seems to be mitigated by having a poll interval (like KVM 
>> does) in handling WFI:
>>
>> https://android-review.googlesource.com/c/platform/external/qemu/+/1512501
>>
>> This is loosely inspired by 
>> https://elixir.bootlin.com/linux/v5.10-rc6/source/virt/kvm/kvm_main.c#L2766 
>> which does seem to specify a poll interval.
>>
>> It would be cool if we could have a lightweight way to enter sleep and 
>> restart the vcpus precisely when CVAL passes, though.
>>
>> Frank
>>
>>
>> On Fri, Nov 27, 2020 at 3:30 PM Frank Yang  wrote:
>>>
>>> Hi all,
>>>
>>> +Peter Collingbourne
>>>
>>> I'm a developer on the Android Emulator, which is in a fork of QEMU.
>>>
>>> Peter and I have been working on an HVF Apple Silicon backend with an eye 
>>> toward Android guests.
>>>
>>> We have gotten things to basically switch to Android userspace already 
>>> (logcat/shell and graphics available at least)

Re: [PATCH] target/arm: Fix decode of {LD,ST}RA[AB] instructions

2020-08-04 Thread Peter Collingbourne
On Tue, Aug 4, 2020 at 8:41 AM Richard Henderson
 wrote:
>
> On 8/3/20 5:21 PM, Peter Collingbourne wrote:
> > On Mon, Aug 3, 2020 at 3:27 PM Peter Collingbourne  wrote:
> >>
> >> These instructions use zero as the discriminator, not SP.
> >
> > Oh, there is no such thing as STRAA/STRAB. I must have been confused
> > by the name of the function, disas_ldst_pac. I will send a v2 with a
> > fixed commit message, and another patch to rename the function to
> > disas_ld_pac.
>
> It's called decode_ldst_pac because the Arm ARM section is called "Load/store
> register (pac)".  Page C4-311 in the F.a revision.
>
> But yes, there are only loads defined in the section.

I see. Arguably the ARM ARM section is misnamed then. There is a
sibling section named "Load register (literal)", so there is precedent
for naming a section after the types of instructions that are actually
supported. I will send mail to err...@arm.com to see if the section
can be renamed.

Peter



[PATCH v2] target/arm: Fix decode of LDRA[AB] instructions

2020-08-03 Thread Peter Collingbourne
These instructions use zero as the discriminator, not SP.

Signed-off-by: Peter Collingbourne 
---
v2:
- fixed commit message

 target/arm/translate-a64.c | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
index 8c0764957c..c996ca1393 100644
--- a/target/arm/translate-a64.c
+++ b/target/arm/translate-a64.c
@@ -3429,9 +3429,11 @@ static void disas_ldst_pac(DisasContext *s, uint32_t 
insn,
 
 if (s->pauth_active) {
 if (use_key_a) {
-gen_helper_autda(dirty_addr, cpu_env, dirty_addr, cpu_X[31]);
+gen_helper_autda(dirty_addr, cpu_env, dirty_addr,
+ new_tmp_a64_zero(s));
 } else {
-gen_helper_autdb(dirty_addr, cpu_env, dirty_addr, cpu_X[31]);
+gen_helper_autdb(dirty_addr, cpu_env, dirty_addr,
+ new_tmp_a64_zero(s));
 }
 }
 
-- 
2.28.0.163.g6104cc2f0b6-goog




[PATCH] target/arm: Rename function disas_ldst_pac to disas_ld_pac

2020-08-03 Thread Peter Collingbourne
The name disas_ldst_pac is misleading as it implies the existence
of authenticating store instructions, so rename it to avoid that
implication.

Signed-off-by: Peter Collingbourne 
---
 target/arm/translate-a64.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
index 8c0764957c..749de2e509 100644
--- a/target/arm/translate-a64.c
+++ b/target/arm/translate-a64.c
@@ -3408,8 +3408,8 @@ static void disas_ldst_atomic(DisasContext *s, uint32_t 
insn,
  * W: pre-indexing flag
  * S: sign for imm9.
  */
-static void disas_ldst_pac(DisasContext *s, uint32_t insn,
-   int size, int rt, bool is_vector)
+static void disas_ld_pac(DisasContext *s, uint32_t insn,
+ int size, int rt, bool is_vector)
 {
 int rn = extract32(insn, 5, 5);
 bool is_wback = extract32(insn, 11, 1);
@@ -3562,7 +3562,7 @@ static void disas_ldst_reg(DisasContext *s, uint32_t insn)
 disas_ldst_reg_roffset(s, insn, opc, size, rt, is_vector);
 return;
 default:
-disas_ldst_pac(s, insn, size, rt, is_vector);
+disas_ld_pac(s, insn, size, rt, is_vector);
 return;
 }
 break;
-- 
2.28.0.163.g6104cc2f0b6-goog




Re: [PATCH] target/arm: Fix decode of {LD,ST}RA[AB] instructions

2020-08-03 Thread Peter Collingbourne
On Mon, Aug 3, 2020 at 3:27 PM Peter Collingbourne  wrote:
>
> These instructions use zero as the discriminator, not SP.

Oh, there is no such thing as STRAA/STRAB. I must have been confused
by the name of the function, disas_ldst_pac. I will send a v2 with a
fixed commit message, and another patch to rename the function to
disas_ld_pac.

Peter

>
> Signed-off-by: Peter Collingbourne 
> ---
>  target/arm/translate-a64.c | 6 --
>  1 file changed, 4 insertions(+), 2 deletions(-)
>
> diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
> index 8c0764957c..c996ca1393 100644
> --- a/target/arm/translate-a64.c
> +++ b/target/arm/translate-a64.c
> @@ -3429,9 +3429,11 @@ static void disas_ldst_pac(DisasContext *s, uint32_t 
> insn,
>
>  if (s->pauth_active) {
>  if (use_key_a) {
> -gen_helper_autda(dirty_addr, cpu_env, dirty_addr, cpu_X[31]);
> +gen_helper_autda(dirty_addr, cpu_env, dirty_addr,
> + new_tmp_a64_zero(s));
>  } else {
> -gen_helper_autdb(dirty_addr, cpu_env, dirty_addr, cpu_X[31]);
> +gen_helper_autdb(dirty_addr, cpu_env, dirty_addr,
> + new_tmp_a64_zero(s));
>  }
>  }
>
> --
> 2.28.0.163.g6104cc2f0b6-goog
>



[PATCH] target/arm: Fix decode of {LD,ST}RA[AB] instructions

2020-08-03 Thread Peter Collingbourne
These instructions use zero as the discriminator, not SP.

Signed-off-by: Peter Collingbourne 
---
 target/arm/translate-a64.c | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
index 8c0764957c..c996ca1393 100644
--- a/target/arm/translate-a64.c
+++ b/target/arm/translate-a64.c
@@ -3429,9 +3429,11 @@ static void disas_ldst_pac(DisasContext *s, uint32_t 
insn,
 
 if (s->pauth_active) {
 if (use_key_a) {
-gen_helper_autda(dirty_addr, cpu_env, dirty_addr, cpu_X[31]);
+gen_helper_autda(dirty_addr, cpu_env, dirty_addr,
+ new_tmp_a64_zero(s));
 } else {
-gen_helper_autdb(dirty_addr, cpu_env, dirty_addr, cpu_X[31]);
+gen_helper_autdb(dirty_addr, cpu_env, dirty_addr,
+ new_tmp_a64_zero(s));
 }
 }
 
-- 
2.28.0.163.g6104cc2f0b6-goog




[Bug 1867072] Re: ARM: tag bits cleared in FAR_EL1

2020-03-12 Thread Peter Collingbourne
With those two patches applied I can no longer reproduce the problem,
thanks!

For posterity, this is how I've been reproducing the problem:

1. Build a Linux kernel with this patch applied: 
https://patchwork.kernel.org/patch/11435077/
2. Run this program under the kernel:

#include <signal.h>
#include <stdint.h>
#include <stdio.h>
#include <ucontext.h>
#include <unistd.h>

void handler(int signo, siginfo_t *siginfo, void *context) {
  uint32_t *begin = (uint32_t *)context;
  uint32_t *end = ((uint32_t *)context) + (sizeof(ucontext_t)/4);
  for (uint32_t *i = begin; i != end; ++i) {
printf("%08p %08x\n", i, *i);
  }
  _exit(0);
}

int main() {
  struct sigaction sa;
  sa.sa_sigaction = handler;
  sa.sa_flags = SA_SIGINFO;
  sigemptyset(&sa.sa_mask);
  sigaction(SIGSEGV, &sa, 0);

  return *(int *)((1ULL << 56) + 0x123456);
}

I would expect this program's output to include something like the
following:

0xd5869bd0 46415201
0xd5869bd4 00000010
0xd5869bd8 00123456
0xd5869bdc 01000000

But the output that I was seeing with the bad qemu looked like this:

0xd5869bd0 46415201
0xd5869bd4 00000010
0xd5869bd8 00123456
0xd5869bdc 00000000

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1867072

Title:
  ARM: tag bits cleared in FAR_EL1

Status in QEMU:
  In Progress

Bug description:
  The ARM Architecture Reference Manual provides the following for
  FAR_EL1:

  "For a Data Abort or Watchpoint exception, if address tagging is
  enabled for the address accessed by the data access that caused the
  exception, then this field includes the tag."

  However, I have found that the tag bits in FAR_EL1 are always clear,
  even if the tag bits were set in the original access.

  I can reproduce the problem on both 4.1.1 and master
  (6e8a73e911f066527e775e04b98f31ebd19db600).

To manage notifications about this bug go to:
https://bugs.launchpad.net/qemu/+bug/1867072/+subscriptions



[Bug 1867072] [NEW] ARM: tag bits cleared in FAR_EL1

2020-03-11 Thread Peter Collingbourne
Public bug reported:

The ARM Architecture Reference Manual provides the following for
FAR_EL1:

"For a Data Abort or Watchpoint exception, if address tagging is enabled
for the address accessed by the data access that caused the exception,
then this field includes the tag."

However, I have found that the tag bits in FAR_EL1 are always clear,
even if the tag bits were set in the original access.

I can reproduce the problem on both 4.1.1 and master
(6e8a73e911f066527e775e04b98f31ebd19db600).

** Affects: qemu
 Importance: Undecided
 Status: New

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1867072

Title:
  ARM: tag bits cleared in FAR_EL1

Status in QEMU:
  New

Bug description:
  The ARM Architecture Reference Manual provides the following for
  FAR_EL1:

  "For a Data Abort or Watchpoint exception, if address tagging is
  enabled for the address accessed by the data access that caused the
  exception, then this field includes the tag."

  However, I have found that the tag bits in FAR_EL1 are always clear,
  even if the tag bits were set in the original access.

  I can reproduce the problem on both 4.1.1 and master
  (6e8a73e911f066527e775e04b98f31ebd19db600).

To manage notifications about this bug go to:
https://bugs.launchpad.net/qemu/+bug/1867072/+subscriptions