date:20210706

Re: [RFC PATCH 8/8] target/i386: Move X86XSaveArea into TCG

2021-07-06 Thread Paolo Bonzini

Migration from KVM to TCG is broken anyway. The changing offsets do break
migration of a KVM guest from Intel to AMD or vice versa, because of the
difference in CPUID. That however is not changed by this patch.

Paolo

Il mer 7 lug 2021, 03:09 Richard Henderson 
ha scritto:

> On 7/5/21 3:46 AM, David Edmondson wrote:
> > Given that TCG is now the only consumer of X86XSaveArea, move the
> > structure definition and associated offset declarations and checks to a
> > TCG specific header.
> >
> > Signed-off-by: David Edmondson 
> > ---
> >   target/i386/cpu.h| 57 
> >   target/i386/tcg/fpu_helper.c |  1 +
> >   target/i386/tcg/tcg-cpu.h| 57 
> >   3 files changed, 58 insertions(+), 57 deletions(-)
> >
> > diff --git a/target/i386/cpu.h b/target/i386/cpu.h
> > index 96b672f8bd..0f7ddbfeae 100644
> > --- a/target/i386/cpu.h
> > +++ b/target/i386/cpu.h
> > @@ -1305,48 +1305,6 @@ typedef struct XSavePKRU {
> >   uint32_t padding;
> >   } XSavePKRU;
> >
> > -#define XSAVE_FCW_FSW_OFFSET0x000
> > -#define XSAVE_FTW_FOP_OFFSET0x004
> > -#define XSAVE_CWD_RIP_OFFSET0x008
> > -#define XSAVE_CWD_RDP_OFFSET0x010
> > -#define XSAVE_MXCSR_OFFSET  0x018
> > -#define XSAVE_ST_SPACE_OFFSET   0x020
> > -#define XSAVE_XMM_SPACE_OFFSET  0x0a0
> > -#define XSAVE_XSTATE_BV_OFFSET  0x200
> > -#define XSAVE_AVX_OFFSET0x240
> > -#define XSAVE_BNDREG_OFFSET 0x3c0
> > -#define XSAVE_BNDCSR_OFFSET 0x400
> > -#define XSAVE_OPMASK_OFFSET 0x440
> > -#define XSAVE_ZMM_HI256_OFFSET  0x480
> > -#define XSAVE_HI16_ZMM_OFFSET   0x680
> > -#define XSAVE_PKRU_OFFSET   0xa80
> > -
> > -typedef struct X86XSaveArea {
> > -X86LegacyXSaveArea legacy;
> > -X86XSaveHeader header;
> > -
> > -/* Extended save areas: */
> > -
> > -/* AVX State: */
> > -XSaveAVX avx_state;
> > -
> > -/* Ensure that XSaveBNDREG is properly aligned. */
> > -uint8_t padding[XSAVE_BNDREG_OFFSET
> > -- sizeof(X86LegacyXSaveArea)
> > -- sizeof(X86XSaveHeader)
> > -- sizeof(XSaveAVX)];
> > -
> > -/* MPX State: */
> > -XSaveBNDREG bndreg_state;
> > -XSaveBNDCSR bndcsr_state;
> > -/* AVX-512 State: */
> > -XSaveOpmask opmask_state;
> > -XSaveZMM_Hi256 zmm_hi256_state;
> > -XSaveHi16_ZMM hi16_zmm_state;
> > -/* PKRU State: */
> > -XSavePKRU pkru_state;
> > -} X86XSaveArea;
> > -
> >   QEMU_BUILD_BUG_ON(sizeof(XSaveAVX) != 0x100);
> >   QEMU_BUILD_BUG_ON(sizeof(XSaveBNDREG) != 0x40);
> >   QEMU_BUILD_BUG_ON(sizeof(XSaveBNDCSR) != 0x40);
> > @@ -1355,21 +1313,6 @@ QEMU_BUILD_BUG_ON(sizeof(XSaveZMM_Hi256) !=
> 0x200);
> >   QEMU_BUILD_BUG_ON(sizeof(XSaveHi16_ZMM) != 0x400);
> >   QEMU_BUILD_BUG_ON(sizeof(XSavePKRU) != 0x8);
> >
> > -QEMU_BUILD_BUG_ON(offsetof(X86XSaveArea, legacy.fcw) !=
> XSAVE_FCW_FSW_OFFSET);
> > -QEMU_BUILD_BUG_ON(offsetof(X86XSaveArea, legacy.ftw) !=
> XSAVE_FTW_FOP_OFFSET);
> > -QEMU_BUILD_BUG_ON(offsetof(X86XSaveArea, legacy.fpip) !=
> XSAVE_CWD_RIP_OFFSET);
> > -QEMU_BUILD_BUG_ON(offsetof(X86XSaveArea, legacy.fpdp) !=
> XSAVE_CWD_RDP_OFFSET);
> > -QEMU_BUILD_BUG_ON(offsetof(X86XSaveArea, legacy.mxcsr) !=
> XSAVE_MXCSR_OFFSET);
> > -QEMU_BUILD_BUG_ON(offsetof(X86XSaveArea, legacy.fpregs) !=
> XSAVE_ST_SPACE_OFFSET);
> > -QEMU_BUILD_BUG_ON(offsetof(X86XSaveArea, legacy.xmm_regs) !=
> XSAVE_XMM_SPACE_OFFSET);
> > -QEMU_BUILD_BUG_ON(offsetof(X86XSaveArea, avx_state) !=
> XSAVE_AVX_OFFSET);
> > -QEMU_BUILD_BUG_ON(offsetof(X86XSaveArea, bndreg_state) !=
> XSAVE_BNDREG_OFFSET);
> > -QEMU_BUILD_BUG_ON(offsetof(X86XSaveArea, bndcsr_state) !=
> XSAVE_BNDCSR_OFFSET);
> > -QEMU_BUILD_BUG_ON(offsetof(X86XSaveArea, opmask_state) !=
> XSAVE_OPMASK_OFFSET);
> > -QEMU_BUILD_BUG_ON(offsetof(X86XSaveArea, zmm_hi256_state) !=
> XSAVE_ZMM_HI256_OFFSET);
> > -QEMU_BUILD_BUG_ON(offsetof(X86XSaveArea, hi16_zmm_state) !=
> XSAVE_HI16_ZMM_OFFSET);
> > -QEMU_BUILD_BUG_ON(offsetof(X86XSaveArea, pkru_state) !=
> XSAVE_PKRU_OFFSET);
> > -
> >   typedef struct ExtSaveArea {
> >   uint32_t feature, bits;
> >   uint32_t offset, size;
> > diff --git a/target/i386/tcg/fpu_helper.c b/target/i386/tcg/fpu_helper.c
> > index 4e11965067..74bbe94b80 100644
> > --- a/target/i386/tcg/fpu_helper.c
> > +++ b/target/i386/tcg/fpu_helper.c
> > @@ -20,6 +20,7 @@
> >   #include "qemu/osdep.h"
> >   #include 
> >   #include "cpu.h"
> > +#include "tcg-cpu.h"
> >   #include "exec/helper-proto.h"
> >   #include "fpu/softfloat.h"
> >   #include "fpu/softfloat-macros.h"
> > diff --git a/target/i386/tcg/tcg-cpu.h b/target/i386/tcg/tcg-cpu.h
> > index 36bd300af0..53a8494455 100644
> > --- a/target/i386/tcg/tcg-cpu.h
> > +++ b/target/i386/tcg/tcg-cpu.h
> > @@ -19,6 +19,63 @@
> >   #ifndef TCG_CPU_H
> >   #define TCG_CPU_H
> >
> > +#define XSAVE_FCW_FSW_OFFSET0x000
> > +#define XSAVE_FTW_FOP_OFFSET0x004
> > +#define XSAVE_CWD_RIP_OFFSET0x008
> >

Re: [PATCH] Fix libdaxctl option

2021-07-06 Thread Thomas Huth


On 07/07/2021 08.31, Miroslav Rezanina wrote:

For some reason, libdaxctl option setting was set to work in an opposite
way (--enable-libdaxctl disabled it and vice versa). Fixing this so
configuration works properly.

Signed-off-by: Miroslav Rezanina 
---
  configure | 4 ++--
  1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/configure b/configure
index 650d9c0735..4f51528a77 100755
--- a/configure
+++ b/configure
@@ -1531,9 +1531,9 @@ for opt do
;;
--disable-keyring) secret_keyring="no"
;;
-  --enable-libdaxctl) libdaxctl=disabled
+  --enable-libdaxctl) libdaxctl="enabled"
;;
-  --disable-libdaxctl) libdaxctl=enabled
+  --disable-libdaxctl) libdaxctl="disabled"
;;
--enable-fuse) fuse="enabled"
;;



Fixes: 83ef16821a ("configure, meson: convert libdaxctl detection to meson")

Reviewed-by: Thomas Huth

[PATCH] Fix libdaxctl option

2021-07-06 Thread Miroslav Rezanina

For some reason, libdaxctl option setting was set to work in an opposite
way (--enable-libdaxctl disabled it and vice versa). Fixing this so
configuration works properly.

Signed-off-by: Miroslav Rezanina 
---
 configure | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/configure b/configure
index 650d9c0735..4f51528a77 100755
--- a/configure
+++ b/configure
@@ -1531,9 +1531,9 @@ for opt do
   ;;
   --disable-keyring) secret_keyring="no"
   ;;
-  --enable-libdaxctl) libdaxctl=disabled
+  --enable-libdaxctl) libdaxctl="enabled"
   ;;
-  --disable-libdaxctl) libdaxctl=enabled
+  --disable-libdaxctl) libdaxctl="disabled"
   ;;
   --enable-fuse) fuse="enabled"
   ;;
-- 
2.27.0

[PATCH v1] block/raw-format: implement .bdrv_get_specific_info handler

2021-07-06 Thread Or Ozeri

When using the raw format, allow exposing specific info by the underlying 
storage.
In particular, this will enable RBD images using the raw format to indicate
a LUKS2 encrypted image in the output of qemu-img info.

Signed-off-by: Or Ozeri 
---
 block/raw-format.c | 7 +++
 1 file changed, 7 insertions(+)

diff --git a/block/raw-format.c b/block/raw-format.c
index 7717578ed6..f6e70e2356 100644
--- a/block/raw-format.c
+++ b/block/raw-format.c
@@ -369,6 +369,12 @@ static int raw_get_info(BlockDriverState *bs, 
BlockDriverInfo *bdi)
 return bdrv_get_info(bs->file->bs, bdi);
 }
 
+static ImageInfoSpecific *raw_get_specific_info(BlockDriverState *bs,
+Error **errp)
+{
+return bdrv_get_specific_info(bs->file->bs, errp);
+}
+
 static void raw_refresh_limits(BlockDriverState *bs, Error **errp)
 {
 if (bs->probed) {
@@ -603,6 +609,7 @@ BlockDriver bdrv_raw = {
 .has_variable_length  = true,
 .bdrv_measure = &raw_measure,
 .bdrv_get_info= &raw_get_info,
+.bdrv_get_specific_info = &raw_get_specific_info,
 .bdrv_refresh_limits  = &raw_refresh_limits,
 .bdrv_probe_blocksizes = &raw_probe_blocksizes,
 .bdrv_probe_geometry  = &raw_probe_geometry,
-- 
2.27.0

Re: pipe2 & configure script

2021-07-06 Thread Thomas Huth


On 07/07/2021 05.24, Richard Zak wrote:
What conditions are required for "#define CONFIG_PIPE2" to be set in 
build/config-host.h? It prevents building for Haiku as pipe2() doesn't 
exist. I didn't see anything in the configure script regarding pipe2. I also 
updated my code to the latest in the repository and this issue just popped up.


CONFIG_PIPE2 is set from meson.build instead of the configure script. But 
why is this blocking your build? The only relevant spot is in 
util/oslib-posix.c and there is a fallback to the normal pipe() function 
there...


 Thomas

Re: [PATCH v1 32/39] contrib/plugins: enable -Wall for building plugins

2021-07-06 Thread Thomas Huth


On 06/07/2021 16.58, Alex Bennée wrote:

Let's spot the obvious errors.

Signed-off-by: Alex Bennée 
---
  contrib/plugins/Makefile | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/contrib/plugins/Makefile b/contrib/plugins/Makefile
index b9d7935e5e..17e6e2ec4f 100644
--- a/contrib/plugins/Makefile
+++ b/contrib/plugins/Makefile
@@ -24,7 +24,7 @@ SONAMES := $(addsuffix .so,$(addprefix lib,$(NAMES)))
  # The main QEMU uses Glib extensively so it's perfectly fine to use it
  # in plugins (which many example do).
  CFLAGS = $(GLIB_CFLAGS)
-CFLAGS += -fPIC
+CFLAGS += -fPIC -Wall


In pc-bios/s390-ccw/Makefile we're using:

 -Wall $(filter -W%, $(QEMU_CFLAGS))

to also get the other warning options from config-host.mak. You might want 
to consider doing that here, too.


Anyway:
Reviewed-by: Thomas Huth

Re: [PATCH v1 31/39] tcg/plugins: enable by default for TCG builds

2021-07-06 Thread Thomas Huth


On 06/07/2021 16.58, Alex Bennée wrote:

Aside from a minor bloat to file size the ability to have TCG plugins
has no real impact on performance unless a plugin is actively loaded.
Even then the libempty.so plugin shows only a minor degradation in
performance caused by the extra book keeping the TCG has to do to keep
track of instructions. As it's a useful feature let's just enable it by
default and reduce our testing matrix a little.

Cc: Paolo Bonzini 
Signed-off-by: Alex Bennée 

---
v2
   - properly handle plugins being ""
   - make the test for linker support handle default case
   - move linker tests to before the glib-modules test
---
  docs/devel/tcg-plugins.rst |   3 +-
  configure  | 125 -
  .gitlab-ci.d/buildtest.yml |  23 ---
  3 files changed, 71 insertions(+), 80 deletions(-)

diff --git a/docs/devel/tcg-plugins.rst b/docs/devel/tcg-plugins.rst
index 18c6581d85..0cd77c77d2 100644
--- a/docs/devel/tcg-plugins.rst
+++ b/docs/devel/tcg-plugins.rst
@@ -71,7 +71,8 @@ API
  Usage
  =
  
-The QEMU binary needs to be compiled for plugin support::

+Any QEMU binary with TCG support has plugins enabled by default.
+Earlier releases needed to be explicitly enabled with::
  
configure --enable-plugins
  
diff --git a/configure b/configure

index 9d72b31a9f..0ce6c1ff65 100755
--- a/configure
+++ b/configure
@@ -429,7 +429,7 @@ libxml2="auto"
  debug_mutex="no"
  libpmem="auto"
  default_devices="true"
-plugins="no"
+plugins="$default_feature"
  fuzzing="no"
  rng_none="no"
  secret_keyring="$default_feature"
@@ -3085,6 +3085,73 @@ for drv in $audio_drv_list; do
  esac
  done
  
+##

+# plugin linker support probe
+
+if test "$plugins" != "no"; then
+
+#
+# See if --dynamic-list is supported by the linker
+
+ld_dynamic_list="no"
+if test "$static" = "no" ; then
+cat > $TMPTXT < $TMPC <
+void foo(void);
+
+void foo(void)
+{
+  printf("foo\n");
+}
+
+int main(void)
+{
+  foo();
+  return 0;
+}
+EOF
+
+if compile_prog "" "-Wl,--dynamic-list=$TMPTXT" ; then
+ld_dynamic_list="yes"
+fi
+fi
+
+#
+# See if -exported_symbols_list is supported by the linker
+
+ld_exported_symbols_list="no"
+if test "$static" = "no" ; then
+cat > $TMPTXT <

Printing "($plugins)" here does not make much sense to me (it will always 
result in "(yes)", won't it?) ... but apart from that, the patch looks fine 
to me.


So with that "($plugins)" removed:

Reviewed-by: Thomas Huth

[Bug 1888303] Re: Intermittent buggines with user mode emulation of x86-64 on aarch64

2021-07-06 Thread Launchpad Bug Tracker

[Expired for QEMU because there has been no activity for 60 days.]

** Changed in: qemu
   Status: Incomplete => Expired

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1888303

Title:
  Intermittent buggines with user mode emulation of x86-64 on aarch64

Status in QEMU:
  Expired

Bug description:
  QEMU Version: 5.0.0
  ./configure --target-list=x86_64-linux-user --enable-user --prefix=/opt/qemu 
--static

  Testing using node_exporter from pmm-client-1.17.4-1.el8.x86_64.rpm

  aarch64 system is running CentOS 8 with a mainline 5.4.52 kernel built
  for 4KB memory pages.

  On aarch64 machine, invoke:

  ./qemu-x86_64-static /usr/local/percona/pmm-
  client/node_exporter.x86_64 -web.listen-address=192.168.0.10:42000
  -web.auth-file=/usr/local/percona/pmm-client/pmm.yml -web.ssl-key-
  file=/usr/local/percona/pmm-client/server.key -web.ssl-cert-
  file=/usr/local/percona/pmm-client/server.crt
  
-collectors.enabled=diskstats,filefd,filesystem,loadavg,meminfo,netdev,netstat,stat,time,uname,vmstat,meminfo_numa,textfile

  Most of the time it will outright segfault within a few seconds,
  seemingly when the prometheus server polls for data.

  But, about once every 10 times, it will not segfault and will continue
  working just fine forever.

  The dynamically linked version of qemu (built without --static) always
  works without segfaulting, but it just doesn't work, the prometheus
  server gets no data from it. Again, once in a while it will work, but
  even when it doesn't work it won't segfault.

  This vaguely feels like a memory alignment issue somewhere, but my
  debug-fu is not quite strong enough to attack the problem.

To manage notifications about this bug go to:
https://bugs.launchpad.net/qemu/+bug/1888303/+subscriptions

[Bug 1888417] Re: Latest QEMU git build on Arch linux causes PCI Passthrough host to hang on guest reboot.

2021-07-06 Thread Launchpad Bug Tracker

[Expired for QEMU because there has been no activity for 60 days.]

** Changed in: qemu
   Status: Incomplete => Expired

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1888417

Title:
  Latest QEMU git build on Arch linux causes PCI Passthrough host to
  hang on guest reboot.

Status in QEMU:
  Expired

Bug description:
  Current Arch linux release, up-to-date as of 7/21/2020.

  Running a windows 7 virtual machine (also happens with windows 10,
  possibly more OSes), with an nvidia GTX 1060 passthrough, if the VM is
  attempted to be restarted, either through the guest interface, or by
  libvirt's gui interface "Virtual Machine Manager", it hangs in a
  "paused" state once the VM shuts down, and just before the reboot can
  take place.  A force-stop of the VM allows the VM to be properly
  booted without any disk error checks, alluding to a clean shutdown,
  but failed reboot.  The VM can be properly shut down using the guest's
  shutdown function, or the libvirt manager shutdown, without any hangs.
  Reverting to Arch stable build QEMU 5.0.0-7 fixes the issue.

To manage notifications about this bug go to:
https://bugs.launchpad.net/qemu/+bug/1888417/+subscriptions

[Bug 1888964] Re: Segfault using GTK display with dmabuf (iGVT-g) on Wayland

2021-07-06 Thread Launchpad Bug Tracker

[Expired for QEMU because there has been no activity for 60 days.]

** Changed in: qemu
   Status: Incomplete => Expired

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1888964

Title:
  Segfault using GTK display with dmabuf (iGVT-g) on Wayland

Status in QEMU:
  Expired

Bug description:
  When using...
   a) Intel virtualized graphics (iGVT-g) with dmabuf output
   b) QEMU's GTK display with GL output enabled (-display gtk,gl=on)
   c) A Wayland compositor (Sway in my case)
  a segfault occurs at some point on boot (I guess as soon as the guest starts 
using the virtual graphics card?)

  The origin is the function dpy_gl_scanout_dmabuf in ui/console.c, where it 
calls
  con->gl->ops->dpy_gl_scanout_dmabuf(con->gl, dmabuf);
  However, the ops field (struct DisplayChangeListenerOps) does not have 
dpy_gl_scanout_dmabuf set because it is set to dcl_gl_area_ops which does not 
have dpy_gl_scanout_dmabuf set.
  Only dcl_egl_ops has dpy_gl_scanout_dmabuf set.
  Currently, the GTK display uses EGL on X11 displays, but GtkGLArea on 
Wayland. This can be observed in early_gtk_display_init() in ui/gtk.c, where it 
says (simplified code):

  if (opts->has_gl && opts->gl != DISPLAYGL_MODE_OFF) {
  if (GDK_IS_WAYLAND_DISPLAY(gdk_display_get_default())) {
  gtk_use_gl_area = true;
  gtk_gl_area_init();
  } else {
  DisplayGLMode mode = opts->has_gl ? opts->gl : DISPLAYGL_MODE_ON;
  gtk_egl_init(mode);
  }
  }

  To reproduce the findings above, add this assertion to dpy_gl_scanout_dmabuf:
  assert(con->gl->ops->dpy_gl_scanout_dmabuf);
  This will make the segfault turn into an assertion failure.

  A workaround is to force QEMU to use GDK's X11 backend (using
  GDK_BACKEND=x11).

  Note: This might be a duplicate of 1775011, however the information
  provided in that bug report is not sufficient to make the assertion.

  QEMU version: b0ce3f021e0157e9a5ab836cb162c48caac132e1 (from Git master 
branch)
  OS: Arch Linux, Kernel Version 5.17.0-1

  Relevant flags of the QEMU invocation:
  qemu-system-x86_64 \
-vga none \
-device 
vfio-pci-nohotplug,sysfsdev="$GVT_DEV",romfile="${ROMFILE}",display=on,x-igd-opregion=on,ramfb=on
 \
-display gtk,gl=on

To manage notifications about this bug go to:
https://bugs.launchpad.net/qemu/+bug/1888964/+subscriptions

[Bug 1889421] Re: VVFAT is not writable from Windows NT 3.5, 3.51 and 4.0

2021-07-06 Thread Launchpad Bug Tracker

[Expired for QEMU because there has been no activity for 60 days.]

** Changed in: qemu
   Status: Incomplete => Expired

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1889421

Title:
  VVFAT is not writable from Windows NT 3.5, 3.51 and 4.0

Status in QEMU:
  Expired

Bug description:
  I'm running Windows NT 3.5, 3.51 and 4.0 in QEMU 4.2.0 on Linux. I'm
  using a VVFAT filesystem. Command lines:

  $ qemu-system-i386 -L pc -cpu 486 -m 64 -vga cirrus -drive
  file=nt351.img,format=raw -net nic,model=pcnet -net user -soundhw
  sb16,pcspk -drive file=fat:rw:drived,format=raw

  $ qemu-system-i386 --version
  QEMU emulator version 4.2.0 (Debian 1:4.2-6)
  Copyright (c) 2003-2019 Fabrice Bellard and the QEMU Project developers

  Creating a new directory or file on drive D: (the VVFAT filesystem)
  fails on Windows NT 3.5, 3.51 and 4.0 (see screenshot). It succeeds on
  Windows NT 3.1.

  Is there a workaround, e.g. a QEMU flag or a change in the Windows NT
  driver settings?

To manage notifications about this bug go to:
https://bugs.launchpad.net/qemu/+bug/1889421/+subscriptions

[Bug 1889033] Re: qemu-img permission denied on vmdk creation on CIFS share

2021-07-06 Thread Launchpad Bug Tracker

[Expired for QEMU because there has been no activity for 60 days.]

** Changed in: qemu
   Status: Incomplete => Expired

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1889033

Title:
  qemu-img permission denied on vmdk creation on CIFS share

Status in QEMU:
  Expired

Bug description:
  
  - on a CIFS mount qemu-img claims not to have permissions to write into a 
file.
  - VMDK sparse file creation succeeds
  - VMDK Flat file creation creates the flat-file, but fails to write the 
description-file
  - VMDK flat file creation succeeds on native linux mount such as ~/tmp or /tmp
  - same effect as root or non-root
  - same effect with selinux setenforce 0

  a) I would have expected that the monolithic flat would have created only one 
large file just like sparse, but it seems to create a description file, in 
addition to the storing file.
  b) I am aware that qemu-img may have problems opening very large files on 
CIFS, however, this file is not very large

  Windows-10 latest updated 2004 19041.388
  Linux VM, Fedora-32 in Virtualbox 6.1.12 
  # rpm -qa | grep  qemu-img
  qemu-img-4.2.0-7.fc32.x86_64

  mount options: 
  mount -t cifs //10.x,x,x/$shname  /mnt/hshare -o 
defaults,username=gana,rw,uid=1000,gid=1000,vers=3.0

  [root@fedora ~]# cd /mnt/hshare/some/folder/createvmdk/
  [root@fedora createvmdk]# qemu-img create -f vmdk test1.vmdk 100M -o 
subformat=monolithicFlat
  Formatting 'test1.vmdk', fmt=vmdk size=104857600 compat6=off 
hwversion=undefined subformat=monolithicFlat
  qemu-img: test1.vmdk: Could not write description: Permission denied
  [root@fedora createvmdk]# ls -l test1*.*
  -rwxr-xr-x. 1 gana gana 104857600 Jul 26 23:02 test1-flat.vmdk
  -rwxr-xr-x. 1 gana gana 0 Jul 26 23:02 test1.vmdk
  [root@fedora createvmdk]# du -k test1*.*
  0   test1-flat.vmdk
  0   test1.vmdk
  # (doesn't seem to be really flat)

  creation in /tmp works
  # cd /tmp
  [root@fedora tmp]# qemu-img create -f vmdk test1.vmdk 100M -o 
subformat=monolithicFlat
  Formatting 'test1.vmdk', fmt=vmdk size=104857600 compat6=off 
hwversion=undefined subformat=monolithicFlat
  [root@fedora tmp]# ls -l /tmp/test1*.*
  -rw-r--r--. 1 root root 104857600 Jul 26 22:43 /tmp/test1-flat.vmdk
  -rw-r--r--. 1 root root   313 Jul 26 22:43 /tmp/test1.vmdk
  [root@fedora createvmdk]# du -k /tmp/test1*.*
  4   /tmp/test1-flat.vmdk
  4   /tmp/test1.vmdk

  [root@fedora createvmdk]# cat /tmp/test1.vmdk
  # Disk DescriptorFile
  version=1
  CID=5f13c13d
  parentCID=
  createType="monolithicFlat"

  # Extent description
  RW 204800 FLAT "test1-flat.vmdk" 0

  # The Disk Data Base
  #DDB

  ddb.virtualHWVersion = "4"
  ddb.geometry.cylinders = "203"
  ddb.geometry.heads = "16"
  ddb.geometry.sectors = "63"
  ddb.adapterType = "ide"

  
  On the other hand, creating a sparse file works
  cd /mnt/hshare/some/folder/createvmdk/
  [root@fedora createvmdk]# qemu-img create -f vmdk test2.vmdk 100M -o 
subformat=monolithicSparse
  Formatting 'test2.vmdk', fmt=vmdk size=104857600 compat6=off 
hwversion=undefined subformat=monolithicSparse
  [root@fedora createvmdk]# ls l test2*.*
  -rwxr-xr-x. 1 gana gana 65536 Jul 26 22:52 test2.vmdk
  [root@fedora createvmdk]#  du -k  /tmp/test2*.*
  12  /tmp/test2.vmdk

  test2.vmdk is a binary file
  inside it, located among garbled ascii characters is an embedded VMDK 
description
  
  # Disk DescriptorFile
  version=1
  CID=cf302a20
  parentCID=
  createType="monolithicSparse"

  # Extent description
  RW 204800 SPARSE "test2.vmdk"

  # The Disk Data Base
  #DDB

  ddb.virtualHWVersion = "4"
  ddb.geometry.cylinders = "203"
  ddb.geometry.heads = "16"
  ddb.geometry.sectors = "63"
  ddb.adapterType = "ide"
  ```

To manage notifications about this bug go to:
https://bugs.launchpad.net/qemu/+bug/1889033/+subscriptions

[Bug 1889945] Re: virtiofsd exits when iommu_platform is enabled after virtiofs driver is loaded

2021-07-06 Thread Launchpad Bug Tracker

[Expired for QEMU because there has been no activity for 60 days.]

** Changed in: qemu
   Status: Incomplete => Expired

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1889945

Title:
  virtiofsd exits when iommu_platform is enabled after virtiofs driver
  is loaded

Status in QEMU:
  Expired

Bug description:
  Bug in QEMU 5.0.0:

  virtiofsd exits when iommu_platform is enabled after virtiofs driver is 
loaded.
  If iommu_platform is disabled the guest immediately locks up as a result of 
the configured PCIe-Passthrough.

  Host system:
  - Arch Linux amd64
  - AMD Ryzen Platform
  - QEMU 5.0.0

  Guest system:
  - Windows Server 2019 (also happens in linux installations)
  - PCIe GPU hostdev
  - virtiofs passthrough

  Many thanks for any advice.

  QEMU LOG:
  2020-07-28 19:20:07.197+: Starting external device: virtiofsd
  /usr/lib/qemu/virtiofsd --fd=29 -o source=/viofstest
  2020-07-28 19:20:07.207+: starting up libvirt version: 6.5.0, qemu 
version: 5.0.0, kernel: 5.7.10-arch1-1, hostname: mspc
  LC_ALL=C \
  PATH=/usr/local/sbin:/usr/local/bin:/usr/bin \
  HOME=/var/lib/libvirt/qemu/domain-7-win \
  XDG_DATA_HOME=/var/lib/libvirt/qemu/domain-7-win/.local/share \
  XDG_CACHE_HOME=/var/lib/libvirt/qemu/domain-7-win/.cache \
  XDG_CONFIG_HOME=/var/lib/libvirt/qemu/domain-7-win/.config \
  QEMU_AUDIO_DRV=none \
  /usr/bin/qemu-system-x86_64 \
  -name guest=win,debug-threads=on \
  -S \
  -object 
secret,id=masterKey0,format=raw,file=/var/lib/libvirt/qemu/domain-7-win/master-key.aes
 \
  -blockdev 
'{"driver":"file","filename":"/usr/share/ovmf/x64/OVMF_CODE.fd","node-name":"libvirt-pflash0-storage","auto-read-only":true,"discard":"unmap"}'
 \
  -blockdev 
'{"node-name":"libvirt-pflash0-format","read-only":true,"driver":"raw","file":"libvirt-pflash0-storage"}'
 \
  -blockdev 
'{"driver":"file","filename":"/var/lib/libvirt/qemu/nvram/win_VARS.fd","node-name":"libvirt-pflash1-storage","auto-read-only":true,"discard":"unmap"}'
 \
  -blockdev 
'{"node-name":"libvirt-pflash1-format","read-only":false,"driver":"raw","file":"libvirt-pflash1-storage"}'
 \
  -machine 
pc-q35-5.0,accel=kvm,usb=off,vmport=off,dump-guest-core=off,kernel_irqchip=on,pflash0=libvirt-pflash0-format,pflash1=libvirt-pflash1-format
 \
  -cpu 
host,migratable=on,hv-time,hv-relaxed,hv-vapic,hv-spinlocks=0x1fff,hv-vendor-id=whatever,kvm=off
 \
  -m 2048 \
  -overcommit mem-lock=off \
  -smp 8,sockets=8,cores=1,threads=1 \
  -object 
memory-backend-file,id=ram-node0,prealloc=yes,mem-path=/dev/hugepages/libvirt/qemu/7-win,share=yes,size=2147483648
 \
  -numa node,nodeid=0,cpus=0-7,memdev=ram-node0 \
  -uuid c8efa194-52f8-4526-a0f8-29a254839b55 \
  -display none \
  -no-user-config \
  -nodefaults \
  -chardev socket,id=charmonitor,fd=29,server,nowait \
  -mon chardev=charmonitor,id=monitor,mode=control \
  -rtc base=localtime,driftfix=slew \
  -global kvm-pit.lost_tick_policy=delay \
  -no-hpet \
  -no-shutdown \
  -global ICH9-LPC.disable_s3=1 \
  -global ICH9-LPC.disable_s4=1 \
  -boot menu=off,strict=on \
  -device 
pcie-root-port,port=0x10,chassis=1,id=pci.1,bus=pcie.0,multifunction=on,addr=0x2
 \
  -device pcie-pci-bridge,id=pci.2,bus=pci.1,addr=0x0 \
  -device pcie-root-port,port=0x11,chassis=3,id=pci.3,bus=pcie.0,addr=0x2.0x1 \
  -device pcie-root-port,port=0x12,chassis=4,id=pci.4,bus=pcie.0,addr=0x2.0x2 \
  -device pcie-root-port,port=0x13,chassis=5,id=pci.5,bus=pcie.0,addr=0x2.0x3 \
  -device pcie-root-port,port=0x14,chassis=6,id=pci.6,bus=pcie.0,addr=0x2.0x4 \
  -device pcie-root-port,port=0x15,chassis=7,id=pci.7,bus=pcie.0,addr=0x2.0x5 \
  -device pcie-root-port,port=0x16,chassis=8,id=pci.8,bus=pcie.0,addr=0x2.0x6 \
  -device pcie-root-port,port=0x17,chassis=9,id=pci.9,bus=pcie.0,addr=0x2.0x7 \
  -device 
pcie-root-port,port=0x18,chassis=10,id=pci.10,bus=pcie.0,multifunction=on,addr=0x3
 \
  -device pcie-root-port,port=0x19,chassis=11,id=pci.11,bus=pcie.0,addr=0x3.0x1 
\
  -device pcie-root-port,port=0x1a,chassis=12,id=pci.12,bus=pcie.0,addr=0x3.0x2 
\
  -device 
pcie-root-port,port=0x8,chassis=13,id=pci.13,bus=pcie.0,multifunction=on,addr=0x1
 \
  -device pcie-root-port,port=0x9,chassis=14,id=pci.14,bus=pcie.0,addr=0x1.0x1 \
  -device pcie-root-port,port=0xa,chassis=15,id=pci.15,bus=pcie.0,addr=0x1.0x2 \
  -device pcie-root-port,port=0xb,chassis=16,id=pci.16,bus=pcie.0,addr=0x1.0x3 \
  -device nec-usb-xhci,id=usb,bus=pci.7,addr=0x0 \
  -device virtio-serial-pci,id=virtio-serial0,bus=pci.14,addr=0x0 \
  -blockdev 
'{"driver":"host_device","filename":"/dev/zvol/ssd/windows","aio":"threads","node-name":"libvirt-3-storage","cache":{"direct":true,"no-flush":false},"auto-read-only":true,"discard":"unmap"}'
 \
  -blockdev 
'{"node-name":"libvirt-3-format","read-only":false,"cache":{"direct":true,"no-flush":false},"driver":"raw","file":"libvirt-3-storage"}'
 \
  -device 
virtio-blk-pci,bus=pci.3,addr=0x0,drive=libvirt-3-format,id=vi

[Bug 1888467] Re: qemu-img http convert bug

2021-07-06 Thread Launchpad Bug Tracker

[Expired for QEMU because there has been no activity for 60 days.]

** Changed in: qemu
   Status: Incomplete => Expired

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1888467

Title:
  qemu-img http convert bug

Status in QEMU:
  Expired

Bug description:
  Hello, Why the file sizes of http conversion and local conversion are
  inconsistent?

  When using the http method of qemu-img for conversion, the size of some formats 
after conversion differs from that produced by the local method of qemu-img, e.g. vhd and 
vdi. qcow2 and vmdk are normal.
  My image size is 40 G, raw format.

  The source is the same file, but the access method is different
  http method of qemu-img： qemu-img convert -f raw -O vdi http://xxx 
xxx.vdi（19G，after conversion）
  local method of qemu-img： qemu-img convert -f raw -O vdi xxx.raw 
xxx.vdi（3G，after conversion）

  thank you

To manage notifications about this bug go to:
https://bugs.launchpad.net/qemu/+bug/1888467/+subscriptions

[Bug 1889943] Re: Improper TCP/IP packet splitting on e1000e/vmxnet3

2021-07-06 Thread Launchpad Bug Tracker

[Expired for QEMU because there has been no activity for 60 days.]

** Changed in: qemu
   Status: Incomplete => Expired

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1889943

Title:
  Improper TCP/IP packet splitting on e1000e/vmxnet3

Status in QEMU:
  Expired

Bug description:
  Update: The sw implementation of fragmentation also creates malformed
  IPv6 packets when their size is above the MTU. See comment #3

  Problem Description:
  When using a tap interface and the guest sends a TCP packet that would need 
to be segmented, it is fragmented using IP fragmentation. The host does not 
reassemble the IP fragments and forwards them to the next hop. This causes 
issues on certain ISPs, which seemingly reject IP fragments (Verizon Fios).
  This issue occurs on the e1000e and vmxnet3 NIC models, and possibly others. 
It does not occur on the virtio(which passes the entire packet through to the 
host w/o fragmentation or segmentation) or the e1000 model().

  Test scenario:
  Setup a tap and network bridge using the directions here: 
https://gist.github.com/extremecoders-re/e8fd8a67a515fee0c873dcafc81d811c
  Boot the machine into any modern guest(a Fedora 31 live iso was used for 
testing)
  Begin a wireshark capture on the host machine
  On the host(or another machine on the network) run: npx http-echo-server(See 
https://github.com/watson/http-echo-server)
  On the guest run
  Curl -d “Lorem ipsum dolor sit amet, consectetur adipiscing elit. Maecenas 
venenatis viverra ipsum, ac tincidunt est rhoncus eu. Suspendisse vehicula 
congue ante, non rhoncus elit tempus vitae. Duis ac leo massa. Donec rutrum 
condimentum turpis nec ultricies. Duis laoreet elit eu arcu pulvinar, vitae 
congue neque mattis. Mauris sed ante nunc. Vestibulum vitae urna a tellus 
maximus sagittis. Vivamus luctus pellentesque neque, vel tempor purus porta ut. 
Phasellus at quam bibendum, fermentum libero sit amet, ullamcorper mauris. In 
rutrum sit amet dui id maximus. Ut lectus ligula, hendrerit nec aliquam non, 
finibus a turpis. Proin scelerisque convallis ante, et pharetra elit. Donec 
nunc nisl, viverra vitae dui at, posuere rhoncus nibh. Mauris in massa quis 
neque posuere placerat quis quis massa. Donec quis lacus ligula. Donec mollis 
vel nisi eget elementum. Nam id magna porta nunc consectetur efficitur ac quis 
lorem. Cras faucibus vel ex porttitor mattis. Praesent in mattis tortor. In 
venenatis convallis quam, in posuere nibh. Proin non dignissim massa. Cras at 
mi ut lorem tristique fringilla. Nulla ac quam condimentum metus tincidunt 
vulputate ut at leo. Nunc pellentesque, nunc vel rhoncus condimentum, arcu sem 
molestie augue, in suscipit mauris odio mollis odio. Integer hendrerit lectus a 
leo facilisis, in accumsan urna maximus. Nam nec odio volutpat, varius est id, 
tempus libero. Vestibulum lobortis tortor quam, ac scelerisque urna rhoncus in. 
Etiam tempor, est sit amet vulputate molestie, urna neque sodales leo, sit amet 
blandit risus felis sed est. Nulla eu eros nec tortor dapibus maximus faucibus 
ut erat. Ut pharetra tempor massa in bibendum. Interdum et malesuada fames ac 
ante ipsum primis in faucibus. Etiam mattis molestie felis eu efficitur. Morbi 
tincidunt consectetur diam tincidunt feugiat. Morbi euismod ut lorem finibus 
pellentesque. Aliquam eu porta ex. Aliquam cursus, orci sit amet volutpat 
egestas, est est pulvinar erat, sed luctus nisl ligula eget justo vestibulum.” 


  2000 bytes of Lorem Ipsum taken from https://www.lipsum.com/

  Compare results from an e1000, a virtio, and a e1000e card:
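  +--------+-----------+---------+------------+
  | Model  | Fragment  | Segment | Wire Size  |
  +--------+-----------+---------+------------+
  | e1000e | Yes       | NO      | 1484 + 621 |
  +--------+-----------+---------+------------+
  | e1000  | No        | Yes     | 1516 + 620 |
  +--------+-----------+---------+------------+
  | Virtio | NO        | NO      | 2068       |
  +--------+-----------+---------+------------+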

  Expected Results:
  TCP Segment to proper size OR pass full size to host and let the host split 
if necessary.

  Configuration changes that did not work:
  Disable host, guest, router firewalls
  Different Hosts
  Different Physical NICs
  Libvirt based NAT/Routed modes
  Fedora 32 vs 31
  Qemu 4.2.0 vs github commit d74824cf7c8b352f9045e949dc636c7207a41eee

  System Information:
  lsb_release -rd
  Description:  Fedora release 32 (Thirty Two)
  Release:  32

  uname -a
  Linux pats-laptop-linux 5.7.10-201.fc32.x86_64 #1 SMP Thu Jul 23 00:58:39 UTC 
2020 x86_64 x86_64 x86_64 GNU/Linux

  I can provide additional logs, debug info, etc. if needed.

To manage notifications about this bug go to:
https://bugs.launchpad.net/qemu/+bug/1889943/+subscriptions

Re: [PATCH v1 30/39] configure: stop user enabling plugins on Windows for now

2021-07-06 Thread Thomas Huth


On 06/07/2021 16.58, Alex Bennée wrote:

There are some patches on the list that enable plugins on Windows but
they still need some changes to be ready:

   https://patchew.org/QEMU/20201013002806.1447-1-luoyongg...@gmail.com/

In the meantime let's stop the user from being able to configure the
support so they don't get confused by the weird linker error messages
later.

Signed-off-by: Alex Bennée 
Cc: Yonggang Luo 
---
  configure | 5 +
  1 file changed, 5 insertions(+)

diff --git a/configure b/configure
index 44a487e090..9d72b31a9f 100755
--- a/configure
+++ b/configure
@@ -707,6 +707,11 @@ MINGW32*)
else
  audio_drv_list=""
fi
+  if "$plugins" = "yes"; then
+  error_exit "TCG plugins not currently supported on Windows platforms"
+  else
+  plugins="no"
+  fi
supported_os="yes"
pie="no"
  ;;


This also looks like the wrong spot for testing - the "--enable-plugins" 
handling is around line 1510 in the configure script, but your check has 
been added in line 707 already. Again, this has to be checked after the 
options have been parsed.


 Thomas

Re: [PATCH v1 29/39] configure: don't allow plugins to be enabled for a non-TCG build

2021-07-06 Thread Thomas Huth


On 06/07/2021 16.58, Alex Bennée wrote:

Signed-off-by: Alex Bennée 
---
  configure | 7 ++-
  1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/configure b/configure
index 650d9c0735..44a487e090 100755
--- a/configure
+++ b/configure
@@ -1098,6 +1098,7 @@ for opt do
--enable-cap-ng) cap_ng="enabled"
;;
--disable-tcg) tcg="disabled"
+ plugins="no"
;;
--enable-tcg) tcg="enabled"
;;
@@ -1509,7 +1510,11 @@ for opt do
;;
--disable-xkbcommon) xkbcommon="disabled"
;;
-  --enable-plugins) plugins="yes"
+  --enable-plugins) if test "$tcg" = "enabled"; then
+plugins="yes"
+else
+error_exit "Can't enable plugins on non-TCG builds"
+fi


That's the wrong spot for checking. While it works fine with:

 .../configure --disable-tcg --enable-plugins

it fails to bail out with:

 .../configure --enable-plugins --disable-tcg

You should do such checks after all options have been parsed.

 Thomas

[PATCH] tcg: Move tb_phys_invalidate_count to tb_ctx (#457)

2021-07-06 Thread Richard Henderson

We can call do_tb_phys_invalidate from an iocontext, which has
no per-thread tcg_ctx.  Move this to tb_ctx, which is global.
The actual update still takes place with a lock held, so only
an atomic set is required, not an atomic increment.

Resolves: https://gitlab.com/qemu-project/qemu/-/issues/457
Signed-off-by: Richard Henderson 
---
 accel/tcg/tb-context.h|  1 +
 include/tcg/tcg.h |  3 ---
 accel/tcg/translate-all.c |  8 
 tcg/region.c  | 14 --
 4 files changed, 5 insertions(+), 21 deletions(-)

diff --git a/accel/tcg/tb-context.h b/accel/tcg/tb-context.h
index cc33979113..cac62d9749 100644
--- a/accel/tcg/tb-context.h
+++ b/accel/tcg/tb-context.h
@@ -34,6 +34,7 @@ struct TBContext {
 
 /* statistics */
 unsigned tb_flush_count;
+unsigned tb_phys_invalidate_count;
 };
 
 extern TBContext tb_ctx;
diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h
index dedb86939a..25dd19d6e1 100644
--- a/include/tcg/tcg.h
+++ b/include/tcg/tcg.h
@@ -579,8 +579,6 @@ struct TCGContext {
 /* Threshold to flush the translated code buffer.  */
 void *code_gen_highwater;
 
-size_t tb_phys_invalidate_count;
-
 /* Track which vCPU triggers events */
 CPUState *cpu;  /* *_trans */
 
@@ -815,7 +813,6 @@ size_t tcg_code_capacity(void);
 
 void tcg_tb_insert(TranslationBlock *tb);
 void tcg_tb_remove(TranslationBlock *tb);
-size_t tcg_tb_phys_invalidate_count(void);
 TranslationBlock *tcg_tb_lookup(uintptr_t tc_ptr);
 void tcg_tb_foreach(GTraverseFunc func, gpointer user_data);
 size_t tcg_nb_tbs(void);
diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
index 57455d8639..4df26de858 100644
--- a/accel/tcg/translate-all.c
+++ b/accel/tcg/translate-all.c
@@ -1219,8 +1219,8 @@ static void do_tb_phys_invalidate(TranslationBlock *tb, 
bool rm_from_page_list)
 /* suppress any remaining jumps to this TB */
 tb_jmp_unlink(tb);
 
-qatomic_set(&tcg_ctx->tb_phys_invalidate_count,
-   tcg_ctx->tb_phys_invalidate_count + 1);
+qatomic_set(&tb_ctx.tb_phys_invalidate_count,
+tb_ctx.tb_phys_invalidate_count + 1);
 }
 
 static void tb_phys_invalidate__locked(TranslationBlock *tb)
@@ -2128,8 +2128,8 @@ void dump_exec_info(void)
 qemu_printf("\nStatistics:\n");
 qemu_printf("TB flush count  %u\n",
 qatomic_read(&tb_ctx.tb_flush_count));
-qemu_printf("TB invalidate count %zu\n",
-tcg_tb_phys_invalidate_count());
+qemu_printf("TB invalidate count %u\n",
+qatomic_read(&tb_ctx.tb_phys_invalidate_count));
 
 tlb_flush_counts(&flush_full, &flush_part, &flush_elide);
 qemu_printf("TLB full flushes%zu\n", flush_full);
diff --git a/tcg/region.c b/tcg/region.c
index d3a3658e81..e64c3ea230 100644
--- a/tcg/region.c
+++ b/tcg/region.c
@@ -980,17 +980,3 @@ size_t tcg_code_capacity(void)
 
 return capacity;
 }
-
-size_t tcg_tb_phys_invalidate_count(void)
-{
-unsigned int n_ctxs = qatomic_read(&tcg_cur_ctxs);
-unsigned int i;
-size_t total = 0;
-
-for (i = 0; i < n_ctxs; i++) {
-const TCGContext *s = qatomic_read(&tcg_ctxs[i]);
-
-total += qatomic_read(&s->tb_phys_invalidate_count);
-}
-return total;
-}
-- 
2.25.1

Re: [PATCH] docs: Add '-device intel-iommu' entry

2021-07-06 Thread Jason Wang




在 2021/6/12 上午2:55, Peter Xu 写道:

The parameters of the intel-iommu device are non-trivial to understand.  Add an
entry for it so that people can refer to it when using it.

There are actually a few more options, but I hide them explicitly because
they shouldn't be used by normal QEMU users.

Cc: Chao Yang 
Cc: Lei Yang 
Cc: Jing Zhao 
Cc: Jason Wang 
Cc: Michael S. Tsirkin 
Cc: Alex Williamson 
Signed-off-by: Peter Xu 
---
  qemu-options.hx | 32 
  1 file changed, 32 insertions(+)

diff --git a/qemu-options.hx b/qemu-options.hx
index 14258784b3a..4bb04243907 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -926,6 +926,38 @@ SRST
  
  ``-device pci-ipmi-bt,bmc=id``

  Like the KCS interface, but defines a BT interface on the PCI bus.
+
+``-device intel-iommu[,option=...]``
+This is only supported by ``-machine q35``, which will enable Intel VT-d
+emulation within the guest.  It supports below options:
+
+``intremap=on|off`` (default: auto)
+This enables interrupt remapping feature in the guest.



I think we don't need "in the guest" here.

Other than this:

Reviewed-by: Jason Wang 

Thanks



  It's required
+to enable complete x2apic.  Currently it only supports kvm
+kernel-irqchip modes ``off`` or ``split``.  Full kernel-irqchip is not
+yet supported.
+
+``caching-mode=on|off`` (default: off)
+This enables caching mode for the VT-d emulated device.  When
+caching-mode is enabled, each guest DMA buffer mapping will generate an
+IOTLB invalidation from the guest IOMMU driver to the vIOMMU device in
+a synchronous way.  It is required for ``-device vfio-pci`` to work
+with the VT-d device, because host assigned devices requires to setup
+the DMA mapping on the host before guest DMA starts.
+
+``device-iotlb=on|off`` (default: off)
+This enables device-iotlb capability for the emulated VT-d device.  So
+far virtio/vhost should be the only real user for this parameter,
+paired with ats=on configured for the device.
+
+``aw-bits=39|48`` (default: 39)
+This decides the address width of IOVA address space.  The address
+space has 39 bits width for 3-level IOMMU page tables, and 48 bits for
+4-level IOMMU page tables.
+
+Please also refer to the wiki page for general scenarios of VT-d
+emulation in QEMU: https://wiki.qemu.org/Features/VT-d.
+
  ERST
  
  DEF("name", HAS_ARG, QEMU_OPTION_name,

Re: [PATCH] docs/nvdimm: update doc

2021-07-06 Thread lizhij...@fujitsu.com


ping...


On 11/06/2021 11:41, Li Zhijian wrote:
> The prompt was updated since def835f0da ('hostmem: Don't report pmem 
> attribute if unsupported')
>
> Signed-off-by: Li Zhijian 
> ---
>   docs/nvdimm.txt | 3 ++-
>   1 file changed, 2 insertions(+), 1 deletion(-)
>
> diff --git a/docs/nvdimm.txt b/docs/nvdimm.txt
> index 0aae682be3e..71cdbdf554b 100644
> --- a/docs/nvdimm.txt
> +++ b/docs/nvdimm.txt
> @@ -247,7 +247,8 @@ is built with libpmem [2] support (configured with 
> --enable-libpmem), QEMU
>   will take necessary operations to guarantee the persistence of its own 
> writes
>   to the vNVDIMM backend(e.g., in vNVDIMM label emulation and live migration).
>   If 'pmem' is 'on' while there is no libpmem support, qemu will exit and 
> report
> -a "lack of libpmem support" message to ensure the persistence is available.
> +a "lack of libpmem support" (or "Invalid parameter 'pmem'" since v6.0.0)
> +message to ensure the persistence is available.
>   For example, if we want to ensure the persistence for some backend file,
>   use the QEMU command line:
>

pipe2 & configure script

2021-07-06 Thread Richard Zak

What conditions are required for "#define CONFIG_PIPE2" to be set in
build/config-host.h? It prevents building for Haiku as pipe2() doesn't
exist. I didn't see anything in the configure script regarding pipe2. I
also updated my code to the latest in the repository and this issue just
popped up.

-- 
Regards,

Richard J. Zak
Professional Genius
PGP Key: https://keybase.io/rjzak/key.asc

Re: [PATCH 2/2] tcg: Bake tb_destroy() into tcg_region_tree

2021-07-06 Thread Liren Wei


On 7/7/21 8:34 AM, Richard Henderson wrote:

On 7/4/21 7:31 AM, Liren Wei wrote:

-static gint tb_tc_cmp(gconstpointer ap, gconstpointer bp)
+static gint tb_tc_cmp(gconstpointer ap, gconstpointer bp, gpointer _)


Using _ here as the variable name isn't ideal.  I guess if this were 
c++ we would actually omit the name, which is kinda the same.  But I 
think it's just as easy to name it userdata, as per glib docs.


I'll fix that up while queuing, thanks.


Got it, thanks.
I'm not keen that the spinlock init and destroy are in different 
places, but surely that should be fixed by moving the init to 
tcg_tb_alloc, probably moving it to tcg/region.c as well.



r~
Indeed, that would be much more clear. But I kind of feel that 
initialization of TB spinlock is deliberately placed after 
tcg_gen_code() in the current implementation to prevent buffer_overflow 
or any rewinding from leaking the initialized spinlock (though it 
seems to me that there is nothing to leak for a spinlock whatsoever).


Liren Wei

Re: [PATCH 1/2] qapi/run-state: Add a new shutdown cause 'migration-completed'

2021-07-06 Thread Kunkun Jiang


On 2021/7/6 18:27, Dr. David Alan Gilbert wrote:

* Kunkun Jiang (jiangkun...@huawei.com) wrote:

Hi Daniel,

On 2021/7/5 20:48, Daniel P. Berrangé wrote:

On Mon, Jul 05, 2021 at 08:36:52PM +0800, Kunkun Jiang wrote:

In the current version, the source QEMU process does not automatically
exit after a successful migration. Additional action is required,
such as sending { "execute": "quit" } or ctrl+c. For simplicity, add
a new shutdown cause 'migration-completed' to exit the source QEMU
process after a successful migration.

IIUC, 'STATUS_COMPLETED' state is entered on the source host
once it has finished sending all VM state, and thus does not
guarantee that the target host has successfully received and
loaded all VM state.

Thanks for your reply.

If the target host doesn't successfully receive and load all VM state,
we can send { "execute": "cont" } to resume the source in time to
ensure that the VM will not be lost?

Yes, that's pretty common at the moment;  the failed migration can
happen at lots of different points:
   a) The last part of the actual migration stream/loading the devices
 - that's pretty easy, since the destination hasn't actually got
 the full migration stream.

   b) If the migration itself completes, but then the management system
 then tries to reconfigure the networking/storage on the destination,
 and something goes wrong in that, then it can roll that back and
 cont on the source.

So, it's a pretty common type of failure/recovery  - the management
application has to be a bit careful not to do anything destructive
until as late as possible, so it knows it can switch back.

Okay, I see.

Typically a mgmt app will need to directly confirm that the
target host QEMU has successfully started running, before it
will tell the source QEMU to quit.

'a mgmt app', such as libvirt?

Yes, it's currently libvirt that does that; but any of the control
things could (it's just libvirt has been going long enough so it knows
about lots and lots of nasty cases of migration failure, and recovering
properly).

Can you explain why you wanted to get the source to automatically
quit?  In a real setup where does it help?

Sorry, my thoughts on live migration scenarios are not comprehensive enough.

Thanks,
Kunkun Jiang

Dave



Thanks,
Kunkun Jiang

So, AFAICT, this automatic exit after STATUS_COMPLETED is
not safe and could lead to total loss of the running VM in
error scenarios.




Signed-off-by: Kunkun Jiang 
---
   migration/migration.c | 1 +
   qapi/run-state.json   | 4 +++-
   2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/migration/migration.c b/migration/migration.c
index 4228635d18..16782c93c2 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -3539,6 +3539,7 @@ static void migration_iteration_finish(MigrationState *s)
   case MIGRATION_STATUS_COMPLETED:
   migration_calculate_complete(s);
   runstate_set(RUN_STATE_POSTMIGRATE);
+qemu_system_shutdown_request(SHUTDOWN_CAUSE_MIGRATION_COMPLETED);
   break;
   case MIGRATION_STATUS_ACTIVE:
diff --git a/qapi/run-state.json b/qapi/run-state.json
index 43d66d700f..66aaef4e2b 100644
--- a/qapi/run-state.json
+++ b/qapi/run-state.json
@@ -86,12 +86,14 @@
   #   ignores --no-reboot. This is useful for sanitizing
   #   hypercalls on s390 that are used during kexec/kdump/boot
   #
+# @migration-completed: Reaction to the successful migration
+#
   ##
   { 'enum': 'ShutdownCause',
 # Beware, shutdown_caused_by_guest() depends on enumeration order
 'data': [ 'none', 'host-error', 'host-qmp-quit', 'host-qmp-system-reset',
   'host-signal', 'host-ui', 'guest-shutdown', 'guest-reset',
-'guest-panic', 'subsystem-reset'] }
+'guest-panic', 'subsystem-reset', 'migration-completed'] }
   ##
   # @StatusInfo:
--
2.23.0



Regards,
Daniel

[PATCH v6 10/10] tests/data/acpi/virt: Update IORT files for ITS

2021-07-06 Thread Shashi Mallela

Updated expected IORT files applicable with latest GICv3
ITS changes.

Full diff of new file disassembly:

/*
 * Intel ACPI Component Architecture
 * AML/ASL+ Disassembler version 20180629 (64-bit version)
 * Copyright (c) 2000 - 2018 Intel Corporation
 *
 * Disassembly of tests/data/acpi/virt/IORT.pxb, Tue Jun 29 17:35:38 2021
 *
 * ACPI Data Table [IORT]
 *
 * Format: [HexOffset DecimalOffset ByteLength]  FieldName : FieldValue
 */

[000h    4]Signature : "IORT"[IO Remapping Table]
[004h 0004   4] Table Length : 007C
[008h 0008   1] Revision : 00
[009h 0009   1] Checksum : 07
[00Ah 0010   6]   Oem ID : "BOCHS "
[010h 0016   8] Oem Table ID : "BXPC"
[018h 0024   4] Oem Revision : 0001
[01Ch 0028   4]  Asl Compiler ID : "BXPC"
[020h 0032   4]Asl Compiler Revision : 0001

[024h 0036   4]   Node Count : 0002
[028h 0040   4]  Node Offset : 0030
[02Ch 0044   4] Reserved : 

[030h 0048   1] Type : 00
[031h 0049   2]   Length : 0018
[033h 0051   1] Revision : 00
[034h 0052   4] Reserved : 
[038h 0056   4]Mapping Count : 
[03Ch 0060   4]   Mapping Offset : 

[040h 0064   4] ItsCount : 0001
[044h 0068   4]  Identifiers : 

[048h 0072   1] Type : 02
[049h 0073   2]   Length : 0034
[04Bh 0075   1] Revision : 00
[04Ch 0076   4] Reserved : 
[050h 0080   4]Mapping Count : 0001
[054h 0084   4]   Mapping Offset : 0020

[058h 0088   8]Memory Properties : [IORT Memory Access Properties]
[058h 0088   4]  Cache Coherency : 0001
[05Ch 0092   1]Hints (decoded below) : 00
   Transient : 0
  Write Allocate : 0
   Read Allocate : 0
Override : 0
[05Dh 0093   2] Reserved : 
[05Fh 0095   1] Memory Flags (decoded below) : 03
   Coherency : 1
Device Attribute : 1
[060h 0096   4]ATS Attribute : 
[064h 0100   4]   PCI Segment Number : 
[068h 0104   1]Memory Size Limit : 00
[069h 0105   3] Reserved : 00

[068h 0104   4]   Input base : 
[06Ch 0108   4] ID Count : 
[070h 0112   4]  Output Base : 
[074h 0116   4] Output Reference : 0030
[078h 0120   4]Flags (decoded below) : 
  Single Mapping : 0

Raw Table Data: Length 124 (0x7C)

: 49 4F 52 54 7C 00 00 00 00 07 42 4F 43 48 53 20  // IORT|.BOCHS
0010: 42 58 50 43 20 20 20 20 01 00 00 00 42 58 50 43  // BXPCBXPC
0020: 01 00 00 00 02 00 00 00 30 00 00 00 00 00 00 00  // 0...
0030: 00 18 00 00 00 00 00 00 00 00 00 00 00 00 00 00  // 
0040: 01 00 00 00 00 00 00 00 02 34 00 00 00 00 00 00  // .4..
0050: 01 00 00 00 20 00 00 00 01 00 00 00 00 00 00 03  //  ...
0060: 00 00 00 00 00 00 00 00 00 00 00 00 FF FF 00 00  // 
0070: 00 00 00 00 30 00 00 00 00 00 00 00  // 0...

Signed-off-by: Shashi Mallela 
---
 tests/data/acpi/virt/IORT   | Bin 0 -> 124 bytes
 tests/data/acpi/virt/IORT.memhp | Bin 0 -> 124 bytes
 tests/data/acpi/virt/IORT.numamem   | Bin 0 -> 124 bytes
 tests/data/acpi/virt/IORT.pxb   | Bin 0 -> 124 bytes
 tests/qtest/bios-tables-test-allowed-diff.h |   4 
 5 files changed, 4 deletions(-)

diff --git a/tests/data/acpi/virt/IORT b/tests/data/acpi/virt/IORT
index 
e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..521acefe9ba66706c5607321a82d330586f3f280
 100644
GIT binary patch
literal 124
zcmebD4+^Pa00MR=e`k+i1*eDrX9XZ&1PX!JAesq?4S*O7Bw!2(4Uz`|CKCt^;wu0#
QRGb+i3L*dhhtM#y0PN=p0RR91

literal 0
HcmV?d1

diff --git a/tests/data/acpi/virt/IORT.memhp b/tests/data/acpi/virt/IORT.memhp
index 
e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..521acefe9ba66706c5607321a82d330586f3f280
 100644
GIT binary patch
literal 124
zcmebD4+^Pa00MR=e`k+i1*eDrX9XZ&1PX!JAesq?4S*O7Bw!2(4Uz`|CKCt^;wu0#
QRGb+i3L*dhhtM#y0PN=p0RR91

literal 0
HcmV?d1

diff --git a/tests/data/acpi/virt/IORT.numamem 
b/tests/data/acpi/virt/IORT.numamem
index 
e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..521acefe9ba66706c5607321a82d330586f3f280
 100644
GIT binary patch
literal 124
zcmebD4+^Pa00MR=e`k+i1*eDrX9XZ&1PX!JAesq?4S*O7Bw!2(4Uz`|CKCt^;wu0#
QRGb+i3L*dhhtM#y0PN=p0RR91

[PATCH v6 08/10] tests/data/acpi/virt: Add IORT files for ITS

2021-07-06 Thread Shashi Mallela

Added expected IORT files applicable with latest GICv3
ITS changes. Temporary differences in these files are
okay.

Signed-off-by: Shashi Mallela 
---
 tests/data/acpi/virt/IORT   | 0
 tests/data/acpi/virt/IORT.memhp | 0
 tests/data/acpi/virt/IORT.numamem   | 0
 tests/data/acpi/virt/IORT.pxb   | 0
 tests/qtest/bios-tables-test-allowed-diff.h | 4 
 5 files changed, 4 insertions(+)
 create mode 100644 tests/data/acpi/virt/IORT
 create mode 100644 tests/data/acpi/virt/IORT.memhp
 create mode 100644 tests/data/acpi/virt/IORT.numamem
 create mode 100644 tests/data/acpi/virt/IORT.pxb

diff --git a/tests/data/acpi/virt/IORT b/tests/data/acpi/virt/IORT
new file mode 100644
index 00..e69de29bb2
diff --git a/tests/data/acpi/virt/IORT.memhp b/tests/data/acpi/virt/IORT.memhp
new file mode 100644
index 00..e69de29bb2
diff --git a/tests/data/acpi/virt/IORT.numamem 
b/tests/data/acpi/virt/IORT.numamem
new file mode 100644
index 00..e69de29bb2
diff --git a/tests/data/acpi/virt/IORT.pxb b/tests/data/acpi/virt/IORT.pxb
new file mode 100644
index 00..e69de29bb2
diff --git a/tests/qtest/bios-tables-test-allowed-diff.h 
b/tests/qtest/bios-tables-test-allowed-diff.h
index dfb8523c8b..2ef211df59 100644
--- a/tests/qtest/bios-tables-test-allowed-diff.h
+++ b/tests/qtest/bios-tables-test-allowed-diff.h
@@ -1 +1,5 @@
 /* List of comma-separated changed AML files to ignore */
+"tests/data/acpi/virt/IORT",
+"tests/data/acpi/virt/IORT.memhp",
+"tests/data/acpi/virt/IORT.numamem",
+"tests/data/acpi/virt/IORT.pxb",
-- 
2.27.0

[PATCH v6 07/10] hw/arm/sbsa-ref: add ITS support in SBSA GIC

2021-07-06 Thread Shashi Mallela

Included creation of ITS as part of SBSA platform GIC
initialization.

Signed-off-by: Shashi Mallela 
---
 hw/arm/sbsa-ref.c | 26 +++---
 1 file changed, 23 insertions(+), 3 deletions(-)

diff --git a/hw/arm/sbsa-ref.c b/hw/arm/sbsa-ref.c
index 43c19b4923..3d9c073636 100644
--- a/hw/arm/sbsa-ref.c
+++ b/hw/arm/sbsa-ref.c
@@ -34,7 +34,7 @@
 #include "hw/boards.h"
 #include "hw/ide/internal.h"
 #include "hw/ide/ahci_internal.h"
-#include "hw/intc/arm_gicv3_common.h"
+#include "hw/intc/arm_gicv3_its_common.h"
 #include "hw/loader.h"
 #include "hw/pci-host/gpex.h"
 #include "hw/qdev-properties.h"
@@ -64,6 +64,7 @@ enum {
 SBSA_CPUPERIPHS,
 SBSA_GIC_DIST,
 SBSA_GIC_REDIST,
+SBSA_GIC_ITS,
 SBSA_SECURE_EC,
 SBSA_GWDT,
 SBSA_GWDT_REFRESH,
@@ -107,6 +108,7 @@ static const MemMapEntry sbsa_ref_memmap[] = {
 [SBSA_CPUPERIPHS] = { 0x4000, 0x0004 },
 [SBSA_GIC_DIST] =   { 0x4006, 0x0001 },
 [SBSA_GIC_REDIST] = { 0x4008, 0x0400 },
+[SBSA_GIC_ITS] ={ 0x4409, 0x0002 },
 [SBSA_SECURE_EC] =  { 0x5000, 0x1000 },
 [SBSA_GWDT_REFRESH] =   { 0x5001, 0x1000 },
 [SBSA_GWDT_CONTROL] =   { 0x50011000, 0x1000 },
@@ -377,7 +379,20 @@ static void create_secure_ram(SBSAMachineState *sms,
 memory_region_add_subregion(secure_sysmem, base, secram);
 }
 
-static void create_gic(SBSAMachineState *sms)
+static void create_its(SBSAMachineState *sms)
+{
+DeviceState *dev;
+
+dev = qdev_new(TYPE_ARM_GICV3_ITS);
+SysBusDevice *s = SYS_BUS_DEVICE(dev);
+
+object_property_set_link(OBJECT(dev), "parent-gicv3", OBJECT(sms->gic),
+ &error_abort);
+sysbus_realize_and_unref(s, &error_fatal);
+sysbus_mmio_map(s, 0, sbsa_ref_memmap[SBSA_GIC_ITS].base);
+}
+
+static void create_gic(SBSAMachineState *sms, MemoryRegion *mem)
 {
 unsigned int smp_cpus = MACHINE(sms)->smp.cpus;
 SysBusDevice *gicbusdev;
@@ -404,6 +419,10 @@ static void create_gic(SBSAMachineState *sms)
 qdev_prop_set_uint32(sms->gic, "len-redist-region-count", 1);
 qdev_prop_set_uint32(sms->gic, "redist-region-count[0]", redist0_count);
 
+object_property_set_link(OBJECT(sms->gic), "sysmem", OBJECT(mem),
+ &error_fatal);
+qdev_prop_set_bit(sms->gic, "has-lpi", true);
+
 gicbusdev = SYS_BUS_DEVICE(sms->gic);
 sysbus_realize_and_unref(gicbusdev, &error_fatal);
 sysbus_mmio_map(gicbusdev, 0, sbsa_ref_memmap[SBSA_GIC_DIST].base);
@@ -450,6 +469,7 @@ static void create_gic(SBSAMachineState *sms)
 sysbus_connect_irq(gicbusdev, i + 3 * smp_cpus,
qdev_get_gpio_in(cpudev, ARM_CPU_VFIQ));
 }
+create_its(sms);
 }
 
 static void create_uart(const SBSAMachineState *sms, int uart,
@@ -762,7 +782,7 @@ static void sbsa_ref_init(MachineState *machine)
 
 create_secure_ram(sms, secure_sysmem);
 
-create_gic(sms);
+create_gic(sms, sysmem);
 
 create_uart(sms, SBSA_UART, sysmem, serial_hd(0));
 create_uart(sms, SBSA_SECURE_UART, secure_sysmem, serial_hd(1));
-- 
2.27.0

[PATCH v6 06/10] hw/intc: GICv3 redistributor ITS processing

2021-07-06 Thread Shashi Mallela

Implemented LPI processing at the redistributor to get LPI config info
from the LPI configuration table, determine priority, set pending state in
the LPI pending table and forward the LPI to cpuif. Added logic to invoke
redistributor LPI processing with the translated LPI, which sets/clears the
LPI from the ITS device as part of ITS INT, CLEAR, DISCARD command and
GITS_TRANSLATER processing.

Signed-off-by: Shashi Mallela 
---
 hw/intc/arm_gicv3.c|  14 +++
 hw/intc/arm_gicv3_common.c |   1 +
 hw/intc/arm_gicv3_cpuif.c  |   7 +-
 hw/intc/arm_gicv3_its.c|  23 +
 hw/intc/arm_gicv3_redist.c | 142 +
 hw/intc/gicv3_internal.h   |   9 ++
 include/hw/intc/arm_gicv3_common.h |   7 ++
 7 files changed, 201 insertions(+), 2 deletions(-)

diff --git a/hw/intc/arm_gicv3.c b/hw/intc/arm_gicv3.c
index d63f8af604..3f24707838 100644
--- a/hw/intc/arm_gicv3.c
+++ b/hw/intc/arm_gicv3.c
@@ -165,6 +165,16 @@ static void gicv3_redist_update_noirqset(GICv3CPUState *cs)
 cs->hppi.grp = gicv3_irq_group(cs->gic, cs, cs->hppi.irq);
 }
 
+if ((cs->gicr_ctlr & GICR_CTLR_ENABLE_LPIS) && cs->gic->lpi_enable &&
+(cs->hpplpi.prio != 0xff)) {
+if (irqbetter(cs, cs->hpplpi.irq, cs->hpplpi.prio)) {
+cs->hppi.irq = cs->hpplpi.irq;
+cs->hppi.prio = cs->hpplpi.prio;
+cs->hppi.grp = cs->hpplpi.grp;
+seenbetter = true;
+}
+}
+
 /* If the best interrupt we just found would preempt whatever
  * was the previous best interrupt before this update, then
  * we know it's definitely the best one now.
@@ -339,9 +349,13 @@ static void gicv3_set_irq(void *opaque, int irq, int level)
 
 static void arm_gicv3_post_load(GICv3State *s)
 {
+int i;
 /* Recalculate our cached idea of the current highest priority
  * pending interrupt, but don't set IRQ or FIQ lines.
  */
+for (i = 0; i < s->num_cpu; i++) {
+gicv3_redist_update_lpi(&s->cpu[i]);
+}
 gicv3_full_update_noirqset(s);
 /* Repopulate the cache of GICv3CPUState pointers for target CPUs */
 gicv3_cache_all_target_cpustates(s);
diff --git a/hw/intc/arm_gicv3_common.c b/hw/intc/arm_gicv3_common.c
index 53dea2a775..223db16fec 100644
--- a/hw/intc/arm_gicv3_common.c
+++ b/hw/intc/arm_gicv3_common.c
@@ -435,6 +435,7 @@ static void arm_gicv3_common_reset(DeviceState *dev)
 memset(cs->gicr_ipriorityr, 0, sizeof(cs->gicr_ipriorityr));
 
 cs->hppi.prio = 0xff;
+cs->hpplpi.prio = 0xff;
 
 /* State in the CPU interface must *not* be reset here, because it
  * is part of the CPU's reset domain, not the GIC device's.
diff --git a/hw/intc/arm_gicv3_cpuif.c b/hw/intc/arm_gicv3_cpuif.c
index 3e0641aff9..184b92e7de 100644
--- a/hw/intc/arm_gicv3_cpuif.c
+++ b/hw/intc/arm_gicv3_cpuif.c
@@ -899,10 +899,12 @@ static void icc_activate_irq(GICv3CPUState *cs, int irq)
 cs->gicr_iactiver0 = deposit32(cs->gicr_iactiver0, irq, 1, 1);
 cs->gicr_ipendr0 = deposit32(cs->gicr_ipendr0, irq, 1, 0);
 gicv3_redist_update(cs);
-} else {
+} else if (irq < GICV3_LPI_INTID_START) {
 gicv3_gicd_active_set(cs->gic, irq);
 gicv3_gicd_pending_clear(cs->gic, irq);
 gicv3_update(cs->gic, irq, 1);
+} else {
+gicv3_redist_lpi_pending(cs, irq, 0);
 }
 }
 
@@ -1318,7 +1320,8 @@ static void icc_eoir_write(CPUARMState *env, const 
ARMCPRegInfo *ri,
 trace_gicv3_icc_eoir_write(is_eoir0 ? 0 : 1,
gicv3_redist_affid(cs), value);
 
-if (irq >= cs->gic->num_irq) {
+if ((irq >= cs->gic->num_irq) &&
+!(cs->gic->lpi_enable && (irq >= GICV3_LPI_INTID_START))) {
 /* This handles two cases:
  * 1. If software writes the ID of a spurious interrupt [ie 1020-1023]
  * to the GICC_EOIR, the GIC ignores that write.
diff --git a/hw/intc/arm_gicv3_its.c b/hw/intc/arm_gicv3_its.c
index bf92a8d0f1..20f85a3759 100644
--- a/hw/intc/arm_gicv3_its.c
+++ b/hw/intc/arm_gicv3_its.c
@@ -226,6 +226,7 @@ static MemTxResult process_its_cmd(GICv3ITSState *s, 
uint64_t value,
 bool ite_valid = false;
 uint64_t cte = 0;
 bool cte_valid = false;
+uint64_t rdbase;
 IteEntry ite;
 
 if (cmd == NONE) {
@@ -287,6 +288,18 @@ static MemTxResult process_its_cmd(GICv3ITSState *s, 
uint64_t value,
  * Current implementation only supports rdbase == procnum
  * Hence rdbase physical address is ignored
  */
+rdbase = (cte & GITS_CTE_RDBASE_PROCNUM_MASK) >> 1U;
+
+if (rdbase > s->gicv3->num_cpu) {
+return res;
+}
+
+if ((cmd == CLEAR) || (cmd == DISCARD)) {
+gicv3_redist_process_lpi(&s->gicv3->cpu[rdbase], pIntid, 0);
+} else {
+gicv3_redist_process_lpi(&s->gicv3->cpu[rdbase], pIntid, 1);
+}
+
 if (cmd == DISCARD) {
 memset(&ite, 0 , sizeof(ite));
 /* re

[PATCH v6 05/10] hw/intc: GICv3 ITS Feature enablement

2021-07-06 Thread Shashi Mallela

Added properties to enable the ITS feature and define qemu system
address space memory in gicv3 common, set up distributor and
redistributor registers to indicate LPI support.

Signed-off-by: Shashi Mallela 
Reviewed-by: Peter Maydell 
---
 hw/intc/arm_gicv3_common.c | 12 
 hw/intc/arm_gicv3_dist.c   |  5 -
 hw/intc/arm_gicv3_redist.c | 12 +---
 hw/intc/gicv3_internal.h   |  2 ++
 include/hw/intc/arm_gicv3_common.h |  1 +
 5 files changed, 28 insertions(+), 4 deletions(-)

diff --git a/hw/intc/arm_gicv3_common.c b/hw/intc/arm_gicv3_common.c
index 58ef65f589..53dea2a775 100644
--- a/hw/intc/arm_gicv3_common.c
+++ b/hw/intc/arm_gicv3_common.c
@@ -345,6 +345,11 @@ static void arm_gicv3_common_realize(DeviceState *dev, 
Error **errp)
 return;
 }
 
+if (s->lpi_enable && !s->dma) {
+error_setg(errp, "Redist-ITS: Guest 'sysmem' reference link not set");
+return;
+}
+
 s->cpu = g_new0(GICv3CPUState, s->num_cpu);
 
 for (i = 0; i < s->num_cpu; i++) {
@@ -381,6 +386,10 @@ static void arm_gicv3_common_realize(DeviceState *dev, 
Error **errp)
 (1 << 24) |
 (i << 8) |
 (last << 4);
+
+if (s->lpi_enable) {
+s->cpu[i].gicr_typer |= GICR_TYPER_PLPIS;
+}
 }
 }
 
@@ -494,9 +503,12 @@ static Property arm_gicv3_common_properties[] = {
 DEFINE_PROP_UINT32("num-cpu", GICv3State, num_cpu, 1),
 DEFINE_PROP_UINT32("num-irq", GICv3State, num_irq, 32),
 DEFINE_PROP_UINT32("revision", GICv3State, revision, 3),
+DEFINE_PROP_BOOL("has-lpi", GICv3State, lpi_enable, 0),
 DEFINE_PROP_BOOL("has-security-extensions", GICv3State, security_extn, 0),
 DEFINE_PROP_ARRAY("redist-region-count", GICv3State, nb_redist_regions,
   redist_region_count, qdev_prop_uint32, uint32_t),
+DEFINE_PROP_LINK("sysmem", GICv3State, dma, TYPE_MEMORY_REGION,
+ MemoryRegion *),
 DEFINE_PROP_END_OF_LIST(),
 };
 
diff --git a/hw/intc/arm_gicv3_dist.c b/hw/intc/arm_gicv3_dist.c
index b65f56f903..43128b376d 100644
--- a/hw/intc/arm_gicv3_dist.c
+++ b/hw/intc/arm_gicv3_dist.c
@@ -371,7 +371,9 @@ static MemTxResult gicd_readl(GICv3State *s, hwaddr offset,
  * A3V == 1 (non-zero values of Affinity level 3 supported)
  * IDbits == 0xf (we support 16-bit interrupt identifiers)
  * DVIS == 0 (Direct virtual LPI injection not supported)
- * LPIS == 0 (LPIs not supported)
+ * LPIS == 1 (LPIs are supported if affinity routing is enabled)
+ * num_LPIs == 0b0 (bits [15:11],Number of LPIs as indicated
+ *  by GICD_TYPER.IDbits)
  * MBIS == 0 (message-based SPIs not supported)
  * SecurityExtn == 1 if security extns supported
  * CPUNumber == 0 since for us ARE is always 1
@@ -386,6 +388,7 @@ static MemTxResult gicd_readl(GICv3State *s, hwaddr offset,
 bool sec_extn = !(s->gicd_ctlr & GICD_CTLR_DS);
 
 *data = (1 << 25) | (1 << 24) | (sec_extn << 10) |
+(s->lpi_enable << GICD_TYPER_LPIS_SHIFT) |
 (0xf << 19) | itlinesnumber;
 return MEMTX_OK;
 }
diff --git a/hw/intc/arm_gicv3_redist.c b/hw/intc/arm_gicv3_redist.c
index 8645220d61..fc3d95dcc6 100644
--- a/hw/intc/arm_gicv3_redist.c
+++ b/hw/intc/arm_gicv3_redist.c
@@ -248,10 +248,16 @@ static MemTxResult gicr_writel(GICv3CPUState *cs, hwaddr 
offset,
 case GICR_CTLR:
 /* For our implementation, GICR_TYPER.DPGS is 0 and so all
  * the DPG bits are RAZ/WI. We don't do anything asynchronously,
- * so UWP and RWP are RAZ/WI. And GICR_TYPER.LPIS is 0 (we don't
- * implement LPIs) so Enable_LPIs is RES0. So there are no writable
- * bits for us.
+ * so UWP and RWP are RAZ/WI. GICR_TYPER.LPIS is 1 (we
+ * implement LPIs) so Enable_LPIs is programmable.
  */
+if (cs->gicr_typer & GICR_TYPER_PLPIS) {
+if (value & GICR_CTLR_ENABLE_LPIS) {
+cs->gicr_ctlr |= GICR_CTLR_ENABLE_LPIS;
+} else {
+cs->gicr_ctlr &= ~GICR_CTLR_ENABLE_LPIS;
+}
+}
 return MEMTX_OK;
 case GICR_STATUSR:
 /* RAZ/WI for our implementation */
diff --git a/hw/intc/gicv3_internal.h b/hw/intc/gicv3_internal.h
index 6e0343b0e2..b27e25dea3 100644
--- a/hw/intc/gicv3_internal.h
+++ b/hw/intc/gicv3_internal.h
@@ -68,6 +68,8 @@
 #define GICD_CTLR_E1NWF (1U << 7)
 #define GICD_CTLR_RWP   (1U << 31)
 
+#define GICD_TYPER_LPIS_SHIFT  17
+
 /* 16 bits EventId */
 #define GICD_TYPER_IDBITS0xf
 
diff --git a/include/hw/intc/arm_gicv3_common.h 
b/include/hw/intc/arm_gicv3_common.h
index 0715b0bc2a..c1348cc60a 100644
--- a/include/hw/intc/arm_gicv3_common.h
+++ b/include/hw/intc/arm_gicv3_common.h
@@ -221,6 +221,7 @@ struct GICv3State {
 uint32_t num_cpu;
 uint32_t num_irq;
 uint32_t r

[PATCH v6 01/10] hw/intc: GICv3 ITS initial framework

2021-07-06 Thread Shashi Mallela

Added register definitions relevant to ITS, implemented overall
ITS device framework with stubs for ITS control and translater
regions read/write, extended ITS common to handle mmio init between
existing kvm device and newer qemu device.

Signed-off-by: Shashi Mallela 
Reviewed-by: Peter Maydell 
---
 hw/intc/arm_gicv3_its.c| 245 +
 hw/intc/arm_gicv3_its_common.c |   7 +-
 hw/intc/arm_gicv3_its_kvm.c|   2 +-
 hw/intc/gicv3_internal.h   |  97 +-
 hw/intc/meson.build|   1 +
 include/hw/intc/arm_gicv3_its_common.h |   9 +-
 6 files changed, 347 insertions(+), 14 deletions(-)
 create mode 100644 hw/intc/arm_gicv3_its.c

diff --git a/hw/intc/arm_gicv3_its.c b/hw/intc/arm_gicv3_its.c
new file mode 100644
index 00..2286b3f757
--- /dev/null
+++ b/hw/intc/arm_gicv3_its.c
@@ -0,0 +1,245 @@
+/*
+ * ITS emulation for a GICv3-based system
+ *
+ * Copyright Linaro.org 2021
+ *
+ * Authors:
+ *  Shashi Mallela 
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or (at your
+ * option) any later version.  See the COPYING file in the top-level directory.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/log.h"
+#include "hw/qdev-properties.h"
+#include "hw/intc/arm_gicv3_its_common.h"
+#include "gicv3_internal.h"
+#include "qom/object.h"
+#include "qapi/error.h"
+
+typedef struct GICv3ITSClass GICv3ITSClass;
+/* This is reusing the GICv3ITSState typedef from ARM_GICV3_ITS_COMMON */
+DECLARE_OBJ_CHECKERS(GICv3ITSState, GICv3ITSClass,
+ ARM_GICV3_ITS, TYPE_ARM_GICV3_ITS)
+
+struct GICv3ITSClass {
+GICv3ITSCommonClass parent_class;
+void (*parent_reset)(DeviceState *dev);
+};
+
+static MemTxResult gicv3_its_translation_write(void *opaque, hwaddr offset,
+   uint64_t data, unsigned size,
+   MemTxAttrs attrs)
+{
+MemTxResult result = MEMTX_OK;
+
+return result;
+}
+
+static MemTxResult its_writel(GICv3ITSState *s, hwaddr offset,
+  uint64_t value, MemTxAttrs attrs)
+{
+MemTxResult result = MEMTX_OK;
+
+return result;
+}
+
+static MemTxResult its_readl(GICv3ITSState *s, hwaddr offset,
+ uint64_t *data, MemTxAttrs attrs)
+{
+MemTxResult result = MEMTX_OK;
+
+return result;
+}
+
+static MemTxResult its_writell(GICv3ITSState *s, hwaddr offset,
+   uint64_t value, MemTxAttrs attrs)
+{
+MemTxResult result = MEMTX_OK;
+
+return result;
+}
+
+static MemTxResult its_readll(GICv3ITSState *s, hwaddr offset,
+  uint64_t *data, MemTxAttrs attrs)
+{
+MemTxResult result = MEMTX_OK;
+
+return result;
+}
+
+static MemTxResult gicv3_its_read(void *opaque, hwaddr offset, uint64_t *data,
+  unsigned size, MemTxAttrs attrs)
+{
+GICv3ITSState *s = (GICv3ITSState *)opaque;
+MemTxResult result;
+
+switch (size) {
+case 4:
+result = its_readl(s, offset, data, attrs);
+break;
+case 8:
+result = its_readll(s, offset, data, attrs);
+break;
+default:
+result = MEMTX_ERROR;
+break;
+}
+
+if (result == MEMTX_ERROR) {
+qemu_log_mask(LOG_GUEST_ERROR,
+  "%s: invalid guest read at offset " TARGET_FMT_plx
+  "size %u\n", __func__, offset, size);
+/*
+ * The spec requires that reserved registers are RAZ/WI;
+ * so use MEMTX_ERROR returns from leaf functions as a way to
+ * trigger the guest-error logging but don't return it to
+ * the caller, or we'll cause a spurious guest data abort.
+ */
+result = MEMTX_OK;
+*data = 0;
+}
+return result;
+}
+
+static MemTxResult gicv3_its_write(void *opaque, hwaddr offset, uint64_t data,
+   unsigned size, MemTxAttrs attrs)
+{
+GICv3ITSState *s = (GICv3ITSState *)opaque;
+MemTxResult result;
+
+switch (size) {
+case 4:
+result = its_writel(s, offset, data, attrs);
+break;
+case 8:
+result = its_writell(s, offset, data, attrs);
+break;
+default:
+result = MEMTX_ERROR;
+break;
+}
+
+if (result == MEMTX_ERROR) {
+qemu_log_mask(LOG_GUEST_ERROR,
+  "%s: invalid guest write at offset " TARGET_FMT_plx
+  "size %u\n", __func__, offset, size);
+/*
+ * The spec requires that reserved registers are RAZ/WI;
+ * so use MEMTX_ERROR returns from leaf functions as a way to
+ * trigger the guest-error logging but don't return it to
+ * the caller, or we'll cause a spurious guest data abort.
+ */
+result = MEMTX_OK;
+}
+return result;
+}
+
+static const MemoryRegionOps gicv3_its_control_ops = {
+.read_w

[PATCH v6 03/10] hw/intc: GICv3 ITS command queue framework

2021-07-06 Thread Shashi Mallela

Added functionality to trigger ITS command queue processing on
write to CWRITER register and process each command queue entry to
identify the command type and handle commands like MAPD, MAPC, SYNC.

Signed-off-by: Shashi Mallela 
Reviewed-by: Peter Maydell 
Reviewed-by: Eric Auger 
---
 hw/intc/arm_gicv3_its.c  | 305 +++
 hw/intc/gicv3_internal.h |  40 +
 2 files changed, 345 insertions(+)

diff --git a/hw/intc/arm_gicv3_its.c b/hw/intc/arm_gicv3_its.c
index 683d40182f..05cdc15e77 100644
--- a/hw/intc/arm_gicv3_its.c
+++ b/hw/intc/arm_gicv3_its.c
@@ -50,6 +50,304 @@ static uint64_t baser_base_addr(uint64_t value, uint32_t 
page_sz)
 return result;
 }
 
+static MemTxResult update_cte(GICv3ITSState *s, uint16_t icid, bool valid,
+  uint64_t rdbase)
+{
+AddressSpace *as = &s->gicv3->dma_as;
+uint64_t value;
+uint64_t l2t_addr;
+bool valid_l2t;
+uint32_t l2t_id;
+uint32_t max_l2_entries;
+uint64_t cte = 0;
+MemTxResult res = MEMTX_OK;
+
+if (!s->ct.valid) {
+return res;
+}
+
+if (valid) {
+/* add mapping entry to collection table */
+cte = (valid & TABLE_ENTRY_VALID_MASK) | (rdbase << 1ULL);
+}
+
+/*
+ * The specification defines the format of level 1 entries of a
+ * 2-level table, but the format of level 2 entries and the format
+ * of flat-mapped tables is IMPDEF.
+ */
+if (s->ct.indirect) {
+l2t_id = icid / (s->ct.page_sz / L1TABLE_ENTRY_SIZE);
+
+value = address_space_ldq_le(as,
+ s->ct.base_addr +
+ (l2t_id * L1TABLE_ENTRY_SIZE),
+ MEMTXATTRS_UNSPECIFIED, &res);
+
+if (res != MEMTX_OK) {
+return res;
+}
+
+valid_l2t = (value & L2_TABLE_VALID_MASK) != 0;
+
+if (valid_l2t) {
+max_l2_entries = s->ct.page_sz / s->ct.entry_sz;
+
+l2t_addr = value & ((1ULL << 51) - 1);
+
+address_space_stq_le(as, l2t_addr +
+ ((icid % max_l2_entries) * GITS_CTE_SIZE),
+ cte, MEMTXATTRS_UNSPECIFIED, &res);
+}
+} else {
+/* Flat level table */
+address_space_stq_le(as, s->ct.base_addr + (icid * GITS_CTE_SIZE),
+ cte, MEMTXATTRS_UNSPECIFIED, &res);
+}
+return res;
+}
+
+static MemTxResult process_mapc(GICv3ITSState *s, uint32_t offset)
+{
+AddressSpace *as = &s->gicv3->dma_as;
+uint16_t icid;
+uint64_t rdbase;
+bool valid;
+MemTxResult res = MEMTX_OK;
+uint64_t value;
+
+offset += NUM_BYTES_IN_DW;
+offset += NUM_BYTES_IN_DW;
+
+value = address_space_ldq_le(as, s->cq.base_addr + offset,
+ MEMTXATTRS_UNSPECIFIED, &res);
+
+if (res != MEMTX_OK) {
+return res;
+}
+
+icid = value & ICID_MASK;
+
+rdbase = (value >> R_MAPC_RDBASE_SHIFT) & RDBASE_PROCNUM_MASK;
+
+valid = (value & CMD_FIELD_VALID_MASK);
+
+if ((icid > s->ct.maxids.max_collids) || (rdbase > s->gicv3->num_cpu)) {
+qemu_log_mask(LOG_GUEST_ERROR,
+  "ITS MAPC: invalid collection table attributes "
+  "icid %d rdbase %lu\n",  icid, rdbase);
+/*
+ * in this implementation, in case of error
+ * we ignore this command and move onto the next
+ * command in the queue
+ */
+} else {
+res = update_cte(s, icid, valid, rdbase);
+}
+
+return res;
+}
+
+static MemTxResult update_dte(GICv3ITSState *s, uint32_t devid, bool valid,
+  uint8_t size, uint64_t itt_addr)
+{
+AddressSpace *as = &s->gicv3->dma_as;
+uint64_t value;
+uint64_t l2t_addr;
+bool valid_l2t;
+uint32_t l2t_id;
+uint32_t max_l2_entries;
+uint64_t dte = 0;
+MemTxResult res = MEMTX_OK;
+
+if (s->dt.valid) {
+if (valid) {
+/* add mapping entry to device table */
+dte = (valid & TABLE_ENTRY_VALID_MASK) |
+  ((size & SIZE_MASK) << 1U) |
+  (itt_addr << GITS_DTE_ITTADDR_SHIFT);
+}
+} else {
+return res;
+}
+
+/*
+ * The specification defines the format of level 1 entries of a
+ * 2-level table, but the format of level 2 entries and the format
+ * of flat-mapped tables is IMPDEF.
+ */
+if (s->dt.indirect) {
+l2t_id = devid / (s->dt.page_sz / L1TABLE_ENTRY_SIZE);
+
+value = address_space_ldq_le(as,
+ s->dt.base_addr +
+ (l2t_id * L1TABLE_ENTRY_SIZE),
+ MEMTXATTRS_UNSPECIFIED, &res);
+
+if (res != MEMTX_OK) {
+return res;
+}
+
+valid_l2t = (value & L2_TABLE_VALID_MASK) != 0;
+
+if (valid_l2t) {
+

[PATCH v6 04/10] hw/intc: GICv3 ITS Command processing

2021-07-06 Thread Shashi Mallela

Added ITS command queue handling for MAPTI, MAPI commands, handled ITS
translation which triggers an LPI via INT command as well as write
to GITS_TRANSLATER register, defined enum to differentiate between ITS
command interrupt trigger and GITS_TRANSLATER based interrupt trigger.
Each of these commands makes use of other functionalities implemented to
get device table entry, collection table entry or interrupt translation
table entry required for their processing.

Signed-off-by: Shashi Mallela 
---
 hw/intc/arm_gicv3_its.c| 354 -
 hw/intc/gicv3_internal.h   |  12 +
 include/hw/intc/arm_gicv3_common.h |   2 +
 3 files changed, 367 insertions(+), 1 deletion(-)

diff --git a/hw/intc/arm_gicv3_its.c b/hw/intc/arm_gicv3_its.c
index 05cdc15e77..bf92a8d0f1 100644
--- a/hw/intc/arm_gicv3_its.c
+++ b/hw/intc/arm_gicv3_its.c
@@ -29,6 +29,22 @@ struct GICv3ITSClass {
 void (*parent_reset)(DeviceState *dev);
 };
 
+/*
+ * This is an internal enum used to distinguish between LPI triggered
+ * via command queue and LPI triggered via gits_translater write.
+ */
+typedef enum ItsCmdType {
+NONE = 0, /* internal indication for GITS_TRANSLATER write */
+CLEAR = 1,
+DISCARD = 2,
+INT = 3,
+} ItsCmdType;
+
+typedef struct {
+uint32_t iteh;
+uint64_t itel;
+} IteEntry;
+
 static uint64_t baser_base_addr(uint64_t value, uint32_t page_sz)
 {
 uint64_t result = 0;
@@ -50,6 +66,323 @@ static uint64_t baser_base_addr(uint64_t value, uint32_t 
page_sz)
 return result;
 }
 
+static bool get_cte(GICv3ITSState *s, uint16_t icid, uint64_t *cte,
+MemTxResult *res)
+{
+AddressSpace *as = &s->gicv3->dma_as;
+uint64_t l2t_addr;
+uint64_t value;
+bool valid_l2t;
+uint32_t l2t_id;
+uint32_t max_l2_entries;
+
+if (s->ct.indirect) {
+l2t_id = icid / (s->ct.page_sz / L1TABLE_ENTRY_SIZE);
+
+value = address_space_ldq_le(as,
+ s->ct.base_addr +
+ (l2t_id * L1TABLE_ENTRY_SIZE),
+ MEMTXATTRS_UNSPECIFIED, res);
+
+if (*res == MEMTX_OK) {
+valid_l2t = (value & L2_TABLE_VALID_MASK) != 0;
+
+if (valid_l2t) {
+max_l2_entries = s->ct.page_sz / s->ct.entry_sz;
+
+l2t_addr = value & ((1ULL << 51) - 1);
+
+*cte =  address_space_ldq_le(as, l2t_addr +
+((icid % max_l2_entries) * GITS_CTE_SIZE),
+MEMTXATTRS_UNSPECIFIED, res);
+   }
+   }
+} else {
+/* Flat level table */
+*cte =  address_space_ldq_le(as, s->ct.base_addr +
+ (icid * GITS_CTE_SIZE),
+  MEMTXATTRS_UNSPECIFIED, res);
+}
+
+return (*cte & TABLE_ENTRY_VALID_MASK) != 0;
+}
+
+static MemTxResult update_ite(GICv3ITSState *s, uint32_t eventid, uint64_t dte,
+  IteEntry ite)
+{
+AddressSpace *as = &s->gicv3->dma_as;
+uint64_t itt_addr;
+MemTxResult res = MEMTX_OK;
+
+itt_addr = (dte & GITS_DTE_ITTADDR_MASK) >> GITS_DTE_ITTADDR_SHIFT;
+itt_addr <<= ITTADDR_SHIFT; /* 256 byte aligned */
+
+address_space_stq_le(as, itt_addr + (eventid * sizeof(uint64_t)),
+ ite.itel, MEMTXATTRS_UNSPECIFIED, &res);
+
+if (res == MEMTX_OK) {
+address_space_stl_le(as, itt_addr + ((eventid + sizeof(uint64_t)) *
+ sizeof(uint32_t)), ite.iteh,
+ MEMTXATTRS_UNSPECIFIED, &res);
+}
+   return res;
+}
+
+static bool get_ite(GICv3ITSState *s, uint32_t eventid, uint64_t dte,
+uint16_t *icid, uint32_t *pIntid, MemTxResult *res)
+{
+AddressSpace *as = &s->gicv3->dma_as;
+uint64_t itt_addr;
+bool status = false;
+IteEntry ite;
+
+itt_addr = (dte & GITS_DTE_ITTADDR_MASK) >> GITS_DTE_ITTADDR_SHIFT;
+itt_addr <<= ITTADDR_SHIFT; /* 256 byte aligned */
+
+memset(&ite, 0 , sizeof(ite));
+ite.itel = address_space_ldq_le(as, itt_addr +
+(eventid * sizeof(uint64_t)),
+MEMTXATTRS_UNSPECIFIED, res);
+
+if (*res == MEMTX_OK) {
+ite.iteh = address_space_ldl_le(as, itt_addr + ((eventid +
+sizeof(uint64_t)) * sizeof(uint32_t)),
+MEMTXATTRS_UNSPECIFIED, res);
+
+if (*res == MEMTX_OK) {
+if (ite.itel & TABLE_ENTRY_VALID_MASK) {
+if ((ite.itel >> ITE_ENTRY_INTTYPE_SHIFT) &
+GITS_TYPE_PHYSICAL) {
+*pIntid = (ite.itel & ITE_ENTRY_INTID_MASK) >>
+   ITE_ENTRY_INTID_SHIFT;
+*icid = ite.iteh & ITE_ENTRY_ICID_MASK;
+status = true;
+}
+}
+

[PATCH v6 09/10] hw/arm/virt: add ITS support in virt GIC

2021-07-06 Thread Shashi Mallela

Included creation of ITS as part of virt platform GIC
initialization. This Emulated ITS model now co-exists with kvm
ITS and is enabled in absence of kvm irq kernel support in a
platform.

Signed-off-by: Shashi Mallela 
Reviewed-by: Peter Maydell 
---
 hw/arm/virt.c | 28 ++--
 include/hw/arm/virt.h |  2 ++
 target/arm/kvm_arm.h  |  4 ++--
 3 files changed, 30 insertions(+), 4 deletions(-)

diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index 4b96f06014..1d8c44685c 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -583,6 +583,12 @@ static void create_its(VirtMachineState *vms)
 const char *itsclass = its_class_name();
 DeviceState *dev;
 
+if (!strcmp(itsclass, "arm-gicv3-its")) {
+if (!vms->tcg_its) {
+itsclass = NULL;
+}
+}
+
 if (!itsclass) {
 /* Do nothing if not supported */
 return;
@@ -620,7 +626,7 @@ static void create_v2m(VirtMachineState *vms)
 vms->msi_controller = VIRT_MSI_CTRL_GICV2M;
 }
 
-static void create_gic(VirtMachineState *vms)
+static void create_gic(VirtMachineState *vms, MemoryRegion *mem)
 {
 MachineState *ms = MACHINE(vms);
 /* We create a standalone GIC */
@@ -654,6 +660,14 @@ static void create_gic(VirtMachineState *vms)
  nb_redist_regions);
 qdev_prop_set_uint32(vms->gic, "redist-region-count[0]", 
redist0_count);
 
+if (!kvm_irqchip_in_kernel()) {
+if (vms->tcg_its) {
+object_property_set_link(OBJECT(vms->gic), "sysmem",
+ OBJECT(mem), &error_fatal);
+qdev_prop_set_bit(vms->gic, "has-lpi", true);
+}
+}
+
 if (nb_redist_regions == 2) {
 uint32_t redist1_capacity =
 vms->memmap[VIRT_HIGH_GIC_REDIST2].size / 
GICV3_REDIST_SIZE;
@@ -2039,7 +2053,7 @@ static void machvirt_init(MachineState *machine)
 
 virt_flash_fdt(vms, sysmem, secure_sysmem ?: sysmem);
 
-create_gic(vms);
+create_gic(vms, sysmem);
 
 virt_cpu_post_init(vms, sysmem);
 
@@ -2720,6 +2734,12 @@ static void virt_instance_init(Object *obj)
 } else {
 /* Default allows ITS instantiation */
 vms->its = true;
+
+if (vmc->no_tcg_its) {
+vms->tcg_its = false;
+} else {
+vms->tcg_its = true;
+}
 }
 
 /* Default disallows iommu instantiation */
@@ -2766,8 +2786,12 @@ DEFINE_VIRT_MACHINE_AS_LATEST(6, 1)
 
 static void virt_machine_6_0_options(MachineClass *mc)
 {
+VirtMachineClass *vmc = VIRT_MACHINE_CLASS(OBJECT_CLASS(mc));
+
 virt_machine_6_1_options(mc);
 compat_props_add(mc->compat_props, hw_compat_6_0, hw_compat_6_0_len);
+/* qemu ITS was introduced with 6.1 */
+vmc->no_tcg_its = true;
 }
 DEFINE_VIRT_MACHINE(6, 0)
 
diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h
index 921416f918..f873ab9068 100644
--- a/include/hw/arm/virt.h
+++ b/include/hw/arm/virt.h
@@ -120,6 +120,7 @@ struct VirtMachineClass {
 MachineClass parent;
 bool disallow_affinity_adjustment;
 bool no_its;
+bool no_tcg_its;
 bool no_pmu;
 bool claim_edge_triggered_timers;
 bool smbios_old_sys_ver;
@@ -141,6 +142,7 @@ struct VirtMachineState {
 bool highmem;
 bool highmem_ecam;
 bool its;
+bool tcg_its;
 bool virt;
 bool ras;
 bool mte;
diff --git a/target/arm/kvm_arm.h b/target/arm/kvm_arm.h
index 34f8daa377..0613454975 100644
--- a/target/arm/kvm_arm.h
+++ b/target/arm/kvm_arm.h
@@ -525,8 +525,8 @@ static inline const char *its_class_name(void)
 /* KVM implementation requires this capability */
 return kvm_direct_msi_enabled() ? "arm-its-kvm" : NULL;
 } else {
-/* Software emulation is not implemented yet */
-return NULL;
+/* Software emulation based model */
+return "arm-gicv3-its";
 }
 }
 
-- 
2.27.0

[PATCH v6 00/10] GICv3 LPI and ITS feature implementation

2021-07-06 Thread Shashi Mallela

This patchset implements qemu device model for enabling physical
LPI support and ITS functionality in GIC as per GICv3 specification.
Both flat table and 2 level tables are implemented. The ITS commands
for adding/deleting ITS table entries, triggering LPI interrupts are
implemented. Translated LPI interrupt ids are processed by redistributor
to determine priority and set pending state appropriately before
forwarding the same to cpu interface.
The ITS feature support has been added to sbsa-ref platform as well as
virt platform, wherein the emulated functionality co-exists with kvm
kernel functionality.

Changes in v6:
 - made changes to masking scheme being used in all relevant sections
 - updated process_mapti to validate idbits based on GICD_TYPER.IDbits
   instead of GICR_PROPBASER.IDbits
 - updated its_realize() to check every CPU for physical LPI support in
   each associated redistributor and return error if not
 - removed GICR_TYPER_PLPIS check in its_reset()
 - addressed pending v4 comments in gicv3_internal.h #defines
 - refactored gicv3_redist_lpi_pending() as per v5 comments
 - added iasl disassembly for new IORT.pxb file
 - addressed all v5 comments around code formatting and
   gicv3_internal.h #defines 
 - All kvm_unit_tests PASS

Shashi Mallela (10):
  hw/intc: GICv3 ITS initial framework
  hw/intc: GICv3 ITS register definitions added
  hw/intc: GICv3 ITS command queue framework
  hw/intc: GICv3 ITS Command processing
  hw/intc: GICv3 ITS Feature enablement
  hw/intc: GICv3 redistributor ITS processing
  hw/arm/sbsa-ref: add ITS support in SBSA GIC
  tests/data/acpi/virt: Add IORT files for ITS
  hw/arm/virt: add ITS support in virt GIC
  tests/data/acpi/virt: Update IORT files for ITS

 hw/arm/sbsa-ref.c  |   26 +-
 hw/arm/virt.c  |   28 +-
 hw/intc/arm_gicv3.c|   14 +
 hw/intc/arm_gicv3_common.c |   13 +
 hw/intc/arm_gicv3_cpuif.c  |7 +-
 hw/intc/arm_gicv3_dist.c   |5 +-
 hw/intc/arm_gicv3_its.c| 1301 
 hw/intc/arm_gicv3_its_common.c |7 +-
 hw/intc/arm_gicv3_its_kvm.c|2 +-
 hw/intc/arm_gicv3_redist.c |  154 ++-
 hw/intc/gicv3_internal.h   |  189 +++-
 hw/intc/meson.build|1 +
 include/hw/arm/virt.h  |2 +
 include/hw/intc/arm_gicv3_common.h |   13 +
 include/hw/intc/arm_gicv3_its_common.h |   32 +-
 target/arm/kvm_arm.h   |4 +-
 tests/data/acpi/virt/IORT  |  Bin 0 -> 124 bytes
 tests/data/acpi/virt/IORT.memhp|  Bin 0 -> 124 bytes
 tests/data/acpi/virt/IORT.numamem  |  Bin 0 -> 124 bytes
 tests/data/acpi/virt/IORT.pxb  |  Bin 0 -> 124 bytes
 20 files changed, 1771 insertions(+), 27 deletions(-)
 create mode 100644 hw/intc/arm_gicv3_its.c
 create mode 100644 tests/data/acpi/virt/IORT
 create mode 100644 tests/data/acpi/virt/IORT.memhp
 create mode 100644 tests/data/acpi/virt/IORT.numamem
 create mode 100644 tests/data/acpi/virt/IORT.pxb

-- 
2.27.0

[PATCH v6 02/10] hw/intc: GICv3 ITS register definitions added

2021-07-06 Thread Shashi Mallela

Defined descriptors for ITS device table, collection table and ITS
command queue entities. Implemented register read/write functions,
extract ITS table parameters and command queue parameters, extended
gicv3 common to capture qemu address space (which hosts the ITS table
platform memories required for subsequent ITS processing) and
initialize the same in ITS device.

Signed-off-by: Shashi Mallela 
Reviewed-by: Peter Maydell 
Reviewed-by: Eric Auger 
---
 hw/intc/arm_gicv3_its.c| 376 +
 hw/intc/gicv3_internal.h   |  29 ++
 include/hw/intc/arm_gicv3_common.h |   3 +
 include/hw/intc/arm_gicv3_its_common.h |  23 ++
 4 files changed, 431 insertions(+)

diff --git a/hw/intc/arm_gicv3_its.c b/hw/intc/arm_gicv3_its.c
index 2286b3f757..683d40182f 100644
--- a/hw/intc/arm_gicv3_its.c
+++ b/hw/intc/arm_gicv3_its.c
@@ -29,6 +29,160 @@ struct GICv3ITSClass {
 void (*parent_reset)(DeviceState *dev);
 };
 
+static uint64_t baser_base_addr(uint64_t value, uint32_t page_sz)
+{
+uint64_t result = 0;
+
+switch (page_sz) {
+case GITS_PAGE_SIZE_4K:
+case GITS_PAGE_SIZE_16K:
+result = FIELD_EX64(value, GITS_BASER, PHYADDR);
+break;
+
+case GITS_PAGE_SIZE_64K:
+result = FIELD_EX64(value, GITS_BASER, PHYADDRL_64K) << 16;
+result |= FIELD_EX64(value, GITS_BASER, PHYADDRH_64K) << 48;
+break;
+
+default:
+break;
+}
+return result;
+}
+
+/*
+ * This function extracts the ITS Device and Collection table specific
+ * parameters (like base_addr, size etc) from GITS_BASER register.
+ * It is called during ITS enable and also during post_load migration
+ */
+static void extract_table_params(GICv3ITSState *s)
+{
+uint16_t num_pages = 0;
+uint8_t  page_sz_type;
+uint8_t type;
+uint32_t page_sz = 0;
+uint64_t value;
+
+for (int i = 0; i < 8; i++) {
+value = s->baser[i];
+
+if (!value) {
+continue;
+}
+
+page_sz_type = FIELD_EX64(value, GITS_BASER, PAGESIZE);
+
+switch (page_sz_type) {
+case 0:
+page_sz = GITS_PAGE_SIZE_4K;
+break;
+
+case 1:
+page_sz = GITS_PAGE_SIZE_16K;
+break;
+
+case 2:
+case 3:
+page_sz = GITS_PAGE_SIZE_64K;
+break;
+
+default:
+g_assert_not_reached();
+}
+
+num_pages = FIELD_EX64(value, GITS_BASER, SIZE) + 1;
+
+type = FIELD_EX64(value, GITS_BASER, TYPE);
+
+switch (type) {
+
+case GITS_BASER_TYPE_DEVICE:
+memset(&s->dt, 0 , sizeof(s->dt));
+s->dt.valid = FIELD_EX64(value, GITS_BASER, VALID);
+
+if (!s->dt.valid) {
+return;
+}
+
+s->dt.page_sz = page_sz;
+s->dt.indirect = FIELD_EX64(value, GITS_BASER, INDIRECT);
+s->dt.entry_sz = FIELD_EX64(value, GITS_BASER, ENTRYSIZE);
+
+if (!s->dt.indirect) {
+s->dt.max_entries = (num_pages * page_sz) / s->dt.entry_sz;
+} else {
+s->dt.max_entries = (((num_pages * page_sz) /
+ L1TABLE_ENTRY_SIZE) *
+ (page_sz / s->dt.entry_sz));
+}
+
+s->dt.maxids.max_devids = (1UL << (FIELD_EX64(s->typer, GITS_TYPER,
+   DEVBITS) + 1));
+
+s->dt.base_addr = baser_base_addr(value, page_sz);
+
+break;
+
+case GITS_BASER_TYPE_COLLECTION:
+memset(&s->ct, 0 , sizeof(s->ct));
+s->ct.valid = FIELD_EX64(value, GITS_BASER, VALID);
+
+/*
+ * GITS_TYPER.HCC is 0 for this implementation
+ * hence writes are discarded if ct.valid is 0
+ */
+if (!s->ct.valid) {
+return;
+}
+
+s->ct.page_sz = page_sz;
+s->ct.indirect = FIELD_EX64(value, GITS_BASER, INDIRECT);
+s->ct.entry_sz = FIELD_EX64(value, GITS_BASER, ENTRYSIZE);
+
+if (!s->ct.indirect) {
+s->ct.max_entries = (num_pages * page_sz) / s->ct.entry_sz;
+} else {
+s->ct.max_entries = (((num_pages * page_sz) /
+ L1TABLE_ENTRY_SIZE) *
+ (page_sz / s->ct.entry_sz));
+}
+
+if (FIELD_EX64(s->typer, GITS_TYPER, CIL)) {
+s->ct.maxids.max_collids = (1UL << (FIELD_EX64(s->typer,
+GITS_TYPER, CIDBITS) + 1));
+} else {
+/* 16-bit CollectionId supported when CIL == 0 */
+s->ct.maxids.max_collids = (1UL << 16);
+}
+
+s->ct.base_addr = baser_base_addr(value, page_sz);
+
+break;
+
+default:
+break;
+}
+}
+}
+
+static void extract_c

Re: [PATCH 12/12] linux-user: Extract target errno related functions to 'target_errno.h'

2021-07-06 Thread Richard Henderson


On 7/4/21 11:37 AM, Philippe Mathieu-Daudé wrote:

Extract target errno related functions to a new 'target_errno.h'
header, so we can do the host <-> target errno conversion out of
the big syscall.c (which is already 13k LoC).

Signed-off-by: Philippe Mathieu-Daudé 
---
  linux-user/target_errno.h |  32 +++
  linux-user/syscall.c  | 162 +
  linux-user/target_errno.c | 183 ++
  linux-user/meson.build|   1 +
  4 files changed, 217 insertions(+), 161 deletions(-)
  create mode 100644 linux-user/target_errno.h
  create mode 100644 linux-user/target_errno.c


I guess this is just data movement, so it's ok.

But...


+/*
+ * target_to_host_errno_table[] is initialized from
+ * host_to_target_errno_table[] in target_to_host_errno_table_init().
+ */
+static uint16_t target_to_host_errno_table[ERRNO_TABLE_SIZE] = {
+};
+
+/*
+ * This list is the union of errno values overridden in asm-<arch>/errno.h
+ * minus the errnos that are not actually generic to all archs.
+ */
+static uint16_t host_to_target_errno_table[ERRNO_TABLE_SIZE] = {
+[EAGAIN]= TARGET_EAGAIN,
+[EIDRM] = TARGET_EIDRM,
+[ECHRNG]= TARGET_ECHRNG,


... there's enough pattern here to make it easy to initialize both of these at 
compile-time.  We just need to move the list out to a .c.inc file.


--%<

E(EAGAIN)
E(EIDRM)
...
#ifdef EFOO
E(EFOO)
#endif

--%<

static const uint16_t target_to_host_errno_table[] = {
#define E(X)  [TARGET_##X] = X,
#include "errnos.c.inc"
#undef E
};

static const uint16_t host_to_target_errno_table[] = {
#define E(X)  [X] = TARGET_##X,
#include "errnos.c.inc"
#undef E
};


+int host_to_target_errno(int err)
+{
+if (err >= 0 && err < ERRNO_TABLE_SIZE &&
+host_to_target_errno_table[err]) {
+return host_to_target_errno_table[err];
+}
+return err;
+}


Here and


+int target_to_host_errno(int err)
+{
+if (err >= 0 && err < ERRNO_TABLE_SIZE &&
+target_to_host_errno_table[err]) {
+return target_to_host_errno_table[err];
+}
+return err;
+}


here, we might as well use ARRAY_SIZE(foo) instead of ERRNO_TABLE_SIZE.

Or even convert directly to switches, with no array, and let the compiler decide what it 
thinks is best.  Which might turn out to compile away to the identity function when host 
and guest are both asm-generic.



r~

Re: [PATCH v5 01/10] hw/intc: GICv3 ITS initial framework

2021-07-06 Thread shashi . mallela

On Mon, 2021-07-05 at 19:58 +0100, Peter Maydell wrote:
> On Mon, 5 Jul 2021 at 18:04,  wrote:
> > On Mon, 2021-07-05 at 17:25 +0100, Peter Maydell wrote:
> > > On Mon, 5 Jul 2021 at 16:55,  wrote:
> > > > On Mon, 2021-07-05 at 15:58 +0100, Peter Maydell wrote:
> > > > > On Wed, 30 Jun 2021 at 16:32, Shashi Mallela <
> > > > > shashi.mall...@linaro.org> wrote:
> > > > > > Added register definitions relevant to ITS,implemented
> > > > > > overall
> > > > > > ITS device framework with stubs for ITS control and
> > > > > > translater
> > > > > > regions read/write,extended ITS common to handle mmio init
> > > > > > between
> > > > > > existing kvm device and newer qemu device.
> > > > > > 
> > > > > > Signed-off-by: Shashi Mallela 
> > > > > > Reviewed-by: Peter Maydell 
> > > > > > +static void gicv3_arm_its_realize(DeviceState *dev, Error
> > > > > > **errp)
> > > > > > +{
> > > > > > +GICv3ITSState *s = ARM_GICV3_ITS_COMMON(dev);
> > > > > > +
> > > > > > +gicv3_its_init_mmio(s, &gicv3_its_control_ops,
> > > > > > &gicv3_its_translation_ops);
> > > > > > +
> > > > > > +if (s->gicv3->cpu->gicr_typer & GICR_TYPER_PLPIS) {
> > > > > 
> > > > > Can you remind me why we make this check, please? When would
> > > > > we
> > > > > have created an ITS device but not have a GICv3 with LPI
> > > > > support?
> > > > This check applies to GIC's physical LPI support only as
> > > > against
> > > > GIC's
> > > > virtual LPI support.
> > > 
> > > Right, but when would we have a GIC with no physical LPI support
> > > but an ITS is present ?
> > If we only support Direct injection of virtual interrupts (which
> > can
> > have their own vPEID and the vPE table), then the ITS present could
> > have just virtual LPI support
> 
> This patchset does not support a virtual-LPI-only ITS, though:
> it doesn't support virtual LPIs at all.
> If you use it with CPUs without physical LPI support , this code will
> skip
> entirely setting GITS_TYPER and will make reset do nothing, and then
> the
> rest of the ITS implementation will misbehave.
> 
> I think what we should do is:
>  * in realize, check every CPU to make sure its redistributor
>supports physical LPIs, and return an error from realize if not
>  * in reset, don't check anything
Done
> 
> If we add virtual-LPI-only ITS support later, we can always update
> this code appropriately.
> 
> thanks
> -- PMM

Re: [PATCH v5 04/10] hw/intc: GICv3 ITS Command processing

2021-07-06 Thread shashi . mallela

On Tue, 2021-07-06 at 14:27 +0100, Peter Maydell wrote:
> On Tue, 6 Jul 2021 at 13:46,  wrote:
> > On Tue, 2021-07-06 at 10:19 +0100, Peter Maydell wrote:
> > > On Tue, 6 Jul 2021 at 04:25,  wrote:
> > > 
> > > But the pseudocode for MAPTI does not say anywhere that we should
> > > be checking the pIntID against any CPU's GICR_PROPBASER field.
> > > It is checked only by the checks in LPIOutOfRange(), which tests:
> > >  * is it larger than permitted by GICD_TYPER.IDbits
> > >  * is it not in the LPI range and not 1023
> > > 
> > > Checking whether the intID is too big and would cause us to index
> > > off the end of the redistributor's configuration table should be
> > > done
> > > later, only when the ITS actually sends the interrupt to a
> > > particular
> > > redistributor, I think.
> > > 
> > > (You can't rely on the guest having done the MAPC before the
> > > MAPTI;
> > > and in any case the guest could choose to do a MAPC to a
> > > different
> > > redistributor after it's done the MAPTI.)
> > We already have the "intID too big check" in place within the
> > redistributor processing when ITS sends the interrupt trigger.
> > "the LPI range and not 1023" is also handled in this function,but
> > for
> > validating "is it larger than permitted by GICD_TYPER.IDbits",the
> > source of GICD_TYPER.IDbits is GICR_PROPBASER because we pick up
> > min of
> > GICR_PROPBASER.IDbits and GICD_TYPER.IDBits.
> > 
> > If we are to not use gicr_propbaser,then are we good to just accept
> > the
> > intID value here since we are validating the same during interrupt
> > processing?
> 
> You should check the things the pseudocode says you should check.
> When processing MAPTI, that's GICD_TYPER.IDbits.
> GICR_PROPBASER.IDbits
> is not the same thing because the guest can set it to a smaller
> value.
Have made changes in code to check "intID too big" case using
GICD_TYPER.IDbits instead of GICR_PROPBASER.IDbits
> thanks
> -- PMM

Re: [PATCH v5 01/10] hw/intc: GICv3 ITS initial framework

2021-07-06 Thread shashi . mallela

On Tue, 2021-07-06 at 09:44 +0200, Eric Auger wrote:
> Hi,
> 
> On 6/30/21 5:31 PM, Shashi Mallela wrote:
> > Added register definitions relevant to ITS,implemented overall
> > ITS device framework with stubs for ITS control and translater
> > regions read/write,extended ITS common to handle mmio init between
> > existing kvm device and newer qemu device.
> > 
> > Signed-off-by: Shashi Mallela 
> > Reviewed-by: Peter Maydell 
> 
> Some of my comments in v4 were not commented nor addressed in v5.
> 
> Also here and in the other respinned patches, please add an
> individual
> history log to track the major changes you made from n-1 to n to help
> the review.
Have addressed all the pending v4 comments and summarized all major
changes in v6 series in the cover-letter section

> Thanks
> 
> Eric
> > ---
> >  hw/intc/arm_gicv3_its.c| 240
> > +
> >  hw/intc/arm_gicv3_its_common.c |   7 +-
> >  hw/intc/arm_gicv3_its_kvm.c|   2 +-
> >  hw/intc/gicv3_internal.h   |  88 +++--
> >  hw/intc/meson.build|   1 +
> >  include/hw/intc/arm_gicv3_its_common.h |   9 +-
> >  6 files changed, 331 insertions(+), 16 deletions(-)
> >  create mode 100644 hw/intc/arm_gicv3_its.c
> > 
> > diff --git a/hw/intc/arm_gicv3_its.c b/hw/intc/arm_gicv3_its.c
> > new file mode 100644
> > index 00..545cda3665
> > --- /dev/null
> > +++ b/hw/intc/arm_gicv3_its.c
> > @@ -0,0 +1,240 @@
> > +/*
> > + * ITS emulation for a GICv3-based system
> > + *
> > + * Copyright Linaro.org 2021
> > + *
> > + * Authors:
> > + *  Shashi Mallela 
> > + *
> > + * This work is licensed under the terms of the GNU GPL, version 2
> > or (at your
> > + * option) any later version.  See the COPYING file in the top-
> > level directory.
> > + *
> > + */
> > +
> > +#include "qemu/osdep.h"
> > +#include "qemu/log.h"
> > +#include "hw/qdev-properties.h"
> > +#include "hw/intc/arm_gicv3_its_common.h"
> > +#include "gicv3_internal.h"
> > +#include "qom/object.h"
> > +
> > +typedef struct GICv3ITSClass GICv3ITSClass;
> > +/* This is reusing the GICv3ITSState typedef from
> > ARM_GICV3_ITS_COMMON */
> > +DECLARE_OBJ_CHECKERS(GICv3ITSState, GICv3ITSClass,
> > + ARM_GICV3_ITS, TYPE_ARM_GICV3_ITS)
> > +
> > +struct GICv3ITSClass {
> > +GICv3ITSCommonClass parent_class;
> > +void (*parent_reset)(DeviceState *dev);
> > +};
> > +
> > +static MemTxResult gicv3_its_translation_write(void *opaque,
> > hwaddr offset,
> > +   uint64_t data,
> > unsigned size,
> > +   MemTxAttrs attrs)
> > +{
> > +MemTxResult result = MEMTX_OK;
> > +
> > +return result;
> > +}
> > +
> > +static MemTxResult its_writel(GICv3ITSState *s, hwaddr offset,
> > +  uint64_t value, MemTxAttrs attrs)
> > +{
> > +MemTxResult result = MEMTX_OK;
> > +
> > +return result;
> > +}
> > +
> > +static MemTxResult its_readl(GICv3ITSState *s, hwaddr offset,
> > + uint64_t *data, MemTxAttrs attrs)
> > +{
> > +MemTxResult result = MEMTX_OK;
> > +
> > +return result;
> > +}
> > +
> > +static MemTxResult its_writell(GICv3ITSState *s, hwaddr offset,
> > +   uint64_t value, MemTxAttrs attrs)
> > +{
> > +MemTxResult result = MEMTX_OK;
> > +
> > +return result;
> > +}
> > +
> > +static MemTxResult its_readll(GICv3ITSState *s, hwaddr offset,
> > +  uint64_t *data, MemTxAttrs attrs)
> > +{
> > +MemTxResult result = MEMTX_OK;
> > +
> > +return result;
> > +}
> > +
> > +static MemTxResult gicv3_its_read(void *opaque, hwaddr offset,
> > uint64_t *data,
> > +  unsigned size, MemTxAttrs attrs)
> > +{
> > +GICv3ITSState *s = (GICv3ITSState *)opaque;
> > +MemTxResult result;
> > +
> > +switch (size) {
> > +case 4:
> > +result = its_readl(s, offset, data, attrs);
> > +break;
> > +case 8:
> > +result = its_readll(s, offset, data, attrs);
> > +break;
> > +default:
> > +result = MEMTX_ERROR;
> > +break;
> > +}
> > +
> > +if (result == MEMTX_ERROR) {
> > +qemu_log_mask(LOG_GUEST_ERROR,
> > +  "%s: invalid guest read at offset "
> > TARGET_FMT_plx
> > +  "size %u\n", __func__, offset, size);
> > +/*
> > + * The spec requires that reserved registers are RAZ/WI;
> > + * so use MEMTX_ERROR returns from leaf functions as a way
> > to
> > + * trigger the guest-error logging but don't return it to
> > + * the caller, or we'll cause a spurious guest data abort.
> > + */
> > +result = MEMTX_OK;
> > +*data = 0;
> > +}
> > +return result;
> > +}
> > +
> > +static MemTxResult gicv3_its_write(void *opaque, hwaddr offset,
> > uint64_t data,
> > +

Re: [PATCH v5 04/10] hw/intc: GICv3 ITS Command processing

2021-07-06 Thread shashi . mallela

On Tue, 2021-07-06 at 11:27 +0200, Eric Auger wrote:
> Hi,
> 
> On 7/5/21 4:07 PM, Peter Maydell wrote:
> > On Wed, 30 Jun 2021 at 16:32, Shashi Mallela <
> > shashi.mall...@linaro.org> wrote:
> > > Added ITS command queue handling for MAPTI,MAPI commands,handled
> > > ITS
> > > translation which triggers an LPI via INT command as well as
> > > write
> > > to GITS_TRANSLATER register,defined enum to differentiate between
> > > ITS
> > > command interrupt trigger and GITS_TRANSLATER based interrupt
> > > trigger.
> > > Each of these commands make use of other functionalities
> > > implemented to
> > > get device table entry,collection table entry or interrupt
> > > translation
> > > table entry required for their processing.
> > > 
> > > Signed-off-by: Shashi Mallela 
> > > ---
> > >  hw/intc/arm_gicv3_its.c| 361
> > > -
> > >  hw/intc/gicv3_internal.h   |  26 +++
> > >  include/hw/intc/arm_gicv3_common.h |   2 +
> > >  3 files changed, 388 insertions(+), 1 deletion(-)
> > > +/*
> > > + * This function handles the processing of following commands
> > > based on
> > > + * the ItsCmdType parameter passed:-
> > > + * 1. trigerring of lpi interrupt translation via ITS INT
> > > command
> > > + * 2. trigerring of lpi interrupt translation via
> > > gits_translater register
> > > + * 3. handling of ITS CLEAR command
> > > + * 4. handling of ITS DISCARD command
> > > + */
> > 
> > "triggering"
> > 
> > >  #define DEVID_SHIFT  32
> > >  #define DEVID_MASKMAKE_64BIT_MASK(32, 32)
> > > @@ -347,6 +368,11 @@ FIELD(MAPC, RDBASE, 16, 32)
> > >   * vPEID = 16 bits
> > >   */
> > >  #define ITS_ITT_ENTRY_SIZE0xC
> > > +#define ITE_ENTRY_INTTYPE_SHIFT1
> > > +#define ITE_ENTRY_INTID_SHIFT  2
> > > +#define ITE_ENTRY_INTID_MASK ((1ULL << 24) - 1)
> > > +#define ITE_ENTRY_INTSP_SHIFT  26
> > > +#define ITE_ENTRY_ICID_MASK  ((1ULL << 16) - 1)
> > 
> > This is still using a MASK value that's at the bottom of the
> > integer, not in its shifted location.
> There are other locations, pointed out by former comments, where this
> kind of unusual masking scheme is used but well...
Have taken care of masking scheme as desired in all relevant sections
in v6 patch
> 
> Thanks
> 
> Eric
> 
> > Otherwise
> > Reviewed-by: Peter Maydell 
> > 
> > thanks
> > -- PMM
> >

Re: [PATCH 05/12] linux-user: Extract target errno to 'target_errno_defs.h'

2021-07-06 Thread Richard Henderson


On 7/4/21 11:37 AM, Philippe Mathieu-Daudé wrote:

We want to access the target errno independently of the rest
of the linux-user code. Extract it to a new target-specific
header: 'target_errno_defs.h'.

Signed-off-by: Philippe Mathieu-Daudé 
---
  linux-user/aarch64/target_errno_defs.h| 6 ++
  linux-user/alpha/target_errno_defs.h  | 4 
  linux-user/arm/target_errno_defs.h| 6 ++
  linux-user/cris/target_errno_defs.h   | 6 ++
  linux-user/errno_defs.h   | 3 +++
  linux-user/hexagon/target_errno_defs.h| 6 ++
  linux-user/hppa/target_errno_defs.h   | 4 
  linux-user/i386/target_errno_defs.h   | 6 ++
  linux-user/m68k/target_errno_defs.h   | 6 ++
  linux-user/microblaze/target_errno_defs.h | 6 ++
  linux-user/mips/target_errno_defs.h   | 4 
  linux-user/mips64/target_errno_defs.h | 4 
  linux-user/nios2/target_errno_defs.h  | 6 ++
  linux-user/openrisc/target_errno_defs.h   | 6 ++
  linux-user/ppc/target_errno_defs.h| 6 ++
  linux-user/riscv/target_errno_defs.h  | 6 ++
  linux-user/s390x/target_errno_defs.h  | 6 ++
  linux-user/sh4/target_errno_defs.h| 6 ++
  linux-user/sparc/target_syscall.h | 2 --
  linux-user/x86_64/target_errno_defs.h | 6 ++
  linux-user/xtensa/target_errno_defs.h | 6 ++
  21 files changed, 109 insertions(+), 2 deletions(-)
  create mode 100644 linux-user/aarch64/target_errno_defs.h
  create mode 100644 linux-user/alpha/target_errno_defs.h
  create mode 100644 linux-user/arm/target_errno_defs.h
  create mode 100644 linux-user/cris/target_errno_defs.h
  create mode 100644 linux-user/hexagon/target_errno_defs.h
  create mode 100644 linux-user/hppa/target_errno_defs.h
  create mode 100644 linux-user/i386/target_errno_defs.h
  create mode 100644 linux-user/m68k/target_errno_defs.h
  create mode 100644 linux-user/microblaze/target_errno_defs.h
  create mode 100644 linux-user/mips/target_errno_defs.h
  create mode 100644 linux-user/mips64/target_errno_defs.h
  create mode 100644 linux-user/nios2/target_errno_defs.h
  create mode 100644 linux-user/openrisc/target_errno_defs.h
  create mode 100644 linux-user/ppc/target_errno_defs.h
  create mode 100644 linux-user/riscv/target_errno_defs.h
  create mode 100644 linux-user/s390x/target_errno_defs.h
  create mode 100644 linux-user/sh4/target_errno_defs.h
  create mode 100644 linux-user/x86_64/target_errno_defs.h
  create mode 100644 linux-user/xtensa/target_errno_defs.h

diff --git a/linux-user/aarch64/target_errno_defs.h 
b/linux-user/aarch64/target_errno_defs.h
new file mode 100644
index 000..a809381165a
--- /dev/null
+++ b/linux-user/aarch64/target_errno_defs.h
@@ -0,0 +1,6 @@
+#ifndef AARCH64_TARGET_ERRNO_H
+#define AARCH64_TARGET_ERRNO_H
+
+/* Target uses generic errno */
+
+#endif


This could be better.

Consider e.g. termbits.h as the model.
These targets should have exactly one line:

#include "../generic/target_errno.h"


diff --git a/linux-user/alpha/target_errno_defs.h 
b/linux-user/alpha/target_errno_defs.h
new file mode 100644
index 000..13770b14b82
--- /dev/null
+++ b/linux-user/alpha/target_errno_defs.h
@@ -0,0 +1,4 @@
+#ifndef ALPHA_TARGET_ERRNO_H
+#define ALPHA_TARGET_ERRNO_H
+
+#endif


This one, and ones like it, become

#ifndef ALPHA_TARGET_ERRNO_H
#define ALPHA_TARGET_ERRNO_H

#include "../generic/target_errno.h"

#undef  TARGET_EBAR
#define TARGET_EBAR  xxx
...

#endif


r~

Re: [PATCH 06/12] linux-user/alpha: Remove hardcoded tabs (code style)

2021-07-06 Thread Richard Henderson


On 7/4/21 11:37 AM, Philippe Mathieu-Daudé wrote:

We are going to move this code, fix its style first.

Signed-off-by: Philippe Mathieu-Daudé
---
Patch trivial to review using 'git-diff --ignore-all-space'
---
  linux-user/alpha/target_syscall.h | 196 +++---
  1 file changed, 97 insertions(+), 99 deletions(-)


Reviewed-by: Richard Henderson 

r~

Re: [PATCH 03/12] linux-user/mips: Handle TARGET_EWOULDBLOCK as TARGET_EAGAIN

2021-07-06 Thread Richard Henderson


On 7/4/21 11:37 AM, Philippe Mathieu-Daudé wrote:

Linux kernel defines EWOULDBLOCK as EAGAIN (since before v2.6.12-rc2).

Signed-off-by: Philippe Mathieu-Daudé
---
  linux-user/mips/target_syscall.h   | 2 ++
  linux-user/mips64/target_syscall.h | 2 ++
  2 files changed, 4 insertions(+)


Reviewed-by: Richard Henderson 

r~

Re: [PATCH 02/12] linux-user/hppa: Handle TARGET_EWOULDBLOCK as TARGET_EAGAIN

2021-07-06 Thread Richard Henderson


On 7/4/21 11:37 AM, Philippe Mathieu-Daudé wrote:

Linux kernel defines EWOULDBLOCK as EAGAIN (since before v2.6.12-rc2).

Signed-off-by: Philippe Mathieu-Daudé
---
  linux-user/hppa/target_syscall.h | 2 ++
  1 file changed, 2 insertions(+)


Reviewed-by: Richard Henderson 

r~

Re: [PATCH 01/12] linux-user/alpha: Handle TARGET_EWOULDBLOCK as TARGET_EAGAIN

2021-07-06 Thread Richard Henderson


On 7/4/21 11:37 AM, Philippe Mathieu-Daudé wrote:

Linux kernel defines EWOULDBLOCK as EAGAIN (since before v2.6.12-rc2).

Signed-off-by: Philippe Mathieu-Daudé
---
  linux-user/alpha/target_syscall.h | 2 ++
  1 file changed, 2 insertions(+)


Reviewed-by: Richard Henderson 

r~

Re: [PATCH 09/11] hw/gpio/pl061: Convert to 3-phase reset and assert GPIO lines correctly on reset

2021-07-06 Thread Richard Henderson


On 7/2/21 3:40 AM, Peter Maydell wrote:

The PL061 comes out of reset with all its lines configured as input,
which means they might need to be pulled to 0 or 1 depending on the
'pullups' and 'pulldowns' properties.  Currently we do not assert
these lines on reset; they will only be set whenever the guest first
touches a register that triggers a call to pl061_update().

Convert the device to three-phase reset so we have a place where we
can safely call qemu_set_irq() to set the floating lines to their
correct values.

Signed-off-by: Peter Maydell
---
  hw/gpio/pl061.c  | 29 +
  hw/gpio/trace-events |  1 +
  2 files changed, 26 insertions(+), 4 deletions(-)


Reviewed-by: Richard Henderson 

r~

Re: [PATCH 08/11] hw/arm/virt: Make PL061 GPIO lines pulled low, not high

2021-07-06 Thread Richard Henderson


On 7/2/21 3:40 AM, Peter Maydell wrote:

For the virt board we have two PL061 devices -- one for NonSecure which
is inputs only, and one for Secure which is outputs only. For the former,
we don't care whether its outputs are pulled low or high when the line is
configured as an input, because we don't connect them. For the latter,
we do care, because we wire the lines up to the gpio-pwr device, which
assumes that level 1 means "do the action" and 0 means "do nothing".
For consistency in case we add more outputs in future, configure both
PL061s to pull GPIO lines down to 0.

Reported-by: Maxim Uvarov
Signed-off-by: Peter Maydell
---
  hw/arm/virt.c | 3 +++
  1 file changed, 3 insertions(+)


Reviewed-by: Richard Henderson 

r~

Re: [PATCH 07/11] hw/gpio/pl061: Make pullup/pulldown of outputs configurable

2021-07-06 Thread Richard Henderson


On 7/2/21 3:40 AM, Peter Maydell wrote:

The PL061 GPIO does not itself include pullup or pulldown resistors
to set the value of a GPIO line treated as an output when it is
configured as an input (ie when the PL061 itself is not driving it).
In real hardware it is up to the board to add suitable pullups or
pulldowns.  Currently our implementation hardwires this to "outputs
pulled high", which is correct for some boards (eg the realview ones:
see figure 3-29 in the "RealView Platform Baseboard for ARM926EJ-S
User Guide" DUI0224I), but wrong for others.

In particular, the wiring in the 'virt' board and the gpio-pwr device
assumes that wires should be pulled low, because otherwise the
pull-to-high will trigger a shutdown or reset action.  (The only
reason this doesn't happen immediately on startup is due to another
bug in the PL061, where we don't assert the GPIOs to the correct
value on reset, but will do so as soon as the guest touches a
register and pl061_update() gets called.)

Add properties to the pl061 so the board can configure whether it
wants GPIO lines to have pullup, pulldown, or neither.

Signed-off-by: Peter Maydell
---
  hw/gpio/pl061.c | 51 +
  1 file changed, 47 insertions(+), 4 deletions(-)


Reviewed-by: Richard Henderson 

r~

Re: [RFC PATCH 6/6] dp8393x: Rewrite dp8393x_get() / dp8393x_put()

2021-07-06 Thread Finn Thain

On Mon, 5 Jul 2021, Mark Cave-Ayland wrote:

> On 05/07/2021 02:36, Finn Thain wrote:
> 
> > > Unfortunately I don't have a test mips64el image available to see if 
> > > this combination works for Linux. Phil, do you have a suitable test 
> > > kernel and rootfs image available to allow this to be tested?
> > > 
> > 
> > You can build and boot a mipsel vmlinux by following the steps I 
> > described previously. In the kernel messages you'll see the jazzsonic 
> > driver attempt to probe the device. When it succeeds, you'll see the 
> > MAC address reported. You can also observe the regression I reported 
> > with regards to patch 2/6, "dp8393x: don't force 32-bit register 
> > access".
> 
> Those instructions are useful, but since I am not a MIPS developer I 
> don't have an existing toolchain/kernel tree and rootfs available to 
> test this.
> 

You don't need a rootfs to see the jazzsonic driver messages. But if you 
still want one, you could try the mipsel builds from these distros (not 
the 64-bit ones):

https://ftp.jaist.ac.jp/pub/Linux/Gentoo/experimental/mips/stages/
https://landley.net/aboriginal/downloads/binaries/

> If you can provide me with a link to your vmlinux and rootfs with 
> busybox or similar in it, I can take a look to see what is happening 
> here. Otherwise it's almost impossible for me to understand and debug 
> the problem you are seeing on your setup.
> 

Uploading kernels is a hassle (for me) as it brings a trust question and 
requires a file hosting service. I really should use PGP and organise a 
web of trust but that's very difficult given my rural location.

Re: [PATCH 06/11] hw/gpio/pl061: Honour Luminary PL061 PUR and PDR registers

2021-07-06 Thread Richard Henderson


On 7/2/21 3:40 AM, Peter Maydell wrote:

The Luminary variant of the PL061 has registers GPIOPUR and GPIOPDR
which let the guest configure whether the GPIO lines are pull-up,
pull-down, or truly floating. Instead of assuming all lines are pulled
high, honour the PUR and PDR registers.

For the plain PL061, continue to assume that lines have an external
pull-up resistor, as we did before.

The stellaris board actually relies on this behaviour -- the CD line
of the ssd0323 display device is connected to GPIO output C7, and it
is only because of a different bug which we're about to fix that we
weren't incorrectly driving this line high on reset and putting the
ssd0323 into data mode.

Signed-off-by: Peter Maydell
---
  hw/gpio/pl061.c  | 58 +---
  hw/gpio/trace-events |  2 +-
  2 files changed, 55 insertions(+), 5 deletions(-)


Reviewed-by: Richard Henderson 

r~

Re: [PATCH 05/11] hw/gpio/pl061: Document the interface of this device

2021-07-06 Thread Richard Henderson


On 7/2/21 3:40 AM, Peter Maydell wrote:

Add a comment documenting the "QEMU interface" of this device:
which MMIO regions, IRQ lines, GPIO lines, etc it exposes.

Signed-off-by: Peter Maydell
---
  hw/gpio/pl061.c | 7 +++
  1 file changed, 7 insertions(+)


Reviewed-by: Richard Henderson 

r~

Re: [PATCH 04/11] hw/gpio/pl061: Add tracepoints for register read and write

2021-07-06 Thread Richard Henderson


On 7/2/21 3:40 AM, Peter Maydell wrote:

Add tracepoints for reads and writes to the PL061 registers. This requires
restructuring pl061_read() to only return after the tracepoint, rather
than having lots of early-returns.

Signed-off-by: Peter Maydell
---
  hw/gpio/pl061.c  | 70 ++--
  hw/gpio/trace-events |  2 ++
  2 files changed, 50 insertions(+), 22 deletions(-)


Reviewed-by: Richard Henderson 

r~

Re: [PATCH 03/11] hw/gpio/pl061: Clean up read/write offset handling logic

2021-07-06 Thread Richard Henderson


On 7/2/21 4:45 AM, Peter Maydell wrote:

On Fri, 2 Jul 2021 at 12:02, Philippe Mathieu-Daudé  wrote:


Hi Peter,

On 7/2/21 12:40 PM, Peter Maydell wrote:

Currently the pl061_read() and pl061_write() functions handle offsets
using a combination of three if() statements and a switch().  Clean
this up to use just a switch, using case ranges.

This requires that instead of catching accesses to the luminary-only
registers on a stock PL061 via a check on s->rsvd_start we use
an "is this luminary?" check in the cases for each luminary-only
register.

Signed-off-by: Peter Maydell 
---
  hw/gpio/pl061.c | 106 
  1 file changed, 81 insertions(+), 25 deletions(-)

diff --git a/hw/gpio/pl061.c b/hw/gpio/pl061.c
index a6ace88895d..0f5d12e6d5a 100644
--- a/hw/gpio/pl061.c
+++ b/hw/gpio/pl061.c
@@ -55,7 +55,6 @@ struct PL061State {
  qemu_irq irq;
  qemu_irq out[N_GPIOS];
  const unsigned char *id;
-uint32_t rsvd_start; /* reserved area: [rsvd_start, 0xfcc] */
  };

  static const VMStateDescription vmstate_pl061 = {
@@ -151,16 +150,9 @@ static uint64_t pl061_read(void *opaque, hwaddr offset,
  {
  PL061State *s = (PL061State *)opaque;

-if (offset < 0x400) {
-return s->data & (offset >> 2);
-}
-if (offset >= s->rsvd_start && offset <= 0xfcc) {
-goto err_out;
-}
-if (offset >= 0xfd0 && offset < 0x1000) {
-return s->id[(offset - 0xfd0) >> 2];
-}
  switch (offset) {
+case 0x0 ... 0x3fc: /* Data */
+return s->data & (offset >> 2);


Don't we need to set pl061_ops.impl.min/max_access_size = 4
to keep the same logic?


I think the hardware intends to permit accesses of any width, but only
at 4-byte boundaries. There is a slight behaviour change here:
accesses to 0x3fd, 0x3fe, 0x3ff now fall into the default case (ie error)
rather than being treated like 0x3fc, and similarly accesses to 0xfdd,
0xfde, 0xfdf are errors rather than treated like 0xfdc. But I think
that it's probably more correct to consider those to be errors.

(We could explicitly check and goto err_out if (offset & 3)
right at the top, I suppose.)


Perhaps just better to retain current behaviour with this patch by extending the case to 
the ends.  If you want to check oddness of offset, use a separate patch.



r~

Re: [PATCH 03/11] hw/gpio/pl061: Clean up read/write offset handling logic

2021-07-06 Thread Richard Henderson


On 7/2/21 3:40 AM, Peter Maydell wrote:

+case 0x52c ... 0xfcc: /* Reserved */
+goto bad_offset;


Any reason to not just use default for these?

Otherwise,
Reviewed-by: Richard Henderson 


r~

Re: [PATCH 02/11] hw/gpio/pl061: Convert DPRINTF to tracepoints

2021-07-06 Thread Richard Henderson


On 7/2/21 3:40 AM, Peter Maydell wrote:

Convert the use of the DPRINTF debug macro in the PL061 model to
use tracepoints.

Signed-off-by: Peter Maydell
---
  hw/gpio/pl061.c  | 27 +--
  hw/gpio/trace-events |  6 ++
  2 files changed, 15 insertions(+), 18 deletions(-)


Reviewed-by: Richard Henderson 

r~

Re: [RFC PATCH 8/8] target/i386: Move X86XSaveArea into TCG

2021-07-06 Thread Richard Henderson


On 7/5/21 3:46 AM, David Edmondson wrote:

Given that TCG is now the only consumer of X86XSaveArea, move the
structure definition and associated offset declarations and checks to a
TCG specific header.

Signed-off-by: David Edmondson 
---
  target/i386/cpu.h| 57 
  target/i386/tcg/fpu_helper.c |  1 +
  target/i386/tcg/tcg-cpu.h| 57 
  3 files changed, 58 insertions(+), 57 deletions(-)

diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index 96b672f8bd..0f7ddbfeae 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -1305,48 +1305,6 @@ typedef struct XSavePKRU {
  uint32_t padding;
  } XSavePKRU;
  
-#define XSAVE_FCW_FSW_OFFSET0x000

-#define XSAVE_FTW_FOP_OFFSET0x004
-#define XSAVE_CWD_RIP_OFFSET0x008
-#define XSAVE_CWD_RDP_OFFSET0x010
-#define XSAVE_MXCSR_OFFSET  0x018
-#define XSAVE_ST_SPACE_OFFSET   0x020
-#define XSAVE_XMM_SPACE_OFFSET  0x0a0
-#define XSAVE_XSTATE_BV_OFFSET  0x200
-#define XSAVE_AVX_OFFSET0x240
-#define XSAVE_BNDREG_OFFSET 0x3c0
-#define XSAVE_BNDCSR_OFFSET 0x400
-#define XSAVE_OPMASK_OFFSET 0x440
-#define XSAVE_ZMM_HI256_OFFSET  0x480
-#define XSAVE_HI16_ZMM_OFFSET   0x680
-#define XSAVE_PKRU_OFFSET   0xa80
-
-typedef struct X86XSaveArea {
-X86LegacyXSaveArea legacy;
-X86XSaveHeader header;
-
-/* Extended save areas: */
-
-/* AVX State: */
-XSaveAVX avx_state;
-
-/* Ensure that XSaveBNDREG is properly aligned. */
-uint8_t padding[XSAVE_BNDREG_OFFSET
-- sizeof(X86LegacyXSaveArea)
-- sizeof(X86XSaveHeader)
-- sizeof(XSaveAVX)];
-
-/* MPX State: */
-XSaveBNDREG bndreg_state;
-XSaveBNDCSR bndcsr_state;
-/* AVX-512 State: */
-XSaveOpmask opmask_state;
-XSaveZMM_Hi256 zmm_hi256_state;
-XSaveHi16_ZMM hi16_zmm_state;
-/* PKRU State: */
-XSavePKRU pkru_state;
-} X86XSaveArea;
-
  QEMU_BUILD_BUG_ON(sizeof(XSaveAVX) != 0x100);
  QEMU_BUILD_BUG_ON(sizeof(XSaveBNDREG) != 0x40);
  QEMU_BUILD_BUG_ON(sizeof(XSaveBNDCSR) != 0x40);
@@ -1355,21 +1313,6 @@ QEMU_BUILD_BUG_ON(sizeof(XSaveZMM_Hi256) != 0x200);
  QEMU_BUILD_BUG_ON(sizeof(XSaveHi16_ZMM) != 0x400);
  QEMU_BUILD_BUG_ON(sizeof(XSavePKRU) != 0x8);
  
-QEMU_BUILD_BUG_ON(offsetof(X86XSaveArea, legacy.fcw) != XSAVE_FCW_FSW_OFFSET);

-QEMU_BUILD_BUG_ON(offsetof(X86XSaveArea, legacy.ftw) != XSAVE_FTW_FOP_OFFSET);
-QEMU_BUILD_BUG_ON(offsetof(X86XSaveArea, legacy.fpip) != XSAVE_CWD_RIP_OFFSET);
-QEMU_BUILD_BUG_ON(offsetof(X86XSaveArea, legacy.fpdp) != XSAVE_CWD_RDP_OFFSET);
-QEMU_BUILD_BUG_ON(offsetof(X86XSaveArea, legacy.mxcsr) != XSAVE_MXCSR_OFFSET);
-QEMU_BUILD_BUG_ON(offsetof(X86XSaveArea, legacy.fpregs) != 
XSAVE_ST_SPACE_OFFSET);
-QEMU_BUILD_BUG_ON(offsetof(X86XSaveArea, legacy.xmm_regs) != 
XSAVE_XMM_SPACE_OFFSET);
-QEMU_BUILD_BUG_ON(offsetof(X86XSaveArea, avx_state) != XSAVE_AVX_OFFSET);
-QEMU_BUILD_BUG_ON(offsetof(X86XSaveArea, bndreg_state) != XSAVE_BNDREG_OFFSET);
-QEMU_BUILD_BUG_ON(offsetof(X86XSaveArea, bndcsr_state) != XSAVE_BNDCSR_OFFSET);
-QEMU_BUILD_BUG_ON(offsetof(X86XSaveArea, opmask_state) != XSAVE_OPMASK_OFFSET);
-QEMU_BUILD_BUG_ON(offsetof(X86XSaveArea, zmm_hi256_state) != 
XSAVE_ZMM_HI256_OFFSET);
-QEMU_BUILD_BUG_ON(offsetof(X86XSaveArea, hi16_zmm_state) != 
XSAVE_HI16_ZMM_OFFSET);
-QEMU_BUILD_BUG_ON(offsetof(X86XSaveArea, pkru_state) != XSAVE_PKRU_OFFSET);
-
  typedef struct ExtSaveArea {
  uint32_t feature, bits;
  uint32_t offset, size;
diff --git a/target/i386/tcg/fpu_helper.c b/target/i386/tcg/fpu_helper.c
index 4e11965067..74bbe94b80 100644
--- a/target/i386/tcg/fpu_helper.c
+++ b/target/i386/tcg/fpu_helper.c
@@ -20,6 +20,7 @@
  #include "qemu/osdep.h"
  #include 
  #include "cpu.h"
+#include "tcg-cpu.h"
  #include "exec/helper-proto.h"
  #include "fpu/softfloat.h"
  #include "fpu/softfloat-macros.h"
diff --git a/target/i386/tcg/tcg-cpu.h b/target/i386/tcg/tcg-cpu.h
index 36bd300af0..53a8494455 100644
--- a/target/i386/tcg/tcg-cpu.h
+++ b/target/i386/tcg/tcg-cpu.h
@@ -19,6 +19,63 @@
  #ifndef TCG_CPU_H
  #define TCG_CPU_H
  
+#define XSAVE_FCW_FSW_OFFSET0x000

+#define XSAVE_FTW_FOP_OFFSET0x004
+#define XSAVE_CWD_RIP_OFFSET0x008
+#define XSAVE_CWD_RDP_OFFSET0x010
+#define XSAVE_MXCSR_OFFSET  0x018
+#define XSAVE_ST_SPACE_OFFSET   0x020
+#define XSAVE_XMM_SPACE_OFFSET  0x0a0
+#define XSAVE_XSTATE_BV_OFFSET  0x200
+#define XSAVE_AVX_OFFSET0x240
+#define XSAVE_BNDREG_OFFSET 0x3c0
+#define XSAVE_BNDCSR_OFFSET 0x400
+#define XSAVE_OPMASK_OFFSET 0x440
+#define XSAVE_ZMM_HI256_OFFSET  0x480
+#define XSAVE_HI16_ZMM_OFFSET   0x680
+#define XSAVE_PKRU_OFFSET   0xa80
+
+typedef struct X86XSaveArea {
+X86LegacyXSaveArea legacy;
+X86XSaveHeader header;
+
+/* Extended save areas: */
+
+/* AVX State: */
+XSaveAVX avx_state;
+
+/* Ensure that XSaveBNDREG is properly aligned. */
+uint8_t padding[XSAV

Re: [PATCH] tests/acceptance: Test NetBSD 5.1 on the Jazz Magnum machine

2021-07-06 Thread Finn Thain

On Mon, 5 Jul 2021, Philippe Mathieu-Daudé wrote:

> On 7/5/21 2:58 AM, Finn Thain wrote:
> > On Mon, 5 Jul 2021, Philippe Mathieu-Daudé wrote:
> > 
> >> Test NetBSD 5.1 on the Jazz Magnum machine. As the firmware is not
> >> redistributable, it has to be extracted from the floppy configuration
> >> disk coming with a Mips Magnum 4000 system, then the NTPROM_BIN_PATH
> >> environment variable has to be set. For convenience a NVRAM pre-
> >> initialized to boot NetBSD is included. The test can be run as:
> >>
> >>   $ NTPROM_BIN_PATH=/path/to/ntprom.bin \
> >> avocado --show=app,console \
> >> run -t machine:magnum tests/acceptance/
> >>   Fetching asset from 
> >> tests/acceptance/machine_mips_jazz.py:MipsJazz.test_magnum_netbsd_5_1
> >>(1/1) 
> >> tests/acceptance/machine_mips_jazz.py:MipsJazz.test_magnum_netbsd_5_1:
> >>   console: EISA Bus 0 Initialization In Progress... Direct Memory Access 
> >> (DMA) System Control Port B Timer 1 OK.
> >>   console: ARC Multiboot Version 174 (SGI Version 2.6)
> >>   console: Copyright (c) 1991,1992  Microsoft Corporation
> >>   console: Actions:
> >>   console: Start Windows NT
> >>   console: Run a program
> >>   console: Run setup
> >>   console: Use the arrow keys to select.
> >>   console: Press Enter to choose.
> >>   console: Program to run:
> >>   console: scsi(0)cdrom(2)fdisk(0)boot scsi(0)cdrom(2)fdisk(0)netbsd
> >>   console: NetBSD/arc Bootstrap, Revision 1.1
> >>   console: (bui...@b7.netbsd.org, Sat Nov  6 14:06:36 UTC 2010)
> >>   console: devopen: scsi(0)cdrom(2)fdisk(0) type disk file netbsd
> >>   console: NetBSD 5.1 (RAMDISK) #0: Sat Nov  6 14:17:36 UTC 2010
> >>   console: 
> >> bui...@b7.netbsd.org:/home/builds/ab/netbsd-5-1-RELEASE/arc/201011061943Z-obj/home/builds/ab/netbsd-5-1-RELEASE/src/sys/arch/arc/compile/RAMDISK
> >>   console: MIPS Magnum
> 
> >> +class MipsJazz(Test):
> >> +
> >> +timeout = 60
> >> +
> >> +@skipUnless(os.getenv('NTPROM_BIN_PATH'), 'NTPROM_BIN_PATH not 
> >> available')
> >> +def test_magnum_netbsd_5_1(self):
> >> +"""
> >> +:avocado: tags=arch:mips64el
> >> +:avocado: tags=machine:magnum
> >> +:avocado: tags=os:netbsd
> >> +:avocado: tags=device:sonic
> >> +:avocado: tags=device:esp
> >> +"""
> >> +drive_url = ('http://archive.netbsd.org/pub/NetBSD-archive/'
> >> + 'NetBSD-5.1/iso/arccd-5.1.iso')
> > 
> > This can be updated to NetBSD 9.2 (the regressions in NetBSD have been 
> > fixed).
> 
> Indeed, with this change:
> 
> -- >8 --
> diff --git a/tests/acceptance/machine_mips_jazz.py
> b/tests/acceptance/machine_mips_jazz.py
> index 0b6640edc12..54968959372 100644
> --- a/tests/acceptance/machine_mips_jazz.py
> +++ b/tests/acceptance/machine_mips_jazz.py
> @@ -33,13 +33,11 @@ def test_magnum_netbsd_5_1(self):
>  :avocado: tags=device:sonic
>  :avocado: tags=device:esp
>  """
> -drive_url = ('http://archive.netbsd.org/pub/NetBSD-archive/'
> - 'NetBSD-5.1/iso/arccd-5.1.iso')
> -drive_hash = ('c91a57fb373636247d1f1ce283a610ba529e208604a'
> -  'f2a9e0237551fb3d25459c7697775af8c8d35a9764e'
> -  'fca87cfb591f363643e93417cfdb8857215ceb405e')
> +drive_url = ('http://cdn.netbsd.org/pub/NetBSD/'
> + 'NetBSD-9.2/images/NetBSD-9.2-arc.iso')
> +drive_hash = '409c61aee5459e762cdb120d2591ed2e'
>  drive_path = self.fetch_asset(drive_url, asset_hash=drive_hash,
> -  algorithm='sha512')
> +  algorithm='md5')
>  ntprom_hash = '316de17820192c89b8ee6d9936ab8364a739ca53'
>  ntprom_path = self.fetch_asset('file://' +
> os.getenv('NTPROM_BIN_PATH'),
> asset_hash=ntprom_hash,
> algorithm='sha1')
> ---
> 
> I get:
> 
> console: [   1.000] NetBSD 9.2 (RAMDISK) #0: Wed May 12 13:15:55 UTC
> 2021
> console: [   1.000]
> mkre...@mkrepro.netbsd.org:/usr/src/sys/arch/arc/compile/RAMDISK
> console: [   1.000] MIPS Magnum
> ...
> console: # [   6.1232105]
> pmap_tlb_update_addr(0x87eb5f60,0x69a000,0x1ee59e, 0)
> ...
> console: [  20.2174752] rebooting...
> PASS (48.98 s)
> 
> >> +nvram_path = 'nvram.bin'
> > 
> > Does the test work when nvram.bin is uninitialized by the ARC firmware?
> 
> No, because the serial is not used by default.
> 
> > Perhaps the default MAC address (from the SONIC PROM) would be more 
> > appropriate? Alternatively, if you want to check that '00:00:00:02:03:04' 
> > actually got used, you can do this:
> > 
> > erase ^H, werase ^W, kill ^U, intr ^C, status ^T
> > Terminal type? [vt100] 
> > Erase is backspace. 
> > (I)nstall, (S)hell or (H)alt ? s
> > # ifconfig
> > sn0: flags=0x8802 mtu 1500
> > ec_capabilities=1
> > ec_enabled=0
> > address: 00:00:00:02:03:04
>

Re: [PATCH v5 0/2] Clean up MMU translation

2021-07-06 Thread David Gibson

On Tue, Jul 06, 2021 at 12:03:14PM -0300, Bruno Larsen (billionai) wrote:
> This is the final change relating to mmu_idx permission checking,
> correcting a technical hiccup on how it was handled beforehand.
> It also introduces a common header to be used by all BookS MMUs to help
> with common code in the future.
> 
> Based-on: dgibson's ppc-for-6.1 tree

Applied to ppc-for-6.1, thanks.

> 
> Changes for v5:
>  * introduced a common header to also change hash32
> 
> Changes for v4:
>  * added r-b and t-b tags
>  * changes commit message of the first patch
>  * removed function parameters that were no longer used
> 
> Changes for v3:
>  * removed patches that were already applied
>  * fixed comments on last patch
>  * added 2 new patches
> 
> Changes for v2:
>  * rebase on ppc-for-6.1
>  * added the bugfix
> 
> Bruno Larsen (billionai) (2):
>   target/ppc: introduce mmu-books.h
>   target/ppc: change ppc_hash32_xlate to use mmu_idx
> 
>  target/ppc/mmu-book3s-v3.h | 14 +
>  target/ppc/mmu-books.h | 30 
>  target/ppc/mmu-hash32.c| 40 ++
>  target/ppc/mmu-hash32.h|  2 +-
>  target/ppc/mmu_helper.c|  2 +-
>  5 files changed, 52 insertions(+), 36 deletions(-)
>  create mode 100644 target/ppc/mmu-books.h
> 

-- 
David Gibson| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson


signature.asc
Description: PGP signature

[PATCH v4 3/3] memory_hotplug.c: send DEVICE_UNPLUG_ERROR in acpi_memory_hotplug_write()

2021-07-06 Thread Daniel Henrique Barboza

MEM_UNPLUG_ERROR is deprecated since the introduction of
DEVICE_UNPLUG_ERROR. Keep emitting both while the deprecation of
MEM_UNPLUG_ERROR is pending.

CC: Michael S. Tsirkin 
CC: Igor Mammedov 
Reviewed-by: David Gibson 
Signed-off-by: Daniel Henrique Barboza 
---
 hw/acpi/memory_hotplug.c | 13 +++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/hw/acpi/memory_hotplug.c b/hw/acpi/memory_hotplug.c
index af37889423..fb9f4d2de7 100644
--- a/hw/acpi/memory_hotplug.c
+++ b/hw/acpi/memory_hotplug.c
@@ -8,6 +8,7 @@
 #include "qapi/error.h"
 #include "qapi/qapi-events-acpi.h"
 #include "qapi/qapi-events-machine.h"
+#include "qapi/qapi-events-qdev.h"
 
 #define MEMORY_SLOTS_NUMBER  "MDNR"
 #define MEMORY_HOTPLUG_IO_REGION "HPMR"
@@ -177,9 +178,17 @@ static void acpi_memory_hotplug_write(void *opaque, hwaddr 
addr, uint64_t data,
 /* call pc-dimm unplug cb */
 hotplug_handler_unplug(hotplug_ctrl, dev, &local_err);
 if (local_err) {
+const char *error_pretty = error_get_pretty(local_err);
+
 trace_mhp_acpi_pc_dimm_delete_failed(mem_st->selector);
-qapi_event_send_mem_unplug_error(dev->id,
- error_get_pretty(local_err));
+
+/*
+ * Send both MEM_UNPLUG_ERROR and DEVICE_UNPLUG_ERROR
+ * while the deprecation of MEM_UNPLUG_ERROR is
+ * pending.
+ */
+qapi_event_send_mem_unplug_error(dev->id, error_pretty);
+qapi_event_send_device_unplug_error(dev->id, error_pretty);
 error_free(local_err);
 break;
 }
-- 
2.31.1

[PATCH v4 2/3] spapr: use DEVICE_UNPLUG_ERROR to report unplug errors

2021-07-06 Thread Daniel Henrique Barboza

Linux Kernel 5.12 is now unisolating CPU DRCs in the device_removal
error path, signalling that the hotunplug process wasn't successful.
This allows us to send a DEVICE_UNPLUG_ERROR in drc_unisolate_logical()
to signal this error to the management layer.

We also have another error path in spapr_memory_unplug_rollback() for
configured LMB DRCs. Kernels older than 5.13 will not unisolate the LMBs
in the hotunplug error path, but they will reconfigure them. Let's send
the DEVICE_UNPLUG_ERROR event in that code path as well to cover the
case of older kernels.

Reviewed-by: David Gibson 
Signed-off-by: Daniel Henrique Barboza 
---
 hw/ppc/spapr.c |  8 
 hw/ppc/spapr_drc.c | 15 +--
 2 files changed, 17 insertions(+), 6 deletions(-)

diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 4dd90b75cc..fc071a1767 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -29,6 +29,7 @@
 #include "qemu/datadir.h"
 #include "qapi/error.h"
 #include "qapi/qapi-events-machine.h"
+#include "qapi/qapi-events-qdev.h"
 #include "qapi/visitor.h"
 #include "sysemu/sysemu.h"
 #include "sysemu/hostmem.h"
@@ -3641,7 +3642,14 @@ void spapr_memory_unplug_rollback(SpaprMachineState 
*spapr, DeviceState *dev)
  */
 qapi_error = g_strdup_printf("Memory hotunplug rejected by the guest "
  "for device %s", dev->id);
+
+/*
+ * Send both MEM_UNPLUG_ERROR and DEVICE_UNPLUG_ERROR
+ * while the deprecation of MEM_UNPLUG_ERROR is
+ * pending.
+ */
 qapi_event_send_mem_unplug_error(dev->id, qapi_error);
+qapi_event_send_device_unplug_error(dev->id, qapi_error);
 }
 
 /* Callback to be called during DRC release. */
diff --git a/hw/ppc/spapr_drc.c b/hw/ppc/spapr_drc.c
index a2f2634601..45a7b1aa16 100644
--- a/hw/ppc/spapr_drc.c
+++ b/hw/ppc/spapr_drc.c
@@ -17,6 +17,8 @@
 #include "hw/ppc/spapr_drc.h"
 #include "qom/object.h"
 #include "migration/vmstate.h"
+#include "qapi/error.h"
+#include "qapi/qapi-events-qdev.h"
 #include "qapi/visitor.h"
 #include "qemu/error-report.h"
 #include "hw/ppc/spapr.h" /* for RTAS return codes */
@@ -160,6 +162,10 @@ static uint32_t drc_unisolate_logical(SpaprDrc *drc)
  * means that the kernel is refusing the removal.
  */
 if (drc->unplug_requested && drc->dev) {
+const char qapi_error_fmt[] = "Device hotunplug rejected by the "
+  "guest for device %s";
+g_autofree char *qapi_error = NULL;
+
 if (spapr_drc_type(drc) == SPAPR_DR_CONNECTOR_TYPE_LMB) {
 spapr = SPAPR_MACHINE(qdev_get_machine());
 
@@ -167,13 +173,10 @@ static uint32_t drc_unisolate_logical(SpaprDrc *drc)
 }
 
 drc->unplug_requested = false;
-error_report("Device hotunplug rejected by the guest "
- "for device %s", drc->dev->id);
+error_report(qapi_error_fmt, drc->dev->id);
 
-/*
- * TODO: send a QAPI DEVICE_UNPLUG_ERROR event when
- * it is implemented.
- */
+qapi_error = g_strdup_printf(qapi_error_fmt, drc->dev->id);
+qapi_event_send_device_unplug_error(drc->dev->id, qapi_error);
 }
 
 return RTAS_OUT_SUCCESS; /* Nothing to do */
-- 
2.31.1

Re: [PATCH 2/2] tcg: Bake tb_destroy() into tcg_region_tree

2021-07-06 Thread Richard Henderson


On 7/4/21 7:31 AM, Liren Wei wrote:

-static gint tb_tc_cmp(gconstpointer ap, gconstpointer bp)
+static gint tb_tc_cmp(gconstpointer ap, gconstpointer bp, gpointer _)


Using _ here as the variable name isn't ideal.  I guess if this were c++ we would actually 
omit the name, which is kinda the same.  But I think it's just as easy to name it 
userdata, as per glib docs.


I'll fix that up while queuing, thanks.

I'm not keen that the spinlock init and destroy are in different places, but surely that 
should be fixed by moving the init to tcg_tb_alloc, probably moving it to tcg/region.c as 
well.



r~

[PATCH v4 1/3] qapi/qdev.json: add DEVICE_UNPLUG_ERROR QAPI event

2021-07-06 Thread Daniel Henrique Barboza

At this moment we only provide one event to report a hotunplug error,
MEM_UNPLUG_ERROR. As of Linux kernel 5.12 and QEMU 6.0.0, the pseries
machine is now able to report unplug errors for other device types, such
as CPUs.

Instead of creating a (device_type)_UNPLUG_ERROR for each new device,
create a generic DEVICE_UNPLUG_ERROR event that can be used by all
unplug errors in the future.

With this new generic event, MEM_UNPLUG_ERROR is now marked as deprecated.

Reviewed-by: David Gibson 
Signed-off-by: Daniel Henrique Barboza 
---
 docs/system/deprecated.rst | 10 ++
 qapi/machine.json  |  6 +-
 qapi/qdev.json | 27 ++-
 3 files changed, 41 insertions(+), 2 deletions(-)

diff --git a/docs/system/deprecated.rst b/docs/system/deprecated.rst
index 70e08baff6..ca6c7f9d43 100644
--- a/docs/system/deprecated.rst
+++ b/docs/system/deprecated.rst
@@ -204,6 +204,16 @@ The ``I7200`` guest CPU relies on the nanoMIPS ISA, which 
is deprecated
 (the ISA has never been upstreamed to a compiler toolchain). Therefore
 this CPU is also deprecated.
 
+
+QEMU API (QAPI) events
+--
+
+``MEM_UNPLUG_ERROR`` (since 6.1)
+
+
+Use the more generic event ``DEVICE_UNPLUG_ERROR`` instead.
+
+
 System emulator machines
 
 
diff --git a/qapi/machine.json b/qapi/machine.json
index c3210ee1fb..a595c753d2 100644
--- a/qapi/machine.json
+++ b/qapi/machine.json
@@ -1271,6 +1271,9 @@
 #
 # @msg: Informative message
 #
+# Features:
+# @deprecated: This event is deprecated. Use @DEVICE_UNPLUG_ERROR instead.
+#
 # Since: 2.4
 #
 # Example:
@@ -1283,7 +1286,8 @@
 #
 ##
 { 'event': 'MEM_UNPLUG_ERROR',
-  'data': { 'device': 'str', 'msg': 'str' } }
+  'data': { 'device': 'str', 'msg': 'str' },
+  'features': ['deprecated'] }
 
 ##
 # @SMPConfiguration:
diff --git a/qapi/qdev.json b/qapi/qdev.json
index b83178220b..349d7439fa 100644
--- a/qapi/qdev.json
+++ b/qapi/qdev.json
@@ -84,7 +84,9 @@
 #This command merely requests that the guest begin the hot removal
 #process.  Completion of the device removal process is signaled with a
 #DEVICE_DELETED event. Guest reset will automatically complete removal
-#for all devices.
+#for all devices. If an error in the hot removal process is detected,
+#the device will not be removed and a DEVICE_UNPLUG_ERROR event is
+#sent.
 #
 # Since: 0.14
 #
@@ -124,3 +126,26 @@
 ##
 { 'event': 'DEVICE_DELETED',
   'data': { '*device': 'str', 'path': 'str' } }
+
+##
+# @DEVICE_UNPLUG_ERROR:
+#
+# Emitted when a device hot unplug error occurs.
+#
+# @device: device name
+#
+# @msg: Informative message
+#
+# Since: 6.1
+#
+# Example:
+#
+# <- { "event": "DEVICE_UNPLUG_ERROR"
+#  "data": { "device": "dimm1",
+#"msg": "Memory hotunplug rejected by the guest for device 
dimm1"
+#  },
+#  "timestamp": { "seconds": 1615570772, "microseconds": 202844 } }
+#
+##
+{ 'event': 'DEVICE_UNPLUG_ERROR',
+  'data': { 'device': 'str', 'msg': 'str' } }
-- 
2.31.1

[PATCH v4 0/3] DEVICE_UNPLUG_ERROR QAPI event

2021-07-06 Thread Daniel Henrique Barboza

Hi,

This new version is rebased on current master (9aef0954195cc), with
hopefully an adequate format for patch 1 and David's R-b on all
patches.

changes from v3:
- patch 1:
  * fixed format
- all patches:
  * rebased with master
  * added David's R-b
- v3 link: https://lists.gnu.org/archive/html/qemu-devel/2021-06/msg05842.html

changes from v2:
- patch 1:
  * moved DEVICE_UNPLUG_ERROR declaration to qapi/qdev.json
  * updated 'device_del' description
  * added 'deprecated' notice on MEM_UNPLUG_ERROR
  * added MEM_UNPLUG_ERROR 'deprecated' info in docs/system/deprecated.rst
- patch 2:
  * send both MEM_UNPLUG_ERROR and DEVICE_UNPLUG_ERROR
- patch 3 (new):
  * send DEVICE_UNPLUG_ERROR in acpi/memory_hotplug.c
- v2 link: https://lists.gnu.org/archive/html/qemu-devel/2021-06/msg01304.html

changes from v1:
- former patches 1 and 2: dropped
- patch 1 (former 3): changed the version to '6.1'
- patch 2 (former 4): add a DEVICE_UNPLUG_ERROR event in the device
  unplug error path of CPUs and DIMMs
- v1 link: https://lists.gnu.org/archive/html/qemu-devel/2021-03/msg04682.html

*** BLURB HERE ***

Daniel Henrique Barboza (3):
  qapi/qdev.json: add DEVICE_UNPLUG_ERROR QAPI event
  spapr: use DEVICE_UNPLUG_ERROR to report unplug errors
  memory_hotplug.c: send DEVICE_UNPLUG_ERROR in
acpi_memory_hotplug_write()

 docs/system/deprecated.rst | 10 ++
 hw/acpi/memory_hotplug.c   | 13 +++--
 hw/ppc/spapr.c |  8 
 hw/ppc/spapr_drc.c | 15 +--
 qapi/machine.json  |  6 +-
 qapi/qdev.json | 27 ++-
 6 files changed, 69 insertions(+), 10 deletions(-)

-- 
2.31.1

Re: [PATCH v3 1/3] qapi/qdev.json: add DEVICE_UNPLUG_ERROR QAPI event

2021-07-06 Thread Daniel Henrique Barboza





On 7/5/21 1:52 AM, David Gibson wrote:

On Mon, Jun 21, 2021 at 05:59:05PM -0300, Daniel Henrique Barboza wrote:

At this moment we only provide one event to report a hotunplug error,
MEM_UNPLUG_ERROR. As of Linux kernel 5.12 and QEMU 6.0.0, the pseries
machine is now able to report unplug errors for other device types, such
as CPUs.


Something seems to have gone weirdly wrong with the formatting here.


I have no idea what happened lol





Instead of creating a (device_type)_UNPLUG_ERROR for each new device,
create a generic DEVICE_UNPLUG_ERROR event that can be used by all
unplug errors in the future.

With this new generic event, MEM_UNPLUG_ERROR is now marked as 
deprecated.

Signed-off-by: Daniel Henrique Barboza 


Apart from that
Reviewed-by: David Gibson 


Thanks for the reviews! I'll resend the series (hopefully with the proper
formatting) with your R-bs.



Daniel




---
 docs/system/deprecated.rst | 10 ++
 qapi/machine.json  |  6 +-
 qapi/qdev.json | 27 ++-
 3 files changed, 41 insertions(+), 2 deletions(-)

diff --git a/docs/system/deprecated.rst b/docs/system/deprecated.rst
index e2e0090878..c8200d99d4 100644
--- a/docs/system/deprecated.rst
+++ b/docs/system/deprecated.rst
@@ -192,6 +192,16 @@ The ``I7200`` guest CPU relies on the nanoMIPS 
ISA, which is deprecated
 (the ISA has never been upstreamed to a compiler toolchain). Therefore
 this CPU is also deprecated.

+
+QEMU API (QAPI) events
+--
+
+``MEM_UNPLUG_ERROR`` (since 6.1)
+
+
+Use the more generic event ``DEVICE_UNPLUG_ERROR`` instead.
+
+
 System emulator machines
 

diff --git a/qapi/machine.json b/qapi/machine.json
index e4d0f9b24f..91dc520734 100644
--- a/qapi/machine.json
+++ b/qapi/machine.json
@@ -1271,6 +1271,9 @@
 #
 # @msg: Informative message
 #
+# Features:
+# @deprecated: This event is deprecated. Use @DEVICE_UNPLUG_ERROR 
instead.
+#
 # Since: 2.4
 #
 # Example:
@@ -1283,4 +1286,5 @@
 #
 ##
 { 'event': 'MEM_UNPLUG_ERROR',
-  'data': { 'device': 'str', 'msg': 'str' } }
+  'data': { 'device': 'str', 'msg': 'str' },
+  'features': ['deprecated'] }
diff --git a/qapi/qdev.json b/qapi/qdev.json
index b83178220b..349d7439fa 100644
--- a/qapi/qdev.json
+++ b/qapi/qdev.json
@@ -84,7 +84,9 @@
 #This command merely requests that the guest begin the hot 
removal
 #process.  Completion of the device removal process is 
signaled with a
 #DEVICE_DELETED event. Guest reset will automatically complete 
removal
-#for all devices.
+#for all devices. If an error in the hot removal process is 
detected,
+#the device will not be removed and a DEVICE_UNPLUG_ERROR 
event is
+#sent.
 #
 # Since: 0.14
 #
@@ -124,3 +126,26 @@
 ##
 { 'event': 'DEVICE_DELETED',
   'data': { '*device': 'str', 'path': 'str' } }
+
+##
+# @DEVICE_UNPLUG_ERROR:
+#
+# Emitted when a device hot unplug error occurs.
+#
+# @device: device name
+#
+# @msg: Informative message
+#
+# Since: 6.1
+#
+# Example:
+#
+# <- { "event": "DEVICE_UNPLUG_ERROR"
+#  "data": { "device": "dimm1",
+#"msg": "Memory hotunplug rejected by the guest for device 
dimm1"
+#  },
+#  "timestamp": { "seconds": 1615570772, "microseconds": 202844 } }
+#
+##
+{ 'event': 'DEVICE_UNPLUG_ERROR',
+  'data': { 'device': 'str', 'msg': 'str' } }

Re: [PATCH v2] tcg: Avoid including 'trace-tcg.h' in target translate.c

2021-07-06 Thread Richard Henderson


On 6/28/21 10:09 PM, Philippe Mathieu-Daudé wrote:

The root trace-events only declares a single TCG event:

   $ git grep -w tcg trace-events
   trace-events:115:# tcg/tcg-op.c
   trace-events:137:vcpu tcg guest_mem_before(TCGv vaddr, uint16_t info) "info=%d", 
"vaddr=0x%016"PRIx64" info=%d"

and only tcg/tcg-op.c uses it:

   $ git grep -l trace_guest_mem_before_tcg
   tcg/tcg-op.c

therefore it is pointless to include "trace-tcg.h" in each target
(because it is not used). Remove it.

Signed-off-by: Philippe Mathieu-Daudé
---
v2: Rebased due to mips-next conflict (was already queued)
---


Queued, thanks.

r~

[PATCH] tcg: Add separator in INDEX_op_call dump

2021-07-06 Thread Richard Henderson

We lost the ',' following the called function name.

Fixes: 3e92aa34434
Signed-off-by: Richard Henderson 
---
 tcg/tcg.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tcg/tcg.c b/tcg/tcg.c
index 5150ed700e..4dd4084419 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -1849,7 +1849,7 @@ static void tcg_dump_ops(TCGContext *s, bool have_prefs)
 col += qemu_log("plugin(%p)", func);
 }
 
-col += qemu_log("$0x%x,$%d", info->flags, nb_oargs);
+col += qemu_log(",$0x%x,$%d", info->flags, nb_oargs);
 for (i = 0; i < nb_oargs; i++) {
 col += qemu_log(",%s", tcg_get_arg_str(s, buf, sizeof(buf),
op->args[i]));
-- 
2.25.1

[PATCH 2/2] ui/gtk-egl: blitting partial guest fb to the proper scanout surface

2021-07-06 Thread Dongwon Kim

egl_fb_blit needs more parameters which describe the x and y offsets and the
width and height of the actual scanout, to specify the size and coordinates of
the partial image to blit from the guest fb in case the guest fb contains
multiple display outputs.

Signed-off-by: Dongwon Kim 
---
 hw/display/virtio-gpu-udmabuf.c |  4 ++--
 include/ui/egl-helpers.h|  2 +-
 ui/egl-headless.c   |  2 +-
 ui/egl-helpers.c| 10 ++
 ui/gtk-egl.c|  7 ---
 ui/sdl2-gl.c|  2 +-
 6 files changed, 15 insertions(+), 12 deletions(-)

diff --git a/hw/display/virtio-gpu-udmabuf.c b/hw/display/virtio-gpu-udmabuf.c
index a64194c6de..3ea6e76371 100644
--- a/hw/display/virtio-gpu-udmabuf.c
+++ b/hw/display/virtio-gpu-udmabuf.c
@@ -186,8 +186,8 @@ static VGPUDMABuf
 dmabuf->buf.stride = fb->stride;
 dmabuf->buf.x = r->x;
 dmabuf->buf.y = r->y;
-dmabuf->buf.scanout_width;
-dmabuf->buf.scanout_height;
+dmabuf->buf.scanout_width = r->width;
+dmabuf->buf.scanout_height = r->height;
 dmabuf->buf.fourcc = qemu_pixman_to_drm_format(fb->format);
 dmabuf->buf.fd = res->dmabuf_fd;
 
diff --git a/include/ui/egl-helpers.h b/include/ui/egl-helpers.h
index f1bf8f97fc..e21118501e 100644
--- a/include/ui/egl-helpers.h
+++ b/include/ui/egl-helpers.h
@@ -26,7 +26,7 @@ void egl_fb_setup_default(egl_fb *fb, int width, int height);
 void egl_fb_setup_for_tex(egl_fb *fb, int width, int height,
   GLuint texture, bool delete);
 void egl_fb_setup_new_tex(egl_fb *fb, int width, int height);
-void egl_fb_blit(egl_fb *dst, egl_fb *src, bool flip);
+void egl_fb_blit(egl_fb *dst, egl_fb *src, int x, int y, int w, int h, bool 
flip);
 void egl_fb_read(DisplaySurface *dst, egl_fb *src);
 
 void egl_texture_blit(QemuGLShader *gls, egl_fb *dst, egl_fb *src, bool flip);
diff --git a/ui/egl-headless.c b/ui/egl-headless.c
index da377a74af..bdf10fec84 100644
--- a/ui/egl-headless.c
+++ b/ui/egl-headless.c
@@ -144,7 +144,7 @@ static void egl_scanout_flush(DisplayChangeListener *dcl,
   1.0, 1.0);
 } else {
 /* no cursor -> use simple framebuffer blit */
-egl_fb_blit(&edpy->blit_fb, &edpy->guest_fb, edpy->y_0_top);
+egl_fb_blit(&edpy->blit_fb, &edpy->guest_fb, x, y, w, h, 
edpy->y_0_top);
 }
 
 egl_fb_read(edpy->ds, &edpy->blit_fb);
diff --git a/ui/egl-helpers.c b/ui/egl-helpers.c
index 6d0cb2b5cb..2af3dcc0a6 100644
--- a/ui/egl-helpers.c
+++ b/ui/egl-helpers.c
@@ -88,16 +88,18 @@ void egl_fb_setup_new_tex(egl_fb *fb, int width, int height)
 egl_fb_setup_for_tex(fb, width, height, texture, true);
 }
 
-void egl_fb_blit(egl_fb *dst, egl_fb *src, bool flip)
+void egl_fb_blit(egl_fb *dst, egl_fb *src, int x, int y, int w, int h, bool 
flip)
 {
 GLuint y1, y2;
 
 glBindFramebuffer(GL_READ_FRAMEBUFFER, src->framebuffer);
 glBindFramebuffer(GL_DRAW_FRAMEBUFFER, dst->framebuffer);
 glViewport(0, 0, dst->width, dst->height);
-y1 = flip ? src->height : 0;
-y2 = flip ? 0 : src->height;
-glBlitFramebuffer(0, y1, src->width, y2,
+w = (x + w) > src->width ? src->width - x : w;
+h = (y + h) > src->height ? src->height - y : h;
+y1 = flip ? h + y : y;
+y2 = flip ? y : h + y;
+glBlitFramebuffer(x, y1, x + w, y2,
   0, 0, dst->width, dst->height,
   GL_COLOR_BUFFER_BIT, GL_LINEAR);
 }
diff --git a/ui/gtk-egl.c b/ui/gtk-egl.c
index 2a2e6d3a17..ceb52b1045 100644
--- a/ui/gtk-egl.c
+++ b/ui/gtk-egl.c
@@ -73,7 +73,7 @@ void gd_egl_draw(VirtualConsole *vc)
 wh = gdk_window_get_height(window);
 
 if (vc->gfx.scanout_mode) {
-gd_egl_scanout_flush(&vc->gfx.dcl, 0, 0, vc->gfx.w, vc->gfx.h);
+   gd_egl_scanout_flush(&vc->gfx.dcl, vc->gfx.x, vc->gfx.y, vc->gfx.w, 
vc->gfx.h);
 
 vc->gfx.scale_x = (double)ww / vc->gfx.w;
 vc->gfx.scale_y = (double)wh / vc->gfx.h;
@@ -216,7 +216,8 @@ void gd_egl_scanout_dmabuf(DisplayChangeListener *dcl,
 
 gd_egl_scanout_texture(dcl, dmabuf->texture,
false, dmabuf->width, dmabuf->height,
-   0, 0, dmabuf->width, dmabuf->height);
+   dmabuf->x, dmabuf->y, dmabuf->scanout_width,
+   dmabuf->scanout_height);
 #endif
 }
 
@@ -286,7 +287,7 @@ void gd_egl_scanout_flush(DisplayChangeListener *dcl,
   vc->gfx.cursor_x, vc->gfx.cursor_y,
   vc->gfx.scale_x, vc->gfx.scale_y);
 } else {
-egl_fb_blit(&vc->gfx.win_fb, &vc->gfx.guest_fb, !vc->gfx.y0_top);
+egl_fb_blit(&vc->gfx.win_fb, &vc->gfx.guest_fb, x, y, w, h, 
!vc->gfx.y0_top);
 }
 
 eglSwapBuffers(qemu_egl_display, vc->gfx.esurface);
diff --git a/ui/sdl2-gl.c b/ui/sdl2-gl.c
index a21d2deed9..67bc8b5f4e 100644
--- a/ui/sdl2-gl.c
+++ b/ui/sdl2-gl.c
@@ -238,7 +238,7 @@ void sdl2_gl_scanout_flush(DisplayChangeListener *dcl,
 
 SDL_GetWin

[PATCH 1/2] virtio-gpu: splitting one extended mode guest fb into n-scanouts

2021-07-06 Thread Dongwon Kim

When the guest is running Linux/X11 with extended multiple displays mode enabled,
the guest shares one scanout resource each time containing the whole surface
rather than sharing individual display outputs separately. This extended frame
is properly split and rendered on the corresponding scanout surfaces but
not in the case of blob-resource (zero copy).

This code change lets QEMU split this one large surface into multiple
sub-frames in the blob-resource case as well, so that each sub-frame can then
be blitted properly to its scanout.

Signed-off-by: Dongwon Kim 
---
 hw/display/virtio-gpu-udmabuf.c | 19 +++
 hw/display/virtio-gpu.c |  5 +++--
 include/hw/virtio/virtio-gpu.h  |  5 +++--
 include/ui/console.h|  4 
 stubs/virtio-gpu-udmabuf.c  |  3 ++-
 5 files changed, 23 insertions(+), 13 deletions(-)

diff --git a/hw/display/virtio-gpu-udmabuf.c b/hw/display/virtio-gpu-udmabuf.c
index 3c01a415e7..a64194c6de 100644
--- a/hw/display/virtio-gpu-udmabuf.c
+++ b/hw/display/virtio-gpu-udmabuf.c
@@ -171,7 +171,8 @@ static VGPUDMABuf
 *virtio_gpu_create_dmabuf(VirtIOGPU *g,
   uint32_t scanout_id,
   struct virtio_gpu_simple_resource *res,
-  struct virtio_gpu_framebuffer *fb)
+  struct virtio_gpu_framebuffer *fb,
+  struct virtio_gpu_rect *r)
 {
 VGPUDMABuf *dmabuf;
 
@@ -183,6 +184,10 @@ static VGPUDMABuf
 dmabuf->buf.width = fb->width;
 dmabuf->buf.height = fb->height;
 dmabuf->buf.stride = fb->stride;
+dmabuf->buf.x = r->x;
+dmabuf->buf.y = r->y;
+dmabuf->buf.scanout_width;
+dmabuf->buf.scanout_height;
 dmabuf->buf.fourcc = qemu_pixman_to_drm_format(fb->format);
 dmabuf->buf.fd = res->dmabuf_fd;
 
@@ -195,24 +200,22 @@ static VGPUDMABuf
 int virtio_gpu_update_dmabuf(VirtIOGPU *g,
  uint32_t scanout_id,
  struct virtio_gpu_simple_resource *res,
- struct virtio_gpu_framebuffer *fb)
+ struct virtio_gpu_framebuffer *fb,
+ struct virtio_gpu_rect *r)
 {
 struct virtio_gpu_scanout *scanout = &g->parent_obj.scanout[scanout_id];
 VGPUDMABuf *new_primary, *old_primary = NULL;
 
-new_primary = virtio_gpu_create_dmabuf(g, scanout_id, res, fb);
+new_primary = virtio_gpu_create_dmabuf(g, scanout_id, res, fb, r);
 if (!new_primary) {
 return -EINVAL;
 }
 
 if (g->dmabuf.primary) {
-old_primary = g->dmabuf.primary;
+old_primary = g->dmabuf.primary[scanout_id];
 }
 
-g->dmabuf.primary = new_primary;
-qemu_console_resize(scanout->con,
-new_primary->buf.width,
-new_primary->buf.height);
+g->dmabuf.primary[scanout_id] = new_primary;
 dpy_gl_scanout_dmabuf(scanout->con, &new_primary->buf);
 
 if (old_primary) {
diff --git a/hw/display/virtio-gpu.c b/hw/display/virtio-gpu.c
index e183f4ecda..11a87dad79 100644
--- a/hw/display/virtio-gpu.c
+++ b/hw/display/virtio-gpu.c
@@ -523,9 +523,9 @@ static void virtio_gpu_resource_flush(VirtIOGPU *g,
 console_has_gl(scanout->con)) {
 dpy_gl_update(scanout->con, 0, 0, scanout->width,
   scanout->height);
-return;
 }
 }
+return;
 }
 
 if (!res->blob &&
@@ -598,6 +598,7 @@ static void virtio_gpu_update_scanout(VirtIOGPU *g,
 scanout->y = r->y;
 scanout->width = r->width;
 scanout->height = r->height;
+qemu_console_resize(scanout->con, scanout->width, scanout->height);
 }
 
 static void virtio_gpu_do_set_scanout(VirtIOGPU *g,
@@ -633,7 +634,7 @@ static void virtio_gpu_do_set_scanout(VirtIOGPU *g,
 
 if (res->blob) {
 if (console_has_gl(scanout->con)) {
-if (!virtio_gpu_update_dmabuf(g, scanout_id, res, fb)) {
+if (!virtio_gpu_update_dmabuf(g, scanout_id, res, fb, r)) {
 virtio_gpu_update_scanout(g, scanout_id, res, r);
 return;
 }
diff --git a/include/hw/virtio/virtio-gpu.h b/include/hw/virtio/virtio-gpu.h
index bcf54d970f..6372f4bbb5 100644
--- a/include/hw/virtio/virtio-gpu.h
+++ b/include/hw/virtio/virtio-gpu.h
@@ -187,7 +187,7 @@ struct VirtIOGPU {
 
 struct {
 QTAILQ_HEAD(, VGPUDMABuf) bufs;
-VGPUDMABuf *primary;
+VGPUDMABuf *primary[VIRTIO_GPU_MAX_SCANOUTS];
 } dmabuf;
 };
 
@@ -273,7 +273,8 @@ void virtio_gpu_fini_udmabuf(struct 
virtio_gpu_simple_resource *res);
 int virtio_gpu_update_dmabuf(VirtIOGPU *g,
  uint32_t scanout_id,
  struct virtio_gpu_simple_resource *res,
- struct virtio_gpu_framebuffer *fb);
+ struct virtio_gpu_framebuffer *fb,
+ struct virtio_gpu_rect *r

[PATCH v2 36/36] linux-user/xtensa: Implement setup_sigtramp

2021-07-06 Thread Richard Henderson

Create and record the rt signal trampoline.
Use it when the guest does not use SA_RESTORER.

Reviewed-by: Max Filippov 
Signed-off-by: Richard Henderson 
---
 linux-user/xtensa/target_signal.h |  2 ++
 linux-user/xtensa/signal.c| 50 ++-
 2 files changed, 32 insertions(+), 20 deletions(-)

diff --git a/linux-user/xtensa/target_signal.h 
b/linux-user/xtensa/target_signal.h
index c60bf656f6..1c7ee73154 100644
--- a/linux-user/xtensa/target_signal.h
+++ b/linux-user/xtensa/target_signal.h
@@ -20,4 +20,6 @@ typedef struct target_sigaltstack {
 
 #include "../generic/signal.h"
 
+#define TARGET_ARCH_HAS_SIGTRAMP_PAGE 1
+
 #endif
diff --git a/linux-user/xtensa/signal.c b/linux-user/xtensa/signal.c
index 72771e1294..fd57481bf5 100644
--- a/linux-user/xtensa/signal.c
+++ b/linux-user/xtensa/signal.c
@@ -163,26 +163,7 @@ void setup_rt_frame(int sig, struct target_sigaction *ka,
 if (ka->sa_flags & TARGET_SA_RESTORER) {
 ra = ka->sa_restorer;
 } else {
-ra = frame_addr + offsetof(struct target_rt_sigframe, retcode);
-#ifdef TARGET_WORDS_BIGENDIAN
-/* Generate instruction:  MOVI a2, __NR_rt_sigreturn */
-__put_user(0x22, &frame->retcode[0]);
-__put_user(0x0a, &frame->retcode[1]);
-__put_user(TARGET_NR_rt_sigreturn, &frame->retcode[2]);
-/* Generate instruction:  SYSCALL */
-__put_user(0x00, &frame->retcode[3]);
-__put_user(0x05, &frame->retcode[4]);
-__put_user(0x00, &frame->retcode[5]);
-#else
-/* Generate instruction:  MOVI a2, __NR_rt_sigreturn */
-__put_user(0x22, &frame->retcode[0]);
-__put_user(0xa0, &frame->retcode[1]);
-__put_user(TARGET_NR_rt_sigreturn, &frame->retcode[2]);
-/* Generate instruction:  SYSCALL */
-__put_user(0x00, &frame->retcode[3]);
-__put_user(0x50, &frame->retcode[4]);
-__put_user(0x00, &frame->retcode[5]);
-#endif
+ra = default_rt_sigreturn;
 }
 memset(env->regs, 0, sizeof(env->regs));
 env->pc = ka->_sa_handler;
@@ -263,3 +244,32 @@ badframe:
 force_sig(TARGET_SIGSEGV);
 return -TARGET_QEMU_ESIGRETURN;
 }
+
+void setup_sigtramp(abi_ulong sigtramp_page)
+{
+uint8_t *tramp = lock_user(VERIFY_WRITE, sigtramp_page, 6, 0);
+assert(tramp != NULL);
+
+#ifdef TARGET_WORDS_BIGENDIAN
+/* Generate instruction:  MOVI a2, __NR_rt_sigreturn */
+__put_user(0x22, &tramp[0]);
+__put_user(0x0a, &tramp[1]);
+__put_user(TARGET_NR_rt_sigreturn, &tramp[2]);
+/* Generate instruction:  SYSCALL */
+__put_user(0x00, &tramp[3]);
+__put_user(0x05, &tramp[4]);
+__put_user(0x00, &tramp[5]);
+#else
+/* Generate instruction:  MOVI a2, __NR_rt_sigreturn */
+__put_user(0x22, &tramp[0]);
+__put_user(0xa0, &tramp[1]);
+__put_user(TARGET_NR_rt_sigreturn, &tramp[2]);
+/* Generate instruction:  SYSCALL */
+__put_user(0x00, &tramp[3]);
+__put_user(0x50, &tramp[4]);
+__put_user(0x00, &tramp[5]);
+#endif
+
+default_rt_sigreturn = sigtramp_page;
+unlock_user(tramp, sigtramp_page, 6);
+}
-- 
2.25.1

[PATCH v2 32/36] linux-user/riscv: Add vdso and use it for sigreturn

2021-07-06 Thread Richard Henderson

Building the vdso itself is not actually wired up to anything, since
we require a cross-compiler.  Just check in those files for now.

This fixes a bug wrt libgcc fallback unwinding.  It expects the stack
pointer to point to the siginfo_t, whereas we had inexplicably placed
our private signal trampoline at the start of the signal frame instead
of the end.  Now moot because we have removed it from the stack
frame entirely.

Cc: qemu-ri...@nongnu.org
Signed-off-by: Richard Henderson 
---
 linux-user/elfload.c   |   4 +
 linux-user/riscv/signal.c  |  10 +-
 linux-user/meson.build |   1 +
 linux-user/riscv/Makefile.vdso |  11 ++
 linux-user/riscv/meson.build   |   9 ++
 linux-user/riscv/vdso-32.so| Bin 0 -> 5624 bytes
 linux-user/riscv/vdso-64.so| Bin 0 -> 6120 bytes
 linux-user/riscv/vdso.S| 207 +
 linux-user/riscv/vdso.ld   |  76 
 9 files changed, 309 insertions(+), 9 deletions(-)
 create mode 100644 linux-user/riscv/Makefile.vdso
 create mode 100644 linux-user/riscv/meson.build
 create mode 100755 linux-user/riscv/vdso-32.so
 create mode 100755 linux-user/riscv/vdso-64.so
 create mode 100644 linux-user/riscv/vdso.S
 create mode 100644 linux-user/riscv/vdso.ld

diff --git a/linux-user/elfload.c b/linux-user/elfload.c
index 43c985f318..782d2904bc 100644
--- a/linux-user/elfload.c
+++ b/linux-user/elfload.c
@@ -1484,10 +1484,14 @@ static void elf_core_copy_regs(target_elf_gregset_t 
*regs,
 
 #ifdef TARGET_RISCV32
 #define ELF_CLASS ELFCLASS32
+#include "vdso-32.c.inc"
 #else
 #define ELF_CLASS ELFCLASS64
+#include "vdso-64.c.inc"
 #endif
 
+#define vdso_image_info()&vdso_image_info
+
 static inline void init_thread(struct target_pt_regs *regs,
struct image_info *infop)
 {
diff --git a/linux-user/riscv/signal.c b/linux-user/riscv/signal.c
index 9405c7fd9a..fe7cf7723b 100644
--- a/linux-user/riscv/signal.c
+++ b/linux-user/riscv/signal.c
@@ -46,7 +46,6 @@ struct target_ucontext {
 };
 
 struct target_rt_sigframe {
-uint32_t tramp[2]; /* not in kernel, which uses VDSO instead */
 struct target_siginfo info;
 struct target_ucontext uc;
 };
@@ -104,12 +103,6 @@ static void setup_ucontext(struct target_ucontext *uc,
 setup_sigcontext(&uc->uc_mcontext, env);
 }
 
-static inline void install_sigtramp(uint32_t *tramp)
-{
-__put_user(0x08b00893, tramp + 0);  /* li a7, 139 = __NR_rt_sigreturn */
-__put_user(0x0073, tramp + 1);  /* ecall */
-}
-
 void setup_rt_frame(int sig, struct target_sigaction *ka,
 target_siginfo_t *info,
 target_sigset_t *set, CPURISCVState *env)
@@ -126,14 +119,13 @@ void setup_rt_frame(int sig, struct target_sigaction *ka,
 
 setup_ucontext(&frame->uc, env, set);
 tswap_siginfo(&frame->info, info);
-install_sigtramp(frame->tramp);
 
 env->pc = ka->_sa_handler;
 env->gpr[xSP] = frame_addr;
 env->gpr[xA0] = sig;
 env->gpr[xA1] = frame_addr + offsetof(struct target_rt_sigframe, info);
 env->gpr[xA2] = frame_addr + offsetof(struct target_rt_sigframe, uc);
-env->gpr[xRA] = frame_addr + offsetof(struct target_rt_sigframe, tramp);
+env->gpr[xRA] = default_rt_sigreturn;
 
 return;
 
diff --git a/linux-user/meson.build b/linux-user/meson.build
index 8021044053..e581d5ceba 100644
--- a/linux-user/meson.build
+++ b/linux-user/meson.build
@@ -35,6 +35,7 @@ subdir('microblaze')
 subdir('mips64')
 subdir('mips')
 subdir('ppc')
+subdir('riscv')
 subdir('s390x')
 subdir('sh4')
 subdir('sparc')
diff --git a/linux-user/riscv/Makefile.vdso b/linux-user/riscv/Makefile.vdso
new file mode 100644
index 00..de55a0b9f9
--- /dev/null
+++ b/linux-user/riscv/Makefile.vdso
@@ -0,0 +1,11 @@
+CROSS_CC ?= riscv64-linux-gnu-gcc
+LDFLAGS := -nostdlib -shared -Wl,-T,vdso.ld \
+  -Wl,-h,linux-vdso.so.1 -Wl,--hash-style=both -Wl,--build-id=sha1
+
+all: vdso-64.so vdso-32.so
+
+vdso-64.so: vdso.S vdso.ld Makefile.vdso
+   $(CROSS_CC) $(LDFLAGS) -mabi=lp64d -march=rv64g -fpic -o $@ vdso.S
+
+vdso-32.so: vdso.S vdso.ld Makefile.vdso
+   $(CROSS_CC) $(LDFLAGS) -mabi=ilp32d -march=rv32g -fpic -o $@ vdso.S
diff --git a/linux-user/riscv/meson.build b/linux-user/riscv/meson.build
new file mode 100644
index 00..475b816da1
--- /dev/null
+++ b/linux-user/riscv/meson.build
@@ -0,0 +1,9 @@
+gen32 = [
+  gen_vdso.process('vdso-32.so', extra_args: ['-r', '__vdso_rt_sigreturn']),
+]
+gen64 = [
+  gen_vdso.process('vdso-64.so', extra_args: ['-r', '__vdso_rt_sigreturn'])
+]
+
+linux_user_ss.add(when: 'TARGET_RISCV32', if_true: gen32)
+linux_user_ss.add(when: 'TARGET_RISCV64', if_true: gen64)
diff --git a/linux-user/riscv/vdso-32.so b/linux-user/riscv/vdso-32.so
new file mode 100755
index 
..0925aae9f50145bab6ef5d1da4a58c2dcb2ebec3
GIT binary patch
literal 5624
zcmeHLU2KzO6n?*!!WeAqFi>z)fq_hvy5SI%iTmpqP{uMgord_ewB0)0I&3>K$iiEN
z5Ml@{LJT3q3?zgYLl!ke4N(

Re: [PATCH 1/2] accel/tcg: Hoist tcg_tb_insert() up above tb_link_page()

2021-07-06 Thread Richard Henderson


On 7/4/21 7:31 AM, Liren Wei wrote:

TranslationBlocks not inserted into the corresponding region
tree shall be regarded as partially initialized objects, and
need to be finalized first before inserting into QHT.

Signed-off-by: Liren Wei
---
  accel/tcg/translate-all.c | 9 -
  1 file changed, 8 insertions(+), 1 deletion(-)


Queued, thanks.

r~

[PATCH v2 35/36] linux-user/sparc: Implement setup_sigtramp

2021-07-06 Thread Richard Henderson

Create and record the two signal trampolines.
Use them when the guest does not use SA_RESTORER.

Cc: Mark Cave-Ayland 
Reviewed-by: Philippe Mathieu-Daudé 
Signed-off-by: Richard Henderson 
---
 linux-user/sparc/target_signal.h |  4 
 linux-user/sparc/signal.c| 32 ++--
 2 files changed, 22 insertions(+), 14 deletions(-)

diff --git a/linux-user/sparc/target_signal.h b/linux-user/sparc/target_signal.h
index 34f9a12519..e661ddd6ab 100644
--- a/linux-user/sparc/target_signal.h
+++ b/linux-user/sparc/target_signal.h
@@ -69,6 +69,10 @@ typedef struct target_sigaltstack {
 
 #ifdef TARGET_ABI32
 #define TARGET_ARCH_HAS_SETUP_FRAME
+#define TARGET_ARCH_HAS_SIGTRAMP_PAGE 1
+#else
+/* For sparc64, use of KA_RESTORER is mandatory. */
+#define TARGET_ARCH_HAS_SIGTRAMP_PAGE 0
 #endif
 
 /* bit-flags */
diff --git a/linux-user/sparc/signal.c b/linux-user/sparc/signal.c
index 0cc3db5570..65e9b7f8b4 100644
--- a/linux-user/sparc/signal.c
+++ b/linux-user/sparc/signal.c
@@ -290,13 +290,7 @@ void setup_frame(int sig, struct target_sigaction *ka,
 if (ka->ka_restorer) {
 env->regwptr[WREG_O7] = ka->ka_restorer;
 } else {
-env->regwptr[WREG_O7] = sf_addr +
-offsetof(struct target_signal_frame, insns) - 2 * 4;
-
-/* mov __NR_sigreturn, %g1 */
-__put_user(0x821020d8u, &sf->insns[0]);
-/* t 0x10 */
-__put_user(0x91d02010u, &sf->insns[1]);
+env->regwptr[WREG_O7] = default_sigreturn;
 }
 unlock_user(sf, sf_addr, sf_size);
 }
@@ -357,13 +351,7 @@ void setup_rt_frame(int sig, struct target_sigaction *ka,
 if (ka->ka_restorer) {
 env->regwptr[WREG_O7] = ka->ka_restorer;
 } else {
-env->regwptr[WREG_O7] =
-sf_addr + offsetof(struct target_rt_signal_frame, insns) - 2 * 4;
-
-/* mov __NR_rt_sigreturn, %g1 */
-__put_user(0x82102065u, &sf->insns[0]);
-/* t 0x10 */
-__put_user(0x91d02010u, &sf->insns[1]);
+env->regwptr[WREG_O7] = default_rt_sigreturn;
 }
 #else
 env->regwptr[WREG_O7] = ka->ka_restorer;
@@ -774,4 +762,20 @@ do_sigsegv:
 unlock_user_struct(ucp, ucp_addr, 1);
 force_sig(TARGET_SIGSEGV);
 }
+#else
+void setup_sigtramp(abi_ulong sigtramp_page)
+{
+uint32_t *tramp = lock_user(VERIFY_WRITE, sigtramp_page, 2 * 8, 0);
+assert(tramp != NULL);
+
+default_sigreturn = sigtramp_page;
+__put_user(0x821020d8u, &tramp[0]);   /* mov __NR_sigreturn, %g1 */
+__put_user(0x91d02010u, &tramp[1]);   /* t 0x10 */
+
+default_rt_sigreturn = sigtramp_page + 8;
+__put_user(0x82102065u, &tramp[2]);   /* mov __NR_rt_sigreturn, %g1 */
+__put_user(0x91d02010u, &tramp[3]);   /* t 0x10 */
+
+unlock_user(tramp, sigtramp_page, 2 * 8);
+}
 #endif
-- 
2.25.1

[PATCH v2 25/36] linux-user/microblaze: Implement setup_sigtramp

2021-07-06 Thread Richard Henderson

Create and record the rt signal trampoline.

Cc: Edgar E. Iglesias 
Reviewed-by: Philippe Mathieu-Daudé 
Signed-off-by: Richard Henderson 
---
 linux-user/microblaze/target_signal.h |  2 ++
 linux-user/microblaze/signal.c| 24 +---
 2 files changed, 19 insertions(+), 7 deletions(-)

diff --git a/linux-user/microblaze/target_signal.h 
b/linux-user/microblaze/target_signal.h
index 1c326296de..e8b510f6b1 100644
--- a/linux-user/microblaze/target_signal.h
+++ b/linux-user/microblaze/target_signal.h
@@ -21,4 +21,6 @@ typedef struct target_sigaltstack {
 
 #include "../generic/signal.h"
 
+#define TARGET_ARCH_HAS_SIGTRAMP_PAGE 1
+
 #endif /* MICROBLAZE_TARGET_SIGNAL_H */
diff --git a/linux-user/microblaze/signal.c b/linux-user/microblaze/signal.c
index 4c483bd8c6..aa27454931 100644
--- a/linux-user/microblaze/signal.c
+++ b/linux-user/microblaze/signal.c
@@ -160,17 +160,11 @@ void setup_rt_frame(int sig, struct target_sigaction *ka,
 
 /* Kernel does not use SA_RESTORER. */
 
-/* addi r12, r0, __NR_sigreturn */
-__put_user(0x3180U | TARGET_NR_rt_sigreturn, frame->tramp + 0);
-/* brki r14, 0x8 */
-__put_user(0xb9cc0008U, frame->tramp + 1);
-
 /*
  * Return from sighandler will jump to the tramp.
  * Negative 8 offset because return is rtsd r15, 8
  */
-env->regs[15] =
-frame_addr + offsetof(struct target_rt_sigframe, tramp) - 8;
+env->regs[15] = default_rt_sigreturn - 8;
 
 /* Set up registers for signal handler */
 env->regs[1] = frame_addr;
@@ -219,3 +213,19 @@ long do_rt_sigreturn(CPUMBState *env)
 force_sig(TARGET_SIGSEGV);
 return -TARGET_QEMU_ESIGRETURN;
 }
+
+void setup_sigtramp(abi_ulong sigtramp_page)
+{
+uint32_t *tramp = lock_user(VERIFY_WRITE, sigtramp_page, 8, 0);
+assert(tramp != NULL);
+
+/*
+ * addi r12, r0, __NR_rt_sigreturn
+ * brki r14, 0x8
+ */
+__put_user(0x3180U | TARGET_NR_rt_sigreturn, tramp);
+__put_user(0xb9cc0008U, tramp + 1);
+
+default_rt_sigreturn = sigtramp_page;
+unlock_user(tramp, sigtramp_page, 8);
+}
-- 
2.25.1

[PATCH v2 30/36] target/ppc: Simplify encode_trampoline

2021-07-06 Thread Richard Henderson

The sigret parameter is never 0, and even if it were, the encoding
of the LI instruction would still work.

Reported-by: Peter Maydell 
Signed-off-by: Richard Henderson 
---
 linux-user/ppc/signal.c | 6 ++
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/linux-user/ppc/signal.c b/linux-user/ppc/signal.c
index edfad28a37..70cc27b0f6 100644
--- a/linux-user/ppc/signal.c
+++ b/linux-user/ppc/signal.c
@@ -308,10 +308,8 @@ static void save_user_regs(CPUPPCState *env, struct 
target_mcontext *frame)
 static void encode_trampoline(int sigret, uint32_t *tramp)
 {
 /* Set up the sigreturn trampoline: li r0,sigret; sc.  */
-if (sigret) {
-__put_user(0x3800 | sigret, &tramp[0]);
-__put_user(0x4402, &tramp[1]);
-}
+__put_user(0x3800 | sigret, &tramp[0]);
+__put_user(0x4402, &tramp[1]);
 }
 
 static void restore_user_regs(CPUPPCState *env,
-- 
2.25.1

[PATCH v2 28/36] linux-user/nios2: Document non-use of setup_sigtramp

2021-07-06 Thread Richard Henderson

Cc: Chris Wulff 
Cc: Marek Vasut 
Reviewed-by: Philippe Mathieu-Daudé 
Signed-off-by: Richard Henderson 
---
 linux-user/nios2/target_signal.h | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/linux-user/nios2/target_signal.h b/linux-user/nios2/target_signal.h
index aebf749f12..fe266c4c51 100644
--- a/linux-user/nios2/target_signal.h
+++ b/linux-user/nios2/target_signal.h
@@ -19,4 +19,7 @@ typedef struct target_sigaltstack {
 
 #include "../generic/signal.h"
 
+/* Nios2 uses a fixed address on the kuser page for sigreturn. */
+#define TARGET_ARCH_HAS_SIGTRAMP_PAGE 0
+
 #endif /* NIOS2_TARGET_SIGNAL_H */
-- 
2.25.1

Re: [PATCH 1/4] dp8393x: don't force 32-bit register access

2021-07-06 Thread Finn Thain

On Mon, 5 Jul 2021, Mark Cave-Ayland wrote:

> Commit 3fe9a838ec "dp8393x: Always use 32-bit accesses" set 
> .impl.min_access_size
> and .impl.max_access_size to 4 to try and fix the Linux jazzsonic driver 
> which uses
> 32-bit accesses.
> 
> The problem with forcing the register access to 32-bit in this way is that 
> since the
> dp8393x uses 16-bit registers, a manual endian swap is required for devices 
> on big
> endian machines with 32-bit accesses.
> 
> For both access sizes and machine endians the QEMU memory API can do the 
> right thing
> automatically: all that is needed is to set .impl.min_access_size to 2 to 
> declare that
> the dp8393x implements 16-bit registers.
> 
> Normally .impl.max_access_size should also be set to 2, however that doesn't 
> quite
> work in this case since the register stride is specified using a (dynamic) 
> it_shift
> property which is applied during the MMIO access itself. The effect of this 
> is that
> for a 32-bit access the memory API performs 2 x 16-bit accesses, but the use 
> of
> it_shift within the MMIO access itself causes the register value to be 
> repeated in both
> the top 16-bits and bottom 16-bits. The Linux jazzsonic driver expects the 
> stride to be
> zero-extended up to access size and therefore fails to correctly detect the 
> dp8393x
> device due to the extra data in the top 16-bits.
> 
> The solution here is to remove .impl.max_access_size so that the memory API 
> will
> correctly zero-extend the 16-bit registers to the access size up to and 
> including
> it_shift. Since it_shift is never greater than 2 then this will always do the 
> right
> thing for both 16-bit and 32-bit accesses regardless of the machine endian, 
> allowing
> the manual endian swap code to be removed.
> 

IIUC, this patch replaces an explicit word swap with an implicit byte 
swap. The explicit word swap was conditional on the big_endian flag.

This flag seems to work like the chip's BMODE pin which switches between 
Intel and Motorola bus modes (not just byte ordering but bus signalling in 
general). The BMODE pin or big_endian flag should effect a byte swap, not a 
word swap, so there must be a bug, though it's not clear how that manifests.

Regardless of this patch, the big_endian flag also controls byte swapping 
during DMA by the device. IIUC, the flag is set to indicate that RAM is 
big_endian, so it's not actually a property of the dp8393x but of the 
RAM...

The Magnum hardware can run in big endian or little endian mode. But the 
SONIC chip must remain in little endian mode always because asserting 
BMODE would invoke Motorola signalling and that would contradict 
Philippe's datasheet which says that the SONIC device is attached to an 
"i386 compatible bus".

This seems contrary to mips_jazz_init(), which sets the dp8393x big_endian 
flag whenever TARGET_WORDS_BIGENDIAN is defined, i.e. risc/os guest. 

QEMU's dp8393x device has native endianness, so perhaps a big endian guest 
or a big endian host could trigger the bug that's being addressed in this 
patch.

Anyway, I think that this patch is heading in the right direction but 
can't it go further? Shouldn't the big_endian flag disappear altogether so 
that the memory API can also take care of the byte swapping needed by 
dp8393x_get() and dp8393x_put() for DMA?

> Signed-off-by: Mark Cave-Ayland 
> Fixes: 3fe9a838ec ("dp8393x: Always use 32-bit accesses")
> ---
>  hw/net/dp8393x.c | 14 +-
>  1 file changed, 9 insertions(+), 5 deletions(-)
> 
> diff --git a/hw/net/dp8393x.c b/hw/net/dp8393x.c
> index 11810c9b60..44a1955015 100644
> --- a/hw/net/dp8393x.c
> +++ b/hw/net/dp8393x.c
> @@ -602,15 +602,14 @@ static uint64_t dp8393x_read(void *opaque, hwaddr addr, 
> unsigned int size)
>  
>  trace_dp8393x_read(reg, reg_names[reg], val, size);
>  
> -return s->big_endian ? val << 16 : val;
> +return val;
>  }
>  
> -static void dp8393x_write(void *opaque, hwaddr addr, uint64_t data,
> +static void dp8393x_write(void *opaque, hwaddr addr, uint64_t val,
>unsigned int size)
>  {
>  dp8393xState *s = opaque;
>  int reg = addr >> s->it_shift;
> -uint32_t val = s->big_endian ? data >> 16 : data;
>  
>  trace_dp8393x_write(reg, reg_names[reg], val, size);
>  
> @@ -691,11 +690,16 @@ static void dp8393x_write(void *opaque, hwaddr addr, 
> uint64_t data,
>  }
>  }
>  
> +/*
> + * Since .impl.max_access_size is effectively controlled by the it_shift
> + * property, leave it unspecified for now to allow the memory API to
> + * correctly zero extend the 16-bit register values to the access size up to 
> and
> + * including it_shift.
> + */
>  static const MemoryRegionOps dp8393x_ops = {
>  .read = dp8393x_read,
>  .write = dp8393x_write,
> -.impl.min_access_size = 4,
> -.impl.max_access_size = 4,
> +.impl.min_access_size = 2,
>  .endianness = DEVICE_NATIVE_ENDIAN,
>  };
>  
>

[PATCH v2 31/36] linux-user/ppc: Implement setup_sigtramp

2021-07-06 Thread Richard Henderson

Create and record the two signal trampolines.

Cc: qemu-...@nongnu.org
Signed-off-by: Richard Henderson 
---
 linux-user/ppc/target_signal.h |  2 ++
 linux-user/ppc/signal.c| 34 ++
 2 files changed, 20 insertions(+), 16 deletions(-)

diff --git a/linux-user/ppc/target_signal.h b/linux-user/ppc/target_signal.h
index 72fcdd9bfa..82184ab8f2 100644
--- a/linux-user/ppc/target_signal.h
+++ b/linux-user/ppc/target_signal.h
@@ -24,4 +24,6 @@ typedef struct target_sigaltstack {
 #if !defined(TARGET_PPC64)
 #define TARGET_ARCH_HAS_SETUP_FRAME
 #endif
+#define TARGET_ARCH_HAS_SIGTRAMP_PAGE 1
+
 #endif /* PPC_TARGET_SIGNAL_H */
diff --git a/linux-user/ppc/signal.c b/linux-user/ppc/signal.c
index 70cc27b0f6..2124eef932 100644
--- a/linux-user/ppc/signal.c
+++ b/linux-user/ppc/signal.c
@@ -202,9 +202,6 @@ struct target_func_ptr {
 
 #endif
 
-/* We use the mc_pad field for the signal return trampoline.  */
-#define tramp mc_pad
-
 /* See arch/powerpc/kernel/signal.c.  */
 static target_ulong get_sigframe(struct target_sigaction *ka,
  CPUPPCState *env,
@@ -435,12 +432,7 @@ void setup_frame(int sig, struct target_sigaction *ka,
 /* Save user regs.  */
 save_user_regs(env, &frame->mctx);
 
-/* Construct the trampoline code on the stack. */
-encode_trampoline(TARGET_NR_sigreturn, (uint32_t *)&frame->mctx.tramp);
-
-/* The kernel checks for the presence of a VDSO here.  We don't
-   emulate a vdso, so use a sigreturn system call.  */
-env->lr = (target_ulong) h2g(frame->mctx.tramp);
+env->lr = default_sigreturn;
 
 /* Turn off all fp exceptions.  */
 env->fpscr = 0;
@@ -476,7 +468,6 @@ void setup_rt_frame(int sig, struct target_sigaction *ka,
 target_sigset_t *set, CPUPPCState *env)
 {
 struct target_rt_sigframe *rt_sf;
-uint32_t *trampptr = 0;
 struct target_mcontext *mctx = 0;
 target_ulong rt_sf_addr, newsp = 0;
 int i, err = 0;
@@ -506,22 +497,17 @@ void setup_rt_frame(int sig, struct target_sigaction *ka,
 
 #if defined(TARGET_PPC64)
 mctx = &rt_sf->uc.tuc_sigcontext.mcontext;
-trampptr = &rt_sf->trampoline[0];
 
 sc = &rt_sf->uc.tuc_sigcontext;
 __put_user(h2g(mctx), &sc->regs);
 __put_user(sig, &sc->signal);
 #else
 mctx = &rt_sf->uc.tuc_mcontext;
-trampptr = (uint32_t *)&rt_sf->uc.tuc_mcontext.tramp;
 #endif
 
 save_user_regs(env, mctx);
-encode_trampoline(TARGET_NR_rt_sigreturn, trampptr);
 
-/* The kernel checks for the presence of a VDSO here.  We don't
-   emulate a vdso, so use a sigreturn system call.  */
-env->lr = (target_ulong) h2g(trampptr);
+env->lr = default_rt_sigreturn;
 
 /* Turn off all fp exceptions.  */
 env->fpscr = 0;
@@ -719,3 +705,19 @@ abi_long do_swapcontext(CPUArchState *env, abi_ulong 
uold_ctx,
 
 return 0;
 }
+
+void setup_sigtramp(abi_ulong sigtramp_page)
+{
+uint32_t *tramp = lock_user(VERIFY_WRITE, sigtramp_page, 2 * 8, 0);
+assert(tramp != NULL);
+
+#ifdef TARGET_ARCH_HAS_SETUP_FRAME
+default_sigreturn = sigtramp_page;
+encode_trampoline(TARGET_NR_sigreturn, tramp + 0);
+#endif
+
+default_rt_sigreturn = sigtramp_page + 8;
+encode_trampoline(TARGET_NR_rt_sigreturn, tramp + 2);
+
+unlock_user(tramp, sigtramp_page, 2 * 8);
+}
-- 
2.25.1

[PATCH v2 33/36] linux-user/s390x: Implement setup_sigtramp

2021-07-06 Thread Richard Henderson

Create and record the two signal trampolines.
Use them when the guest does not use SA_RESTORER.

Cc: qemu-s3...@nongnu.org
Tested-by: Alex Bennée 
Reviewed-by: Philippe Mathieu-Daudé 
Signed-off-by: Richard Henderson 
---
 linux-user/s390x/target_signal.h |  2 ++
 linux-user/s390x/signal.c| 24 
 2 files changed, 18 insertions(+), 8 deletions(-)

diff --git a/linux-user/s390x/target_signal.h b/linux-user/s390x/target_signal.h
index bbfc464d44..64f5f42201 100644
--- a/linux-user/s390x/target_signal.h
+++ b/linux-user/s390x/target_signal.h
@@ -19,4 +19,6 @@ typedef struct target_sigaltstack {
 #include "../generic/signal.h"
 
 #define TARGET_ARCH_HAS_SETUP_FRAME
+#define TARGET_ARCH_HAS_SIGTRAMP_PAGE 1
+
 #endif /* S390X_TARGET_SIGNAL_H */
diff --git a/linux-user/s390x/signal.c b/linux-user/s390x/signal.c
index bf8a8fbfe9..18b1997f1e 100644
--- a/linux-user/s390x/signal.c
+++ b/linux-user/s390x/signal.c
@@ -67,7 +67,6 @@ typedef struct {
 target_sigregs sregs;
 int signo;
 target_sigregs_ext sregs_ext;
-uint16_t retcode;
 } sigframe;
 
 #define TARGET_UC_VXRS 2
@@ -84,7 +83,6 @@ struct target_ucontext {
 
 typedef struct {
 uint8_t callee_used_stack[__SIGNAL_FRAMESIZE];
-uint16_t retcode;
 struct target_siginfo info;
 struct target_ucontext uc;
 } rt_sigframe;
@@ -208,9 +206,7 @@ void setup_frame(int sig, struct target_sigaction *ka,
 if (ka->sa_flags & TARGET_SA_RESTORER) {
 restorer = ka->sa_restorer;
 } else {
-restorer = frame_addr + offsetof(sigframe, retcode);
-__put_user(S390_SYSCALL_OPCODE | TARGET_NR_sigreturn,
-   &frame->retcode);
+restorer = default_sigreturn;
 }
 
 /* Set up registers for signal handler */
@@ -261,9 +257,7 @@ void setup_rt_frame(int sig, struct target_sigaction *ka,
 if (ka->sa_flags & TARGET_SA_RESTORER) {
 restorer = ka->sa_restorer;
 } else {
-restorer = frame_addr + offsetof(typeof(*frame), retcode);
-__put_user(S390_SYSCALL_OPCODE | TARGET_NR_rt_sigreturn,
-   &frame->retcode);
+restorer = default_rt_sigreturn;
 }
 
 /* Create siginfo on the signal stack. */
@@ -404,3 +398,17 @@ long do_rt_sigreturn(CPUS390XState *env)
 unlock_user_struct(frame, frame_addr, 0);
 return -TARGET_QEMU_ESIGRETURN;
 }
+
+void setup_sigtramp(abi_ulong sigtramp_page)
+{
+uint16_t *tramp = lock_user(VERIFY_WRITE, sigtramp_page, 2 + 2, 0);
+assert(tramp != NULL);
+
+default_sigreturn = sigtramp_page;
+__put_user(S390_SYSCALL_OPCODE | TARGET_NR_sigreturn, &tramp[0]);
+
+default_rt_sigreturn = sigtramp_page + 2;
+__put_user(S390_SYSCALL_OPCODE | TARGET_NR_rt_sigreturn, &tramp[1]);
+
+unlock_user(tramp, sigtramp_page, 2 + 2);
+}
-- 
2.25.1

[PATCH v2 34/36] linux-user/sh4: Implement setup_sigtramp

2021-07-06 Thread Richard Henderson

Create and record the two signal trampolines.
Use them when the guest does not use SA_RESTORER.

Cc: Yoshinori Sato 
Reviewed-by: Philippe Mathieu-Daudé 
Signed-off-by: Richard Henderson 
---
 linux-user/sh4/target_signal.h |  2 ++
 linux-user/sh4/signal.c| 40 +++---
 2 files changed, 24 insertions(+), 18 deletions(-)

diff --git a/linux-user/sh4/target_signal.h b/linux-user/sh4/target_signal.h
index d7309b7136..04069cba66 100644
--- a/linux-user/sh4/target_signal.h
+++ b/linux-user/sh4/target_signal.h
@@ -22,4 +22,6 @@ typedef struct target_sigaltstack {
 #include "../generic/signal.h"
 
 #define TARGET_ARCH_HAS_SETUP_FRAME
+#define TARGET_ARCH_HAS_SIGTRAMP_PAGE 1
+
 #endif /* SH4_TARGET_SIGNAL_H */
diff --git a/linux-user/sh4/signal.c b/linux-user/sh4/signal.c
index 0451e65806..5a5ae69785 100644
--- a/linux-user/sh4/signal.c
+++ b/linux-user/sh4/signal.c
@@ -51,7 +51,6 @@ struct target_sigframe
 {
 struct target_sigcontext sc;
 target_ulong extramask[TARGET_NSIG_WORDS-1];
-uint16_t retcode[3];
 };
 
 
@@ -67,7 +66,6 @@ struct target_rt_sigframe
 {
 struct target_siginfo info;
 struct target_ucontext uc;
-uint16_t retcode[3];
 };
 
 
@@ -189,15 +187,9 @@ void setup_frame(int sig, struct target_sigaction *ka,
 /* Set up to return from userspace.  If provided, use a stub
already in userspace.  */
 if (ka->sa_flags & TARGET_SA_RESTORER) {
-regs->pr = (unsigned long) ka->sa_restorer;
+regs->pr = ka->sa_restorer;
 } else {
-/* Generate return code (system call to sigreturn) */
-abi_ulong retcode_addr = frame_addr +
- offsetof(struct target_sigframe, retcode);
-__put_user(MOVW(2), &frame->retcode[0]);
-__put_user(TRAP_NOARG, &frame->retcode[1]);
-__put_user((TARGET_NR_sigreturn), &frame->retcode[2]);
-regs->pr = (unsigned long) retcode_addr;
+regs->pr = default_sigreturn;
 }
 
 /* Set up registers for signal handler */
@@ -247,15 +239,9 @@ void setup_rt_frame(int sig, struct target_sigaction *ka,
 /* Set up to return from userspace.  If provided, use a stub
already in userspace.  */
 if (ka->sa_flags & TARGET_SA_RESTORER) {
-regs->pr = (unsigned long) ka->sa_restorer;
+regs->pr = ka->sa_restorer;
 } else {
-/* Generate return code (system call to sigreturn) */
-abi_ulong retcode_addr = frame_addr +
- offsetof(struct target_rt_sigframe, retcode);
-__put_user(MOVW(2), &frame->retcode[0]);
-__put_user(TRAP_NOARG, &frame->retcode[1]);
-__put_user((TARGET_NR_rt_sigreturn), &frame->retcode[2]);
-regs->pr = (unsigned long) retcode_addr;
+regs->pr = default_rt_sigreturn;
 }
 
 /* Set up registers for signal handler */
@@ -333,3 +319,21 @@ badframe:
 force_sig(TARGET_SIGSEGV);
 return -TARGET_QEMU_ESIGRETURN;
 }
+
+void setup_sigtramp(abi_ulong sigtramp_page)
+{
+uint16_t *tramp = lock_user(VERIFY_WRITE, sigtramp_page, 2 * 6, 0);
+assert(tramp != NULL);
+
+default_sigreturn = sigtramp_page;
+__put_user(MOVW(2), &tramp[0]);
+__put_user(TRAP_NOARG, &tramp[1]);
+__put_user(TARGET_NR_sigreturn, &tramp[2]);
+
+default_rt_sigreturn = sigtramp_page + 6;
+__put_user(MOVW(2), &tramp[3]);
+__put_user(TRAP_NOARG, &tramp[4]);
+__put_user(TARGET_NR_rt_sigreturn, &tramp[5]);
+
+unlock_user(tramp, sigtramp_page, 2 * 6);
+}
-- 
2.25.1

[PATCH v2 23/36] linux-user/x86_64: Add vdso

2021-07-06 Thread Richard Henderson

Building the vdso itself is not actually wired up to anything, since
we require a cross-compiler.  Just check in that file for now.

Signed-off-by: Richard Henderson 
---
 linux-user/elfload.c|   4 +-
 linux-user/x86_64/Makefile.vdso |   5 ++
 linux-user/x86_64/meson.build   |   6 ++
 linux-user/x86_64/vdso.S| 122 
 linux-user/x86_64/vdso.ld   |  74 +++
 linux-user/x86_64/vdso.so   | Bin 0 -> 6008 bytes
 6 files changed, 209 insertions(+), 2 deletions(-)
 create mode 100644 linux-user/x86_64/Makefile.vdso
 create mode 100644 linux-user/x86_64/vdso.S
 create mode 100644 linux-user/x86_64/vdso.ld
 create mode 100755 linux-user/x86_64/vdso.so

diff --git a/linux-user/elfload.c b/linux-user/elfload.c
index 7a598f9f76..43c985f318 100644
--- a/linux-user/elfload.c
+++ b/linux-user/elfload.c
@@ -284,12 +284,12 @@ static void elf_core_copy_regs(target_elf_gregset_t 
*regs, const CPUX86State *en
 #define DLINFO_ARCH_ITEMS 1
 #define ARCH_DLINFO   NEW_AUX_ENT(AT_SYSINFO, vdso_info->entry);
 
+#endif /* TARGET_X86_64 */
+
 #include "vdso.c.inc"
 
 #define vdso_image_info()&vdso_image_info
 
-#endif /* TARGET_X86_64 */
-
 #define USE_ELF_CORE_DUMP
 #define ELF_EXEC_PAGESIZE   4096
 
diff --git a/linux-user/x86_64/Makefile.vdso b/linux-user/x86_64/Makefile.vdso
new file mode 100644
index 00..532c9d6964
--- /dev/null
+++ b/linux-user/x86_64/Makefile.vdso
@@ -0,0 +1,5 @@
+CROSS_CC ?= $(CC)
+
+vdso.so: vdso.S vdso.ld Makefile.vdso
+   $(CROSS_CC) -nostdlib -shared -Wl,-T,vdso.ld -Wl,--build-id=sha1 \
+ -Wl,-h,linux-vdso.so.1 -Wl,--hash-style=both vdso.S -o $@
diff --git a/linux-user/x86_64/meson.build b/linux-user/x86_64/meson.build
index 203af9a60c..f6a0015953 100644
--- a/linux-user/x86_64/meson.build
+++ b/linux-user/x86_64/meson.build
@@ -3,3 +3,9 @@ syscall_nr_generators += {
   arguments: [ meson.current_source_dir() / 
'syscallhdr.sh', '@INPUT@', '@OUTPUT@', '@EXTRA_ARGS@' ],
   output: '@BASENAME@_nr.h')
 }
+
+gen = [
+  gen_vdso.process('vdso.so')
+]
+
+linux_user_ss.add(when: 'TARGET_X86_64', if_true: gen)
diff --git a/linux-user/x86_64/vdso.S b/linux-user/x86_64/vdso.S
new file mode 100644
index 00..bbd75a79aa
--- /dev/null
+++ b/linux-user/x86_64/vdso.S
@@ -0,0 +1,122 @@
+/*
+ * x86-64 linux replacement vdso.
+ *
+ * Copyright 2021 Linaro, Ltd.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include 
+
+   .globl  __vdso_clock_gettime
+   .type   __vdso_clock_gettime, @function
+   .balign 16
+   .cfi_startproc
+__vdso_clock_gettime:
+   mov $__NR_clock_gettime, %eax
+   syscall
+   ret
+   .cfi_endproc
+   .size   __vdso_clock_gettime, . - __vdso_clock_gettime
+
+clock_gettime = __vdso_clock_gettime
+   .weak   clock_gettime
+
+   .globl  __vdso_clock_getres
+   .type   __vdso_clock_getres, @function
+   .balign 16
+   .cfi_startproc
+__vdso_clock_getres:
+   mov $__NR_clock_getres, %eax
+   syscall
+   ret
+   .cfi_endproc
+   .size   __vdso_clock_getres, . - __vdso_clock_getres
+
+clock_getres = __vdso_clock_getres
+   .weak   clock_getres
+
+   .globl  __vdso_gettimeofday
+   .type   __vdso_gettimeofday, @function
+   .balign 16
+   .cfi_startproc
+__vdso_gettimeofday:
+   mov $__NR_gettimeofday, %eax
+   syscall
+   ret
+   .cfi_endproc
+   .size   __vdso_gettimeofday, . - __vdso_gettimeofday
+
+gettimeofday = __vdso_gettimeofday
+   .weak   gettimeofday
+
+
+   .globl  __vdso_time
+   .type   __vdso_time, @function
+   .balign 16
+   .cfi_startproc
+__vdso_time:
+   mov $__NR_time, %eax
+   syscall
+   ret
+   .cfi_endproc
+   .size   __vdso_time, . - __vdso_time
+
+time = __vdso_time
+   .weak   time
+
+
+   .globl  __vdso_getcpu
+   .type   __vdso_getcpu, @function
+   .balign 16
+   .cfi_startproc
+__vdso_getcpu:
+   /*
+ * ??? There is no syscall number for this allocated on x64.
+* We can handle this several ways:
+ *
+* (1) Invent a syscall number for use within qemu.
+ * It should be easy enough to pick a number that
+ * is well out of the way of the kernel numbers.
+ *
+ * (2) Force the emulated cpu to support the rdtscp insn,
+* and initialize the TSC_AUX value to the appropriate value.
+ *
+* (3) Pretend that we're always running on cpu 0.
+ *
+* This last is the one that's implemented here, with the
+* tiny bit of extra code to support rdtscp in place.
+ */
+   xor %ecx, %ecx  /* rdtscp w/ tsc_aux = 0 */
+
+   /* if (cpu != NULL) *cpu = (ecx & 0xfff); */
+   test%rdi, %rdi
+   jz  1f
+   mov %ecx, %eax
+   and $0xfff, %eax
+   mov %eax, (%rdi)
+
+

[PATCH v2 24/36] linux-user/m68k: Implement setup_sigtramp

2021-07-06 Thread Richard Henderson

Create and record the two signal trampolines.

Reviewed-by: Philippe Mathieu-Daudé 
Signed-off-by: Richard Henderson 
---
 linux-user/m68k/target_signal.h |  2 ++
 linux-user/m68k/signal.c| 47 +++--
 2 files changed, 24 insertions(+), 25 deletions(-)

diff --git a/linux-user/m68k/target_signal.h b/linux-user/m68k/target_signal.h
index d096544ef8..94157bf1f4 100644
--- a/linux-user/m68k/target_signal.h
+++ b/linux-user/m68k/target_signal.h
@@ -22,4 +22,6 @@ typedef struct target_sigaltstack {
 #include "../generic/signal.h"
 
 #define TARGET_ARCH_HAS_SETUP_FRAME
+#define TARGET_ARCH_HAS_SIGTRAMP_PAGE 1
+
 #endif /* M68K_TARGET_SIGNAL_H */
diff --git a/linux-user/m68k/signal.c b/linux-user/m68k/signal.c
index d06230655e..b4fade1ed6 100644
--- a/linux-user/m68k/signal.c
+++ b/linux-user/m68k/signal.c
@@ -38,7 +38,6 @@ struct target_sigframe
 int sig;
 int code;
 abi_ulong psc;
-char retcode[8];
 abi_ulong extramask[TARGET_NSIG_WORDS-1];
 struct target_sigcontext sc;
 };
@@ -75,7 +74,6 @@ struct target_rt_sigframe
 int sig;
 abi_ulong pinfo;
 abi_ulong puc;
-char retcode[8];
 struct target_siginfo info;
 struct target_ucontext uc;
 };
@@ -129,7 +127,6 @@ void setup_frame(int sig, struct target_sigaction *ka,
 {
 struct target_sigframe *frame;
 abi_ulong frame_addr;
-abi_ulong retcode_addr;
 abi_ulong sc_addr;
 int i;
 
@@ -151,16 +148,7 @@ void setup_frame(int sig, struct target_sigaction *ka,
 }
 
 /* Set up to return from userspace.  */
-
-retcode_addr = frame_addr + offsetof(struct target_sigframe, retcode);
-__put_user(retcode_addr, &frame->pretcode);
-
-/* moveq #,d0; trap #0 */
-
-__put_user(0x70004e40 + (TARGET_NR_sigreturn << 16),
-   (uint32_t *)(frame->retcode));
-
-/* Set up to return from userspace */
+__put_user(default_sigreturn, &frame->pretcode);
 
 env->aregs[7] = frame_addr;
 env->pc = ka->_sa_handler;
@@ -287,7 +275,6 @@ void setup_rt_frame(int sig, struct target_sigaction *ka,
 {
 struct target_rt_sigframe *frame;
 abi_ulong frame_addr;
-abi_ulong retcode_addr;
 abi_ulong info_addr;
 abi_ulong uc_addr;
 int err = 0;
@@ -324,17 +311,7 @@ void setup_rt_frame(int sig, struct target_sigaction *ka,
 }
 
 /* Set up to return from userspace.  */
-
-retcode_addr = frame_addr + offsetof(struct target_sigframe, retcode);
-__put_user(retcode_addr, &frame->pretcode);
-
-/* moveq #,d0; notb d0; trap #0 */
-
-__put_user(0x70004600 + ((TARGET_NR_rt_sigreturn ^ 0xff) << 16),
-   (uint32_t *)(frame->retcode + 0));
-__put_user(0x4e40, (uint16_t *)(frame->retcode + 4));
-
-/* Set up to return from userspace */
+__put_user(default_rt_sigreturn, &frame->pretcode);
 
 env->aregs[7] = frame_addr;
 env->pc = ka->_sa_handler;
@@ -410,3 +387,23 @@ badframe:
 force_sig(TARGET_SIGSEGV);
 return -TARGET_QEMU_ESIGRETURN;
 }
+
+void setup_sigtramp(abi_ulong sigtramp_page)
+{
+void *tramp = lock_user(VERIFY_WRITE, sigtramp_page, 4 + 6, 0);
+assert(tramp != NULL);
+
+default_sigreturn = sigtramp_page;
+
+/* moveq #,d0; trap #0 */
+__put_user(0x70004e40 + (TARGET_NR_sigreturn << 16), (uint32_t *)tramp);
+
+default_rt_sigreturn = sigtramp_page + 4;
+
+/* moveq #,d0; notb d0; trap #0 */
+__put_user(0x70004600 + ((TARGET_NR_rt_sigreturn ^ 0xff) << 16),
+   (uint32_t *)(tramp + 4));
+__put_user(0x4e40, (uint16_t *)(tramp + 8));
+
+unlock_user(tramp, sigtramp_page, 4 + 6);
+}
-- 
2.25.1

[PATCH v2 26/36] linux-user/mips: Tidy install_sigtramp

2021-07-06 Thread Richard Henderson

The return value is constant 0, and unused as well -- change to void.
Drop inline marker.  Change tramp type to uint32_t* for clarity.

Reviewed-by: Philippe Mathieu-Daudé 
Signed-off-by: Richard Henderson 
---
 linux-user/mips/signal.c | 5 +
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/linux-user/mips/signal.c b/linux-user/mips/signal.c
index e6be807a81..7cad7526ea 100644
--- a/linux-user/mips/signal.c
+++ b/linux-user/mips/signal.c
@@ -86,10 +86,8 @@ struct target_rt_sigframe {
 };
 
 /* Install trampoline to jump back from signal handler */
-static inline int install_sigtramp(unsigned int *tramp,   unsigned int syscall)
+static void install_sigtramp(uint32_t *tramp, unsigned int syscall)
 {
-int err = 0;
-
 /*
  * Set up the return code ...
  *
@@ -99,7 +97,6 @@ static inline int install_sigtramp(unsigned int *tramp,   
unsigned int syscall)
 
 __put_user(0x2402 + syscall, tramp + 0);
 __put_user(0x000c  , tramp + 1);
-return err;
 }
 
 static inline void setup_sigcontext(CPUMIPSState *regs,
-- 
2.25.1

[PATCH v2 29/36] linux-user/openrisc: Implement setup_sigtramp

2021-07-06 Thread Richard Henderson

Create and record the rt signal trampoline.

Reviewed-by: Stafford Horne 
Reviewed-by: Philippe Mathieu-Daudé 
Signed-off-by: Richard Henderson 
---
 linux-user/openrisc/target_signal.h |  2 ++
 linux-user/openrisc/signal.c| 24 
 2 files changed, 18 insertions(+), 8 deletions(-)

diff --git a/linux-user/openrisc/target_signal.h 
b/linux-user/openrisc/target_signal.h
index 8283eaf544..077ec3d5e8 100644
--- a/linux-user/openrisc/target_signal.h
+++ b/linux-user/openrisc/target_signal.h
@@ -26,4 +26,6 @@ typedef struct target_sigaltstack {
 
 #include "../generic/signal.h"
 
+#define TARGET_ARCH_HAS_SIGTRAMP_PAGE 1
+
 #endif /* OPENRISC_TARGET_SIGNAL_H */
diff --git a/linux-user/openrisc/signal.c b/linux-user/openrisc/signal.c
index 5c5640a284..b411b01864 100644
--- a/linux-user/openrisc/signal.c
+++ b/linux-user/openrisc/signal.c
@@ -37,7 +37,6 @@ typedef struct target_ucontext {
 typedef struct target_rt_sigframe {
 struct target_siginfo info;
 target_ucontext uc;
-uint32_t retcode[4];  /* trampoline code */
 } target_rt_sigframe;
 
 static void restore_sigcontext(CPUOpenRISCState *env, target_sigcontext *sc)
@@ -115,14 +114,8 @@ void setup_rt_frame(int sig, struct target_sigaction *ka,
 __put_user(set->sig[i], &frame->uc.tuc_sigmask.sig[i]);
 }
 
-/* This is l.ori r11,r0,__NR_sigreturn; l.sys 1; l.nop; l.nop */
-__put_user(0xa960 | TARGET_NR_rt_sigreturn, frame->retcode + 0);
-__put_user(0x2001, frame->retcode + 1);
-__put_user(0x1500, frame->retcode + 2);
-__put_user(0x1500, frame->retcode + 3);
-
 /* Set up registers for signal handler */
-cpu_set_gpr(env, 9, frame_addr + offsetof(target_rt_sigframe, retcode));
+cpu_set_gpr(env, 9, default_rt_sigreturn);
 cpu_set_gpr(env, 3, sig);
 cpu_set_gpr(env, 4, frame_addr + offsetof(target_rt_sigframe, info));
 cpu_set_gpr(env, 5, frame_addr + offsetof(target_rt_sigframe, uc));
@@ -168,3 +161,18 @@ long do_rt_sigreturn(CPUOpenRISCState *env)
 force_sig(TARGET_SIGSEGV);
 return 0;
 }
+
+void setup_sigtramp(abi_ulong sigtramp_page)
+{
+uint32_t *tramp = lock_user(VERIFY_WRITE, sigtramp_page, 4 * 4, 0);
+assert(tramp != NULL);
+
+/* This is l.ori r11,r0,__NR_rt_sigreturn; l.sys 1; l.nop; l.nop */
+__put_user(0xa960 | TARGET_NR_rt_sigreturn, tramp + 0);
+__put_user(0x2001, tramp + 1);
+__put_user(0x1500, tramp + 2);
+__put_user(0x1500, tramp + 3);
+
+default_rt_sigreturn = sigtramp_page;
+unlock_user(tramp, sigtramp_page, 4 * 4);
+}
-- 
2.25.1

[PATCH v2 18/36] linux-user/cris: Implement setup_sigtramp

2021-07-06 Thread Richard Henderson

Split out setup_sigreturn so that we can continue to
initialize the words on the stack, as documented.
However, use the off-stack trampoline.

Cc: Edgar E. Iglesias 
Reviewed-by: Philippe Mathieu-Daudé 
Signed-off-by: Richard Henderson 
---
 linux-user/cris/target_signal.h |  2 ++
 linux-user/cris/signal.c| 29 +
 2 files changed, 23 insertions(+), 8 deletions(-)

diff --git a/linux-user/cris/target_signal.h b/linux-user/cris/target_signal.h
index 495a142896..83a5155507 100644
--- a/linux-user/cris/target_signal.h
+++ b/linux-user/cris/target_signal.h
@@ -22,4 +22,6 @@ typedef struct target_sigaltstack {
 #include "../generic/signal.h"
 
 #define TARGET_ARCH_HAS_SETUP_FRAME
+#define TARGET_ARCH_HAS_SIGTRAMP_PAGE 1
+
 #endif /* CRIS_TARGET_SIGNAL_H */
diff --git a/linux-user/cris/signal.c b/linux-user/cris/signal.c
index 1e02194377..9dad50f31f 100644
--- a/linux-user/cris/signal.c
+++ b/linux-user/cris/signal.c
@@ -96,6 +96,14 @@ static abi_ulong get_sigframe(CPUCRISState *env, int 
framesize)
 return sp - framesize;
 }
 
+static void setup_sigreturn(uint16_t *retcode)
+{
+/* This is movu.w __NR_sigreturn, r9; break 13; */
+__put_user(0x9c5f, retcode + 0);
+__put_user(TARGET_NR_sigreturn, retcode + 1);
+__put_user(0xe93d, retcode + 2);
+}
+
 void setup_frame(int sig, struct target_sigaction *ka,
  target_sigset_t *set, CPUCRISState *env)
 {
@@ -111,14 +119,8 @@ void setup_frame(int sig, struct target_sigaction *ka,
 /*
  * The CRIS signal return trampoline. A real linux/CRIS kernel doesn't
  * use this trampoline anymore but it sets it up for GDB.
- * In QEMU, using the trampoline simplifies things a bit so we use it.
- *
- * This is movu.w __NR_sigreturn, r9; break 13;
  */
-__put_user(0x9c5f, frame->retcode+0);
-__put_user(TARGET_NR_sigreturn,
-   frame->retcode + 1);
-__put_user(0xe93d, frame->retcode + 2);
+setup_sigreturn(frame->retcode);
 
 /* Save the mask.  */
 __put_user(set->sig[0], &frame->sc.oldmask);
@@ -134,7 +136,7 @@ void setup_frame(int sig, struct target_sigaction *ka,
 env->regs[10] = sig;
 env->pc = (unsigned long) ka->_sa_handler;
 /* Link SRP so the guest returns through the trampoline.  */
-env->pregs[PR_SRP] = frame_addr + offsetof(typeof(*frame), retcode);
+env->pregs[PR_SRP] = default_sigreturn;
 
 unlock_user_struct(frame, frame_addr, 1);
 return;
@@ -186,3 +188,14 @@ long do_rt_sigreturn(CPUCRISState *env)
 qemu_log_mask(LOG_UNIMP, "do_rt_sigreturn: not implemented\n");
 return -TARGET_ENOSYS;
 }
+
+void setup_sigtramp(abi_ulong sigtramp_page)
+{
+uint16_t *tramp = lock_user(VERIFY_WRITE, sigtramp_page, 6, 0);
+assert(tramp != NULL);
+
+default_sigreturn = sigtramp_page;
+setup_sigreturn(tramp);
+
+unlock_user(tramp, sigtramp_page, 6);
+}
-- 
2.25.1

[PATCH v2 27/36] linux-user/mips: Implement setup_sigtramp

2021-07-06 Thread Richard Henderson

Create and record the two signal trampolines.

Reviewed-by: Philippe Mathieu-Daudé 
Signed-off-by: Richard Henderson 
---
 linux-user/mips/target_signal.h   |  1 +
 linux-user/mips64/target_signal.h |  2 ++
 linux-user/mips/signal.c  | 34 ++-
 3 files changed, 27 insertions(+), 10 deletions(-)

diff --git a/linux-user/mips/target_signal.h b/linux-user/mips/target_signal.h
index d521765f6b..780a4ddf29 100644
--- a/linux-user/mips/target_signal.h
+++ b/linux-user/mips/target_signal.h
@@ -73,6 +73,7 @@ typedef struct target_sigaltstack {
 /* compare linux/arch/mips/kernel/signal.c:setup_frame() */
 #define TARGET_ARCH_HAS_SETUP_FRAME
 #endif
+#define TARGET_ARCH_HAS_SIGTRAMP_PAGE 1
 
 /* bit-flags */
 #define TARGET_SS_AUTODISARM (1U << 31) /* disable sas during sighandling */
diff --git a/linux-user/mips64/target_signal.h 
b/linux-user/mips64/target_signal.h
index d857c55e4c..275e9b7f9a 100644
--- a/linux-user/mips64/target_signal.h
+++ b/linux-user/mips64/target_signal.h
@@ -76,4 +76,6 @@ typedef struct target_sigaltstack {
 /* compare linux/arch/mips/kernel/signal.c:setup_frame() */
 #define TARGET_ARCH_HAS_SETUP_FRAME
 #endif
+#define TARGET_ARCH_HAS_SIGTRAMP_PAGE 1
+
 #endif /* MIPS64_TARGET_SIGNAL_H */
diff --git a/linux-user/mips/signal.c b/linux-user/mips/signal.c
index 7cad7526ea..a3db08bfec 100644
--- a/linux-user/mips/signal.c
+++ b/linux-user/mips/signal.c
@@ -208,8 +208,6 @@ void setup_frame(int sig, struct target_sigaction * ka,
 goto give_sigsegv;
 }
 
-install_sigtramp(frame->sf_code, TARGET_NR_sigreturn);
-
 setup_sigcontext(regs, &frame->sf_sc);
 
 for(i = 0; i < TARGET_NSIG_WORDS; i++) {
@@ -230,7 +228,7 @@ void setup_frame(int sig, struct target_sigaction * ka,
 regs->active_tc.gpr[ 5] = 0;
 regs->active_tc.gpr[ 6] = frame_addr + offsetof(struct sigframe, sf_sc);
 regs->active_tc.gpr[29] = frame_addr;
-regs->active_tc.gpr[31] = frame_addr + offsetof(struct sigframe, sf_code);
+regs->active_tc.gpr[31] = default_sigreturn;
 /* The original kernel code sets CP0_EPC to the handler
 * since it returns to userland using eret
 * we cannot do this here, and we must set PC directly */
@@ -304,8 +302,6 @@ void setup_rt_frame(int sig, struct target_sigaction *ka,
 goto give_sigsegv;
 }
 
-install_sigtramp(frame->rs_code, TARGET_NR_rt_sigreturn);
-
 tswap_siginfo(&frame->rs_info, info);
 
 __put_user(0, &frame->rs_uc.tuc_flags);
@@ -334,11 +330,13 @@ void setup_rt_frame(int sig, struct target_sigaction *ka,
 env->active_tc.gpr[ 6] = frame_addr
  + offsetof(struct target_rt_sigframe, rs_uc);
 env->active_tc.gpr[29] = frame_addr;
-env->active_tc.gpr[31] = frame_addr
- + offsetof(struct target_rt_sigframe, rs_code);
-/* The original kernel code sets CP0_EPC to the handler
-* since it returns to userland using eret
-* we cannot do this here, and we must set PC directly */
+env->active_tc.gpr[31] = default_rt_sigreturn;
+
+/*
+ * The original kernel code sets CP0_EPC to the handler
+ * since it returns to userland using eret
+ * we cannot do this here, and we must set PC directly
+ */
 env->active_tc.PC = env->active_tc.gpr[25] = ka->_sa_handler;
 mips_set_hflags_isa_mode_from_pc(env);
 unlock_user_struct(frame, frame_addr, 1);
@@ -378,3 +376,19 @@ badframe:
 force_sig(TARGET_SIGSEGV);
 return -TARGET_QEMU_ESIGRETURN;
 }
+
+void setup_sigtramp(abi_ulong sigtramp_page)
+{
+uint32_t *tramp = lock_user(VERIFY_WRITE, sigtramp_page, 2 * 8, 0);
+assert(tramp != NULL);
+
+#ifdef TARGET_ARCH_HAS_SETUP_FRAME
+default_sigreturn = sigtramp_page;
+install_sigtramp(tramp, TARGET_NR_sigreturn);
+#endif
+
+default_rt_sigreturn = sigtramp_page + 8;
+install_sigtramp(tramp + 2, TARGET_NR_rt_sigreturn);
+
+unlock_user(tramp, sigtramp_page, 2 * 8);
+}
-- 
2.25.1

[PATCH v2 22/36] linux-user/i386: Add vdso and use it for sigreturn

2021-07-06 Thread Richard Henderson

Building the vdso itself is not actually wired up to anything, since
we require a cross-compiler.  Just check in that file for now.

Signed-off-by: Richard Henderson 
---
 linux-user/elfload.c  |  16 +++-
 linux-user/i386/signal.c  |  39 -
 linux-user/i386/Makefile.vdso |   5 ++
 linux-user/i386/meson.build   |   7 ++
 linux-user/i386/vdso.S| 149 ++
 linux-user/i386/vdso.ld   |  76 +
 linux-user/i386/vdso.so   | Bin 0 -> 5528 bytes
 7 files changed, 271 insertions(+), 21 deletions(-)
 create mode 100644 linux-user/i386/Makefile.vdso
 create mode 100644 linux-user/i386/vdso.S
 create mode 100644 linux-user/i386/vdso.ld
 create mode 100755 linux-user/i386/vdso.so

diff --git a/linux-user/elfload.c b/linux-user/elfload.c
index ec3a854b44..7a598f9f76 100644
--- a/linux-user/elfload.c
+++ b/linux-user/elfload.c
@@ -276,12 +276,24 @@ static void elf_core_copy_regs(target_elf_gregset_t 
*regs, const CPUX86State *en
 (*regs)[15] = env->regs[R_ESP];
 (*regs)[16] = env->segs[R_SS].selector & 0xffff;
 }
-#endif
+
+/*
+ * i386 is the only target which supplies AT_SYSINFO for the vdso.
+ * All others only supply AT_SYSINFO_EHDR.
+ */
+#define DLINFO_ARCH_ITEMS 1
+#define ARCH_DLINFO   NEW_AUX_ENT(AT_SYSINFO, vdso_info->entry);
+
+#include "vdso.c.inc"
+
+#define vdso_image_info()    &vdso_image_info
+
+#endif /* TARGET_X86_64 */
 
 #define USE_ELF_CORE_DUMP
 #define ELF_EXEC_PAGESIZE   4096
 
-#endif
+#endif /* TARGET_I386 */
 
 #ifdef TARGET_ARM
 
diff --git a/linux-user/i386/signal.c b/linux-user/i386/signal.c
index 9894e7c9ac..8e4677e800 100644
--- a/linux-user/i386/signal.c
+++ b/linux-user/i386/signal.c
@@ -313,7 +313,7 @@ get_sigframe(struct target_sigaction *ka, CPUX86State *env, 
size_t frame_size)
 void setup_frame(int sig, struct target_sigaction *ka,
  target_sigset_t *set, CPUX86State *env)
 {
-abi_ulong frame_addr;
+abi_ulong frame_addr, retcode_addr;
 struct sigframe *frame;
 int i;
 
@@ -335,19 +335,19 @@ void setup_frame(int sig, struct target_sigaction *ka,
 /* Set up to return from userspace.  If provided, use a stub
already in userspace.  */
 if (ka->sa_flags & TARGET_SA_RESTORER) {
-__put_user(ka->sa_restorer, &frame->pretcode);
+retcode_addr = ka->sa_restorer;
 } else {
-uint16_t val16;
-abi_ulong retcode_addr;
-retcode_addr = frame_addr + offsetof(struct sigframe, retcode);
-__put_user(retcode_addr, &frame->pretcode);
-/* This is popl %eax ; movl $,%eax ; int $0x80 */
-val16 = 0xb858;
-__put_user(val16, (uint16_t *)(frame->retcode+0));
+/*
+ * This is popl %eax ; movl $,%eax ; int $0x80.
+ * This is no longer used, but is retained for ABI compatibility.
+ */
+__put_user(0xb858, (uint16_t *)(frame->retcode+0));
 __put_user(TARGET_NR_sigreturn, (int *)(frame->retcode+2));
-val16 = 0x80cd;
-__put_user(val16, (uint16_t *)(frame->retcode+6));
+__put_user(0x80cd, (uint16_t *)(frame->retcode+6));
+
+retcode_addr = default_sigreturn;
 }
+__put_user(retcode_addr, &frame->pretcode);
 
 /* Set up registers for signal handler */
 env->regs[R_ESP] = frame_addr;
@@ -373,7 +373,7 @@ void setup_rt_frame(int sig, struct target_sigaction *ka,
 target_siginfo_t *info,
 target_sigset_t *set, CPUX86State *env)
 {
-abi_ulong frame_addr;
+abi_ulong frame_addr, retcode_addr;
 #ifndef TARGET_X86_64
 abi_ulong addr;
 #endif
@@ -412,22 +412,23 @@ void setup_rt_frame(int sig, struct target_sigaction *ka,
 /* Set up to return from userspace.  If provided, use a stub
already in userspace.  */
 if (ka->sa_flags & TARGET_SA_RESTORER) {
-__put_user(ka->sa_restorer, &frame->pretcode);
+retcode_addr = ka->sa_restorer;
 } else {
 #ifdef TARGET_X86_64
 /* For x86_64, SA_RESTORER is required ABI.  */
 goto give_sigsegv;
 #else
-uint16_t val16;
-addr = frame_addr + offsetof(struct rt_sigframe, retcode);
-__put_user(addr, &frame->pretcode);
-/* This is movl $,%eax ; int $0x80 */
+/*
+ * This is movl $,%eax ; int $0x80
+ * This is no longer used, but is retained for ABI compatibility.
+ */
 __put_user(0xb8, (char *)(frame->retcode+0));
 __put_user(TARGET_NR_rt_sigreturn, (int *)(frame->retcode+1));
-val16 = 0x80cd;
-__put_user(val16, (uint16_t *)(frame->retcode+5));
+__put_user(0x80cd, (uint16_t *)(frame->retcode+5));
+retcode_addr = default_rt_sigreturn;
 #endif
 }
+__put_user(retcode_addr, &frame->pretcode);
 
 /* Set up registers for signal handler */
 env->regs[R_ESP] = frame_addr;
diff --git a/linux-user/i386/Makefile.vdso b/linux-user/i386/Makefile.vdso
new file mode 100644
index 00

[PATCH v2 10/36] linux-user: Load vdso image if available

2021-07-06 Thread Richard Henderson

The vdso image will be pre-processed into a C data array, with
a simple list of relocations to perform, and identifying the
location of signal trampolines.

Signed-off-by: Richard Henderson 
---
 linux-user/elfload.c | 80 +++-
 1 file changed, 72 insertions(+), 8 deletions(-)

diff --git a/linux-user/elfload.c b/linux-user/elfload.c
index ec4b5f9d54..f76281fe40 100644
--- a/linux-user/elfload.c
+++ b/linux-user/elfload.c
@@ -30,6 +30,15 @@
 #define TARGET_ARCH_HAS_SIGTRAMP_PAGE 0
 #endif
 
+typedef struct {
+const uint8_t *image;
+const uint32_t *relocs;
+unsigned image_size;
+unsigned reloc_count;
+unsigned sigreturn_ofs;
+unsigned rt_sigreturn_ofs;
+} VdsoImageInfo;
+
 #define ELF_OSABI   ELFOSABI_SYSV
 
 /* from personality.h */
@@ -1971,7 +1980,8 @@ static abi_ulong loader_build_fdpic_loadmap(struct 
image_info *info, abi_ulong s
 static abi_ulong create_elf_tables(abi_ulong p, int argc, int envc,
struct elfhdr *exec,
struct image_info *info,
-   struct image_info *interp_info)
+   struct image_info *interp_info,
+   struct image_info *vdso_info)
 {
 abi_ulong sp;
 abi_ulong u_argc, u_argv, u_envp, u_auxv;
@@ -2043,8 +2053,12 @@ static abi_ulong create_elf_tables(abi_ulong p, int 
argc, int envc,
 }
 
 size = (DLINFO_ITEMS + 1) * 2;
-if (k_platform)
+if (k_platform) {
 size += 2;
+}
+if (vdso_info) {
+size += 2;
+}
 #ifdef DLINFO_ARCH_ITEMS
 size += DLINFO_ARCH_ITEMS * 2;
 #endif
@@ -2121,6 +2135,9 @@ static abi_ulong create_elf_tables(abi_ulong p, int argc, 
int envc,
 if (u_platform) {
 NEW_AUX_ENT(AT_PLATFORM, u_platform);
 }
+if (vdso_info) {
+NEW_AUX_ENT(AT_SYSINFO_EHDR, vdso_info->load_addr);
+}
 NEW_AUX_ENT (AT_NULL, 0);
 #undef NEW_AUX_ENT
 
@@ -2947,6 +2964,49 @@ static void load_elf_interp(const char *filename, struct 
image_info *info,
 load_elf_image(filename, &src, info, &ehdr, NULL);
 }
 
+#ifndef vdso_image_info
+#define vdso_image_info()    NULL
+#endif
+
+static void load_elf_vdso(struct image_info *info, const VdsoImageInfo *vdso)
+{
+ImageSource src;
+struct elfhdr ehdr;
+abi_ulong load_bias, load_addr;
+
+src.fd = -1;
+src.cache = vdso->image;
+src.cache_size = vdso->image_size;
+
+load_elf_image("", &src, info, &ehdr, NULL);
+load_addr = info->load_addr;
+load_bias = info->load_bias;
+
+/*
+ * We need to relocate the VDSO image.  The one built into the kernel
+ * is built for a fixed address.  The one built for QEMU is not, since
+ * that requires close control of the guest address space.
+ * We pre-processed the image to locate all of the addresses that need
+ * to be updated.
+ */
+for (unsigned i = 0, n = vdso->reloc_count; i < n; i++) {
+abi_ulong *addr = g2h_untagged(load_addr + vdso->relocs[i]);
+*addr = tswapal(tswapal(*addr) + load_bias);
+}
+
+/* Install signal trampolines, if present. */
+if (vdso->sigreturn_ofs) {
+default_sigreturn = load_addr + vdso->sigreturn_ofs;
+}
+if (vdso->rt_sigreturn_ofs) {
+default_rt_sigreturn = load_addr + vdso->rt_sigreturn_ofs;
+}
+
+/* Mark the VDSO writable segment read-only. */
+target_mprotect(info->start_data, info->end_data - info->start_data,
+PROT_READ);
+}
+
 static int symfind(const void *s0, const void *s1)
 {
 target_ulong addr = *(target_ulong *)s0;
@@ -3151,7 +3211,7 @@ int load_elf_binary(struct linux_binprm *bprm, struct 
image_info *info)
  * and let elf_load_image do any swapping that may be required.
  */
 struct elfhdr ehdr;
-struct image_info interp_info;
+struct image_info interp_info, vdso_info;
 char *elf_interpreter = NULL;
 char *scratch;
 
@@ -3221,10 +3281,13 @@ int load_elf_binary(struct linux_binprm *bprm, struct 
image_info *info)
 }
 
 /*
- * TODO: load a vdso, which would also contain the signal trampolines.
- * Otherwise, allocate a private page to hold them.
+ * Load a vdso if available, which will amongst other things contain the
+ * signal trampolines.  Otherwise, allocate a separate page for them.
  */
-if (TARGET_ARCH_HAS_SIGTRAMP_PAGE) {
+const VdsoImageInfo *vdso = vdso_image_info();
+if (vdso) {
+load_elf_vdso(&vdso_info, vdso);
+} else if (TARGET_ARCH_HAS_SIGTRAMP_PAGE) {
 abi_ulong tramp_page = target_mmap(0, TARGET_PAGE_SIZE,
PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_ANON, -1, 0);
@@ -3232,8 +3295,9 @@ int load_elf_binary(struct linux_binprm *bprm, struct 
image_info *info)
 target_mprotect(tramp_page, TARGET_PAGE_SIZE, PROT_READ | PROT_EXE

[PATCH v2 15/36] target/arm: Add isar_feature_aa32_a32

2021-07-06 Thread Richard Henderson

Add a probe for whether A32 mode is supported.
Fill in the field for the pre-v5 cpus.

Cc: qemu-...@nongnu.org
Signed-off-by: Richard Henderson 
---
 target/arm/cpu.h | 5 +
 target/arm/cpu_tcg.c | 7 +++
 2 files changed, 12 insertions(+)

diff --git a/target/arm/cpu.h b/target/arm/cpu.h
index be9a4dceae..33f7ce9bc5 100644
--- a/target/arm/cpu.h
+++ b/target/arm/cpu.h
@@ -3802,6 +3802,11 @@ static inline bool isar_feature_aa32_i8mm(const 
ARMISARegisters *id)
 return FIELD_EX32(id->id_isar6, ID_ISAR6, I8MM) != 0;
 }
 
+static inline bool isar_feature_aa32_a32(const ARMISARegisters *id)
+{
+return FIELD_EX32(id->id_pfr0, ID_PFR0, STATE0) != 0;
+}
+
 static inline bool isar_feature_aa32_ras(const ARMISARegisters *id)
 {
 return FIELD_EX32(id->id_pfr0, ID_PFR0, RAS) != 0;
diff --git a/target/arm/cpu_tcg.c b/target/arm/cpu_tcg.c
index d2d97115ea..980f62f35d 100644
--- a/target/arm/cpu_tcg.c
+++ b/target/arm/cpu_tcg.c
@@ -73,6 +73,8 @@ static void arm926_initfn(Object *obj)
 cpu->isar.mvfr0 = FIELD_DP32(cpu->isar.mvfr0, MVFR0, FPSHVEC, 1);
 cpu->isar.mvfr0 = FIELD_DP32(cpu->isar.mvfr0, MVFR0, FPSP, 1);
 cpu->isar.mvfr0 = FIELD_DP32(cpu->isar.mvfr0, MVFR0, FPDP, 1);
+/* Similarly, we need to set STATE0 for A32 support.  */
+cpu->isar.id_pfr0 = FIELD_DP32(cpu->isar.id_pfr0, ID_PFR0, STATE0, 1);
 }
 
 static void arm946_initfn(Object *obj)
@@ -86,6 +88,9 @@ static void arm946_initfn(Object *obj)
 cpu->midr = 0x41059461;
 cpu->ctr = 0x0f004006;
 cpu->reset_sctlr = 0x0078;
+
+/* We need to set STATE0 for A32 support. */
+cpu->isar.id_pfr0 = FIELD_DP32(cpu->isar.id_pfr0, ID_PFR0, STATE0, 1);
 }
 
 static void arm1026_initfn(Object *obj)
@@ -115,6 +120,8 @@ static void arm1026_initfn(Object *obj)
 cpu->isar.mvfr0 = FIELD_DP32(cpu->isar.mvfr0, MVFR0, FPSHVEC, 1);
 cpu->isar.mvfr0 = FIELD_DP32(cpu->isar.mvfr0, MVFR0, FPSP, 1);
 cpu->isar.mvfr0 = FIELD_DP32(cpu->isar.mvfr0, MVFR0, FPDP, 1);
+/* Similarly, we need to set STATE0 for A32 support.  */
+cpu->isar.id_pfr0 = FIELD_DP32(cpu->isar.id_pfr0, ID_PFR0, STATE0, 1);
 
 {
 /* The 1026 had an IFAR at c6,c0,0,1 rather than the ARMv6 c6,c0,0,2 */
-- 
2.25.1

[PATCH v2 21/36] linux-user/x86_64: Raise SIGSEGV if SA_RESTORER not set

2021-07-06 Thread Richard Henderson

This has been a fixme for some time.  The effect of
returning -EFAULT from the kernel code is to raise SIGSEGV.

Signed-off-by: Richard Henderson 
---
 linux-user/i386/signal.c | 11 +--
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/linux-user/i386/signal.c b/linux-user/i386/signal.c
index 8701774e37..9894e7c9ac 100644
--- a/linux-user/i386/signal.c
+++ b/linux-user/i386/signal.c
@@ -411,10 +411,13 @@ void setup_rt_frame(int sig, struct target_sigaction *ka,
 
 /* Set up to return from userspace.  If provided, use a stub
already in userspace.  */
-#ifndef TARGET_X86_64
 if (ka->sa_flags & TARGET_SA_RESTORER) {
 __put_user(ka->sa_restorer, &frame->pretcode);
 } else {
+#ifdef TARGET_X86_64
+/* For x86_64, SA_RESTORER is required ABI.  */
+goto give_sigsegv;
+#else
 uint16_t val16;
 addr = frame_addr + offsetof(struct rt_sigframe, retcode);
 __put_user(addr, &frame->pretcode);
@@ -423,12 +426,8 @@ void setup_rt_frame(int sig, struct target_sigaction *ka,
 __put_user(TARGET_NR_rt_sigreturn, (int *)(frame->retcode+1));
 val16 = 0x80cd;
 __put_user(val16, (uint16_t *)(frame->retcode+5));
-}
-#else
-/* XXX: Would be slightly better to return -EFAULT here if test fails
-   assert(ka->sa_flags & TARGET_SA_RESTORER); */
-__put_user(ka->sa_restorer, &frame->pretcode);
 #endif
+}
 
 /* Set up registers for signal handler */
 env->regs[R_ESP] = frame_addr;
-- 
2.25.1

[PATCH v2 08/36] linux-user: Replace bprm->fd with bprm->src.fd

2021-07-06 Thread Richard Henderson

There are only a couple of uses of bprm->fd remaining.
Migrate to the other field.

Reviewed-by: Philippe Mathieu-Daudé 
Signed-off-by: Richard Henderson 
---
 linux-user/qemu.h  | 1 -
 linux-user/flatload.c  | 8 
 linux-user/linuxload.c | 5 ++---
 3 files changed, 6 insertions(+), 8 deletions(-)

diff --git a/linux-user/qemu.h b/linux-user/qemu.h
index f4cdfb16b3..dafaae6293 100644
--- a/linux-user/qemu.h
+++ b/linux-user/qemu.h
@@ -211,7 +211,6 @@ struct linux_binprm {
 char buf[BPRM_BUF_SIZE] __attribute__((aligned));
 ImageSource src;
 abi_ulong p;
-int fd;
 int e_uid, e_gid;
 int argc, envc;
 char **argv;
diff --git a/linux-user/flatload.c b/linux-user/flatload.c
index 3e5594cf89..58d0d9352c 100644
--- a/linux-user/flatload.c
+++ b/linux-user/flatload.c
@@ -460,7 +460,7 @@ static int load_flat_file(struct linux_binprm * bprm,
 DBG_FLT("BINFMT_FLAT: ROM mapping of file (we hope)\n");
 
 textpos = target_mmap(0, text_len, PROT_READ|PROT_EXEC,
-  MAP_PRIVATE, bprm->fd, 0);
+  MAP_PRIVATE, bprm->src.fd, 0);
 if (textpos == -1) {
 fprintf(stderr, "Unable to mmap process text\n");
 return -1;
@@ -487,7 +487,7 @@ static int load_flat_file(struct linux_binprm * bprm,
 } else
 #endif
 {
-result = target_pread(bprm->fd, datapos,
+result = target_pread(bprm->src.fd, datapos,
   data_len + (relocs * sizeof(abi_ulong)),
   fpos);
 }
@@ -537,10 +537,10 @@ static int load_flat_file(struct linux_binprm * bprm,
 else
 #endif
 {
-result = target_pread(bprm->fd, textpos,
+result = target_pread(bprm->src.fd, textpos,
   text_len, 0);
 if (result >= 0) {
-result = target_pread(bprm->fd, datapos,
+result = target_pread(bprm->src.fd, datapos,
 data_len + (relocs * sizeof(abi_ulong)),
 ntohl(hdr->data_start));
 }
diff --git a/linux-user/linuxload.c b/linux-user/linuxload.c
index 8b93b9704c..d0d3f2ed0e 100644
--- a/linux-user/linuxload.c
+++ b/linux-user/linuxload.c
@@ -36,7 +36,7 @@ static int prepare_binprm(struct linux_binprm *bprm)
 int mode;
 int retval;
 
-if (fstat(bprm->fd, &st) < 0) {
+if (fstat(bprm->src.fd, &st) < 0) {
 return -errno;
 }
 
@@ -66,7 +66,7 @@ static int prepare_binprm(struct linux_binprm *bprm)
 bprm->e_gid = st.st_gid;
 }
 
-retval = read(bprm->fd, bprm->buf, BPRM_BUF_SIZE);
+retval = read(bprm->src.fd, bprm->buf, BPRM_BUF_SIZE);
 if (retval < 0) {
 perror("prepare_binprm");
 exit(-1);
@@ -133,7 +133,6 @@ int loader_exec(int fdexec, const char *filename, char 
**argv, char **envp,
 {
 int retval;
 
-bprm->fd = fdexec;
 bprm->src.fd = fdexec;
 bprm->filename = (char *)filename;
 bprm->argc = count(argv);
-- 
2.25.1

[PATCH v2 20/36] linux-user/hppa: Add vdso and use it for rt_sigreturn

2021-07-06 Thread Richard Henderson

Building the vdso itself is not actually wired up to anything, since
we require a cross-compiler.  Just check in that file for now.

Drop the now-unused 9 trampoline words, and describe the frame
without the trampoline in __kernel_rt_sigreturn.

Signed-off-by: Richard Henderson 
---
 linux-user/elfload.c  |   4 +
 linux-user/hppa/signal.c  |   8 +-
 linux-user/hppa/Makefile.vdso |   6 ++
 linux-user/hppa/meson.build   |   6 ++
 linux-user/hppa/vdso.S| 149 ++
 linux-user/hppa/vdso.ld   |  75 +
 linux-user/hppa/vdso.so   | Bin 0 -> 5196 bytes
 7 files changed, 241 insertions(+), 7 deletions(-)
 create mode 100644 linux-user/hppa/Makefile.vdso
 create mode 100644 linux-user/hppa/vdso.S
 create mode 100644 linux-user/hppa/vdso.ld
 create mode 100755 linux-user/hppa/vdso.so

diff --git a/linux-user/elfload.c b/linux-user/elfload.c
index 40cc79b129..ec3a854b44 100644
--- a/linux-user/elfload.c
+++ b/linux-user/elfload.c
@@ -1496,6 +1496,10 @@ static inline void init_thread(struct target_pt_regs 
*regs,
 #define STACK_GROWS_DOWN 0
 #define STACK_ALIGNMENT  64
 
+#include "vdso.c.inc"
+
+#define vdso_image_info()    &vdso_image_info
+
 static inline void init_thread(struct target_pt_regs *regs,
struct image_info *infop)
 {
diff --git a/linux-user/hppa/signal.c b/linux-user/hppa/signal.c
index 0e266f472d..44e2db6d3e 100644
--- a/linux-user/hppa/signal.c
+++ b/linux-user/hppa/signal.c
@@ -40,7 +40,6 @@ struct target_ucontext {
 };
 
 struct target_rt_sigframe {
-abi_uint tramp[9];
 target_siginfo_t info;
 struct target_ucontext uc;
 /* hidden location of upper halves of pa2.0 64-bit gregs */
@@ -138,14 +137,9 @@ void setup_rt_frame(int sig, struct target_sigaction *ka,
 
 setup_sigcontext(&frame->uc.tuc_mcontext, env);
 
-__put_user(0x34190000, frame->tramp + 0); /* ldi 0,%r25 */
-__put_user(0x3414015a, frame->tramp + 1); /* ldi __NR_rt_sigreturn,%r20 */
-__put_user(0xe4008200, frame->tramp + 2); /* be,l 0x100(%sr2,%r0) */
-__put_user(0x08000240, frame->tramp + 3); /* nop */
-
 unlock_user_struct(frame, frame_addr, 1);
 
-env->gr[2] = h2g(frame->tramp);
+env->gr[2] = default_rt_sigreturn;
 env->gr[30] = sp;
 env->gr[26] = sig;
 env->gr[25] = h2g(&frame->info);
diff --git a/linux-user/hppa/Makefile.vdso b/linux-user/hppa/Makefile.vdso
new file mode 100644
index 0000000000..d4362c4961
--- /dev/null
+++ b/linux-user/hppa/Makefile.vdso
@@ -0,0 +1,6 @@
+CROSS_CC ?= hppa-linux-gnu-gcc
+
+vdso.so: vdso.S vdso.ld Makefile.vdso
+   $(CROSS_CC) -nostdlib -shared -Wl,-T,vdso.ld \
+ -Wl,-h,linux-vdso.so.1 -Wl,--build-id=sha1 \
+ -Wl,--hash-style=sysv vdso.S -o $@
diff --git a/linux-user/hppa/meson.build b/linux-user/hppa/meson.build
index 4709508a09..e065a16a96 100644
--- a/linux-user/hppa/meson.build
+++ b/linux-user/hppa/meson.build
@@ -3,3 +3,9 @@ syscall_nr_generators += {
 arguments: [ meson.current_source_dir() / 'syscallhdr.sh', 
'@INPUT@', '@OUTPUT@', '@EXTRA_ARGS@' ],
 output: '@BASENAME@_nr.h')
 }
+
+gen = [
+  gen_vdso.process('vdso.so', extra_args: ['-r', '__kernel_rt_sigreturn'])
+]
+
+linux_user_ss.add(when: 'TARGET_HPPA', if_true: gen)
diff --git a/linux-user/hppa/vdso.S b/linux-user/hppa/vdso.S
new file mode 100644
index 0000000000..eeae2c999a
--- /dev/null
+++ b/linux-user/hppa/vdso.S
@@ -0,0 +1,149 @@
+/*
+ * hppa linux kernel vdso replacement.
+ *
+ * Copyright 2021 Linaro, Ltd.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include <asm/unistd.h>
+
+   .text
+
+#define sizeof_rt_sigframe 696
+#define offsetof_sigcontext        152
+#define offsetof_sigcontext_gr offsetof_sigcontext + 4
+#define offsetof_sigcontext_fr offsetof_sigcontext_gr + 32 * 4
+#define offsetof_sigcontext_iasq   offsetof_sigcontext_fr + 32 * 8
+#define offsetof_sigcontext_iaoq   offsetof_sigcontext_iasq + 8
+#define offsetof_sigcontext_sar    offsetof_sigcontext_iaoq + 8
+
+   /*
+* While this frame is marked as a signal frame, that only applies
+* to how this return address is handled for the outer frame.
+* The return address that arrived here, from the inner frame, is
+* not marked as a signal frame and so the unwinder still tries to
+* subtract 1 to examine the presumed call insn.  Thus we must
+* extend the unwind info to a nop before the start.
+*/
+
+   .cfi_startproc simple
+   .cfi_signal_frame
+
+   /* Compare pa32_fallback_frame_state from libgcc. */
+
+   /* Record the size of the stack frame. */
+   .cfi_def_cfa    30, -sizeof_rt_sigframe
+
+   /* Record save offset of general registers. */
+   .cfi_offset 1, offsetof_sigcontext_gr + 1 * 4
+   .cfi_offset 2, offsetof_sigcontext_gr + 2 * 4
+   .cfi_offset 3, offsetof_sigcontext_gr + 3 * 4
+

[PATCH v2 14/36] linux-user/arm: Drop "_v2" from symbols in signal.c

2021-07-06 Thread Richard Henderson

Since we no longer support "v1", there's no need to
distinguish "v2".

Cc: qemu-...@nongnu.org
Signed-off-by: Richard Henderson 
---
 linux-user/arm/signal.c | 155 +---
 1 file changed, 65 insertions(+), 90 deletions(-)

diff --git a/linux-user/arm/signal.c b/linux-user/arm/signal.c
index f8288ab51c..565a355fc4 100644
--- a/linux-user/arm/signal.c
+++ b/linux-user/arm/signal.c
@@ -45,7 +45,7 @@ struct target_sigcontext {
 abi_ulong fault_address;
 };
 
-struct target_ucontext_v2 {
+struct target_ucontext {
 abi_ulong tuc_flags;
 abi_ulong tuc_link;
 target_stack_t tuc_stack;
@@ -89,16 +89,16 @@ struct target_iwmmxt_sigframe {
 #define TARGET_VFP_MAGIC 0x56465001
 #define TARGET_IWMMXT_MAGIC 0x12ef842a
 
-struct sigframe_v2
+struct sigframe
 {
-struct target_ucontext_v2 uc;
+struct target_ucontext uc;
 abi_ulong retcode[4];
 };
 
-struct rt_sigframe_v2
+struct rt_sigframe
 {
 struct target_siginfo info;
-struct target_ucontext_v2 uc;
+struct target_ucontext uc;
 abi_ulong retcode[4];
 };
 
@@ -270,7 +270,7 @@ setup_return(CPUARMState *env, struct target_sigaction *ka,
 return 0;
 }
 
-static abi_ulong *setup_sigframe_v2_vfp(abi_ulong *regspace, CPUARMState *env)
+static abi_ulong *setup_sigframe_vfp(abi_ulong *regspace, CPUARMState *env)
 {
 int i;
 struct target_vfp_sigframe *vfpframe;
@@ -287,8 +287,7 @@ static abi_ulong *setup_sigframe_v2_vfp(abi_ulong 
*regspace, CPUARMState *env)
 return (abi_ulong*)(vfpframe+1);
 }
 
-static abi_ulong *setup_sigframe_v2_iwmmxt(abi_ulong *regspace,
-   CPUARMState *env)
+static abi_ulong *setup_sigframe_iwmmxt(abi_ulong *regspace, CPUARMState *env)
 {
 int i;
 struct target_iwmmxt_sigframe *iwmmxtframe;
@@ -307,15 +306,15 @@ static abi_ulong *setup_sigframe_v2_iwmmxt(abi_ulong 
*regspace,
 return (abi_ulong*)(iwmmxtframe+1);
 }
 
-static void setup_sigframe_v2(struct target_ucontext_v2 *uc,
-  target_sigset_t *set, CPUARMState *env)
+static void setup_sigframe(struct target_ucontext *uc,
+   target_sigset_t *set, CPUARMState *env)
 {
 struct target_sigaltstack stack;
 int i;
 abi_ulong *regspace;
 
 /* Clear all the bits of the ucontext we don't use.  */
-memset(uc, 0, offsetof(struct target_ucontext_v2, tuc_mcontext));
+memset(uc, 0, offsetof(struct target_ucontext, tuc_mcontext));
 
 memset(&stack, 0, sizeof(stack));
 target_save_altstack(&stack, env);
@@ -325,10 +324,10 @@ static void setup_sigframe_v2(struct target_ucontext_v2 
*uc,
 /* Save coprocessor signal frame.  */
 regspace = uc->tuc_regspace;
 if (cpu_isar_feature(aa32_vfp_simd, env_archcpu(env))) {
-regspace = setup_sigframe_v2_vfp(regspace, env);
+regspace = setup_sigframe_vfp(regspace, env);
 }
 if (arm_feature(env, ARM_FEATURE_IWMMXT)) {
-regspace = setup_sigframe_v2_iwmmxt(regspace, env);
+regspace = setup_sigframe_iwmmxt(regspace, env);
 }
 
 /* Write terminating magic word */
@@ -339,10 +338,10 @@ static void setup_sigframe_v2(struct target_ucontext_v2 
*uc,
 }
 }
 
-static void setup_frame_v2(int usig, struct target_sigaction *ka,
-   target_sigset_t *set, CPUARMState *regs)
+void setup_frame(int usig, struct target_sigaction *ka,
+ target_sigset_t *set, CPUARMState *regs)
 {
-struct sigframe_v2 *frame;
+struct sigframe *frame;
 abi_ulong frame_addr = get_sigframe(ka, regs, sizeof(*frame));
 
 trace_user_setup_frame(regs, frame_addr);
@@ -350,10 +349,10 @@ static void setup_frame_v2(int usig, struct 
target_sigaction *ka,
 goto sigsegv;
 }
 
-setup_sigframe_v2(&frame->uc, set, regs);
+setup_sigframe(&frame->uc, set, regs);
 
 if (setup_return(regs, ka, frame->retcode, frame_addr, usig,
- frame_addr + offsetof(struct sigframe_v2, retcode))) {
+ frame_addr + offsetof(struct sigframe, retcode))) {
 goto sigsegv;
 }
 
@@ -364,51 +363,38 @@ sigsegv:
 force_sigsegv(usig);
 }
 
-void setup_frame(int usig, struct target_sigaction *ka,
- target_sigset_t *set, CPUARMState *regs)
-{
-setup_frame_v2(usig, ka, set, regs);
-}
-
-static void setup_rt_frame_v2(int usig, struct target_sigaction *ka,
-  target_siginfo_t *info,
-  target_sigset_t *set, CPUARMState *env)
-{
-struct rt_sigframe_v2 *frame;
-abi_ulong frame_addr = get_sigframe(ka, env, sizeof(*frame));
-abi_ulong info_addr, uc_addr;
-
-trace_user_setup_rt_frame(env, frame_addr);
-if (!lock_user_struct(VERIFY_WRITE, frame, frame_addr, 0)) {
-goto sigsegv;
-}
-
-info_addr = frame_addr + offsetof(struct rt_sigframe_v2, info);
-uc_addr = frame_addr + offsetof(struct rt_sigframe_v2, uc);
-tswap_siginfo(&frame->info, in

[PATCH v2 13/36] linux-user/arm: Drop v1 signal frames

2021-07-06 Thread Richard Henderson

Version 2 signal frames are used from 2.6.12.
Since cbc14e6f286, we have set UNAME_MINIMUM_RELEASE to 2.6.32,
which means that version 1 signal frames are never used.

Cc: qemu-...@nongnu.org
Suggested-by: Peter Maydell 
Signed-off-by: Richard Henderson 
---
 linux-user/arm/signal.c | 220 +---
 1 file changed, 4 insertions(+), 216 deletions(-)

diff --git a/linux-user/arm/signal.c b/linux-user/arm/signal.c
index 32b68ee302..f8288ab51c 100644
--- a/linux-user/arm/signal.c
+++ b/linux-user/arm/signal.c
@@ -45,14 +45,6 @@ struct target_sigcontext {
 abi_ulong fault_address;
 };
 
-struct target_ucontext_v1 {
-abi_ulong tuc_flags;
-abi_ulong tuc_link;
-target_stack_t tuc_stack;
-struct target_sigcontext tuc_mcontext;
-target_sigset_t  tuc_sigmask;   /* mask last for extensibility */
-};
-
 struct target_ucontext_v2 {
 abi_ulong tuc_flags;
 abi_ulong tuc_link;
@@ -97,28 +89,12 @@ struct target_iwmmxt_sigframe {
 #define TARGET_VFP_MAGIC 0x56465001
 #define TARGET_IWMMXT_MAGIC 0x12ef842a
 
-struct sigframe_v1
-{
-struct target_sigcontext sc;
-abi_ulong extramask[TARGET_NSIG_WORDS-1];
-abi_ulong retcode[4];
-};
-
 struct sigframe_v2
 {
 struct target_ucontext_v2 uc;
 abi_ulong retcode[4];
 };
 
-struct rt_sigframe_v1
-{
-abi_ulong pinfo;
-abi_ulong puc;
-struct target_siginfo info;
-struct target_ucontext_v1 uc;
-abi_ulong retcode[4];
-};
-
 struct rt_sigframe_v2
 {
 struct target_siginfo info;
@@ -363,37 +339,6 @@ static void setup_sigframe_v2(struct target_ucontext_v2 
*uc,
 }
 }
 
-/* compare linux/arch/arm/kernel/signal.c:setup_frame() */
-static void setup_frame_v1(int usig, struct target_sigaction *ka,
-   target_sigset_t *set, CPUARMState *regs)
-{
-struct sigframe_v1 *frame;
-abi_ulong frame_addr = get_sigframe(ka, regs, sizeof(*frame));
-int i;
-
-trace_user_setup_frame(regs, frame_addr);
-if (!lock_user_struct(VERIFY_WRITE, frame, frame_addr, 0)) {
-goto sigsegv;
-}
-
-setup_sigcontext(&frame->sc, regs, set->sig[0]);
-
-for(i = 1; i < TARGET_NSIG_WORDS; i++) {
-__put_user(set->sig[i], &frame->extramask[i - 1]);
-}
-
-if (setup_return(regs, ka, frame->retcode, frame_addr, usig,
- frame_addr + offsetof(struct sigframe_v1, retcode))) {
-goto sigsegv;
-}
-
-unlock_user_struct(frame, frame_addr, 1);
-return;
-sigsegv:
-unlock_user_struct(frame, frame_addr, 1);
-force_sigsegv(usig);
-}
-
 static void setup_frame_v2(int usig, struct target_sigaction *ka,
target_sigset_t *set, CPUARMState *regs)
 {
@@ -422,60 +367,7 @@ sigsegv:
 void setup_frame(int usig, struct target_sigaction *ka,
  target_sigset_t *set, CPUARMState *regs)
 {
-if (get_osversion() >= 0x020612) {
-setup_frame_v2(usig, ka, set, regs);
-} else {
-setup_frame_v1(usig, ka, set, regs);
-}
-}
-
-/* compare linux/arch/arm/kernel/signal.c:setup_rt_frame() */
-static void setup_rt_frame_v1(int usig, struct target_sigaction *ka,
-  target_siginfo_t *info,
-  target_sigset_t *set, CPUARMState *env)
-{
-struct rt_sigframe_v1 *frame;
-abi_ulong frame_addr = get_sigframe(ka, env, sizeof(*frame));
-struct target_sigaltstack stack;
-int i;
-abi_ulong info_addr, uc_addr;
-
-trace_user_setup_rt_frame(env, frame_addr);
-if (!lock_user_struct(VERIFY_WRITE, frame, frame_addr, 0)) {
-goto sigsegv;
-}
-
-info_addr = frame_addr + offsetof(struct rt_sigframe_v1, info);
-__put_user(info_addr, &frame->pinfo);
-uc_addr = frame_addr + offsetof(struct rt_sigframe_v1, uc);
-__put_user(uc_addr, &frame->puc);
-tswap_siginfo(&frame->info, info);
-
-/* Clear all the bits of the ucontext we don't use.  */
-memset(&frame->uc, 0, offsetof(struct target_ucontext_v1, tuc_mcontext));
-
-memset(&stack, 0, sizeof(stack));
-target_save_altstack(&stack, env);
-memcpy(&frame->uc.tuc_stack, &stack, sizeof(stack));
-
-setup_sigcontext(&frame->uc.tuc_mcontext, env, set->sig[0]);
-for(i = 0; i < TARGET_NSIG_WORDS; i++) {
-__put_user(set->sig[i], &frame->uc.tuc_sigmask.sig[i]);
-}
-
-if (setup_return(env, ka, frame->retcode, frame_addr, usig,
- frame_addr + offsetof(struct rt_sigframe_v1, retcode))) {
-goto sigsegv;
-}
-
-env->regs[1] = info_addr;
-env->regs[2] = uc_addr;
-
-unlock_user_struct(frame, frame_addr, 1);
-return;
-sigsegv:
-unlock_user_struct(frame, frame_addr, 1);
-force_sigsegv(usig);
+setup_frame_v2(usig, ka, set, regs);
 }
 
 static void setup_rt_frame_v2(int usig, struct target_sigaction *ka,
@@ -516,11 +408,7 @@ void setup_rt_frame(int usig, struct target_sigaction *ka,
 target_siginfo_t *info,
 target_sigs

[PATCH v2 19/36] linux-user/hexagon: Implement setup_sigtramp

2021-07-06 Thread Richard Henderson

Continue to initialize the words on the stack, as documented.
However, use the off-stack trampoline.

Cc: Taylor Simpson 
Reviewed-by: Philippe Mathieu-Daudé 
Signed-off-by: Richard Henderson 
---
 linux-user/hexagon/target_signal.h |  2 ++
 linux-user/hexagon/signal.c| 19 +--
 2 files changed, 19 insertions(+), 2 deletions(-)

diff --git a/linux-user/hexagon/target_signal.h 
b/linux-user/hexagon/target_signal.h
index 345cf1cbb8..9e0223d322 100644
--- a/linux-user/hexagon/target_signal.h
+++ b/linux-user/hexagon/target_signal.h
@@ -31,4 +31,6 @@ typedef struct target_sigaltstack {
 
 #include "../generic/signal.h"
 
+#define TARGET_ARCH_HAS_SIGTRAMP_PAGE 1
+
 #endif /* TARGET_SIGNAL_H */
diff --git a/linux-user/hexagon/signal.c b/linux-user/hexagon/signal.c
index 85eab5e943..bd0f9b1c85 100644
--- a/linux-user/hexagon/signal.c
+++ b/linux-user/hexagon/signal.c
@@ -161,6 +161,11 @@ void setup_rt_frame(int sig, struct target_sigaction *ka,
 
 setup_ucontext(&frame->uc, env, set);
 tswap_siginfo(&frame->info, info);
+/*
+ * The on-stack signal trampoline is no longer executed;
+ * however, the libgcc signal frame unwinding code checks
+ * for the presence of these two numeric magic values.
+ */
 install_sigtramp(frame->tramp);
 
 env->gpr[HEX_REG_PC] = ka->_sa_handler;
@@ -170,8 +175,7 @@ void setup_rt_frame(int sig, struct target_sigaction *ka,
 frame_addr + offsetof(struct target_rt_sigframe, info);
 env->gpr[HEX_REG_R02] =
 frame_addr + offsetof(struct target_rt_sigframe, uc);
-env->gpr[HEX_REG_LR] =
-frame_addr + offsetof(struct target_rt_sigframe, tramp);
+env->gpr[HEX_REG_LR] = default_rt_sigreturn;
 
 return;
 
@@ -270,3 +274,14 @@ badframe:
 force_sig(TARGET_SIGSEGV);
 return 0;
 }
+
+void setup_sigtramp(abi_ulong sigtramp_page)
+{
+uint32_t *tramp = lock_user(VERIFY_WRITE, sigtramp_page, 4 * 2, 0);
+assert(tramp != NULL);
+
+default_rt_sigreturn = sigtramp_page;
+install_sigtramp(tramp);
+
+unlock_user(tramp, sigtramp_page, 4 * 2);
+}
-- 
2.25.1

[PATCH v2 03/36] linux-user: Introduce imgsrc_read, imgsrc_read_alloc

2021-07-06 Thread Richard Henderson

Introduced and initialized, but not yet really used.
These will tidy the current tests vs BPRM_BUF_SIZE.

Signed-off-by: Richard Henderson 
---
 linux-user/qemu.h  | 50 ++
 linux-user/linuxload.c | 46 ++
 2 files changed, 87 insertions(+), 9 deletions(-)

diff --git a/linux-user/qemu.h b/linux-user/qemu.h
index 9e5e2aa499..f4cdfb16b3 100644
--- a/linux-user/qemu.h
+++ b/linux-user/qemu.h
@@ -168,6 +168,37 @@ extern unsigned long mmap_min_addr;
 
 /* ??? See if we can avoid exposing so much of the loader internals.  */
 
+typedef struct {
+const void *cache;
+unsigned int cache_size;
+int fd;
+} ImageSource;
+
+/**
+ * imgsrc_read: Read from ImageSource
+ * @dst: destination for read
+ * @offset: offset within file for read
+ * @len: size of the read
+ * @img: ImageSource to read from
+ * @errp: Error details.
+ *
+ * Read into @dst, using the cache when possible.
+ */
+bool imgsrc_read(void *dst, off_t offset, size_t len,
+ const ImageSource *img, Error **errp);
+
+/**
+ * imgsrc_read_alloc: Read from ImageSource
+ * @offset: offset within file for read
+ * @len: size of the read
+ * @img: ImageSource to read from
+ * @errp: Error details.
+ *
+ * Read into newly allocated memory, using the cache when possible.
+ */
+void *imgsrc_read_alloc(off_t offset, size_t len,
+const ImageSource *img, Error **errp);
+
 /* Read a good amount of data initially, to hopefully get all the
program headers loaded.  */
 #define BPRM_BUF_SIZE  1024
@@ -177,15 +208,16 @@ extern unsigned long mmap_min_addr;
  * used when loading binaries.
  */
 struct linux_binprm {
-char buf[BPRM_BUF_SIZE] __attribute__((aligned));
-abi_ulong p;
-int fd;
-int e_uid, e_gid;
-int argc, envc;
-char **argv;
-char **envp;
-char * filename;/* Name of binary */
-int (*core_dump)(int, const CPUArchState *); /* coredump routine */
+char buf[BPRM_BUF_SIZE] __attribute__((aligned));
+ImageSource src;
+abi_ulong p;
+int fd;
+int e_uid, e_gid;
+int argc, envc;
+char **argv;
+char **envp;
+char *filename;  /* Name of binary */
+int (*core_dump)(int, const CPUArchState *); /* coredump routine */
 };
 
 typedef struct IOCTLEntry IOCTLEntry;
diff --git a/linux-user/linuxload.c b/linux-user/linuxload.c
index 9d4eb5e94b..3b0bafc490 100644
--- a/linux-user/linuxload.c
+++ b/linux-user/linuxload.c
@@ -2,6 +2,7 @@
 
 #include "qemu/osdep.h"
 #include "qemu.h"
+#include "qapi/error.h"
 
 #define NGROUPS 32
 
@@ -74,6 +75,10 @@ static int prepare_binprm(struct linux_binprm *bprm)
 /* Make sure the rest of the loader won't read garbage.  */
 memset(bprm->buf + retval, 0, BPRM_BUF_SIZE - retval);
 }
+
+bprm->src.cache = bprm->buf;
+bprm->src.cache_size = retval;
+
 return retval;
 }
 
@@ -129,6 +134,7 @@ int loader_exec(int fdexec, const char *filename, char 
**argv, char **envp,
 int retval;
 
 bprm->fd = fdexec;
+bprm->src.fd = fdexec;
 bprm->filename = (char *)filename;
 bprm->argc = count(argv);
 bprm->argv = argv;
@@ -163,3 +169,43 @@ int loader_exec(int fdexec, const char *filename, char 
**argv, char **envp,
 
 return retval;
 }
+
+bool imgsrc_read(void *dst, off_t offset, size_t len,
+ const ImageSource *img, Error **errp)
+{
+ssize_t ret;
+
+if (offset + len <= img->cache_size) {
+memcpy(dst, img->cache + offset, len);
+return true;
+}
+
+if (img->fd < 0) {
+error_setg(errp, "read past end of buffer");
+return false;
+}
+
+ret = pread(img->fd, dst, len, offset);
+if (ret == len) {
+return true;
+}
+if (ret < 0) {
+error_setg_errno(errp, errno, "Error reading file header");
+} else {
+error_setg(errp, "Incomplete read of file header");
+}
+return false;
+}
+
+void *imgsrc_read_alloc(off_t offset, size_t len,
+const ImageSource *img, Error **errp)
+{
+void *alloc = g_malloc(len);
+bool ok = imgsrc_read(alloc, offset, len, img, errp);
+
+if (!ok) {
+g_free(alloc);
+alloc = NULL;
+}
+return alloc;
+}
-- 
2.25.1

[PATCH v2 16/36] linux-user/arm: Add vdso and use it for rt_sigreturn

2021-07-06 Thread Richard Henderson

Building of the vdsos is not actually wired up to anything, since
we require a cross-compiler.  Just check in the files for now.

The thumb vdso will only be used for m-profile, as all of our
a-profile cpus support arm mode.

Cc: qemu-...@nongnu.org
Signed-off-by: Richard Henderson 
---
 linux-user/arm/signal.c   |  83 +++---
 linux-user/elfload.c  |  24 
 linux-user/arm/Makefile.vdso  |  17 +++
 linux-user/arm/meson.build|  18 +++
 linux-user/arm/vdso-arm-be.so | Bin 0 -> 5648 bytes
 linux-user/arm/vdso-arm-le.so | Bin 0 -> 5648 bytes
 linux-user/arm/vdso-thm-be.so | Bin 0 -> 5620 bytes
 linux-user/arm/vdso-thm-le.so | Bin 0 -> 5620 bytes
 linux-user/arm/vdso.S | 209 ++
 linux-user/arm/vdso.ld|  74 
 10 files changed, 359 insertions(+), 66 deletions(-)
 create mode 100644 linux-user/arm/Makefile.vdso
 create mode 100755 linux-user/arm/vdso-arm-be.so
 create mode 100755 linux-user/arm/vdso-arm-le.so
 create mode 100755 linux-user/arm/vdso-thm-be.so
 create mode 100755 linux-user/arm/vdso-thm-le.so
 create mode 100644 linux-user/arm/vdso.S
 create mode 100644 linux-user/arm/vdso.ld

diff --git a/linux-user/arm/signal.c b/linux-user/arm/signal.c
index 565a355fc4..8edc1b1373 100644
--- a/linux-user/arm/signal.c
+++ b/linux-user/arm/signal.c
@@ -102,40 +102,6 @@ struct rt_sigframe
 abi_ulong retcode[4];
 };
 
-/*
- * For ARM syscalls, we encode the syscall number into the instruction.
- */
-#define SWI_SYS_SIGRETURN   (0xef00|(TARGET_NR_sigreturn + 
ARM_SYSCALL_BASE))
-#define SWI_SYS_RT_SIGRETURN(0xef00|(TARGET_NR_rt_sigreturn + 
ARM_SYSCALL_BASE))
-
-/*
- * For Thumb syscalls, we pass the syscall number via r7.  We therefore
- * need two 16-bit instructions.
- */
-#define SWI_THUMB_SIGRETURN (0xdf00 << 16 | 0x2700 | (TARGET_NR_sigreturn))
-#define SWI_THUMB_RT_SIGRETURN  (0xdf00 << 16 | 0x2700 | 
(TARGET_NR_rt_sigreturn))
-
-static const abi_ulong retcodes[4] = {
-SWI_SYS_SIGRETURN,  SWI_THUMB_SIGRETURN,
-SWI_SYS_RT_SIGRETURN,   SWI_THUMB_RT_SIGRETURN
-};
-
-/*
- * Stub needed to make sure the FD register (r9) contains the right
- * value.
- */
-static const unsigned long sigreturn_fdpic_codes[3] = {
-0xe59fc004, /* ldr r12, [pc, #4] to read function descriptor */
-0xe59c9004, /* ldr r9, [r12, #4] to setup GOT */
-0xe59cf000  /* ldr pc, [r12] to jump into restorer */
-};
-
-static const unsigned long sigreturn_fdpic_thumb_codes[3] = {
-0xc008f8df, /* ldr r12, [pc, #8] to read function descriptor */
-0x9004f8dc, /* ldr r9, [r12, #4] to setup GOT */
-0xf000f8dc  /* ldr pc, [r12] to jump into restorer */
-};
-
 static inline int valid_user_regs(CPUARMState *regs)
 {
 return 1;
@@ -188,9 +154,10 @@ setup_return(CPUARMState *env, struct target_sigaction *ka,
 abi_ulong handler = 0;
 abi_ulong handler_fdpic_GOT = 0;
 abi_ulong retcode;
-
-int thumb;
-int is_fdpic = info_is_fdpic(((TaskState *)thread_cpu->opaque)->info);
+bool thumb;
+bool is_fdpic = info_is_fdpic(((TaskState *)thread_cpu->opaque)->info);
+uint32_t cpsr;
+int idx;
 
 if (is_fdpic) {
 /* In FDPIC mode, ka->_sa_handler points to a function
@@ -208,7 +175,7 @@ setup_return(CPUARMState *env, struct target_sigaction *ka,
 
 thumb = handler & 1;
 
-uint32_t cpsr = cpsr_read(env);
+cpsr = cpsr_read(env);
 
 cpsr &= ~CPSR_IT;
 if (thumb) {
@@ -222,39 +189,23 @@ setup_return(CPUARMState *env, struct target_sigaction 
*ka,
 cpsr &= ~CPSR_E;
 }
 
+/* Our vdso default_sigreturn label is a table of entry points. */
+idx = is_fdpic * 2 + ((ka->sa_flags & TARGET_SA_SIGINFO) != 0);
+retcode = default_sigreturn + idx * 16;
+
+/*
+ * Put the sigreturn code on the stack no matter which return
+ * mechanism we use in order to remain ABI compliant.
+ */
+memcpy(rc, g2h_untagged(retcode & ~1), 16);
+
 if (ka->sa_flags & TARGET_SA_RESTORER) {
 if (is_fdpic) {
-/* For FDPIC we ensure that the restorer is called with a
- * correct r9 value.  For that we need to write code on
- * the stack that sets r9 and jumps back to restorer
- * value.
- */
-if (thumb) {
-__put_user(sigreturn_fdpic_thumb_codes[0], rc);
-__put_user(sigreturn_fdpic_thumb_codes[1], rc + 1);
-__put_user(sigreturn_fdpic_thumb_codes[2], rc + 2);
-__put_user((abi_ulong)ka->sa_restorer, rc + 3);
-} else {
-__put_user(sigreturn_fdpic_codes[0], rc);
-__put_user(sigreturn_fdpic_codes[1], rc + 1);
-__put_user(sigreturn_fdpic_codes[2], rc + 2);
-__put_user((abi_ulong)ka->sa_restorer, rc + 3);
-}
-
-retcode = rc_addr + thumb;
+/* Place the function descriptor in slot 3. */
+__put_us

[PATCH v2 06/36] linux-user: Use ImageSource in load_elf_image

2021-07-06 Thread Richard Henderson

Change parse_elf_properties as well, as the bprm_buf argument
ties the two functions closely.

Signed-off-by: Richard Henderson 
---
 linux-user/elfload.c | 124 ---
 1 file changed, 47 insertions(+), 77 deletions(-)

diff --git a/linux-user/elfload.c b/linux-user/elfload.c
index d1278c2d29..9113bf82f1 100644
--- a/linux-user/elfload.c
+++ b/linux-user/elfload.c
@@ -2530,10 +2530,9 @@ static bool parse_elf_property(const uint32_t *data, int 
*off, int datasz,
 }
 
 /* Process NT_GNU_PROPERTY_TYPE_0. */
-static bool parse_elf_properties(int image_fd,
+static bool parse_elf_properties(const ImageSource *src,
  struct image_info *info,
  const struct elf_phdr *phdr,
- char bprm_buf[BPRM_BUF_SIZE],
  Error **errp)
 {
 union {
@@ -2561,14 +2560,8 @@ static bool parse_elf_properties(int image_fd,
 return false;
 }
 
-if (phdr->p_offset + n <= BPRM_BUF_SIZE) {
-memcpy(&note, bprm_buf + phdr->p_offset, n);
-} else {
-ssize_t len = pread(image_fd, &note, n, phdr->p_offset);
-if (len != n) {
-error_setg_errno(errp, errno, "Error reading file header");
-return false;
-}
+if (!imgsrc_read(&note, phdr->p_offset, n, src, errp)) {
+return false;
 }
 
 /*
@@ -2614,30 +2607,34 @@ static bool parse_elf_properties(int image_fd,
 }
 }
 
-/* Load an ELF image into the address space.
+/**
+ * load_elf_image: Load an ELF image into the address space.
+ * @image_name: the filename of the image, to use in error messages.
+ * @src: the ImageSource from which to read.
+ * @info: info collected from the loaded image.
+ * @ehdr: the ELF header, not yet bswapped.
+ * @pinterp_name: record any PT_INTERP string found.
+ *
+ * On return: @info values will be filled in, as necessary or available.
+ */
 
-   IMAGE_NAME is the filename of the image, to use in error messages.
-   IMAGE_FD is the open file descriptor for the image.
-
-   BPRM_BUF is a copy of the beginning of the file; this of course
-   contains the elf file header at offset 0.  It is assumed that this
-   buffer is sufficiently aligned to present no problems to the host
-   in accessing data at aligned offsets within the buffer.
-
-   On return: INFO values will be filled in, as necessary or available.  */
-
-static void load_elf_image(const char *image_name, int image_fd,
+static void load_elf_image(const char *image_name, const ImageSource *src,
struct image_info *info, struct elfhdr *ehdr,
-   char **pinterp_name,
-   char bprm_buf[BPRM_BUF_SIZE])
+   char **pinterp_name)
 {
-struct elf_phdr *phdr;
+g_autofree struct elf_phdr *phdr = NULL;
 abi_ulong load_addr, load_bias, loaddr, hiaddr, error;
-int i, retval, prot_exec;
+int i, prot_exec;
 Error *err = NULL;
 
-/* First of all, some simple consistency checks */
-memcpy(ehdr, bprm_buf, sizeof(*ehdr));
+/*
+ * First of all, some simple consistency checks.
+ * Note that we rely on the bswapped ehdr staying in bprm_buf,
+ * for later use by load_elf_binary and create_elf_tables.
+ */
+if (!imgsrc_read(ehdr, 0, sizeof(*ehdr), src, &err)) {
+goto exit_errmsg;
+}
 if (!elf_check_ident(ehdr)) {
 error_setg(&err, "Invalid ELF image for this architecture");
 goto exit_errmsg;
@@ -2648,15 +2645,11 @@ static void load_elf_image(const char *image_name, int 
image_fd,
 goto exit_errmsg;
 }
 
-i = ehdr->e_phnum * sizeof(struct elf_phdr);
-if (ehdr->e_phoff + i <= BPRM_BUF_SIZE) {
-phdr = (struct elf_phdr *)(bprm_buf + ehdr->e_phoff);
-} else {
-phdr = (struct elf_phdr *) alloca(i);
-retval = pread(image_fd, phdr, i, ehdr->e_phoff);
-if (retval != i) {
-goto exit_read;
-}
+phdr = imgsrc_read_alloc(ehdr->e_phoff,
+ ehdr->e_phnum * sizeof(struct elf_phdr),
+ src, &err);
+if (phdr == NULL) {
+goto exit_errmsg;
 }
 bswap_phdr(phdr, ehdr->e_phnum);
 
@@ -2692,17 +2685,10 @@ static void load_elf_image(const char *image_name, int 
image_fd,
 goto exit_errmsg;
 }
 
-interp_name = g_malloc(eppnt->p_filesz);
-
-if (eppnt->p_offset + eppnt->p_filesz <= BPRM_BUF_SIZE) {
-memcpy(interp_name, bprm_buf + eppnt->p_offset,
-   eppnt->p_filesz);
-} else {
-retval = pread(image_fd, interp_name, eppnt->p_filesz,
-   eppnt->p_offset);
-if (retval != eppnt->p_filesz) {
-goto exit_read;
-}
+interp_name = imgsrc_read_alloc(eppnt->p_offset, eppnt->p_filesz,
+

[PATCH v2 12/36] linux-user/aarch64: Add vdso and use it for rt_sigreturn

2021-07-06 Thread Richard Henderson

Building of the vdsos is not actually wired up to anything, since
we require a cross-compiler.  Just check in the files for now.

Cc: qemu-...@nongnu.org
Signed-off-by: Richard Henderson 
---
 linux-user/aarch64/signal.c  |  17 ++-
 linux-user/elfload.c |   4 ++
 linux-user/aarch64/Makefile.vdso |  11 +
 linux-user/aarch64/meson.build   |  11 +
 linux-user/aarch64/vdso-be.so| Bin 0 -> 6000 bytes
 linux-user/aarch64/vdso-le.so| Bin 0 -> 6000 bytes
 linux-user/aarch64/vdso.S|  77 +++
 linux-user/aarch64/vdso.ld   |  74 +
 linux-user/meson.build   |   1 +
 9 files changed, 182 insertions(+), 13 deletions(-)
 create mode 100644 linux-user/aarch64/Makefile.vdso
 create mode 100644 linux-user/aarch64/meson.build
 create mode 100755 linux-user/aarch64/vdso-be.so
 create mode 100755 linux-user/aarch64/vdso-le.so
 create mode 100644 linux-user/aarch64/vdso.S
 create mode 100644 linux-user/aarch64/vdso.ld

diff --git a/linux-user/aarch64/signal.c b/linux-user/aarch64/signal.c
index 662bcd1c4e..f4fd2cfd62 100644
--- a/linux-user/aarch64/signal.c
+++ b/linux-user/aarch64/signal.c
@@ -108,7 +108,6 @@ struct target_rt_sigframe {
 struct target_rt_frame_record {
 uint64_t fp;
 uint64_t lr;
-uint32_t tramp[2];
 };
 
 static void target_setup_general_frame(struct target_rt_sigframe *sf,
@@ -460,9 +459,9 @@ static void target_setup_frame(int usig, struct 
target_sigaction *ka,
 layout.total_size = MAX(layout.total_size,
 sizeof(struct target_rt_sigframe));
 
-/* Reserve space for the return code.  On a real system this would
- * be within the VDSO.  So, despite the name this is not a "real"
- * record within the frame.
+/*
+ * Reserve space for the standard frame unwind pair: fp, lr.
+ * Despite the name this is not a "real" record within the frame.
  */
 fr_ofs = layout.total_size;
 layout.total_size += sizeof(struct target_rt_frame_record);
@@ -495,15 +494,7 @@ static void target_setup_frame(int usig, struct 
target_sigaction *ka,
 if (ka->sa_flags & TARGET_SA_RESTORER) {
 return_addr = ka->sa_restorer;
 } else {
-/*
- * mov x8,#__NR_rt_sigreturn; svc #0
- * Since these are instructions they need to be put as little-endian
- * regardless of target default or current CPU endianness.
- */
-__put_user_e(0xd2801168, &fr->tramp[0], le);
-__put_user_e(0xd401, &fr->tramp[1], le);
-return_addr = frame_addr + fr_ofs
-+ offsetof(struct target_rt_frame_record, tramp);
+return_addr = default_rt_sigreturn;
 }
 env->xregs[0] = usig;
 env->xregs[29] = frame_addr + fr_ofs;
diff --git a/linux-user/elfload.c b/linux-user/elfload.c
index f76281fe40..8088828f5f 100644
--- a/linux-user/elfload.c
+++ b/linux-user/elfload.c
@@ -525,10 +525,14 @@ static const char *get_elf_platform(void)
 #define ELF_CLASS   ELFCLASS64
 #ifdef TARGET_WORDS_BIGENDIAN
 # define ELF_PLATFORM"aarch64_be"
+# include "vdso-be.c.inc"
 #else
 # define ELF_PLATFORM"aarch64"
+# include "vdso-le.c.inc"
 #endif
 
+#define vdso_image_info()&vdso_image_info
+
 static inline void init_thread(struct target_pt_regs *regs,
struct image_info *infop)
 {
diff --git a/linux-user/aarch64/Makefile.vdso b/linux-user/aarch64/Makefile.vdso
new file mode 100644
index 00..97ede674e5
--- /dev/null
+++ b/linux-user/aarch64/Makefile.vdso
@@ -0,0 +1,11 @@
+CROSS_CC ?= aarch64-linux-gnu-gcc
+LDFLAGS := -nostdlib -shared -Wl,-T,vdso.ld \
+  -Wl,-h,linux-vdso.so.1 -Wl,--hash-style=sysv -Wl,--build-id=sha1
+
+all: vdso-le.so vdso-be.so
+
+vdso-le.so: vdso.S vdso.ld Makefile.vdso
+   $(CROSS_CC)  $(LDFLAGS) -mlittle-endian vdso.S -o $@
+
+vdso-be.so: vdso.S vdso.ld Makefile.vdso
+   $(CROSS_CC)  $(LDFLAGS) -mbig-endian vdso.S -o $@
diff --git a/linux-user/aarch64/meson.build b/linux-user/aarch64/meson.build
new file mode 100644
index 00..0b1b3a5560
--- /dev/null
+++ b/linux-user/aarch64/meson.build
@@ -0,0 +1,11 @@
+# ??? There does not seem to be a way to do
+#   when: ['TARGET_AARCH64', !'TARGET_WORDS_BIGENDIAN']
+# so we'd need to add TARGET_WORDS_LITTLEENDIAN.
+# In the meantime, build both files for aarch64 and aarch64_be.
+
+gen = [
+  gen_vdso.process('vdso-be.so', extra_args: ['-r', '__kernel_rt_sigreturn']),
+  gen_vdso.process('vdso-le.so', extra_args: ['-r', '__kernel_rt_sigreturn'])
+]
+
+linux_user_ss.add(when: 'TARGET_AARCH64', if_true: gen)
diff --git a/linux-user/aarch64/vdso-be.so b/linux-user/aarch64/vdso-be.so
new file mode 100755
index 
..b0f4a83e32f300dd41110740a78f3172efbe9d1d
GIT binary patch
literal 6000
zcmeHLJ!~9B6n=ZVBmpPk9D{;nL=NNvA+ip!fbb){pLa8(
zym`M|trQ9#*y6)C*k+7dv&h9-2TnG_s*S9+B=h`Fa4$XQ@LV5=ILKznd9Vt
z$TG*f@FmNSF}}+~Ck!T4RbK*#w}}Mx1oxwn|df
zZ>=Y

[PATCH v2 05/36] linux-user: Do not clobber bprm_buf swapping ehdr

2021-07-06 Thread Richard Henderson

Rearrange the allocation of storage for ehdr between load_elf_image
and load_elf_binary.  The same set of copies is done, but we don't
modify bprm_buf, which will be important later.

Reviewed-by: Philippe Mathieu-Daudé 
Signed-off-by: Richard Henderson 
---
 linux-user/elfload.c | 25 ++---
 1 file changed, 14 insertions(+), 11 deletions(-)

diff --git a/linux-user/elfload.c b/linux-user/elfload.c
index 9bec6613a2..d1278c2d29 100644
--- a/linux-user/elfload.c
+++ b/linux-user/elfload.c
@@ -2627,16 +2627,17 @@ static bool parse_elf_properties(int image_fd,
On return: INFO values will be filled in, as necessary or available.  */
 
 static void load_elf_image(const char *image_name, int image_fd,
-   struct image_info *info, char **pinterp_name,
+   struct image_info *info, struct elfhdr *ehdr,
+   char **pinterp_name,
char bprm_buf[BPRM_BUF_SIZE])
 {
-struct elfhdr *ehdr = (struct elfhdr *)bprm_buf;
 struct elf_phdr *phdr;
 abi_ulong load_addr, load_bias, loaddr, hiaddr, error;
 int i, retval, prot_exec;
 Error *err = NULL;
 
 /* First of all, some simple consistency checks */
+memcpy(ehdr, bprm_buf, sizeof(*ehdr));
 if (!elf_check_ident(ehdr)) {
 error_setg(&err, "Invalid ELF image for this architecture");
 goto exit_errmsg;
@@ -2949,6 +2950,7 @@ static void load_elf_image(const char *image_name, int 
image_fd,
 static void load_elf_interp(const char *filename, struct image_info *info,
 char bprm_buf[BPRM_BUF_SIZE])
 {
+struct elfhdr ehdr;
 int fd, retval;
 Error *err = NULL;
 
@@ -2970,7 +2972,7 @@ static void load_elf_interp(const char *filename, struct 
image_info *info,
 memset(bprm_buf + retval, 0, BPRM_BUF_SIZE - retval);
 }
 
-load_elf_image(filename, fd, info, NULL, bprm_buf);
+load_elf_image(filename, fd, info, &ehdr, NULL, bprm_buf);
 }
 
 static int symfind(const void *s0, const void *s1)
@@ -3162,8 +3164,14 @@ uint32_t get_elf_eflags(int fd)
 
 int load_elf_binary(struct linux_binprm *bprm, struct image_info *info)
 {
+/*
+ * We need a copy of the elf header for passing to create_elf_tables.
+ * We will have overwritten the original when we re-use bprm->buf
+ * while loading the interpreter.  Allocate the storage for this now
+ * and let load_elf_image do any swapping that may be required.
+ */
+struct elfhdr ehdr;
 struct image_info interp_info;
-struct elfhdr elf_ex;
 char *elf_interpreter = NULL;
 char *scratch;
 
@@ -3175,12 +3183,7 @@ int load_elf_binary(struct linux_binprm *bprm, struct 
image_info *info)
 info->start_mmap = (abi_ulong)ELF_START_MMAP;
 
 load_elf_image(bprm->filename, bprm->fd, info,
-   &elf_interpreter, bprm->buf);
-
-/* ??? We need a copy of the elf header for passing to create_elf_tables.
-   If we do nothing, we'll have overwritten this when we re-use bprm->buf
-   when we load the interpreter.  */
-elf_ex = *(struct elfhdr *)bprm->buf;
+   &ehdr, &elf_interpreter, bprm->buf);
 
 /* Do this so that we can load the interpreter, if need be.  We will
change some of these later */
@@ -3250,7 +3253,7 @@ int load_elf_binary(struct linux_binprm *bprm, struct 
image_info *info)
 target_mprotect(tramp_page, TARGET_PAGE_SIZE, PROT_READ | PROT_EXEC);
 }
 
-bprm->p = create_elf_tables(bprm->p, bprm->argc, bprm->envc, &elf_ex,
+bprm->p = create_elf_tables(bprm->p, bprm->argc, bprm->envc, &ehdr,
 info, (elf_interpreter ? &interp_info : NULL));
 info->start_stack = bprm->p;
 
-- 
2.25.1

[PATCH v2 09/36] linux-user: Introduce imgsrc_mmap

2021-07-06 Thread Richard Henderson

Signed-off-by: Richard Henderson 
---
 linux-user/qemu.h  | 11 +++
 linux-user/elfload.c   |  4 ++--
 linux-user/linuxload.c | 44 ++
 3 files changed, 57 insertions(+), 2 deletions(-)

diff --git a/linux-user/qemu.h b/linux-user/qemu.h
index dafaae6293..255182e133 100644
--- a/linux-user/qemu.h
+++ b/linux-user/qemu.h
@@ -199,6 +199,17 @@ bool imgsrc_read(void *dst, off_t offset, size_t len,
 void *imgsrc_read_alloc(off_t offset, size_t len,
 const ImageSource *img, Error **errp);
 
+/**
+ * imgsrc_mmap: Map from ImageSource
+ *
+ * If @src has a file descriptor, pass on to target_mmap.  Otherwise,
+ * this is "mapping" from a host buffer, which resolves to memcpy.
+ * Therefore, flags must be MAP_PRIVATE | MAP_FIXED; the argument is
+ * retained for clarity.
+ */
+abi_long imgsrc_mmap(abi_ulong start, abi_ulong len, int prot,
+ int flags, const ImageSource *src, abi_ulong offset);
+
 /* Read a good amount of data initially, to hopefully get all the
program headers loaded.  */
 #define BPRM_BUF_SIZE  1024
diff --git a/linux-user/elfload.c b/linux-user/elfload.c
index 490b374d13..ec4b5f9d54 100644
--- a/linux-user/elfload.c
+++ b/linux-user/elfload.c
@@ -2837,9 +2837,9 @@ static void load_elf_image(const char *image_name, const 
ImageSource *src,
  */
 if (eppnt->p_filesz != 0) {
 vaddr_len = TARGET_ELF_PAGELENGTH(eppnt->p_filesz + vaddr_po);
-error = target_mmap(vaddr_ps, vaddr_len, elf_prot,
+error = imgsrc_mmap(vaddr_ps, vaddr_len, elf_prot,
 MAP_PRIVATE | MAP_FIXED,
-src->fd, eppnt->p_offset - vaddr_po);
+src, eppnt->p_offset - vaddr_po);
 
 if (error == -1) {
 goto exit_mmap;
diff --git a/linux-user/linuxload.c b/linux-user/linuxload.c
index d0d3f2ed0e..a437a22b49 100644
--- a/linux-user/linuxload.c
+++ b/linux-user/linuxload.c
@@ -208,3 +208,47 @@ void *imgsrc_read_alloc(off_t offset, size_t len,
 }
 return alloc;
 }
+
+abi_long imgsrc_mmap(abi_ulong start, abi_ulong len, int prot,
+ int flags, const ImageSource *src, abi_ulong offset)
+{
+abi_long ret;
+int prot_write;
+void *haddr;
+
+assert(flags == (MAP_PRIVATE | MAP_FIXED));
+
+if (src->fd >= 0) {
+return target_mmap(start, len, prot, flags, src->fd, offset);
+}
+
+/*
+ * This case is for the vdso; we don't expect bad images.
+ * The mmap may extend beyond the end of the image, especially
+ * to the end of the page.  Zero fill.
+ */
+assert(offset < src->cache_size);
+
+prot_write = prot | PROT_WRITE;
+ret = target_mmap(start, len, prot_write, flags | MAP_ANON, -1, 0);
+if (ret == -1) {
+return ret;
+}
+
+haddr = lock_user(VERIFY_WRITE, start, len, 0);
+assert(haddr != NULL);
+if (offset + len < src->cache_size) {
+memcpy(haddr, src->cache + offset, len);
+} else {
+size_t rest = src->cache_size - offset;
+memcpy(haddr, src->cache + offset, rest);
+memset(haddr + rest, 0, len - rest);
+}
+unlock_user(haddr, start, len);
+
+if (prot != prot_write) {
+target_mprotect(start, len, prot);
+}
+
+return ret;
+}
-- 
2.25.1

[PATCH v2 17/36] linux-user/alpha: Implement setup_sigtramp

2021-07-06 Thread Richard Henderson

Create and record the two signal trampolines.
Use them when the guest does not use ka_restorer.

Reviewed-by: Philippe Mathieu-Daudé 
Signed-off-by: Richard Henderson 
---
 linux-user/alpha/target_signal.h |  1 +
 linux-user/alpha/signal.c| 34 +++-
 2 files changed, 21 insertions(+), 14 deletions(-)

diff --git a/linux-user/alpha/target_signal.h b/linux-user/alpha/target_signal.h
index 250642913e..0b6a39de65 100644
--- a/linux-user/alpha/target_signal.h
+++ b/linux-user/alpha/target_signal.h
@@ -93,6 +93,7 @@ typedef struct target_sigaltstack {
 
 #define TARGET_ARCH_HAS_SETUP_FRAME
 #define TARGET_ARCH_HAS_KA_RESTORER
+#define TARGET_ARCH_HAS_SIGTRAMP_PAGE 1
 
 /* bit-flags */
 #define TARGET_SS_AUTODISARM (1U << 31) /* disable sas during sighandling */
diff --git a/linux-user/alpha/signal.c b/linux-user/alpha/signal.c
index 1129ffeea1..e15f5438c3 100644
--- a/linux-user/alpha/signal.c
+++ b/linux-user/alpha/signal.c
@@ -54,13 +54,11 @@ struct target_ucontext {
 
 struct target_sigframe {
 struct target_sigcontext sc;
-unsigned int retcode[3];
 };
 
 struct target_rt_sigframe {
 target_siginfo_t info;
 struct target_ucontext uc;
-unsigned int retcode[3];
 };
 
 #define INSN_MOV_R30_R160x47fe0410
@@ -141,12 +139,7 @@ void setup_frame(int sig, struct target_sigaction *ka,
 if (ka->ka_restorer) {
 r26 = ka->ka_restorer;
 } else {
-__put_user(INSN_MOV_R30_R16, &frame->retcode[0]);
-__put_user(INSN_LDI_R0 + TARGET_NR_sigreturn,
-   &frame->retcode[1]);
-__put_user(INSN_CALLSYS, &frame->retcode[2]);
-/* imb() */
-r26 = frame_addr + offsetof(struct target_sigframe, retcode);
+r26 = default_sigreturn;
 }
 
 unlock_user_struct(frame, frame_addr, 1);
@@ -195,12 +188,7 @@ void setup_rt_frame(int sig, struct target_sigaction *ka,
 if (ka->ka_restorer) {
 r26 = ka->ka_restorer;
 } else {
-__put_user(INSN_MOV_R30_R16, &frame->retcode[0]);
-__put_user(INSN_LDI_R0 + TARGET_NR_rt_sigreturn,
-   &frame->retcode[1]);
-__put_user(INSN_CALLSYS, &frame->retcode[2]);
-/* imb(); */
-r26 = frame_addr + offsetof(struct target_rt_sigframe, retcode);
+r26 = default_rt_sigreturn;
 }
 
 if (err) {
@@ -268,3 +256,21 @@ badframe:
 force_sig(TARGET_SIGSEGV);
 return -TARGET_QEMU_ESIGRETURN;
 }
+
+void setup_sigtramp(abi_ulong sigtramp_page)
+{
+uint32_t *tramp = lock_user(VERIFY_WRITE, sigtramp_page, 6 * 4, 0);
+assert(tramp != NULL);
+
+default_sigreturn = sigtramp_page;
+__put_user(INSN_MOV_R30_R16, &tramp[0]);
+__put_user(INSN_LDI_R0 + TARGET_NR_sigreturn, &tramp[1]);
+__put_user(INSN_CALLSYS, &tramp[2]);
+
+default_rt_sigreturn = sigtramp_page + 3 * 4;
+__put_user(INSN_MOV_R30_R16, &tramp[3]);
+__put_user(INSN_LDI_R0 + TARGET_NR_rt_sigreturn, &tramp[4]);
+__put_user(INSN_CALLSYS, &tramp[5]);
+
+unlock_user(tramp, sigtramp_page, 6 * 4);
+}
-- 
2.25.1

1 2 3 4 5 >

1 - 100 of 442 matches

Mail list logo