Re: [PATCH-for-5.0 v2 07/11] hw/gpio/aspeed_gpio: Remove dead assignment

2020-03-22 Thread Cédric Le Goater
On 3/21/20 3:41 PM, Philippe Mathieu-Daudé wrote:
> Fix warning reported by Clang static code analyzer:
> 
>   hw/gpio/aspeed_gpio.c:717:18: warning: Value stored to 'g_idx' during its 
> initialization is never read
>   int set_idx, g_idx = *group_idx;
>^   ~~
> 
> Reported-by: Clang Static Analyzer
> Signed-off-by: Philippe Mathieu-Daudé 

Reviewed-by: Cédric Le Goater 

> ---
> v2: Do not declare g_idx in for() (Zoltan)
> ---
>  hw/gpio/aspeed_gpio.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/hw/gpio/aspeed_gpio.c b/hw/gpio/aspeed_gpio.c
> index 41e11ea9b0..bd19db31f4 100644
> --- a/hw/gpio/aspeed_gpio.c
> +++ b/hw/gpio/aspeed_gpio.c
> @@ -714,7 +714,7 @@ static void aspeed_gpio_write(void *opaque, hwaddr 
> offset, uint64_t data,
>  static int get_set_idx(AspeedGPIOState *s, const char *group, int *group_idx)
>  {
>  AspeedGPIOClass *agc = ASPEED_GPIO_GET_CLASS(s);
> -int set_idx, g_idx = *group_idx;
> +int set_idx, g_idx;
>  
>  for (set_idx = 0; set_idx < agc->nr_gpio_sets; set_idx++) {
>  const GPIOSetProperties *set_props = &agc->props[set_idx];
> 




Re: [PATCH] ppc/ppc405_boards: Remove unnecessary NULL check

2020-03-22 Thread David Gibson
On Fri, Mar 20, 2020 at 04:57:40PM +0100, Philippe Mathieu-Daudé wrote:
> This code is inside the "if (dinfo)" condition, so testing
> again here whether it is NULL is unnecessary.
> 
> Fixes: dd59bcae7 (Don't size flash memory to match backing image)
> Reported-by: Coverity (CID 1421917)
> Suggested-by: Peter Maydell 
> Signed-off-by: Philippe Mathieu-Daudé 

Applied to ppc-for-5.1.

> ---
>  hw/ppc/ppc405_boards.c | 6 +++---
>  1 file changed, 3 insertions(+), 3 deletions(-)
> 
> diff --git a/hw/ppc/ppc405_boards.c b/hw/ppc/ppc405_boards.c
> index e6bffb9e1a..6198ec1035 100644
> --- a/hw/ppc/ppc405_boards.c
> +++ b/hw/ppc/ppc405_boards.c
> @@ -191,7 +191,7 @@ static void ref405ep_init(MachineState *machine)
>  bios_size = 8 * MiB;
>  pflash_cfi02_register((uint32_t)(-bios_size),
>"ef405ep.bios", bios_size,
> -  dinfo ? blk_by_legacy_dinfo(dinfo) : NULL,
> +  blk_by_legacy_dinfo(dinfo),
>64 * KiB, 1,
>2, 0x0001, 0x22DA, 0x, 0x, 0x555, 
> 0x2AA,
>1);
> @@ -459,7 +459,7 @@ static void taihu_405ep_init(MachineState *machine)
>  bios_size = 2 * MiB;
>  pflash_cfi02_register(0xFFE0,
>"taihu_405ep.bios", bios_size,
> -  dinfo ? blk_by_legacy_dinfo(dinfo) : NULL,
> +  blk_by_legacy_dinfo(dinfo),
>64 * KiB, 1,
>4, 0x0001, 0x22DA, 0x, 0x, 0x555, 
> 0x2AA,
>1);
> @@ -494,7 +494,7 @@ static void taihu_405ep_init(MachineState *machine)
>  if (dinfo) {
>  bios_size = 32 * MiB;
>  pflash_cfi02_register(0xfc00, "taihu_405ep.flash", bios_size,
> -  dinfo ? blk_by_legacy_dinfo(dinfo) : NULL,
> +  blk_by_legacy_dinfo(dinfo),
>64 * KiB, 1,
>4, 0x0001, 0x22DA, 0x, 0x, 0x555, 
> 0x2AA,
>1);

-- 
David Gibson| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson


signature.asc
Description: PGP signature


[Bug 1861161] Re: qemu-arm-static stuck with 100% CPU when cross-compiling emacs

2020-03-22 Thread Philippe Vaucher
I tried several workarounds including removing `dir_index` from ext4
partitions and using a 32 bit qemu-user-static version, but it does not
work:

The process still gets stuck in a loop involving `getdents64`:


```
root@earth:~# file /usr/bin/qemu-arm-static
/usr/bin/qemu-arm-static: ELF 32-bit LSB executable, Intel 80386, version 1 
(GNU/Linux), statically linked, for GNU/Linux 3.2.0, 
BuildID[sha1]=ff1224d87ca5dece8d0b0f5735cfee7fae97ee58, stripped

root@earth:~# ps afx | grep qemu
21031 pts/0S+ 0:00  \_ grep --color=auto qemu
 1036 ?Ss 0:00 /usr/sbin/qemu-ga --daemonize -m virtio-serial -p 
/dev/virtio-ports/org.qemu.guest_agent.0
10584 ?Ssl0:00  |   |   \_ /usr/bin/qemu-arm-static 
/usr/bin/make install
28768 ?Sl 0:01  |   |   \_ /usr/bin/qemu-arm-static 
/usr/bin/make -C src VCSWITNESS=$(srcdir)/../.git/logs/HEAD all
16718 ?Sl 0:00  |   |   \_ /usr/bin/qemu-arm-static 
/usr/bin/make -C ../lisp compile-first EMACS=../src/bootstrap-emacs
16726 ?Rl48:24  |   |   \_ /usr/bin/qemu-arm-static 
../src/bootstrap-emacs -batch --no-site-file --no-site-lisp --eval (setq 
load-prefer-newer t) -f batch-byte-compile emacs-lisp/macroexp.el
10696 ?Ssl0:00  |   \_ /usr/bin/qemu-aarch64-static 
/usr/bin/make install
10972 ?Sl 0:02  |   \_ /usr/bin/qemu-aarch64-static 
/usr/bin/make -C src VCSWITNESS=$(srcdir)/../.git/logs/HEAD all
20397 ?Sl 0:00  |   \_ /usr/bin/qemu-aarch64-static 
/usr/bin/make -C ../lisp compile-first EMACS=../src/bootstrap-emacs
20405 ?Rl24:09  |   \_ 
/usr/bin/qemu-aarch64-static ../src/bootstrap-emacs -batch --no-site-file 
--no-site-lisp --eval (setq load-prefer-newer t) -f batch-byte-compile 
emacs-lisp/macroexp.el

root@earth:~# strace -p 16726
clock_gettime(CLOCK_REALTIME, {tv_sec=1584794027, tv_nsec=921230669}) = 0
getdents64(5, /* 0 entries */, 2048)= 0
_llseek(5, 0, [0], SEEK_SET)= 0
getdents64(5, /* 5 entries */, 2048)= 144
tgkill(29984, 29987, SIGRT_2)   = -1 EAGAIN (Resource temporarily 
unavailable)
clock_gettime(CLOCK_REALTIME, {tv_sec=1584794027, tv_nsec=921642405}) = 0
getdents64(5, /* 0 entries */, 2048)= 0
_llseek(5, 0, [0], SEEK_SET)= 0
getdents64(5, /* 5 entries */, 2048)= 144
tgkill(29984, 29987, SIGRT_2)   = -1 EAGAIN (Resource temporarily 
unavailable)
clock_gettime(CLOCK_REALTIME, {tv_sec=1584794027, tv_nsec=922333065}) = 0
getdents64(5, /* 0 entries */, 2048)= 0
_llseek(5, 0, [0], SEEK_SET)= 0
getdents64(5, /* 5 entries */, 2048)= 144
tgkill(29984, 29987, SIGRT_2)   = -1 EAGAIN (Resource temporarily 
unavailable)
clock_gettime(CLOCK_REALTIME, ^C{tv_sec=1584794027, tv_nsec=923201432}) = 0
strace: Process 16726 detached
```

What is interesting is that the qemu-aarch64-static process also gets
stuck, which, if I understand the bug correctly, should not happen. I'll
try stracing the process to figure out what happens.

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1861161

Title:
  qemu-arm-static stuck with 100% CPU when cross-compiling emacs

Status in QEMU:
  New

Bug description:
  Hello,

  I'm trying to build multi-arch docker images for
  https://hub.docker.com/r/silex/emacs.

  Here is the machine I'm building on (hetzner cloud machine):

  root@ubuntu-4gb-fsn1-1:~# lsb_release -a
  No LSB modules are available.
  Distributor ID: Ubuntu
  Description:Ubuntu 18.04.3 LTS
  Release:18.04
  Codename:   bionic
  root@ubuntu-4gb-fsn1-1:~# uname -a
  Linux ubuntu-4gb-fsn1-1 4.15.0-74-generic #84-Ubuntu SMP Thu Dec 19 08:06:28 
UTC 2019 x86_64 x86_64 x86_64 GNU/Linux

  Whenever I try to build the following alpine Dockerfile
  https://gitlab.com/Silex777/docker-
  emacs/blob/master/26.3/alpine/3.9/dev/Dockerfile like this:

  $ sysctl kernel.randomize_va_space=0
  $ docker run --rm --privileged multiarch/qemu-user-static --reset -p yes
  $ docker build --pull -t test --platform arm .

  It builds fine until this:

  root@ubuntu-4gb-fsn1-1:~# ps -ef | grep qemu
  root 26473 26465 99 14:26 pts/001:59:58 /usr/bin/qemu-arm-static 
../src/bootstrap-emacs -batch --no-site-file --no-site-lisp --eval (setq 
load-prefer-newer t) -f batch-byte-compile emacs-lisp/macroexp.el

  This is supposed to take a few seconds, but here it takes 100% CPU and
  never ends. When I strace the process I see a never ending loop like
  this:

  getdents64(5, /* 0 entries */, 2048)= 0
  lseek(5, 0, SEEK_SET)   = 0
  getdents64(5, /* 5 entries */, 2048)= 120
  tgkill(5875, 5878, SIGRT_2) = -1 EAGAIN (Resource temporarily 
unavailable)
  getdents64(5, /* 0 entries */, 2048)= 0
  lseek(5, 0, SEEK_SET)   = 0
  getdents64(5, /* 5 entries 

Re: In tree configure errors since 6116aea9

2020-03-22 Thread Laurent Vivier
Le 21/03/2020 à 19:31, BALATON Zoltan a écrit :
> On Sat, 21 Mar 2020, Laurent Vivier wrote:
>> Le 21/03/2020 à 18:29, BALATON Zoltan a écrit :
>>> Hello,
>>>
>>> Since 6116aea99, or actually 4d6a835d (linux-user: introduce parameters
>>> to generate syscall_nr.h) but only next commit starts to enable it I get
>>> these errors when running configure in source tree:
>>>
>>> grep: ./.gitlab-ci.d: Is a directory
>>> grep: ./scripts/qemu-guest-agent/fsfreeze-hook.d: Is a directory
>>>
>>> for each entry in that loop over arches. Could this be silenced?
>>
>> I didn't see that because I always do an out-of-tree build.
> 
> Isn't there a test for that or should there be one?
> 
>> Could you try this?
>>
>> --- a/configure
>> +++ b/configure
>> @@ -1911,6 +1911,7 @@ for arch in alpha hppa m68k xtensa sh4 microblaze
>> arm ppc s390x sparc sparc64 \
>>     rm -f "${source_path}/linux-user/${arch}/syscall_nr.h"
>>     # remove the dependency files
>>     find . -name "*.d" \
>> +   -type f \
>>    -exec grep -q
>> "${source_path}/linux-user/${arch}/syscall_nr.h" {} \; \
>>    -exec rm {} \;
>> done
> 
> This gets rid of the errors but seems to be much slower:
> 
> with 4d6a835d running my usual configure script:
> 
> real    0m5.968s
> user    0m4.642s
> sys    0m1.402s
> 
> with HEAD and above patch:
> 
> real    0m20.246s
> user    0m14.143s
> sys    0m6.152s
> 
> Given that configure is rerun when some files change if there's a way to
> get at least the previous speed back might be better if possible.
> 
> Regards,
> BALATON Zoltan

Could you try this:

--- a/configure
+++ b/configure
@@ -1910,9 +1910,9 @@ for arch in alpha hppa m68k xtensa sh4 microblaze
arm ppc s390x sparc sparc64 \
 # remove the file if it has been generated in the source directory
 rm -f "${source_path}/linux-user/${arch}/syscall_nr.h"
 # remove the dependency files
-find . -name "*.d" \
-   -exec grep -q
"${source_path}/linux-user/${arch}/syscall_nr.h" {} \; \
-   -exec rm {} \;
+test -d ${arch}-linux-user && find ${arch}-linux-user -type f -name
"*.d" \
+ -exec grep -q "${source_path}/linux-user/${arch}/syscall_nr.h"
{} \; \
+ -exec rm {} \;
 done

 if test -z "$python"

Thanks,
Laurent



Re: [PATCH] ppc/ppc405_boards: Remove unnecessary NULL check

2020-03-22 Thread Peter Maydell
On Sun, 22 Mar 2020 at 08:50, David Gibson  wrote:
>
> On Fri, Mar 20, 2020 at 04:57:40PM +0100, Philippe Mathieu-Daudé wrote:
> > This code is inside the "if (dinfo)" condition, so testing
> > again here whether it is NULL is unnecessary.
> >
> > Fixes: dd59bcae7 (Don't size flash memory to match backing image)
> > Reported-by: Coverity (CID 1421917)
> > Suggested-by: Peter Maydell 
> > Signed-off-by: Philippe Mathieu-Daudé 
>
> Applied to ppc-for-5.1.

This would be OK for 5.0 too at this stage.

thanks
-- PMM



Re: [PATCH 0/3] target/arm: Coverity tweaks

2020-03-22 Thread Philippe Mathieu-Daudé

On 3/20/20 5:06 PM, Richard Henderson wrote:

Only the first of these appears to be a real bug.
The other two are adjustments to help satisfy Coverity.


r~


Richard Henderson (3):
   target/arm: Rearrange disabled check for watchpoints
   target/arm: Assert immh != 0 in disas_simd_shift_imm
   target/arm: Move computation of index in handle_simd_dupe

  target/arm/helper.c| 11 ++-
  target/arm/translate-a64.c |  6 +-
  2 files changed, 11 insertions(+), 6 deletions(-)



Series:
Reviewed-by: Philippe Mathieu-Daudé 




Re: [PATCH v2 4/6] linux-user/flatload.c: Use "" for include of QEMU header target_flat.h

2020-03-22 Thread Philippe Mathieu-Daudé

On 3/19/20 8:33 PM, Peter Maydell wrote:

The target_flat.h file is a QEMU header, so we should include it using
quotes, not angle brackets.

Coverity otherwise is unable to find the header:

"../linux-user/flatload.c", line 40: error #1712: cannot open source file
   "target_flat.h"
   #include 
   ^

because the relevant directory is only on the -iquote path, not the -I path.

Signed-off-by: Peter Maydell 
---
I don't know why Coverity in particular has trouble here but
real compilers don't. Still, the "" is the right thing.
---
  linux-user/flatload.c | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/linux-user/flatload.c b/linux-user/flatload.c
index 0122ab3afe6..66901f39cc5 100644
--- a/linux-user/flatload.c
+++ b/linux-user/flatload.c
@@ -37,7 +37,7 @@
  
  #include "qemu.h"

  #include "flat.h"
-#include 
+#include "target_flat.h"
  
  //#define DEBUG
  



Reviewed-by: Philippe Mathieu-Daudé 




Re: [PATCH v2 3/6] thread.h: Remove trailing semicolons from Coverity qemu_mutex_lock() etc

2020-03-22 Thread Philippe Mathieu-Daudé

On 3/19/20 8:33 PM, Peter Maydell wrote:

All the Coverity-specific definitions of qemu_mutex_lock() and friends
have a trailing semicolon. This works fine almost everywhere because
of QEMU's mandatory-braces coding style and because most callsites are
simple, but target/s390x/sigp.c has a use of qemu_mutex_trylock() as
an if() statement, which makes the ';' a syntax error:
"../target/s390x/sigp.c", line 461: warning #18: expected a ")"
   if (qemu_mutex_trylock(&qemu_sigp_mutex)) {
   ^

Remove the bogus semicolons from the macro definitions.

Signed-off-by: Peter Maydell 
---
  include/qemu/thread.h | 12 ++--
  1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/include/qemu/thread.h b/include/qemu/thread.h
index 10262c63f58..d22848138ea 100644
--- a/include/qemu/thread.h
+++ b/include/qemu/thread.h
@@ -57,17 +57,17 @@ extern QemuCondTimedWaitFunc qemu_cond_timedwait_func;
   * hide them.
   */
  #define qemu_mutex_lock(m)  \
-qemu_mutex_lock_impl(m, __FILE__, __LINE__);
+qemu_mutex_lock_impl(m, __FILE__, __LINE__)
  #define qemu_mutex_trylock(m)   \
-qemu_mutex_trylock_impl(m, __FILE__, __LINE__);
+qemu_mutex_trylock_impl(m, __FILE__, __LINE__)
  #define qemu_rec_mutex_lock(m)  \
-qemu_rec_mutex_lock_impl(m, __FILE__, __LINE__);
+qemu_rec_mutex_lock_impl(m, __FILE__, __LINE__)
  #define qemu_rec_mutex_trylock(m)   \
-qemu_rec_mutex_trylock_impl(m, __FILE__, __LINE__);
+qemu_rec_mutex_trylock_impl(m, __FILE__, __LINE__)
  #define qemu_cond_wait(c, m)\
-qemu_cond_wait_impl(c, m, __FILE__, __LINE__);
+qemu_cond_wait_impl(c, m, __FILE__, __LINE__)
  #define qemu_cond_timedwait(c, m, ms)   \
-qemu_cond_timedwait_impl(c, m, ms, __FILE__, __LINE__);
+qemu_cond_timedwait_impl(c, m, ms, __FILE__, __LINE__)
  #else
  #define qemu_mutex_lock(m) ({   \
  QemuMutexLockFunc _f = atomic_read(&qemu_mutex_lock_func);  \



Reviewed-by: Philippe Mathieu-Daudé 




Re: [PATCH v2 2/6] thread.h: Fix Coverity version of qemu_cond_timedwait()

2020-03-22 Thread Philippe Mathieu-Daudé

On 3/19/20 8:33 PM, Peter Maydell wrote:

For Coverity's benefit, we provide simpler versions of functions like
qemu_mutex_lock(), qemu_cond_wait() and qemu_cond_timedwait().  When
we added qemu_cond_timedwait() in commit 3dcc9c6ec4ea, a cut and
paste error meant that the Coverity version of qemu_cond_timedwait()
was using the wrong _impl function, which makes the Coverity parser
complain:

"/qemu/include/qemu/thread.h", line 159: warning #140: too many arguments in
   function call
   return qemu_cond_timedwait(cond, mutex, ms);
  ^

"/qemu/include/qemu/thread.h", line 159: warning #120: return value type does
   not match the function type
   return qemu_cond_timedwait(cond, mutex, ms);
  ^

"/qemu/include/qemu/thread.h", line 156: warning #1563: function
   "qemu_cond_timedwait" not emitted, consider modeling it or review
   parse diagnostics to improve fidelity
   static inline bool (qemu_cond_timedwait)(QemuCond *cond, QemuMutex *mutex,
   ^

These aren't fatal, but reduce the scope of the analysis. Fix the error.

Signed-off-by: Peter Maydell 
---
  include/qemu/thread.h | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/qemu/thread.h b/include/qemu/thread.h
index 047db0307e7..10262c63f58 100644
--- a/include/qemu/thread.h
+++ b/include/qemu/thread.h
@@ -67,7 +67,7 @@ extern QemuCondTimedWaitFunc qemu_cond_timedwait_func;
  #define qemu_cond_wait(c, m)\
  qemu_cond_wait_impl(c, m, __FILE__, __LINE__);
  #define qemu_cond_timedwait(c, m, ms)   \
-qemu_cond_wait_impl(c, m, ms, __FILE__, __LINE__);
+qemu_cond_timedwait_impl(c, m, ms, __FILE__, __LINE__);
  #else
  #define qemu_mutex_lock(m) ({   \
  QemuMutexLockFunc _f = atomic_read(&qemu_mutex_lock_func);  \



Reviewed-by: Philippe Mathieu-Daudé 




Re: [PATCH-for-5.0 v2 2/4] tests/test-util-sockets: Skip test on non-x86 Travis containers

2020-03-22 Thread Philippe Mathieu-Daudé

On 3/20/20 12:39 PM, Cornelia Huck wrote:

On Wed, 18 Mar 2020 23:27:15 +0100
Philippe Mathieu-Daudé  wrote:


Similarly to commit 4f370b1098, test-util-sockets fails in
restricted non-x86 Travis containers since they apparently
blacklisted some required system calls there.


Is "they" == "Travis admins"? Can we get them to remove those calls
from the blacklist?


I suppose, I copy/pasted Thomas's description from commit 4f370b1098.

No clue, but we can try :)



(I'm wondering why x86 allows those calls. Probably just because it has
been around for longer.)


Let's simply skip the test if we detect such an environment.

Reviewed-by: Daniel P. Berrangé 
Signed-off-by: Philippe Mathieu-Daudé 
---
  tests/test-util-sockets.c | 7 +++
  1 file changed, 7 insertions(+)

diff --git a/tests/test-util-sockets.c b/tests/test-util-sockets.c
index 5fd947c7bf..046ebec8ba 100644
--- a/tests/test-util-sockets.c
+++ b/tests/test-util-sockets.c
@@ -231,11 +231,18 @@ static void test_socket_fd_pass_num_nocli(void)
  int main(int argc, char **argv)
  {
  bool has_ipv4, has_ipv6;
+char *travis_arch;
  
  socket_init();
  
  g_test_init(&argc, &argv, NULL);
  
+travis_arch = getenv("TRAVIS_CPU_ARCH");

+if (travis_arch && !g_str_equal(travis_arch, "x86_64")) {
+g_printerr("Test does not work on non-x86 Travis containers.");
+goto end;
+}
+
  /* We're creating actual IPv4/6 sockets, so we should
   * check if the host running tests actually supports
   * each protocol to avoid breaking tests on machines







Re: [PATCH 0/3] iotests: Fix intermittent 030 hang

2020-03-22 Thread Peter Maydell
On Fri, 13 Mar 2020 at 08:37, Kevin Wolf  wrote:
>
> Peter ran into a 030 hang while testing a pull request. This turned out
> to be two bugs in the test suite at once: First was the test failing
> because a timeout was apparently too short, second was that the timeout
> would actually cause the test to hang instead of failing. This series
> should fix both.
>
> Kevin Wolf (3):
>   iotests.py: Enable faulthandler
>   python/qemu: Kill QEMU process if 'quit' doesn't work
>   iotests: Increase pause_wait() timeout
>
>  python/qemu/machine.py| 1 +
>  tests/qemu-iotests/iotests.py | 5 -
>  2 files changed, 5 insertions(+), 1 deletion(-)

Applied to master since I've been seeing this hang off-and-on.

thanks
-- PMM



Re: [PATCH v3 10/16] hw/i386/vmport: Add support for CMD_GETTIME

2020-03-22 Thread Liran Alon



On 15/03/2020 13:56, Liran Alon wrote:


On 14/03/2020 22:56, Michael S. Tsirkin wrote:

On Sat, Mar 14, 2020 at 10:05:20PM +0200, Liran Alon wrote:

Michael, you can also refer to this VMware time-keeping whitepaper:
https://urldefense.com/v3/__https://www.vmware.com/pdf/vmware_timekeeping.pdf__;!!GqivPVa7Brio!K8sfnfvVgKwrQ4SMwX-K6-S5yR4ln9_qZ6o4GzIpQkohfWtinlplNhXzFlyUgks$ 
.

According to section "Initializing and Correcting Wall-Clock Time":
"""
VMware Tools can also optionally be used to correct long‐term drift and
errors by periodically
resynchronizing the virtual machine’s clock to the host’s clock, but 
the

current version at this writing is limited.
In particular, in guest operating systems other than NetWare, it 
does not

correct errors in which the guest clock
is ahead of real time, only those in which the guest clock is behind.

"""

This talks about guest time.
What this does not mention is whether hosts need to employ any 
mechanisms

to synchronise wall clock between hosts.
The above-mentioned whitepaper also discusses how VMware maintains the 
wallclock time across migrations (vMotion).

See section "Using VMware Tools Clock Synchronization" in whitepaper.

Specifically, there is an option in .vmx file named 
"time.synchronize.resume.disk" which:

"""
If set to TRUE, the clock syncs after resuming from suspend and after 
migrating to a new host using the VMware VMotion feature.

"""

The matching functionality in open-vm-tools can be seen in 
services/plugins/timeSync/timeSync.c where ToolsOnLoad()
registers the "Time_Synchronize" RpcCallback, which is 
TimeSyncTcloHandler(), that is possibly allowed to sync time backwards 
(Note the "backwardSync" var).


The current patch-series I have submitted doesn't implement this 
RpcCallback functionality.
That work can be delayed to a future patch-series that will add this 
extra functionality as well.


If I understand correctly, this seems to validate my assumption that 
current

implementation for CMD_GETTIME is sufficient.

So I am concerned this does not interact well with other time sources
in QEMU. For example, it's very useful to set guest time with -rtc base
flag.

Can you use qemu_get_timedate?


This is a very good point.
VMware also has the ability to let the user explicitly set guest 
time with the .vmx "rtc.startTime" option.
(The time-zone can also be set by specifying an offset from UTC with the 
"rtc.diffFromUTC" option)


However, if you will read section "Using VMware Tools Clock 
Synchronization -> Disabling All Synchronization" in above mentioned 
whitepaper,
you will notice that in VMware's design, VMPort CMD_GETTIME command is 
intentionally not synced with virtual CMOS TOD. i.e. The section 
explicitly
documents that if a user wants to set guest time to fictitious time, 
user must disable VMware Tools time sync functionality by manipulating
"tools.syncTime" and "time.synchronize.*" configuration options as 
desired.


Therefore, I think current patch VMPort CMD_GETTIME command 
implementation is correct.

What do you think?

-Liran


Gentle ping.

I would like to send the next version of the patch-series.
But before I do, I would like to know what we have agreed upon regarding 
this patch.


Thanks,
-Liran





Re: In tree configure errors since 6116aea9

2020-03-22 Thread BALATON Zoltan

On Sun, 22 Mar 2020, Laurent Vivier wrote:

Le 21/03/2020 à 19:31, BALATON Zoltan a écrit :

On Sat, 21 Mar 2020, Laurent Vivier wrote:

Le 21/03/2020 à 18:29, BALATON Zoltan a écrit :

Hello,

Since 6116aea99, or actually 4d6a835d (linux-user: introduce parameters
to generate syscall_nr.h) but only next commit starts to enable it I get
these errors when running configure in source tree:

grep: ./.gitlab-ci.d: Is a directory
grep: ./scripts/qemu-guest-agent/fsfreeze-hook.d: Is a directory

for each entry in that loop over arches. Could this be silenced?


I didn't see that because I always do an out-of-tree build.


Isn't there a test for that or should there be one?


Could you try this?

--- a/configure
+++ b/configure
@@ -1911,6 +1911,7 @@ for arch in alpha hppa m68k xtensa sh4 microblaze
arm ppc s390x sparc sparc64 \
    rm -f "${source_path}/linux-user/${arch}/syscall_nr.h"
    # remove the dependency files
    find . -name "*.d" \
+   -type f \
   -exec grep -q
"${source_path}/linux-user/${arch}/syscall_nr.h" {} \; \
   -exec rm {} \;
done


This gets rid of the errors but seems to be much slower:

with 4d6a835d running my usual configure script:

real    0m5.968s
user    0m4.642s
sys    0m1.402s

with HEAD and above patch:

real    0m20.246s
user    0m14.143s
sys    0m6.152s

Given that configure is rerun when some files change if there's a way to
get at least the previous speed back might be better if possible.

Regards,
BALATON Zoltan


Could you try this:

--- a/configure
+++ b/configure
@@ -1910,9 +1910,9 @@ for arch in alpha hppa m68k xtensa sh4 microblaze
arm ppc s390x sparc sparc64 \
# remove the file if it has been generated in the source directory
rm -f "${source_path}/linux-user/${arch}/syscall_nr.h"
# remove the dependency files
-find . -name "*.d" \
-   -exec grep -q
"${source_path}/linux-user/${arch}/syscall_nr.h" {} \; \
-   -exec rm {} \;
+test -d ${arch}-linux-user && find ${arch}-linux-user -type f -name
"*.d" \
+ -exec grep -q "${source_path}/linux-user/${arch}/syscall_nr.h"
{} \; \
+ -exec rm {} \;
done


This is better, runs in 6-8 seconds. Thanks.

Regards,
BALATON Zoltan

[PATCH-for-5.0 v2 0/4] tests/docker: Fixes for 5.0

2020-03-22 Thread Philippe Mathieu-Daudé
Easy fixes for our Docker images.

Since v1:
- Reword gcrypt patch description (requested by Aleksandar)

Philippe Mathieu-Daudé (4):
  tests/docker: Keep package list sorted
  tests/docker: Install gcrypt devel package in Debian image
  tests/docker: Use Python3 PyYAML in the Fedora image
  tests/docker: Add libepoxy and libudev packages to the Fedora image

 tests/docker/dockerfiles/centos7.docker  |  6 --
 tests/docker/dockerfiles/debian-amd64.docker |  1 +
 tests/docker/dockerfiles/fedora.docker   | 10 +++---
 3 files changed, 12 insertions(+), 5 deletions(-)

-- 
2.21.1




[PATCH-for-5.0 v2 2/4] tests/docker: Install gcrypt devel package in Debian image

2020-03-22 Thread Philippe Mathieu-Daudé
In commit 6f8bbb374be we enabled building with the gcrypt library
on the Debian 'x86 host', which was based on Debian Stretch.
Later in commit 698a71edbed we upgraded the Debian base image to
Buster.

Apparently Debian Stretch was listing gcrypt as a QEMU dependency,
but this is not the case anymore in Buster, so we need to install
it manually (it is not listed by 'apt-get -s build-dep qemu' in
the common debian10.docker anymore). This fixes:

 $ ../configure $QEMU_CONFIGURE_OPTS

  ERROR: User requested feature gcrypt
 configure was not able to find it.
 Install gcrypt devel >= 1.5.0

Signed-off-by: Philippe Mathieu-Daudé 
---
v2: Reword description, do not use 'Fixes:' tag (Aleksandar)
---
 tests/docker/dockerfiles/debian-amd64.docker | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/docker/dockerfiles/debian-amd64.docker 
b/tests/docker/dockerfiles/debian-amd64.docker
index d4849f509f..957f0bc2e7 100644
--- a/tests/docker/dockerfiles/debian-amd64.docker
+++ b/tests/docker/dockerfiles/debian-amd64.docker
@@ -16,6 +16,7 @@ RUN apt update && \
 apt install -y --no-install-recommends \
 libbz2-dev \
 liblzo2-dev \
+libgcrypt20-dev \
 librdmacm-dev \
 libsasl2-dev \
 libsnappy-dev \
-- 
2.21.1




[PATCH-for-5.0 v2 1/4] tests/docker: Keep package list sorted

2020-03-22 Thread Philippe Mathieu-Daudé
Keep package list sorted, this eases rebase/cherry-pick.

Fixes: 3a6784813
Signed-off-by: Philippe Mathieu-Daudé 
---
 tests/docker/dockerfiles/centos7.docker | 6 --
 tests/docker/dockerfiles/fedora.docker  | 6 --
 2 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/tests/docker/dockerfiles/centos7.docker 
b/tests/docker/dockerfiles/centos7.docker
index cdd72de7eb..9a2a2e515d 100644
--- a/tests/docker/dockerfiles/centos7.docker
+++ b/tests/docker/dockerfiles/centos7.docker
@@ -2,6 +2,8 @@ FROM centos:7
 RUN yum install -y epel-release centos-release-xen-48
 
 RUN yum -y update
+
+# Please keep this list sorted alphabetically
 ENV PACKAGES \
 bison \
 bzip2 \
@@ -19,6 +21,7 @@ ENV PACKAGES \
 libepoxy-devel \
 libfdt-devel \
 librdmacm-devel \
+libzstd-devel \
 lzo-devel \
 make \
 mesa-libEGL-devel \
@@ -33,7 +36,6 @@ ENV PACKAGES \
 tar \
 vte-devel \
 xen-devel \
-zlib-devel \
-libzstd-devel
+zlib-devel
 RUN yum install -y $PACKAGES
 RUN rpm -q $PACKAGES | sort > /packages.txt
diff --git a/tests/docker/dockerfiles/fedora.docker 
b/tests/docker/dockerfiles/fedora.docker
index a658c0..019eb12dcb 100644
--- a/tests/docker/dockerfiles/fedora.docker
+++ b/tests/docker/dockerfiles/fedora.docker
@@ -1,4 +1,6 @@
 FROM fedora:30
+
+# Please keep this list sorted alphabetically
 ENV PACKAGES \
 bc \
 bison \
@@ -38,6 +40,7 @@ ENV PACKAGES \
 libubsan \
 libusbx-devel \
 libxml2-devel \
+libzstd-devel \
 llvm \
 lzo-devel \
 make \
@@ -92,8 +95,7 @@ ENV PACKAGES \
 vte291-devel \
 which \
 xen-devel \
-zlib-devel \
-libzstd-devel
+zlib-devel
 ENV QEMU_CONFIGURE_OPTS --python=/usr/bin/python3
 
 RUN dnf install -y $PACKAGES
-- 
2.21.1




[PATCH-for-5.0 v2 3/4] tests/docker: Use Python3 PyYAML in the Fedora image

2020-03-22 Thread Philippe Mathieu-Daudé
The Python2 PyYAML is now pointless, switch to the Python3 version.

Fixes: bcbf27947 (docker: move tests from python2 to python3)
Signed-off-by: Philippe Mathieu-Daudé 
---
 tests/docker/dockerfiles/fedora.docker | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/docker/dockerfiles/fedora.docker 
b/tests/docker/dockerfiles/fedora.docker
index 019eb12dcb..174979c7af 100644
--- a/tests/docker/dockerfiles/fedora.docker
+++ b/tests/docker/dockerfiles/fedora.docker
@@ -79,8 +79,8 @@ ENV PACKAGES \
 perl-Test-Harness \
 pixman-devel \
 python3 \
+python3-PyYAML \
 python3-sphinx \
-PyYAML \
 rdma-core-devel \
 SDL2-devel \
 snappy-devel \
-- 
2.21.1




[PATCH-for-5.0 v2 4/4] tests/docker: Add libepoxy and libudev packages to the Fedora image

2020-03-22 Thread Philippe Mathieu-Daudé
Install optional dependencies of QEMU to get better coverage.

Suggested-by: Peter Maydell 
Signed-off-by: Philippe Mathieu-Daudé 
---
 tests/docker/dockerfiles/fedora.docker | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tests/docker/dockerfiles/fedora.docker 
b/tests/docker/dockerfiles/fedora.docker
index 174979c7af..4bd2c953af 100644
--- a/tests/docker/dockerfiles/fedora.docker
+++ b/tests/docker/dockerfiles/fedora.docker
@@ -29,6 +29,7 @@ ENV PACKAGES \
 libblockdev-mpath-devel \
 libcap-ng-devel \
 libcurl-devel \
+libepoxy-devel \
 libfdt-devel \
 libiscsi-devel \
 libjpeg-devel \
@@ -38,6 +39,7 @@ ENV PACKAGES \
 libseccomp-devel \
 libssh-devel \
 libubsan \
+libudev-devel \
 libusbx-devel \
 libxml2-devel \
 libzstd-devel \
-- 
2.21.1




[PATCH v1 13/22] vfio: add bind stage-1 page table support

2020-03-22 Thread Liu Yi L
This patch adds the bind_stage1_pgtbl() definition in HostIOMMUContextClass,
and also adds the corresponding implementation in VFIO. This is to expose a
way for vIOMMU to set up dual stage DMA translation for passthru devices on
hardware.

Cc: Kevin Tian 
Cc: Jacob Pan 
Cc: Peter Xu 
Cc: Eric Auger 
Cc: Yi Sun 
Cc: David Gibson 
Cc: Alex Williamson 
Signed-off-by: Liu Yi L 
---
 hw/iommu/host_iommu_context.c | 49 ++-
 hw/vfio/common.c  | 55 ++-
 include/hw/iommu/host_iommu_context.h | 26 -
 3 files changed, 127 insertions(+), 3 deletions(-)

diff --git a/hw/iommu/host_iommu_context.c b/hw/iommu/host_iommu_context.c
index af61899..8a53376 100644
--- a/hw/iommu/host_iommu_context.c
+++ b/hw/iommu/host_iommu_context.c
@@ -69,21 +69,67 @@ int host_iommu_ctx_pasid_free(HostIOMMUContext *host_icx, 
uint32_t pasid)
 return hicxc->pasid_free(host_icx, pasid);
 }
 
+int host_iommu_ctx_bind_stage1_pgtbl(HostIOMMUContext *host_icx,
+ DualIOMMUStage1BindData *data)
+{
+HostIOMMUContextClass *hicxc;
+
+if (!host_icx) {
+return -EINVAL;
+}
+
+hicxc = HOST_IOMMU_CONTEXT_GET_CLASS(host_icx);
+if (!hicxc) {
+return -EINVAL;
+}
+
+if (!(host_icx->flags & HOST_IOMMU_NESTING) ||
+!hicxc->bind_stage1_pgtbl) {
+return -EINVAL;
+}
+
+return hicxc->bind_stage1_pgtbl(host_icx, data);
+}
+
+int host_iommu_ctx_unbind_stage1_pgtbl(HostIOMMUContext *host_icx,
+   DualIOMMUStage1BindData *data)
+{
+HostIOMMUContextClass *hicxc;
+
+if (!host_icx) {
+return -EINVAL;
+}
+
+hicxc = HOST_IOMMU_CONTEXT_GET_CLASS(host_icx);
+if (!hicxc) {
+return -EINVAL;
+}
+
+if (!(host_icx->flags & HOST_IOMMU_NESTING) ||
+!hicxc->unbind_stage1_pgtbl) {
+return -EINVAL;
+}
+
+return hicxc->unbind_stage1_pgtbl(host_icx, data);
+}
+
 void host_iommu_ctx_init(void *_host_icx, size_t instance_size,
  const char *mrtypename,
- uint64_t flags)
+ uint64_t flags, uint32_t formats)
 {
 HostIOMMUContext *host_icx;
 
 object_initialize(_host_icx, instance_size, mrtypename);
 host_icx = HOST_IOMMU_CONTEXT(_host_icx);
 host_icx->flags = flags;
+host_icx->stage1_formats = formats;
 host_icx->initialized = true;
 }
 
 void host_iommu_ctx_destroy(HostIOMMUContext *host_icx)
 {
 host_icx->flags = 0x0;
+host_icx->stage1_formats = 0x0;
 host_icx->initialized = false;
 }
 
@@ -92,6 +138,7 @@ static void host_icx_init_fn(Object *obj)
 HostIOMMUContext *host_icx = HOST_IOMMU_CONTEXT(obj);
 
 host_icx->flags = 0x0;
+host_icx->stage1_formats = 0x0;
 host_icx->initialized = false;
 }
 
diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index e0f2828..770a785 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -1223,6 +1223,52 @@ static int vfio_host_icx_pasid_free(HostIOMMUContext 
*host_icx,
 return 0;
 }
 
+static int vfio_host_icx_bind_stage1_pgtbl(HostIOMMUContext *host_icx,
+   DualIOMMUStage1BindData *bind_data)
+{
+VFIOContainer *container = container_of(host_icx, VFIOContainer, host_icx);
+struct vfio_iommu_type1_bind *bind;
+unsigned long argsz;
+int ret = 0;
+
+argsz = sizeof(*bind) + sizeof(bind_data->bind_data);
+bind = g_malloc0(argsz);
+bind->argsz = argsz;
+bind->flags = VFIO_IOMMU_BIND_GUEST_PGTBL;
+memcpy(&bind->data, &bind_data->bind_data, sizeof(bind_data->bind_data));
+
+if (ioctl(container->fd, VFIO_IOMMU_BIND, bind)) {
+ret = -errno;
+error_report("%s: pasid (%u) bind failed: %d",
+  __func__, bind_data->pasid, ret);
+}
+g_free(bind);
+return ret;
+}
+
+static int vfio_host_icx_unbind_stage1_pgtbl(HostIOMMUContext *host_icx,
+DualIOMMUStage1BindData *bind_data)
+{
+VFIOContainer *container = container_of(host_icx, VFIOContainer, host_icx);
+struct vfio_iommu_type1_bind *bind;
+unsigned long argsz;
+int ret = 0;
+
+argsz = sizeof(*bind) + sizeof(bind_data->bind_data);
+bind = g_malloc0(argsz);
+bind->argsz = argsz;
+bind->flags = VFIO_IOMMU_UNBIND_GUEST_PGTBL;
+memcpy(&bind->data, &bind_data->bind_data, sizeof(bind_data->bind_data));
+
+if (ioctl(container->fd, VFIO_IOMMU_BIND, bind)) {
+ret = -errno;
+error_report("%s: pasid (%u) unbind failed: %d",
+  __func__, bind_data->pasid, ret);
+}
+g_free(bind);
+return ret;
+}
+
 /**
  * Get iommu info from host. Caller of this funcion should free
  * the memory pointed by the returned pointer stored in @info
@@ -1337,6 +1383,7 @@ static int vfio_init_container(VFIOContainer *container, 
int group_fd,
 struct vfio_iommu_type1_info_cap

[PATCH v1 05/22] hw/pci: modify pci_setup_iommu() to set PCIIOMMUOps

2020-03-22 Thread Liu Yi L
This patch modifies pci_setup_iommu() to set PCIIOMMUOps
instead of setting PCIIOMMUFunc. PCIIOMMUFunc is used to
get an address space for a PCI device in a vendor-specific
way. PCIIOMMUOps still offers this functionality, but
using PCIIOMMUOps leaves room to add more IOMMU-related
vendor-specific operations.

Cc: Kevin Tian 
Cc: Jacob Pan 
Cc: Peter Xu 
Cc: Eric Auger 
Cc: Yi Sun 
Cc: David Gibson 
Cc: Michael S. Tsirkin 
Reviewed-by: David Gibson 
Reviewed-by: Peter Xu 
Signed-off-by: Liu Yi L 
---
 hw/alpha/typhoon.c   |  6 +-
 hw/arm/smmu-common.c |  6 +-
 hw/hppa/dino.c   |  6 +-
 hw/i386/amd_iommu.c  |  6 +-
 hw/i386/intel_iommu.c|  6 +-
 hw/pci-host/designware.c |  6 +-
 hw/pci-host/pnv_phb3.c   |  6 +-
 hw/pci-host/pnv_phb4.c   |  6 +-
 hw/pci-host/ppce500.c|  6 +-
 hw/pci-host/prep.c   |  6 +-
 hw/pci-host/sabre.c  |  6 +-
 hw/pci/pci.c | 12 +++-
 hw/ppc/ppc440_pcix.c |  6 +-
 hw/ppc/spapr_pci.c   |  6 +-
 hw/s390x/s390-pci-bus.c  |  8 ++--
 hw/virtio/virtio-iommu.c |  6 +-
 include/hw/pci/pci.h |  8 ++--
 include/hw/pci/pci_bus.h |  2 +-
 18 files changed, 90 insertions(+), 24 deletions(-)

diff --git a/hw/alpha/typhoon.c b/hw/alpha/typhoon.c
index 1795e2f..f271de1 100644
--- a/hw/alpha/typhoon.c
+++ b/hw/alpha/typhoon.c
@@ -740,6 +740,10 @@ static AddressSpace *typhoon_pci_dma_iommu(PCIBus *bus, 
void *opaque, int devfn)
 return &s->pchip.iommu_as;
 }
 
+static const PCIIOMMUOps typhoon_iommu_ops = {
+.get_address_space = typhoon_pci_dma_iommu,
+};
+
 static void typhoon_set_irq(void *opaque, int irq, int level)
 {
 TyphoonState *s = opaque;
@@ -897,7 +901,7 @@ PCIBus *typhoon_init(MemoryRegion *ram, ISABus **isa_bus, 
qemu_irq *p_rtc_irq,
  "iommu-typhoon", UINT64_MAX);
 address_space_init(&s->pchip.iommu_as, MEMORY_REGION(&s->pchip.iommu),
"pchip0-pci");
-pci_setup_iommu(b, typhoon_pci_dma_iommu, s);
+pci_setup_iommu(b, &typhoon_iommu_ops, s);
 
 /* Pchip0 PCI special/interrupt acknowledge, 0x801.F800., 64MB.  */
 memory_region_init_io(&s->pchip.reg_iack, OBJECT(s), &alpha_pci_iack_ops,
diff --git a/hw/arm/smmu-common.c b/hw/arm/smmu-common.c
index e13a5f4..447146e 100644
--- a/hw/arm/smmu-common.c
+++ b/hw/arm/smmu-common.c
@@ -343,6 +343,10 @@ static AddressSpace *smmu_find_add_as(PCIBus *bus, void 
*opaque, int devfn)
 return &sdev->as;
 }
 
+static const PCIIOMMUOps smmu_ops = {
+.get_address_space = smmu_find_add_as,
+};
+
 IOMMUMemoryRegion *smmu_iommu_mr(SMMUState *s, uint32_t sid)
 {
 uint8_t bus_n, devfn;
@@ -437,7 +441,7 @@ static void smmu_base_realize(DeviceState *dev, Error 
**errp)
 s->smmu_pcibus_by_busptr = g_hash_table_new(NULL, NULL);
 
 if (s->primary_bus) {
-pci_setup_iommu(s->primary_bus, smmu_find_add_as, s);
+pci_setup_iommu(s->primary_bus, &smmu_ops, s);
 } else {
 error_setg(errp, "SMMU is not attached to any PCI bus!");
 }
diff --git a/hw/hppa/dino.c b/hw/hppa/dino.c
index 2b1b38c..3da4f84 100644
--- a/hw/hppa/dino.c
+++ b/hw/hppa/dino.c
@@ -459,6 +459,10 @@ static AddressSpace *dino_pcihost_set_iommu(PCIBus *bus, 
void *opaque,
 return &s->bm_as;
 }
 
+static const PCIIOMMUOps dino_iommu_ops = {
+.get_address_space = dino_pcihost_set_iommu,
+};
+
 /*
  * Dino interrupts are connected as shown on Page 78, Table 23
  * (Little-endian bit numbers)
@@ -580,7 +584,7 @@ PCIBus *dino_init(MemoryRegion *addr_space,
 memory_region_add_subregion(&s->bm, 0xfff0,
 &s->bm_cpu_alias);
 address_space_init(&s->bm_as, &s->bm, "pci-bm");
-pci_setup_iommu(b, dino_pcihost_set_iommu, s);
+pci_setup_iommu(b, &dino_iommu_ops, s);
 
 *p_rtc_irq = qemu_allocate_irq(dino_set_timer_irq, s, 0);
 *p_ser_irq = qemu_allocate_irq(dino_set_serial_irq, s, 0);
diff --git a/hw/i386/amd_iommu.c b/hw/i386/amd_iommu.c
index b1175e5..5fec30e 100644
--- a/hw/i386/amd_iommu.c
+++ b/hw/i386/amd_iommu.c
@@ -1451,6 +1451,10 @@ static AddressSpace *amdvi_host_dma_iommu(PCIBus *bus, 
void *opaque, int devfn)
 return &iommu_as[devfn]->as;
 }
 
+static const PCIIOMMUOps amdvi_iommu_ops = {
+.get_address_space = amdvi_host_dma_iommu,
+};
+
 static const MemoryRegionOps mmio_mem_ops = {
 .read = amdvi_mmio_read,
 .write = amdvi_mmio_write,
@@ -1577,7 +1581,7 @@ static void amdvi_realize(DeviceState *dev, Error **errp)
 
 sysbus_init_mmio(SYS_BUS_DEVICE(s), &s->mmio);
 sysbus_mmio_map(SYS_BUS_DEVICE(s), 0, AMDVI_BASE_ADDR);
-pci_setup_iommu(bus, amdvi_host_dma_iommu, s);
+pci_setup_iommu(bus, &amdvi_iommu_ops, s);
 s->devid = object_property_get_int(OBJECT(&s->pci), "addr", errp);
 msi_init(&s->pci.dev, 0, 1, true, false, errp);
 amdvi_init(s);
diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index df7ad25..4b22910 100644
--- a/

[PATCH v1 01/22] scripts/update-linux-headers: Import iommu.h

2020-03-22 Thread Liu Yi L
From: Eric Auger 

Update the script to import the new iommu.h uapi header.

Cc: Kevin Tian 
Cc: Jacob Pan 
Cc: Peter Xu 
Cc: Yi Sun 
Cc: Michael S. Tsirkin 
Cc: Cornelia Huck 
Cc: Paolo Bonzini 
Acked-by: Cornelia Huck 
Signed-off-by: Eric Auger 
---
 scripts/update-linux-headers.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/update-linux-headers.sh b/scripts/update-linux-headers.sh
index 29c27f4..5b64ee3 100755
--- a/scripts/update-linux-headers.sh
+++ b/scripts/update-linux-headers.sh
@@ -141,7 +141,7 @@ done
 
 rm -rf "$output/linux-headers/linux"
 mkdir -p "$output/linux-headers/linux"
-for header in kvm.h vfio.h vfio_ccw.h vhost.h \
+for header in kvm.h vfio.h vfio_ccw.h vhost.h iommu.h \
   psci.h psp-sev.h userfaultfd.h mman.h; do
 cp "$tmpdir/include/linux/$header" "$output/linux-headers/linux"
 done
-- 
2.7.4




[PATCH v1 06/22] hw/pci: introduce pci_device_set/unset_iommu_context()

2020-03-22 Thread Liu Yi L
This patch adds pci_device_set/unset_iommu_context() to set/unset
the host_iommu_context for a given device. New callbacks are added to
PCIIOMMUOps. As such, a vIOMMU can make use of host IOMMU capabilities,
e.g. to set up nested translation.

Cc: Kevin Tian 
Cc: Jacob Pan 
Cc: Peter Xu 
Cc: Eric Auger 
Cc: Yi Sun 
Cc: David Gibson 
Cc: Michael S. Tsirkin 
Signed-off-by: Liu Yi L 
---
 hw/pci/pci.c | 49 -
 include/hw/pci/pci.h | 10 ++
 2 files changed, 54 insertions(+), 5 deletions(-)

diff --git a/hw/pci/pci.c b/hw/pci/pci.c
index aa9025c..8642ea8 100644
--- a/hw/pci/pci.c
+++ b/hw/pci/pci.c
@@ -2638,7 +2638,8 @@ static void pci_device_class_base_init(ObjectClass 
*klass, void *data)
 }
 }
 
-AddressSpace *pci_device_iommu_address_space(PCIDevice *dev)
+static void pci_device_get_iommu_bus_devfn(PCIDevice *dev,
+  PCIBus **pbus, uint8_t *pdevfn)
 {
 PCIBus *bus = pci_get_bus(dev);
 PCIBus *iommu_bus = bus;
@@ -2683,14 +2684,52 @@ AddressSpace *pci_device_iommu_address_space(PCIDevice 
*dev)
 
 iommu_bus = parent_bus;
 }
-if (iommu_bus && iommu_bus->iommu_ops &&
- iommu_bus->iommu_ops->get_address_space) {
-return iommu_bus->iommu_ops->get_address_space(bus,
- iommu_bus->iommu_opaque, devfn);
+*pbus = iommu_bus;
+*pdevfn = devfn;
+}
+
+AddressSpace *pci_device_iommu_address_space(PCIDevice *dev)
+{
+PCIBus *bus;
+uint8_t devfn;
+
+pci_device_get_iommu_bus_devfn(dev, &bus, &devfn);
+if (bus && bus->iommu_ops &&
+ bus->iommu_ops->get_address_space) {
+return bus->iommu_ops->get_address_space(bus,
+bus->iommu_opaque, devfn);
 }
 return &address_space_memory;
 }
 
+int pci_device_set_iommu_context(PCIDevice *dev,
+ HostIOMMUContext *host_icx)
+{
+PCIBus *bus;
+uint8_t devfn;
+
+pci_device_get_iommu_bus_devfn(dev, &bus, &devfn);
+if (bus && bus->iommu_ops &&
+bus->iommu_ops->set_iommu_context) {
+return bus->iommu_ops->set_iommu_context(bus,
+  bus->iommu_opaque, devfn, host_icx);
+}
+return -ENOENT;
+}
+
+void pci_device_unset_iommu_context(PCIDevice *dev)
+{
+PCIBus *bus;
+uint8_t devfn;
+
+pci_device_get_iommu_bus_devfn(dev, &bus, &devfn);
+if (bus && bus->iommu_ops &&
+bus->iommu_ops->unset_iommu_context) {
+bus->iommu_ops->unset_iommu_context(bus,
+ bus->iommu_opaque, devfn);
+}
+}
+
 void pci_setup_iommu(PCIBus *bus, const PCIIOMMUOps *ops, void *opaque)
 {
 bus->iommu_ops = ops;
diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
index ffe192d..6fca2a0 100644
--- a/include/hw/pci/pci.h
+++ b/include/hw/pci/pci.h
@@ -9,6 +9,8 @@
 
 #include "hw/pci/pcie.h"
 
+#include "hw/iommu/host_iommu_context.h"
+
 extern bool pci_available;
 
 /* PCI bus */
@@ -489,9 +491,17 @@ typedef struct PCIIOMMUOps PCIIOMMUOps;
 struct PCIIOMMUOps {
 AddressSpace * (*get_address_space)(PCIBus *bus,
 void *opaque, int32_t devfn);
+int (*set_iommu_context)(PCIBus *bus, void *opaque,
+ int32_t devfn,
+ HostIOMMUContext *host_icx);
+void (*unset_iommu_context)(PCIBus *bus, void *opaque,
+int32_t devfn);
 };
 
 AddressSpace *pci_device_iommu_address_space(PCIDevice *dev);
+int pci_device_set_iommu_context(PCIDevice *dev,
+ HostIOMMUContext *host_icx);
+void pci_device_unset_iommu_context(PCIDevice *dev);
 void pci_setup_iommu(PCIBus *bus, const PCIIOMMUOps *iommu_ops, void *opaque);
 
 static inline void
-- 
2.7.4




[PATCH v1 02/22] header file update VFIO/IOMMU vSVA APIs

2020-03-22 Thread Liu Yi L
The kernel uapi/linux/iommu.h header file includes the
extensions for vSVA support, e.g. bind gpasid and IOMMU
fault report related user structures.

Note: this should be replaced with a full header file update when
the vSVA uAPI is stable.

Cc: Kevin Tian 
Cc: Jacob Pan 
Cc: Peter Xu 
Cc: Yi Sun 
Cc: Michael S. Tsirkin 
Cc: Cornelia Huck 
Cc: Paolo Bonzini 
Signed-off-by: Liu Yi L 
---
 linux-headers/linux/iommu.h | 378 
 linux-headers/linux/vfio.h  | 127 +++
 2 files changed, 505 insertions(+)
 create mode 100644 linux-headers/linux/iommu.h

diff --git a/linux-headers/linux/iommu.h b/linux-headers/linux/iommu.h
new file mode 100644
index 000..9025496
--- /dev/null
+++ b/linux-headers/linux/iommu.h
@@ -0,0 +1,378 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * IOMMU user API definitions
+ */
+
+#ifndef _IOMMU_H
+#define _IOMMU_H
+
+#include 
+
+/**
+ * Current version of the IOMMU user API. This is intended for query
+ * between user and kernel to determine compatible data structures.
+ *
+ * UAPI version can be bumped up with the following rules:
+ * 1. All data structures passed between user and kernel space share
+ *the same version number. i.e. any extension to any structure
+ *results in version number increment.
+ *
+ * 2. Data structures are open to extension but closed to modification.
+ *Extension should leverage the padding bytes first where a new
+ *flag bit is required to indicate the validity of each new member.
+ *The above rule for padding bytes also applies to adding new union
+ *members.
+ *After padding bytes are exhausted, new fields must be added at the
+ *end of each data structure with 64bit alignment. Flag bits can be
+ *added without size change but existing ones cannot be altered.
+ *
+ * 3. Versions are backward compatible.
+ *
+ * 4. Version to size lookup is supported by kernel internal API for each
+ *API function type. @version is mandatory for new data structures
+ *and must be at the beginning with type of __u32.
+ */
+#define IOMMU_UAPI_VERSION 1
+static __inline__ int iommu_get_uapi_version(void)
+{
+   return IOMMU_UAPI_VERSION;
+}
+
+/*
+ * Supported UAPI features that can be reported to user space.
+ * These types represent the capability available in the kernel.
+ *
+ * REVISIT: UAPI version also implies the capabilities. Should we
+ * report them explicitly?
+ */
+enum IOMMU_UAPI_DATA_TYPES {
+   IOMMU_UAPI_BIND_GPASID,
+   IOMMU_UAPI_CACHE_INVAL,
+   IOMMU_UAPI_PAGE_RESP,
+   NR_IOMMU_UAPI_TYPE,
+};
+
+#define IOMMU_UAPI_CAP_MASK ((1 << IOMMU_UAPI_BIND_GPASID) |   \
+   (1 << IOMMU_UAPI_CACHE_INVAL) | \
+   (1 << IOMMU_UAPI_PAGE_RESP))
+
+#define IOMMU_FAULT_PERM_READ  (1 << 0) /* read */
+#define IOMMU_FAULT_PERM_WRITE (1 << 1) /* write */
+#define IOMMU_FAULT_PERM_EXEC  (1 << 2) /* exec */
+#define IOMMU_FAULT_PERM_PRIV  (1 << 3) /* privileged */
+
+/* Generic fault types, can be expanded IRQ remapping fault */
+enum iommu_fault_type {
+   IOMMU_FAULT_DMA_UNRECOV = 1,/* unrecoverable fault */
+   IOMMU_FAULT_PAGE_REQ,   /* page request fault */
+};
+
+enum iommu_fault_reason {
+   IOMMU_FAULT_REASON_UNKNOWN = 0,
+
+   /* Could not access the PASID table (fetch caused external abort) */
+   IOMMU_FAULT_REASON_PASID_FETCH,
+
+   /* PASID entry is invalid or has configuration errors */
+   IOMMU_FAULT_REASON_BAD_PASID_ENTRY,
+
+   /*
+* PASID is out of range (e.g. exceeds the maximum PASID
+* supported by the IOMMU) or disabled.
+*/
+   IOMMU_FAULT_REASON_PASID_INVALID,
+
+   /*
+* An external abort occurred fetching (or updating) a translation
+* table descriptor
+*/
+   IOMMU_FAULT_REASON_WALK_EABT,
+
+   /*
+* Could not access the page table entry (Bad address),
+* actual translation fault
+*/
+   IOMMU_FAULT_REASON_PTE_FETCH,
+
+   /* Protection flag check failed */
+   IOMMU_FAULT_REASON_PERMISSION,
+
+   /* access flag check failed */
+   IOMMU_FAULT_REASON_ACCESS,
+
+   /* Output address of a translation stage caused Address Size fault */
+   IOMMU_FAULT_REASON_OOR_ADDRESS,
+};
+
+/**
+ * struct iommu_fault_unrecoverable - Unrecoverable fault data
+ * @reason: reason of the fault, from &enum iommu_fault_reason
+ * @flags: parameters of this fault (IOMMU_FAULT_UNRECOV_* values)
+ * @pasid: Process Address Space ID
+ * @perm: requested permission access using by the incoming transaction
+ *(IOMMU_FAULT_PERM_* values)
+ * @addr: offending page address
+ * @fetch_addr: address that caused a fetch abort, if any
+ */
+struct iommu_fault_unrecoverable {
+   __u32   reason;
+#define IOMMU_FAULT_UNRECOV_PASID_VALID(1 << 0)
+#define IOMMU_FAULT_UNRECOV_ADDR_VALID 

[PATCH v1 08/22] vfio: init HostIOMMUContext per-container

2020-03-22 Thread Liu Yi L
After confirming dual-stage DMA translation support with the kernel by
checking VFIO_TYPE1_NESTING_IOMMU, VFIO initializes a HostIOMMUContext
instance and exposes it to the PCI layer. Thus vIOMMU emulators may make
use of this capability by leveraging the methods provided by
HostIOMMUContext.

Cc: Kevin Tian 
Cc: Jacob Pan 
Cc: Peter Xu 
Cc: Eric Auger 
Cc: Yi Sun 
Cc: David Gibson 
Cc: Alex Williamson 
Signed-off-by: Liu Yi L 
---
 hw/vfio/common.c  | 80 +++
 hw/vfio/pci.c | 13 ++
 include/hw/iommu/host_iommu_context.h |  3 ++
 include/hw/vfio/vfio-common.h |  4 ++
 4 files changed, 100 insertions(+)

diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index c276732..e4f5f10 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -1179,10 +1179,55 @@ static int vfio_get_iommu_type(VFIOContainer *container,
 return -EINVAL;
 }
 
+static int vfio_host_icx_pasid_alloc(HostIOMMUContext *host_icx,
+  uint32_t min, uint32_t max, uint32_t *pasid)
+{
+VFIOContainer *container = container_of(host_icx, VFIOContainer, host_icx);
+struct vfio_iommu_type1_pasid_request req;
+unsigned long argsz;
+int ret;
+
+argsz = sizeof(req);
+req.argsz = argsz;
+req.flags = VFIO_IOMMU_PASID_ALLOC;
+req.alloc_pasid.min = min;
+req.alloc_pasid.max = max;
+
+if (ioctl(container->fd, VFIO_IOMMU_PASID_REQUEST, &req)) {
+ret = -errno;
+error_report("%s: %d, alloc failed", __func__, ret);
+return ret;
+}
+*pasid = req.alloc_pasid.result;
+return 0;
+}
+
+static int vfio_host_icx_pasid_free(HostIOMMUContext *host_icx,
+uint32_t pasid)
+{
+VFIOContainer *container = container_of(host_icx, VFIOContainer, host_icx);
+struct vfio_iommu_type1_pasid_request req;
+unsigned long argsz;
+int ret;
+
+argsz = sizeof(req);
+req.argsz = argsz;
+req.flags = VFIO_IOMMU_PASID_FREE;
+req.free_pasid = pasid;
+
+if (ioctl(container->fd, VFIO_IOMMU_PASID_REQUEST, &req)) {
+ret = -errno;
+error_report("%s: %d, free failed", __func__, ret);
+return ret;
+}
+return 0;
+}
+
 static int vfio_init_container(VFIOContainer *container, int group_fd,
Error **errp)
 {
 int iommu_type, ret;
+uint64_t flags = 0;
 
 iommu_type = vfio_get_iommu_type(container, errp);
 if (iommu_type < 0) {
@@ -1210,6 +1255,18 @@ static int vfio_init_container(VFIOContainer *container, 
int group_fd,
 return -errno;
 }
 
+if (iommu_type == VFIO_TYPE1_NESTING_IOMMU) {
+/*
+ * TODO: config flags per host IOMMU nesting capability
+ * e.g. check if VFIO_TYPE1_NESTING_IOMMU supports PASID
+ * alloc/free
+ */
+host_iommu_ctx_init(&container->host_icx,
+sizeof(container->host_icx),
+TYPE_VFIO_HOST_IOMMU_CONTEXT,
+flags);
+}
+
 container->iommu_type = iommu_type;
 return 0;
 }
@@ -1456,6 +1513,7 @@ static void vfio_disconnect_container(VFIOGroup *group)
 }
 
 trace_vfio_disconnect_container(container->fd);
+host_iommu_ctx_destroy(&container->host_icx);
 close(container->fd);
 g_free(container);
 
@@ -1791,3 +1849,25 @@ int vfio_eeh_as_op(AddressSpace *as, uint32_t op)
 }
 return vfio_eeh_container_op(container, op);
 }
+
+static void vfio_host_iommu_context_class_init(ObjectClass *klass,
+   void *data)
+{
+HostIOMMUContextClass *hicxc = HOST_IOMMU_CONTEXT_CLASS(klass);
+
+hicxc->pasid_alloc = vfio_host_icx_pasid_alloc;
+hicxc->pasid_free = vfio_host_icx_pasid_free;
+}
+
+static const TypeInfo vfio_host_iommu_context_info = {
+.parent = TYPE_HOST_IOMMU_CONTEXT,
+.name = TYPE_VFIO_HOST_IOMMU_CONTEXT,
+.class_init = vfio_host_iommu_context_class_init,
+};
+
+static void vfio_register_types(void)
+{
+type_register_static(&vfio_host_iommu_context_info);
+}
+
+type_init(vfio_register_types)
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index 5e75a95..f099df3 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -2717,6 +2717,7 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
 VFIOPCIDevice *vdev = PCI_VFIO(pdev);
 VFIODevice *vbasedev_iter;
 VFIOGroup *group;
+VFIOContainer *container;
 char *tmp, *subsys, group_path[PATH_MAX], *group_name;
 Error *err = NULL;
 ssize_t len;
@@ -3028,6 +3029,11 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
 vfio_register_req_notifier(vdev);
 vfio_setup_resetfn_quirk(vdev);
 
+container = vdev->vbasedev.group->container;
+if (container->host_icx.initialized) {
+pci_device_set_iommu_context(pdev, &container->host_icx);
+}
+
 return;
 
 out_deregister:
@@ -3072,9 +3078,16 @@ static void vfio

[PATCH v1 00/22] intel_iommu: expose Shared Virtual Addressing to VMs

2020-03-22 Thread Liu Yi L
Shared Virtual Addressing (SVA), a.k.a. Shared Virtual Memory (SVM) on
Intel platforms, allows address space sharing between device DMA and
applications. SVA can reduce programming complexity and enhance security.

This QEMU series is intended to expose SVA usage to VMs, i.e. sharing
guest application address space with passthru devices. This is called
vSVA in this series. The whole vSVA enabling requires QEMU/VFIO/IOMMU
changes.

The high-level architecture for SVA virtualization is shown below. The key
design of vSVA support is to utilize the dual-stage IOMMU translation
(also known as IOMMU nesting translation) capability in the host IOMMU.

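    .-------------.  .---------------------------.
    |   vIOMMU    |  | Guest process CR3, FL only|
    |             |  '---------------------------'
    .----------------/
    | PASID Entry |--- PASID cache flush -
    '-------------'                       |
    |             |                       V
    |             |                CR3 in GPA
    '-------------'
Guest
------| Shadow |--------------------------|--------
      v        v                          v
Host
    .-------------.  .----------------------.
    |   pIOMMU    |  | Bind FL for GVA-GPA  |
    |             |  '----------------------'
    .----------------/  |
    | PASID Entry |     V (Nested xlate)
    '----------------\.------------------------------.
    |             |   |SL for GPA-HPA, default domain|
    |             |   '------------------------------'
    '-------------'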
Where:
 - FL = First level/stage one page tables
 - SL = Second level/stage two page tables

The complete vSVA kernel upstream patches are divided into three phases:
1. Common APIs and PCI device direct assignment
2. IOMMU-backed Mediated Device assignment
3. Page Request Services (PRS) support

This QEMU patchset targets phase 1 and phase 2. It is based
on the two kernel series below.
[1] [PATCH V10 00/11] Nested Shared Virtual Address (SVA) VT-d support:
https://lkml.org/lkml/2020/3/20/1172
[2] [PATCH v1 0/8] vfio: expose virtual Shared Virtual Addressing to VMs
https://lkml.org/lkml/2020/3/22/116

There are roughly two parts:
 1. Introduce HostIOMMUContext as an abstraction of the host IOMMU. It provides
    explicit methods for vIOMMU emulators to communicate with the host IOMMU,
    e.g. to propagate guest page table bindings to the host IOMMU to set up
    dual-stage DMA translation in the host IOMMU, and to flush the IOMMU iotlb.
 2. Set up dual-stage IOMMU translation for Intel vIOMMU. This includes:
    - Check IOMMU uAPI version compatibility and VFIO nesting capabilities,
      which include hardware compatibility (stage 1 format) and VFIO_PASID_REQ
      availability. This is preparation for setting up dual-stage DMA
      translation in the host IOMMU.
    - Propagate guest PASID allocation and free requests to the host.
    - Propagate guest page table bindings to the host to set up dual-stage
      IOMMU DMA translation in the host IOMMU.
    - Propagate guest IOMMU cache invalidations to the host to ensure iotlb
      correctness.

The complete QEMU set can be found in below link:
https://github.com/luxis1999/qemu.git: sva_vtd_v10_v1

Complete kernel can be found in:
https://github.com/luxis1999/linux-vsva.git: vsva-linux-5.6-rc6

Tests: basic vSVA functionality test, VM reboot/shutdown/crash, kernel build in
guest, boot VM with vSVA disabled, full compilation.

Regards,
Yi Liu

Changelog:
- RFC v3.1 -> Patch v1:
  a) Implement HostIOMMUContext in QOM manner.
  b) Add pci_set/unset_iommu_context() to register HostIOMMUContext with
     the vIOMMU, so that the vIOMMU side is aware of the lifecycle of
     HostIOMMUContext. In this way, the vIOMMU can use the methods provided
     by the HostIOMMUContext safely.
  c) Add back patch "[RFC v3 01/25] hw/pci: modify pci_setup_iommu() to
     set PCIIOMMUOps"
  RFCv3.1: https://patchwork.kernel.org/cover/11397879/

- RFC v3 -> v3.1:
  a) Drop IOMMUContext, and rename DualStageIOMMUObject to HostIOMMUContext.
     HostIOMMUContext is per-vfio-container; it is exposed to the vIOMMU via
     the PCI layer. VFIO registers a PCIHostIOMMUFunc callback with the PCI
     layer, and the vIOMMU can get the HostIOMMUContext instance via it.
  b) Check IOMMU uAPI version by VFIO_CHECK_EXTENSION
  c) Add a check on VFIO_PASID_REQ availability via VFIO_GET_IOMMU_INFO
  d) Reorder the series: put the vSVA Linux header file update at the
     beginning and the x-scalable-mode option modification at the end of
     the series.
  e) Dropped patch "[RFC v3 01/25] hw/pci: modify pci_setup_iommu() to
     set PCIIOMMUOps"
  RFCv3: https://patchwork.kernel.org/cover/11356033/

- RFC v2 -> v3:
  a) Introduce DualStageIOMMUObject to abstract the host IOMMU 
programming
  capability. e.g. request PASID from host, setup IOMMU nesting 
translation
  on host IOMMU. The pasid_alloc/bind_guest_pag

[PATCH v1 20/22] intel_iommu: propagate PASID-based iotlb invalidation to host

2020-03-22 Thread Liu Yi L
This patch propagates PASID-based iotlb invalidation to host.

Intel VT-d 3.0 supports nested translation at PASID granularity.
Guest SVA support could be implemented by configuring nested
translation on a specific PASID. This is also known as dual-stage
DMA translation.

Under such configuration, guest owns the GVA->GPA translation
which is configured as first level page table in host side for
a specific pasid, and host owns GPA->HPA translation. As guest
owns first level translation table, piotlb invalidation should
be propagated to host since host IOMMU will cache first level
page table related mappings during DMA address translation.

This patch traps the guest PASID-based iotlb flush and propagates
it to the host.

Cc: Kevin Tian 
Cc: Jacob Pan 
Cc: Peter Xu 
Cc: Yi Sun 
Cc: Paolo Bonzini 
Cc: Richard Henderson 
Cc: Eduardo Habkost 
Signed-off-by: Liu Yi L 
---
 hw/i386/intel_iommu.c  | 139 +
 hw/i386/intel_iommu_internal.h |   7 +++
 2 files changed, 146 insertions(+)

diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index b9ac07d..10d314d 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -3134,15 +3134,154 @@ static bool vtd_process_pasid_desc(IntelIOMMUState *s,
 return (ret == 0) ? true : false;
 }
 
+/**
+ * Caller of this function should hold iommu_lock.
+ */
+static void vtd_invalidate_piotlb(IntelIOMMUState *s,
+  VTDBus *vtd_bus,
+  int devfn,
+  DualIOMMUStage1Cache *stage1_cache)
+{
+VTDHostIOMMUContext *vtd_dev_icx;
+HostIOMMUContext *host_icx;
+
+vtd_dev_icx = vtd_bus->dev_icx[devfn];
+if (!vtd_dev_icx) {
+goto out;
+}
+host_icx = vtd_dev_icx->host_icx;
+if (!host_icx) {
+goto out;
+}
+if (host_iommu_ctx_flush_stage1_cache(host_icx, stage1_cache)) {
+error_report("Cache flush failed");
+}
+out:
+return;
+}
+
+static inline bool vtd_pasid_cache_valid(
+  VTDPASIDAddressSpace *vtd_pasid_as)
+{
+return vtd_pasid_as->iommu_state &&
+   (vtd_pasid_as->iommu_state->pasid_cache_gen
+ == vtd_pasid_as->pasid_cache_entry.pasid_cache_gen);
+}
+
+/**
+ * This function is a loop function for the s->vtd_pasid_as
+ * list with VTDPIOTLBInvInfo as execution filter. It propagates
+ * the piotlb invalidation to host. Caller of this function
+ * should hold iommu_lock.
+ */
+static void vtd_flush_pasid_iotlb(gpointer key, gpointer value,
+  gpointer user_data)
+{
+VTDPIOTLBInvInfo *piotlb_info = user_data;
+VTDPASIDAddressSpace *vtd_pasid_as = value;
+uint16_t did;
+
+/*
+ * Needs to check whether the pasid entry cache stored in
+ * vtd_pasid_as is valid or not. "invalid" means the pasid
+ * cache has been flushed, thus host should have done piotlb
+ * invalidation together with a pasid cache invalidation, so
+ * no need to pass down piotlb invalidation to host for better
+ * performance. Only when pasid entry cache is "valid", should
+ * a piotlb invalidation be propagated to host since it means
+ * guest just modified a mapping in its page table.
+ */
+if (!vtd_pasid_cache_valid(vtd_pasid_as)) {
+return;
+}
+
+did = vtd_pe_get_domain_id(
+&(vtd_pasid_as->pasid_cache_entry.pasid_entry));
+
+if ((piotlb_info->domain_id == did) &&
+(piotlb_info->pasid == vtd_pasid_as->pasid)) {
+vtd_invalidate_piotlb(vtd_pasid_as->iommu_state,
+  vtd_pasid_as->vtd_bus,
+  vtd_pasid_as->devfn,
+  piotlb_info->stage1_cache);
+}
+
+/*
+ * TODO: needs to add QEMU piotlb flush when QEMU piotlb
+ * infrastructure is ready. For now, it is enough for passthru
+ * devices.
+ */
+}
+
 static void vtd_piotlb_pasid_invalidate(IntelIOMMUState *s,
 uint16_t domain_id,
 uint32_t pasid)
 {
+VTDPIOTLBInvInfo piotlb_info;
+DualIOMMUStage1Cache *stage1_cache;
+struct iommu_cache_invalidate_info *cache_info;
+
+stage1_cache = g_malloc0(sizeof(*stage1_cache));
+stage1_cache->pasid = pasid;
+
+cache_info = &stage1_cache->cache_info;
+cache_info->version = IOMMU_UAPI_VERSION;
+cache_info->cache = IOMMU_CACHE_INV_TYPE_IOTLB;
+cache_info->granularity = IOMMU_INV_GRANU_PASID;
+cache_info->pasid_info.pasid = pasid;
+cache_info->pasid_info.flags = IOMMU_INV_PASID_FLAGS_PASID;
+
+piotlb_info.domain_id = domain_id;
+piotlb_info.pasid = pasid;
+piotlb_info.stage1_cache = stage1_cache;
+
+vtd_iommu_lock(s);
+/*
+ * Here loops all the vtd_pasid_as instances in s->vtd_pasid_as
+ * to find out the affected devices since piotlb invalidation
+ * should check pasid cache per architecture poi

[PATCH v1 07/22] intel_iommu: add set/unset_iommu_context callback

2020-03-22 Thread Liu Yi L
This patch adds the set/unset_iommu_context() implementation in the Intel
vIOMMU. On Intel platforms, pass-through modules (e.g. VFIO) can
set a HostIOMMUContext on the Intel vIOMMU emulator.

Cc: Kevin Tian 
Cc: Jacob Pan 
Cc: Peter Xu 
Cc: Yi Sun 
Cc: Paolo Bonzini 
Cc: Richard Henderson 
Cc: Eduardo Habkost 
Signed-off-by: Liu Yi L 
---
 hw/i386/intel_iommu.c | 70 +++
 include/hw/i386/intel_iommu.h | 17 +--
 2 files changed, 80 insertions(+), 7 deletions(-)

diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index 4b22910..8d9204f 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -3354,23 +3354,35 @@ static const MemoryRegionOps vtd_mem_ir_ops = {
 },
 };
 
-VTDAddressSpace *vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus, int devfn)
+/**
+ * Fetch a VTDBus instance for given PCIBus. If no existing instance,
+ * allocate one.
+ */
+static VTDBus *vtd_find_add_bus(IntelIOMMUState *s, PCIBus *bus)
 {
 uintptr_t key = (uintptr_t)bus;
 VTDBus *vtd_bus = g_hash_table_lookup(s->vtd_as_by_busptr, &key);
-VTDAddressSpace *vtd_dev_as;
-char name[128];
 
 if (!vtd_bus) {
 uintptr_t *new_key = g_malloc(sizeof(*new_key));
 *new_key = (uintptr_t)bus;
 /* No corresponding free() */
-vtd_bus = g_malloc0(sizeof(VTDBus) + sizeof(VTDAddressSpace *) * \
-PCI_DEVFN_MAX);
+vtd_bus = g_malloc0(sizeof(VTDBus) + PCI_DEVFN_MAX * \
+(sizeof(VTDAddressSpace *) + \
+ sizeof(VTDHostIOMMUContext *)));
 vtd_bus->bus = bus;
 g_hash_table_insert(s->vtd_as_by_busptr, new_key, vtd_bus);
 }
+return vtd_bus;
+}
+
+VTDAddressSpace *vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus, int devfn)
+{
+VTDBus *vtd_bus;
+VTDAddressSpace *vtd_dev_as;
+char name[128];
 
+vtd_bus = vtd_find_add_bus(s, bus);
 vtd_dev_as = vtd_bus->dev_as[devfn];
 
 if (!vtd_dev_as) {
@@ -3436,6 +3448,52 @@ VTDAddressSpace *vtd_find_add_as(IntelIOMMUState *s, 
PCIBus *bus, int devfn)
 return vtd_dev_as;
 }
 
+static int vtd_dev_set_iommu_context(PCIBus *bus, void *opaque,
+ int devfn,
+ HostIOMMUContext *host_icx)
+{
+IntelIOMMUState *s = opaque;
+VTDBus *vtd_bus;
+VTDHostIOMMUContext *vtd_dev_icx;
+
+assert(0 <= devfn && devfn < PCI_DEVFN_MAX);
+
+vtd_bus = vtd_find_add_bus(s, bus);
+
+vtd_iommu_lock(s);
+vtd_dev_icx = vtd_bus->dev_icx[devfn];
+
+if (!vtd_dev_icx) {
+vtd_bus->dev_icx[devfn] = vtd_dev_icx =
+g_malloc0(sizeof(VTDHostIOMMUContext));
+vtd_dev_icx->vtd_bus = vtd_bus;
+vtd_dev_icx->devfn = (uint8_t)devfn;
+vtd_dev_icx->iommu_state = s;
+vtd_dev_icx->host_icx = host_icx;
+}
+vtd_iommu_unlock(s);
+
+return 0;
+}
+
+static void vtd_dev_unset_iommu_context(PCIBus *bus, void *opaque, int devfn)
+{
+IntelIOMMUState *s = opaque;
+VTDBus *vtd_bus;
+VTDHostIOMMUContext *vtd_dev_icx;
+
+assert(0 <= devfn && devfn < PCI_DEVFN_MAX);
+
+vtd_bus = vtd_find_add_bus(s, bus);
+
+vtd_iommu_lock(s);
+
+vtd_dev_icx = vtd_bus->dev_icx[devfn];
+g_free(vtd_dev_icx);
+
+vtd_iommu_unlock(s);
+}
+
 static uint64_t get_naturally_aligned_size(uint64_t start,
uint64_t size, int gaw)
 {
@@ -3731,6 +3789,8 @@ static AddressSpace *vtd_host_dma_iommu(PCIBus *bus, void 
*opaque, int devfn)
 
 static PCIIOMMUOps vtd_iommu_ops = {
 .get_address_space = vtd_host_dma_iommu,
+.set_iommu_context = vtd_dev_set_iommu_context,
+.unset_iommu_context = vtd_dev_unset_iommu_context,
 };
 
 static bool vtd_decide_config(IntelIOMMUState *s, Error **errp)
diff --git a/include/hw/i386/intel_iommu.h b/include/hw/i386/intel_iommu.h
index 3870052..9b4fc0a 100644
--- a/include/hw/i386/intel_iommu.h
+++ b/include/hw/i386/intel_iommu.h
@@ -64,6 +64,7 @@ typedef union VTD_IR_TableEntry VTD_IR_TableEntry;
 typedef union VTD_IR_MSIAddress VTD_IR_MSIAddress;
 typedef struct VTDPASIDDirEntry VTDPASIDDirEntry;
 typedef struct VTDPASIDEntry VTDPASIDEntry;
+typedef struct VTDHostIOMMUContext VTDHostIOMMUContext;
 
 /* Context-Entry */
 struct VTDContextEntry {
@@ -112,10 +113,20 @@ struct VTDAddressSpace {
 IOVATree *iova_tree;  /* Traces mapped IOVA ranges */
 };
 
+struct VTDHostIOMMUContext {
+VTDBus *vtd_bus;
+uint8_t devfn;
+HostIOMMUContext *host_icx;
+IntelIOMMUState *iommu_state;
+};
+
 struct VTDBus {
-PCIBus* bus;   /* A reference to the bus to provide 
translation for */
+/* A reference to the bus to provide translation for */
+PCIBus *bus;
 /* A table of VTDAddressSpace objects indexed by devfn */
-VTDAddressSpace *dev_as[];
+VTDAddressSpace *dev_as[PCI_DEVFN_MAX];
+/* A table of VTDHostIOMMUContext objects indexed by

[PATCH v1 03/22] vfio: check VFIO_TYPE1_NESTING_IOMMU support

2020-03-22 Thread Liu Yi L
VFIO needs to check with the kernel that VFIO_TYPE1_NESTING_IOMMU is
supported before using it, e.g. by checking the IOMMU UAPI version.

Cc: Kevin Tian 
Cc: Jacob Pan 
Cc: Peter Xu 
Cc: Eric Auger 
Cc: Yi Sun 
Cc: David Gibson 
Cc: Alex Williamson 
Signed-off-by: Liu Yi L 
Signed-off-by: Yi Sun 
---
 hw/vfio/common.c | 14 --
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index 0b3593b..c276732 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -1157,12 +1157,21 @@ static void vfio_put_address_space(VFIOAddressSpace 
*space)
 static int vfio_get_iommu_type(VFIOContainer *container,
Error **errp)
 {
-int iommu_types[] = { VFIO_TYPE1v2_IOMMU, VFIO_TYPE1_IOMMU,
+int iommu_types[] = { VFIO_TYPE1_NESTING_IOMMU,
+  VFIO_TYPE1v2_IOMMU, VFIO_TYPE1_IOMMU,
   VFIO_SPAPR_TCE_v2_IOMMU, VFIO_SPAPR_TCE_IOMMU };
-int i;
+int i, version;
 
 for (i = 0; i < ARRAY_SIZE(iommu_types); i++) {
 if (ioctl(container->fd, VFIO_CHECK_EXTENSION, iommu_types[i])) {
+if (iommu_types[i] == VFIO_TYPE1_NESTING_IOMMU) {
+version = ioctl(container->fd, VFIO_CHECK_EXTENSION,
+VFIO_NESTING_IOMMU_UAPI);
+if (version < IOMMU_UAPI_VERSION) {
+info_report("IOMMU UAPI incompatible for nesting");
+continue;
+}
+}
 return iommu_types[i];
 }
 }
@@ -1278,6 +1287,7 @@ static int vfio_connect_container(VFIOGroup *group, 
AddressSpace *as,
 }
 
 switch (container->iommu_type) {
+case VFIO_TYPE1_NESTING_IOMMU:
 case VFIO_TYPE1v2_IOMMU:
 case VFIO_TYPE1_IOMMU:
 {
-- 
2.7.4




[PATCH v1 10/22] intel_iommu: add virtual command capability support

2020-03-22 Thread Liu Yi L
This patch adds virtual command support to the Intel vIOMMU per the
Intel VT-d 3.1 spec, and adds two virtual commands: allocate
pasid and free pasid.

Cc: Kevin Tian 
Cc: Jacob Pan 
Cc: Peter Xu 
Cc: Yi Sun 
Cc: Paolo Bonzini 
Cc: Richard Henderson 
Cc: Eduardo Habkost 
Reviewed-by: Peter Xu 
Signed-off-by: Liu Yi L 
Signed-off-by: Yi Sun 
---
 hw/i386/intel_iommu.c  | 154 -
 hw/i386/intel_iommu_internal.h |  37 ++
 hw/i386/trace-events   |   1 +
 include/hw/i386/intel_iommu.h  |  10 ++-
 4 files changed, 200 insertions(+), 2 deletions(-)

diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index 8d9204f..0c402e4 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -2651,6 +2651,129 @@ static void vtd_handle_iectl_write(IntelIOMMUState *s)
 }
 }
 
+static int vtd_request_pasid_alloc(IntelIOMMUState *s, uint32_t *pasid)
+{
+VTDHostIOMMUContext *vtd_dev_icx;
+int ret = -1;
+
+vtd_iommu_lock(s);
+QLIST_FOREACH(vtd_dev_icx, &s->vtd_dev_icx_list, next) {
+HostIOMMUContext *host_icx = vtd_dev_icx->host_icx;
+
+/*
+ * We'll return the first valid result we got. It's
+ * a bit hackish in that we don't have a good global
+ * interface yet to talk to modules like vfio to deliver
+ * this allocation request, so we're leveraging this
+ * per-device iommu context to do the same thing just
+ * to make sure the allocation happens only once.
+ */
+ret = host_iommu_ctx_pasid_alloc(host_icx, VTD_MIN_HPASID,
+ VTD_MAX_HPASID, pasid);
+if (!ret) {
+break;
+}
+}
+vtd_iommu_unlock(s);
+
+return ret;
+}
+
+static int vtd_request_pasid_free(IntelIOMMUState *s, uint32_t pasid)
+{
+VTDHostIOMMUContext *vtd_dev_icx;
+int ret = -1;
+
+vtd_iommu_lock(s);
+QLIST_FOREACH(vtd_dev_icx, &s->vtd_dev_icx_list, next) {
+HostIOMMUContext *host_icx = vtd_dev_icx->host_icx;
+
+/*
+ * Similar with pasid allocation. We'll free the pasid
+ * on the first successful free operation. It's a bit
+ * hackish in that we don't have a good global interface
+ * yet to talk to modules like vfio to deliver this pasid
+ * free request, so we're leveraging this per-device iommu
+ * context to do the same thing just to make sure the free
+ * happens only once.
+ */
+ret = host_iommu_ctx_pasid_free(host_icx, pasid);
+if (!ret) {
+break;
+}
+}
+vtd_iommu_unlock(s);
+
+return ret;
+}
+
+/*
+ * If IP is not set, set it then return.
+ * If IP is already set, return.
+ */
+static void vtd_vcmd_set_ip(IntelIOMMUState *s)
+{
+s->vcrsp = 1;
+vtd_set_quad_raw(s, DMAR_VCRSP_REG,
+ ((uint64_t) s->vcrsp));
+}
+
+static void vtd_vcmd_clear_ip(IntelIOMMUState *s)
+{
+s->vcrsp &= (~((uint64_t)(0x1)));
+vtd_set_quad_raw(s, DMAR_VCRSP_REG,
+ ((uint64_t) s->vcrsp));
+}
+
+/* Handle write to Virtual Command Register */
+static int vtd_handle_vcmd_write(IntelIOMMUState *s, uint64_t val)
+{
+uint32_t pasid;
+int ret = -1;
+
+trace_vtd_reg_write_vcmd(s->vcrsp, val);
+
+if (!(s->vccap & VTD_VCCAP_PAS) ||
+ (s->vcrsp & 1)) {
+return -1;
+}
+
+/*
+ * Since vCPU should be blocked when the guest VMCD
+ * write was trapped to here. Should be no other vCPUs
+ * try to access VCMD if guest software is well written.
+ * However, we still emulate the IP bit here in case of
+ * bad guest software. Also align with the spec.
+ */
+vtd_vcmd_set_ip(s);
+
+switch (val & VTD_VCMD_CMD_MASK) {
+case VTD_VCMD_ALLOC_PASID:
+ret = vtd_request_pasid_alloc(s, &pasid);
+if (ret) {
+s->vcrsp |= VTD_VCRSP_SC(VTD_VCMD_NO_AVAILABLE_PASID);
+} else {
+s->vcrsp |= VTD_VCRSP_RSLT(pasid);
+}
+break;
+
+case VTD_VCMD_FREE_PASID:
+pasid = VTD_VCMD_PASID_VALUE(val);
+ret = vtd_request_pasid_free(s, pasid);
+if (ret < 0) {
+s->vcrsp |= VTD_VCRSP_SC(VTD_VCMD_FREE_INVALID_PASID);
+}
+break;
+
+default:
+s->vcrsp |= VTD_VCRSP_SC(VTD_VCMD_UNDEFINED_CMD);
+error_report_once("Virtual Command: unsupported command!!!");
+break;
+}
+vtd_vcmd_clear_ip(s);
+return 0;
+}
+
 static uint64_t vtd_mem_read(void *opaque, hwaddr addr, unsigned size)
 {
 IntelIOMMUState *s = opaque;
@@ -2939,6 +3062,23 @@ static void vtd_mem_write(void *opaque, hwaddr addr,
 vtd_set_long(s, addr, val);
 break;
 
+case DMAR_VCMD_REG:
+if (!vtd_handle_vcmd_write(s, val)) {
+if (size == 4) {
+vtd_set_long(s, addr, val);
+} else {
+vtd_set_quad(s, addr, val);
+}
+}
+

[PATCH v1 21/22] intel_iommu: process PASID-based Device-TLB invalidation

2020-03-22 Thread Liu Yi L
This patch adds an empty handler for PASID-based Device-TLB
invalidation. For now this is enough, as it is not necessary to
propagate the invalidation to the host for passthru devices, and no
emulated device has a device TLB.

Cc: Kevin Tian 
Cc: Jacob Pan 
Cc: Peter Xu 
Cc: Yi Sun 
Cc: Paolo Bonzini 
Cc: Richard Henderson 
Cc: Eduardo Habkost 
Signed-off-by: Liu Yi L 
---
 hw/i386/intel_iommu.c  | 18 ++
 hw/i386/intel_iommu_internal.h |  1 +
 2 files changed, 19 insertions(+)

diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index 10d314d..72cd739 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -3339,6 +3339,17 @@ static bool vtd_process_inv_iec_desc(IntelIOMMUState *s,
 return true;
 }
 
+static bool vtd_process_device_piotlb_desc(IntelIOMMUState *s,
+   VTDInvDesc *inv_desc)
+{
+/*
+ * no need to handle it for passthru device, for emulated
+ * devices with device tlb, it may be required, but for now,
+ * return is enough
+ */
+return true;
+}
+
 static bool vtd_process_device_iotlb_desc(IntelIOMMUState *s,
   VTDInvDesc *inv_desc)
 {
@@ -3460,6 +3471,13 @@ static bool vtd_process_inv_desc(IntelIOMMUState *s)
 }
 break;
 
+case VTD_INV_DESC_DEV_PIOTLB:
+trace_vtd_inv_desc("device-piotlb", inv_desc.hi, inv_desc.lo);
+if (!vtd_process_device_piotlb_desc(s, &inv_desc)) {
+return false;
+}
+break;
+
 case VTD_INV_DESC_DEVICE:
 trace_vtd_inv_desc("device", inv_desc.hi, inv_desc.lo);
 if (!vtd_process_device_iotlb_desc(s, &inv_desc)) {
diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
index 967cc4f..b5507ce 100644
--- a/hw/i386/intel_iommu_internal.h
+++ b/hw/i386/intel_iommu_internal.h
@@ -386,6 +386,7 @@ typedef union VTDInvDesc VTDInvDesc;
 #define VTD_INV_DESC_WAIT   0x5 /* Invalidation Wait Descriptor */
 #define VTD_INV_DESC_PIOTLB 0x6 /* PASID-IOTLB Invalidate Desc */
 #define VTD_INV_DESC_PC 0x7 /* PASID-cache Invalidate Desc */
+#define VTD_INV_DESC_DEV_PIOTLB 0x8 /* PASID-based-DIOTLB inv_desc*/
 #define VTD_INV_DESC_NONE   0   /* Not an Invalidate Descriptor */
 
 /* Masks for Invalidation Wait Descriptor*/
-- 
2.7.4




[PATCH v1 17/22] intel_iommu: do not pass down pasid bind for PASID #0

2020-03-22 Thread Liu Yi L
The RID_PASID field was introduced in the VT-d 3.0 spec; it is used
for DMA requests w/o PASID in scalable mode VT-d. It is also
known as IOVA. The VT-d 3.1 spec has the following statement about it:

"Implementations not supporting RID_PASID capability
(ECAP_REG.RPS is 0b), use a PASID value of 0 to perform
address translation for requests without PASID."

This patch adds a check on the PASIDs which are going to be
bound to a device. For PASID #0, it is not necessary to pass down
a pasid bind request, since PASID #0 is used as RID_PASID for
DMA requests without pasid. A further reason is that the current Intel
vIOMMU supports gIOVA by shadowing the guest 2nd level page table.
However, in future, if the guest IOMMU driver uses the 1st level page
table to store IOVA mappings, then guest IOVA support will also be
done via nested translation. When gIOVA is over FLPT, the vIOMMU
should pass down the pasid bind request for PASID #0 to the host, and
the host needs to bind the guest IOVA page table to a proper PASID,
e.g. the PASID value in the RID_PASID field for PF/VF if ECAP_REG.RPS
is clear, or the default PASID for ADI (Assignable Device Interface in
the Scalable IOV solution).

IOVA over FLPT support on Intel VT-d:
https://lkml.org/lkml/2019/9/23/297

Cc: Kevin Tian 
Cc: Jacob Pan 
Cc: Peter Xu 
Cc: Yi Sun 
Cc: Paolo Bonzini 
Cc: Richard Henderson 
Cc: Eduardo Habkost 
Signed-off-by: Liu Yi L 
---
 hw/i386/intel_iommu.c | 10 ++
 1 file changed, 10 insertions(+)

diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index 1e0ccde..b007715 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -1886,6 +1886,16 @@ static int vtd_bind_guest_pasid(IntelIOMMUState *s, 
VTDBus *vtd_bus,
 struct iommu_gpasid_bind_data *g_bind_data;
 int ret = -1;
 
+if (pasid < VTD_MIN_HPASID) {
+/*
+ * If pasid < VTD_HPASID_MIN, this pasid is not allocated
+ * from host. No need to pass down the changes on it to host.
+ * TODO: when IOVA over FLPT is ready, this switch should be
+ * refined.
+ */
+return 0;
+}
+
 vtd_dev_icx = vtd_bus->dev_icx[devfn];
 if (!vtd_dev_icx) {
 return -EINVAL;
-- 
2.7.4




[PATCH v1 16/22] intel_iommu: replay pasid binds after context cache invalidation

2020-03-22 Thread Liu Yi L
This patch replays guest pasid bindings after context cache
invalidation. This is done to be safe: strictly speaking, the
programmer should issue a pasid cache invalidation with proper
granularity after issuing a context cache invalidation.

Cc: Kevin Tian 
Cc: Jacob Pan 
Cc: Peter Xu 
Cc: Yi Sun 
Cc: Paolo Bonzini 
Cc: Richard Henderson 
Cc: Eduardo Habkost 
Signed-off-by: Liu Yi L 
---
 hw/i386/intel_iommu.c  | 68 ++
 hw/i386/intel_iommu_internal.h |  6 +++-
 hw/i386/trace-events   |  1 +
 3 files changed, 74 insertions(+), 1 deletion(-)

diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index 8ec638f..1e0ccde 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -68,6 +68,10 @@ static void vtd_address_space_refresh_all(IntelIOMMUState 
*s);
 static void vtd_address_space_unmap(VTDAddressSpace *as, IOMMUNotifier *n);
 
 static void vtd_pasid_cache_reset(IntelIOMMUState *s);
+static void vtd_replay_guest_pasid_bindings(IntelIOMMUState *s,
+   uint16_t *did, bool is_dsi);
+static void vtd_pasid_cache_devsi(IntelIOMMUState *s,
+  VTDBus *vtd_bus, uint16_t devfn);
 
 static void vtd_panic_require_caching_mode(void)
 {
@@ -1865,6 +1869,8 @@ static void vtd_context_global_invalidate(IntelIOMMUState 
*s)
  * VT-d emulation codes.
  */
 vtd_iommu_replay_all(s);
+
+vtd_replay_guest_pasid_bindings(s, NULL, false);
 }
 
 /**
@@ -1999,6 +2005,22 @@ static void 
vtd_context_device_invalidate(IntelIOMMUState *s,
  * happened.
  */
 vtd_sync_shadow_page_table(vtd_as);
+/*
+ * Per spec, context flush should also followed with PASID
+ * cache and iotlb flush. Regards to a device selective
+ * context cache invalidation:
+ * if (emaulted_device)
+ *modify the pasid cache gen and pasid-based iotlb gen
+ *value (will be added in following patches)
+ * else if (assigned_device)
+ *check if the device has been bound to any pasid
+ *invoke pasid_unbind regards to each bound pasid
+ * Here, we have vtd_pasid_cache_devsi() to invalidate pasid
+ * caches, while for piotlb in QEMU, we don't have it yet, so
+ * no handling. For assigned device, host iommu driver would
+ * flush piotlb when a pasid unbind is pass down to it.
+ */
+ vtd_pasid_cache_devsi(s, vtd_bus, devfn_it);
 }
 }
 }
@@ -2631,6 +2653,12 @@ static gboolean vtd_flush_pasid(gpointer key, gpointer 
value,
 /* Fall through */
 case VTD_PASID_CACHE_GLOBAL:
 break;
+case VTD_PASID_CACHE_DEVSI:
+if (pc_info->vtd_bus != vtd_bus ||
+pc_info->devfn == devfn) {
+return false;
+}
+break;
 default:
 error_report("invalid pc_info->flags");
 abort();
@@ -2971,6 +2999,46 @@ static int vtd_pasid_cache_psi(IntelIOMMUState *s,
 return 0;
 }
 
+static void vtd_pasid_cache_devsi(IntelIOMMUState *s,
+  VTDBus *vtd_bus, uint16_t devfn)
+{
+VTDPASIDCacheInfo pc_info;
+VTDContextEntry ce;
+VTDHostIOMMUContext *vtd_dev_icx;
+vtd_pasid_table_walk_info info;
+
+trace_vtd_pasid_cache_devsi(devfn);
+
+pc_info.flags = VTD_PASID_CACHE_DEVSI;
+pc_info.vtd_bus = vtd_bus;
+pc_info.devfn = devfn;
+
+vtd_iommu_lock(s);
+g_hash_table_foreach_remove(s->vtd_pasid_as, vtd_flush_pasid, &pc_info);
+
+/*
+ * To be safe, after invalidating the pasid caches,
+ * emulator needs to replay the pasid bindings by
+ * walking guest pasid dir and pasid table.
+ */
+vtd_dev_icx = vtd_bus->dev_icx[devfn];
+if (vtd_dev_icx && vtd_dev_icx->host_icx &&
+!vtd_dev_to_context_entry(s, pci_bus_num(vtd_bus->bus),
+  devfn, &ce)) {
+info.flags = 0x0;
+info.did = 0;
+info.vtd_bus = vtd_bus;
+info.devfn = devfn;
+vtd_sm_pasid_table_walk(s,
+VTD_CE_GET_PASID_DIR_TABLE(&ce),
+0,
+VTD_MAX_HPASID,
+&info);
+}
+
+vtd_iommu_unlock(s);
+}
+
 /**
  * Caller of this function should hold iommu_lock
  */
diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
index b0a324c..6f32d7b 100644
--- a/hw/i386/intel_iommu_internal.h
+++ b/hw/i386/intel_iommu_internal.h
@@ -534,13 +534,17 @@ struct VTDPASIDCacheInfo {
 #define VTD_PASID_CACHE_GLOBAL   (1ULL << 0)
 #define VTD_PASID_CACHE_DOMSI(1ULL << 1)
 #define VTD_PASID_CACHE_PASIDSI  (1ULL << 2)
+#define VTD_PASID_CACHE_DEVSI(1ULL << 3)
 uint32_t flags;
 uint1

[PATCH v1 11/22] intel_iommu: process PASID cache invalidation

2020-03-22 Thread Liu Yi L
This patch adds PASID cache invalidation handling. When the guest
enables PASID usage (e.g. SVA), guest software should issue a proper
PASID cache invalidation when caching-mode is exposed. This patch only
adds the skeleton handling of pasid cache invalidation. Detailed
handling will be added in subsequent patches.

Cc: Kevin Tian 
Cc: Jacob Pan 
Cc: Peter Xu 
Cc: Yi Sun 
Cc: Paolo Bonzini 
Cc: Richard Henderson 
Cc: Eduardo Habkost 
Reviewed-by: Peter Xu 
Signed-off-by: Liu Yi L 
---
 hw/i386/intel_iommu.c  | 66 ++
 hw/i386/intel_iommu_internal.h | 12 
 hw/i386/trace-events   |  3 ++
 3 files changed, 76 insertions(+), 5 deletions(-)

diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index 0c402e4..1daeab2 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -2395,6 +2395,63 @@ static bool vtd_process_iotlb_desc(IntelIOMMUState *s, 
VTDInvDesc *inv_desc)
 return true;
 }
 
+static int vtd_pasid_cache_dsi(IntelIOMMUState *s, uint16_t domain_id)
+{
+return 0;
+}
+
+static int vtd_pasid_cache_psi(IntelIOMMUState *s,
+   uint16_t domain_id, uint32_t pasid)
+{
+return 0;
+}
+
+static int vtd_pasid_cache_gsi(IntelIOMMUState *s)
+{
+return 0;
+}
+
+static bool vtd_process_pasid_desc(IntelIOMMUState *s,
+   VTDInvDesc *inv_desc)
+{
+uint16_t domain_id;
+uint32_t pasid;
+int ret = 0;
+
+if ((inv_desc->val[0] & VTD_INV_DESC_PASIDC_RSVD_VAL0) ||
+(inv_desc->val[1] & VTD_INV_DESC_PASIDC_RSVD_VAL1) ||
+(inv_desc->val[2] & VTD_INV_DESC_PASIDC_RSVD_VAL2) ||
+(inv_desc->val[3] & VTD_INV_DESC_PASIDC_RSVD_VAL3)) {
+error_report_once("non-zero-field-in-pc_inv_desc hi: 0x%" PRIx64
+  " lo: 0x%" PRIx64, inv_desc->val[1], inv_desc->val[0]);
+return false;
+}
+
+domain_id = VTD_INV_DESC_PASIDC_DID(inv_desc->val[0]);
+pasid = VTD_INV_DESC_PASIDC_PASID(inv_desc->val[0]);
+
+switch (inv_desc->val[0] & VTD_INV_DESC_PASIDC_G) {
+case VTD_INV_DESC_PASIDC_DSI:
+ret = vtd_pasid_cache_dsi(s, domain_id);
+break;
+
+case VTD_INV_DESC_PASIDC_PASID_SI:
+ret = vtd_pasid_cache_psi(s, domain_id, pasid);
+break;
+
+case VTD_INV_DESC_PASIDC_GLOBAL:
+ret = vtd_pasid_cache_gsi(s);
+break;
+
+default:
+error_report_once("invalid-inv-granu-in-pc_inv_desc hi: 0x%" PRIx64
+  " lo: 0x%" PRIx64, inv_desc->val[1], inv_desc->val[0]);
+return false;
+}
+
+return (ret == 0) ? true : false;
+}
+
 static bool vtd_process_inv_iec_desc(IntelIOMMUState *s,
  VTDInvDesc *inv_desc)
 {
@@ -2501,12 +2558,11 @@ static bool vtd_process_inv_desc(IntelIOMMUState *s)
 }
 break;
 
-/*
- * TODO: the entity of below two cases will be implemented in future 
series.
- * To make guest (which integrates scalable mode support patch set in
- * iommu driver) work, just return true is enough so far.
- */
 case VTD_INV_DESC_PC:
+trace_vtd_inv_desc("pasid-cache", inv_desc.val[1], inv_desc.val[0]);
+if (!vtd_process_pasid_desc(s, &inv_desc)) {
+return false;
+}
 break;
 
 case VTD_INV_DESC_PIOTLB:
diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
index 1d997a1..0ca5f0b 100644
--- a/hw/i386/intel_iommu_internal.h
+++ b/hw/i386/intel_iommu_internal.h
@@ -444,6 +444,18 @@ typedef union VTDInvDesc VTDInvDesc;
 (0x3800ULL | ~(VTD_HAW_MASK(aw) | VTD_SL_IGN_COM | VTD_SL_TM)) : \
 (0x3800ULL | ~(VTD_HAW_MASK(aw) | VTD_SL_IGN_COM))
 
+#define VTD_INV_DESC_PASIDC_G  (3ULL << 4)
+#define VTD_INV_DESC_PASIDC_PASID(val) (((val) >> 32) & 0xfULL)
+#define VTD_INV_DESC_PASIDC_DID(val)   (((val) >> 16) & VTD_DOMAIN_ID_MASK)
+#define VTD_INV_DESC_PASIDC_RSVD_VAL0  0xfff0ffc0ULL
+#define VTD_INV_DESC_PASIDC_RSVD_VAL1  0xULL
+#define VTD_INV_DESC_PASIDC_RSVD_VAL2  0xULL
+#define VTD_INV_DESC_PASIDC_RSVD_VAL3  0xULL
+
+#define VTD_INV_DESC_PASIDC_DSI(0ULL << 4)
+#define VTD_INV_DESC_PASIDC_PASID_SI   (1ULL << 4)
+#define VTD_INV_DESC_PASIDC_GLOBAL (3ULL << 4)
+
 /* Information about page-selective IOTLB invalidate */
 struct VTDIOTLBPageInvInfo {
 uint16_t domain_id;
diff --git a/hw/i386/trace-events b/hw/i386/trace-events
index 71536a7..f7cd4e5 100644
--- a/hw/i386/trace-events
+++ b/hw/i386/trace-events
@@ -22,6 +22,9 @@ vtd_inv_qi_head(uint16_t head) "read head %d"
 vtd_inv_qi_tail(uint16_t head) "write tail %d"
 vtd_inv_qi_fetch(void) ""
 vtd_context_cache_reset(void) ""
+vtd_pasid_cache_gsi(void) ""
+vtd_pasid_cache_dsi(uint16_t domain) "Domian slective PC invalidation domain 
0x%"PRIx16
+vtd_pasid_cache_psi(uint16_t domain, uint32_t pasid) "PASID slective PC 
invalidation domain 0x%"PRIx16" pasid 0x%"PRI

[PATCH v1 09/22] vfio/common: check PASID alloc/free availability

2020-03-22 Thread Liu Yi L
VFIO exposes the host IOMMU dual-stage DMA translation programming
capability to userspace through the VFIO_TYPE1_NESTING_IOMMU type.
However, userspace needs more info on the nesting type, e.g. the
supported stage 1 format and PASID alloc/free request availability.

This patch gets the iommu nesting cap info from the kernel by using the
VFIO_IOMMU_GET_INFO ioctl, and checks the HOST_IOMMU_PASID_REQUEST bit
in the nesting capabilities.

This patch borrows some code from Shameer Kolothum:
https://lists.gnu.org/archive/html/qemu-devel/2018-05/msg03759.html

Cc: Kevin Tian 
Cc: Jacob Pan 
Cc: Peter Xu 
Cc: Eric Auger 
Cc: Yi Sun 
Cc: David Gibson 
Cc: Alex Williamson 
Signed-off-by: Shameer Kolothum 
Signed-off-by: Liu Yi L 
---
 hw/vfio/common.c | 96 +---
 1 file changed, 91 insertions(+), 5 deletions(-)

diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index e4f5f10..e0f2828 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -1223,6 +1223,84 @@ static int vfio_host_icx_pasid_free(HostIOMMUContext 
*host_icx,
 return 0;
 }
 
+/**
+ * Get iommu info from host. Caller of this funcion should free
+ * the memory pointed by the returned pointer stored in @info
+ * after a successful calling when finished its usage.
+ */
+static int vfio_get_iommu_info(VFIOContainer *container,
+ struct vfio_iommu_type1_info **info)
+{
+
+size_t argsz = sizeof(struct vfio_iommu_type1_info);
+
+*info = g_malloc0(argsz);
+
+retry:
+(*info)->argsz = argsz;
+
+if (ioctl(container->fd, VFIO_IOMMU_GET_INFO, *info)) {
+g_free(*info);
+*info = NULL;
+return -errno;
+}
+
+if (((*info)->argsz > argsz)) {
+argsz = (*info)->argsz;
+*info = g_realloc(*info, argsz);
+goto retry;
+}
+
+return 0;
+}
+
+static struct vfio_info_cap_header *
+vfio_get_iommu_info_cap(struct vfio_iommu_type1_info *info, uint16_t id)
+{
+struct vfio_info_cap_header *hdr;
+void *ptr = info;
+
+if (!(info->flags & VFIO_IOMMU_INFO_CAPS)) {
+return NULL;
+}
+
+for (hdr = ptr + info->cap_offset; hdr != ptr; hdr = ptr + hdr->next) {
+if (hdr->id == id) {
+return hdr;
+}
+}
+
+return NULL;
+}
+
+static int vfio_get_nesting_iommu_cap(VFIOContainer *container,
+   struct vfio_iommu_type1_info_cap_nesting *cap_nesting)
+{
+struct vfio_iommu_type1_info *info;
+struct vfio_info_cap_header *hdr;
+struct vfio_iommu_type1_info_cap_nesting *cap;
+int ret;
+
+ret = vfio_get_iommu_info(container, &info);
+if (ret) {
+return ret;
+}
+
+hdr = vfio_get_iommu_info_cap(info,
+VFIO_IOMMU_TYPE1_INFO_CAP_NESTING);
+if (!hdr) {
+g_free(info);
+return -errno;
+}
+
+cap = container_of(hdr,
+struct vfio_iommu_type1_info_cap_nesting, header);
+*cap_nesting = *cap;
+
+g_free(info);
+return 0;
+}
+
 static int vfio_init_container(VFIOContainer *container, int group_fd,
Error **errp)
 {
@@ -1256,11 +1334,19 @@ static int vfio_init_container(VFIOContainer 
*container, int group_fd,
 }
 
 if (iommu_type == VFIO_TYPE1_NESTING_IOMMU) {
-/*
- * TODO: config flags per host IOMMU nesting capability
- * e.g. check if VFIO_TYPE1_NESTING_IOMMU supports PASID
- * alloc/free
- */
+struct vfio_iommu_type1_info_cap_nesting nesting = {
+ .nesting_capabilities = 0x0,
+ .stage1_formats = 0, };
+
+ret = vfio_get_nesting_iommu_cap(container, &nesting);
+if (ret) {
+error_setg_errno(errp, -ret,
+ "Failed to get nesting iommu cap");
+return ret;
+}
+
+flags |= (nesting.nesting_capabilities & VFIO_IOMMU_PASID_REQS) ?
+ HOST_IOMMU_PASID_REQUEST : 0;
 host_iommu_ctx_init(&container->host_icx,
 sizeof(container->host_icx),
 TYPE_VFIO_HOST_IOMMU_CONTEXT,
-- 
2.7.4




[PATCH v1 15/22] intel_iommu: replay guest pasid bindings to host

2020-03-22 Thread Liu Yi L
This patch adds guest pasid bindings replay for domain
selective pasid cache invalidation (dsi) and global pasid
cache invalidation by walking the guest pasid table.

Reason:
The guest OS may flush the pasid cache with a larger granularity,
e.g. the guest does a svm_bind() but flushes the pasid cache with a
global or domain selective pasid cache invalidation instead
of a pasid selective (psi) pasid cache invalidation. Such a
case works natively (on the host): per spec, a global or domain
selective pasid cache invalidation should be able to cover
what a pasid selective invalidation does. The only concern
is performance degradation, since dsi and global cache invalidation
will flush more than psi. To align with native behavior, the vIOMMU
emulator needs to do a replay for these two invalidation granularities
to reflect the latest pasid bindings in the guest pasid table.

Cc: Kevin Tian 
Cc: Jacob Pan 
Cc: Peter Xu 
Cc: Yi Sun 
Cc: Paolo Bonzini 
Cc: Richard Henderson 
Signed-off-by: Liu Yi L 
---
 hw/i386/intel_iommu.c  | 128 -
 hw/i386/intel_iommu_internal.h |   1 +
 2 files changed, 127 insertions(+), 2 deletions(-)

diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index 0423c83..8ec638f 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -2717,6 +2717,130 @@ static VTDPASIDAddressSpace 
*vtd_add_find_pasid_as(IntelIOMMUState *s,
 return vtd_pasid_as;
 }
 
+/**
+ * Constant information used during pasid table walk
+   @vtd_bus, @devfn: device info
+ * @flags: indicates if it is domain selective walk
+ * @did: domain ID of the pasid table walk
+ */
+typedef struct {
+VTDBus *vtd_bus;
+uint16_t devfn;
+#define VTD_PASID_TABLE_DID_SEL_WALK   (1ULL << 0);
+uint32_t flags;
+uint16_t did;
+} vtd_pasid_table_walk_info;
+
+/**
+ * Caller of this function should hold iommu_lock.
+ */
+static bool vtd_sm_pasid_table_walk_one(IntelIOMMUState *s,
+dma_addr_t pt_base,
+int start,
+int end,
+vtd_pasid_table_walk_info *info)
+{
+VTDPASIDEntry pe;
+int pasid = start;
+int pasid_next;
+VTDPASIDAddressSpace *vtd_pasid_as;
+VTDPASIDCacheEntry *pc_entry;
+
+while (pasid < end) {
+pasid_next = pasid + 1;
+
+if (!vtd_get_pe_in_pasid_leaf_table(s, pasid, pt_base, &pe)
+&& vtd_pe_present(&pe)) {
+vtd_pasid_as = vtd_add_find_pasid_as(s,
+   info->vtd_bus, info->devfn, pasid);
+pc_entry = &vtd_pasid_as->pasid_cache_entry;
+if (s->pasid_cache_gen == pc_entry->pasid_cache_gen) {
+vtd_update_pe_in_cache(s, vtd_pasid_as, &pe);
+} else {
+vtd_fill_in_pe_in_cache(s, vtd_pasid_as, &pe);
+}
+}
+pasid = pasid_next;
+}
+return true;
+}
+
+/*
+ * Currently, VT-d scalable mode pasid table is a two level table,
+ * this function aims to loop a range of PASIDs in a given pasid
+ * table to identify the pasid config in guest.
+ * Caller of this function should hold iommu_lock.
+ */
+static void vtd_sm_pasid_table_walk(IntelIOMMUState *s,
+dma_addr_t pdt_base,
+int start,
+int end,
+vtd_pasid_table_walk_info *info)
+{
+VTDPASIDDirEntry pdire;
+int pasid = start;
+int pasid_next;
+dma_addr_t pt_base;
+
+while (pasid < end) {
+pasid_next = pasid + VTD_PASID_TBL_ENTRY_NUM;
+if (!vtd_get_pdire_from_pdir_table(pdt_base, pasid, &pdire)
+&& vtd_pdire_present(&pdire)) {
+pt_base = pdire.val & VTD_PASID_TABLE_BASE_ADDR_MASK;
+if (!vtd_sm_pasid_table_walk_one(s,
+  pt_base, pasid, pasid_next, info)) {
+break;
+}
+}
+pasid = pasid_next;
+}
+}
+
+/**
+ * This function replay the guest pasid bindings to hots by
+ * walking the guest PASID table. This ensures host will have
+ * latest guest pasid bindings.
+ */
+static void vtd_replay_guest_pasid_bindings(IntelIOMMUState *s,
+uint16_t *did,
+bool is_dsi)
+{
+VTDContextEntry ce;
+VTDHostIOMMUContext *vtd_dev_icx;
+int bus_n, devfn;
+vtd_pasid_table_walk_info info;
+
+if (is_dsi) {
+info.flags = VTD_PASID_TABLE_DID_SEL_WALK;
+info.did = *did;
+}
+
+/*
+ * In this replay, only needs to care about the devices which
+ * are backed by host IOMMU. For such devices, their vtd_dev_icx
+ * instances are in the s->vtd_dev_icx_list. For devices which
+ * are not backed byhost IOMMU, it is not necessary to replay
+ * the bindings since their cache could be re-created in the future
+

[PATCH v1 12/22] intel_iommu: add PASID cache management infrastructure

2020-03-22 Thread Liu Yi L
This patch adds a PASID cache management infrastructure based on
new added structure VTDPASIDAddressSpace, which is used to track
the PASID usage and future PASID tagged DMA address translation
support in vIOMMU.

struct VTDPASIDAddressSpace {
VTDBus *vtd_bus;
uint8_t devfn;
AddressSpace as;
uint32_t pasid;
IntelIOMMUState *iommu_state;
VTDContextCacheEntry context_cache_entry;
QLIST_ENTRY(VTDPASIDAddressSpace) next;
VTDPASIDCacheEntry pasid_cache_entry;
};

Ideally, a VTDPASIDAddressSpace instance is created when a PASID
is bound to a DMA AddressSpace. The Intel VT-d spec requires guest
software to issue a pasid cache invalidation when binding or unbinding
a pasid with an address space under caching-mode. However, as
VTDPASIDAddressSpace instances also act as the pasid cache in this
implementation, their creation also happens during vIOMMU PASID
tagged DMA translation. The creation in this path is not
added in this patch since there are no PASID-capable emulated devices
for now.

The implementation in this patch manages VTDPASIDAddressSpace
instances per PASID+BDF (lookup and insert will use PASID and
BDF) since the Intel VT-d spec allows a per-BDF PASID Table. When a
guest binds a PASID with an AddressSpace, QEMU will capture the
guest pasid selective pasid cache invalidation, and allocate or
remove a VTDPASIDAddressSpace instance depending on the reason for
the invalidation:

*) a present pasid entry moved to non-present
*) a present pasid entry to be a present entry
*) a non-present pasid entry moved to present

vIOMMU emulator could figure out the reason by fetching latest
guest pasid entry.

Cc: Kevin Tian 
Cc: Jacob Pan 
Cc: Peter Xu 
Cc: Yi Sun 
Cc: Paolo Bonzini 
Cc: Richard Henderson 
Cc: Eduardo Habkost 
Signed-off-by: Liu Yi L 
---
 hw/i386/intel_iommu.c  | 394 +
 hw/i386/intel_iommu_internal.h |  14 ++
 hw/i386/trace-events   |   1 +
 include/hw/i386/intel_iommu.h  |  33 +++-
 4 files changed, 441 insertions(+), 1 deletion(-)

diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index 1daeab2..c985cae 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -40,6 +40,7 @@
 #include "kvm_i386.h"
 #include "migration/vmstate.h"
 #include "trace.h"
+#include "qemu/jhash.h"
 
 /* context entry operations */
 #define VTD_CE_GET_RID2PASID(ce) \
@@ -65,6 +66,8 @@
 static void vtd_address_space_refresh_all(IntelIOMMUState *s);
 static void vtd_address_space_unmap(VTDAddressSpace *as, IOMMUNotifier *n);
 
+static void vtd_pasid_cache_reset(IntelIOMMUState *s);
+
 static void vtd_panic_require_caching_mode(void)
 {
 error_report("We need to set caching-mode=on for intel-iommu to enable "
@@ -276,6 +279,7 @@ static void vtd_reset_caches(IntelIOMMUState *s)
 vtd_iommu_lock(s);
 vtd_reset_iotlb_locked(s);
 vtd_reset_context_cache_locked(s);
+vtd_pasid_cache_reset(s);
 vtd_iommu_unlock(s);
 }
 
@@ -686,6 +690,11 @@ static inline bool vtd_pe_type_check(X86IOMMUState 
*x86_iommu,
 return true;
 }
 
+static inline uint16_t vtd_pe_get_domain_id(VTDPASIDEntry *pe)
+{
+return VTD_SM_PASID_ENTRY_DID((pe)->val[1]);
+}
+
 static inline bool vtd_pdire_present(VTDPASIDDirEntry *pdire)
 {
 return pdire->val & 1;
@@ -2395,19 +2404,402 @@ static bool vtd_process_iotlb_desc(IntelIOMMUState *s, 
VTDInvDesc *inv_desc)
 return true;
 }
 
+static inline void vtd_init_pasid_key(uint32_t pasid,
+ uint16_t sid,
+ struct pasid_key *key)
+{
+key->pasid = pasid;
+key->sid = sid;
+}
+
+static guint vtd_pasid_as_key_hash(gconstpointer v)
+{
+struct pasid_key *key = (struct pasid_key *)v;
+uint32_t a, b, c;
+
+/* Jenkins hash */
+a = b = c = JHASH_INITVAL + sizeof(*key);
+a += key->sid;
+b += extract32(key->pasid, 0, 16);
+c += extract32(key->pasid, 16, 16);
+
+__jhash_mix(a, b, c);
+__jhash_final(a, b, c);
+
+return c;
+}
+
+static gboolean vtd_pasid_as_key_equal(gconstpointer v1, gconstpointer v2)
+{
+const struct pasid_key *k1 = v1;
+const struct pasid_key *k2 = v2;
+
+return (k1->pasid == k2->pasid) && (k1->sid == k2->sid);
+}
+
+static inline int vtd_dev_get_pe_from_pasid(IntelIOMMUState *s,
+uint8_t bus_num,
+uint8_t devfn,
+uint32_t pasid,
+VTDPASIDEntry *pe)
+{
+VTDContextEntry ce;
+int ret;
+dma_addr_t pasid_dir_base;
+
+if (!s->root_scalable) {
+return -VTD_FR_PASID_TABLE_INV;
+}
+
+ret = vtd_dev_to_context_entry(s, bus_num, devfn, &ce);
+if (ret) {
+return ret;
+}
+
+pasid_dir_base = VTD_CE_GET_PASID_DIR_TABLE(&ce);
+ret = vtd_get_pe_from_pasid_table(s,
+  pasid_dir_base, pasid, pe);
+
+   

[PATCH v1 14/22] intel_iommu: bind/unbind guest page table to host

2020-03-22 Thread Liu Yi L
This patch captures the guest PASID table entry modifications and
propagates the changes to the host to set up dual-stage DMA translation.
The guest page table is configured as the 1st level page table (GVA->GPA),
whose translation result further goes through the host VT-d 2nd
level page table (GPA->HPA) under nested translation mode. This is the
key part of vSVA support, and also a key to supporting IOVA over the 1st-
level page table for Intel VT-d in a virtualization environment.

Cc: Kevin Tian 
Cc: Jacob Pan 
Cc: Peter Xu 
Cc: Yi Sun 
Cc: Paolo Bonzini 
Cc: Richard Henderson 
Signed-off-by: Liu Yi L 
---
 hw/i386/intel_iommu.c  | 98 +++---
 hw/i386/intel_iommu_internal.h | 25 +++
 2 files changed, 118 insertions(+), 5 deletions(-)

diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index c985cae..0423c83 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -41,6 +41,7 @@
 #include "migration/vmstate.h"
 #include "trace.h"
 #include "qemu/jhash.h"
+#include 
 
 /* context entry operations */
 #define VTD_CE_GET_RID2PASID(ce) \
@@ -695,6 +696,16 @@ static inline uint16_t vtd_pe_get_domain_id(VTDPASIDEntry 
*pe)
 return VTD_SM_PASID_ENTRY_DID((pe)->val[1]);
 }
 
+static inline uint32_t vtd_pe_get_fl_aw(VTDPASIDEntry *pe)
+{
+return 48 + ((pe->val[2] >> 2) & VTD_SM_PASID_ENTRY_FLPM) * 9;
+}
+
+static inline dma_addr_t vtd_pe_get_flpt_base(VTDPASIDEntry *pe)
+{
+return pe->val[2] & VTD_SM_PASID_ENTRY_FLPTPTR;
+}
+
 static inline bool vtd_pdire_present(VTDPASIDDirEntry *pdire)
 {
 return pdire->val & 1;
@@ -1856,6 +1867,81 @@ static void 
vtd_context_global_invalidate(IntelIOMMUState *s)
 vtd_iommu_replay_all(s);
 }
 
+/**
+ * Caller should hold iommu_lock.
+ */
+static int vtd_bind_guest_pasid(IntelIOMMUState *s, VTDBus *vtd_bus,
+int devfn, int pasid, VTDPASIDEntry *pe,
+VTDPASIDOp op)
+{
+VTDHostIOMMUContext *vtd_dev_icx;
+HostIOMMUContext *host_icx;
+DualIOMMUStage1BindData *bind_data;
+struct iommu_gpasid_bind_data *g_bind_data;
+int ret = -1;
+
+vtd_dev_icx = vtd_bus->dev_icx[devfn];
+if (!vtd_dev_icx) {
+return -EINVAL;
+}
+
+host_icx = vtd_dev_icx->host_icx;
+if (!host_icx) {
+return -EINVAL;
+}
+
+if (!(host_icx->stage1_formats
+ & IOMMU_PASID_FORMAT_INTEL_VTD)) {
+error_report_once("IOMMU Stage 1 format is not compatible!\n");
+}
+
+bind_data = g_malloc0(sizeof(*bind_data));
+bind_data->pasid = pasid;
+g_bind_data = &bind_data->bind_data.gpasid_bind;
+
+g_bind_data->flags = 0;
+g_bind_data->vtd.flags = 0;
+switch (op) {
+case VTD_PASID_BIND:
+case VTD_PASID_UPDATE:
+g_bind_data->version = IOMMU_UAPI_VERSION;
+g_bind_data->format = IOMMU_PASID_FORMAT_INTEL_VTD;
+g_bind_data->gpgd = vtd_pe_get_flpt_base(pe);
+g_bind_data->addr_width = vtd_pe_get_fl_aw(pe);
+g_bind_data->hpasid = pasid;
+g_bind_data->gpasid = pasid;
+g_bind_data->flags |= IOMMU_SVA_GPASID_VAL;
+g_bind_data->vtd.flags =
+ (VTD_SM_PASID_ENTRY_SRE_BIT(pe->val[2]) ? 1 : 0)
+   | (VTD_SM_PASID_ENTRY_EAFE_BIT(pe->val[2]) ? 1 : 0)
+   | (VTD_SM_PASID_ENTRY_PCD_BIT(pe->val[1]) ? 1 : 0)
+   | (VTD_SM_PASID_ENTRY_PWT_BIT(pe->val[1]) ? 1 : 0)
+   | (VTD_SM_PASID_ENTRY_EMTE_BIT(pe->val[1]) ? 1 : 0)
+   | (VTD_SM_PASID_ENTRY_CD_BIT(pe->val[1]) ? 1 : 0);
+g_bind_data->vtd.pat = VTD_SM_PASID_ENTRY_PAT(pe->val[1]);
+g_bind_data->vtd.emt = VTD_SM_PASID_ENTRY_EMT(pe->val[1]);
+ret = host_iommu_ctx_bind_stage1_pgtbl(host_icx, bind_data);
+break;
+case VTD_PASID_UNBIND:
+g_bind_data->version = IOMMU_UAPI_VERSION;
+g_bind_data->format = IOMMU_PASID_FORMAT_INTEL_VTD;
+g_bind_data->gpgd = 0;
+g_bind_data->addr_width = 0;
+g_bind_data->hpasid = pasid;
+g_bind_data->gpasid = pasid;
+g_bind_data->flags |= IOMMU_SVA_GPASID_VAL;
+ret = host_iommu_ctx_unbind_stage1_pgtbl(host_icx, bind_data);
+break;
+default:
+error_report_once("Unknown VTDPASIDOp!!!\n");
+break;
+}
+
+g_free(bind_data);
+
+return ret;
+}
+
 /* Do a context-cache device-selective invalidation.
  * @func_mask: FM field after shifting
  */
@@ -2481,10 +2567,10 @@ static inline void 
vtd_fill_in_pe_in_cache(IntelIOMMUState *s,
 
 pc_entry->pasid_entry = *pe;
 pc_entry->pasid_cache_gen = s->pasid_cache_gen;
-/*
- * TODO:
- * - send pasid bind to host for passthru devices
- */
+vtd_bind_guest_pasid(s, vtd_pasid_as->vtd_bus,
+ vtd_pasid_as->devfn,
+ vtd_pasid_as->pasid,
+ pe, VTD_PASID_BIND);
 }
 
 /**
@@

[PATCH v1 22/22] intel_iommu: modify x-scalable-mode to be string option

2020-03-22 Thread Liu Yi L
Intel VT-d 3.0 introduces scalable mode, and it has a bunch of capabilities
related to scalable mode translation, thus there are multiple combinations.
This vIOMMU implementation simplifies this for the user by providing
typical combinations, which the user can select with the "x-scalable-mode"
option. The usage is as below:

"-device intel-iommu,x-scalable-mode=["legacy"|"modern"|"off"]"

 - "legacy": gives support for SL page table
 - "modern": gives support for FL page table, pasid, virtual command
 - "off": no scalable mode support
 -  if not configured, it means no scalable mode support; if not properly
configured, an error will be thrown

Note: this patch is supposed to be merged when the whole vSVA patch series
is merged.

Cc: Kevin Tian 
Cc: Jacob Pan 
Cc: Peter Xu 
Cc: Yi Sun 
Cc: Paolo Bonzini 
Cc: Richard Henderson 
Cc: Eduardo Habkost 
Signed-off-by: Liu Yi L 
Signed-off-by: Yi Sun 
---
 hw/i386/intel_iommu.c  | 29 +++--
 hw/i386/intel_iommu_internal.h |  4 
 include/hw/i386/intel_iommu.h  |  2 ++
 3 files changed, 33 insertions(+), 2 deletions(-)

diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index 72cd739..ea1f5c4 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -4171,7 +4171,7 @@ static Property vtd_properties[] = {
 DEFINE_PROP_UINT8("aw-bits", IntelIOMMUState, aw_bits,
   VTD_HOST_ADDRESS_WIDTH),
 DEFINE_PROP_BOOL("caching-mode", IntelIOMMUState, caching_mode, FALSE),
-DEFINE_PROP_BOOL("x-scalable-mode", IntelIOMMUState, scalable_mode, FALSE),
+DEFINE_PROP_STRING("x-scalable-mode", IntelIOMMUState, scalable_mode_str),
 DEFINE_PROP_BOOL("dma-drain", IntelIOMMUState, dma_drain, true),
 DEFINE_PROP_END_OF_LIST(),
 };
@@ -4802,8 +4802,12 @@ static void vtd_init(IntelIOMMUState *s)
 }
 
 /* TODO: read cap/ecap from host to decide which cap to be exposed. */
-if (s->scalable_mode) {
+if (s->scalable_mode && !s->scalable_modern) {
 s->ecap |= VTD_ECAP_SMTS | VTD_ECAP_SRS | VTD_ECAP_SLTS;
+} else if (s->scalable_mode && s->scalable_modern) {
+s->ecap |= VTD_ECAP_SMTS | VTD_ECAP_SRS | VTD_ECAP_PASID
+   | VTD_ECAP_FLTS | VTD_ECAP_PSS | VTD_ECAP_VCS;
+s->vccap |= VTD_VCCAP_PAS;
 }
 
 vtd_reset_caches(s);
@@ -4935,6 +4939,27 @@ static bool vtd_decide_config(IntelIOMMUState *s, Error 
**errp)
 return false;
 }
 
+if (s->scalable_mode_str &&
+(strcmp(s->scalable_mode_str, "modern") &&
+ strcmp(s->scalable_mode_str, "legacy"))) {
+error_setg(errp, "Invalid x-scalable-mode config,"
+ "Please use \"modern\", \"legacy\" or \"off\"");
+return false;
+}
+
+if (s->scalable_mode_str &&
+!strcmp(s->scalable_mode_str, "legacy")) {
+s->scalable_mode = true;
+s->scalable_modern = false;
+} else if (s->scalable_mode_str &&
+!strcmp(s->scalable_mode_str, "modern")) {
+s->scalable_mode = true;
+s->scalable_modern = true;
+} else {
+s->scalable_mode = false;
+s->scalable_modern = false;
+}
+
 return true;
 }
 
diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
index b5507ce..52b25ff 100644
--- a/hw/i386/intel_iommu_internal.h
+++ b/hw/i386/intel_iommu_internal.h
@@ -196,8 +196,12 @@
 #define VTD_ECAP_PT (1ULL << 6)
 #define VTD_ECAP_MHMV   (15ULL << 20)
 #define VTD_ECAP_SRS(1ULL << 31)
+#define VTD_ECAP_PSS(19ULL << 35)
+#define VTD_ECAP_PASID  (1ULL << 40)
 #define VTD_ECAP_SMTS   (1ULL << 43)
+#define VTD_ECAP_VCS(1ULL << 44)
 #define VTD_ECAP_SLTS   (1ULL << 46)
+#define VTD_ECAP_FLTS   (1ULL << 47)
 
 /* CAP_REG */
 /* (offset >> 4) << 24 */
diff --git a/include/hw/i386/intel_iommu.h b/include/hw/i386/intel_iommu.h
index 9782ac4..07494d4 100644
--- a/include/hw/i386/intel_iommu.h
+++ b/include/hw/i386/intel_iommu.h
@@ -268,6 +268,8 @@ struct IntelIOMMUState {
 
 bool caching_mode;  /* RO - is cap CM enabled? */
 bool scalable_mode; /* RO - is Scalable Mode supported? */
+char *scalable_mode_str;/* RO - admin's Scalable Mode config */
+bool scalable_modern;   /* RO - is modern SM supported? */
 
 dma_addr_t root;/* Current root table pointer */
 bool root_scalable; /* Type of root table (scalable or not) */
-- 
2.7.4




[PATCH v1 19/22] intel_iommu: process PASID-based iotlb invalidation

2020-03-22 Thread Liu Yi L
This patch adds basic PASID-based iotlb (piotlb) invalidation
support. piotlb is used when walking the Intel VT-d 1st level page
table. This patch only adds the basic processing. Detailed handling
will be added in the next patch.

Cc: Kevin Tian 
Cc: Jacob Pan 
Cc: Peter Xu 
Cc: Yi Sun 
Cc: Paolo Bonzini 
Cc: Richard Henderson 
Cc: Eduardo Habkost 
Signed-off-by: Liu Yi L 
---
 hw/i386/intel_iommu.c  | 57 ++
 hw/i386/intel_iommu_internal.h | 13 ++
 2 files changed, 70 insertions(+)

diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index b007715..b9ac07d 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -3134,6 +3134,59 @@ static bool vtd_process_pasid_desc(IntelIOMMUState *s,
 return (ret == 0) ? true : false;
 }
 
+static void vtd_piotlb_pasid_invalidate(IntelIOMMUState *s,
+uint16_t domain_id,
+uint32_t pasid)
+{
+}
+
+static void vtd_piotlb_page_invalidate(IntelIOMMUState *s, uint16_t domain_id,
+ uint32_t pasid, hwaddr addr, uint8_t am, bool ih)
+{
+}
+
+static bool vtd_process_piotlb_desc(IntelIOMMUState *s,
+VTDInvDesc *inv_desc)
+{
+uint16_t domain_id;
+uint32_t pasid;
+uint8_t am;
+hwaddr addr;
+
+if ((inv_desc->val[0] & VTD_INV_DESC_PIOTLB_RSVD_VAL0) ||
+(inv_desc->val[1] & VTD_INV_DESC_PIOTLB_RSVD_VAL1)) {
+error_report_once("non-zero-field-in-piotlb_inv_desc hi: 0x%" PRIx64
+  " lo: 0x%" PRIx64, inv_desc->val[1], inv_desc->val[0]);
+return false;
+}
+
+domain_id = VTD_INV_DESC_PIOTLB_DID(inv_desc->val[0]);
+pasid = VTD_INV_DESC_PIOTLB_PASID(inv_desc->val[0]);
+switch (inv_desc->val[0] & VTD_INV_DESC_IOTLB_G) {
+case VTD_INV_DESC_PIOTLB_ALL_IN_PASID:
+vtd_piotlb_pasid_invalidate(s, domain_id, pasid);
+break;
+
+case VTD_INV_DESC_PIOTLB_PSI_IN_PASID:
+am = VTD_INV_DESC_PIOTLB_AM(inv_desc->val[1]);
+addr = (hwaddr) VTD_INV_DESC_PIOTLB_ADDR(inv_desc->val[1]);
+if (am > VTD_MAMV) {
+error_report_once("Invalid am, > max am value, hi: 0x%" PRIx64
+" lo: 0x%" PRIx64, inv_desc->val[1], inv_desc->val[0]);
+return false;
+}
+vtd_piotlb_page_invalidate(s, domain_id, pasid,
+ addr, am, VTD_INV_DESC_PIOTLB_IH(inv_desc->val[1]));
+break;
+
+default:
+error_report_once("Invalid granularity in P-IOTLB desc hi: 0x%" PRIx64
+  " lo: 0x%" PRIx64, inv_desc->val[1], inv_desc->val[0]);
+return false;
+}
+return true;
+}
+
 static bool vtd_process_inv_iec_desc(IntelIOMMUState *s,
  VTDInvDesc *inv_desc)
 {
@@ -3248,6 +3301,10 @@ static bool vtd_process_inv_desc(IntelIOMMUState *s)
 break;
 
 case VTD_INV_DESC_PIOTLB:
+trace_vtd_inv_desc("p-iotlb", inv_desc.val[1], inv_desc.val[0]);
+if (!vtd_process_piotlb_desc(s, &inv_desc)) {
+return false;
+}
 break;
 
 case VTD_INV_DESC_WAIT:
diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
index 6f32d7b..314e2c4 100644
--- a/hw/i386/intel_iommu_internal.h
+++ b/hw/i386/intel_iommu_internal.h
@@ -457,6 +457,19 @@ typedef union VTDInvDesc VTDInvDesc;
 #define VTD_INV_DESC_PASIDC_PASID_SI   (1ULL << 4)
 #define VTD_INV_DESC_PASIDC_GLOBAL (3ULL << 4)
 
+#define VTD_INV_DESC_PIOTLB_ALL_IN_PASID  (2ULL << 4)
+#define VTD_INV_DESC_PIOTLB_PSI_IN_PASID  (3ULL << 4)
+
+#define VTD_INV_DESC_PIOTLB_RSVD_VAL0 0xfff0ffc0ULL
+#define VTD_INV_DESC_PIOTLB_RSVD_VAL1 0xf80ULL
+
+#define VTD_INV_DESC_PIOTLB_PASID(val)(((val) >> 32) & 0xfULL)
+#define VTD_INV_DESC_PIOTLB_DID(val)  (((val) >> 16) & \
+ VTD_DOMAIN_ID_MASK)
+#define VTD_INV_DESC_PIOTLB_ADDR(val) ((val) & ~0xfffULL)
+#define VTD_INV_DESC_PIOTLB_AM(val)   ((val) & 0x3fULL)
+#define VTD_INV_DESC_PIOTLB_IH(val)   (((val) >> 6) & 0x1)
+
 /* Information about page-selective IOTLB invalidate */
 struct VTDIOTLBPageInvInfo {
 uint16_t domain_id;
-- 
2.7.4




[PATCH v1 18/22] vfio: add support for flush iommu stage-1 cache

2020-03-22 Thread Liu Yi L
This patch adds the flush_stage1_cache() definition to HostIOMMUContextClass
and adds the corresponding implementation in VFIO. This exposes a way
for the vIOMMU to flush the stage-1 cache on the host side, since the guest
owns the stage-1 translation structures in a dual-stage DMA translation
configuration.

Cc: Kevin Tian 
Cc: Jacob Pan 
Cc: Peter Xu 
Cc: Eric Auger 
Cc: Yi Sun 
Cc: David Gibson 
Cc: Alex Williamson 
Signed-off-by: Liu Yi L 
---
 hw/iommu/host_iommu_context.c | 19 +++
 hw/vfio/common.c  | 24 
 include/hw/iommu/host_iommu_context.h | 14 ++
 3 files changed, 57 insertions(+)

diff --git a/hw/iommu/host_iommu_context.c b/hw/iommu/host_iommu_context.c
index 8a53376..4bff1a1 100644
--- a/hw/iommu/host_iommu_context.c
+++ b/hw/iommu/host_iommu_context.c
@@ -113,6 +113,25 @@ int host_iommu_ctx_unbind_stage1_pgtbl(HostIOMMUContext 
*host_icx,
 return hicxc->unbind_stage1_pgtbl(host_icx, data);
 }
 
+int host_iommu_ctx_flush_stage1_cache(HostIOMMUContext *host_icx,
+  DualIOMMUStage1Cache *cache)
+{
+HostIOMMUContextClass *hicxc;
+
+hicxc = HOST_IOMMU_CONTEXT_GET_CLASS(host_icx);
+
+if (!hicxc) {
+return -EINVAL;
+}
+
+if (!(host_icx->flags & HOST_IOMMU_NESTING) ||
+!hicxc->flush_stage1_cache) {
+return -EINVAL;
+}
+
+return hicxc->flush_stage1_cache(host_icx, cache);
+}
+
 void host_iommu_ctx_init(void *_host_icx, size_t instance_size,
  const char *mrtypename,
  uint64_t flags, uint32_t formats)
diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index 770a785..e69fe94 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -1269,6 +1269,29 @@ static int 
vfio_host_icx_unbind_stage1_pgtbl(HostIOMMUContext *host_icx,
 return ret;
 }
 
+static int vfio_host_icx_flush_stage1_cache(HostIOMMUContext *host_icx,
+DualIOMMUStage1Cache *cache)
+{
+VFIOContainer *container = container_of(host_icx, VFIOContainer, host_icx);
+struct vfio_iommu_type1_cache_invalidate *cache_inv;
+unsigned long argsz;
+int ret = 0;
+
+argsz = sizeof(*cache_inv) + sizeof(cache->cache_info);
+cache_inv = g_malloc0(argsz);
+cache_inv->argsz = argsz;
+cache_inv->flags = 0;
+memcpy(&cache_inv->cache_info, &cache->cache_info,
+   sizeof(cache->cache_info));
+
+if (ioctl(container->fd, VFIO_IOMMU_CACHE_INVALIDATE, cache_inv)) {
+error_report("%s: iommu cache flush failed: %d", __func__, -errno);
+ret = -errno;
+}
+g_free(cache_inv);
+return ret;
+}
+
 /**
  * Get iommu info from host. Caller of this funcion should free
  * the memory pointed by the returned pointer stored in @info
@@ -1996,6 +2019,7 @@ static void 
vfio_host_iommu_context_class_init(ObjectClass *klass,
 hicxc->pasid_free = vfio_host_icx_pasid_free;
 hicxc->bind_stage1_pgtbl = vfio_host_icx_bind_stage1_pgtbl;
 hicxc->unbind_stage1_pgtbl = vfio_host_icx_unbind_stage1_pgtbl;
+hicxc->flush_stage1_cache = vfio_host_icx_flush_stage1_cache;
 }
 
 static const TypeInfo vfio_host_iommu_context_info = {
diff --git a/include/hw/iommu/host_iommu_context.h 
b/include/hw/iommu/host_iommu_context.h
index 97c9473..6230daa 100644
--- a/include/hw/iommu/host_iommu_context.h
+++ b/include/hw/iommu/host_iommu_context.h
@@ -42,6 +42,7 @@
 
 typedef struct HostIOMMUContext HostIOMMUContext;
 typedef struct DualIOMMUStage1BindData DualIOMMUStage1BindData;
+typedef struct DualIOMMUStage1Cache DualIOMMUStage1Cache;
 
 typedef struct HostIOMMUContextClass {
 /* private */
@@ -65,6 +66,12 @@ typedef struct HostIOMMUContextClass {
 /* Undo a previous bind. @bind_data specifies the unbind info. */
 int (*unbind_stage1_pgtbl)(HostIOMMUContext *dsi_obj,
DualIOMMUStage1BindData *bind_data);
+/*
+ * Propagate stage-1 cache flush to host IOMMU, cache
+ * info specifid in @cache
+ */
+int (*flush_stage1_cache)(HostIOMMUContext *host_icx,
+  DualIOMMUStage1Cache *cache);
 } HostIOMMUContextClass;
 
 /*
@@ -86,6 +93,11 @@ struct DualIOMMUStage1BindData {
 } bind_data;
 };
 
+struct DualIOMMUStage1Cache {
+uint32_t pasid;
+struct iommu_cache_invalidate_info cache_info;
+};
+
 int host_iommu_ctx_pasid_alloc(HostIOMMUContext *host_icx, uint32_t min,
uint32_t max, uint32_t *pasid);
 int host_iommu_ctx_pasid_free(HostIOMMUContext *host_icx, uint32_t pasid);
@@ -93,6 +105,8 @@ int host_iommu_ctx_bind_stage1_pgtbl(HostIOMMUContext 
*host_icx,
  DualIOMMUStage1BindData *data);
 int host_iommu_ctx_unbind_stage1_pgtbl(HostIOMMUContext *host_icx,
DualIOMMUStage1BindData *data);
+int host_iommu_ctx_flush_stage1_cache(HostIOMMUContext *host_icx,
+   

[PATCH v1 04/22] hw/iommu: introduce HostIOMMUContext

2020-03-22 Thread Liu Yi L
Currently, many platform vendors provide the capability of dual stage
DMA address translation in hardware. For example, nested translation
on Intel VT-d scalable mode, nested stage translation on ARM SMMUv3,
etc. In dual stage DMA address translation, there are two stages of
address translation: stage-1 (a.k.a. first-level) and stage-2 (a.k.a.
second-level) translation structures. Stage-1 translation results are
also subjected to stage-2 translation structures. Take vSVA (Virtual
Shared Virtual Addressing) as an example: the guest IOMMU driver owns
the stage-1 translation structures (covering GVA->GPA translation), and the
host IOMMU driver owns the stage-2 translation structures (covering GPA->HPA
translation). The VMM is responsible for binding the stage-1 translation
structures to the host, so that hardware can achieve GVA->GPA and then
GPA->HPA translation. For more background on SVA, refer to the links below.
 - https://www.youtube.com/watch?v=Kq_nfGK5MwQ
 - https://events19.lfasiallc.com/wp-content/uploads/2017/11/\
Shared-Virtual-Memory-in-KVM_Yi-Liu.pdf

In QEMU, vIOMMU emulators expose IOMMUs to the VM per their own spec (e.g.
Intel VT-d spec). Devices are passed through to the guest via device pass-
through components like VFIO. VFIO is a userspace driver framework
which exposes host IOMMU programming capability to userspace in a
secure manner, e.g. IOVA MAP/UNMAP requests. Thus the major connection
between VFIO and vIOMMU is MAP/UNMAP. However, with the dual stage
DMA translation support, there are more interactions between vIOMMU and
VFIO as below:
 1) PASID allocation (allow host to intercept in PASID allocation)
 2) bind stage-1 translation structures to host
 3) propagate stage-1 cache invalidation to host
 4) DMA address translation fault (I/O page fault) servicing etc.

These new interactions require an abstraction layer in QEMU
to facilitate the above operations and to give vIOMMU emulators an
explicit way to call into VFIO. This patch introduces
HostIOMMUContext to stand for a hardware IOMMU w/ dual stage DMA address
translation capability, and introduces HostIOMMUContextClass to provide
methods for vIOMMU emulators to propagate dual-stage translation related
requests to the host. As a beginning, PASID allocation/free are defined to
propagate PASID allocation/free requests to the host, which is helpful for
vendors who manage PASIDs system-wide. In the future, there will be more
operations like bind_stage1_pgtbl, flush_stage1_cache, etc.

Cc: Kevin Tian 
Cc: Jacob Pan 
Cc: Peter Xu 
Cc: Eric Auger 
Cc: Yi Sun 
Cc: David Gibson 
Cc: Michael S. Tsirkin 
Signed-off-by: Liu Yi L 
---
 hw/Makefile.objs  |   1 +
 hw/iommu/Makefile.objs|   1 +
 hw/iommu/host_iommu_context.c | 112 ++
 include/hw/iommu/host_iommu_context.h |  75 +++
 4 files changed, 189 insertions(+)
 create mode 100644 hw/iommu/Makefile.objs
 create mode 100644 hw/iommu/host_iommu_context.c
 create mode 100644 include/hw/iommu/host_iommu_context.h

diff --git a/hw/Makefile.objs b/hw/Makefile.objs
index 660e2b4..cab83fe 100644
--- a/hw/Makefile.objs
+++ b/hw/Makefile.objs
@@ -40,6 +40,7 @@ devices-dirs-$(CONFIG_MEM_DEVICE) += mem/
 devices-dirs-$(CONFIG_NUBUS) += nubus/
 devices-dirs-y += semihosting/
 devices-dirs-y += smbios/
+devices-dirs-y += iommu/
 endif
 
 common-obj-y += $(devices-dirs-y)
diff --git a/hw/iommu/Makefile.objs b/hw/iommu/Makefile.objs
new file mode 100644
index 000..e6eed4e
--- /dev/null
+++ b/hw/iommu/Makefile.objs
@@ -0,0 +1 @@
+obj-y += host_iommu_context.o
diff --git a/hw/iommu/host_iommu_context.c b/hw/iommu/host_iommu_context.c
new file mode 100644
index 000..af61899
--- /dev/null
+++ b/hw/iommu/host_iommu_context.c
@@ -0,0 +1,112 @@
+/*
+ * QEMU abstract of Host IOMMU
+ *
+ * Copyright (C) 2020 Intel Corporation.
+ *
+ * Authors: Liu Yi L 
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see .
+ */
+
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "qom/object.h"
+#include "qapi/visitor.h"
+#include "hw/iommu/host_iommu_context.h"
+
+int host_iommu_ctx_pasid_alloc(HostIOMMUContext *host_icx, uint32_t min,
+   uint32_t max, uint32_t *pasid)
+{
+HostIOMMUContextClass *hicxc;
+
+if (!host_icx) {
+return -EINVAL;
+}
+
+hicxc = HOST_IOMMU_CONTEXT_GET_CLASS(host_icx);
+
+   

Re: [PATCH v1 00/22] intel_iommu: expose Shared Virtual Addressing to VMs

2020-03-22 Thread no-reply
Patchew URL: 
https://patchew.org/QEMU/1584880579-12178-1-git-send-email-yi.l@intel.com/



Hi,

This series failed the docker-mingw@fedora build test. Please find the testing 
commands and
their output below. If you have Docker installed, you can probably reproduce it
locally.

=== TEST SCRIPT BEGIN ===
#! /bin/bash
export ARCH=x86_64
make docker-image-fedora V=1 NETWORK=1
time make docker-test-mingw@fedora J=14 NETWORK=1
=== TEST SCRIPT END ===

 from /tmp/qemu-test/src/include/hw/pci/pci_bus.h:4,
 from /tmp/qemu-test/src/include/hw/pci-host/i440fx.h:15,
 from /tmp/qemu-test/src/stubs/pci-host-piix.c:2:
/tmp/qemu-test/src/include/hw/iommu/host_iommu_context.h:28:10: fatal error: 
linux/iommu.h: No such file or directory
 #include 
  ^~~
compilation terminated.
make: *** [/tmp/qemu-test/src/rules.mak:69: stubs/pci-host-piix.o] Error 1
make: *** Waiting for unfinished jobs
Traceback (most recent call last):
  File "./tests/docker/docker.py", line 664, in 
---
raise CalledProcessError(retcode, cmd)
subprocess.CalledProcessError: Command '['sudo', '-n', 'docker', 'run', 
'--label', 'com.qemu.instance.uuid=f45b53a01c8a446dba5120da7c3f63e2', '-u', 
'1003', '--security-opt', 'seccomp=unconfined', '--rm', '-e', 'TARGET_LIST=', 
'-e', 'EXTRA_CONFIGURE_OPTS=', '-e', 'V=', '-e', 'J=14', '-e', 'DEBUG=', '-e', 
'SHOW_ENV=', '-e', 'CCACHE_DIR=/var/tmp/ccache', '-v', 
'/home/patchew2/.cache/qemu-docker-ccache:/var/tmp/ccache:z', '-v', 
'/var/tmp/patchew-tester-tmp-gqmyp6pe/src/docker-src.2020-03-22-09.24.11.12638:/var/tmp/qemu:z,ro',
 'qemu:fedora', '/var/tmp/qemu/run', 'test-mingw']' returned non-zero exit 
status 2.
filter=--filter=label=com.qemu.instance.uuid=f45b53a01c8a446dba5120da7c3f63e2
make[1]: *** [docker-run] Error 1
make[1]: Leaving directory `/var/tmp/patchew-tester-tmp-gqmyp6pe/src'
make: *** [docker-run-test-mingw@fedora] Error 2

real1m43.300s
user0m8.183s


The full log is available at
http://patchew.org/logs/1584880579-12178-1-git-send-email-yi.l@intel.com/testing.docker-mingw@fedora/?type=message.
---
Email generated automatically by Patchew [https://patchew.org/].
Please send your feedback to patchew-de...@redhat.com

[PATCH 0/2] Additional parameters for qemu_img map

2020-03-22 Thread Eyal Moscovici
Hi,

The following series adds two parameters to qemu-img map:
1. start-offset: mapping starting offset.
2. max-length: the length of the mapping.

These parameters proved useful when mapping large disks spread across
long store file chains. They allow us to bound the execution time of each
qemu-img map execution as well as recover from failed mapping
operations. In addition, the map operation can be divided into
multiple independent tasks.

Eyal Moscovici (2):
  qemu-img: refactor dump_map_entry JSON format output
  qemu-img: Add --start-offset and --max-length to map

 docs/tools/qemu-img.rst |  2 +-
 qemu-img-cmds.hx|  4 ++--
 qemu-img.c  | 42 -
 3 files changed, 40 insertions(+), 8 deletions(-)

-- 
2.17.2 (Apple Git-113)




[PATCH 2/2] qemu-img: Add --start-offset and --max-length to map

2020-03-22 Thread Eyal Moscovici
The mapping operation of large disks, especially ones stored over a
long chain of QCOW2 files, can take a long time to finish.
Additionally, when mapping fails there was no way to recover by
restarting the mapping from the failed location.

The new options, --start-offset and --max-length, allow the user to
divide these types of map operations into shorter independent tasks.

Acked-by: Mark Kanda 
Co-developed-by: Yoav Elnekave 
Signed-off-by: Yoav Elnekave 
Signed-off-by: Eyal Moscovici 
---
 docs/tools/qemu-img.rst |  2 +-
 qemu-img-cmds.hx|  4 ++--
 qemu-img.c  | 30 +-
 3 files changed, 32 insertions(+), 4 deletions(-)

diff --git a/docs/tools/qemu-img.rst b/docs/tools/qemu-img.rst
index 0080f83a76..924e89f679 100644
--- a/docs/tools/qemu-img.rst
+++ b/docs/tools/qemu-img.rst
@@ -519,7 +519,7 @@ Command description:
 ``ImageInfoSpecific*`` QAPI object (e.g. ``ImageInfoSpecificQCow2``
 for qcow2 images).
 
-.. option:: map [--object OBJECTDEF] [--image-opts] [-f FMT] [--output=OFMT] 
[-U] FILENAME
+.. option:: map [--object OBJECTDEF] [--image-opts] [-f FMT] 
[--start-offset=offset] [--max-length=len] [--output=OFMT] [-U] FILENAME
 
   Dump the metadata of image *FILENAME* and its backing file chain.
   In particular, this commands dumps the allocation state of every sector
diff --git a/qemu-img-cmds.hx b/qemu-img-cmds.hx
index c9c54de1df..35f832816f 100644
--- a/qemu-img-cmds.hx
+++ b/qemu-img-cmds.hx
@@ -63,9 +63,9 @@ SRST
 ERST
 
 DEF("map", img_map,
-"map [--object objectdef] [--image-opts] [-f fmt] [--output=ofmt] [-U] 
filename")
+"map [--object objectdef] [--image-opts] [-f fmt] [--start-offset=offset] 
[--max-length=len] [--output=ofmt] [-U] filename")
 SRST
-.. option:: map [--object OBJECTDEF] [--image-opts] [-f FMT] [--output=OFMT] 
[-U] FILENAME
+.. option:: map [--object OBJECTDEF] [--image-opts] [-f FMT] 
[--start-offset=OFFSET] [--max-length=LEN] [--output=OFMT] [-U] FILENAME
 ERST
 
 DEF("measure", img_measure,
diff --git a/qemu-img.c b/qemu-img.c
index 9cf8576217..cd365b275e 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -2967,6 +2967,8 @@ static int img_map(int argc, char **argv)
 int ret = 0;
 bool image_opts = false;
 bool force_share = false;
+int64_t start_offset = 0;
+int64_t max_length = -1;
 
 fmt = NULL;
 output = NULL;
@@ -2979,9 +2981,11 @@ static int img_map(int argc, char **argv)
 {"object", required_argument, 0, OPTION_OBJECT},
 {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
 {"force-share", no_argument, 0, 'U'},
+{"start-offset", required_argument, 0, 's'},
+{"max-length", required_argument, 0, 'l'},
 {0, 0, 0, 0}
 };
-c = getopt_long(argc, argv, ":f:hU",
+c = getopt_long(argc, argv, ":f:s:l:hU",
 long_options, &option_index);
 if (c == -1) {
 break;
@@ -3005,6 +3009,26 @@ static int img_map(int argc, char **argv)
 case OPTION_OUTPUT:
 output = optarg;
 break;
+case 's':
+start_offset = cvtnum(optarg);
+if (start_offset < 0) {
+error_report("Invalid start offset specified! You may use "
+ "k, M, G, T, P or E suffixes for ");
+error_report("kilobytes, megabytes, gigabytes, terabytes, "
+ "petabytes and exabytes.");
+return 1;
+}
+break;
+case 'l':
+max_length = cvtnum(optarg);
+if (max_length < 0) {
+error_report("Invalid max length specified! You may use "
+ "k, M, G, T, P or E suffixes for ");
+error_report("kilobytes, megabytes, gigabytes, terabytes, "
+ "petabytes and exabytes.");
+return 1;
+}
+break;
 case OPTION_OBJECT: {
 QemuOpts *opts;
 opts = qemu_opts_parse_noisily(&qemu_object_opts,
@@ -3050,7 +3074,11 @@ static int img_map(int argc, char **argv)
 printf("[");
 }
 
+curr.start = start_offset;
 length = blk_getlength(blk);
+if (max_length != -1) {
+length = MIN(start_offset + max_length, length);
+}
 while (curr.start + curr.length < length) {
 int64_t offset = curr.start + curr.length;
 int64_t n;
-- 
2.17.2 (Apple Git-113)




[PATCH 1/2] qemu-img: refactor dump_map_entry JSON format output

2020-03-22 Thread Eyal Moscovici
Previously, dump_map_entry identified whether we needed to start a new JSON
array based on whether the start address == 0. In this refactor we remove
this assumption, as in the following patches we will allow map to start from
an arbitrary position.

Acked-by: Mark Kanda 
Signed-off-by: Eyal Moscovici 
---
 qemu-img.c | 12 
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/qemu-img.c b/qemu-img.c
index afddf33f08..9cf8576217 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -2860,9 +2860,8 @@ static int dump_map_entry(OutputFormat output_format, 
MapEntry *e,
 }
 break;
 case OFORMAT_JSON:
-printf("%s{ \"start\": %"PRId64", \"length\": %"PRId64","
+printf("{ \"start\": %"PRId64", \"length\": %"PRId64","
" \"depth\": %"PRId64", \"zero\": %s, \"data\": %s",
-   (e->start == 0 ? "[" : ",\n"),
e->start, e->length, e->depth,
e->zero ? "true" : "false",
e->data ? "true" : "false");
@@ -2871,8 +2870,8 @@ static int dump_map_entry(OutputFormat output_format, 
MapEntry *e,
 }
 putchar('}');
 
-if (!next) {
-printf("]\n");
+if (next) {
+printf(",\n");
 }
 break;
 }
@@ -3047,6 +3046,8 @@ static int img_map(int argc, char **argv)
 
 if (output_format == OFORMAT_HUMAN) {
 printf("%-16s%-16s%-16s%s\n", "Offset", "Length", "Mapped to", "File");
+} else if (output_format == OFORMAT_JSON) {
+printf("[");
 }
 
 length = blk_getlength(blk);
@@ -3078,6 +3079,9 @@ static int img_map(int argc, char **argv)
 }
 
 ret = dump_map_entry(output_format, &curr, NULL);
+if (output_format == OFORMAT_JSON) {
+printf("]\n");
+}
 
 out:
 blk_unref(blk);
-- 
2.17.2 (Apple Git-113)




Re: deprecation of in-tree builds

2020-03-22 Thread Aleksandar Markovic
9:51 PM Sub, 21.03.2020. Peter Maydell  је
написао/ла:
>
> AIUI from Paolo, the intention is to deprecate and eventually
> stop supporting "in-tree" builds, so that the only option is
> building in a separate build directory. I thought we should
> probably mention that in the 5.0 changelog, so I wrote up some
> text:
>
> https://wiki.qemu.org/ChangeLog/5.0#Build_Information
>
> Suggestions for changes/comments etc welcome.
>
> (One thing we will need to fix before we can do separate build
> tree is the Coverity Scan build process, which (a) does an
> in-tree build (b) can't be easily switched to a builddir because
> all the source paths get baked into the scan results and moving
> to a builddir changes them all...)
>
> We could also make configure actively warn if used in
> the source tree.
>

I don't intend to complain too much about removing in-tree builds, but
there may be some not-so-visible, but valuable features that right now work
in in-tree builds only, and I think we should make them work in out-of-tree
builds as well.

For example, I noticed that gcov builds have some problems finding
directories if built out-of-tree, leading to no coverage report output
at all, if applied to some external test executables (for some strange
reason, "make check" works for out-of-tree and in-tree builds though). I
think we should fix that and similar problems before removing in-tree
builds.

In general, I also think we should not have overly lax treatment of
features that may be effectively removed with any particular deprecation.
Just because a feature is less-known or less-used is not a sufficient
reason IMHO to drop it just for the sake of "progress".

If the "progress" (in the form of deprecation) is so important, then the
authors should devise it so that there is no damage to existing features,
and no adverse effects.

In this light, perhaps in-tree build deprecation in 5.0 is a little
premature.

Regards,
Aleksandar

> thanks
> -- PMM
>


[PATCH] .travis.yml: Add a KVM-only s390x job

2020-03-22 Thread Philippe Mathieu-Daudé
Add a job to build QEMU on s390x with TCG disabled, so
this configuration won't bitrot over time.

This job is quick, running check-unit: Ran for 5 min 30 sec
https://travis-ci.org/github/philmd/qemu/jobs/665456423

Acked-by: Cornelia Huck 
Signed-off-by: Philippe Mathieu-Daudé 
---
 .travis.yml | 42 ++
 1 file changed, 42 insertions(+)

diff --git a/.travis.yml b/.travis.yml
index 5672d129ec..e0c72210b7 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -525,6 +525,48 @@ jobs:
   $(exit $BUILD_RC);
   fi
 
+- name: "[s390x] GCC check (KVM)"
+  arch: s390x
+  dist: bionic
+  addons:
+apt_packages:
+  - libaio-dev
+  - libattr1-dev
+  - libbrlapi-dev
+  - libcap-ng-dev
+  - libgcrypt20-dev
+  - libgnutls28-dev
+  - libgtk-3-dev
+  - libiscsi-dev
+  - liblttng-ust-dev
+  - libncurses5-dev
+  - libnfs-dev
+  - libnss3-dev
+  - libpixman-1-dev
+  - libpng-dev
+  - librados-dev
+  - libsdl2-dev
+  - libseccomp-dev
+  - liburcu-dev
+  - libusb-1.0-0-dev
+  - libvdeplug-dev
+  - libvte-2.91-dev
+  # Tests dependencies
+  - genisoimage
+  env:
+- TEST_CMD="make check-unit"
+- CONFIG="--disable-containers --disable-tcg --enable-kvm 
--disable-tools"
+  script:
+- ( cd ${SRC_DIR} ; git submodule update --init roms/SLOF )
+- BUILD_RC=0 && make -j${JOBS} || BUILD_RC=$?
+- |
+  if [ "$BUILD_RC" -eq 0 ] ; then
+  mv pc-bios/s390-ccw/*.img pc-bios/ ;
+  ${TEST_CMD} ;
+  else
+  $(exit $BUILD_RC);
+  fi
+
 # Release builds
 # The make-release script expect a QEMU version, so our tag must start 
with a 'v'.
 # This is the case when release candidate tags are created.
-- 
2.21.1




[PATCH for QEMU v2] virtio-balloon: Add option cont-pages to set VIRTIO_BALLOON_VQ_INFLATE_CONT

2020-03-22 Thread Hui Zhu
If the guest kernel has many fragmented pages, using virtio_balloon
will split the THPs of QEMU when it calls madvise MADV_DONTNEED to release
the balloon pages.
Setting option cont-pages to on enables the flag VIRTIO_BALLOON_VQ_INFLATE_CONT
and sets the continuous pages order to the THP order.
Then it will get the PFNs of the continuous pages from the VQ icvq and use
madvise MADV_DONTNEED to release the THP page.
This will handle the THP split issue.

Signed-off-by: Hui Zhu 
---
 hw/virtio/virtio-balloon.c  | 32 +
 include/hw/virtio/virtio-balloon.h  |  4 +++-
 include/standard-headers/linux/virtio_balloon.h |  4 
 3 files changed, 35 insertions(+), 5 deletions(-)

diff --git a/hw/virtio/virtio-balloon.c b/hw/virtio/virtio-balloon.c
index a4729f7..88bdaca 100644
--- a/hw/virtio/virtio-balloon.c
+++ b/hw/virtio/virtio-balloon.c
@@ -34,6 +34,7 @@
 #include "hw/virtio/virtio-access.h"
 
 #define BALLOON_PAGE_SIZE  (1 << VIRTIO_BALLOON_PFN_SHIFT)
+#define CONT_PAGES_ORDER   9
 
 typedef struct PartiallyBalloonedPage {
 ram_addr_t base_gpa;
@@ -65,7 +66,8 @@ static bool virtio_balloon_pbp_matches(PartiallyBalloonedPage 
*pbp,
 
 static void balloon_inflate_page(VirtIOBalloon *balloon,
  MemoryRegion *mr, hwaddr mr_offset,
- PartiallyBalloonedPage *pbp)
+ PartiallyBalloonedPage *pbp, 
+ bool is_cont_pages)
 {
 void *addr = memory_region_get_ram_ptr(mr) + mr_offset;
 ram_addr_t rb_offset, rb_aligned_offset, base_gpa;
@@ -76,6 +78,13 @@ static void balloon_inflate_page(VirtIOBalloon *balloon,
 /* XXX is there a better way to get to the RAMBlock than via a
  * host address? */
 rb = qemu_ram_block_from_host(addr, false, &rb_offset);
+
+if (is_cont_pages) {
+ram_block_discard_range(rb, rb_offset,
+BALLOON_PAGE_SIZE << CONT_PAGES_ORDER);
+return;
+}
+
 rb_page_size = qemu_ram_pagesize(rb);
 
 if (rb_page_size == BALLOON_PAGE_SIZE) {
@@ -361,9 +370,10 @@ static void virtio_balloon_handle_output(VirtIODevice 
*vdev, VirtQueue *vq)
 trace_virtio_balloon_handle_output(memory_region_name(section.mr),
pa);
 if (!qemu_balloon_is_inhibited()) {
-if (vq == s->ivq) {
+if (vq == s->ivq || vq == s->icvq) {
 balloon_inflate_page(s, section.mr,
- section.offset_within_region, &pbp);
+ section.offset_within_region, &pbp,
+ vq == s->icvq);
 } else if (vq == s->dvq) {
 balloon_deflate_page(s, section.mr, 
section.offset_within_region);
 } else {
@@ -618,9 +628,12 @@ static size_t virtio_balloon_config_size(VirtIOBalloon *s)
 if (s->qemu_4_0_config_size) {
 return sizeof(struct virtio_balloon_config);
 }
-if (virtio_has_feature(features, VIRTIO_BALLOON_F_PAGE_POISON)) {
+if (virtio_has_feature(s->host_features, VIRTIO_BALLOON_F_CONT_PAGES)) {
 return sizeof(struct virtio_balloon_config);
 }
+if (virtio_has_feature(features, VIRTIO_BALLOON_F_PAGE_POISON)) {
+return offsetof(struct virtio_balloon_config, pages_order);
+}
 if (virtio_has_feature(features, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) {
 return offsetof(struct virtio_balloon_config, poison_val);
 }
@@ -646,6 +659,10 @@ static void virtio_balloon_get_config(VirtIODevice *vdev, 
uint8_t *config_data)
cpu_to_le32(VIRTIO_BALLOON_CMD_ID_DONE);
 }
 
+if (virtio_has_feature(dev->host_features, VIRTIO_BALLOON_F_CONT_PAGES)) {
+config.pages_order = cpu_to_le32(CONT_PAGES_ORDER);
+}
+
 trace_virtio_balloon_get_config(config.num_pages, config.actual);
 memcpy(config_data, &config, virtio_balloon_config_size(dev));
 }
@@ -816,6 +833,11 @@ static void virtio_balloon_device_realize(DeviceState 
*dev, Error **errp)
 virtio_error(vdev, "iothread is missing");
 }
 }
+
+if (virtio_has_feature(s->host_features, VIRTIO_BALLOON_F_CONT_PAGES)) {
+s->icvq = virtio_add_queue(vdev, 128, virtio_balloon_handle_output);
+}
+
 reset_stats(s);
 }
 
@@ -916,6 +938,8 @@ static Property virtio_balloon_properties[] = {
 VIRTIO_BALLOON_F_DEFLATE_ON_OOM, false),
 DEFINE_PROP_BIT("free-page-hint", VirtIOBalloon, host_features,
 VIRTIO_BALLOON_F_FREE_PAGE_HINT, false),
+DEFINE_PROP_BIT("cont-pages", VirtIOBalloon, host_features,
+VIRTIO_BALLOON_F_CONT_PAGES, false),
 /* QEMU 4.0 accidentally changed the config size even when free-page-hint
  * is disabled, resulting in QEMU 3.1 migration incompatibility.  This
  * property retains this quirk for QEMU 4.1 m

[PATCH for Linux v2] virtio_balloon: Add VIRTIO_BALLOON_VQ_INFLATE_CONT to handle THP split issue

2020-03-22 Thread Hui Zhu
The first version is in [1].
According to the comments from Michael and David, I updated the patch.
1. Added a separate vq inflate_cont_vq to transport inflate continuous
   pages.
2. Set all the pages in the continuous pages movable so that they can be
   compacted.
3. Added a new element pages_order to virtio_balloon_config.  It is the
   inflate pages order that is set by QEMU.
4. If the balloon cannot get any continuous pages from the system,
   fall back to using single pages to fill the balloon.
5.  Use balloon_pages_alloc to allocate the single page and continuous
pages.  Replace __GFP_NORETRY with __GFP_RETRY_MAYFAIL when allocating
the continuous pages because it can increase the success rate of
allocating large chunks of memory.

Following is the introduction of the function.
If the guest kernel has many fragmentation pages, use virtio_balloon
will split THP of QEMU when it calls MADV_DONTNEED madvise to release
the balloon pages.
This is an example in a VM with 1G memory 1CPU:
// This is the THP number before VM execution in the host.
// None use THP.
cat /proc/meminfo | grep AnonHugePages:
AnonHugePages: 0 kB

// After VM start, use usemem
// (https://git.kernel.org/pub/scm/linux/kernel/git/wfg/vm-scalability.git)
// punch-holes function generates 400m fragmentation pages in the guest
// kernel.
usemem --punch-holes -s -1 800m &

// This is the THP number after this command in the host.
// Some THP is used by VM because usemem will access 800M memory
// in the guest.
cat /proc/meminfo | grep AnonHugePages:
AnonHugePages:976896 kB

// Connect to the QEMU monitor, setup balloon, and set it size to 600M.
(qemu) device_add virtio-balloon-pci,id=balloon1
(qemu) info balloon
balloon: actual=1024
(qemu) balloon 600
(qemu) info balloon
balloon: actual=600

// This is the THP number after inflate the balloon in the host.
cat /proc/meminfo | grep AnonHugePages:
AnonHugePages:151552 kB

THP number decreased more than 800M.
The reason is usemem with punch-holes option will free every other
page after allocation.  Then 400M free memory inside the guest kernel
is fragmentation pages.
The guest kernel will use them to inflate the balloon.  When these
fragmentation pages are freed, THP will be split.

This commit tries to handle this by adding a new flag
VIRTIO_BALLOON_VQ_INFLATE_CONT.
When this flag is set, the balloon will try to use continuous pages
to inflate the balloon, and the pages order is set to the THP order.
Then THP pages will be freed together in the host.
This is an example in a VM with 1G memory 1CPU:
// This is the THP number before VM execution in the host.
// None use THP.
cat /proc/meminfo | grep AnonHugePages:
AnonHugePages: 0 kB

// After VM start, use usemem punch-holes function generates 400M
// fragmentation pages in the guest kernel.
usemem --punch-holes -s -1 800m &

// This is the THP number after this command in the host.
// Some THP is used by VM because usemem will access 800M memory
// in the guest.
cat /proc/meminfo | grep AnonHugePages:
AnonHugePages:976896 kB

// Connect to the QEMU monitor, setup balloon, and set it size to 600M.
(qemu) device_add virtio-balloon-pci,id=balloon1,cont-pages=on
(qemu) info balloon
balloon: actual=1024
(qemu) balloon 600
(qemu) info balloon
balloon: actual=600

// This is the THP number after inflate the balloon in the host.
cat /proc/meminfo | grep AnonHugePages:
AnonHugePages:610304 kB

The THP number decreases 358M.  This shows that
VIRTIO_BALLOON_VQ_INFLATE_CONT can help handle the THP split issue.

[1] https://lkml.org/lkml/2020/3/12/144

Signed-off-by: Hui Zhu 
---
 drivers/virtio/virtio_balloon.c | 78 ++---
 include/linux/balloon_compaction.h  |  9 -
 include/uapi/linux/virtio_balloon.h |  3 ++
 mm/balloon_compaction.c | 40 +++
 4 files changed, 109 insertions(+), 21 deletions(-)

diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
index 341458f..fbd2b02f 100644
--- a/drivers/virtio/virtio_balloon.c
+++ b/drivers/virtio/virtio_balloon.c
@@ -47,6 +47,7 @@ enum virtio_balloon_vq {
VIRTIO_BALLOON_VQ_DEFLATE,
VIRTIO_BALLOON_VQ_STATS,
VIRTIO_BALLOON_VQ_FREE_PAGE,
+   VIRTIO_BALLOON_VQ_INFLATE_CONT,
VIRTIO_BALLOON_VQ_MAX
 };
 
@@ -56,7 +57,8 @@ enum virtio_balloon_config_read {
 
 struct virtio_balloon {
struct virtio_device *vdev;
-   struct virtqueue *inflate_vq, *deflate_vq, *stats_vq, *free_page_vq;
+   struct virtqueue *inflate_vq, *deflate_vq, *stats_vq, *free_page_vq,
+*inflate_cont_vq;
 
/* Balloon's own wq for cpu-intensive work items */
struct workqueue_struct *balloon_wq;
@@ -114,6 +116,12 @@ struct virtio_balloon {
 
/* To register a shrinker to shrink memory upon memory pressure */
struct shrinker shrinker;
+
+   /* Pages order if VIRTIO_BALLOON_F_CONT_PAGES is set.
+* if VIRTIO_BALL

[PATCH 1/1] COLO: Fix memory leak in packet_enqueue()

2020-03-22 Thread Derek Su
The patch is to fix the "pkt" memory leak in packet_enqueue().
The allocated "pkt" needs to be freed if the colo compare
primary or secondary queue is too big.

Signed-off-by: Derek Su 
---
 net/colo-compare.c | 23 +++
 1 file changed, 15 insertions(+), 8 deletions(-)

diff --git a/net/colo-compare.c b/net/colo-compare.c
index 7ee17f2cf8..cdd87b2aa8 100644
--- a/net/colo-compare.c
+++ b/net/colo-compare.c
@@ -120,6 +120,10 @@ enum {
 SECONDARY_IN,
 };

+static const char *colo_mode[] = {
+[PRIMARY_IN] = "primary",
+[SECONDARY_IN] = "secondary",
+};

 static int compare_chr_send(CompareState *s,
 const uint8_t *buf,
@@ -215,6 +219,7 @@ static int packet_enqueue(CompareState *s, int mode,
Connection **con)
 ConnectionKey key;
 Packet *pkt = NULL;
 Connection *conn;
+int ret;

 if (mode == PRIMARY_IN) {
 pkt = packet_new(s->pri_rs.buf,
@@ -243,16 +248,18 @@ static int packet_enqueue(CompareState *s, int mode,
Connection **con)
 }

 if (mode == PRIMARY_IN) {
-if (!colo_insert_packet(&conn->primary_list, pkt, &conn->pack)) {
-error_report("colo compare primary queue size too big,"
- "drop packet");
-}
+ret = colo_insert_packet(&conn->primary_list, pkt, &conn->pack);
 } else {
-if (!colo_insert_packet(&conn->secondary_list, pkt, &conn->sack)) {
-error_report("colo compare secondary queue size too big,"
- "drop packet");
-}
+ret = colo_insert_packet(&conn->secondary_list, pkt, &conn->sack);
 }
+
+if (!ret) {
+error_report("colo compare %s queue size too big,"
+ "drop packet", colo_mode[mode]);
+packet_destroy(pkt, NULL);
+pkt = NULL;
+}
+
 *con = conn;

 return 0;
-- 
2.17.1


[PATCH 0/1] COLO: Fix memory leak in packet_enqueue()

2020-03-22 Thread Derek Su
The patch is to fix the memory leak in packet_enqueue().
The allocated "pkt" needs to be freed if the colo compare
primary or secondary queue is too big to insert.

Reproduce steps:
(1) Set up PVM and SVM, both with NIC e1000, following the steps described
    in the wiki qemu/COLO
(2) Run "iperf3 -s" in PVM
(3) Run "iperf3 -c <PVM IP> -t 7200"

The memory usage of qemu-system-x86_64 increases as
the PVM's QMP shows "qemu-system-x86_64: colo compare
secondary queue size too big,drop packet".


Derek Su (1):
  net/colo-compare.c: Fix memory leak in packet_enqueue()

 net/colo-compare.c | 23 +++
 1 file changed, 15 insertions(+), 8 deletions(-)

-- 
2.17.1


[PATCH-for-5.0] ui/input-linux: Do not ignore ioctl() return value

2020-03-22 Thread Philippe Mathieu-Daudé
Fix warnings reported by Clang static code analyzer:

CC  ui/input-linux.o
  ui/input-linux.c:343:9: warning: Value stored to 'rc' is never read
  rc = ioctl(il->fd, EVIOCGBIT(EV_REL, sizeof(relmap)), &relmap);
  ^~
  ui/input-linux.c:351:9: warning: Value stored to 'rc' is never read
  rc = ioctl(il->fd, EVIOCGBIT(EV_ABS, sizeof(absmap)), &absmap);
  ^~
  ui/input-linux.c:354:13: warning: Value stored to 'rc' is never read
  rc = ioctl(il->fd, EVIOCGABS(ABS_X), &absinfo);
  ^~
  ui/input-linux.c:357:13: warning: Value stored to 'rc' is never read
  rc = ioctl(il->fd, EVIOCGABS(ABS_Y), &absinfo);
  ^~
  ui/input-linux.c:365:9: warning: Value stored to 'rc' is never read
  rc = ioctl(il->fd, EVIOCGBIT(EV_KEY, sizeof(keymap)), keymap);
  ^
  ui/input-linux.c:366:9: warning: Value stored to 'rc' is never read
  rc = ioctl(il->fd, EVIOCGKEY(sizeof(keystate)), keystate);
  ^

Reported-by: Clang Static Analyzer
Signed-off-by: Philippe Mathieu-Daudé 
---
 ui/input-linux.c | 29 +++--
 1 file changed, 27 insertions(+), 2 deletions(-)

diff --git a/ui/input-linux.c b/ui/input-linux.c
index a7b280b25b..ef37b14d6f 100644
--- a/ui/input-linux.c
+++ b/ui/input-linux.c
@@ -334,13 +334,15 @@ static void input_linux_complete(UserCreatable *uc, Error 
**errp)
 
 rc = ioctl(il->fd, EVIOCGBIT(0, sizeof(evtmap)), &evtmap);
 if (rc < 0) {
-error_setg(errp, "%s: failed to read event bits", il->evdev);
-goto err_close;
+goto err_read_event_bits;
 }
 
 if (evtmap & (1 << EV_REL)) {
 relmap = 0;
 rc = ioctl(il->fd, EVIOCGBIT(EV_REL, sizeof(relmap)), &relmap);
+if (rc < 0) {
+goto err_read_event_bits;
+}
 if (relmap & (1 << REL_X)) {
 il->has_rel_x = true;
 }
@@ -349,12 +351,25 @@ static void input_linux_complete(UserCreatable *uc, Error 
**errp)
 if (evtmap & (1 << EV_ABS)) {
 absmap = 0;
 rc = ioctl(il->fd, EVIOCGBIT(EV_ABS, sizeof(absmap)), &absmap);
+if (rc < 0) {
+goto err_read_event_bits;
+}
 if (absmap & (1 << ABS_X)) {
 il->has_abs_x = true;
 rc = ioctl(il->fd, EVIOCGABS(ABS_X), &absinfo);
+if (rc < 0) {
+error_setg(errp, "%s: failed to get get absolute X value",
+   il->evdev);
+goto err_close;
+}
 il->abs_x_min = absinfo.minimum;
 il->abs_x_max = absinfo.maximum;
 rc = ioctl(il->fd, EVIOCGABS(ABS_Y), &absinfo);
+if (rc < 0) {
+error_setg(errp, "%s: failed to get get absolute Y value",
+   il->evdev);
+goto err_close;
+}
 il->abs_y_min = absinfo.minimum;
 il->abs_y_max = absinfo.maximum;
 }
@@ -363,7 +378,14 @@ static void input_linux_complete(UserCreatable *uc, Error 
**errp)
 if (evtmap & (1 << EV_KEY)) {
 memset(keymap, 0, sizeof(keymap));
 rc = ioctl(il->fd, EVIOCGBIT(EV_KEY, sizeof(keymap)), keymap);
+if (rc < 0) {
+goto err_read_event_bits;
+}
 rc = ioctl(il->fd, EVIOCGKEY(sizeof(keystate)), keystate);
+if (rc < 0) {
+error_setg(errp, "%s: failed to get global key state", il->evdev);
+goto err_close;
+}
 for (i = 0; i < KEY_CNT; i++) {
 if (keymap[i / 8] & (1 << (i % 8))) {
 if (linux_is_button(i)) {
@@ -390,6 +412,9 @@ static void input_linux_complete(UserCreatable *uc, Error 
**errp)
 il->initialized = true;
 return;
 
+err_read_event_bits:
+error_setg(errp, "%s: failed to read event bits", il->evdev);
+
 err_close:
 close(il->fd);
 return;
-- 
2.21.1




Re: [PATCH for QEMU v2] virtio-balloon: Add option cont-pages to set VIRTIO_BALLOON_VQ_INFLATE_CONT

2020-03-22 Thread no-reply
Patchew URL: 
https://patchew.org/QEMU/1584893097-12317-2-git-send-email-teawa...@gmail.com/



Hi,

This series seems to have some coding style problems. See output below for
more information:

Subject: [PATCH for QEMU v2] virtio-balloon: Add option cont-pages to set 
VIRTIO_BALLOON_VQ_INFLATE_CONT
Message-id: 1584893097-12317-2-git-send-email-teawa...@gmail.com
Type: series

=== TEST SCRIPT BEGIN ===
#!/bin/bash
git rev-parse base > /dev/null || exit 0
git config --local diff.renamelimit 0
git config --local diff.renames True
git config --local diff.algorithm histogram
./scripts/checkpatch.pl --mailback base..
=== TEST SCRIPT END ===

Updating 3c8cf5a9c21ff8782164d1def7f44bd888713384
Switched to a new branch 'test'
206d1fd virtio-balloon: Add option cont-pages to set 
VIRTIO_BALLOON_VQ_INFLATE_CONT

=== OUTPUT BEGIN ===
ERROR: trailing whitespace
#36: FILE: hw/virtio/virtio-balloon.c:69:
+ PartiallyBalloonedPage *pbp, $

total: 1 errors, 0 warnings, 115 lines checked

Commit 206d1fd85a23 (virtio-balloon: Add option cont-pages to set 
VIRTIO_BALLOON_VQ_INFLATE_CONT) has style problems, please review.  If any of 
these errors
are false positives report them to the maintainer, see
CHECKPATCH in MAINTAINERS.
=== OUTPUT END ===

Test command exited with code: 1


The full log is available at
http://patchew.org/logs/1584893097-12317-2-git-send-email-teawa...@gmail.com/testing.checkpatch/?type=message.
---
Email generated automatically by Patchew [https://patchew.org/].
Please send your feedback to patchew-de...@redhat.com

[PATCH 1/1] net/colo-compare.c: Fix memory leak in packet_enqueue()

2020-03-22 Thread Derek Su
The patch is to fix the "pkt" memory leak in packet_enqueue().
The allocated "pkt" needs to be freed if the colo compare
primary or secondary queue is too big.

Signed-off-by: Derek Su 
---
 net/colo-compare.c | 23 +++
 1 file changed, 15 insertions(+), 8 deletions(-)

diff --git a/net/colo-compare.c b/net/colo-compare.c
index 7ee17f2cf8..cdd87b2aa8 100644
--- a/net/colo-compare.c
+++ b/net/colo-compare.c
@@ -120,6 +120,10 @@ enum {
 SECONDARY_IN,
 };

+static const char *colo_mode[] = {
+[PRIMARY_IN] = "primary",
+[SECONDARY_IN] = "secondary",
+};

 static int compare_chr_send(CompareState *s,
 const uint8_t *buf,
@@ -215,6 +219,7 @@ static int packet_enqueue(CompareState *s, int mode,
Connection **con)
 ConnectionKey key;




 Packet *pkt = NULL;
 Connection *conn;
+int ret;

 if (mode == PRIMARY_IN) {
 pkt = packet_new(s->pri_rs.buf,
@@ -243,16 +248,18 @@ static int packet_enqueue(CompareState *s, int mode,
Connection **con)
 }

 if (mode == PRIMARY_IN) {
-if (!colo_insert_packet(&conn->primary_list, pkt, &conn->pack)) {
-error_report("colo compare primary queue size too big,"
- "drop packet");
-}
+ret = colo_insert_packet(&conn->primary_list, pkt, &conn->pack);
 } else {
-if (!colo_insert_packet(&conn->secondary_list, pkt, &conn->sack)) {
-error_report("colo compare secondary queue size too big,"
- "drop packet");
-}
+ret = colo_insert_packet(&conn->secondary_list, pkt, &conn->sack);
 }
+
+if (!ret) {
+error_report("colo compare %s queue size too big,"
+ "drop packet", colo_mode[mode]);
+packet_destroy(pkt, NULL);
+pkt = NULL;
+}
+
 *con = conn;

 return 0;
-- 
2.17.1


Re: [PATCH-for-5.0 1/2] hw/acpi/piix4: Add 'system-hotplug-support' property

2020-03-22 Thread Philippe Mathieu-Daudé

On 3/19/20 4:08 PM, Igor Mammedov wrote:

On Thu, 19 Mar 2020 12:04:11 +0100
Philippe Mathieu-Daudé  wrote:


On 3/19/20 11:44 AM, Igor Mammedov wrote:

On Wed, 18 Mar 2020 23:15:30 +0100
Philippe Mathieu-Daudé  wrote:
   

The I/O ranges registered by the piix4_acpi_system_hot_add_init()
function are not documented in the PIIX4 datasheet.
This appears to be a PC-only feature added in commit 5e3cb5347e
("initialize hot add system / acpi gpe") which was then moved
to the PIIX4 device model in commit 9d5e77a22f ("make
qemu_system_device_hot_add piix independent")
Add a property (default enabled, to not modify the current
behavior) to allow machines wanting to model a simple PIIX4
to disable this feature.

Signed-off-by: Philippe Mathieu-Daudé 


it's already pretty twisted code and adding one more knob
to workaround other compat knobs makes it worse.

Even though it's not really welcomed approach,
we can ifdef all hotplug parts and compile them out for mips
dropping along the way linking with not needed dependencies


We can't use target-specific poisoned definitions to ifdef things out in
generic hw/ code.


or
more often used, make stubs from hotplug parts for mips
and link with them.


So the problem is this device doesn't match the hardware datasheet, has
extra features helping virtualization, and now we can not simplify it
due to backward compat.

Michael once said he doesn't care about the PIIX4, only the PIIX3
southbridge [1] [2], but then the i440fx pc machine uses a PIIX3 with a
pci PM function from PIIX4, which made that PIIX4_PM a Frankenstein.

You are asking me to choose between worse versus ugly?

That 'ugly' is typically used within QEMU to deal with such things
probably due to its low complexity.


OK. Can you point me to the documentation for this feature? I can find 
reference of GPE in the ICH9, but I can't find where this IO address on 
the PIIX4 comes from:


#define GPE_BASE 0xafe0




The saner outcome I see is make the current PIIX4_PM x86-specific, not
modifying the code, and start a fresh new copy respecting the datasheet.

properly implementing spec would be quite a task
(although if motivation is just for fun, then why not)


It is not for fun.





Note I'm not particularly interested in MIPS here, but in having a model
that respects the hardware.

[1] https://www.mail-archive.com/qemu-devel@nongnu.org/msg504270.html
[2] https://www.mail-archive.com/qemu-devel@nongnu.org/msg601512.html

   

---
Should I squash this with the next patch and start with
default=false, which is closer to the hardware model?
---
   hw/acpi/piix4.c | 9 +++--
   1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/hw/acpi/piix4.c b/hw/acpi/piix4.c
index 964d6f5990..9c970336ac 100644
--- a/hw/acpi/piix4.c
+++ b/hw/acpi/piix4.c
@@ -78,6 +78,7 @@ typedef struct PIIX4PMState {
   
   AcpiPciHpState acpi_pci_hotplug;

   bool use_acpi_pci_hotplug;
+bool use_acpi_system_hotplug;
   
   uint8_t disable_s3;

   uint8_t disable_s4;
@@ -503,8 +504,10 @@ static void piix4_pm_realize(PCIDevice *dev, Error **errp)
   s->machine_ready.notify = piix4_pm_machine_ready;
   qemu_add_machine_init_done_notifier(&s->machine_ready);
   
-piix4_acpi_system_hot_add_init(pci_address_space_io(dev),

-   pci_get_bus(dev), s);
+if (s->use_acpi_system_hotplug) {
+piix4_acpi_system_hot_add_init(pci_address_space_io(dev),
+   pci_get_bus(dev), s);
+}
   qbus_set_hotplug_handler(BUS(pci_get_bus(dev)), OBJECT(s), &error_abort);
   
   piix4_pm_add_propeties(s);

@@ -635,6 +638,8 @@ static Property piix4_pm_properties[] = {
use_acpi_pci_hotplug, true),
   DEFINE_PROP_BOOL("memory-hotplug-support", PIIX4PMState,
acpi_memory_hotplug.is_enabled, true),
+DEFINE_PROP_BOOL("system-hotplug-support", PIIX4PMState,
+ use_acpi_system_hotplug, true),
   DEFINE_PROP_END_OF_LIST(),
   };
 
   










[PULL v3 0/1] Slirp patches

2020-03-22 Thread Marc-André Lureau
The following changes since commit f57587c7d47b35b2d9b31def3a74d81bdb5475d7:

  Merge remote-tracking branch 'remotes/armbru/tags/pull-qapi-2020-03-17' into 
staging (2020-03-19 10:18:07 +)

are available in the Git repository at:

  https://github.com/elmarco/qemu.git tags/slirp-pull-request

for you to fetch changes up to 9c1f4f1b9bb4e5df43c4267d519938c1a2aa8e27:

  slirp: update submodule to v4.2.0+ (2020-03-22 18:04:14 +0100)





Marc-André Lureau (1):
  slirp: update submodule to v4.2.0+

 slirp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

-- 
2.25.0.rc2.1.g09a9a1a997




[PULL v3 1/1] slirp: update submodule to v4.2.0+

2020-03-22 Thread Marc-André Lureau
git shortlog
126c04acbabd7ad32c2b018fe10dfac2a3bc1210..55ab21c9a36852915b81f1b41ebaf3b6509dd8ba

5eraph (1):
  Use specific outbound IP address

Akihiro Suda (8):
  remove confusing comment that exists from ancient slirp
  add slirp_new(SlirpConfig *, SlirpCb *, void *)
  allow custom MTU
  add disable_host_loopback (prohibit connections to 127.0.0.1)
  add SlirpConfig version
  emu: remove dead code
  emu: disable by default
  fix a typo in a comment

Anders Waldenborg (1):
  state: fix loading of guestfwd state

Giuseppe Scrivano (1):
  socket: avoid getpeername after shutdown(SHUT_WR)

Jindrich Novy (1):
  Don't leak memory when reallocation fails.

Jordi Pujol Palomer (1):
  fork_exec: correctly parse command lines that contain spaces

Marc-André Lureau (60):
  Merge branch 'AkihiroSuda/libslirp-slirp4netns'
  Merge branch 'fix-typo' into 'master'
  meson: make it subproject friendly
  Merge branch 'meson' into 'master'
  misc: fix compilation warnings
  Merge branch 'fix-shutdown-wr' into 'master'
  sbuf: remove unused and undefined sbcopy() path
  sbuf: check more strictly sbcopy() bounds with offset
  sbuf: replace a comment with a runtime warning
  Replace remaining malloc/free user with glib
  tcp_attach() can no longer fail
  state: can't ENOMEM
  sbuf: use unsigned types
  sbuf: simplify sbreserve()
  dnssearch: use g_strv_length()
  vmstate: silence scan-build warning
  gitlab-ci: run scan-build
  Merge branch 'mem-cleanups' into 'master'
  libslirp.map: bind slirp_new to SLIRP_4.1 version
  meson: fix libtool versioning
  Release v4.1.0
  Merge branch '4.1.0' into 'master'
  CHANGELOG: start unreleased section
  Merge branch 'add-unix' into 'master'
  util: add G_SIZEOF_MEMBER() macro
  Check bootp_filename is not going to be truncated
  bootp: remove extra cast
  bootp: replace simple snprintf() with strcpy()
  tftp: clarify what is actually OACK m_len
  tcp_emu: add more fixme/warnings comments
  util: add slirp_fmt() helpers
  dhcpv6: use slirp_fmt()
  misc: use slirp_fmt0()
  tftp: use slirp_fmt0()
  tcp_ctl: use slirp_fmt()
  tcp_emu: fix unsafe snprintf() usages
  misc: improve error report
  Use g_snprintf()
  util: add gnuc format function attribute to slirp_fmt*
  Merge branch 'aw-guestfwd-state' into 'master'
  Merge branch 'slirp-fmt' into 'master'
  socket: remove extra label and variable
  socket: factor out sotranslate ipv4/ipv6 handling
  socket: remove need for extra scope_id variable
  socket: do not fallback on host loopback if get_dns_addr() failed
  socket: do not fallback on loopback addr for addresses in our mask/prefix
  Prepare for v4.2.0 release
  Merge branch 'translate-fix' into 'master'
  Merge branch 'release-v4.2.0' into 'master'
  changelog: post-release
  changelog: fix link
  .gitlab-ci: add --werror, treat CI build warnings as errors
  Revert "socket: remove need for extra scope_id variable"
  Teach slirp_version_string() to return vcs version
  Merge branch 'mingw-fix' into 'master'
  Merge branch 'vcs-version' into 'master'
  meson: bump required version to 0.49
  build-sys: fix NetBSD build regression
  Merge branch 'netbsd-fix' into 'master'
  build-sys: make libslirp-version.h depend on Makefile

PanNengyuan (1):
  libslirp: fix NULL pointer dereference in tcp_sockclosed

Philippe Mathieu-Daudé (1):
  Add a git-publish configuration file

Prasad J Pandit (4):
  slirp: ncsi: compute checksum for valid data length
  slirp: use correct size while emulating IRC commands
  slirp: use correct size while emulating commands
  slirp: tftp: restrict relative path access

Renzo Davoli (2):
  Add slirp_remove_guestfwd()
  Add slirp_add_unix()

Samuel Thibault (14):
  ip_reass: explain why we should not always update the q pointer
  Merge branch 'comment' into 'master'
  Merge branch 'no-emu' into 'master'
  Fix bogus indent, no source change
  ip_reass: Fix use after free
  Merge branch 'reass2' into 'master'
  Make host receive broadcast packets
  arp: Allow 0.0.0.0 destination address
  Merge branch 'warnings' into 'master'
  Merge branch 'arp_0' into 'master'
  Merge branch 'broadcast' into 'master'
  tcp_emu: Fix oob access
  Merge branch 'oob' into 'master'
  Merge branch 'master' into 'master'

Signed-off-by: Marc-André Lureau 
Reviewed-by: Samuel Thibault 
---
 slirp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/slirp b/slirp
index 126c04acba..55ab21c9a3 16
--- a/slirp
+++ b/slirp
@@ -1 +1 @@
-Subproject commit 126c04acbabd7ad32c2b018fe10dfac2a3bc1210
+Subproject commit 55ab21c9a36852915b81f1b41ebaf3b6509dd8ba
-- 
2.25.0.rc2.1.g09a9a1a997




Re: deprecation of in-tree builds

2020-03-22 Thread Peter Maydell
On Sun, 22 Mar 2020 at 15:29, Aleksandar Markovic
 wrote:
> If the "progress" (in the form of deprecation) is so important, then the 
> authors should devise it so that there is no damage to existing features, 
> and no adverse effects.
>
> In this light, perhaps in-tree builds deprecation in 5.0 is a little premature.

The idea of deprecation is to give advance warning. So it's
better for our users if we announce it earlier, rather than
later. Strictly speaking our deprecation policy is for
user-facing features, not build-time stuff, where we are
less strict about how much notice we give people. But it
seems to me that if it's easy to give some advance notice
then why shouldn't we do so?

I agree that we should obviously make sure that everything
that currently assumes an in-tree build also works with an
out-of-tree build before we drop the support...

(Also, if we don't announce that we're planning to drop
support, nobody's going to report to us issues which
we need to fix :-))

thanks
-- PMM



[PATCH v2 1/1] net/colo-compare.c: Fix memory leak in packet_enqueue()

2020-03-22 Thread Derek Su
The patch is to fix the "pkt" memory leak in packet_enqueue().
The allocated "pkt" needs to be freed if the colo compare
primary or secondary queue is too big.

Signed-off-by: Derek Su 
---
 net/colo-compare.c | 23 +++
 1 file changed, 15 insertions(+), 8 deletions(-)

diff --git a/net/colo-compare.c b/net/colo-compare.c
index 7ee17f2cf8..cdd87b2aa8 100644
--- a/net/colo-compare.c
+++ b/net/colo-compare.c
@@ -120,6 +120,10 @@ enum {
 SECONDARY_IN,
 };
 
+static const char *colo_mode[] = {
+[PRIMARY_IN] = "primary",
+[SECONDARY_IN] = "secondary",
+};
 
 static int compare_chr_send(CompareState *s,
 const uint8_t *buf,
@@ -215,6 +219,7 @@ static int packet_enqueue(CompareState *s, int mode, 
Connection **con)
 ConnectionKey key;
 Packet *pkt = NULL;
 Connection *conn;
+int ret;
 
 if (mode == PRIMARY_IN) {
 pkt = packet_new(s->pri_rs.buf,
@@ -243,16 +248,18 @@ static int packet_enqueue(CompareState *s, int mode, 
Connection **con)
 }
 
 if (mode == PRIMARY_IN) {
-if (!colo_insert_packet(&conn->primary_list, pkt, &conn->pack)) {
-error_report("colo compare primary queue size too big,"
- "drop packet");
-}
+ret = colo_insert_packet(&conn->primary_list, pkt, &conn->pack);
 } else {
-if (!colo_insert_packet(&conn->secondary_list, pkt, &conn->sack)) {
-error_report("colo compare secondary queue size too big,"
- "drop packet");
-}
+ret = colo_insert_packet(&conn->secondary_list, pkt, &conn->sack);
 }
+
+if (!ret) {
+error_report("colo compare %s queue size too big,"
+ "drop packet", colo_mode[mode]);
+packet_destroy(pkt, NULL);
+pkt = NULL;
+}
+
 *con = conn;
 
 return 0;
-- 
2.17.1




[PATCH v2 0/1] COLO: Fix memory leak in packet_enqueue()

2020-03-22 Thread Derek Su
The patch is to fix the memory leak in packet_enqueue().
The allocated "pkt" needs to be freed if the colo compare
primary or secondary queue is too big to insert.

Reproduce steps:
(1) Set up PVM and SVM both with NIC e1000 by the steps described
in the wiki qemu/COLO
(2) Run "iperf3 -s" in PVM
(3) Run "iperf3 -c <PVM-IP> -t 7200"

The memory usage of qemu-system-x86_64 increases as the PVM's QMP 
shows "qemu-system-x86_64: colo compare secondary queue size too big,
drop packet".

Please review, thanks.

V2:
 - Fix incorrect patch format

Derek Su (1):
  net/colo-compare.c: Fix memory leak in packet_enqueue()

 net/colo-compare.c | 23 +++
 1 file changed, 15 insertions(+), 8 deletions(-)

-- 
2.17.1




Re: [PATCH] Update copyright date for user-facing copyright strings

2020-03-22 Thread Peter Maydell
On Mon, 16 Mar 2020 at 11:20, Peter Maydell  wrote:
>
> Update the copyright date to 2020 for the copyright strings which are
> user-facing and represent overall copyright info for all of QEMU.
>
> Reported-by: John Arbuckle 
> Signed-off-by: Peter Maydell 
> ---
>  include/qemu-common.h | 2 +-
>  docs/conf.py  | 2 +-
>  2 files changed, 2 insertions(+), 2 deletions(-)

Applied to master.

-- PMM



[PATCH for-5.0] hw/ppc: Take QEMU lock when calling ppc_dcr_read/write()

2020-03-22 Thread Peter Maydell
The ppc_dcr_read() and ppc_dcr_write() functions call into callbacks
in device code, so we need to hold the QEMU iothread lock while
calling them.  This is the case already for the callsites in
kvmppc_handle_dcr_read/write(), but we must also take the lock when
calling the helpers from TCG.

This fixes a bug where attempting to initialise the PPC405EP
SDRAM will cause an assertion when sdram_map_bcr() attempts
to remap memory regions.

Reported-by: Amit Lazar 
Signed-off-by: Peter Maydell 
---
Amit reported this bug via IRC.

 target/ppc/timebase_helper.c | 40 +++-
 1 file changed, 26 insertions(+), 14 deletions(-)

diff --git a/target/ppc/timebase_helper.c b/target/ppc/timebase_helper.c
index 703bd9ed18b..d16360ab667 100644
--- a/target/ppc/timebase_helper.c
+++ b/target/ppc/timebase_helper.c
@@ -21,6 +21,7 @@
 #include "exec/helper-proto.h"
 #include "exec/exec-all.h"
 #include "qemu/log.h"
+#include "qemu/main-loop.h"
 
 /*/
 /* SPR accesses */
@@ -167,13 +168,19 @@ target_ulong helper_load_dcr(CPUPPCState *env, 
target_ulong dcrn)
 raise_exception_err_ra(env, POWERPC_EXCP_PROGRAM,
POWERPC_EXCP_INVAL |
POWERPC_EXCP_INVAL_INVAL, GETPC());
-} else if (unlikely(ppc_dcr_read(env->dcr_env,
- (uint32_t)dcrn, &val) != 0)) {
-qemu_log_mask(LOG_GUEST_ERROR, "DCR read error %d %03x\n",
-  (uint32_t)dcrn, (uint32_t)dcrn);
-raise_exception_err_ra(env, POWERPC_EXCP_PROGRAM,
-   POWERPC_EXCP_INVAL |
-   POWERPC_EXCP_PRIV_REG, GETPC());
+} else {
+int ret;
+
+qemu_mutex_lock_iothread();
+ret = ppc_dcr_read(env->dcr_env, (uint32_t)dcrn, &val);
+qemu_mutex_unlock_iothread();
+if (unlikely(ret != 0)) {
+qemu_log_mask(LOG_GUEST_ERROR, "DCR read error %d %03x\n",
+  (uint32_t)dcrn, (uint32_t)dcrn);
+raise_exception_err_ra(env, POWERPC_EXCP_PROGRAM,
+   POWERPC_EXCP_INVAL |
+   POWERPC_EXCP_PRIV_REG, GETPC());
+}
 }
 return val;
 }
@@ -185,12 +192,17 @@ void helper_store_dcr(CPUPPCState *env, target_ulong 
dcrn, target_ulong val)
 raise_exception_err_ra(env, POWERPC_EXCP_PROGRAM,
POWERPC_EXCP_INVAL |
POWERPC_EXCP_INVAL_INVAL, GETPC());
-} else if (unlikely(ppc_dcr_write(env->dcr_env, (uint32_t)dcrn,
-  (uint32_t)val) != 0)) {
-qemu_log_mask(LOG_GUEST_ERROR, "DCR write error %d %03x\n",
-  (uint32_t)dcrn, (uint32_t)dcrn);
-raise_exception_err_ra(env, POWERPC_EXCP_PROGRAM,
-   POWERPC_EXCP_INVAL |
-   POWERPC_EXCP_PRIV_REG, GETPC());
+} else {
+int ret;
+qemu_mutex_lock_iothread();
+ret = ppc_dcr_write(env->dcr_env, (uint32_t)dcrn, (uint32_t)val);
+qemu_mutex_unlock_iothread();
+if (unlikely(ret != 0)) {
+qemu_log_mask(LOG_GUEST_ERROR, "DCR write error %d %03x\n",
+  (uint32_t)dcrn, (uint32_t)dcrn);
+raise_exception_err_ra(env, POWERPC_EXCP_PROGRAM,
+   POWERPC_EXCP_INVAL |
+   POWERPC_EXCP_PRIV_REG, GETPC());
+}
 }
 }
-- 
2.20.1




Re: deprecation of in-tree builds

2020-03-22 Thread Aleksandar Markovic
18:21 Ned, 22.03.2020. Peter Maydell  је
написао/ла:
>
> On Sun, 22 Mar 2020 at 15:29, Aleksandar Markovic
>  wrote:
> > If the "progress" (in the form of deprecation) is so important, then
the authors should devise it so that there is no damage to existing
features, and no adverse effects.
> >
> > In this light, perhaps in-tree builds deprecation in 5.0 is a little
premature.
>
> The idea of deprecation is to give advance warning. So it's
> better for our users if we announce it earlier, rather than
> later. Strictly speaking our deprecation policy is for
> user-facing features, not build-time stuff, where we are...

If an end-user feature works only in in-tree builds (so, explicitly said:
not in out-of-tree builds), this is not build-time stuff, but a user-facing
feature issue.

If someone is keen on removing any feature (as is truly in this case), I
expect at least some moderate investigation being done on what could be
affected (prior to announcing deprecation), rather than the attitude "ok, let's
announce deprecation, see if someone starts clamoring, and, if not, we are
good to go with removing". For me, this is slightly disappointing.

I haven't seen anyone doing a sufficiently thorough analysis on what
happens without in-tree builds, and doesn't work in out-of-tree builds in a
proper way.

But, ok, this is just my opinion, probably unpopular within the dev community,
since it requires additional effort by us. Still, we should guide ourselves
by "what is right to do", and not "what is easy to do" principles.

Regards,
Aleksandar

> less strict about how much notice we give people. But it
> seems to me that if it's easy to give some advance notice
> then why shouldn't we do so?
>
> I agree that we should obviously make sure that everything
> that currently assumes an in-tree build also works with an
> out-of-tree build before we drop the support...
>
> (Also, if we don't announce that we're planning to drop
> support, nobody's going to report to us issues which
> we need to fix :-))
>
> thanks
> -- PMM
>


Re: deprecation of in-tree builds

2020-03-22 Thread Peter Maydell
On Sun, 22 Mar 2020 at 19:52, Aleksandar Markovic
 wrote:
> If an end-user feature works only in in-tree builds (so,
> explicitly said: not in out-of-tree builds), this is not
> a build-time stuff, but user-facing feature issue.

gprof is a developer feature, not an end-user-facing
feature. By the latter I mean "some feature that users
who have installed a built binary might be using":
command line stuff, actual functionality in the QEMU
binary, QMP protocol, that kind of thing.

> If someone is keen on removing any feature (as is truly in this case), I 
> expect at least some moderate investigation being done on what could be 
> affected (prior to announcing deprecation), rather than attitude "ok, let's 
> announce deprecation, see if someone start clamoring, and, if not, we are 
> good to go with removing". For me, this slightly disappointing.

Before you told me about the gprof issue, the *only* thing
I was aware of that might break was the coverity scan build,
which is a purely project internal bit of infrastructure.
From my point of view, we did the investigation, in the
sense that for years we have had out-of-tree as our standard
recommended way of building QEMU and the thing we test
in our CI. Anything that breaks out-of-tree is by definition
something that fell through the gaps in our CI and which
we couldn't know about unless somebody tells us about it.
The "announce deprecation" part is the final part of the
process, and sometimes it does, yes, result in somebody
saying "you missed this thing", because we know our CI
is far from perfect.

> I haven't seen anyone doing a sufficiently thorough analysis
>on what happens without in-tree builds, and doesn't work in
>out-of-tree builds in a proper way.

*Everything* is supposed to work in out of tree builds.
If it doesn't that's a bug -- unless people report bugs
we'll never know to fix them. Most developers use out
of tree builds and all our CI is out of tree builds, so
they actually get better ad-hoc and CI coverage than
in-tree. Out-of-tree is overwhelmingly what we build and
what we test, so it's in-tree that breaks more often and
where I'd expect to find more things we didn't realise
were broken.

To be clear, I'm not saying we should pull the rug out
from anybody. I'm saying:
 * we should clearly say what our plans are, with a
   long warning if we can reasonably give longer warning
 * if there's anything that we would accidentally
   be breaking with those plans, we should adjust the
   plans so we don't break things we didn't mean to break

This doesn't seem controversial to me...

thanks
-- PMM



Re: [PULL 22/36] hw/arm/allwinner-h3: add SDRAM controller device

2020-03-22 Thread Niek Linnenbank
Hi Peter,

On Fri, Mar 20, 2020 at 4:46 PM Peter Maydell 
wrote:

> On Thu, 12 Mar 2020 at 16:45, Peter Maydell 
> wrote:
> >
> > From: Niek Linnenbank 
> >
> > In the Allwinner H3 SoC the SDRAM controller is responsible
> > for interfacing with the external Synchronous Dynamic Random
> > Access Memory (SDRAM). Types of memory that the SDRAM controller
> > supports are DDR2/DDR3 and capacities of up to 2GiB. This commit
> > adds emulation support of the Allwinner H3 SDRAM controller.
>
> Hi; Coverity has flagged a possible issue with this patch
> (CID 1421912):
>
> > +static void allwinner_h3_dramc_map_rows(AwH3DramCtlState *s, uint8_t
> row_bits,
> > +uint8_t bank_bits, uint16_t
> page_size)
> > +{
> > +/*
> > + * This function simulates row addressing behavior when bootloader
> > + * software attempts to detect the amount of available SDRAM. In
> U-Boot
> > + * the controller is configured with the widest row addressing
> available.
> > + * Then a pattern is written to RAM at an offset on the row
> boundary size.
> > + * If the value read back equals the value read back from the
> > + * start of RAM, the bootloader knows the amount of row bits.
> > + *
> > + * This function inserts a mirrored memory region when the
> configured row
> > + * bits are not matching the actual emulated memory, to simulate the
> > + * same behavior on hardware as expected by the bootloader.
> > + */
> > +uint8_t row_bits_actual = 0;
> > +
> > +/* Calculate the actual row bits using the ram_size property */
> > +for (uint8_t i = 8; i < 12; i++) {
> > +if (1 << i == s->ram_size) {
> > +row_bits_actual = i + 3;
> > +break;
> > +}
> > +}
> > +
> > +if (s->ram_size == (1 << (row_bits - 3))) {
> > +/* When row bits is the expected value, remove the mirror */
> > +memory_region_set_enabled(&s->row_mirror_alias, false);
> > +trace_allwinner_h3_dramc_rowmirror_disable();
> > +
> > +} else if (row_bits_actual) {
> > +/* Row bits not matching ram_size, install the rows mirror */
> > +hwaddr row_mirror = s->ram_addr + ((1 << (row_bits_actual +
> > +  bank_bits)) *
> page_size);
>
> In this calculation we do the multiply as a signed 32-bit operation,
> which then gets sign-extended to 64 bits for the addition; that
> means that if the multiply result is greater than 0x7fff then
> the upper bits of the result will all be 1s. Is this a "can't happen"
> situation, or should we be using "1ULL" to force a 64-bit multiply?
>

The allwinner_h3_dramcom_write() function invokes this function when the
'Control' register
is written. Basically allwinner_h3_dramc_map_rows() needs to insert a
memory region such
that the bootloader can detect the amount of RAM. Currently U-Boot only
searches up to page_size
values up to 8192, so in practise that would not trigger a result greater
than 0x7fffffff.

However in theory, the Control register can indeed be written with larger
page_size values.
So to be safe, I'll just make a small patch to replace the 1 with 1UL,
thanks!

Regards,
Niek



>
> thanks
> -- PMM
>


-- 
Niek Linnenbank


Re: deprecation of in-tree builds

2020-03-22 Thread Aleksandar Markovic
21:14 Ned, 22.03.2020. Peter Maydell
> *Everything* is supposed to work in out of tree builds.
> If it doesn't that's a bug -- unless people report bugs
> we'll never know to fix them. Most developers use out
> of tree builds and all our CI is out of tree builds, so
> they actually get better ad-hoc and CI coverage than
> in-tree. Out-of-tree is overwhelmingly what we build and
> what we test, so it's in-tree that breaks more often and
> where I'd expect to find more things we didn't realise
> were broken.
>
> To be clear, I'm not saying we should pull the rug out
> from anybody. I'm saying:
>  * we should clearly say what our plans are, with a
>long warning if we can reasonably give longer warning
>  * if there's anything that we would accidentally
>be breaking with those plans, we should adjust the
>plans so we don't break things we didn't mean to break
>
> This doesn't seem controversial to me...
>

OK, given all info you presented in last paragraphs and elsewhere - that
seems to have more emphasis on potential adjustments, and the obligatory
condition that nothing breaks - I agree with the approach you spelled out,
or, in other words, agree with introducing deprecation note.

I hope that we all perceive occasional differences in opinions as our value
(and, I even claim, a key to success of any dev community), and not a
nuisance or a danger.

Thanks! :)

Regards,
Aleksandar

> thanks
> -- PMM


Re: deprecation of in-tree builds

2020-03-22 Thread BALATON Zoltan

On Sun, 22 Mar 2020, Peter Maydell wrote:

On Sun, 22 Mar 2020 at 19:52, Aleksandar Markovic
 wrote:

If an end-user feature works only in in-tree builds (so,
explicitly said: not in out-of-tree builds), this is not
a build-time stuff, but user-facing feature issue.


gprof is a developer feature, not an end-user-facing
feature. By the latter I mean "some feature that users
who have installed a built binary might be using":
command line stuff, actual functionality in the QEMU
binary, QMP protocol, that kind of thing.


If someone is keen on removing any feature (as is truly in this case), I expect at least 
some moderate investigation being done on what could be affected (prior to announcing 
deprecation), rather than attitude "ok, let's announce deprecation, see if someone 
start clamoring, and, if not, we are good to go with removing". For me, this 
slightly disappointing.


Before you told me about the gprof issue, the *only* thing


Was that gprof or gcov?


I was aware of that might break was the coverity scan build,
which is a purely project internal bit of infrastructure.


Plus potentially any scripts people might use to build stuff and distro 
packagers that might use in-tree build. They would suddenly find their 
previously working scripts are now broken and they need to adapt. While 
making sure running configure; make; make install in source tree even if 
it actually does a build in a new build dir it creates automatically would 
be a less annoying change than having to manually manage out-of-tree build 
dirs by those who did not do that so far.


Is it really that difficult to add a CI job to do a git clone then 
configure; make; make install in it to make sure it breaks less often? And 
to make sure this still works after in-tree builds are deprecated and 
removed? I think we can't check every distro packager or don't know what 
users do but supporting the usual way of building packages used by many 
may be worth the little extra effort to not annoy users/developers 
unnecessarily.


Regards,
BALATON Zoltan



[PATCH 1/2] hw/arm/orangepi: check for potential NULL pointer when calling blk_is_available

2020-03-22 Thread Niek Linnenbank
The Orange Pi PC initialization function needs to verify that the SD card
block backend is usable before calling the Boot ROM setup routine. When
calling blk_is_available() the input parameter should not be NULL.
This commit ensures that blk_is_available is only called with non-NULL input.

Reported-by: Peter Maydell 
Signed-off-by: Niek Linnenbank 
---
 hw/arm/orangepi.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hw/arm/orangepi.c b/hw/arm/orangepi.c
index 181f5badab..a9f64c5680 100644
--- a/hw/arm/orangepi.c
+++ b/hw/arm/orangepi.c
@@ -104,7 +104,7 @@ static void orangepi_init(MachineState *machine)
 machine->ram);
 
 /* Load target kernel or start using BootROM */
-if (!machine->kernel_filename && blk_is_available(blk)) {
+if (!machine->kernel_filename && blk && blk_is_available(blk)) {
 /* Use Boot ROM to copy data from SD card to SRAM */
 allwinner_h3_bootrom_setup(h3, blk);
 }
-- 
2.17.1




[PATCH 2/2] hw/misc/allwinner-h3-dramc: enforce 64-bit multiply when calculating row mirror address

2020-03-22 Thread Niek Linnenbank
The allwinner_h3_dramc_map_rows function simulates row addressing behavior
when bootloader software attempts to detect the amount of available SDRAM.

Currently the line that calculates the 64-bit address of the mirrored row
uses a signed 32-bit multiply operation that in theory could result in the
upper 32 bits being all 1s. This commit ensures that the row mirror address
is calculated using only 64-bit operations.

Reported-by: Peter Maydell 
Signed-off-by: Niek Linnenbank 
---
 hw/misc/allwinner-h3-dramc.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/hw/misc/allwinner-h3-dramc.c b/hw/misc/allwinner-h3-dramc.c
index 2b5260260e..f9f05b5384 100644
--- a/hw/misc/allwinner-h3-dramc.c
+++ b/hw/misc/allwinner-h3-dramc.c
@@ -85,8 +85,8 @@ static void allwinner_h3_dramc_map_rows(AwH3DramCtlState *s, 
uint8_t row_bits,
 
 } else if (row_bits_actual) {
 /* Row bits not matching ram_size, install the rows mirror */
-hwaddr row_mirror = s->ram_addr + ((1 << (row_bits_actual +
-  bank_bits)) * page_size);
+hwaddr row_mirror = s->ram_addr + ((1UL << (row_bits_actual +
+bank_bits)) * page_size);
 
 memory_region_set_enabled(&s->row_mirror_alias, true);
 memory_region_set_address(&s->row_mirror_alias, row_mirror);
-- 
2.17.1




[PATCH] hax: Windows doesn't like posix device names

2020-03-22 Thread Volker Rümelin
Patch acb9f95a7c "i386: Fix GCC warning with snprintf when HAX
is enabled" replaced Windows device names with posix device
names. Revert this.

Fixes: acb9f95a7c "i386: Fix GCC warning with snprintf when HAX is enabled"

Signed-off-by: Volker Rümelin 
---
 target/i386/hax-windows.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/target/i386/hax-windows.c b/target/i386/hax-windows.c
index 0ba488c468..863c2bcc19 100644
--- a/target/i386/hax-windows.c
+++ b/target/i386/hax-windows.c
@@ -185,12 +185,12 @@ int hax_mod_version(struct hax_state *hax, struct 
hax_module_version *version)
 
 static char *hax_vm_devfs_string(int vm_id)
 {
-return g_strdup_printf("/dev/hax_vm/vm%02d", vm_id);
+return g_strdup_printf(".\\hax_vm%02d", vm_id);
 }
 
 static char *hax_vcpu_devfs_string(int vm_id, int vcpu_id)
 {
-return g_strdup_printf("/dev/hax_vm%02d/vcpu%02d", vm_id, vcpu_id);
+return g_strdup_printf(".\\hax_vm%02d_vcpu%02d", vm_id, vcpu_id);
 }
 
 int hax_host_create_vm(struct hax_state *hax, int *vmid)
-- 
2.16.4




Re: deprecation of in-tree builds

2020-03-22 Thread Peter Maydell
On Sun, 22 Mar 2020 at 20:46, BALATON Zoltan  wrote:
> On Sun, 22 Mar 2020, Peter Maydell wrote:
> > Before you told me about the gprof issue, the *only* thing
>
> Was that gprof or gcov?

Sorry, gcov; I always get those two mixed up in my head.

> Plus potentially any scripts people might use to build stuff and distro
> packagers that might use in-tree build. They would suddenly find their
> previously working scripts are now broken and they need to adapt.

It is to avoid the "suddenly" part that we announce in advance
that features are going away :-)  More generally, distro packagers
must adapt for any new QEMU release -- new features appear that
they may need to update their dependency lists to handle, old
features are sometimes removed and the corresponding configure
--enable-foo options stop working, existing features need new
dependencies.

Also, we've been recommending out-of-tree builds in our README
since 2015. They're hardly a new thing.

> While
> making sure running configure; make; make install in source tree even if
> it actually does a build in a new build dir it creates automatically would
> be less annoying change than having to manually manage out-of-tree build
> dirs by those who did not do that so far.
>
> Is it really that difficult to add a CI job to do a git clone then
> configure; make; make install in it to make sure it breaks less often?

To be honest, I don't feel very strongly here, except that
I didn't want us to drop in-tree builds without noting it
in the release notes, and my impression from previous list
discussions was that that was the way the project was heading.

If somebody wants to write patches to cause 'configure' to create
a new build tree that's OK I guess (though I'd be dubious because
I think that hidden magic like that is overall often going
to confuse people, and it's still extra machinery in the
makefile and the configure script). But I don't really see
much point in maintaining two different mechanisms which add
complication and where one of them is just not overall as useful
as the other.

I fairly often see posts from people on eg stackoverflow
who are trying to compile and modify QEMU, and they're
usually using in-tree build and I usually mention in a
PS to answering their question that they'd really be
better off with an out-of-tree build. I think we should
stop making it easy to default to a setup that we don't
recommend.

thanks
-- PMM



Re: [PULL 22/36] hw/arm/allwinner-h3: add SDRAM controller device

2020-03-22 Thread Peter Maydell
On Sun, 22 Mar 2020 at 20:23, Niek Linnenbank  wrote:
> On Fri, Mar 20, 2020 at 4:46 PM Peter Maydell  
> wrote:
>> In this calculation we do the multiply as a signed 32-bit operation,
>> which then gets sign-extended to 64 bits for the addition; that
>> means that if the multiply result is greater than 0x7fffffff then
>> the upper bits of the result will all be 1s. Is this a "can't happen"
>> situation, or should we be using "1ULL" to force a 64-bit multiply?

> However in theory, the Control register can indeed be written with larger 
> page_size values.
> So to be safe, I'll just make a small patch to replace the 1 with 1UL, thanks!

"1ULL", not "1UL". The former guarantees you a 64-bit constant,
the latter does not: it depends on the size of 'long' on the host.
(Usually using "UL" suffixes in QEMU is a bug, because either 32
bits was fine, in which case "U" or no suffix would have done,
or you really needed 64 bits, in which case you need "ULL". There
are some exceptions where the code really is working with "long"
values.)

thanks
-- PMM



Re: [PATCH 2/2] hw/misc/allwinner-h3-dramc: enforce 64-bit multiply when calculating row mirror address

2020-03-22 Thread Peter Maydell
On Sun, 22 Mar 2020 at 20:54, Niek Linnenbank  wrote:
>
> The allwinner_h3_dramc_map_rows function simulates row addressing behavior
> when bootloader software attempts to detect the amount of available SDRAM.
>
> Currently the line that calculates the 64-bit address of the mirrored row
> uses a signed 32-bit multiply operation that in theory could result in the
> upper 32-bit be all 1s. This commit ensures that the row mirror address
> is calculated using only 64-bit operations.
>
> Reported-by: Peter Maydell 
> Signed-off-by: Niek Linnenbank 
> ---
>  hw/misc/allwinner-h3-dramc.c | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
>
> diff --git a/hw/misc/allwinner-h3-dramc.c b/hw/misc/allwinner-h3-dramc.c
> index 2b5260260e..f9f05b5384 100644
> --- a/hw/misc/allwinner-h3-dramc.c
> +++ b/hw/misc/allwinner-h3-dramc.c
> @@ -85,8 +85,8 @@ static void allwinner_h3_dramc_map_rows(AwH3DramCtlState 
> *s, uint8_t row_bits,
>
>  } else if (row_bits_actual) {
>  /* Row bits not matching ram_size, install the rows mirror */
> -hwaddr row_mirror = s->ram_addr + ((1 << (row_bits_actual +
> -  bank_bits)) * page_size);
> +hwaddr row_mirror = s->ram_addr + ((1UL << (row_bits_actual +
> +bank_bits)) * page_size);

This needs to be a "ULL" suffix... (I just sent a different email
with the rationale).

thanks
-- PMM



[PATCH v2 2/8] hw/watchdog: Implement full i.MX watchdog support

2020-03-22 Thread Guenter Roeck
Implement full support for the watchdog in i.MX systems.
Pretimeout support is optional because the watchdog hardware on i.MX31
does not support pretimeouts.

Signed-off-by: Guenter Roeck 
---
v2: Fixup of CONFIG_WDT_IMX -> CONFIG_WDT_IMX2 moved to patch 1/8

 hw/watchdog/wdt_imx2.c | 196 +++--
 include/hw/watchdog/wdt_imx2.h |  49 -
 2 files changed, 231 insertions(+), 14 deletions(-)

diff --git a/hw/watchdog/wdt_imx2.c b/hw/watchdog/wdt_imx2.c
index ad1ef02e9e..f5339f3590 100644
--- a/hw/watchdog/wdt_imx2.c
+++ b/hw/watchdog/wdt_imx2.c
@@ -13,24 +13,157 @@
 #include "qemu/bitops.h"
 #include "qemu/module.h"
 #include "sysemu/watchdog.h"
+#include "migration/vmstate.h"
+#include "hw/qdev-properties.h"
 
 #include "hw/watchdog/wdt_imx2.h"
 
-#define IMX2_WDT_WCR_WDABIT(5)  /* -> External Reset WDOG_B */
-#define IMX2_WDT_WCR_SRSBIT(4)  /* -> Software Reset Signal */
+static void imx2_wdt_interrupt(void *opaque)
+{
+IMX2WdtState *s = IMX2_WDT(opaque);
+
+s->wicr |= IMX2_WDT_WICR_WTIS;
+qemu_set_irq(s->irq, 1);
+}
 
-static uint64_t imx2_wdt_read(void *opaque, hwaddr addr,
-  unsigned int size)
+static void imx2_wdt_expired(void *opaque)
 {
+IMX2WdtState *s = IMX2_WDT(opaque);
+
+s->wrsr = IMX2_WDT_WRSR_TOUT;
+
+/* Perform watchdog action if watchdog is enabled */
+if (s->wcr & IMX2_WDT_WCR_WDE) {
+watchdog_perform_action();
+}
+}
+
+static void imx2_wdt_reset(DeviceState *dev)
+{
+IMX2WdtState *s = IMX2_WDT(dev);
+
+s->wcr = IMX2_WDT_WCR_WDA | IMX2_WDT_WCR_SRS;
+s->wsr = 0;
+s->wrsr &= ~(IMX2_WDT_WRSR_TOUT | IMX2_WDT_WRSR_SFTW);
+s->wicr = 4;
+s->wmcr = IMX2_WDT_WMCR_PDE;
+}
+
+static uint64_t imx2_wdt_read(void *opaque, hwaddr addr, unsigned int size)
+{
+IMX2WdtState *s = IMX2_WDT(opaque);
+
+switch (addr) {
+case IMX2_WDT_WCR:
+return s->wcr;
+case IMX2_WDT_WSR:
+return s->wsr;
+case IMX2_WDT_WRSR:
+return s->wrsr;
+case IMX2_WDT_WICR:
+return s->wicr;
+case IMX2_WDT_WMCR:
+return s->wmcr;
+}
 return 0;
 }
 
+static void imx_wdt2_update_itimer(IMX2WdtState *s, bool start)
+{
+bool running = (s->wcr & IMX2_WDT_WCR_WDE) && (s->wcr & IMX2_WDT_WCR_WT);
+bool enabled = s->wicr & IMX2_WDT_WICR_WIE;
+
+ptimer_transaction_begin(s->itimer);
+if (start || !enabled) {
+ptimer_stop(s->itimer);
+}
+if (running && enabled) {
+int count = ptimer_get_count(s->timer);
+int pretimeout = s->wicr & IMX2_WDT_WICR_WICT;
+
+/*
+ * Only (re-)start pretimeout timer if its counter value is larger
+ * than 0. Otherwise it will fire right away and we'll get an
+ * interrupt loop.
+ */
+if (count > pretimeout) {
+ptimer_set_count(s->itimer, count - pretimeout);
+if (start) {
+ptimer_run(s->itimer, 1);
+}
+}
+}
+ptimer_transaction_commit(s->itimer);
+}
+
+static void imx_wdt2_update_timer(IMX2WdtState *s, bool start)
+{
+ptimer_transaction_begin(s->timer);
+if (start) {
+ptimer_stop(s->timer);
+}
+if ((s->wcr & IMX2_WDT_WCR_WDE) && (s->wcr & IMX2_WDT_WCR_WT)) {
+int count = (s->wcr & IMX2_WDT_WCR_WT) >> 8;
+
+ptimer_set_count(s->timer, count);
+if (start) {
+ptimer_run(s->timer, 1);
+}
+}
+ptimer_transaction_commit(s->timer);
+if (s->pretimeout_support) {
+imx_wdt2_update_itimer(s, start);
+}
+}
+
 static void imx2_wdt_write(void *opaque, hwaddr addr,
uint64_t value, unsigned int size)
 {
-if (addr == IMX2_WDT_WCR &&
-(~value & (IMX2_WDT_WCR_WDA | IMX2_WDT_WCR_SRS))) {
-watchdog_perform_action();
+IMX2WdtState *s = IMX2_WDT(opaque);
+
+switch (addr) {
+case IMX2_WDT_WCR:
+s->wcr = value;
+if (!(value & IMX2_WDT_WCR_SRS)) {
+s->wrsr = IMX2_WDT_WRSR_SFTW;
+}
+if (!(value & (IMX2_WDT_WCR_WDA | IMX2_WDT_WCR_SRS)) ||
+(!(value & IMX2_WDT_WCR_WT) && (value & IMX2_WDT_WCR_WDE))) {
+watchdog_perform_action();
+}
+s->wcr |= IMX2_WDT_WCR_SRS;
+imx_wdt2_update_timer(s, true);
+break;
+case IMX2_WDT_WSR:
+if (s->wsr == IMX2_WDT_SEQ1 && value == IMX2_WDT_SEQ2) {
+imx_wdt2_update_timer(s, false);
+}
+s->wsr = value;
+break;
+case IMX2_WDT_WRSR:
+break;
+case IMX2_WDT_WICR:
+if (!s->pretimeout_support) {
+return;
+}
+/* The pretimeout value is write-once */
+if (s->pretimeout_locked) {
+value &= ~IMX2_WDT_WICR_WICT;
+s->wicr &= (IMX2_WDT_WICR_WTIS | IMX2_WDT_WICR_WICT);
+} else {
+s->wicr &= IMX2_WDT_WICR_WTIS;
+}
+s->wicr |= value & (IMX2_WDT_WICR_WI

[PATCH v2 6/8] hw/arm/fsl-imx6ul: Connect watchdog interrupts

2020-03-22 Thread Guenter Roeck
With this commit, the watchdog on mcimx6ul-evk is fully operational,
including pretimeout support.

Signed-off-by: Guenter Roeck 
---
v2: No change

 hw/arm/fsl-imx6ul.c | 10 ++
 1 file changed, 10 insertions(+)

diff --git a/hw/arm/fsl-imx6ul.c b/hw/arm/fsl-imx6ul.c
index 56dfd7cecc..3ecb212da6 100644
--- a/hw/arm/fsl-imx6ul.c
+++ b/hw/arm/fsl-imx6ul.c
@@ -531,12 +531,22 @@ static void fsl_imx6ul_realize(DeviceState *dev, Error 
**errp)
 FSL_IMX6UL_WDOG2_ADDR,
 FSL_IMX6UL_WDOG3_ADDR,
 };
+static const int FSL_IMX6UL_WDOGn_IRQ[FSL_IMX6UL_NUM_WDTS] = {
+FSL_IMX6UL_WDOG1_IRQ,
+FSL_IMX6UL_WDOG2_IRQ,
+FSL_IMX6UL_WDOG3_IRQ,
+};
 
+object_property_set_bool(OBJECT(&s->wdt[i]), true, 
"pretimeout-support",
+ &error_abort);
 object_property_set_bool(OBJECT(&s->wdt[i]), true, "realized",
  &error_abort);
 
 sysbus_mmio_map(SYS_BUS_DEVICE(&s->wdt[i]), 0,
 FSL_IMX6UL_WDOGn_ADDR[i]);
+sysbus_connect_irq(SYS_BUS_DEVICE(&s->wdt[i]), 0,
+   qdev_get_gpio_in(DEVICE(&s->a7mpcore),
+FSL_IMX6UL_WDOGn_IRQ[i]));
 }
 
 /*
-- 
2.17.1




[PATCH v2 0/8] hw/arm: Implement i.MX watchdog support

2020-03-22 Thread Guenter Roeck
The current i.MX watchdog implementation only supports resets.
This patch series implements the full watchdog, including optional
pretimeout support.

Notable changes:
- The existing i.MX watchdog emulation (which only emulates system resets)
  is moved from hw/misc to hw/watchdog and renamed to match the naming
  convention in hw/watchdog (patch 1/8).
- Full watchdog support is implemented in patch 2/8.
- The watchdog is wired up for i.MX25 and i.MX31 emulations (patch 3/8 and
  4/8).
- The watchdog interrupt (for pretimeout support) is connected for
  i.MX6, i.MX6UL, and i.MX7 emulations (patch 5/8, 6/8, and 8/8).
- For i.MX7, various devices are wired up as unimplemented
  devices (patch 7/8). This was necessary to avoid crashes when
  booting recent Linux kernels.

The code was tested with all available emulations.

v2: Select WDT_IMX2 explicitly for supported platforms, not automatically
with IMX
Rebased to current master (as of 3/22)
Fixed typo "octop" -> "ocotp"
Added Reviewed-by: tags where given


Guenter Roeck (8):
  hw: Move i.MX watchdog driver to hw/watchdog
  hw/watchdog: Implement full i.MX watchdog support
  hw/arm/fsl-imx25: Wire up watchdog
  hw/arm/fsl-imx31: Wire up watchdog
  hw/arm/fsl-imx6: Connect watchdog interrupts
  hw/arm/fsl-imx6ul: Connect watchdog interrupts
  hw/arm/fsl-imx7: Instantiate various unimplemented devices
  hw/arm/fsl-imx7: Connect watchdog interrupts

 MAINTAINERS|   2 +
 hw/arm/Kconfig |   5 +
 hw/arm/fsl-imx25.c |  10 ++
 hw/arm/fsl-imx31.c |   6 +
 hw/arm/fsl-imx6.c  |   9 ++
 hw/arm/fsl-imx6ul.c|  10 ++
 hw/arm/fsl-imx7.c  |  35 ++
 hw/misc/Makefile.objs  |   1 -
 hw/misc/imx2_wdt.c |  90 --
 hw/watchdog/Kconfig|   3 +
 hw/watchdog/Makefile.objs  |   1 +
 hw/watchdog/wdt_imx2.c | 262 +
 include/hw/arm/fsl-imx25.h |   5 +
 include/hw/arm/fsl-imx31.h |   4 +
 include/hw/arm/fsl-imx6.h  |   2 +-
 include/hw/arm/fsl-imx6ul.h|   2 +-
 include/hw/arm/fsl-imx7.h  |  23 +++-
 include/hw/misc/imx2_wdt.h |  33 --
 include/hw/watchdog/wdt_imx2.h |  78 
 19 files changed, 454 insertions(+), 127 deletions(-)
 delete mode 100644 hw/misc/imx2_wdt.c
 create mode 100644 hw/watchdog/wdt_imx2.c
 delete mode 100644 include/hw/misc/imx2_wdt.h
 create mode 100644 include/hw/watchdog/wdt_imx2.h



[PATCH v2 4/8] hw/arm/fsl-imx31: Wire up watchdog

2020-03-22 Thread Guenter Roeck
With this patch, the watchdog on i.MX31 emulations is fully operational.

Reviewed-by: Philippe Mathieu-Daudé 
Signed-off-by: Guenter Roeck 
---
v2: Select WDT_IMX2 explicitly
Added Philippe's Reviewed-by: tag

 hw/arm/Kconfig | 1 +
 hw/arm/fsl-imx31.c | 6 ++
 include/hw/arm/fsl-imx31.h | 4 
 3 files changed, 11 insertions(+)

diff --git a/hw/arm/Kconfig b/hw/arm/Kconfig
index 54a49aeabd..9c77f4cbb4 100644
--- a/hw/arm/Kconfig
+++ b/hw/arm/Kconfig
@@ -366,6 +366,7 @@ config FSL_IMX31
 select SERIAL
 select IMX
 select IMX_I2C
+select WDT_IMX2
 select LAN9118
 
 config FSL_IMX6
diff --git a/hw/arm/fsl-imx31.c b/hw/arm/fsl-imx31.c
index 8472d2e911..1e7959863d 100644
--- a/hw/arm/fsl-imx31.c
+++ b/hw/arm/fsl-imx31.c
@@ -63,6 +63,8 @@ static void fsl_imx31_init(Object *obj)
 sysbus_init_child_obj(obj, "gpio[*]", &s->gpio[i], sizeof(s->gpio[i]),
   TYPE_IMX_GPIO);
 }
+
+sysbus_init_child_obj(obj, "wdt", &s->wdt, sizeof(s->wdt), TYPE_IMX2_WDT);
 }
 
 static void fsl_imx31_realize(DeviceState *dev, Error **errp)
@@ -205,6 +207,10 @@ static void fsl_imx31_realize(DeviceState *dev, Error 
**errp)
 gpio_table[i].irq));
 }
 
+/* Watchdog */
+object_property_set_bool(OBJECT(&s->wdt), true, "realized", &error_abort);
+sysbus_mmio_map(SYS_BUS_DEVICE(&s->wdt), 0, FSL_IMX31_WDT_ADDR);
+
 /* On a real system, the first 16k is a `secure boot rom' */
 memory_region_init_rom(&s->secure_rom, OBJECT(dev), "imx31.secure_rom",
FSL_IMX31_SECURE_ROM_SIZE, &err);
diff --git a/include/hw/arm/fsl-imx31.h b/include/hw/arm/fsl-imx31.h
index ac5ca9826a..dd8561b309 100644
--- a/include/hw/arm/fsl-imx31.h
+++ b/include/hw/arm/fsl-imx31.h
@@ -25,6 +25,7 @@
 #include "hw/timer/imx_epit.h"
 #include "hw/i2c/imx_i2c.h"
 #include "hw/gpio/imx_gpio.h"
+#include "hw/watchdog/wdt_imx2.h"
 #include "exec/memory.h"
 #include "target/arm/cpu.h"
 
@@ -49,6 +50,7 @@ typedef struct FslIMX31State {
 IMXEPITState   epit[FSL_IMX31_NUM_EPITS];
 IMXI2CStatei2c[FSL_IMX31_NUM_I2CS];
 IMXGPIOState   gpio[FSL_IMX31_NUM_GPIOS];
+IMX2WdtState   wdt;
 MemoryRegion   secure_rom;
 MemoryRegion   rom;
 MemoryRegion   iram;
@@ -87,6 +89,8 @@ typedef struct FslIMX31State {
 #define FSL_IMX31_GPIO1_SIZE0x4000
 #define FSL_IMX31_GPIO2_ADDR0x53FD
 #define FSL_IMX31_GPIO2_SIZE0x4000
+#define FSL_IMX31_WDT_ADDR  0x53FDC000
+#define FSL_IMX31_WDT_SIZE  0x4000
 #define FSL_IMX31_AVIC_ADDR 0x6800
 #define FSL_IMX31_AVIC_SIZE 0x100
 #define FSL_IMX31_SDRAM0_ADDR   0x8000
-- 
2.17.1




[PATCH v2 1/8] hw: Move i.MX watchdog driver to hw/watchdog

2020-03-22 Thread Guenter Roeck
In preparation for a full implementation, move i.MX watchdog driver
from hw/misc to hw/watchdog. While at it, add the watchdog files
to MAINTAINERS.

Reviewed-by: Philippe Mathieu-Daudé 
Signed-off-by: Guenter Roeck 
---
v2: Instead of auto-selecting WDT_IMX2 if IMX is enabled, select it 
explicitly for each emulation using it.
In Makefile, fix CONFIG_WDT_IMX -> CONFIG_WDT_IMX2
Added Philippe's Reviewed-by: tag

 MAINTAINERS | 2 ++
 hw/arm/Kconfig  | 3 +++
 hw/misc/Makefile.objs   | 1 -
 hw/watchdog/Kconfig | 3 +++
 hw/watchdog/Makefile.objs   | 1 +
 hw/{misc/imx2_wdt.c => watchdog/wdt_imx2.c} | 2 +-
 include/hw/arm/fsl-imx6.h   | 2 +-
 include/hw/arm/fsl-imx6ul.h | 2 +-
 include/hw/arm/fsl-imx7.h   | 2 +-
 include/hw/{misc/imx2_wdt.h => watchdog/wdt_imx2.h} | 0
 10 files changed, 13 insertions(+), 5 deletions(-)
 rename hw/{misc/imx2_wdt.c => watchdog/wdt_imx2.c} (98%)
 rename include/hw/{misc/imx2_wdt.h => watchdog/wdt_imx2.h} (100%)

diff --git a/MAINTAINERS b/MAINTAINERS
index 2b46f3c6a8..1c36f36aad 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -620,8 +620,10 @@ S: Odd Fixes
 F: hw/arm/fsl-imx25.c
 F: hw/arm/imx25_pdk.c
 F: hw/misc/imx25_ccm.c
+F: hw/watchdog/wdt_imx2.c
 F: include/hw/arm/fsl-imx25.h
 F: include/hw/misc/imx25_ccm.h
+F: include/hw/watchdog/wdt_imx2.h
 
 i.MX31 (kzm)
 M: Peter Chubb 
diff --git a/hw/arm/Kconfig b/hw/arm/Kconfig
index 188419dc1e..adf401e827 100644
--- a/hw/arm/Kconfig
+++ b/hw/arm/Kconfig
@@ -374,6 +374,7 @@ config FSL_IMX6
 select IMX_FEC
 select IMX_I2C
 select IMX_USBPHY
+select WDT_IMX2
 select SDHCI
 
 config ASPEED_SOC
@@ -411,6 +412,7 @@ config FSL_IMX7
 select IMX
 select IMX_FEC
 select IMX_I2C
+select WDT_IMX2
 select PCI_EXPRESS_DESIGNWARE
 select SDHCI
 select UNIMP
@@ -424,6 +426,7 @@ config FSL_IMX6UL
 select IMX
 select IMX_FEC
 select IMX_I2C
+select WDT_IMX2
 select SDHCI
 select UNIMP
 
diff --git a/hw/misc/Makefile.objs b/hw/misc/Makefile.objs
index 68aae2eabb..b25181b711 100644
--- a/hw/misc/Makefile.objs
+++ b/hw/misc/Makefile.objs
@@ -44,7 +44,6 @@ common-obj-$(CONFIG_IMX) += imx6_ccm.o
 common-obj-$(CONFIG_IMX) += imx6ul_ccm.o
 obj-$(CONFIG_IMX) += imx6_src.o
 common-obj-$(CONFIG_IMX) += imx7_ccm.o
-common-obj-$(CONFIG_IMX) += imx2_wdt.o
 common-obj-$(CONFIG_IMX) += imx7_snvs.o
 common-obj-$(CONFIG_IMX) += imx7_gpr.o
 common-obj-$(CONFIG_IMX) += imx_rngc.o
diff --git a/hw/watchdog/Kconfig b/hw/watchdog/Kconfig
index 2118d897c9..293209b291 100644
--- a/hw/watchdog/Kconfig
+++ b/hw/watchdog/Kconfig
@@ -14,3 +14,6 @@ config WDT_IB700
 
 config WDT_DIAG288
 bool
+
+config WDT_IMX2
+bool
diff --git a/hw/watchdog/Makefile.objs b/hw/watchdog/Makefile.objs
index 3f536d1cad..631b711d86 100644
--- a/hw/watchdog/Makefile.objs
+++ b/hw/watchdog/Makefile.objs
@@ -4,3 +4,4 @@ common-obj-$(CONFIG_WDT_IB6300ESB) += wdt_i6300esb.o
 common-obj-$(CONFIG_WDT_IB700) += wdt_ib700.o
 common-obj-$(CONFIG_WDT_DIAG288) += wdt_diag288.o
 common-obj-$(CONFIG_ASPEED_SOC) += wdt_aspeed.o
+common-obj-$(CONFIG_WDT_IMX2) += wdt_imx2.o
diff --git a/hw/misc/imx2_wdt.c b/hw/watchdog/wdt_imx2.c
similarity index 98%
rename from hw/misc/imx2_wdt.c
rename to hw/watchdog/wdt_imx2.c
index 2aedfe803a..ad1ef02e9e 100644
--- a/hw/misc/imx2_wdt.c
+++ b/hw/watchdog/wdt_imx2.c
@@ -14,7 +14,7 @@
 #include "qemu/module.h"
 #include "sysemu/watchdog.h"
 
-#include "hw/misc/imx2_wdt.h"
+#include "hw/watchdog/wdt_imx2.h"
 
 #define IMX2_WDT_WCR_WDABIT(5)  /* -> External Reset WDOG_B */
 #define IMX2_WDT_WCR_SRSBIT(4)  /* -> Software Reset Signal */
diff --git a/include/hw/arm/fsl-imx6.h b/include/hw/arm/fsl-imx6.h
index 973bcb72f7..1ebd751324 100644
--- a/include/hw/arm/fsl-imx6.h
+++ b/include/hw/arm/fsl-imx6.h
@@ -21,7 +21,7 @@
 #include "hw/cpu/a9mpcore.h"
 #include "hw/misc/imx6_ccm.h"
 #include "hw/misc/imx6_src.h"
-#include "hw/misc/imx2_wdt.h"
+#include "hw/watchdog/wdt_imx2.h"
 #include "hw/char/imx_serial.h"
 #include "hw/timer/imx_gpt.h"
 #include "hw/timer/imx_epit.h"
diff --git a/include/hw/arm/fsl-imx6ul.h b/include/hw/arm/fsl-imx6ul.h
index 1a0bab8daa..37c89cc5f9 100644
--- a/include/hw/arm/fsl-imx6ul.h
+++ b/include/hw/arm/fsl-imx6ul.h
@@ -24,7 +24,7 @@
 #include "hw/misc/imx7_snvs.h"
 #include "hw/misc/imx7_gpr.h"
 #include "hw/intc/imx_gpcv2.h"
-#include "hw/misc/imx2_wdt.h"
+#include "hw/watchdog/wdt_imx2.h"
 #include "hw/gpio/imx_gpio.h"
 #include "hw/char/imx_serial.h"
 #include "hw/timer/imx_gpt.h"
diff --git a/include/hw/arm/fsl-imx7.h b/include/hw/arm/fsl-imx7.h
index 706aef2e7e..3a0041c4c2 100644
--- a/include/hw/arm/fsl-imx7.h
+++ b/include/hw/arm/fsl-imx7.h
@@ -26,7 +26,7 @@
 #include "hw/misc/imx7_snvs.h"
 #include "hw/misc/imx7_gpr.

[PATCH v2 5/8] hw/arm/fsl-imx6: Connect watchdog interrupts

2020-03-22 Thread Guenter Roeck
With this patch applied, the watchdog in the sabrelite emulation
is fully operational, including pretimeout support.

Signed-off-by: Guenter Roeck 
---
v2: No change

 hw/arm/fsl-imx6.c | 9 +
 1 file changed, 9 insertions(+)

diff --git a/hw/arm/fsl-imx6.c b/hw/arm/fsl-imx6.c
index 13f1bf23a6..f58c85aa8c 100644
--- a/hw/arm/fsl-imx6.c
+++ b/hw/arm/fsl-imx6.c
@@ -433,11 +433,20 @@ static void fsl_imx6_realize(DeviceState *dev, Error 
**errp)
 FSL_IMX6_WDOG1_ADDR,
 FSL_IMX6_WDOG2_ADDR,
 };
+static const int FSL_IMX6_WDOGn_IRQ[FSL_IMX6_NUM_WDTS] = {
+FSL_IMX6_WDOG1_IRQ,
+FSL_IMX6_WDOG2_IRQ,
+};
 
+object_property_set_bool(OBJECT(&s->wdt[i]), true, 
"pretimeout-support",
+ &error_abort);
 object_property_set_bool(OBJECT(&s->wdt[i]), true, "realized",
  &error_abort);
 
 sysbus_mmio_map(SYS_BUS_DEVICE(&s->wdt[i]), 0, FSL_IMX6_WDOGn_ADDR[i]);
+sysbus_connect_irq(SYS_BUS_DEVICE(&s->wdt[i]), 0,
+   qdev_get_gpio_in(DEVICE(&s->a9mpcore),
+FSL_IMX6_WDOGn_IRQ[i]));
 }
 
 /* ROM memory */
-- 
2.17.1




[PATCH v2 3/8] hw/arm/fsl-imx25: Wire up watchdog

2020-03-22 Thread Guenter Roeck
With this commit, the watchdog on imx25-pdk is fully operational,
including pretimeout support.

Reviewed-by: Philippe Mathieu-Daudé 
Signed-off-by: Guenter Roeck 
---
v2: Select WDT_IMX2 explicitly
Added Philippe's Reviewed-by: tag

 hw/arm/Kconfig |  1 +
 hw/arm/fsl-imx25.c | 10 ++
 include/hw/arm/fsl-imx25.h |  5 +
 3 files changed, 16 insertions(+)

diff --git a/hw/arm/Kconfig b/hw/arm/Kconfig
index adf401e827..54a49aeabd 100644
--- a/hw/arm/Kconfig
+++ b/hw/arm/Kconfig
@@ -358,6 +358,7 @@ config FSL_IMX25
 select IMX
 select IMX_FEC
 select IMX_I2C
+select WDT_IMX2
 select DS1338
 
 config FSL_IMX31
diff --git a/hw/arm/fsl-imx25.c b/hw/arm/fsl-imx25.c
index 6f1a82ce3d..cdaa79c26b 100644
--- a/hw/arm/fsl-imx25.c
+++ b/hw/arm/fsl-imx25.c
@@ -87,6 +87,7 @@ static void fsl_imx25_init(Object *obj)
   TYPE_CHIPIDEA);
 }
 
+sysbus_init_child_obj(obj, "wdt", &s->wdt, sizeof(s->wdt), TYPE_IMX2_WDT);
 }
 
 static void fsl_imx25_realize(DeviceState *dev, Error **errp)
@@ -302,6 +303,15 @@ static void fsl_imx25_realize(DeviceState *dev, Error 
**errp)
 usb_table[i].irq));
 }
 
+/* Watchdog */
+object_property_set_bool(OBJECT(&s->wdt), true, "pretimeout-support",
+ &error_abort);
+object_property_set_bool(OBJECT(&s->wdt), true, "realized", &error_abort);
+sysbus_mmio_map(SYS_BUS_DEVICE(&s->wdt), 0, FSL_IMX25_WDT_ADDR);
+sysbus_connect_irq(SYS_BUS_DEVICE(&s->wdt), 0,
+  qdev_get_gpio_in(DEVICE(&s->avic),
+   FSL_IMX25_WDT_IRQ));
+
 /* initialize 2 x 16 KB ROM */
 memory_region_init_rom(&s->rom[0], OBJECT(dev), "imx25.rom0",
FSL_IMX25_ROM0_SIZE, &err);
diff --git a/include/hw/arm/fsl-imx25.h b/include/hw/arm/fsl-imx25.h
index 5e196bbf05..9e228dacea 100644
--- a/include/hw/arm/fsl-imx25.h
+++ b/include/hw/arm/fsl-imx25.h
@@ -29,6 +29,7 @@
 #include "hw/gpio/imx_gpio.h"
 #include "hw/sd/sdhci.h"
 #include "hw/usb/chipidea.h"
+#include "hw/watchdog/wdt_imx2.h"
 #include "exec/memory.h"
 #include "target/arm/cpu.h"
 
@@ -60,6 +61,7 @@ typedef struct FslIMX25State {
 IMXGPIOState   gpio[FSL_IMX25_NUM_GPIOS];
 SDHCIState esdhc[FSL_IMX25_NUM_ESDHCS];
 ChipideaState  usb[FSL_IMX25_NUM_USBS];
+IMX2WdtState   wdt;
 MemoryRegion   rom[2];
 MemoryRegion   iram;
 MemoryRegion   iram_alias;
@@ -229,6 +231,8 @@ typedef struct FslIMX25State {
 #define FSL_IMX25_GPIO1_SIZE0x4000
 #define FSL_IMX25_GPIO2_ADDR0x53FD
 #define FSL_IMX25_GPIO2_SIZE0x4000
+#define FSL_IMX25_WDT_ADDR  0x53FDC000
+#define FSL_IMX25_WDT_SIZE  0x4000
 #define FSL_IMX25_USB1_ADDR 0x53FF4000
 #define FSL_IMX25_USB1_SIZE 0x0200
 #define FSL_IMX25_USB2_ADDR 0x53FF4400
@@ -268,5 +272,6 @@ typedef struct FslIMX25State {
 #define FSL_IMX25_ESDHC2_IRQ8
 #define FSL_IMX25_USB1_IRQ  37
 #define FSL_IMX25_USB2_IRQ  35
+#define FSL_IMX25_WDT_IRQ   55
 
 #endif /* FSL_IMX25_H */
-- 
2.17.1




[PATCH v2 7/8] hw/arm/fsl-imx7: Instantiate various unimplemented devices

2020-03-22 Thread Guenter Roeck
Instantiating PWM, CAN, CAAM, and OCOTP devices is necessary to avoid
crashes when booting mainline Linux.

Signed-off-by: Guenter Roeck 
---
v2: "octop" -> "ocotp"

 hw/arm/fsl-imx7.c | 24 
 include/hw/arm/fsl-imx7.h | 16 
 2 files changed, 40 insertions(+)

diff --git a/hw/arm/fsl-imx7.c b/hw/arm/fsl-imx7.c
index 119b281a50..d6cf7c48ce 100644
--- a/hw/arm/fsl-imx7.c
+++ b/hw/arm/fsl-imx7.c
@@ -459,6 +459,30 @@ static void fsl_imx7_realize(DeviceState *dev, Error 
**errp)
  */
 create_unimplemented_device("sdma", FSL_IMX7_SDMA_ADDR, 
FSL_IMX7_SDMA_SIZE);
 
+/*
+ * CAAM
+ */
+create_unimplemented_device("caam", FSL_IMX7_CAAM_ADDR, 
FSL_IMX7_CAAM_SIZE);
+
+/*
+ * PWM
+ */
+create_unimplemented_device("pwm1", FSL_IMX7_PWM1_ADDR, 
FSL_IMX7_PWMn_SIZE);
+create_unimplemented_device("pwm2", FSL_IMX7_PWM2_ADDR, 
FSL_IMX7_PWMn_SIZE);
+create_unimplemented_device("pwm3", FSL_IMX7_PWM3_ADDR, 
FSL_IMX7_PWMn_SIZE);
+create_unimplemented_device("pwm4", FSL_IMX7_PWM4_ADDR, 
FSL_IMX7_PWMn_SIZE);
+
+/*
+ * CAN
+ */
+create_unimplemented_device("can1", FSL_IMX7_CAN1_ADDR, 
FSL_IMX7_CANn_SIZE);
+create_unimplemented_device("can2", FSL_IMX7_CAN2_ADDR, 
FSL_IMX7_CANn_SIZE);
+
+/*
+ * OCOTP
+ */
+create_unimplemented_device("ocotp", FSL_IMX7_OCOTP_ADDR,
+FSL_IMX7_OCOTP_SIZE);
 
 object_property_set_bool(OBJECT(&s->gpr), true, "realized",
  &error_abort);
diff --git a/include/hw/arm/fsl-imx7.h b/include/hw/arm/fsl-imx7.h
index 3a0041c4c2..47826da2b7 100644
--- a/include/hw/arm/fsl-imx7.h
+++ b/include/hw/arm/fsl-imx7.h
@@ -113,6 +113,9 @@ enum FslIMX7MemoryMap {
 FSL_IMX7_IOMUXC_GPR_ADDR  = 0x3034,
 FSL_IMX7_IOMUXCn_SIZE = 0x1000,
 
+FSL_IMX7_OCOTP_ADDR   = 0x3035,
+FSL_IMX7_OCOTP_SIZE   = 0x1,
+
 FSL_IMX7_ANALOG_ADDR  = 0x3036,
 FSL_IMX7_SNVS_ADDR= 0x3037,
 FSL_IMX7_CCM_ADDR = 0x3038,
@@ -124,11 +127,24 @@ enum FslIMX7MemoryMap {
 FSL_IMX7_ADC2_ADDR= 0x3062,
 FSL_IMX7_ADCn_SIZE= 0x1000,
 
+FSL_IMX7_PWM1_ADDR= 0x3066,
+FSL_IMX7_PWM2_ADDR= 0x3067,
+FSL_IMX7_PWM3_ADDR= 0x3068,
+FSL_IMX7_PWM4_ADDR= 0x3069,
+FSL_IMX7_PWMn_SIZE= 0x1,
+
 FSL_IMX7_PCIE_PHY_ADDR= 0x306D,
 FSL_IMX7_PCIE_PHY_SIZE= 0x1,
 
 FSL_IMX7_GPC_ADDR = 0x303A,
 
+FSL_IMX7_CAAM_ADDR= 0x3090,
+FSL_IMX7_CAAM_SIZE= 0x4,
+
+FSL_IMX7_CAN1_ADDR= 0x30A0,
+FSL_IMX7_CAN2_ADDR= 0x30A1,
+FSL_IMX7_CANn_SIZE= 0x1,
+
 FSL_IMX7_I2C1_ADDR= 0x30A2,
 FSL_IMX7_I2C2_ADDR= 0x30A3,
 FSL_IMX7_I2C3_ADDR= 0x30A4,
-- 
2.17.1




[PATCH v2 8/8] hw/arm/fsl-imx7: Connect watchdog interrupts

2020-03-22 Thread Guenter Roeck
i.MX7 supports watchdog pretimeout interrupts. With this commit,
the watchdog in mcimx7d-sabre is fully operational, including
pretimeout support.

Signed-off-by: Guenter Roeck 
---
v2: No change

 hw/arm/fsl-imx7.c | 11 +++
 include/hw/arm/fsl-imx7.h |  5 +
 2 files changed, 16 insertions(+)

diff --git a/hw/arm/fsl-imx7.c b/hw/arm/fsl-imx7.c
index d6cf7c48ce..89c3b64c06 100644
--- a/hw/arm/fsl-imx7.c
+++ b/hw/arm/fsl-imx7.c
@@ -447,11 +447,22 @@ static void fsl_imx7_realize(DeviceState *dev, Error 
**errp)
 FSL_IMX7_WDOG3_ADDR,
 FSL_IMX7_WDOG4_ADDR,
 };
+static const int FSL_IMX7_WDOGn_IRQ[FSL_IMX7_NUM_WDTS] = {
+FSL_IMX7_WDOG1_IRQ,
+FSL_IMX7_WDOG2_IRQ,
+FSL_IMX7_WDOG3_IRQ,
+FSL_IMX7_WDOG4_IRQ,
+};
 
+object_property_set_bool(OBJECT(&s->wdt[i]), true, 
"pretimeout-support",
+ &error_abort);
 object_property_set_bool(OBJECT(&s->wdt[i]), true, "realized",
  &error_abort);
 
 sysbus_mmio_map(SYS_BUS_DEVICE(&s->wdt[i]), 0, FSL_IMX7_WDOGn_ADDR[i]);
+sysbus_connect_irq(SYS_BUS_DEVICE(&s->wdt[i]), 0,
+   qdev_get_gpio_in(DEVICE(&s->a7mpcore),
+FSL_IMX7_WDOGn_IRQ[i]));
 }
 
 /*
diff --git a/include/hw/arm/fsl-imx7.h b/include/hw/arm/fsl-imx7.h
index 47826da2b7..da977f9ffb 100644
--- a/include/hw/arm/fsl-imx7.h
+++ b/include/hw/arm/fsl-imx7.h
@@ -228,6 +228,11 @@ enum FslIMX7IRQs {
 FSL_IMX7_USB2_IRQ = 42,
 FSL_IMX7_USB3_IRQ = 40,
 
+FSL_IMX7_WDOG1_IRQ= 78,
+FSL_IMX7_WDOG2_IRQ= 79,
+FSL_IMX7_WDOG3_IRQ= 10,
+FSL_IMX7_WDOG4_IRQ= 109,
+
 FSL_IMX7_PCI_INTA_IRQ = 125,
 FSL_IMX7_PCI_INTB_IRQ = 124,
 FSL_IMX7_PCI_INTC_IRQ = 123,
-- 
2.17.1




[PATCH 1/6] Add BCM2835 SOC MPHI emulation

2020-03-22 Thread Paul Zimmerman
Add BCM2835 SOC MPHI emulation. It is very basic, only providing
the FIQ interrupt needed to allow the dwc-otg USB host controller
driver in the Raspbian kernel to function.

Signed-off-by: Paul Zimmerman 
---
 hw/arm/bcm2835_peripherals.c |  17 +++
 hw/misc/Makefile.objs|   1 +
 hw/misc/bcm2835_mphi.c   | 215 +++
 include/hw/arm/bcm2835_peripherals.h |   2 +
 include/hw/misc/bcm2835_mphi.h   |  50 +++
 5 files changed, 285 insertions(+)
 create mode 100644 hw/misc/bcm2835_mphi.c
 create mode 100644 include/hw/misc/bcm2835_mphi.h

diff --git a/hw/arm/bcm2835_peripherals.c b/hw/arm/bcm2835_peripherals.c
index 17207ae07e..dd7e6883cb 100644
--- a/hw/arm/bcm2835_peripherals.c
+++ b/hw/arm/bcm2835_peripherals.c
@@ -123,6 +123,10 @@ static void bcm2835_peripherals_init(Object *obj)
 sysbus_init_child_obj(obj, "gpio", &s->gpio, sizeof(s->gpio),
   TYPE_BCM2835_GPIO);
 
+/* Mphi */
+sysbus_init_child_obj(obj, "mphi", &s->mphi, sizeof(s->mphi),
+  TYPE_BCM2835_MPHI);
+
 object_property_add_const_link(OBJECT(&s->gpio), "sdbus-sdhci",
OBJECT(&s->sdhci.sdbus), &error_abort);
 object_property_add_const_link(OBJECT(&s->gpio), "sdbus-sdhost",
@@ -367,6 +371,19 @@ static void bcm2835_peripherals_realize(DeviceState *dev, 
Error **errp)
 return;
 }
 
+/* Mphi */
+object_property_set_bool(OBJECT(&s->mphi), true, "realized", &err);
+if (err) {
+error_propagate(errp, err);
+return;
+}
+
+memory_region_add_subregion(&s->peri_mr, MPHI_OFFSET,
+sysbus_mmio_get_region(SYS_BUS_DEVICE(&s->mphi), 0));
+sysbus_connect_irq(SYS_BUS_DEVICE(&s->mphi), 0,
+qdev_get_gpio_in_named(DEVICE(&s->ic), BCM2835_IC_GPU_IRQ,
+   INTERRUPT_HOSTPORT));
+
 create_unimp(s, &s->armtmr, "bcm2835-sp804", ARMCTRL_TIMER0_1_OFFSET, 
0x40);
 create_unimp(s, &s->cprman, "bcm2835-cprman", CPRMAN_OFFSET, 0x1000);
 create_unimp(s, &s->a2w, "bcm2835-a2w", A2W_OFFSET, 0x1000);
diff --git a/hw/misc/Makefile.objs b/hw/misc/Makefile.objs
index 68aae2eabb..91085cc21b 100644
--- a/hw/misc/Makefile.objs
+++ b/hw/misc/Makefile.objs
@@ -57,6 +57,7 @@ common-obj-$(CONFIG_OMAP) += omap_l4.o
 common-obj-$(CONFIG_OMAP) += omap_sdrc.o
 common-obj-$(CONFIG_OMAP) += omap_tap.o
 common-obj-$(CONFIG_RASPI) += bcm2835_mbox.o
+common-obj-$(CONFIG_RASPI) += bcm2835_mphi.o
 common-obj-$(CONFIG_RASPI) += bcm2835_property.o
 common-obj-$(CONFIG_RASPI) += bcm2835_rng.o
 common-obj-$(CONFIG_RASPI) += bcm2835_thermal.o
diff --git a/hw/misc/bcm2835_mphi.c b/hw/misc/bcm2835_mphi.c
new file mode 100644
index 00..32433ce156
--- /dev/null
+++ b/hw/misc/bcm2835_mphi.c
@@ -0,0 +1,215 @@
+/*
+ * BCM2835 SOC MPHI emulation
+ *
+ * Very basic emulation, only providing the FIQ interrupt needed to
+ * allow the dwc-otg USB host controller driver in the Raspbian kernel
+ * to function.
+ *
+ * Copyright (c) 2020 Paul Zimmerman 
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "hw/misc/bcm2835_mphi.h"
+#include "qemu/error-report.h"
+#include "qemu/main-loop.h"
+
+//#define MPHI_DEBUG  1
+
+#ifdef MPHI_DEBUG
+#define DPRINTF(fmt, ...) fprintf(stderr, fmt, ## __VA_ARGS__)
+#else
+#define DPRINTF(fmt, ...) do {} while(0)
+#endif
+
+static inline void mphi_raise_irq(BCM2835MphiState *s)
+{
+DPRINTF("mphi_raise_irq, s %p\n", s);
+qemu_set_irq(s->irq, 1);
+}
+
+static inline void mphi_lower_irq(BCM2835MphiState *s)
+{
+DPRINTF("mphi_lower_irq, s %p\n", s);
+qemu_set_irq(s->irq, 0);
+}
+
+static uint64_t mphi_reg_read(void *ptr, hwaddr addr, unsigned size)
+{
+BCM2835MphiState *s = ptr;
+uint32_t reg = s->regbase + addr;
+uint32_t val = 0;
+
+switch (reg) {
+case 0x28:  /* outdda */
+val = s->outdda;
+break;
+case 0x2c:  /* outddb */
+val = s->outddb;
+break;
+case 0x4c:  /* ctrl */
+val = s->ctrl;
+val |= 1 << 17;
+break;
+case 0x50:  /* intstat */
+val = s->intstat;
+break;
+case 0x1f0: /* swirq_set */
+val = s->swirq_set;
+break;
+case 0x1f4: /* swirq_clr */
+val = s->swirq_clr;
+break;
+default:
+break;
+}
+
+DPRINTF("mphi_reg_read   0x%04lx val 0x%08x\n", addr, val);
+return val;
+}
+
+static void mphi_reg_writ

[PATCH 0/6] dwc-hsotg (aka dwc2) USB host controller emulation

2020-03-22 Thread Paul Zimmerman
This patch series adds emulation for the dwc-hsotg USB controller,
which is used on the Raspberry Pi 3 and earlier, as well as a number
of other development boards. The main benefit for Raspberry Pi is that
this enables networking on these boards, since the network adapter is
attached via USB.

The emulation is working quite well, I have tested with USB network,
mass storage, mouse, keyboard, and tablet. I have tested with the dwc2
driver in the upstream Linux kernel, and with the dwc-otg driver in the
Raspbian kernel. One remaining issue is that USB redirection does not
work: when I tried connecting to a USB stick on the host, the device
generated babble errors and was unusable. I will continue to work on
this issue.

The patch series also includes a very basic emulation of the MPHI
device on the Raspberry Pi SOC, which provides the FIQ interrupt that
is used by the dwc-otg driver in the Raspbian kernel. But that driver
still does not work in full FIQ mode, so it is necessary to add a
parameter to the kernel command line ("dwc_otg.fiq_fsm_enable=0") to
make it work.

Some open questions:

1) I have used printf-based debug statements while developing the
   code, and have not implemented any tracing statements. I'm not
   sure if that is considered acceptable for new code?

2) I have imported the register description file from the Linux
   kernel. This file is licensed GPL-2 only, is this OK?

3) The emulation does not respect the max-packet size when
   transferring packets. Since the dwc-hsotg controller only has
   one root port, and the Qemu USB hub is only full-speed, that
   means every device connected has to run at full speed. That
   makes mass-storage devices in particular run very slowly. Using
   transfers greater than max-packet size alleviates this. Is this
   OK? I think the EHCI emulation does the same thing, since its
   transfers seem to run at greater than real world transfer rates.

4) I have only implemented host mode for now. Would there be any
   benefit to implementing gadget mode as well? It seems it could
   be useful to emulate gadget devices in Qemu, but I am not sure
   if Qemu currently offers any support for that?

Thanks for your time,
Paul

---

Paul Zimmerman (6):
  Add BCM2835 SOC MPHI emulation
  dwc-hsotg USB host controller register definitions
  dwc-hsotg USB host controller state definitions
  dwc-hsotg USB host controller emulation
  Add short-packet handling to usb-storage driver
  Wire in the dwc-hsotg USB host controller emulation

 hw/arm/bcm2835_peripherals.c |   38 +-
 hw/misc/Makefile.objs|1 +
 hw/misc/bcm2835_mphi.c   |  215 
 hw/usb/Kconfig   |5 +
 hw/usb/Makefile.objs |1 +
 hw/usb/dev-storage.c |   15 +-
 hw/usb/hcd-dwc2.c| 1353 ++
 hw/usb/hcd-dwc2.h|  180 
 include/hw/arm/bcm2835_peripherals.h |5 +-
 include/hw/misc/bcm2835_mphi.h   |   50 +
 include/hw/usb/dwc2-regs.h   |  895 +
 11 files changed, 2755 insertions(+), 3 deletions(-)
 create mode 100644 hw/misc/bcm2835_mphi.c
 create mode 100644 hw/usb/hcd-dwc2.c
 create mode 100644 hw/usb/hcd-dwc2.h
 create mode 100644 include/hw/misc/bcm2835_mphi.h
 create mode 100644 include/hw/usb/dwc2-regs.h

-- 
2.17.1




[PATCH 6/6] Wire in the dwc-hsotg USB host controller emulation

2020-03-22 Thread Paul Zimmerman
Wire the dwc-hsotg (dwc2) emulation into Qemu

Signed-off-by: Paul Zimmerman 
---
 hw/arm/bcm2835_peripherals.c | 21 -
 hw/usb/Kconfig   |  5 +
 hw/usb/Makefile.objs |  1 +
 include/hw/arm/bcm2835_peripherals.h |  3 ++-
 4 files changed, 28 insertions(+), 2 deletions(-)

diff --git a/hw/arm/bcm2835_peripherals.c b/hw/arm/bcm2835_peripherals.c
index dd7e6883cb..932d084a50 100644
--- a/hw/arm/bcm2835_peripherals.c
+++ b/hw/arm/bcm2835_peripherals.c
@@ -127,6 +127,13 @@ static void bcm2835_peripherals_init(Object *obj)
 sysbus_init_child_obj(obj, "mphi", &s->mphi, sizeof(s->mphi),
   TYPE_BCM2835_MPHI);
 
+/* DWC2 */
+sysbus_init_child_obj(obj, "dwc2", &s->dwc2, sizeof(s->dwc2),
+  TYPE_DWC2_USB);
+
+object_property_add_const_link(OBJECT(&s->dwc2), "dma-mr",
+   OBJECT(&s->gpu_bus_mr), &error_abort);
+
 object_property_add_const_link(OBJECT(&s->gpio), "sdbus-sdhci",
OBJECT(&s->sdhci.sdbus), &error_abort);
 object_property_add_const_link(OBJECT(&s->gpio), "sdbus-sdhost",
@@ -384,6 +391,19 @@ static void bcm2835_peripherals_realize(DeviceState *dev, 
Error **errp)
 qdev_get_gpio_in_named(DEVICE(&s->ic), BCM2835_IC_GPU_IRQ,
INTERRUPT_HOSTPORT));
 
+/* DWC2 */
+object_property_set_bool(OBJECT(&s->dwc2), true, "realized", &err);
+if (err) {
+error_propagate(errp, err);
+return;
+}
+
+memory_region_add_subregion(&s->peri_mr, USB_OTG_OFFSET,
+sysbus_mmio_get_region(SYS_BUS_DEVICE(&s->dwc2), 0));
+sysbus_connect_irq(SYS_BUS_DEVICE(&s->dwc2), 0,
+qdev_get_gpio_in_named(DEVICE(&s->ic), BCM2835_IC_GPU_IRQ,
+   INTERRUPT_USB));
+
 create_unimp(s, &s->armtmr, "bcm2835-sp804", ARMCTRL_TIMER0_1_OFFSET, 
0x40);
 create_unimp(s, &s->cprman, "bcm2835-cprman", CPRMAN_OFFSET, 0x1000);
 create_unimp(s, &s->a2w, "bcm2835-a2w", A2W_OFFSET, 0x1000);
@@ -397,7 +417,6 @@ static void bcm2835_peripherals_realize(DeviceState *dev, 
Error **errp)
 create_unimp(s, &s->otp, "bcm2835-otp", OTP_OFFSET, 0x80);
 create_unimp(s, &s->dbus, "bcm2835-dbus", DBUS_OFFSET, 0x8000);
 create_unimp(s, &s->ave0, "bcm2835-ave0", AVE0_OFFSET, 0x8000);
-create_unimp(s, &s->dwc2, "dwc-usb2", USB_OTG_OFFSET, 0x1000);
 create_unimp(s, &s->sdramc, "bcm2835-sdramc", SDRAMC_OFFSET, 0x100);
 }
 
diff --git a/hw/usb/Kconfig b/hw/usb/Kconfig
index 464348ba14..d4d8c37c28 100644
--- a/hw/usb/Kconfig
+++ b/hw/usb/Kconfig
@@ -46,6 +46,11 @@ config USB_MUSB
 bool
 select USB
 
+config USB_DWC2
+bool
+default y
+select USB
+
 config TUSB6010
 bool
 select USB_MUSB
diff --git a/hw/usb/Makefile.objs b/hw/usb/Makefile.objs
index 66835e5bf7..fa5c3fa1b8 100644
--- a/hw/usb/Makefile.objs
+++ b/hw/usb/Makefile.objs
@@ -12,6 +12,7 @@ common-obj-$(CONFIG_USB_EHCI_SYSBUS) += hcd-ehci-sysbus.o
 common-obj-$(CONFIG_USB_XHCI) += hcd-xhci.o
 common-obj-$(CONFIG_USB_XHCI_NEC) += hcd-xhci-nec.o
 common-obj-$(CONFIG_USB_MUSB) += hcd-musb.o
+common-obj-$(CONFIG_USB_DWC2) += hcd-dwc2.o
 
 common-obj-$(CONFIG_TUSB6010) += tusb6010.o
 common-obj-$(CONFIG_IMX)  += chipidea.o
diff --git a/include/hw/arm/bcm2835_peripherals.h 
b/include/hw/arm/bcm2835_peripherals.h
index 77958ca60e..0841d54614 100644
--- a/include/hw/arm/bcm2835_peripherals.h
+++ b/include/hw/arm/bcm2835_peripherals.h
@@ -26,6 +26,7 @@
 #include "hw/sd/bcm2835_sdhost.h"
 #include "hw/gpio/bcm2835_gpio.h"
 #include "hw/timer/bcm2835_systmr.h"
+#include "hw/usb/hcd-dwc2.h"
 #include "hw/misc/unimp.h"
 
 #define TYPE_BCM2835_PERIPHERALS "bcm2835-peripherals"
@@ -66,7 +67,7 @@ typedef struct BCM2835PeripheralState {
 UnimplementedDeviceState ave0;
 UnimplementedDeviceState bscsl;
 UnimplementedDeviceState smi;
-UnimplementedDeviceState dwc2;
+DWC2State dwc2;
 UnimplementedDeviceState sdramc;
 } BCM2835PeripheralState;
 
-- 
2.17.1




[PATCH 3/6] dwc-hsotg USB host controller state definitions

2020-03-22 Thread Paul Zimmerman
Add the dwc-hsotg (dwc2) USB host controller state definitions.
Mostly based on hw/usb/hcd-ehci.h.

Signed-off-by: Paul Zimmerman 
---
 hw/usb/hcd-dwc2.h | 180 ++
 1 file changed, 180 insertions(+)
 create mode 100644 hw/usb/hcd-dwc2.h

diff --git a/hw/usb/hcd-dwc2.h b/hw/usb/hcd-dwc2.h
new file mode 100644
index 00..c5f5037b91
--- /dev/null
+++ b/hw/usb/hcd-dwc2.h
@@ -0,0 +1,180 @@
+/*
+ * dwc-hsotg (dwc2) USB host controller state definitions
+ *
+ * Based on hw/usb/hcd-ehci.h
+ *
+ * Copyright (c) 2020 Paul Zimmerman 
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef HW_USB_DWC2_H
+#define HW_USB_DWC2_H
+
+#include "qemu/timer.h"
+#include "hw/irq.h"
+#include "hw/sysbus.h"
+#include "hw/usb.h"
+#include "sysemu/dma.h"
+
+#define DWC2_MMIO_SIZE  0x11000
+
+#define NB_PORTS1   /* Number of downstream ports */
+#define NB_CHAN 8   /* Number of host channels */
+#define MAX_XFER_SIZE   65536   /* Max transfer size expected in HCTSIZ */
+
+typedef struct DWC2Packet DWC2Packet;
+typedef struct DWC2State DWC2State;
+
+enum async_state {
+DWC2_ASYNC_NONE = 0,
+DWC2_ASYNC_INITIALIZED,
+DWC2_ASYNC_INFLIGHT,
+DWC2_ASYNC_FINISHED,
+};
+
+struct DWC2Packet {
+USBPacket packet;
+USBDevice *dev;
+USBEndpoint *ep;
+uint32_t index;
+uint32_t epnum;
+uint32_t mps;
+uint32_t pid;
+uint32_t pcnt;
+uint32_t len;
+bool small;
+bool needs_service;
+enum async_state async;
+};
+
+struct DWC2State {
+SysBusDevice parent_obj;
+USBBus bus;
+DeviceState *device;
+qemu_irq irq;
+MemoryRegion *dma_mr;
+AddressSpace *as;
+AddressSpace dma_as;
+MemoryRegion mem;
+MemoryRegion mem_glbreg;
+MemoryRegion mem_fszreg;
+MemoryRegion mem_hreg0;
+MemoryRegion mem_hreg1;
+MemoryRegion mem_pcgreg;
+MemoryRegion mem_hreg2;
+uint16_t glbregbase;
+uint16_t fszregbase;
+uint16_t hreg0base;
+uint16_t hreg1base;
+uint16_t pcgregbase;
+uint16_t hreg2base;
+uint16_t portnr;
+
+union {
+uint32_t glbreg[0x70/sizeof(uint32_t)];
+struct {
+uint32_t gotgctl;   /* 00 */
+uint32_t gotgint;   /* 04 */
+uint32_t gahbcfg;   /* 08 */
+uint32_t gusbcfg;   /* 0c */
+uint32_t grstctl;   /* 10 */
+uint32_t gintsts;   /* 14 */
+uint32_t gintmsk;   /* 18 */
+uint32_t grxstsr;   /* 1c */
+uint32_t grxstsp;   /* 20 */
+uint32_t grxfsiz;   /* 24 */
+uint32_t gnptxfsiz; /* 28 */
+uint32_t gnptxsts;  /* 2c */
+uint32_t gi2cctl;   /* 30 */
+uint32_t gpvndctl;  /* 34 */
+uint32_t ggpio; /* 38 */
+uint32_t guid;  /* 3c */
+uint32_t gsnpsid;   /* 40 */
+uint32_t ghwcfg1;   /* 44 */
+uint32_t ghwcfg2;   /* 48 */
+uint32_t ghwcfg3;   /* 4c */
+uint32_t ghwcfg4;   /* 50 */
+uint32_t glpmcfg;   /* 54 */
+uint32_t gpwrdn;/* 58 */
+uint32_t gdfifocfg; /* 5c */
+uint32_t gadpctl;   /* 60 */
+uint32_t grefclk;   /* 64 */
+uint32_t gintmsk2;  /* 68 */
+uint32_t gintsts2;  /* 6c */
+};
+};
+
+union {
+uint32_t fszreg[0x4/sizeof(uint32_t)];
+struct {
+uint32_t hptxfsiz;  /* 100 */
+};
+};
+
+union {
+uint32_t hreg0[0x44/sizeof(uint32_t)];
+struct {
+uint32_t hcfg;  /* 400 */
+uint32_t hfir;  /* 404 */
+uint32_t hfnum; /* 408 */
+uint32_t rsvd0; /* 40c */
+uint32_t hptxsts;   /* 410 */
+uint32_t haint; /* 414 */
+uint32_t haintmsk;  /* 418 */
+uint32_t hflbaddr;  /* 41c */
+uint32_t rsvd1[8];  /* 420-43c */
+uint32_t hprt0; /* 440 */
+};
+};
+
+uint32_t hreg1[0x20*NB_CHAN/sizeof(uint32_t)];
+#define hcchar(_ch) hreg1[((_ch) << 3) + 0] /* 500, 520, ... */
+#define hcsplt(_ch) hreg1[((_ch) << 3) + 1] /* 504, 524, ... */
+#define hcint(_ch)  hreg1[((_ch) << 3) + 2] /* 508, 528, ... */
+#define hcintmsk(_ch)   hreg1[((_ch) << 3) + 3] /* 50c, 52c,

[PATCH 2/6] dwc-hsotg USB host controller register definitions

2020-03-22 Thread Paul Zimmerman
Import the dwc2 register definitions file from the Linux kernel.
This is a copy of drivers/usb/dwc2/hw.h from the mainline Linux
kernel, the only changes being two instances of 'u32' changed to
'uint32_t' to allow it to compile.

Signed-off-by: Paul Zimmerman 
---
 include/hw/usb/dwc2-regs.h | 895 +
 1 file changed, 895 insertions(+)
 create mode 100644 include/hw/usb/dwc2-regs.h

diff --git a/include/hw/usb/dwc2-regs.h b/include/hw/usb/dwc2-regs.h
new file mode 100644
index 00..96dc07fb6f
--- /dev/null
+++ b/include/hw/usb/dwc2-regs.h
@@ -0,0 +1,895 @@
+// SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause)
+/*
+ * hw.h - DesignWare HS OTG Controller hardware definitions
+ *
+ * Copyright 2004-2013 Synopsys, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *notice, this list of conditions, and the following disclaimer,
+ *without modification.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *notice, this list of conditions and the following disclaimer in the
+ *documentation and/or other materials provided with the distribution.
+ * 3. The names of the above-listed copyright holders may not be used
+ *to endorse or promote products derived from this software without
+ *specific prior written permission.
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+ * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __DWC2_HW_H__
+#define __DWC2_HW_H__
+
+#define HSOTG_REG(x)   (x)
+
+#define GOTGCTLHSOTG_REG(0x000)
+#define GOTGCTL_CHIRPENBIT(27)
+#define GOTGCTL_MULT_VALID_BC_MASK (0x1f << 22)
+#define GOTGCTL_MULT_VALID_BC_SHIFT22
+#define GOTGCTL_OTGVER BIT(20)
+#define GOTGCTL_BSESVLDBIT(19)
+#define GOTGCTL_ASESVLDBIT(18)
+#define GOTGCTL_DBNC_SHORT BIT(17)
+#define GOTGCTL_CONID_BBIT(16)
+#define GOTGCTL_DBNCE_FLTR_BYPASS  BIT(15)
+#define GOTGCTL_DEVHNPEN   BIT(11)
+#define GOTGCTL_HSTSETHNPENBIT(10)
+#define GOTGCTL_HNPREQ BIT(9)
+#define GOTGCTL_HSTNEGSCS  BIT(8)
+#define GOTGCTL_SESREQ BIT(1)
+#define GOTGCTL_SESREQSCS  BIT(0)
+
+#define GOTGINTHSOTG_REG(0x004)
+#define GOTGINT_DBNCE_DONE BIT(19)
+#define GOTGINT_A_DEV_TOUT_CHG BIT(18)
+#define GOTGINT_HST_NEG_DETBIT(17)
+#define GOTGINT_HST_NEG_SUC_STS_CHNG   BIT(9)
+#define GOTGINT_SES_REQ_SUC_STS_CHNG   BIT(8)
+#define GOTGINT_SES_END_DETBIT(2)
+
+#define GAHBCFGHSOTG_REG(0x008)
+#define GAHBCFG_AHB_SINGLE BIT(23)
+#define GAHBCFG_NOTI_ALL_DMA_WRIT  BIT(22)
+#define GAHBCFG_REM_MEM_SUPP   BIT(21)
+#define GAHBCFG_P_TXF_EMP_LVL  BIT(8)
+#define GAHBCFG_NP_TXF_EMP_LVL BIT(7)
+#define GAHBCFG_DMA_EN BIT(5)
+#define GAHBCFG_HBSTLEN_MASK   (0xf << 1)
+#define GAHBCFG_HBSTLEN_SHIFT  1
+#define GAHBCFG_HBSTLEN_SINGLE 0
+#define GAHBCFG_HBSTLEN_INCR   1
+#define GAHBCFG_HBSTLEN_INCR4  3
+#define GAHBCFG_HBSTLEN_INCR8  5
+#define GAHBCFG_HBSTLEN_INCR16 7
+#define GAHBCFG_GLBL_INTR_EN   BIT(0)
+#define GAHBCFG_CTRL_MASK  (GAHBCFG_P_TXF_EMP_LVL | \
+GAHBCFG_NP_TXF_EMP_LVL | \
+GAHBCFG_DMA_EN | \
+GAHBCFG_GLBL_INTR_EN)
+
+#define GUSBCFGHSOTG_REG(0x00C)
+#define GUSBCFG_FORCEDEVMODE   BIT(30)
+#define GUSBCFG_FORCEHOSTMODE  BIT(29)
+#define GUSBCFG_TXENDDELAY BIT(28)
+#define GUSBCFG_ICTRAFFI

[PATCH 4/6] dwc-hsotg USB host controller emulation

2020-03-22 Thread Paul Zimmerman
Add the dwc-hsotg (dwc2) USB host controller emulation code.
Based on hw/usb/hcd-ehci.c and hw/usb/hcd-ohci.c.

Note that to use this with the dwc-otg driver in the Raspbian
kernel, you must pass the option "dwc_otg.fiq_fsm_enable=0" on
the kernel command line.

I have used some on-line sources of information while developing
this emulation, including:

http://www.capital-micro.com/PDF/CME-M7_Family_User_Guide_EN.pdf
has a pretty complete description of the controller starting on
page 370.

https://sourceforge.net/p/wive-ng/wive-ng-mt/ci/master/tree/docs/DataSheets/RT3050_5x_V2.0_081408_0902.pdf
has a description of the controller registers starting on page
130.

Signed-off-by: Paul Zimmerman 
---
 hw/usb/hcd-dwc2.c | 1353 +
 1 file changed, 1353 insertions(+)
 create mode 100644 hw/usb/hcd-dwc2.c

diff --git a/hw/usb/hcd-dwc2.c b/hw/usb/hcd-dwc2.c
new file mode 100644
index 00..fd33190611
--- /dev/null
+++ b/hw/usb/hcd-dwc2.c
@@ -0,0 +1,1353 @@
+/*
+ * dwc-hsotg (dwc2) USB host controller emulation
+ *
+ * Based on hw/usb/hcd-ehci.c and hw/usb/hcd-ohci.c
+ *
+ * Copyright (c) 2020 Paul Zimmerman 
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "hw/usb/dwc2-regs.h"
+#include "hw/usb/hcd-dwc2.h"
+#include "qemu/error-report.h"
+#include "qemu/main-loop.h"
+
+//#define DWC2_DEBUG  1
+
+#ifdef DWC2_DEBUG
+#define DPRINTF(fmt, ...) fprintf(stderr, fmt, ## __VA_ARGS__)
+#else
+#define DPRINTF(fmt, ...) do {} while(0)
+#endif
+
+#define DWC2_DO_SOFS1
+
+#define USB_HZ_FS   1200
+#define USB_HZ_HS   9600
+
+/* nifty macros from Arnon's EHCI version  */
+#define get_field(data, field) \
+(((data) & field##_MASK) >> field##_SHIFT)
+
+#define set_field(data, newval, field) do { \
+uint32_t val = *data; \
+val &= ~ field##_MASK; \
+val |= ((newval) << field##_SHIFT) & field##_MASK; \
+*data = val; \
+} while (0)
+
+#define get_bit(data, bitmask) \
+(!!((data) & bitmask))
+
+/* update irq line */
+static inline void dwc2_update_irq(DWC2State *s)
+{
+static int oldlevel;
+int level = 0;
+
+if ((s->gintsts & s->gintmsk) && (s->gahbcfg & GAHBCFG_GLBL_INTR_EN))
+level = 1;
+if (level != oldlevel) {
+/*DPRINTF("dwc2_update_irq, sts 0x%08x msk 0x%08x level %d\n",
+s->gintsts, s->gintmsk, level);*/
+oldlevel = level;
+qemu_set_irq(s->irq, level);
+}
+}
+
+/* flag interrupt condition */
+static inline void dwc2_raise_global_irq(DWC2State *s, uint32_t intr)
+{
+/*DPRINTF("dwc2_raise_global_irq, 0x%08x\n", intr);*/
+s->gintsts |= intr;
+dwc2_update_irq(s);
+}
+
+static inline void dwc2_lower_global_irq(DWC2State *s, uint32_t intr)
+{
+/*DPRINTF("dwc2_lower_global_irq, 0x%08x\n", intr);*/
+s->gintsts &= ~intr;
+dwc2_update_irq(s);
+}
+
+static inline void dwc2_raise_host_irq(DWC2State *s, uint32_t intr)
+{
+/*DPRINTF("dwc2_raise_host_irq, 0x%04x\n", intr);*/
+s->haint |= intr;
+s->haint &= 0x;
+if (s->haint & s->haintmsk) {
+dwc2_raise_global_irq(s, GINTSTS_HCHINT);
+}
+}
+
+static inline void dwc2_lower_host_irq(DWC2State *s, uint32_t intr)
+{
+/*DPRINTF("dwc2_lower_host_irq, 0x%04x\n", intr);*/
+s->haint &= ~intr;
+if (!(s->haint & s->haintmsk)) {
+dwc2_lower_global_irq(s, GINTSTS_HCHINT);
+}
+}
+
+static inline void dwc2_update_hc_irq(DWC2State *s, int index)
+{
+uint32_t intr = 1 << (index >> 3);
+
+/*DPRINTF("dwc2_update_hc_irq, hcint%d 0x%04x hcintmsk%d 0x%04x\n",
+index >> 3, s->hreg1[index + 2], index >> 3, s->hreg1[index + 
3]);*/
+if (s->hreg1[index + 2] & s->hreg1[index + 3]) {
+dwc2_raise_host_irq(s, intr);
+} else {
+dwc2_lower_host_irq(s, intr);
+}
+}
+
+/* set a timer for EOF */
+static void dwc2_eof_timer(DWC2State *s)
+{
+#ifdef DWC2_DO_SOFS
+timer_mod(s->eof_timer, s->sof_time + s->usb_frame_time);
+#endif
+}
+
+#ifdef DWC2_DO_SOFS
+/* Set a timer for EOF and generate a SOF event */
+static void dwc2_sof(DWC2State *s)
+{
+s->sof_time += s->usb_frame_time;
+dwc2_eof_timer(s);
+dwc2_raise_global_irq(s, GINTSTS_SOF);
+}
+
+/* Do frame processing on frame boundary */
+static void dwc2_frame_boundary(void *opaque)
+{
+DWC2State *s = opaque;
+
+/* Frame boundary, so do EOF stuff here */
+
+/* Increment frame number */
+s->frame_number = (s->frame_number + 1) & 0x;
+

[PATCH 5/6] Add short-packet handling to usb-storage driver

2020-03-22 Thread Paul Zimmerman
The dwc-hsotg (dwc2) USB host depends on a short packet to
indicate the end of an IN transfer. The usb-storage driver
currently doesn't provide this, so fix it.

Signed-off-by: Paul Zimmerman 
---
 hw/usb/dev-storage.c | 15 ++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/hw/usb/dev-storage.c b/hw/usb/dev-storage.c
index 5c4b57b06b..ae3c550042 100644
--- a/hw/usb/dev-storage.c
+++ b/hw/usb/dev-storage.c
@@ -229,6 +229,9 @@ static void usb_msd_copy_data(MSDState *s, USBPacket *p)
 usb_packet_copy(p, scsi_req_get_buf(s->req) + s->scsi_off, len);
 s->scsi_len -= len;
 s->scsi_off += len;
+if (len > s->data_len) {
+len = s->data_len;
+}
 s->data_len -= len;
 if (s->scsi_len == 0 || s->data_len == 0) {
 scsi_req_continue(s->req);
@@ -303,6 +306,9 @@ static void usb_msd_command_complete(SCSIRequest *req, 
uint32_t status, size_t r
 if (s->data_len) {
 int len = (p->iov.size - p->actual_length);
 usb_packet_skip(p, len);
+if (len > s->data_len) {
+len = s->data_len;
+}
 s->data_len -= len;
 }
 if (s->data_len == 0) {
@@ -469,6 +475,9 @@ static void usb_msd_handle_data(USBDevice *dev, USBPacket 
*p)
 int len = p->iov.size - p->actual_length;
 if (len) {
 usb_packet_skip(p, len);
+if (len > s->data_len) {
+len = s->data_len;
+}
 s->data_len -= len;
 if (s->data_len == 0) {
 s->mode = USB_MSDM_CSW;
@@ -528,13 +537,17 @@ static void usb_msd_handle_data(USBDevice *dev, USBPacket 
*p)
 int len = p->iov.size - p->actual_length;
 if (len) {
 usb_packet_skip(p, len);
+if (len > s->data_len) {
+len = s->data_len;
+}
 s->data_len -= len;
 if (s->data_len == 0) {
 s->mode = USB_MSDM_CSW;
 }
 }
 }
-if (p->actual_length < p->iov.size) {
+if (p->actual_length < p->iov.size && (p->short_not_ok ||
+s->scsi_len >= p->ep->max_packet_size)) {
 DPRINTF("Deferring packet %p [wait data-in]\n", p);
 s->packet = p;
 p->status = USB_RET_ASYNC;
-- 
2.17.1




Re: [PATCH 0/6] dwc-hsotg (aka dwc2) USB host contoller emulation

2020-03-22 Thread Paul Zimmerman
On Sun, Mar 22, 2020 at 3:28 PM Paul Zimmerman  wrote:

< snip >

> 2) I have imported the register description file from the Linux
>kernel. This file is licensed GPL-2 only, is this OK?

Never mind about the license, I see it is actually GPL 2.0 or later. I
guess the question remains whether it is OK to copy the file directly
like this?

Thanks,
Paul


Re: [PATCH 0/6] dwc-hsotg (aka dwc2) USB host contoller emulation

2020-03-22 Thread no-reply
Patchew URL: https://patchew.org/QEMU/2020032726.10244-1-pauld...@gmail.com/



Hi,

This series seems to have some coding style problems. See output below for
more information:

Subject: [PATCH 0/6] dwc-hsotg (aka dwc2) USB host contoller emulation
Message-id: 2020032726.10244-1-pauld...@gmail.com
Type: series

=== TEST SCRIPT BEGIN ===
#!/bin/bash
git rev-parse base > /dev/null || exit 0
git config --local diff.renamelimit 0
git config --local diff.renames True
git config --local diff.algorithm histogram
./scripts/checkpatch.pl --mailback base..
=== TEST SCRIPT END ===

Switched to a new branch 'test'
d4c9fd6 Wire in the dwc-hsotg USB host controller emulation
eaf83d0 Add short-packet handling to usb-storage driver
a36ab17 dwc-hsotg USB host controller emulation
19ac434 dwc-hsotg USB host controller state definitions
0b0cf63 dwc-hsotg USB host controller register definitions
30fe183 Add BCM2835 SOC MPHI emulation

=== OUTPUT BEGIN ===
1/6 Checking commit 30fe183b9fcc (Add BCM2835 SOC MPHI emulation)
WARNING: added, moved or deleted file(s), does MAINTAINERS need updating?
#62: 
new file mode 100644

ERROR: do not use C99 // comments
#93: FILE: hw/misc/bcm2835_mphi.c:27:
+//#define MPHI_DEBUG  1

ERROR: space required before the open parenthesis '('
#98: FILE: hw/misc/bcm2835_mphi.c:32:
+#define DPRINTF(fmt, ...) do {} while(0)

ERROR: braces {} are necessary for all arms of this statement
#165: FILE: hw/misc/bcm2835_mphi.c:99:
+if (val & (1 << 29))
[...]

ERROR: braces {} are necessary for all arms of this statement
#171: FILE: hw/misc/bcm2835_mphi.c:105:
+if (val & (1 << 16))
[...]

ERROR: braces {} are necessary for all arms of this statement
#177: FILE: hw/misc/bcm2835_mphi.c:111:
+if (val & ((1 << 16) | (1 << 29)))
[...]

ERROR: braces {} are necessary for all arms of this statement
#196: FILE: hw/misc/bcm2835_mphi.c:130:
+if (do_irq > 0)
[...]
+else if (do_irq < 0)
[...]

ERROR: braces {} are necessary for all arms of this statement
#198: FILE: hw/misc/bcm2835_mphi.c:132:
+else if (do_irq < 0)
[...]

total: 7 errors, 1 warnings, 315 lines checked

Patch 1/6 has style problems, please review.  If any of these errors
are false positives report them to the maintainer, see
CHECKPATCH in MAINTAINERS.

2/6 Checking commit 0b0cf6320d53 (dwc-hsotg USB host controller register 
definitions)
WARNING: added, moved or deleted file(s), does MAINTAINERS need updating?
#16: 
new file mode 100644

WARNING: architecture specific defines should be avoided
#58: FILE: include/hw/usb/dwc2-regs.h:38:
+#ifndef __DWC2_HW_H__

ERROR: code indent should never use tabs
#61: FILE: include/hw/usb/dwc2-regs.h:41:
+#define HSOTG_REG(x)^I(x)$

ERROR: code indent should never use tabs
#63: FILE: include/hw/usb/dwc2-regs.h:43:
+#define GOTGCTL^I^I^I^IHSOTG_REG(0x000)$

ERROR: code indent should never use tabs
#64: FILE: include/hw/usb/dwc2-regs.h:44:
+#define GOTGCTL_CHIRPEN^I^I^IBIT(27)$

ERROR: code indent should never use tabs
#65: FILE: include/hw/usb/dwc2-regs.h:45:
+#define GOTGCTL_MULT_VALID_BC_MASK^I(0x1f << 22)$

ERROR: code indent should never use tabs
#66: FILE: include/hw/usb/dwc2-regs.h:46:
+#define GOTGCTL_MULT_VALID_BC_SHIFT^I22$

ERROR: code indent should never use tabs
#67: FILE: include/hw/usb/dwc2-regs.h:47:
+#define GOTGCTL_OTGVER^I^I^IBIT(20)$

ERROR: code indent should never use tabs
#68: FILE: include/hw/usb/dwc2-regs.h:48:
+#define GOTGCTL_BSESVLD^I^I^IBIT(19)$

ERROR: code indent should never use tabs
#69: FILE: include/hw/usb/dwc2-regs.h:49:
+#define GOTGCTL_ASESVLD^I^I^IBIT(18)$

ERROR: code indent should never use tabs
#70: FILE: include/hw/usb/dwc2-regs.h:50:
+#define GOTGCTL_DBNC_SHORT^I^IBIT(17)$

ERROR: code indent should never use tabs
#71: FILE: include/hw/usb/dwc2-regs.h:51:
+#define GOTGCTL_CONID_B^I^I^IBIT(16)$

ERROR: code indent should never use tabs
#72: FILE: include/hw/usb/dwc2-regs.h:52:
+#define GOTGCTL_DBNCE_FLTR_BYPASS^IBIT(15)$

ERROR: code indent should never use tabs
#73: FILE: include/hw/usb/dwc2-regs.h:53:
+#define GOTGCTL_DEVHNPEN^I^IBIT(11)$

ERROR: code indent should never use tabs
#74: FILE: include/hw/usb/dwc2-regs.h:54:
+#define GOTGCTL_HSTSETHNPEN^I^IBIT(10)$

ERROR: code indent should never use tabs
#75: FILE: include/hw/usb/dwc2-regs.h:55:
+#define GOTGCTL_HNPREQ^I^I^IBIT(9)$

ERROR: code indent should never use tabs
#76: FILE: include/hw/usb/dwc2-regs.h:56:
+#define GOTGCTL_HSTNEGSCS^I^IBIT(8)$

ERROR: code indent should never use tabs
#77: FILE: include/hw/usb/dwc2-regs.h:57:
+#define GOTGCTL_SESREQ^I^I^IBIT(1)$

ERROR: code indent should never use tabs
#78: FILE: include/hw/usb/dwc2-regs.h:58:
+#define GOTGCTL_SESREQSCS^I^IBIT(0)$

ERROR: code indent should never use tabs
#80: FILE: include/hw/usb/dwc2-regs.h:60:
+#define GOTGINT^I^I^I^IHSOTG_REG(0x004)$

ERROR: code indent should never use tabs
#81: FILE: include/hw/usb/dwc2-regs.h:61:
+#define GOTGINT_DBNCE_DONE^I^IBIT(19)$

ERROR: code indent should never use tabs
#82: F

Re: deprecation of in-tree builds

2020-03-22 Thread BALATON Zoltan

On Sun, 22 Mar 2020, Peter Maydell wrote:

On Sun, 22 Mar 2020 at 20:46, BALATON Zoltan  wrote:

On Sun, 22 Mar 2020, Peter Maydell wrote:

Before you told me about the gprof issue, the *only* thing


Was that gprof or gcov?


Sorry, gcov; I always get those two mixed up in my head.


Plus potentially any scripts people might use to build stuff and distro
packagers that might use in-tree build. They would suddenly find their
previously working scripts are now broken and they need to adapt.


It is to avoid the "suddenly" part that we announce in advance
that features are going away :-)  More generally, distro packagers


People usually don't read docs so they'll find out "suddenly" anyway...


must adapt for any new QEMU release -- new features appear that
they may need to update their dependency lists to handle, old
features are sometimes removed and the corresponding configure
--enable-foo options stop working, existing features need new
dependencies.


It's true they'll occasionally have to adapt and probably most packagers 
already use out-of-tree builds but if there's something that can make 
their life easier without putting too much burden on QEMU I think it's a 
good thing to make it convenient for people compiling QEMU.



If somebody wants to write patches to cause 'configure' to create
a new build tree that's OK I guess (though I'd be dubious because
I think that hidden magic like that is overall often going
to confuse people, and it's still extra machinery in the
makefile and the configure script). But I don't really see
much point in maintaining two different mechanisms which add
complication and where one of them is just not overall as useful
as the other.


A convenience Makefile in top level to call make -C builddir and maybe a 
few lines in configure to create it does not seem to be too much extra 
machinery but I don't know the build system that well. Also it does not 
have to be hidden, it can print a message to user to tell it created a 
build dir and that the build results are found there. It's probably less 
confusing to people who never used out-of-tree builds before and relieves 
them from having to learn something new which a lot of people like to 
avoid if possible.



I fairly often see posts from people on eg stackoverflow
who are trying to compile and modify QEMU, and they're
usually using in-tree build and I usually mention in a
PS to answering their question that they'd really be
better off with an out-of-tree build. I think we should
stop making it easy to default to a setup that we don't
recommend.


I think this proves my point that a lot of people expect this to work and 
the answer should not be to annoy them and force them to change their ways 
but to support it in some way. If this is a problem with the make system 
then auto-creating build dir could avoid this problem without imposing 
things on people so if it's not too much effort it's probably worth doing.


Regards,
BALATON Zoltan



Re: [PATCH v14 Kernel 5/7] vfio iommu: Update UNMAP_DMA ioctl to get dirty bitmap before unmap

2020-03-22 Thread Yan Zhao
On Sat, Mar 21, 2020 at 03:28:21AM +0800, Alex Williamson wrote:
> On Sat, 21 Mar 2020 00:44:32 +0530
> Kirti Wankhede  wrote:
> 
> > On 3/20/2020 9:17 PM, Alex Williamson wrote:
> > > On Fri, 20 Mar 2020 09:40:39 -0600
> > > Alex Williamson  wrote:
> > >   
> > >> On Fri, 20 Mar 2020 04:35:29 -0400
> > >> Yan Zhao  wrote:
> > >>  
> > >>> On Thu, Mar 19, 2020 at 03:41:12AM +0800, Kirti Wankhede wrote:  
> >  DMA mapped pages, including those pinned by mdev vendor drivers, might
> >  get unpinned and unmapped while migration is active and device is still
> >  running. For example, in pre-copy phase while guest driver could access
> >  those pages, host device or vendor driver can dirty these mapped pages.
> >  Such pages should be marked dirty so as to maintain memory consistency
> >  for a user making use of dirty page tracking.
> > 
> >  To get bitmap during unmap, user should set flag
> >  VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP, bitmap memory should be 
> >  allocated and
> >  zeroed by user space application. Bitmap size and page size should be 
> >  set
> >  by user application.
> > 
> >  Signed-off-by: Kirti Wankhede 
> >  Reviewed-by: Neo Jia 
> >  ---
> >    drivers/vfio/vfio_iommu_type1.c | 55 
> >  ++---
> >    include/uapi/linux/vfio.h   | 11 +
> >    2 files changed, 62 insertions(+), 4 deletions(-)
> > 
> >  diff --git a/drivers/vfio/vfio_iommu_type1.c 
> >  b/drivers/vfio/vfio_iommu_type1.c
> >  index d6417fb02174..aa1ac30f7854 100644
> >  --- a/drivers/vfio/vfio_iommu_type1.c
> >  +++ b/drivers/vfio/vfio_iommu_type1.c
> >  @@ -939,7 +939,8 @@ static int verify_bitmap_size(uint64_t npages, 
> >  uint64_t bitmap_size)
> >    }
> >    
> >    static int vfio_dma_do_unmap(struct vfio_iommu *iommu,
> >  -   struct vfio_iommu_type1_dma_unmap *unmap)
> >  +   struct vfio_iommu_type1_dma_unmap *unmap,
> >  +   struct vfio_bitmap *bitmap)
> >    {
> > uint64_t mask;
> > struct vfio_dma *dma, *dma_last = NULL;
> >  @@ -990,6 +991,10 @@ static int vfio_dma_do_unmap(struct vfio_iommu 
> >  *iommu,
> >  * will be returned if these conditions are not met.  The v2 
> >  interface
> >  * will only return success and a size of zero if there were no
> >  * mappings within the range.
> >  +   *
> >  +   * When VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP flag is set, unmap 
> >  request
> >  +   * must be for single mapping. Multiple mappings with this flag 
> >  set is
> >  +   * not supported.
> >  */
> > if (iommu->v2) {
> > dma = vfio_find_dma(iommu, unmap->iova, 1);
> >  @@ -997,6 +1002,13 @@ static int vfio_dma_do_unmap(struct vfio_iommu 
> >  *iommu,
> > ret = -EINVAL;
> > goto unlock;
> > }
> >  +
> >  +  if ((unmap->flags & 
> >  VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP) &&
> >  +  (dma->iova != unmap->iova || dma->size != 
> >  unmap->size)) {  
> > >>> dma is probably NULL here!  
> > >>
> > >> Yep, I didn't look closely enough there.  This is situated right
> > >> between the check to make sure we're not bisecting a mapping at the
> > >> start of the unmap and the check to make sure we're not bisecting a
> > >> mapping at the end of the unmap.  There's no guarantee that we have a
> > >> valid pointer here.  The test should be in the while() loop below this
> > >> code.  
> > > 
> > > Actually the test could remain here, we can exit here if we can't find
> > > a dma at the start of the unmap range with the GET_DIRTY_BITMAP flag,
> > > but we absolutely cannot deref dma without testing it.
> > >   
> > 
> > In the check above newly added check, if dma is NULL then its an error 
> > condition, because Unmap requests must fully cover previous mappings, right?
> 
> Yes, but we'll do a null pointer deref before we return error.
>  
> > >>> And this restriction on UNMAP would make some UNMAP operations of vIOMMU
> > >>> fail.
> > >>>
> > >>> e.g. below condition indeed happens in reality.
> > >>> an UNMAP ioctl comes for IOVA range from 0xff80, of size 0x20
> > >>> However, IOVAs in this range are mapped page by page.i.e., dma->size is 
> > >>> 0x1000.
> > >>>
> > >>> Previous, this UNMAP ioctl could unmap successfully as a whole.  
> > >>
> > >> What triggers this in the guest?  Note that it's only when using the
> > >> GET_DIRTY_BITMAP flag that this is restricted.  Does the event you're
> > >> referring to potentially occur under normal circumstances in that mode?
> > >> Thanks,
> > >>  

it happens in vIOMMU Domain level invalidation of IOTLB
(domain-selective inval

Re: [PATCH v4 0/2] Cross-device resource sharing

2020-03-22 Thread David Stevens
Thanks for taking a look at this. I've opened a github issue.

Fixes: https://github.com/oasis-tcs/virtio-spec/issues/76

Thanks,
David

On Fri, Mar 20, 2020 at 3:41 PM Gerd Hoffmann  wrote:
>
> On Thu, Mar 19, 2020 at 11:18:21AM +0900, David Stevens wrote:
> > Hi all,
> >
> > This is the next iteration of patches for adding support for sharing
> > resources between different virtio devices. The corresponding Linux
> > implementation is [1].
> >
> > In addition to these patches, the most recent virtio-video patchset
> > includes a patch for importing objects into that device [2].
>
> Looks good to me.
>
> So, open a github issue to kick the TC vote process and get this merged?
> (see virtio-spec/.github/PULL_REQUEST_TEMPLATE.md).
>
> cheers,
>   Gerd
>



Re: [PATCH v2] target/i386: Add ARCH_CAPABILITIES related bits into Icelake-Server CPU model

2020-03-22 Thread Tao Xu

Hi Xiaoyao,

Maybe you can add a .note for this new version.

for example:

+.version = 3,
+.note = "ARCH_CAPABILITIES",
+.props = (PropValue[]) {

On 3/16/2020 5:56 PM, Xiaoyao Li wrote:

Current Icelake-Server CPU model lacks all the features enumerated by
MSR_IA32_ARCH_CAPABILITIES.

Add them, so that guest of "Icelake-Server" can see all of them.

Signed-off-by: Xiaoyao Li 
---
v2:
  - Add it as a new version.
---
  target/i386/cpu.c | 13 +
  1 file changed, 13 insertions(+)

diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index 92fafa265914..5fba6a2ad6b3 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -3496,6 +3496,19 @@ static X86CPUDefinition builtin_x86_defs[] = {
  { /* end of list */ }
  },
  },
+{
+.version = 3,
+.props = (PropValue[]) {
+{ "arch-capabilities", "on" },
+{ "rdctl-no", "on" },
+{ "ibrs-all", "on" },
+{ "skip-l1dfl-vmentry", "on" },
+{ "mds-no", "on" },
+{ "pschange-mc-no", "on" },
+{ "taa-no", "on" },
+{ /* end of list */ }
+},
+},
  { /* end of list */ }
  }
  },
--
2.20.1






Re: [PATCH v14 Kernel 7/7] vfio: Selective dirty page tracking if IOMMU backed device pins pages

2020-03-22 Thread Yan Zhao
On Sat, Mar 21, 2020 at 03:41:42AM +0800, Alex Williamson wrote:
> On Thu, 19 Mar 2020 02:24:33 -0400
> Yan Zhao  wrote:
> > On Thu, Mar 19, 2020 at 03:41:14AM +0800, Kirti Wankhede wrote:
> > > diff --git a/drivers/vfio/vfio_iommu_type1.c 
> > > b/drivers/vfio/vfio_iommu_type1.c
> > > index 912629320719..deec09f4b0f6 100644
> > > --- a/drivers/vfio/vfio_iommu_type1.c
> > > +++ b/drivers/vfio/vfio_iommu_type1.c
> > > @@ -72,6 +72,7 @@ struct vfio_iommu {
> > >   boolv2;
> > >   boolnesting;
> > >   booldirty_page_tracking;
> > > + boolpinned_page_dirty_scope;
> > >  };
> > >  
> > >  struct vfio_domain {
> > > @@ -99,6 +100,7 @@ struct vfio_group {
> > >   struct iommu_group  *iommu_group;
> > >   struct list_headnext;
> > >   boolmdev_group; /* An mdev group */
> > > + boolpinned_page_dirty_scope;
> > >  };
> > >  
> > >  struct vfio_iova {
> > > @@ -132,6 +134,10 @@ struct vfio_regions {
> > >  static int put_pfn(unsigned long pfn, int prot);
> > >  static unsigned long vfio_pgsize_bitmap(struct vfio_iommu *iommu);
> > >  
> > > +static struct vfio_group *vfio_iommu_find_iommu_group(struct vfio_iommu 
> > > *iommu,
> > > +struct iommu_group *iommu_group);
> > > +
> > > +static void update_pinned_page_dirty_scope(struct vfio_iommu *iommu);
> > >  /*
> > >   * This code handles mapping and unmapping of user data buffers
> > >   * into DMA'ble space using the IOMMU
> > > @@ -556,11 +562,13 @@ static int vfio_unpin_page_external(struct vfio_dma 
> > > *dma, dma_addr_t iova,
> > >  }
> > >  
> > >  static int vfio_iommu_type1_pin_pages(void *iommu_data,
> > > +   struct iommu_group *iommu_group,
> > > unsigned long *user_pfn,
> > > int npage, int prot,
> > > unsigned long *phys_pfn)
> > >  {
> > >   struct vfio_iommu *iommu = iommu_data;
> > > + struct vfio_group *group;
> > >   int i, j, ret;
> > >   unsigned long remote_vaddr;
> > >   struct vfio_dma *dma;
> > > @@ -630,8 +638,14 @@ static int vfio_iommu_type1_pin_pages(void 
> > > *iommu_data,
> > >  (vpfn->iova - dma->iova) >> pgshift, 1);
> > >   }
> > >   }  
> > 
> > Could you provide an interface lightweight than vfio_pin_pages for 
> > pass-through
> > devices? e.g. vfio_mark_iova_dirty()
> > 
> > Or at least allowing phys_pfn to be empty for pass-through devices.
> > 
> > This is really inefficient:
> > bitmap_set(dma->bitmap, (vpfn->iova - dma->iova) / pgsize, 1));
> > i.e.
> > in order to mark an iova dirty, it has to go through iova ---> pfn --> iova
> > while acquiring pfn is not necessary for pass-through devices.
> 
> I think this would be possible, but I don't think it should be gating
> to this series.  We don't have such consumers yet.  Thanks,
>
ok. Reasonable.

Thanks
Yan



Re: [PATCH v2] target/i386: Add ARCH_CAPABILITIES related bits into Icelake-Server CPU model

2020-03-22 Thread Xiaoyao Li

On 3/23/2020 10:32 AM, Tao Xu wrote:

Hi Xiaoyao,

Maybe you can add a .note for this new version.

for example:

+    .version = 3,
+    .note = "ARCH_CAPABILITIES",
+    .props = (PropValue[]) {


Hi Paolo and Eduardo,

Do I need to spin a new version to add the .note?
Or maybe you can add it when queuing the patch?

Thanks,
-Xiaoyao


On 3/16/2020 5:56 PM, Xiaoyao Li wrote:

Current Icelake-Server CPU model lacks all the features enumerated by
MSR_IA32_ARCH_CAPABILITIES.

Add them, so that guest of "Icelake-Server" can see all of them.

Signed-off-by: Xiaoyao Li 
---
v2:
  - Add it as a new version.
---
  target/i386/cpu.c | 13 +
  1 file changed, 13 insertions(+)

diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index 92fafa265914..5fba6a2ad6b3 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -3496,6 +3496,19 @@ static X86CPUDefinition builtin_x86_defs[] = {
  { /* end of list */ }
  },
  },
+    {
+    .version = 3,
+    .props = (PropValue[]) {
+    { "arch-capabilities", "on" },
+    { "rdctl-no", "on" },
+    { "ibrs-all", "on" },
+    { "skip-l1dfl-vmentry", "on" },
+    { "mds-no", "on" },
+    { "pschange-mc-no", "on" },
+    { "taa-no", "on" },
+    { /* end of list */ }
+    },
+    },
  { /* end of list */ }
  }
  },
--
2.20.1







[PATCH 3/3] target/i386: Tell why guest exits to user space due to #AC

2020-03-22 Thread Xiaoyao Li
Tell why the guest exits from KVM to user space due to #AC, so the user
knows what happened.

Signed-off-by: Xiaoyao Li 
---
 target/i386/kvm.c | 11 +--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/target/i386/kvm.c b/target/i386/kvm.c
index 411402aa29fa..36bc1485d478 100644
--- a/target/i386/kvm.c
+++ b/target/i386/kvm.c
@@ -4464,8 +4464,15 @@ int kvm_arch_handle_exit(CPUState *cs, struct kvm_run 
*run)
 ret = -1;
 break;
 case KVM_EXIT_EXCEPTION:
-fprintf(stderr, "KVM: exception %d exit (error code 0x%x)\n",
-run->ex.exception, run->ex.error_code);
+if (run->ex.exception == AC_VECTOR) {
+fprintf(stderr, "Guest encounters an #AC due to split lock. 
Because "
+"guest doesn't expect this split lock #AC (it doesn't set "
+"msr_test_ctrl.split_lock_detect) and host sets "
+"split_lock_detect=fatal, guest has to be killed.\n");
+} else {
+fprintf(stderr, "KVM: exception %d exit (error code 0x%x)\n",
+run->ex.exception, run->ex.error_code);
+}
 ret = -1;
 break;
 case KVM_EXIT_DEBUG:
-- 
2.20.1




[PATCH 1/3] target/i386: Rename CORE_CAPABILITY to CORE_CAPABILITIES

2020-03-22 Thread Xiaoyao Li
Intel SDM updates the name of MSR CORE_CAPABILITY to CORE_CAPABILITIES,
so update it in QEMU as well.

Signed-off-by: Xiaoyao Li 
---
 target/i386/cpu.c | 12 ++--
 target/i386/cpu.h |  6 +++---
 target/i386/kvm.c |  6 +++---
 3 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index 34b511f078e5..1de7f3cd533e 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -1216,7 +1216,7 @@ static FeatureWordInfo feature_word_info[FEATURE_WORDS] = 
{
 .index = MSR_IA32_ARCH_CAPABILITIES,
 },
 },
-[FEAT_CORE_CAPABILITY] = {
+[FEAT_CORE_CAPABILITIES] = {
 .type = MSR_FEATURE_WORD,
 .feat_names = {
 NULL, NULL, NULL, NULL,
@@ -1229,7 +1229,7 @@ static FeatureWordInfo feature_word_info[FEATURE_WORDS] = 
{
 NULL, NULL, NULL, NULL,
 },
 .msr = {
-.index = MSR_IA32_CORE_CAPABILITY,
+.index = MSR_IA32_CORE_CAPABILITIES,
 },
 },
 
@@ -1406,8 +1406,8 @@ static FeatureDep feature_dependencies[] = {
 .to = { FEAT_ARCH_CAPABILITIES, ~0ull },
 },
 {
-.from = { FEAT_7_0_EDX, CPUID_7_0_EDX_CORE_CAPABILITY },
-.to = { FEAT_CORE_CAPABILITY,   ~0ull },
+.from = { FEAT_7_0_EDX, CPUID_7_0_EDX_CORE_CAPABILITIES },
+.to = { FEAT_CORE_CAPABILITIES, ~0ull },
 },
 {
 .from = { FEAT_1_ECX,   CPUID_EXT_VMX },
@@ -3709,8 +3709,8 @@ static X86CPUDefinition builtin_x86_defs[] = {
 .features[FEAT_7_0_EDX] =
 CPUID_7_0_EDX_SPEC_CTRL |
 CPUID_7_0_EDX_ARCH_CAPABILITIES | CPUID_7_0_EDX_SPEC_CTRL_SSBD |
-CPUID_7_0_EDX_CORE_CAPABILITY,
-.features[FEAT_CORE_CAPABILITY] =
+CPUID_7_0_EDX_CORE_CAPABILITIES,
+.features[FEAT_CORE_CAPABILITIES] =
 MSR_CORE_CAP_SPLIT_LOCK_DETECT,
 /*
  * Missing: XSAVES (not supported by some Linux versions,
diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index 60d797d5941f..f6c54412ba5e 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -349,7 +349,7 @@ typedef enum X86Seg {
 #define MSR_VIRT_SSBD   0xc001011f
 #define MSR_IA32_PRED_CMD   0x49
 #define MSR_IA32_UCODE_REV  0x8b
-#define MSR_IA32_CORE_CAPABILITY0xcf
+#define MSR_IA32_CORE_CAPABILITIES  0xcf
 
 #define MSR_IA32_ARCH_CAPABILITIES  0x10a
 #define ARCH_CAP_TSX_CTRL_MSR  (1<<7)
@@ -526,7 +526,7 @@ typedef enum FeatureWord {
 FEAT_XSAVE_COMP_LO, /* CPUID[EAX=0xd,ECX=0].EAX */
 FEAT_XSAVE_COMP_HI, /* CPUID[EAX=0xd,ECX=0].EDX */
 FEAT_ARCH_CAPABILITIES,
-FEAT_CORE_CAPABILITY,
+FEAT_CORE_CAPABILITIES,
 FEAT_VMX_PROCBASED_CTLS,
 FEAT_VMX_SECONDARY_CTLS,
 FEAT_VMX_PINBASED_CTLS,
@@ -777,7 +777,7 @@ typedef uint64_t FeatureWordArray[FEATURE_WORDS];
 /* Arch Capabilities */
 #define CPUID_7_0_EDX_ARCH_CAPABILITIES (1U << 29)
 /* Core Capability */
-#define CPUID_7_0_EDX_CORE_CAPABILITY   (1U << 30)
+#define CPUID_7_0_EDX_CORE_CAPABILITIES (1U << 30)
 /* Speculative Store Bypass Disable */
 #define CPUID_7_0_EDX_SPEC_CTRL_SSBD(1U << 31)
 
diff --git a/target/i386/kvm.c b/target/i386/kvm.c
index 69eb43d796e6..6888cb7caeae 100644
--- a/target/i386/kvm.c
+++ b/target/i386/kvm.c
@@ -2051,7 +2051,7 @@ static int kvm_get_supported_msrs(KVMState *s)
 case MSR_IA32_ARCH_CAPABILITIES:
 has_msr_arch_capabs = true;
 break;
-case MSR_IA32_CORE_CAPABILITY:
+case MSR_IA32_CORE_CAPABILITIES:
 has_msr_core_capabs = true;
 break;
 case MSR_IA32_VMX_VMFUNC:
@@ -2696,8 +2696,8 @@ static void kvm_init_msrs(X86CPU *cpu)
 }
 
 if (has_msr_core_capabs) {
-kvm_msr_entry_add(cpu, MSR_IA32_CORE_CAPABILITY,
-  env->features[FEAT_CORE_CAPABILITY]);
+kvm_msr_entry_add(cpu, MSR_IA32_CORE_CAPABILITIES,
+  env->features[FEAT_CORE_CAPABILITIES]);
 }
 
 if (has_msr_ucode_rev) {
-- 
2.20.1




[PATCH 0/3] Misc patches for core_capabilities and split lock detection

2020-03-22 Thread Xiaoyao Li
Patch 1 renames core_capability to core_capabilities to align with the
latest SDM.

Patch 2 adds MSR_TEST_CTRL support.

Patch 3 prints info when the guest is going to be killed due to split lock #AC.

Xiaoyao Li (3):
  target/i386: Rename CORE_CAPABILITY to CORE_CAPABILITIES
  target/i386: Add support for TEST_CTRL MSR
  target/i386: Tell why guest exits to user space due to #AC

 target/i386/cpu.c | 12 ++--
 target/i386/cpu.h |  8 +---
 target/i386/kvm.c | 30 +-
 target/i386/machine.c | 20 
 4 files changed, 56 insertions(+), 14 deletions(-)

-- 
2.20.1




  1   2   >