Re: qemu-riscv32 usermode still broken?
Hi Alistair, > > Ok! > > So on my x86 machine I see this > > --- SIGCHLD {si_signo=SIGCHLD, si_code=CLD_EXITED, si_pid=285545, > si_uid=1000, si_status=0, si_utime=0, si_stime=0} --- > wait4(-1, [{WIFEXITED(s) && WEXITSTATUS(s) == 0}], > WNOHANG|WSTOPPED|WCONTINUED, NULL) = 285545 > wait4(-1, 0x7ffe3eeb8210, WNOHANG|WSTOPPED|WCONTINUED, NULL) = 0 > rt_sigreturn({mask=[INT]}) = 0 > close(3)= 0 > > It all looks ok. This was fixed in the meantime (hooray!), sorry I didn't think anyone would still look at the old thread. The commit is given below. Since then we've been able to build riscv32 stages for Gentoo just fine using qemu-user, see https://www.gentoo.org/downloads/#riscv Cheers, Andreas commit f0907ff4cae743f1a4ef3d0a55a047029eed06ff Author: Richard Henderson AuthorDate: Fri Apr 5 11:58:14 2024 -1000 Commit: Richard Henderson CommitDate: Tue Apr 9 07:43:11 2024 -1000 linux-user: Fix waitid return of siginfo_t and rusage The copy back to siginfo_t should be conditional only on arg3, not the specific values that might have been written. The copy back to rusage was missing entirely. Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2262 Signed-off-by: Richard Henderson Tested-by: Alex Fan Reviewed-by: Philippe Mathieu-Daudé > > Maybe the host_to_target_siginfo() function in QEMU is the issue? > Something in here? > https://github.com/qemu/qemu/blob/master/linux-user/signal.c#L335 > > Nothing jumps out with a quick look though > > Alistair > > > > > > > > > -- > > Andreas K. Hüttel > > dilfri...@gentoo.org > > Gentoo Linux developer > > (council, toolchain, base-system, perl, libreoffice) > -- Andreas K. Hüttel dilfri...@gentoo.org Gentoo Linux developer (council, toolchain, base-system, perl, libreoffice) signature.asc Description: This is a digitally signed message part.
Re: qemu-riscv32 usermode still broken?
-1 EINVAL (Invalid argument) readlink("/usr/share/locale", 0x7fff3f8742a0, 1023) = -1 EINVAL (Invalid argument) readlink("/usr/share/locale/en_US.utf8", 0x7fff3f8742a0, 1023) = -1 ENOENT (No such file or directory) access("/var/lib/machines/riscv32/usr/share/locale/en_US.utf8/LC_MESSAGES/bash.mo", F_OK) = -1 ENOENT (No such file or directory) openat(AT_FDCWD, "/usr/share/locale/en_US.utf8/LC_MESSAGES/bash.mo", O_RDONLY) = -1 ENOENT (No such file or directory) readlink("/usr", 0x7fff3f8742a0, 1023) = -1 EINVAL (Invalid argument) readlink("/usr/share", 0x7fff3f8742a0, 1023) = -1 EINVAL (Invalid argument) readlink("/usr/share/locale", 0x7fff3f8742a0, 1023) = -1 EINVAL (Invalid argument) readlink("/usr/share/locale/en_US", 0x7fff3f8742a0, 1023) = -1 EINVAL (Invalid argument) readlink("/usr/share/locale/en_US/LC_MESSAGES", 0x7fff3f8742a0, 1023) = -1 EINVAL (Invalid argument) readlink("/usr/share/locale/en_US/LC_MESSAGES/bash.mo", 0x7fff3f8742a0, 1023) = -1 ENOENT (No such file or directory) access("/var/lib/machines/riscv32/usr/share/locale/en_US/LC_MESSAGES/bash.mo", F_OK) = -1 ENOENT (No such file or directory) openat(AT_FDCWD, "/usr/share/locale/en_US/LC_MESSAGES/bash.mo", O_RDONLY) = -1 ENOENT (No such file or directory) readlink("/usr", 0x7fff3f8742a0, 1023) = -1 EINVAL (Invalid argument) readlink("/usr/share", 0x7fff3f8742a0, 1023) = -1 EINVAL (Invalid argument) readlink("/usr/share/locale", 0x7fff3f8742a0, 1023) = -1 EINVAL (Invalid argument) readlink("/usr/share/locale/en.utf8", 0x7fff3f8742a0, 1023) = -1 ENOENT (No such file or directory) access("/var/lib/machines/riscv32/usr/share/locale/en.utf8/LC_MESSAGES/bash.mo", F_OK) = -1 ENOENT (No such file or directory) openat(AT_FDCWD, "/usr/share/locale/en.utf8/LC_MESSAGES/bash.mo", O_RDONLY) = -1 ENOENT (No such file or directory) readlink("/usr", 0x7fff3f8742a0, 1023) = -1 EINVAL (Invalid argument) readlink("/usr/share", 0x7fff3f8742a0, 1023) = -1 EINVAL (Invalid argument) readlink("/usr/share/locale", 0x7fff3f8742a0, 1023) = -1 EINVAL (Invalid argument) readlink("/usr/share/locale/en", 0x7fff3f8742a0, 1023) = -1 EINVAL (Invalid argument) readlink("/usr/share/locale/en/LC_MESSAGES", 0x7fff3f8742a0, 1023) = -1 EINVAL (Invalid argument) readlink("/usr/share/locale/en/LC_MESSAGES/bash.mo", 0x7fff3f8742a0, 1023) = -1 ENOENT (No such file or directory) access("/var/lib/machines/riscv32/usr/share/locale/en/LC_MESSAGES/bash.mo", F_OK) = -1 ENOENT (No such file or directory) openat(AT_FDCWD, "/usr/share/locale/en/LC_MESSAGES/bash.mo", O_RDONLY) = -1 ENOENT (No such file or directory) write(2, "[1]+ Stopped py"..., 37[1]+ Stopped python ) = 37 rt_sigprocmask(SIG_SETMASK, ~[RTMIN RT_1], NULL, 8) = 0 rt_sigprocmask(SIG_SETMASK, ~[RTMIN RT_1], NULL, 8) = 0 rt_sigprocmask(SIG_SETMASK, [], NULL, 8) = 0 --- SIGCHLD {si_signo=SIGCHLD, si_code=CLD_STOPPED, si_pid=2472081, si_uid=0, si_status=SIGTSTP, si_utime=2 /* 0.02 s */, si_stime=0} --- rt_sigreturn({mask=~[BUS SEGV]})= 0 rt_sigprocmask(SIG_SETMASK, ~[RTMIN RT_1], NULL, 8) = 0 rt_sigprocmask(SIG_SETMASK, [CHLD], NULL, 8) = 0 waitid(P_ALL, -1, {}, WNOHANG|WEXITED|WSTOPPED|WCONTINUED, NULL) = 0 waitid(P_ALL, -1, {}, WNOHANG|WEXITED|WSTOPPED|WCONTINUED, NULL) = 0 waitid(P_ALL, -1, {}, WNOHANG|WEXITED|WSTOPPED|WCONTINUED, NULL) = 0 waitid(P_ALL, -1, {}, WNOHANG|WEXITED|WSTOPPED|WCONTINUED, NULL) = 0 waitid(P_ALL, -1, {}, WNOHANG|WEXITED|WSTOPPED|WCONTINUED, NULL) = 0 waitid(P_ALL, -1, {}, WNOHANG|WEXITED|WSTOPPED|WCONTINUED, NULL) = 0 waitid(P_ALL, -1, {}, WNOHANG|WEXITED|WSTOPPED|WCONTINUED, NULL) = 0 waitid(P_ALL, -1, {}, WNOHANG|WEXITED|WSTOPPED|WCONTINUED, NULL) = 0 waitid(P_ALL, -1, {}, WNOHANG|WEXITED|WSTOPPED|WCONTINUED, NULL) = 0 waitid(P_ALL, -1, {}, WNOHANG|WEXITED|WSTOPPED|WCONTINUED, NULL) = 0 waitid(P_ALL, -1, {}, WNOHANG|WEXITED|WSTOPPED|WCONTINUED, NULL) = 0 waitid(P_ALL, -1, {}, WNOHANG|WEXITED|WSTOPPED|WCONTINUED, NULL) = 0 waitid(P_ALL, -1, {}, WNOHANG|WEXITED|WSTOPPED|WCONTINUED, NULL) = 0 waitid(P_ALL, -1, {}, WNOHANG|WEXITED|WSTOPPED|WCONTINUED, NULL) = 0 waitid(P_ALL, -1, {}, WNOHANG|WEXITED|WSTOPPED|WCONTINUED, NULL) = 0 (...) -- Andreas K. Hüttel dilfri...@gentoo.org Gentoo Linux developer (council, toolchain, base-system, perl, libreoffice) signature.asc Description: This is a digitally signed message part.
Re: qemu-riscv32 usermode still broken?
Am Donnerstag, 14. September 2023, 03:22:49 CEST schrieb Andreas K. Huettel: > > > https://lists.gnu.org/archive/html/bug-bash/2023-09/msg00119.html > > > ^ Here I'm trying to find out more. > > > > > > Bash tests apparently indicate that argv[0] is overwritten, and that > > > reading through a pipe or from /dev/tty fails or loses data. > > > > > > Apart from the bash testsuite failing, symptoms are as follows: > > > > > > * Something seems wrong in the signal handling (?): > > > > If it is wrong for signal handling and for 32-bit, I guess it may be > > fixed by this patch > > > > https://www.mail-archive.com/qemu-devel@nongnu.org/msg981238.html > > > > And this patch has been merged into master branch yesterday. > > > > May be you can have a try based on the master branch. > > I added the patch to 8.0.3 (easier for the moment), and this did > unfortunately *not* lead to any improvements. Also with the patch on top of 8.1.0 no improvement or change. > However, in the meantime on the GNU Make tracker Alejandro Colomar > pointed me to another detail based on my oddities [1]: > > > I think [make] it's failing here: > > > > <https://git.savannah.gnu.org/cgit/make.git/tree/src/job.c#n757> > > > > But it's failing with ENOENT, which is not one of the documented > > errors for wait(2): > > So maybe another point to look at would be the origin of the return > values of wait, and whether that's wired correctly for rv32... > > [1] https://savannah.gnu.org/bugs/?64664 In the meantime I tried to nail down a reproducible hang in bash on this frankensystem with qemu's gdb interface. This also pointed towards child handling and wait [2]. [2] https://lists.gnu.org/archive/html/bug-bash/2023-09/msg00128.html Some time ago we already debugged that corner, and Alistair Francis came up with a fix that improved the riscv32 situation back then [3]. Maybe that fix was somehow incomplete? Just speculating... [3] https://bugs.launchpad.net/qemu/+bug/1906193 -- Andreas K. Hüttel dilfri...@gentoo.org Gentoo Linux developer (council, toolchain, base-system, perl, libreoffice) signature.asc Description: This is a digitally signed message part.
Re: qemu-riscv32 usermode still broken?
> > https://lists.gnu.org/archive/html/bug-bash/2023-09/msg00119.html > > ^ Here I'm trying to find out more. > > > > Bash tests apparently indicate that argv[0] is overwritten, and that > > reading through a pipe or from /dev/tty fails or loses data. > > > > Apart from the bash testsuite failing, symptoms are as follows: > > > > * Something seems wrong in the signal handling (?): > > If it is wrong for signal handling and for 32-bit, I guess it may be > fixed by this patch > > https://www.mail-archive.com/qemu-devel@nongnu.org/msg981238.html > > And this patch has been merged into master branch yesterday. > > May be you can have a try based on the master branch. I added the patch to 8.0.3 (easier for the moment), and this did unfortunately *not* lead to any improvements. More debugging coming (weekend hopefully). However, in the meantime on the GNU Make tracker Alejandro Colomar pointed me to another detail based on my oddities [1]: > I think [make] it's failing here: > > <https://git.savannah.gnu.org/cgit/make.git/tree/src/job.c#n757> > > But it's failing with ENOENT, which is not one of the documented > errors for wait(2): So maybe another point to look at would be the origin of the return values of wait, and whether that's wired correctly for rv32... [1] https://savannah.gnu.org/bugs/?64664 -- Andreas K. Hüttel dilfri...@gentoo.org Gentoo Linux developer (council, toolchain, base-system, perl, libreoffice) signature.asc Description: This is a digitally signed message part.
Re: qemu-riscv32 usermode still broken?
Am Mittwoch, 13. September 2023, 10:06:01 CEST schrieb Michael Tokarev: > 13.09.2023 04:41, LIU Zhiwei wrote: > > > > On 2023/9/13 6:31, Andreas K. Huettel wrote: > .. > >> * Something seems wrong in the signal handling (?): > > > > If it is wrong for signal handling and for 32-bit, I guess it may be fixed > > by this patch > > > > https://www.mail-archive.com/qemu-devel@nongnu.org/msg981238.html > > > > And this patch has been merged into master branch yesterday. > > I picked this one up for stable yesterday, scheduled to be released > on Sep-21. Oh great, thanks! I'll test this and report back. -a > > Thanks, > > /mjt > > > -- Andreas K. Hüttel dilfri...@gentoo.org Gentoo Linux developer (council, toolchain, base-system, perl, libreoffice) signature.asc Description: This is a digitally signed message part.
qemu-riscv32 usermode still broken?
Dear all, I've once more tried to build up a riscv32 linux install in a qemu-riscv32 usermode systemd-nspawn, and am running into the same problems as some time ago... https://dev.gentoo.org/~dilfridge/riscv32/riscv32.tar.xz (220M) The problems manifest themselves mostly in bash; if I replace /bin/bash with a static x86-64 binary (in the tarball as /bin/bash.amd64), bypassing qemu, I can make the chroot rebuild itself completely. https://lists.gnu.org/archive/html/bug-bash/2023-09/msg00119.html ^ Here I'm trying to find out more. Bash tests apparently indicate that argv[0] is overwritten, and that reading through a pipe or from /dev/tty fails or loses data. Apart from the bash testsuite failing, symptoms are as follows: * Something seems wrong in the signal handling (?): --- our package manager (bash/python combo, there bash) hangs reproducibly at one point. --- when I run a console program and try to background it with ctl-z, it hangs (only the first time per bash instance, it seems) repeated ctl-c gets me back to the shell, then the program is in the background riscv32 ~ # python Python 3.11.5 (main, Aug 31 2023, 21:56:30) [GCC 13.2.1 20230826] on linux Type "help", "copyright", "credits" or "license" for more information. >>> [1]+ Stopped python ^C^C^C^C^C^C^C riscv32 ~ # ^C riscv32 ~ # riscv32 ~ # jobs [1]+ Stopped python riscv32 ~ # fg python >>> --- make, when building something, seems to always start only one job in parallel Any advice or debugging would be appreciated. If we get this running then I can set up regular riscv32 Gentoo stage builds within a week. [*] Thanks in advance, Andreas PS. huettel@pinacolada ~ $ /var/lib/machines/riscv32/usr/bin/qemu-riscv32 -version qemu-riscv32 version 8.1.0 Copyright (c) 2003-2023 Fabrice Bellard and the QEMU Project developers [*] https://www.gentoo.org/downloads/#riscv -- Andreas K. Hüttel dilfri...@gentoo.org Gentoo Linux developer (council, toolchain, base-system, perl, libreoffice) signature.asc Description: This is a digitally signed message part.
Re: [PATCH v3] target/ppc: Fix fallback to MFSS for MFFS* instructions on pre 3.0 ISAs
On 10/05/2023 08:19, Richard Purdie wrote: The following commits changed the code such that the fallback to MFSS for MFFSCRN, MFFSCRNI, MFFSCE and MFFSL on pre 3.0 ISAs was removed and became an illegal instruction: bf8adfd88b547680aa857c46098f3a1e94373160 - target/ppc: Move mffscrn[i] to decodetree 394c2e2fda70da722f20fb60412d6c0ca4bfaa03 - target/ppc: Move mffsce to decodetree 3e5bce70efe6bd1f684efbb21fd2a316cbf0657e - target/ppc: Move mffsl to decodetree The hardware will handle them as a MFFS instruction as the code did previously. This means applications that were segfaulting under qemu when encountering these instructions which is used in glibc libm functions for example. The fallback for MFFSCDRN and MFFSCDRNI added in a later patch was also missing. This patch restores the fallback to MFSS for these instructions on pre 3.0s ISAs as the hardware decoder would, fixing the segfaulting libm code. It doesn't have the fallback for 3.0 onwards to match hardware behaviour. Signed-off-by: Richard Purdie --- target/ppc/insn32.decode | 20 +--- target/ppc/translate/fp-impl.c.inc | 22 -- 2 files changed, 29 insertions(+), 13 deletions(-) v3 - drop fallback to MFFS for 3.0 ISA to match hardware v2 - switch to use decodetree pattern groups per feedback Reviewed-by: Matheus Ferst Thanks, Matheus K. Ferst Instituto de Pesquisas ELDORADO <http://www.eldorado.org.br/> Analista de Software Aviso Legal - Disclaimer <https://www.eldorado.org.br/disclaimer.html>
Re: [PATCH v2] target/ppc: Fix fallback to MFSS for MFFS* instructions on pre 3.0 ISAs
On 06/05/2023 03:52, Richard Purdie wrote: The following commits changed the code such that the fallback to MFSS for MFFSCRN, MFFSCRNI, MFFSCE and MFFSL on pre 3.0 ISAs was removed and became an illegal instruction: bf8adfd88b547680aa857c46098f3a1e94373160 - target/ppc: Move mffscrn[i] to decodetree 394c2e2fda70da722f20fb60412d6c0ca4bfaa03 - target/ppc: Move mffsce to decodetree 3e5bce70efe6bd1f684efbb21fd2a316cbf0657e - target/ppc: Move mffsl to decodetree The hardware will handle them as a MFFS instruction as the code did previously. This means applications that were segfaulting under qemu when encountering these instructions which is used in glibc libm functions for example. The fallback for MFFSCDRN and MFFSCDRNI added in a later patch was also missing. This patch restores the fallback to MFSS for these instructions on pre 3.0s ISAs as the hardware decoder would, fixing the segfaulting libm code. It and also ensures the MFSS instruction is used for currently reserved bits to handle other potential ISA additions more correctly. Signed-off-by: Richard Purdie --- target/ppc/insn32.decode | 19 --- target/ppc/translate/fp-impl.c.inc | 30 -- 2 files changed, 36 insertions(+), 13 deletions(-) v2 - switch to use decodetree pattern groups per feedback diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode index f8f589e9fd..3c4e2c2fc2 100644 --- a/target/ppc/insn32.decode +++ b/target/ppc/insn32.decode @@ -390,13 +390,18 @@ SETNBCR 01 . . - 00 - @X_bi ### Move To/From FPSCR -MFFS11 . 0 - 1001000111 . @X_t_rc -MFFSCE 11 . 1 - 1001000111 - @X_t -MFFSCRN 11 . 10110 . 1001000111 - @X_tb -MFFSCDRN11 . 10100 . 1001000111 - @X_tb -MFFSCRNI11 . 10111 ---.. 1001000111 - @X_imm2 -MFFSCDRNI 11 . 10101 --... 1001000111 - @X_imm3 -MFFSL 11 . 11000 - 1001000111 - @X_t +{ + # Before Power ISA v3.0, MFFS bits 11~15 were reserved and should be ignored + [ +MFFSCE 11 . 1 - 1001000111 - @X_t +MFFSCRN 11 . 10110 . 1001000111 - @X_tb +MFFSCDRN11 . 10100 . 1001000111 - @X_tb +MFFSCRNI11 . 10111 ---.. 1001000111 - @X_imm2 +MFFSCDRNI 11 . 10101 --... 1001000111 - @X_imm3 +MFFSL 11 . 11000 - 1001000111 - @X_t + ] + MFFS11 . - - 1001000111 . @X_t_rc +} ### Decimal Floating-Point Arithmetic Instructions diff --git a/target/ppc/translate/fp-impl.c.inc b/target/ppc/translate/fp-impl.c.inc index 57d8437851..10dfd91aa4 100644 --- a/target/ppc/translate/fp-impl.c.inc +++ b/target/ppc/translate/fp-impl.c.inc @@ -584,7 +584,10 @@ static bool trans_MFFSCE(DisasContext *ctx, arg_X_t *a) { TCGv_i64 fpscr; -REQUIRE_INSNS_FLAGS2(ctx, ISA300); +if (!(ctx->insns_flags2 & PPC2_ISA300)) { +return false; +} + REQUIRE_FPU(ctx); gen_reset_fpstatus(); @@ -597,7 +600,10 @@ static bool trans_MFFSCRN(DisasContext *ctx, arg_X_tb *a) { TCGv_i64 t1, fpscr; -REQUIRE_INSNS_FLAGS2(ctx, ISA300); +if (!(ctx->insns_flags2 & PPC2_ISA300)) { +return false; +} + REQUIRE_FPU(ctx); t1 = tcg_temp_new_i64(); @@ -614,7 +620,10 @@ static bool trans_MFFSCDRN(DisasContext *ctx, arg_X_tb *a) { TCGv_i64 t1, fpscr; -REQUIRE_INSNS_FLAGS2(ctx, ISA300); +if (!(ctx->insns_flags2 & PPC2_ISA300)) { +return false; +} + REQUIRE_FPU(ctx); t1 = tcg_temp_new_i64(); @@ -631,7 +640,10 @@ static bool trans_MFFSCRNI(DisasContext *ctx, arg_X_imm2 *a) { TCGv_i64 t1, fpscr; -REQUIRE_INSNS_FLAGS2(ctx, ISA300); +if (!(ctx->insns_flags2 & PPC2_ISA300)) { +return false; +} + REQUIRE_FPU(ctx); t1 = tcg_temp_new_i64(); @@ -647,7 +659,10 @@ static bool trans_MFFSCDRNI(DisasContext *ctx, arg_X_imm3 *a) { TCGv_i64 t1, fpscr; -REQUIRE_INSNS_FLAGS2(ctx, ISA300); +if (!(ctx->insns_flags2 & PPC2_ISA300)) { +return false; +} + REQUIRE_FPU(ctx); t1 = tcg_temp_new_i64(); @@ -661,7 +676,10 @@ static bool trans_MFFSCDRNI(DisasContext *ctx, arg_X_imm3 *a) static bool trans_MFFSL(DisasContext *ctx, arg_X_t *a) { -REQUIRE_INSNS_FLAGS2(ctx, ISA300); +if (!(ctx->insns_flags2 & PPC2_ISA300)) { +return false; +} + REQUIRE_FPU(ctx); gen_reset_fpstatus(); -- 2.39.2 As mention in the v1 thread, we should validate bits 11~15 on Power ISA v3.0+ to follow what the ISA says and keep the same behavior as the hardware. Again, sorry for the delayed response. Thanks, Matheus K. Ferst Instituto de Pesquisas ELDORADO <http://www.eldorado.org.br/> Anal
Re: [PATCH] target/ppc: Fix fallback to MFSS for MFFSCRN, MFFSCRNI, MFFSCE and MFFSL
On 05/05/2023 12:23, Richard Henderson wrote: On 5/4/23 18:17, Matheus K. Ferst wrote: On 04/05/2023 08:01, Richard Purdie wrote: The following commits changed the code such that these instructions became invalid on pre 3.0 ISAs: bf8adfd88b547680aa857c46098f3a1e94373160 - target/ppc: Move mffscrn[i] to decodetree 394c2e2fda70da722f20fb60412d6c0ca4bfaa03 - target/ppc: Move mffsce to decodetree 3e5bce70efe6bd1f684efbb21fd2a316cbf0657e - target/ppc: Move mffsl to decodetree The hardware will handle them as a MFFS instruction as the code did previously. Restore that behaviour. This means applications that were segfaulting under qemu when encountering these instructions now operate correctly. The instruction is used in glibc libm functions for example. Signed-off-by: Richard Purdie --- target/ppc/translate/fp-impl.c.inc | 20 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/target/ppc/translate/fp-impl.c.inc b/target/ppc/translate/fp-impl.c.inc index 57d8437851..cb86381c3f 100644 --- a/target/ppc/translate/fp-impl.c.inc +++ b/target/ppc/translate/fp-impl.c.inc @@ -584,7 +584,10 @@ static bool trans_MFFSCE(DisasContext *ctx, arg_X_t *a) { TCGv_i64 fpscr; - REQUIRE_INSNS_FLAGS2(ctx, ISA300); + if (unlikely(!(ctx->insns_flags2 & PPC2_ISA300))) { + return trans_MFFS(ctx, a); + } + Hi Richard, nice catch! I believe this may be better addressed by decodetree pattern groups, e.g.: On insns32.decode: { # Before Power ISA v3.0, MFFS bits 11~15 were reserved and should be ignored MFFS_ISA207 11 . - - 1001000111 . @X_t_rc [ MFFS 11 . 0 - 1001000111 . @X_t_rc MFFSCE 11 . 1 - 1001000111 - @X_t MFFSCRN 11 . 10110 . 1001000111 - @X_tb MFFSCDRN 11 . 10100 . 1001000111 - @X_tb MFFSCRNI 11 . 10111 ---.. 1001000111 - @X_imm2 MFFSCDRNI 11 . 10101 --... 1001000111 - @X_imm3 MFFSL 11 . 11000 - 1001000111 - @X_t ] } And on fp-impl.c.inc: static bool trans_MFFS_ISA207(DisasContext *ctx, arg_X_t_rc *a) { if (!(ctx->insns_flags2 & PPC2_ISA300)) { /* * Before Power ISA v3.0, MFFS bits 11~15 were reserved, any instruction * with OPCD=63 and XO=583 should be decoded as MFFS. */ return trans_MFFS(ctx, a); } /* * For Power ISA v3.0+, return false and let the pattern group * select the correct instruction. */ return false; } Not quite. Should be { [ MFFSCE 11 . 1 - 1001000111 - @X_t ... MFFSL 11 . 11000 - 1001000111 - @X_t ] MFFS 11 . - - 1001000111 . @X_t_rc } where all of the 3.0 insns do if (!(ctx->insns_flags2 & PPC2_ISA300)) { return false; } I do not believe that v3.0 rejects bits [11:15] = 00010, for example, which would have been accepted and ignored with v2.07. It should simply treat it as the full MFFS insn. Hi Richard, sorry for the delayed response. Testing on a POWER9, it does reject opcodes with undefined values in [11:15]: $ cat > mffs.c << EOF #include #include int main(void) { signal(SIGILL, _exit); asm(MFFS_OPCD); return 0; } EOF $ for i in {0..15}; do opc="$(( 0xfc00048e | ($i << 14) ))" gcc -DMFFS_OPCD="\".long $opc\"" mffs.c -o mffs printf "%s " $(echo "obase=2; $opc" | bc) if ./mffs ; then echo "valid" else echo "invalid" fi done 1100010010001110 valid # MFFS 1101010010001110 valid # MFFSCE 1110010010001110 invalid 1111010010001110 invalid 11000100010010001110 invalid 11000101010010001110 invalid 11000110010010001110 invalid 11000111010010001110 invalid 1100110010001110 invalid 11001001010010001110 invalid 11001010010010001110 invalid 11001011010010001110 invalid 11001100010010001110 invalid 11001101010010001110 invalid 11001110010010001110 invalid 1100010010001110 invalid 1101010010001110 invalid 11010001010010001110 invalid 11010010010010001110 invalid 11010011010010001110 invalid 11010100010010001110 valid # MFFSCDRN 11010101010010001110 valid # MFFSCDRNI 11010110010010001110 valid # MFFSCRN 11010111010010001110 valid # MFFSCRNI 1101110010001110 valid # MFFSL 11011001010010001110 invalid 11011010010010001110 invalid 11011011010010001110 invalid 11011100
Re: [PATCH] target/ppc: Fix fallback to MFSS for MFFSCRN, MFFSCRNI, MFFSCE and MFFSL
On 04/05/2023 08:01, Richard Purdie wrote: The following commits changed the code such that these instructions became invalid on pre 3.0 ISAs: bf8adfd88b547680aa857c46098f3a1e94373160 - target/ppc: Move mffscrn[i] to decodetree 394c2e2fda70da722f20fb60412d6c0ca4bfaa03 - target/ppc: Move mffsce to decodetree 3e5bce70efe6bd1f684efbb21fd2a316cbf0657e - target/ppc: Move mffsl to decodetree The hardware will handle them as a MFFS instruction as the code did previously. Restore that behaviour. This means applications that were segfaulting under qemu when encountering these instructions now operate correctly. The instruction is used in glibc libm functions for example. Signed-off-by: Richard Purdie --- target/ppc/translate/fp-impl.c.inc | 20 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/target/ppc/translate/fp-impl.c.inc b/target/ppc/translate/fp-impl.c.inc index 57d8437851..cb86381c3f 100644 --- a/target/ppc/translate/fp-impl.c.inc +++ b/target/ppc/translate/fp-impl.c.inc @@ -584,7 +584,10 @@ static bool trans_MFFSCE(DisasContext *ctx, arg_X_t *a) { TCGv_i64 fpscr; -REQUIRE_INSNS_FLAGS2(ctx, ISA300); +if (unlikely(!(ctx->insns_flags2 & PPC2_ISA300))) { +return trans_MFFS(ctx, a); +} + Hi Richard, nice catch! I believe this may be better addressed by decodetree pattern groups, e.g.: On insns32.decode: { # Before Power ISA v3.0, MFFS bits 11~15 were reserved and should be ignored MFFS_ISA207 11 . - - 1001000111 . @X_t_rc [ MFFS11 . 0 - 1001000111 . @X_t_rc MFFSCE 11 . 1 - 1001000111 - @X_t MFFSCRN 11 . 10110 . 1001000111 - @X_tb MFFSCDRN11 . 10100 . 1001000111 - @X_tb MFFSCRNI11 . 10111 ---.. 1001000111 - @X_imm2 MFFSCDRNI 11 . 10101 --... 1001000111 - @X_imm3 MFFSL 11 . 11000 - 1001000111 - @X_t ] } And on fp-impl.c.inc: static bool trans_MFFS_ISA207(DisasContext *ctx, arg_X_t_rc *a) { if (!(ctx->insns_flags2 & PPC2_ISA300)) { /* * Before Power ISA v3.0, MFFS bits 11~15 were reserved, any instruction * with OPCD=63 and XO=583 should be decoded as MFFS. */ return trans_MFFS(ctx, a); } /* * For Power ISA v3.0+, return false and let the pattern group * select the correct instruction. */ return false; } That way, I believe it'll be easier to add more MFFS variants in the future without thinking too much about the behavior in previous versions of Power ISA. Thanks, Matheus K. Ferst Instituto de Pesquisas ELDORADO <http://www.eldorado.org.br/> Analista de Software Aviso Legal - Disclaimer <https://www.eldorado.org.br/disclaimer.html>
Re: [PATCH v3 00/29] PowerPC interrupt rework
On 21/10/2022 07:56, Daniel Henrique Barboza wrote: Matheus, I did some digging yesterday. There are 2 distinct things happening: - the apparent problem with the avocado test. After doing more and more tests it seems like the test failure rate is lower than 10%. With a simple script to exercise it in my laptop: n=1 while [ 1 ]; do make -j check-avocado \ AVOCADO_TESTS='tests/avocado/replay_kernel.py:ReplayKernelNormal.test_ppc64_e500' ; if [ $? -ne 0 ]; then echo "test failed after $n interactions" exit 1 fi ((n=n+1)) done In master I managed to get up to 100+ runs without failure. Sometimes I get 90, 50, 30 runs before failure and so on. This is an OK failure rate in my opinion, so if any code contribution does not dramatically increase this failure rate I'm fine with it. This also means that I'll not be skipping the test. Thanks for this testing, I suspect we may have more than one bug that causes this test failure. - back to this series, I couldn't manage to get a single successful run with patch 27 applied. On the other hand, running the aforementioned script with patches 1-26 I just got 96 test runs before the first failure. This is enough evidence for me to believe that, yeah, patch 27 is really doing something that is messing with the icount replay for e500 one way or the other. Patch 27 is definitely wrong - other places that write in special registers and SPRs that may cause an interrupt (e.g., gen_helper_store_decr, gen_mtmsr[d]) call gen_io_start, so we also should use it before helper_ppc_maybe_interrupt. Without that call, we hit the cpu_abort in icount_handle_interrupt when using icount if writee[i] unmasks a pending interrupt. The current writee[i] may be wrong in not calling it too, as it may cause an interrupt to be delivered. However, before the interrupt rework, CPU_INTERRUPT_HARD was set somewhere else, so it wouldn't trigger the abort. That said, even after adding this call I still see failures after ~200 iterations of this test, so we may have more problems to tackle here. However, it's not a CPU abort anymore, the second QEMU invocation exits with zero without writing anything to the console. All that said, patches 1-26 are queued in ppc-next. On 10/20/22 10:40, Matheus K. Ferst wrote: On 20/10/2022 08:18, Daniel Henrique Barboza wrote: On 10/19/22 18:55, Daniel Henrique Barboza wrote: Matheus, This series fails 'make check-avocado' in an e500 test. This is the error output: Scrap that. This avocado test is also failing on master 10% of the time, give or take. It might be case that patch 27 makes the failure more consistent, but I can't say it's the culprit. I'll take a closer look and see if I can diagnose one particular commit that is making the patch fail 1 out of 10 times. It can be case where I might need to skip the test altogether. Nice catch. I guess we need a gen_icount_io_start before calling helper_ppc_maybe_interrupt, so maybe it's better to make a gen_ppc_maybe_interrupt that calls icount and the helper. I'll give it a bit more testing and re-spin the series. Don't need to re-spin everything (unless you needed to do some changes in the patches prior). Just resend patch 27+. Ok, I'll send 27-29 with based on ppc-next. Thanks, Matheus K. Ferst Instituto de Pesquisas ELDORADO <http://www.eldorado.org.br/> Analista de Software Aviso Legal - Disclaimer <https://www.eldorado.org.br/disclaimer.html>
Re: [PATCH v3 00/29] PowerPC interrupt rework
On 20/10/2022 08:18, Daniel Henrique Barboza wrote: On 10/19/22 18:55, Daniel Henrique Barboza wrote: Matheus, This series fails 'make check-avocado' in an e500 test. This is the error output: Scrap that. This avocado test is also failing on master 10% of the time, give or take. It might be case that patch 27 makes the failure more consistent, but I can't say it's the culprit. I'll take a closer look and see if I can diagnose one particular commit that is making the patch fail 1 out of 10 times. It can be case where I might need to skip the test altogether. Nice catch. I guess we need a gen_icount_io_start before calling helper_ppc_maybe_interrupt, so maybe it's better to make a gen_ppc_maybe_interrupt that calls icount and the helper. I'll give it a bit more testing and re-spin the series. Thanks, Matheus K. Ferst Instituto de Pesquisas ELDORADO <http://www.eldorado.org.br/> Analista de Software Aviso Legal - Disclaimer <https://www.eldorado.org.br/disclaimer.html>
Re: [PATCH 5/6] target/ppc: move msgclrp/msgsndp to decodetree
nc +++ b/target/ppc/translate/processor-ctrl-impl.c.inc @@ -68,3 +68,27 @@ static bool trans_MSGSND(DisasContext *ctx, arg_X_rb *a) #endif return true; } + +static bool trans_MSGCLRP(DisasContext *ctx, arg_X_rb *a) +{ + REQUIRE_INSNS_FLAGS2(ctx, ISA207S); + REQUIRE_SV(ctx); +#if !defined(CONFIG_USER_ONLY) && defined(TARGET_PPC64) + gen_helper_book3s_msgclrp(cpu_env, cpu_gpr[a->rb]); +#else + qemu_build_not_reached(); ^ here. And also +#endif + return true; +} + +static bool trans_MSGSNDP(DisasContext *ctx, arg_X_rb *a) +{ + REQUIRE_INSNS_FLAGS2(ctx, ISA207S); + REQUIRE_SV(ctx); +#if !defined(CONFIG_USER_ONLY) && defined(TARGET_PPC64) + gen_helper_book3s_msgsndp(cpu_env, cpu_gpr[a->rb]); +#else + qemu_build_not_reached(); ^ here. It seems like you're filtering for TARGET_PPC64 but you're not guarding for it, and the qemu_build_not_reached() is being hit. I fixed by squashing this in: diff --git a/target/ppc/translate/processor-ctrl-impl.c.inc b/target/ppc/translate/processor-ctrl-impl.c.inc index f253226a75..ff231db1af 100644 --- a/target/ppc/translate/processor-ctrl-impl.c.inc +++ b/target/ppc/translate/processor-ctrl-impl.c.inc @@ -73,6 +73,7 @@ static bool trans_MSGCLRP(DisasContext *ctx, arg_X_rb *a) { REQUIRE_INSNS_FLAGS2(ctx, ISA207S); REQUIRE_SV(ctx); + REQUIRE_64BIT(ctx); #if !defined(CONFIG_USER_ONLY) && defined(TARGET_PPC64) gen_helper_book3s_msgclrp(cpu_env, cpu_gpr[a->rb]); #else @@ -85,6 +86,7 @@ static bool trans_MSGSNDP(DisasContext *ctx, arg_X_rb *a) { REQUIRE_INSNS_FLAGS2(ctx, ISA207S); REQUIRE_SV(ctx); + REQUIRE_64BIT(ctx); #if !defined(CONFIG_USER_ONLY) && defined(TARGET_PPC64) gen_helper_book3s_msgsndp(cpu_env, cpu_gpr[a->rb]); #else If you think this fix is acceptable you can consider this patch acked as well. It shouldn't matter since we only want to make the compiler happy, but we should check instruction flags before privilege, so we throw POWERPC_EXCP_INVAL_INVAL instead of POWERPC_EXCP_PRIV_OPC if the CPU doesn't have the instruction: > @@ -73,6 +73,7 @@ static bool trans_MSGCLRP(DisasContext *ctx, arg_X_rb *a) > { > + REQUIRE_64BIT(ctx); > REQUIRE_INSNS_FLAGS2(ctx, ISA207S); > REQUIRE_SV(ctx); > #if !defined(CONFIG_USER_ONLY) && defined(TARGET_PPC64) > gen_helper_book3s_msgclrp(cpu_env, cpu_gpr[a->rb]); > #else > @@ -85,6 +86,7 @@ static bool trans_MSGSNDP(DisasContext *ctx, arg_X_rb *a) > { > + REQUIRE_64BIT(ctx); > REQUIRE_INSNS_FLAGS2(ctx, ISA207S); > REQUIRE_SV(ctx); > #if !defined(CONFIG_USER_ONLY) && defined(TARGET_PPC64) > gen_helper_book3s_msgsndp(cpu_env, cpu_gpr[a->rb]); > #else Since all CPUs with ISA207S are 64-bit, it shouldn't make any difference in this context, but someone might use this code as an example, so it's better to have these checks in the correct order. Do you want me to resend with this change? Thanks, Matheus K. Ferst Instituto de Pesquisas ELDORADO <http://www.eldorado.org.br/> Analista de Software Aviso Legal - Disclaimer <https://www.eldorado.org.br/disclaimer.html>
Re: [PATCH RESEND] linux-user: Fix struct statfs ABI on loongarch64
Am Donnerstag, 6. Oktober 2022, 12:07:10 CEST schrieb WANG Xuerui: > Previously the 32-bit version was incorrectly chosen, leading to funny > but incorrect output from e.g. df(1). Simply select the version > corresponding to the 64-bit asm-generic definition. > > For reference, this program should produce the same output no matter > natively compiled or not, for loongarch64 or not: > > ```c > #include > #include > > int main(int argc, const char *argv[]) > { > struct statfs b; > if (statfs(argv[0], &b)) > return 1; > > printf("f_type = 0x%lx\n", b.f_type); > printf("f_bsize = %ld\n", b.f_bsize); > printf("f_blocks = %ld\n", b.f_blocks); > printf("f_bfree = %ld\n", b.f_bfree); > printf("f_bavail = %ld\n", b.f_bavail); > > return 0; > } > > // Example output on my amd64 box, with the test binary residing on a > // btrfs partition. > > // Native and emulated output after the fix: > // > // f_type = 0x9123683e > // f_bsize = 4096 > // f_blocks = 268435456 > // f_bfree = 168406890 > // f_bavail = 168355058 > > // Output before the fix, note the messed layout: > // > // f_type = 0x10009123683e > // f_bsize = 723302085239504896 > // f_blocks = 168355058 > // f_bfree = 2250817541779750912 > // f_bavail = 1099229433104 > ``` > > Fixes: 1f63019632 ("linux-user: Add LoongArch syscall support") > Signed-off-by: WANG Xuerui > Cc: Song Gao > Cc: Xiaojuan Yang > Cc: Andreas K. Hüttel > --- > > Resend with amended commit message to 100% clarify the example output > are generated on my box and will differ for everyone else. Sorry for > the noise. > Definitely fixes df. Tested-by: Andreas K. Huettel -- Andreas K. Hüttel dilfri...@gentoo.org Gentoo Linux developer (council, toolchain, base-system, perl, libreoffice) signature.asc Description: This is a digitally signed message part.
Re: [PATCH] linux-user: Fix more MIPS n32 syscall ABI issues
Am Donnerstag, 6. Oktober 2022, 10:55:00 CEST schrieb WANG Xuerui: > In commit 80f0fe3a85 ("linux-user: Fix syscall parameter handling for > MIPS n32") the ABI problem regarding offset64 on MIPS n32 was fixed, > but still some cases remain where the n32 is incorrectly treated as any > other 32-bit ABI that passes 64-bit arguments in pairs of GPRs. Fix by > excluding TARGET_ABI_MIPSN32 from various TARGET_ABI_BITS == 32 checks. > > Closes: https://gitlab.com/qemu-project/qemu/-/issues/1238 > Signed-off-by: WANG Xuerui > Cc: Philippe Mathieu-Daudé > Cc: Jiaxun Yang > Cc: Andreas K. Hüttel > Cc: Joshua Kinard > --- > > Note: I can't reproduce the crash with neither MIPS n32 sysroot at my hand > (a self-built one for Loongson-2F, and > stage3-mips64_n32-openrc-20221001T170527Z), > so I can only verify by looking at the (host and qemu) strace outputs, and > would have to ask you to review/test this harder. Thanks. This solves the problem I observed in https://gitlab.com/qemu-project/qemu/-/issues/1238 Thank you!! Tested by having one mipsel n32 chroot rebuild itself completely. Tested-by: Andreas K. Huettel -- Andreas K. Hüttel dilfri...@gentoo.org Gentoo Linux developer (council, toolchain, base-system, perl, libreoffice) signature.asc Description: This is a digitally signed message part.
Re: [RFC PATCH v2 09/29] target/ppc: remove generic architecture checks from p9_deliver_interrupt
On 30/09/2022 15:13, Fabiano Rosas wrote: Matheus Ferst writes: No functional change intended. Signed-off-by: Matheus Ferst --- target/ppc/excp_helper.c | 9 + 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/target/ppc/excp_helper.c b/target/ppc/excp_helper.c index 603c956588..67e73f30ab 100644 --- a/target/ppc/excp_helper.c +++ b/target/ppc/excp_helper.c @@ -1919,18 +1919,11 @@ static void p9_deliver_interrupt(CPUPPCState *env, int interrupt) break; case PPC_INTERRUPT_DECR: /* Decrementer exception */ -if (ppc_decr_clear_on_delivery(env)) { -env->pending_interrupts &= ~PPC_INTERRUPT_DECR; -} Maybe I'm missing something, but this should continue to clear the bit, no? Same comment for P8. ppc_decr_clear_on_delivery returns true if (env->tb_env->flags & (PPC_DECR_UNDERFLOW_TRIGGERED | PPC_DECR_UNDERFLOW_LEVEL)) == PPC_DECR_UNDERFLOW_TRIGGERED, i.e., PPC_DECR_UNDERFLOW_TRIGGERED is set and PPC_DECR_UNDERFLOW_LEVEL is clear. All Book3S CPU have a level triggered interrupt, so the method return false. Thanks, Matheus K. Ferst Instituto de Pesquisas ELDORADO <http://www.eldorado.org.br/> Analista de Software Aviso Legal - Disclaimer <https://www.eldorado.org.br/disclaimer.html>
Re: [RFC PATCH v2 00/29] PowerPC interrupt rework
appens when the -smp >= $(nproc), but I haven't looked too deep in this case. Maybe some magic optimization on Linux mutex implementation that helps on the higher contention case? pSeries boot time is essentially unchanged. With a non-compressed kernel, the difference with PowerNV is smaller, and pSeries stills the same: +-+--+-+ | | PowerNV | pSeries | |-smp |--+-+ | | master | patch series | master | patch series | +-+--+-+ | 1 | 42,17 ± 0,92 | 38,13 ± 0,59 | 23,15 ± 1,02 | 23,46 ± 1,02 | | 2 | 55,72 ± 3,54 | 40,30 ± 0,56 | 26,26 ± 0,82 | 26,38 ± 0,80 | | 4 | 67,09 ± 3,02 | 38,26 ± 0,47 | 28,36 ± 0,77 | 28,19 ± 0,78 | | 6 | 98,96 ± 2,49 | 39,01 ± 0,38 | 28,68 ± 0,75 | 29,02 ± 0,88 | | 8 | 39,68 ± 0,42 | 38,44 ± 0,41 | 29,24 ± 0,81 | 29,44 ± 0,75 | +-+--+-+ Finally, using command lines like ./qemu-system-ppc64 -M powernv9 -cpu POWER9 -accel tcg,thread=multi \ -m 8G -smp 4 -device virtio-scsi-pci -boot c -vga none -nographic \ -device nvme,bus=pcie.2,addr=0x0,drive=drive0,serial=1234 \ -drive file=rootfs.ext2,if=none,id=drive0,format=raw,cache=none \ -snapshot -serial pipe:pipe -monitor unix:mon,server,nowait \ -kernel zImage -append 'console=hvc0 rootwait root=/dev/nvme0n1' \ -device virtio-net-pci,netdev=br0,mac=52:54:00:12:34:57,bus=pcie.0 \ -netdev bridge,id=br0 and ./qemu-system-ppc64 -M pseries -cpu POWER9 -accel tcg,thread=multi \ -m 8G -smp 4 -device virtio-scsi-pci -boot c -vga none -nographic \ -drive file=rootfs.ext2,if=scsi,index=0,format=raw -snapshot \ -kernel zImage -append 'console=hvc0 rootwait root=/dev/sda' \ -serial pipe:pipe -monitor unix:mon,server,nowait \ -device virtio-net-pci,netdev=br0,mac=52:54:00:12:34:57 \ -netdev bridge,id=br0 to tests IO performance, with iperf to test network and a 4Gb scp transfer to test disk+network, in 100 iterations we saw: +-+---+-+ | | scp (s) | iperf (MB/s) | +-+---+-+ |PowerNV master | 166,91 ± 8,37 | 918,06 ± 114,78 | |PowerNV patch series | 166,25 ± 8,85 | 916,91 ± 107,56 | |pSeries master | 175,70 ± 8,22 | 958,73 ± 115,09 | |pSeries patch series | 173,62 ± 8,13 | 893,42 ± 87,77 | +-+---+-+ These are SMP machines under high IO load using MTTCG. It means that the models are quite robust now. The scp data shows little difference, while testing just network shows that it's a bit slower with the patch series applied (although, with this variation, we'd probably need to repeat this test more times to have a more robust result...) You could try with powersave=off. Not a big difference, with 50 iterations: +-+---+-+ | |scp (s)| iperf (MB/s) | +-+---+-+ |PowerNV master | 142.73 ± 8.38 | 924.34 ± 353.93 | |PowerNV patch series | 145.75 ± 9.18 | 874.52 ± 286.21 | +-+---+-+ Thanks, Matheus K. Ferst Instituto de Pesquisas ELDORADO <http://www.eldorado.org.br/> Analista de Software Aviso Legal - Disclaimer <https://www.eldorado.org.br/disclaimer.html>
Re: [RFC PATCH v2 11/29] target/ppc: add power-saving interrupt masking logic to p9_next_unmasked_interrupt
lpes0)) { return PPC_INTERRUPT_EXT; } } -if (async_deliver != 0) { +if (msr_ee != 0) { /* Decrementer exception */ if (env->pending_interrupts & PPC_INTERRUPT_DECR) { return PPC_INTERRUPT_DECR; @@ -1895,6 +1906,15 @@ static void p9_deliver_interrupt(CPUPPCState *env, int interrupt) PowerPCCPU *cpu = env_archcpu(env); CPUState *cs = env_cpu(env); +if (cs->halted && !(env->spr[SPR_PSSCR] & PSSCR_EC) && +!FIELD_EX64(env->msr, MSR, EE)) { +/* + * A pending interrupt took us out of power-saving, but MSR[EE] says + * that we should return to NIP+4 instead of delivering it. + */ +return; How will the NIP be advanced in this case? It's already incremented by the translation code. ppc_tr_translate_insn increments ctx->base.pc_next before calling decode_{insn{32,64},legacy}, and methods that put the CPU to sleep will use gen_exception_nip with this value as the last argument. +} + switch (interrupt) { case PPC_INTERRUPT_MCK: /* Machine check exception */ env->pending_interrupts &= ~PPC_INTERRUPT_MCK; diff --git a/target/ppc/internal.h b/target/ppc/internal.h index 337a362205..41e79adfdb 100644 --- a/target/ppc/internal.h +++ b/target/ppc/internal.h @@ -306,4 +306,8 @@ static inline int ger_pack_masks(int pmsk, int ymsk, int xmsk) return msk; } +#if defined(TARGET_PPC64) +int p9_interrupt_powersave(CPUPPCState *env); +#endif + #endif /* PPC_INTERNAL_H */ Thanks, Matheus K. Ferst Instituto de Pesquisas ELDORADO <http://www.eldorado.org.br/> Analista de Software Aviso Legal - Disclaimer <https://www.eldorado.org.br/disclaimer.html>
Re: [RFC PATCH v2 13/29] target/ppc: remove unused interrupts from p8_pending_interrupt
On 27/09/2022 19:14, Fabiano Rosas wrote: Matheus Ferst writes: Remove the following unused interrupts from the POWER8 interrupt masking method: - PPC_INTERRUPT_RESET: only raised for 6xx, 7xx, 970, and POWER5p; - Debug Interrupt: removed in Power ISA v2.07; - Hypervisor Virtualization: introduced in Power ISA v3.0; - Critical Input, Watchdog Timer, and Fixed Interval Timer: only defined for embedded CPUs; - Hypervisor Doorbell, Doorbell, and Critical Doorbell: processor does We still need the first two. 0xe80 - Directed hypervisor doorbell 0xa00 - Directed privileged doorbell It seems that on PowerISA v2.07, the category for msgsnd and msgclr became "Embedded Processor Control" or "Book S." That's certainly not what we are doing in code, both instructions are behind the PPC2_PRCNTL flag, so they are not available for -cpu POWER8. Also, we're not checking for ISA 3.00 on msgsync... I'll keep these interrupts in v3 and send a separate patch fixing the instruction flags. Thanks, Matheus K. Ferst Instituto de Pesquisas ELDORADO <http://www.eldorado.org.br/> Analista de Software Aviso Legal - Disclaimer <https://www.eldorado.org.br/disclaimer.html>
Re: [RFC PATCH 06/13] target/ppc: remove embedded interrupts from ppc_pending_interrupt_p9
On 15/08/2022 18:23, Fabiano Rosas wrote: Matheus Ferst writes: Critical Input, Watchdog Timer, and Fixed Interval Timer are only defined for embedded CPUs. The Programmable Interval Timer is 40x-only. Signed-off-by: Matheus Ferst --- target/ppc/excp_helper.c | 18 -- 1 file changed, 18 deletions(-) diff --git a/target/ppc/excp_helper.c b/target/ppc/excp_helper.c index 2ca6a917b2..42b57019ba 100644 --- a/target/ppc/excp_helper.c +++ b/target/ppc/excp_helper.c @@ -1780,28 +1780,10 @@ static int ppc_pending_interrupt_p9(CPUPPCState *env) return PPC_INTERRUPT_EXT; } } -if (FIELD_EX64(env->msr, MSR, CE)) { -/* External critical interrupt */ -if (env->pending_interrupts & PPC_INTERRUPT_CEXT) { -return PPC_INTERRUPT_CEXT; -} -} if (async_deliver != 0) { -/* Watchdog timer on embedded PowerPC */ -if (env->pending_interrupts & PPC_INTERRUPT_WDT) { -return PPC_INTERRUPT_WDT; -} if (env->pending_interrupts & PPC_INTERRUPT_CDOORBELL) { return PPC_INTERRUPT_CDOORBELL; } This one too. And the Thermal. There are some other interrupts that I was not sure if we should remove: - PPC_INTERRUPT_PERFM doesn't seem to be used anywhere else. I guess it will be used when we implement more PMU stuff, so I left it in all ppc_pending_interrupt_* methods. - PPC_INTERRUPT_RESET was treated in cpu_has_work_POWER*, but AFAICS, it's only used in ppc6xx_set_irq and ppc970_set_irq, which means it can only be raised on 6xx, 7xx, 970, and POWER5+. Should we remove it too? -/* Fixed interval timer on embedded PowerPC */ -if (env->pending_interrupts & PPC_INTERRUPT_FIT) { -return PPC_INTERRUPT_FIT; -} -/* Programmable interval timer on embedded PowerPC */ -if (env->pending_interrupts & PPC_INTERRUPT_PIT) { -return PPC_INTERRUPT_PIT; -} /* Decrementer exception */ if (env->pending_interrupts & PPC_INTERRUPT_DECR) { return PPC_INTERRUPT_DECR; Tḧanks, Matheus K. Ferst Instituto de Pesquisas ELDORADO <http://www.eldorado.org.br/> Analista de Software Aviso Legal - Disclaimer <https://www.eldorado.org.br/disclaimer.html>
Re: [RFC PATCH 05/13] target/ppc: create an interrupt masking method for POWER9/POWER10
r interrupts that gate on MSR:EE, we need to do something a + * bit more subtle, as we need to let them through even when EE is + * clear when coming out of some power management states (in order + * for them to become a 0x100). + */ +async_deliver |= FIELD_EX64(env->msr, MSR, EE) || env->resume_as_sreset; + +/* Hypervisor decrementer exception */ +if (env->pending_interrupts & PPC_INTERRUPT_HDECR) { +/* LPCR will be clear when not supported so this will work */ +bool hdice = !!(env->spr[SPR_LPCR] & LPCR_HDICE); +if ((async_deliver || !FIELD_EX64_HV(env->msr)) && hdice) { +/* HDEC clears on delivery */ +return PPC_INTERRUPT_HDECR; +} +} + +/* Hypervisor virtualization interrupt */ +if (env->pending_interrupts & PPC_INTERRUPT_HVIRT) { +/* LPCR will be clear when not supported so this will work */ +bool hvice = !!(env->spr[SPR_LPCR] & LPCR_HVICE); +if ((async_deliver || !FIELD_EX64_HV(env->msr)) && hvice) { +return PPC_INTERRUPT_HVIRT; +} +} + +/* External interrupt can ignore MSR:EE under some circumstances */ +if (env->pending_interrupts & PPC_INTERRUPT_EXT) { +bool lpes0 = !!(env->spr[SPR_LPCR] & LPCR_LPES0); +bool heic = !!(env->spr[SPR_LPCR] & LPCR_HEIC); +/* HEIC blocks delivery to the hypervisor */ +if ((async_deliver && !(heic && FIELD_EX64_HV(env->msr) && +!FIELD_EX64(env->msr, MSR, PR))) || +(env->has_hv_mode && !FIELD_EX64_HV(env->msr) && !lpes0)) { +return PPC_INTERRUPT_EXT; +} +} +if (FIELD_EX64(env->msr, MSR, CE)) { +/* External critical interrupt */ +if (env->pending_interrupts & PPC_INTERRUPT_CEXT) { +return PPC_INTERRUPT_CEXT; +} +} +if (async_deliver != 0) { +/* Watchdog timer on embedded PowerPC */ +if (env->pending_interrupts & PPC_INTERRUPT_WDT) { +return PPC_INTERRUPT_WDT; +} +if (env->pending_interrupts & PPC_INTERRUPT_CDOORBELL) { +return PPC_INTERRUPT_CDOORBELL; +} +/* Fixed interval timer on embedded PowerPC */ +if (env->pending_interrupts & PPC_INTERRUPT_FIT) { +return PPC_INTERRUPT_FIT; +} +/* Programmable interval timer on embedded PowerPC */ +if (env->pending_interrupts & PPC_INTERRUPT_PIT) { +return PPC_INTERRUPT_PIT; +} +/* Decrementer exception */ +if (env->pending_interrupts & PPC_INTERRUPT_DECR) { +return PPC_INTERRUPT_DECR; +} +if (env->pending_interrupts & PPC_INTERRUPT_DOORBELL) { +return PPC_INTERRUPT_DOORBELL; +} +if (env->pending_interrupts & PPC_INTERRUPT_HDOORBELL) { +return PPC_INTERRUPT_HDOORBELL; +} +if (env->pending_interrupts & PPC_INTERRUPT_PERFM) { +return PPC_INTERRUPT_PERFM; +} +/* Thermal interrupt */ +if (env->pending_interrupts & PPC_INTERRUPT_THERM) { +return PPC_INTERRUPT_THERM; +} +/* EBB exception */ +if (env->pending_interrupts & PPC_INTERRUPT_EBB) { +/* + * EBB exception must be taken in problem state and + * with BESCR_GE set. + */ +if (FIELD_EX64(env->msr, MSR, PR) && +(env->spr[SPR_BESCR] & BESCR_GE)) { +return PPC_INTERRUPT_EBB; +} +} +} + +return 0; +} + static int ppc_pending_interrupt_legacy(CPUPPCState *env) { bool async_deliver; @@ -1793,6 +1950,9 @@ static int ppc_pending_interrupt_legacy(CPUPPCState *env) static int ppc_pending_interrupt(CPUPPCState *env) { switch (env->excp_model) { +case POWERPC_EXCP_POWER9: +case POWERPC_EXCP_POWER10: +return ppc_pending_interrupt_p9(env); default: return ppc_pending_interrupt_legacy(env); } Thanks, Matheus K. Ferst Instituto de Pesquisas ELDORADO <http://www.eldorado.org.br/> Analista de Software Aviso Legal - Disclaimer <https://www.eldorado.org.br/disclaimer.html>
Re: [RFC PATCH 04/13] target/ppc: prepare to split ppc_interrupt_pending by excp_model
On 15/08/2022 17:25, Fabiano Rosas wrote: Matheus Ferst writes: Rename the method to ppc_interrupt_pending_legacy and create a new ppc_interrupt_pending that will call the appropriate interrupt masking method based on env->excp_model. Signed-off-by: Matheus Ferst --- target/ppc/excp_helper.c | 10 +- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/target/ppc/excp_helper.c b/target/ppc/excp_helper.c index 8690017c70..59981efd16 100644 --- a/target/ppc/excp_helper.c +++ b/target/ppc/excp_helper.c @@ -1678,7 +1678,7 @@ void ppc_cpu_do_interrupt(CPUState *cs) powerpc_excp(cpu, cs->exception_index); } -static int ppc_pending_interrupt(CPUPPCState *env) +static int ppc_pending_interrupt_legacy(CPUPPCState *env) Won't this code continue to be used for the older CPUs? If so, I don't think the term legacy is appropriate. It ends up being dependent on context and what people's definitions of "legacy" are. (if this function is removed in a later patch, then that's ok). For this RFC, I created methods for CPUs that change the interrupt masking logic when cs->halted is set. If the way we split the interruption code in the following patches is acceptable, I'm planning to add methods for all CPUs and remove ppc_pending_interrupt_legacy in future versions of this patch series. Thanks, Matheus K. Ferst Instituto de Pesquisas ELDORADO <http://www.eldorado.org.br/> Analista de Software Aviso Legal - Disclaimer <https://www.eldorado.org.br/disclaimer.html>
Re: [RFC PATCH 03/13] target/ppc: move interrupt masking out of ppc_hw_interrupt
On 15/08/2022 17:09, Fabiano Rosas wrote: Matheus Ferst writes: Move the interrupt masking logic to a new method, ppc_pending_interrupt, and only handle the interrupt processing in ppc_hw_interrupt. Signed-off-by: Matheus Ferst --- target/ppc/excp_helper.c | 228 --- 1 file changed, 141 insertions(+), 87 deletions(-) diff --git a/target/ppc/excp_helper.c b/target/ppc/excp_helper.c @@ -1884,15 +1915,38 @@ bool ppc_cpu_exec_interrupt(CPUState *cs, int interrupt_request) { PowerPCCPU *cpu = POWERPC_CPU(cs); CPUPPCState *env = &cpu->env; +int pending_interrupt; I would give this a simpler name to avoid confusion with the other two pending_interrupt terms. -if (interrupt_request & CPU_INTERRUPT_HARD) { -ppc_hw_interrupt(env); -if (env->pending_interrupts == 0) { -cs->interrupt_request &= ~CPU_INTERRUPT_HARD; -} -return true; +if ((interrupt_request & CPU_INTERRUPT_HARD) == 0) { +return false; } -return false; + It seems we're assuming that after this point we certainly have some pending interrupt... +pending_interrupt = ppc_pending_interrupt(env); +if (pending_interrupt == 0) { ...but how then is this able to return 0? At this point in the patch series, raising interrupts with ppc_set_irq always sets CPU_INTERRUPT_HARD, so it is possible that all interrupts are masked, and then ppc_pending_interrupt would return zero. Could the function's name be made a bit clearer? Maybe interrupt = ppc_next_pending_interrupt or something to that effect. Maybe ppc_next_unmasked_interrupt? +if (env->resume_as_sreset) { +/* + * This is a bug ! It means that has_work took us out of halt + * without anything to deliver while in a PM state that requires + * getting out via a 0x100 + * + * This means we will incorrectly execute past the power management + * instruction instead of triggering a reset. + * + * It generally means a discrepancy between the wakeup conditions in + * the processor has_work implementation and the logic in this + * function. + */ +cpu_abort(env_cpu(env), + "Wakeup from PM state but interrupt Undelivered"); This condition is BookS only. Perhaps it would be better to move it inside each of the processor-specific functions. And since you're merging has_work with pending_interrupts, can't you solve that issue earlier? Specifically the "has_work tooks us out of halt..." part. This condition would not be an error in ppc_pending_interrupt because we'll call this method from other places in the following patches, like ppc_set_irq. Maybe we should move it to a "case 0:" in ppc_hw_interrupt? +} +return false; +} + +ppc_hw_interrupt(env, pending_interrupt); Some verbs would be nice. ppc_deliver_interrupt? Agreed. Should we also make processor-specific versions of this method? That way, we could get rid of the calls to ppc_decr_clear_on_delivery and is_book3s_arch2x. +if (env->pending_interrupts == 0) { +cpu_reset_interrupt(cs, CPU_INTERRUPT_HARD); +} +return true; } #endif /* !CONFIG_USER_ONLY */ Thanks, Matheus K. Ferst Instituto de Pesquisas ELDORADO <http://www.eldorado.org.br/> Analista de Software Aviso Legal - Disclaimer <https://www.eldorado.org.br/disclaimer.html>
Re: [RFC PATCH 00/13] PowerPC interrupt rework
On 15/08/2022 17:02, Cédric Le Goater wrote: [ Adding Fabiano who reworked all exception models for 7.0 and Nick who rewrote the Linux side sometime ago ] On 8/15/22 18:20, Matheus Ferst wrote: Currently, PowerPC interrupts are handled as follows: 1) The CPU_INTERRUPT_HARD bit of cs->interrupt_request gates all interrupts; 2) The bits of env->pending_interrupts identify which particular interrupt is raised; 3) ppc_set_irq can be used to set/clear env->pending_interrupt bit and CPU_INTERRUPT_HARD, but some places access env->pending_interrupt directly; 4) ppc_cpu_exec_interrupt is called by cpu_handle_interrupt when cs->interrupt_request indicates that there is some interrupt pending. This method checks CPU_INTERRUPT_HARD and calls ppc_hw_interrupt. If env->pending_interrupt is zero after this call, CPU_INTERRUPT_HARD will be cleared. 5) ppc_hw_interrupt checks if there is any unmasked interrupt and calls powerpc_excp with the appropriate POWERPC_EXCP_* value. The method will also reset the corresponding bit in env->pending_interrupt for interrupts that clear on delivery. If all pending interrupts are masked, CPU_INTERRUPT_HARD will be set, but ppc_hw_interrupt will not deliver or clear the interrupt, so CPU_INTERRUPT_HARD will not be reset by ppc_cpu_exec_interrupt. With that, cs->has_work keeps returning true, creating a loop that acquires and release qemu_mutex_lock_iothread, causing the poor performance reported in [1]. This patch series attempts to rework the PowerPC interrupt code to set CPU_INTERRUPT_HARD only when there are unmasked interrupts. Then cs->has_work can be simplified to a check of CPU_INTERRUPT_HARD, so it also only returns true when at least one interrupt can be delivered. To achieve that, we are basically following Alex Bannée's suggestion[2] in the original thread: the interrupt masking logic will be factored out of ppc_hw_interrupt in a new method, ppc_pending_interrupts. This method is then used to decide if CPU_INTERRUPT_HARD should be set or cleared after changes to MSR, LPCR, env->pending_interrupts, and power-management instructions. We used [3] to check for regressions at each patch in this series. After patch 12, booting a powernv machine with a newer skiboot with "-smp 4" goes from 1m09s to 20.79s. whaou ! PowerNV is really an heavy weight platform, so that's a great improvement. Note that this result uses Frederic's test case, where one CPU decompresses the kernel and the others keep spinning to deliver a masked decrement interrupt. The improvement may not be that great with other workloads. Did you try KVM guests under PowerNV (L1 under an emulated L0) and KVM under pseries (L2 under an emulated L1) ? Try some intensive I/O on a SMP machine, like a large scp transfer. So far, I have mainly tested with buildroot boot+poweroff. I'll try these other tests too. We should try the MacOS images also. Thanks, C. Unfortunately, I can't test with MacOS :/ Thanks, Matheus K. Ferst Instituto de Pesquisas ELDORADO <http://www.eldorado.org.br/> Analista de Software Aviso Legal - Disclaimer <https://www.eldorado.org.br/disclaimer.html>
Re: [PATCH v2 1/1] target/ppc: fix unreachable code in do_ldst_quad()
On 25/07/2022 17:21, Daniel Henrique Barboza wrote: Coverity reports that commit fc34e81acd51 ("target/ppc: add macros to check privilege level") turned the following code unreachable: if (!prefixed && !(ctx->insns_flags2 & PPC2_LSQ_ISA207)) { /* lq and stq were privileged prior to V. 2.07 */ REQUIRE_SV(ctx); CID 1490757: Control flow issues (UNREACHABLE) This code cannot be reached: "if (ctx->le_mode) { if (ctx->le_mode) { gen_align_no_le(ctx); return true; } } This happens because the macro REQUIRE_SV(), in CONFIG_USER_MODE, will always result in a 'return true' statement. In fact, all REQUIRE_* macros for target/ppc/translate.c behave the same way: if a condition isn't met, an exception is generated and a 'return' statement is issued. The difference is that all other callers are using it in insns that are not implemented in user mode. do_ldst_quad(), on the other hand, is user mode compatible. Fixes include wrapping these lines in "if !defined(CONFIG_USER_MODE)", making it explicit that these lines are not user mode anymore. Another fix would be, for example, to change REQUIRE_SV() to not issue a 'return' and check if we're running in privileged mode or not by hand, but this would change all other callers of the macro that are using it in an adequate manner. The code that was in place before fc34e81acd51 was good enough, so let's go back to that: open code the ctx->pr condition and fire the exception if we're not privileged. The difference from the code back then to what we're doing now is an 'unlikely' compiler hint to ctx->pr and the use of gen_priv_opc() instead of gen_priv_exception(). Fixes: Coverity CID 1490757 Cc: Matheus Ferst Signed-off-by: Daniel Henrique Barboza --- target/ppc/translate/fixedpoint-impl.c.inc | 7 +-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/target/ppc/translate/fixedpoint-impl.c.inc b/target/ppc/translate/fixedpoint-impl.c.inc index db14d3bebc..a3ade4fe2b 100644 --- a/target/ppc/translate/fixedpoint-impl.c.inc +++ b/target/ppc/translate/fixedpoint-impl.c.inc @@ -79,8 +79,11 @@ static bool do_ldst_quad(DisasContext *ctx, arg_D *a, bool store, bool prefixed) REQUIRE_INSNS_FLAGS(ctx, 64BX); if (!prefixed && !(ctx->insns_flags2 & PPC2_LSQ_ISA207)) { -/* lq and stq were privileged prior to V. 2.07 */ -REQUIRE_SV(ctx); +if (unlikely(ctx->pr)) { +/* lq and stq were privileged prior to V. 2.07 */ +gen_priv_opc(ctx); +return true; +} if (ctx->le_mode) { gen_align_no_le(ctx); -- 2.36.1 Since the remaining code in this branch is dead code in user-mode, I'd personally prefer the v1 approach, but the difference is unlikely to have any meaningful impact, so either way is good. Reviewed-by: Matheus Ferst Thanks, Matheus K. Ferst Instituto de Pesquisas ELDORADO <http://www.eldorado.org.br/> Analista de Software Aviso Legal - Disclaimer <https://www.eldorado.org.br/disclaimer.html>
Re: [PATCH v2] target/ppc: check tb_env != 0 before printing TBU/TBL/DECR
On 14/07/2022 10:35, Daniel Henrique Barboza wrote: On 7/13/22 15:38, Matheus Ferst wrote: When using "-machine none", env->tb_env is not allocated, causing the segmentation fault reported in issue #85 (launchpad bug #811683). To avoid this problem, check if the pointer != NULL before calling the methods to print TBU/TBL/DECR. Resolves: https://gitlab.com/qemu-project/qemu/-/issues/85 Signed-off-by: Matheus Ferst --- v2: - Added checks in monitor_get_decr, monitor_get_tbu, and monitor_get_tbl. - Link to v1: https://lists.gnu.org/archive/html/qemu-ppc/2022-07/msg00173.html --- target/ppc/cpu_init.c | 16 target/ppc/monitor.c | 9 + 2 files changed, 17 insertions(+), 8 deletions(-) diff --git a/target/ppc/cpu_init.c b/target/ppc/cpu_init.c index 86ad28466a..7e96baac9f 100644 --- a/target/ppc/cpu_init.c +++ b/target/ppc/cpu_init.c @@ -7476,18 +7476,18 @@ void ppc_cpu_dump_state(CPUState *cs, FILE *f, int flags) "%08x iidx %d didx %d\n", env->msr, env->spr[SPR_HID0], env->hflags, cpu_mmu_index(env, true), cpu_mmu_index(env, false)); -#if !defined(NO_TIMER_DUMP) Why did you remove the NO_TIMER_DUMP check? Is it redundant with the env->tb_env check? This is the only reference to this macro since it was added in d9bce9d99f46. I suppose it was manually defined, but the only discussion[1] I could find around this patch doesn't mention it. I don't see any other reason to define it other than avoiding segfaults in machines that don't allocate env_tb, but we can keep it if you prefer. - qemu_fprintf(f, "TB %08" PRIu32 " %08" PRIu64 + if (env->tb_env) { + qemu_fprintf(f, "TB %08" PRIu32 " %08" PRIu64 #if !defined(CONFIG_USER_ONLY) - " DECR " TARGET_FMT_lu + " DECR " TARGET_FMT_lu #endif - "\n", - cpu_ppc_load_tbu(env), cpu_ppc_load_tbl(env) + "\n", + cpu_ppc_load_tbu(env), cpu_ppc_load_tbl(env) #if !defined(CONFIG_USER_ONLY) - , cpu_ppc_load_decr(env) -#endif - ); + , cpu_ppc_load_decr(env) #endif + ); + } Not really a problem with your patch, but since you're changing this code, can you please cleanse it from evil? I mean, look at this: if (env->tb_env) { qemu_fprintf(f, "TB %08" PRIu32 " %08" PRIu64 #if !defined(CONFIG_USER_ONLY) " DECR " TARGET_FMT_lu #endif "\n", cpu_ppc_load_tbu(env), cpu_ppc_load_tbl(env) #if !defined(CONFIG_USER_ONLY) , cpu_ppc_load_decr(env) #endif ); } 2 ifdef macros in the middle of qemu_fprintf() params? With one line starting with a ', '? Why are we trading sanity for 3 lines of code repetition? We can --at least-- do something like this: if (env->tb_env) { #if !defined(CONFIG_USER_ONLY) qemu_fprintf(f, "TB %08" PRIu32 " %08" PRIu64 " DECR " TARGET_FMT_lu "\n", cpu_ppc_load_tbu(env), cpu_ppc_load_tbl(env), cpu_ppc_load_decr(env)); #else qemu_fprintf(f, "TB %08" PRIu32 " %08" PRIu64 "\n", cpu_ppc_load_tbu(env), cpu_ppc_load_tbl(env)); #endif } Thanks, Daniel Sure, I'll change that in v3. [1] https://lists.gnu.org/archive/html/qemu-devel/2007-03/msg00239.html Thanks, Matheus K. Ferst Instituto de Pesquisas ELDORADO <http://www.eldorado.org.br/> Analista de Software Aviso Legal - Disclaimer <https://www.eldorado.org.br/disclaimer.html>
Re: [RFC PATCH] target/ppc: don't print TB in ppc_cpu_dump_state if it's not initialized
On 12/07/2022 23:21, David Gibson wrote: On Tue, Jul 12, 2022 at 06:13:44PM -0300, Daniel Henrique Barboza wrote: On 7/12/22 16:25, Matheus Ferst wrote: When using "-machine none", env->tb_env is not allocated, causing the segmentation fault reported in issue #85 (launchpad bug #811683). To avoid this problem, check if the pointer != NULL before calling the methods to print TBU/TBL/DECR. Resolves: https://gitlab.com/qemu-project/qemu/-/issues/85 Signed-off-by: Matheus Ferst --- This patch fixes the reported problem, but may be an incomplete solution since many other places dereference env->tb_env without checking for NULL. AFAICS, "-machine none" is the only way to trigger this problem, and I'm not familiar with the use-cases for this option. The "none" machine type is mainly used by libvirt to do instrospection of the available options/capabilities of the QEMU binary. It starts a QEMU process like the following: ./x86_64-softmmu/qemu-system-x86_64 -S -no-user-config -nodefaults \ -nographic -machine none,accel=kvm:tcg -daemonize And then it uses QMP to probe the binary. Aside from this libvirt usage I am not aware of anyone else using -machine none extensively. Right. -machine none basically cannot work as a real machine for POWER (maybe some other CPUs as well). At least the more modern POWER CPUs simply cannot boot without a bunch of supporting board/system level elements, and there's not really a sane way to encode those into individual emulated devices at present (maybe ever). One of those things is that POWER expects the timebases to be synchronized across all CPUs in the system, which obviously can't be done locally to a single CPU chip. It requires system level operations, which is why it's handled by the machine type [Example: a typical sequence which might be handled in hardware by low-level firmware would be to use machine-specific board-level registers to suspend the clock pulse to the CPUs which drives the timebase, then write the same value to the TB on each CPU, then (atomically) restart the clock pulse using board registers again] So I guess it's safe to assume that it's impossible to run code with "-machine none", and then there would be no reason to check for NULL in the mtspr/mfspr path, right? Should we stop assuming env->tb_env != NULL and add checks everywhere? Or should we find a way to provide Time Base/Decrementer for "-machine none"? --- Are there other cases where env->tb_env can be NULL, aside from the case reported in the bug? If there are, I'd say that's a bug in the machine type. Setting up (and synchronizing) the timebase is part of the machine's job. With "-machine none", it seems that the only places where it could happen are: i) Monitor code: there are some other places where env_tb is used, like monitor_get_tb{u,l} and monitor_get_decr, so commands like "p $tbu" or "p $dect" cause a segfault too. ii) mtspr/mfspr: it shouldn't be a problem if it's not possible to run code without a machine. iii) gdbstub: we're not reading or setting TB{U,L} from gdb, which may be an issue on its own, but not related to #85. I don't mind the bug fix, but I'm not fond of the idea of adding additional checks because of this particular issue. I mean, the bug is using the 'prep' machine that Thomas removed year ago in b2ce76a0730. If there's no other foreseeable problem, that we care about, with env->tb_env being NULL, IMO let's fix the bug and move on. I'll send a v2 fixing the other segfault in monitor, and then I guess we have a complete solution. Thanks Daniel and David for the feedback. -- Matheus K. Ferst Instituto de Pesquisas ELDORADO <http://www.eldorado.org.br/> Analista de Software Aviso Legal - Disclaimer <https://www.eldorado.org.br/disclaimer.html>
Re: [RISU PATCH v4 26/29] ppc64: Clean up reginfo handling
.vrregs[i][3]) { -return 0; -} -} -return 1; +return memcmp(m, a, sizeof(*m)) == 0; } /* reginfo_dump: print state to a stream */ void reginfo_dump(struct reginfo *ri, FILE * f) { -int i; +const char *sep; +int i, j; -fprintf(f, " faulting insn 0x%x\n", ri->faulting_insn); -fprintf(f, " prev insn 0x%x\n", ri->prev_insn); -fprintf(f, " prev addr0x%" PRIx64 "\n\n", ri->nip); +fprintf(f, "%6s: %08x\n", "insn", ri->faulting_insn); +fprintf(f, "%6s: %016lx\n", "pc", ri->nip) > -for (i = 0; i < 16; i++) { -fprintf(f, "\tr%2d: %16lx\tr%2d: %16lx\n", i, ri->gregs[i], -i + 16, ri->gregs[i + 16]); +sep = ""; +for (i = j = 0; i < NGREG; i++) { +if (greg_names[i] != NULL) { +fprintf(f, "%s%6s: %016lx", sep, greg_names[i], ri->gregs[i]); +sep = (++j & 1 ? " " : "\n"); +} } -fprintf(f, "\n"); -fprintf(f, "\tnip: %16lx\n", ri->gregs[32]); -fprintf(f, "\tmsr: %16lx\n", ri->gregs[33]); -fprintf(f, "\torig r3: %16lx\n", ri->gregs[34]); -fprintf(f, "\tctr: %16lx\n", ri->gregs[35]); -fprintf(f, "\tlnk: %16lx\n", ri->gregs[36]); -fprintf(f, "\txer: %16lx\n", ri->gregs[37]); -fprintf(f, "\tccr: %16lx\n", ri->gregs[38]); -fprintf(f, "\tmq : %16lx\n", ri->gregs[39]); -fprintf(f, "\ttrap : %16lx\n", ri->gregs[40]); -fprintf(f, "\tdar: %16lx\n", ri->gregs[41]); -fprintf(f, "\tdsisr : %16lx\n", ri->gregs[42]); -fprintf(f, "\tresult : %16lx\n", ri->gregs[43]); -fprintf(f, "\tdscr : %16lx\n\n", ri->gregs[44]); - -for (i = 0; i < 16; i++) { -fprintf(f, "\tf%2d: %016lx\tf%2d: %016lx\n", i, ri->fpregs[i], -i + 16, ri->fpregs[i + 16]); +sep = "\n"; +for (i = j = 0; i < 32; i++) { +fprintf(f, "%s%*s%d: %016lx", +sep, 6 - (i < 10 ? 1 : 2), "f", i, ri->fpregs[i]); +sep = (++j & 1 ? " " : "\n"); } -fprintf(f, "\tfpscr: %016lx\n\n", ri->fpscr); +fprintf(f, "\n%6s: %016lx\n", "fpscr", ri->fpscr); for (i = 0; i < 32; i++) { -fprintf(f, "vr%02d: %8x, %8x, %8x, %8x\n", i, +fprintf(f, "%*s%d: %08x %08x %08x %08x\n", +6 - (i < 10 ? 1 : 2), "vr", i, ri->vrregs.vrregs[i][0], ri->vrregs.vrregs[i][1], ri->vrregs.vrregs[i][2], ri->vrregs.vrregs[i][3]); } -- 2.34.1 Thanks, Matheus K. Ferst Instituto de Pesquisas ELDORADO <http://www.eldorado.org.br/> Analista de Software Aviso Legal - Disclaimer <https://www.eldorado.org.br/disclaimer.html>
Re: [RISU PATCH v4 22/29] ppc64: Use uint64_t to represent double
On 08/07/2022 12:46, Richard Henderson wrote: We want to do exact bitwise comparisons of the data, not be held hostage to IEEE comparisons and NaNs. Signed-off-by: Richard Henderson --- risu_reginfo_ppc64.h | 3 ++- risu_reginfo_ppc64.c | 29 + 2 files changed, 11 insertions(+), 21 deletions(-) Hi Richard, Reviewed-by: Matheus Ferst It seems that the series is missing some r-b tags that Alex sent in v3 (e.g. https://lore.kernel.org/qemu-devel/871rm590sg@linaro.org/). Thanks, Matheus K. Ferst Instituto de Pesquisas ELDORADO <http://www.eldorado.org.br/> Analista de Software Aviso Legal - Disclaimer <https://www.eldorado.org.br/disclaimer.html>
Re: Slowness with multi-thread TCG?
On 29/06/2022 12:36, Frederic Barrat wrote: [E-MAIL EXTERNO] Não clique em links ou abra anexos, a menos que você possa confirmar o remetente e saber que o conteúdo é seguro. Em caso de e-mail suspeito entre imediatamente em contato com o DTI. On 29/06/2022 00:17, Alex Bennée wrote: If you run the sync-profiler (via the HMP "sync-profile on") you can then get a breakdown of which mutex's are being held and for how long ("info sync-profile"). Alex, a huge thank you! For the record, the "info sync-profile" showed: Type Object Call site Wait Time (s) Count Average (us) -- BQL mutex 0x55eb89425540 accel/tcg/cpu-exec.c:744 96.31578 73589937 1.31 BQL mutex 0x55eb89425540 target/ppc/helper_regs.c:207 0.00150 1178 1.27 And it points to a lock in the interrupt delivery path, in cpu_handle_interrupt(). I now understand the root cause. The interrupt signal for the decrementer interrupt remains set because the interrupt is not being delivered, per the config. I'm not quite sure what the proper fix is yet (there seems to be several implementations of the decrementer on ppc), but at least I understand why we are so slow. To summarize what we talked elsewhere: 1 - The threads that are not decompressing the kernel have a pending PPC_INTERRUPT_DECR, and cs->interrupt_request is CPU_INTERRUPT_HARD; 2 - cpu_handle_interrupt calls ppc_cpu_exec_interrupt, that calls ppc_hw_interrupt to handle the interrupt; 3 - ppc_cpu_exec_interrupt decides that the interrupt cannot be delivered immediately, so the corresponding bit in env->pending_interrupts is not reset; 4 - ppc_cpu_exec_interrupt does not change cs->interrupt_request because pending_interrupts != 0, so cpu_handle_interrupt will be called again. This loop will acquire and release qemu_mutex_lock_iothread, slowing down other threads that need this lock. With a quick hack, I could verify that by moving that signal out of the way, the decompression time of the kernel is now peanuts, no matter the number of cpus. Even with one cpu, the 15 seconds measured before was already a huge waste, so it was not really a multiple-cpus problem. Multiple cpus were just highlighting it. Thanks again! Fred -- Matheus K. Ferst Instituto de Pesquisas ELDORADO <http://www.eldorado.org.br/> Analista de Software Aviso Legal - Disclaimer <https://www.eldorado.org.br/disclaimer.html>
Re: Slowness with multi-thread TCG?
On 27/06/2022 15:25, Frederic Barrat wrote: [ Resending as it was meant for the qemu-ppc list ] Hello, I've been looking at why our qemu powernv model is so slow when booting a compressed linux kernel, using multiple vcpus and multi-thread tcg. With only one vcpu, the decompression time of the kernel is what it is, but when using multiple vcpus, the decompression is actually slower. And worse: it degrades very fast with the number of vcpus! Rough measurement of the decompression time on a x86 laptop with multi-thread tcg and using the qemu powernv10 machine: 1 vcpu => 15 seconds 2 vcpus => 45 seconds 4 vcpus => 1 min 30 seconds Looking in details, when the firmware (skiboot) hands over execution to the linux kernel, there's one main thread entering some bootstrap code and running the kernel decompression algorithm. All the other secondary threads are left spinning in skiboot (1 thread per vpcu). So on paper, with multi-thread tcg and assuming the system has enough available physical cpus, I would expect the decompression to hog one physical cpu and the time needed to be constant, no matter the number of vpcus. All the secondary threads are left spinning in code like this: for (;;) { if (cpu_check_jobs(cpu)) // reading cpu-local data break; if (reconfigure_idle) // global variable break; barrier(); } The barrier is to force reading the memory with each iteration. It's defined as: asm volatile("" : : : "memory"); Some time later, the main thread in the linux kernel will get the secondary threads out of that loop by posting a job. My first thought was that the translation of that code through tcg was somehow causing some abnormally slow behavior, maybe due to some non-obvious contention between the threads. However, if I send the threads spinning forever with simply: for (;;) ; supposedly removing any contention, then the decompression time is the same. Ironically, the behavior seen with single thread tcg is what I would expect: 1 thread decompressing in 15 seconds, all the other threads spinning for that same amount of time, all sharing the same physical cpu, so it all adds up nicely: I see 60 seconds decompression time with 4 vcpus (4x15). Which means multi-thread tcg is slower by quite a bit. And single thread tcg hogs one physical cpu of the laptop vs. 4 physical cpus for the slower multi-thread tcg. Does anybody have an idea of what might happen or have suggestion to keep investigating? Thanks for your help! Fred Hi Frederic, I did some boot time tests recently and didn't notice this behavior. Could you share your QEMU command line with us? Did you build QEMU with any debug option or sanitizer enabled? -- Matheus K. Ferst Instituto de Pesquisas ELDORADO <http://www.eldorado.org.br/> Analista de Software Aviso Legal - Disclaimer <https://www.eldorado.org.br/disclaimer.html>
Re: [PATCH] tcg: Add tcg_gen_mov_ptr
On 31/05/2022 00:21, Richard Henderson wrote: Add an interface to perform moves between TCGv_ptr. Signed-off-by: Richard Henderson --- This will be required for target/arm FEAT_SME. r~ --- include/tcg/tcg-op.h | 5 + 1 file changed, 5 insertions(+) diff --git a/include/tcg/tcg-op.h b/include/tcg/tcg-op.h index b09b8b4a05..209e168305 100644 --- a/include/tcg/tcg-op.h +++ b/include/tcg/tcg-op.h @@ -1288,6 +1288,11 @@ static inline void tcg_gen_addi_ptr(TCGv_ptr r, TCGv_ptr a, intptr_t b) glue(tcg_gen_addi_,PTR)((NAT)r, (NAT)a, b); } +static inline void tcg_gen_mov_ptr(TCGv_ptr d, TCGv_ptr s) +{ +glue(tcg_gen_mov_,PTR)((NAT)d, (NAT)s); +} + static inline void tcg_gen_brcondi_ptr(TCGCond cond, TCGv_ptr a, intptr_t b, TCGLabel *label) { -- 2.34.1 Reviewed-by: Matheus Ferst -- Matheus K. Ferst Instituto de Pesquisas ELDORADO <http://www.eldorado.org.br/> Analista de Software Aviso Legal - Disclaimer <https://www.eldorado.org.br/disclaimer.html>
Re: [PATCH] target/ppc/cpu-models: Update max alias to power10
On 31/05/2022 14:27, Murilo Opsfelder Araujo wrote: Update max alias to power10 so users can take advantage of a more recent CPU model when '-cpu max' is provided. Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1038 Cc: Daniel P. Berrangé Cc: Thomas Huth Cc: Cédric Le Goater Cc: Daniel Henrique Barboza Cc: Fabiano Rosas Signed-off-by: Murilo Opsfelder Araujo --- target/ppc/cpu-models.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/target/ppc/cpu-models.c b/target/ppc/cpu-models.c index 976be5e0d1..c15fcb43a1 100644 --- a/target/ppc/cpu-models.c +++ b/target/ppc/cpu-models.c @@ -879,7 +879,6 @@ PowerPCCPUAlias ppc_cpu_aliases[] = { { "755", "755_v2.8" }, { "goldfinger", "755_v2.8" }, { "7400", "7400_v2.9" }, -{ "max", "7400_v2.9" }, { "g4", "7400_v2.9" }, { "7410", "7410_v1.4" }, { "nitro", "7410_v1.4" }, @@ -910,6 +909,8 @@ PowerPCCPUAlias ppc_cpu_aliases[] = { { "power8nvl", "power8nvl_v1.0" }, { "power9", "power9_v2.0" }, { "power10", "power10_v2.0" }, +/* Update the 'max' alias to the latest CPU model */ +{ "max", "power10_v2.0" }, #endif /* Generic PowerPCs */ -- 2.36.1 Hi Murilo, I guess we need a "max" for qemu-system-ppc too, so maybe something like > /* Update the 'max' alias to the latest CPU model */ > #if defined(TARGET_PPC64) > { "max", "power10_v2.0" }, > #else > { "max", "7457a_v1.2" }, > #endif Or some other CPU which is considered the max for 32-bit... Thanks, Matheus K. Ferst Instituto de Pesquisas ELDORADO <http://www.eldorado.org.br/> Analista de Software Aviso Legal - Disclaimer <https://www.eldorado.org.br/disclaimer.html>
Re: [PATCH 2/5] machine.py: add default pseries params in machine.py
On 19/05/2022 20:18, John Snow wrote: On Mon, May 16, 2022, 12:53 PM Daniel Henrique Barboza mailto:danielhb...@gmail.com>> wrote: pSeries guests set a handful of machine capabilities on by default, all of them related to security mitigations, that aren't always available in the host. This means that, as is today, running avocado in a Power9 server without the proper firmware support, and with --disable-tcg, this error will occur: (1/1) tests/avocado/info_usernet.py:InfoUsernet.test_hostfwd: ERROR: ConnectError: Failed to establish session: EOFError\n Exit code: 1\n (...) (...) Command: ./qemu-system-ppc64 -display none -vga none (...) Output: qemu-system-ppc64: warning: netdev vnet has no peer qemu-system-ppc64: Requested safe cache capability level not supported by KVM Try appending -machine cap-cfpc=broken info_usernet.py happens to trigger this error first, but all tests would fail in this configuration because the host does not support the default 'cap-cfpc' capability. A similar situation was already fixed a couple of years ago by Greg Kurz (commit 63d57c8f91d0) but it was focused on TCG warnings for these same capabilities and running C qtests. This commit ended up preventing the problem we're facing with avocado when running qtests with KVM support. This patch does a similar approach by amending machine.py to disable these security capabilities in case we're running a pseries guest. The change is made in the _launch() callback to be sure that we're already commited into launching the guest. It's also worth noticing that we're relying on self._machine being set accordingly (i.e. via tag:machine), which is currently the case for all ppc64 related avocado tests. Signed-off-by: Daniel Henrique Barboza mailto:danielhb...@gmail.com>> --- python/qemu/machine/machine.py | 13 + 1 file changed, 13 insertions(+) diff --git a/python/qemu/machine/machine.py b/python/qemu/machine/machine.py index 07ac5a710b..12e5e37bff 100644 --- a/python/qemu/machine/machine.py +++ b/python/qemu/machine/machine.py @@ -51,6 +51,11 @@ LOG = logging.getLogger(__name__) +PSERIES_DEFAULT_CAPABILITIES = ("cap-cfpc=broken," + "cap-sbbc=broken," + "cap-ibs=broken," + "cap-ccf-assist=off," + "cap-fwnmi=off") class QEMUMachineError(Exception): @@ -447,6 +452,14 @@ def _launch(self) -> None: """ Launch the VM and establish a QMP connection """ + + # pseries needs extra machine options to disable Spectre/Meltdown + # KVM related capabilities that might not be available in the + # host. + if "qemu-system-ppc64" in self._binary: + if self._machine is None or "pseries" in self._machine: + self._args.extend(['-machine', PSERIES_DEFAULT_CAPABILITIES]) + self._pre_launch() LOG.debug('VM launch command: %r', ' '.join(self._qemu_full_args)) -- 2.32.0 Hm, okay. I have plans to try and factor the machine appliance out and into an upstream package in the near future, so I want to avoid more hardcoding of defaults. Does avocado have a subclass of QEMUMachine where it might be more appropriate to stick this bandaid? Can we make one? (I don't think iotests runs into this problem because we always use machine:none there, I think. VM tests might have a similar problem though, and then it'd be reasonable to want the bandaid here in machine.py ... well, boo. okay.) My verdict is that it's a bandaid, but I'll accept it if the avocado folks agree to it and I'll sort it out later when I do my rewrite. I don't think I have access to a power9 machine to test this with either, so I might want a tested-by from someone who does. --js Unfortunately, none of our POWER9 machines had a firmware old enough to be affected by this issue. The closest I can test is a nested KVM-HV with L0 using cap-cfpc=broken, so the L1 receives the quoted message when running 'make check-avocado'. With this setup I can confirm that the patch fixes this error, so Tested-by: Matheus Ferst Thanks, Matheus K. Ferst Instituto de Pesquisas ELDORADO <http://www.eldorado.org.br/> Analista de Software Aviso Legal - Disclaimer <https://www.eldorado.org.br/disclaimer.html>
Re: [RFC PATCH 0/6] softfloat 128-bit integer support
On 29/03/2022 00:38, Richard Henderson wrote: On 3/28/22 14:14, matheus.fe...@eldorado.org.br wrote: From: Matheus Ferst This RFC is a first attempt at implementing the 128-bit integer conversion routines in softfloat, as required by the xscv[su]qqp and xscvqp[su]qz instructions of PowerISA v3.1. Instead of using int128.h, int-to-float routines receive the 128-bit numbers through a pair of 64-bit values, and float-to-int conversions use a pointer to return the lower half of the result. We only need the parts128 methods, but since the difference to parts64 ones seemed minor, I included both in this patch. RFC: - Should we use struct Int128 instead of 64-bit value pairs? I think so. We have it, and it makes the interface more obvious. - I've not tested the float64 methods since the PPC instructions only use the quad-precision routines. Should we keep them in the final version? Let's not add anything that we don't have a need for. It may eventually be needed by RISC-V RV128, but we can add it then. r~ Thanks for your comments and review. I'll send an alternative version of this RFC using Int128. -- Matheus K. Ferst Instituto de Pesquisas ELDORADO <http://www.eldorado.org.br/> Analista de Software Aviso Legal - Disclaimer <https://www.eldorado.org.br/disclaimer.html>
[PATCH for-7.0 v5] qemu-binfmt-conf.sh: mips: allow nonzero EI_ABIVERSION, distinguish o32 and n32
With the command line flag -mplt and a recent toolchain, ELF binaries generated by gcc can obtain EI_ABIVERSION=1, which makes, e.g., gcc three-stage bootstrap in a mips-unknown-linux-gnu qemu-user chroot fail since the binfmt-misc magic does not match anymore. Also other values are technically possible. qemu executes these binaries just fine, so relax the mask for the EI_ABIVERSION byte at offset 0x08. In addition, extend magic string to distinguish mips o32 and n32 ABI. This information is given by the EF_MIPS_ABI2 (0x20) bit in the e_flags field of the ELF header (a 4-byte value at offset 0x24 for the here applicable ELFCLASS32). See-also: ace3d65459 Signed-off-by: Andreas K. Hüttel Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: WANG Xuerui Cc: Laurent Vivier Cc: WANG Xuerui Cc: Richard Henderson Cc: Alex Bennee Cc: Philippe Mathieu-Daudé Closes: https://gitlab.com/qemu-project/qemu/-/issues/843 --- v5: Fully relax mask for EI_ABIVERSION for all of mips; squash patches since they touch the same lines v4: Unchanged repost of v3 v3: Add the magic extension to distinguish n32 and o32 v2: Add the same EI_ABIVERSION fix for little endian as for big endian v1: Initial version, only handling EI_ABIVERSION=1 on BE scripts/qemu-binfmt-conf.sh | 20 ++-- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/scripts/qemu-binfmt-conf.sh b/scripts/qemu-binfmt-conf.sh index e9bfeb94d3..9cb723f443 100755 --- a/scripts/qemu-binfmt-conf.sh +++ b/scripts/qemu-binfmt-conf.sh @@ -60,28 +60,28 @@ m68k_family=m68k # FIXME: We could use the other endianness on a MIPS host. -mips_magic='\x7fELF\x01\x02\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x08' -mips_mask='\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff' +mips_magic='\x7fELF\x01\x02\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x08\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' +mips_mask='\xff\xff\xff\xff\xff\xff\xff\x00\x00\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff\xff\xff\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x20' mips_family=mips -mipsel_magic='\x7fELF\x01\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x08\x00' -mipsel_mask='\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff' +mipsel_magic='\x7fELF\x01\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x08\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' +mipsel_mask='\xff\xff\xff\xff\xff\xff\xff\x00\x00\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff\xff\xff\xff\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x20\x00\x00\x00' mipsel_family=mips -mipsn32_magic='\x7fELF\x01\x02\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x08' -mipsn32_mask='\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff' +mipsn32_magic='\x7fELF\x01\x02\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x08\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x20' +mipsn32_mask='\xff\xff\xff\xff\xff\xff\xff\x00\x00\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff\xff\xff\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x20' mipsn32_family=mips -mipsn32el_magic='\x7fELF\x01\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x08\x00' -mipsn32el_mask='\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff' +mipsn32el_magic='\x7fELF\x01\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x08\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x20\x00\x00\x00' +mipsn32el_mask='\xff\xff\xff\xff\xff\xff\xff\x00\x00\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff\xff\xff\xff\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x20\x00\x00\x00' mipsn32el_family=mips mips64_magic='\x7fELF\x02\x02\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x08' -mips64_mask='\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff' +mips64_mask='\xff\xff\xff\xff\xff\xff\xff\x00\x00\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff' mips64_family=mips mips64el_magic='\x7fELF\x02\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x08\x00' -mips64el_mask='\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff' +mips64el_mask='\xff\xff\xff\xff\xff\xff\xff\x00\x00\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff' mips64el_family=mips sh4_magic='\x7fELF\x01\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x2a\x00' -- 2.34.1
Re: [PATCH] target/mips: Fix address space range declaration on n32
Am Sonntag, 27. März 2022, 07:34:55 CEST schrieb WANG Xuerui: > This bug is probably lurking there for so long, I cannot even git-blame > my way to the commit first introducing it. > > Anyway, because n32 is also TARGET_MIPS64, the address space range > cannot be determined by looking at TARGET_MIPS64 alone. Fix this by only > declaring 48-bit address spaces for n64, or the n32 user emulation will > happily hand out memory ranges beyond the 31-bit limit and crash. > > Confirmed to make the minimal reproducing example in the linked issue > behave. > > Closes: https://gitlab.com/qemu-project/qemu/-/issues/939 > Signed-off-by: WANG Xuerui > Cc: Philippe Mathieu-Daudé > Cc: Aurelien Jarno > Cc: Jiaxun Yang > Cc: Aleksandar Rikalo > Cc: Andreas K. Hüttel > --- Tested-by: Andreas K. Huettel > target/mips/cpu-param.h | 2 +- > 1 file changed, 1 insertion(+), 1 deletion(-) > > diff --git a/target/mips/cpu-param.h b/target/mips/cpu-param.h > index 9c4a6ea45e2..1aebd01df9c 100644 > --- a/target/mips/cpu-param.h > +++ b/target/mips/cpu-param.h > @@ -12,7 +12,7 @@ > #else > # define TARGET_LONG_BITS 32 > #endif > -#ifdef TARGET_MIPS64 > +#ifdef TARGET_ABI_MIPSN64 > #define TARGET_PHYS_ADDR_SPACE_BITS 48 > #define TARGET_VIRT_ADDR_SPACE_BITS 48 > #else > -- Andreas K. Hüttel dilfri...@gentoo.org Gentoo Linux developer (council, toolchain, base-system, perl, libreoffice) signature.asc Description: This is a digitally signed message part.
[PATCH v4 1/2] qemu-binfmt-conf.sh: allow elf EI_ABIVERSION=1 for mips
With the command line flag -mplt and a recent toolchain, ELF binaries generated by gcc can obtain EI_ABIVERSION=1, see below, which makes, e.g., gcc three-stage bootstrap in a mips-unknown-linux-gnu qemu-user chroot fail since the binfmt-misc magic does not match anymore. qemu executes these binaries just fine, so relax the mask slightly. CHOST=mips-unknown-linux-gnu (and also mipsel-unknown-linux-gnu) CFLAGS="-O2 -march=mips32 -mabi=32 -mplt -pipe" gcc-11.2, binutils-2.37, glibc-2.34 | /* | - * ELF dump of './prev-gcc/build/gengenrtl' | - * 29608 (0x73A8) bytes | + * ELF dump of './gcc/build/gengenrtl' | + * 54532 (0xD504) bytes | */ | | Elf32_Dyn dumpedelf_dyn_0[]; | struct { | Elf32_Ehdr ehdr; | Elf32_Phdr phdrs[12]; | - Elf32_Shdr shdrs[33]; | + Elf32_Shdr shdrs[44]; | Elf32_Dyn *dyns; | } dumpedelf_0 = { | | .ehdr = { | .e_ident = { /* (EI_NIDENT bytes) */ | /* [0] EI_MAG:*/ 0x7F,'E','L','F', | /* [4] EI_CLASS: */ 1 , /* (ELFCLASS32) */ | /* [5] EI_DATA: */ 2 , /* (ELFDATA2MSB) */ | /* [6] EI_VERSION:*/ 1 , /* (EV_CURRENT) */ | /* [7] EI_OSABI: */ 0 , /* (ELFOSABI_NONE) */ | - /* [8] EI_ABIVERSION: */ 0 , | + /* [8] EI_ABIVERSION: */ 1 , | /* [9-15] EI_PAD: */ 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, | }, | .e_type = 2 , /* (ET_EXEC) */ | .e_machine = 8 , /* (EM_MIPS) */ | .e_version = 1 , /* (EV_CURRENT) */ | (...) Signed-off-by: Andreas K. Hüttel --- scripts/qemu-binfmt-conf.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/qemu-binfmt-conf.sh b/scripts/qemu-binfmt-conf.sh index e9bfeb94d3..fc2f856800 100755 --- a/scripts/qemu-binfmt-conf.sh +++ b/scripts/qemu-binfmt-conf.sh @@ -61,11 +61,11 @@ m68k_family=m68k # FIXME: We could use the other endianness on a MIPS host. mips_magic='\x7fELF\x01\x02\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x08' -mips_mask='\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff' +mips_mask='\xff\xff\xff\xff\xff\xff\xff\x00\xfe\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff' mips_family=mips mipsel_magic='\x7fELF\x01\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x08\x00' -mipsel_mask='\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff' +mipsel_mask='\xff\xff\xff\xff\xff\xff\xff\x00\xfe\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff' mipsel_family=mips mipsn32_magic='\x7fELF\x01\x02\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x08' -- 2.34.1
[PATCH v4 2/2] qemu-binfmt-conf.sh: Extend magic to distinguish mips o32 and n32 ABI
This information is given by the EF_MIPS_ABI2 (0x20) bit in the e_flags field of the ELF header (a 4-byte value at offset 0x24 for the here applicable ELFCLASS32). See-also: https://www.mail-archive.com/qemu-devel@nongnu.org/msg732572.html Signed-off-by: Andreas K. Hüttel --- scripts/qemu-binfmt-conf.sh | 16 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/scripts/qemu-binfmt-conf.sh b/scripts/qemu-binfmt-conf.sh index fc2f856800..5f44346166 100755 --- a/scripts/qemu-binfmt-conf.sh +++ b/scripts/qemu-binfmt-conf.sh @@ -60,20 +60,20 @@ m68k_family=m68k # FIXME: We could use the other endianness on a MIPS host. -mips_magic='\x7fELF\x01\x02\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x08' -mips_mask='\xff\xff\xff\xff\xff\xff\xff\x00\xfe\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff' +mips_magic='\x7fELF\x01\x02\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x08\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' +mips_mask='\xff\xff\xff\xff\xff\xff\xff\x00\xfe\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff\xff\xff\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x20' mips_family=mips -mipsel_magic='\x7fELF\x01\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x08\x00' -mipsel_mask='\xff\xff\xff\xff\xff\xff\xff\x00\xfe\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff' +mipsel_magic='\x7fELF\x01\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x08\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' +mipsel_mask='\xff\xff\xff\xff\xff\xff\xff\x00\xfe\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff\xff\xff\xff\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x20\x00\x00\x00' mipsel_family=mips -mipsn32_magic='\x7fELF\x01\x02\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x08' -mipsn32_mask='\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff' +mipsn32_magic='\x7fELF\x01\x02\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x08\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x20' +mipsn32_mask='\xff\xff\xff\xff\xff\xff\xff\x00\xfe\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff\xff\xff\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x20' mipsn32_family=mips -mipsn32el_magic='\x7fELF\x01\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x08\x00' -mipsn32el_mask='\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff' +mipsn32el_magic='\x7fELF\x01\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x08\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x20\x00\x00\x00' +mipsn32el_mask='\xff\xff\xff\xff\xff\xff\xff\x00\xfe\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff\xff\xff\xff\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x20\x00\x00\x00' mipsn32el_family=mips mips64_magic='\x7fELF\x02\x02\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x08' -- 2.34.1
qemu-binfmt-conf.sh: improvements for mips
Re-sending v3 unchanged as requested. The first patch has already been submitted earlier and is unchanged from v2. The second patch extends it and resolves issue 843, "duplicate magic mips patterns". Tested with various self-bootstrapped Gentoo chroots and in production on the Gentoo release engineering stage builder.
[PATCH v3 2/2] qemu-binfmt-conf.sh: Extend magic to distinguish mips o32 and n32 ABI
This information is given by the EF_MIPS_ABI2 (0x20) bit in the e_flags field of the ELF header (a 4-byte value at offset 0x24 for the here applicable ELFCLASS32). See-also: https://www.mail-archive.com/qemu-devel@nongnu.org/msg732572.html Signed-off-by: Andreas K. Hüttel --- scripts/qemu-binfmt-conf.sh | 16 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/scripts/qemu-binfmt-conf.sh b/scripts/qemu-binfmt-conf.sh index fc2f856800..5f44346166 100755 --- a/scripts/qemu-binfmt-conf.sh +++ b/scripts/qemu-binfmt-conf.sh @@ -60,20 +60,20 @@ m68k_family=m68k # FIXME: We could use the other endianness on a MIPS host. -mips_magic='\x7fELF\x01\x02\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x08' -mips_mask='\xff\xff\xff\xff\xff\xff\xff\x00\xfe\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff' +mips_magic='\x7fELF\x01\x02\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x08\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' +mips_mask='\xff\xff\xff\xff\xff\xff\xff\x00\xfe\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff\xff\xff\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x20' mips_family=mips -mipsel_magic='\x7fELF\x01\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x08\x00' -mipsel_mask='\xff\xff\xff\xff\xff\xff\xff\x00\xfe\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff' +mipsel_magic='\x7fELF\x01\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x08\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' +mipsel_mask='\xff\xff\xff\xff\xff\xff\xff\x00\xfe\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff\xff\xff\xff\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x20\x00\x00\x00' mipsel_family=mips -mipsn32_magic='\x7fELF\x01\x02\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x08' -mipsn32_mask='\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff' +mipsn32_magic='\x7fELF\x01\x02\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x08\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x20' +mipsn32_mask='\xff\xff\xff\xff\xff\xff\xff\x00\xfe\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff\xff\xff\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x20' mipsn32_family=mips -mipsn32el_magic='\x7fELF\x01\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x08\x00' -mipsn32el_mask='\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff' +mipsn32el_magic='\x7fELF\x01\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x08\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x20\x00\x00\x00' +mipsn32el_mask='\xff\xff\xff\xff\xff\xff\xff\x00\xfe\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff\xff\xff\xff\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x20\x00\x00\x00' mipsn32el_family=mips mips64_magic='\x7fELF\x02\x02\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x08' -- 2.34.1
[PATCH v3 1/2] qemu-binfmt-conf.sh: allow elf EI_ABIVERSION=1 for mips
With the command line flag -mplt and a recent toolchain, ELF binaries generated by gcc can obtain EI_ABIVERSION=1, see below, which makes, e.g., gcc three-stage bootstrap in a mips-unknown-linux-gnu qemu-user chroot fail since the binfmt-misc magic does not match anymore. qemu executes these binaries just fine, so relax the mask slightly. CHOST=mips-unknown-linux-gnu (and also mipsel-unknown-linux-gnu) CFLAGS="-O2 -march=mips32 -mabi=32 -mplt -pipe" gcc-11.2, binutils-2.37, glibc-2.34 | /* | - * ELF dump of './prev-gcc/build/gengenrtl' | - * 29608 (0x73A8) bytes | + * ELF dump of './gcc/build/gengenrtl' | + * 54532 (0xD504) bytes | */ | | Elf32_Dyn dumpedelf_dyn_0[]; | struct { | Elf32_Ehdr ehdr; | Elf32_Phdr phdrs[12]; | - Elf32_Shdr shdrs[33]; | + Elf32_Shdr shdrs[44]; | Elf32_Dyn *dyns; | } dumpedelf_0 = { | | .ehdr = { | .e_ident = { /* (EI_NIDENT bytes) */ | /* [0] EI_MAG:*/ 0x7F,'E','L','F', | /* [4] EI_CLASS: */ 1 , /* (ELFCLASS32) */ | /* [5] EI_DATA: */ 2 , /* (ELFDATA2MSB) */ | /* [6] EI_VERSION:*/ 1 , /* (EV_CURRENT) */ | /* [7] EI_OSABI: */ 0 , /* (ELFOSABI_NONE) */ | - /* [8] EI_ABIVERSION: */ 0 , | + /* [8] EI_ABIVERSION: */ 1 , | /* [9-15] EI_PAD: */ 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, | }, | .e_type = 2 , /* (ET_EXEC) */ | .e_machine = 8 , /* (EM_MIPS) */ | .e_version = 1 , /* (EV_CURRENT) */ | (...) Signed-off-by: Andreas K. Hüttel --- scripts/qemu-binfmt-conf.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/qemu-binfmt-conf.sh b/scripts/qemu-binfmt-conf.sh index e9bfeb94d3..fc2f856800 100755 --- a/scripts/qemu-binfmt-conf.sh +++ b/scripts/qemu-binfmt-conf.sh @@ -61,11 +61,11 @@ m68k_family=m68k # FIXME: We could use the other endianness on a MIPS host. mips_magic='\x7fELF\x01\x02\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x08' -mips_mask='\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff' +mips_mask='\xff\xff\xff\xff\xff\xff\xff\x00\xfe\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff' mips_family=mips mipsel_magic='\x7fELF\x01\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x08\x00' -mipsel_mask='\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff' +mipsel_mask='\xff\xff\xff\xff\xff\xff\xff\x00\xfe\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff' mipsel_family=mips mipsn32_magic='\x7fELF\x01\x02\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x08' -- 2.34.1
qemu-binfmt-conf.sh: mips improvements
Two patches; the first one has been under review before, the second builds on it and extends the binfmt-misc magic to differentiate between o32 and n32 binaries (see also issue 843).
Re: [RFC PATCH v3 3/3] target/ppc: Fix gen_priv_exception error value in mfspr/mtspr
On 04/03/2022 11:42, Laurent Vivier wrote: Le 13/01/2022 à 18:04, matheus.fe...@eldorado.org.br a écrit : From: Matheus Ferst The code in linux-user/ppc/cpu_loop.c expects POWERPC_EXCP_PRIV exception with error POWERPC_EXCP_PRIV_OPC or POWERPC_EXCP_PRIV_REG, while POWERPC_EXCP_INVAL_SPR is expected in POWERPC_EXCP_INVAL exceptions. This mismatch caused an EXCP_DUMP with the message "Unknown privilege violation (03)", as seen in [1]. Fixes: 9b2fadda3e01 ("ppc: Rework generation of priv and inval interrupts") Resolves: https://gitlab.com/qemu-project/qemu/-/issues/588 [1] https://gitlab.com/qemu-project/qemu/-/issues/588 Signed-off-by: Matheus Ferst --- Is there any case where throwing a PRIV/INVAL exception with a INVAL/PRIV error makes sense? It seems wrong, but maybe I'm missing something... especially with the HV_EMU to program check conversion. Also, if this patch is correct, it seems that all invalid SPR access would be nop or privilege exceptions. In this case, is POWERPC_EXCP_INVAL_SPR still needed? --- target/ppc/translate.c | 8 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/target/ppc/translate.c b/target/ppc/translate.c index 40232201bb..abbc3a5bb9 100644 --- a/target/ppc/translate.c +++ b/target/ppc/translate.c @@ -4827,11 +4827,11 @@ static inline void gen_op_mfspr(DisasContext *ctx) */ if (sprn & 0x10) { if (ctx->pr) { - gen_priv_exception(ctx, POWERPC_EXCP_INVAL_SPR); + gen_priv_exception(ctx, POWERPC_EXCP_PRIV_REG); } } else { if (ctx->pr || sprn == 0 || sprn == 4 || sprn == 5 || sprn == 6) { - gen_hvpriv_exception(ctx, POWERPC_EXCP_INVAL_SPR); + gen_hvpriv_exception(ctx, POWERPC_EXCP_PRIV_REG); } } } @@ -5014,11 +5014,11 @@ static void gen_mtspr(DisasContext *ctx) */ if (sprn & 0x10) { if (ctx->pr) { - gen_priv_exception(ctx, POWERPC_EXCP_INVAL_SPR); + gen_priv_exception(ctx, POWERPC_EXCP_PRIV_REG); } } else { if (ctx->pr || sprn == 0) { - gen_hvpriv_exception(ctx, POWERPC_EXCP_INVAL_SPR); + gen_hvpriv_exception(ctx, POWERPC_EXCP_PRIV_REG); } } } It seems logic to emit a POWERPC_EXCP_PRIV_XXX exception with gen_priv_exception() (POWERPC_EXCP_PRIV). Moreover in line above we have a gen_priv_exception(ctx, POWERPC_EXCP_PRIV_REG) if the register cannot be read (SPR_NOACCESS). But in helper_load_dcr() we have POWERPC_EXCP_PRIV_REG with POWERPC_EXCP_INVAL (whereas in the helper_store_dcr() function we have POWERPC_EXCP_INVAL with POWERPC_EXCP_INVAL_INVAL). It looks like another bug. The instructions that could invoke these helpers in user-mode (mfdcrux and mtdcrux) are behind PPC_DCRUX, and no CPU has this insns_flag, so I guess the code is wrong, but we cannot hit the bug in linux-user. Similarly, we have gen_hvpriv_exception with POWERPC_EXCP_INVAL_SPR in spr_groupA_write_allowed and gen_inval_exception with POWERPC_EXCP_PRIV_REG in spr_write_excp_vector, but both are behind !defined(CONFIG_USER_ONLY). and in gen_slbfee() we have also a POWERPC_EXCP_PRIV_REG with gen_inval_exception() (POWERPC_EXCP_INVAL). This one is easier to test. Looking at si_code: void sigill_handle(int sig, siginfo_t *si, void *ucontext) { _exit(si->si_code); } int main(void) { struct sigaction sa = {.sa_sigaction = sigill_handle, .sa_flags = SA_SIGINFO}; uint64_t t = 0, b = 0; sigaction(SIGILL, &sa, NULL); asm("slbfee. %0, %1" : "=r" (t) : "r" (b)); return 0; } We have: $ ./slbee; echo $? 5 # ILL_PRVOPC $ ./qemu-ppc64 ./slbfee; echo $? 2 # ILL_ILLOPN So I think we should be using gen_priv_exception or gen_hvpriv_exception with POWERPC_EXCP_PRIV_OPC. What is interesting is gen_inval_exception() uses POWERPC_EXCP_HV_EMU that could make thinking we could try to emulate the operation (for KVM PR, for instance). IIUC, we should use gen_hvpriv_exception in those cases, so we have POWERPC_EXCP_HV_EMU with POWERPC_EXCP_PRIV | something. Thanks, Matheus K. Ferst Instituto de Pesquisas ELDORADO <http://www.eldorado.org.br/> Analista de Software Aviso Legal - Disclaimer <https://www.eldorado.org.br/disclaimer.html>
[PATCH] ui/gtk: Ignore 2- and 3-button press events
GTK already produces corresponding GDK_BUTTON_PRESS events alongside 2BUTTON and 3BUTTON_PRESS events. The 2BUTTON and 3BUTTON_PRESS events were incorrectly being interpreted and passed to guests as button release events. Resolves: https://gitlab.com/qemu-project/qemu/-/issues/558 Signed-off-by: K. Lange --- ui/gtk.c | 4 1 file changed, 4 insertions(+) diff --git a/ui/gtk.c b/ui/gtk.c index a8567b9ddc..8675ae76fa 100644 --- a/ui/gtk.c +++ b/ui/gtk.c @@ -958,6 +958,10 @@ static gboolean gd_button_event(GtkWidget *widget, GdkEventButton *button, return TRUE; } +if (button->type == GDK_2BUTTON_PRESS || button->type == GDK_3BUTTON_PRESS) { +return TRUE; +} + qemu_input_queue_btn(vc->gfx.dcl.con, btn, button->type == GDK_BUTTON_PRESS); qemu_input_event_sync(); -- 2.25.1
[PATCH v2] qemu-binfmt-conf.sh: allow elf EI_ABIVERSION=1 for mips
With the command line flag -mplt and a recent toolchain, ELF binaries generated by gcc can obtain EI_ABIVERSION=1, see below, which makes, e.g., gcc three-stage bootstrap in a mips-unknown-linux-gnu qemu-user chroot fail since the binfmt-misc magic does not match anymore. qemu executes these binaries just fine, so relax the mask slightly. CHOST=mips-unknown-linux-gnu (and also mipsel-unknown-linux-gnu) CFLAGS="-O2 -march=mips32 -mabi=32 -mplt -pipe" gcc-11.2, binutils-2.37, glibc-2.34 | /* | - * ELF dump of './prev-gcc/build/gengenrtl' | - * 29608 (0x73A8) bytes | + * ELF dump of './gcc/build/gengenrtl' | + * 54532 (0xD504) bytes | */ | | Elf32_Dyn dumpedelf_dyn_0[]; | struct { | Elf32_Ehdr ehdr; | Elf32_Phdr phdrs[12]; | - Elf32_Shdr shdrs[33]; | + Elf32_Shdr shdrs[44]; | Elf32_Dyn *dyns; | } dumpedelf_0 = { | | .ehdr = { | .e_ident = { /* (EI_NIDENT bytes) */ | /* [0] EI_MAG:*/ 0x7F,'E','L','F', | /* [4] EI_CLASS: */ 1 , /* (ELFCLASS32) */ | /* [5] EI_DATA: */ 2 , /* (ELFDATA2MSB) */ | /* [6] EI_VERSION:*/ 1 , /* (EV_CURRENT) */ | /* [7] EI_OSABI: */ 0 , /* (ELFOSABI_NONE) */ | - /* [8] EI_ABIVERSION: */ 0 , | + /* [8] EI_ABIVERSION: */ 1 , | /* [9-15] EI_PAD: */ 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, | }, | .e_type = 2 , /* (ET_EXEC) */ | .e_machine = 8 , /* (EM_MIPS) */ | .e_version = 1 , /* (EV_CURRENT) */ | (...) Signed-off-by: Andreas K. Hüttel --- v2: Add the same fix for little endian as for big endian scripts/qemu-binfmt-conf.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/qemu-binfmt-conf.sh b/scripts/qemu-binfmt-conf.sh index e9bfeb94d3..fc2f856800 100755 --- a/scripts/qemu-binfmt-conf.sh +++ b/scripts/qemu-binfmt-conf.sh @@ -61,11 +61,11 @@ m68k_family=m68k # FIXME: We could use the other endianness on a MIPS host. mips_magic='\x7fELF\x01\x02\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x08' -mips_mask='\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff' +mips_mask='\xff\xff\xff\xff\xff\xff\xff\x00\xfe\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff' mips_family=mips mipsel_magic='\x7fELF\x01\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x08\x00' -mipsel_mask='\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff' +mipsel_mask='\xff\xff\xff\xff\xff\xff\xff\x00\xfe\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff' mipsel_family=mips mipsn32_magic='\x7fELF\x01\x02\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x08' -- 2.34.1
Re: [PATCH v2 5/5] tests/tcg/ppc64le: Use Altivec register names in clobbler list
On 03/03/2022 16:30, Richard Henderson wrote: On 3/3/22 07:20, matheus.fe...@eldorado.org.br wrote: From: Matheus Ferst LLVM/Clang doesn't know the VSX registers when compiling with -mabi=elfv1. Use only registers >= 32 and list them with their Altivec name. Signed-off-by: Matheus Ferst This description isn't quite right. The change to the m[tf]vsr insns is a generic bug fix, and not related to Clang. I'm not sure if I understood. I'm targeting the Clang problem with this patch, is something else being fixed by this change? AFAICT, the old "mtvsrd 0, %2" and "mfvsrd %0, 0" were correct, I'm just changing from VSR 0 to VSR 32 to allow the clobber with Clang, but GCC doesn't seem to have this limitation with ELFv1. Thanks, Matheus K. Ferst Instituto de Pesquisas ELDORADO <http://www.eldorado.org.br/> Analista de Software Aviso Legal - Disclaimer <https://www.eldorado.org.br/disclaimer.html>
Re: [PATCH v2 2/5] target/ppc: change xs[n]madd[am]sp to use float64r32_muladd
On 03/03/2022 15:49, Richard Henderson wrote: On 3/3/22 07:20, matheus.fe...@eldorado.org.br wrote: From: Matheus Ferst Change VSX Scalar Multiply-Add/Subtract Type-A/M Single Precision helpers to use float64r32_muladd. This method should correctly handle all rounding modes, so the workaround for float_round_nearest_even can be dropped. Reviewed-by: Richard Henderson Signed-off-by: Matheus Ferst --- target/ppc/fpu_helper.c | 93 - 1 file changed, 35 insertions(+), 58 deletions(-) diff --git a/target/ppc/fpu_helper.c b/target/ppc/fpu_helper.c index 8f970288f5..c973968ed6 100644 --- a/target/ppc/fpu_helper.c +++ b/target/ppc/fpu_helper.c @@ -1916,22 +1916,19 @@ void helper_xsdivqp(CPUPPCState *env, uint32_t opcode, * fld - vsr_t field (VsrD(*) or VsrW(*)) * sfprf - set FPRF */ -#define VSX_RE(op, nels, tp, fld, sfprf, r2sp) \ +#define VSX_RE(op, nels, tp, op_tp, fld, sfprf) \ void helper_##op(CPUPPCState *env, ppc_vsr_t *xt, ppc_vsr_t *xb) \ { \ ppc_vsr_t t = { }; \ - int i; \ + int i, flags; \ \ helper_reset_fpstatus(env); \ \ for (i = 0; i < nels; i++) { \ - if (unlikely(tp##_is_signaling_nan(xb->fld, &env->fp_status))) { \ + t.fld = op_tp##_div(tp##_one, xb->fld, &env->fp_status); \ + flags = get_float_exception_flags(&env->fp_status); \ + if (unlikely(flags & float_flag_invalid_snan)) { You seem to have squashed the change to recip-estimate into the same patch as muladd. -#define VSX_RSQRTE(op, nels, tp, fld, sfprf, r2sp) \ +#define VSX_RSQRTE(op, nels, tp, op_tp, fld, sfprf) \ And recip-sqrt-estimate. I guess it's ok to squash, since it's all related, but you should update the patch description if you leave it this way. Sorry, I cherry-picked the wrong branch. This patch should just be a rebase of v1. I'll send the changes to VSX_{ADD_SUB,MUL,DIV,RE,SQRT,RSQRTE} in a separate patch series since it's not test-related. Thanks, Matheus K. Ferst Instituto de Pesquisas ELDORADO <http://www.eldorado.org.br/> Analista de Software Aviso Legal - Disclaimer <https://www.eldorado.org.br/disclaimer.html>
[PATCH] qemu-binfmt-conf.sh: allow elf EI_ABIVERSION=1 for mips
With the command line flag -mplt and a recent toolchain, ELF binaries generated by gcc can obtain EI_ABIVERSION=1, see below, which makes, e.g., gcc three-stage bootstrap in a mips-unknown-linux-gnu qemu-user chroot fail since the binfmt-misc magic does not match anymore. qemu executes these binaries just fine, so relax the mask slightly. CHOST=mips-unknown-linux-gnu CFLAGS="-O2 -march=mips32 -mabi=32 -mplt -pipe" gcc-11.2, binutils-2.37, glibc-2.34 | /* | - * ELF dump of './prev-gcc/build/gengenrtl' | - * 29608 (0x73A8) bytes | + * ELF dump of './gcc/build/gengenrtl' | + * 54532 (0xD504) bytes | */ | | Elf32_Dyn dumpedelf_dyn_0[]; | struct { | Elf32_Ehdr ehdr; | Elf32_Phdr phdrs[12]; | - Elf32_Shdr shdrs[33]; | + Elf32_Shdr shdrs[44]; | Elf32_Dyn *dyns; | } dumpedelf_0 = { | | .ehdr = { | .e_ident = { /* (EI_NIDENT bytes) */ | /* [0] EI_MAG:*/ 0x7F,'E','L','F', | /* [4] EI_CLASS: */ 1 , /* (ELFCLASS32) */ | /* [5] EI_DATA: */ 2 , /* (ELFDATA2MSB) */ | /* [6] EI_VERSION:*/ 1 , /* (EV_CURRENT) */ | /* [7] EI_OSABI: */ 0 , /* (ELFOSABI_NONE) */ | - /* [8] EI_ABIVERSION: */ 0 , | + /* [8] EI_ABIVERSION: */ 1 , | /* [9-15] EI_PAD: */ 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, | }, | .e_type = 2 , /* (ET_EXEC) */ | .e_machine = 8 , /* (EM_MIPS) */ | .e_version = 1 , /* (EV_CURRENT) */ | (...) Signed-off-by: Andreas K. Hüttel --- scripts/qemu-binfmt-conf.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/qemu-binfmt-conf.sh b/scripts/qemu-binfmt-conf.sh index e9bfeb94d3..2ac2226f26 100755 --- a/scripts/qemu-binfmt-conf.sh +++ b/scripts/qemu-binfmt-conf.sh @@ -61,7 +61,7 @@ m68k_family=m68k # FIXME: We could use the other endianness on a MIPS host. mips_magic='\x7fELF\x01\x02\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x08' -mips_mask='\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff' +mips_mask='\xff\xff\xff\xff\xff\xff\xff\x00\xfe\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff' mips_family=mips mipsel_magic='\x7fELF\x01\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x08\x00' -- 2.34.1
Re: [PATCH v5 00/49] target/ppc: PowerISA Vector/VSX instruction batch
On 01/03/2022 05:29, Cédric Le Goater wrote: On 2/25/22 22:08, matheus.fe...@eldorado.org.br wrote: From: Matheus Ferst This patch series implements 5 missing instructions from PowerISA v3.0 and 58 new instructions from PowerISA v3.1, moving 87 other instructions to decodetree along the way. Patches without review: 4, 24, 26, 27, 34, 35, 38, 40, 44-46 I think we are done. Applied to ppc-7.0. Thanks, C. We still had some minor fixes, but I guess we can send in a follow-up patch. Thanks, Matheus K. Ferst Instituto de Pesquisas ELDORADO <http://www.eldorado.org.br/> Analista de Software Aviso Legal - Disclaimer <https://www.eldorado.org.br/disclaimer.html>
Re: [PATCH v4 24/47] target/ppc: move vrl[bhwd]nm/vrl[bhwd]mi to decodetree
On 23/02/2022 19:19, Richard Henderson wrote: On 2/23/22 11:43, Matheus K. Ferst wrote: Note that rotlv does the masking itself: /* * Expand D = A << (B % element bits) * * Unlike scalar shifts, where it is easy for the target front end * to include the modulo as part of the expansion. If the target * naturally includes the modulo as part of the operation, great! * If the target has some other behaviour from out-of-range shifts, * then it could not use this function anyway, and would need to * do it's own expansion with custom functions. */ Using tcg_gen_rotlv_vec(vece, vrt, vra, vrb) works on PPC but fails on x86. It looks like a problem on the i386 backend. It's using VPS[RL]LV[DQ], but instead of this modulo behavior, these instructions write zero to the element[1]. I'm not sure how to fix that. You don't want to use tcg_gen_rotlv_vec directly, but tcg_gen_rotlv_vec. I guess there is a typo here. Did you mean tcg_gen_gvec_rotlv? Or tcg_gen_rotlv_mod_vec? The generic modulo is being applied here: static void tcg_gen_rotlv_mod_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b) { TCGv_vec t = tcg_temp_new_vec_matching(d); TCGv_vec m = tcg_constant_vec_matching(d, vece, (8 << vece) - 1); tcg_gen_and_vec(vece, t, b, m); tcg_gen_rotlv_vec(vece, d, a, t); tcg_temp_free_vec(t); } I can see that this method is called when we use tcg_gen_gvec_rotlv to implement vrl[bhwd], and they are working as expected. For vrl[wd]nm and vrl[wd]mi, however, we can't call tcg_gen_rotlv_mod_vec directly in the .fniv implementation because it is not exposed in tcg-op.h. Is there any other way to use this method? Should we add it to the header file? Thanks, Matheus K. Ferst Instituto de Pesquisas ELDORADO <http://www.eldorado.org.br/> Analista de Software Aviso Legal - Disclaimer <https://www.eldorado.org.br/disclaimer.html>
Re: [PATCH v4 20/47] target/ppc: implement vslq
On 22/02/2022 19:14, Richard Henderson wrote: On 2/22/22 04:36, matheus.fe...@eldorado.org.br wrote: From: Matheus Ferst Signed-off-by: Matheus Ferst --- v4: - New in v4. --- target/ppc/insn32.decode | 1 + target/ppc/translate/vmx-impl.c.inc | 40 + 2 files changed, 41 insertions(+) diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode index 88baebe35e..3799065508 100644 --- a/target/ppc/insn32.decode +++ b/target/ppc/insn32.decode @@ -473,6 +473,7 @@ VSLB 000100 . . . 0010100 @VX VSLH 000100 . . . 00101000100 @VX VSLW 000100 . . . 0011100 @VX VSLD 000100 . . . 10111000100 @VX +VSLQ 000100 . . . 0010101 @VX VSRB 000100 . . . 0100100 @VX VSRH 000100 . . . 01001000100 @VX diff --git a/target/ppc/translate/vmx-impl.c.inc b/target/ppc/translate/vmx-impl.c.inc index ec4f0e7654..ca98a545ef 100644 --- a/target/ppc/translate/vmx-impl.c.inc +++ b/target/ppc/translate/vmx-impl.c.inc @@ -834,6 +834,46 @@ TRANS_FLAGS(ALTIVEC, VSRAH, do_vector_gvec3_VX, MO_16, tcg_gen_gvec_sarv); TRANS_FLAGS(ALTIVEC, VSRAW, do_vector_gvec3_VX, MO_32, tcg_gen_gvec_sarv); TRANS_FLAGS2(ALTIVEC_207, VSRAD, do_vector_gvec3_VX, MO_64, tcg_gen_gvec_sarv); +static bool trans_VSLQ(DisasContext *ctx, arg_VX *a) +{ + TCGv_i64 hi, lo, tmp, n, sf = tcg_constant_i64(64); + + REQUIRE_INSNS_FLAGS2(ctx, ISA310); + REQUIRE_VECTOR(ctx); + + n = tcg_temp_new_i64(); + hi = tcg_temp_new_i64(); + lo = tcg_temp_new_i64(); + tmp = tcg_const_i64(0); + + get_avr64(lo, a->vra, false); + get_avr64(hi, a->vra, true); + + get_avr64(n, a->vrb, true); + tcg_gen_andi_i64(n, n, 0x7F); + + tcg_gen_movcond_i64(TCG_COND_GE, hi, n, sf, lo, hi); + tcg_gen_movcond_i64(TCG_COND_GE, lo, n, sf, tmp, lo); Since you have to mask twice anyway, better use (n & 64) != 0. Hmm, I'm not sure if I understood. To check != 0 we'll need a temp to hold n&64. We could use tmp here, but we'll need another one in patch 22. Is that right? Thanks, Matheus K. Ferst Instituto de Pesquisas ELDORADO <http://www.eldorado.org.br/> Analista de Software Aviso Legal - Disclaimer <https://www.eldorado.org.br/disclaimer.html>
Re: [PATCH v4 24/47] target/ppc: move vrl[bhwd]nm/vrl[bhwd]mi to decodetree
On 22/02/2022 19:30, Richard Henderson wrote: On 2/22/22 04:36, matheus.fe...@eldorado.org.br wrote: +static void gen_vrlnm_vec(unsigned vece, TCGv_vec vrt, TCGv_vec vra, + TCGv_vec vrb) +{ + TCGv_vec mask, n = tcg_temp_new_vec_matching(vrt); + + /* Create the mask */ + mask = do_vrl_mask_vec(vece, vrb); + + /* Extract n */ + tcg_gen_dupi_vec(vece, n, (8 << vece) - 1); + tcg_gen_and_vec(vece, n, vrb, n); + + /* Rotate and mask */ + tcg_gen_rotlv_vec(vece, vrt, vra, n); Note that rotlv does the masking itself: /* * Expand D = A << (B % element bits) * * Unlike scalar shifts, where it is easy for the target front end * to include the modulo as part of the expansion. If the target * naturally includes the modulo as part of the operation, great! * If the target has some other behaviour from out-of-range shifts, * then it could not use this function anyway, and would need to * do it's own expansion with custom functions. */ Using tcg_gen_rotlv_vec(vece, vrt, vra, vrb) works on PPC but fails on x86. It looks like a problem on the i386 backend. It's using VPS[RL]LV[DQ], but instead of this modulo behavior, these instructions write zero to the element[1]. I'm not sure how to fix that. Do we need an INDEX_op_shlv_vec case in i386 tcg_expand_vec_op? +static bool do_vrlnm(DisasContext *ctx, arg_VX *a, int vece) +{ + static const TCGOpcode vecop_list[] = { + INDEX_op_cmp_vec, INDEX_op_rotlv_vec, INDEX_op_sari_vec, + INDEX_op_shli_vec, INDEX_op_shri_vec, INDEX_op_shrv_vec, 0 + }; Where is sari used? I'll remove in v5. [1] Section 5.3 of https://www.intel.com/content/dam/develop/external/us/en/documents/36945 Thanks, Matheus K. Ferst Instituto de Pesquisas ELDORADO <http://www.eldorado.org.br/> Analista de Software Aviso Legal - Disclaimer <https://www.eldorado.org.br/disclaimer.html>
Re: [PATCH v3 0/3] linux-user/ppc: Deliver SIGTRAP on tw[i]/td[i]
Ping. All patches reviewed and the series still applies to master with no conflicts. On 13/01/2022 14:04, matheus.fe...@eldorado.org.br wrote: From: Matheus Ferst In the review of 66c6b40aba1, Richard Henderson suggested[1] using "trap" instead of ".long 0x0" to generate the signal to test XER save/restore behavior. However, linux-user aborts when a trap exception is raised, so we kept the patch with SIGILL. This patch series is a follow-up to remove the cpu_abort call, deliver SIGTRAP instead (using TRAP_BRKPT as si_code), and apply the suggestion to the signal_save_restore_xer test. The first patch removes the "qemu: fatal: Tried to call a TRAP" reported in issue #588[2]. The third patch is an RFC to address the other logged messages of "Unknown privilege violation (03)". [1] https://lists.gnu.org/archive/html/qemu-ppc/2021-10/msg00143.html [2] https://gitlab.com/qemu-project/qemu/-/issues/588 v3: - RFC to address the "Unknown privilege violation (03)" in #588. v2: - Based-on rth's patch to use force_sig_fault and avoid merge conflicts Matheus Ferst (3): linux-user/ppc: deliver SIGTRAP on POWERPC_EXCP_TRAP tests/tcg/ppc64le: change signal_save_restore_xer to use SIGTRAP target/ppc: Fix gen_priv_exception error value in mfspr/mtspr linux-user/ppc/cpu_loop.c | 3 ++- target/ppc/translate.c | 8 tests/tcg/ppc64le/signal_save_restore_xer.c | 8 3 files changed, 10 insertions(+), 9 deletions(-)
Re: [RFC PATCH 3/3] tests/tcg/ppc64le: Use vector types instead of __int128
On 17/02/2022 09:46, Matheus K. Ferst wrote: On 17/02/2022 05:09, Cédric Le Goater wrote: On 2/8/22 21:31, matheus.fe...@eldorado.org.br wrote: From: Matheus Ferst LLVM/Clang doesn't like inline asm with __int128, use a vector type instead. Signed-off-by: Matheus Ferst --- Alternatively, we could pass VSR values in GPR pairs, as we did in tests/tcg/ppc64le/non_signalling_xscv.c --- tests/tcg/ppc64le/bcdsub.c | 92 +- 1 file changed, 52 insertions(+), 40 deletions(-) diff --git a/tests/tcg/ppc64le/bcdsub.c b/tests/tcg/ppc64le/bcdsub.c index 8c188cae6d..17403daf22 100644 --- a/tests/tcg/ppc64le/bcdsub.c +++ b/tests/tcg/ppc64le/bcdsub.c @@ -1,6 +1,7 @@ #include #include #include +#include #define CRF_LT (1 << 3) #define CRF_GT (1 << 2) @@ -8,6 +9,16 @@ #define CRF_SO (1 << 0) #define UNDEF 0 +#ifdef __LITTLE_ENDIAN__ Shouldn't we be using : #if BYTE_ORDER == LITTLE_ENDIAN instead ? I guess it is better, I'll send a v2. Actually, it doesn't work for LLVM and needs endian.h for GCC[1]. This check is also used in sigbus and sha1 tests. The first shouldn't be a problem (allow_fail is zero for ppc), but sha1 gives the wrong result for BE: $ ./qemu-ppc64le tests/tcg/ppc64le-linux-user/sha1 SHA1=15dd99a1991e0b3826fede3deffc1feba42278e6 $ ./qemu-ppc64 tests/tcg/ppc64-linux-user/sha1 SHA1=70f1d4d65eb47309ffacc5a28ff285ad826006da and 'make check-tcg' succeeds anyway... [1] https://godbolt.org/z/fYbzbbexn -- Matheus K. Ferst Instituto de Pesquisas ELDORADO <http://www.eldorado.org.br/> Analista de Software Aviso Legal - Disclaimer <https://www.eldorado.org.br/disclaimer.html>
Re: [RFC PATCH 3/3] tests/tcg/ppc64le: Use vector types instead of __int128
On 17/02/2022 05:09, Cédric Le Goater wrote: On 2/8/22 21:31, matheus.fe...@eldorado.org.br wrote: From: Matheus Ferst LLVM/Clang doesn't like inline asm with __int128, use a vector type instead. Signed-off-by: Matheus Ferst --- Alternatively, we could pass VSR values in GPR pairs, as we did in tests/tcg/ppc64le/non_signalling_xscv.c --- tests/tcg/ppc64le/bcdsub.c | 92 +- 1 file changed, 52 insertions(+), 40 deletions(-) diff --git a/tests/tcg/ppc64le/bcdsub.c b/tests/tcg/ppc64le/bcdsub.c index 8c188cae6d..17403daf22 100644 --- a/tests/tcg/ppc64le/bcdsub.c +++ b/tests/tcg/ppc64le/bcdsub.c @@ -1,6 +1,7 @@ #include #include #include +#include #define CRF_LT (1 << 3) #define CRF_GT (1 << 2) @@ -8,6 +9,16 @@ #define CRF_SO (1 << 0) #define UNDEF 0 +#ifdef __LITTLE_ENDIAN__ Shouldn't we be using : #if BYTE_ORDER == LITTLE_ENDIAN instead ? I guess it is better, I'll send a v2. Thanks, Matheus K. Ferst Instituto de Pesquisas ELDORADO <http://www.eldorado.org.br/> Analista de Software Aviso Legal - Disclaimer <https://www.eldorado.org.br/disclaimer.html>
Re: [PATCH v1 11/11] tests/tcg: add vectorised sha512 versions
On 14/02/2022 12:14, Alex Bennée wrote: "Matheus K. Ferst" writes: On 11/02/2022 13:03, Alex Bennée wrote: This builds vectorised versions of sha512 to exercise the vector code: - aarch64 (AdvSimd) - i386 (SSE) - s390x (MVX) - ppc64 (vector) Signed-off-by: Alex Bennée Reviewed-by: Richard Henderson Message-Id: <20220202191242.652607-5-alex.ben...@linaro.org> --- v2 - use -msse4.1 -O3 instead of -pentium4 for i386 build --- tests/tcg/multiarch/sha512.c | 2 +- tests/tcg/aarch64/Makefile.target | 7 +++ tests/tcg/arm/Makefile.target | 8 tests/tcg/i386/Makefile.target| 6 ++ tests/tcg/ppc64le/Makefile.target | 5 - tests/tcg/s390x/Makefile.target | 9 + tests/tcg/x86_64/Makefile.target | 7 +++ 7 files changed, 42 insertions(+), 2 deletions(-) diff --git a/tests/tcg/ppc64le/Makefile.target b/tests/tcg/ppc64le/Makefile.target index 480ff0898d..4f1d03dfcf 100644 --- a/tests/tcg/ppc64le/Makefile.target +++ b/tests/tcg/ppc64le/Makefile.target @@ -5,10 +5,13 @@ VPATH += $(SRC_PATH)/tests/tcg/ppc64le ifneq ($(DOCKER_IMAGE)$(CROSS_CC_HAS_POWER8_VECTOR),) -PPC64LE_TESTS=bcdsub non_signalling_xscv +PPC64LE_TESTS=bcdsub non_signalling_xscv sha512-vector endif $(PPC64LE_TESTS): CFLAGS += -mpower8-vector Since this test does not target a specific instruction, maybe it should use -mvsx/-maltivec to allow the compiler to use newer instructions. I wasn't sure which vector instructions are supported by the TCG front ends so if the above flags won't trip up the TCG I can add them to the cflags. AFAICT, we should have all vector instruction until POWER9. POWER10 is WIP, but current versions of GCC/Clang are not emitting any of the missing instructions, even with -mcpu=power10 Thanks, Matheus K. Ferst Instituto de Pesquisas ELDORADO <http://www.eldorado.org.br/> Analista de Software Aviso Legal - Disclaimer <https://www.eldorado.org.br/disclaimer.html>
Re: [PATCH v3 10/37] target/ppc: Move Vector Compare Not Equal or Zero to decodetree
On 11/02/2022 01:41, Richard Henderson wrote: On 2/10/22 23:34, matheus.fe...@eldorado.org.br wrote: +static void gen_vcmpnez_vec(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b) +{ + TCGv_vec t0, t1, zero; + + t0 = tcg_temp_new_vec_matching(t); + t1 = tcg_temp_new_vec_matching(t); + zero = tcg_constant_vec_matching(t, vece, 0); + + tcg_gen_cmp_vec(TCG_COND_EQ, vece, t0, a, zero); + tcg_gen_cmp_vec(TCG_COND_EQ, vece, t1, b, zero); + tcg_gen_cmp_vec(TCG_COND_NE, vece, t, a, b); + + tcg_gen_or_vec(vece, t, t, t0); + tcg_gen_or_vec(vece, t, t, t1); + + tcg_gen_shli_vec(vece, t, t, (8 << vece) - 1); + tcg_gen_sari_vec(vece, t, t, (8 << vece) - 1); No shifting required, only the cmp. +static bool do_vcmpnez(DisasContext *ctx, arg_VC *a, int vece) +{ + static const TCGOpcode vecop_list[] = { + INDEX_op_cmp_vec, INDEX_op_shli_vec, INDEX_op_sari_vec, 0 + }; Therefore no vecop_list required (cmp itself is mandatory). Without vecop_list, we hit the assert in tcg_assert_listed_vecop, which is called from tcg_gen_cmp_vec. Am I missing something? Thanks, Matheus K. Ferst Instituto de Pesquisas ELDORADO <http://www.eldorado.org.br/> Analista de Software Aviso Legal - Disclaimer <https://www.eldorado.org.br/disclaimer.html>
Re: [PATCH v1 11/11] tests/tcg: add vectorised sha512 versions
On 11/02/2022 13:03, Alex Bennée wrote: This builds vectorised versions of sha512 to exercise the vector code: - aarch64 (AdvSimd) - i386 (SSE) - s390x (MVX) - ppc64 (vector) Signed-off-by: Alex Bennée Reviewed-by: Richard Henderson Message-Id: <20220202191242.652607-5-alex.ben...@linaro.org> --- v2 - use -msse4.1 -O3 instead of -pentium4 for i386 build --- tests/tcg/multiarch/sha512.c | 2 +- tests/tcg/aarch64/Makefile.target | 7 +++ tests/tcg/arm/Makefile.target | 8 tests/tcg/i386/Makefile.target| 6 ++ tests/tcg/ppc64le/Makefile.target | 5 - tests/tcg/s390x/Makefile.target | 9 + tests/tcg/x86_64/Makefile.target | 7 +++ 7 files changed, 42 insertions(+), 2 deletions(-) diff --git a/tests/tcg/ppc64le/Makefile.target b/tests/tcg/ppc64le/Makefile.target index 480ff0898d..4f1d03dfcf 100644 --- a/tests/tcg/ppc64le/Makefile.target +++ b/tests/tcg/ppc64le/Makefile.target @@ -5,10 +5,13 @@ VPATH += $(SRC_PATH)/tests/tcg/ppc64le ifneq ($(DOCKER_IMAGE)$(CROSS_CC_HAS_POWER8_VECTOR),) -PPC64LE_TESTS=bcdsub non_signalling_xscv +PPC64LE_TESTS=bcdsub non_signalling_xscv sha512-vector endif $(PPC64LE_TESTS): CFLAGS += -mpower8-vector Since this test does not target a specific instruction, maybe it should use -mvsx/-maltivec to allow the compiler to use newer instructions. +sha512-vector: sha512.c + $(CC) $(CFLAGS) $(EXTRA_CFLAGS) $< -o $@ $(LDFLAGS) + Can we have this test for big-endian too? Thanks, Matheus K. Ferst Instituto de Pesquisas ELDORADO <http://www.eldorado.org.br/> Analista de Software Aviso Legal - Disclaimer <https://www.eldorado.org.br/disclaimer.html>
Re: [RFC PATCH 0/3] tests/tcg/ppc64le: fix the build of TCG tests with Clang
On 09/02/2022 09:31, Cédric Le Goater wrote: Hello Matheus, [ Adding Miroslav ] On 2/8/22 21:31, matheus.fe...@eldorado.org.br wrote: From: Matheus Ferst Based-on: https://lists.gnu.org/archive/html/qemu-devel/2022-01/msg06506.html As the configuration scripts used -mbig and -mlittle, building PPC tests with Clang was silently skipped. With the patch to fix these options[1], "make check-tcg" fails because of build and runtime errors. This patch series tries to fix some of these problems. The first patch fixes "tests/tcg/ppc64le/mtfsf.c" by removing the GCC-only builtins used to emit mtfsf and mffs. We can emit these insns with inline asm instead. The second patch addresses differences in the output of float_madds.c. The __builtin_fmaf used in this test emits fmadds with GCC and xsmaddasp with LLVM. The first insn had rounding errors fixed in d04ca895dc7f ("target/ppc: Add helpers for fmadds et al"), we apply a similar fix to xsmaddasp. Then we have the build errors of tests/tcg/ppc64le/bcdsub.c. According to GCC docs[2], the '-mpower8-vector' flag provides some bcdsub builtins, so it'd be reasonable to assume that the rest of the toolchain knows about the insn if the compiler accepts this flag. Clang supports this flag since version 3.6[3], but the insn and builtins were only added in LLVM 14[4]. I couldn't find a good solution. Should we write a test to check for this insn at configuration time? Should we detect the compiler at build time and emit the insns with ".long" and fixed registers? Even building with Clang 14, the test will fail in runtime because LLVM doesn't like "__int128" in inline asm. No error or warning is emitted, but the generated code only loads one doubleword of the VSR. The third patch of this series avoids this issue by using a vector type for VSR values. Finally, it seems that the insns tested by tests/tcg/ppc64le/byte_reverse.c are not yet supported by LLVM. Since the configuration script uses '-mpower10' to check for POWER10 support and Clang doesn't support this flag, "make check-tcg" doesn't fail. We should probably change this check in the future, but since LLVM support of POWER10 seems incomplete, I guess we can leave it for now. gitlab didn't spot any issues with the 4 patches applied. AFAICT, CI wouldn't run into this kind of problem because we don't have PPC runners, and the cross-compiler containers use GCC. Should we merge all patches : Use long endian options for ppc64 tests/tcg/ppc64le: Use vector types instead of __int128 target/ppc: change xs[n]madd[am]sp to use float64r32_muladd tests/tcg/ppc64le: use inline asm instead of __builtin_mtfsf and see how we can address the LLVM support for P10 later ? The problems with bcdsub.c are not resolved for Clang < 14, but I guess it's ok to merge anyway. Thanks, Matheus K. Ferst Instituto de Pesquisas ELDORADO <http://www.eldorado.org.br/> Analista de Software Aviso Legal - Disclaimer <https://www.eldorado.org.br/disclaimer.html>
Re: [PATCH v2] Use long endian options for ppc64
On 31/01/2022 06:17, Miroslav Rezanina wrote: GCC options pairs -mlittle/-mlittle-endian and -mbig/-mbig-endian are equivalent on ppc64 architecture. However, Clang supports only long version of the options. Use longer form in configure to properly support both GCC and Clang compiler. In addition, fix this issue in tcg test configure. Signed-off-by: Miroslav Rezanina --- This is v2 of configure: Use -mlittle-endian instead of -mlittle for ppc64. v2: - handle both -mlittle and -mbig usage - fix tests/tcg/configure.sh --- configure | 4 ++-- tests/tcg/configure.sh | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/configure b/configure index e6cfc0e4be..066fa29b70 100755 --- a/configure +++ b/configure @@ -655,10 +655,10 @@ case "$cpu" in ppc) CPU_CFLAGS="-m32" ;; ppc64) -CPU_CFLAGS="-m64 -mbig" ;; +CPU_CFLAGS="-m64 -mbig-endian" ;; ppc64le) cpu="ppc64" -CPU_CFLAGS="-m64 -mlittle" ;; +CPU_CFLAGS="-m64 -mlittle-endian" ;; s390) CPU_CFLAGS="-m31" ;; diff --git a/tests/tcg/configure.sh b/tests/tcg/configure.sh index 309335a2bd..21959e1fde 100755 --- a/tests/tcg/configure.sh +++ b/tests/tcg/configure.sh @@ -64,9 +64,9 @@ fi : ${cross_cc_ppc="powerpc-linux-gnu-gcc"} : ${cross_cc_cflags_ppc="-m32"} : ${cross_cc_ppc64="powerpc64-linux-gnu-gcc"} -: ${cross_cc_cflags_ppc64="-m64 -mbig"} +: ${cross_cc_cflags_ppc64="-m64 -mbig-endian"} : ${cross_cc_ppc64le="$cross_cc_ppc64"} -: ${cross_cc_cflags_ppc64le="-m64 -mlittle"} +: ${cross_cc_cflags_ppc64le="-m64 -mlittle-endian"} : ${cross_cc_riscv64="riscv64-linux-gnu-gcc"} : ${cross_cc_s390x="s390x-linux-gnu-gcc"} : ${cross_cc_sh4="sh4-linux-gnu-gcc"} -- 2.34.1 The patch is fine, but some PPC tests are not compiling with Clang. I've sent an RFC about these issues: https://lists.gnu.org/archive/html/qemu-ppc/2022-02/msg00116.html Thanks, Matheus K. Ferst Instituto de Pesquisas ELDORADO <http://www.eldorado.org.br/> Analista de Software Aviso Legal - Disclaimer <https://www.eldorado.org.br/disclaimer.html>
Re: [PATCH v3 0/3] linux-user/ppc: Deliver SIGTRAP on tw[i]/td[i]
Ping. The based-on series is already on master, only patch 3 is missing review. On 13/01/2022 14:04, matheus.fe...@eldorado.org.br wrote: From: Matheus Ferst In the review of 66c6b40aba1, Richard Henderson suggested[1] using "trap" instead of ".long 0x0" to generate the signal to test XER save/restore behavior. However, linux-user aborts when a trap exception is raised, so we kept the patch with SIGILL. This patch series is a follow-up to remove the cpu_abort call, deliver SIGTRAP instead (using TRAP_BRKPT as si_code), and apply the suggestion to the signal_save_restore_xer test. The first patch removes the "qemu: fatal: Tried to call a TRAP" reported in issue #588[2]. The third patch is an RFC to address the other logged messages of "Unknown privilege violation (03)". [1] https://lists.gnu.org/archive/html/qemu-ppc/2021-10/msg00143.html [2] https://gitlab.com/qemu-project/qemu/-/issues/588 v3: - RFC to address the "Unknown privilege violation (03)" in #588. v2: - Based-on rth's patch to use force_sig_fault and avoid merge conflicts Matheus Ferst (3): linux-user/ppc: deliver SIGTRAP on POWERPC_EXCP_TRAP tests/tcg/ppc64le: change signal_save_restore_xer to use SIGTRAP target/ppc: Fix gen_priv_exception error value in mfspr/mtspr linux-user/ppc/cpu_loop.c | 3 ++- target/ppc/translate.c | 8 tests/tcg/ppc64le/signal_save_restore_xer.c | 8 3 files changed, 10 insertions(+), 9 deletions(-)
Re: [PATCH v3 1/2] ppc/pnv: use a do-while() loop in pnv_phb3_translate_tve()
On 27/01/2022 09:22, Daniel Henrique Barboza wrote: The 'taddr' variable is left unintialized, being set only inside the "while ((lev--) >= 0)" loop where we get the TCE address. The 'lev' var is an int32_t that is being initiliazed by the GETFIELD() macro, which returns an uint64_t. For a human reader this means that 'lev' will always be positive or zero. But some compilers may beg to differ. 'lev' being an int32_t can in theory be set as negative, and the "while ((lev--) >= 0)" loop might never be reached, and 'taddr' will be left unitialized. This can cause phb3_error() to use 'taddr' uninitialized down below: if ((is_write & !(tce & 2)) || ((!is_write) && !(tce & 1))) { phb3_error(phb, "TCE access fault at 0x%"PRIx64, taddr); A quick way of fixing it is to use a do/while() loop. This will keep the same semanting as the existing while() loop does and the compiler will understand that 'taddr' will be initialized at least once. Suggested-by: Matheus K. Ferst Resolves: https://gitlab.com/qemu-project/qemu/-/issues/573 Signed-off-by: Daniel Henrique Barboza --- hw/pci-host/pnv_phb3.c | 6 -- 1 file changed, 4 insertions(+), 2 deletions(-) Reviewed-by: Matheus Ferst Thanks, Matheus K. Ferst Instituto de Pesquisas ELDORADO <http://www.eldorado.org.br/> Analista de Software Aviso Legal - Disclaimer <https://www.eldorado.org.br/disclaimer.html>
Re: [PATCH v3 2/2] ppc/pnv: use a do-while() loop in pnv_phb4_translate_tve()
On 27/01/2022 09:22, Daniel Henrique Barboza wrote: pnv_phb4_translate_tve() is quite similar to pnv_phb3_translate_tve(), and that includes the fact that 'taddr' can be considered uninitialized when throwing the "TCE access fault" error because, in theory, the loop that sets 'taddr' can be skippable due to 'lev' being an signed int. No one complained about this specific case yet, but since we took the time to handle the same situtation in pnv_phb3_translate_tve(), let's replicate it here as well. Signed-off-by: Daniel Henrique Barboza --- hw/pci-host/pnv_phb4.c | 6 -- 1 file changed, 4 insertions(+), 2 deletions(-) Reviewed-by: Matheus Ferst Thanks, Matheus K. Ferst Instituto de Pesquisas ELDORADO <http://www.eldorado.org.br/> Analista de Software Aviso Legal - Disclaimer <https://www.eldorado.org.br/disclaimer.html>
Re: [PATCH v2 1/2] ppc/pnv: use a do-while() loop in pnv_phb3_translate_tve()
On 27/01/2022 09:09, Daniel Henrique Barboza wrote: On 1/27/22 08:41, Matheus K. Ferst wrote: On 26/01/2022 17:14, Daniel Henrique Barboza wrote: The 'taddr' variable is left unintialized, being set only inside the "while ((lev--) >= 0)" loop where we get the TCE address. The 'lev' var is an int32_t that is being initiliazed by the GETFIELD() macro, which returns an uint64_t. For a human reader this means that 'lev' will always be positive or zero. But some compilers may beg to differ. 'lev' being an int32_t can in theory be set as negative, and the "while ((lev--) >= 0)" loop might never be reached, and 'taddr' will be left unitialized. This can cause phb3_error() to use 'taddr' uninitialized down below: if ((is_write & !(tce & 2)) || ((!is_write) && !(tce & 1))) { phb3_error(phb, "TCE access fault at 0x%"PRIx64, taddr); A quick way of fixing it is to use a do/while() loop. This will keep the same semanting as the existing while() loop does and the compiler will understand that 'taddr' will be initialized at least once. Suggested-by: Matheus K. Ferst Resolves: https://gitlab.com/qemu-project/qemu/-/issues/573 Signed-off-by: Daniel Henrique Barboza --- hw/pci-host/pnv_phb3.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hw/pci-host/pnv_phb3.c b/hw/pci-host/pnv_phb3.c index 7fb35dc031..39a6184419 100644 --- a/hw/pci-host/pnv_phb3.c +++ b/hw/pci-host/pnv_phb3.c @@ -792,7 +792,7 @@ static void pnv_phb3_translate_tve(PnvPhb3DMASpace *ds, hwaddr addr, sh = tbl_shift * lev + tce_shift; /* TODO: Multi-level untested */ - while ((lev--) >= 0) { + do { /* Grab the TCE address */ taddr = base | (((addr >> sh) & ((1ul << tbl_shift) - 1)) << 3); if (dma_memory_read(&address_space_memory, taddr, &tce, @@ -813,7 +813,7 @@ static void pnv_phb3_translate_tve(PnvPhb3DMASpace *ds, hwaddr addr, } sh -= tbl_shift; base = tce & ~0xfffull; - } + } while ((lev--) >= 0); This changes the number of iterations in this loop. ooofff We'd need "while ((--lev) >= 0)" to keep it the same, but then we would be checking "!(tce & 3)" for the last iteration. Is that a problem? I don't think that's a problem because then (lev >= 0) will not be true That holds for "while ((lev--) >= 0)", where the last iteration has "lev=-1", but "while ((--lev) >= 0)" would make lev=0 in the last execution of this loop. I guess we need to either decrement lev elsewhere or adjust its use inside the loop (e.g.: if (lev > 0 && !(tce & 3))) and we'll not going to check !(tce &3), so even if 'tce' has a bogus value it's fine. Thanks, Matheus K. Ferst Instituto de Pesquisas ELDORADO <http://www.eldorado.org.br/> Analista de Software Aviso Legal - Disclaimer <https://www.eldorado.org.br/disclaimer.html>
Re: [PATCH v2 2/2] ppc/pnv: use a do-while() loop in pnv_phb4_translate_tve()
On 26/01/2022 17:14, Daniel Henrique Barboza wrote: pnv_phb4_translate_tve() is quite similar to pnv_phb3_translate_tve(), and that includes the fact that 'taddr' can be considered uninitialized when throwing the "TCE access fault" error because, in theory, the loop that sets 'taddr' can be skippable due to 'lev' being an signed int. No one complained about this specific case yet, but since we took the time to handle the same situtation in pnv_phb3_translate_tve(), let's replicate it here as well. Signed-off-by: Daniel Henrique Barboza --- hw/pci-host/pnv_phb4.c | 12 ++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/hw/pci-host/pnv_phb4.c b/hw/pci-host/pnv_phb4.c index a78add75b0..88a1479831 100644 --- a/hw/pci-host/pnv_phb4.c +++ b/hw/pci-host/pnv_phb4.c @@ -1261,13 +1261,21 @@ static void pnv_phb4_translate_tve(PnvPhb4DMASpace *ds, hwaddr addr, /* Top level table base address */ base = tta << 12; +/* + * There were reports of compilers complaining about 'taddr' + * being used uninitialized in pnv_phb3_translate_tve(), and + * the same scenario is happening here. Initialize 'taddr' + * just in case. + */ +taddr = base; + Do we still need this initialization? /* Total shift to first level */ sh = tbl_shift * lev + tce_shift; /* TODO: Limit to support IO page sizes */ /* TODO: Multi-level untested */ -while ((lev--) >= 0) { +do { /* Grab the TCE address */ taddr = base | (((addr >> sh) & ((1ul << tbl_shift) - 1)) << 3); if (dma_memory_read(&address_space_memory, taddr, &tce, @@ -1288,7 +1296,7 @@ static void pnv_phb4_translate_tve(PnvPhb4DMASpace *ds, hwaddr addr, } sh -= tbl_shift; base = tce & ~0xfffull; -} +} while ((lev--) >= 0); The same comments from the other patch apply here, this changes the number of iterations in this loop. Thanks, Matheus K. Ferst Instituto de Pesquisas ELDORADO <http://www.eldorado.org.br/> Analista de Software Aviso Legal - Disclaimer <https://www.eldorado.org.br/disclaimer.html>
Re: [PATCH v2 1/2] ppc/pnv: use a do-while() loop in pnv_phb3_translate_tve()
On 26/01/2022 17:14, Daniel Henrique Barboza wrote: The 'taddr' variable is left unintialized, being set only inside the "while ((lev--) >= 0)" loop where we get the TCE address. The 'lev' var is an int32_t that is being initiliazed by the GETFIELD() macro, which returns an uint64_t. For a human reader this means that 'lev' will always be positive or zero. But some compilers may beg to differ. 'lev' being an int32_t can in theory be set as negative, and the "while ((lev--) >= 0)" loop might never be reached, and 'taddr' will be left unitialized. This can cause phb3_error() to use 'taddr' uninitialized down below: if ((is_write & !(tce & 2)) || ((!is_write) && !(tce & 1))) { phb3_error(phb, "TCE access fault at 0x%"PRIx64, taddr); A quick way of fixing it is to use a do/while() loop. This will keep the same semanting as the existing while() loop does and the compiler will understand that 'taddr' will be initialized at least once. Suggested-by: Matheus K. Ferst Resolves: https://gitlab.com/qemu-project/qemu/-/issues/573 Signed-off-by: Daniel Henrique Barboza --- hw/pci-host/pnv_phb3.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hw/pci-host/pnv_phb3.c b/hw/pci-host/pnv_phb3.c index 7fb35dc031..39a6184419 100644 --- a/hw/pci-host/pnv_phb3.c +++ b/hw/pci-host/pnv_phb3.c @@ -792,7 +792,7 @@ static void pnv_phb3_translate_tve(PnvPhb3DMASpace *ds, hwaddr addr, sh = tbl_shift * lev + tce_shift; /* TODO: Multi-level untested */ -while ((lev--) >= 0) { +do { /* Grab the TCE address */ taddr = base | (((addr >> sh) & ((1ul << tbl_shift) - 1)) << 3); if (dma_memory_read(&address_space_memory, taddr, &tce, @@ -813,7 +813,7 @@ static void pnv_phb3_translate_tve(PnvPhb3DMASpace *ds, hwaddr addr, } sh -= tbl_shift; base = tce & ~0xfffull; -} +} while ((lev--) >= 0); This changes the number of iterations in this loop. We'd need "while ((--lev) >= 0)" to keep it the same, but then we would be checking "!(tce & 3)" for the last iteration. Is that a problem? Thanks, Matheus K. Ferst Instituto de Pesquisas ELDORADO <http://www.eldorado.org.br/> Analista de Software Aviso Legal - Disclaimer <https://www.eldorado.org.br/disclaimer.html>
Re: [PATCH 1/2] ppc/pnv: initialize 'taddr' in pnv_phb3_translate_tve()
On 26/01/2022 10:41, Daniel Henrique Barboza wrote: The 'taddr' variable is left unintialized, being set only inside the "while ((lev--) >= 0)" loop where we get the TCE address. The 'lev' var is an int32_t that is being initiliazed by the GETFIELD() macro, which returns an uint64_t. For a human reader this means that 'lev' will always be positive or zero. But some compilers may beg to differ. 'lev' being an int32_t can in theory be set as negative, and the "while ((lev--) >= 0)" loop might never be reached, and 'taddr' will be left unitialized. If we expect this code to execute at least once, wouldn't it be better to use a do-while? E.g.: do { lev--; /* Grab the TCE address */ taddr = base | (((addr >> sh) & ((1ul << tbl_shift) - 1)) << 3); if (dma_memory_read(&address_space_memory, taddr, &tce, /* ... */ } sh -= tbl_shift; base = tce & ~0xfffull; } while (lev >= 0); Otherwise, I think we'll need to initialize tce too. Thanks, Matheus K. Ferst Instituto de Pesquisas ELDORADO <http://www.eldorado.org.br/> Analista de Software Aviso Legal - Disclaimer <https://www.eldorado.org.br/disclaimer.html>
Re: [PATCH] hw/misc/macio/cuda: Simplify using the ldst API
On 24/01/2022 20:13, Philippe Mathieu-Daudé via wrote: This code is easier to review using the load/store API. Signed-off-by: Philippe Mathieu-Daudé --- hw/misc/macio/cuda.c | 9 +++-- 1 file changed, 3 insertions(+), 6 deletions(-) Reviewed-by: Matheus Ferst -- Matheus K. Ferst Instituto de Pesquisas ELDORADO <http://www.eldorado.org.br/> Analista de Software Aviso Legal - Disclaimer <https://www.eldorado.org.br/disclaimer.html>
Re: [PATCH v2 1/2] target/ppc: Remove last user of .load_state_old
On 18/01/2022 07:41, Cédric Le Goater wrote: This breaks migration compatibility from (very) old versions of QEMU. This should not be a problem for the pseries machine for which migration is only supported on recent QEMUs ( > 2.x). There is no clear status on what is supported or not for the other machines. Let's move forward and remove the .load_state_old handler. Signed-off-by: Cédric Le Goater --- target/ppc/machine.c | 112 --- 1 file changed, 112 deletions(-) diff --git a/target/ppc/machine.c b/target/ppc/machine.c index 756d8de5d8dd..df547385ff1e 100644 --- a/target/ppc/machine.c +++ b/target/ppc/machine.c @@ -23,117 +23,6 @@ static void post_load_update_msr(CPUPPCState *env) pmu_update_summaries(env); } -static int cpu_load_old(QEMUFile *f, void *opaque, int version_id) -{ -PowerPCCPU *cpu = opaque; - static int get_avr(QEMUFile *f, void *pv, size_t size, const VMStateField *field) { @@ -808,7 +697,6 @@ const VMStateDescription vmstate_ppc_cpu = { .version_id = 5, .minimum_version_id = 5, .minimum_version_id_old = 4, -.load_state_old = cpu_load_old, According to docs/devel/migration.rst, .minimum_version_id_old is ignored if no load_state_old handler is provided, I think we can drop it too. .pre_save = cpu_pre_save, .post_load = cpu_post_load, .fields = (VMStateField[]) { -- 2.31.1 Thanks, Matheus K. Ferst Instituto de Pesquisas ELDORADO <http://www.eldorado.org.br/> Analista de Software Aviso Legal - Disclaimer <https://www.eldorado.org.br/disclaimer.html>
Re: [PATCH 1/2] linux-user/ppc: deliver SIGTRAP on POWERPC_EXCP_TRAP
On 03/01/2022 14:50, Richard Henderson wrote: On 1/3/22 8:56 AM, matheus.fe...@eldorado.org.br wrote: From: Matheus Ferst Handle POWERPC_EXCP_TRAP in cpu_loop to deliver SIGTRAP on tw[i]/td[i]. The si_code comes from do_program_check in the kernel source file arch/powerpc/kernel/traps.c Signed-off-by: Matheus Ferst --- linux-user/ppc/cpu_loop.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/linux-user/ppc/cpu_loop.c b/linux-user/ppc/cpu_loop.c index 30c82f2354..8fbaa772dc 100644 --- a/linux-user/ppc/cpu_loop.c +++ b/linux-user/ppc/cpu_loop.c @@ -242,7 +242,9 @@ void cpu_loop(CPUPPCState *env) } break; case POWERPC_EXCP_TRAP: - cpu_abort(cs, "Tried to call a TRAP\n"); + info.si_signo = TARGET_SIGTRAP; + info.si_errno = 0; + info.si_code = TARGET_TRAP_BRKPT; You're missing the address, which should be nip. https://github.com/torvalds/linux/blob/master/arch/powerpc/kernel/traps.c#L1503 After this switch-case, there is a info._sifields._sigfault._addr = env->nip; Is there anything else to be set? Please use force_sig_fault. (I have a pending patch set to convert all other instances; hopefully that can be merged soon...) I'll send v2 with a Based-on Thanks, Matheus K. Ferst Instituto de Pesquisas ELDORADO <http://www.eldorado.org.br/> Analista de Software Aviso Legal - Disclaimer <https://www.eldorado.org.br/disclaimer.html>
Re: [PATCH] linux-user/signal: Map exit signals in SIGCHLD siginfo_t
Am Sonntag, 19. Dezember 2021, 00:32:09 CET schrieb Matthias Schiffer: > On 23/10/2021 21:59, Matthias Schiffer wrote: > > When converting a siginfo_t from waitid(), the interpretation of si_status > > depends on the value of si_code: For CLD_EXITED, it is an exit code and > > should be copied verbatim. For other codes, it is a signal number > > (possibly with additional high bits from ptrace) that should be mapped. > > > > This code was previously changed in commit 1c3dfb506ea3 > > ("linux-user/signal: Decode waitid si_code"), but the fix was > > incomplete. > > ping > Sorry I can't say anything about this. The hangs that I experience seem to be unrelated to the patch (no improvement, but also no worsening). -- Andreas K. Hüttel dilfri...@gentoo.org Gentoo Linux developer (council, qa, toolchain, base-system, perl, libreoffice) signature.asc Description: This is a digitally signed message part.
Re: [PATCH] tests/tcg/ppc64le: remove INT128 requirement to run non_signalling_xscv
On 17/12/2021 20:54, Richard Henderson wrote: [E-MAIL EXTERNO] Não clique em links ou abra anexos, a menos que você possa confirmar o remetente e saber que o conteúdo é seguro. Em caso de e-mail suspeito entre imediatamente em contato com o DTI. On 12/16/21 6:09 AM, matheus.fe...@eldorado.org.br wrote: + asm("mtvsrd 0, %3\n\t" \ + "xxswapd 0, 0\n\t" \ + "mtvsrd 0, %2\n\t" \ This doesn't work. The lower half of vs0 is undefined after mtvsrd. You actually want mtvsrdd 0, %2, %3, with "b" as the constraint for bh. + "mfvsrd %0, 0\n\t" \ + "xxswapd 0, 0\n\t" \ + "mfvsrd %1, 0\n\t" \ Drop the xxswapd and use mfvsrld. Otherwise it looks ok. r~ I'd like to avoid mtvsrdd/mfvsrld because they were introduced in PowerISA v3.0, and xscvspdpn/xscvdpspn are from v2.07. How about asm("mtvsrd 0, %2\n\t" "mtvsrd 1, %3\n\t" "xxmrghd 0, 0, 1\n\t" INSN " 0, 0\n\t" "mfvsrd %0, 0\n\t" "xxswapd 0, 0\n\t" "mfvsrd %1, 0\n\t" : "=r" (th), "=r" (tl) : "r" (bh), "r" (bl) : "vs0", "vs1"); ? Thanks, Matheus K. Ferst Instituto de Pesquisas ELDORADO <http://www.eldorado.org.br/> Analista de Software Aviso Legal - Disclaimer <https://www.eldorado.org.br/disclaimer.html>
Re: [PATCH] linux-user/signal: Map exit signals in SIGCHLD siginfo_t
Well, the original fix 1c3dfb506ea3 did clearly improve things for me, but it wasn't complete yet. At some point I gave up on finding a minimal reproducer for my remaining problems (futex-related hangs in a complex python+bash app). So, this *may* be the missing piece. Will test, but that takes a few days. Andreas Am Sonntag, 19. Dezember 2021, 16:55:16 CET schrieb Laurent Vivier: > CC'ing Alistair and Andreas that were involved in original fix 1c3dfb506ea3 > ("linux-user/signal: > Decode waitid si_code") > > Thanks, > Laurent > > Le 23/10/2021 à 21:59, Matthias Schiffer a écrit : > > When converting a siginfo_t from waitid(), the interpretation of si_status > > depends on the value of si_code: For CLD_EXITED, it is an exit code and > > should be copied verbatim. For other codes, it is a signal number > > (possibly with additional high bits from ptrace) that should be mapped. > > > > This code was previously changed in commit 1c3dfb506ea3 > > ("linux-user/signal: Decode waitid si_code"), but the fix was > > incomplete. > > > > Tested with the following test program: > > > > #include > > #include > > #include > > #include > > > > int main() { > > pid_t pid = fork(); > > if (pid == 0) { > > exit(12); > > } else { > > siginfo_t siginfo = {}; > > waitid(P_PID, pid, &siginfo, WEXITED); > > printf("Code: %d, status: %d\n", (int)siginfo.si_code, > > (int)siginfo.si_status); > > } > > > > pid = fork(); > > if (pid == 0) { > > raise(SIGUSR2); > > } else { > > siginfo_t siginfo = {}; > > waitid(P_PID, pid, &siginfo, WEXITED); > > printf("Code: %d, status: %d\n", (int)siginfo.si_code, > > (int)siginfo.si_status); > > } > > } > > > > Output with an x86_64 host and mips64el target before 1c3dfb506ea3 > > (incorrect: exit code 12 is translated like a signal): > > > > Code: 1, status: 17 > > Code: 2, status: 17 > > > > After 1c3dfb506ea3 (incorrect: signal number is not translated): > > > > Code: 1, status: 12 > > Code: 2, status: 12 > > > > With this patch: > > > > Code: 1, status: 12 > > Code: 2, status: 17 > > > > Signed-off-by: Matthias Schiffer > > --- > > linux-user/signal.c | 7 ++- > > 1 file changed, 6 insertions(+), 1 deletion(-) > > > > diff --git a/linux-user/signal.c b/linux-user/signal.c > > index 14d8fdfde152..8e3af98ec0a7 100644 > > --- a/linux-user/signal.c > > +++ b/linux-user/signal.c > > @@ -403,7 +403,12 @@ static inline void > > host_to_target_siginfo_noswap(target_siginfo_t *tinfo, > > case TARGET_SIGCHLD: > > tinfo->_sifields._sigchld._pid = info->si_pid; > > tinfo->_sifields._sigchld._uid = info->si_uid; > > -tinfo->_sifields._sigchld._status = info->si_status; > > + if (si_code == CLD_EXITED) > > +tinfo->_sifields._sigchld._status = info->si_status; > > +else > > +tinfo->_sifields._sigchld._status > > += host_to_target_signal(info->si_status & 0x7f) > > +| (info->si_status & ~0x7f); > > tinfo->_sifields._sigchld._utime = info->si_utime; > > tinfo->_sifields._sigchld._stime = info->si_stime; > > si_type = QEMU_SI_CHLD; > > > > > -- Andreas K. Hüttel dilfri...@gentoo.org Gentoo Linux developer (council, toolchain, base-system, perl, libreoffice) signature.asc Description: This is a digitally signed message part.
Re: [RFC PATCH] target/ppc: do not silence snan in xscvspdpn
On 15/12/2021 12:55, Alex Bennée wrote: Philippe Mathieu-Daudé writes: On 12/13/21 21:15, Matheus K. Ferst wrote: On 13/12/2021 09:36, Philippe Mathieu-Daudé wrote: On 12/13/21 13:13, matheus.fe...@eldorado.org.br wrote: From: Matheus Ferst The non-signalling versions of VSX scalar convert to shorter/longer precision insns doesn't silence SNaNs in the hardware. We are currently honoring this behavior in xscvdpspn, since helper_xscvdpspn handles the conversion with extracts/deposits/etc. OTOH, xscvspdpn uses float32_to_float64 that calls parts_float_to_float, which uses parts_return_nan that finally calls parts_silence_nan if the input is an SNaN. To address this problem, this patch adds a new float_status flag (return_snan) to avoid the call to parts_silence_nan in the float_class_snan case of parts_return_nan. Signed-off-by: Matheus Ferst --- This behavior was observed in a Power9 and can be reproduced with the following code: int main(void) { __uint128_t t, b = 0x7f82; asm("xscvspdpn %x0, %x1\n\t" : "=wa" (t) : "wa" (b << 64)); printf("0x%016" PRIx64 "%016" PRIx64 "\n", (uint64_t)(t >> 64), (uint64_t)t); b = 0x7ff2; asm("xscvdpspn %x0, %x1\n\t" : "=wa" (t) : "wa" (b << 64)); printf("0x%016" PRIx64 "%016" PRIx64 "\n", (uint64_t)(t >> 64), (uint64_t)t); return 0; } Why not add this test in tests/tcg/ppc64le/ ? I'll add it in v2. Is it ok to use __uint128_t in tests? What about: int main(void) { #ifndef __SIZEOF_INT128__ printf("uint128_t not available, skipping...\n"); #else ... #endif return 0; } We have a tests/tcg/configure.sh which does feature tests although it is mainly testing for the presence of compiler target flags. We do this because while the docker compilers are all pretty modern the user might be using their own cross compiler. I'm generally not keen on the tests silently skipping because it gives a false impression things have been tested. If it is possible to avoid INT128 shenanigans to load the values into the inline assembler lets do that, otherwise lets feature test and ifdef a skip-test in the makefile. -- Alex Bennée I think we can pass the value via register and use mtvsrd/mfvsrd to avoid the INT128 part. There was a v2 of this patch, but I messed up the CC and only included get_maintainer.pl result and Philippe. The patch is now applied, so I'll send this change as a follow-up. Thanks, Matheus K. Ferst Instituto de Pesquisas ELDORADO <http://www.eldorado.org.br/> Analista de Software Aviso Legal - Disclaimer <https://www.eldorado.org.br/disclaimer.html>
Re: [PATCH for-7.0 v2] target/ppc: do not silence SNaN in xscvspdpn
On 15/12/2021 13:53, Cédric Le Goater wrote: On 12/14/21 15:44, matheus.fe...@eldorado.org.br wrote: From: Matheus Ferst The non-signalling versions of VSX scalar convert to shorter/longer precision insns doesn't silence SNaNs in the hardware. To better match this behavior, use the non-arithmatic conversion of helper_todouble instead of float32_to_float64. A test is added to prevent future regressions. Signed-off-by: Matheus Ferst Applied to ppc-next. Thanks, C. Hi Cédric, Alex requested some changes in the test part, could you drop this patch for now? Thanks, Matheus K. Ferst Instituto de Pesquisas ELDORADO <http://www.eldorado.org.br/> Analista de Software Aviso Legal - Disclaimer <https://www.eldorado.org.br/disclaimer.html>
Re: [RFC PATCH] target/ppc: do not silence snan in xscvspdpn
On 13/12/2021 09:36, Philippe Mathieu-Daudé wrote: On 12/13/21 13:13, matheus.fe...@eldorado.org.br wrote: From: Matheus Ferst The non-signalling versions of VSX scalar convert to shorter/longer precision insns doesn't silence SNaNs in the hardware. We are currently honoring this behavior in xscvdpspn, since helper_xscvdpspn handles the conversion with extracts/deposits/etc. OTOH, xscvspdpn uses float32_to_float64 that calls parts_float_to_float, which uses parts_return_nan that finally calls parts_silence_nan if the input is an SNaN. To address this problem, this patch adds a new float_status flag (return_snan) to avoid the call to parts_silence_nan in the float_class_snan case of parts_return_nan. Signed-off-by: Matheus Ferst --- This behavior was observed in a Power9 and can be reproduced with the following code: int main(void) { __uint128_t t, b = 0x7f82; asm("xscvspdpn %x0, %x1\n\t" : "=wa" (t) : "wa" (b << 64)); printf("0x%016" PRIx64 "%016" PRIx64 "\n", (uint64_t)(t >> 64), (uint64_t)t); b = 0x7ff2; asm("xscvdpspn %x0, %x1\n\t" : "=wa" (t) : "wa" (b << 64)); printf("0x%016" PRIx64 "%016" PRIx64 "\n", (uint64_t)(t >> 64), (uint64_t)t); return 0; } Why not add this test in tests/tcg/ppc64le/ ? I'll add it in v2. Is it ok to use __uint128_t in tests? Thanks, Matheus K. Ferst Instituto de Pesquisas ELDORADO <http://www.eldorado.org.br/> Analista de Software Aviso Legal - Disclaimer <https://www.eldorado.org.br/disclaimer.html>
Re: [RFC PATCH] target/ppc: do not silence snan in xscvspdpn
On 13/12/2021 12:46, Richard Henderson wrote: On 12/13/21 4:13 AM, matheus.fe...@eldorado.org.br wrote: From: Matheus Ferst The non-signalling versions of VSX scalar convert to shorter/longer precision insns doesn't silence SNaNs in the hardware. We are currently honoring this behavior in xscvdpspn, since helper_xscvdpspn handles the conversion with extracts/deposits/etc. OTOH, xscvspdpn uses float32_to_float64 that calls parts_float_to_float, which uses parts_return_nan that finally calls parts_silence_nan if the input is an SNaN. To address this problem, this patch adds a new float_status flag (return_snan) to avoid the call to parts_silence_nan in the float_class_snan case of parts_return_nan. Signed-off-by: Matheus Ferst I think you shouldn't be using softfloat at all for this, but use the existing helper_todouble function. Oh, that's better, I'll send a v2. Thanks, Matheus K. Ferst Instituto de Pesquisas ELDORADO <http://www.eldorado.org.br/> Analista de Software Aviso Legal - Disclaimer <https://www.eldorado.org.br/disclaimer.html>
Re: [PATCH 4/4] target/ppc: move xscvqpdp to decodetree
On 10/12/2021 11:13, Victor Colombo wrote: From: Matheus Ferst Signed-off-by: Matheus Ferst --- target/ppc/fpu_helper.c | 10 +++--- target/ppc/helper.h | 2 +- target/ppc/insn32.decode| 4 target/ppc/translate/vsx-impl.c.inc | 24 +--- target/ppc/translate/vsx-ops.c.inc | 1 - 5 files changed, 21 insertions(+), 20 deletions(-) diff --git a/target/ppc/translate/vsx-impl.c.inc b/target/ppc/translate/vsx-impl.c.inc index ab5cb21f13..f8669cae42 100644 --- a/target/ppc/translate/vsx-impl.c.inc +++ b/target/ppc/translate/vsx-impl.c.inc @@ -904,22 +904,24 @@ VSX_CMP(xvcmpgesp, 0x0C, 0x0A, 0, PPC2_VSX) VSX_CMP(xvcmpgtsp, 0x0C, 0x09, 0, PPC2_VSX) VSX_CMP(xvcmpnesp, 0x0C, 0x0B, 0, PPC2_VSX) -static void gen_xscvqpdp(DisasContext *ctx) +static bool trans_XSCVQPDP(DisasContext *ctx, arg_X_tb_rc *a) { -TCGv_i32 opc; +TCGv_i32 ro; TCGv_ptr xt, xb; -if (unlikely(!ctx->vsx_enabled)) { -gen_exception(ctx, POWERPC_EXCP_VSXU); -return; -} -opc = tcg_const_i32(ctx->opcode); -xt = gen_vsr_ptr(rD(ctx->opcode) + 32); -xb = gen_vsr_ptr(rB(ctx->opcode) + 32); -gen_helper_xscvqpdp(cpu_env, opc, xt, xb); -tcg_temp_free_i32(opc); +REQUIRE_INSNS_FLAGS2(ctx, ISA310); It's actually ISA300. We'll send a v2 fixing this. -- Matheus K. Ferst Instituto de Pesquisas ELDORADO <http://www.eldorado.org.br/> Analista de Software Aviso Legal - Disclaimer <https://www.eldorado.org.br/disclaimer.html>
Re: [RFC PATCH for-7.0 00/35] target/ppc fpu fixes and cleanups
On 19/11/2021 13:04, Richard Henderson wrote: This is a partial patch set showing the direction I believe the cleanups should go, as opposed to a complete conversion. I add a bunch of float_flag_* bits that diagnose the reason for most float_flag_invalid, as guided by the requirements of the PowerPC VX* bits. I have converted some helpers to use these new flags but not all. A good signal for unconverted routines is the use of float*_is_signalling_nan, which should now be using float_flag_invalid_snan. I added float64x32_* arithmetic routines, which take float64 arguments and round the result to the range and precision of float32, while giving the result in float64 form. This is exactly what PowerPC requires for its single-precision math. This fixes double-rounding problems that exist in the current code, and are visible in the float_madds test. I add test reference files for float_madds and float_convs after fixing the bugs required to make the tests pass. With this series and few other VSX instructions[1], QEMU now passes the GLibc math test suite. Tested-by: Matheus Ferst [1] https://github.com/PPC64/qemu/tree/ferst-tcg-xsmaddqp (WIP) Thanks, Matheus K. Ferst Instituto de Pesquisas ELDORADO <http://www.eldorado.org.br/> Analista de Software Aviso Legal - Disclaimer <https://www.eldorado.org.br/disclaimer.html>
Re: ppc/fpu_helper.c
On 14/11/2021 07:15, 罗勇刚(Yonggang Luo) wrote: On Sun, Nov 14, 2021 at 5:07 PM Richard Henderson mailto:richard.hender...@linaro.org>> wrote: > > On 11/13/21 11:07 AM, 罗勇刚(Yonggang Luo) wrote: > > I've seen nothing in fpu_helper.c to update > > the fpscr to FP_FR, > > that is there is no code with `fpscr |= FP_FR`, > > is that valid for PowerPC? or it's just because this is hard > > to implement and then the FP_FR are always not set for fpscr > > It is unimplemented, yes. I think that no one has spent the time; I don't think that it > should be hard, necessarily. Thanks, I also have a question, where is the complete PowerPC floating point instrunctions tests case? I wanna improve the performance of powerpc floating point calculation and don't lost the accuracy of it So I need a complete testsuite for it. AFAIK we only have the multiarch tests, float_convs and float_madds. You'll need the reference files in tests/tcg/ppc{,64{,le}}/ to run them. I'm attaching the ppc64le ones (obtained from a POWER9), but you can also generate them by running the test in the real hardware and saving the output (e.g.: ./float_convs > float_convs.ref). However, both tests currently fail. I guess it's related to https://bugs.launchpad.net/qemu/+bug/1841592, but I'm not sure if the comments on this bug are still valid/up-to-date. -- Matheus K. Ferst Instituto de Pesquisas ELDORADO <http://www.eldorado.org.br/> Analista de Software Aviso Legal - Disclaimer <https://www.eldorado.org.br/disclaimer.html> ### Rounding to nearest from single: f32(-nan:0xffa0) to double: f64(-nan:0x00fff4) (INVALID) to int32: -2147483648 (INVALID) to int64: -9223372036854775808 (INVALID) to uint32: 0 (INVALID) to uint64: 0 (INVALID) from single: f32(-nan:0xffc0) to double: f64(-nan:0x00fff8) (OK) to int32: -2147483648 (INVALID) to int64: -9223372036854775808 (INVALID) to uint32: 0 (INVALID) to uint64: 0 (INVALID) from single: f32(-inf:0xff80) to double: f64(-inf:0x00fff0) (OK) to int32: -2147483648 (INVALID) to int64: -9223372036854775808 (INVALID) to uint32: 0 (INVALID) to uint64: 0 (INVALID) from single: f32(-0x1.fe00p+127:0xff7f) to double: f64(-0x1.fe00p+127:0x00c7efe000) (OK) to int32: -2147483648 (INVALID) to int64: -9223372036854775808 (INVALID) to uint32: 0 (INVALID) to uint64: 0 (INVALID) from single: f32(-0x1.1874b200p+103:0xf30c3a59) to double: f64(-0x1.1874b200p+103:0x00c661874b2000) (OK) to int32: -2147483648 (INVALID) to int64: -9223372036854775808 (INVALID) to uint32: 0 (INVALID) to uint64: 0 (INVALID) from single: f32(-0x1.c0bab600p+99:0xf1605d5b) to double: f64(-0x1.c0bab600p+99:0x00c62c0bab6000) (OK) to int32: -2147483648 (INVALID) to int64: -9223372036854775808 (INVALID) to uint32: 0 (INVALID) to uint64: 0 (INVALID) from single: f32(-0x1.31f75000p-40:0xab98fba8) to double: f64(-0x1.31f75000p-40:0x00bd731f75) (OK) to int32: 0 (INEXACT ) to int64: 0 (INEXACT ) to uint32: 0 (INEXACT ) to uint64: 0 (INEXACT ) from single: f32(-0x1.50544400p-66:0x9ea82a22) to double: f64(-0x1.50544400p-66:0x00bbd505444000) (OK) to int32: 0 (INEXACT ) to int64: 0 (INEXACT ) to uint32: 0 (INEXACT ) to uint64: 0 (INEXACT ) from single: f32(-0x1.p-126:0x8080) to double: f64(-0x1.p-126:0x00b810) (OK) to int32: 0 (INEXACT ) to int64: 0 (INEXACT ) to uint32: 0 (INEXACT ) to uint64: 0 (INEXACT ) from single: f32(0x0.p+0:00) to double: f64(0x0.p+0:) (OK) to int32: 0 (OK) to int64: 0 (OK) to uint32: 0 (OK) to uint64: 0 (OK) from single: f32(0x1.p-126:0x0080) to double: f64(0x1.p-126:0x003810) (OK) to int32: 0 (INEXACT ) to int64: 0 (INEXACT ) to uint32: 0 (INEXACT ) to uint64: 0 (INEXACT ) from single: f32(0x1.p-25:0x3300) to double: f64(0x1.p-25:0x003e60) (OK) to int32: 0 (INEXACT ) to int64: 0 (INEXACT ) to uint32: 0 (INEXACT ) to uint64: 0 (INEXACT ) from single: f32(0x1.e600p-25:0x3373) to double: f64(0x1.e600p-25:0x003e6e6000) (OK) to int32: 0 (INEXACT ) to int64: 0 (INEXACT ) to uint32: 0 (INEXACT ) to uint64: 0 (INEXACT ) from single: f32(0x1.ff801a00p-15:0x387fc00d) to double: f64(0x1.ff801a00p-15:0x003f0ff801a000) (OK) to int32: 0 (INEXACT ) to int64: 0 (INEXACT ) to uint32: 0 (INEXACT ) to uint64: 0 (INEXACT ) from single: f32(0x1.0c0
Re: Fwd: New Defects reported by Coverity Scan for QEMU
On 10/11/2021 05:18, Cédric Le Goater wrote: Hello Luis, Coverity found a couple of issues which seem related to the DFP patchset. Could you please take a look ? Thanks, C. Forwarded Message Subject: New Defects reported by Coverity Scan for QEMU Date: Tue, 9 Nov 2021 22:09:40 + From: scan-ad...@coverity.com To: c...@kaod.org Hi, Please find the latest report on new defect(s) introduced to QEMU found with Coverity Scan. 16 new defect(s) introduced to QEMU found with Coverity Scan. 19 defect(s), reported by Coverity Scan earlier, were marked fixed in the recent build analyzed by Coverity Scan. New defect(s) Reported-by: Coverity Scan Showing 16 of 16 defect(s) ** CID 1465791: Uninitialized variables (UNINIT) *** CID 1465791: Uninitialized variables (UNINIT) /qemu/target/ppc/dfp_helper.c: 1202 in helper_DENBCD() 1196 } \ 1197 dfp_finalize_decimal##size(&dfp); \ 1198 dfp_set_FPRF_from_FRT(&dfp); \ 1199 set_dfp##size(t, &dfp.vt); \ 1200 } 1201 CID 1465791: Uninitialized variables (UNINIT) Using uninitialized element of array "dfp.vt" when calling "set_dfp64". 1202 DFP_HELPER_ENBCD(DENBCD, 64) 1203 DFP_HELPER_ENBCD(DENBCDQ, 128) Hi Cédric, The only change was the helper name that is now uppercase, so nothing new here. The underlying cause is that dfp_finalize_decimal64 only sets dfp->vt.VsrD(1) and set_dfp64 receives a pointer to the complete struct. But since set_dfp64 also only access VsrD(1), it shouldn't be a real problem AFAICT. The same applies to CID 1465776~1465786 and 1465788~1465790. ** CID 1465787: (BAD_SHIFT) /qemu/target/ppc/int_helper.c: 369 in helper_CFUGED() /qemu/target/ppc/int_helper.c: 370 in helper_CFUGED() /qemu/target/ppc/int_helper.c: 356 in helper_CFUGED() /qemu/target/ppc/int_helper.c: 356 in helper_CFUGED() /qemu/target/ppc/int_helper.c: 356 in helper_CFUGED() /qemu/target/ppc/int_helper.c: 369 in helper_CFUGED() /qemu/target/ppc/int_helper.c: 370 in helper_CFUGED() /qemu/target/ppc/int_helper.c: 370 in helper_CFUGED() /qemu/target/ppc/int_helper.c: 369 in helper_CFUGED() *** CID 1465787: (BAD_SHIFT) /qemu/target/ppc/int_helper.c: 369 in helper_CFUGED() 363 /* 364 * Discards the processed bits from 'src' and 'mask'. Note that we are 365 * removing 'n' trailing zeros from 'mask', but the logical shift will 366 * add 'n' leading zeros back, so the population count of 'mask' is kept 367 * the same. 368 */ CID 1465787: (BAD_SHIFT) In expression "src >>= n", right shifting by more than 63 bits has undefined behavior. The shift amount, "n", is as much as 64. Similar case here, the helper was just renamed. The value of "n" comes from ctz64(mask) and mask == 0 is a trivial case handled before anything else. Thanks, Matheus K. Ferst Instituto de Pesquisas ELDORADO <http://www.eldorado.org.br/> Analista de Software Aviso Legal - Disclaimer <https://www.eldorado.org.br/disclaimer.html>
Re: [PATCH v2 03/34] target/ppc: Move load and store floating point instructions to decodetree
type) \ -GEN_LDXF(name, ldop, 0x17, op | 0x00, type) - -GEN_LDFS(lfd, ld64, 0x12, PPC_FLOAT) -GEN_LDFS(lfs, ld32fs, 0x10, PPC_FLOAT) GEN_HANDLER_E(lfdepx, 0x1F, 0x1F, 0x12, 0x0001, PPC_NONE, PPC2_BOOKE206), GEN_HANDLER_E(lfiwax, 0x1f, 0x17, 0x1a, 0x0001, PPC_NONE, PPC2_ISA205), GEN_HANDLER_E(lfiwzx, 0x1f, 0x17, 0x1b, 0x1, PPC_NONE, PPC2_FP_CVT_ISA206), GEN_HANDLER_E(lfdpx, 0x1F, 0x17, 0x18, 0x0021, PPC_NONE, PPC2_ISA205), -#define GEN_STF(name, stop, opc, type) \ -GEN_HANDLER(name, opc, 0xFF, 0xFF, 0x, type), -#define GEN_STUF(name, stop, opc, type) \ -GEN_HANDLER(name##u, opc, 0xFF, 0xFF, 0x, type), -#define GEN_STUXF(name, stop, opc, type) \ -GEN_HANDLER(name##ux, 0x1F, 0x17, opc, 0x0001, type), #define GEN_STXF(name, stop, opc2, opc3, type) \ GEN_HANDLER(name##x, 0x1F, opc2, opc3, 0x0001, type), -#define GEN_STFS(name, stop, op, type) \ -GEN_STF(name, stop, op | 0x20, type) \ -GEN_STUF(name, stop, op | 0x21, type) \ -GEN_STUXF(name, stop, op | 0x01, type) \ -GEN_STXF(name, stop, 0x17, op | 0x00, type) -GEN_STFS(stfd, st64_i64, 0x16, PPC_FLOAT) -GEN_STFS(stfs, st32fs, 0x14, PPC_FLOAT) GEN_STXF(stfiw, st32fiw, 0x17, 0x1E, PPC_FLOAT_STFIWX) GEN_HANDLER_E(stfdepx, 0x1F, 0x1F, 0x16, 0x0001, PPC_NONE, PPC2_BOOKE206), GEN_HANDLER_E(stfdpx, 0x1F, 0x17, 0x1C, 0x0021, PPC_NONE, PPC2_ISA205), This commit appears to break both MacOS 9 and OS X under qemu-system-ppc in my boot tests: MacOS 9 hangs early on startup, whilst OS X now has black box artifacts on some GUI widgets (see attached). It seems that we're updating the wrong register (RT instead of RA). Can you test the attached patch? Any idea what could be happening here? Note that this series isn't bisectable - it is necessary to forward-port the REQUIRE_FPU macro in order to build this commit for PPC32, and I also found errors relating to undefined times_* functions during bisection. REQUIRE_FPU and the times_* functions come from the DFP patch series, in which this series is based-on, so "target/ppc: Introduce REQUIRE_FPU" was supposed to be merged before this patch. Maybe something went wrong when these series were partially applied Thanks, Matheus K. Ferst Instituto de Pesquisas ELDORADO <http://www.eldorado.org.br/> Analista de Software Aviso Legal - Disclaimer <https://www.eldorado.org.br/disclaimer.html> >From 0ae11665ca66f3c304f3027513a140fab28d4bc2 Mon Sep 17 00:00:00 2001 From: Matheus Ferst Date: Tue, 9 Nov 2021 13:57:30 -0300 Subject: [PATCH] fixup! target/ppc: Move load and store floating point instructions to decodetree Signed-off-by: Matheus Ferst --- target/ppc/translate/fp-impl.c.inc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/target/ppc/translate/fp-impl.c.inc b/target/ppc/translate/fp-impl.c.inc index d1dbb1b96b..c9e05201d9 100644 --- a/target/ppc/translate/fp-impl.c.inc +++ b/target/ppc/translate/fp-impl.c.inc @@ -1328,7 +1328,7 @@ static bool do_lsfpsd(DisasContext *ctx, int rt, int ra, TCGv displ, set_fpr(rt, t0); } if (update) { -tcg_gen_mov_tl(cpu_gpr[rt], ea); +tcg_gen_mov_tl(cpu_gpr[ra], ea); } tcg_temp_free_i64(t0); tcg_temp_free(ea); -- 2.25.1
Re: [PATCH v5 09/10] target/ppc: PMU Event-Based exception support
On 08/11/2021 17:03, Daniel Henrique Barboza wrote: [E-MAIL EXTERNO] Não clique em links ou abra anexos, a menos que você possa confirmar o remetente e saber que o conteúdo é seguro. Em caso de e-mail suspeito entre imediatamente em contato com o DTI. On 11/8/21 16:48, Matheus K. Ferst wrote: On 01/11/2021 20:56, Daniel Henrique Barboza wrote: From: Gustavo Romero Following up the rfebb implementation, this patch adds the EBB exception support that are triggered by Performance Monitor alerts. This exception occurs when an enabled PMU condition or event happens and both MMCR0_EBE and BESCR_PME are set. The supported PM alerts will consist of counter negative conditions of the PMU counters. This will be achieved by a timer mechanism that will predict when a counter becomes negative. The PMU timer callback will set the appropriate bits in MMCR0 and fire a PMC interrupt. The EBB exception code will then set the appropriate BESCR bits, set the next instruction pointer to the address pointed by the return register (SPR_EBBRR), and redirect execution to the handler (pointed by SPR_EBBHR). CC: Gustavo Romero Signed-off-by: Gustavo Romero Signed-off-by: Daniel Henrique Barboza --- target/ppc/cpu.h | 5 - target/ppc/excp_helper.c | 28 target/ppc/power8-pmu.c | 26 -- 3 files changed, 56 insertions(+), 3 deletions(-) diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h index 8f545ff482..592031ce54 100644 --- a/target/ppc/cpu.h +++ b/target/ppc/cpu.h @@ -129,8 +129,10 @@ enum { /* ISA 3.00 additions */ POWERPC_EXCP_HVIRT = 101, POWERPC_EXCP_SYSCALL_VECTORED = 102, /* scv exception */ + POWERPC_EXCP_EBB = 103, /* Event-based branch exception */ + /* EOL */ - POWERPC_EXCP_NB = 103, + POWERPC_EXCP_NB = 104, /* QEMU exceptions: special cases we want to stop translation */ POWERPC_EXCP_SYSCALL_USER = 0x203, /* System call in user mode only */ }; @@ -2455,6 +2457,7 @@ enum { PPC_INTERRUPT_HMI, /* Hypervisor Maintenance interrupt */ PPC_INTERRUPT_HDOORBELL, /* Hypervisor Doorbell interrupt */ PPC_INTERRUPT_HVIRT, /* Hypervisor virtualization interrupt */ + PPC_INTERRUPT_PMC, /* Hypervisor virtualization interrupt */ }; /* Processor Compatibility mask (PCR) */ diff --git a/target/ppc/excp_helper.c b/target/ppc/excp_helper.c index 7be334e007..88aa0a84f8 100644 --- a/target/ppc/excp_helper.c +++ b/target/ppc/excp_helper.c @@ -797,6 +797,22 @@ static inline void powerpc_excp(PowerPCCPU *cpu, int excp_model, int excp) cpu_abort(cs, "Non maskable external exception " "is not implemented yet !\n"); break; + case POWERPC_EXCP_EBB: /* Event-based branch exception */ + if ((env->spr[SPR_BESCR] & BESCR_GE) && + (env->spr[SPR_BESCR] & BESCR_PME)) { Do we need to check FSCR[EBB] here? FSCR[EBB] is being checked in the spr_read_ebb* and spr_write_ebb* callbacks in translate.c. These are the read/write callbacks of EBB sprs (register_power8_ebb_sprs() in target/ppc/cpu_init.c). I'm not sure if these checks are enough. Looking at section 4.4 of book III, it seems that we need to check again before delivering the exception. E.g., if the timer expires with MSR[PR]=0 and supervisor code disables the facility before it returns to problem state, we would have a spurious exception. Thanks, Daniel + target_ulong nip; + + env->spr[SPR_BESCR] &= ~BESCR_GE; /* Clear GE */ + env->spr[SPR_BESCR] |= BESCR_PMEO; /* Set PMEO */ + env->spr[SPR_EBBRR] = env->nip; /* Save NIP for rfebb insn */ + nip = env->spr[SPR_EBBHR]; /* EBB handler */ + powerpc_set_excp_state(cpu, nip, env->msr); + } + /* + * This interrupt is handled by userspace. No need + * to proceed. + */ + return; default: excp_invalid: cpu_abort(cs, "Invalid PowerPC exception %d. Aborting\n", excp); @@ -1044,6 +1060,18 @@ static void ppc_hw_interrupt(CPUPPCState *env) powerpc_excp(cpu, env->excp_model, POWERPC_EXCP_THERM); return; } + /* PMC -> Event-based branch exception */ + if (env->pending_interrupts & (1 << PPC_INTERRUPT_PMC)) { + /* + * Performance Monitor event-based exception can only + * occur in problem state. + */ + if (msr_pr == 1) { + env->pending_interrupts &= ~(1 << PPC_INTERRUPT_PMC); + powerpc_excp(cpu, env->excp_model, POWE
Re: [PATCH v5 09/10] target/ppc: PMU Event-Based exception support
running again. + */ +pmu_delete_timers(env); +} + +pmu_update_cycles(env); + +if (env->spr[SPR_POWER_MMCR0] & MMCR0_PMAE) { +env->spr[SPR_POWER_MMCR0] &= ~MMCR0_PMAE; +env->spr[SPR_POWER_MMCR0] |= MMCR0_PMAO; +} + +/* Fire the PMC hardware exception */ +ppc_set_irq(cpu, PPC_INTERRUPT_PMC, 1); } /* This helper assumes that the PMC is running. */ -- 2.31.1 -- Matheus K. Ferst Instituto de Pesquisas ELDORADO <http://www.eldorado.org.br/> Analista de Software Aviso Legal - Disclaimer <https://www.eldorado.org.br/disclaimer.html>
Re: [PATCH v5 03/10] target/ppc: enable PMU counter overflow with cycle events
or now */ -env->pmu_base_time = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); +bool overflow_enabled = env->spr[SPR_POWER_MMCR0] & +(MMCR0_PMC1CE | MMCR0_PMCjCE); + +/* + * Always delete existing overflow timers when starting a + * new cycle counting session. + */ +pmu_delete_timers(env); + +if (!overflow_enabled) { +/* Define pmu_base_time and leave */ +env->pmu_base_time = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); +return; +} + +pmu_start_overflow_timers(env); } void helper_store_mmcr0(CPUPPCState *env, target_ulong value) -- 2.31.1 -- Matheus K. Ferst Instituto de Pesquisas ELDORADO <http://www.eldorado.org.br/> Analista de Software Aviso Legal - Disclaimer <https://www.eldorado.org.br/disclaimer.html>
Re: [PATCH v5 02/10] target/ppc: PMU basic cycle count for pseries TCG
On 01/11/2021 20:56, Daniel Henrique Barboza wrote: diff --git a/target/ppc/power8-pmu.c b/target/ppc/power8-pmu.c index 3c2f73896f..a0a42b666c 100644 --- a/target/ppc/power8-pmu.c +++ b/target/ppc/power8-pmu.c +static bool pmc_is_active(CPUPPCState *env, int sprn) +{ +if (sprn < SPR_POWER_PMC5) { +return !(env->spr[SPR_POWER_MMCR0] & MMCR0_FC14); +} + +return !(env->spr[SPR_POWER_MMCR0] & MMCR0_FC56); +} + +static void pmu_update_cycles(CPUPPCState *env) +{ +uint64_t now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); +uint64_t time_delta = now - env->pmu_base_time; +int sprn; + +for (sprn = SPR_POWER_PMC1; sprn <= SPR_POWER_PMC6; sprn++) { +if (!pmc_is_active(env, sprn) || +getPMUEventType(env, sprn) != PMU_EVENT_CYCLES) { +continue; +} + +/* + * The pseries and powernv clock runs at 1Ghz, meaning + * that 1 nanosec equals 1 cycle. + */ +env->spr[sprn] += time_delta; +} + +/* + * Update base_time for future calculations if we updated + * the PMCs while the PMU was running. + */ +if (!(env->spr[SPR_POWER_MMCR0] & MMCR0_FC)) { +env->pmu_base_time = now; +} +} + +/* + * A cycle count session consists of the basic operations we + * need to do to support PM_CYC events: redefine a new base_time + * to be used to calculate PMC values and start overflow timers. + */ +static void start_cycle_count_session(CPUPPCState *env) +{ +/* Just define pmu_base_time for now */ +env->pmu_base_time = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); +} + +void helper_store_mmcr0(CPUPPCState *env, target_ulong value) +{ +target_ulong curr_value = env->spr[SPR_POWER_MMCR0]; +bool curr_FC = curr_value & MMCR0_FC; +bool new_FC = value & MMCR0_FC; + +env->spr[SPR_POWER_MMCR0] = value; I'm not sure if this is the right place to update MMCR0. If we set both FC and FC14 in one write, the code will call pmu_update_cycles, but PMCs 1-4 will not be updated because pmc_is_active will use the new value to check if the PMCs are frozen. + +/* MMCR0 writes can change HFLAGS_PMCCCLEAR and HFLAGS_MMCR0FC */ +if (((curr_value & MMCR0_PMCC) != (value & MMCR0_PMCC)) || +(curr_FC != new_FC)) { +hreg_compute_hflags(env); +} + +/* + * In an frozen count (FC) bit change: + * + * - if PMCs were running (curr_FC = false) and we're freezing + * them (new_FC = true), save the PMCs values in the registers. + * + * - if PMCs were frozen (curr_FC = true) and we're activating + * them (new_FC = false), set the new base_time for future cycle + * calculations. + */ +if (curr_FC != new_FC) { +if (!curr_FC) { > +pmu_update_cycles(env); +} else { +start_cycle_count_session(env); +} +} +} -- Matheus K. Ferst Instituto de Pesquisas ELDORADO <http://www.eldorado.org.br/> Analista de Software Aviso Legal - Disclaimer <https://www.eldorado.org.br/disclaimer.html>
Re: [PATCH v2 07/34] target/ppc: Implement cntlzdm
On 30/10/2021 18:17, Richard Henderson wrote: On 10/29/21 1:23 PM, matheus.fe...@eldorado.org.br wrote: From: Luis Pires Implement the following PowerISA v3.1 instruction: cntlzdm: Count Leading Zeros Doubleword Under Bit Mask Suggested-by: Richard Henderson Signed-off-by: Luis Pires Signed-off-by: Matheus Ferst --- v2: - Inline implementation of cntlzdm --- target/ppc/insn32.decode | 1 + target/ppc/translate/fixedpoint-impl.c.inc | 36 ++ 2 files changed, 37 insertions(+) diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode index 9cb9fc00b8..221cb00dd6 100644 --- a/target/ppc/insn32.decode +++ b/target/ppc/insn32.decode @@ -203,6 +203,7 @@ ADDPCIS 010011 . . .. 00010 . @DX ## Fixed-Point Logical Instructions CFUGED 01 . . . 0011011100 - @X +CNTLZDM 01 . . . 111011 - @X ### Float-Point Load Instructions diff --git a/target/ppc/translate/fixedpoint-impl.c.inc b/target/ppc/translate/fixedpoint-impl.c.inc index 0d9c6e0996..c9e9ae35df 100644 --- a/target/ppc/translate/fixedpoint-impl.c.inc +++ b/target/ppc/translate/fixedpoint-impl.c.inc @@ -413,3 +413,39 @@ static bool trans_CFUGED(DisasContext *ctx, arg_X *a) #endif return true; } + +#if defined(TARGET_PPC64) +static void do_cntlzdm(TCGv_i64 dst, TCGv_i64 src, TCGv_i64 mask) +{ + TCGv_i64 tmp; + TCGLabel *l1; + + tmp = tcg_temp_local_new_i64(); + l1 = gen_new_label(); + + tcg_gen_and_i64(tmp, src, mask); + tcg_gen_clzi_i64(tmp, tmp, 64); + + tcg_gen_brcondi_i64(TCG_COND_EQ, tmp, 0, l1); + + tcg_gen_subfi_i64(tmp, 64, tmp); + tcg_gen_shr_i64(tmp, mask, tmp); + tcg_gen_ctpop_i64(tmp, tmp); + + gen_set_label(l1); + + tcg_gen_mov_i64(dst, tmp); This works, but a form without brcond would be better (due to how poorly tcg handles basic blocks). I should've tried a little harder to get rid of this branch... How about tcg_gen_clzi_i64(tmp, tmp, 0); tcg_gen_xori_i64(tmp, tmp, 63); tcg_gen_shr_i64(tmp, mask, tmp); tcg_gen_shri_i64(tmp, tmp, 1); tcg_gen_ctpop_i64(dst, tmp); The middle 3 operations perform a shift between [1-64], such that we are assured of 0 for 64. When src & mask == 0 we shouldn't shift mask (or shift it zero bits), so I guess we can't have this shri. Maybe something like tcg_gen_and_i64(t0, src, mask); tcg_gen_clzi_i64(t0, t0, -1); tcg_gen_setcondi_i64(TCG_COND_NE, t1, t0, -1); tcg_gen_andi_i64(t0, t0, 63); tcg_gen_xori_i64(t0, t0, 63); tcg_gen_shr_i64(t0, mask, t0); tcg_gen_shr_i64(t0, t0, t1); tcg_gen_ctpop_i64(dst, t0); So we still shift 63+1 bits when there are no leading zeros and shift 0 bits when it's all zeros. Either way, Reviewed-by: Richard Henderson Thanks, Matheus K. Ferst Instituto de Pesquisas ELDORADO <http://www.eldorado.org.br/> Analista de Software Aviso Legal - Disclaimer <https://www.eldorado.org.br/disclaimer.html>
Re: [PATCH 0/2] Fix mtfsf, mtfsfi and mtfsb1 bug
CC'ing the reporter On 20/10/2021 09:57, Lucas Mateus Castro (alqotel) wrote: From: "Lucas Mateus Castro (alqotel)" The instructions mtfsf, mtfsfi and mtfsb, when called, fail to set the FI bit (bit 46 in the FPSCR) and can set to 1 the reserved bit 52 of the FPSCR, as reported in https://gitlab.com/qemu-project/qemu/-/issues/266 (although the bug report is only for mtfsf, the bug applies to mtfsfi and mtfsb1 as well). These instructions also fail to throw an exception when the exception and enabling bits are set, this can be tested by adding 'prctl(PR_SET_FPEXC, PR_FP_EXC_PRECISE);' before the __builtin_mtfsf call in the test case of the bug report. These patches aim to fix these issues. Lucas Mateus Castro (alqotel) (2): target/ppc: Fixed call to deferred exception target/ppc: ppc_store_fpscr doesn't update bit 52 target/ppc/cpu.c | 2 +- target/ppc/cpu.h | 3 +++ target/ppc/fpu_helper.c| 41 ++ target/ppc/helper.h| 1 + target/ppc/translate/fp-impl.c.inc | 6 ++--- 5 files changed, 49 insertions(+), 4 deletions(-) -- 2.31.1 -- Matheus K. Ferst Instituto de Pesquisas ELDORADO <http://www.eldorado.org.br/> Analista de Software Aviso Legal - Disclaimer <https://www.eldorado.org.br/disclaimer.html>
Re: [PATCH 16/33] target/ppc: Implement Vector Insert Word from GPR using Immediate insns
On 26/10/2021 15:45, Paul A. Clarke wrote: On Tue, Oct 26, 2021 at 09:58:15AM -0700, Richard Henderson wrote: On 10/26/21 7:33 AM, Matheus K. Ferst wrote: It says that "if UIM is greater than N, the result is undefined." My first read was also that the outcome is "boundedly undefined," but I guess it can be understood as "the resulting value in VRT will be undefined" (like when the pseudo-code uses "VRT <- 0x_..._"), in which case this patch and Mambo are correct. If the reference simulator is fine with it, I am too. FYI, it appears that the hardware does a partial insert, per an experiment: ``` 1: x/i $pc => 0x16d4 : vinsw v2,r3,14 (gdb) p $v2.v4_int32 $1 = {0x1, 0x1, 0x1, 0x1} (gdb) p $r3 $2 = 0x12345678 (gdb) nexti (gdb) p $v2.v4_int32 $3 = {0x1234, 0x1, 0x1, 0x1} Thanks for this test Paul. I'll add a comment about the hardware behavior. -- Matheus K. Ferst Instituto de Pesquisas ELDORADO <http://www.eldorado.org.br/> Analista de Software Aviso Legal - Disclaimer <https://www.eldorado.org.br/disclaimer.html>
Re: [PATCH 15/33] target/ppc: Implement Vector Insert from GPR using GPR index insns
On 23/10/2021 01:40, Richard Henderson wrote: [E-MAIL EXTERNO] Não clique em links ou abra anexos, a menos que você possa confirmar o remetente e saber que o conteúdo é seguro. Em caso de e-mail suspeito entre imediatamente em contato com o DTI. On 10/21/21 12:45 PM, matheus.fe...@eldorado.org.br wrote: +#if defined(TARGET_PPC64) + return do_vinsx(ctx, a->vrt, size, right, cpu_gpr[a->vra], cpu_gpr[a->vrb], + gen_helper); +#else + bool ok; + TCGv_i64 val; + + val = tcg_temp_new_i64(); + tcg_gen_extu_tl_i64(val, cpu_gpr[a->vrb]); + + ok = do_vinsx(ctx, a->vrt, size, right, cpu_gpr[a->vra], val, gen_helper); + + tcg_temp_free_i64(val); + return ok; +#endif Oh, and what's all this? Either this isn't defined for !PPC64 at all, or you should just use target_ulong and not do any ifdeffing at all. r~ The helper receives i64 because it's also used by Vector Insert From VSR in patch 17. We can drop the ifdef and always tcg_gen_extu_tl_i64 though. -- Matheus K. Ferst Instituto de Pesquisas ELDORADO <http://www.eldorado.org.br/> Analista de Software Aviso Legal - Disclaimer <https://www.eldorado.org.br/disclaimer.html>
Re: [PATCH 16/33] target/ppc: Implement Vector Insert Word from GPR using Immediate insns
On 23/10/2021 01:42, Richard Henderson wrote: [E-MAIL EXTERNO] Não clique em links ou abra anexos, a menos que você possa confirmar o remetente e saber que o conteúdo é seguro. Em caso de e-mail suspeito entre imediatamente em contato com o DTI. On 10/21/21 12:45 PM, matheus.fe...@eldorado.org.br wrote: +static bool do_vins_VX_uim4(DisasContext *ctx, arg_VX_uim4 *a, int size, + void (*gen_helper)(TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv)) +{ + REQUIRE_INSNS_FLAGS2(ctx, ISA310); + REQUIRE_VECTOR(ctx); + + if (a->uim > (16 - size)) { + qemu_log_mask(LOG_GUEST_ERROR, "Invalid index for VINS* at" + " 0x" TARGET_FMT_lx ", UIM = %d > %d\n", ctx->cia, a->uim, + 16 - size); + return true; + } Does this really do nothing on real hw? We don't have access to the real hardware yet, so our reference is the POWER10 Functional Simulator (Mambo). Maybe someone from IBM can run a test for us, but Mambo usually does the right thing, especially in "simple mode." I know the manual says undefined, but I would have expected SIGILL. It says that "if UIM is greater than N, the result is undefined." My first read was also that the outcome is "boundedly undefined," but I guess it can be understood as "the resulting value in VRT will be undefined" (like when the pseudo-code uses "VRT <- 0x_..._"), in which case this patch and Mambo are correct. -- Matheus K. Ferst Instituto de Pesquisas ELDORADO <http://www.eldorado.org.br/> Analista de Software Aviso Legal - Disclaimer <https://www.eldorado.org.br/disclaimer.html>
Re: [PATCH 07/33] target/ppc: Implement cntlzdm
On 22/10/2021 20:16, Richard Henderson wrote: [E-MAIL EXTERNO] Não clique em links ou abra anexos, a menos que você possa confirmar o remetente e saber que o conteúdo é seguro. Em caso de e-mail suspeito entre imediatamente em contato com o DTI. On 10/21/21 12:45 PM, matheus.fe...@eldorado.org.br wrote: +uint64_t helper_CNTLZDM(uint64_t src, uint64_t mask) +{ + uint64_t sel_bit, count = 0; + + while (mask != 0) { + sel_bit = 0x8000ULL >> clz64(mask); + + if (src & sel_bit) { + break; + } We need to count how many mask are set left of mask & src. How about sh = clz64(src & mask); if (sh == 0) { return 0; } return ctpop64(mask >> (64 - sh)); which could probably be implemented inline relatively easy. Thanks for this suggestion Richard, we'll try to inline it. +static bool trans_CNTLZDM(DisasContext *ctx, arg_X *a) +{ + REQUIRE_64BIT(ctx); + REQUIRE_INSNS_FLAGS2(ctx, ISA310); +#if defined(TARGET_PPC64) + gen_helper_CNTLZDM(cpu_gpr[a->ra], cpu_gpr[a->rt], cpu_gpr[a->rb]); +#else + qemu_build_not_reached(); +#endif + return true; +} Why the ifdef here? Oh, I see. You could just use target_long in the helper to avoid that. And if not, you should move the helper into an ifdef too. That's the same case of cfuged. There is a vector version of this instruction (vclzdm) that is not 64-bits only (at least on paper), so it should receive i64 and cannot be inside an ifdef(TARGET_PPC64). I'll add this info to the commit message. If we dismiss the possibility of a future 32-bits implementation of PowerISA v3.1, we can move the helper inside the ifdef and add REQUIRE_64BITS in vclzdm/vctzdm (and vcfuged, vpdepd, vpextd, etc.) -- Matheus K. Ferst Instituto de Pesquisas ELDORADO <http://www.eldorado.org.br/> Analista de Software Aviso Legal - Disclaimer <https://www.eldorado.org.br/disclaimer.html>
Re: [PATCH 00/33] PowerISA v3.1 instruction batch
On 21/10/2021 23:06, Richard Henderson wrote: [E-MAIL EXTERNO] Não clique em links ou abra anexos, a menos que você possa confirmar o remetente e saber que o conteúdo é seguro. Em caso de e-mail suspeito entre imediatamente em contato com o DTI. On 10/21/21 12:45 PM, matheus.fe...@eldorado.org.br wrote: From: Matheus Ferst This patch series implements 56 new instructions for POWER10, moving 28 "old" instructions to decodetree along the way. The series is divided by facility as follows: - From patch 1 to 4: Floating-Point - From patch 5 to 10: Fixed-Point - From patch 11 to 19: Vector - From patch 20 to 33: Vector-Scalar Extensions Based-on:<20210910112624.72748-1-luis.pi...@eldorado.org.br> because of patch 10 ("target/ppc: Move REQUIRE_ALTIVEC/VECTOR to translate.c") and patch 11 ("target/ppc: Introduce REQUIRE_FPU"). The prereqs no longer apply cleanly. Do you have a branch you can publish in the meantime? r~ I forgot to mention that it's also based on Gibson's ppc-for-6.2. The branch is available on https://github.com/PPC64/qemu/tree/ppc-isa31-review Thanks, Matheus K. Ferst Instituto de Pesquisas ELDORADO <http://www.eldorado.org.br/> Analista de Software Aviso Legal - Disclaimer <https://www.eldorado.org.br/disclaimer.html>
Re: [PATCH 1/4] linux-user/ppc: Fix XER access in save/restore_user_regs
On 14/10/2021 20:43, Richard Henderson wrote: [E-MAIL EXTERNO] Não clique em links ou abra anexos, a menos que você possa confirmar o remetente e saber que o conteúdo é seguro. Em caso de e-mail suspeito entre imediatamente em contato com o DTI. On 10/14/21 3:32 PM, matheus.fe...@eldorado.org.br wrote: From: Matheus Ferst We should use cpu_read_xer/cpu_write_xer to save/restore the complete register since some of its bits are in other fields of CPUPPCState. A test is added to prevent future regressions. Fixes: da91a00f191f ("target-ppc: Split out SO, OV, CA fields from XER") Signed-off-by: Matheus Ferst --- linux-user/ppc/signal.c | 9 +++-- tests/tcg/ppc64/Makefile.target | 2 + tests/tcg/ppc64le/Makefile.target | 2 + tests/tcg/ppc64le/signal_save_restore_xer.c | 42 + 4 files changed, 52 insertions(+), 3 deletions(-) create mode 100644 tests/tcg/ppc64le/signal_save_restore_xer.c The code is good so, Reviewed-by: Richard Henderson + sigaction(SIGILL, &sa, NULL); + + asm("mtspr 1, %1\n\t" + ".long 0x0\n\t" While Appendix B does guarantee that "0" is and always will be an invalid instruction, I wonder if the test itself would be clearer (i.e. self-documenting the intent) using SIGTRAP and "trap". r~ It would be better, but cpu_loop is currently calling cpu_abort for POWERPC_EXCP_TRAP, so the test would fail. I'll see if I can fix that in another patch, and then we can change the test to use trap. -- Matheus K. Ferst Instituto de Pesquisas ELDORADO <http://www.eldorado.org.br/> Analista de Software Júnior Aviso Legal - Disclaimer <https://www.eldorado.org.br/disclaimer.html>
Re: [PATCH v3 03/15] target/ppc: PMU basic cycle count for pseries TCG
On 24/09/2021 11:41, Daniel Henrique Barboza wrote: On 9/22/21 08:24, Matheus K. Ferst wrote: On 03/09/2021 17:31, Daniel Henrique Barboza wrote: [E-MAIL EXTERNO] Não clique em links ou abra anexos, a menos que você possa confirmar o remetente e saber que o conteúdo é seguro. Em caso de e-mail suspeito entre imediatamente em contato com o DTI. This patch adds the barebones of the PMU logic by enabling cycle counting, done via the performance monitor counter 6. The overall logic goes as follows: - a helper is added to control the PMU state on each MMCR0 write. This allows for the PMU to start/stop as the frozen counter bit (MMCR0_FC) is cleared or set; - MMCR0 reg initial value is set to 0x8000 (MMCR0_FC set) to avoid having to spin the PMU right at system init; - the intended usage is to freeze the counters by setting MMCR0_FC, do any additional setting of events to be counted via MMCR1 (not implemented yet) and enable the PMU by zeroing MMCR0_FC. Software must freeze counters to read the results - on the fly reading of the PMCs will return the starting value of each one. Since there will be more PMU exclusive code to be added next, put the PMU logic in its own helper to keep all in the same place. The name of the new helper file, power8_pmu.c, is an indicative that the PMU logic has been tested with the IBM POWER chip family, POWER8 being the oldest version tested. This doesn't mean that this PMU logic will break with any other PPC64 chip that implements Book3s, but since we can't assert that this PMU will work with all available Book3s emulated processors we're choosing to be explicit. Signed-off-by: Daniel Henrique Barboza --- diff --git a/target/ppc/translate.c b/target/ppc/translate.c index 0babde3131..c3e2e3d329 100644 --- a/target/ppc/translate.c +++ b/target/ppc/translate.c @@ -401,6 +401,24 @@ void spr_write_generic(DisasContext *ctx, int sprn, int gprn) spr_store_dump_spr(sprn); } +#if defined(TARGET_PPC64) && !defined(CONFIG_USER_ONLY) +void spr_write_MMCR0(DisasContext *ctx, int sprn, int gprn) +{ + /* + * helper_store_mmcr0 will make clock based operations that + * will cause 'bad icount read' errors if we do not execute + * gen_icount_io_start() beforehand. + */ + gen_icount_io_start(ctx); + gen_helper_store_mmcr0(cpu_env, cpu_gpr[gprn]); +} +#else +void spr_write_MMCR0(DisasContext *ctx, int sprn, int gprn) +{ + spr_write_generic(ctx, sprn, gprn); +} +#endif + #if !defined(CONFIG_USER_ONLY) void spr_write_generic32(DisasContext *ctx, int sprn, int gprn) { @@ -596,7 +614,10 @@ void spr_write_MMCR0_ureg(DisasContext *ctx, int sprn, int gprn) tcg_gen_andi_tl(t1, t1, ~(MMCR0_UREG_MASK)); /* Keep all other bits intact */ tcg_gen_or_tl(t1, t1, t0); - gen_store_spr(SPR_POWER_MMCR0, t1); + + /* Overwrite cpu_gpr[gprn] and use spr_write_MMCR0() */ + tcg_gen_mov_tl(cpu_gpr[gprn], t1); + spr_write_MMCR0(ctx, sprn + 0x10, gprn); IIUC, this makes writing to MMCR0 change the GPR value and expose the unfiltered content of the SPR to problem state. It might be better to call the helper directly or create another method that takes a TCGv as an argument and call it from spr_write_MMCR0_ureg and spr_write_MMCR0. I'm overwriting cpu_gpr[gprn] with t1, which is filtered by MMCR0_REG_MASK right before, to re-use spr_write_MMCR0() since its API requires a gprn index. The reason I'm re-using spr_write_MMCR0() here is to avoid code repetition in spr_write_MMCR0_ureg(), which would need to repeat the same steps as spr_write_MMCR0 (calling icount_io_start(), calling the helper, and then setting DISAS_EXIT_UPDATE in a later patch). The idea behind is that all PMU user_write() functions works the same as its privileged counterparts but with some form of filtering done beforehand. Note that this is kind of true in the previous patch as well - gen_store_spr() is similar to the privileged function MMCR0 was using (spr_write_generic()) with the exception of an optional qemu_log(). Maybe I should've made this clear in the previous patch, using spr_write_generic() and overwriting cpu_gpr[gprn] with the filtered t1 content back there. Speaking of which, since t1 is being filtered by MMCR0_REG_MASK before being used to overwrite cpu_gpr[gprn], I'm not sure how this is exposing unfiltered content to problem state. Can you elaborate? Suppose MMCR0 has the value 0x8001 (FC and FCH) and problem state executes an mtspr with the value 0x400 (unset FC and set PMAE) in the GPR. The proposed code will do the following: > tcg_gen_andi_tl(t0, cpu_gpr[gprn], MMCR0_UREG_MASK); t0 = GPR & MMCR0_UREG_MASK = 0x400 & 0x8480 = 0x400 > gen_load_spr(t1, SPR_POWER_MMCR0); t1 = MMCR0 = 0x8001 > tcg_gen_andi_tl(t1, t1, ~(MMCR0_UREG_MASK)); t1 = t1 & ~MMCR0_UREG_MASK = 0x8001 & ~0x8480 = 0x1 > tcg_gen
Re: [PATCH v3 03/15] target/ppc: PMU basic cycle count for pseries TCG
On 03/09/2021 17:31, Daniel Henrique Barboza wrote: [E-MAIL EXTERNO] Não clique em links ou abra anexos, a menos que você possa confirmar o remetente e saber que o conteúdo é seguro. Em caso de e-mail suspeito entre imediatamente em contato com o DTI. This patch adds the barebones of the PMU logic by enabling cycle counting, done via the performance monitor counter 6. The overall logic goes as follows: - a helper is added to control the PMU state on each MMCR0 write. This allows for the PMU to start/stop as the frozen counter bit (MMCR0_FC) is cleared or set; - MMCR0 reg initial value is set to 0x8000 (MMCR0_FC set) to avoid having to spin the PMU right at system init; - the intended usage is to freeze the counters by setting MMCR0_FC, do any additional setting of events to be counted via MMCR1 (not implemented yet) and enable the PMU by zeroing MMCR0_FC. Software must freeze counters to read the results - on the fly reading of the PMCs will return the starting value of each one. Since there will be more PMU exclusive code to be added next, put the PMU logic in its own helper to keep all in the same place. The name of the new helper file, power8_pmu.c, is an indicative that the PMU logic has been tested with the IBM POWER chip family, POWER8 being the oldest version tested. This doesn't mean that this PMU logic will break with any other PPC64 chip that implements Book3s, but since we can't assert that this PMU will work with all available Book3s emulated processors we're choosing to be explicit. Signed-off-by: Daniel Henrique Barboza --- diff --git a/target/ppc/translate.c b/target/ppc/translate.c index 0babde3131..c3e2e3d329 100644 --- a/target/ppc/translate.c +++ b/target/ppc/translate.c @@ -401,6 +401,24 @@ void spr_write_generic(DisasContext *ctx, int sprn, int gprn) spr_store_dump_spr(sprn); } +#if defined(TARGET_PPC64) && !defined(CONFIG_USER_ONLY) +void spr_write_MMCR0(DisasContext *ctx, int sprn, int gprn) +{ +/* + * helper_store_mmcr0 will make clock based operations that + * will cause 'bad icount read' errors if we do not execute + * gen_icount_io_start() beforehand. + */ +gen_icount_io_start(ctx); +gen_helper_store_mmcr0(cpu_env, cpu_gpr[gprn]); +} +#else +void spr_write_MMCR0(DisasContext *ctx, int sprn, int gprn) +{ +spr_write_generic(ctx, sprn, gprn); +} +#endif + #if !defined(CONFIG_USER_ONLY) void spr_write_generic32(DisasContext *ctx, int sprn, int gprn) { @@ -596,7 +614,10 @@ void spr_write_MMCR0_ureg(DisasContext *ctx, int sprn, int gprn) tcg_gen_andi_tl(t1, t1, ~(MMCR0_UREG_MASK)); /* Keep all other bits intact */ tcg_gen_or_tl(t1, t1, t0); -gen_store_spr(SPR_POWER_MMCR0, t1); + +/* Overwrite cpu_gpr[gprn] and use spr_write_MMCR0() */ +tcg_gen_mov_tl(cpu_gpr[gprn], t1); +spr_write_MMCR0(ctx, sprn + 0x10, gprn); IIUC, this makes writing to MMCR0 change the GPR value and expose the unfiltered content of the SPR to problem state. It might be better to call the helper directly or create another method that takes a TCGv as an argument and call it from spr_write_MMCR0_ureg and spr_write_MMCR0. tcg_temp_free(t0); tcg_temp_free(t1); -- 2.31.1 -- Matheus K. Ferst Instituto de Pesquisas ELDORADO <http://www.eldorado.org.br/> Analista de Software Júnior Aviso Legal - Disclaimer <https://www.eldorado.org.br/disclaimer.html>
Re: [PATCH v3 01/15] target/ppc: add user read functions for MMCR0 and MMCR2
l bits that are not FCnP0 bits. + * When MMCR0[PMCC] is set to 0b10 or 0b11, providing + * problem state programs read/write access to MMCR2, + * only the FCnP0 bits can be accessed. All other bits are + * not changed when mtspr is executed in problem state, and + * all other bits return 0s when mfspr is executed in problem + * state, according to ISA v3.1, section 10.4.6 Monitor Mode + * Control Register 2, p. 1316, third paragraph. + */ +gen_load_spr(t0, SPR_POWER_MMCR2); +tcg_gen_andi_tl(t0, t0, 0x402010080402UL); +tcg_gen_mov_tl(cpu_gpr[gprn], t0); + +tcg_temp_free(t0); +} + #if defined(TARGET_PPC64) && !defined(CONFIG_USER_ONLY) void spr_write_ureg(DisasContext *ctx, int sprn, int gprn) { -- 2.31.1 -- Matheus K. Ferst Instituto de Pesquisas ELDORADO <http://www.eldorado.org.br/> Analista de Software Júnior Aviso Legal - Disclaimer <https://www.eldorado.org.br/disclaimer.html>
Re: [PATCH v5 01/16] tcg: Expand usadd/ussub with umin/umax
On 15/09/2021 18:30, Richard Henderson wrote: [E-MAIL EXTERNO] Não clique em links ou abra anexos, a menos que você possa confirmar o remetente e saber que o conteúdo é seguro. Em caso de e-mail suspeito entre imediatamente em contato com o DTI. For usadd, we only have to consider overflow. Since ~B + B == -1, the maximum value for A that saturates is ~B. For ussub, we only have to consider underflow. The minimum value that saturates to 0 from A - B is B. Signed-off-by: Richard Henderson --- tcg/tcg-op-vec.c | 37 +++-- 1 file changed, 35 insertions(+), 2 deletions(-) diff --git a/tcg/tcg-op-vec.c b/tcg/tcg-op-vec.c index 15e026ae49..7705a49c0b 100644 --- a/tcg/tcg-op-vec.c +++ b/tcg/tcg-op-vec.c @@ -119,6 +119,18 @@ bool tcg_can_emit_vecop_list(const TCGOpcode *list, continue; } break; +case INDEX_op_usadd_vec: +if (tcg_can_emit_vec_op(INDEX_op_umin_vec, type, vece) || +tcg_can_emit_vec_op(INDEX_op_cmp_vec, type, vece)) { +continue; +} +break; +case INDEX_op_ussub_vec: +if (tcg_can_emit_vec_op(INDEX_op_umax_vec, type, vece) || +tcg_can_emit_vec_op(INDEX_op_cmp_vec, type, vece)) { +continue; +} +break; case INDEX_op_cmpsel_vec: case INDEX_op_smin_vec: case INDEX_op_smax_vec: @@ -603,7 +615,18 @@ void tcg_gen_ssadd_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b) void tcg_gen_usadd_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b) { -do_op3_nofail(vece, r, a, b, INDEX_op_usadd_vec); +if (!do_op3(vece, r, a, b, INDEX_op_usadd_vec)) { +const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL); +TCGv_vec t = tcg_temp_new_vec_matching(r); + +/* usadd(a, b) = min(a, ~b) + b */ +tcg_gen_not_vec(vece, t, b); +tcg_gen_umin_vec(vece, t, t, a); +tcg_gen_add_vec(vece, r, r, b); I think it should be tcg_gen_add_vec(vece, r, t, b); + +tcg_temp_free_vec(t); +tcg_swap_vecop_list(hold_list); +} } void tcg_gen_sssub_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b) @@ -613,7 +636,17 @@ void tcg_gen_sssub_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b) void tcg_gen_ussub_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b) { -do_op3_nofail(vece, r, a, b, INDEX_op_ussub_vec); +if (!do_op3(vece, r, a, b, INDEX_op_ussub_vec)) { +const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL); +TCGv_vec t = tcg_temp_new_vec_matching(r); + +/* ussub(a, b) = max(a, b) - b */ +tcg_gen_umax_vec(vece, t, a, b); +tcg_gen_sub_vec(vece, r, t, b); + +tcg_temp_free_vec(t); +tcg_swap_vecop_list(hold_list); +} } static void do_minmax(unsigned vece, TCGv_vec r, TCGv_vec a, -- 2.25.1 Thanks, Matheus K. Ferst Instituto de Pesquisas ELDORADO <http://www.eldorado.org.br/> Analista de Software Júnior Aviso Legal - Disclaimer <https://www.eldorado.org.br/disclaimer.html>
Re: [PATCH v3 08/15] PPC64/TCG: Implement 'rfebb' instruction
/target/ppc/translate.c +++ b/target/ppc/translate.c @@ -7636,6 +7636,8 @@ static int times_4(DisasContext *ctx, int x) #include "translate/spe-impl.c.inc" +#include "translate/branch-impl.c.inc" + /* Handles lfdp, lxsd, lxssp */ static void gen_dform39(DisasContext *ctx) { diff --git a/target/ppc/translate/branch-impl.c.inc b/target/ppc/translate/branch-impl.c.inc new file mode 100644 index 00..9c991d9abb --- /dev/null +++ b/target/ppc/translate/branch-impl.c.inc @@ -0,0 +1,33 @@ +/* + * Power ISA decode for branch instructions + * + * Copyright IBM Corp. 2021 + * + * Authors: + * Daniel Henrique Barboza + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#if defined(TARGET_PPC64) && !defined(CONFIG_USER_ONLY) + +static bool trans_RFEBB(DisasContext *ctx, arg_RFEBB *arg) I think it's a bit more readable to use arg_XL_s instead of arg_RFEBB. Anyway, Reviewed-by: Matheus Ferst +{ +REQUIRE_INSNS_FLAGS2(ctx, ISA207S); + +gen_icount_io_start(ctx); +gen_update_cfar(ctx, ctx->cia); +gen_helper_rfebb(cpu_env, cpu_gpr[arg->s]); + +ctx->base.is_jmp = DISAS_CHAIN; + +return true; +} +#else +static bool trans_RFEBB(DisasContext *ctx, arg_RFEBB *arg) +{ +gen_invalid(ctx); +return true; +} +#endif -- 2.31.1 -- Matheus K. Ferst Instituto de Pesquisas ELDORADO <http://www.eldorado.org.br/> Analista de Software Júnior Aviso Legal - Disclaimer <https://www.eldorado.org.br/disclaimer.html>