Re: [PATCH v6 00/13] s390x/tcg: Implement Vector-Enhancements Facility 2

2022-05-03 Thread David Miller
>From bb6bf2f9529c4d76db9a9eff2ff7fa1235657103 Mon Sep 17 00:00:00 2001
From: David Miller 
Date: Mon, 21 Mar 2022 16:58:57 -0400
Subject: [PATCH v5 10/11] tests/tcg/s390x: Tests for Vector Enhancements
 Facility 2

Signed-off-by: David Miller 
---
 tests/tcg/s390x/Makefile.target |   8 ++
 tests/tcg/s390x/vx.h|  19 +
 tests/tcg/s390x/vxeh2_vcvt.c|  88 
 tests/tcg/s390x/vxeh2_vlstr.c   | 139 
 tests/tcg/s390x/vxeh2_vs.c  |  95 ++
 5 files changed, 349 insertions(+)
 create mode 100644 tests/tcg/s390x/vx.h
 create mode 100644 tests/tcg/s390x/vxeh2_vcvt.c
 create mode 100644 tests/tcg/s390x/vxeh2_vlstr.c
 create mode 100644 tests/tcg/s390x/vxeh2_vs.c

diff --git a/tests/tcg/s390x/Makefile.target b/tests/tcg/s390x/Makefile.target
index 8c9b6a13ce..921a056dd1 100644
--- a/tests/tcg/s390x/Makefile.target
+++ b/tests/tcg/s390x/Makefile.target
@@ -16,6 +16,14 @@ TESTS+=shift
 TESTS+=trap
 TESTS+=signals-s390x

+VECTOR_TESTS=vxeh2_vs
+VECTOR_TESTS+=vxeh2_vcvt
+VECTOR_TESTS+=vxeh2_vlstr
+
+TESTS+=$(VECTOR_TESTS)
+
+$(VECTOR_TESTS): CFLAGS+=-march=z15 -O2
+
 ifneq ($(HAVE_GDB_BIN),)
 GDB_SCRIPT=$(SRC_PATH)/tests/guest-debug/run-test.py

diff --git a/tests/tcg/s390x/vx.h b/tests/tcg/s390x/vx.h
new file mode 100644
index 00..2e66f8b714
--- /dev/null
+++ b/tests/tcg/s390x/vx.h
@@ -0,0 +1,19 @@
+#ifndef QEMU_TESTS_S390X_VX_H
+#define QEMU_TESTS_S390X_VX_H
+
+typedef union S390Vector {
+uint64_t d[2];  /* doubleword */
+uint32_t w[4];  /* word */
+uint16_t h[8];  /* halfword */
+uint8_t  b[16]; /* byte */
+float    f[4];  /* float32 */
+double   fd[2]; /* float64 */
+__uint128_t v;
+} S390Vector;
+
+#define ES8  0
+#define ES16 1
+#define ES32 2
+#define ES64 3
+
+#endif
\ No newline at end of file
diff --git a/tests/tcg/s390x/vxeh2_vcvt.c b/tests/tcg/s390x/vxeh2_vcvt.c
new file mode 100644
index 00..2e46841ab5
--- /dev/null
+++ b/tests/tcg/s390x/vxeh2_vcvt.c
@@ -0,0 +1,88 @@
+/*
+ * vxeh2_vcvt: vector-enhancements facility 2 vector convert *
+ */
+#include <stdint.h>
+#include "vx.h"
+
+#define M_S 8
+#define M4_XxC 4
+#define M4_def M4_XxC
+
+static inline void vcfps(S390Vector *v1, S390Vector *v2,
+const uint8_t m3,  const uint8_t m4,  const uint8_t m5)
+{
+asm volatile(".insn vrr, 0xE7C3, %[v1], %[v2], 0, %[m3], %[m4], %[m5]\n"
+: [v1] "=v" (v1->v)
+: [v2]  "v" (v2->v)
+, [m3]  "i" (m3)
+, [m4]  "i" (m4)
+, [m5]  "i" (m5));
+}
+
+static inline void vcfpl(S390Vector *v1, S390Vector *v2,
+const uint8_t m3,  const uint8_t m4,  const uint8_t m5)
+{
+asm volatile(".insn vrr, 0xE7C1, %[v1], %[v2], 0, %[m3], %[m4], %[m5]\n"
+: [v1] "=v" (v1->v)
+: [v2]  "v" (v2->v)
+, [m3]  "i" (m3)
+, [m4]  "i" (m4)
+, [m5]  "i" (m5));
+}
+
+static inline void vcsfp(S390Vector *v1, S390Vector *v2,
+const uint8_t m3,  const uint8_t m4,  const uint8_t m5)
+{
+asm volatile(".insn vrr, 0xE7C2, %[v1], %[v2], 0, %[m3], %[m4], %[m5]\n"
+: [v1] "=v" (v1->v)
+: [v2]  "v" (v2->v)
+, [m3]  "i" (m3)
+, [m4]  "i" (m4)
+, [m5]  "i" (m5));
+}
+
+static inline void vclfp(S390Vector *v1, S390Vector *v2,
+const uint8_t m3,  const uint8_t m4,  const uint8_t m5)
+{
+asm volatile(".insn vrr, 0xE7C0, %[v1], %[v2], 0, %[m3], %[m4], %[m5]\n"
+: [v1] "=v" (v1->v)
+: [v2]  "v" (v2->v)
+, [m3]  "i" (m3)
+, [m4]  "i" (m4)
+, [m5]  "i" (m5));
+}
+
+int main(int argc, char *argv[])
+{
+S390Vector vd;
+S390Vector vs_i32 = { .w[0] = 1, .w[1] = 64, .w[2] = 1024, .w[3] = -10 };
+S390Vector vs_u32 = { .w[0] = 2, .w[1] = 32, .w[2] = 4096, .w[3] =  };
+S390Vector vs_f32 = { .f[0] = 3.987, .f[1] = 5.123,
+  .f[2] = 4.499, .f[3] = 0.512 };
+
+vd.d[0] = vd.d[1] = 0;
+vcfps(&vd, &vs_i32, 2, M4_def, 0);
+if (1 != vd.f[0] || 1024 != vd.f[2] || 64 != vd.f[1] || -10 != vd.f[3]) {
+return 1;
+}
+
+vd.d[0] = vd.d[1] = 0;
+vcfpl(&vd, &vs_u32, 2, M4_def, 0);
+if (2 != vd.f[0] || 4096 != vd.f[2] || 32 != vd.f[1] ||  != vd.f[3]) {
+return 1;
+}
+
+vd.d[0] = vd.d[1] = 0;
+vcsfp(&vd, &vs_f32, 2, M4_def, 0);
+if (4 != vd.w[0] || 4 != vd.w[2] || 5 != vd.w[1] || 1 != vd.w[3]) {
+return 1;
+}
+
+vd.d[0] = vd.d[1] = 0;
+vclfp(&vd, &vs_f32, 2, M4_def, 0);
+if (4 != vd.w[0] || 4 != vd.w[2] || 5 != vd.w[1] || 1 != 

Re: [PATCH v6 00/13] s390x/tcg: Implement Vector-Enhancements Facility 2

2022-05-03 Thread David Miller
It looks like Google stopped allowing password access early; nothing
makes it work anymore.
They had plans to disable 'less secure apps' in May, but I thought it
was the end of the month.
I'll try copy/paste as plain text as well, though I know it will likely
screw it up..

On Tue, May 3, 2022 at 10:42 AM David Miller  wrote:
>
> Sorry,  It was in the discussion for v4 patches,  as an attachment .
> mail thread:
> [PATCH v4 10/11] tests/tcg/s390x: Tests for Vector Enhancements Facility 2
> So it likely never made it to the mailing list.
>
> I've reattached and will forward the patch (by itself) to the mailing list.
>
> I think the other solution works just as well by ignoring if compiler
> doesn't support z15.
>
> I just thought I'd bring it back up as I saw discussion about it.
>
> Thanks
> - David Miller
>
>
>
>
>
>
> On Tue, May 3, 2022 at 2:55 AM Thomas Huth  wrote:
> >
> >   Hi!
> >
> > On 02/05/2022 18.06, David Miller wrote:
> > > There was also the patch that had them as .insn in the other series of 
> > > emails.
> >
> > Sorry, I missed that patch, could you please point me to the mail on
> > https://lore.kernel.org/qemu-devel/ ? I remember that there was a discussion
> > about the vri-d encoding, but I apparently missed the patch that came out of
> > this discussion...
> >
> >   Thomas
> >
> > > On Mon, May 2, 2022 at 11:52 AM David Hildenbrand  
> > > wrote:
> > >>
> > >> On 02.05.22 09:20, Thomas Huth wrote:
> > >>> On 28/04/2022 11.46, David Hildenbrand wrote:
> > >>>> Implement Vector-Enhancements Facility 2 for s390x
> > >>>>
> > >>>> resolves: https://gitlab.com/qemu-project/qemu/-/issues/738
> > >>>>
> > >>>> implements:
> > >>>>   VECTOR LOAD ELEMENTS REVERSED   (VLER)
> > >>>>   VECTOR LOAD BYTE REVERSED ELEMENTS  (VLBR)
> > >>>>   VECTOR LOAD BYTE REVERSED ELEMENT   (VLEBRH, VLEBRF, 
> > >>>> VLEBRG)
> > >>>>   VECTOR LOAD BYTE REVERSED ELEMENT AND ZERO  (VLLEBRZ)
> > >>>>   VECTOR LOAD BYTE REVERSED ELEMENT AND REPLICATE (VLBRREP)
> > >>>>   VECTOR STORE ELEMENTS REVERSED  (VSTER)
> > >>>>   VECTOR STORE BYTE REVERSED ELEMENTS (VSTBR)
> > >>>>   VECTOR STORE BYTE REVERSED ELEMENTS (VSTEBRH, VSTEBRF, 
> > >>>> VSTEBRG)
> > >>>>   VECTOR SHIFT LEFT DOUBLE BY BIT (VSLD)
> > >>>>   VECTOR SHIFT RIGHT DOUBLE BY BIT(VSRD)
> > >>>>   VECTOR STRING SEARCH(VSTRS)
> > >>>>
> > >>>>   modifies:
> > >>>>   VECTOR FP CONVERT FROM FIXED(VCFPS)
> > >>>>   VECTOR FP CONVERT FROM LOGICAL  (VCFPL)
> > >>>>   VECTOR FP CONVERT TO FIXED  (VCSFP)
> > >>>>   VECTOR FP CONVERT TO LOGICAL(VCLFP)
> > >>>>   VECTOR SHIFT LEFT   (VSL)
> > >>>>   VECTOR SHIFT RIGHT ARITHMETIC   (VSRA)
> > >>>>   VECTOR SHIFT RIGHT LOGICAL  (VSRL)
> > >>>
> > >>> Thanks, queued to my s390x-next branch now:
> > >>>
> > >>>https://gitlab.com/thuth/qemu/-/commits/s390x-next/
> > >>>
> > >> Thanks for fixing up. At this point I would have suggested to exclude
> > >> the tests for now.
> > >>
> > >> --
> > >> Thanks,
> > >>
> > >> David / dhildenb
> > >>
> > >
> >



Re: [PATCH v6 00/13] s390x/tcg: Implement Vector-Enhancements Facility 2

2022-05-03 Thread David Miller
Sorry,  It was in the discussion for v4 patches,  as an attachment .
mail thread:
[PATCH v4 10/11] tests/tcg/s390x: Tests for Vector Enhancements Facility 2
So it likely never made it to the mailing list.

I've reattached and will forward the patch (by itself) to the mailing list.

I think the other solution works just as well by ignoring if compiler
doesn't support z15.

I just thought I'd bring it back up as I saw discussion about it.

Thanks
- David Miller






On Tue, May 3, 2022 at 2:55 AM Thomas Huth  wrote:
>
>   Hi!
>
> On 02/05/2022 18.06, David Miller wrote:
> > There was also the patch that had them as .insn in the other series of 
> > emails.
>
> Sorry, I missed that patch, could you please point me to the mail on
> https://lore.kernel.org/qemu-devel/ ? I remember that there was a discussion
> about the vri-d encoding, but I apparently missed the patch that came out of
> this discussion...
>
>   Thomas
>
> > On Mon, May 2, 2022 at 11:52 AM David Hildenbrand  wrote:
> >>
> >> On 02.05.22 09:20, Thomas Huth wrote:
> >>> On 28/04/2022 11.46, David Hildenbrand wrote:
> >>>> Implement Vector-Enhancements Facility 2 for s390x
> >>>>
> >>>> resolves: https://gitlab.com/qemu-project/qemu/-/issues/738
> >>>>
> >>>> implements:
> >>>>   VECTOR LOAD ELEMENTS REVERSED   (VLER)
> >>>>   VECTOR LOAD BYTE REVERSED ELEMENTS  (VLBR)
> >>>>   VECTOR LOAD BYTE REVERSED ELEMENT   (VLEBRH, VLEBRF, 
> >>>> VLEBRG)
> >>>>   VECTOR LOAD BYTE REVERSED ELEMENT AND ZERO  (VLLEBRZ)
> >>>>   VECTOR LOAD BYTE REVERSED ELEMENT AND REPLICATE (VLBRREP)
> >>>>   VECTOR STORE ELEMENTS REVERSED  (VSTER)
> >>>>   VECTOR STORE BYTE REVERSED ELEMENTS (VSTBR)
> >>>>   VECTOR STORE BYTE REVERSED ELEMENTS (VSTEBRH, VSTEBRF, 
> >>>> VSTEBRG)
> >>>>   VECTOR SHIFT LEFT DOUBLE BY BIT (VSLD)
> >>>>   VECTOR SHIFT RIGHT DOUBLE BY BIT(VSRD)
> >>>>   VECTOR STRING SEARCH(VSTRS)
> >>>>
> >>>>   modifies:
> >>>>   VECTOR FP CONVERT FROM FIXED(VCFPS)
> >>>>   VECTOR FP CONVERT FROM LOGICAL  (VCFPL)
> >>>>   VECTOR FP CONVERT TO FIXED  (VCSFP)
> >>>>   VECTOR FP CONVERT TO LOGICAL(VCLFP)
> >>>>   VECTOR SHIFT LEFT   (VSL)
> >>>>   VECTOR SHIFT RIGHT ARITHMETIC   (VSRA)
> >>>>   VECTOR SHIFT RIGHT LOGICAL  (VSRL)
> >>>
> >>> Thanks, queued to my s390x-next branch now:
> >>>
> >>>https://gitlab.com/thuth/qemu/-/commits/s390x-next/
> >>>
> >> Thanks for fixing up. At this point I would have suggested to exclude
> >> the tests for now.
> >>
> >> --
> >> Thanks,
> >>
> >> David / dhildenb
> >>
> >
>
From bb6bf2f9529c4d76db9a9eff2ff7fa1235657103 Mon Sep 17 00:00:00 2001
From: David Miller 
Date: Mon, 21 Mar 2022 16:58:57 -0400
Subject: [PATCH v5 10/11] tests/tcg/s390x: Tests for Vector Enhancements
 Facility 2

Signed-off-by: David Miller 
---
 tests/tcg/s390x/Makefile.target |   8 ++
 tests/tcg/s390x/vx.h|  19 +
 tests/tcg/s390x/vxeh2_vcvt.c|  88 
 tests/tcg/s390x/vxeh2_vlstr.c   | 139 
 tests/tcg/s390x/vxeh2_vs.c  |  95 ++
 5 files changed, 349 insertions(+)
 create mode 100644 tests/tcg/s390x/vx.h
 create mode 100644 tests/tcg/s390x/vxeh2_vcvt.c
 create mode 100644 tests/tcg/s390x/vxeh2_vlstr.c
 create mode 100644 tests/tcg/s390x/vxeh2_vs.c

diff --git a/tests/tcg/s390x/Makefile.target b/tests/tcg/s390x/Makefile.target
index 8c9b6a13ce..921a056dd1 100644
--- a/tests/tcg/s390x/Makefile.target
+++ b/tests/tcg/s390x/Makefile.target
@@ -16,6 +16,14 @@ TESTS+=shift
 TESTS+=trap
 TESTS+=signals-s390x
 
+VECTOR_TESTS=vxeh2_vs
+VECTOR_TESTS+=vxeh2_vcvt
+VECTOR_TESTS+=vxeh2_vlstr
+
+TESTS+=$(VECTOR_TESTS)
+
+$(VECTOR_TESTS): CFLAGS+=-march=z15 -O2
+
 ifneq ($(HAVE_GDB_BIN),)
 GDB_SCRIPT=$(SRC_PATH)/tests/guest-debug/run-test.py
 
diff --git a/tests/tcg/s390x/vx.h b/tests/tcg/s390x/vx.h
new file mode 100644
index 00..2e66f8b714
--- /dev/null
+++ b/tests/tcg/s390x/vx.h
@@ -0,0 +1,19 @@
+#ifndef QEMU_TESTS_S390X_VX_H
+#define QEMU_TESTS_S390X_VX_H
+
+typedef union S390Vector {
+uint64_t d[2];  /* doubleword */
+uint32_t w[4];  /* wo

Re: [PATCH v6 00/13] s390x/tcg: Implement Vector-Enhancements Facility 2

2022-05-02 Thread David Miller
There was also the patch that had them as .insn in the other series of emails.

On Mon, May 2, 2022 at 11:52 AM David Hildenbrand  wrote:
>
> On 02.05.22 09:20, Thomas Huth wrote:
> > On 28/04/2022 11.46, David Hildenbrand wrote:
> >> Implement Vector-Enhancements Facility 2 for s390x
> >>
> >> resolves: https://gitlab.com/qemu-project/qemu/-/issues/738
> >>
> >> implements:
> >>  VECTOR LOAD ELEMENTS REVERSED   (VLER)
> >>  VECTOR LOAD BYTE REVERSED ELEMENTS  (VLBR)
> >>  VECTOR LOAD BYTE REVERSED ELEMENT   (VLEBRH, VLEBRF, VLEBRG)
> >>  VECTOR LOAD BYTE REVERSED ELEMENT AND ZERO  (VLLEBRZ)
> >>  VECTOR LOAD BYTE REVERSED ELEMENT AND REPLICATE (VLBRREP)
> >>  VECTOR STORE ELEMENTS REVERSED  (VSTER)
> >>  VECTOR STORE BYTE REVERSED ELEMENTS (VSTBR)
> >>  VECTOR STORE BYTE REVERSED ELEMENTS (VSTEBRH, VSTEBRF, 
> >> VSTEBRG)
> >>  VECTOR SHIFT LEFT DOUBLE BY BIT (VSLD)
> >>  VECTOR SHIFT RIGHT DOUBLE BY BIT(VSRD)
> >>  VECTOR STRING SEARCH(VSTRS)
> >>
> >>  modifies:
> >>  VECTOR FP CONVERT FROM FIXED(VCFPS)
> >>  VECTOR FP CONVERT FROM LOGICAL  (VCFPL)
> >>  VECTOR FP CONVERT TO FIXED  (VCSFP)
> >>  VECTOR FP CONVERT TO LOGICAL(VCLFP)
> >>  VECTOR SHIFT LEFT   (VSL)
> >>  VECTOR SHIFT RIGHT ARITHMETIC   (VSRA)
> >>  VECTOR SHIFT RIGHT LOGICAL  (VSRL)
> >
> > Thanks, queued to my s390x-next branch now:
> >
> >   https://gitlab.com/thuth/qemu/-/commits/s390x-next/
> >
> Thanks for fixing up. At this point I would have suggested to exclude
> the tests for now.
>
> --
> Thanks,
>
> David / dhildenb
>



Re: [PATCH v5 00/11] s390x/tcg: Implement Vector-Enhancements Facility 2

2022-04-27 Thread David Miller
I'm playing catch up a bit here,  as I was out sick for a few days.
It would be very much appreciated if you could do so,  as I'm not
familiar with what is required.

Thanks
- David Miller

On Mon, Apr 25, 2022 at 3:51 AM David Hildenbrand  wrote:
>
> On 25.04.22 09:43, Christian Borntraeger wrote:
> > Am 23.03.22 um 14:57 schrieb David Miller:
> >> Implement Vector-Enhancements Facility 2 for s390x
> >>
> >> resolves: https://gitlab.com/qemu-project/qemu/-/issues/738
> >>
> >> implements:
> >>  VECTOR LOAD ELEMENTS REVERSED   (VLER)
> >>  VECTOR LOAD BYTE REVERSED ELEMENTS  (VLBR)
> >>  VECTOR LOAD BYTE REVERSED ELEMENT   (VLEBRH, VLEBRF, VLEBRG)
> >>  VECTOR LOAD BYTE REVERSED ELEMENT AND ZERO  (VLLEBRZ)
> >>  VECTOR LOAD BYTE REVERSED ELEMENT AND REPLICATE (VLBRREP)
> >>  VECTOR STORE ELEMENTS REVERSED  (VSTER)
> >>  VECTOR STORE BYTE REVERSED ELEMENTS (VSTBR)
> >>  VECTOR STORE BYTE REVERSED ELEMENTS (VSTEBRH, VSTEBRF, 
> >> VSTEBRG)
> >>  VECTOR SHIFT LEFT DOUBLE BY BIT (VSLD)
> >>  VECTOR SHIFT RIGHT DOUBLE BY BIT(VSRD)
> >>  VECTOR STRING SEARCH(VSTRS)
> >>
> >>  modifies:
> >>  VECTOR FP CONVERT FROM FIXED(VCFPS)
> >>  VECTOR FP CONVERT FROM LOGICAL  (VCFPL)
> >>  VECTOR FP CONVERT TO FIXED  (VCSFP)
> >>  VECTOR FP CONVERT TO LOGICAL        (VCLFP)
> >>  VECTOR SHIFT LEFT   (VSL)
> >>  VECTOR SHIFT RIGHT ARITHMETIC   (VSRA)
> >>  VECTOR SHIFT RIGHT LOGICAL  (VSRL)
> >>
> >>
> >> David Miller (9):
> >>tcg: Implement tcg_gen_{h,w}swap_{i32,i64}
> >>target/s390x: vxeh2: vector convert short/32b
> >>target/s390x: vxeh2: vector string search
> >>target/s390x: vxeh2: Update for changes to vector shifts
> >>target/s390x: vxeh2: vector shift double by bit
> >>target/s390x: vxeh2: vector {load, store} elements reversed
> >>target/s390x: vxeh2: vector {load, store} byte reversed elements
> >>target/s390x: vxeh2: vector {load, store} byte reversed element
> >>target/s390x: add S390_FEAT_VECTOR_ENH2 to qemu CPU model
> >>tests/tcg/s390x: Tests for Vector Enhancements Facility 2
> >>target/s390x: Fix writeback to v1 in helper_vstl
> >>
> >> Richard Henderson (2):
> >>tcg: Implement tcg_gen_{h,w}swap_{i32,i64}
> >>target/s390x: Fix writeback to v1 in helper_vstl
> >
> >
> > I guess we can now re-do this series against 7.1-devel (qemu/master) which 
> > does
> > have the machine compat changes. Apart from that this should be ready now?
> >
>
> Yes, I think so. I can respin with the proper compat changes if requested.
>
> --
> Thanks,
>
> David / dhildenb
>



Re: [PATCH v4 10/11] tests/tcg/s390x: Tests for Vector Enhancements Facility 2

2022-04-05 Thread David Miller
Recommendation for comment?

/* vri-d encoding matches vrr for 4b imm.
  .insn does not handle this encoding variant.
*/

Christian: I will push another patch version as soon as that's decided.
(unless you prefer to choose the comment and edit during staging)

On Tue, Apr 5, 2022 at 6:13 AM David Hildenbrand  wrote:
>
> On 01.04.22 17:25, Christian Borntraeger wrote:
> > Am 01.04.22 um 17:02 schrieb David Miller:
> >> vrr is almost a perfect match (it is for this, larger than imm4 would
> >> need to be split).
> >>
> >> .long : this would be uglier.
> >> use enough to be filled with nops after ?
> >> or use a 32b and 16b instead if it's in .text it should make no difference.
> >
> > I will let Richard or David decide what they prefer.
> >
>
> I don't particularly care as long as there is a comment stating why we
> need this hack.
>
> --
> Thanks,
>
> David / dhildenb
>



Re: [PATCH v4 10/11] tests/tcg/s390x: Tests for Vector Enhancements Facility 2

2022-04-01 Thread David Miller
vrr is almost a perfect match (it is for this, larger than imm4 would
need to be split).

.long : this would be uglier.
use enough to be filled with nops after ?
or use a 32b and 16b instead if it's in .text it should make no difference.


On Fri, Apr 1, 2022 at 2:42 AM Christian Borntraeger
 wrote:
>
>
>
> Am 01.04.22 um 04:15 schrieb David Miller:
> > Hi,
> >
> > There is some issue with instruction sub/alt encodings not matching,
> > but I worked around it easily.
> >
> > I'm dropping the updated patch for the tests in here.
> > I know I should resend the entire patch series as a higher version
> > really, and will do so.
> > I'm hoping someone can tell me if it's ok to use .insn vrr  in place
> > of vri(-d) as it doesn't match vri.
> > [https://sourceware.org/binutils/docs-2.37/as/s390-Formats.html]
> >
> > .insn doesn't deal with sub encodings and there is no good alternative
> > that I know of.
> >
> > example:
> >
> >  /* vri-d as vrr */
> >  asm volatile(".insn vrr, 0xE786, %[v1], %[v2], %[v3], 0, %[I], 0\n"
> >  : [v1] "=v" (v1->v)
> >  : [v2]  "v" (v2->v)
> >  , [v3]  "v" (v3->v)
> >  , [I]   "i" (I & 7));
> >
> > Patch is attached
>
> Yes, vri sucks and does not work with vrsd. Maybe just use .long which is 
> probably
> better than using a "wrong" format.
> Opinions?



Re: [PATCH v4 10/11] tests/tcg/s390x: Tests for Vector Enhancements Facility 2

2022-03-31 Thread David Miller
On Thu, Mar 31, 2022 at 10:15 PM David Miller  wrote:
>
> Hi,
>
> There is some issue with instruction sub/alt encodings not matching,
> but I worked around it easily.
>
> I'm dropping the updated patch for the tests in here.
> I know I should resend the entire patch series as a higher version
> really, and will do so.
> I'm hoping someone can tell me if it's ok to use .insn vrr  in place
> of vri(-d) as it doesn't match vri.
> [https://sourceware.org/binutils/docs-2.37/as/s390-Formats.html]
>
> .insn doesn't deal with sub encodings and there is no good alternative
> that I know of.
>
> example:
>
> /* vri-d as vrr */
> asm volatile(".insn vrr, 0xE786, %[v1], %[v2], %[v3], 0, %[I], 0\n"
> : [v1] "=v" (v1->v)
> : [v2]  "v" (v2->v)
>     , [v3]  "v" (v3->v)
>     , [I]   "i" (I & 7));
>
> Patch is attached
>
>
> Thanks
> - David Miller
>
>
> On Thu, Mar 31, 2022 at 2:26 PM David Miller  wrote:
> >
> > Sorry,
> >Didn't notice this, as it was on v4 patch emails.
> > I assume since there is no other follow up after a week,
> >  CI jobs are not being updated and I should change samples to use .insn.
> > I will try to get this out tomorrow.
> >
> > Thanks,
> > - David Miller
> >
> > On Wed, Mar 23, 2022 at 1:13 PM Thomas Huth  wrote:
> > >
> > > On 22/03/2022 11.31, Thomas Huth wrote:
> > > > On 22/03/2022 09.53, David Hildenbrand wrote:
> > > >> On 22.03.22 01:04, David Miller wrote:
> > > > [...]
> > > >>> diff --git a/tests/tcg/s390x/Makefile.target
> > > >>> b/tests/tcg/s390x/Makefile.target
> > > >>> index 8c9b6a13ce..921a056dd1 100644
> > > >>> --- a/tests/tcg/s390x/Makefile.target
> > > >>> +++ b/tests/tcg/s390x/Makefile.target
> > > >>> @@ -16,6 +16,14 @@ TESTS+=shift
> > > >>>   TESTS+=trap
> > > >>>   TESTS+=signals-s390x
> > > >>> +VECTOR_TESTS=vxeh2_vs
> > > >>> +VECTOR_TESTS+=vxeh2_vcvt
> > > >>> +VECTOR_TESTS+=vxeh2_vlstr
> > > >>> +
> > > >>> +TESTS+=$(VECTOR_TESTS)
> > > >>> +
> > > >>> +$(VECTOR_TESTS): CFLAGS+=-march=z15 -O2
> > > >>
> > > >> @Thomas, will that survive our test framework already, or do we have to
> > > > >> wait for the Debian 11 changes?
> > > >
> > > > Alex' update to the container has already been merged:
> > > >
> > > > https://gitlab.com/qemu-project/qemu/-/commit/89767579cad2e371b
> > > >
> > > > ... and seems like it's working in Travis on s390x, too:
> > > >
> > > > https://app.travis-ci.com/github/huth/qemu/jobs/564188977#L12797
> > > >
> > > > ... so it seems like it should be OK now (considering that we drop 
> > > > support
> > > > for the old Ubuntu version 18.04 in QEMU 7.1, too).
> > >
> > > Looks like I spoke a little bit too soon - some of the CI pipelines are
> > > still using Debian 10 for running the TCG tests, and they are failing with
> > > these patches applied:
> > >
> > > https://gitlab.com/thuth/qemu/-/jobs/2238422870#L3499
> > >
> > > Thus we either need to update the CI jobs to use Debian 11, or use
> > > handcrafted instruction opcodes here again...
> > >
> > >   Thomas
> > >
From bb6bf2f9529c4d76db9a9eff2ff7fa1235657103 Mon Sep 17 00:00:00 2001
From: David Miller 
Date: Mon, 21 Mar 2022 16:58:57 -0400
Subject: [PATCH v5 10/11] tests/tcg/s390x: Tests for Vector Enhancements
 Facility 2

Signed-off-by: David Miller 
---
 tests/tcg/s390x/Makefile.target |   8 ++
 tests/tcg/s390x/vx.h|  19 +
 tests/tcg/s390x/vxeh2_vcvt.c|  88 
 tests/tcg/s390x/vxeh2_vlstr.c   | 139 
 tests/tcg/s390x/vxeh2_vs.c  |  95 ++
 5 files changed, 349 insertions(+)
 create mode 100644 tests/tcg/s390x/vx.h
 create mode 100644 tests/tcg/s390x/vxeh2_vcvt.c
 create mode 100644 tests/tcg/s390x/vxeh2_vlstr.c
 create mode 100644 tests/tcg/s390x/vxeh2_vs.c

diff --git a/tests/tcg/s390x/Makefile.target b/tests/tcg/s390x/Makefile.target
index 8c9b6a13ce..921a056dd1 100644
--- a/tests/tcg/s390x/Makefile.target
+++ b/tests/tcg/s390x/Makefile.target
@@ -16,6 +16,14 @@ TESTS+=shift
 TESTS+=trap
 TESTS+=signals-s390x
 
+VECTOR_TESTS=vxeh2_vs
+VECTOR_TESTS+=vxeh2_vcvt
+VECTOR_TESTS+=

Re: [PATCH v4 10/11] tests/tcg/s390x: Tests for Vector Enhancements Facility 2

2022-03-31 Thread David Miller
Hi,

There is some issue with instruction sub/alt encodings not matching,
but I worked around it easily.

I'm dropping the updated patch for the tests in here.
I know I should resend the entire patch series as a higher version
really, and will do so.
I'm hoping someone can tell me if it's ok to use .insn vrr  in place
of vri(-d) as it doesn't match vri.
[https://sourceware.org/binutils/docs-2.37/as/s390-Formats.html]

.insn doesn't deal with sub encodings and there is no good alternative
that I know of.

example:

/* vri-d as vrr */
asm volatile(".insn vrr, 0xE786, %[v1], %[v2], %[v3], 0, %[I], 0\n"
: [v1] "=v" (v1->v)
: [v2]  "v" (v2->v)
, [v3]  "v" (v3->v)
, [I]   "i" (I & 7));

Patch is attached


Thanks
- David Miller


On Thu, Mar 31, 2022 at 2:26 PM David Miller  wrote:
>
> Sorry,
>Didn't notice this, as it was on v4 patch emails.
> I assume since there is no other follow up after a week,
>  CI jobs are not being updated and I should change samples to use .insn.
> I will try to get this out tomorrow.
>
> Thanks,
> - David Miller
>
> On Wed, Mar 23, 2022 at 1:13 PM Thomas Huth  wrote:
> >
> > On 22/03/2022 11.31, Thomas Huth wrote:
> > > On 22/03/2022 09.53, David Hildenbrand wrote:
> > >> On 22.03.22 01:04, David Miller wrote:
> > > [...]
> > >>> diff --git a/tests/tcg/s390x/Makefile.target
> > >>> b/tests/tcg/s390x/Makefile.target
> > >>> index 8c9b6a13ce..921a056dd1 100644
> > >>> --- a/tests/tcg/s390x/Makefile.target
> > >>> +++ b/tests/tcg/s390x/Makefile.target
> > >>> @@ -16,6 +16,14 @@ TESTS+=shift
> > >>>   TESTS+=trap
> > >>>   TESTS+=signals-s390x
> > >>> +VECTOR_TESTS=vxeh2_vs
> > >>> +VECTOR_TESTS+=vxeh2_vcvt
> > >>> +VECTOR_TESTS+=vxeh2_vlstr
> > >>> +
> > >>> +TESTS+=$(VECTOR_TESTS)
> > >>> +
> > >>> +$(VECTOR_TESTS): CFLAGS+=-march=z15 -O2
> > >>
> > >> @Thomas, will that survive our test framework already, or do we have to
> > > >> wait for the Debian 11 changes?
> > >
> > > Alex' update to the container has already been merged:
> > >
> > > https://gitlab.com/qemu-project/qemu/-/commit/89767579cad2e371b
> > >
> > > ... and seems like it's working in Travis on s390x, too:
> > >
> > > https://app.travis-ci.com/github/huth/qemu/jobs/564188977#L12797
> > >
> > > ... so it seems like it should be OK now (considering that we drop support
> > > for the old Ubuntu version 18.04 in QEMU 7.1, too).
> >
> > Looks like I spoke a little bit too soon - some of the CI pipelines are
> > still using Debian 10 for running the TCG tests, and they are failing with
> > these patches applied:
> >
> > https://gitlab.com/thuth/qemu/-/jobs/2238422870#L3499
> >
> > Thus we either need to update the CI jobs to use Debian 11, or use
> > handcrafted instruction opcodes here again...
> >
> >   Thomas
> >


Re: [PATCH v4 10/11] tests/tcg/s390x: Tests for Vector Enhancements Facility 2

2022-03-31 Thread David Miller
Sorry,
   Didn't notice this, as it was on v4 patch emails.
I assume since there is no other follow up after a week,
 CI jobs are not being updated and I should change samples to use .insn.
I will try to get this out tomorrow.

Thanks,
- David Miller

On Wed, Mar 23, 2022 at 1:13 PM Thomas Huth  wrote:
>
> On 22/03/2022 11.31, Thomas Huth wrote:
> > On 22/03/2022 09.53, David Hildenbrand wrote:
> >> On 22.03.22 01:04, David Miller wrote:
> > [...]
> >>> diff --git a/tests/tcg/s390x/Makefile.target
> >>> b/tests/tcg/s390x/Makefile.target
> >>> index 8c9b6a13ce..921a056dd1 100644
> >>> --- a/tests/tcg/s390x/Makefile.target
> >>> +++ b/tests/tcg/s390x/Makefile.target
> >>> @@ -16,6 +16,14 @@ TESTS+=shift
> >>>   TESTS+=trap
> >>>   TESTS+=signals-s390x
> >>> +VECTOR_TESTS=vxeh2_vs
> >>> +VECTOR_TESTS+=vxeh2_vcvt
> >>> +VECTOR_TESTS+=vxeh2_vlstr
> >>> +
> >>> +TESTS+=$(VECTOR_TESTS)
> >>> +
> >>> +$(VECTOR_TESTS): CFLAGS+=-march=z15 -O2
> >>
> >> @Thomas, will that survive our test framework already, or do we have to
> >> wait for the Debian 11 changes?
> >
> > Alex' update to the container has already been merged:
> >
> > https://gitlab.com/qemu-project/qemu/-/commit/89767579cad2e371b
> >
> > ... and seems like it's working in Travis on s390x, too:
> >
> > https://app.travis-ci.com/github/huth/qemu/jobs/564188977#L12797
> >
> > ... so it seems like it should be OK now (considering that we drop support
> > for the old Ubuntu version 18.04 in QEMU 7.1, too).
>
> Looks like I spoke a little bit too soon - some of the CI pipelines are
> still using Debian 10 for running the TCG tests, and they are failing with
> these patches applied:
>
> https://gitlab.com/thuth/qemu/-/jobs/2238422870#L3499
>
> Thus we either need to update the CI jobs to use Debian 11, or use
> handcrafted instruction opcodes here again...
>
>   Thomas
>



Re: [PATCH v5 09/11] target/s390x: add S390_FEAT_VECTOR_ENH2 to qemu CPU model

2022-03-23 Thread David Miller
I am sending v5 now as I'm away from my desk until Sunday.
As I'm unsure whether or not this just needs a V7_0 and the patch
shown as an example isn't a direct correlation : I've left it out for
now.
I will dig into it further Monday March 28th.

Thanks
- David Miller

On Wed, Mar 23, 2022 at 9:57 AM David Miller  wrote:
>
> Signed-off-by: David Miller 
> Signed-off-by: Richard Henderson 
> ---
>  target/s390x/gen-features.c | 2 ++
>  1 file changed, 2 insertions(+)
>
> diff --git a/target/s390x/gen-features.c b/target/s390x/gen-features.c
> index 22846121c4..499a3b10a8 100644
> --- a/target/s390x/gen-features.c
> +++ b/target/s390x/gen-features.c
> @@ -740,7 +740,9 @@ static uint16_t qemu_V6_2[] = {
>
>  static uint16_t qemu_LATEST[] = {
>  S390_FEAT_MISC_INSTRUCTION_EXT3,
> +S390_FEAT_VECTOR_ENH2,
>  };
> +
>  /* add all new definitions before this point */
>  static uint16_t qemu_MAX[] = {
>  /* generates a dependency warning, leave it out for now */
> --
> 2.34.1
>



[PATCH v5 07/11] target/s390x: vxeh2: vector {load, store} byte reversed elements

2022-03-23 Thread David Miller
Signed-off-by: David Miller 
Signed-off-by: Richard Henderson 
Reviewed-by: David Hildenbrand 
---
 target/s390x/tcg/insn-data.def  |   4 +
 target/s390x/tcg/translate_vx.c.inc | 113 
 2 files changed, 117 insertions(+)

diff --git a/target/s390x/tcg/insn-data.def b/target/s390x/tcg/insn-data.def
index b524541a7d..ee6e1dc9e5 100644
--- a/target/s390x/tcg/insn-data.def
+++ b/target/s390x/tcg/insn-data.def
@@ -1027,6 +1027,8 @@
 F(0xe756, VLR, VRR_a, V,   0, 0, 0, 0, vlr, 0, IF_VEC)
 /* VECTOR LOAD AND REPLICATE */
 F(0xe705, VLREP,   VRX,   V,   la2, 0, 0, 0, vlrep, 0, IF_VEC)
+/* VECTOR LOAD BYTE REVERSED ELEMENTS */
+F(0xe606, VLBR,VRX,   VE2, la2, 0, 0, 0, vlbr, 0, IF_VEC)
 /* VECTOR LOAD ELEMENT */
 E(0xe700, VLEB,VRX,   V,   la2, 0, 0, 0, vle, 0, ES_8, IF_VEC)
 E(0xe701, VLEH,VRX,   V,   la2, 0, 0, 0, vle, 0, ES_16, IF_VEC)
@@ -1079,6 +1081,8 @@
 F(0xe75f, VSEG,VRR_a, V,   0, 0, 0, 0, vseg, 0, IF_VEC)
 /* VECTOR STORE */
 F(0xe70e, VST, VRX,   V,   la2, 0, 0, 0, vst, 0, IF_VEC)
+/* VECTOR STORE BYTE REVERSED ELEMENTS */
+F(0xe60e, VSTBR,VRX,   VE2, la2, 0, 0, 0, vstbr, 0, IF_VEC)
 /* VECTOR STORE ELEMENT */
 E(0xe708, VSTEB,   VRX,   V,   la2, 0, 0, 0, vste, 0, ES_8, IF_VEC)
 E(0xe709, VSTEH,   VRX,   V,   la2, 0, 0, 0, vste, 0, ES_16, IF_VEC)
diff --git a/target/s390x/tcg/translate_vx.c.inc 
b/target/s390x/tcg/translate_vx.c.inc
index 0bef1200e3..c0b4a5b9ed 100644
--- a/target/s390x/tcg/translate_vx.c.inc
+++ b/target/s390x/tcg/translate_vx.c.inc
@@ -457,6 +457,62 @@ static DisasJumpType op_vlrep(DisasContext *s, DisasOps *o)
 return DISAS_NEXT;
 }
 
+static DisasJumpType op_vlbr(DisasContext *s, DisasOps *o)
+{
+const uint8_t es = get_field(s, m3);
+TCGv_i64 t0, t1;
+
+if (es < ES_16 || es > ES_128) {
+gen_program_exception(s, PGM_SPECIFICATION);
+return DISAS_NORETURN;
+}
+
+t0 = tcg_temp_new_i64();
+t1 = tcg_temp_new_i64();
+
+
+if (es == ES_128) {
+tcg_gen_qemu_ld_i64(t1, o->addr1, get_mem_index(s), MO_LEUQ);
+gen_addi_and_wrap_i64(s, o->addr1, o->addr1, 8);
+tcg_gen_qemu_ld_i64(t0, o->addr1, get_mem_index(s), MO_LEUQ);
+goto write;
+}
+
+/* Begin with byte reversed doublewords... */
+tcg_gen_qemu_ld_i64(t0, o->addr1, get_mem_index(s), MO_LEUQ);
+gen_addi_and_wrap_i64(s, o->addr1, o->addr1, 8);
+tcg_gen_qemu_ld_i64(t1, o->addr1, get_mem_index(s), MO_LEUQ);
+
+/*
+ * For 16 and 32-bit elements, the doubleword bswap also reversed
+ * the order of the elements.  Perform a larger order swap to put
+ * them back into place.  For the 128-bit "element", finish the
+ * bswap by swapping the doublewords.
+ */
+switch (es) {
+case ES_16:
+tcg_gen_hswap_i64(t0, t0);
+tcg_gen_hswap_i64(t1, t1);
+break;
+case ES_32:
+tcg_gen_wswap_i64(t0, t0);
+tcg_gen_wswap_i64(t1, t1);
+break;
+case ES_64:
+break;
+default:
+g_assert_not_reached();
+}
+
+write:
+write_vec_element_i64(t0, get_field(s, v1), 0, ES_64);
+write_vec_element_i64(t1, get_field(s, v1), 1, ES_64);
+
+tcg_temp_free(t0);
+tcg_temp_free(t1);
+return DISAS_NEXT;
+}
+
 static DisasJumpType op_vle(DisasContext *s, DisasOps *o)
 {
 const uint8_t es = s->insn->data;
@@ -998,6 +1054,63 @@ static DisasJumpType op_vst(DisasContext *s, DisasOps *o)
 return DISAS_NEXT;
 }
 
+static DisasJumpType op_vstbr(DisasContext *s, DisasOps *o)
+{
+const uint8_t es = get_field(s, m3);
+TCGv_i64 t0, t1;
+
+if (es < ES_16 || es > ES_128) {
+gen_program_exception(s, PGM_SPECIFICATION);
+return DISAS_NORETURN;
+}
+
+/* Probe write access before actually modifying memory */
+gen_helper_probe_write_access(cpu_env, o->addr1, tcg_constant_i64(16));
+
+t0 = tcg_temp_new_i64();
+t1 = tcg_temp_new_i64();
+
+
+if (es == ES_128) {
+read_vec_element_i64(t1, get_field(s, v1), 0, ES_64);
+read_vec_element_i64(t0, get_field(s, v1), 1, ES_64);
+goto write;
+}
+
+read_vec_element_i64(t0, get_field(s, v1), 0, ES_64);
+read_vec_element_i64(t1, get_field(s, v1), 1, ES_64);
+
+/*
+ * For 16 and 32-bit elements, the doubleword bswap below will
+ * reverse the order of the elements.  Perform a larger order
+ * swap to put them back into place.  The 128-bit "element" was
+ * already handled above by reading the doublewords in swapped order.
+ */
+switch (es) {
+case MO_16:
+tcg_gen_hswap_i64(t0, t0);
+tcg_gen_hswap_i64(t1, t1);
+break;
+case MO_32:
+tcg_gen_wswap_i64(t0, t0);
+tcg_gen_wswap_i64(t1, t1);
+break;
+case MO_64:
+break;
+default:
+g_assert_not_reached();
+}
+
+write:
+tcg_gen_qemu_st_i64(t0, o->addr1, g

[PATCH v5 11/11] target/s390x: Fix writeback to v1 in helper_vstl

2022-03-23 Thread David Miller
From: Richard Henderson 

Fixes: 0e0a5b49ad58 ("s390x/tcg: Implement VECTOR STORE WITH LENGTH")

Signed-off-by: Richard Henderson 
Reviewed-by: David Miller 
Reviewed-by: David Hildenbrand 
---
 target/s390x/tcg/vec_helper.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/target/s390x/tcg/vec_helper.c b/target/s390x/tcg/vec_helper.c
index ededf13cf0..48d86722b2 100644
--- a/target/s390x/tcg/vec_helper.c
+++ b/target/s390x/tcg/vec_helper.c
@@ -200,7 +200,6 @@ void HELPER(vstl)(CPUS390XState *env, const void *v1, uint64_t addr,
 addr = wrap_address(env, addr + 8);
 cpu_stq_data_ra(env, addr, s390_vec_read_element64(v1, 1), GETPC());
 } else {
-S390Vector tmp = {};
 int i;
 
 for (i = 0; i < bytes; i++) {
@@ -209,6 +208,5 @@ void HELPER(vstl)(CPUS390XState *env, const void *v1, uint64_t addr,
 cpu_stb_data_ra(env, addr, byte, GETPC());
 addr = wrap_address(env, addr + 1);
 }
-*(S390Vector *)v1 = tmp;
 }
 }
-- 
2.34.1




[PATCH v5 05/11] target/s390x: vxeh2: vector shift double by bit

2022-03-23 Thread David Miller
Signed-off-by: David Miller 
Signed-off-by: Richard Henderson 
---
 target/s390x/tcg/insn-data.def  |  6 +++-
 target/s390x/tcg/translate_vx.c.inc | 55 +
 2 files changed, 53 insertions(+), 8 deletions(-)

diff --git a/target/s390x/tcg/insn-data.def b/target/s390x/tcg/insn-data.def
index f487a64abf..98a31a557d 100644
--- a/target/s390x/tcg/insn-data.def
+++ b/target/s390x/tcg/insn-data.def
@@ -1207,12 +1207,16 @@
 E(0xe774, VSL, VRR_c, V,   0, 0, 0, 0, vsl, 0, 0, IF_VEC)
 /* VECTOR SHIFT LEFT BY BYTE */
 E(0xe775, VSLB,VRR_c, V,   0, 0, 0, 0, vsl, 0, 1, IF_VEC)
+/* VECTOR SHIFT LEFT DOUBLE BY BIT */
+E(0xe786, VSLD,VRI_d, VE2, 0, 0, 0, 0, vsld, 0, 0, IF_VEC)
 /* VECTOR SHIFT LEFT DOUBLE BY BYTE */
-F(0xe777, VSLDB,   VRI_d, V,   0, 0, 0, 0, vsldb, 0, IF_VEC)
+E(0xe777, VSLDB,   VRI_d, V,   0, 0, 0, 0, vsld, 0, 1, IF_VEC)
 /* VECTOR SHIFT RIGHT ARITHMETIC */
 E(0xe77e, VSRA,VRR_c, V,   0, 0, 0, 0, vsra, 0, 0, IF_VEC)
 /* VECTOR SHIFT RIGHT ARITHMETIC BY BYTE */
 E(0xe77f, VSRAB,   VRR_c, V,   0, 0, 0, 0, vsra, 0, 1, IF_VEC)
+/* VECTOR SHIFT RIGHT DOUBLE BY BIT */
+F(0xe787, VSRD,VRI_d, VE2, 0, 0, 0, 0, vsrd, 0, IF_VEC)
 /* VECTOR SHIFT RIGHT LOGICAL */
 E(0xe77c, VSRL,VRR_c, V,   0, 0, 0, 0, vsrl, 0, 0, IF_VEC)
 /* VECTOR SHIFT RIGHT LOGICAL BY BYTE */
diff --git a/target/s390x/tcg/translate_vx.c.inc b/target/s390x/tcg/translate_vx.c.inc
index fd53ddafef..bb997de794 100644
--- a/target/s390x/tcg/translate_vx.c.inc
+++ b/target/s390x/tcg/translate_vx.c.inc
@@ -2056,14 +2056,23 @@ static DisasJumpType op_vsrl(DisasContext *s, DisasOps *o)
 gen_helper_gvec_vsrl_ve2);
 }
 
-static DisasJumpType op_vsldb(DisasContext *s, DisasOps *o)
+static DisasJumpType op_vsld(DisasContext *s, DisasOps *o)
 {
-const uint8_t i4 = get_field(s, i4) & 0xf;
-const int left_shift = (i4 & 7) * 8;
-const int right_shift = 64 - left_shift;
-TCGv_i64 t0 = tcg_temp_new_i64();
-TCGv_i64 t1 = tcg_temp_new_i64();
-TCGv_i64 t2 = tcg_temp_new_i64();
+const bool byte = s->insn->data;
+const uint8_t mask = byte ? 15 : 7;
+const uint8_t mul  = byte ?  8 : 1;
+const uint8_t i4   = get_field(s, i4);
+const int right_shift = 64 - (i4 & 7) * mul;
+TCGv_i64 t0, t1, t2;
+
+if (i4 & ~mask) {
+gen_program_exception(s, PGM_SPECIFICATION);
+return DISAS_NORETURN;
+}
+
+t0 = tcg_temp_new_i64();
+t1 = tcg_temp_new_i64();
+t2 = tcg_temp_new_i64();
 
 if ((i4 & 8) == 0) {
 read_vec_element_i64(t0, get_field(s, v2), 0, ES_64);
@@ -2074,8 +2083,40 @@ static DisasJumpType op_vsldb(DisasContext *s, DisasOps *o)
 read_vec_element_i64(t1, get_field(s, v3), 0, ES_64);
 read_vec_element_i64(t2, get_field(s, v3), 1, ES_64);
 }
+
 tcg_gen_extract2_i64(t0, t1, t0, right_shift);
 tcg_gen_extract2_i64(t1, t2, t1, right_shift);
+
+write_vec_element_i64(t0, get_field(s, v1), 0, ES_64);
+write_vec_element_i64(t1, get_field(s, v1), 1, ES_64);
+
+tcg_temp_free(t0);
+tcg_temp_free(t1);
+tcg_temp_free(t2);
+return DISAS_NEXT;
+}
+
+static DisasJumpType op_vsrd(DisasContext *s, DisasOps *o)
+{
+const uint8_t i4 = get_field(s, i4);
+TCGv_i64 t0, t1, t2;
+
+if (i4 & ~7) {
+gen_program_exception(s, PGM_SPECIFICATION);
+return DISAS_NORETURN;
+}
+
+t0 = tcg_temp_new_i64();
+t1 = tcg_temp_new_i64();
+t2 = tcg_temp_new_i64();
+
+read_vec_element_i64(t0, get_field(s, v2), 1, ES_64);
+read_vec_element_i64(t1, get_field(s, v3), 0, ES_64);
+read_vec_element_i64(t2, get_field(s, v3), 1, ES_64);
+
+tcg_gen_extract2_i64(t0, t1, t0, i4);
+tcg_gen_extract2_i64(t1, t2, t1, i4);
+
 write_vec_element_i64(t0, get_field(s, v1), 0, ES_64);
 write_vec_element_i64(t1, get_field(s, v1), 1, ES_64);
 
-- 
2.34.1




[PATCH v5 10/11] tests/tcg/s390x: Tests for Vector Enhancements Facility 2

2022-03-23 Thread David Miller
Signed-off-by: David Miller 
Signed-off-by: Richard Henderson 
---
 tests/tcg/s390x/Makefile.target |   8 ++
 tests/tcg/s390x/vx.h|  19 +
 tests/tcg/s390x/vxeh2_vcvt.c|  88 
 tests/tcg/s390x/vxeh2_vlstr.c   | 139 
 tests/tcg/s390x/vxeh2_vs.c  |  93 +
 5 files changed, 347 insertions(+)
 create mode 100644 tests/tcg/s390x/vx.h
 create mode 100644 tests/tcg/s390x/vxeh2_vcvt.c
 create mode 100644 tests/tcg/s390x/vxeh2_vlstr.c
 create mode 100644 tests/tcg/s390x/vxeh2_vs.c

diff --git a/tests/tcg/s390x/Makefile.target b/tests/tcg/s390x/Makefile.target
index 8c9b6a13ce..921a056dd1 100644
--- a/tests/tcg/s390x/Makefile.target
+++ b/tests/tcg/s390x/Makefile.target
@@ -16,6 +16,14 @@ TESTS+=shift
 TESTS+=trap
 TESTS+=signals-s390x
 
+VECTOR_TESTS=vxeh2_vs
+VECTOR_TESTS+=vxeh2_vcvt
+VECTOR_TESTS+=vxeh2_vlstr
+
+TESTS+=$(VECTOR_TESTS)
+
+$(VECTOR_TESTS): CFLAGS+=-march=z15 -O2
+
 ifneq ($(HAVE_GDB_BIN),)
 GDB_SCRIPT=$(SRC_PATH)/tests/guest-debug/run-test.py
 
diff --git a/tests/tcg/s390x/vx.h b/tests/tcg/s390x/vx.h
new file mode 100644
index 0000000000..2e66f8b714
--- /dev/null
+++ b/tests/tcg/s390x/vx.h
@@ -0,0 +1,19 @@
+#ifndef QEMU_TESTS_S390X_VX_H
+#define QEMU_TESTS_S390X_VX_H
+
+typedef union S390Vector {
+uint64_t d[2];  /* doubleword */
+uint32_t w[4];  /* word */
+uint16_t h[8];  /* halfword */
+uint8_t  b[16]; /* byte */
+float    f[4];  /* float32 */
+double   fd[2]; /* float64 */
+__uint128_t v;
+} S390Vector;
+
+#define ES8  0
+#define ES16 1
+#define ES32 2
+#define ES64 3
+
+#endif
\ No newline at end of file
diff --git a/tests/tcg/s390x/vxeh2_vcvt.c b/tests/tcg/s390x/vxeh2_vcvt.c
new file mode 100644
index 0000000000..d6e551c16e
--- /dev/null
+++ b/tests/tcg/s390x/vxeh2_vcvt.c
@@ -0,0 +1,88 @@
+/*
+ * vxeh2_vcvt: vector-enhancements facility 2 vector convert *
+ */
+#include <stdint.h>
+#include "vx.h"
+
+#define M_S 8
+#define M4_XxC 4
+#define M4_def M4_XxC
+
+static inline void vcfps(S390Vector *v1, S390Vector *v2,
+const uint8_t m3,  const uint8_t m4,  const uint8_t m5)
+{
+asm volatile("vcfps %[v1], %[v2], %[m3], %[m4], %[m5]\n"
+: [v1] "=v" (v1->v)
+: [v2]  "v" (v2->v)
+, [m3]  "i" (m3)
+, [m4]  "i" (m4)
+, [m5]  "i" (m5));
+}
+
+static inline void vcfpl(S390Vector *v1, S390Vector *v2,
+const uint8_t m3,  const uint8_t m4,  const uint8_t m5)
+{
+asm volatile("vcfpl %[v1], %[v2], %[m3], %[m4], %[m5]\n"
+: [v1] "=v" (v1->v)
+: [v2]  "v" (v2->v)
+, [m3]  "i" (m3)
+, [m4]  "i" (m4)
+, [m5]  "i" (m5));
+}
+
+static inline void vcsfp(S390Vector *v1, S390Vector *v2,
+const uint8_t m3,  const uint8_t m4,  const uint8_t m5)
+{
+asm volatile("vcsfp %[v1], %[v2], %[m3], %[m4], %[m5]\n"
+: [v1] "=v" (v1->v)
+: [v2]  "v" (v2->v)
+, [m3]  "i" (m3)
+, [m4]  "i" (m4)
+, [m5]  "i" (m5));
+}
+
+static inline void vclfp(S390Vector *v1, S390Vector *v2,
+const uint8_t m3,  const uint8_t m4,  const uint8_t m5)
+{
+asm volatile("vclfp %[v1], %[v2], %[m3], %[m4], %[m5]\n"
+: [v1] "=v" (v1->v)
+: [v2]  "v" (v2->v)
+, [m3]  "i" (m3)
+, [m4]  "i" (m4)
+, [m5]  "i" (m5));
+}
+
+int main(int argc, char *argv[])
+{
+S390Vector vd;
+S390Vector vs_i32 = { .w[0] = 1, .w[1] = 64, .w[2] = 1024, .w[3] = -10 };
+S390Vector vs_u32 = { .w[0] = 2, .w[1] = 32, .w[2] = 4096, .w[3] = 8888 };
+S390Vector vs_f32 = { .f[0] = 3.987, .f[1] = 5.123,
+  .f[2] = 4.499, .f[3] = 0.512 };
+
+vd.d[0] = vd.d[1] = 0;
+vcfps(&vd, &vs_i32, 2, M4_def, 0);
+if (1 != vd.f[0] || 1024 != vd.f[2] || 64 != vd.f[1] || -10 != vd.f[3]) {
+return 1;
+}
+
+vd.d[0] = vd.d[1] = 0;
+vcfpl(&vd, &vs_u32, 2, M4_def, 0);
+if (2 != vd.f[0] || 4096 != vd.f[2] || 32 != vd.f[1] || 8888 != vd.f[3]) {
+return 1;
+}
+
+vd.d[0] = vd.d[1] = 0;
+vcsfp(&vd, &vs_f32, 2, M4_def, 0);
+if (4 != vd.w[0] || 4 != vd.w[2] || 5 != vd.w[1] || 1 != vd.w[3]) {
+return 1;
+}
+
+vd.d[0] = vd.d[1] = 0;
+vclfp(&vd, &vs_f32, 2, M4_def, 0);
+if (4 != vd.w[0] || 4 != vd.w[2] || 5 != vd.w[1] || 1 != vd.w[3]) {
+return 1;
+}
+
+return 0;
+}
diff --git a/tests/tcg/s390x/vxeh2_vlstr.c b/tests/tcg/s390x/vxeh2_vlstr.c
new file mode 100644
index 0000000000..5677bf7c29
--- /dev/null
+++ b/tests/tcg/s390x/vxeh2_vlstr.c
@@ -0,0 +1,139 @@
+/*
+ * vxeh2_vlstr: v

[PATCH v5 09/11] target/s390x: add S390_FEAT_VECTOR_ENH2 to qemu CPU model

2022-03-23 Thread David Miller
Signed-off-by: David Miller 
Signed-off-by: Richard Henderson 
---
 target/s390x/gen-features.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/target/s390x/gen-features.c b/target/s390x/gen-features.c
index 22846121c4..499a3b10a8 100644
--- a/target/s390x/gen-features.c
+++ b/target/s390x/gen-features.c
@@ -740,7 +740,9 @@ static uint16_t qemu_V6_2[] = {
 
 static uint16_t qemu_LATEST[] = {
 S390_FEAT_MISC_INSTRUCTION_EXT3,
+S390_FEAT_VECTOR_ENH2,
 };
+
 /* add all new definitions before this point */
 static uint16_t qemu_MAX[] = {
 /* generates a dependency warning, leave it out for now */
-- 
2.34.1




[PATCH v5 08/11] target/s390x: vxeh2: vector {load, store} byte reversed element

2022-03-23 Thread David Miller
Signed-off-by: David Miller 
Signed-off-by: Richard Henderson 
Reviewed-by: David Hildenbrand 
---
 target/s390x/tcg/insn-data.def  | 12 
 target/s390x/tcg/translate_vx.c.inc | 85 +
 2 files changed, 97 insertions(+)

diff --git a/target/s390x/tcg/insn-data.def b/target/s390x/tcg/insn-data.def
index ee6e1dc9e5..5e448bb2c4 100644
--- a/target/s390x/tcg/insn-data.def
+++ b/target/s390x/tcg/insn-data.def
@@ -1027,6 +1027,14 @@
 F(0xe756, VLR, VRR_a, V,   0, 0, 0, 0, vlr, 0, IF_VEC)
 /* VECTOR LOAD AND REPLICATE */
 F(0xe705, VLREP,   VRX,   V,   la2, 0, 0, 0, vlrep, 0, IF_VEC)
+/* VECTOR LOAD BYTE REVERSED ELEMENT */
+E(0xe601, VLEBRH,  VRX,   VE2, la2, 0, 0, 0, vlebr, 0, ES_16, IF_VEC)
+E(0xe603, VLEBRF,  VRX,   VE2, la2, 0, 0, 0, vlebr, 0, ES_32, IF_VEC)
+E(0xe602, VLEBRG,  VRX,   VE2, la2, 0, 0, 0, vlebr, 0, ES_64, IF_VEC)
+/* VECTOR LOAD BYTE REVERSED ELEMENT AND REPLICATE */
+F(0xe605, VLBRREP, VRX,   VE2, la2, 0, 0, 0, vlbrrep, 0, IF_VEC)
+/* VECTOR LOAD BYTE REVERSED ELEMENT AND ZERO */
+F(0xe604, VLLEBRZ, VRX,   VE2, la2, 0, 0, 0, vllebrz, 0, IF_VEC)
 /* VECTOR LOAD BYTE REVERSED ELEMENTS */
 F(0xe606, VLBR,VRX,   VE2, la2, 0, 0, 0, vlbr, 0, IF_VEC)
 /* VECTOR LOAD ELEMENT */
@@ -1081,6 +1089,10 @@
 F(0xe75f, VSEG,VRR_a, V,   0, 0, 0, 0, vseg, 0, IF_VEC)
 /* VECTOR STORE */
 F(0xe70e, VST, VRX,   V,   la2, 0, 0, 0, vst, 0, IF_VEC)
+/* VECTOR STORE BYTE REVERSED ELEMENT */
+E(0xe609, VSTEBRH,  VRX,   VE2, la2, 0, 0, 0, vstebr, 0, ES_16, IF_VEC)
+E(0xe60b, VSTEBRF,  VRX,   VE2, la2, 0, 0, 0, vstebr, 0, ES_32, IF_VEC)
+E(0xe60a, VSTEBRG,  VRX,   VE2, la2, 0, 0, 0, vstebr, 0, ES_64, IF_VEC)
 /* VECTOR STORE BYTE REVERSED ELEMENTS */
 F(0xe60e, VSTBR,VRX,   VE2, la2, 0, 0, 0, vstbr, 0, IF_VEC)
 /* VECTOR STORE ELEMENT */
diff --git a/target/s390x/tcg/translate_vx.c.inc b/target/s390x/tcg/translate_vx.c.inc
index c0b4a5b9ed..e57d0395db 100644
--- a/target/s390x/tcg/translate_vx.c.inc
+++ b/target/s390x/tcg/translate_vx.c.inc
@@ -457,6 +457,73 @@ static DisasJumpType op_vlrep(DisasContext *s, DisasOps *o)
 return DISAS_NEXT;
 }
 
+static DisasJumpType op_vlebr(DisasContext *s, DisasOps *o)
+{
+const uint8_t es = s->insn->data;
+const uint8_t enr = get_field(s, m3);
+TCGv_i64 tmp;
+
+if (!valid_vec_element(enr, es)) {
+gen_program_exception(s, PGM_SPECIFICATION);
+return DISAS_NORETURN;
+}
+
+tmp = tcg_temp_new_i64();
+tcg_gen_qemu_ld_i64(tmp, o->addr1, get_mem_index(s), MO_LE | es);
+write_vec_element_i64(tmp, get_field(s, v1), enr, es);
+tcg_temp_free_i64(tmp);
+return DISAS_NEXT;
+}
+
+static DisasJumpType op_vlbrrep(DisasContext *s, DisasOps *o)
+{
+const uint8_t es = get_field(s, m3);
+TCGv_i64 tmp;
+
+if (es < ES_16 || es > ES_64) {
+gen_program_exception(s, PGM_SPECIFICATION);
+return DISAS_NORETURN;
+}
+
+tmp = tcg_temp_new_i64();
+tcg_gen_qemu_ld_i64(tmp, o->addr1, get_mem_index(s), MO_LE | es);
+gen_gvec_dup_i64(es, get_field(s, v1), tmp);
+tcg_temp_free_i64(tmp);
+return DISAS_NEXT;
+}
+
+static DisasJumpType op_vllebrz(DisasContext *s, DisasOps *o)
+{
+const uint8_t m3 = get_field(s, m3);
+TCGv_i64 tmp;
+int es, lshift;
+
+switch (m3) {
+case ES_16:
+case ES_32:
+case ES_64:
+es = m3;
+lshift = 0;
+break;
+case 6:
+es = ES_32;
+lshift = 32;
+break;
+default:
+gen_program_exception(s, PGM_SPECIFICATION);
+return DISAS_NORETURN;
+}
+
+tmp = tcg_temp_new_i64();
+tcg_gen_qemu_ld_i64(tmp, o->addr1, get_mem_index(s), MO_LE | es);
+tcg_gen_shli_i64(tmp, tmp, lshift);
+
+write_vec_element_i64(tmp, get_field(s, v1), 0, ES_64);
+write_vec_element_i64(tcg_constant_i64(0), get_field(s, v1), 1, ES_64);
+tcg_temp_free_i64(tmp);
+return DISAS_NEXT;
+}
+
 static DisasJumpType op_vlbr(DisasContext *s, DisasOps *o)
 {
 const uint8_t es = get_field(s, m3);
@@ -1054,6 +1121,24 @@ static DisasJumpType op_vst(DisasContext *s, DisasOps *o)
 return DISAS_NEXT;
 }
 
+static DisasJumpType op_vstebr(DisasContext *s, DisasOps *o)
+{
+const uint8_t es = s->insn->data;
+const uint8_t enr = get_field(s, m3);
+TCGv_i64 tmp;
+
+if (!valid_vec_element(enr, es)) {
+gen_program_exception(s, PGM_SPECIFICATION);
+return DISAS_NORETURN;
+}
+
+tmp = tcg_temp_new_i64();
+read_vec_element_i64(tmp, get_field(s, v1), enr, es);
+tcg_gen_qemu_st_i64(tmp, o->addr1, get_mem_index(s), MO_LE | es);
+tcg_temp_free_i64(tmp);
+return DISAS_NEXT;
+}
+
 static DisasJumpType op_vstbr(DisasContext *s, DisasOps *o)
 {
 const uint8_t es = get_field(s, m3);
-- 
2.34.1




[PATCH v5 06/11] target/s390x: vxeh2: vector {load, store} elements reversed

2022-03-23 Thread David Miller
Signed-off-by: David Miller 
Signed-off-by: Richard Henderson 
Reviewed-by: David Hildenbrand 
---
 target/s390x/tcg/insn-data.def  |  4 ++
 target/s390x/tcg/translate_vx.c.inc | 84 +
 2 files changed, 88 insertions(+)

diff --git a/target/s390x/tcg/insn-data.def b/target/s390x/tcg/insn-data.def
index 98a31a557d..b524541a7d 100644
--- a/target/s390x/tcg/insn-data.def
+++ b/target/s390x/tcg/insn-data.def
@@ -1037,6 +1037,8 @@
 E(0xe741, VLEIH,   VRI_a, V,   0, 0, 0, 0, vlei, 0, ES_16, IF_VEC)
 E(0xe743, VLEIF,   VRI_a, V,   0, 0, 0, 0, vlei, 0, ES_32, IF_VEC)
 E(0xe742, VLEIG,   VRI_a, V,   0, 0, 0, 0, vlei, 0, ES_64, IF_VEC)
+/* VECTOR LOAD ELEMENTS REVERSED */
+F(0xe607, VLER,VRX,   VE2, la2, 0, 0, 0, vler, 0, IF_VEC)
 /* VECTOR LOAD GR FROM VR ELEMENT */
 F(0xe721, VLGV,VRS_c, V,   la2, 0, r1, 0, vlgv, 0, IF_VEC)
 /* VECTOR LOAD LOGICAL ELEMENT AND ZERO */
@@ -1082,6 +1084,8 @@
 E(0xe709, VSTEH,   VRX,   V,   la2, 0, 0, 0, vste, 0, ES_16, IF_VEC)
 E(0xe70b, VSTEF,   VRX,   V,   la2, 0, 0, 0, vste, 0, ES_32, IF_VEC)
 E(0xe70a, VSTEG,   VRX,   V,   la2, 0, 0, 0, vste, 0, ES_64, IF_VEC)
+/* VECTOR STORE ELEMENTS REVERSED */
+F(0xe60f, VSTER,   VRX,   VE2, la2, 0, 0, 0, vster, 0, IF_VEC)
 /* VECTOR STORE MULTIPLE */
 F(0xe73e, VSTM,VRS_a, V,   la2, 0, 0, 0, vstm, 0, IF_VEC)
 /* VECTOR STORE WITH LENGTH */
diff --git a/target/s390x/tcg/translate_vx.c.inc b/target/s390x/tcg/translate_vx.c.inc
index bb997de794..0bef1200e3 100644
--- a/target/s390x/tcg/translate_vx.c.inc
+++ b/target/s390x/tcg/translate_vx.c.inc
@@ -492,6 +492,46 @@ static DisasJumpType op_vlei(DisasContext *s, DisasOps *o)
 return DISAS_NEXT;
 }
 
+static DisasJumpType op_vler(DisasContext *s, DisasOps *o)
+{
+const uint8_t es = get_field(s, m3);
+
+if (es < ES_16 || es > ES_64) {
+gen_program_exception(s, PGM_SPECIFICATION);
+return DISAS_NORETURN;
+}
+
+TCGv_i64 t0 = tcg_temp_new_i64();
+TCGv_i64 t1 = tcg_temp_new_i64();
+
+/* Begin with the two doublewords swapped... */
+tcg_gen_qemu_ld_i64(t1, o->addr1, get_mem_index(s), MO_TEUQ);
+gen_addi_and_wrap_i64(s, o->addr1, o->addr1, 8);
+tcg_gen_qemu_ld_i64(t0, o->addr1, get_mem_index(s), MO_TEUQ);
+
+/* ... then swap smaller elements within the doublewords as required. */
+switch (es) {
+case MO_16:
+tcg_gen_hswap_i64(t1, t1);
+tcg_gen_hswap_i64(t0, t0);
+break;
+case MO_32:
+tcg_gen_wswap_i64(t1, t1);
+tcg_gen_wswap_i64(t0, t0);
+break;
+case MO_64:
+break;
+default:
+g_assert_not_reached();
+}
+
+write_vec_element_i64(t0, get_field(s, v1), 0, ES_64);
+write_vec_element_i64(t1, get_field(s, v1), 1, ES_64);
+tcg_temp_free(t0);
+tcg_temp_free(t1);
+return DISAS_NEXT;
+}
+
 static DisasJumpType op_vlgv(DisasContext *s, DisasOps *o)
 {
 const uint8_t es = get_field(s, m4);
@@ -976,6 +1016,50 @@ static DisasJumpType op_vste(DisasContext *s, DisasOps *o)
 return DISAS_NEXT;
 }
 
+static DisasJumpType op_vster(DisasContext *s, DisasOps *o)
+{
+const uint8_t es = get_field(s, m3);
+TCGv_i64 t0, t1;
+
+if (es < ES_16 || es > ES_64) {
+gen_program_exception(s, PGM_SPECIFICATION);
+return DISAS_NORETURN;
+}
+
+/* Probe write access before actually modifying memory */
+gen_helper_probe_write_access(cpu_env, o->addr1, tcg_constant_i64(16));
+
+/* Begin with the two doublewords swapped... */
+t0 = tcg_temp_new_i64();
+t1 = tcg_temp_new_i64();
+read_vec_element_i64(t1,  get_field(s, v1), 0, ES_64);
+read_vec_element_i64(t0,  get_field(s, v1), 1, ES_64);
+
+/* ... then swap smaller elements within the doublewords as required. */
+switch (es) {
+case MO_16:
+tcg_gen_hswap_i64(t1, t1);
+tcg_gen_hswap_i64(t0, t0);
+break;
+case MO_32:
+tcg_gen_wswap_i64(t1, t1);
+tcg_gen_wswap_i64(t0, t0);
+break;
+case MO_64:
+break;
+default:
+g_assert_not_reached();
+}
+
+tcg_gen_qemu_st_i64(t0, o->addr1, get_mem_index(s), MO_TEUQ);
+gen_addi_and_wrap_i64(s, o->addr1, o->addr1, 8);
+tcg_gen_qemu_st_i64(t1, o->addr1, get_mem_index(s), MO_TEUQ);
+
+tcg_temp_free(t0);
+tcg_temp_free(t1);
+return DISAS_NEXT;
+}
+
 static DisasJumpType op_vstm(DisasContext *s, DisasOps *o)
 {
 const uint8_t v3 = get_field(s, v3);
-- 
2.34.1




[PATCH v5 04/11] target/s390x: vxeh2: Update for changes to vector shifts

2022-03-23 Thread David Miller
Signed-off-by: David Miller 
Signed-off-by: Richard Henderson 
Reviewed-by: David Hildenbrand 
---
 target/s390x/helper.h   |  3 ++
 target/s390x/tcg/insn-data.def  | 12 ++---
 target/s390x/tcg/translate_vx.c.inc | 75 -
 target/s390x/tcg/vec_int_helper.c   | 55 +
 4 files changed, 95 insertions(+), 50 deletions(-)

diff --git a/target/s390x/helper.h b/target/s390x/helper.h
index 7412130883..bf33d86f74 100644
--- a/target/s390x/helper.h
+++ b/target/s390x/helper.h
@@ -203,8 +203,11 @@ DEF_HELPER_FLAGS_3(gvec_vpopct16, TCG_CALL_NO_RWG, void, ptr, cptr, i32)
 DEF_HELPER_FLAGS_4(gvec_verim8, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
 DEF_HELPER_FLAGS_4(gvec_verim16, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
 DEF_HELPER_FLAGS_4(gvec_vsl, TCG_CALL_NO_RWG, void, ptr, cptr, i64, i32)
+DEF_HELPER_FLAGS_4(gvec_vsl_ve2, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
 DEF_HELPER_FLAGS_4(gvec_vsra, TCG_CALL_NO_RWG, void, ptr, cptr, i64, i32)
+DEF_HELPER_FLAGS_4(gvec_vsra_ve2, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
 DEF_HELPER_FLAGS_4(gvec_vsrl, TCG_CALL_NO_RWG, void, ptr, cptr, i64, i32)
+DEF_HELPER_FLAGS_4(gvec_vsrl_ve2, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
 DEF_HELPER_FLAGS_4(gvec_vscbi8, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
 DEF_HELPER_FLAGS_4(gvec_vscbi16, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
 DEF_HELPER_4(gvec_vtm, void, ptr, cptr, env, i32)
diff --git a/target/s390x/tcg/insn-data.def b/target/s390x/tcg/insn-data.def
index 46add91a0e..f487a64abf 100644
--- a/target/s390x/tcg/insn-data.def
+++ b/target/s390x/tcg/insn-data.def
@@ -1204,19 +1204,19 @@
 F(0xe778, VESRLV,  VRR_c, V,   0, 0, 0, 0, vesv, 0, IF_VEC)
 F(0xe738, VESRL,   VRS_a, V,   la2, 0, 0, 0, ves, 0, IF_VEC)
 /* VECTOR SHIFT LEFT */
-F(0xe774, VSL, VRR_c, V,   0, 0, 0, 0, vsl, 0, IF_VEC)
+E(0xe774, VSL, VRR_c, V,   0, 0, 0, 0, vsl, 0, 0, IF_VEC)
 /* VECTOR SHIFT LEFT BY BYTE */
-F(0xe775, VSLB,VRR_c, V,   0, 0, 0, 0, vsl, 0, IF_VEC)
+E(0xe775, VSLB,VRR_c, V,   0, 0, 0, 0, vsl, 0, 1, IF_VEC)
 /* VECTOR SHIFT LEFT DOUBLE BY BYTE */
 F(0xe777, VSLDB,   VRI_d, V,   0, 0, 0, 0, vsldb, 0, IF_VEC)
 /* VECTOR SHIFT RIGHT ARITHMETIC */
-F(0xe77e, VSRA,VRR_c, V,   0, 0, 0, 0, vsra, 0, IF_VEC)
+E(0xe77e, VSRA,VRR_c, V,   0, 0, 0, 0, vsra, 0, 0, IF_VEC)
 /* VECTOR SHIFT RIGHT ARITHMETIC BY BYTE */
-F(0xe77f, VSRAB,   VRR_c, V,   0, 0, 0, 0, vsra, 0, IF_VEC)
+E(0xe77f, VSRAB,   VRR_c, V,   0, 0, 0, 0, vsra, 0, 1, IF_VEC)
 /* VECTOR SHIFT RIGHT LOGICAL */
-F(0xe77c, VSRL,VRR_c, V,   0, 0, 0, 0, vsrl, 0, IF_VEC)
+E(0xe77c, VSRL,VRR_c, V,   0, 0, 0, 0, vsrl, 0, 0, IF_VEC)
 /* VECTOR SHIFT RIGHT LOGICAL BY BYTE */
-F(0xe77d, VSRLB,   VRR_c, V,   0, 0, 0, 0, vsrl, 0, IF_VEC)
+E(0xe77d, VSRLB,   VRR_c, V,   0, 0, 0, 0, vsrl, 0, 1, IF_VEC)
 /* VECTOR SUBTRACT */
 F(0xe7f7, VS,  VRR_c, V,   0, 0, 0, 0, vs, 0, IF_VEC)
 /* VECTOR SUBTRACT COMPUTE BORROW INDICATION */
diff --git a/target/s390x/tcg/translate_vx.c.inc b/target/s390x/tcg/translate_vx.c.inc
index 29e4dd78a8..fd53ddafef 100644
--- a/target/s390x/tcg/translate_vx.c.inc
+++ b/target/s390x/tcg/translate_vx.c.inc
@@ -2018,23 +2018,44 @@ static DisasJumpType op_ves(DisasContext *s, DisasOps *o)
 return DISAS_NEXT;
 }
 
-static DisasJumpType op_vsl(DisasContext *s, DisasOps *o)
+static DisasJumpType gen_vsh_by_byte(DisasContext *s, DisasOps *o,
+  gen_helper_gvec_2i *gen,
+  gen_helper_gvec_3 *gen_ve2)
 {
-TCGv_i64 shift = tcg_temp_new_i64();
+bool byte = s->insn->data;
 
-read_vec_element_i64(shift, get_field(s, v3), 7, ES_8);
-if (s->fields.op2 == 0x74) {
-tcg_gen_andi_i64(shift, shift, 0x7);
+if (!byte && s390_has_feat(S390_FEAT_VECTOR_ENH2)) {
+gen_gvec_3_ool(get_field(s, v1), get_field(s, v2),
+   get_field(s, v3), 0, gen_ve2);
 } else {
-tcg_gen_andi_i64(shift, shift, 0x78);
-}
+TCGv_i64 shift = tcg_temp_new_i64();
 
-gen_gvec_2i_ool(get_field(s, v1), get_field(s, v2),
-shift, 0, gen_helper_gvec_vsl);
-tcg_temp_free_i64(shift);
+read_vec_element_i64(shift, get_field(s, v3), 7, ES_8);
+tcg_gen_andi_i64(shift, shift, byte ? 0x78 : 7);
+gen_gvec_2i_ool(get_field(s, v1), get_field(s, v2), shift, 0, gen);
+tcg_temp_free_i64(shift);
+}
 return DISAS_NEXT;
 }
 
+static DisasJumpType op_vsl(DisasContext *s, DisasOps *o)
+{
+return gen_vsh_by_byte(s, o, gen_helper_gvec_vsl,
+gen_helper_gvec_vsl_ve2);
+}
+
+static DisasJumpType op_vsra(DisasContext *s, DisasOps *o)
+{
+return gen_vsh_by_byte(s, o, gen_helper_gvec_vsra,
+gen_helper_gvec_vsra_ve2);
+}
+
+static DisasJumpType op_vsrl(DisasContext *s, DisasOps *o)
+{
+return 

[PATCH v5 03/11] target/s390x: vxeh2: vector string search

2022-03-23 Thread David Miller
Signed-off-by: David Miller 
Signed-off-by: Richard Henderson 
Reviewed-by: David Hildenbrand 
---
 target/s390x/helper.h|  6 ++
 target/s390x/tcg/insn-data.def   |  2 +
 target/s390x/tcg/translate.c |  3 +-
 target/s390x/tcg/translate_vx.c.inc  | 25 +++
 target/s390x/tcg/vec_string_helper.c | 99 
 5 files changed, 134 insertions(+), 1 deletion(-)

diff --git a/target/s390x/helper.h b/target/s390x/helper.h
index 7cbcbd7f0b..7412130883 100644
--- a/target/s390x/helper.h
+++ b/target/s390x/helper.h
@@ -246,6 +246,12 @@ DEF_HELPER_6(gvec_vstrc_cc32, void, ptr, cptr, cptr, cptr, env, i32)
 DEF_HELPER_6(gvec_vstrc_cc_rt8, void, ptr, cptr, cptr, cptr, env, i32)
 DEF_HELPER_6(gvec_vstrc_cc_rt16, void, ptr, cptr, cptr, cptr, env, i32)
 DEF_HELPER_6(gvec_vstrc_cc_rt32, void, ptr, cptr, cptr, cptr, env, i32)
+DEF_HELPER_6(gvec_vstrs_8, void, ptr, cptr, cptr, cptr, env, i32)
+DEF_HELPER_6(gvec_vstrs_16, void, ptr, cptr, cptr, cptr, env, i32)
+DEF_HELPER_6(gvec_vstrs_32, void, ptr, cptr, cptr, cptr, env, i32)
+DEF_HELPER_6(gvec_vstrs_zs8, void, ptr, cptr, cptr, cptr, env, i32)
+DEF_HELPER_6(gvec_vstrs_zs16, void, ptr, cptr, cptr, cptr, env, i32)
+DEF_HELPER_6(gvec_vstrs_zs32, void, ptr, cptr, cptr, cptr, env, i32)
 
 /* === Vector Floating-Point Instructions */
 DEF_HELPER_FLAGS_5(gvec_vfa32, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, i32)
diff --git a/target/s390x/tcg/insn-data.def b/target/s390x/tcg/insn-data.def
index 6c8a8b229f..46add91a0e 100644
--- a/target/s390x/tcg/insn-data.def
+++ b/target/s390x/tcg/insn-data.def
@@ -1246,6 +1246,8 @@
 F(0xe75c, VISTR,   VRR_a, V,   0, 0, 0, 0, vistr, 0, IF_VEC)
 /* VECTOR STRING RANGE COMPARE */
 F(0xe78a, VSTRC,   VRR_d, V,   0, 0, 0, 0, vstrc, 0, IF_VEC)
+/* VECTOR STRING SEARCH */
+F(0xe78b, VSTRS,   VRR_d, VE2, 0, 0, 0, 0, vstrs, 0, IF_VEC)
 
 /* === Vector Floating-Point Instructions */
 
diff --git a/target/s390x/tcg/translate.c b/target/s390x/tcg/translate.c
index 904b51542f..d9ac29573d 100644
--- a/target/s390x/tcg/translate.c
+++ b/target/s390x/tcg/translate.c
@@ -6222,7 +6222,8 @@ enum DisasInsnEnum {
 #define FAC_PCI S390_FEAT_ZPCI /* z/PCI facility */
 #define FAC_AIS S390_FEAT_ADAPTER_INT_SUPPRESSION
 #define FAC_V   S390_FEAT_VECTOR /* vector facility */
-#define FAC_VE  S390_FEAT_VECTOR_ENH /* vector enhancements facility 1 */
+#define FAC_VE  S390_FEAT_VECTOR_ENH  /* vector enhancements facility 1 */
+#define FAC_VE2 S390_FEAT_VECTOR_ENH2 /* vector enhancements facility 2 */
 #define FAC_MIE2 S390_FEAT_MISC_INSTRUCTION_EXT2 /* miscellaneous-instruction-extensions facility 2 */
 #define FAC_MIE3 S390_FEAT_MISC_INSTRUCTION_EXT3 /* miscellaneous-instruction-extensions facility 3 */
 
diff --git a/target/s390x/tcg/translate_vx.c.inc b/target/s390x/tcg/translate_vx.c.inc
index ea28e40d4f..29e4dd78a8 100644
--- a/target/s390x/tcg/translate_vx.c.inc
+++ b/target/s390x/tcg/translate_vx.c.inc
@@ -2497,6 +2497,31 @@ static DisasJumpType op_vstrc(DisasContext *s, DisasOps *o)
 return DISAS_NEXT;
 }
 
+static DisasJumpType op_vstrs(DisasContext *s, DisasOps *o)
+{
+typedef void (*helper_vstrs)(TCGv_ptr, TCGv_ptr, TCGv_ptr,
+ TCGv_ptr, TCGv_ptr, TCGv_i32);
+static const helper_vstrs fns[3][2] = {
+{ gen_helper_gvec_vstrs_8, gen_helper_gvec_vstrs_zs8 },
+{ gen_helper_gvec_vstrs_16, gen_helper_gvec_vstrs_zs16 },
+{ gen_helper_gvec_vstrs_32, gen_helper_gvec_vstrs_zs32 },
+};
+const uint8_t es = get_field(s, m5);
+const uint8_t m6 = get_field(s, m6);
+const bool zs = extract32(m6, 1, 1);
+
+if (es > ES_32 || m6 & ~2) {
+gen_program_exception(s, PGM_SPECIFICATION);
+return DISAS_NORETURN;
+}
+
+gen_gvec_4_ptr(get_field(s, v1), get_field(s, v2),
+   get_field(s, v3), get_field(s, v4),
+   cpu_env, 0, fns[es][zs]);
+set_cc_static(s);
+return DISAS_NEXT;
+}
+
 static DisasJumpType op_vfa(DisasContext *s, DisasOps *o)
 {
 const uint8_t fpf = get_field(s, m4);
diff --git a/target/s390x/tcg/vec_string_helper.c b/target/s390x/tcg/vec_string_helper.c
index ac315eb095..00135865c0 100644
--- a/target/s390x/tcg/vec_string_helper.c
+++ b/target/s390x/tcg/vec_string_helper.c
@@ -471,3 +471,102 @@ void HELPER(gvec_vstrc_cc_rt##BITS)(void *v1, const void *v2, const void *v3,  \
 DEF_VSTRC_CC_RT_HELPER(8)
 DEF_VSTRC_CC_RT_HELPER(16)
 DEF_VSTRC_CC_RT_HELPER(32)
+
+static int vstrs(S390Vector *v1, const S390Vector *v2, const S390Vector *v3,
+ const S390Vector *v4, uint8_t es, bool zs)
+{
+int substr_elen, substr_0, str_elen, i, j, k, cc;
+int nelem = 16 >> es;
+bool eos = false;
+
+substr_elen = s390_vec_read_element8(v4, 7) >> es;
+
+/* If ZS, bound substr length by min(nelem, strlen(v3)). */
+if (zs) {
+substr_elen = MIN

[PATCH v5 00/11] s390x/tcg: Implement Vector-Enhancements Facility 2

2022-03-23 Thread David Miller
Implement Vector-Enhancements Facility 2 for s390x

resolves: https://gitlab.com/qemu-project/qemu/-/issues/738

implements:
VECTOR LOAD ELEMENTS REVERSED                   (VLER)
VECTOR LOAD BYTE REVERSED ELEMENTS              (VLBR)
VECTOR LOAD BYTE REVERSED ELEMENT               (VLEBRH, VLEBRF, VLEBRG)
VECTOR LOAD BYTE REVERSED ELEMENT AND ZERO      (VLLEBRZ)
VECTOR LOAD BYTE REVERSED ELEMENT AND REPLICATE (VLBRREP)
VECTOR STORE ELEMENTS REVERSED                  (VSTER)
VECTOR STORE BYTE REVERSED ELEMENTS             (VSTBR)
VECTOR STORE BYTE REVERSED ELEMENT              (VSTEBRH, VSTEBRF, VSTEBRG)
VECTOR SHIFT LEFT DOUBLE BY BIT                 (VSLD)
VECTOR SHIFT RIGHT DOUBLE BY BIT                (VSRD)
VECTOR STRING SEARCH                            (VSTRS)

modifies:
VECTOR FP CONVERT FROM FIXED   (VCFPS)
VECTOR FP CONVERT FROM LOGICAL (VCFPL)
VECTOR FP CONVERT TO FIXED     (VCSFP)
VECTOR FP CONVERT TO LOGICAL   (VCLFP)
VECTOR SHIFT LEFT              (VSL)
VECTOR SHIFT RIGHT ARITHMETIC  (VSRA)
VECTOR SHIFT RIGHT LOGICAL     (VSRL)


David Miller (9):
  target/s390x: vxeh2: vector convert short/32b
  target/s390x: vxeh2: vector string search
  target/s390x: vxeh2: Update for changes to vector shifts
  target/s390x: vxeh2: vector shift double by bit
  target/s390x: vxeh2: vector {load, store} elements reversed
  target/s390x: vxeh2: vector {load, store} byte reversed elements
  target/s390x: vxeh2: vector {load, store} byte reversed element
  target/s390x: add S390_FEAT_VECTOR_ENH2 to qemu CPU model
  tests/tcg/s390x: Tests for Vector Enhancements Facility 2

Richard Henderson (2):
  tcg: Implement tcg_gen_{h,w}swap_{i32,i64}
  target/s390x: Fix writeback to v1 in helper_vstl

 include/tcg/tcg-op.h |   6 +
 target/s390x/gen-features.c  |   2 +
 target/s390x/helper.h|  13 +
 target/s390x/tcg/insn-data.def   |  40 ++-
 target/s390x/tcg/translate.c |   3 +-
 target/s390x/tcg/translate_vx.c.inc  | 461 ---
 target/s390x/tcg/vec_fpu_helper.c|  31 ++
 target/s390x/tcg/vec_helper.c|   2 -
 target/s390x/tcg/vec_int_helper.c|  55 
 target/s390x/tcg/vec_string_helper.c |  99 ++
 tcg/tcg-op.c |  30 ++
 tests/tcg/s390x/Makefile.target  |   8 +
 tests/tcg/s390x/vx.h |  19 ++
 tests/tcg/s390x/vxeh2_vcvt.c |  88 +
 tests/tcg/s390x/vxeh2_vlstr.c| 139 
 tests/tcg/s390x/vxeh2_vs.c   |  93 ++
 16 files changed, 1034 insertions(+), 55 deletions(-)
 create mode 100644 tests/tcg/s390x/vx.h
 create mode 100644 tests/tcg/s390x/vxeh2_vcvt.c
 create mode 100644 tests/tcg/s390x/vxeh2_vlstr.c
 create mode 100644 tests/tcg/s390x/vxeh2_vs.c

-- 
2.34.1




[PATCH v5 01/11] tcg: Implement tcg_gen_{h,w}swap_{i32,i64}

2022-03-23 Thread David Miller
From: Richard Henderson 

Swap half-words (16-bit) and words (32-bit) within a larger value.
Mirrors functions of the same names within include/qemu/bitops.h.

Signed-off-by: Richard Henderson 
Reviewed-by: David Miller 
Reviewed-by: David Hildenbrand 
---
 include/tcg/tcg-op.h |  6 ++
 tcg/tcg-op.c | 30 ++
 2 files changed, 36 insertions(+)

diff --git a/include/tcg/tcg-op.h b/include/tcg/tcg-op.h
index caa0a63612..b09b8b4a05 100644
--- a/include/tcg/tcg-op.h
+++ b/include/tcg/tcg-op.h
@@ -332,6 +332,7 @@ void tcg_gen_ext8u_i32(TCGv_i32 ret, TCGv_i32 arg);
 void tcg_gen_ext16u_i32(TCGv_i32 ret, TCGv_i32 arg);
 void tcg_gen_bswap16_i32(TCGv_i32 ret, TCGv_i32 arg, int flags);
 void tcg_gen_bswap32_i32(TCGv_i32 ret, TCGv_i32 arg);
+void tcg_gen_hswap_i32(TCGv_i32 ret, TCGv_i32 arg);
 void tcg_gen_smin_i32(TCGv_i32, TCGv_i32 arg1, TCGv_i32 arg2);
 void tcg_gen_smax_i32(TCGv_i32, TCGv_i32 arg1, TCGv_i32 arg2);
 void tcg_gen_umin_i32(TCGv_i32, TCGv_i32 arg1, TCGv_i32 arg2);
@@ -531,6 +532,8 @@ void tcg_gen_ext32u_i64(TCGv_i64 ret, TCGv_i64 arg);
 void tcg_gen_bswap16_i64(TCGv_i64 ret, TCGv_i64 arg, int flags);
 void tcg_gen_bswap32_i64(TCGv_i64 ret, TCGv_i64 arg, int flags);
 void tcg_gen_bswap64_i64(TCGv_i64 ret, TCGv_i64 arg);
+void tcg_gen_hswap_i64(TCGv_i64 ret, TCGv_i64 arg);
+void tcg_gen_wswap_i64(TCGv_i64 ret, TCGv_i64 arg);
 void tcg_gen_smin_i64(TCGv_i64, TCGv_i64 arg1, TCGv_i64 arg2);
 void tcg_gen_smax_i64(TCGv_i64, TCGv_i64 arg1, TCGv_i64 arg2);
 void tcg_gen_umin_i64(TCGv_i64, TCGv_i64 arg1, TCGv_i64 arg2);
@@ -1077,6 +1080,8 @@ void tcg_gen_stl_vec(TCGv_vec r, TCGv_ptr base, TCGArg 
offset, TCGType t);
 #define tcg_gen_bswap32_tl tcg_gen_bswap32_i64
 #define tcg_gen_bswap64_tl tcg_gen_bswap64_i64
 #define tcg_gen_bswap_tl tcg_gen_bswap64_i64
+#define tcg_gen_hswap_tl tcg_gen_hswap_i64
+#define tcg_gen_wswap_tl tcg_gen_wswap_i64
 #define tcg_gen_concat_tl_i64 tcg_gen_concat32_i64
 #define tcg_gen_extr_i64_tl tcg_gen_extr32_i64
 #define tcg_gen_andc_tl tcg_gen_andc_i64
@@ -1192,6 +1197,7 @@ void tcg_gen_stl_vec(TCGv_vec r, TCGv_ptr base, TCGArg 
offset, TCGType t);
 #define tcg_gen_bswap16_tl tcg_gen_bswap16_i32
 #define tcg_gen_bswap32_tl(D, S, F) tcg_gen_bswap32_i32(D, S)
 #define tcg_gen_bswap_tl tcg_gen_bswap32_i32
+#define tcg_gen_hswap_tl tcg_gen_hswap_i32
 #define tcg_gen_concat_tl_i64 tcg_gen_concat_i32_i64
 #define tcg_gen_extr_i64_tl tcg_gen_extr_i64_i32
 #define tcg_gen_andc_tl tcg_gen_andc_i32
diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c
index 65e1c94c2d..ae336ff6c2 100644
--- a/tcg/tcg-op.c
+++ b/tcg/tcg-op.c
@@ -1056,6 +1056,12 @@ void tcg_gen_bswap32_i32(TCGv_i32 ret, TCGv_i32 arg)
 }
 }
 
+void tcg_gen_hswap_i32(TCGv_i32 ret, TCGv_i32 arg)
+{
+/* Swapping 2 16-bit elements is a rotate. */
+tcg_gen_rotli_i32(ret, arg, 16);
+}
+
 void tcg_gen_smin_i32(TCGv_i32 ret, TCGv_i32 a, TCGv_i32 b)
 {
 tcg_gen_movcond_i32(TCG_COND_LT, ret, a, b, a, b);
@@ -1792,6 +1798,30 @@ void tcg_gen_bswap64_i64(TCGv_i64 ret, TCGv_i64 arg)
 }
 }
 
+void tcg_gen_hswap_i64(TCGv_i64 ret, TCGv_i64 arg)
+{
+uint64_t m = 0x0000ffff0000ffffull;
+TCGv_i64 t0 = tcg_temp_new_i64();
+TCGv_i64 t1 = tcg_temp_new_i64();
+
+/* See include/qemu/bitops.h, hswap64. */
+tcg_gen_rotli_i64(t1, arg, 32);
+tcg_gen_andi_i64(t0, t1, m);
+tcg_gen_shli_i64(t0, t0, 16);
+tcg_gen_shri_i64(t1, t1, 16);
+tcg_gen_andi_i64(t1, t1, m);
+tcg_gen_or_i64(ret, t0, t1);
+
+tcg_temp_free_i64(t0);
+tcg_temp_free_i64(t1);
+}
+
+void tcg_gen_wswap_i64(TCGv_i64 ret, TCGv_i64 arg)
+{
+/* Swapping 2 32-bit elements is a rotate. */
+tcg_gen_rotli_i64(ret, arg, 32);
+}
+
 void tcg_gen_not_i64(TCGv_i64 ret, TCGv_i64 arg)
 {
 if (TCG_TARGET_REG_BITS == 32) {
-- 
2.34.1




[PATCH v5 02/11] target/s390x: vxeh2: vector convert short/32b

2022-03-23 Thread David Miller
Signed-off-by: David Miller 
Reviewed-by: David Hildenbrand 
Signed-off-by: Richard Henderson 
---
 target/s390x/helper.h   |  4 +++
 target/s390x/tcg/translate_vx.c.inc | 44 ++---
 target/s390x/tcg/vec_fpu_helper.c   | 31 
 3 files changed, 75 insertions(+), 4 deletions(-)

diff --git a/target/s390x/helper.h b/target/s390x/helper.h
index 69f69cf718..7cbcbd7f0b 100644
--- a/target/s390x/helper.h
+++ b/target/s390x/helper.h
@@ -275,6 +275,10 @@ DEF_HELPER_FLAGS_5(gvec_vfche64, TCG_CALL_NO_WG, void, 
ptr, cptr, cptr, env, i32
 DEF_HELPER_5(gvec_vfche64_cc, void, ptr, cptr, cptr, env, i32)
 DEF_HELPER_FLAGS_5(gvec_vfche128, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, 
i32)
 DEF_HELPER_5(gvec_vfche128_cc, void, ptr, cptr, cptr, env, i32)
+DEF_HELPER_FLAGS_4(gvec_vcdg32, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
+DEF_HELPER_FLAGS_4(gvec_vcdlg32, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
+DEF_HELPER_FLAGS_4(gvec_vcgd32, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
+DEF_HELPER_FLAGS_4(gvec_vclgd32, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
 DEF_HELPER_FLAGS_4(gvec_vcdg64, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
 DEF_HELPER_FLAGS_4(gvec_vcdlg64, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
 DEF_HELPER_FLAGS_4(gvec_vcgd64, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
diff --git a/target/s390x/tcg/translate_vx.c.inc 
b/target/s390x/tcg/translate_vx.c.inc
index 98eb7710a4..ea28e40d4f 100644
--- a/target/s390x/tcg/translate_vx.c.inc
+++ b/target/s390x/tcg/translate_vx.c.inc
@@ -2720,23 +2720,59 @@ static DisasJumpType op_vcdg(DisasContext *s, DisasOps 
*o)
 
 switch (s->fields.op2) {
 case 0xc3:
-if (fpf == FPF_LONG) {
+switch (fpf) {
+case FPF_LONG:
 fn = gen_helper_gvec_vcdg64;
+break;
+case FPF_SHORT:
+if (s390_has_feat(S390_FEAT_VECTOR_ENH2)) {
+fn = gen_helper_gvec_vcdg32;
+}
+break;
+default:
+break;
 }
 break;
 case 0xc1:
-if (fpf == FPF_LONG) {
+switch (fpf) {
+case FPF_LONG:
 fn = gen_helper_gvec_vcdlg64;
+break;
+case FPF_SHORT:
+if (s390_has_feat(S390_FEAT_VECTOR_ENH2)) {
+fn = gen_helper_gvec_vcdlg32;
+}
+break;
+default:
+break;
 }
 break;
 case 0xc2:
-if (fpf == FPF_LONG) {
+switch (fpf) {
+case FPF_LONG:
 fn = gen_helper_gvec_vcgd64;
+break;
+case FPF_SHORT:
+if (s390_has_feat(S390_FEAT_VECTOR_ENH2)) {
+fn = gen_helper_gvec_vcgd32;
+}
+break;
+default:
+break;
 }
 break;
 case 0xc0:
-if (fpf == FPF_LONG) {
+switch (fpf) {
+case FPF_LONG:
 fn = gen_helper_gvec_vclgd64;
+break;
+case FPF_SHORT:
+if (s390_has_feat(S390_FEAT_VECTOR_ENH2)) {
+fn = gen_helper_gvec_vclgd32;
+}
+break;
+default:
+break;
 }
 break;
 case 0xc7:
diff --git a/target/s390x/tcg/vec_fpu_helper.c 
b/target/s390x/tcg/vec_fpu_helper.c
index 1a77993471..6834dbc540 100644
--- a/target/s390x/tcg/vec_fpu_helper.c
+++ b/target/s390x/tcg/vec_fpu_helper.c
@@ -176,6 +176,30 @@ static void vop128_2(S390Vector *v1, const S390Vector *v2, 
CPUS390XState *env,
 *v1 = tmp;
 }
 
+static float32 vcdg32(float32 a, float_status *s)
+{
+return int32_to_float32(a, s);
+}
+
+static float32 vcdlg32(float32 a, float_status *s)
+{
+return uint32_to_float32(a, s);
+}
+
+static float32 vcgd32(float32 a, float_status *s)
+{
+const float32 tmp = float32_to_int32(a, s);
+
+return float32_is_any_nan(a) ? INT32_MIN : tmp;
+}
+
+static float32 vclgd32(float32 a, float_status *s)
+{
+const float32 tmp = float32_to_uint32(a, s);
+
+return float32_is_any_nan(a) ? 0 : tmp;
+}
+
 static float64 vcdg64(float64 a, float_status *s)
 {
 return int64_to_float64(a, s);
@@ -211,6 +235,9 @@ void HELPER(gvec_##NAME##BITS)(void *v1, const void *v2, 
CPUS390XState *env,   \
 vop##BITS##_2(v1, v2, env, se, XxC, erm, FN, GETPC()); 
\
 }
 
+#define DEF_GVEC_VOP2_32(NAME) 
\
+DEF_GVEC_VOP2_FN(NAME, NAME##32, 32)
+
 #define DEF_GVEC_VOP2_64(NAME) 
\
 DEF_GVEC_VOP2_FN(NAME, NAME##64, 64)
 
@@ -219,6 +246,10 @@ DEF_GVEC_VOP2_FN(NAME, float32_##OP, 32)   
\
 DEF_GVEC_VOP2_FN(NAME, float64_##OP, 64)   
\
 DEF_GVEC_VOP2_FN(NAME, float128_##OP, 128)
 
+DEF_GVEC_VOP2_32(vcdg)
+DEF_GVEC_VOP2_32(vcdlg)
+DEF_GVEC_VOP2_32(vcgd)
+DEF_GVEC_VOP2_32(vclgd)
 DEF_GVEC_VOP2_64(vcdg)
 DEF_GVEC_VOP2_64(vcdlg)
 DEF_GVEC_VOP2_64(vcgd)
-- 
2.34.1




Re: [PATCH v3 03/11] target/s390x: vxeh2: vector string search

2022-03-22 Thread David Miller
I came to much the same conclusion

On Tue, Mar 22, 2022 at 10:42 AM Richard Henderson
 wrote:
>
> On 3/21/22 03:31, David Hildenbrand wrote:
> >> +for (i = 0; i < nelem; i++) {
> >> +if (s390_vec_read_element(v3, i, es) == 0) {
> >> +break;
> >> +}
> >> +}
> >> +if (i < substr_elen) {
> >> +substr_elen = i;
> >> +}
> >
> > Maybe combine both, I guess there is no need to search beyond substr_elen.
> >
> > substr_elen = MIN(substr_elen, nelem);
> > for (i = 0; i < substr_elen; i++) {
> >  if (s390_vec_read_element(v3, i, es) == 0) {
> >  substr_elen = i;
> >  break;
> >  }
> > }
>
> Yep.
>
> > We should do the MIN(substr_elen, nelem) maybe right when reading it
> > from v4.
>
> No, v4 does not get bounded until zs is set.
>
> >> +/* If ZS, look for eos in the searched string. */
> >> +if (zs) {
> >> +for (k = 0; k < nelem; k++) {
> >> +if (s390_vec_read_element(v2, k, es) == 0) {
> >> +eos = true;
> >> +break;
> >> +}
> >> +}
> >
> > I guess we could move that into the main search loop and avoid parsing
> > the string twice. Not sure what's better.
>
> I'd leave it here, so that we only do the strlen once.  There's no obvious 
> place within
> the search loop that wouldn't wind up doing the strlen more than once.
>
>
> r~



[PATCH v4 09/11] target/s390x: add S390_FEAT_VECTOR_ENH2 to qemu CPU model

2022-03-21 Thread David Miller
Signed-off-by: David Miller 
Signed-off-by: Richard Henderson 
---
 target/s390x/gen-features.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/target/s390x/gen-features.c b/target/s390x/gen-features.c
index 22846121c4..499a3b10a8 100644
--- a/target/s390x/gen-features.c
+++ b/target/s390x/gen-features.c
@@ -740,7 +740,9 @@ static uint16_t qemu_V6_2[] = {
 
 static uint16_t qemu_LATEST[] = {
 S390_FEAT_MISC_INSTRUCTION_EXT3,
+S390_FEAT_VECTOR_ENH2,
 };
+
 /* add all new definitions before this point */
 static uint16_t qemu_MAX[] = {
 /* generates a dependency warning, leave it out for now */
-- 
2.34.1




[PATCH v4 10/11] tests/tcg/s390x: Tests for Vector Enhancements Facility 2

2022-03-21 Thread David Miller
Signed-off-by: David Miller 
Signed-off-by: Richard Henderson 
---
 tests/tcg/s390x/Makefile.target |   8 ++
 tests/tcg/s390x/vxeh2_vcvt.c|  97 +
 tests/tcg/s390x/vxeh2_vlstr.c   | 146 
 tests/tcg/s390x/vxeh2_vs.c  |  91 
 4 files changed, 342 insertions(+)
 create mode 100644 tests/tcg/s390x/vxeh2_vcvt.c
 create mode 100644 tests/tcg/s390x/vxeh2_vlstr.c
 create mode 100644 tests/tcg/s390x/vxeh2_vs.c

diff --git a/tests/tcg/s390x/Makefile.target b/tests/tcg/s390x/Makefile.target
index 8c9b6a13ce..921a056dd1 100644
--- a/tests/tcg/s390x/Makefile.target
+++ b/tests/tcg/s390x/Makefile.target
@@ -16,6 +16,14 @@ TESTS+=shift
 TESTS+=trap
 TESTS+=signals-s390x
 
+VECTOR_TESTS=vxeh2_vs
+VECTOR_TESTS+=vxeh2_vcvt
+VECTOR_TESTS+=vxeh2_vlstr
+
+TESTS+=$(VECTOR_TESTS)
+
+$(VECTOR_TESTS): CFLAGS+=-march=z15 -O2
+
 ifneq ($(HAVE_GDB_BIN),)
 GDB_SCRIPT=$(SRC_PATH)/tests/guest-debug/run-test.py
 
diff --git a/tests/tcg/s390x/vxeh2_vcvt.c b/tests/tcg/s390x/vxeh2_vcvt.c
new file mode 100644
index 0000000000..71ecbd77b0
--- /dev/null
+++ b/tests/tcg/s390x/vxeh2_vcvt.c
@@ -0,0 +1,97 @@
+/*
+ * vxeh2_vcvt: vector-enhancements facility 2 vector convert *
+ */
+#include <stdint.h>
+
+typedef union S390Vector {
+uint64_t d[2];  /* doubleword */
+uint32_t w[4];  /* word */
+uint16_t h[8];  /* halfword */
+uint8_t  b[16]; /* byte */
+float    f[4];
+double   fd[2];
+__uint128_t v;
+} S390Vector;
+
+#define M_S 8
+#define M4_XxC 4
+#define M4_def M4_XxC
+
+static inline void vcfps(S390Vector *v1, S390Vector *v2,
+const uint8_t m3,  const uint8_t m4,  const uint8_t m5)
+{
+asm volatile("vcfps %[v1], %[v2], %[m3], %[m4], %[m5]\n"
+: [v1] "=v" (v1->v)
+: [v2]  "v" (v2->v)
+, [m3]  "i" (m3)
+, [m4]  "i" (m4)
+, [m5]  "i" (m5));
+}
+
+static inline void vcfpl(S390Vector *v1, S390Vector *v2,
+const uint8_t m3,  const uint8_t m4,  const uint8_t m5)
+{
+asm volatile("vcfpl %[v1], %[v2], %[m3], %[m4], %[m5]\n"
+: [v1] "=v" (v1->v)
+: [v2]  "v" (v2->v)
+, [m3]  "i" (m3)
+, [m4]  "i" (m4)
+, [m5]  "i" (m5));
+}
+
+static inline void vcsfp(S390Vector *v1, S390Vector *v2,
+const uint8_t m3,  const uint8_t m4,  const uint8_t m5)
+{
+asm volatile("vcsfp %[v1], %[v2], %[m3], %[m4], %[m5]\n"
+: [v1] "=v" (v1->v)
+: [v2]  "v" (v2->v)
+, [m3]  "i" (m3)
+, [m4]  "i" (m4)
+, [m5]  "i" (m5));
+}
+
+static inline void vclfp(S390Vector *v1, S390Vector *v2,
+const uint8_t m3,  const uint8_t m4,  const uint8_t m5)
+{
+asm volatile("vclfp %[v1], %[v2], %[m3], %[m4], %[m5]\n"
+: [v1] "=v" (v1->v)
+: [v2]  "v" (v2->v)
+, [m3]  "i" (m3)
+, [m4]  "i" (m4)
+, [m5]  "i" (m5));
+}
+
+int main(int argc, char *argv[])
+{
+S390Vector vd;
+S390Vector vs_i32 = { .w[0] = 1, .w[1] = 64, .w[2] = 1024, .w[3] = -10 };
+S390Vector vs_u32 = { .w[0] = 2, .w[1] = 32, .w[2] = 4096, .w[3] = 8888 };
+S390Vector vs_f32 = { .f[0] = 3.987, .f[1] = 5.123,
+  .f[2] = 4.499, .f[3] = 0.512 };
+
+vd.d[0] = vd.d[1] = 0;
+vcfps(&vd, &vs_i32, 2, M4_def, 0);
+if (1 != vd.f[0] || 1024 != vd.f[2] || 64 != vd.f[1] || -10 != vd.f[3]) {
+return 1;
+}
+
+vd.d[0] = vd.d[1] = 0;
+vcfpl(&vd, &vs_u32, 2, M4_def, 0);
+if (2 != vd.f[0] || 4096 != vd.f[2] || 32 != vd.f[1] || 8888 != vd.f[3]) {
+return 1;
+}
+
+vd.d[0] = vd.d[1] = 0;
+vcsfp(&vd, &vs_f32, 2, M4_def, 0);
+if (4 != vd.w[0] || 4 != vd.w[2] || 5 != vd.w[1] || 1 != vd.w[3]) {
+return 1;
+}
+
+vd.d[0] = vd.d[1] = 0;
+vclfp(&vd, &vs_f32, 2, M4_def, 0);
+if (4 != vd.w[0] || 4 != vd.w[2] || 5 != vd.w[1] || 1 != vd.w[3]) {
+return 1;
+}
+
+return 0;
+}
diff --git a/tests/tcg/s390x/vxeh2_vlstr.c b/tests/tcg/s390x/vxeh2_vlstr.c
new file mode 100644
index 0000000000..bf2954e86d
--- /dev/null
+++ b/tests/tcg/s390x/vxeh2_vlstr.c
@@ -0,0 +1,146 @@
+/*
+ * vxeh2_vlstr: vector-enhancements facility 2 vector load/store reversed *
+ */
+#include <stdint.h>
+
+typedef union S390Vector {
+uint64_t d[2];  /* doubleword */
+uint32_t w[4];  /* word */
+uint16_t h[8];  /* halfword */
+uint8_t  b[16]; /* byte */
+__uint128_t v;
+} S390Vector;
+
+#define ES8  0
+#define ES16 1
+#define ES32 2
+#define ES64 3
+
+#define vtst(v1, v2) \
+if (v1.d[0] != v2.d[0] || v1.d[1] != v2.d[1]) { \
+return 1; \
+}
+
+static inline void vle

[PATCH v4 06/11] target/s390x: vxeh2: vector {load, store} elements reversed

2022-03-21 Thread David Miller
Signed-off-by: David Miller 
Signed-off-by: Richard Henderson 
Reviewed-by: David Hildenbrand 
---
 target/s390x/tcg/insn-data.def  |  4 ++
 target/s390x/tcg/translate_vx.c.inc | 84 +
 2 files changed, 88 insertions(+)

diff --git a/target/s390x/tcg/insn-data.def b/target/s390x/tcg/insn-data.def
index 98a31a557d..b524541a7d 100644
--- a/target/s390x/tcg/insn-data.def
+++ b/target/s390x/tcg/insn-data.def
@@ -1037,6 +1037,8 @@
 E(0xe741, VLEIH,   VRI_a, V,   0, 0, 0, 0, vlei, 0, ES_16, IF_VEC)
 E(0xe743, VLEIF,   VRI_a, V,   0, 0, 0, 0, vlei, 0, ES_32, IF_VEC)
 E(0xe742, VLEIG,   VRI_a, V,   0, 0, 0, 0, vlei, 0, ES_64, IF_VEC)
+/* VECTOR LOAD ELEMENTS REVERSED */
+F(0xe607, VLER,VRX,   VE2, la2, 0, 0, 0, vler, 0, IF_VEC)
 /* VECTOR LOAD GR FROM VR ELEMENT */
 F(0xe721, VLGV,VRS_c, V,   la2, 0, r1, 0, vlgv, 0, IF_VEC)
 /* VECTOR LOAD LOGICAL ELEMENT AND ZERO */
@@ -1082,6 +1084,8 @@
 E(0xe709, VSTEH,   VRX,   V,   la2, 0, 0, 0, vste, 0, ES_16, IF_VEC)
 E(0xe70b, VSTEF,   VRX,   V,   la2, 0, 0, 0, vste, 0, ES_32, IF_VEC)
 E(0xe70a, VSTEG,   VRX,   V,   la2, 0, 0, 0, vste, 0, ES_64, IF_VEC)
+/* VECTOR STORE ELEMENTS REVERSED */
+F(0xe60f, VSTER,   VRX,   VE2, la2, 0, 0, 0, vster, 0, IF_VEC)
 /* VECTOR STORE MULTIPLE */
 F(0xe73e, VSTM,VRS_a, V,   la2, 0, 0, 0, vstm, 0, IF_VEC)
 /* VECTOR STORE WITH LENGTH */
diff --git a/target/s390x/tcg/translate_vx.c.inc 
b/target/s390x/tcg/translate_vx.c.inc
index bb997de794..0bef1200e3 100644
--- a/target/s390x/tcg/translate_vx.c.inc
+++ b/target/s390x/tcg/translate_vx.c.inc
@@ -492,6 +492,46 @@ static DisasJumpType op_vlei(DisasContext *s, DisasOps *o)
 return DISAS_NEXT;
 }
 
+static DisasJumpType op_vler(DisasContext *s, DisasOps *o)
+{
+const uint8_t es = get_field(s, m3);
+
+if (es < ES_16 || es > ES_64) {
+gen_program_exception(s, PGM_SPECIFICATION);
+return DISAS_NORETURN;
+}
+
+TCGv_i64 t0 = tcg_temp_new_i64();
+TCGv_i64 t1 = tcg_temp_new_i64();
+
+/* Begin with the two doublewords swapped... */
+tcg_gen_qemu_ld_i64(t1, o->addr1, get_mem_index(s), MO_TEUQ);
+gen_addi_and_wrap_i64(s, o->addr1, o->addr1, 8);
+tcg_gen_qemu_ld_i64(t0, o->addr1, get_mem_index(s), MO_TEUQ);
+
+/* ... then swap smaller elements within the doublewords as required. */
+switch (es) {
+case MO_16:
+tcg_gen_hswap_i64(t1, t1);
+tcg_gen_hswap_i64(t0, t0);
+break;
+case MO_32:
+tcg_gen_wswap_i64(t1, t1);
+tcg_gen_wswap_i64(t0, t0);
+break;
+case MO_64:
+break;
+default:
+g_assert_not_reached();
+}
+
+write_vec_element_i64(t0, get_field(s, v1), 0, ES_64);
+write_vec_element_i64(t1, get_field(s, v1), 1, ES_64);
+tcg_temp_free(t0);
+tcg_temp_free(t1);
+return DISAS_NEXT;
+}
+
 static DisasJumpType op_vlgv(DisasContext *s, DisasOps *o)
 {
 const uint8_t es = get_field(s, m4);
@@ -976,6 +1016,50 @@ static DisasJumpType op_vste(DisasContext *s, DisasOps *o)
 return DISAS_NEXT;
 }
 
+static DisasJumpType op_vster(DisasContext *s, DisasOps *o)
+{
+const uint8_t es = get_field(s, m3);
+TCGv_i64 t0, t1;
+
+if (es < ES_16 || es > ES_64) {
+gen_program_exception(s, PGM_SPECIFICATION);
+return DISAS_NORETURN;
+}
+
+/* Probe write access before actually modifying memory */
+gen_helper_probe_write_access(cpu_env, o->addr1, tcg_constant_i64(16));
+
+/* Begin with the two doublewords swapped... */
+t0 = tcg_temp_new_i64();
+t1 = tcg_temp_new_i64();
+read_vec_element_i64(t1,  get_field(s, v1), 0, ES_64);
+read_vec_element_i64(t0,  get_field(s, v1), 1, ES_64);
+
+/* ... then swap smaller elements within the doublewords as required. */
+switch (es) {
+case MO_16:
+tcg_gen_hswap_i64(t1, t1);
+tcg_gen_hswap_i64(t0, t0);
+break;
+case MO_32:
+tcg_gen_wswap_i64(t1, t1);
+tcg_gen_wswap_i64(t0, t0);
+break;
+case MO_64:
+break;
+default:
+g_assert_not_reached();
+}
+
+tcg_gen_qemu_st_i64(t0, o->addr1, get_mem_index(s), MO_TEUQ);
+gen_addi_and_wrap_i64(s, o->addr1, o->addr1, 8);
+tcg_gen_qemu_st_i64(t1, o->addr1, get_mem_index(s), MO_TEUQ);
+
+tcg_temp_free(t0);
+tcg_temp_free(t1);
+return DISAS_NEXT;
+}
+
 static DisasJumpType op_vstm(DisasContext *s, DisasOps *o)
 {
 const uint8_t v3 = get_field(s, v3);
-- 
2.34.1




[PATCH v4 07/11] target/s390x: vxeh2: vector {load, store} byte reversed elements

2022-03-21 Thread David Miller
Signed-off-by: David Miller 
Signed-off-by: Richard Henderson 
---
 target/s390x/tcg/insn-data.def  |   4 +
 target/s390x/tcg/translate_vx.c.inc | 115 
 2 files changed, 119 insertions(+)

diff --git a/target/s390x/tcg/insn-data.def b/target/s390x/tcg/insn-data.def
index b524541a7d..ee6e1dc9e5 100644
--- a/target/s390x/tcg/insn-data.def
+++ b/target/s390x/tcg/insn-data.def
@@ -1027,6 +1027,8 @@
 F(0xe756, VLR, VRR_a, V,   0, 0, 0, 0, vlr, 0, IF_VEC)
 /* VECTOR LOAD AND REPLICATE */
 F(0xe705, VLREP,   VRX,   V,   la2, 0, 0, 0, vlrep, 0, IF_VEC)
+/* VECTOR LOAD BYTE REVERSED ELEMENTS */
+F(0xe606, VLBR,VRX,   VE2, la2, 0, 0, 0, vlbr, 0, IF_VEC)
 /* VECTOR LOAD ELEMENT */
 E(0xe700, VLEB,VRX,   V,   la2, 0, 0, 0, vle, 0, ES_8, IF_VEC)
 E(0xe701, VLEH,VRX,   V,   la2, 0, 0, 0, vle, 0, ES_16, IF_VEC)
@@ -1079,6 +1081,8 @@
 F(0xe75f, VSEG,VRR_a, V,   0, 0, 0, 0, vseg, 0, IF_VEC)
 /* VECTOR STORE */
 F(0xe70e, VST, VRX,   V,   la2, 0, 0, 0, vst, 0, IF_VEC)
+/* VECTOR STORE BYTE REVERSED ELEMENTS */
+F(0xe60e, VSTBR,VRX,   VE2, la2, 0, 0, 0, vstbr, 0, IF_VEC)
 /* VECTOR STORE ELEMENT */
 E(0xe708, VSTEB,   VRX,   V,   la2, 0, 0, 0, vste, 0, ES_8, IF_VEC)
 E(0xe709, VSTEH,   VRX,   V,   la2, 0, 0, 0, vste, 0, ES_16, IF_VEC)
diff --git a/target/s390x/tcg/translate_vx.c.inc 
b/target/s390x/tcg/translate_vx.c.inc
index 0bef1200e3..284ee4362c 100644
--- a/target/s390x/tcg/translate_vx.c.inc
+++ b/target/s390x/tcg/translate_vx.c.inc
@@ -457,6 +457,63 @@ static DisasJumpType op_vlrep(DisasContext *s, DisasOps *o)
 return DISAS_NEXT;
 }
 
+static DisasJumpType op_vlbr(DisasContext *s, DisasOps *o)
+{
+const uint8_t es = get_field(s, m3);
+TCGv_i64 t0, t1;
+
+if (es < ES_16 || es > ES_128) {
+gen_program_exception(s, PGM_SPECIFICATION);
+return DISAS_NORETURN;
+}
+
+t0 = tcg_temp_new_i64();
+t1 = tcg_temp_new_i64();
+
+
+if (es == ES_128) {
+tcg_gen_qemu_ld_i64(t1, o->addr1, get_mem_index(s), MO_LEUQ);
+gen_addi_and_wrap_i64(s, o->addr1, o->addr1, 8);
+tcg_gen_qemu_ld_i64(t0, o->addr1, get_mem_index(s), MO_LEUQ);
+goto write;
+}
+
+/* Begin with byte reversed doublewords... */
+tcg_gen_qemu_ld_i64(t0, o->addr1, get_mem_index(s), MO_LEUQ);
+gen_addi_and_wrap_i64(s, o->addr1, o->addr1, 8);
+tcg_gen_qemu_ld_i64(t1, o->addr1, get_mem_index(s), MO_LEUQ);
+
+/*
+ * For 16 and 32-bit elements, the doubleword bswap also reversed
+ * the order of the elements.  Perform a larger order swap to put
+ * them back into place.  For the 128-bit "element", finish the
+ * bswap by swapping the doublewords.
+ */
+switch (es) {
+case ES_16:
+tcg_gen_hswap_i64(t0, t0);
+tcg_gen_hswap_i64(t1, t1);
+break;
+case ES_32:
+tcg_gen_wswap_i64(t0, t0);
+tcg_gen_wswap_i64(t1, t1);
+break;
+case ES_64:
+case ES_128:
+break;
+default:
+g_assert_not_reached();
+}
+
+write:
+write_vec_element_i64(t0, get_field(s, v1), 0, ES_64);
+write_vec_element_i64(t1, get_field(s, v1), 1, ES_64);
+
+tcg_temp_free(t0);
+tcg_temp_free(t1);
+return DISAS_NEXT;
+}
+
 static DisasJumpType op_vle(DisasContext *s, DisasOps *o)
 {
 const uint8_t es = s->insn->data;
@@ -998,6 +1055,64 @@ static DisasJumpType op_vst(DisasContext *s, DisasOps *o)
 return DISAS_NEXT;
 }
 
+static DisasJumpType op_vstbr(DisasContext *s, DisasOps *o)
+{
+const uint8_t es = get_field(s, m3);
+TCGv_i64 t0, t1;
+
+if (es < ES_16 || es > ES_128) {
+gen_program_exception(s, PGM_SPECIFICATION);
+return DISAS_NORETURN;
+}
+
+/* Probe write access before actually modifying memory */
+gen_helper_probe_write_access(cpu_env, o->addr1, tcg_constant_i64(16));
+
+t0 = tcg_temp_new_i64();
+t1 = tcg_temp_new_i64();
+
+
+if (es == ES_128) {
+read_vec_element_i64(t1, get_field(s, v1), 0, ES_64);
+read_vec_element_i64(t0, get_field(s, v1), 1, ES_64);
+goto write;
+}
+
+read_vec_element_i64(t0, get_field(s, v1), 0, ES_64);
+read_vec_element_i64(t1, get_field(s, v1), 1, ES_64);
+
+/*
+ * For 16 and 32-bit elements, the doubleword bswap below will
+ * reverse the order of the elements.  Perform a larger order
+ * swap to put them back into place.  For the 128-bit "element",
+ * finish the bswap by swapping the doublewords.
+ */
+switch (es) {
+case MO_16:
+tcg_gen_hswap_i64(t0, t0);
+tcg_gen_hswap_i64(t1, t1);
+break;
+case MO_32:
+tcg_gen_wswap_i64(t0, t0);
+tcg_gen_wswap_i64(t1, t1);
+break;
+case MO_64:
+case MO_128:
+break;
+default:
+g_assert_not_reached();
+}
+
+write:
+tcg_gen_qemu_st_i64(t0, o->addr1, g

[PATCH v4 01/11] tcg: Implement tcg_gen_{h,w}swap_{i32,i64}

2022-03-21 Thread David Miller
From: Richard Henderson 

Swap half-words (16-bit) and words (32-bit) within a larger value.
Mirrors functions of the same names within include/qemu/bitops.h.

Signed-off-by: Richard Henderson 
Reviewed-by: David Miller 
Reviewed-by: David Hildenbrand 
---
 include/tcg/tcg-op.h |  6 ++
 tcg/tcg-op.c | 30 ++
 2 files changed, 36 insertions(+)

diff --git a/include/tcg/tcg-op.h b/include/tcg/tcg-op.h
index caa0a63612..b09b8b4a05 100644
--- a/include/tcg/tcg-op.h
+++ b/include/tcg/tcg-op.h
@@ -332,6 +332,7 @@ void tcg_gen_ext8u_i32(TCGv_i32 ret, TCGv_i32 arg);
 void tcg_gen_ext16u_i32(TCGv_i32 ret, TCGv_i32 arg);
 void tcg_gen_bswap16_i32(TCGv_i32 ret, TCGv_i32 arg, int flags);
 void tcg_gen_bswap32_i32(TCGv_i32 ret, TCGv_i32 arg);
+void tcg_gen_hswap_i32(TCGv_i32 ret, TCGv_i32 arg);
 void tcg_gen_smin_i32(TCGv_i32, TCGv_i32 arg1, TCGv_i32 arg2);
 void tcg_gen_smax_i32(TCGv_i32, TCGv_i32 arg1, TCGv_i32 arg2);
 void tcg_gen_umin_i32(TCGv_i32, TCGv_i32 arg1, TCGv_i32 arg2);
@@ -531,6 +532,8 @@ void tcg_gen_ext32u_i64(TCGv_i64 ret, TCGv_i64 arg);
 void tcg_gen_bswap16_i64(TCGv_i64 ret, TCGv_i64 arg, int flags);
 void tcg_gen_bswap32_i64(TCGv_i64 ret, TCGv_i64 arg, int flags);
 void tcg_gen_bswap64_i64(TCGv_i64 ret, TCGv_i64 arg);
+void tcg_gen_hswap_i64(TCGv_i64 ret, TCGv_i64 arg);
+void tcg_gen_wswap_i64(TCGv_i64 ret, TCGv_i64 arg);
 void tcg_gen_smin_i64(TCGv_i64, TCGv_i64 arg1, TCGv_i64 arg2);
 void tcg_gen_smax_i64(TCGv_i64, TCGv_i64 arg1, TCGv_i64 arg2);
 void tcg_gen_umin_i64(TCGv_i64, TCGv_i64 arg1, TCGv_i64 arg2);
@@ -1077,6 +1080,8 @@ void tcg_gen_stl_vec(TCGv_vec r, TCGv_ptr base, TCGArg 
offset, TCGType t);
 #define tcg_gen_bswap32_tl tcg_gen_bswap32_i64
 #define tcg_gen_bswap64_tl tcg_gen_bswap64_i64
 #define tcg_gen_bswap_tl tcg_gen_bswap64_i64
+#define tcg_gen_hswap_tl tcg_gen_hswap_i64
+#define tcg_gen_wswap_tl tcg_gen_wswap_i64
 #define tcg_gen_concat_tl_i64 tcg_gen_concat32_i64
 #define tcg_gen_extr_i64_tl tcg_gen_extr32_i64
 #define tcg_gen_andc_tl tcg_gen_andc_i64
@@ -1192,6 +1197,7 @@ void tcg_gen_stl_vec(TCGv_vec r, TCGv_ptr base, TCGArg 
offset, TCGType t);
 #define tcg_gen_bswap16_tl tcg_gen_bswap16_i32
 #define tcg_gen_bswap32_tl(D, S, F) tcg_gen_bswap32_i32(D, S)
 #define tcg_gen_bswap_tl tcg_gen_bswap32_i32
+#define tcg_gen_hswap_tl tcg_gen_hswap_i32
 #define tcg_gen_concat_tl_i64 tcg_gen_concat_i32_i64
 #define tcg_gen_extr_i64_tl tcg_gen_extr_i64_i32
 #define tcg_gen_andc_tl tcg_gen_andc_i32
diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c
index 65e1c94c2d..ae336ff6c2 100644
--- a/tcg/tcg-op.c
+++ b/tcg/tcg-op.c
@@ -1056,6 +1056,12 @@ void tcg_gen_bswap32_i32(TCGv_i32 ret, TCGv_i32 arg)
 }
 }
 
+void tcg_gen_hswap_i32(TCGv_i32 ret, TCGv_i32 arg)
+{
+/* Swapping 2 16-bit elements is a rotate. */
+tcg_gen_rotli_i32(ret, arg, 16);
+}
+
 void tcg_gen_smin_i32(TCGv_i32 ret, TCGv_i32 a, TCGv_i32 b)
 {
 tcg_gen_movcond_i32(TCG_COND_LT, ret, a, b, a, b);
@@ -1792,6 +1798,30 @@ void tcg_gen_bswap64_i64(TCGv_i64 ret, TCGv_i64 arg)
 }
 }
 
+void tcg_gen_hswap_i64(TCGv_i64 ret, TCGv_i64 arg)
+{
+uint64_t m = 0x0000ffff0000ffffull;
+TCGv_i64 t0 = tcg_temp_new_i64();
+TCGv_i64 t1 = tcg_temp_new_i64();
+
+/* See include/qemu/bitops.h, hswap64. */
+tcg_gen_rotli_i64(t1, arg, 32);
+tcg_gen_andi_i64(t0, t1, m);
+tcg_gen_shli_i64(t0, t0, 16);
+tcg_gen_shri_i64(t1, t1, 16);
+tcg_gen_andi_i64(t1, t1, m);
+tcg_gen_or_i64(ret, t0, t1);
+
+tcg_temp_free_i64(t0);
+tcg_temp_free_i64(t1);
+}
+
+void tcg_gen_wswap_i64(TCGv_i64 ret, TCGv_i64 arg)
+{
+/* Swapping 2 32-bit elements is a rotate. */
+tcg_gen_rotli_i64(ret, arg, 32);
+}
+
 void tcg_gen_not_i64(TCGv_i64 ret, TCGv_i64 arg)
 {
 if (TCG_TARGET_REG_BITS == 32) {
-- 
2.34.1




[PATCH v4 08/11] target/s390x: vxeh2: vector {load, store} byte reversed element

2022-03-21 Thread David Miller
Signed-off-by: David Miller 
Signed-off-by: Richard Henderson 
Reviewed-by: David Hildenbrand 
---
 target/s390x/tcg/insn-data.def  | 12 
 target/s390x/tcg/translate_vx.c.inc | 85 +
 2 files changed, 97 insertions(+)

diff --git a/target/s390x/tcg/insn-data.def b/target/s390x/tcg/insn-data.def
index ee6e1dc9e5..5e448bb2c4 100644
--- a/target/s390x/tcg/insn-data.def
+++ b/target/s390x/tcg/insn-data.def
@@ -1027,6 +1027,14 @@
 F(0xe756, VLR, VRR_a, V,   0, 0, 0, 0, vlr, 0, IF_VEC)
 /* VECTOR LOAD AND REPLICATE */
 F(0xe705, VLREP,   VRX,   V,   la2, 0, 0, 0, vlrep, 0, IF_VEC)
+/* VECTOR LOAD BYTE REVERSED ELEMENT */
+E(0xe601, VLEBRH,  VRX,   VE2, la2, 0, 0, 0, vlebr, 0, ES_16, IF_VEC)
+E(0xe603, VLEBRF,  VRX,   VE2, la2, 0, 0, 0, vlebr, 0, ES_32, IF_VEC)
+E(0xe602, VLEBRG,  VRX,   VE2, la2, 0, 0, 0, vlebr, 0, ES_64, IF_VEC)
+/* VECTOR LOAD BYTE REVERSED ELEMENT AND REPLICATE */
+F(0xe605, VLBRREP, VRX,   VE2, la2, 0, 0, 0, vlbrrep, 0, IF_VEC)
+/* VECTOR LOAD BYTE REVERSED ELEMENT AND ZERO */
+F(0xe604, VLLEBRZ, VRX,   VE2, la2, 0, 0, 0, vllebrz, 0, IF_VEC)
 /* VECTOR LOAD BYTE REVERSED ELEMENTS */
 F(0xe606, VLBR,VRX,   VE2, la2, 0, 0, 0, vlbr, 0, IF_VEC)
 /* VECTOR LOAD ELEMENT */
@@ -1081,6 +1089,10 @@
 F(0xe75f, VSEG,VRR_a, V,   0, 0, 0, 0, vseg, 0, IF_VEC)
 /* VECTOR STORE */
 F(0xe70e, VST, VRX,   V,   la2, 0, 0, 0, vst, 0, IF_VEC)
+/* VECTOR STORE BYTE REVERSED ELEMENT */
+E(0xe609, VSTEBRH,  VRX,   VE2, la2, 0, 0, 0, vstebr, 0, ES_16, IF_VEC)
+E(0xe60b, VSTEBRF,  VRX,   VE2, la2, 0, 0, 0, vstebr, 0, ES_32, IF_VEC)
+E(0xe60a, VSTEBRG,  VRX,   VE2, la2, 0, 0, 0, vstebr, 0, ES_64, IF_VEC)
 /* VECTOR STORE BYTE REVERSED ELEMENTS */
 F(0xe60e, VSTBR,VRX,   VE2, la2, 0, 0, 0, vstbr, 0, IF_VEC)
 /* VECTOR STORE ELEMENT */
diff --git a/target/s390x/tcg/translate_vx.c.inc 
b/target/s390x/tcg/translate_vx.c.inc
index 284ee4362c..ecf7f87c6c 100644
--- a/target/s390x/tcg/translate_vx.c.inc
+++ b/target/s390x/tcg/translate_vx.c.inc
@@ -457,6 +457,73 @@ static DisasJumpType op_vlrep(DisasContext *s, DisasOps *o)
 return DISAS_NEXT;
 }
 
+static DisasJumpType op_vlebr(DisasContext *s, DisasOps *o)
+{
+const uint8_t es = s->insn->data;
+const uint8_t enr = get_field(s, m3);
+TCGv_i64 tmp;
+
+if (!valid_vec_element(enr, es)) {
+gen_program_exception(s, PGM_SPECIFICATION);
+return DISAS_NORETURN;
+}
+
+tmp = tcg_temp_new_i64();
+tcg_gen_qemu_ld_i64(tmp, o->addr1, get_mem_index(s), MO_LE | es);
+write_vec_element_i64(tmp, get_field(s, v1), enr, es);
+tcg_temp_free_i64(tmp);
+return DISAS_NEXT;
+}
+
+static DisasJumpType op_vlbrrep(DisasContext *s, DisasOps *o)
+{
+const uint8_t es = get_field(s, m3);
+TCGv_i64 tmp;
+
+if (es < ES_16 || es > ES_64) {
+gen_program_exception(s, PGM_SPECIFICATION);
+return DISAS_NORETURN;
+}
+
+tmp = tcg_temp_new_i64();
+tcg_gen_qemu_ld_i64(tmp, o->addr1, get_mem_index(s), MO_LE | es);
+gen_gvec_dup_i64(es, get_field(s, v1), tmp);
+tcg_temp_free_i64(tmp);
+return DISAS_NEXT;
+}
+
+static DisasJumpType op_vllebrz(DisasContext *s, DisasOps *o)
+{
+const uint8_t m3 = get_field(s, m3);
+TCGv_i64 tmp;
+int es, lshift;
+
+switch (m3) {
+case ES_16:
+case ES_32:
+case ES_64:
+es = m3;
+lshift = 0;
+break;
+case 6:
+es = ES_32;
+lshift = 32;
+break;
+default:
+gen_program_exception(s, PGM_SPECIFICATION);
+return DISAS_NORETURN;
+}
+
+tmp = tcg_temp_new_i64();
+tcg_gen_qemu_ld_i64(tmp, o->addr1, get_mem_index(s), MO_LE | es);
+tcg_gen_shli_i64(tmp, tmp, lshift);
+
+write_vec_element_i64(tmp, get_field(s, v1), 0, ES_64);
+write_vec_element_i64(tcg_constant_i64(0), get_field(s, v1), 1, ES_64);
+tcg_temp_free_i64(tmp);
+return DISAS_NEXT;
+}
+
 static DisasJumpType op_vlbr(DisasContext *s, DisasOps *o)
 {
 const uint8_t es = get_field(s, m3);
@@ -1055,6 +1122,24 @@ static DisasJumpType op_vst(DisasContext *s, DisasOps *o)
 return DISAS_NEXT;
 }
 
+static DisasJumpType op_vstebr(DisasContext *s, DisasOps *o)
+{
+const uint8_t es = s->insn->data;
+const uint8_t enr = get_field(s, m3);
+TCGv_i64 tmp;
+
+if (!valid_vec_element(enr, es)) {
+gen_program_exception(s, PGM_SPECIFICATION);
+return DISAS_NORETURN;
+}
+
+tmp = tcg_temp_new_i64();
+read_vec_element_i64(tmp, get_field(s, v1), enr, es);
+tcg_gen_qemu_st_i64(tmp, o->addr1, get_mem_index(s), MO_LE | es);
+tcg_temp_free_i64(tmp);
+return DISAS_NEXT;
+}
+
 static DisasJumpType op_vstbr(DisasContext *s, DisasOps *o)
 {
 const uint8_t es = get_field(s, m3);
-- 
2.34.1




[PATCH v4 03/11] target/s390x: vxeh2: vector string search

2022-03-21 Thread David Miller
Signed-off-by: David Miller 
Signed-off-by: Richard Henderson 
---
 target/s390x/helper.h|  6 ++
 target/s390x/tcg/insn-data.def   |  2 +
 target/s390x/tcg/translate.c |  3 +-
 target/s390x/tcg/translate_vx.c.inc  | 25 +++
 target/s390x/tcg/vec_string_helper.c | 99 
 5 files changed, 134 insertions(+), 1 deletion(-)

diff --git a/target/s390x/helper.h b/target/s390x/helper.h
index 7cbcbd7f0b..7412130883 100644
--- a/target/s390x/helper.h
+++ b/target/s390x/helper.h
@@ -246,6 +246,12 @@ DEF_HELPER_6(gvec_vstrc_cc32, void, ptr, cptr, cptr, cptr, 
env, i32)
 DEF_HELPER_6(gvec_vstrc_cc_rt8, void, ptr, cptr, cptr, cptr, env, i32)
 DEF_HELPER_6(gvec_vstrc_cc_rt16, void, ptr, cptr, cptr, cptr, env, i32)
 DEF_HELPER_6(gvec_vstrc_cc_rt32, void, ptr, cptr, cptr, cptr, env, i32)
+DEF_HELPER_6(gvec_vstrs_8, void, ptr, cptr, cptr, cptr, env, i32)
+DEF_HELPER_6(gvec_vstrs_16, void, ptr, cptr, cptr, cptr, env, i32)
+DEF_HELPER_6(gvec_vstrs_32, void, ptr, cptr, cptr, cptr, env, i32)
+DEF_HELPER_6(gvec_vstrs_zs8, void, ptr, cptr, cptr, cptr, env, i32)
+DEF_HELPER_6(gvec_vstrs_zs16, void, ptr, cptr, cptr, cptr, env, i32)
+DEF_HELPER_6(gvec_vstrs_zs32, void, ptr, cptr, cptr, cptr, env, i32)
 
 /* === Vector Floating-Point Instructions */
 DEF_HELPER_FLAGS_5(gvec_vfa32, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, i32)
diff --git a/target/s390x/tcg/insn-data.def b/target/s390x/tcg/insn-data.def
index 6c8a8b229f..46add91a0e 100644
--- a/target/s390x/tcg/insn-data.def
+++ b/target/s390x/tcg/insn-data.def
@@ -1246,6 +1246,8 @@
 F(0xe75c, VISTR,   VRR_a, V,   0, 0, 0, 0, vistr, 0, IF_VEC)
 /* VECTOR STRING RANGE COMPARE */
 F(0xe78a, VSTRC,   VRR_d, V,   0, 0, 0, 0, vstrc, 0, IF_VEC)
+/* VECTOR STRING SEARCH */
+F(0xe78b, VSTRS,   VRR_d, VE2, 0, 0, 0, 0, vstrs, 0, IF_VEC)
 
 /* === Vector Floating-Point Instructions */
 
diff --git a/target/s390x/tcg/translate.c b/target/s390x/tcg/translate.c
index 904b51542f..d9ac29573d 100644
--- a/target/s390x/tcg/translate.c
+++ b/target/s390x/tcg/translate.c
@@ -6222,7 +6222,8 @@ enum DisasInsnEnum {
 #define FAC_PCI S390_FEAT_ZPCI /* z/PCI facility */
 #define FAC_AIS S390_FEAT_ADAPTER_INT_SUPPRESSION
 #define FAC_V   S390_FEAT_VECTOR /* vector facility */
-#define FAC_VE  S390_FEAT_VECTOR_ENH /* vector enhancements facility 1 
*/
+#define FAC_VE  S390_FEAT_VECTOR_ENH  /* vector enhancements facility 
1 */
+#define FAC_VE2 S390_FEAT_VECTOR_ENH2 /* vector enhancements facility 
2 */
 #define FAC_MIE2S390_FEAT_MISC_INSTRUCTION_EXT2 /* 
miscellaneous-instruction-extensions facility 2 */
 #define FAC_MIE3S390_FEAT_MISC_INSTRUCTION_EXT3 /* 
miscellaneous-instruction-extensions facility 3 */
 
diff --git a/target/s390x/tcg/translate_vx.c.inc 
b/target/s390x/tcg/translate_vx.c.inc
index ea28e40d4f..29e4dd78a8 100644
--- a/target/s390x/tcg/translate_vx.c.inc
+++ b/target/s390x/tcg/translate_vx.c.inc
@@ -2497,6 +2497,31 @@ static DisasJumpType op_vstrc(DisasContext *s, DisasOps 
*o)
 return DISAS_NEXT;
 }
 
+static DisasJumpType op_vstrs(DisasContext *s, DisasOps *o)
+{
+    typedef void (*vstrs_fn)(TCGv_ptr, TCGv_ptr, TCGv_ptr,
+                             TCGv_ptr, TCGv_ptr, TCGv_i32);
+    /* Helper table: row = element size (m5), column = ZS bit of m6. */
+    static const vstrs_fn helpers[3][2] = {
+        { gen_helper_gvec_vstrs_8, gen_helper_gvec_vstrs_zs8 },
+        { gen_helper_gvec_vstrs_16, gen_helper_gvec_vstrs_zs16 },
+        { gen_helper_gvec_vstrs_32, gen_helper_gvec_vstrs_zs32 },
+    };
+    const uint8_t esz = get_field(s, m5);
+    const uint8_t flags = get_field(s, m6);
+    const bool use_zs = (flags & 2) != 0;
+
+    /* Reject any m6 bit other than bit 1 and element sizes above ES_32. */
+    if ((flags & ~2) != 0 || esz > ES_32) {
+        gen_program_exception(s, PGM_SPECIFICATION);
+        return DISAS_NORETURN;
+    }
+    gen_gvec_4_ptr(get_field(s, v1), get_field(s, v2), get_field(s, v3),
+                   get_field(s, v4), cpu_env, 0, helpers[esz][use_zs]);
+    set_cc_static(s);
+    return DISAS_NEXT;
+}
+
 static DisasJumpType op_vfa(DisasContext *s, DisasOps *o)
 {
 const uint8_t fpf = get_field(s, m4);
diff --git a/target/s390x/tcg/vec_string_helper.c 
b/target/s390x/tcg/vec_string_helper.c
index ac315eb095..00135865c0 100644
--- a/target/s390x/tcg/vec_string_helper.c
+++ b/target/s390x/tcg/vec_string_helper.c
@@ -471,3 +471,102 @@ void HELPER(gvec_vstrc_cc_rt##BITS)(void *v1, const void 
*v2, const void *v3,  \
 DEF_VSTRC_CC_RT_HELPER(8)
 DEF_VSTRC_CC_RT_HELPER(16)
 DEF_VSTRC_CC_RT_HELPER(32)
+
+static int vstrs(S390Vector *v1, const S390Vector *v2, const S390Vector *v3,
+ const S390Vector *v4, uint8_t es, bool zs)
+{
+int substr_elen, substr_0, str_elen, i, j, k, cc;
+int nelem = 16 >> es;
+bool eos = false;
+
+substr_elen = s390_vec_read_element8(v4, 7) >> es;
+
+/* If ZS, bound substr length by min(nelem, strlen(v3)). */
+if (zs) {
+substr_elen = MIN(substr_elen, nelem);
+for (i =

[PATCH v4 04/11] target/s390x: vxeh2: Update for changes to vector shifts

2022-03-21 Thread David Miller
Signed-off-by: David Miller 
Signed-off-by: Richard Henderson 
---
 target/s390x/helper.h   |  3 ++
 target/s390x/tcg/insn-data.def  | 12 ++---
 target/s390x/tcg/translate_vx.c.inc | 75 -
 target/s390x/tcg/vec_int_helper.c   | 55 +
 4 files changed, 95 insertions(+), 50 deletions(-)

diff --git a/target/s390x/helper.h b/target/s390x/helper.h
index 7412130883..bf33d86f74 100644
--- a/target/s390x/helper.h
+++ b/target/s390x/helper.h
@@ -203,8 +203,11 @@ DEF_HELPER_FLAGS_3(gvec_vpopct16, TCG_CALL_NO_RWG, void, 
ptr, cptr, i32)
 DEF_HELPER_FLAGS_4(gvec_verim8, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
 DEF_HELPER_FLAGS_4(gvec_verim16, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
 DEF_HELPER_FLAGS_4(gvec_vsl, TCG_CALL_NO_RWG, void, ptr, cptr, i64, i32)
+DEF_HELPER_FLAGS_4(gvec_vsl_ve2, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
 DEF_HELPER_FLAGS_4(gvec_vsra, TCG_CALL_NO_RWG, void, ptr, cptr, i64, i32)
+DEF_HELPER_FLAGS_4(gvec_vsra_ve2, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
 DEF_HELPER_FLAGS_4(gvec_vsrl, TCG_CALL_NO_RWG, void, ptr, cptr, i64, i32)
+DEF_HELPER_FLAGS_4(gvec_vsrl_ve2, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
 DEF_HELPER_FLAGS_4(gvec_vscbi8, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
 DEF_HELPER_FLAGS_4(gvec_vscbi16, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
 DEF_HELPER_4(gvec_vtm, void, ptr, cptr, env, i32)
diff --git a/target/s390x/tcg/insn-data.def b/target/s390x/tcg/insn-data.def
index 46add91a0e..f487a64abf 100644
--- a/target/s390x/tcg/insn-data.def
+++ b/target/s390x/tcg/insn-data.def
@@ -1204,19 +1204,19 @@
 F(0xe778, VESRLV,  VRR_c, V,   0, 0, 0, 0, vesv, 0, IF_VEC)
 F(0xe738, VESRL,   VRS_a, V,   la2, 0, 0, 0, ves, 0, IF_VEC)
 /* VECTOR SHIFT LEFT */
-F(0xe774, VSL, VRR_c, V,   0, 0, 0, 0, vsl, 0, IF_VEC)
+E(0xe774, VSL, VRR_c, V,   0, 0, 0, 0, vsl, 0, 0, IF_VEC)
 /* VECTOR SHIFT LEFT BY BYTE */
-F(0xe775, VSLB,VRR_c, V,   0, 0, 0, 0, vsl, 0, IF_VEC)
+E(0xe775, VSLB,VRR_c, V,   0, 0, 0, 0, vsl, 0, 1, IF_VEC)
 /* VECTOR SHIFT LEFT DOUBLE BY BYTE */
 F(0xe777, VSLDB,   VRI_d, V,   0, 0, 0, 0, vsldb, 0, IF_VEC)
 /* VECTOR SHIFT RIGHT ARITHMETIC */
-F(0xe77e, VSRA,VRR_c, V,   0, 0, 0, 0, vsra, 0, IF_VEC)
+E(0xe77e, VSRA,VRR_c, V,   0, 0, 0, 0, vsra, 0, 0, IF_VEC)
 /* VECTOR SHIFT RIGHT ARITHMETIC BY BYTE */
-F(0xe77f, VSRAB,   VRR_c, V,   0, 0, 0, 0, vsra, 0, IF_VEC)
+E(0xe77f, VSRAB,   VRR_c, V,   0, 0, 0, 0, vsra, 0, 1, IF_VEC)
 /* VECTOR SHIFT RIGHT LOGICAL */
-F(0xe77c, VSRL,VRR_c, V,   0, 0, 0, 0, vsrl, 0, IF_VEC)
+E(0xe77c, VSRL,VRR_c, V,   0, 0, 0, 0, vsrl, 0, 0, IF_VEC)
 /* VECTOR SHIFT RIGHT LOGICAL BY BYTE */
-F(0xe77d, VSRLB,   VRR_c, V,   0, 0, 0, 0, vsrl, 0, IF_VEC)
+E(0xe77d, VSRLB,   VRR_c, V,   0, 0, 0, 0, vsrl, 0, 1, IF_VEC)
 /* VECTOR SUBTRACT */
 F(0xe7f7, VS,  VRR_c, V,   0, 0, 0, 0, vs, 0, IF_VEC)
 /* VECTOR SUBTRACT COMPUTE BORROW INDICATION */
diff --git a/target/s390x/tcg/translate_vx.c.inc 
b/target/s390x/tcg/translate_vx.c.inc
index 29e4dd78a8..fd53ddafef 100644
--- a/target/s390x/tcg/translate_vx.c.inc
+++ b/target/s390x/tcg/translate_vx.c.inc
@@ -2018,23 +2018,44 @@ static DisasJumpType op_ves(DisasContext *s, DisasOps 
*o)
 return DISAS_NEXT;
 }
 
-static DisasJumpType op_vsl(DisasContext *s, DisasOps *o)
+static DisasJumpType gen_vsh_by_byte(DisasContext *s, DisasOps *o,
+  gen_helper_gvec_2i *gen,
+  gen_helper_gvec_3 *gen_ve2)
 {
-TCGv_i64 shift = tcg_temp_new_i64();
+bool byte = s->insn->data;
 
-read_vec_element_i64(shift, get_field(s, v3), 7, ES_8);
-if (s->fields.op2 == 0x74) {
-tcg_gen_andi_i64(shift, shift, 0x7);
+if (!byte && s390_has_feat(S390_FEAT_VECTOR_ENH2)) {
+gen_gvec_3_ool(get_field(s, v1), get_field(s, v2),
+   get_field(s, v3), 0, gen_ve2);
 } else {
-tcg_gen_andi_i64(shift, shift, 0x78);
-}
+TCGv_i64 shift = tcg_temp_new_i64();
 
-gen_gvec_2i_ool(get_field(s, v1), get_field(s, v2),
-shift, 0, gen_helper_gvec_vsl);
-tcg_temp_free_i64(shift);
+read_vec_element_i64(shift, get_field(s, v3), 7, ES_8);
+tcg_gen_andi_i64(shift, shift, byte ? 0x78 : 7);
+gen_gvec_2i_ool(get_field(s, v1), get_field(s, v2), shift, 0, gen);
+tcg_temp_free_i64(shift);
+}
 return DISAS_NEXT;
 }
 
+static DisasJumpType op_vsl(DisasContext *s, DisasOps *o)
+{ /* Shift-left translator: defers to gen_vsh_by_byte() with the two vsl helpers. */
+return gen_vsh_by_byte(s, o, gen_helper_gvec_vsl,
+gen_helper_gvec_vsl_ve2);
+}
+
+static DisasJumpType op_vsra(DisasContext *s, DisasOps *o)
+{ /* Shift-right-arithmetic translator: defers to gen_vsh_by_byte() with the vsra helpers. */
+return gen_vsh_by_byte(s, o, gen_helper_gvec_vsra,
+gen_helper_gvec_vsra_ve2);
+}
+
+static DisasJumpType op_vsrl(DisasContext *s, DisasOps *o)
+{
+return gen_vsh_by_byte(s, o, gen

[PATCH v4 05/11] target/s390x: vxeh2: vector shift double by bit

2022-03-21 Thread David Miller
Signed-off-by: David Miller 
Signed-off-by: Richard Henderson 
---
 target/s390x/tcg/insn-data.def  |  6 +++-
 target/s390x/tcg/translate_vx.c.inc | 55 +
 2 files changed, 53 insertions(+), 8 deletions(-)

diff --git a/target/s390x/tcg/insn-data.def b/target/s390x/tcg/insn-data.def
index f487a64abf..98a31a557d 100644
--- a/target/s390x/tcg/insn-data.def
+++ b/target/s390x/tcg/insn-data.def
@@ -1207,12 +1207,16 @@
 E(0xe774, VSL, VRR_c, V,   0, 0, 0, 0, vsl, 0, 0, IF_VEC)
 /* VECTOR SHIFT LEFT BY BYTE */
 E(0xe775, VSLB,VRR_c, V,   0, 0, 0, 0, vsl, 0, 1, IF_VEC)
+/* VECTOR SHIFT LEFT DOUBLE BY BIT */
+E(0xe786, VSLD,VRI_d, VE2, 0, 0, 0, 0, vsld, 0, 0, IF_VEC)
 /* VECTOR SHIFT LEFT DOUBLE BY BYTE */
-F(0xe777, VSLDB,   VRI_d, V,   0, 0, 0, 0, vsldb, 0, IF_VEC)
+E(0xe777, VSLDB,   VRI_d, V,   0, 0, 0, 0, vsld, 0, 1, IF_VEC)
 /* VECTOR SHIFT RIGHT ARITHMETIC */
 E(0xe77e, VSRA,VRR_c, V,   0, 0, 0, 0, vsra, 0, 0, IF_VEC)
 /* VECTOR SHIFT RIGHT ARITHMETIC BY BYTE */
 E(0xe77f, VSRAB,   VRR_c, V,   0, 0, 0, 0, vsra, 0, 1, IF_VEC)
+/* VECTOR SHIFT RIGHT DOUBLE BY BIT */
+F(0xe787, VSRD,VRI_d, VE2, 0, 0, 0, 0, vsrd, 0, IF_VEC)
 /* VECTOR SHIFT RIGHT LOGICAL */
 E(0xe77c, VSRL,VRR_c, V,   0, 0, 0, 0, vsrl, 0, 0, IF_VEC)
 /* VECTOR SHIFT RIGHT LOGICAL BY BYTE */
diff --git a/target/s390x/tcg/translate_vx.c.inc 
b/target/s390x/tcg/translate_vx.c.inc
index fd53ddafef..bb997de794 100644
--- a/target/s390x/tcg/translate_vx.c.inc
+++ b/target/s390x/tcg/translate_vx.c.inc
@@ -2056,14 +2056,23 @@ static DisasJumpType op_vsrl(DisasContext *s, DisasOps 
*o)
 gen_helper_gvec_vsrl_ve2);
 }
 
-static DisasJumpType op_vsldb(DisasContext *s, DisasOps *o)
+static DisasJumpType op_vsld(DisasContext *s, DisasOps *o)
 {
-const uint8_t i4 = get_field(s, i4) & 0xf;
-const int left_shift = (i4 & 7) * 8;
-const int right_shift = 64 - left_shift;
-TCGv_i64 t0 = tcg_temp_new_i64();
-TCGv_i64 t1 = tcg_temp_new_i64();
-TCGv_i64 t2 = tcg_temp_new_i64();
+const bool byte = s->insn->data;
+const uint8_t mask = byte ? 15 : 7;
+const uint8_t mul  = byte ?  8 : 1;
+const uint8_t i4   = get_field(s, i4);
+const int right_shift = 64 - (i4 & 7) * mul;
+TCGv_i64 t0, t1, t2;
+
+if (i4 & ~mask) {
+gen_program_exception(s, PGM_SPECIFICATION);
+return DISAS_NORETURN;
+}
+
+t0 = tcg_temp_new_i64();
+t1 = tcg_temp_new_i64();
+t2 = tcg_temp_new_i64();
 
 if ((i4 & 8) == 0) {
 read_vec_element_i64(t0, get_field(s, v2), 0, ES_64);
@@ -2074,8 +2083,40 @@ static DisasJumpType op_vsldb(DisasContext *s, DisasOps 
*o)
 read_vec_element_i64(t1, get_field(s, v3), 0, ES_64);
 read_vec_element_i64(t2, get_field(s, v3), 1, ES_64);
 }
+
 tcg_gen_extract2_i64(t0, t1, t0, right_shift);
 tcg_gen_extract2_i64(t1, t2, t1, right_shift);
+
+write_vec_element_i64(t0, get_field(s, v1), 0, ES_64);
+write_vec_element_i64(t1, get_field(s, v1), 1, ES_64);
+
+tcg_temp_free(t0);
+tcg_temp_free(t1);
+tcg_temp_free(t2);
+return DISAS_NEXT;
+}
+
+static DisasJumpType op_vsrd(DisasContext *s, DisasOps *o)
+{
+const uint8_t i4 = get_field(s, i4);
+TCGv_i64 t0, t1, t2;
+
+if (i4 & ~7) {
+gen_program_exception(s, PGM_SPECIFICATION);
+return DISAS_NORETURN;
+}
+
+t0 = tcg_temp_new_i64();
+t1 = tcg_temp_new_i64();
+t2 = tcg_temp_new_i64();
+
+read_vec_element_i64(t0, get_field(s, v2), 1, ES_64);
+read_vec_element_i64(t1, get_field(s, v3), 0, ES_64);
+read_vec_element_i64(t2, get_field(s, v3), 1, ES_64);
+
+tcg_gen_extract2_i64(t0, t1, t0, i4);
+tcg_gen_extract2_i64(t1, t2, t1, i4);
+
 write_vec_element_i64(t0, get_field(s, v1), 0, ES_64);
 write_vec_element_i64(t1, get_field(s, v1), 1, ES_64);
 
-- 
2.34.1




[PATCH v4 00/11] s390x/tcg: Implement Vector-Enhancements Facility 2

2022-03-21 Thread David Miller
Implement Vector-Enhancements Facility 2 for s390x

resolves: https://gitlab.com/qemu-project/qemu/-/issues/738

implements:
VECTOR LOAD ELEMENTS REVERSED   (VLER)
VECTOR LOAD BYTE REVERSED ELEMENTS  (VLBR)
VECTOR LOAD BYTE REVERSED ELEMENT   (VLEBRH, VLEBRF, VLEBRG)
VECTOR LOAD BYTE REVERSED ELEMENT AND ZERO  (VLLEBRZ)
VECTOR LOAD BYTE REVERSED ELEMENT AND REPLICATE (VLBRREP)
VECTOR STORE ELEMENTS REVERSED  (VSTER)
VECTOR STORE BYTE REVERSED ELEMENTS (VSTBR)
VECTOR STORE BYTE REVERSED ELEMENT  (VSTEBRH, VSTEBRF, VSTEBRG)
VECTOR SHIFT LEFT DOUBLE BY BIT (VSLD)
VECTOR SHIFT RIGHT DOUBLE BY BIT(VSRD)
VECTOR STRING SEARCH(VSTRS)

modifies:
VECTOR FP CONVERT FROM FIXED(VCFPS)
VECTOR FP CONVERT FROM LOGICAL  (VCFPL)
VECTOR FP CONVERT TO FIXED  (VCSFP)
VECTOR FP CONVERT TO LOGICAL(VCLFP)
VECTOR SHIFT LEFT   (VSL)
VECTOR SHIFT RIGHT ARITHMETIC   (VSRA)
VECTOR SHIFT RIGHT LOGICAL  (VSRL)


David Miller (9):
  tcg: Implement tcg_gen_{h,w}swap_{i32,i64}
  target/s390x: vxeh2: vector convert short/32b
  target/s390x: vxeh2: vector string search
  target/s390x: vxeh2: Update for changes to vector shifts
  target/s390x: vxeh2: vector shift double by bit
  target/s390x: vxeh2: vector {load, store} elements reversed
  target/s390x: vxeh2: vector {load, store} byte reversed elements
  target/s390x: vxeh2: vector {load, store} byte reversed element
  target/s390x: add S390_FEAT_VECTOR_ENH2 to qemu CPU model
  tests/tcg/s390x: Tests for Vector Enhancements Facility 2
  target/s390x: Fix writeback to v1 in helper_vstl

Richard Henderson (2):
  tcg: Implement tcg_gen_{h,w}swap_{i32,i64}
  target/s390x: Fix writeback to v1 in helper_vstl

 include/tcg/tcg-op.h |   6 +
 target/s390x/gen-features.c  |   2 +
 target/s390x/helper.h|  13 +
 target/s390x/tcg/insn-data.def   |  40 ++-
 target/s390x/tcg/translate.c |   3 +-
 target/s390x/tcg/translate_vx.c.inc  | 463 ---
 target/s390x/tcg/vec_fpu_helper.c|  31 ++
 target/s390x/tcg/vec_helper.c|   2 -
 target/s390x/tcg/vec_int_helper.c|  55 
 target/s390x/tcg/vec_string_helper.c |  99 ++
 tcg/tcg-op.c |  30 ++
 tests/tcg/s390x/Makefile.target  |   8 +
 tests/tcg/s390x/vxeh2_vcvt.c |  97 ++
 tests/tcg/s390x/vxeh2_vlstr.c| 146 +
 tests/tcg/s390x/vxeh2_vs.c   |  91 ++
 15 files changed, 1031 insertions(+), 55 deletions(-)
 create mode 100644 tests/tcg/s390x/vxeh2_vcvt.c
 create mode 100644 tests/tcg/s390x/vxeh2_vlstr.c
 create mode 100644 tests/tcg/s390x/vxeh2_vs.c

-- 
2.34.1




[PATCH v4 11/11] target/s390x: Fix writeback to v1 in helper_vstl

2022-03-21 Thread David Miller
From: Richard Henderson 

Signed-off-by: Richard Henderson 
Reviewed-by: David Miller 
Reviewed-by: David Hildenbrand 
---
 target/s390x/tcg/vec_helper.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/target/s390x/tcg/vec_helper.c b/target/s390x/tcg/vec_helper.c
index ededf13cf0..48d86722b2 100644
--- a/target/s390x/tcg/vec_helper.c
+++ b/target/s390x/tcg/vec_helper.c
@@ -200,7 +200,6 @@ void HELPER(vstl)(CPUS390XState *env, const void *v1, 
uint64_t addr,
 addr = wrap_address(env, addr + 8);
 cpu_stq_data_ra(env, addr, s390_vec_read_element64(v1, 1), GETPC());
 } else {
-S390Vector tmp = {};
 int i;
 
 for (i = 0; i < bytes; i++) {
@@ -209,6 +208,5 @@ void HELPER(vstl)(CPUS390XState *env, const void *v1, 
uint64_t addr,
 cpu_stb_data_ra(env, addr, byte, GETPC());
 addr = wrap_address(env, addr + 1);
 }
-*(S390Vector *)v1 = tmp;
 }
 }
-- 
2.34.1




[PATCH v4 02/11] target/s390x: vxeh2: vector convert short/32b

2022-03-21 Thread David Miller
Signed-off-by: David Miller 
Signed-off-by: Richard Henderson 
Reviewed-by: David Hildenbrand 
---
 target/s390x/helper.h   |  4 +++
 target/s390x/tcg/translate_vx.c.inc | 44 ++---
 target/s390x/tcg/vec_fpu_helper.c   | 31 
 3 files changed, 75 insertions(+), 4 deletions(-)

diff --git a/target/s390x/helper.h b/target/s390x/helper.h
index 69f69cf718..7cbcbd7f0b 100644
--- a/target/s390x/helper.h
+++ b/target/s390x/helper.h
@@ -275,6 +275,10 @@ DEF_HELPER_FLAGS_5(gvec_vfche64, TCG_CALL_NO_WG, void, 
ptr, cptr, cptr, env, i32
 DEF_HELPER_5(gvec_vfche64_cc, void, ptr, cptr, cptr, env, i32)
 DEF_HELPER_FLAGS_5(gvec_vfche128, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, 
i32)
 DEF_HELPER_5(gvec_vfche128_cc, void, ptr, cptr, cptr, env, i32)
+DEF_HELPER_FLAGS_4(gvec_vcdg32, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
+DEF_HELPER_FLAGS_4(gvec_vcdlg32, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
+DEF_HELPER_FLAGS_4(gvec_vcgd32, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
+DEF_HELPER_FLAGS_4(gvec_vclgd32, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
 DEF_HELPER_FLAGS_4(gvec_vcdg64, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
 DEF_HELPER_FLAGS_4(gvec_vcdlg64, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
 DEF_HELPER_FLAGS_4(gvec_vcgd64, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
diff --git a/target/s390x/tcg/translate_vx.c.inc 
b/target/s390x/tcg/translate_vx.c.inc
index 98eb7710a4..ea28e40d4f 100644
--- a/target/s390x/tcg/translate_vx.c.inc
+++ b/target/s390x/tcg/translate_vx.c.inc
@@ -2720,23 +2720,59 @@ static DisasJumpType op_vcdg(DisasContext *s, DisasOps 
*o)
 
 switch (s->fields.op2) {
 case 0xc3:
-if (fpf == FPF_LONG) {
+switch (fpf) {
+case FPF_LONG:
 fn = gen_helper_gvec_vcdg64;
+break;
+case FPF_SHORT:
+if (s390_has_feat(S390_FEAT_VECTOR_ENH2)) {
+fn = gen_helper_gvec_vcdg32;
+}
+break;
+default:
+break;
 }
 break;
 case 0xc1:
-if (fpf == FPF_LONG) {
+switch (fpf) {
+case FPF_LONG:
 fn = gen_helper_gvec_vcdlg64;
+break;
+case FPF_SHORT:
+if (s390_has_feat(S390_FEAT_VECTOR_ENH2)) {
+fn = gen_helper_gvec_vcdlg32;
+}
+break;
+default:
+break;
 }
 break;
 case 0xc2:
-if (fpf == FPF_LONG) {
+switch (fpf) {
+case FPF_LONG:
 fn = gen_helper_gvec_vcgd64;
+break;
+case FPF_SHORT:
+if (s390_has_feat(S390_FEAT_VECTOR_ENH2)) {
+fn = gen_helper_gvec_vcgd32;
+}
+break;
+default:
+break;
 }
 break;
 case 0xc0:
-if (fpf == FPF_LONG) {
+switch (fpf) {
+case FPF_LONG:
 fn = gen_helper_gvec_vclgd64;
+break;
+case FPF_SHORT:
+if (s390_has_feat(S390_FEAT_VECTOR_ENH2)) {
+fn = gen_helper_gvec_vclgd32;
+}
+break;
+default:
+break;
 }
 break;
 case 0xc7:
diff --git a/target/s390x/tcg/vec_fpu_helper.c 
b/target/s390x/tcg/vec_fpu_helper.c
index 1a77993471..6834dbc540 100644
--- a/target/s390x/tcg/vec_fpu_helper.c
+++ b/target/s390x/tcg/vec_fpu_helper.c
@@ -176,6 +176,30 @@ static void vop128_2(S390Vector *v1, const S390Vector *v2, 
CPUS390XState *env,
 *v1 = tmp;
 }
 
+static float32 vcdg32(float32 a, float_status *s)
+{ /* Hands 'a' unchanged to int32_to_float32() together with float_status 's'. */
+return int32_to_float32(a, s);
+}
+
+static float32 vcdlg32(float32 a, float_status *s)
+{ /* Hands 'a' unchanged to uint32_to_float32() together with float_status 's'. */
+return uint32_to_float32(a, s);
+}
+
+static float32 vcgd32(float32 a, float_status *s)
+{
+    /* Always run the conversion; a NaN input is then reported as INT32_MIN. */
+    const float32 converted = float32_to_int32(a, s);
+    return float32_is_any_nan(a) ? INT32_MIN : converted;
+}
+
+static float32 vclgd32(float32 a, float_status *s)
+{
+    /* Always run the conversion; a NaN input is then reported as 0. */
+    const float32 converted = float32_to_uint32(a, s);
+    return float32_is_any_nan(a) ? 0 : converted;
+}
+
 static float64 vcdg64(float64 a, float_status *s)
 {
 return int64_to_float64(a, s);
@@ -211,6 +235,9 @@ void HELPER(gvec_##NAME##BITS)(void *v1, const void *v2, 
CPUS390XState *env,   \
 vop##BITS##_2(v1, v2, env, se, XxC, erm, FN, GETPC()); 
\
 }
 
+#define DEF_GVEC_VOP2_32(NAME) 
\
+DEF_GVEC_VOP2_FN(NAME, NAME##32, 32)
+
 #define DEF_GVEC_VOP2_64(NAME) 
\
 DEF_GVEC_VOP2_FN(NAME, NAME##64, 64)
 
@@ -219,6 +246,10 @@ DEF_GVEC_VOP2_FN(NAME, float32_##OP, 32)   
\
 DEF_GVEC_VOP2_FN(NAME, float64_##OP, 64)   
\
 DEF_GVEC_VOP2_FN(NAME, float128_##OP, 128)
 
+DEF_GVEC_VOP2_32(vcdg)
+DEF_GVEC_VOP2_32(vcdlg)
+DEF_GVEC_VOP2_32(vcgd)
+DEF_GVEC_VOP2_32(vclgd)
 DEF_GVEC_VOP2_64(vcdg)
 DEF_GVEC_VOP2_64(vcdlg)
 DEF_GVEC_VOP2_64(vcgd)
-- 
2.34.1




Re: [PATCH v3 00/11] s390x/tcg: Implement Vector-Enhancements Facility 2

2022-03-19 Thread David Miller
Is this waiting on me for anything?
I wanted to ensure this is wrapped up before starting a new project.

Thanks,
-  David Miller

On Mon, Mar 7, 2022 at 11:09 PM David Miller  wrote:
>
>
> I've reviewed all changes,  looks good.
> Ran all of my own tests including vstrs, all passed.
>
> Thank you for all reviews and changes here.
>
> - David Miller
>
> On Mon, Mar 7, 2022 at 8:54 PM Richard Henderson 
>  wrote:
>>
>> Hi David,
>>
>> I've split up the patches a bit, made some improvements to
>> the shifts and reversals, and fixed a few bugs.
>>
>> Please especially review vector string search, as that
>> has had major changes.
>>
>>
>> r~
>>
>>
>> David Miller (9):
>>   target/s390x: vxeh2: vector convert short/32b
>>   target/s390x: vxeh2: vector string search
>>   target/s390x: vxeh2: Update for changes to vector shifts
>>   target/s390x: vxeh2: vector shift double by bit
>>   target/s390x: vxeh2: vector {load, store} elements reversed
>>   target/s390x: vxeh2: vector {load, store} byte reversed elements
>>   target/s390x: vxeh2: vector {load, store} byte reversed element
>>   target/s390x: add S390_FEAT_VECTOR_ENH2 to cpu max
>>   tests/tcg/s390x: Tests for Vector Enhancements Facility 2
>>
>> Richard Henderson (2):
>>   tcg: Implement tcg_gen_{h,w}swap_{i32,i64}
>>   target/s390x: Fix writeback to v1 in helper_vstl
>>
>>  include/tcg/tcg-op.h |   6 +
>>  target/s390x/helper.h|  13 +
>>  target/s390x/gen-features.c  |   2 +
>>  target/s390x/tcg/translate.c |   3 +-
>>  target/s390x/tcg/vec_fpu_helper.c|  31 ++
>>  target/s390x/tcg/vec_helper.c|   2 -
>>  target/s390x/tcg/vec_int_helper.c|  58 
>>  target/s390x/tcg/vec_string_helper.c | 101 ++
>>  tcg/tcg-op.c |  30 ++
>>  tests/tcg/s390x/vxeh2_vcvt.c |  97 ++
>>  tests/tcg/s390x/vxeh2_vlstr.c| 146 +
>>  tests/tcg/s390x/vxeh2_vs.c   |  91 ++
>>  target/s390x/tcg/translate_vx.c.inc  | 442 ---
>>  target/s390x/tcg/insn-data.def   |  40 ++-
>>  tests/tcg/s390x/Makefile.target  |   8 +
>>  15 files changed, 1018 insertions(+), 52 deletions(-)
>>  create mode 100644 tests/tcg/s390x/vxeh2_vcvt.c
>>  create mode 100644 tests/tcg/s390x/vxeh2_vlstr.c
>>  create mode 100644 tests/tcg/s390x/vxeh2_vs.c
>>
>> --
>> 2.25.1
>>



Re: [PATCH v3 00/11] s390x/tcg: Implement Vector-Enhancements Facility 2

2022-03-07 Thread David Miller
I've reviewed all changes,  looks good.
Ran all of my own tests including vstrs, all passed.

Thank you for all reviews and changes here.

- David Miller

On Mon, Mar 7, 2022 at 8:54 PM Richard Henderson <
richard.hender...@linaro.org> wrote:

> Hi David,
>
> I've split up the patches a bit, made some improvements to
> the shifts and reversals, and fixed a few bugs.
>
> Please especially review vector string search, as that
> has had major changes.
>
>
> r~
>
>
> David Miller (9):
>   target/s390x: vxeh2: vector convert short/32b
>   target/s390x: vxeh2: vector string search
>   target/s390x: vxeh2: Update for changes to vector shifts
>   target/s390x: vxeh2: vector shift double by bit
>   target/s390x: vxeh2: vector {load, store} elements reversed
>   target/s390x: vxeh2: vector {load, store} byte reversed elements
>   target/s390x: vxeh2: vector {load, store} byte reversed element
>   target/s390x: add S390_FEAT_VECTOR_ENH2 to cpu max
>   tests/tcg/s390x: Tests for Vector Enhancements Facility 2
>
> Richard Henderson (2):
>   tcg: Implement tcg_gen_{h,w}swap_{i32,i64}
>   target/s390x: Fix writeback to v1 in helper_vstl
>
>  include/tcg/tcg-op.h |   6 +
>  target/s390x/helper.h|  13 +
>  target/s390x/gen-features.c  |   2 +
>  target/s390x/tcg/translate.c |   3 +-
>  target/s390x/tcg/vec_fpu_helper.c|  31 ++
>  target/s390x/tcg/vec_helper.c|   2 -
>  target/s390x/tcg/vec_int_helper.c|  58 
>  target/s390x/tcg/vec_string_helper.c | 101 ++
>  tcg/tcg-op.c |  30 ++
>  tests/tcg/s390x/vxeh2_vcvt.c |  97 ++
>  tests/tcg/s390x/vxeh2_vlstr.c| 146 +
>  tests/tcg/s390x/vxeh2_vs.c   |  91 ++
>  target/s390x/tcg/translate_vx.c.inc  | 442 ---
>  target/s390x/tcg/insn-data.def   |  40 ++-
>  tests/tcg/s390x/Makefile.target  |   8 +
>  15 files changed, 1018 insertions(+), 52 deletions(-)
>  create mode 100644 tests/tcg/s390x/vxeh2_vcvt.c
>  create mode 100644 tests/tcg/s390x/vxeh2_vlstr.c
>  create mode 100644 tests/tcg/s390x/vxeh2_vs.c
>
> --
> 2.25.1
>
>


[PATCH v2 7/7] tests/tcg/s390x: Tests for Vector Enhancements Facility 2

2022-03-06 Thread David Miller
* tests/tcg/s390x/vxeh2_vcvt.c  : vector convert
* tests/tcg/s390x/vxeh2_vs.c: vector shift
* tests/tcg/s390x/vxeh2_vlstr.c : vector load/store reversed

Signed-off-by: David Miller 
---
 tests/tcg/s390x/Makefile.target |   8 ++
 tests/tcg/s390x/vxeh2_vcvt.c|  97 +
 tests/tcg/s390x/vxeh2_vlstr.c   | 146 
 tests/tcg/s390x/vxeh2_vs.c  |  91 
 4 files changed, 342 insertions(+)
 create mode 100644 tests/tcg/s390x/vxeh2_vcvt.c
 create mode 100644 tests/tcg/s390x/vxeh2_vlstr.c
 create mode 100644 tests/tcg/s390x/vxeh2_vs.c

diff --git a/tests/tcg/s390x/Makefile.target b/tests/tcg/s390x/Makefile.target
index 8c9b6a13ce..921a056dd1 100644
--- a/tests/tcg/s390x/Makefile.target
+++ b/tests/tcg/s390x/Makefile.target
@@ -16,6 +16,14 @@ TESTS+=shift
 TESTS+=trap
 TESTS+=signals-s390x
 
+VECTOR_TESTS=vxeh2_vs
+VECTOR_TESTS+=vxeh2_vcvt
+VECTOR_TESTS+=vxeh2_vlstr
+
+TESTS+=$(VECTOR_TESTS)
+
+$(VECTOR_TESTS): CFLAGS+=-march=z15 -O2
+
 ifneq ($(HAVE_GDB_BIN),)
 GDB_SCRIPT=$(SRC_PATH)/tests/guest-debug/run-test.py
 
diff --git a/tests/tcg/s390x/vxeh2_vcvt.c b/tests/tcg/s390x/vxeh2_vcvt.c
new file mode 100644
index 00..71ecbd77b0
--- /dev/null
+++ b/tests/tcg/s390x/vxeh2_vcvt.c
@@ -0,0 +1,97 @@
+/*
+ * vxeh2_vcvt: vector-enhancements facility 2 vector convert *
+ */
+#include <stdint.h>
+
+typedef union S390Vector { /* overlapping views of one 16-byte vector value */
+    uint64_t d[2];  /* doubleword */
+    uint32_t w[4];  /* word */
+    uint16_t h[8];  /* halfword */
+    uint8_t  b[16]; /* byte */
+    float    f[4];  /* float32 */
+    double   fd[2]; /* float64 */
+    __uint128_t v;  /* all 128 bits as a single integer */
+} S390Vector;
+
+#define M_S 8
+#define M4_XxC 4
+#define M4_def M4_XxC
+
+static inline void vcfps(S390Vector *v1, S390Vector *v2,
+const uint8_t m3,  const uint8_t m4,  const uint8_t m5)
+{ /* Emits one "vcfps" (VECTOR FP CONVERT FROM FIXED): reads v2->v, writes v1->v. */
+asm volatile("vcfps %[v1], %[v2], %[m3], %[m4], %[m5]\n"
+: [v1] "=v" (v1->v)
+: [v2]  "v" (v2->v)
+, [m3]  "i" (m3) /* m3..m5 use the "i" constraint: immediate operands only */
+, [m4]  "i" (m4)
+, [m5]  "i" (m5));
+}
+
+static inline void vcfpl(S390Vector *v1, S390Vector *v2,
+const uint8_t m3,  const uint8_t m4,  const uint8_t m5)
+{ /* Emits one "vcfpl" (VECTOR FP CONVERT FROM LOGICAL): reads v2->v, writes v1->v. */
+asm volatile("vcfpl %[v1], %[v2], %[m3], %[m4], %[m5]\n"
+: [v1] "=v" (v1->v)
+: [v2]  "v" (v2->v)
+, [m3]  "i" (m3) /* m3..m5 use the "i" constraint: immediate operands only */
+, [m4]  "i" (m4)
+, [m5]  "i" (m5));
+}
+
+static inline void vcsfp(S390Vector *v1, S390Vector *v2,
+const uint8_t m3,  const uint8_t m4,  const uint8_t m5)
+{ /* Emits one "vcsfp" (VECTOR FP CONVERT TO FIXED): reads v2->v, writes v1->v. */
+asm volatile("vcsfp %[v1], %[v2], %[m3], %[m4], %[m5]\n"
+: [v1] "=v" (v1->v)
+: [v2]  "v" (v2->v)
+, [m3]  "i" (m3) /* m3..m5 use the "i" constraint: immediate operands only */
+, [m4]  "i" (m4)
+, [m5]  "i" (m5));
+}
+
+static inline void vclfp(S390Vector *v1, S390Vector *v2,
+const uint8_t m3,  const uint8_t m4,  const uint8_t m5)
+{ /* Emits one "vclfp" (VECTOR FP CONVERT TO LOGICAL): reads v2->v, writes v1->v. */
+asm volatile("vclfp %[v1], %[v2], %[m3], %[m4], %[m5]\n"
+: [v1] "=v" (v1->v)
+: [v2]  "v" (v2->v)
+, [m3]  "i" (m3) /* m3..m5 use the "i" constraint: immediate operands only */
+, [m4]  "i" (m4)
+, [m5]  "i" (m5));
+}
+
+int main(int argc, char *argv[])
+{ /* Exit status is 0 only if all four conversions produce the expected lanes. */
+    S390Vector vd;
+    S390Vector vs_i32 = { .w[0] = 1, .w[1] = 64, .w[2] = 1024, .w[3] = -10 };
+    S390Vector vs_u32 = { .w[0] = 2, .w[1] = 32, .w[2] = 4096, .w[3] = 8888 };
+    S390Vector vs_f32 = { .f[0] = 3.987, .f[1] = 5.123,
+                          .f[2] = 4.499, .f[3] = 0.512 };
+    /* NOTE(review): vs_u32.w[3] was lost in extraction; 8888 assumed - confirm. */
+    vd.d[0] = vd.d[1] = 0;
+    vcfps(&vd, &vs_i32, 2, M4_def, 0);
+    if (1 != vd.f[0] || 1024 != vd.f[2] || 64 != vd.f[1] || -10 != vd.f[3]) {
+        return 1;
+    }
+
+    vd.d[0] = vd.d[1] = 0;
+    vcfpl(&vd, &vs_u32, 2, M4_def, 0);
+    if (2 != vd.f[0] || 4096 != vd.f[2] || 32 != vd.f[1] || 8888 != vd.f[3]) {
+        return 1;
+    }
+
+    vd.d[0] = vd.d[1] = 0;
+    vcsfp(&vd, &vs_f32, 2, M4_def, 0);
+    if (4 != vd.w[0] || 4 != vd.w[2] || 5 != vd.w[1] || 1 != vd.w[3]) {
+        return 1;
+    }
+
+    vd.d[0] = vd.d[1] = 0;
+    vclfp(&vd, &vs_f32, 2, M4_def, 0);
+    if (4 != vd.w[0] || 4 != vd.w[2] || 5 != vd.w[1] || 1 != vd.w[3]) {
+        return 1;
+    }
+
+    return 0;
+}
diff --git a/tests/tcg/s390x/vxeh2_vlstr.c b/tests/tcg/s390x/vxeh2_vlstr.c
new file mode 100644
index 00..bf2954e86d
--- /dev/null
+++ b/tests/tcg/s390x/vxeh2_vlstr.c
@@ -0,0 +1,146 @@
+/*
+ * vxeh2_vlstr: vector-enhancements facility 2 vector load/store reversed *
+ */
+#include <stdint.h>
+
+typedef union S390Vector { /* overlapping views of one 16-byte vector value */
+uint64_t d[2];  /* doubleword */
+uint32_t w[4];  /* word */
+uint16_t h[8];  /* halfword */
+uint8_t  b[16]; /* byte */
+__uint128_t v; /* all 128 bits as a single integer */
+} S390Vector;
+
+#define ES8  0
+#define ES16 1
+#define ES32 2
+#define ES64 3
+
+#defi

[PATCH v2 5/7] target/s390x: vxeh2: vector {load, store} reversed elements [and {zero, replicate}]

2022-03-06 Thread David Miller
Signed-off-by: David Miller 
---
 target/s390x/tcg/insn-data.def  |  16 +++
 target/s390x/tcg/translate_vx.c.inc | 161 
 2 files changed, 177 insertions(+)

diff --git a/target/s390x/tcg/insn-data.def b/target/s390x/tcg/insn-data.def
index 3a7f15a0b5..dc6daa6c10 100644
--- a/target/s390x/tcg/insn-data.def
+++ b/target/s390x/tcg/insn-data.def
@@ -1027,6 +1027,16 @@
 F(0xe756, VLR, VRR_a, V,   0, 0, 0, 0, vlr, 0, IF_VEC)
 /* VECTOR LOAD AND REPLICATE */
 F(0xe705, VLREP,   VRX,   V,   la2, 0, 0, 0, vlrep, 0, IF_VEC)
+/* VECTOR LOAD BYTE REVERSED ELEMENT */
+F(0xe601, VLEBRH,  VRX,   VE2, la2, 0, 0, 0, vlebr, 0, IF_VEC)
+F(0xe603, VLEBRF,  VRX,   VE2, la2, 0, 0, 0, vlebr, 0, IF_VEC)
+F(0xe602, VLEBRG,  VRX,   VE2, la2, 0, 0, 0, vlebr, 0, IF_VEC)
+/* VECTOR LOAD BYTE REVERSED ELEMENT AND REPLICATE */
+F(0xe605, VLBRREP, VRX,   VE2, la2, 0, 0, 0, vlbrrep, 0, IF_VEC)
+/* VECTOR LOAD BYTE REVERSED ELEMENT AND ZERO */
+F(0xe604, VLLEBRZ, VRX,   VE2, la2, 0, 0, 0, vllebrz, 0, IF_VEC)
+/* VECTOR LOAD BYTE REVERSED ELEMENTS */
+F(0xe606, VLBR,VRX,   VE2, la2, 0, 0, 0, vlbr, 0, IF_VEC)
 /* VECTOR LOAD ELEMENT */
 E(0xe700, VLEB,VRX,   V,   la2, 0, 0, 0, vle, 0, ES_8, IF_VEC)
 E(0xe701, VLEH,VRX,   V,   la2, 0, 0, 0, vle, 0, ES_16, IF_VEC)
@@ -1079,6 +1089,12 @@
 F(0xe75f, VSEG,VRR_a, V,   0, 0, 0, 0, vseg, 0, IF_VEC)
 /* VECTOR STORE */
 F(0xe70e, VST, VRX,   V,   la2, 0, 0, 0, vst, 0, IF_VEC)
+/* VECTOR STORE BYTE REVERSED ELEMENT */
+F(0xe609, VSTEBRH,  VRX,   VE2, la2, 0, 0, 0, vsteb, 0, IF_VEC)
+F(0xe60b, VSTEBRF,  VRX,   VE2, la2, 0, 0, 0, vsteb, 0, IF_VEC)
+F(0xe60a, VSTEBRG,  VRX,   VE2, la2, 0, 0, 0, vsteb, 0, IF_VEC)
+/* VECTOR STORE BYTE REVERSED ELEMENTS */
+F(0xe60e, VSTBR,VRX,   VE2, la2, 0, 0, 0, vstbr, 0, IF_VEC)
 /* VECTOR STORE ELEMENT */
 E(0xe708, VSTEB,   VRX,   V,   la2, 0, 0, 0, vste, 0, ES_8, IF_VEC)
 E(0xe709, VSTEH,   VRX,   V,   la2, 0, 0, 0, vste, 0, ES_16, IF_VEC)
diff --git a/target/s390x/tcg/translate_vx.c.inc 
b/target/s390x/tcg/translate_vx.c.inc
index d543203e02..06c4340655 100644
--- a/target/s390x/tcg/translate_vx.c.inc
+++ b/target/s390x/tcg/translate_vx.c.inc
@@ -457,6 +457,111 @@ static DisasJumpType op_vlrep(DisasContext *s, DisasOps 
*o)
 return DISAS_NEXT;
 }
 
+static DisasJumpType op_vlebr(DisasContext *s, DisasOps *o)
+{ /* Loads one element from o->addr1, byte-swaps it, and stores it into element m3 of v1. */
+const uint8_t es = (1 == s->fields.op2) ? 1 : (1 ^ s->fields.op2); /* op2 1 -> 1, 3 -> 2, 2 -> 3 */
+const uint8_t enr = get_field(s, m3);
+TCGv_i64 tmp;
+
+if (es < ES_16 || es > ES_64 || !valid_vec_element(enr, es)) {
+gen_program_exception(s, PGM_SPECIFICATION);
+return DISAS_NORETURN;
+}
+
+tmp = tcg_temp_new_i64();
+tcg_gen_qemu_ld_i64(tmp, o->addr1, get_mem_index(s), MO_TE | es);
+
+tcg_gen_bswap64_i64(tmp, tmp); /* full 64-bit swap: the element's bytes end up reversed at the top */
+tcg_gen_rotri_i64(tmp, tmp, 64 - 8 * (1 << es)); /* right by 64-N == left by N bits: top N bits move to the bottom */
+
+write_vec_element_i64(tmp, get_field(s, v1), enr, es);
+tcg_temp_free_i64(tmp);
+return DISAS_NEXT;
+}
+
+static DisasJumpType op_vlbrrep(DisasContext *s, DisasOps *o)
+{ /* Loads one element of size m3 from o->addr1, byte-swaps it, and passes it to gen_gvec_dup_i64() for v1. */
+const uint8_t es = get_field(s, m3);
+TCGv_i64 tmp;
+
+if (es == ES_8 || es > ES_64) { /* only ES_16..ES_64 are accepted */
+gen_program_exception(s, PGM_SPECIFICATION);
+return DISAS_NORETURN;
+}
+
+tmp = tcg_temp_new_i64();
+tcg_gen_qemu_ld_i64(tmp, o->addr1, get_mem_index(s), MO_TE | es);
+tcg_gen_bswap64_i64(tmp, tmp); /* full 64-bit swap: the element's bytes end up reversed at the top */
+tcg_gen_rotri_i64(tmp, tmp, 64 - 8 * (1 << es)); /* right by 64-N == left by N bits: top N bits move to the bottom */
+gen_gvec_dup_i64(es, get_field(s, v1), tmp); /* presumably replicates tmp into every es-sized element - confirm */
+tcg_temp_free_i64(tmp);
+return DISAS_NEXT;
+}
+
+static DisasJumpType op_vllebrz(DisasContext *s, DisasOps *o)
+{ /* Zeroes v1, then loads one byte-swapped element from o->addr1 into a slot chosen by m3. */
+const uint8_t m3 = get_field(s, m3);
+const uint8_t es = m3 & 3; /* m3 == 6 therefore yields es == 2 */
+const uint8_t enr = (m3 == 6) ? 0 : ((1 << (3 - es)) - 1); /* m3 6 -> 0; es 1,2,3 -> 3,1,0 */
+
+TCGv_i64 tmp, zero;
+
+if (m3 < ES_16 || (m3 > ES_64 && m3 != 6))  { /* accepted m3 values: ES_16..ES_64 and 6 */
+gen_program_exception(s, PGM_SPECIFICATION);
+return DISAS_NORETURN;
+}
+
+zero = tcg_const_i64(0);
+write_vec_element_i64(zero, get_field(s, v1), 1, ES_64); /* clear both doublewords of v1 first */
+write_vec_element_i64(zero, get_field(s, v1), 0, ES_64);
+
+tmp = tcg_temp_new_i64();
+tcg_gen_qemu_ld_i64(tmp, o->addr1, get_mem_index(s), MO_TE | es);
+
+tcg_gen_bswap64_i64(tmp, tmp); /* full 64-bit swap: the element's bytes end up reversed at the top */
+tcg_gen_rotri_i64(tmp, tmp, 64 - 8 * (1 << es)); /* right by 64-N == left by N bits: top N bits move to the bottom */
+
+write_vec_element_i64(tmp, get_field(s, v1), enr, es);
+tcg_temp_free_i64(tmp);
+tcg_temp_free_i64(zero);
+
+return DISAS_NEXT;
+}
+
+static DisasJumpType op_vlbr(DisasContext *s, DisasOps *o)
+{
+const uint8_t es = get_field(s, m3);
+const uint8_t bytes = 1 << es;
+uint32_t dst_idx;
+
+if (es < ES_16 || es > ES_128) {
+gen_program_exception(s, PGM_SPECIFICATION);
+return DISAS_NORETURN;
+}
+TCGv_i64 t0 = tcg_temp_new_i64();
+TCGv_i64 t1 = tcg_temp_new_i64();
+
+if (es >= ES_64) {
+tcg_gen_qemu_ld_

[PATCH v2 6/7] target/s390x: add S390_FEAT_VECTOR_ENH2 to cpu max

2022-03-06 Thread David Miller
Signed-off-by: David Miller 
---
 target/s390x/gen-features.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/target/s390x/gen-features.c b/target/s390x/gen-features.c
index 22846121c4..499a3b10a8 100644
--- a/target/s390x/gen-features.c
+++ b/target/s390x/gen-features.c
@@ -740,7 +740,9 @@ static uint16_t qemu_V6_2[] = {
 
 static uint16_t qemu_LATEST[] = {
 S390_FEAT_MISC_INSTRUCTION_EXT3,
+S390_FEAT_VECTOR_ENH2,
 };
+
 /* add all new definitions before this point */
 static uint16_t qemu_MAX[] = {
 /* generates a dependency warning, leave it out for now */
-- 
2.34.1




[PATCH v2 4/7] target/s390x: vxeh2: vector {load, store} elements reversed

2022-03-06 Thread David Miller
Signed-off-by: David Miller 
---
 target/s390x/helper.h   |  4 ++-
 target/s390x/tcg/insn-data.def  |  4 +++
 target/s390x/tcg/translate_vx.c.inc | 39 +
 target/s390x/tcg/vec.h  | 24 ++
 target/s390x/tcg/vec_helper.c   | 31 +++
 5 files changed, 101 insertions(+), 1 deletion(-)

diff --git a/target/s390x/helper.h b/target/s390x/helper.h
index a36308d651..933921a87c 100644
--- a/target/s390x/helper.h
+++ b/target/s390x/helper.h
@@ -129,6 +129,9 @@ DEF_HELPER_FLAGS_3(probe_write_access, TCG_CALL_NO_WG, 
void, env, i64, i64)
 /* === Vector Support Instructions === */
 DEF_HELPER_FLAGS_4(gvec_vbperm, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
 DEF_HELPER_FLAGS_4(vll, TCG_CALL_NO_WG, void, env, ptr, i64, i64)
+DEF_HELPER_FLAGS_4(vler, TCG_CALL_NO_WG, void, env, ptr, i64, i64)
+DEF_HELPER_FLAGS_4(vster, TCG_CALL_NO_WG, void, env, ptr, i64, i64)
+DEF_HELPER_FLAGS_4(vstl, TCG_CALL_NO_WG, void, env, cptr, i64, i64)
 DEF_HELPER_FLAGS_4(gvec_vpk16, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
 DEF_HELPER_FLAGS_4(gvec_vpk32, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
 DEF_HELPER_FLAGS_4(gvec_vpk64, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
@@ -145,7 +148,6 @@ DEF_HELPER_5(gvec_vpkls_cc16, void, ptr, cptr, cptr, env, 
i32)
 DEF_HELPER_5(gvec_vpkls_cc32, void, ptr, cptr, cptr, env, i32)
 DEF_HELPER_5(gvec_vpkls_cc64, void, ptr, cptr, cptr, env, i32)
 DEF_HELPER_FLAGS_5(gvec_vperm, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, cptr, 
i32)
-DEF_HELPER_FLAGS_4(vstl, TCG_CALL_NO_WG, void, env, cptr, i64, i64)
 
 /* === Vector Integer Instructions === */
 DEF_HELPER_FLAGS_4(gvec_vavg8, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
diff --git a/target/s390x/tcg/insn-data.def b/target/s390x/tcg/insn-data.def
index 1bfe88a4ac..3a7f15a0b5 100644
--- a/target/s390x/tcg/insn-data.def
+++ b/target/s390x/tcg/insn-data.def
@@ -1037,6 +1037,8 @@
 E(0xe741, VLEIH,   VRI_a, V,   0, 0, 0, 0, vlei, 0, ES_16, IF_VEC)
 E(0xe743, VLEIF,   VRI_a, V,   0, 0, 0, 0, vlei, 0, ES_32, IF_VEC)
 E(0xe742, VLEIG,   VRI_a, V,   0, 0, 0, 0, vlei, 0, ES_64, IF_VEC)
+/* VECTOR LOAD ELEMENTS REVERSED */
+F(0xe607, VLER,VRX,   VE2, la2, 0, 0, 0, vler, 0, IF_VEC)
 /* VECTOR LOAD GR FROM VR ELEMENT */
 F(0xe721, VLGV,VRS_c, V,   la2, 0, r1, 0, vlgv, 0, IF_VEC)
 /* VECTOR LOAD LOGICAL ELEMENT AND ZERO */
@@ -1082,6 +1084,8 @@
 E(0xe709, VSTEH,   VRX,   V,   la2, 0, 0, 0, vste, 0, ES_16, IF_VEC)
 E(0xe70b, VSTEF,   VRX,   V,   la2, 0, 0, 0, vste, 0, ES_32, IF_VEC)
 E(0xe70a, VSTEG,   VRX,   V,   la2, 0, 0, 0, vste, 0, ES_64, IF_VEC)
+/* VECTOR STORE ELEMENTS REVERSED */
+F(0xe60f, VSTER,   VRX,   VE2, la2, 0, 0, 0, vster, 0, IF_VEC)
 /* VECTOR STORE MULTIPLE */
 F(0xe73e, VSTM,VRS_a, V,   la2, 0, 0, 0, vstm, 0, IF_VEC)
 /* VECTOR STORE WITH LENGTH */
diff --git a/target/s390x/tcg/translate_vx.c.inc 
b/target/s390x/tcg/translate_vx.c.inc
index 60e1efdbfa..d543203e02 100644
--- a/target/s390x/tcg/translate_vx.c.inc
+++ b/target/s390x/tcg/translate_vx.c.inc
@@ -492,6 +492,26 @@ static DisasJumpType op_vlei(DisasContext *s, DisasOps *o)
 return DISAS_NEXT;
 }
 
+static DisasJumpType op_vler(DisasContext *s, DisasOps *o)
+{
+    const uint8_t es = get_field(s, m3);
+    TCGv_ptr vreg;
+    TCGv_i64 es_arg;
+
+    /* Only element sizes ES_16 through ES_64 are accepted. */
+    if (!(es >= ES_16 && es <= ES_64)) {
+        gen_program_exception(s, PGM_SPECIFICATION);
+        return DISAS_NORETURN;
+    }
+
+    vreg = tcg_temp_new_ptr();
+    es_arg = tcg_const_i64(es & 0xf);
+
+    /* Pass the helper the address of vector register v1 inside cpu_env. */
+    tcg_gen_addi_ptr(vreg, cpu_env, vec_full_reg_offset(get_field(s, v1)));
+    gen_helper_vler(cpu_env, vreg, o->addr1, es_arg);
+
+    tcg_temp_free_i64(es_arg);
+    tcg_temp_free_ptr(vreg);
+    return DISAS_NEXT;
+}
+
 static DisasJumpType op_vlgv(DisasContext *s, DisasOps *o)
 {
 const uint8_t es = get_field(s, m4);
@@ -976,6 +996,25 @@ static DisasJumpType op_vste(DisasContext *s, DisasOps *o)
 return DISAS_NEXT;
 }
 
+static DisasJumpType op_vster(DisasContext *s, DisasOps *o)
+{
+    const uint8_t es = get_field(s, m3);
+    TCGv_ptr vreg;
+    TCGv_i64 es_arg;
+
+    /* Only element sizes ES_16 through ES_64 are accepted. */
+    if (!(es >= ES_16 && es <= ES_64)) {
+        gen_program_exception(s, PGM_SPECIFICATION);
+        return DISAS_NORETURN;
+    }
+
+    vreg = tcg_temp_new_ptr();
+    es_arg = tcg_const_i64(es);
+
+    /* Pass the helper the address of vector register v1 inside cpu_env. */
+    tcg_gen_addi_ptr(vreg, cpu_env, vec_full_reg_offset(get_field(s, v1)));
+    gen_helper_vster(cpu_env, vreg, o->addr1, es_arg);
+
+    tcg_temp_free_i64(es_arg);
+    tcg_temp_free_ptr(vreg);
+    return DISAS_NEXT;
+}
+
 static DisasJumpType op_vstm(DisasContext *s, DisasOps *o)
 {
 const uint8_t v3 = get_field(s, v3);
diff --git a/target/s390x/tcg/vec.h b/target/s390x/tcg/vec.h
index a6e361869b..5ea0446e4b 100644
--- a/target/s390x/tcg/vec.h
+++ b/target/s390x/tcg/vec.h
@@ -138,4 +138,28 @@ static inline void s390_vec_write_element(S390Vector *v, 
uint8_t enr,
 }
 }
 
+static inline void s390_vec_reverse(uint64_t *vdst, uint64_t *vsrc, uint8_t es)
+{
+const uint64_t l

[PATCH v2 3/7] target/s390x: vxeh2: vector shift {double by bit, left, right {logical, arithmetic}}

2022-03-06 Thread David Miller
Signed-off-by: David Miller 
---
 include/qemu/bitops.h   |  25 +++
 target/s390x/helper.h   |   3 +
 target/s390x/tcg/insn-data.def  |   6 +-
 target/s390x/tcg/translate_vx.c.inc | 108 ++--
 target/s390x/tcg/vec_int_helper.c   |  58 +++
 5 files changed, 163 insertions(+), 37 deletions(-)

diff --git a/include/qemu/bitops.h b/include/qemu/bitops.h
index 03213ce952..72426deea0 100644
--- a/include/qemu/bitops.h
+++ b/include/qemu/bitops.h
@@ -445,6 +445,31 @@ static inline int64_t sextract64(uint64_t value, int 
start, int length)
  */
 return ((int64_t)(value << (64 - length - start))) >> (64 - length);
 }
+/**
+ * deposit8:
+ * @value: initial value to insert bit field into
+ * @start: the lowest bit in the bit field (numbered from 0)
+ * @length: the length of the bit field
+ * @fieldval: the value to insert into the bit field
+ *
+ * Deposit @fieldval into the 8 bit @value at the bit field specified
+ * by the @start and @length parameters, and return the modified
+ * @value. Bits of @value outside the bit field are not modified.
+ * Bits of @fieldval above the least significant @length bits are
+ * ignored. The bit field must lie entirely within the 8 bit byte.
+ * It is valid to request that all 8 bits are modified (ie @length
+ * 8 and @start 0).
+ *
+ * Returns: the modified @value.
+ */
+static inline uint8_t deposit8(uint8_t value, int start, int length,
+                               uint8_t fieldval)
+{
+    uint8_t mask;
+
+    assert(start >= 0 && length > 0 && length <= 8 - start);
+    /*
+     * Build the mask from an 8-bit all-ones pattern.  Starting from ~0ULL
+     * does not work here: (~0ULL >> (8 - length)) still has its low byte
+     * all ones, so after truncation to uint8_t the mask would cover every
+     * bit from @start upwards and bits above the field would be clobbered.
+     */
+    mask = (0xffU >> (8 - length)) << start;
+    return (value & ~mask) | ((fieldval << start) & mask);
+}
 
 /**
  * deposit32:
diff --git a/target/s390x/helper.h b/target/s390x/helper.h
index 1e38ee2e4e..a36308d651 100644
--- a/target/s390x/helper.h
+++ b/target/s390x/helper.h
@@ -203,8 +203,11 @@ DEF_HELPER_FLAGS_3(gvec_vpopct16, TCG_CALL_NO_RWG, void, 
ptr, cptr, i32)
 DEF_HELPER_FLAGS_4(gvec_verim8, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
 DEF_HELPER_FLAGS_4(gvec_verim16, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
 DEF_HELPER_FLAGS_4(gvec_vsl, TCG_CALL_NO_RWG, void, ptr, cptr, i64, i32)
+DEF_HELPER_FLAGS_4(gvec_vsl_ve2, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
 DEF_HELPER_FLAGS_4(gvec_vsra, TCG_CALL_NO_RWG, void, ptr, cptr, i64, i32)
+DEF_HELPER_FLAGS_4(gvec_vsra_ve2, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
 DEF_HELPER_FLAGS_4(gvec_vsrl, TCG_CALL_NO_RWG, void, ptr, cptr, i64, i32)
+DEF_HELPER_FLAGS_4(gvec_vsrl_ve2, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
 DEF_HELPER_FLAGS_4(gvec_vscbi8, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
 DEF_HELPER_FLAGS_4(gvec_vscbi16, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
 DEF_HELPER_4(gvec_vtm, void, ptr, cptr, env, i32)
diff --git a/target/s390x/tcg/insn-data.def b/target/s390x/tcg/insn-data.def
index 46add91a0e..1bfe88a4ac 100644
--- a/target/s390x/tcg/insn-data.def
+++ b/target/s390x/tcg/insn-data.def
@@ -1207,12 +1207,16 @@
 F(0xe774, VSL, VRR_c, V,   0, 0, 0, 0, vsl, 0, IF_VEC)
 /* VECTOR SHIFT LEFT BY BYTE */
 F(0xe775, VSLB,VRR_c, V,   0, 0, 0, 0, vsl, 0, IF_VEC)
+/* VECTOR SHIFT LEFT DOUBLE BY BIT */
+   F(0xe786, VSLD,VRI_d, VE2, 0, 0, 0, 0, vsld, 0, IF_VEC)
 /* VECTOR SHIFT LEFT DOUBLE BY BYTE */
-F(0xe777, VSLDB,   VRI_d, V,   0, 0, 0, 0, vsldb, 0, IF_VEC)
+F(0xe777, VSLDB,   VRI_d, V,   0, 0, 0, 0, vsld, 0, IF_VEC)
 /* VECTOR SHIFT RIGHT ARITHMETIC */
 F(0xe77e, VSRA,VRR_c, V,   0, 0, 0, 0, vsra, 0, IF_VEC)
 /* VECTOR SHIFT RIGHT ARITHMETIC BY BYTE */
 F(0xe77f, VSRAB,   VRR_c, V,   0, 0, 0, 0, vsra, 0, IF_VEC)
+/* VECTOR SHIFT RIGHT DOUBLE BY BIT */
+   F(0xe787, VSRD,VRI_d, VE2, 0, 0, 0, 0, vsrd, 0, IF_VEC)
 /* VECTOR SHIFT RIGHT LOGICAL */
 F(0xe77c, VSRL,VRR_c, V,   0, 0, 0, 0, vsrl, 0, IF_VEC)
 /* VECTOR SHIFT RIGHT LOGICAL BY BYTE */
diff --git a/target/s390x/tcg/translate_vx.c.inc 
b/target/s390x/tcg/translate_vx.c.inc
index db86d9b87d..60e1efdbfa 100644
--- a/target/s390x/tcg/translate_vx.c.inc
+++ b/target/s390x/tcg/translate_vx.c.inc
@@ -2020,26 +2020,33 @@ static DisasJumpType op_ves(DisasContext *s, DisasOps 
*o)
 
 static DisasJumpType op_vsl(DisasContext *s, DisasOps *o)
 {
-TCGv_i64 shift = tcg_temp_new_i64();
-
-read_vec_element_i64(shift, get_field(s, v3), 7, ES_8);
-if (s->fields.op2 == 0x74) {
-tcg_gen_andi_i64(shift, shift, 0x7);
+const bool B = 0x75 == s->fields.op2;
+if (!B && s390_has_feat(S390_FEAT_VECTOR_ENH2)) {
+gen_gvec_3_ool(get_field(s, v1), get_field(s, v2),
+   get_field(s, v3), 0,  gen_helper_gvec_vsl_ve2);
 } else {
-tcg_gen_andi_i64(shift, shift, 0x78);
-}
+TCGv_i64 shift = tcg_temp_new_i64();
 
-gen_gvec_2i_ool(get_field(s, v1), get_field(s, v2),
-shift, 0, gen_helper_gvec_vsl);
-tcg_t

[PATCH v2 1/7] target/s390x: vxeh2: vector convert short/32b

2022-03-06 Thread David Miller
Signed-off-by: David Miller 
---
 target/s390x/helper.h   |  4 +++
 target/s390x/tcg/translate_vx.c.inc | 44 ++---
 target/s390x/tcg/vec_fpu_helper.c   | 31 
 3 files changed, 75 insertions(+), 4 deletions(-)

diff --git a/target/s390x/helper.h b/target/s390x/helper.h
index 69f69cf718..7cbcbd7f0b 100644
--- a/target/s390x/helper.h
+++ b/target/s390x/helper.h
@@ -275,6 +275,10 @@ DEF_HELPER_FLAGS_5(gvec_vfche64, TCG_CALL_NO_WG, void, 
ptr, cptr, cptr, env, i32
 DEF_HELPER_5(gvec_vfche64_cc, void, ptr, cptr, cptr, env, i32)
 DEF_HELPER_FLAGS_5(gvec_vfche128, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, 
i32)
 DEF_HELPER_5(gvec_vfche128_cc, void, ptr, cptr, cptr, env, i32)
+DEF_HELPER_FLAGS_4(gvec_vcdg32, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
+DEF_HELPER_FLAGS_4(gvec_vcdlg32, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
+DEF_HELPER_FLAGS_4(gvec_vcgd32, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
+DEF_HELPER_FLAGS_4(gvec_vclgd32, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
 DEF_HELPER_FLAGS_4(gvec_vcdg64, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
 DEF_HELPER_FLAGS_4(gvec_vcdlg64, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
 DEF_HELPER_FLAGS_4(gvec_vcgd64, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
diff --git a/target/s390x/tcg/translate_vx.c.inc 
b/target/s390x/tcg/translate_vx.c.inc
index 98eb7710a4..ea28e40d4f 100644
--- a/target/s390x/tcg/translate_vx.c.inc
+++ b/target/s390x/tcg/translate_vx.c.inc
@@ -2720,23 +2720,59 @@ static DisasJumpType op_vcdg(DisasContext *s, DisasOps 
*o)
 
 switch (s->fields.op2) {
 case 0xc3:
-if (fpf == FPF_LONG) {
+switch (fpf) {
+case FPF_LONG:
 fn = gen_helper_gvec_vcdg64;
+break;
+case FPF_SHORT:
+if (s390_has_feat(S390_FEAT_VECTOR_ENH2)) {
+fn = gen_helper_gvec_vcdg32;
+}
+break;
+default:
+break;
 }
 break;
 case 0xc1:
-if (fpf == FPF_LONG) {
+switch (fpf) {
+case FPF_LONG:
 fn = gen_helper_gvec_vcdlg64;
+break;
+case FPF_SHORT:
+if (s390_has_feat(S390_FEAT_VECTOR_ENH2)) {
+fn = gen_helper_gvec_vcdlg32;
+}
+break;
+default:
+break;
 }
 break;
 case 0xc2:
-if (fpf == FPF_LONG) {
+switch (fpf) {
+case FPF_LONG:
 fn = gen_helper_gvec_vcgd64;
+break;
+case FPF_SHORT:
+if (s390_has_feat(S390_FEAT_VECTOR_ENH2)) {
+fn = gen_helper_gvec_vcgd32;
+}
+break;
+default:
+break;
 }
 break;
 case 0xc0:
-if (fpf == FPF_LONG) {
+switch (fpf) {
+case FPF_LONG:
 fn = gen_helper_gvec_vclgd64;
+break;
+case FPF_SHORT:
+if (s390_has_feat(S390_FEAT_VECTOR_ENH2)) {
+fn = gen_helper_gvec_vclgd32;
+}
+break;
+default:
+break;
 }
 break;
 case 0xc7:
diff --git a/target/s390x/tcg/vec_fpu_helper.c 
b/target/s390x/tcg/vec_fpu_helper.c
index 1a77993471..6834dbc540 100644
--- a/target/s390x/tcg/vec_fpu_helper.c
+++ b/target/s390x/tcg/vec_fpu_helper.c
@@ -176,6 +176,30 @@ static void vop128_2(S390Vector *v1, const S390Vector *v2, 
CPUS390XState *env,
 *v1 = tmp;
 }
 
+static float32 vcdg32(float32 a, float_status *s)
+{
+    /* Per-element conversion: hand the raw element to int32_to_float32(). */
+    const float32 converted = int32_to_float32(a, s);
+
+    return converted;
+}
+
+static float32 vcdlg32(float32 a, float_status *s)
+{
+    /* Per-element conversion: hand the raw element to uint32_to_float32(). */
+    const float32 converted = uint32_to_float32(a, s);
+
+    return converted;
+}
+
+static float32 vcgd32(float32 a, float_status *s)
+{
+    /* Always run the conversion first, even when the input is a NaN. */
+    const float32 converted = float32_to_int32(a, s);
+
+    /* Any NaN input yields INT32_MIN instead of the converted value. */
+    if (float32_is_any_nan(a)) {
+        return INT32_MIN;
+    }
+    return converted;
+}
+
+static float32 vclgd32(float32 a, float_status *s)
+{
+    /* Always run the conversion first, even when the input is a NaN. */
+    const float32 converted = float32_to_uint32(a, s);
+
+    /* Any NaN input yields 0 instead of the converted value. */
+    if (float32_is_any_nan(a)) {
+        return 0;
+    }
+    return converted;
+}
+
 static float64 vcdg64(float64 a, float_status *s)
 {
 return int64_to_float64(a, s);
@@ -211,6 +235,9 @@ void HELPER(gvec_##NAME##BITS)(void *v1, const void *v2, 
CPUS390XState *env,   \
 vop##BITS##_2(v1, v2, env, se, XxC, erm, FN, GETPC()); 
\
 }
 
+#define DEF_GVEC_VOP2_32(NAME) 
\
+DEF_GVEC_VOP2_FN(NAME, NAME##32, 32)
+
 #define DEF_GVEC_VOP2_64(NAME) 
\
 DEF_GVEC_VOP2_FN(NAME, NAME##64, 64)
 
@@ -219,6 +246,10 @@ DEF_GVEC_VOP2_FN(NAME, float32_##OP, 32)   
\
 DEF_GVEC_VOP2_FN(NAME, float64_##OP, 64)   
\
 DEF_GVEC_VOP2_FN(NAME, float128_##OP, 128)
 
+DEF_GVEC_VOP2_32(vcdg)
+DEF_GVEC_VOP2_32(vcdlg)
+DEF_GVEC_VOP2_32(vcgd)
+DEF_GVEC_VOP2_32(vclgd)
 DEF_GVEC_VOP2_64(vcdg)
 DEF_GVEC_VOP2_64(vcdlg)
 DEF_GVEC_VOP2_64(vcgd)
-- 
2.34.1




[PATCH v2 2/7] target/s390x: vxeh2: vector string search

2022-03-06 Thread David Miller
Signed-off-by: David Miller 
---
 target/s390x/helper.h|  1 +
 target/s390x/tcg/insn-data.def   |  2 +
 target/s390x/tcg/translate.c |  3 +-
 target/s390x/tcg/translate_vx.c.inc  | 17 
 target/s390x/tcg/vec_string_helper.c | 65 
 5 files changed, 87 insertions(+), 1 deletion(-)

diff --git a/target/s390x/helper.h b/target/s390x/helper.h
index 7cbcbd7f0b..1e38ee2e4e 100644
--- a/target/s390x/helper.h
+++ b/target/s390x/helper.h
@@ -246,6 +246,7 @@ DEF_HELPER_6(gvec_vstrc_cc32, void, ptr, cptr, cptr, cptr, 
env, i32)
 DEF_HELPER_6(gvec_vstrc_cc_rt8, void, ptr, cptr, cptr, cptr, env, i32)
 DEF_HELPER_6(gvec_vstrc_cc_rt16, void, ptr, cptr, cptr, cptr, env, i32)
 DEF_HELPER_6(gvec_vstrc_cc_rt32, void, ptr, cptr, cptr, cptr, env, i32)
+DEF_HELPER_6(vstrs, void, ptr, cptr, cptr, ptr, env, i32)
 
 /* === Vector Floating-Point Instructions */
 DEF_HELPER_FLAGS_5(gvec_vfa32, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, i32)
diff --git a/target/s390x/tcg/insn-data.def b/target/s390x/tcg/insn-data.def
index 6c8a8b229f..46add91a0e 100644
--- a/target/s390x/tcg/insn-data.def
+++ b/target/s390x/tcg/insn-data.def
@@ -1246,6 +1246,8 @@
 F(0xe75c, VISTR,   VRR_a, V,   0, 0, 0, 0, vistr, 0, IF_VEC)
 /* VECTOR STRING RANGE COMPARE */
 F(0xe78a, VSTRC,   VRR_d, V,   0, 0, 0, 0, vstrc, 0, IF_VEC)
+/* VECTOR STRING SEARCH */
+F(0xe78b, VSTRS,   VRR_d, VE2, 0, 0, 0, 0, vstrs, 0, IF_VEC)
 
 /* === Vector Floating-Point Instructions */
 
diff --git a/target/s390x/tcg/translate.c b/target/s390x/tcg/translate.c
index 904b51542f..d9ac29573d 100644
--- a/target/s390x/tcg/translate.c
+++ b/target/s390x/tcg/translate.c
@@ -6222,7 +6222,8 @@ enum DisasInsnEnum {
 #define FAC_PCI S390_FEAT_ZPCI /* z/PCI facility */
 #define FAC_AIS S390_FEAT_ADAPTER_INT_SUPPRESSION
 #define FAC_V   S390_FEAT_VECTOR /* vector facility */
-#define FAC_VE  S390_FEAT_VECTOR_ENH /* vector enhancements facility 1 
*/
+#define FAC_VE  S390_FEAT_VECTOR_ENH  /* vector enhancements facility 
1 */
+#define FAC_VE2 S390_FEAT_VECTOR_ENH2 /* vector enhancements facility 
2 */
 #define FAC_MIE2S390_FEAT_MISC_INSTRUCTION_EXT2 /* 
miscellaneous-instruction-extensions facility 2 */
 #define FAC_MIE3S390_FEAT_MISC_INSTRUCTION_EXT3 /* 
miscellaneous-instruction-extensions facility 3 */
 
diff --git a/target/s390x/tcg/translate_vx.c.inc 
b/target/s390x/tcg/translate_vx.c.inc
index ea28e40d4f..db86d9b87d 100644
--- a/target/s390x/tcg/translate_vx.c.inc
+++ b/target/s390x/tcg/translate_vx.c.inc
@@ -2497,6 +2497,23 @@ static DisasJumpType op_vstrc(DisasContext *s, DisasOps 
*o)
 return DISAS_NEXT;
 }
 
+static DisasJumpType op_vstrs(DisasContext *s, DisasOps *o)
+{
+    const uint8_t es = get_field(s, m5);
+    const uint32_t flags = get_field(s, m6);
+    uint32_t data;
+
+    /* Element sizes above ES_32 are rejected. */
+    if (es > ES_32) {
+        gen_program_exception(s, PGM_SPECIFICATION);
+        return DISAS_NORETURN;
+    }
+
+    /* m6 goes into bits 16 and up, the element size into the low bits. */
+    data = (flags << 16) | es;
+    gen_gvec_4_ptr(get_field(s, v1), get_field(s, v2),
+                   get_field(s, v3), get_field(s, v4),
+                   cpu_env, data, gen_helper_vstrs);
+
+    set_cc_static(s);
+    return DISAS_NEXT;
+}
+
 static DisasJumpType op_vfa(DisasContext *s, DisasOps *o)
 {
 const uint8_t fpf = get_field(s, m4);
diff --git a/target/s390x/tcg/vec_string_helper.c 
b/target/s390x/tcg/vec_string_helper.c
index ac315eb095..22c14c6925 100644
--- a/target/s390x/tcg/vec_string_helper.c
+++ b/target/s390x/tcg/vec_string_helper.c
@@ -471,3 +471,68 @@ void HELPER(gvec_vstrc_cc_rt##BITS)(void *v1, const void 
*v2, const void *v3,  \
 DEF_VSTRC_CC_RT_HELPER(8)
 DEF_VSTRC_CC_RT_HELPER(16)
 DEF_VSTRC_CC_RT_HELPER(32)
+
+void HELPER(vstrs)(void *v1, const void *v2, const void *v3, void *v4,
+   CPUS390XState *env, uint32_t desc) {
+const bool zs = (desc >> 16);
+const uint8_t es = desc & 16;
+const uint8_t char_size = 1 << es;
+
+uint32_t str_len = 0, eos = 0;
+uint32_t i = 0, j = 0, k = 0, cc = 0;
+uint32_t substr_len = ((uint8_t *)v4)[H1(7)] & 31;
+
+for (i = 0; i < 16; i += char_size) {
+if (0 == es && !((uint8_t  *)v3)[H1(i >> es)]) { break; }
+if (1 == es && !((uint16_t *)v3)[H2(i >> es)]) { break; }
+if (2 == es && !((uint32_t *)v3)[H4(i >> es)]) { break; }
+}
+if (i < substr_len) {
+substr_len = i;
+}
+if (substr_len) {
+if (zs) {
+for (k = 0; k < 16; k += char_size) {
+if (0 == es && !((uint8_t  *)v2)[H1(k >> es)]) { break; }
+if (1 == es && !((uint16_t *)v2)[H2(k >> es)]) { break; }
+if (2 == es && !((uint32_t *)v2)[H4(k >> es)]) { break; }
+}
+eos = (16 != k);
+str_len = k;
+} else {
+str_len = 16;
+}
+
+for (k = 0

[PATCH v2 0/7] s390x/tcg: Implement Vector-Enhancements Facility 2

2022-03-06 Thread David Miller
Implement Vector-Enhancements Facility 2 for s390x

resolves: https://gitlab.com/qemu-project/qemu/-/issues/738


implements:
VECTOR LOAD ELEMENTS REVERSED   (VLER)
VECTOR LOAD BYTE REVERSED ELEMENTS  (VLBR)
VECTOR LOAD BYTE REVERSED ELEMENT   (VLEBRH, VLEBRF, VLEBRG)
VECTOR LOAD BYTE REVERSED ELEMENT AND ZERO  (VLLEBRZ)
VECTOR LOAD BYTE REVERSED ELEMENT AND REPLICATE (VLBRREP)
VECTOR STORE ELEMENTS REVERSED  (VSTER)
VECTOR STORE BYTE REVERSED ELEMENTS (VSTBR)
VECTOR STORE BYTE REVERSED ELEMENT  (VSTEBRH, VSTEBRF, VSTEBRG)
VECTOR SHIFT LEFT DOUBLE BY BIT (VSLD)
VECTOR SHIFT RIGHT DOUBLE BY BIT(VSRD)
VECTOR STRING SEARCH(VSTRS)

modifies:
VECTOR FP CONVERT FROM FIXED(VCFPS)
VECTOR FP CONVERT FROM LOGICAL  (VCFPL)
VECTOR FP CONVERT TO FIXED  (VCSFP)
VECTOR FP CONVERT TO LOGICAL(VCLFP)
VECTOR SHIFT LEFT   (VSL)
VECTOR SHIFT RIGHT ARITHMETIC   (VSRA)
VECTOR SHIFT RIGHT LOGICAL  (VSRL)

David Miller (7):
  target/s390x: vxeh2: vector convert short/32b
  target/s390x: vxeh2: vector string search
  target/s390x: vxeh2: vector shift {double by bit, left, right
{logical,arithmetic}}
  target/s390x: vxeh2: vector {load,store} elements reversed
  target/s390x: vxeh2: vector {load,store} reversed elements [and
{zero,replicate}]
  target/s390x: add S390_FEAT_VECTOR_ENH2 to cpu max
  tests/tcg/s390x: Tests for Vector Enhancements Facility 2

 include/qemu/bitops.h|  25 ++
 target/s390x/gen-features.c  |   2 +
 target/s390x/helper.h|  12 +-
 target/s390x/tcg/insn-data.def   |  28 +-
 target/s390x/tcg/translate.c |   3 +-
 target/s390x/tcg/translate_vx.c.inc  | 369 ---
 target/s390x/tcg/vec.h   |  24 ++
 target/s390x/tcg/vec_fpu_helper.c|  31 +++
 target/s390x/tcg/vec_helper.c|  31 +++
 target/s390x/tcg/vec_int_helper.c|  58 +
 target/s390x/tcg/vec_string_helper.c |  65 +
 tests/tcg/s390x/Makefile.target  |   8 +
 tests/tcg/s390x/vxeh2_vcvt.c |  97 +++
 tests/tcg/s390x/vxeh2_vlstr.c| 146 +++
 tests/tcg/s390x/vxeh2_vs.c   |  91 +++
 15 files changed, 947 insertions(+), 43 deletions(-)
 create mode 100644 tests/tcg/s390x/vxeh2_vcvt.c
 create mode 100644 tests/tcg/s390x/vxeh2_vlstr.c
 create mode 100644 tests/tcg/s390x/vxeh2_vs.c

-- 
2.34.1




Re: [PATCH v1 1/2] s390x/tcg: Implement Vector-Enhancements Facility 2 for s390x

2022-03-06 Thread David Miller
> Just use a little-endian load: MO_LE | es.
> While we use MO_TE all over, it's no secret that it's always big-endian.

> And everywhere else you do load then swap, or swap then store.

This is not working as expected.
I tried it in two places, in both cases removing the swap that followed,
and the tests failed.

I'm submitting the split patches with whitespace fixes.
I hope it's not a dealbreaker,  seeing as it matches the rest of the file
this way anyhow.
If it is,  I will spend some more time looking into it tomorrow.

Thanks,
- David Miller


On Thu, Mar 3, 2022 at 1:04 PM David Hildenbrand  wrote:

> On 03.03.22 19:01, David Miller wrote:
> >
> > Makes sense,  thanks for the quick reply.
> > Last question,  the patches can depend on others in the same set right?
> > IE:  all of the additions to insn-data.def in one, implementations in
> > separate patches.
>
> For bisectability, each patch should be self-contained and can only
> depend on earlier patches in the series.
>
> See my VX patches to get an idea of what it could look like, e.g.,
>
>
> commit e58de341d948d12cb36bbc5aa4866b7412581880
> Author: David Hildenbrand 
> Date:   Wed Apr 10 22:45:35 2019 +0200
>
> s390x/tcg: Implement VECTOR SUM ACROSS WORD
>
> Similar to VECTOR SUM ACROSS DOUBLEWORD.
>
> Reviewed-by: Richard Henderson 
> Signed-off-by: David Hildenbrand 
>
> commit 8dc69a196eb2e3e8ab1d033b378e4f5a5efaa219
> Author: David Hildenbrand 
> Date:   Wed Apr 10 22:40:01 2019 +0200
>
> s390x/tcg: Implement VECTOR SUM ACROSS QUADWORD
>
> Similar to VECTOR SUM ACROSS DOUBLEWORD, however without a loop and
> using 128-bit calculations.
>
> Reviewed-by: Richard Henderson 
> Signed-off-by: David Hildenbrand 
>
> commit fe2be36d26b3d3e86246c88bb09a9613b99dc6c9
> Author: David Hildenbrand 
> Date:   Wed Apr 10 22:48:25 2019 +0200
>
> s390x/tcg: Implement VECTOR SUM ACROSS DOUBLEWORD
>
> Perform the calculations without a helper. Only 16 bit or 32 bit values
> have to be added.
>
> Reviewed-by: Richard Henderson 
> Signed-off-by: David Hildenbrand 
>
> commit bc725e65152c57d42f19eec134c99940114d6362
> Author: David Hildenbrand 
> Date:   Tue Apr 9 23:26:47 2019 +0200
>
> s390x/tcg: Implement VECTOR SUBTRACT WITH BORROW COMPUTE BORROW
> INDICATION
>
> Mostly courtesy of Richard H.
>
> Reviewed-by: Richard Henderson 
> Signed-off-by: David Hildenbrand 
>
> commit 48390a7c2716a128155b872d5316cda5f55dcfa9
> Author: David Hildenbrand 
> Date:   Wed Apr 10 22:15:07 2019 +0200
>
> s390x/tcg: Implement VECTOR SUBTRACT WITH BORROW INDICATION
>
> Fairly easy as only 128-bit handling is required. Simply perform the
> subtraction and then subtract the borrow.
>
> Reviewed-by: Richard Henderson 
> Signed-off-by: David Hildenbrand 
>
>
>
> --
> Thanks,
>
> David / dhildenb
>
>


Re: [PATCH v1 1/2] s390x/tcg: Implement Vector-Enhancements Facility 2 for s390x

2022-03-03 Thread David Miller
Makes sense,  thanks for the quick reply.
Last question,  the patches can depend on others in the same set right?
IE:  all of the additions to insn-data.def in one, implementations in
separate patches.


Thanks
- David Miller

On Thu, Mar 3, 2022 at 12:42 PM Richard Henderson <
richard.hender...@linaro.org> wrote:

> On 3/3/22 06:50, David Miller wrote:
> >
> >  > Too many changes in one patch.
> >  > You need to split these into smaller, logical units.
> >
> > Can you give some guideline on that?
> > IE: change to two,  the shifts and reversed loads into two patches or
> more on line count
> > of each patch?
>
> Your best guide is line count: < 50 is ideal, though of course that can't
> always be done.
>   For bug fixes or code reorg you may find yourself constrained by not
> breaking bisection.
>
> But for new code, like this, one patch per feature is easiest to review.
> In this case
> you've got:
>
>- load/store elements reversed,
>- load/store byte reversed elements,
>- shift double
>- string search
>- modify fp convert
>- modify shift
>
> > I wasn't sure if there was a reason MO_TE was used so just kept with the
> existing code flow.
>
> We have to put some indication of endianness there, and "target" endian
> was the easiest to
> replicate across all targets.  Especially with those that are bi-endian.
>
> I've just noticed that we haven't propagated this to the integer
> load/store reversed.  I
> presume that code pre-dates the existence of the feature.  But it would be
> good to change
>
>  C(0xe31f, LRVH,RXY_a, Z,   0, m2_16u, new, r1_16, rev16, 0)
>  C(0xe31e, LRV, RXY_a, Z,   0, m2_32u, new, r1_32, rev32, 0)
>  C(0xe30f, LRVG,RXY_a, Z,   0, m2_64, r1, 0, rev64, 0)
> ...
>  C(0xe33f, STRVH,   RXY_a, Z,   la2, r1_16u, new, m1_16, rev16, 0)
>  C(0xe33e, STRV,RXY_a, Z,   la2, r1_32u, new, m1_32, rev32, 0)
>  C(0xe32f, STRVG,   RXY_a, Z,   la2, r1_o, new, m1_64, rev64, 0)
>
> to use little-endian memory ops, rather than separately reversing the
> bytes.
>
>
> r~
>


Re: [PATCH v1 1/2] s390x/tcg: Implement Vector-Enhancements Facility 2 for s390x

2022-03-03 Thread David Miller
> Too many changes in one patch.
> You need to split these into smaller, logical units.

Can you give some guideline on that?
IE: change to two,  the shifts and reversed loads into two patches or more
on line count of each patch?
.
> Tabs, and more later.

The tabs should not happen at all; I disabled them in my editor and will
figure out how they reappeared.

> This seems likely to go wrong for vdst == vsrc.
> In addition, swapping the order of elements is something that can be done
in parallel.

There is always an even number of elements.
Will make the change there however, that code is more concise.

> Just use a little-endian load: MO_LE | es.
> While we use MO_TE all over, it's no secret that it's always big-endian.

> And everywhere else you do load then swap, or swap then store.

I wasn't sure if there was a reason MO_TE was used so just kept with the
existing code flow.

Thanks
- David Miller




On Thu, Mar 3, 2022 at 3:58 AM Richard Henderson <
richard.hender...@linaro.org> wrote:

> On 3/2/22 17:22, David Miller wrote:
> > resolves: https://gitlab.com/qemu-project/qemu/-/issues/738
> >
> > implements:
> > VECTOR LOAD ELEMENTS REVERSED   (VLER)
> > VECTOR LOAD BYTE REVERSED ELEMENTS  (VLBR)
> > VECTOR LOAD BYTE REVERSED ELEMENT   (VLEBRH, VLEBRF, VLEBRG)
> > VECTOR LOAD BYTE REVERSED ELEMENT AND ZERO  (VLLEBRZ)
> > VECTOR LOAD BYTE REVERSED ELEMENT AND REPLICATE (VLBRREP)
> > VECTOR STORE ELEMENTS REVERSED  (VSTER)
> > VECTOR STORE BYTE REVERSED ELEMENTS (VSTBR)
> > VECTOR STORE BYTE REVERSED ELEMENT  (VSTEBRH, VSTEBRF, VSTEBRG)
> > VECTOR SHIFT LEFT DOUBLE BY BIT (VSLD)
> > VECTOR SHIFT RIGHT DOUBLE BY BIT(VSRD)
> > VECTOR STRING SEARCH(VSTRS)
> >
> > modifies:
> > VECTOR FP CONVERT FROM FIXED(VCFPS)
> > VECTOR FP CONVERT FROM LOGICAL  (VCFPL)
> > VECTOR FP CONVERT TO FIXED  (VCSFP)
> > VECTOR FP CONVERT TO LOGICAL(VCLFP)
> > VECTOR SHIFT LEFT       (VSL)
> > VECTOR SHIFT RIGHT ARITHMETIC   (VSRA)
> > VECTOR SHIFT RIGHT LOGICAL  (VSRL)
> >
> > Signed-off-by: David Miller 
>
> Too many changes in one patch.
> You need to split these into smaller, logical units.
>
> > +/* VECTOR LOAD BYTE REVERSED ELEMENT AND ZERO */
> > +F(0xe604, VLLEBRZ, VRX,   VE2, la2, 0, 0, 0, vllebrz, 0, IF_VEC)
> > +/* VECTOR LOAD BYTE REVERSED ELEMENTS */
> > + F(0xe606, VLBR,VRX,   VE2, la2, 0, 0, 0, vlbr, 0, IF_VEC)
> > +/* VECTOR LOAD ELEMENTS REVERSED */
> > + F(0xe607, VLER,VRX,   VE2, la2, 0, 0, 0, vler, 0, IF_VEC)
>
> Tabs, and more later.
>
> > @@ -457,6 +457,9 @@ static DisasJumpType op_vlrep(DisasContext *s,
> DisasOps *o)
> >   return DISAS_NEXT;
> >   }
> >
> > +
> > +
> > +
> >   static DisasJumpType op_vle(DisasContext *s, DisasOps *o)
>
> Do not add pointless whitespace.
>
> > +static DisasJumpType op_vlebr(DisasContext *s, DisasOps *o)
> > +{
> > +const uint8_t es = (1 == s->fields.op2) ? 1 : (1 ^ s->fields.op2);
> > +const uint8_t enr = get_field(s, m3);
> > +TCGv_i64 tmp;
> > +
> > +if (es < ES_16 || es > ES_64 || !valid_vec_element(enr, es)) {
> > +gen_program_exception(s, PGM_SPECIFICATION);
> > +return DISAS_NORETURN;
> > +}
> > +
> > +tmp = tcg_temp_new_i64();
> > +tcg_gen_qemu_ld_i64(tmp, o->addr1, get_mem_index(s), MO_TE | es);
>
> Just use a little-endian load: MO_LE | es.
> While we use MO_TE all over, it's no secret that it's always big-endian.
>
> And everywhere else you do load then swap, or swap then store.
>
> > +}
> > +
> > +
> > +
> > +static DisasJumpType op_vsteb(DisasContext *s, DisasOps *o)
>
> More care with spacing.
>
> > +static inline void s390_vec_reverse(S390Vector *vdst,
> > +S390Vector *vsrc, uint8_t es)
> > +{
> > +const uint8_t elems = 1 << (4 - es);
> > +uint32_t enr;
> > +
> > +for (enr = 0; enr < elems; enr++) {
> > +switch (es) {
> > +case MO_8:
> > +s390_vec_write_element8(vdst, enr,
> > +   s390_vec_read_element8(vsrc, 15 ^ enr));
> > +break;
> > +case MO_16:
> > +s390_vec_write_element16(vdst, enr,
> > +   s390_vec_read_element16(vsrc, 7 ^ enr));
> > +break;
> > +   

[PATCH v1 2/2] tests/tcg/s390x: Tests for Vector Enhancements Facility 2

2022-03-02 Thread David Miller
tests/tcg/s390x/vxeh2_vcvt.c
tests/tcg/s390x/vxeh2_vs.c
tests/tcg/s390x/vxeh2_vlstr.c

Signed-off-by: David Miller 
---
 tests/tcg/s390x/Makefile.target |   8 ++
 tests/tcg/s390x/vxeh2_vcvt.c|  97 +
 tests/tcg/s390x/vxeh2_vlstr.c   | 146 
 tests/tcg/s390x/vxeh2_vs.c  |  91 
 4 files changed, 342 insertions(+)
 create mode 100644 tests/tcg/s390x/vxeh2_vcvt.c
 create mode 100644 tests/tcg/s390x/vxeh2_vlstr.c
 create mode 100644 tests/tcg/s390x/vxeh2_vs.c

diff --git a/tests/tcg/s390x/Makefile.target b/tests/tcg/s390x/Makefile.target
index 54e67446aa..2a2b184056 100644
--- a/tests/tcg/s390x/Makefile.target
+++ b/tests/tcg/s390x/Makefile.target
@@ -16,6 +16,14 @@ TESTS+=shift
 TESTS+=trap
 TESTS+=signals-s390x
 
+VECTOR_TESTS=vxeh2_vs
+VECTOR_TESTS+=vxeh2_vcvt
+VECTOR_TESTS+=vxeh2_vlstr
+
+TESTS+=$(VECTOR_TESTS)
+
+$(VECTOR_TESTS): CFLAGS+=-march=z15 -O2
+
 ifneq ($(HAVE_GDB_BIN),)
 GDB_SCRIPT=$(SRC_PATH)/tests/guest-debug/run-test.py
 
diff --git a/tests/tcg/s390x/vxeh2_vcvt.c b/tests/tcg/s390x/vxeh2_vcvt.c
new file mode 100644
index 00..71ecbd77b0
--- /dev/null
+++ b/tests/tcg/s390x/vxeh2_vcvt.c
@@ -0,0 +1,97 @@
+/*
+ * vxeh2_vcvt: vector-enhancements facility 2 vector convert *
+ */
+#include <stdint.h>
+
+typedef union S390Vector {
+uint64_t d[2];  /* doubleword */
+uint32_t w[4];  /* word */
+uint16_t h[8];  /* halfword */
+uint8_t  b[16]; /* byte */
+float    f[4];
+double   fd[2];
+__uint128_t v;
+} S390Vector;
+
+#define M_S 8
+#define M4_XxC 4
+#define M4_def M4_XxC
+
+static inline void vcfps(S390Vector *v1, S390Vector *v2,
+const uint8_t m3,  const uint8_t m4,  const uint8_t m5)
+{
+asm volatile("vcfps %[v1], %[v2], %[m3], %[m4], %[m5]\n"
+: [v1] "=v" (v1->v)
+: [v2]  "v" (v2->v)
+, [m3]  "i" (m3)
+, [m4]  "i" (m4)
+, [m5]  "i" (m5));
+}
+
+static inline void vcfpl(S390Vector *v1, S390Vector *v2,
+const uint8_t m3,  const uint8_t m4,  const uint8_t m5)
+{
+asm volatile("vcfpl %[v1], %[v2], %[m3], %[m4], %[m5]\n"
+: [v1] "=v" (v1->v)
+: [v2]  "v" (v2->v)
+, [m3]  "i" (m3)
+, [m4]  "i" (m4)
+, [m5]  "i" (m5));
+}
+
+static inline void vcsfp(S390Vector *v1, S390Vector *v2,
+const uint8_t m3,  const uint8_t m4,  const uint8_t m5)
+{
+asm volatile("vcsfp %[v1], %[v2], %[m3], %[m4], %[m5]\n"
+: [v1] "=v" (v1->v)
+: [v2]  "v" (v2->v)
+, [m3]  "i" (m3)
+, [m4]  "i" (m4)
+, [m5]  "i" (m5));
+}
+
+static inline void vclfp(S390Vector *v1, S390Vector *v2,
+const uint8_t m3,  const uint8_t m4,  const uint8_t m5)
+{
+asm volatile("vclfp %[v1], %[v2], %[m3], %[m4], %[m5]\n"
+: [v1] "=v" (v1->v)
+: [v2]  "v" (v2->v)
+, [m3]  "i" (m3)
+, [m4]  "i" (m4)
+, [m5]  "i" (m5));
+}
+
+int main(int argc, char *argv[])
+{
+S390Vector vd;
+S390Vector vs_i32 = { .w[0] = 1, .w[1] = 64, .w[2] = 1024, .w[3] = -10 };
+S390Vector vs_u32 = { .w[0] = 2, .w[1] = 32, .w[2] = 4096, .w[3] =  };
+S390Vector vs_f32 = { .f[0] = 3.987, .f[1] = 5.123,
+  .f[2] = 4.499, .f[3] = 0.512 };
+
+vd.d[0] = vd.d[1] = 0;
+vcfps(&vd, &vs_i32, 2, M4_def, 0);
+if (1 != vd.f[0] || 1024 != vd.f[2] || 64 != vd.f[1] || -10 != vd.f[3]) {
+return 1;
+}
+
+vd.d[0] = vd.d[1] = 0;
+vcfpl(&vd, &vs_u32, 2, M4_def, 0);
+if (2 != vd.f[0] || 4096 != vd.f[2] || 32 != vd.f[1] ||  != vd.f[3]) {
+return 1;
+}
+
+vd.d[0] = vd.d[1] = 0;
+vcsfp(&vd, &vs_f32, 2, M4_def, 0);
+if (4 != vd.w[0] || 4 != vd.w[2] || 5 != vd.w[1] || 1 != vd.w[3]) {
+return 1;
+}
+
+vd.d[0] = vd.d[1] = 0;
+vclfp(&vd, &vs_f32, 2, M4_def, 0);
+if (4 != vd.w[0] || 4 != vd.w[2] || 5 != vd.w[1] || 1 != vd.w[3]) {
+return 1;
+}
+
+return 0;
+}
diff --git a/tests/tcg/s390x/vxeh2_vlstr.c b/tests/tcg/s390x/vxeh2_vlstr.c
new file mode 100644
index 00..bf2954e86d
--- /dev/null
+++ b/tests/tcg/s390x/vxeh2_vlstr.c
@@ -0,0 +1,146 @@
+/*
+ * vxeh2_vlstr: vector-enhancements facility 2 vector load/store reversed *
+ */
+#include <stdint.h>
+
+typedef union S390Vector {
+uint64_t d[2];  /* doubleword */
+uint32_t w[4];  /* word */
+uint16_t h[8];  /* halfword */
+uint8_t  b[16]; /* byte */
+__uint128_t v;
+} S390Vector;
+
+#define ES8  0
+#define ES16 1
+#define ES32 2
+#define ES64 3
+
+#define vtst(v1, v2) \
+if (v1.d[0] != v2.d[0] || v1.d[1] != v2.d[1])

[PATCH v1 1/2] s390x/tcg: Implement Vector-Enhancements Facility 2 for s390x

2022-03-02 Thread David Miller
resolves: https://gitlab.com/qemu-project/qemu/-/issues/738

implements:
VECTOR LOAD ELEMENTS REVERSED   (VLER)
VECTOR LOAD BYTE REVERSED ELEMENTS  (VLBR)
VECTOR LOAD BYTE REVERSED ELEMENT   (VLEBRH, VLEBRF, VLEBRG)
VECTOR LOAD BYTE REVERSED ELEMENT AND ZERO  (VLLEBRZ)
VECTOR LOAD BYTE REVERSED ELEMENT AND REPLICATE (VLBRREP)
VECTOR STORE ELEMENTS REVERSED  (VSTER)
VECTOR STORE BYTE REVERSED ELEMENTS (VSTBR)
VECTOR STORE BYTE REVERSED ELEMENT  (VSTEBRH, VSTEBRF, VSTEBRG)
VECTOR SHIFT LEFT DOUBLE BY BIT (VSLD)
VECTOR SHIFT RIGHT DOUBLE BY BIT(VSRD)
VECTOR STRING SEARCH(VSTRS)

modifies:
VECTOR FP CONVERT FROM FIXED(VCFPS)
VECTOR FP CONVERT FROM LOGICAL  (VCFPL)
VECTOR FP CONVERT TO FIXED  (VCSFP)
VECTOR FP CONVERT TO LOGICAL(VCLFP)
VECTOR SHIFT LEFT   (VSL)
VECTOR SHIFT RIGHT ARITHMETIC   (VSRA)
VECTOR SHIFT RIGHT LOGICAL  (VSRL)

Signed-off-by: David Miller 
---
 include/qemu/bitops.h|  26 ++
 target/s390x/gen-features.c  |   2 +-
 target/s390x/helper.h|  12 +-
 target/s390x/tcg/insn-data.def   |  30 ++-
 target/s390x/tcg/translate.c |   3 +-
 target/s390x/tcg/translate_vx.c.inc  | 377 ---
 target/s390x/tcg/vec.h   |  30 +++
 target/s390x/tcg/vec_fpu_helper.c|  33 +++
 target/s390x/tcg/vec_helper.c|  33 +++
 target/s390x/tcg/vec_int_helper.c|  59 +
 target/s390x/tcg/vec_string_helper.c |  65 +
 11 files changed, 626 insertions(+), 44 deletions(-)

diff --git a/include/qemu/bitops.h b/include/qemu/bitops.h
index 03213ce952..9a955fc2f4 100644
--- a/include/qemu/bitops.h
+++ b/include/qemu/bitops.h
@@ -446,6 +446,32 @@ static inline int64_t sextract64(uint64_t value, int 
start, int length)
 return ((int64_t)(value << (64 - length - start))) >> (64 - length);
 }
 
+/**
+ * deposit8:
+ * @value: initial value to insert bit field into
+ * @start: the lowest bit in the bit field (numbered from 0)
+ * @length: the length of the bit field
+ * @fieldval: the value to insert into the bit field
+ *
+ * Deposit @fieldval into the 8 bit @value at the bit field specified
+ * by the @start and @length parameters, and return the modified
+ * @value. Bits of @value outside the bit field are not modified.
+ * Bits of @fieldval above the least significant @length bits are
+ * ignored. The bit field must lie entirely within the 8 bit byte.
+ * It is valid to request that all 8 bits are modified (ie @length
+ * 8 and @start 0).
+ *
+ * Returns: the modified @value.
+ */
+static inline uint8_t deposit8(uint8_t value, int start, int length,
+   uint8_t fieldval)
+{
+uint8_t mask;
+assert(start >= 0 && length > 0 && length <= 8 - start);
+mask = (~0ULL >> (8 - length)) << start;
+return (value & ~mask) | ((fieldval << start) & mask);
+}
+
 /**
  * deposit32:
  * @value: initial value to insert bit field into
diff --git a/target/s390x/gen-features.c b/target/s390x/gen-features.c
index 22846121c4..633891d59f 100644
--- a/target/s390x/gen-features.c
+++ b/target/s390x/gen-features.c
@@ -740,13 +740,13 @@ static uint16_t qemu_V6_2[] = {
 
 static uint16_t qemu_LATEST[] = {
 S390_FEAT_MISC_INSTRUCTION_EXT3,
+S390_FEAT_VECTOR_ENH2,
 };
 /* add all new definitions before this point */
 static uint16_t qemu_MAX[] = {
 /* generates a dependency warning, leave it out for now */
 S390_FEAT_MSA_EXT_5,
 };
-
 /** END FEATURE DEFS **/
 
 #define _YEARS  "2016"
diff --git a/target/s390x/helper.h b/target/s390x/helper.h
index 69f69cf718..dbafdba62d 100644
--- a/target/s390x/helper.h
+++ b/target/s390x/helper.h
@@ -129,6 +129,9 @@ DEF_HELPER_FLAGS_3(probe_write_access, TCG_CALL_NO_WG, 
void, env, i64, i64)
 /* === Vector Support Instructions === */
 DEF_HELPER_FLAGS_4(gvec_vbperm, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
 DEF_HELPER_FLAGS_4(vll, TCG_CALL_NO_WG, void, env, ptr, i64, i64)
+DEF_HELPER_FLAGS_4(vstl, TCG_CALL_NO_WG, void, env, cptr, i64, i64)
+DEF_HELPER_FLAGS_4(vler, TCG_CALL_NO_WG, void, env, ptr, i64, i64)
+DEF_HELPER_FLAGS_4(vster, TCG_CALL_NO_WG, void, env, ptr, i64, i64)
 DEF_HELPER_FLAGS_4(gvec_vpk16, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
 DEF_HELPER_FLAGS_4(gvec_vpk32, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
 DEF_HELPER_FLAGS_4(gvec_vpk64, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
@@ -145,7 +148,6 @@ DEF_HELPER_5(gvec_vpkls_cc16, void, ptr, cptr, cptr, env, 
i32)
 DEF_HELPER_5(gvec_vpkls_cc32, void, ptr, cptr, cptr, env, i32)
 DEF_HELPER_5(gvec_vpkls_cc64, void, ptr, cptr, cptr, env, i32)
 DEF_HELPER_FLAGS_5(gvec_vperm, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, cptr, 
i32)
-DEF_HELPER_FLAGS_4(vstl, TCG_CALL_NO_WG, void, env, cptr, i64, i64)
 

[PATCH v1 0/2] s390x: Add support for Vector Enhancements Facility 2

2022-03-02 Thread David Miller
Extend s390x z15 to support and test Vector Enhancements Facility 2 (vxeh2)


David Miller (2):
  s390x/tcg: Implement Vector-Enhancements Facility 2 for s390x
  tests/tcg/s390x: Tests for Vector Enhancements Facility 2

 include/qemu/bitops.h|  26 ++
 target/s390x/gen-features.c  |   2 +-
 target/s390x/helper.h|  12 +-
 target/s390x/tcg/insn-data.def   |  30 ++-
 target/s390x/tcg/translate.c |   3 +-
 target/s390x/tcg/translate_vx.c.inc  | 377 ---
 target/s390x/tcg/vec.h   |  30 +++
 target/s390x/tcg/vec_fpu_helper.c|  33 +++
 target/s390x/tcg/vec_helper.c|  33 +++
 target/s390x/tcg/vec_int_helper.c|  59 +
 target/s390x/tcg/vec_string_helper.c |  65 +
 tests/tcg/s390x/Makefile.target  |   8 +
 tests/tcg/s390x/vxeh2_vcvt.c |  97 +++
 tests/tcg/s390x/vxeh2_vlstr.c| 146 +++
 tests/tcg/s390x/vxeh2_vs.c   |  91 +++
 15 files changed, 968 insertions(+), 44 deletions(-)
 create mode 100644 tests/tcg/s390x/vxeh2_vcvt.c
 create mode 100644 tests/tcg/s390x/vxeh2_vlstr.c
 create mode 100644 tests/tcg/s390x/vxeh2_vs.c

-- 
2.34.1




[PATCH v3] tests/tcg/s390x: Cleanup of mie3 tests.

2022-03-01 Thread David Miller
Adds clobbers and merges remaining separate asm statements.

v2 -> v3:
* Removed all direct memory references in mie3-sel.c

v1 -> v2:
* Corrected side in rebase conflict, removing older code.

Signed-off-by: David Miller 
---
 tests/tcg/s390x/mie3-compl.c | 18 -
 tests/tcg/s390x/mie3-mvcrl.c | 12 
 tests/tcg/s390x/mie3-sel.c   | 38 
 3 files changed, 38 insertions(+), 30 deletions(-)

diff --git a/tests/tcg/s390x/mie3-compl.c b/tests/tcg/s390x/mie3-compl.c
index 35649f3b02..938938df9e 100644
--- a/tests/tcg/s390x/mie3-compl.c
+++ b/tests/tcg/s390x/mie3-compl.c
@@ -1,13 +1,20 @@
 #include 
 
+
 #define FbinOp(S, ASM) uint64_t S(uint64_t a, uint64_t b) \
-{ \
-uint64_t res = 0; \
-asm ("llihf %[res],801\n" ASM \
- : [res]"=&r"(res) : [a]"r"(a), [b]"r"(b) : "cc"); \
-return res; \
+{   \
+uint64_t res = 0;   \
+asm volatile (  \
+"llihf %[res],801\n"\
+ASM \
+: [res] "=&r" (res)  \
+: [a] "r" (a)   \
+, [b] "r" (b)   \
+);  \
+return res; \
 }
 
+
 /* AND WITH COMPLEMENT */
 FbinOp(_ncrk,  ".insn rrf, 0xB9F5, %[res], %[b], %[a], 0\n")
 FbinOp(_ncgrk, ".insn rrf, 0xB9E5, %[res], %[b], %[a], 0\n")
@@ -28,6 +35,7 @@ FbinOp(_nogrk, ".insn rrf, 0xB966, %[res], %[b], %[a], 
0\n")
 FbinOp(_ocrk,  ".insn rrf, 0xB975, %[res], %[b], %[a], 0\n")
 FbinOp(_ocgrk, ".insn rrf, 0xB965, %[res], %[b], %[a], 0\n")
 
+
 int main(int argc, char *argv[])
 {
 if (_ncrk(0xFF88, 0xAA11)  != 0x03210011ull ||
diff --git a/tests/tcg/s390x/mie3-mvcrl.c b/tests/tcg/s390x/mie3-mvcrl.c
index 57b08e48d0..f749dad9c2 100644
--- a/tests/tcg/s390x/mie3-mvcrl.c
+++ b/tests/tcg/s390x/mie3-mvcrl.c
@@ -1,15 +1,17 @@
 #include 
 #include 
 
+
 static inline void mvcrl_8(const char *dst, const char *src)
 {
 asm volatile (
-"llill %%r0, 8\n"
-".insn sse, 0xE50A, 0(%[dst]), 0(%[src])"
-: : [dst] "d" (dst), [src] "d" (src)
-: "memory");
+"llill %%r0, 8\n"
+".insn sse, 0xE50A, 0(%[dst]), 0(%[src])"
+: : [dst] "d" (dst), [src] "d" (src)
+: "r0", "memory");
 }
 
+
 int main(int argc, char *argv[])
 {
 const char *alpha = "abcdefghijklmnop";
@@ -25,3 +27,5 @@ int main(int argc, char *argv[])
 
 return strncmp(alpha, tstr, 16ul);
 }
+
+
diff --git a/tests/tcg/s390x/mie3-sel.c b/tests/tcg/s390x/mie3-sel.c
index b0c5c9857d..4f54d37eeb 100644
--- a/tests/tcg/s390x/mie3-sel.c
+++ b/tests/tcg/s390x/mie3-sel.c
@@ -1,32 +1,27 @@
 #include 
 
+
 #define Fi3(S, ASM) uint64_t S(uint64_t a, uint64_t b, uint64_t c) \
-{\
-uint64_t res = 0;\
-asm (\
- "lg %%r2, %[a]\n"   \
- "lg %%r3, %[b]\n"   \
- "lg %%r0, %[c]\n"   \
- "ltgr %%r0, %%r0\n" \
- ASM \
- "stg %%r0, %[res] " \
- : [res] "=m" (res)  \
- : [a] "m" (a),  \
-   [b] "m" (b),  \
-   [c] "m" (c)   \
- : "r0", "r2",   \
-   "r3", "r4"\
-);   \
-return res;  \
+{   \
+asm volatile (  \
+"ltgr %[c], %[c]\n" \
+ASM \
+: [c] "+r" (c)  \
+: [a]  "r" (a)  \
+, [b]  "r" (b)  \
+);  \
+return c;   \
 }
 
-Fi3 (_selre, ".insn rrf, 0xB9F0, %%r0, %%r3, %%r2, 8\n")
-Fi3 (_selgrz,".insn rrf, 0xB9E3, %%r0, %%r3, %%r2, 8\n")
-Fi3 (_selfhrnz,  ".insn rrf, 0xB9C0, %%r0, %%r3, %%r2, 7\n")
+Fi3 (_selre, ".insn rrf, 0xB9F0, %[c], %[b], %[a], 8\n")
+Fi3 (_selgrz,".insn rrf, 0xB9E3, %[c], %[b], %[a], 8\n")
+Fi3 (_selfhrnz,  ".insn rrf, 0xB9C0, %[c], %[b], %[a], 7\n")
+
 
 int main(int argc, char *argv[])
 {
 uint64_t a = ~0, b = ~0, c = ~0;
+
 a =_selre(0x06660066ull, 0x06660006ull, a);
 b =   _selgrz(0xF00D0005ull, 0xF00D0055ull, b);
 c = _selfhrnz(0x04320044ull, 0x06540004ull, c);
@@ -34,5 +29,6 @@ int main(int argc, char *argv[])
 return (int) (
 (0x0066ull != a) ||
 (0xF00D0005ull != b) ||
-(0x0654ull != c));
+(0x0654ull != c) );
 }
+
-- 
2.34.1




Re: [PATCH v2] tests/tcg/s390x: Cleanup of mie3 tests.

2022-03-01 Thread David Miller
However the constraint must be wrong there.
Sorry about split message.

On Tue, Mar 1, 2022 at 3:21 PM David Miller  wrote:

> I used
>
>
> #define Fi3(S, ASM) uint64_t S(uint64_t a, uint64_t b, uint64_t c) \
> {   \
> uint64_t res = 0;   \
> asm volatile (  \
> "ltgr %[c], %[c]\n" \
> ASM \
> "stg %[c], %[res] " \
> : [res] "=" (res) \
> : [a] "r" (a),  \
>   [b] "r" (b),  \
>   [c] "r" (c)   \
> );  \
> return res; \
> }
>
>
>


Re: [PATCH v2] tests/tcg/s390x: Cleanup of mie3 tests.

2022-03-01 Thread David Miller
I used


#define Fi3(S, ASM) uint64_t S(uint64_t a, uint64_t b, uint64_t c) \
{   \
uint64_t res = 0;   \
asm volatile (  \
"ltgr %[c], %[c]\n" \
ASM \
"stg %[c], %[res] " \
: [res] "=" (res) \
: [a] "r" (a),  \
  [b] "r" (b),  \
  [c] "r" (c)   \
);  \
return res; \
}


Re: [PATCH] tests/tcg/s390x: Cleanup of mie3 tests.

2022-03-01 Thread David Miller
Please disregard,  v2 sent.

On Tue, Mar 1, 2022 at 2:15 PM David Miller  wrote:

> Adds clobbers and merges remaining separate asm statements.
>
> Signed-off-by: David Miller 
> ---
>  tests/tcg/s390x/mie3-compl.c | 42 +++-
>  tests/tcg/s390x/mie3-mvcrl.c | 12 +++
>  tests/tcg/s390x/mie3-sel.c   | 40 ++
>  3 files changed, 57 insertions(+), 37 deletions(-)
>
> diff --git a/tests/tcg/s390x/mie3-compl.c b/tests/tcg/s390x/mie3-compl.c
> index 35649f3b02..85b23a9b7a 100644
> --- a/tests/tcg/s390x/mie3-compl.c
> +++ b/tests/tcg/s390x/mie3-compl.c
> @@ -1,32 +1,44 @@
>  #include 
>
> +
>  #define FbinOp(S, ASM) uint64_t S(uint64_t a, uint64_t b) \
> -{ \
> -uint64_t res = 0; \
> -asm ("llihf %[res],801\n" ASM \
> - : [res]"=&r"(res) : [a]"r"(a), [b]"r"(b) : "cc"); \
> -return res; \
> +{   \
> +uint64_t res = 0;   \
> +asm volatile (  \
> +"llihf %%r0,801\n"  \
> +"lg %%r2, %[a]\n"   \
> +"lg %%r3, %[b]\n"   \
> +ASM \
> +"stg %%r0, %[res] " \
> +: [res] "=m" (res)  \
> +: [a] "m" (a)   \
> +, [b] "m" (b)   \
> +: "r0", "r2", "r3"  \
> +);  \
> +return res; \
>  }
>
> +
>  /* AND WITH COMPLEMENT */
> -FbinOp(_ncrk,  ".insn rrf, 0xB9F5, %[res], %[b], %[a], 0\n")
> -FbinOp(_ncgrk, ".insn rrf, 0xB9E5, %[res], %[b], %[a], 0\n")
> +FbinOp(_ncrk,  ".insn rrf, 0xB9F5, %%r0, %%r3, %%r2, 0\n")
> +FbinOp(_ncgrk, ".insn rrf, 0xB9E5, %%r0, %%r3, %%r2, 0\n")
>
>  /* NAND */
> -FbinOp(_nnrk,  ".insn rrf, 0xB974, %[res], %[b], %[a], 0\n")
> -FbinOp(_nngrk, ".insn rrf, 0xB964, %[res], %[b], %[a], 0\n")
> +FbinOp(_nnrk,  ".insn rrf, 0xB974, %%r0, %%r3, %%r2, 0\n")
> +FbinOp(_nngrk, ".insn rrf, 0xB964, %%r0, %%r3, %%r2, 0\n")
>
>  /* NOT XOR */
> -FbinOp(_nxrk,  ".insn rrf, 0xB977, %[res], %[b], %[a], 0\n")
> -FbinOp(_nxgrk, ".insn rrf, 0xB967, %[res], %[b], %[a], 0\n")
> +FbinOp(_nxrk,  ".insn rrf, 0xB977, %%r0, %%r3, %%r2, 0\n")
> +FbinOp(_nxgrk, ".insn rrf, 0xB967, %%r0, %%r3, %%r2, 0\n")
>
>  /* NOR */
> -FbinOp(_nork,  ".insn rrf, 0xB976, %[res], %[b], %[a], 0\n")
> -FbinOp(_nogrk, ".insn rrf, 0xB966, %[res], %[b], %[a], 0\n")
> +FbinOp(_nork,  ".insn rrf, 0xB976, %%r0, %%r3, %%r2, 0\n")
> +FbinOp(_nogrk, ".insn rrf, 0xB966, %%r0, %%r3, %%r2, 0\n")
>
>  /* OR WITH COMPLEMENT */
> -FbinOp(_ocrk,  ".insn rrf, 0xB975, %[res], %[b], %[a], 0\n")
> -FbinOp(_ocgrk, ".insn rrf, 0xB965, %[res], %[b], %[a], 0\n")
> +FbinOp(_ocrk,  ".insn rrf, 0xB975, %%r0, %%r3, %%r2, 0\n")
> +FbinOp(_ocgrk, ".insn rrf, 0xB965, %%r0, %%r3, %%r2, 0\n")
> +
>
>  int main(int argc, char *argv[])
>  {
> diff --git a/tests/tcg/s390x/mie3-mvcrl.c b/tests/tcg/s390x/mie3-mvcrl.c
> index 57b08e48d0..f749dad9c2 100644
> --- a/tests/tcg/s390x/mie3-mvcrl.c
> +++ b/tests/tcg/s390x/mie3-mvcrl.c
> @@ -1,15 +1,17 @@
>  #include 
>  #include 
>
> +
>  static inline void mvcrl_8(const char *dst, const char *src)
>  {
>  asm volatile (
> -"llill %%r0, 8\n"
> -".insn sse, 0xE50A, 0(%[dst]), 0(%[src])"
> -: : [dst] "d" (dst), [src] "d" (src)
> -: "memory");
> +"llill %%r0, 8\n"
> +".insn sse, 0xE50A, 0(%[dst]), 0(%[src])"
> +: : [dst] "d" (dst), [src] "d" (src)
> +: "r0", "memory");
>  }
>
> +
>  int main(int argc, char *argv[])
>  {
>  const char *alpha = "abcdefghijklmnop";
> @@ -25,3 +27,5 @@ int main(int argc, char *argv[])
>
>  return strncmp(alpha, tstr, 16ul);
>  }
> +
> +
> diff --git a/tests/tcg/s390x/mie3-sel.c b/tests/tcg/s390x/mie3-sel.c
> index b0c5c9857d..98cf4d40f5 100644
> --- a/tests/tcg/s390x/mie3-sel.c
> +++ b/tests/tcg/s390x/mie3-sel.c
> @@ -1,29 +1,32 @@
>  #include 
>
> +
>  #define Fi3(S, ASM) uint64_t S(uint64_t a, uint64_t b, uint64_t c) \
> -{\
> -uint64_t res = 0;\
> -asm (\
> - "lg %%r2, %[a]\n"   \
> - "lg %%r3, %[b]\n"   \
> - "lg %%r0, %[c]\n"

[PATCH v2] tests/tcg/s390x: Cleanup of mie3 tests.

2022-03-01 Thread David Miller
Adds clobbers and merges remaining separate asm statements.

v1 -> v2:
* Corrected side in rebase conflict, removing older code.


Signed-off-by: David Miller 
---
 tests/tcg/s390x/mie3-compl.c | 18 +++-
 tests/tcg/s390x/mie3-mvcrl.c | 12 +++
 tests/tcg/s390x/mie3-sel.c   | 41 ++--
 3 files changed, 41 insertions(+), 30 deletions(-)

diff --git a/tests/tcg/s390x/mie3-compl.c b/tests/tcg/s390x/mie3-compl.c
index 35649f3b02..938938df9e 100644
--- a/tests/tcg/s390x/mie3-compl.c
+++ b/tests/tcg/s390x/mie3-compl.c
@@ -1,13 +1,20 @@
 #include 
 
+
 #define FbinOp(S, ASM) uint64_t S(uint64_t a, uint64_t b) \
-{ \
-uint64_t res = 0; \
-asm ("llihf %[res],801\n" ASM \
- : [res]"=&r"(res) : [a]"r"(a), [b]"r"(b) : "cc"); \
-return res; \
+{   \
+uint64_t res = 0;   \
+asm volatile (  \
+"llihf %[res],801\n"\
+ASM \
+: [res] "=&r" (res)  \
+: [a] "r" (a)   \
+, [b] "r" (b)   \
+);  \
+return res; \
 }
 
+
 /* AND WITH COMPLEMENT */
 FbinOp(_ncrk,  ".insn rrf, 0xB9F5, %[res], %[b], %[a], 0\n")
 FbinOp(_ncgrk, ".insn rrf, 0xB9E5, %[res], %[b], %[a], 0\n")
@@ -28,6 +35,7 @@ FbinOp(_nogrk, ".insn rrf, 0xB966, %[res], %[b], %[a], 
0\n")
 FbinOp(_ocrk,  ".insn rrf, 0xB975, %[res], %[b], %[a], 0\n")
 FbinOp(_ocgrk, ".insn rrf, 0xB965, %[res], %[b], %[a], 0\n")
 
+
 int main(int argc, char *argv[])
 {
 if (_ncrk(0xFF88, 0xAA11)  != 0x03210011ull ||
diff --git a/tests/tcg/s390x/mie3-mvcrl.c b/tests/tcg/s390x/mie3-mvcrl.c
index 57b08e48d0..f749dad9c2 100644
--- a/tests/tcg/s390x/mie3-mvcrl.c
+++ b/tests/tcg/s390x/mie3-mvcrl.c
@@ -1,15 +1,17 @@
 #include 
 #include 
 
+
 static inline void mvcrl_8(const char *dst, const char *src)
 {
 asm volatile (
-"llill %%r0, 8\n"
-".insn sse, 0xE50A, 0(%[dst]), 0(%[src])"
-: : [dst] "d" (dst), [src] "d" (src)
-: "memory");
+"llill %%r0, 8\n"
+".insn sse, 0xE50A, 0(%[dst]), 0(%[src])"
+: : [dst] "d" (dst), [src] "d" (src)
+: "r0", "memory");
 }
 
+
 int main(int argc, char *argv[])
 {
 const char *alpha = "abcdefghijklmnop";
@@ -25,3 +27,5 @@ int main(int argc, char *argv[])
 
 return strncmp(alpha, tstr, 16ul);
 }
+
+
diff --git a/tests/tcg/s390x/mie3-sel.c b/tests/tcg/s390x/mie3-sel.c
index b0c5c9857d..ca6043251b 100644
--- a/tests/tcg/s390x/mie3-sel.c
+++ b/tests/tcg/s390x/mie3-sel.c
@@ -1,28 +1,26 @@
 #include 
 
 #define Fi3(S, ASM) uint64_t S(uint64_t a, uint64_t b, uint64_t c) \
-{\
-uint64_t res = 0;\
-asm (\
- "lg %%r2, %[a]\n"   \
- "lg %%r3, %[b]\n"   \
- "lg %%r0, %[c]\n"   \
- "ltgr %%r0, %%r0\n" \
- ASM \
- "stg %%r0, %[res] " \
- : [res] "=m" (res)  \
- : [a] "m" (a),  \
-   [b] "m" (b),  \
-   [c] "m" (c)   \
- : "r0", "r2",   \
-   "r3", "r4"\
-);   \
-return res;  \
+{   \
+uint64_t res = 0;   \
+asm volatile (  \
+"lg %%r0, %[c]\n"   \
+"ltgr %%r0, %%r0\n" \
+ASM \
+"stg %%r0, %[res] " \
+: [res] "=m" (res)  \
+: [a] "r" (a),  \
+  [b] "r" (b),  \
+  [c] "m" (c)   \
+: "r0", "memory"\
+);  \
+return res; \
 }
 
-Fi3 (_selre, ".insn rrf, 0xB9F0, %%r0, %%r3, %%r2, 8\n")
-Fi3 (_selgrz,".insn rrf, 0xB9E3, %%r0, %%r3, %%r2, 8\n")
-Fi3 (_selfhrnz,  ".insn rrf, 0xB9C0, %%r0, %%r3, %%r2, 7\n")
+Fi3 (_selre, ".insn rrf, 0xB9F0, %%r0, %[b], %[a], 8\n")
+Fi3 (_selgrz,".insn rrf, 0xB9E3, %%r0, %[b], %[a], 8\n")
+Fi3 (_selfhrnz,  ".insn rrf, 0xB9C0, %%r0, %[b], %[a], 7\n")
+
 
 int main(int argc, char *argv[])
 {
@@ -34,5 +32,6 @@ int main(int argc, char *argv[])
 return (int) (
 (0x0066ull != a) ||
 (0xF00D0005ull != b) ||
-(0x0654ull != c));
+(0x0654ull != c) );
 }
+
-- 
2.34.1




[PATCH] tests/tcg/s390x: Cleanup of mie3 tests.

2022-03-01 Thread David Miller
Adds clobbers and merges remaining separate asm statements.

Signed-off-by: David Miller 
---
 tests/tcg/s390x/mie3-compl.c | 42 +++-
 tests/tcg/s390x/mie3-mvcrl.c | 12 +++
 tests/tcg/s390x/mie3-sel.c   | 40 ++
 3 files changed, 57 insertions(+), 37 deletions(-)

diff --git a/tests/tcg/s390x/mie3-compl.c b/tests/tcg/s390x/mie3-compl.c
index 35649f3b02..85b23a9b7a 100644
--- a/tests/tcg/s390x/mie3-compl.c
+++ b/tests/tcg/s390x/mie3-compl.c
@@ -1,32 +1,44 @@
 #include 
 
+
 #define FbinOp(S, ASM) uint64_t S(uint64_t a, uint64_t b) \
-{ \
-uint64_t res = 0; \
-asm ("llihf %[res],801\n" ASM \
- : [res]"=&r"(res) : [a]"r"(a), [b]"r"(b) : "cc"); \
-return res; \
+{   \
+uint64_t res = 0;   \
+asm volatile (  \
+"llihf %%r0,801\n"  \
+"lg %%r2, %[a]\n"   \
+"lg %%r3, %[b]\n"   \
+ASM \
+"stg %%r0, %[res] " \
+: [res] "=m" (res)  \
+: [a] "m" (a)   \
+, [b] "m" (b)   \
+: "r0", "r2", "r3"  \
+);  \
+return res; \
 }
 
+
 /* AND WITH COMPLEMENT */
-FbinOp(_ncrk,  ".insn rrf, 0xB9F5, %[res], %[b], %[a], 0\n")
-FbinOp(_ncgrk, ".insn rrf, 0xB9E5, %[res], %[b], %[a], 0\n")
+FbinOp(_ncrk,  ".insn rrf, 0xB9F5, %%r0, %%r3, %%r2, 0\n")
+FbinOp(_ncgrk, ".insn rrf, 0xB9E5, %%r0, %%r3, %%r2, 0\n")
 
 /* NAND */
-FbinOp(_nnrk,  ".insn rrf, 0xB974, %[res], %[b], %[a], 0\n")
-FbinOp(_nngrk, ".insn rrf, 0xB964, %[res], %[b], %[a], 0\n")
+FbinOp(_nnrk,  ".insn rrf, 0xB974, %%r0, %%r3, %%r2, 0\n")
+FbinOp(_nngrk, ".insn rrf, 0xB964, %%r0, %%r3, %%r2, 0\n")
 
 /* NOT XOR */
-FbinOp(_nxrk,  ".insn rrf, 0xB977, %[res], %[b], %[a], 0\n")
-FbinOp(_nxgrk, ".insn rrf, 0xB967, %[res], %[b], %[a], 0\n")
+FbinOp(_nxrk,  ".insn rrf, 0xB977, %%r0, %%r3, %%r2, 0\n")
+FbinOp(_nxgrk, ".insn rrf, 0xB967, %%r0, %%r3, %%r2, 0\n")
 
 /* NOR */
-FbinOp(_nork,  ".insn rrf, 0xB976, %[res], %[b], %[a], 0\n")
-FbinOp(_nogrk, ".insn rrf, 0xB966, %[res], %[b], %[a], 0\n")
+FbinOp(_nork,  ".insn rrf, 0xB976, %%r0, %%r3, %%r2, 0\n")
+FbinOp(_nogrk, ".insn rrf, 0xB966, %%r0, %%r3, %%r2, 0\n")
 
 /* OR WITH COMPLEMENT */
-FbinOp(_ocrk,  ".insn rrf, 0xB975, %[res], %[b], %[a], 0\n")
-FbinOp(_ocgrk, ".insn rrf, 0xB965, %[res], %[b], %[a], 0\n")
+FbinOp(_ocrk,  ".insn rrf, 0xB975, %%r0, %%r3, %%r2, 0\n")
+FbinOp(_ocgrk, ".insn rrf, 0xB965, %%r0, %%r3, %%r2, 0\n")
+
 
 int main(int argc, char *argv[])
 {
diff --git a/tests/tcg/s390x/mie3-mvcrl.c b/tests/tcg/s390x/mie3-mvcrl.c
index 57b08e48d0..f749dad9c2 100644
--- a/tests/tcg/s390x/mie3-mvcrl.c
+++ b/tests/tcg/s390x/mie3-mvcrl.c
@@ -1,15 +1,17 @@
 #include 
 #include 
 
+
 static inline void mvcrl_8(const char *dst, const char *src)
 {
 asm volatile (
-"llill %%r0, 8\n"
-".insn sse, 0xE50A, 0(%[dst]), 0(%[src])"
-: : [dst] "d" (dst), [src] "d" (src)
-: "memory");
+"llill %%r0, 8\n"
+".insn sse, 0xE50A, 0(%[dst]), 0(%[src])"
+: : [dst] "d" (dst), [src] "d" (src)
+: "r0", "memory");
 }
 
+
 int main(int argc, char *argv[])
 {
 const char *alpha = "abcdefghijklmnop";
@@ -25,3 +27,5 @@ int main(int argc, char *argv[])
 
 return strncmp(alpha, tstr, 16ul);
 }
+
+
diff --git a/tests/tcg/s390x/mie3-sel.c b/tests/tcg/s390x/mie3-sel.c
index b0c5c9857d..98cf4d40f5 100644
--- a/tests/tcg/s390x/mie3-sel.c
+++ b/tests/tcg/s390x/mie3-sel.c
@@ -1,29 +1,32 @@
 #include 
 
+
 #define Fi3(S, ASM) uint64_t S(uint64_t a, uint64_t b, uint64_t c) \
-{\
-uint64_t res = 0;\
-asm (\
- "lg %%r2, %[a]\n"   \
- "lg %%r3, %[b]\n"   \
- "lg %%r0, %[c]\n"   \
- "ltgr %%r0, %%r0\n" \
- ASM \
- "stg %%r0, %[res] " \
- : [res] "=m" (res)  \
- : [a] "m" (a),  \
-   [b] "m" (b),  \
-   [c] "m" (c)   \
- : "r0", "r2",   \
-   "r3", "r4"\
-);   \
-return res;  \
+{   \
+uint64_t res = 0;   \
+asm volatile (  \
+"lg %%r2, %[a]\n"   \
+"lg %%r3, %[b]\n"   \
+"lg %%r0, %[c]\n"   \
+"lt

Re: [PATCH v7 3/4] tests/tcg/s390x: Tests for Miscellaneous-Instruction-Extensions Facility 3

2022-03-01 Thread David Miller
>On 28/02/2022 19.31, David Miller wrote:
> > Had it on my TODO list for this morning, thank you.

> Thanks! Please send it as additional patch on top of my s390x-next, since
I
> already sent a pull request for the other patches yesterday:
>
>  https://gitlab.com/thuth/qemu/-/commits/s390x-next/

Partial misread yesterday,  I was on mobile and saw that you had modified
the patch to stage.

I will look at it now,  as soon as gitlab comes back up,  it's been
problematic lately.


- David Miller


Re: [PATCH v7 3/4] tests/tcg/s390x: Tests for Miscellaneous-Instruction-Extensions Facility 3

2022-02-28 Thread David Miller
Had it on my TODO list for this morning, thank you.

On Mon, Feb 28, 2022 at 12:59 PM Richard Henderson <
richard.hender...@linaro.org> wrote:

> On 2/28/22 00:14, Thomas Huth wrote:
> > Full patch can be seen here:
> >
> > https://gitlab.com/thuth/qemu/-/commit/38af118ea2fef0c473
>
>
> > static inline void mvcrl_8(const char *dst, const char *src)
> > {
> > asm volatile (
> > "llill %%r0, 8\n"
> > ".insn sse, 0xE50A, 0(%[dst]), 0(%[src])"
> > : : [dst] "d" (dst), [src] "d" (src)
> > : "memory");
> > }
>
> Need clobber of r0 here.
>
> > #define Fi3(S, ASM) uint64_t S(uint64_t a, uint64_t b, uint64_t c) \
> > {\
> > uint64_t res = 0;\
> > asm (\
> >  "lg %%r2, %[a]\n"   \
> >  "lg %%r3, %[b]\n"   \
> >  "lg %%r0, %[c]\n"   \
> >  "ltgr %%r0, %%r0\n" \
> >  ASM \
> >  "stg %%r0, %[res] " \
> >  : [res] "=m" (res)  \
> >  : [a] "m" (a),  \
> >[b] "m" (b),  \
> >[c] "m" (c)   \
> >  : "r0", "r2",   \
> >"r3", "r4"\
> > );   \
> > return res;  \
> > }
> >
> > Fi3 (_selre, ".insn rrf, 0xB9F0, %%r0, %%r3, %%r2, 8\n")
> > Fi3 (_selgrz,".insn rrf, 0xB9E3, %%r0, %%r3, %%r2, 8\n")
> > Fi3 (_selfhrnz,  ".insn rrf, 0xB9C0, %%r0, %%r3, %%r2, 7\n")
>
> This isn't actively broken, but could use the same treatment as NCRK et al:
>
> #define Fi3(S, ASM) uint64_t S(uint64_t a, uint64_t b, uint64_t c) \
> {\
>  uint64_t res;\
>  asm("ltgr %[c], %[c]\n\t" ASM
>  : [res] "=" (res)
>  : [a] "r" (a), [b] "r" (b), [c] "r" (c)
>  : "cc");
>  return res;
> }
>
> Fi3(_selre,   ".insn rrf, 0xB9F0, %[res], %[a], %[b], 8")
>
> etc.
>
>
> r~
>


[PATCH v7 4/4] tests/tcg/s390x: changed to using .insn for tests requiring z15

2022-02-23 Thread David Miller
Signed-off-by: David Miller 
---
 tests/tcg/s390x/mie3-compl.c | 21 +++--
 tests/tcg/s390x/mie3-mvcrl.c |  2 +-
 tests/tcg/s390x/mie3-sel.c   |  6 +++---
 3 files changed, 15 insertions(+), 14 deletions(-)

diff --git a/tests/tcg/s390x/mie3-compl.c b/tests/tcg/s390x/mie3-compl.c
index 98281ee683..31820e4a2a 100644
--- a/tests/tcg/s390x/mie3-compl.c
+++ b/tests/tcg/s390x/mie3-compl.c
@@ -14,25 +14,26 @@
 #define FbinOp(S, ASM) uint64_t S(uint64_t a, uint64_t b) \
 { uint64_t res = 0; F_PRO; ASM; return res; }
 
+
 /* AND WITH COMPLEMENT */
-FbinOp(_ncrk,  asm("ncrk  %%r0, %%r3, %%r2\n" F_EPI))
-FbinOp(_ncgrk, asm("ncgrk %%r0, %%r3, %%r2\n" F_EPI))
+FbinOp(_ncrk,  asm(".insn rrf, 0xB9F5, %%r0, %%r3, %%r2, 0\n" F_EPI))
+FbinOp(_ncgrk, asm(".insn rrf, 0xB9E5, %%r0, %%r3, %%r2, 0\n" F_EPI))
 
 /* NAND */
-FbinOp(_nnrk,  asm("nnrk  %%r0, %%r3, %%r2\n" F_EPI))
-FbinOp(_nngrk, asm("nngrk %%r0, %%r3, %%r2\n" F_EPI))
+FbinOp(_nnrk,  asm(".insn rrf, 0xB974, %%r0, %%r3, %%r2, 0\n" F_EPI))
+FbinOp(_nngrk, asm(".insn rrf, 0xB964, %%r0, %%r3, %%r2, 0\n" F_EPI))
 
 /* NOT XOR */
-FbinOp(_nxrk,  asm("nxrk  %%r0, %%r3, %%r2\n" F_EPI))
-FbinOp(_nxgrk, asm("nxgrk %%r0, %%r3, %%r2\n" F_EPI))
+FbinOp(_nxrk,  asm(".insn rrf, 0xB977, %%r0, %%r3, %%r2, 0\n" F_EPI))
+FbinOp(_nxgrk, asm(".insn rrf, 0xB967, %%r0, %%r3, %%r2, 0\n" F_EPI))
 
 /* NOR */
-FbinOp(_nork,  asm("nork  %%r0, %%r3, %%r2\n" F_EPI))
-FbinOp(_nogrk, asm("nogrk %%r0, %%r3, %%r2\n" F_EPI))
+FbinOp(_nork,  asm(".insn rrf, 0xB976, %%r0, %%r3, %%r2, 0\n" F_EPI))
+FbinOp(_nogrk, asm(".insn rrf, 0xB966, %%r0, %%r3, %%r2, 0\n" F_EPI))
 
 /* OR WITH COMPLEMENT */
-FbinOp(_ocrk,  asm("ocrk  %%r0, %%r3, %%r2\n" F_EPI))
-FbinOp(_ocgrk, asm("ocgrk %%r0, %%r3, %%r2\n" F_EPI))
+FbinOp(_ocrk,  asm(".insn rrf, 0xB975, %%r0, %%r3, %%r2, 0\n" F_EPI))
+FbinOp(_ocgrk, asm(".insn rrf, 0xB965, %%r0, %%r3, %%r2, 0\n" F_EPI))
 
 
 int main(int argc, char *argv[])
diff --git a/tests/tcg/s390x/mie3-mvcrl.c b/tests/tcg/s390x/mie3-mvcrl.c
index 81cf3ad702..f0be83b197 100644
--- a/tests/tcg/s390x/mie3-mvcrl.c
+++ b/tests/tcg/s390x/mie3-mvcrl.c
@@ -6,7 +6,7 @@ static inline void mvcrl_8(const char *dst, const char *src)
 {
 asm volatile (
 "llill %%r0, 8\n"
-"mvcrl 0(%[dst]), 0(%[src])\n"
+".insn sse, 0xE50A, 0(%[dst]), 0(%[src])"
 : : [dst] "d" (dst), [src] "d" (src)
 : "memory");
 }
diff --git a/tests/tcg/s390x/mie3-sel.c b/tests/tcg/s390x/mie3-sel.c
index 2e99e00b47..ee619a763d 100644
--- a/tests/tcg/s390x/mie3-sel.c
+++ b/tests/tcg/s390x/mie3-sel.c
@@ -22,9 +22,9 @@ asm (   \
 }
 
 
-Fi3 (_selre,"selre%%r0, %%r3, %%r2\n")
-Fi3 (_selgrz,   "selgrz   %%r0, %%r3, %%r2\n")
-Fi3 (_selfhrnz, "selfhrnz %%r0, %%r3, %%r2\n")
+Fi3 (_selre, ".insn rrf, 0xB9F0, %%r0, %%r3, %%r2, 8\n")
+Fi3 (_selgrz,".insn rrf, 0xB9E3, %%r0, %%r3, %%r2, 8\n")
+Fi3 (_selfhrnz,  ".insn rrf, 0xB9C0, %%r0, %%r3, %%r2, 7\n")
 
 
 int main(int argc, char *argv[])
-- 
2.32.0




[PATCH v7 1/4] s390x/tcg: Implement Miscellaneous-Instruction-Extensions Facility 3 for the s390x

2022-02-23 Thread David Miller
resolves: https://gitlab.com/qemu-project/qemu/-/issues/737
implements:
AND WITH COMPLEMENT   (NCRK, NCGRK)
NAND  (NNRK, NNGRK)
NOT EXCLUSIVE OR  (NXRK, NXGRK)
NOR   (NORK, NOGRK)
OR WITH COMPLEMENT(OCRK, OCGRK)
SELECT(SELR, SELGR)
SELECT HIGH   (SELFHR)
MOVE RIGHT TO LEFT(MVCRL)
POPULATION COUNT  (POPCNT)

Signed-off-by: David Miller 
---
 target/s390x/gen-features.c|  1 +
 target/s390x/helper.h  |  1 +
 target/s390x/tcg/insn-data.def | 30 +++--
 target/s390x/tcg/mem_helper.c  | 20 
 target/s390x/tcg/translate.c   | 60 --
 5 files changed, 107 insertions(+), 5 deletions(-)

diff --git a/target/s390x/gen-features.c b/target/s390x/gen-features.c
index 7cb1a6ec10..a3f30f69d9 100644
--- a/target/s390x/gen-features.c
+++ b/target/s390x/gen-features.c
@@ -740,6 +740,7 @@ static uint16_t qemu_LATEST[] = {
 
 /* add all new definitions before this point */
 static uint16_t qemu_MAX[] = {
+S390_FEAT_MISC_INSTRUCTION_EXT3,
 /* generates a dependency warning, leave it out for now */
 S390_FEAT_MSA_EXT_5,
 };
diff --git a/target/s390x/helper.h b/target/s390x/helper.h
index 271b081e8c..69f69cf718 100644
--- a/target/s390x/helper.h
+++ b/target/s390x/helper.h
@@ -4,6 +4,7 @@ DEF_HELPER_FLAGS_4(nc, TCG_CALL_NO_WG, i32, env, i32, i64, i64)
 DEF_HELPER_FLAGS_4(oc, TCG_CALL_NO_WG, i32, env, i32, i64, i64)
 DEF_HELPER_FLAGS_4(xc, TCG_CALL_NO_WG, i32, env, i32, i64, i64)
 DEF_HELPER_FLAGS_4(mvc, TCG_CALL_NO_WG, void, env, i32, i64, i64)
+DEF_HELPER_FLAGS_4(mvcrl, TCG_CALL_NO_WG, void, env, i64, i64, i64)
 DEF_HELPER_FLAGS_4(mvcin, TCG_CALL_NO_WG, void, env, i32, i64, i64)
 DEF_HELPER_FLAGS_4(clc, TCG_CALL_NO_WG, i32, env, i32, i64, i64)
 DEF_HELPER_3(mvcl, i32, env, i32, i32)
diff --git a/target/s390x/tcg/insn-data.def b/target/s390x/tcg/insn-data.def
index 1c3e115712..35e55d454e 100644
--- a/target/s390x/tcg/insn-data.def
+++ b/target/s390x/tcg/insn-data.def
@@ -105,6 +105,9 @@
 D(0xa507, NILL,RI_a,  Z,   r1_o, i2_16u, r1, 0, andi, 0, 0x1000)
 D(0x9400, NI,  SI,Z,   la1, i2_8u, new, 0, ni, nz64, MO_UB)
 D(0xeb54, NIY, SIY,   LD,  la1, i2_8u, new, 0, ni, nz64, MO_UB)
+/* AND WITH COMPLEMENT */
+C(0xb9f5, NCRK,RRF_a, MIE3, r2, r3, new, r1_32, andc, nz32)
+C(0xb9e5, NCGRK,   RRF_a, MIE3, r2, r3, r1, 0, andc, nz64)
 
 /* BRANCH AND LINK */
 C(0x0500, BALR,RR_a,  Z,   0, r2_nz, r1, 0, bal, 0)
@@ -640,6 +643,8 @@
 C(0xeb8e, MVCLU,   RSY_a, E2,  0, a2, 0, 0, mvclu, 0)
 /* MOVE NUMERICS */
 C(0xd100, MVN, SS_a,  Z,   la1, a2, 0, 0, mvn, 0)
+/* MOVE RIGHT TO LEFT */
+C(0xe50a, MVCRL,   SSE,  MIE3, la1, a2, 0, 0, mvcrl, 0)
 /* MOVE PAGE */
 C(0xb254, MVPG,RRE,   Z,   0, 0, 0, 0, mvpg, 0)
 /* MOVE STRING */
@@ -707,6 +712,16 @@
 F(0xed0f, MSEB,RXF,   Z,   e1, m2_32u, new, e1, mseb, 0, IF_BFP)
 F(0xed1f, MSDB,RXF,   Z,   f1, m2_64, new, f1, msdb, 0, IF_BFP)
 
+/* NAND */
+C(0xb974, NNRK,RRF_a, MIE3, r2, r3, new, r1_32, nand, nz32)
+C(0xb964, NNGRK,   RRF_a, MIE3, r2, r3, r1, 0, nand, nz64)
+/* NOR */
+C(0xb976, NORK,RRF_a, MIE3, r2, r3, new, r1_32, nor, nz32)
+C(0xb966, NOGRK,   RRF_a, MIE3, r2, r3, r1, 0, nor, nz64)
+/* NOT EXCLUSIVE OR */
+C(0xb977, NXRK,RRF_a, MIE3, r2, r3, new, r1_32, nxor, nz32)
+C(0xb967, NXGRK,   RRF_a, MIE3, r2, r3, r1, 0, nxor, nz64)
+
 /* OR */
 C(0x1600, OR,  RR_a,  Z,   r1, r2, new, r1_32, or, nz32)
 C(0xb9f6, ORK, RRF_a, DO,  r2, r3, new, r1_32, or, nz32)
@@ -725,6 +740,9 @@
 D(0xa50b, OILL,RI_a,  Z,   r1_o, i2_16u, r1, 0, ori, 0, 0x1000)
 D(0x9600, OI,  SI,Z,   la1, i2_8u, new, 0, oi, nz64, MO_UB)
 D(0xeb56, OIY, SIY,   LD,  la1, i2_8u, new, 0, oi, nz64, MO_UB)
+/* OR WITH COMPLEMENT */
+C(0xb975, OCRK,RRF_a, MIE3, r2, r3, new, r1_32, orc, nz32)
+C(0xb965, OCGRK,   RRF_a, MIE3, r2, r3, r1, 0, orc, nz64)
 
 /* PACK */
 /* Really format SS_b, but we pack both lengths into one argument
@@ -735,6 +753,9 @@
 /* PACK UNICODE */
 C(0xe100, PKU, SS_f,  E2,  la1, a2, 0, 0, pku, 0)
 
+/* POPULATION COUNT */
+C(0xb9e1, POPCNT,  RRF_c, PC,  0, r2_o, r1, 0, popcnt, nz64)
+
 /* PREFETCH */
 /* Implemented as nops of course.  */
 C(0xe336, PFD, RXY_b, GIE, 0, 0, 0, 0, 0, 0)
@@ -743,9 +764,6 @@
 /* Implemented as nop of course.  */
 C(0xb2e8, PPA, RRF_c, PPA, 0, 0, 0, 0, 0, 0)
 
-/* POPULATION COUNT */
-C(0xb9e1, POPCNT,  RRE,   PC,  0, r2_o, r1, 0, popcnt, nz64)
-
 /* ROTATE LEFT SINGLE LOGICAL */
 C(0xeb1d, RLL, RSY_a, Z,   r3_o, sh, new, r1_32, rll32, 0)
 C(0xeb1c, RLLG,RSY_a, Z,   r3_o, sh, r1, 0, rll64, 0)
@@ -765,6 +783,12 @@
 /* SEARCH STRING UNICODE */
 C(0xb9be, SRSTU,   RRE,   ETF3, 0, 0, 0, 0, srstu, 0)
 
+/* SELECT */
+C(0xb9f0, SELR,RRF_a, MIE3, r3, r2, new, r1_32, loc, 0)
+C(0xb9e3, SELGR,   RRF_a, MIE3

[PATCH v7 3/4] tests/tcg/s390x: Tests for Miscellaneous-Instruction-Extensions Facility 3

2022-02-23 Thread David Miller
tests/tcg/s390x/mie3-compl.c: [N]*K instructions
tests/tcg/s390x/mie3-mvcrl.c: MVCRL instruction
tests/tcg/s390x/mie3-sel.c:  SELECT instruction

Signed-off-by: David Miller 
---
 tests/tcg/s390x/Makefile.target |  5 ++-
 tests/tcg/s390x/mie3-compl.c| 55 +
 tests/tcg/s390x/mie3-mvcrl.c| 31 +++
 tests/tcg/s390x/mie3-sel.c  | 42 +
 4 files changed, 132 insertions(+), 1 deletion(-)
 create mode 100644 tests/tcg/s390x/mie3-compl.c
 create mode 100644 tests/tcg/s390x/mie3-mvcrl.c
 create mode 100644 tests/tcg/s390x/mie3-sel.c

diff --git a/tests/tcg/s390x/Makefile.target b/tests/tcg/s390x/Makefile.target
index 1a7238b4eb..54e67446aa 100644
--- a/tests/tcg/s390x/Makefile.target
+++ b/tests/tcg/s390x/Makefile.target
@@ -1,12 +1,15 @@
 S390X_SRC=$(SRC_PATH)/tests/tcg/s390x
 VPATH+=$(S390X_SRC)
-CFLAGS+=-march=zEC12 -m64
+CFLAGS+=-march=z15 -m64
 TESTS+=hello-s390x
 TESTS+=csst
 TESTS+=ipm
 TESTS+=exrl-trt
 TESTS+=exrl-trtr
 TESTS+=pack
+TESTS+=mie3-compl
+TESTS+=mie3-mvcrl
+TESTS+=mie3-sel
 TESTS+=mvo
 TESTS+=mvc
 TESTS+=shift
diff --git a/tests/tcg/s390x/mie3-compl.c b/tests/tcg/s390x/mie3-compl.c
new file mode 100644
index 00..98281ee683
--- /dev/null
+++ b/tests/tcg/s390x/mie3-compl.c
@@ -0,0 +1,55 @@
+#include 
+
+
+#define F_EPI "stg %%r0, %[res] " : [res] "+m" (res) : : "r0", "r2", "r3"
+
+#define F_PROasm ( \
+"llihf %%r0,801\n" \
+"lg %%r2, %[a]\n"  \
+"lg %%r3, %[b] "   \
+: : [a] "m" (a),   \
+[b] "m" (b)\
+: "r2", "r3")
+
+#define FbinOp(S, ASM) uint64_t S(uint64_t a, uint64_t b) \
+{ uint64_t res = 0; F_PRO; ASM; return res; }
+
+/* AND WITH COMPLEMENT */
+FbinOp(_ncrk,  asm("ncrk  %%r0, %%r3, %%r2\n" F_EPI))
+FbinOp(_ncgrk, asm("ncgrk %%r0, %%r3, %%r2\n" F_EPI))
+
+/* NAND */
+FbinOp(_nnrk,  asm("nnrk  %%r0, %%r3, %%r2\n" F_EPI))
+FbinOp(_nngrk, asm("nngrk %%r0, %%r3, %%r2\n" F_EPI))
+
+/* NOT XOR */
+FbinOp(_nxrk,  asm("nxrk  %%r0, %%r3, %%r2\n" F_EPI))
+FbinOp(_nxgrk, asm("nxgrk %%r0, %%r3, %%r2\n" F_EPI))
+
+/* NOR */
+FbinOp(_nork,  asm("nork  %%r0, %%r3, %%r2\n" F_EPI))
+FbinOp(_nogrk, asm("nogrk %%r0, %%r3, %%r2\n" F_EPI))
+
+/* OR WITH COMPLEMENT */
+FbinOp(_ocrk,  asm("ocrk  %%r0, %%r3, %%r2\n" F_EPI))
+FbinOp(_ocgrk, asm("ocgrk %%r0, %%r3, %%r2\n" F_EPI))
+
+
+int main(int argc, char *argv[])
+{
+if (_ncrk(0xFF88, 0xAA11)  != 0x03210011ull ||
+_nnrk(0xFF88, 0xAA11)  != 0x032155FFull ||
+_nork(0xFF88, 0xAA11)  != 0x03210066ull ||
+_nxrk(0xFF88, 0xAA11)  != 0x0321AA66ull ||
+_ocrk(0xFF88, 0xAA11)  != 0x0321AA77ull ||
+_ncgrk(0xFF88, 0xAA11) != 0x0011ull ||
+_nngrk(0xFF88, 0xAA11) != 0x55FFull ||
+_nogrk(0xFF88, 0xAA11) != 0x0066ull ||
+_nxgrk(0xFF88, 0xAA11) != 0xAA66ull ||
+_ocgrk(0xFF88, 0xAA11) != 0xAA77ull)
+{
+return 1;
+}
+
+return 0;
+}
diff --git a/tests/tcg/s390x/mie3-mvcrl.c b/tests/tcg/s390x/mie3-mvcrl.c
new file mode 100644
index 00..81cf3ad702
--- /dev/null
+++ b/tests/tcg/s390x/mie3-mvcrl.c
@@ -0,0 +1,31 @@
+#include 
+#include 
+
+
+static inline void mvcrl_8(const char *dst, const char *src)
+{
+asm volatile (
+"llill %%r0, 8\n"
+"mvcrl 0(%[dst]), 0(%[src])\n"
+: : [dst] "d" (dst), [src] "d" (src)
+: "memory");
+}
+
+
+int main(int argc, char *argv[])
+{
+const char *alpha = "abcdefghijklmnop";
+
+/* array missing 'i' */
+char tstr[17] = "abcdefghjklmnop\0" ;
+
+/* mvcrl reference use: 'open a hole in an array' */
+mvcrl_8(tstr + 9, tstr + 8);
+
+/* place missing 'i' */
+tstr[8] = 'i';
+
+return strncmp(alpha, tstr, 16ul);
+}
+
+
diff --git a/tests/tcg/s390x/mie3-sel.c b/tests/tcg/s390x/mie3-sel.c
new file mode 100644
index 00..2e99e00b47
--- /dev/null
+++ b/tests/tcg/s390x/mie3-sel.c
@@ -0,0 +1,42 @@
+#include 
+
+
+#define Fi3(S, ASM) uint64_t S(uint64_t a, uint64_t b, uint64_t c) \
+{   \
+uint64_t res = 0;   \
+asm (   \
+"lg %%r2, %[a]\n"   \
+"lg %%r3, %[b]\n"   \
+"lg %%r0, %[c]\n"   \
+"ltgr %%r0, %%r0\n" \
+ASM \
+"stg %%r0, %[res] " \
+: [res] "=m" (res)  \
+: [a] "m" (a),  \
+  [b] "m" (b),  \
+  [c] "m" (c)   \
+: "r0", "r2",   \
+  "r3", "r4"\
+);  \
+return res; \
+}
+
+
+Fi3 (_selre,"selre%%r0, %

[PATCH v7 0/4] s390x: Add partial z15 support and tests

2022-02-23 Thread David Miller
Add partial support for s390x z15 GA1 and specific tests for MIE3.


v6 -> v7:
* Modified SELFHR insn-data + test to ensure high 32bits are copied.
* Changed m3 mask test value for popcnt to fix mie3 variant.

v5 -> v6:
* Swap operands for sel* instructions
* Use .insn in tests for z15 arch instructions

v4 -> v5:
* Readd missing tests/tcg/s390x/mie3-*.c to patch

v3 -> v4:
* Change popcnt encoding RRE -> RRF_c
* Remove redundant code op_sel -> op_loc
* Cleanup for checkpatch.pl
* Readded mie3-* to Makefile.target

v2 -> v3:
* Moved tests to separate patch.
* Combined patches into series.

David Miller (4):
  s390x/tcg: Implement Miscellaneous-Instruction-Extensions Facility 3
for the s390x
   * Reviewed-by: David Hildenbrand 
  s390x/cpumodel: Bump up QEMU model to a stripped-down IBM z15 GA1
   * Reviewed-by: David Hildenbrand 
  tests/tcg/s390x: Tests for Miscellaneous-Instruction-Extensions
Facility 3
  tests/tcg/s390x: changed to using .insn for tests requiring z15
   * Reviewed-by: Thomas Huth 

 hw/s390x/s390-virtio-ccw.c  |  3 ++
 target/s390x/cpu_models.c   |  6 ++--
 target/s390x/gen-features.c |  6 +++-
 target/s390x/helper.h   |  1 +
 target/s390x/tcg/insn-data.def  | 30 +++--
 target/s390x/tcg/mem_helper.c   | 20 +++
 target/s390x/tcg/translate.c| 60 +++--
 tests/tcg/s390x/Makefile.target |  5 ++-
 tests/tcg/s390x/mie3-compl.c| 56 ++
 tests/tcg/s390x/mie3-mvcrl.c| 31 +
 tests/tcg/s390x/mie3-sel.c  | 42 +++
 11 files changed, 250 insertions(+), 10 deletions(-)
 create mode 100644 tests/tcg/s390x/mie3-compl.c
 create mode 100644 tests/tcg/s390x/mie3-mvcrl.c
 create mode 100644 tests/tcg/s390x/mie3-sel.c

-- 
2.32.0




[PATCH v7 2/4] s390x/cpumodel: Bump up QEMU model to a stripped-down IBM z15 GA1

2022-02-23 Thread David Miller
TCG implements everything we need to run basic z15 OS+software

Signed-off-by: David Miller 
---
 hw/s390x/s390-virtio-ccw.c  | 3 +++
 target/s390x/cpu_models.c   | 6 +++---
 target/s390x/gen-features.c | 7 +--
 3 files changed, 11 insertions(+), 5 deletions(-)

diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c
index 84e3e63c43..90480e7cf9 100644
--- a/hw/s390x/s390-virtio-ccw.c
+++ b/hw/s390x/s390-virtio-ccw.c
@@ -802,7 +802,10 @@ DEFINE_CCW_MACHINE(7_0, "7.0", true);
 
 static void ccw_machine_6_2_instance_options(MachineState *machine)
 {
+static const S390FeatInit qemu_cpu_feat = { S390_FEAT_LIST_QEMU_V6_2 };
+
 ccw_machine_7_0_instance_options(machine);
+s390_set_qemu_cpu_model(0x3906, 14, 2, qemu_cpu_feat);
 }
 
 static void ccw_machine_6_2_class_options(MachineClass *mc)
diff --git a/target/s390x/cpu_models.c b/target/s390x/cpu_models.c
index 11e06cc51f..89f83e81d5 100644
--- a/target/s390x/cpu_models.c
+++ b/target/s390x/cpu_models.c
@@ -85,9 +85,9 @@ static S390CPUDef s390_cpu_defs[] = {
 CPUDEF_INIT(0x3932, 16, 1, 47, 0x0800U, "gen16b", "IBM 3932 GA1"),
 };
 
-#define QEMU_MAX_CPU_TYPE 0x3906
-#define QEMU_MAX_CPU_GEN 14
-#define QEMU_MAX_CPU_EC_GA 2
+#define QEMU_MAX_CPU_TYPE 0x8561
+#define QEMU_MAX_CPU_GEN 15
+#define QEMU_MAX_CPU_EC_GA 1
 static const S390FeatInit qemu_max_cpu_feat_init = { S390_FEAT_LIST_QEMU_MAX };
 static S390FeatBitmap qemu_max_cpu_feat;
 
diff --git a/target/s390x/gen-features.c b/target/s390x/gen-features.c
index a3f30f69d9..22846121c4 100644
--- a/target/s390x/gen-features.c
+++ b/target/s390x/gen-features.c
@@ -731,16 +731,18 @@ static uint16_t qemu_V6_0[] = {
 S390_FEAT_ESOP,
 };
 
-static uint16_t qemu_LATEST[] = {
+static uint16_t qemu_V6_2[] = {
 S390_FEAT_INSTRUCTION_EXEC_PROT,
 S390_FEAT_MISC_INSTRUCTION_EXT2,
 S390_FEAT_MSA_EXT_8,
 S390_FEAT_VECTOR_ENH,
 };
 
+static uint16_t qemu_LATEST[] = {
+S390_FEAT_MISC_INSTRUCTION_EXT3,
+};
 /* add all new definitions before this point */
 static uint16_t qemu_MAX[] = {
-S390_FEAT_MISC_INSTRUCTION_EXT3,
 /* generates a dependency warning, leave it out for now */
 S390_FEAT_MSA_EXT_5,
 };
@@ -863,6 +865,7 @@ static FeatGroupDefSpec QemuFeatDef[] = {
 QEMU_FEAT_INITIALIZER(V4_0),
 QEMU_FEAT_INITIALIZER(V4_1),
 QEMU_FEAT_INITIALIZER(V6_0),
+QEMU_FEAT_INITIALIZER(V6_2),
 QEMU_FEAT_INITIALIZER(LATEST),
 QEMU_FEAT_INITIALIZER(MAX),
 };
-- 
2.32.0




Re: [PATCH v6 1/4] s390x/tcg: Implement Miscellaneous-Instruction-Extensions Facility 3 for the s390x

2022-02-23 Thread David Miller
Yes, I'm adding the Reviewed-by tags to this series. I haven't quite
figured out where to put them: they were given inline, on various parts of
the patches themselves, so I'm listing them in the cover letter under the
patch they apply to. I hope that's correct.

Thanks
- David Miller

On Wed, Feb 23, 2022 at 8:40 AM Christian Borntraeger
 wrote:
>
>
> Am 18.02.22 um 00:17 schrieb David Miller:
> > resolves: https://gitlab.com/qemu-project/qemu/-/issues/737
> > implements:
> > AND WITH COMPLEMENT   (NCRK, NCGRK)
> > NAND  (NNRK, NNGRK)
> > NOT EXCLUSIVE OR  (NXRK, NXGRK)
> > NOR   (NORK, NOGRK)
> > OR WITH COMPLEMENT(OCRK, OCGRK)
> > SELECT(SELR, SELGR)
> > SELECT HIGH   (SELFHR)
> > MOVE RIGHT TO LEFT    (MVCRL)
> > POPULATION COUNT  (POPCNT)
> >
> > Signed-off-by: David Miller 
>
> For your next patches, feel free to add previous Reviewed-by: tags so that
> others can see what review has already happened.



Re: [PATCH v6 1/4] s390x/tcg: Implement Miscellaneous-Instruction-Extensions Facility 3 for the s390x

2022-02-23 Thread David Miller
> Bit 0 controls this, and recall that IBM uses big-bit numbering, so "8".

> This stores the low part of r[23] in the high part of r1.
> You need to select the high part of r[23].

Good catch; both of these are fixed, and I will update the patch shortly.

Thanks for the review

- David Miller




On Wed, Feb 23, 2022 at 2:41 PM Richard Henderson
 wrote:
>
> On 2/17/22 13:17, David Miller wrote:
> > +/* SELECT HIGH */
> > +C(0xb9c0, SELFHR,  RRF_a, MIE3, r3, r2, new, r1_32h, loc, 0)
>
> This stores the low part of r[23] in the high part of r1.
> You need to select the high part of r[23].
>
> >   static DisasJumpType op_popcnt(DisasContext *s, DisasOps *o)
> >   {
> > -gen_helper_popcnt(o->out, o->in2);
> > +const uint8_t m3 = get_field(s, m3);
> > +
> > +if ((m3 & 1) && s390_has_feat(S390_FEAT_MISC_INSTRUCTION_EXT3)) {
>
> Bit 0 controls this, and recall that IBM uses big-bit numbering, so "8".
>
>
> r~



Re: [PATCH v6 3/4] tests/tcg/s390x: Tests for Miscellaneous-Instruction-Extensions Facility 3

2022-02-23 Thread David Miller
> No test for popcnt, seeing as there's a bug in m3?

Originally popcnt was not in the task list; it was added later.

> You can't split these two asm, lest the ltgr and sel not be adjacent, and the
> flags not having the correct value when we arrive at the sel.

This was tested: both gcc and clang assemble multiple 'asm' statements
into a single block as long as there are no C statements between them.
I'm happy to change it.

On Wed, Feb 23, 2022 at 2:45 PM Richard Henderson
 wrote:
>
> On 2/17/22 13:17, David Miller wrote:
> > +#define F_PROasm ( \
> > +"lg %%r2, %[a]\n"  \
> > +"lg %%r3, %[b]\n"  \
> > +"lg %%r0, %[c]\n"  \
> > +"ltgr %%r0, %%r0"  \
> > +: : [a] "m" (a),   \
> > +[b] "m" (b),   \
> > +[c] "m" (c)\
> > +: "r0", "r2", "r3", "r4")
> > +
> > +
> > +
> > +#define Fi3(S, ASM) uint64_t S(uint64_t a, uint64_t b, uint64_t c) \
> > +{ uint64_t res = 0; F_PRO ; ASM ; return res; }
> > +
> > +
> > +Fi3 (_selre, asm("selre%%r0, %%r3, %%r2\n" F_EPI))
> > +Fi3 (_selgrz,asm("selgrz   %%r0, %%r3, %%r2\n" F_EPI))
> > +Fi3 (_selfhrnz,  asm("selfhrnz %%r0, %%r3, %%r2\n" F_EPI))
>
> You can't split these two asm, lest the ltgr and sel not be adjacent, and the
> flags not having the correct value when we arrive at the sel.
>
> No test for popcnt, seeing as there's a bug in m3?
>
>
> r~



[PATCH v6 4/4] tests/tcg/s390x: changed to using .insn for tests requiring z15

2022-02-17 Thread David Miller
Signed-off-by: David Miller 
---
 tests/tcg/s390x/mie3-compl.c | 21 +++--
 tests/tcg/s390x/mie3-mvcrl.c |  2 +-
 tests/tcg/s390x/mie3-sel.c   |  6 +++---
 3 files changed, 15 insertions(+), 14 deletions(-)

diff --git a/tests/tcg/s390x/mie3-compl.c b/tests/tcg/s390x/mie3-compl.c
index 98281ee683..31820e4a2a 100644
--- a/tests/tcg/s390x/mie3-compl.c
+++ b/tests/tcg/s390x/mie3-compl.c
@@ -14,25 +14,26 @@
 #define FbinOp(S, ASM) uint64_t S(uint64_t a, uint64_t b) \
 { uint64_t res = 0; F_PRO; ASM; return res; }
 
+
 /* AND WITH COMPLEMENT */
-FbinOp(_ncrk,  asm("ncrk  %%r0, %%r3, %%r2\n" F_EPI))
-FbinOp(_ncgrk, asm("ncgrk %%r0, %%r3, %%r2\n" F_EPI))
+FbinOp(_ncrk,  asm(".insn rrf, 0xB9F5, %%r0, %%r3, %%r2, 0\n" F_EPI))
+FbinOp(_ncgrk, asm(".insn rrf, 0xB9E5, %%r0, %%r3, %%r2, 0\n" F_EPI))
 
 /* NAND */
-FbinOp(_nnrk,  asm("nnrk  %%r0, %%r3, %%r2\n" F_EPI))
-FbinOp(_nngrk, asm("nngrk %%r0, %%r3, %%r2\n" F_EPI))
+FbinOp(_nnrk,  asm(".insn rrf, 0xB974, %%r0, %%r3, %%r2, 0\n" F_EPI))
+FbinOp(_nngrk, asm(".insn rrf, 0xB964, %%r0, %%r3, %%r2, 0\n" F_EPI))
 
 /* NOT XOR */
-FbinOp(_nxrk,  asm("nxrk  %%r0, %%r3, %%r2\n" F_EPI))
-FbinOp(_nxgrk, asm("nxgrk %%r0, %%r3, %%r2\n" F_EPI))
+FbinOp(_nxrk,  asm(".insn rrf, 0xB977, %%r0, %%r3, %%r2, 0\n" F_EPI))
+FbinOp(_nxgrk, asm(".insn rrf, 0xB967, %%r0, %%r3, %%r2, 0\n" F_EPI))
 
 /* NOR */
-FbinOp(_nork,  asm("nork  %%r0, %%r3, %%r2\n" F_EPI))
-FbinOp(_nogrk, asm("nogrk %%r0, %%r3, %%r2\n" F_EPI))
+FbinOp(_nork,  asm(".insn rrf, 0xB976, %%r0, %%r3, %%r2, 0\n" F_EPI))
+FbinOp(_nogrk, asm(".insn rrf, 0xB966, %%r0, %%r3, %%r2, 0\n" F_EPI))
 
 /* OR WITH COMPLEMENT */
-FbinOp(_ocrk,  asm("ocrk  %%r0, %%r3, %%r2\n" F_EPI))
-FbinOp(_ocgrk, asm("ocgrk %%r0, %%r3, %%r2\n" F_EPI))
+FbinOp(_ocrk,  asm(".insn rrf, 0xB975, %%r0, %%r3, %%r2, 0\n" F_EPI))
+FbinOp(_ocgrk, asm(".insn rrf, 0xB965, %%r0, %%r3, %%r2, 0\n" F_EPI))
 
 
 int main(int argc, char *argv[])
diff --git a/tests/tcg/s390x/mie3-mvcrl.c b/tests/tcg/s390x/mie3-mvcrl.c
index 81cf3ad702..f0be83b197 100644
--- a/tests/tcg/s390x/mie3-mvcrl.c
+++ b/tests/tcg/s390x/mie3-mvcrl.c
@@ -6,7 +6,7 @@ static inline void mvcrl_8(const char *dst, const char *src)
 {
 asm volatile (
 "llill %%r0, 8\n"
-"mvcrl 0(%[dst]), 0(%[src])\n"
+".insn sse, 0xE50A, 0(%[dst]), 0(%[src])"
 : : [dst] "d" (dst), [src] "d" (src)
 : "memory");
 }
diff --git a/tests/tcg/s390x/mie3-sel.c b/tests/tcg/s390x/mie3-sel.c
index d6b7b0933b..32d434b01a 100644
--- a/tests/tcg/s390x/mie3-sel.c
+++ b/tests/tcg/s390x/mie3-sel.c
@@ -19,9 +19,9 @@
 { uint64_t res = 0; F_PRO ; ASM ; return res; }
 
 
-Fi3 (_selre, asm("selre%%r0, %%r3, %%r2\n" F_EPI))
-Fi3 (_selgrz,asm("selgrz   %%r0, %%r3, %%r2\n" F_EPI))
-Fi3 (_selfhrnz,  asm("selfhrnz %%r0, %%r3, %%r2\n" F_EPI))
+Fi3 (_selre, asm(".insn rrf, 0xB9F0, %%r0, %%r3, %%r2, 8\n" F_EPI))
+Fi3 (_selgrz,asm(".insn rrf, 0xB9E3, %%r0, %%r3, %%r2, 8\n" F_EPI))
+Fi3 (_selfhrnz,  asm(".insn rrf, 0xB9C0, %%r0, %%r3, %%r2, 7\n" F_EPI))
 
 
 int main(int argc, char *argv[])
-- 
2.32.0




[PATCH v6 3/4] tests/tcg/s390x: Tests for Miscellaneous-Instruction-Extensions Facility 3

2022-02-17 Thread David Miller
tests/tcg/s390x/mie3-compl.c: [N]*K instructions
tests/tcg/s390x/mie3-mvcrl.c: MVCRL instruction
tests/tcg/s390x/mie3-sel.c:  SELECT instruction

Signed-off-by: David Miller 
---
 tests/tcg/s390x/Makefile.target |  5 ++-
 tests/tcg/s390x/mie3-compl.c| 55 +
 tests/tcg/s390x/mie3-mvcrl.c| 31 +++
 tests/tcg/s390x/mie3-sel.c  | 42 +
 4 files changed, 132 insertions(+), 1 deletion(-)
 create mode 100644 tests/tcg/s390x/mie3-compl.c
 create mode 100644 tests/tcg/s390x/mie3-mvcrl.c
 create mode 100644 tests/tcg/s390x/mie3-sel.c

diff --git a/tests/tcg/s390x/Makefile.target b/tests/tcg/s390x/Makefile.target
index 1a7238b4eb..54e67446aa 100644
--- a/tests/tcg/s390x/Makefile.target
+++ b/tests/tcg/s390x/Makefile.target
@@ -1,12 +1,15 @@
 S390X_SRC=$(SRC_PATH)/tests/tcg/s390x
 VPATH+=$(S390X_SRC)
-CFLAGS+=-march=zEC12 -m64
+CFLAGS+=-march=z15 -m64
 TESTS+=hello-s390x
 TESTS+=csst
 TESTS+=ipm
 TESTS+=exrl-trt
 TESTS+=exrl-trtr
 TESTS+=pack
+TESTS+=mie3-compl
+TESTS+=mie3-mvcrl
+TESTS+=mie3-sel
 TESTS+=mvo
 TESTS+=mvc
 TESTS+=shift
diff --git a/tests/tcg/s390x/mie3-compl.c b/tests/tcg/s390x/mie3-compl.c
new file mode 100644
index 00..98281ee683
--- /dev/null
+++ b/tests/tcg/s390x/mie3-compl.c
@@ -0,0 +1,55 @@
+#include 
+
+
+#define F_EPI "stg %%r0, %[res] " : [res] "+m" (res) : : "r0", "r2", "r3"
+
+#define F_PROasm ( \
+"llihf %%r0,801\n" \
+"lg %%r2, %[a]\n"  \
+"lg %%r3, %[b] "   \
+: : [a] "m" (a),   \
+[b] "m" (b)\
+: "r2", "r3")
+
+#define FbinOp(S, ASM) uint64_t S(uint64_t a, uint64_t b) \
+{ uint64_t res = 0; F_PRO; ASM; return res; }
+
+/* AND WITH COMPLEMENT */
+FbinOp(_ncrk,  asm("ncrk  %%r0, %%r3, %%r2\n" F_EPI))
+FbinOp(_ncgrk, asm("ncgrk %%r0, %%r3, %%r2\n" F_EPI))
+
+/* NAND */
+FbinOp(_nnrk,  asm("nnrk  %%r0, %%r3, %%r2\n" F_EPI))
+FbinOp(_nngrk, asm("nngrk %%r0, %%r3, %%r2\n" F_EPI))
+
+/* NOT XOR */
+FbinOp(_nxrk,  asm("nxrk  %%r0, %%r3, %%r2\n" F_EPI))
+FbinOp(_nxgrk, asm("nxgrk %%r0, %%r3, %%r2\n" F_EPI))
+
+/* NOR */
+FbinOp(_nork,  asm("nork  %%r0, %%r3, %%r2\n" F_EPI))
+FbinOp(_nogrk, asm("nogrk %%r0, %%r3, %%r2\n" F_EPI))
+
+/* OR WITH COMPLEMENT */
+FbinOp(_ocrk,  asm("ocrk  %%r0, %%r3, %%r2\n" F_EPI))
+FbinOp(_ocgrk, asm("ocgrk %%r0, %%r3, %%r2\n" F_EPI))
+
+
+int main(int argc, char *argv[])
+{
+if (_ncrk(0xFF88, 0xAA11)  != 0x03210011ull ||
+_nnrk(0xFF88, 0xAA11)  != 0x032155FFull ||
+_nork(0xFF88, 0xAA11)  != 0x03210066ull ||
+_nxrk(0xFF88, 0xAA11)  != 0x0321AA66ull ||
+_ocrk(0xFF88, 0xAA11)  != 0x0321AA77ull ||
+_ncgrk(0xFF88, 0xAA11) != 0x0011ull ||
+_nngrk(0xFF88, 0xAA11) != 0x55FFull ||
+_nogrk(0xFF88, 0xAA11) != 0x0066ull ||
+_nxgrk(0xFF88, 0xAA11) != 0xAA66ull ||
+_ocgrk(0xFF88, 0xAA11) != 0xAA77ull)
+{
+return 1;
+}
+
+return 0;
+}
diff --git a/tests/tcg/s390x/mie3-mvcrl.c b/tests/tcg/s390x/mie3-mvcrl.c
new file mode 100644
index 00..81cf3ad702
--- /dev/null
+++ b/tests/tcg/s390x/mie3-mvcrl.c
@@ -0,0 +1,31 @@
+#include 
+#include 
+
+
+static inline void mvcrl_8(const char *dst, const char *src)
+{
+asm volatile (
+"llill %%r0, 8\n"
+"mvcrl 0(%[dst]), 0(%[src])\n"
+: : [dst] "d" (dst), [src] "d" (src)
+: "memory");
+}
+
+
+int main(int argc, char *argv[])
+{
+const char *alpha = "abcdefghijklmnop";
+
+/* array missing 'i' */
+char tstr[17] = "abcdefghjklmnop\0" ;
+
+/* mvcrl reference use: 'open a hole in an array' */
+mvcrl_8(tstr + 9, tstr + 8);
+
+/* place missing 'i' */
+tstr[8] = 'i';
+
+return strncmp(alpha, tstr, 16ul);
+}
+
+
diff --git a/tests/tcg/s390x/mie3-sel.c b/tests/tcg/s390x/mie3-sel.c
new file mode 100644
index 00..d6b7b0933b
--- /dev/null
+++ b/tests/tcg/s390x/mie3-sel.c
@@ -0,0 +1,42 @@
+#include 
+
+
+#define F_EPI "stg %%r0, %[res] " : [res] "+m" (res) : : "r0", "r2", "r3"
+
+#define F_PROasm ( \
+"lg %%r2, %[a]\n"  \
+"lg %%r3, %[b]\n"  \
+"lg %%r0, %[c]\n"  \
+"ltgr %%r0, %%r0"  \
+: : [a] "m" (a),   \
+[b] "m" (b),   \
+[c] "m" (c)\
+: "r0", "r2", "r3", "r4")
+
+
+
+#define Fi3(S, ASM) uint64_t S(uint64_t a, uint64_t b, uint64_t c) \
+{ uint64_t res = 0; F_PRO ; ASM ; return res; }
+
+
+Fi3 (_selre, asm("selre%%r0, %%r3, %%r2\n" F_EPI))
+

[PATCH v6 1/4] s390x/tcg: Implement Miscellaneous-Instruction-Extensions Facility 3 for the s390x

2022-02-17 Thread David Miller
resolves: https://gitlab.com/qemu-project/qemu/-/issues/737
implements:
AND WITH COMPLEMENT   (NCRK, NCGRK)
NAND  (NNRK, NNGRK)
NOT EXCLUSIVE OR  (NXRK, NXGRK)
NOR   (NORK, NOGRK)
OR WITH COMPLEMENT(OCRK, OCGRK)
SELECT(SELR, SELGR)
SELECT HIGH   (SELFHR)
MOVE RIGHT TO LEFT(MVCRL)
POPULATION COUNT  (POPCNT)

Signed-off-by: David Miller 
---
 target/s390x/gen-features.c|  1 +
 target/s390x/helper.h  |  1 +
 target/s390x/tcg/insn-data.def | 30 +--
 target/s390x/tcg/mem_helper.c  | 20 +
 target/s390x/tcg/translate.c   | 53 --
 5 files changed, 100 insertions(+), 5 deletions(-)

diff --git a/target/s390x/gen-features.c b/target/s390x/gen-features.c
index 7cb1a6ec10..a3f30f69d9 100644
--- a/target/s390x/gen-features.c
+++ b/target/s390x/gen-features.c
@@ -740,6 +740,7 @@ static uint16_t qemu_LATEST[] = {
 
 /* add all new definitions before this point */
 static uint16_t qemu_MAX[] = {
+S390_FEAT_MISC_INSTRUCTION_EXT3,
 /* generates a dependency warning, leave it out for now */
 S390_FEAT_MSA_EXT_5,
 };
diff --git a/target/s390x/helper.h b/target/s390x/helper.h
index 271b081e8c..69f69cf718 100644
--- a/target/s390x/helper.h
+++ b/target/s390x/helper.h
@@ -4,6 +4,7 @@ DEF_HELPER_FLAGS_4(nc, TCG_CALL_NO_WG, i32, env, i32, i64, i64)
 DEF_HELPER_FLAGS_4(oc, TCG_CALL_NO_WG, i32, env, i32, i64, i64)
 DEF_HELPER_FLAGS_4(xc, TCG_CALL_NO_WG, i32, env, i32, i64, i64)
 DEF_HELPER_FLAGS_4(mvc, TCG_CALL_NO_WG, void, env, i32, i64, i64)
+DEF_HELPER_FLAGS_4(mvcrl, TCG_CALL_NO_WG, void, env, i64, i64, i64)
 DEF_HELPER_FLAGS_4(mvcin, TCG_CALL_NO_WG, void, env, i32, i64, i64)
 DEF_HELPER_FLAGS_4(clc, TCG_CALL_NO_WG, i32, env, i32, i64, i64)
 DEF_HELPER_3(mvcl, i32, env, i32, i32)
diff --git a/target/s390x/tcg/insn-data.def b/target/s390x/tcg/insn-data.def
index 1c3e115712..3e51cd7c6d 100644
--- a/target/s390x/tcg/insn-data.def
+++ b/target/s390x/tcg/insn-data.def
@@ -105,6 +105,9 @@
 D(0xa507, NILL,RI_a,  Z,   r1_o, i2_16u, r1, 0, andi, 0, 0x1000)
 D(0x9400, NI,  SI,Z,   la1, i2_8u, new, 0, ni, nz64, MO_UB)
 D(0xeb54, NIY, SIY,   LD,  la1, i2_8u, new, 0, ni, nz64, MO_UB)
+/* AND WITH COMPLEMENT */
+C(0xb9f5, NCRK,RRF_a, MIE3, r2, r3, new, r1_32, andc, nz32)
+C(0xb9e5, NCGRK,   RRF_a, MIE3, r2, r3, r1, 0, andc, nz64)
 
 /* BRANCH AND LINK */
 C(0x0500, BALR,RR_a,  Z,   0, r2_nz, r1, 0, bal, 0)
@@ -640,6 +643,8 @@
 C(0xeb8e, MVCLU,   RSY_a, E2,  0, a2, 0, 0, mvclu, 0)
 /* MOVE NUMERICS */
 C(0xd100, MVN, SS_a,  Z,   la1, a2, 0, 0, mvn, 0)
+/* MOVE RIGHT TO LEFT */
+C(0xe50a, MVCRL,   SSE,  MIE3, la1, a2, 0, 0, mvcrl, 0)
 /* MOVE PAGE */
 C(0xb254, MVPG,RRE,   Z,   0, 0, 0, 0, mvpg, 0)
 /* MOVE STRING */
@@ -707,6 +712,16 @@
 F(0xed0f, MSEB,RXF,   Z,   e1, m2_32u, new, e1, mseb, 0, IF_BFP)
 F(0xed1f, MSDB,RXF,   Z,   f1, m2_64, new, f1, msdb, 0, IF_BFP)
 
+/* NAND */
+C(0xb974, NNRK,RRF_a, MIE3, r2, r3, new, r1_32, nand, nz32)
+C(0xb964, NNGRK,   RRF_a, MIE3, r2, r3, r1, 0, nand, nz64)
+/* NOR */
+C(0xb976, NORK,RRF_a, MIE3, r2, r3, new, r1_32, nor, nz32)
+C(0xb966, NOGRK,   RRF_a, MIE3, r2, r3, r1, 0, nor, nz64)
+/* NOT EXCLUSIVE OR */
+C(0xb977, NXRK,RRF_a, MIE3, r2, r3, new, r1_32, nxor, nz32)
+C(0xb967, NXGRK,   RRF_a, MIE3, r2, r3, r1, 0, nxor, nz64)
+
 /* OR */
 C(0x1600, OR,  RR_a,  Z,   r1, r2, new, r1_32, or, nz32)
 C(0xb9f6, ORK, RRF_a, DO,  r2, r3, new, r1_32, or, nz32)
@@ -725,6 +740,9 @@
 D(0xa50b, OILL,RI_a,  Z,   r1_o, i2_16u, r1, 0, ori, 0, 0x1000)
 D(0x9600, OI,  SI,Z,   la1, i2_8u, new, 0, oi, nz64, MO_UB)
 D(0xeb56, OIY, SIY,   LD,  la1, i2_8u, new, 0, oi, nz64, MO_UB)
+/* OR WITH COMPLEMENT */
+C(0xb975, OCRK,RRF_a, MIE3, r2, r3, new, r1_32, orc, nz32)
+C(0xb965, OCGRK,   RRF_a, MIE3, r2, r3, r1, 0, orc, nz64)
 
 /* PACK */
 /* Really format SS_b, but we pack both lengths into one argument
@@ -735,6 +753,9 @@
 /* PACK UNICODE */
 C(0xe100, PKU, SS_f,  E2,  la1, a2, 0, 0, pku, 0)
 
+/* POPULATION COUNT */
+C(0xb9e1, POPCNT,  RRF_c, PC,  0, r2_o, r1, 0, popcnt, nz64)
+
 /* PREFETCH */
 /* Implemented as nops of course.  */
 C(0xe336, PFD, RXY_b, GIE, 0, 0, 0, 0, 0, 0)
@@ -743,9 +764,6 @@
 /* Implemented as nop of course.  */
 C(0xb2e8, PPA, RRF_c, PPA, 0, 0, 0, 0, 0, 0)
 
-/* POPULATION COUNT */
-C(0xb9e1, POPCNT,  RRE,   PC,  0, r2_o, r1, 0, popcnt, nz64)
-
 /* ROTATE LEFT SINGLE LOGICAL */
 C(0xeb1d, RLL, RSY_a, Z,   r3_o, sh, new, r1_32, rll32, 0)
 C(0xeb1c, RLLG,RSY_a, Z,   r3_o, sh, r1, 0, rll64, 0)
@@ -765,6 +783,12 @@
 /* SEARCH STRING UNICODE */
 C(0xb9be, SRSTU,   RRE,   ETF3, 0, 0, 0, 0, srstu, 0)
 
+/* SELECT */
+C(0xb9f0, SELR,RRF_a, MIE3, r3, r2, new, r1_32, loc, 0)
+C(0xb9e3, SELGR,   RRF_a, MIE3

[PATCH v6 2/4] s390x/cpumodel: Bump up QEMU model to a stripped-down IBM z15 GA1

2022-02-17 Thread David Miller
TCG implements everything we need to run basic z15 OS+software

Signed-off-by: David Miller 
---
 hw/s390x/s390-virtio-ccw.c  | 3 +++
 target/s390x/cpu_models.c   | 6 +++---
 target/s390x/gen-features.c | 7 +--
 3 files changed, 11 insertions(+), 5 deletions(-)

diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c
index 84e3e63c43..90480e7cf9 100644
--- a/hw/s390x/s390-virtio-ccw.c
+++ b/hw/s390x/s390-virtio-ccw.c
@@ -802,7 +802,10 @@ DEFINE_CCW_MACHINE(7_0, "7.0", true);
 
 static void ccw_machine_6_2_instance_options(MachineState *machine)
 {
+static const S390FeatInit qemu_cpu_feat = { S390_FEAT_LIST_QEMU_V6_2 };
+
 ccw_machine_7_0_instance_options(machine);
+s390_set_qemu_cpu_model(0x3906, 14, 2, qemu_cpu_feat);
 }
 
 static void ccw_machine_6_2_class_options(MachineClass *mc)
diff --git a/target/s390x/cpu_models.c b/target/s390x/cpu_models.c
index 11e06cc51f..89f83e81d5 100644
--- a/target/s390x/cpu_models.c
+++ b/target/s390x/cpu_models.c
@@ -85,9 +85,9 @@ static S390CPUDef s390_cpu_defs[] = {
 CPUDEF_INIT(0x3932, 16, 1, 47, 0x0800U, "gen16b", "IBM 3932 GA1"),
 };
 
-#define QEMU_MAX_CPU_TYPE 0x3906
-#define QEMU_MAX_CPU_GEN 14
-#define QEMU_MAX_CPU_EC_GA 2
+#define QEMU_MAX_CPU_TYPE 0x8561
+#define QEMU_MAX_CPU_GEN 15
+#define QEMU_MAX_CPU_EC_GA 1
 static const S390FeatInit qemu_max_cpu_feat_init = { S390_FEAT_LIST_QEMU_MAX };
 static S390FeatBitmap qemu_max_cpu_feat;
 
diff --git a/target/s390x/gen-features.c b/target/s390x/gen-features.c
index a3f30f69d9..22846121c4 100644
--- a/target/s390x/gen-features.c
+++ b/target/s390x/gen-features.c
@@ -731,16 +731,18 @@ static uint16_t qemu_V6_0[] = {
 S390_FEAT_ESOP,
 };
 
-static uint16_t qemu_LATEST[] = {
+static uint16_t qemu_V6_2[] = {
 S390_FEAT_INSTRUCTION_EXEC_PROT,
 S390_FEAT_MISC_INSTRUCTION_EXT2,
 S390_FEAT_MSA_EXT_8,
 S390_FEAT_VECTOR_ENH,
 };
 
+static uint16_t qemu_LATEST[] = {
+S390_FEAT_MISC_INSTRUCTION_EXT3,
+};
 /* add all new definitions before this point */
 static uint16_t qemu_MAX[] = {
-S390_FEAT_MISC_INSTRUCTION_EXT3,
 /* generates a dependency warning, leave it out for now */
 S390_FEAT_MSA_EXT_5,
 };
@@ -863,6 +865,7 @@ static FeatGroupDefSpec QemuFeatDef[] = {
 QEMU_FEAT_INITIALIZER(V4_0),
 QEMU_FEAT_INITIALIZER(V4_1),
 QEMU_FEAT_INITIALIZER(V6_0),
+QEMU_FEAT_INITIALIZER(V6_2),
 QEMU_FEAT_INITIALIZER(LATEST),
 QEMU_FEAT_INITIALIZER(MAX),
 };
-- 
2.32.0




[PATCH v6 0/4] s390x: Add partial z15 support and tests

2022-02-17 Thread David Miller
Add partial support for s390x z15 GA1 and specific tests for MIE3.

v5 -> v6:
* Swap operands for sel* instructions 
* Use .insn in tests for z15 arch instructions

v4 -> v5:
* Readd missing tests/tcg/s390x/mie3-*.c to patch

v3 -> v4:
* Change popcnt encoding RRE -> RRF_c
* Remove redundant code op_sel -> op_loc
* Cleanup for checkpatch.pl
* Readded mie3-* to Makefile.target

v2 -> v3:
* Moved tests to separate patch.
* Combined patches into series.


David Miller (4):
  s390x/tcg: Implement Miscellaneous-Instruction-Extensions Facility 3
for the s390x
  s390x/cpumodel: Bump up QEMU model to a stripped-down IBM z15 GA1
  tests/tcg/s390x: Tests for Miscellaneous-Instruction-Extensions
Facility 3
  tests/tcg/s390x: changed to using .insn for tests requiring z15

 hw/s390x/s390-virtio-ccw.c  |  3 ++
 target/s390x/cpu_models.c   |  6 ++--
 target/s390x/gen-features.c |  6 +++-
 target/s390x/helper.h   |  1 +
 target/s390x/tcg/insn-data.def  | 30 --
 target/s390x/tcg/mem_helper.c   | 20 
 target/s390x/tcg/translate.c| 53 +--
 tests/tcg/s390x/Makefile.target |  5 ++-
 tests/tcg/s390x/mie3-compl.c| 56 +
 tests/tcg/s390x/mie3-mvcrl.c| 31 ++
 tests/tcg/s390x/mie3-sel.c  | 42 +
 11 files changed, 243 insertions(+), 10 deletions(-)
 create mode 100644 tests/tcg/s390x/mie3-compl.c
 create mode 100644 tests/tcg/s390x/mie3-mvcrl.c
 create mode 100644 tests/tcg/s390x/mie3-sel.c

-- 
2.32.0




Re: [PATCH v5 1/3] s390x/tcg: Implement Miscellaneous-Instruction-Extensions Facility 3 for the s390x

2022-02-17 Thread David Miller
Will submit patch later today, thanks



[PATCH v5 2/3] s390x/cpumodel: Bump up QEMU model to a stripped-down IBM z15 GA1

2022-02-16 Thread David Miller
TCG implements everything we need to run basic z15 OS+software

Signed-off-by: David Miller 
---
 hw/s390x/s390-virtio-ccw.c  | 3 +++
 target/s390x/cpu_models.c   | 6 +++---
 target/s390x/gen-features.c | 7 +--
 3 files changed, 11 insertions(+), 5 deletions(-)

diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c
index 84e3e63c43..90480e7cf9 100644
--- a/hw/s390x/s390-virtio-ccw.c
+++ b/hw/s390x/s390-virtio-ccw.c
@@ -802,7 +802,10 @@ DEFINE_CCW_MACHINE(7_0, "7.0", true);
 
 static void ccw_machine_6_2_instance_options(MachineState *machine)
 {
+static const S390FeatInit qemu_cpu_feat = { S390_FEAT_LIST_QEMU_V6_2 };
+
 ccw_machine_7_0_instance_options(machine);
+s390_set_qemu_cpu_model(0x3906, 14, 2, qemu_cpu_feat);
 }
 
 static void ccw_machine_6_2_class_options(MachineClass *mc)
diff --git a/target/s390x/cpu_models.c b/target/s390x/cpu_models.c
index 11e06cc51f..89f83e81d5 100644
--- a/target/s390x/cpu_models.c
+++ b/target/s390x/cpu_models.c
@@ -85,9 +85,9 @@ static S390CPUDef s390_cpu_defs[] = {
 CPUDEF_INIT(0x3932, 16, 1, 47, 0x0800U, "gen16b", "IBM 3932 GA1"),
 };
 
-#define QEMU_MAX_CPU_TYPE 0x3906
-#define QEMU_MAX_CPU_GEN 14
-#define QEMU_MAX_CPU_EC_GA 2
+#define QEMU_MAX_CPU_TYPE 0x8561
+#define QEMU_MAX_CPU_GEN 15
+#define QEMU_MAX_CPU_EC_GA 1
 static const S390FeatInit qemu_max_cpu_feat_init = { S390_FEAT_LIST_QEMU_MAX };
 static S390FeatBitmap qemu_max_cpu_feat;
 
diff --git a/target/s390x/gen-features.c b/target/s390x/gen-features.c
index a3f30f69d9..22846121c4 100644
--- a/target/s390x/gen-features.c
+++ b/target/s390x/gen-features.c
@@ -731,16 +731,18 @@ static uint16_t qemu_V6_0[] = {
 S390_FEAT_ESOP,
 };
 
-static uint16_t qemu_LATEST[] = {
+static uint16_t qemu_V6_2[] = {
 S390_FEAT_INSTRUCTION_EXEC_PROT,
 S390_FEAT_MISC_INSTRUCTION_EXT2,
 S390_FEAT_MSA_EXT_8,
 S390_FEAT_VECTOR_ENH,
 };
 
+static uint16_t qemu_LATEST[] = {
+S390_FEAT_MISC_INSTRUCTION_EXT3,
+};
 /* add all new definitions before this point */
 static uint16_t qemu_MAX[] = {
-S390_FEAT_MISC_INSTRUCTION_EXT3,
 /* generates a dependency warning, leave it out for now */
 S390_FEAT_MSA_EXT_5,
 };
@@ -863,6 +865,7 @@ static FeatGroupDefSpec QemuFeatDef[] = {
 QEMU_FEAT_INITIALIZER(V4_0),
 QEMU_FEAT_INITIALIZER(V4_1),
 QEMU_FEAT_INITIALIZER(V6_0),
+QEMU_FEAT_INITIALIZER(V6_2),
 QEMU_FEAT_INITIALIZER(LATEST),
 QEMU_FEAT_INITIALIZER(MAX),
 };
-- 
2.32.0




[PATCH v5 1/3] s390x/tcg: Implement Miscellaneous-Instruction-Extensions Facility 3 for the s390x

2022-02-16 Thread David Miller
resolves: https://gitlab.com/qemu-project/qemu/-/issues/737
implements:
AND WITH COMPLEMENT   (NCRK, NCGRK)
NAND  (NNRK, NNGRK)
NOT EXCLUSIVE OR  (NXRK, NXGRK)
NOR   (NORK, NOGRK)
OR WITH COMPLEMENT(OCRK, OCGRK)
SELECT(SELR, SELGR)
SELECT HIGH   (SELFHR)
MOVE RIGHT TO LEFT(MVCRL)
POPULATION COUNT  (POPCNT)

Signed-off-by: David Miller 
---
 target/s390x/gen-features.c|  1 +
 target/s390x/helper.h  |  1 +
 target/s390x/tcg/insn-data.def | 30 +--
 target/s390x/tcg/mem_helper.c  | 20 +
 target/s390x/tcg/translate.c   | 53 --
 5 files changed, 100 insertions(+), 5 deletions(-)

diff --git a/target/s390x/gen-features.c b/target/s390x/gen-features.c
index 7cb1a6ec10..a3f30f69d9 100644
--- a/target/s390x/gen-features.c
+++ b/target/s390x/gen-features.c
@@ -740,6 +740,7 @@ static uint16_t qemu_LATEST[] = {
 
 /* add all new definitions before this point */
 static uint16_t qemu_MAX[] = {
+S390_FEAT_MISC_INSTRUCTION_EXT3,
 /* generates a dependency warning, leave it out for now */
 S390_FEAT_MSA_EXT_5,
 };
diff --git a/target/s390x/helper.h b/target/s390x/helper.h
index 271b081e8c..69f69cf718 100644
--- a/target/s390x/helper.h
+++ b/target/s390x/helper.h
@@ -4,6 +4,7 @@ DEF_HELPER_FLAGS_4(nc, TCG_CALL_NO_WG, i32, env, i32, i64, i64)
 DEF_HELPER_FLAGS_4(oc, TCG_CALL_NO_WG, i32, env, i32, i64, i64)
 DEF_HELPER_FLAGS_4(xc, TCG_CALL_NO_WG, i32, env, i32, i64, i64)
 DEF_HELPER_FLAGS_4(mvc, TCG_CALL_NO_WG, void, env, i32, i64, i64)
+DEF_HELPER_FLAGS_4(mvcrl, TCG_CALL_NO_WG, void, env, i64, i64, i64)
 DEF_HELPER_FLAGS_4(mvcin, TCG_CALL_NO_WG, void, env, i32, i64, i64)
 DEF_HELPER_FLAGS_4(clc, TCG_CALL_NO_WG, i32, env, i32, i64, i64)
 DEF_HELPER_3(mvcl, i32, env, i32, i32)
diff --git a/target/s390x/tcg/insn-data.def b/target/s390x/tcg/insn-data.def
index 1c3e115712..efb1d5bc19 100644
--- a/target/s390x/tcg/insn-data.def
+++ b/target/s390x/tcg/insn-data.def
@@ -105,6 +105,9 @@
 D(0xa507, NILL,RI_a,  Z,   r1_o, i2_16u, r1, 0, andi, 0, 0x1000)
 D(0x9400, NI,  SI,Z,   la1, i2_8u, new, 0, ni, nz64, MO_UB)
 D(0xeb54, NIY, SIY,   LD,  la1, i2_8u, new, 0, ni, nz64, MO_UB)
+/* AND WITH COMPLEMENT */
+C(0xb9f5, NCRK,RRF_a, MIE3, r2, r3, new, r1_32, andc, nz32)
+C(0xb9e5, NCGRK,   RRF_a, MIE3, r2, r3, r1, 0, andc, nz64)
 
 /* BRANCH AND LINK */
 C(0x0500, BALR,RR_a,  Z,   0, r2_nz, r1, 0, bal, 0)
@@ -640,6 +643,8 @@
 C(0xeb8e, MVCLU,   RSY_a, E2,  0, a2, 0, 0, mvclu, 0)
 /* MOVE NUMERICS */
 C(0xd100, MVN, SS_a,  Z,   la1, a2, 0, 0, mvn, 0)
+/* MOVE RIGHT TO LEFT */
+C(0xe50a, MVCRL,   SSE,  MIE3, la1, a2, 0, 0, mvcrl, 0)
 /* MOVE PAGE */
 C(0xb254, MVPG,RRE,   Z,   0, 0, 0, 0, mvpg, 0)
 /* MOVE STRING */
@@ -707,6 +712,16 @@
 F(0xed0f, MSEB,RXF,   Z,   e1, m2_32u, new, e1, mseb, 0, IF_BFP)
 F(0xed1f, MSDB,RXF,   Z,   f1, m2_64, new, f1, msdb, 0, IF_BFP)
 
+/* NAND */
+C(0xb974, NNRK,RRF_a, MIE3, r2, r3, new, r1_32, nand, nz32)
+C(0xb964, NNGRK,   RRF_a, MIE3, r2, r3, r1, 0, nand, nz64)
+/* NOR */
+C(0xb976, NORK,RRF_a, MIE3, r2, r3, new, r1_32, nor, nz32)
+C(0xb966, NOGRK,   RRF_a, MIE3, r2, r3, r1, 0, nor, nz64)
+/* NOT EXCLUSIVE OR */
+C(0xb977, NXRK,RRF_a, MIE3, r2, r3, new, r1_32, nxor, nz32)
+C(0xb967, NXGRK,   RRF_a, MIE3, r2, r3, r1, 0, nxor, nz64)
+
 /* OR */
 C(0x1600, OR,  RR_a,  Z,   r1, r2, new, r1_32, or, nz32)
 C(0xb9f6, ORK, RRF_a, DO,  r2, r3, new, r1_32, or, nz32)
@@ -725,6 +740,9 @@
 D(0xa50b, OILL,RI_a,  Z,   r1_o, i2_16u, r1, 0, ori, 0, 0x1000)
 D(0x9600, OI,  SI,Z,   la1, i2_8u, new, 0, oi, nz64, MO_UB)
 D(0xeb56, OIY, SIY,   LD,  la1, i2_8u, new, 0, oi, nz64, MO_UB)
+/* OR WITH COMPLEMENT */
+C(0xb975, OCRK,RRF_a, MIE3, r2, r3, new, r1_32, orc, nz32)
+C(0xb965, OCGRK,   RRF_a, MIE3, r2, r3, r1, 0, orc, nz64)
 
 /* PACK */
 /* Really format SS_b, but we pack both lengths into one argument
@@ -735,6 +753,9 @@
 /* PACK UNICODE */
 C(0xe100, PKU, SS_f,  E2,  la1, a2, 0, 0, pku, 0)
 
+/* POPULATION COUNT */
+C(0xb9e1, POPCNT,  RRF_c, PC,  0, r2_o, r1, 0, popcnt, nz64)
+
 /* PREFETCH */
 /* Implemented as nops of course.  */
 C(0xe336, PFD, RXY_b, GIE, 0, 0, 0, 0, 0, 0)
@@ -743,9 +764,6 @@
 /* Implemented as nop of course.  */
 C(0xb2e8, PPA, RRF_c, PPA, 0, 0, 0, 0, 0, 0)
 
-/* POPULATION COUNT */
-C(0xb9e1, POPCNT,  RRE,   PC,  0, r2_o, r1, 0, popcnt, nz64)
-
 /* ROTATE LEFT SINGLE LOGICAL */
 C(0xeb1d, RLL, RSY_a, Z,   r3_o, sh, new, r1_32, rll32, 0)
 C(0xeb1c, RLLG,RSY_a, Z,   r3_o, sh, r1, 0, rll64, 0)
@@ -765,6 +783,12 @@
 /* SEARCH STRING UNICODE */
 C(0xb9be, SRSTU,   RRE,   ETF3, 0, 0, 0, 0, srstu, 0)
 
+/* SELECT */
+C(0xb9f0, SELR,RRF_a, MIE3, r2, r3, new, r1_32, loc, 0)
+C(0xb9e3, SELGR,   RRF_a, MIE3

[PATCH v5 3/3] tests/tcg/s390x: Tests for Miscellaneous-Instruction-Extensions Facility 3

2022-02-16 Thread David Miller
tests/tcg/s390x/mie3-compl.c: [N]*K instructions
tests/tcg/s390x/mie3-mvcrl.c: MVCRL instruction
tests/tcg/s390x/mie3-sel.c:  SELECT instruction

Signed-off-by: David Miller 
---
 tests/tcg/s390x/Makefile.target |  5 ++-
 tests/tcg/s390x/mie3-compl.c| 55 +
 tests/tcg/s390x/mie3-mvcrl.c| 31 +++
 tests/tcg/s390x/mie3-sel.c  | 42 +
 4 files changed, 132 insertions(+), 1 deletion(-)
 create mode 100644 tests/tcg/s390x/mie3-compl.c
 create mode 100644 tests/tcg/s390x/mie3-mvcrl.c
 create mode 100644 tests/tcg/s390x/mie3-sel.c

diff --git a/tests/tcg/s390x/Makefile.target b/tests/tcg/s390x/Makefile.target
index 1a7238b4eb..54e67446aa 100644
--- a/tests/tcg/s390x/Makefile.target
+++ b/tests/tcg/s390x/Makefile.target
@@ -1,12 +1,15 @@
 S390X_SRC=$(SRC_PATH)/tests/tcg/s390x
 VPATH+=$(S390X_SRC)
-CFLAGS+=-march=zEC12 -m64
+CFLAGS+=-march=z15 -m64
 TESTS+=hello-s390x
 TESTS+=csst
 TESTS+=ipm
 TESTS+=exrl-trt
 TESTS+=exrl-trtr
 TESTS+=pack
+TESTS+=mie3-compl
+TESTS+=mie3-mvcrl
+TESTS+=mie3-sel
 TESTS+=mvo
 TESTS+=mvc
 TESTS+=shift
diff --git a/tests/tcg/s390x/mie3-compl.c b/tests/tcg/s390x/mie3-compl.c
new file mode 100644
index 00..98281ee683
--- /dev/null
+++ b/tests/tcg/s390x/mie3-compl.c
@@ -0,0 +1,55 @@
+#include <stdint.h>
+
+
+#define F_EPI "stg %%r0, %[res] " : [res] "+m" (res) : : "r0", "r2", "r3"
+
+#define F_PROasm ( \
+"llihf %%r0,801\n" \
+"lg %%r2, %[a]\n"  \
+"lg %%r3, %[b] "   \
+: : [a] "m" (a),   \
+[b] "m" (b)\
+: "r2", "r3")
+
+#define FbinOp(S, ASM) uint64_t S(uint64_t a, uint64_t b) \
+{ uint64_t res = 0; F_PRO; ASM; return res; }
+
+/* AND WITH COMPLEMENT */
+FbinOp(_ncrk,  asm("ncrk  %%r0, %%r3, %%r2\n" F_EPI))
+FbinOp(_ncgrk, asm("ncgrk %%r0, %%r3, %%r2\n" F_EPI))
+
+/* NAND */
+FbinOp(_nnrk,  asm("nnrk  %%r0, %%r3, %%r2\n" F_EPI))
+FbinOp(_nngrk, asm("nngrk %%r0, %%r3, %%r2\n" F_EPI))
+
+/* NOT XOR */
+FbinOp(_nxrk,  asm("nxrk  %%r0, %%r3, %%r2\n" F_EPI))
+FbinOp(_nxgrk, asm("nxgrk %%r0, %%r3, %%r2\n" F_EPI))
+
+/* NOR */
+FbinOp(_nork,  asm("nork  %%r0, %%r3, %%r2\n" F_EPI))
+FbinOp(_nogrk, asm("nogrk %%r0, %%r3, %%r2\n" F_EPI))
+
+/* OR WITH COMPLEMENT */
+FbinOp(_ocrk,  asm("ocrk  %%r0, %%r3, %%r2\n" F_EPI))
+FbinOp(_ocgrk, asm("ocgrk %%r0, %%r3, %%r2\n" F_EPI))
+
+
+int main(int argc, char *argv[])
+{
+if (_ncrk(0xFF88, 0xAA11)  != 0x03210011ull ||
+_nnrk(0xFF88, 0xAA11)  != 0x032155FFull ||
+_nork(0xFF88, 0xAA11)  != 0x03210066ull ||
+_nxrk(0xFF88, 0xAA11)  != 0x0321AA66ull ||
+_ocrk(0xFF88, 0xAA11)  != 0x0321AA77ull ||
+_ncgrk(0xFF88, 0xAA11) != 0x0011ull ||
+_nngrk(0xFF88, 0xAA11) != 0x55FFull ||
+_nogrk(0xFF88, 0xAA11) != 0x0066ull ||
+_nxgrk(0xFF88, 0xAA11) != 0xAA66ull ||
+_ocgrk(0xFF88, 0xAA11) != 0xAA77ull)
+{
+return 1;
+}
+
+return 0;
+}
diff --git a/tests/tcg/s390x/mie3-mvcrl.c b/tests/tcg/s390x/mie3-mvcrl.c
new file mode 100644
index 00..81cf3ad702
--- /dev/null
+++ b/tests/tcg/s390x/mie3-mvcrl.c
@@ -0,0 +1,31 @@
+#include <stdint.h>
+#include <string.h>
+
+
+static inline void mvcrl_8(const char *dst, const char *src)
+{
+asm volatile (
+"llill %%r0, 8\n"
+"mvcrl 0(%[dst]), 0(%[src])\n"
+: : [dst] "d" (dst), [src] "d" (src)
+: "memory");
+}
+
+
+int main(int argc, char *argv[])
+{
+const char *alpha = "abcdefghijklmnop";
+
+/* array missing 'i' */
+char tstr[17] = "abcdefghjklmnop\0" ;
+
+/* mvcrl reference use: 'open a hole in an array' */
+mvcrl_8(tstr + 9, tstr + 8);
+
+/* place missing 'i' */
+tstr[8] = 'i';
+
+return strncmp(alpha, tstr, 16ul);
+}
+
+
diff --git a/tests/tcg/s390x/mie3-sel.c b/tests/tcg/s390x/mie3-sel.c
new file mode 100644
index 00..d6b7b0933b
--- /dev/null
+++ b/tests/tcg/s390x/mie3-sel.c
@@ -0,0 +1,42 @@
+#include <stdint.h>
+
+
+#define F_EPI "stg %%r0, %[res] " : [res] "+m" (res) : : "r0", "r2", "r3"
+
+#define F_PROasm ( \
+"lg %%r2, %[a]\n"  \
+"lg %%r3, %[b]\n"  \
+"lg %%r0, %[c]\n"  \
+"ltgr %%r0, %%r0"  \
+: : [a] "m" (a),   \
+[b] "m" (b),   \
+[c] "m" (c)\
+: "r0", "r2", "r3", "r4")
+
+
+
+#define Fi3(S, ASM) uint64_t S(uint64_t a, uint64_t b, uint64_t c) \
+{ uint64_t res = 0; F_PRO ; ASM ; return res; }
+
+
+Fi3 (_selre, asm("selre%%r0, %%r3, %%r2\n" F_EPI))
+

[PATCH v5 0/3] s390x: Add partial z15 support and tests

2022-02-16 Thread David Miller
Add partial support for s390x z15 ga1 and specific tests for mie3 

v4 -> v5:
* Readd missing tests/tcg/s390x/mie3-*.c to patch

v3 -> v4:
* Change popcnt encoding RRE -> RRF_c
* Remove redundant code op_sel -> op_loc
* Cleanup for checkpatch.pl
* Readded mie3-* to Makefile.target

v2 -> v3:
* Moved tests to separate patch.
* Combined patches into series.


David Miller (3):
  s390x/tcg: Implement Miscellaneous-Instruction-Extensions Facility 3
for the s390x
  s390x/cpumodel: Bump up QEMU model to a stripped-down IBM z15 GA1
  tests/tcg/s390x: Tests for Miscellaneous-Instruction-Extensions
Facility 3

 hw/s390x/s390-virtio-ccw.c  |  3 ++
 target/s390x/cpu_models.c   |  6 ++--
 target/s390x/gen-features.c |  6 +++-
 target/s390x/helper.h   |  1 +
 target/s390x/tcg/insn-data.def  | 30 --
 target/s390x/tcg/mem_helper.c   | 20 
 target/s390x/tcg/translate.c| 53 +--
 tests/tcg/s390x/Makefile.target |  5 ++-
 tests/tcg/s390x/mie3-compl.c| 55 +
 tests/tcg/s390x/mie3-mvcrl.c| 31 +++
 tests/tcg/s390x/mie3-sel.c  | 42 +
 11 files changed, 242 insertions(+), 10 deletions(-)
 create mode 100644 tests/tcg/s390x/mie3-compl.c
 create mode 100644 tests/tcg/s390x/mie3-mvcrl.c
 create mode 100644 tests/tcg/s390x/mie3-sel.c

-- 
2.32.0




[PATCH v4 0/3] s390x: Add partial z15 support and tests

2022-02-16 Thread David Miller
Add partial support for s390x z15 ga1 and specific tests for mie3 

v3 -> v4:
* Change popcnt encoding RRE -> RRF_c
* Remove redundant code op_sel -> op_loc
* Cleanup for checkpatch.pl
* Readded mie3-* to Makefile.target

v2 -> v3:
* Moved tests to separate patch.
* Combined patches into series.


David Miller (3):
  s390x/tcg: Implement Miscellaneous-Instruction-Extensions Facility 3
for the s390x
  s390x/cpumodel: Bump up QEMU model to a stripped-down IBM z15 GA1
  tests/tcg/s390x: Tests for Miscellaneous-Instruction-Extensions
Facility 3

 hw/s390x/s390-virtio-ccw.c  |  3 ++
 target/s390x/cpu_models.c   |  6 ++--
 target/s390x/gen-features.c |  6 +++-
 target/s390x/helper.h   |  1 +
 target/s390x/tcg/insn-data.def  | 30 +--
 target/s390x/tcg/mem_helper.c   | 20 +
 target/s390x/tcg/translate.c| 53 +++--
 tests/tcg/s390x/Makefile.target |  5 +++-
 8 files changed, 114 insertions(+), 10 deletions(-)

-- 
2.32.0




[PATCH v4 3/3] tests/tcg/s390x: Tests for Miscellaneous-Instruction-Extensions Facility 3

2022-02-16 Thread David Miller
tests/tcg/s390x/mie3-compl.c: [N]*K instructions
tests/tcg/s390x/mie3-mvcrl.c: MVCRL instruction
tests/tcg/s390x/mie3-sel.c:  SELECT instruction

Signed-off-by: David Miller 
---
 tests/tcg/s390x/Makefile.target | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/tests/tcg/s390x/Makefile.target b/tests/tcg/s390x/Makefile.target
index 1a7238b4eb..54e67446aa 100644
--- a/tests/tcg/s390x/Makefile.target
+++ b/tests/tcg/s390x/Makefile.target
@@ -1,12 +1,15 @@
 S390X_SRC=$(SRC_PATH)/tests/tcg/s390x
 VPATH+=$(S390X_SRC)
-CFLAGS+=-march=zEC12 -m64
+CFLAGS+=-march=z15 -m64
 TESTS+=hello-s390x
 TESTS+=csst
 TESTS+=ipm
 TESTS+=exrl-trt
 TESTS+=exrl-trtr
 TESTS+=pack
+TESTS+=mie3-compl
+TESTS+=mie3-mvcrl
+TESTS+=mie3-sel
 TESTS+=mvo
 TESTS+=mvc
 TESTS+=shift
-- 
2.32.0




Re: [PATCH v4 3/3] tests/tcg/s390x: Tests for Miscellaneous-Instruction-Extensions Facility 3

2022-02-16 Thread David Miller
That is strange: if I undo the commit and run git status, the files show up as staged to be committed:

null@rygar:~/projects/qemu/build$ git reset --soft HEAD~1
null@rygar:~/projects/qemu/build$ git status
On branch t2
Changes to be committed:
  (use "git restore --staged <file>..." to unstage)
modified:   ../tests/tcg/s390x/Makefile.target
new file:   ../tests/tcg/s390x/mie3-compl.c
new file:   ../tests/tcg/s390x/mie3-mvcrl.c
new file:   ../tests/tcg/s390x/mie3-sel.c




On Wed, Feb 16, 2022 at 3:13 PM David Hildenbrand  wrote:
>
> On 16.02.22 21:03, David Miller wrote:
> > tests/tcg/s390x/mie3-compl.c: [N]*K instructions
> > tests/tcg/s390x/mie3-mvcrl.c: MVCRL instruction
> > tests/tcg/s390x/mie3-sel.c:  SELECT instruction
> >
> > Signed-off-by: David Miller 
> > ---
> >  tests/tcg/s390x/Makefile.target | 5 -
> >  1 file changed, 4 insertions(+), 1 deletion(-)
> >
> > diff --git a/tests/tcg/s390x/Makefile.target 
> > b/tests/tcg/s390x/Makefile.target
> > index 1a7238b4eb..54e67446aa 100644
> > --- a/tests/tcg/s390x/Makefile.target
> > +++ b/tests/tcg/s390x/Makefile.target
> > @@ -1,12 +1,15 @@
> >  S390X_SRC=$(SRC_PATH)/tests/tcg/s390x
> >  VPATH+=$(S390X_SRC)
> > -CFLAGS+=-march=zEC12 -m64
> > +CFLAGS+=-march=z15 -m64
> >  TESTS+=hello-s390x
> >  TESTS+=csst
> >  TESTS+=ipm
> >  TESTS+=exrl-trt
> >  TESTS+=exrl-trtr
> >  TESTS+=pack
> > +TESTS+=mie3-compl
> > +TESTS+=mie3-mvcrl
> > +TESTS+=mie3-sel
> >  TESTS+=mvo
> >  TESTS+=mvc
> >  TESTS+=shift
>
> 404, tests not found :)
>
> --
> Thanks,
>
> David / dhildenb
>



[PATCH v4 2/3] s390x/cpumodel: Bump up QEMU model to a stripped-down IBM z15 GA1

2022-02-16 Thread David Miller
TCG implements everything we need to run basic z15 OS+software

Signed-off-by: David Miller 
---
 hw/s390x/s390-virtio-ccw.c  | 3 +++
 target/s390x/cpu_models.c   | 6 +++---
 target/s390x/gen-features.c | 7 +--
 3 files changed, 11 insertions(+), 5 deletions(-)

diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c
index 84e3e63c43..90480e7cf9 100644
--- a/hw/s390x/s390-virtio-ccw.c
+++ b/hw/s390x/s390-virtio-ccw.c
@@ -802,7 +802,10 @@ DEFINE_CCW_MACHINE(7_0, "7.0", true);
 
 static void ccw_machine_6_2_instance_options(MachineState *machine)
 {
+static const S390FeatInit qemu_cpu_feat = { S390_FEAT_LIST_QEMU_V6_2 };
+
 ccw_machine_7_0_instance_options(machine);
+s390_set_qemu_cpu_model(0x3906, 14, 2, qemu_cpu_feat);
 }
 
 static void ccw_machine_6_2_class_options(MachineClass *mc)
diff --git a/target/s390x/cpu_models.c b/target/s390x/cpu_models.c
index 11e06cc51f..89f83e81d5 100644
--- a/target/s390x/cpu_models.c
+++ b/target/s390x/cpu_models.c
@@ -85,9 +85,9 @@ static S390CPUDef s390_cpu_defs[] = {
 CPUDEF_INIT(0x3932, 16, 1, 47, 0x0800U, "gen16b", "IBM 3932 GA1"),
 };
 
-#define QEMU_MAX_CPU_TYPE 0x3906
-#define QEMU_MAX_CPU_GEN 14
-#define QEMU_MAX_CPU_EC_GA 2
+#define QEMU_MAX_CPU_TYPE 0x8561
+#define QEMU_MAX_CPU_GEN 15
+#define QEMU_MAX_CPU_EC_GA 1
 static const S390FeatInit qemu_max_cpu_feat_init = { S390_FEAT_LIST_QEMU_MAX };
 static S390FeatBitmap qemu_max_cpu_feat;
 
diff --git a/target/s390x/gen-features.c b/target/s390x/gen-features.c
index a3f30f69d9..22846121c4 100644
--- a/target/s390x/gen-features.c
+++ b/target/s390x/gen-features.c
@@ -731,16 +731,18 @@ static uint16_t qemu_V6_0[] = {
 S390_FEAT_ESOP,
 };
 
-static uint16_t qemu_LATEST[] = {
+static uint16_t qemu_V6_2[] = {
 S390_FEAT_INSTRUCTION_EXEC_PROT,
 S390_FEAT_MISC_INSTRUCTION_EXT2,
 S390_FEAT_MSA_EXT_8,
 S390_FEAT_VECTOR_ENH,
 };
 
+static uint16_t qemu_LATEST[] = {
+S390_FEAT_MISC_INSTRUCTION_EXT3,
+};
 /* add all new definitions before this point */
 static uint16_t qemu_MAX[] = {
-S390_FEAT_MISC_INSTRUCTION_EXT3,
 /* generates a dependency warning, leave it out for now */
 S390_FEAT_MSA_EXT_5,
 };
@@ -863,6 +865,7 @@ static FeatGroupDefSpec QemuFeatDef[] = {
 QEMU_FEAT_INITIALIZER(V4_0),
 QEMU_FEAT_INITIALIZER(V4_1),
 QEMU_FEAT_INITIALIZER(V6_0),
+QEMU_FEAT_INITIALIZER(V6_2),
 QEMU_FEAT_INITIALIZER(LATEST),
 QEMU_FEAT_INITIALIZER(MAX),
 };
-- 
2.32.0




[PATCH v4 1/3] s390x/tcg: Implement Miscellaneous-Instruction-Extensions Facility 3 for the s390x

2022-02-16 Thread David Miller
resolves: https://gitlab.com/qemu-project/qemu/-/issues/737
implements:
AND WITH COMPLEMENT   (NCRK, NCGRK)
NAND  (NNRK, NNGRK)
NOT EXCLUSIVE OR  (NXRK, NXGRK)
NOR   (NORK, NOGRK)
OR WITH COMPLEMENT(OCRK, OCGRK)
SELECT(SELR, SELGR)
SELECT HIGH   (SELFHR)
MOVE RIGHT TO LEFT(MVCRL)
POPULATION COUNT  (POPCNT)

Signed-off-by: David Miller 
---
 target/s390x/gen-features.c|  1 +
 target/s390x/helper.h  |  1 +
 target/s390x/tcg/insn-data.def | 30 +--
 target/s390x/tcg/mem_helper.c  | 20 +
 target/s390x/tcg/translate.c   | 53 --
 5 files changed, 100 insertions(+), 5 deletions(-)

diff --git a/target/s390x/gen-features.c b/target/s390x/gen-features.c
index 7cb1a6ec10..a3f30f69d9 100644
--- a/target/s390x/gen-features.c
+++ b/target/s390x/gen-features.c
@@ -740,6 +740,7 @@ static uint16_t qemu_LATEST[] = {
 
 /* add all new definitions before this point */
 static uint16_t qemu_MAX[] = {
+S390_FEAT_MISC_INSTRUCTION_EXT3,
 /* generates a dependency warning, leave it out for now */
 S390_FEAT_MSA_EXT_5,
 };
diff --git a/target/s390x/helper.h b/target/s390x/helper.h
index 271b081e8c..69f69cf718 100644
--- a/target/s390x/helper.h
+++ b/target/s390x/helper.h
@@ -4,6 +4,7 @@ DEF_HELPER_FLAGS_4(nc, TCG_CALL_NO_WG, i32, env, i32, i64, i64)
 DEF_HELPER_FLAGS_4(oc, TCG_CALL_NO_WG, i32, env, i32, i64, i64)
 DEF_HELPER_FLAGS_4(xc, TCG_CALL_NO_WG, i32, env, i32, i64, i64)
 DEF_HELPER_FLAGS_4(mvc, TCG_CALL_NO_WG, void, env, i32, i64, i64)
+DEF_HELPER_FLAGS_4(mvcrl, TCG_CALL_NO_WG, void, env, i64, i64, i64)
 DEF_HELPER_FLAGS_4(mvcin, TCG_CALL_NO_WG, void, env, i32, i64, i64)
 DEF_HELPER_FLAGS_4(clc, TCG_CALL_NO_WG, i32, env, i32, i64, i64)
 DEF_HELPER_3(mvcl, i32, env, i32, i32)
diff --git a/target/s390x/tcg/insn-data.def b/target/s390x/tcg/insn-data.def
index 1c3e115712..efb1d5bc19 100644
--- a/target/s390x/tcg/insn-data.def
+++ b/target/s390x/tcg/insn-data.def
@@ -105,6 +105,9 @@
 D(0xa507, NILL,RI_a,  Z,   r1_o, i2_16u, r1, 0, andi, 0, 0x1000)
 D(0x9400, NI,  SI,Z,   la1, i2_8u, new, 0, ni, nz64, MO_UB)
 D(0xeb54, NIY, SIY,   LD,  la1, i2_8u, new, 0, ni, nz64, MO_UB)
+/* AND WITH COMPLEMENT */
+C(0xb9f5, NCRK,RRF_a, MIE3, r2, r3, new, r1_32, andc, nz32)
+C(0xb9e5, NCGRK,   RRF_a, MIE3, r2, r3, r1, 0, andc, nz64)
 
 /* BRANCH AND LINK */
 C(0x0500, BALR,RR_a,  Z,   0, r2_nz, r1, 0, bal, 0)
@@ -640,6 +643,8 @@
 C(0xeb8e, MVCLU,   RSY_a, E2,  0, a2, 0, 0, mvclu, 0)
 /* MOVE NUMERICS */
 C(0xd100, MVN, SS_a,  Z,   la1, a2, 0, 0, mvn, 0)
+/* MOVE RIGHT TO LEFT */
+C(0xe50a, MVCRL,   SSE,  MIE3, la1, a2, 0, 0, mvcrl, 0)
 /* MOVE PAGE */
 C(0xb254, MVPG,RRE,   Z,   0, 0, 0, 0, mvpg, 0)
 /* MOVE STRING */
@@ -707,6 +712,16 @@
 F(0xed0f, MSEB,RXF,   Z,   e1, m2_32u, new, e1, mseb, 0, IF_BFP)
 F(0xed1f, MSDB,RXF,   Z,   f1, m2_64, new, f1, msdb, 0, IF_BFP)
 
+/* NAND */
+C(0xb974, NNRK,RRF_a, MIE3, r2, r3, new, r1_32, nand, nz32)
+C(0xb964, NNGRK,   RRF_a, MIE3, r2, r3, r1, 0, nand, nz64)
+/* NOR */
+C(0xb976, NORK,RRF_a, MIE3, r2, r3, new, r1_32, nor, nz32)
+C(0xb966, NOGRK,   RRF_a, MIE3, r2, r3, r1, 0, nor, nz64)
+/* NOT EXCLUSIVE OR */
+C(0xb977, NXRK,RRF_a, MIE3, r2, r3, new, r1_32, nxor, nz32)
+C(0xb967, NXGRK,   RRF_a, MIE3, r2, r3, r1, 0, nxor, nz64)
+
 /* OR */
 C(0x1600, OR,  RR_a,  Z,   r1, r2, new, r1_32, or, nz32)
 C(0xb9f6, ORK, RRF_a, DO,  r2, r3, new, r1_32, or, nz32)
@@ -725,6 +740,9 @@
 D(0xa50b, OILL,RI_a,  Z,   r1_o, i2_16u, r1, 0, ori, 0, 0x1000)
 D(0x9600, OI,  SI,Z,   la1, i2_8u, new, 0, oi, nz64, MO_UB)
 D(0xeb56, OIY, SIY,   LD,  la1, i2_8u, new, 0, oi, nz64, MO_UB)
+/* OR WITH COMPLEMENT */
+C(0xb975, OCRK,RRF_a, MIE3, r2, r3, new, r1_32, orc, nz32)
+C(0xb965, OCGRK,   RRF_a, MIE3, r2, r3, r1, 0, orc, nz64)
 
 /* PACK */
 /* Really format SS_b, but we pack both lengths into one argument
@@ -735,6 +753,9 @@
 /* PACK UNICODE */
 C(0xe100, PKU, SS_f,  E2,  la1, a2, 0, 0, pku, 0)
 
+/* POPULATION COUNT */
+C(0xb9e1, POPCNT,  RRF_c, PC,  0, r2_o, r1, 0, popcnt, nz64)
+
 /* PREFETCH */
 /* Implemented as nops of course.  */
 C(0xe336, PFD, RXY_b, GIE, 0, 0, 0, 0, 0, 0)
@@ -743,9 +764,6 @@
 /* Implemented as nop of course.  */
 C(0xb2e8, PPA, RRF_c, PPA, 0, 0, 0, 0, 0, 0)
 
-/* POPULATION COUNT */
-C(0xb9e1, POPCNT,  RRE,   PC,  0, r2_o, r1, 0, popcnt, nz64)
-
 /* ROTATE LEFT SINGLE LOGICAL */
 C(0xeb1d, RLL, RSY_a, Z,   r3_o, sh, new, r1_32, rll32, 0)
 C(0xeb1c, RLLG,RSY_a, Z,   r3_o, sh, r1, 0, rll64, 0)
@@ -765,6 +783,12 @@
 /* SEARCH STRING UNICODE */
 C(0xb9be, SRSTU,   RRE,   ETF3, 0, 0, 0, 0, srstu, 0)
 
+/* SELECT */
+C(0xb9f0, SELR,RRF_a, MIE3, r2, r3, new, r1_32, loc, 0)
+C(0xb9e3, SELGR,   RRF_a, MIE3

[PATCH v3 1/3] s390x/tcg: Implement Miscellaneous-Instruction-Extensions Facility 3 for the s390x

2022-02-15 Thread David Miller

resolves: https://gitlab.com/qemu-project/qemu/-/issues/737
implements:
AND WITH COMPLEMENT   (NCRK, NCGRK)
NAND  (NNRK, NNGRK)
NOT EXCLUSIVE OR  (NXRK, NXGRK)
NOR   (NORK, NOGRK)
OR WITH COMPLEMENT(OCRK, OCGRK)
SELECT(SELR, SELGR)
SELECT HIGH   (SELFHR)
MOVE RIGHT TO LEFT(MVCRL)
POPULATION COUNT  (POPCNT)

Signed-off-by: David Miller 
---
 target/s390x/gen-features.c|  1 +
 target/s390x/helper.h  |  1 +
 target/s390x/tcg/insn-data.def | 30 --
 target/s390x/tcg/mem_helper.c  | 20 
 target/s390x/tcg/translate.c   | 56 +-
 5 files changed, 104 insertions(+), 4 deletions(-)

diff --git a/target/s390x/gen-features.c b/target/s390x/gen-features.c
index 7cb1a6ec10..a3f30f69d9 100644
--- a/target/s390x/gen-features.c
+++ b/target/s390x/gen-features.c
@@ -740,6 +740,7 @@ static uint16_t qemu_LATEST[] = {
  /* add all new definitions before this point */
 static uint16_t qemu_MAX[] = {
+S390_FEAT_MISC_INSTRUCTION_EXT3,
 /* generates a dependency warning, leave it out for now */
 S390_FEAT_MSA_EXT_5,
 };
diff --git a/target/s390x/helper.h b/target/s390x/helper.h
index 271b081e8c..69f69cf718 100644
--- a/target/s390x/helper.h
+++ b/target/s390x/helper.h
@@ -4,6 +4,7 @@ DEF_HELPER_FLAGS_4(nc, TCG_CALL_NO_WG, i32, env, i32, i64, i64)
 DEF_HELPER_FLAGS_4(oc, TCG_CALL_NO_WG, i32, env, i32, i64, i64)
 DEF_HELPER_FLAGS_4(xc, TCG_CALL_NO_WG, i32, env, i32, i64, i64)
 DEF_HELPER_FLAGS_4(mvc, TCG_CALL_NO_WG, void, env, i32, i64, i64)
+DEF_HELPER_FLAGS_4(mvcrl, TCG_CALL_NO_WG, void, env, i64, i64, i64)
 DEF_HELPER_FLAGS_4(mvcin, TCG_CALL_NO_WG, void, env, i32, i64, i64)
 DEF_HELPER_FLAGS_4(clc, TCG_CALL_NO_WG, i32, env, i32, i64, i64)
 DEF_HELPER_3(mvcl, i32, env, i32, i32)
diff --git a/target/s390x/tcg/insn-data.def b/target/s390x/tcg/insn-data.def
index 1c3e115712..a64555f824 100644
--- a/target/s390x/tcg/insn-data.def
+++ b/target/s390x/tcg/insn-data.def
@@ -105,6 +105,9 @@
 D(0xa507, NILL,RI_a,  Z,   r1_o, i2_16u, r1, 0, andi, 0, 0x1000)
 D(0x9400, NI,  SI,Z,   la1, i2_8u, new, 0, ni, nz64, MO_UB)
 D(0xeb54, NIY, SIY,   LD,  la1, i2_8u, new, 0, ni, nz64, MO_UB)
+/* AND WITH COMPLEMENT */
+C(0xb9f5, NCRK,RRF_a, MIE3, r2, r3, new, r1_32, andc, nz32)
+C(0xb9e5, NCGRK,   RRF_a, MIE3, r2, r3, r1, 0, andc, nz64)
  /* BRANCH AND LINK */
 C(0x0500, BALR,RR_a,  Z,   0, r2_nz, r1, 0, bal, 0)
@@ -640,6 +643,8 @@
 C(0xeb8e, MVCLU,   RSY_a, E2,  0, a2, 0, 0, mvclu, 0)
 /* MOVE NUMERICS */
 C(0xd100, MVN, SS_a,  Z,   la1, a2, 0, 0, mvn, 0)
+/* MOVE RIGHT TO LEFT */
+C(0xe50a, MVCRL,   SSE,  MIE3, la1, a2, 0, 0, mvcrl, 0)
 /* MOVE PAGE */
 C(0xb254, MVPG,RRE,   Z,   0, 0, 0, 0, mvpg, 0)
 /* MOVE STRING */
@@ -707,6 +712,16 @@
 F(0xed0f, MSEB,RXF,   Z,   e1, m2_32u, new, e1, mseb, 0, IF_BFP)
 F(0xed1f, MSDB,RXF,   Z,   f1, m2_64, new, f1, msdb, 0, IF_BFP)
 +/* NAND */
+C(0xb974, NNRK,RRF_a, MIE3, r2, r3, new, r1_32, nand, nz32)
+C(0xb964, NNGRK,   RRF_a, MIE3, r2, r3, r1, 0, nand, nz64)
+/* NOR */
+C(0xb976, NORK,RRF_a, MIE3, r2, r3, new, r1_32, nor, nz32)
+C(0xb966, NOGRK,   RRF_a, MIE3, r2, r3, r1, 0, nor, nz64)
+/* NOT EXCLUSIVE OR */
+C(0xb977, NXRK,RRF_a, MIE3, r2, r3, new, r1_32, nxor, nz32)
+C(0xb967, NXGRK,   RRF_a, MIE3, r2, r3, r1, 0, nxor, nz64)
+
 /* OR */
 C(0x1600, OR,  RR_a,  Z,   r1, r2, new, r1_32, or, nz32)
 C(0xb9f6, ORK, RRF_a, DO,  r2, r3, new, r1_32, or, nz32)
@@ -725,6 +740,9 @@
 D(0xa50b, OILL,RI_a,  Z,   r1_o, i2_16u, r1, 0, ori, 0, 0x1000)
 D(0x9600, OI,  SI,Z,   la1, i2_8u, new, 0, oi, nz64, MO_UB)
 D(0xeb56, OIY, SIY,   LD,  la1, i2_8u, new, 0, oi, nz64, MO_UB)
+/* OR WITH COMPLEMENT */
+C(0xb975, OCRK,RRF_a, MIE3, r2, r3, new, r1_32, orc, nz32)
+C(0xb965, OCGRK,   RRF_a, MIE3, r2, r3, r1, 0, orc, nz64)
  /* PACK */
 /* Really format SS_b, but we pack both lengths into one argument
@@ -735,6 +753,9 @@
 /* PACK UNICODE */
 C(0xe100, PKU, SS_f,  E2,  la1, a2, 0, 0, pku, 0)
 +/* POPULATION COUNT */
+C(0xb9e1, POPCNT,  RRE,   PC,  0, r2_o, r1, 0, popcnt, nz64)
+
 /* PREFETCH */
 /* Implemented as nops of course.  */
 C(0xe336, PFD, RXY_b, GIE, 0, 0, 0, 0, 0, 0)
@@ -743,9 +764,6 @@
 /* Implemented as nop of course.  */
 C(0xb2e8, PPA, RRF_c, PPA, 0, 0, 0, 0, 0, 0)
 -/* POPULATION COUNT */
-C(0xb9e1, POPCNT,  RRE,   PC,  0, r2_o, r1, 0, popcnt, nz64)
-
 /* ROTATE LEFT SINGLE LOGICAL */
 C(0xeb1d, RLL, RSY_a, Z,   r3_o, sh, new, r1_32, rll32, 0)
 C(0xeb1c, RLLG,RSY_a, Z,   r3_o, sh, r1, 0, rll64, 0)
@@ -765,6 +783,12 @@
 /* SEARCH STRING UNICODE */
 C(0xb9be, SRSTU,   RRE,   ETF3, 0, 0, 0, 0, srstu, 0)
 +/* SELECT */
+C(0xb9f0, SELR,RRF_a, MIE3, r2, r3, new, r1_32, sel, 0)
+C(0xb9e3, SELGR,   RRF_a, MIE3, r2

[PATCH v3 3/3] s390x/tcg/tests: Tests for Miscellaneous-Instruction-Extensions Facility 3

2022-02-15 Thread David Miller

tests/tcg/s390x/mie3-compl.c: [N]*K instructions
tests/tcg/s390x/mie3-mvcrl.c: MVCRL instruction
tests/tcg/s390x/mie3-sel.c:  SELECT instruction

Signed-off-by: David Miller 
---
 tests/tcg/s390x/Makefile.target |  2 +-
 tests/tcg/s390x/mie3-compl.c| 56 +
 tests/tcg/s390x/mie3-mvcrl.c| 31 ++
 tests/tcg/s390x/mie3-sel.c  | 42 +
 4 files changed, 130 insertions(+), 1 deletion(-)
 create mode 100644 tests/tcg/s390x/mie3-compl.c
 create mode 100644 tests/tcg/s390x/mie3-mvcrl.c
 create mode 100644 tests/tcg/s390x/mie3-sel.c

diff --git a/tests/tcg/s390x/Makefile.target b/tests/tcg/s390x/Makefile.target
index 1a7238b4eb..16b9d45307 100644
--- a/tests/tcg/s390x/Makefile.target
+++ b/tests/tcg/s390x/Makefile.target
@@ -1,6 +1,6 @@
 S390X_SRC=$(SRC_PATH)/tests/tcg/s390x
 VPATH+=$(S390X_SRC)
-CFLAGS+=-march=zEC12 -m64
+CFLAGS+=-march=z15 -m64
 TESTS+=hello-s390x
 TESTS+=csst
 TESTS+=ipm
diff --git a/tests/tcg/s390x/mie3-compl.c b/tests/tcg/s390x/mie3-compl.c
new file mode 100644
index 00..1254fe21fb
--- /dev/null
+++ b/tests/tcg/s390x/mie3-compl.c
@@ -0,0 +1,56 @@
+#include <stdint.h>
+
+
+#define F_EPI "stg %%r0, %[res] ": [res] "+m" (res) : : "r0", "r2", "r3"
+
+#define F_PROasm ( \
+"llihf %%r0,801\n" \
+"lg %%r2, %[a] \n" \
+"lg %%r3, %[b] "   \
+: : [a] "m" (a),   \
+[b] "m" (b)\
+: "r2", "r3" )
+
+#define FbinOp(S, ASM) uint64_t S(uint64_t a, uint64_t b) \
+{ uint64_t res = 0; F_PRO; ASM; return res; }
+
+/* AND WITH COMPLEMENT */
+FbinOp(_ncrk,  asm("ncrk  %%r0, %%r3, %%r2 \n" F_EPI))
+FbinOp(_ncgrk, asm("ncgrk %%r0, %%r3, %%r2 \n" F_EPI))
+
+/* NAND */
+FbinOp(_nnrk,  asm("nnrk  %%r0, %%r3, %%r2 \n" F_EPI))
+FbinOp(_nngrk, asm("nngrk %%r0, %%r3, %%r2 \n" F_EPI))
+
+/* NOT XOR */
+FbinOp(_nxrk,  asm("nxrk  %%r0, %%r3, %%r2 \n" F_EPI))
+FbinOp(_nxgrk, asm("nxgrk %%r0, %%r3, %%r2 \n" F_EPI))
+
+/* NOR */
+FbinOp(_nork,  asm("nork  %%r0, %%r3, %%r2 \n" F_EPI))
+FbinOp(_nogrk, asm("nogrk %%r0, %%r3, %%r2 \n" F_EPI))
+
+/* OR WITH COMPLEMENT */
+FbinOp(_ocrk,  asm("ocrk  %%r0, %%r3, %%r2 \n" F_EPI))
+FbinOp(_ocgrk, asm("ocgrk %%r0, %%r3, %%r2 \n" F_EPI))
+
+
+
+int main(int argc, char *argv[])
+{
+if (_ncrk(0xFF88, 0xAA11)  != 0x03210011ull ||
+_nnrk(0xFF88, 0xAA11)  != 0x032155FFull ||
+_nork(0xFF88, 0xAA11)  != 0x03210066ull ||
+_nxrk(0xFF88, 0xAA11)  != 0x0321AA66ull ||
+_ocrk(0xFF88, 0xAA11)  != 0x0321AA77ull ||
+_ncgrk(0xFF88, 0xAA11) != 0x0011ull ||
+_nngrk(0xFF88, 0xAA11) != 0x55FFull ||
+_nogrk(0xFF88, 0xAA11) != 0x0066ull ||
+_nxgrk(0xFF88, 0xAA11) != 0xAA66ull ||
+_ocgrk(0xFF88, 0xAA11) != 0xAA77ull)
+{
+return 1;
+}
+
+return 0;
+}
diff --git a/tests/tcg/s390x/mie3-mvcrl.c b/tests/tcg/s390x/mie3-mvcrl.c
new file mode 100644
index 00..00f9c150a1
--- /dev/null
+++ b/tests/tcg/s390x/mie3-mvcrl.c
@@ -0,0 +1,31 @@
+#include <stdint.h>
+#include <string.h>
+
+
+static inline void mvcrl_8(const char *dst, const char *src)
+{
+asm volatile (
+"llill %%r0, 8 \n"
+"mvcrl 0(%[dst]), 0(%[src]) \n"
+: : [dst] "d" (dst), [src] "d" (src)
+: "memory");
+}
+
+
+int main(int argc, char *argv[])
+{
+const char* alpha = "abcdefghijklmnop";
+
+/* array missing 'i' */
+char tstr[17] = "abcdefghjklmnop\0" ;
+
+/* mvcrl reference use: 'open a hole in an array' */
+mvcrl_8(tstr+9, tstr+8);
+
+/* place missing 'i' */
+tstr[8] = 'i';
+
+return strncmp(alpha, tstr, 16ul);
+}
+
+
diff --git a/tests/tcg/s390x/mie3-sel.c b/tests/tcg/s390x/mie3-sel.c
new file mode 100644
index 00..e771b1e413
--- /dev/null
+++ b/tests/tcg/s390x/mie3-sel.c
@@ -0,0 +1,42 @@
+#include <stdint.h>
+
+
+#define F_EPI "stg %%r0, %[res] ": [res] "+m" (res) : : "r0", "r2", "r3"
+
+#define F_PROasm (  \
+"lg %%r2, %[a]  \n" \
+"lg %%r3, %[b]  \n" \
+"lg %%r0, %[c]  \n" \
+"ltgr %%r0, %%r0"   \
+: : [a] "m" (a),\
+[b] "m" (b),\
+[c] "m" (c) \
+: "r0", "r2", "r3", "r4")
+
+
+
+#define Fi3(S, ASM) uint64_t S(uint64_t a, uint64_t b, uint64_t c) \
+{ uint64_t res=0; F_PRO ; ASM ; return res; }
+
+
+Fi3 (_selre, asm("selre%%r0, %%r3, %%r2 \n" F_EPI))
+Fi3 (_selgrz,asm("selgrz   %%r0, %%r3, %%r2 \n" F_EPI))
+Fi3 (_selfhrnz,  asm("selfhrnz %%r0, %%r3, %%r2 \n" F_EPI))
+
+
+int main(int argc, char *argv[])
+{
+uint64_t a = ~0, b = ~0, c = ~0;
+a =_selre(0x06660066ull, 0x06660006ull,a);
+b =   _selgrz(0xF00D0005ull, 0xF00D0055ull,b);
+c = _selfhrnz(0x00440044ull, 0x00040004ull,c);
+
+if( (0x0066ull != a) ||
+(0xF00D0005ull != b) ||
+(0x0004ull != c) )
+{
+return 1;
+}
+return 0;
+}
+
--
2.32.0




[PATCH v3 0/3] s390x: Add partial z15 support and tests

2022-02-15 Thread David Miller

Add partial support for s390x z15 ga1 and specific tests for minste3

v2 -> v3:
* Moved tests to separate patch.
* Combined patches into series.

David Miller (3):
  s390x/tcg: Implement Miscellaneous-Instruction-Extensions Facility 3
for the s390x
  s390x/cpumodel: Bump up QEMU model to a stripped-down IBM z15 GA1
  s390x/tcg/tests: Tests for Miscellaneous-Instruction-Extensions
Facility 3

 hw/s390x/s390-virtio-ccw.c  |  3 ++
 target/s390x/cpu_models.c   |  6 ++--
 target/s390x/gen-features.c |  6 +++-
 target/s390x/helper.h   |  1 +
 target/s390x/tcg/insn-data.def  | 30 --
 target/s390x/tcg/mem_helper.c   | 20 
 target/s390x/tcg/translate.c| 56 -
 tests/tcg/s390x/Makefile.target |  2 +-
 tests/tcg/s390x/mie3-compl.c| 56 +
 tests/tcg/s390x/mie3-mvcrl.c| 31 ++
 tests/tcg/s390x/mie3-sel.c  | 42 +
 11 files changed, 244 insertions(+), 9 deletions(-)
 create mode 100644 tests/tcg/s390x/mie3-compl.c
 create mode 100644 tests/tcg/s390x/mie3-mvcrl.c
 create mode 100644 tests/tcg/s390x/mie3-sel.c

--
2.32.0




[PATCH v3 2/3] s390x/cpumodel: Bump up QEMU model to a stripped-down IBM z15 GA1

2022-02-15 Thread David Miller

TCG implements everything we need to run basic z15 OS+software.

Signed-off-by: David Miller 
---
 hw/s390x/s390-virtio-ccw.c  | 3 +++
 target/s390x/cpu_models.c   | 6 +++---
 target/s390x/gen-features.c | 7 +--
 3 files changed, 11 insertions(+), 5 deletions(-)

diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c
index 84e3e63c43..90480e7cf9 100644
--- a/hw/s390x/s390-virtio-ccw.c
+++ b/hw/s390x/s390-virtio-ccw.c
@@ -802,7 +802,10 @@ DEFINE_CCW_MACHINE(7_0, "7.0", true);
  static void ccw_machine_6_2_instance_options(MachineState *machine)
 {
+static const S390FeatInit qemu_cpu_feat = { S390_FEAT_LIST_QEMU_V6_2 };
+
 ccw_machine_7_0_instance_options(machine);
+s390_set_qemu_cpu_model(0x3906, 14, 2, qemu_cpu_feat);
 }
  static void ccw_machine_6_2_class_options(MachineClass *mc)
diff --git a/target/s390x/cpu_models.c b/target/s390x/cpu_models.c
index 11e06cc51f..89f83e81d5 100644
--- a/target/s390x/cpu_models.c
+++ b/target/s390x/cpu_models.c
@@ -85,9 +85,9 @@ static S390CPUDef s390_cpu_defs[] = {
 CPUDEF_INIT(0x3932, 16, 1, 47, 0x0800U, "gen16b", "IBM 3932 GA1"),
 };
 -#define QEMU_MAX_CPU_TYPE 0x3906
-#define QEMU_MAX_CPU_GEN 14
-#define QEMU_MAX_CPU_EC_GA 2
+#define QEMU_MAX_CPU_TYPE 0x8561
+#define QEMU_MAX_CPU_GEN 15
+#define QEMU_MAX_CPU_EC_GA 1
 static const S390FeatInit qemu_max_cpu_feat_init = { 
S390_FEAT_LIST_QEMU_MAX };

 static S390FeatBitmap qemu_max_cpu_feat;
 diff --git a/target/s390x/gen-features.c b/target/s390x/gen-features.c
index a3f30f69d9..22846121c4 100644
--- a/target/s390x/gen-features.c
+++ b/target/s390x/gen-features.c
@@ -731,16 +731,18 @@ static uint16_t qemu_V6_0[] = {
 S390_FEAT_ESOP,
 };
 -static uint16_t qemu_LATEST[] = {
+static uint16_t qemu_V6_2[] = {
 S390_FEAT_INSTRUCTION_EXEC_PROT,
 S390_FEAT_MISC_INSTRUCTION_EXT2,
 S390_FEAT_MSA_EXT_8,
 S390_FEAT_VECTOR_ENH,
 };
 +static uint16_t qemu_LATEST[] = {
+S390_FEAT_MISC_INSTRUCTION_EXT3,
+};
 /* add all new definitions before this point */
 static uint16_t qemu_MAX[] = {
-S390_FEAT_MISC_INSTRUCTION_EXT3,
 /* generates a dependency warning, leave it out for now */
 S390_FEAT_MSA_EXT_5,
 };
@@ -863,6 +865,7 @@ static FeatGroupDefSpec QemuFeatDef[] = {
 QEMU_FEAT_INITIALIZER(V4_0),
 QEMU_FEAT_INITIALIZER(V4_1),
 QEMU_FEAT_INITIALIZER(V6_0),
+QEMU_FEAT_INITIALIZER(V6_2),
 QEMU_FEAT_INITIALIZER(LATEST),
 QEMU_FEAT_INITIALIZER(MAX),
 };
--
2.32.0





[Bug 1858488] Re: qemu git && 4.2: timed audio issues with sb16, gus not working?

2020-01-08 Thread David Miller
It seems this might be related to buffer/sample size,  spice works and uses 
timed audio with a larger buffer. 
GUS I have not gotten to work.

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1858488

Title:
  qemu git && 4.2:  timed audio issues with sb16,  gus not working?

Status in QEMU:
  New

Bug description:
  
  I have built [both] current git, and 4.2.0, there are issues with 
audio/soundhw for both.

  Specifics:

  Linux nullrig 5.3.0-24-generic #26-Ubuntu SMP Thu Nov 14 01:33:18 UTC
  2019 x86_64 x86_64 x86_64 GNU/Linux

  Out of source build, successful for both:

  ../configure --prefix=/opt/qemu --target-list=i386-softmmu,mips64el-
  softmmu ---enable-sdl --enable-sdl-image --enable-lzo --enable-bzip2
  --enable-avx2 --enable-kvm --enable-membarrier --enable-plugin

  Call:

  ./qemu -machine pc,accel=kvm,usb=off -cpu pentium -m 64 -rtc
  base=localtime -parallel none -soundhw sb16,adlib,pcspk -device
  cirrus-vga,bus=pci.0 -drive
  id=disk1,file=doom.cow,format=qcow2,if=virtio -audiodev pa,id=pa

  Audio for sb16 sounds ok,  however if i switch to a timer based audio:
  -audiodev wav

  The output is wrong..  I had assumed it was all timer based audio,
  however it seems to be limited to sb16.

  So I then tried the next popular/compatible audio device for dos
  games:  gravis ultrasound [gus].

  I get no output at all for it.
  I have tried more than one piece of software,  DOOM shareware is an easy 
example.

  I realize there are better solutions for playing DOS games, however I
  am interested in snapshot support which many of them lack.

  I am willing to put the work into fixing it myself if need be,
  however i'm not very familiar with the audio backend.   Specifically,
  it is already mixed into a single buffer,  if 'adlib' driver is
  already working: (audio_pcm_ops.write() output is correct on timer
  based output) I failed to see how it affects emulation of the sound
  blaster.

To manage notifications about this bug go to:
https://bugs.launchpad.net/qemu/+bug/1858488/+subscriptions



[Bug 1858488] [NEW] qemu git && 4.2: timed audio issues with sb16, gus not working?

2020-01-06 Thread David Miller
Public bug reported:


I have built [both] current git, and 4.2.0, there are issues with audio/soundhw 
for both.

Specifics:

Linux nullrig 5.3.0-24-generic #26-Ubuntu SMP Thu Nov 14 01:33:18 UTC
2019 x86_64 x86_64 x86_64 GNU/Linux

Out of source build, successful for both:

../configure --prefix=/opt/qemu --target-list=i386-softmmu,mips64el-
softmmu ---enable-sdl --enable-sdl-image --enable-lzo --enable-bzip2
--enable-avx2 --enable-kvm --enable-membarrier --enable-plugin

Call:

./qemu -machine pc,accel=kvm,usb=off -cpu pentium -m 64 -rtc
base=localtime -parallel none -soundhw sb16,adlib,pcspk -device cirrus-
vga,bus=pci.0 -drive id=disk1,file=doom.cow,format=qcow2,if=virtio
-audiodev pa,id=pa

Audio for sb16 sounds ok,  however if i switch to a timer based audio:
-audiodev wav

The output is wrong..  I had assumed it was all timer based audio,
however it seems to be limited to sb16.

So I then tried the next popular/compatible audio device for dos games:
gravis ultrasound [gus].

I get no output at all for it.
I have tried more than one piece of software,  DOOM shareware is an easy 
example.

I realize there are better solutions for playing DOS games, however I am
interested in snapshot support which many of them lack.

I am willing to put the work into fixing it myself if need be,  however
i'm not very familiar with the audio backend.   Specifically, it is
already mixed into a single buffer,  if 'adlib' driver is already
working: (audio_pcm_ops.write() output is correct on timer based output)
I failed to see how it affects emulation of the sound blaster.

** Affects: qemu
 Importance: Undecided
 Status: New


** Tags: audio dos games gus sb16 timed timer

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1858488

Title:
  qemu git && 4.2:  timed audio issues with sb16,  gus not working?

Status in QEMU:
  New

Bug description:
  
  I have built [both] current git, and 4.2.0, there are issues with 
audio/soundhw for both.

  Specifics:

  Linux nullrig 5.3.0-24-generic #26-Ubuntu SMP Thu Nov 14 01:33:18 UTC
  2019 x86_64 x86_64 x86_64 GNU/Linux

  Out of source build, successful for both:

  ../configure --prefix=/opt/qemu --target-list=i386-softmmu,mips64el-
  softmmu ---enable-sdl --enable-sdl-image --enable-lzo --enable-bzip2
  --enable-avx2 --enable-kvm --enable-membarrier --enable-plugin

  Call:

  ./qemu -machine pc,accel=kvm,usb=off -cpu pentium -m 64 -rtc
  base=localtime -parallel none -soundhw sb16,adlib,pcspk -device
  cirrus-vga,bus=pci.0 -drive
  id=disk1,file=doom.cow,format=qcow2,if=virtio -audiodev pa,id=pa

  Audio for sb16 sounds ok,  however if i switch to a timer based audio:
  -audiodev wav

  The output is wrong..  I had assumed it was all timer based audio,
  however it seems to be limited to sb16.

  So I then tried the next popular/compatible audio device for dos
  games:  gravis ultrasound [gus].

  I get no output at all for it.
  I have tried more than one piece of software,  DOOM shareware is an easy 
example.

  I realize there are better solutions for playing DOS games, however I
  am interested in snapshot support which many of them lack.

  I am willing to put the work into fixing it myself if need be,
  however i'm not very familiar with the audio backend.   Specifically,
  it is already mixed into a single buffer,  if 'adlib' driver is
  already working: (audio_pcm_ops.write() output is correct on timer
  based output) I failed to see how it affects emulation of the sound
  blaster.

To manage notifications about this bug go to:
https://bugs.launchpad.net/qemu/+bug/1858488/+subscriptions



[Bug 1858488] Re: qemu git && 4.2: timed audio issues with sb16, gus not working?

2020-01-06 Thread David Miller
./qemu is a symlink to qemu/build/i386-softmmu/qemu-system-i386

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1858488

Title:
  qemu git && 4.2:  timed audio issues with sb16,  gus not working?

Status in QEMU:
  New

Bug description:
  
  I have built [both] current git, and 4.2.0, there are issues with 
audio/soundhw for both.

  Specifics:

  Linux nullrig 5.3.0-24-generic #26-Ubuntu SMP Thu Nov 14 01:33:18 UTC
  2019 x86_64 x86_64 x86_64 GNU/Linux

  Out of source build, successful for both:

  ../configure --prefix=/opt/qemu --target-list=i386-softmmu,mips64el-
  softmmu ---enable-sdl --enable-sdl-image --enable-lzo --enable-bzip2
  --enable-avx2 --enable-kvm --enable-membarrier --enable-plugin

  Call:

  ./qemu -machine pc,accel=kvm,usb=off -cpu pentium -m 64 -rtc
  base=localtime -parallel none -soundhw sb16,adlib,pcspk -device
  cirrus-vga,bus=pci.0 -drive
  id=disk1,file=doom.cow,format=qcow2,if=virtio -audiodev pa,id=pa

  Audio for sb16 sounds ok,  however if i switch to a timer based audio:
  -audiodev wav

  The output is wrong..  I had assumed it was all timer based audio,
  however it seems to be limited to sb16.

  So I then tried the next popular/compatible audio device for dos
  games:  gravis ultrasound [gus].

  I get no output at all for it.
  I have tried more than one piece of software,  DOOM shareware is an easy 
example.

  I realize there are better solutions for playing DOS games, however I
  am interested in snapshot support which many of them lack.

  I am willing to put the work into fixing it myself if need be,
  however i'm not very familiar with the audio backend.   Specifically,
  it is already mixed into a single buffer,  if 'adlib' driver is
  already working: (audio_pcm_ops.write() output is correct on timer
  based output) I failed to see how it affects emulation of the sound
  blaster.

To manage notifications about this bug go to:
https://bugs.launchpad.net/qemu/+bug/1858488/+subscriptions



Re: [RFC net-next 08/18] tun: run offloaded XDP program in Tx path

2019-12-01 Thread David Miller
From: "Michael S. Tsirkin" 
Date: Sun, 1 Dec 2019 16:40:22 -0500

> Right. But it is helpful to expose the supported functionality
> to guest in some way, if nothing else then so that
> guests can be moved between different hosts.
> 
> Also, we need a way to report this kind of event to guest
> so it's possible to figure out what went wrong.

On the contrary, this is why it is of utmost importance that all
XDP implementations support the full suite of XDP facilities from
the very beginning.

This is why we keep giving people a hard time when they add support
only for some of the XDP return values and semantics.  Users will get
killed by this, and it makes XDP a poor technology to use because
behavior is not consistent across device types.

That's not acceptable and I'll push back on anything that continues
this trend.

If you can't HW offload it, kick it to software.



Re: [RFC net-next 08/18] tun: run offloaded XDP program in Tx path

2019-12-01 Thread David Miller
From: David Ahern 
Date: Sun, 1 Dec 2019 09:39:54 -0700

> Below you just drop the packet which is going to be a bad user
> experience. A better user experience is to detect XDP return codes a
> program uses, catch those that are not supported for this use case and
> fail the install of the program.

This is not universally possible.

Return codes can be calculated dynamically, come from maps potentially
shared with other bpf programs, etc.

So unfortunately this suggestion is not tenable.



Re: [Qemu-devel] [PATCH net-next v4 1/3] virtio_net: propagate linkspeed/duplex settings from the hypervisor

2018-01-09 Thread David Miller
From: Jason Baron 
Date: Fri,  5 Jan 2018 17:44:54 -0500

> The ability to set speed and duplex for virtio_net is useful in various
> scenarios as described here:
> 
> 16032be virtio_net: add ethtool support for set and get of settings
> 
> However, it would be nice to be able to set this from the hypervisor,
> such that virtio_net doesn't require custom guest ethtool commands.
> 
> Introduce a new feature flag, VIRTIO_NET_F_SPEED_DUPLEX, which allows
> the hypervisor to export a linkspeed and duplex setting. The user can
> subsequently overwrite it later if desired via: 'ethtool -s'.
> 
> Note that VIRTIO_NET_F_SPEED_DUPLEX is defined as bit 63, the intention
> is that device feature bits are to grow down from bit 63, since the
> transports are starting from bit 24 and growing up.
> 
> Signed-off-by: Jason Baron 

Applied, thanks Jason.



Re: [Qemu-devel] [PATCH net-next v2 1/3] virtio_net: propagate linkspeed/duplex settings from the hypervisor

2017-12-27 Thread David Miller
From: Jason Baron 
Date: Fri, 22 Dec 2017 16:54:01 -0500

> The ability to set speed and duplex for virtio_net is useful in various
> scenarios as described here:
> 
> 16032be virtio_net: add ethtool support for set and get of settings
> 
> However, it would be nice to be able to set this from the hypervisor,
> such that virtio_net doesn't require custom guest ethtool commands.
> 
> Introduce a new feature flag, VIRTIO_NET_F_SPEED_DUPLEX, which allows
> the hypervisor to export a linkspeed and duplex setting. The user can
> subsequently overwrite it later if desired via: 'ethtool -s'.
> 
> Signed-off-by: Jason Baron 
> Cc: "Michael S. Tsirkin" 
> Cc: Jason Wang 

Looks mostly fine to me but need some virtio_net reviewers on this one.

> @@ -57,6 +57,8 @@
>* Steering */
>  #define VIRTIO_NET_F_CTRL_MAC_ADDR 23/* Set MAC address */
>  
> +#define VIRTIO_NET_F_SPEED_DUPLEX 63 /* Host set linkspeed and duplex */
> +

Why use a value so far away from the largest existing one?

Just curious.



Re: [Qemu-devel] TCP performance problems - GSO/TSO, MSS, 8139cp related

2016-11-11 Thread David Miller
From: Russell King - ARM Linux 
Date: Fri, 11 Nov 2016 22:33:08 +

> "The new buffer management algorithm provides capabilities of Microsoft
> Large-Send offload" and as yet I haven't found anything that describes
> what this is or how it works.

For once I will give Microsoft a big shout out here.

This, and everything a Microsoft networking driver interfaces to, is
_very_ much documented in extreme detail in the Microsoft NDIS
(Network Driver Interface Specification).

Microsoft's networking driver interfaces and expectations are
documented 1,000 times better than that of Linux.



Re: [Qemu-devel] [PATCH] tun: orphan an skb on tx

2015-02-03 Thread David Miller
From: David Woodhouse dw...@infradead.org
Date: Mon, 02 Feb 2015 07:27:10 +

 I'm guessing you don't want to push the *whole* management of the TLS
 control connection *and* the UDP transport, and probing the latter with
 keepalives, into the kernel? I certainly don't :)

Whilst Herbert Xu and I have discussed in the past supporting
automatic SSL handling of socket data during socket writes in the
kernel, doing TLS stuff would be a bit of a stretch :-)



Re: [Qemu-devel] [PATCH] tun: orphan an skb on tx

2015-02-01 Thread David Miller
From: David Woodhouse dw...@infradead.org
Date: Sun, 01 Feb 2015 13:33:50 +

 Of course, now I'm looking closely at the path these packets take to
 leave the box, it starts to offend me that they're being passed up to
 userspace just to encrypt them (as DTLS or ESP) and then send them back
 down to the kernel on a UDP socket. The kernel already knows how to
 {en,de}crypt ESP, and do the sequence number checking on incoming
 packets.

It's funny, I thought we had an IPSEC stack



Re: [Qemu-devel] [PATCH] tun: orphan an skb on tx

2015-02-01 Thread David Miller
From: David Woodhouse dw...@infradead.org
Date: Sun, 01 Feb 2015 21:29:43 +

 I really was looking for some way to push down something like an XFRM
 state into the tun device and just say shove them out here until I tell
 you otherwise.

People decided to use TUN and push VPN stuff back into userspace,
and there are repercussions for that decision.

I'm not saying this to be mean or whatever, but I was very
disappointed when userland IPSEC solutions using TUN started showing
up.

We might as well not have implemented the IPSEC stack at all,
because as a result of the userland VPN stuff our IPSEC stack is
largely unused except by a very narrow group of users.



Re: [Qemu-devel] [PATCH v5 0/3] make mac programming for virtio net more robust

2013-01-21 Thread David Miller
From: Amos Kong ak...@redhat.com
Date: Mon, 21 Jan 2013 19:17:20 +0800

 Currently mac is programmed byte by byte. This means that we
 have an intermediate step where mac is wrong. 
 
 Third patch introduced a new vq control command to set mac
 address, it's atomic.
 
 V2: check return of sending command, delay eth_mac_addr()
 V3: restore software address when fail to set hardware address
 V4: split eth_mac_addr, fix error handle
 V5: rebase patches to net-next tree

I'll apply this series, thanks.



Re: [Qemu-devel] [PATCH v4 2/3] net: split eth_mac_addr for better error handling

2013-01-20 Thread David Miller
From: ak...@redhat.com
Date: Sun, 20 Jan 2013 10:43:08 +0800

 From: Stefan Hajnoczi stefa...@gmail.com
 
 When we set mac address, software mac address in system and hardware mac
 address all need to be updated. Current eth_mac_addr() doesn't allow
 callers to implement error handling nicely.
 
 This patch splits eth_mac_addr() into a prepare part and a real commit part,
 then we can prepare first, and try to change hardware address, then do
 the real commit if hardware address is set successfully.
 
 Signed-off-by: Stefan Hajnoczi stefa...@gmail.com
 Signed-off-by: Amos Kong ak...@redhat.com

This patch doesn't apply to net-next.



  1   2   >