Re: [Qemu-devel] [PATCH v3 13/18] target/s390x: Implement CONVERT UNICODE insns

2017-06-23 Thread Aurelien Jarno
On 2017-06-19 17:04, Richard Henderson wrote:
> diff --git a/target/s390x/insn-data.def b/target/s390x/insn-data.def
> index 9c8f184..634ef98 100644
> --- a/target/s390x/insn-data.def
> +++ b/target/s390x/insn-data.def
> @@ -313,6 +313,19 @@
>  C(0xb3a1, CDLGBR,  RRF_e, FPE, 0, r2_o, f1, 0, cdlgb, 0)
>  C(0xb3a2, CXLGBR,  RRF_e, FPE, 0, r2_o, x1, 0, cxlgb, 0)
>  
> +/* CONVERT UTF-8 TO UTF-16 */
> +D(0xb2a7, CU12,RRF_c, Z,   0, 0, 0, 0, cuXX, 0, 12)
> +/* CONVERT UTF-8 TO UTF-32 */
> +D(0xb9b0, CU14,RRF_c, ETF3, 0, 0, 0, 0, cuXX, 0, 14)
> +/* CONVERT UTF-16 to UTF-8 */
> +D(0xb2a6, CU21,RRF_c, Z,   0, 0, 0, 0, cuXX, 0, 21)
> +/* CONVERT UTF-16 to UTF-32 */
> +D(0xb9b1, CU24,RRF_c, ETF3, 0, 0, 0, 0, cuXX, 0, 24)
> +/* CONVERT UTF-32 to UTF-8 */
> +D(0xb9b3, CU41,RRF_c, ETF3, 0, 0, 0, 0, cuXX, 0, 41)
> +/* CONVERT UTF-32 to UTF-16 */
> +D(0xb9b2, CU42,RRF_c, ETF3, 0, 0, 0, 0, cuXX, 0, 42)
> +

CU41 and CU42 are inverted here. CU41 has the 0xb9b2 opcode and CU42 the
0xb9b3 opcode.

> diff --git a/target/s390x/mem_helper.c b/target/s390x/mem_helper.c
> index 4376c72..df082f5 100644
> --- a/target/s390x/mem_helper.c
> +++ b/target/s390x/mem_helper.c

...

> +static int encode_utf8(CPUS390XState *env, uint64_t addr, uint64_t ilen,
> +   uintptr_t ra, uint32_t c, uint32_t *olen)
> +{
> +uint8_t d[4];
> +uint32_t l, i;
> +
> +if (c <= 0x7f) {
> +/* one byte character */
> +l = 1;
> +d[0] = c;
> +} else if (c <= 0x7ff) {
> +/* two byte character */
> +l = 2;
> +d[1] = 0x80 | extract32(c, 0, 6);
> +d[0] = 0xc0 | extract32(c, 6, 5);
> +} else if (c <= 0x) {
> +/* three byte character */
> +l = 3;
> +d[2] = 0x80 | extract32(c, 0, 6);
> +d[1] = 0x80 | extract32(c, 6, 6);
> +d[0] = 0xe0 | extract32(c, 12, 4);
> +} else {
> +/* four byte character */
> +l = 4;
> +d[3] = 0x80 | extract32(c, 0, 6);
> +d[2] = 0x80 | extract32(c, 6, 6);
> +d[1] = 0x80 | extract32(c, 12, 6);
> +d[0] = 0xe0 | extract32(c, 18, 3);

This should be 0xf0 instead of 0xe0.

> +static int encode_utf16(CPUS390XState *env, uint64_t addr, uint64_t ilen,
> +uintptr_t ra, uint32_t c, uint32_t *olen)
> +{
> +uint16_t d0, d1;
> +
> +if (c <= 0x) {
> +/* one word character */
> +if (ilen < 2) {
> +return 1;
> +}
> +cpu_stw_data_ra(env, addr, c, ra);
> +*olen = 2;
> +} else {
> +/* two word character */
> +if (ilen < 4) {
> +return 1;
> +}
> +d1 = 0xbc00 | extract32(c, 0, 10);
> +d0 = 0xb800 | extract32(c, 10, 6);

This should be 0xdc00 and 0xd800;


Otherwise the patch looks fine to me.

-- 
Aurelien Jarno  GPG: 4096R/1DDD8C9B
aurel...@aurel32.net http://www.aurel32.net



[Qemu-devel] [PATCH v3 13/18] target/s390x: Implement CONVERT UNICODE insns

2017-06-19 Thread Richard Henderson
Signed-off-by: Richard Henderson 
---
 target/s390x/helper.h  |   6 +
 target/s390x/insn-data.def |  13 ++
 target/s390x/mem_helper.c  | 309 +
 target/s390x/translate.c   |  44 +++
 4 files changed, 372 insertions(+)

diff --git a/target/s390x/helper.h b/target/s390x/helper.h
index 456aaa9..c014820 100644
--- a/target/s390x/helper.h
+++ b/target/s390x/helper.h
@@ -107,6 +107,12 @@ DEF_HELPER_2(stfle, i32, env, i64)
 DEF_HELPER_FLAGS_2(lpq, TCG_CALL_NO_WG, i64, env, i64)
 DEF_HELPER_FLAGS_4(stpq, TCG_CALL_NO_WG, void, env, i64, i64, i64)
 DEF_HELPER_4(mvcos, i32, env, i64, i64, i64)
+DEF_HELPER_4(cu12, i32, env, i32, i32, i32)
+DEF_HELPER_4(cu14, i32, env, i32, i32, i32)
+DEF_HELPER_4(cu21, i32, env, i32, i32, i32)
+DEF_HELPER_4(cu24, i32, env, i32, i32, i32)
+DEF_HELPER_4(cu41, i32, env, i32, i32, i32)
+DEF_HELPER_4(cu42, i32, env, i32, i32, i32)
 
 #ifndef CONFIG_USER_ONLY
 DEF_HELPER_3(servc, i32, env, i64, i64)
diff --git a/target/s390x/insn-data.def b/target/s390x/insn-data.def
index 9c8f184..634ef98 100644
--- a/target/s390x/insn-data.def
+++ b/target/s390x/insn-data.def
@@ -313,6 +313,19 @@
 C(0xb3a1, CDLGBR,  RRF_e, FPE, 0, r2_o, f1, 0, cdlgb, 0)
 C(0xb3a2, CXLGBR,  RRF_e, FPE, 0, r2_o, x1, 0, cxlgb, 0)
 
+/* CONVERT UTF-8 TO UTF-16 */
+D(0xb2a7, CU12,RRF_c, Z,   0, 0, 0, 0, cuXX, 0, 12)
+/* CONVERT UTF-8 TO UTF-32 */
+D(0xb9b0, CU14,RRF_c, ETF3, 0, 0, 0, 0, cuXX, 0, 14)
+/* CONVERT UTF-16 to UTF-8 */
+D(0xb2a6, CU21,RRF_c, Z,   0, 0, 0, 0, cuXX, 0, 21)
+/* CONVERT UTF-16 to UTF-32 */
+D(0xb9b1, CU24,RRF_c, ETF3, 0, 0, 0, 0, cuXX, 0, 24)
+/* CONVERT UTF-32 to UTF-8 */
+D(0xb9b3, CU41,RRF_c, ETF3, 0, 0, 0, 0, cuXX, 0, 41)
+/* CONVERT UTF-32 to UTF-16 */
+D(0xb9b2, CU42,RRF_c, ETF3, 0, 0, 0, 0, cuXX, 0, 42)
+
 /* DIVIDE */
 C(0x1d00, DR,  RR_a,  Z,   r1_D32, r2_32s, new_P, r1_P32, divs32, 0)
 C(0x5d00, D,   RX_a,  Z,   r1_D32, m2_32s, new_P, r1_P32, divs32, 0)
diff --git a/target/s390x/mem_helper.c b/target/s390x/mem_helper.c
index 4376c72..df082f5 100644
--- a/target/s390x/mem_helper.c
+++ b/target/s390x/mem_helper.c
@@ -2140,3 +2140,312 @@ uint32_t HELPER(mvcos)(CPUS390XState *env, uint64_t 
dest, uint64_t src,
 
 return cc;
 }
+
+/* Decode a Unicode character.  A return value < 0 indicates success, storing
+   the UTF-32 result into OCHAR and the input length into OLEN.  A return
+   value >= 0 indicates failure, and the CC value to be returned.  */
+typedef int (*decode_unicode_fn)(CPUS390XState *env, uint64_t addr,
+ uint64_t ilen, bool enh_check, uintptr_t ra,
+ uint32_t *ochar, uint32_t *olen);
+
+/* Encode a Unicode character.  A return value < 0 indicates success, storing
+   the bytes into ADDR and the output length into OLEN.  A return value >= 0
+   indicates failure, and the CC value to be returned.  */
+typedef int (*encode_unicode_fn)(CPUS390XState *env, uint64_t addr,
+ uint64_t ilen, uintptr_t ra, uint32_t c,
+ uint32_t *olen);
+
+static int decode_utf8(CPUS390XState *env, uint64_t addr, uint64_t ilen,
+   bool enh_check, uintptr_t ra,
+   uint32_t *ochar, uint32_t *olen)
+{
+uint8_t s0, s1, s2, s3;
+uint32_t c, l;
+
+if (ilen < 1) {
+return 0;
+}
+s0 = cpu_ldub_data_ra(env, addr, ra);
+if (s0 <= 0x7f) {
+/* one byte character */
+l = 1;
+c = s0;
+} else if (s0 <= (enh_check ? 0xc1 : 0xbf)) {
+/* invalid character */
+return 2;
+} else if (s0 <= 0xdf) {
+/* two byte character */
+l = 2;
+if (ilen < 2) {
+return 0;
+}
+s1 = cpu_ldub_data_ra(env, addr + 1, ra);
+c = s0 & 0x1f;
+c = (c << 6) | (s1 & 0x3f);
+if (enh_check && (s1 & 0xc0) != 0x80) {
+return 2;
+}
+} else if (s0 <= 0xef) {
+/* three byte character */
+l = 3;
+if (ilen < 3) {
+return 0;
+}
+s1 = cpu_ldub_data_ra(env, addr + 1, ra);
+s2 = cpu_ldub_data_ra(env, addr + 2, ra);
+c = s0 & 0x0f;
+c = (c << 6) | (s1 & 0x3f);
+c = (c << 6) | (s2 & 0x3f);
+/* Fold the byte-by-byte range descriptions in the PoO into
+   tests against the complete value.  It disallows encodings
+   that could be smaller, and the UTF-16 surrogates.  */
+if (enh_check
+&& ((s1 & 0xc0) != 0x80
+|| (s2 & 0xc0) != 0x80
+|| c < 0x1000
+|| (c >= 0xd800 && c <= 0xdfff))) {
+return 2;
+}
+} else if (s0 <= (enh_check ? 0xf4 : 0xf7)) {
+/* four byte character */
+l = 4;
+if (ilen < 4) {
+return 0;
+}
+s1 = cpu_ldub_data_ra(env,