The following have overrides
    S2_insert
    S2_insert_rp
    S2_asr_r_svw_trun
    A2_swiz

These instructions have semantics that write to the destination
before all the operand reads have been completed.  Therefore,
the idef-parser versions were disabled with the short-circuit patch.

Test cases added to tests/tcg/hexagon/read_write_overlap.c

Signed-off-by: Taylor Simpson <tsimp...@quicinc.com>
Reviewed-by: Richard Henderson <richard.hender...@linaro.org>
---
 target/hexagon/gen_tcg.h               |  18 ++++
 target/hexagon/genptr.c                |  99 ++++++++++++++++++
 tests/tcg/hexagon/read_write_overlap.c | 136 +++++++++++++++++++++++++
 tests/tcg/hexagon/Makefile.target      |   1 +
 4 files changed, 254 insertions(+)
 create mode 100644 tests/tcg/hexagon/read_write_overlap.c

diff --git a/target/hexagon/gen_tcg.h b/target/hexagon/gen_tcg.h
index 1f7e535300..fabc1eb623 100644
--- a/target/hexagon/gen_tcg.h
+++ b/target/hexagon/gen_tcg.h
@@ -1181,6 +1181,24 @@
         tcg_gen_extrl_i64_i32(RdV, tmp); \
     } while (0)
 
+#define fGEN_TCG_S2_insert(SHORTCODE) \
+    do { \
+        int width = uiV; \
+        int offset = UiV; \
+        if (width != 0) { \
+            if (offset + width > 32) { \
+                width = 32 - offset; \
+            } \
+            tcg_gen_deposit_tl(RxV, RxV, RsV, offset, width); \
+        } \
+    } while (0)
+#define fGEN_TCG_S2_insert_rp(SHORTCODE) \
+    gen_insert_rp(ctx, RxV, RsV, RttV)
+#define fGEN_TCG_S2_asr_r_svw_trun(SHORTCODE) \
+    gen_asr_r_svw_trun(ctx, RdV, RssV, RtV)
+#define fGEN_TCG_A2_swiz(SHORTCODE) \
+    tcg_gen_bswap_tl(RdV, RsV)
+
 /* Floating point */
 #define fGEN_TCG_F2_conv_sf2df(SHORTCODE) \
     gen_helper_conv_sf2df(RddV, cpu_env, RsV)
diff --git a/target/hexagon/genptr.c b/target/hexagon/genptr.c
index d134d8082a..0727d4524b 100644
--- a/target/hexagon/genptr.c
+++ b/target/hexagon/genptr.c
@@ -1065,6 +1065,105 @@ static void gen_asl_r_r_sat(DisasContext *ctx, TCGv 
RdV, TCGv RsV, TCGv RtV)
     gen_set_label(done);
 }
 
+static void gen_insert_rp(DisasContext *ctx, TCGv RxV, TCGv RsV, TCGv_i64 RttV)
+{
+    /*
+     * int width = fZXTN(6, 32, (fGETWORD(1, RttV)));
+     * int offset = fSXTN(7, 32, (fGETWORD(0, RttV)));
+     * size8u_t mask = ((fCONSTLL(1) << width) - 1);
+     * if (offset < 0) {
+     *     RxV = 0;
+     * } else {
+     *     RxV &= ~(mask << offset);
+     *     RxV |= ((RsV & mask) << offset);
+     * }
+     */
+
+    TCGv width = tcg_temp_new();
+    TCGv offset = tcg_temp_new();
+    TCGv_i64 mask = tcg_temp_new_i64();
+    TCGv_i64 result = tcg_temp_new_i64();
+    TCGv_i64 tmp = tcg_temp_new_i64();
+    TCGv_i64 offset64 = tcg_temp_new_i64();
+    TCGLabel *label = gen_new_label();
+    TCGLabel *done = gen_new_label();
+
+    tcg_gen_extrh_i64_i32(width, RttV);
+    tcg_gen_extract_tl(width, width, 0, 6);
+    tcg_gen_extrl_i64_i32(offset, RttV);
+    tcg_gen_sextract_tl(offset, offset, 0, 7);
+    /* Possible values for offset are -64 .. 63 */
+    tcg_gen_brcondi_tl(TCG_COND_GE, offset, 0, label);
+    /* For negative offsets, zero out the result */
+    tcg_gen_movi_tl(RxV, 0);
+    tcg_gen_br(done);
+    gen_set_label(label);
+    /* At this point, possible values of offset are 0 .. 63 */
+    tcg_gen_ext_i32_i64(mask, width);
+    tcg_gen_shl_i64(mask, tcg_constant_i64(1), mask);
+    tcg_gen_subi_i64(mask, mask, 1);
+    tcg_gen_extu_i32_i64(result, RxV);
+    tcg_gen_ext_i32_i64(tmp, offset);
+    tcg_gen_shl_i64(tmp, mask, tmp);
+    tcg_gen_andc_i64(result, result, tmp);
+    tcg_gen_extu_i32_i64(tmp, RsV);
+    tcg_gen_and_i64(tmp, tmp, mask);
+    tcg_gen_extu_i32_i64(offset64, offset);
+    tcg_gen_shl_i64(tmp, tmp, offset64);
+    tcg_gen_or_i64(result, result, tmp);
+    tcg_gen_extrl_i64_i32(RxV, result);
+    gen_set_label(done);
+}
+
+static void gen_asr_r_svw_trun(DisasContext *ctx, TCGv RdV,
+                               TCGv_i64 RssV, TCGv RtV)
+{
+    /*
+     * for (int i = 0; i < 2; i++) {
+     *     fSETHALF(i, RdV, fGETHALF(0, ((fSXTN(7, 32, RtV) > 0) ?
+     *         (fCAST4_8s(fGETWORD(i, RssV)) >> fSXTN(7, 32, RtV)) :
+     *         (fCAST4_8s(fGETWORD(i, RssV)) << -fSXTN(7, 32, RtV)))));
+     * }
+     */
+    TCGv shift_amt32 = tcg_temp_new();
+    TCGv_i64 shift_amt64 = tcg_temp_new_i64();
+    TCGv_i64 tmp64 = tcg_temp_new_i64();
+    TCGv tmp32 = tcg_temp_new();
+    TCGLabel *label = gen_new_label();
+    TCGLabel *zero = gen_new_label();
+    TCGLabel *done =  gen_new_label();
+
+    tcg_gen_sextract_tl(shift_amt32, RtV, 0, 7);
+    /* Possible values of shift_amt32 are -64 .. 63 */
+    tcg_gen_brcondi_tl(TCG_COND_LE, shift_amt32, 0, label);
+    /* After branch, possible values of shift_amt32 are 1 .. 63 */
+    tcg_gen_ext_i32_i64(shift_amt64, shift_amt32);
+    for (int i = 0; i < 2; i++) {
+        tcg_gen_sextract_i64(tmp64, RssV, i * 32, 32);
+        tcg_gen_sar_i64(tmp64, tmp64, shift_amt64);
+        tcg_gen_extrl_i64_i32(tmp32, tmp64);
+        tcg_gen_deposit_tl(RdV, RdV, tmp32, i * 16, 16);
+    }
+    tcg_gen_br(done);
+    gen_set_label(label);
+    tcg_gen_neg_tl(shift_amt32, shift_amt32);
+    /*At this point, possible values of shift_amt32 are 0 .. 64 */
+    tcg_gen_brcondi_tl(TCG_COND_GT, shift_amt32, 63, zero);
+    /*At this point, possible values of shift_amt32 are 0 .. 63 */
+    tcg_gen_ext_i32_i64(shift_amt64, shift_amt32);
+    for (int i = 0; i < 2; i++) {
+        tcg_gen_sextract_i64(tmp64, RssV, i * 32, 32);
+        tcg_gen_shl_i64(tmp64, tmp64, shift_amt64);
+        tcg_gen_extrl_i64_i32(tmp32, tmp64);
+        tcg_gen_deposit_tl(RdV, RdV, tmp32, i * 16, 16);
+    }
+    tcg_gen_br(done);
+    gen_set_label(zero);
+    /* When the shift_amt is 64, zero out the result */
+    tcg_gen_movi_tl(RdV, 0);
+    gen_set_label(done);
+}
+
 static intptr_t vreg_src_off(DisasContext *ctx, int num)
 {
     intptr_t offset = offsetof(CPUHexagonState, VRegs[num]);
diff --git a/tests/tcg/hexagon/read_write_overlap.c 
b/tests/tcg/hexagon/read_write_overlap.c
new file mode 100644
index 0000000000..a75fc11dc4
--- /dev/null
+++ b/tests/tcg/hexagon/read_write_overlap.c
@@ -0,0 +1,136 @@
+/*
+ *  Copyright(c) 2023 Qualcomm Innovation Center, Inc. All Rights Reserved.
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * Test instructions where the semantics write to the destination
+ * before all the operand reads have been completed.
+ *
+ * These instructions are problematic when we short-circuit the
+ * register writes because the destination and source operands could
+ * be the same TCGv.
+ *
+ * We test by forcing the read and write to be register r7.
+ */
+
+#include <stdint.h>
+#include <stdlib.h>
+#include <stdio.h>
+
+int err;
+
+static void __check(const char *filename, int line, int x, int expect)
+{
+    if (x != expect) {
+        printf("ERROR %s:%d - 0x%08x != 0x%08x\n",
+               filename, line, x, expect);
+        err++;
+    }
+}
+
+#define check(x, expect) __check(__FILE__, __LINE__, (x), (expect))
+
+#define insert(RES, X, WIDTH, OFFSET) \
+    asm("r7 = %1\n\t" \
+        "r7 = insert(r7, #" #WIDTH ", #" #OFFSET ")\n\t" \
+        "%0 = r7\n\t" \
+        : "=r"(RES) : "r"(X) : "r7")
+
+static void test_insert(void)
+{
+    uint32_t res;
+
+    insert(res, 0x12345678, 8, 1);
+    check(res, 0x123456f0);
+    insert(res, 0x12345678, 0, 1);
+    check(res, 0x12345678);
+    insert(res, 0x12345678, 20, 16);
+    check(res, 0x56785678);
+}
+
+static inline uint32_t insert_rp(uint32_t x, uint32_t width, uint32_t offset)
+{
+    uint64_t width_offset = (uint64_t)width << 32 | offset;
+    uint32_t res;
+    asm("r7 = %1\n\t"
+        "r7 = insert(r7, %2)\n\t"
+        "%0 = r7\n\t"
+        : "=r"(res) : "r"(x), "r"(width_offset) : "r7");
+    return res;
+
+}
+
+static void test_insert_rp(void)
+{
+    check(insert_rp(0x12345678,   8,  1), 0x123456f0);
+    check(insert_rp(0x12345678,  63,  8), 0x34567878);
+    check(insert_rp(0x12345678, 127,  8), 0x34567878);
+    check(insert_rp(0x12345678,   8, 24), 0x78345678);
+    check(insert_rp(0x12345678,   8, 63), 0x12345678);
+    check(insert_rp(0x12345678,   8, 64), 0x00000000);
+}
+
+static inline uint32_t asr_r_svw_trun(uint64_t x, uint32_t y)
+{
+    uint32_t res;
+    asm("r7 = %2\n\t"
+        "r7 = vasrw(%1, r7)\n\t"
+        "%0 = r7\n\t"
+        : "=r"(res) : "r"(x), "r"(y) : "r7");
+    return res;
+}
+
+static void test_asr_r_svw_trun(void)
+{
+    check(asr_r_svw_trun(0x1111111122222222ULL, 5),
+          0x88881111);
+    check(asr_r_svw_trun(0x1111111122222222ULL, 63),
+          0x00000000);
+    check(asr_r_svw_trun(0x1111111122222222ULL, 64),
+          0x00000000);
+    check(asr_r_svw_trun(0x1111111122222222ULL, 127),
+          0x22224444);
+    check(asr_r_svw_trun(0x1111111122222222ULL, 128),
+          0x11112222);
+    check(asr_r_svw_trun(0xffffffff22222222ULL, 128),
+          0xffff2222);
+}
+
+static inline uint32_t swiz(uint32_t x)
+{
+    uint32_t res;
+    asm("r7 = %1\n\t"
+        "r7 = swiz(r7)\n\t"
+        "%0 = r7\n\t"
+        : "=r"(res) : "r"(x) : "r7");
+    return res;
+}
+
+static void test_swiz(void)
+{
+    check(swiz(0x11223344), 0x44332211);
+}
+
+int main()
+{
+    test_insert();
+    test_insert_rp();
+    test_asr_r_svw_trun();
+    test_swiz();
+
+    puts(err ? "FAIL" : "PASS");
+    return err ? EXIT_FAILURE : EXIT_SUCCESS;
+}
diff --git a/tests/tcg/hexagon/Makefile.target 
b/tests/tcg/hexagon/Makefile.target
index 7c94db4bc4..d8d3793732 100644
--- a/tests/tcg/hexagon/Makefile.target
+++ b/tests/tcg/hexagon/Makefile.target
@@ -45,6 +45,7 @@ HEX_TESTS += fpstuff
 HEX_TESTS += overflow
 HEX_TESTS += signal_context
 HEX_TESTS += reg_mut
+HEX_TESTS += read_write_overlap
 HEX_TESTS += vector_add_int
 HEX_TESTS += scatter_gather
 HEX_TESTS += hvx_misc
-- 
2.25.1

Reply via email to