The following instructions have overrides: S2_insert, S2_insert_rp, S2_asr_r_svw_trun, A2_swiz.
These instructions have semantics that write to the destination before all the operand reads have been completed. Therefore, the idef-parser versions were disabled with the short-circuit patch. Test cases added to tests/tcg/hexagon/read_write_overlap.c Signed-off-by: Taylor Simpson <tsimp...@quicinc.com> Reviewed-by: Richard Henderson <richard.hender...@linaro.org> --- target/hexagon/gen_tcg.h | 18 ++++ target/hexagon/genptr.c | 99 ++++++++++++++++++ tests/tcg/hexagon/read_write_overlap.c | 136 +++++++++++++++++++++++++ tests/tcg/hexagon/Makefile.target | 1 + 4 files changed, 254 insertions(+) create mode 100644 tests/tcg/hexagon/read_write_overlap.c diff --git a/target/hexagon/gen_tcg.h b/target/hexagon/gen_tcg.h index 1f7e535300..fabc1eb623 100644 --- a/target/hexagon/gen_tcg.h +++ b/target/hexagon/gen_tcg.h @@ -1181,6 +1181,24 @@ tcg_gen_extrl_i64_i32(RdV, tmp); \ } while (0) +#define fGEN_TCG_S2_insert(SHORTCODE) \ + do { \ + int width = uiV; \ + int offset = UiV; \ + if (width != 0) { \ + if (offset + width > 32) { \ + width = 32 - offset; \ + } \ + tcg_gen_deposit_tl(RxV, RxV, RsV, offset, width); \ + } \ + } while (0) +#define fGEN_TCG_S2_insert_rp(SHORTCODE) \ + gen_insert_rp(ctx, RxV, RsV, RttV) +#define fGEN_TCG_S2_asr_r_svw_trun(SHORTCODE) \ + gen_asr_r_svw_trun(ctx, RdV, RssV, RtV) +#define fGEN_TCG_A2_swiz(SHORTCODE) \ + tcg_gen_bswap_tl(RdV, RsV) + /* Floating point */ #define fGEN_TCG_F2_conv_sf2df(SHORTCODE) \ gen_helper_conv_sf2df(RddV, cpu_env, RsV) diff --git a/target/hexagon/genptr.c b/target/hexagon/genptr.c index d134d8082a..0727d4524b 100644 --- a/target/hexagon/genptr.c +++ b/target/hexagon/genptr.c @@ -1065,6 +1065,105 @@ static void gen_asl_r_r_sat(DisasContext *ctx, TCGv RdV, TCGv RsV, TCGv RtV) gen_set_label(done); } +static void gen_insert_rp(DisasContext *ctx, TCGv RxV, TCGv RsV, TCGv_i64 RttV) +{ + /* + * int width = fZXTN(6, 32, (fGETWORD(1, RttV))); + * int offset = fSXTN(7, 32, (fGETWORD(0, RttV))); + * size8u_t mask = 
((fCONSTLL(1) << width) - 1); + * if (offset < 0) { + * RxV = 0; + * } else { + * RxV &= ~(mask << offset); + * RxV |= ((RsV & mask) << offset); + * } + */ + + TCGv width = tcg_temp_new(); + TCGv offset = tcg_temp_new(); + TCGv_i64 mask = tcg_temp_new_i64(); + TCGv_i64 result = tcg_temp_new_i64(); + TCGv_i64 tmp = tcg_temp_new_i64(); + TCGv_i64 offset64 = tcg_temp_new_i64(); + TCGLabel *label = gen_new_label(); + TCGLabel *done = gen_new_label(); + + tcg_gen_extrh_i64_i32(width, RttV); + tcg_gen_extract_tl(width, width, 0, 6); + tcg_gen_extrl_i64_i32(offset, RttV); + tcg_gen_sextract_tl(offset, offset, 0, 7); + /* Possible values for offset are -64 .. 63 */ + tcg_gen_brcondi_tl(TCG_COND_GE, offset, 0, label); + /* For negative offsets, zero out the result */ + tcg_gen_movi_tl(RxV, 0); + tcg_gen_br(done); + gen_set_label(label); + /* At this point, possible values of offset are 0 .. 63 */ + tcg_gen_ext_i32_i64(mask, width); + tcg_gen_shl_i64(mask, tcg_constant_i64(1), mask); + tcg_gen_subi_i64(mask, mask, 1); + tcg_gen_extu_i32_i64(result, RxV); + tcg_gen_ext_i32_i64(tmp, offset); + tcg_gen_shl_i64(tmp, mask, tmp); + tcg_gen_andc_i64(result, result, tmp); + tcg_gen_extu_i32_i64(tmp, RsV); + tcg_gen_and_i64(tmp, tmp, mask); + tcg_gen_extu_i32_i64(offset64, offset); + tcg_gen_shl_i64(tmp, tmp, offset64); + tcg_gen_or_i64(result, result, tmp); + tcg_gen_extrl_i64_i32(RxV, result); + gen_set_label(done); +} + +static void gen_asr_r_svw_trun(DisasContext *ctx, TCGv RdV, + TCGv_i64 RssV, TCGv RtV) +{ + /* + * for (int i = 0; i < 2; i++) { + * fSETHALF(i, RdV, fGETHALF(0, ((fSXTN(7, 32, RtV) > 0) ? 
+ * (fCAST4_8s(fGETWORD(i, RssV)) >> fSXTN(7, 32, RtV)) : + * (fCAST4_8s(fGETWORD(i, RssV)) << -fSXTN(7, 32, RtV))))); + * } + */ + TCGv shift_amt32 = tcg_temp_new(); + TCGv_i64 shift_amt64 = tcg_temp_new_i64(); + TCGv_i64 tmp64 = tcg_temp_new_i64(); + TCGv tmp32 = tcg_temp_new(); + TCGLabel *label = gen_new_label(); + TCGLabel *zero = gen_new_label(); + TCGLabel *done = gen_new_label(); + + tcg_gen_sextract_tl(shift_amt32, RtV, 0, 7); + /* Possible values of shift_amt32 are -64 .. 63 */ + tcg_gen_brcondi_tl(TCG_COND_LE, shift_amt32, 0, label); + /* After branch, possible values of shift_amt32 are 1 .. 63 */ + tcg_gen_ext_i32_i64(shift_amt64, shift_amt32); + for (int i = 0; i < 2; i++) { + tcg_gen_sextract_i64(tmp64, RssV, i * 32, 32); + tcg_gen_sar_i64(tmp64, tmp64, shift_amt64); + tcg_gen_extrl_i64_i32(tmp32, tmp64); + tcg_gen_deposit_tl(RdV, RdV, tmp32, i * 16, 16); + } + tcg_gen_br(done); + gen_set_label(label); + tcg_gen_neg_tl(shift_amt32, shift_amt32); + /*At this point, possible values of shift_amt32 are 0 .. 64 */ + tcg_gen_brcondi_tl(TCG_COND_GT, shift_amt32, 63, zero); + /*At this point, possible values of shift_amt32 are 0 .. 63 */ + tcg_gen_ext_i32_i64(shift_amt64, shift_amt32); + for (int i = 0; i < 2; i++) { + tcg_gen_sextract_i64(tmp64, RssV, i * 32, 32); + tcg_gen_shl_i64(tmp64, tmp64, shift_amt64); + tcg_gen_extrl_i64_i32(tmp32, tmp64); + tcg_gen_deposit_tl(RdV, RdV, tmp32, i * 16, 16); + } + tcg_gen_br(done); + gen_set_label(zero); + /* When the shift_amt is 64, zero out the result */ + tcg_gen_movi_tl(RdV, 0); + gen_set_label(done); +} + static intptr_t vreg_src_off(DisasContext *ctx, int num) { intptr_t offset = offsetof(CPUHexagonState, VRegs[num]); diff --git a/tests/tcg/hexagon/read_write_overlap.c b/tests/tcg/hexagon/read_write_overlap.c new file mode 100644 index 0000000000..a75fc11dc4 --- /dev/null +++ b/tests/tcg/hexagon/read_write_overlap.c @@ -0,0 +1,136 @@ +/* + * Copyright(c) 2023 Qualcomm Innovation Center, Inc. All Rights Reserved. 
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +/* + * Test instructions where the semantics write to the destination + * before all the operand reads have been completed. + * + * These instructions are problematic when we short-circuit the + * register writes because the destination and source operands could + * be the same TCGv. + * + * We test by forcing the read and write to be register r7. 
+ */ + +#include <stdint.h> +#include <stdlib.h> +#include <stdio.h> + +int err; + +static void __check(const char *filename, int line, int x, int expect) +{ + if (x != expect) { + printf("ERROR %s:%d - 0x%08x != 0x%08x\n", + filename, line, x, expect); + err++; + } +} + +#define check(x, expect) __check(__FILE__, __LINE__, (x), (expect)) + +#define insert(RES, X, WIDTH, OFFSET) \ + asm("r7 = %1\n\t" \ + "r7 = insert(r7, #" #WIDTH ", #" #OFFSET ")\n\t" \ + "%0 = r7\n\t" \ + : "=r"(RES) : "r"(X) : "r7") + +static void test_insert(void) +{ + uint32_t res; + + insert(res, 0x12345678, 8, 1); + check(res, 0x123456f0); + insert(res, 0x12345678, 0, 1); + check(res, 0x12345678); + insert(res, 0x12345678, 20, 16); + check(res, 0x56785678); +} + +static inline uint32_t insert_rp(uint32_t x, uint32_t width, uint32_t offset) +{ + uint64_t width_offset = (uint64_t)width << 32 | offset; + uint32_t res; + asm("r7 = %1\n\t" + "r7 = insert(r7, %2)\n\t" + "%0 = r7\n\t" + : "=r"(res) : "r"(x), "r"(width_offset) : "r7"); + return res; + +} + +static void test_insert_rp(void) +{ + check(insert_rp(0x12345678, 8, 1), 0x123456f0); + check(insert_rp(0x12345678, 63, 8), 0x34567878); + check(insert_rp(0x12345678, 127, 8), 0x34567878); + check(insert_rp(0x12345678, 8, 24), 0x78345678); + check(insert_rp(0x12345678, 8, 63), 0x12345678); + check(insert_rp(0x12345678, 8, 64), 0x00000000); +} + +static inline uint32_t asr_r_svw_trun(uint64_t x, uint32_t y) +{ + uint32_t res; + asm("r7 = %2\n\t" + "r7 = vasrw(%1, r7)\n\t" + "%0 = r7\n\t" + : "=r"(res) : "r"(x), "r"(y) : "r7"); + return res; +} + +static void test_asr_r_svw_trun(void) +{ + check(asr_r_svw_trun(0x1111111122222222ULL, 5), + 0x88881111); + check(asr_r_svw_trun(0x1111111122222222ULL, 63), + 0x00000000); + check(asr_r_svw_trun(0x1111111122222222ULL, 64), + 0x00000000); + check(asr_r_svw_trun(0x1111111122222222ULL, 127), + 0x22224444); + check(asr_r_svw_trun(0x1111111122222222ULL, 128), + 0x11112222); + 
check(asr_r_svw_trun(0xffffffff22222222ULL, 128), + 0xffff2222); +} + +static inline uint32_t swiz(uint32_t x) +{ + uint32_t res; + asm("r7 = %1\n\t" + "r7 = swiz(r7)\n\t" + "%0 = r7\n\t" + : "=r"(res) : "r"(x) : "r7"); + return res; +} + +static void test_swiz(void) +{ + check(swiz(0x11223344), 0x44332211); +} + +int main() +{ + test_insert(); + test_insert_rp(); + test_asr_r_svw_trun(); + test_swiz(); + + puts(err ? "FAIL" : "PASS"); + return err ? EXIT_FAILURE : EXIT_SUCCESS; +} diff --git a/tests/tcg/hexagon/Makefile.target b/tests/tcg/hexagon/Makefile.target index 7c94db4bc4..d8d3793732 100644 --- a/tests/tcg/hexagon/Makefile.target +++ b/tests/tcg/hexagon/Makefile.target @@ -45,6 +45,7 @@ HEX_TESTS += fpstuff HEX_TESTS += overflow HEX_TESTS += signal_context HEX_TESTS += reg_mut +HEX_TESTS += read_write_overlap HEX_TESTS += vector_add_int HEX_TESTS += scatter_gather HEX_TESTS += hvx_misc -- 2.25.1