The previous implementation failed for the following 7 hexadecimal patterns,
each of which returned one wrong byte (0x00 instead of 0xff):
orc.b(0x............01..) = 0x............00.. (instead of 0x............ff..)
orc.b(0x..........01....) = 0x..........00.... (instead of 0x..........ff....)
orc.b(0x........01......) = 0x........00...... (instead of 0x........ff......)
orc.b(0x......01........) = 0x......00........ (instead of 0x......ff........)
orc.b(0x....01..........) = 0x....00.......... (instead of 0x....ff..........)
orc.b(0x..01............) = 0x..00............ (instead of 0x..ff............)
orc.b(0x01..............) = 0x00.............. (instead of 0xff..............)

Implement a simpler, though less optimized, 'divide and conquer' method:
adjacent bits are or'ed in pairs, then the pairs are or'ed in pairs, and so
on, until the lsb of each byte holds the OR of all the bits of that byte.

Signed-off-by: Vincent Palatin <vpala...@rivosinc.com>
---
 target/riscv/insn_trans/trans_rvb.c.inc | 18 +++++++++++++-----
 1 file changed, 13 insertions(+), 5 deletions(-)

diff --git a/target/riscv/insn_trans/trans_rvb.c.inc b/target/riscv/insn_trans/trans_rvb.c.inc
index 185c3e9a60..04f795652d 100644
--- a/target/riscv/insn_trans/trans_rvb.c.inc
+++ b/target/riscv/insn_trans/trans_rvb.c.inc
@@ -249,18 +249,26 @@ static bool trans_rev8_64(DisasContext *ctx, arg_rev8_64 *a)
 static void gen_orc_b(TCGv ret, TCGv source1)
 {
     TCGv  tmp = tcg_temp_new();
+    TCGv  shifted = tcg_temp_new();
     TCGv  ones = tcg_constant_tl(dup_const_tl(MO_8, 0x01));
 
-    /* Set lsb in each byte if the byte was zero. */
-    tcg_gen_sub_tl(tmp, source1, ones);
-    tcg_gen_andc_tl(tmp, tmp, source1);
-    tcg_gen_shri_tl(tmp, tmp, 7);
-    tcg_gen_andc_tl(tmp, ones, tmp);
+    /*
+     * Divide and conquer: show one byte of the word in the comments,
+     * with U meaning Useful or'ed bit, X Junk content bit, . don't care.
+     */
+    tcg_gen_shri_tl(shifted, source1, 1);
+    tcg_gen_or_tl(tmp, source1, shifted); /* tmp[15:8] = XU.U.U.U */
+    tcg_gen_shri_tl(shifted, tmp, 2);
+    tcg_gen_or_tl(tmp, shifted, tmp);     /* tmp[15:8] = XXXU...U */
+    tcg_gen_shri_tl(shifted, tmp, 4);
+    tcg_gen_or_tl(tmp, shifted, tmp);     /* tmp[15:8] = XXXXXXXU */
+    tcg_gen_and_tl(tmp, ones, tmp);       /* tmp[15:8] = 0000000U */
 
     /* Replicate the lsb of each byte across the byte. */
     tcg_gen_muli_tl(ret, tmp, 0xff);
 
     tcg_temp_free(tmp);
+    tcg_temp_free(shifted);
 }
 
 static bool trans_orc_b(DisasContext *ctx, arg_orc_b *a)
-- 
2.25.1


Reply via email to