Re: [Qemu-devel] [PATCH v2] target/i386: Fix BLSR and BLSI

2017-07-13 Thread Ricardo Ribalda Delgado
Hi

For completion. This is my poor man tbm test. It has run for 5 minutes
with no errors

gcc tbm.c -O3 -march=native  -o pc
gcc tbm.c -mtbm -O3

 for a in $(seq 0 65535); do /tmp/qemu/x86_64-linux-user/qemu-x86_64
-cpu qemu64,+tbm ./a.out $a >/tmp/res.qemu ; ./pc $a  > /tmp/res.pc ;
if !  diff  /tmp/res.pc /tmp/res.qemu; then echo $a ; fi ; done



#include 
#include 
#include 


long test_bextr(long src) {
unsigned char start =1, len=3;
return (src >> start) & ((1 << len)-1);
}

long test_blcfill(long val){

return val & (val + 1);
}

long test_blci(long val){

return val | ~(val + 1);
}

long test_blcic(long val){

return ~val & (val + 1);
}

long test_blcmsk(long val){

return val ^ (val + 1);
}

long test_blcs(long val){

return val | (val + 1);
}

long test_blsfill(long val){

return val | (val - 1);
}

long test_blsic(long val){

return ~val | (val - 1);
}

long test_t1mskc(long val){

return ~val | (val + 1);
}

long test_tzmsk(long val){

return ~val & (val - 1);
}

int main(int argc, char *argv[]) {
long op1;
long ret;

if (argc < 2) {
fprintf(stderr, "use %s op1 \n", argv[0]);
return -1;
}
op1 = strtoul(argv[1], NULL, 0);

fprintf(stdout, "op 1 %ld (0x%lx)\n", op1, op1);

ret = test_bextr(op1);
fprintf(stdout, "bextr %ld (0x%lx)\n",ret, ret);

ret = test_blcfill(op1);
fprintf(stdout, "blcfill %ld (0x%lx)\n",ret, ret);

ret = test_blci(op1);
fprintf(stdout, "blci %ld (0x%lx)\n",ret, ret);

ret = test_blcic(op1);
fprintf(stdout, "blcic %ld (0x%lx)\n",ret, ret);

ret = test_blcmsk(op1);
fprintf(stdout, "blcmsk %ld (0x%lx)\n",ret, ret);

ret = test_blcs(op1);
fprintf(stdout, "blcs %ld (0x%lx)\n",ret, ret);

ret = test_blsfill(op1);
fprintf(stdout, "blsfill %ld (0x%lx)\n",ret, ret);

ret = test_blsic(op1);
fprintf(stdout, "blsic %ld (0x%lx)\n",ret, ret);

ret = test_t1mskc(op1);
fprintf(stdout, "t1mskc %ld (0x%lx)\n",ret, ret);

ret = test_tzmsk(op1);
fprintf(stdout, "tzmsk %ld (0x%lx)\n",ret, ret);



return 0;
}

On Thu, Jul 13, 2017 at 11:55 PM, Ricardo Ribalda Delgado
 wrote:
> Hi again
>
> Some progress here, I think that I have found a bug in andn, I have
> already send a patch.
>
> I have made a rudimentary testcase for bmi. I will try tomorrow o
> build something similar for tbm.
>
> For reference, I am using this script:
>
> for a in $(seq 0 255); do for b in $(seq 0 255); do for c in $(seq 0
> 255); do /tmp/qemu/x86_64-linux-user/qemu-x86_64 -cpu qemu64,+bmi1
> ./a.out $a $b $c >/tmp/res.qemu; ./pc $a $b $c > /tmp/res.pc; if !
> diff  /tmp/res.pc /tmp/res.qemu; then echo $a $b $c; fi ; done ; done
> ; done
>
> with this build options
> gcc kk.c -mbmi -O3 -Wall
> gcc kk.c -march=native -O3 -Wall -o pc
>
> and this code:
>
> #include 
> #include 
> #include 
>
>
> long test_blsr(long val){
>
> return (val & (val - 1));
> }
>
> long test_blsi(long val){
>
> return (val & (-val));
> }
>
> long test_blmsk(long val){
>
> return (val ^ (val -1));
> }
>
> long test_andn(long v1, long v2){
>
> return (~v1 & v2);
> }
>
> long test_tzcnt(long val) {
> #ifdef PC
> return val ? __builtin_ctz(val) : 32;
> #else
> return __tzcnt_u32(val);
> #endif
> }
>
> long test_bextr(long src, unsigned char start, unsigned char len) {
> #ifdef PC
> return (src >> start) & ((1 << len)-1);
> #else
> return __bextr_u32(src, start | len <<8);
> #endif
> }
>
> int main(int argc, char *argv[]) {
> long op1, op2, op3;
> long ret;
>
> if (argc < 4) {
> fprintf(stderr, "use %s op1 op2 op3\n", argv[0]);
> return -1;
> }
> op1 = strtoul(argv[1], NULL, 0);
> op2 = strtoul(argv[2], NULL, 0);
> op3 = strtoul(argv[3], NULL, 0);
>
> fprintf(stdout, "op 1 %ld (0x%lx)\n", op1, op1);
> fprintf(stdout, "op 2 %ld (0x%lx)\n", op2, op2);
>
> ret = test_blsr(op1);
> fprintf(stdout, "blsr %ld (0x%lx)\n",ret, ret);
>
> ret = test_blsi(op1);
> fprintf(stdout, "blsi %ld (0x%lx)\n",ret, ret);
>
> ret = test_blmsk(op1);
> fprintf(stdout, "blmsk %ld (0x%lx)\n",ret, ret);
>
> ret = test_andn(op1,op2);
> fprintf(stdout, "andn %ld (0x%lx)\n",ret, ret);
>
> ret = test_tzcnt(op1);
> fprintf(stdout, "tzcnt %ld (0x%lx)\n",ret, ret);
>
> ret = test_bextr(op1, op2, op3);
> fprintf(stdout, "bextr %ld (0x%lx)\n",ret, ret);
>
> return 0;
> }
>
> On Thu, Jul 13, 2017 at 10:42 PM, Ricardo Ribalda Delgado
>  wrote:
>> Hi Richard
>>
>> The simple example works as expected, but my big application
>> (gobject-introspection) still crashes with sigsegv :(.
>>
>> it seems to be something related to the bmi and tbm instructions. If I
>> disable them in gcc ( -mno-bmi -mno-tbm), the application
>> runs ok.
>>
>> A look at qemu's code does not show anything obvious, but I am not
>> that familiar with qemu source yet to find something like this through
>> static analysis.
>>
>> My plan (as soon as I have some time) is to create a small set of apps
>> to validate bmi/tbm/ (Are you aware of something already existing for
>> this?)
>> My stupid guess is that maybe the ops are switched, or the flags are
>> not properly 

Re: [Qemu-devel] [PATCH v2] target/i386: Fix BLSR and BLSI

2017-07-13 Thread Ricardo Ribalda Delgado
Hi again

Some progress here, I think that I have found a bug in andn, I have
already send a patch.

I have made a rudimentary testcase for bmi. I will try tomorrow o
build something similar for tbm.

For reference, I am using this script:

for a in $(seq 0 255); do for b in $(seq 0 255); do for c in $(seq 0
255); do /tmp/qemu/x86_64-linux-user/qemu-x86_64 -cpu qemu64,+bmi1
./a.out $a $b $c >/tmp/res.qemu; ./pc $a $b $c > /tmp/res.pc; if !
diff  /tmp/res.pc /tmp/res.qemu; then echo $a $b $c; fi ; done ; done
; done

with this build options
gcc kk.c -mbmi -O3 -Wall
gcc kk.c -march=native -O3 -Wall -o pc

and this code:

#include 
#include 
#include 


long test_blsr(long val){

return (val & (val - 1));
}

long test_blsi(long val){

return (val & (-val));
}

long test_blmsk(long val){

return (val ^ (val -1));
}

long test_andn(long v1, long v2){

return (~v1 & v2);
}

long test_tzcnt(long val) {
#ifdef PC
return val ? __builtin_ctz(val) : 32;
#else
return __tzcnt_u32(val);
#endif
}

long test_bextr(long src, unsigned char start, unsigned char len) {
#ifdef PC
return (src >> start) & ((1 << len)-1);
#else
return __bextr_u32(src, start | len <<8);
#endif
}

int main(int argc, char *argv[]) {
long op1, op2, op3;
long ret;

if (argc < 4) {
fprintf(stderr, "use %s op1 op2 op3\n", argv[0]);
return -1;
}
op1 = strtoul(argv[1], NULL, 0);
op2 = strtoul(argv[2], NULL, 0);
op3 = strtoul(argv[3], NULL, 0);

fprintf(stdout, "op 1 %ld (0x%lx)\n", op1, op1);
fprintf(stdout, "op 2 %ld (0x%lx)\n", op2, op2);

ret = test_blsr(op1);
fprintf(stdout, "blsr %ld (0x%lx)\n",ret, ret);

ret = test_blsi(op1);
fprintf(stdout, "blsi %ld (0x%lx)\n",ret, ret);

ret = test_blmsk(op1);
fprintf(stdout, "blmsk %ld (0x%lx)\n",ret, ret);

ret = test_andn(op1,op2);
fprintf(stdout, "andn %ld (0x%lx)\n",ret, ret);

ret = test_tzcnt(op1);
fprintf(stdout, "tzcnt %ld (0x%lx)\n",ret, ret);

ret = test_bextr(op1, op2, op3);
fprintf(stdout, "bextr %ld (0x%lx)\n",ret, ret);

return 0;
}

On Thu, Jul 13, 2017 at 10:42 PM, Ricardo Ribalda Delgado
 wrote:
> Hi Richard
>
> The simple example works as expected, but my big application
> (gobject-introspection) still crashes with sigsegv :(.
>
> it seems to be something related to the bmi and tbm instructions. If I
> disable them in gcc ( -mno-bmi -mno-tbm), the application
> runs ok.
>
> A look at qemu's code does not show anything obvious, but I am not
> that familiar with qemu source yet to find something like this through
> static analysis.
>
> My plan (as soon as I have some time) is to create a small set of apps
> to validate bmi/tbm/ (Are you aware of something already existing for
> this?)
> My stupid guess is that maybe the ops are switched, or the flags are
> not properly modified.
>
> If you want, I can share the application that crashes with you, just
> be aware that the number of dependencies is considerable.
>
> BTW I can only run the gdb stub on version 2.8.0. On git HEAD I am getting 
> only:
>
> Quit
> (gdb) c
> Continuing.
> warning: Remote failure reply: E22
>
> Program stopped.
> 0x0040017bac07 in ?? ()
> (gdb) c
> Continuing.
>
>
>
> Thanks for your help, it is greatly appreciated!
>
> On Wed, Jul 12, 2017 at 9:29 PM, Richard Henderson  wrote:
>> The implementation of these two instructions was swapped.
>> At the same time, unify the setup of eflags for the insn group.
>>
>> Reported-by: Ricardo Ribalda Delgado 
>> Signed-off-by: Richard Henderson 
>> ---
>>  target/i386/translate.c | 26 +-
>>  1 file changed, 9 insertions(+), 17 deletions(-)
>>
>> diff --git a/target/i386/translate.c b/target/i386/translate.c
>> index 9d5f1c3..69d3787 100644
>> --- a/target/i386/translate.c
>> +++ b/target/i386/translate.c
>> @@ -4031,34 +4031,26 @@ static void gen_sse(CPUX86State *env, DisasContext 
>> *s, int b,
>>  ot = mo_64_32(s->dflag);
>>  gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
>>
>> +tcg_gen_mov_tl(cpu_cc_src, cpu_T0);
>>  switch (reg & 7) {
>>  case 1: /* blsr By,Ey */
>> -tcg_gen_neg_tl(cpu_T1, cpu_T0);
>> +tcg_gen_subi_tl(cpu_T1, cpu_T0, 1);
>>  tcg_gen_and_tl(cpu_T0, cpu_T0, cpu_T1);
>> -gen_op_mov_reg_v(ot, s->vex_v, cpu_T0);
>> -gen_op_update2_cc();
>> -set_cc_op(s, CC_OP_BMILGB + ot);
>>  break;
>> -
>>  case 2: /* blsmsk By,Ey */
>> -tcg_gen_mov_tl(cpu_cc_src, cpu_T0);
>> -tcg_gen_subi_tl(cpu_T0, cpu_T0, 1);
>> -tcg_gen_xor_tl(cpu_T0, cpu_T0, cpu_cc_src);
>> -tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
>> -set_cc_op(s, CC_OP_BMILGB + ot);
>> +tcg_gen_subi_tl(cpu_T1, cpu_T0, 1);
>> +

Re: [Qemu-devel] [PATCH v2] target/i386: Fix BLSR and BLSI

2017-07-13 Thread Ricardo Ribalda Delgado
Hi Richard

The simple example works as expected, but my big application
(gobject-introspection) still crashes with sigsegv :(.

it seems to be something related to the bmi and tbm instructions. If I
disable them in gcc ( -mno-bmi -mno-tbm), the application
runs ok.

A look at qemu's code does not show anything obvious, but I am not
that familiar with qemu source yet to find something like this through
static analysis.

My plan (as soon as I have some time) is to create a small set of apps
to validate bmi/tbm/ (Are you aware of something already existing for
this?)
My stupid guess is that maybe the ops are switched, or the flags are
not properly modified.

If you want, I can share the application that crashes with you, just
be aware that the number of dependencies is considerable.

BTW I can only run the gdb stub on version 2.8.0. On git HEAD I am getting only:

Quit
(gdb) c
Continuing.
warning: Remote failure reply: E22

Program stopped.
0x0040017bac07 in ?? ()
(gdb) c
Continuing.



Thanks for your help, it is greatly appreciated!

On Wed, Jul 12, 2017 at 9:29 PM, Richard Henderson  wrote:
> The implementation of these two instructions was swapped.
> At the same time, unify the setup of eflags for the insn group.
>
> Reported-by: Ricardo Ribalda Delgado 
> Signed-off-by: Richard Henderson 
> ---
>  target/i386/translate.c | 26 +-
>  1 file changed, 9 insertions(+), 17 deletions(-)
>
> diff --git a/target/i386/translate.c b/target/i386/translate.c
> index 9d5f1c3..69d3787 100644
> --- a/target/i386/translate.c
> +++ b/target/i386/translate.c
> @@ -4031,34 +4031,26 @@ static void gen_sse(CPUX86State *env, DisasContext 
> *s, int b,
>  ot = mo_64_32(s->dflag);
>  gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
>
> +tcg_gen_mov_tl(cpu_cc_src, cpu_T0);
>  switch (reg & 7) {
>  case 1: /* blsr By,Ey */
> -tcg_gen_neg_tl(cpu_T1, cpu_T0);
> +tcg_gen_subi_tl(cpu_T1, cpu_T0, 1);
>  tcg_gen_and_tl(cpu_T0, cpu_T0, cpu_T1);
> -gen_op_mov_reg_v(ot, s->vex_v, cpu_T0);
> -gen_op_update2_cc();
> -set_cc_op(s, CC_OP_BMILGB + ot);
>  break;
> -
>  case 2: /* blsmsk By,Ey */
> -tcg_gen_mov_tl(cpu_cc_src, cpu_T0);
> -tcg_gen_subi_tl(cpu_T0, cpu_T0, 1);
> -tcg_gen_xor_tl(cpu_T0, cpu_T0, cpu_cc_src);
> -tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
> -set_cc_op(s, CC_OP_BMILGB + ot);
> +tcg_gen_subi_tl(cpu_T1, cpu_T0, 1);
> +tcg_gen_xor_tl(cpu_T0, cpu_T0, cpu_T1);
>  break;
> -
>  case 3: /* blsi By, Ey */
> -tcg_gen_mov_tl(cpu_cc_src, cpu_T0);
> -tcg_gen_subi_tl(cpu_T0, cpu_T0, 1);
> -tcg_gen_and_tl(cpu_T0, cpu_T0, cpu_cc_src);
> -tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
> -set_cc_op(s, CC_OP_BMILGB + ot);
> +tcg_gen_neg_tl(cpu_T1, cpu_T0);
> +tcg_gen_and_tl(cpu_T0, cpu_T0, cpu_T1);
>  break;
> -
>  default:
>  goto unknown_op;
>  }
> +tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
> +gen_op_mov_reg_v(ot, s->vex_v, cpu_T0);
> +set_cc_op(s, CC_OP_BMILGB + ot);
>  break;
>
>  default:
> --
> 2.9.4
>



-- 
Ricardo Ribalda



[Qemu-devel] [PATCH v2] target/i386: Fix BLSR and BLSI

2017-07-12 Thread Richard Henderson
The implementation of these two instructions was swapped.
At the same time, unify the setup of eflags for the insn group.

Reported-by: Ricardo Ribalda Delgado 
Signed-off-by: Richard Henderson 
---
 target/i386/translate.c | 26 +-
 1 file changed, 9 insertions(+), 17 deletions(-)

diff --git a/target/i386/translate.c b/target/i386/translate.c
index 9d5f1c3..69d3787 100644
--- a/target/i386/translate.c
+++ b/target/i386/translate.c
@@ -4031,34 +4031,26 @@ static void gen_sse(CPUX86State *env, DisasContext *s, 
int b,
 ot = mo_64_32(s->dflag);
 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
 
+tcg_gen_mov_tl(cpu_cc_src, cpu_T0);
 switch (reg & 7) {
 case 1: /* blsr By,Ey */
-tcg_gen_neg_tl(cpu_T1, cpu_T0);
+tcg_gen_subi_tl(cpu_T1, cpu_T0, 1);
 tcg_gen_and_tl(cpu_T0, cpu_T0, cpu_T1);
-gen_op_mov_reg_v(ot, s->vex_v, cpu_T0);
-gen_op_update2_cc();
-set_cc_op(s, CC_OP_BMILGB + ot);
 break;
-
 case 2: /* blsmsk By,Ey */
-tcg_gen_mov_tl(cpu_cc_src, cpu_T0);
-tcg_gen_subi_tl(cpu_T0, cpu_T0, 1);
-tcg_gen_xor_tl(cpu_T0, cpu_T0, cpu_cc_src);
-tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
-set_cc_op(s, CC_OP_BMILGB + ot);
+tcg_gen_subi_tl(cpu_T1, cpu_T0, 1);
+tcg_gen_xor_tl(cpu_T0, cpu_T0, cpu_T1);
 break;
-
 case 3: /* blsi By, Ey */
-tcg_gen_mov_tl(cpu_cc_src, cpu_T0);
-tcg_gen_subi_tl(cpu_T0, cpu_T0, 1);
-tcg_gen_and_tl(cpu_T0, cpu_T0, cpu_cc_src);
-tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
-set_cc_op(s, CC_OP_BMILGB + ot);
+tcg_gen_neg_tl(cpu_T1, cpu_T0);
+tcg_gen_and_tl(cpu_T0, cpu_T0, cpu_T1);
 break;
-
 default:
 goto unknown_op;
 }
+tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
+gen_op_mov_reg_v(ot, s->vex_v, cpu_T0);
+set_cc_op(s, CC_OP_BMILGB + ot);
 break;
 
 default:
-- 
2.9.4