On 02/04/20 00:52, Janne Grunau wrote:
> Detected by asm test suite failures in dav1d
> (https://code.videolan.org/videolan/dav1d). Can be reproduced by
> `qemu-x86_64 -cpu core2duo ./tests/checkasm --test=mc_8bpc 1659890620`.
>
> Signed-off-by: Janne Grunau <j...@jannau.net>
> ---
>  target/i386/ops_sse.h | 53 +++++++++++++++++++++++++++----------------
>  1 file changed, 33 insertions(+), 20 deletions(-)
>
> diff --git a/target/i386/ops_sse.h b/target/i386/ops_sse.h
> index ec1ec745d0..2f41511aef 100644
> --- a/target/i386/ops_sse.h
> +++ b/target/i386/ops_sse.h
> @@ -1435,34 +1435,47 @@ void glue(helper_pshufb, SUFFIX)(CPUX86State *env,
> Reg *d, Reg *s)
>
>  void glue(helper_phaddw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
>  {
> -    d->W(0) = (int16_t)d->W(0) + (int16_t)d->W(1);
> -    d->W(1) = (int16_t)d->W(2) + (int16_t)d->W(3);
> -    XMM_ONLY(d->W(2) = (int16_t)d->W(4) + (int16_t)d->W(5));
> -    XMM_ONLY(d->W(3) = (int16_t)d->W(6) + (int16_t)d->W(7));
> -    d->W((2 << SHIFT) + 0) = (int16_t)s->W(0) + (int16_t)s->W(1);
> -    d->W((2 << SHIFT) + 1) = (int16_t)s->W(2) + (int16_t)s->W(3);
> -    XMM_ONLY(d->W(6) = (int16_t)s->W(4) + (int16_t)s->W(5));
> -    XMM_ONLY(d->W(7) = (int16_t)s->W(6) + (int16_t)s->W(7));
> +
> +    Reg r;
> +
> +    r.W(0) = (int16_t)d->W(0) + (int16_t)d->W(1);
> +    r.W(1) = (int16_t)d->W(2) + (int16_t)d->W(3);
> +    XMM_ONLY(r.W(2) = (int16_t)d->W(4) + (int16_t)d->W(5));
> +    XMM_ONLY(r.W(3) = (int16_t)d->W(6) + (int16_t)d->W(7));
> +    r.W((2 << SHIFT) + 0) = (int16_t)s->W(0) + (int16_t)s->W(1);
> +    r.W((2 << SHIFT) + 1) = (int16_t)s->W(2) + (int16_t)s->W(3);
> +    XMM_ONLY(r.W(6) = (int16_t)s->W(4) + (int16_t)s->W(5));
> +    XMM_ONLY(r.W(7) = (int16_t)s->W(6) + (int16_t)s->W(7));
> +
> +    *d = r;
>  }
>
>  void glue(helper_phaddd, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
>  {
> -    d->L(0) = (int32_t)d->L(0) + (int32_t)d->L(1);
> -    XMM_ONLY(d->L(1) = (int32_t)d->L(2) + (int32_t)d->L(3));
> -    d->L((1 << SHIFT) + 0) = (int32_t)s->L(0) + (int32_t)s->L(1);
> -    XMM_ONLY(d->L(3) = (int32_t)s->L(2) + (int32_t)s->L(3));
> +    Reg r;
> +
> +    r.L(0) = (int32_t)d->L(0) + (int32_t)d->L(1);
> +    XMM_ONLY(r.L(1) = (int32_t)d->L(2) + (int32_t)d->L(3));
> +    r.L((1 << SHIFT) + 0) = (int32_t)s->L(0) + (int32_t)s->L(1);
> +    XMM_ONLY(r.L(3) = (int32_t)s->L(2) + (int32_t)s->L(3));
> +
> +    *d = r;
>  }
>
>  void glue(helper_phaddsw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
>  {
> -    d->W(0) = satsw((int16_t)d->W(0) + (int16_t)d->W(1));
> -    d->W(1) = satsw((int16_t)d->W(2) + (int16_t)d->W(3));
> -    XMM_ONLY(d->W(2) = satsw((int16_t)d->W(4) + (int16_t)d->W(5)));
> -    XMM_ONLY(d->W(3) = satsw((int16_t)d->W(6) + (int16_t)d->W(7)));
> -    d->W((2 << SHIFT) + 0) = satsw((int16_t)s->W(0) + (int16_t)s->W(1));
> -    d->W((2 << SHIFT) + 1) = satsw((int16_t)s->W(2) + (int16_t)s->W(3));
> -    XMM_ONLY(d->W(6) = satsw((int16_t)s->W(4) + (int16_t)s->W(5)));
> -    XMM_ONLY(d->W(7) = satsw((int16_t)s->W(6) + (int16_t)s->W(7)));
> +    Reg r;
> +
> +    r.W(0) = satsw((int16_t)d->W(0) + (int16_t)d->W(1));
> +    r.W(1) = satsw((int16_t)d->W(2) + (int16_t)d->W(3));
> +    XMM_ONLY(r.W(2) = satsw((int16_t)d->W(4) + (int16_t)d->W(5)));
> +    XMM_ONLY(r.W(3) = satsw((int16_t)d->W(6) + (int16_t)d->W(7)));
> +    r.W((2 << SHIFT) + 0) = satsw((int16_t)s->W(0) + (int16_t)s->W(1));
> +    r.W((2 << SHIFT) + 1) = satsw((int16_t)s->W(2) + (int16_t)s->W(3));
> +    XMM_ONLY(r.W(6) = satsw((int16_t)s->W(4) + (int16_t)s->W(5)));
> +    XMM_ONLY(r.W(7) = satsw((int16_t)s->W(6) + (int16_t)s->W(7)));
> +
> +    *d = r;
>  }
>
>  void glue(helper_pmaddubsw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
>
Queued, thanks.

Paolo