Re: [Mesa-dev] [PATCH] swr/rast: fix intrinsic/function for LLVM 7 compatibility

2018-10-16 Thread Chuck Atkins
Tested-by: Chuck Atkins 

On Tue, Oct 16, 2018 at 8:51 AM Cherniak, Bruce 
wrote:

> Reviewed-by: Bruce Cherniak 
>
> > On Oct 15, 2018, at 9:53 AM, Alok Hota  wrote:
> >
> > Converted from x86 VFMADDPS intrinsic to generic LLVM intrinsic, and
> > removed createInstructionSimplifierPass, which were both removed in LLVM
> > 7.0.0
> >
> > These changes combine patches we received from the community and our own
> > internal patches
> > ---
> > .../swr/rasterizer/codegen/gen_llvm_ir_macros.py  |  2 +-
> > .../drivers/swr/rasterizer/jitter/blend_jit.cpp   |  1 -
> > .../drivers/swr/rasterizer/jitter/builder_misc.cpp| 11 ++---------
> > .../drivers/swr/rasterizer/jitter/fetch_jit.cpp   |  1 -
> > .../rasterizer/jitter/functionpasses/lower_x86.cpp|  1 -
> > .../drivers/swr/rasterizer/jitter/streamout_jit.cpp   |  1 -
> > 6 files changed, 3 insertions(+), 14 deletions(-)
> >
> > diff --git
> a/src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_ir_macros.py
> b/src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_ir_macros.py
> > index 2e7f1a88a0..d34e88d1bc 100644
> > --- a/src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_ir_macros.py
> > +++ b/src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_ir_macros.py
> > @@ -57,7 +57,6 @@ intrinsics = [
> > ['VHSUBPS', ['a', 'b'], 'a'],
> > ['VPTESTC', ['a', 'b'], 'mInt32Ty'],
> > ['VPTESTZ', ['a', 'b'], 'mInt32Ty'],
> > -['VFMADDPS',['a', 'b', 'c'], 'a'],
> > ['VPHADDD', ['a', 'b'], 'a'],
> > ['PDEP32',  ['a', 'b'], 'a'],
> > ['RDTSC',   [], 'mInt64Ty'],
> > @@ -71,6 +70,7 @@ llvm_intrinsics = [
> > ['STACKRESTORE', 'stackrestore', ['a'], []],
> > ['VMINPS', 'minnum', ['a', 'b'], ['a']],
> > ['VMAXPS', 'maxnum', ['a', 'b'], ['a']],
> > +['VFMADDPS', 'fmuladd', ['a', 'b', 'c'], ['a']],
> > ['DEBUGTRAP', 'debugtrap', [], []],
> > ['POPCNT', 'ctpop', ['a'], ['a']],
> > ['LOG2', 'log2', ['a'], ['a']],
> > diff --git a/src/gallium/drivers/swr/rasterizer/jitter/blend_jit.cpp
> b/src/gallium/drivers/swr/rasterizer/jitter/blend_jit.cpp
> > index f89c502db7..d5328c8e4e 100644
> > --- a/src/gallium/drivers/swr/rasterizer/jitter/blend_jit.cpp
> > +++ b/src/gallium/drivers/swr/rasterizer/jitter/blend_jit.cpp
> > @@ -870,7 +870,6 @@ struct BlendJit : public Builder
> > passes.add(createCFGSimplificationPass());
> > passes.add(createEarlyCSEPass());
> > passes.add(createInstructionCombiningPass());
> > -passes.add(createInstructionSimplifierPass());
> > passes.add(createConstantPropagationPass());
> > passes.add(createSCCPPass());
> > passes.add(createAggressiveDCEPass());
> > diff --git a/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp
> b/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp
> > index 4116dad443..26d8688f5e 100644
> > --- a/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp
> > +++ b/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp
> > @@ -755,15 +755,8 @@ namespace SwrJit
> > Value* Builder::FMADDPS(Value* a, Value* b, Value* c)
> > {
> > Value* vOut;
> > -// use FMADs if available
> > -if (JM()->mArch.AVX2())
> > -{
> > -vOut = VFMADDPS(a, b, c);
> > -}
> > -else
> > -{
> > -vOut = FADD(FMUL(a, b), c);
> > -}
> > +// This maps to LLVM fmuladd intrinsic
> > +vOut = VFMADDPS(a, b, c);
> > return vOut;
> > }
> >
> > diff --git a/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp
> b/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp
> > index b4d326ebdc..3ad0fabe81 100644
> > --- a/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp
> > +++ b/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp
> > @@ -294,7 +294,6 @@ Function* FetchJit::Create(const
> FETCH_COMPILE_STATE& fetchState)
> > optPasses.add(createCFGSimplificationPass());
> > optPasses.add(createEarlyCSEPass());
> > optPasses.add(createInstructionCombiningPass());
> > -optPasses.add(createInstructionSimplifierPass());
> > optPasses.add(createConstantPropagationPass());
> > optPasses.add(createSCCPPass());
> > optPasses.add(createAggressiveDCEPass());
> > diff --git
> a/src/gallium/drivers/swr/rasterizer/jitter/functionpasses/lower_x86.cpp
> b/src/gallium/drivers/swr/rasterizer/jitter/functionpasses/lower_x86.cpp
> > index 7605823c04..c34959d35e 100644
> > ---
> a/src/gallium/drivers/swr/rasterizer/jitter/functionpasses/lower_x86.cpp
> > +++
> b/src/gallium/drivers/swr/rasterizer/jitter/functionpasses/lower_x86.cpp
> > @@ -76,7 +76,6 @@ namespace SwrJit
> > {"meta.intrinsic.VCVTPS2PH", Intrinsic::x86_vcvtps2ph_256},
> > {"meta.intrinsic.VPTESTC", Intrinsic::x86_avx_ptestc_256},
> > {"meta.intrinsic.VPTESTZ", Intrinsic::x86_avx_ptestz_256},
> > -{"meta.intrinsic.VFMADDPS", Intrinsic::x86_fma_vfmadd_ps_256},
> >   

Re: [Mesa-dev] [PATCH] swr/rast: fix intrinsic/function for LLVM 7 compatibility

2018-10-16 Thread Cherniak, Bruce
Reviewed-by: Bruce Cherniak 

> On Oct 15, 2018, at 9:53 AM, Alok Hota  wrote:
> 
> Converted from x86 VFMADDPS intrinsic to generic LLVM intrinsic, and
> removed createInstructionSimplifierPass, which were both removed in LLVM
> 7.0.0
> 
> These changes combine patches we received from the community and our own
> internal patches
> ---
> .../swr/rasterizer/codegen/gen_llvm_ir_macros.py  |  2 +-
> .../drivers/swr/rasterizer/jitter/blend_jit.cpp   |  1 -
> .../drivers/swr/rasterizer/jitter/builder_misc.cpp| 11 ++---------
> .../drivers/swr/rasterizer/jitter/fetch_jit.cpp   |  1 -
> .../rasterizer/jitter/functionpasses/lower_x86.cpp|  1 -
> .../drivers/swr/rasterizer/jitter/streamout_jit.cpp   |  1 -
> 6 files changed, 3 insertions(+), 14 deletions(-)
> 
> diff --git a/src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_ir_macros.py 
> b/src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_ir_macros.py
> index 2e7f1a88a0..d34e88d1bc 100644
> --- a/src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_ir_macros.py
> +++ b/src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_ir_macros.py
> @@ -57,7 +57,6 @@ intrinsics = [
> ['VHSUBPS', ['a', 'b'], 'a'],
> ['VPTESTC', ['a', 'b'], 'mInt32Ty'],
> ['VPTESTZ', ['a', 'b'], 'mInt32Ty'],
> -['VFMADDPS',['a', 'b', 'c'], 'a'],
> ['VPHADDD', ['a', 'b'], 'a'],
> ['PDEP32',  ['a', 'b'], 'a'],
> ['RDTSC',   [], 'mInt64Ty'],
> @@ -71,6 +70,7 @@ llvm_intrinsics = [
> ['STACKRESTORE', 'stackrestore', ['a'], []],
> ['VMINPS', 'minnum', ['a', 'b'], ['a']],
> ['VMAXPS', 'maxnum', ['a', 'b'], ['a']],
> +['VFMADDPS', 'fmuladd', ['a', 'b', 'c'], ['a']],
> ['DEBUGTRAP', 'debugtrap', [], []],
> ['POPCNT', 'ctpop', ['a'], ['a']],
> ['LOG2', 'log2', ['a'], ['a']],
> diff --git a/src/gallium/drivers/swr/rasterizer/jitter/blend_jit.cpp 
> b/src/gallium/drivers/swr/rasterizer/jitter/blend_jit.cpp
> index f89c502db7..d5328c8e4e 100644
> --- a/src/gallium/drivers/swr/rasterizer/jitter/blend_jit.cpp
> +++ b/src/gallium/drivers/swr/rasterizer/jitter/blend_jit.cpp
> @@ -870,7 +870,6 @@ struct BlendJit : public Builder
> passes.add(createCFGSimplificationPass());
> passes.add(createEarlyCSEPass());
> passes.add(createInstructionCombiningPass());
> -passes.add(createInstructionSimplifierPass());
> passes.add(createConstantPropagationPass());
> passes.add(createSCCPPass());
> passes.add(createAggressiveDCEPass());
> diff --git a/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp 
> b/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp
> index 4116dad443..26d8688f5e 100644
> --- a/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp
> +++ b/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp
> @@ -755,15 +755,8 @@ namespace SwrJit
> Value* Builder::FMADDPS(Value* a, Value* b, Value* c)
> {
> Value* vOut;
> -// use FMADs if available
> -if (JM()->mArch.AVX2())
> -{
> -vOut = VFMADDPS(a, b, c);
> -}
> -else
> -{
> -vOut = FADD(FMUL(a, b), c);
> -}
> +// This maps to LLVM fmuladd intrinsic
> +vOut = VFMADDPS(a, b, c);
> return vOut;
> }
> 
> diff --git a/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp 
> b/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp
> index b4d326ebdc..3ad0fabe81 100644
> --- a/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp
> +++ b/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp
> @@ -294,7 +294,6 @@ Function* FetchJit::Create(const FETCH_COMPILE_STATE& 
> fetchState)
> optPasses.add(createCFGSimplificationPass());
> optPasses.add(createEarlyCSEPass());
> optPasses.add(createInstructionCombiningPass());
> -optPasses.add(createInstructionSimplifierPass());
> optPasses.add(createConstantPropagationPass());
> optPasses.add(createSCCPPass());
> optPasses.add(createAggressiveDCEPass());
> diff --git 
> a/src/gallium/drivers/swr/rasterizer/jitter/functionpasses/lower_x86.cpp 
> b/src/gallium/drivers/swr/rasterizer/jitter/functionpasses/lower_x86.cpp
> index 7605823c04..c34959d35e 100644
> --- a/src/gallium/drivers/swr/rasterizer/jitter/functionpasses/lower_x86.cpp
> +++ b/src/gallium/drivers/swr/rasterizer/jitter/functionpasses/lower_x86.cpp
> @@ -76,7 +76,6 @@ namespace SwrJit
> {"meta.intrinsic.VCVTPS2PH", Intrinsic::x86_vcvtps2ph_256},
> {"meta.intrinsic.VPTESTC", Intrinsic::x86_avx_ptestc_256},
> {"meta.intrinsic.VPTESTZ", Intrinsic::x86_avx_ptestz_256},
> -{"meta.intrinsic.VFMADDPS", Intrinsic::x86_fma_vfmadd_ps_256},
> {"meta.intrinsic.VPHADDD", Intrinsic::x86_avx2_phadd_d},
> {"meta.intrinsic.PDEP32", Intrinsic::x86_bmi_pdep_32},
> {"meta.intrinsic.RDTSC", Intrinsic::x86_rdtsc},
> diff --git a/src/gallium/drivers/swr/rasterizer/jitter/streamout_jit.cpp 
> b/src/g

[Mesa-dev] [PATCH] swr/rast: fix intrinsic/function for LLVM 7 compatibility

2018-10-15 Thread Alok Hota
Converted from the x86-specific VFMADDPS intrinsic to the generic LLVM
fmuladd intrinsic, and removed the use of createInstructionSimplifierPass.
Both the x86 intrinsic and the pass were removed in LLVM 7.0.0.

These changes combine patches we received from the community with our own
internal patches.
---
 .../swr/rasterizer/codegen/gen_llvm_ir_macros.py  |  2 +-
 .../drivers/swr/rasterizer/jitter/blend_jit.cpp   |  1 -
 .../drivers/swr/rasterizer/jitter/builder_misc.cpp| 11 ++---------
 .../drivers/swr/rasterizer/jitter/fetch_jit.cpp   |  1 -
 .../rasterizer/jitter/functionpasses/lower_x86.cpp|  1 -
 .../drivers/swr/rasterizer/jitter/streamout_jit.cpp   |  1 -
 6 files changed, 3 insertions(+), 14 deletions(-)

diff --git a/src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_ir_macros.py b/src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_ir_macros.py
index 2e7f1a88a0..d34e88d1bc 100644
--- a/src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_ir_macros.py
+++ b/src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_ir_macros.py
@@ -57,7 +57,6 @@ intrinsics = [
 ['VHSUBPS', ['a', 'b'], 'a'],
 ['VPTESTC', ['a', 'b'], 'mInt32Ty'],
 ['VPTESTZ', ['a', 'b'], 'mInt32Ty'],
-['VFMADDPS',['a', 'b', 'c'], 'a'],
 ['VPHADDD', ['a', 'b'], 'a'],
 ['PDEP32',  ['a', 'b'], 'a'],
 ['RDTSC',   [], 'mInt64Ty'],
@@ -71,6 +70,7 @@ llvm_intrinsics = [
 ['STACKRESTORE', 'stackrestore', ['a'], []],
 ['VMINPS', 'minnum', ['a', 'b'], ['a']],
 ['VMAXPS', 'maxnum', ['a', 'b'], ['a']],
+['VFMADDPS', 'fmuladd', ['a', 'b', 'c'], ['a']],
 ['DEBUGTRAP', 'debugtrap', [], []],
 ['POPCNT', 'ctpop', ['a'], ['a']],
 ['LOG2', 'log2', ['a'], ['a']],
diff --git a/src/gallium/drivers/swr/rasterizer/jitter/blend_jit.cpp b/src/gallium/drivers/swr/rasterizer/jitter/blend_jit.cpp
index f89c502db7..d5328c8e4e 100644
--- a/src/gallium/drivers/swr/rasterizer/jitter/blend_jit.cpp
+++ b/src/gallium/drivers/swr/rasterizer/jitter/blend_jit.cpp
@@ -870,7 +870,6 @@ struct BlendJit : public Builder
 passes.add(createCFGSimplificationPass());
 passes.add(createEarlyCSEPass());
 passes.add(createInstructionCombiningPass());
-passes.add(createInstructionSimplifierPass());
 passes.add(createConstantPropagationPass());
 passes.add(createSCCPPass());
 passes.add(createAggressiveDCEPass());
diff --git a/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp b/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp
index 4116dad443..26d8688f5e 100644
--- a/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp
+++ b/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp
@@ -755,15 +755,8 @@ namespace SwrJit
 Value* Builder::FMADDPS(Value* a, Value* b, Value* c)
 {
 Value* vOut;
-// use FMADs if available
-if (JM()->mArch.AVX2())
-{
-vOut = VFMADDPS(a, b, c);
-}
-else
-{
-vOut = FADD(FMUL(a, b), c);
-}
+// This maps to LLVM fmuladd intrinsic
+vOut = VFMADDPS(a, b, c);
 return vOut;
 }
 
diff --git a/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp b/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp
index b4d326ebdc..3ad0fabe81 100644
--- a/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp
+++ b/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp
@@ -294,7 +294,6 @@ Function* FetchJit::Create(const FETCH_COMPILE_STATE& fetchState)
 optPasses.add(createCFGSimplificationPass());
 optPasses.add(createEarlyCSEPass());
 optPasses.add(createInstructionCombiningPass());
-optPasses.add(createInstructionSimplifierPass());
 optPasses.add(createConstantPropagationPass());
 optPasses.add(createSCCPPass());
 optPasses.add(createAggressiveDCEPass());
diff --git a/src/gallium/drivers/swr/rasterizer/jitter/functionpasses/lower_x86.cpp b/src/gallium/drivers/swr/rasterizer/jitter/functionpasses/lower_x86.cpp
index 7605823c04..c34959d35e 100644
--- a/src/gallium/drivers/swr/rasterizer/jitter/functionpasses/lower_x86.cpp
+++ b/src/gallium/drivers/swr/rasterizer/jitter/functionpasses/lower_x86.cpp
@@ -76,7 +76,6 @@ namespace SwrJit
 {"meta.intrinsic.VCVTPS2PH", Intrinsic::x86_vcvtps2ph_256},
 {"meta.intrinsic.VPTESTC", Intrinsic::x86_avx_ptestc_256},
 {"meta.intrinsic.VPTESTZ", Intrinsic::x86_avx_ptestz_256},
-{"meta.intrinsic.VFMADDPS", Intrinsic::x86_fma_vfmadd_ps_256},
 {"meta.intrinsic.VPHADDD", Intrinsic::x86_avx2_phadd_d},
 {"meta.intrinsic.PDEP32", Intrinsic::x86_bmi_pdep_32},
 {"meta.intrinsic.RDTSC", Intrinsic::x86_rdtsc},
diff --git a/src/gallium/drivers/swr/rasterizer/jitter/streamout_jit.cpp b/src/gallium/drivers/swr/rasterizer/jitter/streamout_jit.cpp
index 8f86af2a4b..11ad36521b 100644
--- a/src/gallium/drivers/swr/rasterizer/jitter/streamout_jit.cpp
+++ b/src/gallium/drivers/swr/rasterizer/jitter/streamout_jit.cpp
@@ -306,7 +306,6 @@