Mesa (master): swr/rast: Renamed MetaData calls
Module: Mesa Branch: master Commit: 98d0201577ba21223e6d9a54b1240fe49524d486 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=98d0201577ba21223e6d9a54b1240fe49524d486 Author: Alok HotaDate: Fri May 25 10:19:46 2018 -0500 swr/rast: Renamed MetaData calls Reviewed-by: Bruce Cherniak --- .../drivers/swr/rasterizer/jitter/builder.cpp | 170 ++--- .../drivers/swr/rasterizer/jitter/builder.h| 4 +- 2 files changed, 87 insertions(+), 87 deletions(-) diff --git a/src/gallium/drivers/swr/rasterizer/jitter/builder.cpp b/src/gallium/drivers/swr/rasterizer/jitter/builder.cpp index e1c5d80c80..4b06aaa3ab 100644 --- a/src/gallium/drivers/swr/rasterizer/jitter/builder.cpp +++ b/src/gallium/drivers/swr/rasterizer/jitter/builder.cpp @@ -1,32 +1,32 @@ / -* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved. -* -* Permission is hereby granted, free of charge, to any person obtaining a -* copy of this software and associated documentation files (the "Software"), -* to deal in the Software without restriction, including without limitation -* the rights to use, copy, modify, merge, publish, distribute, sublicense, -* and/or sell copies of the Software, and to permit persons to whom the -* Software is furnished to do so, subject to the following conditions: -* -* The above copyright notice and this permission notice (including the next -* paragraph) shall be included in all copies or substantial portions of the -* Software. -* -* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS -* IN THE SOFTWARE. 
-* -* @file builder.h -* -* @brief Includes all the builder related functionality -* -* Notes: -* -**/ + * Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * @file builder.h + * + * @brief Includes all the builder related functionality + * + * Notes: + * + **/ #include "jit_pch.hpp" #include "builder.h" @@ -38,11 +38,9 @@ namespace SwrJit // /// @brief Contructor for Builder. /// @param pJitMgr - JitManager which contains modules, function passes, etc. 
-Builder::Builder(JitManager *pJitMgr) -: mpJitMgr(pJitMgr), - mpPrivateContext(nullptr) +Builder::Builder(JitManager *pJitMgr) : mpJitMgr(pJitMgr), mpPrivateContext(nullptr) { -mVWidth = pJitMgr->mVWidth; +mVWidth = pJitMgr->mVWidth; mVWidth16 = 16; mpIRBuilder = >mBuilder; @@ -70,29 +68,29 @@ namespace SwrJit // Built in types: simd16 -mSimd16Int1Ty = VectorType::get(mInt1Ty, mVWidth16); -mSimd16Int16Ty = VectorType::get(mInt16Ty, mVWidth16); -mSimd16Int32Ty = VectorType::get(mInt32Ty, mVWidth16); -mSimd16Int64Ty = VectorType::get(mInt64Ty, mVWidth16); -mSimd16FP16Ty = VectorType::get(mFP16Ty, mVWidth16); -mSimd16FP32Ty = VectorType::get(mFP32Ty, mVWidth16); -mSimd16VectorTy = ArrayType::get(mSimd16FP32Ty, 4); -mSimd16VectorTRTy = ArrayType::get(mSimd16FP32Ty, 5); +mSimd16Int1Ty = VectorType::get(mInt1Ty, mVWidth16); +
Mesa (master): swr/rast: Removed superfluous JitManager argument from passes
Module: Mesa Branch: master Commit: b6b114c1aeaa996a4bf8c1fd409e8141d18b120c URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=b6b114c1aeaa996a4bf8c1fd409e8141d18b120c Author: Alok HotaDate: Fri May 25 10:19:47 2018 -0500 swr/rast: Removed superfluous JitManager argument from passes Reviewed-by: Bruce Cherniak --- src/gallium/drivers/swr/rasterizer/jitter/blend_jit.cpp | 2 +- src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp | 2 +- .../swr/rasterizer/jitter/functionpasses/lower_x86.cpp | 17 - .../swr/rasterizer/jitter/functionpasses/passes.h | 2 +- .../drivers/swr/rasterizer/jitter/streamout_jit.cpp | 2 +- src/gallium/drivers/swr/swr_shader.cpp | 2 +- 6 files changed, 13 insertions(+), 14 deletions(-) diff --git a/src/gallium/drivers/swr/rasterizer/jitter/blend_jit.cpp b/src/gallium/drivers/swr/rasterizer/jitter/blend_jit.cpp index 72bf900c85..20f2e42eec 100644 --- a/src/gallium/drivers/swr/rasterizer/jitter/blend_jit.cpp +++ b/src/gallium/drivers/swr/rasterizer/jitter/blend_jit.cpp @@ -819,7 +819,7 @@ struct BlendJit : public Builder passes.add(createSCCPPass()); passes.add(createAggressiveDCEPass()); -passes.add(createLowerX86Pass(JM(), this)); +passes.add(createLowerX86Pass(this)); passes.run(*blendFunc); diff --git a/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp b/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp index 7b0b80a713..0abcd1a8d7 100644 --- a/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp +++ b/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp @@ -269,7 +269,7 @@ Function* FetchJit::Create(const FETCH_COMPILE_STATE& fetchState) optPasses.run(*fetch); -optPasses.add(createLowerX86Pass(JM(), this)); +optPasses.add(createLowerX86Pass(this)); optPasses.run(*fetch); JitManager::DumpToFile(fetch, "opt"); diff --git a/src/gallium/drivers/swr/rasterizer/jitter/functionpasses/lower_x86.cpp b/src/gallium/drivers/swr/rasterizer/jitter/functionpasses/lower_x86.cpp index 5a69eaef26..f2bd8889fc 100644 --- 
a/src/gallium/drivers/swr/rasterizer/jitter/functionpasses/lower_x86.cpp +++ b/src/gallium/drivers/swr/rasterizer/jitter/functionpasses/lower_x86.cpp @@ -136,21 +136,21 @@ namespace SwrJit struct LowerX86 : public FunctionPass { -LowerX86(JitManager* pJitMgr = nullptr, Builder* b = nullptr) -: FunctionPass(ID), mpJitMgr(pJitMgr), B(b) +LowerX86(Builder* b = nullptr) +: FunctionPass(ID), B(b) { initializeLowerX86Pass(*PassRegistry::getPassRegistry()); // Determine target arch -if (mpJitMgr->mArch.AVX512F()) +if (JM()->mArch.AVX512F()) { mTarget = AVX512; } -else if (mpJitMgr->mArch.AVX2()) +else if (JM()->mArch.AVX2()) { mTarget = AVX2; } -else if (mpJitMgr->mArch.AVX()) +else if (JM()->mArch.AVX()) { mTarget = AVX; @@ -356,9 +356,8 @@ namespace SwrJit { } -JitManager* JM() { return mpJitMgr; } +JitManager* JM() { return B->JM(); } -JitManager* mpJitMgr; Builder* B; TargetArch mTarget; @@ -368,9 +367,9 @@ namespace SwrJit char LowerX86::ID = 0; // LLVM uses address of ID as the actual ID. 
-FunctionPass* createLowerX86Pass(JitManager* pJitMgr, Builder* b) +FunctionPass* createLowerX86Pass(Builder* b) { -return new LowerX86(pJitMgr, b); +return new LowerX86(b); } Instruction* NO_EMU(LowerX86* pThis, TargetArch arch, TargetWidth width, CallInst* pCallInst) diff --git a/src/gallium/drivers/swr/rasterizer/jitter/functionpasses/passes.h b/src/gallium/drivers/swr/rasterizer/jitter/functionpasses/passes.h index f7373f034b..95ef4bcf01 100644 --- a/src/gallium/drivers/swr/rasterizer/jitter/functionpasses/passes.h +++ b/src/gallium/drivers/swr/rasterizer/jitter/functionpasses/passes.h @@ -33,5 +33,5 @@ namespace SwrJit { using namespace llvm; -FunctionPass* createLowerX86Pass(JitManager* pJitMgr, Builder* b); +FunctionPass* createLowerX86Pass(Builder* b); } diff --git a/src/gallium/drivers/swr/rasterizer/jitter/streamout_jit.cpp b/src/gallium/drivers/swr/rasterizer/jitter/streamout_jit.cpp index f804900291..cb2e3aed61 100644 --- a/src/gallium/drivers/swr/rasterizer/jitter/streamout_jit.cpp +++ b/src/gallium/drivers/swr/rasterizer/jitter/streamout_jit.cpp @@ -307,7 +307,7 @@ struct StreamOutJit : public Builder passes.add(createSCCPPass()); passes.add(createAggressiveDCEPass()); -passes.add(createLowerX86Pass(JM(), this)); +passes.add(createLowerX86Pass(this)); passes.run(*soFunc); diff --git
Mesa (master): swr/rast: Added in-place building to SCATTERPS
Module: Mesa Branch: master Commit: cfe75cc7b5acbf0692baff07a516ff4efe7fa968 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=cfe75cc7b5acbf0692baff07a516ff4efe7fa968 Author: Alok HotaDate: Fri May 25 10:19:43 2018 -0500 swr/rast: Added in-place building to SCATTERPS SCATTERPS previously assumed it was being used with an existing basic block Reviewed-by: Bruce Cherniak --- .../drivers/swr/rasterizer/jitter/builder_mem.cpp | 29 +++--- 1 file changed, 20 insertions(+), 9 deletions(-) diff --git a/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.cpp b/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.cpp index 6e17888f83..77c2095ea9 100644 --- a/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.cpp +++ b/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.cpp @@ -617,17 +617,28 @@ namespace SwrJit Value* pIsUndef = ICMP_EQ(pIndex, C(32)); -// Split current block -BasicBlock* pPostLoop = pCurBB->splitBasicBlock(cast(pIsUndef)->getNextNode()); +// Split current block or create new one if building inline +BasicBlock* pPostLoop; +if (pCurBB->getTerminator()) +{ +pPostLoop = pCurBB->splitBasicBlock(cast(pIsUndef)->getNextNode()); -// Remove unconditional jump created by splitBasicBlock -pCurBB->getTerminator()->eraseFromParent(); +// Remove unconditional jump created by splitBasicBlock +pCurBB->getTerminator()->eraseFromParent(); -// Add terminator to end of original block -IRB()->SetInsertPoint(pCurBB); +// Add terminator to end of original block +IRB()->SetInsertPoint(pCurBB); -// Add conditional branch -COND_BR(pIsUndef, pPostLoop, pLoop); +// Add conditional branch +COND_BR(pIsUndef, pPostLoop, pLoop); +} +else +{ +pPostLoop = BasicBlock::Create(mpJitMgr->mContext, "PostScatter_Loop", pFunc); + +// Add conditional branch +COND_BR(pIsUndef, pPostLoop, pLoop); +} // Add loop basic block contents IRB()->SetInsertPoint(pLoop); @@ -642,7 +653,7 @@ namespace SwrJit Value* pOffsetElem = LOADV(pOffsetsArrayPtr, { pIndexPhi }); // GEP to this offset in dst 
-Value* pCurDst = GEP(pDst, pOffsetElem); +Value* pCurDst = GEP(pDst, pOffsetElem, mInt8PtrTy); pCurDst = POINTER_CAST(pCurDst, PointerType::get(pSrcTy, 0)); STORE(pSrcElem, pCurDst); ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): swr/rast: Moved memory init out of core swr init
Module: Mesa Branch: master Commit: b3360f5c8b74906187a8801d83f2c4f73f3c025e URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=b3360f5c8b74906187a8801d83f2c4f73f3c025e Author: Alok HotaDate: Fri May 25 10:19:48 2018 -0500 swr/rast: Moved memory init out of core swr init Added two new files for a wrapper function for initialization v2: added missing include for single architecture builds Reviewed-by: Bruce Cherniak --- src/gallium/drivers/swr/Makefile.sources | 4 ++- src/gallium/drivers/swr/meson.build| 2 ++ src/gallium/drivers/swr/rasterizer/core/api.cpp| 4 --- src/gallium/drivers/swr/rasterizer/core/state.h| 3 +- .../drivers/swr/rasterizer/memory/InitMemory.cpp | 39 ++ .../drivers/swr/rasterizer/memory/InitMemory.h | 33 ++ src/gallium/drivers/swr/swr_loader.cpp | 8 - 7 files changed, 86 insertions(+), 7 deletions(-) diff --git a/src/gallium/drivers/swr/Makefile.sources b/src/gallium/drivers/swr/Makefile.sources index 6753d501a0..b298356079 100644 --- a/src/gallium/drivers/swr/Makefile.sources +++ b/src/gallium/drivers/swr/Makefile.sources @@ -177,4 +177,6 @@ MEMORY_CXX_SOURCES := \ rasterizer/memory/StoreTile_TileY2.cpp \ rasterizer/memory/StoreTile_TileY.cpp \ rasterizer/memory/TilingFunctions.h \ - rasterizer/memory/tilingtraits.h + rasterizer/memory/tilingtraits.h \ + rasterizer/memory/InitMemory.cpp \ + rasterizer/memory/InitMemory.h diff --git a/src/gallium/drivers/swr/meson.build b/src/gallium/drivers/swr/meson.build index 9b272aaebd..b95c8bc1bf 100644 --- a/src/gallium/drivers/swr/meson.build +++ b/src/gallium/drivers/swr/meson.build @@ -151,6 +151,8 @@ files_swr_arch = files( 'rasterizer/memory/StoreTile_TileY.cpp', 'rasterizer/memory/TilingFunctions.h', 'rasterizer/memory/tilingtraits.h', + 'rasterizer/memory/InitMemory.h', + 'rasterizer/memory/InitMemory.cpp', ) swr_context_files = files('swr_context.h') diff --git a/src/gallium/drivers/swr/rasterizer/core/api.cpp b/src/gallium/drivers/swr/rasterizer/core/api.cpp index 47f3633d54..c932ec0bd6 100644 
--- a/src/gallium/drivers/swr/rasterizer/core/api.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/api.cpp @@ -1728,10 +1728,6 @@ void InitBackendFuncTables(); /// @brief Initialize swr backend and memory internal tables void SwrInit() { -InitSimLoadTilesTable(); -InitSimStoreTilesTable(); -InitSimClearTilesTable(); - InitClearTilesTable(); InitBackendFuncTables(); InitRasterizerFunctions(); diff --git a/src/gallium/drivers/swr/rasterizer/core/state.h b/src/gallium/drivers/swr/rasterizer/core/state.h index c26dabe838..9db17eeed0 100644 --- a/src/gallium/drivers/swr/rasterizer/core/state.h +++ b/src/gallium/drivers/swr/rasterizer/core/state.h @@ -29,10 +29,11 @@ #include "common/formats.h" #include "common/intrin.h" -using gfxptr_t = unsigned long long; #include #include +using gfxptr_t = unsigned long long; + // /// PRIMITIVE_TOPOLOGY. // diff --git a/src/gallium/drivers/swr/rasterizer/memory/InitMemory.cpp b/src/gallium/drivers/swr/rasterizer/memory/InitMemory.cpp new file mode 100644 index 00..bff96e17f4 --- /dev/null +++ b/src/gallium/drivers/swr/rasterizer/memory/InitMemory.cpp @@ -0,0 +1,39 @@ +/ +* Copyright (C) 2018 Intel Corporation. All Rights Reserved. +* +* Permission is hereby granted, free of charge, to any person obtaining a +* copy of this software and associated documentation files (the "Software"), +* to deal in the Software without restriction, including without limitation +* the rights to use, copy, modify, merge, publish, distribute, sublicense, +* and/or sell copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following conditions: +* +* The above copyright notice and this permission notice (including the next +* paragraph) shall be included in all copies or substantial portions of the +* Software. 
+* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +* IN THE SOFTWARE. +* +* @file InitMemory.cpp +* +* @brief Provide access to tiles table initialization functions +* +**/ +#include
Mesa (master): swr/rast: Adjusted avx512 primitive assembly for msvc codegen
Module: Mesa Branch: master Commit: fb20ae0374425ae3aff2a50a498c7e2b428632a4 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=fb20ae0374425ae3aff2a50a498c7e2b428632a4 Author: Alok HotaDate: Fri May 25 10:19:49 2018 -0500 swr/rast: Adjusted avx512 primitive assembly for msvc codegen Optimize AVX-512 PA Assemble (PA_STATE_OPT). Reduced generated code by about 4x, MSVC compiler was going crazy making temporaries and split-loading inputs onto the stack unless explicit AVX-512 load ops were added Reviewed-by: Bruce Cherniak --- src/gallium/drivers/swr/rasterizer/core/pa_avx.cpp | 139 + 1 file changed, 90 insertions(+), 49 deletions(-) diff --git a/src/gallium/drivers/swr/rasterizer/core/pa_avx.cpp b/src/gallium/drivers/swr/rasterizer/core/pa_avx.cpp index 64a90c768b..4f89e0c179 100644 --- a/src/gallium/drivers/swr/rasterizer/core/pa_avx.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/pa_avx.cpp @@ -755,36 +755,51 @@ bool PaTriList1_simd16(PA_STATE_OPT& pa, uint32_t slot, simd16vector verts[]) bool PaTriList2_simd16(PA_STATE_OPT& pa, uint32_t slot, simd16vector verts[]) { -#if KNOB_ARCH == KNOB_ARCH_AVX -simd16scalar perm0 = _simd16_setzero_ps(); -simd16scalar perm1 = _simd16_setzero_ps(); -simd16scalar perm2 = _simd16_setzero_ps(); -#elif KNOB_ARCH >= KNOB_ARCH_AVX2 +#if KNOB_ARCH >= KNOB_ARCH_AVX2 const simd16scalari perm0 = _simd16_set_epi32(13, 10, 7, 4, 1, 14, 11, 8, 5, 2, 15, 12, 9, 6, 3, 0); const simd16scalari perm1 = _simd16_set_epi32(14, 11, 8, 5, 2, 15, 12, 9, 6, 3, 0, 13, 10, 7, 4, 1); const simd16scalari perm2 = _simd16_set_epi32(15, 12, 9, 6, 3, 0, 13, 10, 7, 4, 1, 14, 11, 8, 5, 2); +#else // KNOB_ARCH == KNOB_ARCH_AVX +simd16scalar perm0 = _simd16_setzero_ps(); +simd16scalar perm1 = _simd16_setzero_ps(); +simd16scalar perm2 = _simd16_setzero_ps(); #endif const simd16vector = PaGetSimdVector_simd16(pa, 0, slot); const simd16vector = PaGetSimdVector_simd16(pa, 1, slot); const simd16vector = PaGetSimdVector_simd16(pa, 2, slot); -simd16vector = 
verts[0]; -simd16vector = verts[1]; -simd16vector = verts[2]; +const simd16mask mask0 = 0x4924; +const simd16mask mask1 = 0x2492; +const simd16mask mask2 = 0x9249; // v0 -> a0 a3 a6 a9 aC aF b2 b5 b8 bB bE c1 c4 c7 cA cD // v1 -> a1 a4 a7 aA aD b0 b3 b6 b9 bC bF c2 c5 c8 cB cE // v2 -> a2 a5 a8 aB aE b1 b4 b7 bA bD c0 c3 c6 c9 cC cF +simd16vector = verts[0]; +simd16vector = verts[1]; +simd16vector = verts[2]; + // for simd16 x, y, z, and w for (int i = 0; i < 4; i += 1) { -simd16scalar temp0 = _simd16_blend_ps(_simd16_blend_ps(a[i], b[i], 0x4924), c[i], 0x2492); -simd16scalar temp1 = _simd16_blend_ps(_simd16_blend_ps(a[i], b[i], 0x9249), c[i], 0x4924); -simd16scalar temp2 = _simd16_blend_ps(_simd16_blend_ps(a[i], b[i], 0x2492), c[i], 0x9249); +simd16scalar tempa = _simd16_loadu_ps(reinterpret_cast([i])); +simd16scalar tempb = _simd16_loadu_ps(reinterpret_cast([i])); +simd16scalar tempc = _simd16_loadu_ps(reinterpret_cast([i])); + +simd16scalar temp0 = _simd16_blend_ps(_simd16_blend_ps(tempa, tempb, mask0), tempc, mask1); +simd16scalar temp1 = _simd16_blend_ps(_simd16_blend_ps(tempa, tempb, mask2), tempc, mask0); +simd16scalar temp2 = _simd16_blend_ps(_simd16_blend_ps(tempa, tempb, mask1), tempc, mask2); + +#if KNOB_ARCH >= KNOB_ARCH_AVX2 +v0[i] = _simd16_permute_ps(temp0, perm0); +v1[i] = _simd16_permute_ps(temp1, perm1); +v2[i] = _simd16_permute_ps(temp2, perm2); +#else // #if KNOB_ARCH == KNOB_ARCH_AVX + +// the general permutes (above) are prohibitively slow to emulate on AVX (its scalar code) -#if KNOB_ARCH == KNOB_ARCH_AVX temp0 = _simd16_permute_ps_i(temp0, 0x6C); // (0, 3, 2, 1) => 00 11 01 10 => 0x6C perm0 = _simd16_permute2f128_ps(temp0, temp0, 0xB1);// (1, 0, 3, 2) => 01 00 11 10 => 0xB1 temp0 = _simd16_blend_ps(temp0, perm0, 0x); // 0010 0010 0010 0010 @@ -802,10 +817,6 @@ bool PaTriList2_simd16(PA_STATE_OPT& pa, uint32_t slot, simd16vector verts[]) temp2 = _simd16_blend_ps(temp2, perm2, 0x); // 0100 0100 0100 0100 perm2 = _simd16_permute2f128_ps(temp2, 
temp2, 0x4E);// (2, 3, 0, 1) => 10 11 00 01 => 0x4E v2[i] = _simd16_blend_ps(temp2, perm2, 0x1C1C); // 0011 1000 0011 1000 -#elif KNOB_ARCH >= KNOB_ARCH_AVX2 -v0[i] = _simd16_permute_ps(temp0, perm0); -v1[i] = _simd16_permute_ps(temp1, perm1); -v2[i] = _simd16_permute_ps(temp2, perm2); #endif } @@ -1056,26 +1067,31 @@ bool PaTriStrip1_simd16(PA_STATE_OPT& pa, uint32_t slot, simd16vector verts[]) const simd16vector = PaGetSimdVector_simd16(pa, pa.prev, slot); const simd16vector =
Mesa (master): swr/rast: Check gCoreBuckets/CORE_BUCKETS equal length at compile time
Module: Mesa Branch: master Commit: f09636e2e1311b24cbcd2a2d49e97f8a69702cfd URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=f09636e2e1311b24cbcd2a2d49e97f8a69702cfd Author: Alok HotaDate: Fri May 25 10:19:44 2018 -0500 swr/rast: Check gCoreBuckets/CORE_BUCKETS equal length at compile time Reviewed-by: Bruce Cherniak --- src/gallium/drivers/swr/rasterizer/core/rdtsc_core.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/gallium/drivers/swr/rasterizer/core/rdtsc_core.cpp b/src/gallium/drivers/swr/rasterizer/core/rdtsc_core.cpp index f289a319ca..48ea397018 100644 --- a/src/gallium/drivers/swr/rasterizer/core/rdtsc_core.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/rdtsc_core.cpp @@ -89,6 +89,7 @@ BUCKET_DESC gCoreBuckets[] = { { "BEStoreTiles", "", true, 0xff00 }, { "BEEndTile", "", false, 0x }, }; +static_assert(NumBuckets == (sizeof(gCoreBuckets) / sizeof(gCoreBuckets[0])), "RDTSC Bucket enum and description table size mismatched."); /// @todo bucketmanager and mapping should probably be a part of the SWR context std::vector gBucketMap; ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): swr/rast: Use metadata to communicate between passes
Module: Mesa Branch: master Commit: 14b5cac0be15b2a1f6624431ae1b694f3a4487dd URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=14b5cac0be15b2a1f6624431ae1b694f3a4487dd Author: Alok HotaDate: Fri May 25 10:19:45 2018 -0500 swr/rast: Use metadata to communicate between passes Reviewed-by: Bruce Cherniak --- .../drivers/swr/rasterizer/jitter/builder.h| 28 ++ 1 file changed, 28 insertions(+) diff --git a/src/gallium/drivers/swr/rasterizer/jitter/builder.h b/src/gallium/drivers/swr/rasterizer/jitter/builder.h index 6ca128d38f..08a3a6e473 100644 --- a/src/gallium/drivers/swr/rasterizer/jitter/builder.h +++ b/src/gallium/drivers/swr/rasterizer/jitter/builder.h @@ -124,6 +124,34 @@ namespace SwrJit bool SetTexelMaskEvaluate(Instruction* inst); bool IsTexelMaskEvaluate(Instruction* inst); Type* GetVectorType(Type* pType); +void SetMetadata(StringRef s, uint32_t val) +{ +llvm::NamedMDNode *metaData = mpJitMgr->mpCurrentModule->getOrInsertNamedMetadata(s); +Constant* cval = mpIRBuilder->getInt32(val); +llvm::MDNode *mdNode = llvm::MDNode::get(mpJitMgr->mpCurrentModule->getContext(), llvm::ConstantAsMetadata::get(cval)); +if (metaData->getNumOperands()) +{ +metaData->setOperand(0, mdNode); +} +else +{ +metaData->addOperand(mdNode); +} +} +uint32_t GetMetadata(StringRef s) +{ +NamedMDNode* metaData = mpJitMgr->mpCurrentModule->getNamedMetadata(s); +if (metaData) +{ +MDNode* mdNode = metaData->getOperand(0); +Metadata* val = mdNode->getOperand(0); +return mdconst::dyn_extract(val)->getZExtValue(); +} +else +{ +return 0; +} +} #include "gen_builder.hpp" #include "gen_builder_meta.hpp" ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): swr/rast: SIMD16 builder - cleanup naming (simd2 -> simd16)
Module: Mesa Branch: master Commit: e14b48e00e56b59de4bb916be994756295d7b685 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=e14b48e00e56b59de4bb916be994756295d7b685 Author: Tim Rowley <timothy.o.row...@intel.com> Date: Tue Dec 19 13:39:09 2017 -0600 swr/rast: SIMD16 builder - cleanup naming (simd2 -> simd16) Reviewed-by: Bruce Cherniak <bruce.chern...@intel.com> --- .../drivers/swr/rasterizer/jitter/builder.cpp | 76 +- .../drivers/swr/rasterizer/jitter/builder.h| 45 +++--- .../drivers/swr/rasterizer/jitter/builder_misc.cpp | 133 .../drivers/swr/rasterizer/jitter/builder_misc.h | 50 +++--- .../drivers/swr/rasterizer/jitter/fetch_jit.cpp| 168 +++-- 5 files changed, 239 insertions(+), 233 deletions(-) diff --git a/src/gallium/drivers/swr/rasterizer/jitter/builder.cpp b/src/gallium/drivers/swr/rasterizer/jitter/builder.cpp index 4b83a3204c..c46159a35a 100644 --- a/src/gallium/drivers/swr/rasterizer/jitter/builder.cpp +++ b/src/gallium/drivers/swr/rasterizer/jitter/builder.cpp @@ -40,52 +40,56 @@ namespace SwrJit Builder::Builder(JitManager *pJitMgr) : mpJitMgr(pJitMgr) { +SWR_ASSERT(pJitMgr->mVWidth == 8); + mVWidth = pJitMgr->mVWidth; -#if USE_SIMD16_BUILDER -mVWidth2 = pJitMgr->mVWidth * 2; -#endif +mVWidth16 = pJitMgr->mVWidth * 2; mpIRBuilder = >mBuilder; -mVoidTy = Type::getVoidTy(pJitMgr->mContext); -mFP16Ty = Type::getHalfTy(pJitMgr->mContext); -mFP32Ty = Type::getFloatTy(pJitMgr->mContext); -mFP32PtrTy = PointerType::get(mFP32Ty, 0); -mDoubleTy = Type::getDoubleTy(pJitMgr->mContext); -mInt1Ty = Type::getInt1Ty(pJitMgr->mContext); -mInt8Ty = Type::getInt8Ty(pJitMgr->mContext); -mInt16Ty = Type::getInt16Ty(pJitMgr->mContext); -mInt32Ty = Type::getInt32Ty(pJitMgr->mContext); -mInt8PtrTy = PointerType::get(mInt8Ty, 0); +// Built in types: scalar + +mVoidTy = Type::getVoidTy(pJitMgr->mContext); +mFP16Ty = Type::getHalfTy(pJitMgr->mContext); +mFP32Ty = Type::getFloatTy(pJitMgr->mContext); +mFP32PtrTy = PointerType::get(mFP32Ty, 0); +mDoubleTy = 
Type::getDoubleTy(pJitMgr->mContext); +mInt1Ty = Type::getInt1Ty(pJitMgr->mContext); +mInt8Ty = Type::getInt8Ty(pJitMgr->mContext); +mInt16Ty= Type::getInt16Ty(pJitMgr->mContext); +mInt32Ty= Type::getInt32Ty(pJitMgr->mContext); +mInt8PtrTy = PointerType::get(mInt8Ty, 0); mInt16PtrTy = PointerType::get(mInt16Ty, 0); mInt32PtrTy = PointerType::get(mInt32Ty, 0); -mInt64Ty = Type::getInt64Ty(pJitMgr->mContext); -mSimdInt1Ty = VectorType::get(mInt1Ty, mVWidth); -mSimdInt16Ty = VectorType::get(mInt16Ty, mVWidth); -mSimdInt32Ty = VectorType::get(mInt32Ty, mVWidth); -mSimdInt64Ty = VectorType::get(mInt64Ty, mVWidth); -mSimdFP16Ty = VectorType::get(mFP16Ty, mVWidth); -mSimdFP32Ty = VectorType::get(mFP32Ty, mVWidth); -mSimdVectorTy = ArrayType::get(mSimdFP32Ty, 4); +mInt64Ty= Type::getInt64Ty(pJitMgr->mContext); + +// Built in types: simd8 + +mSimdInt1Ty = VectorType::get(mInt1Ty, mVWidth); +mSimdInt16Ty= VectorType::get(mInt16Ty, mVWidth); +mSimdInt32Ty= VectorType::get(mInt32Ty, mVWidth); +mSimdInt64Ty= VectorType::get(mInt64Ty, mVWidth); +mSimdFP16Ty = VectorType::get(mFP16Ty, mVWidth); +mSimdFP32Ty = VectorType::get(mFP32Ty, mVWidth); +mSimdVectorTy = ArrayType::get(mSimdFP32Ty, 4); mSimdVectorTRTy = ArrayType::get(mSimdFP32Ty, 5); -#if USE_SIMD16_BUILDER -mSimd2Int1Ty = VectorType::get(mInt1Ty, mVWidth2); -mSimd2Int16Ty = VectorType::get(mInt16Ty, mVWidth2); -mSimd2Int32Ty = VectorType::get(mInt32Ty, mVWidth2); -mSimd2Int64Ty = VectorType::get(mInt64Ty, mVWidth2); -mSimd2FP16Ty = VectorType::get(mFP16Ty, mVWidth2); -mSimd2FP32Ty = VectorType::get(mFP32Ty, mVWidth2); -mSimd2VectorTy = ArrayType::get(mSimd2FP32Ty, 4); -mSimd2VectorTRTy = ArrayType::get(mSimd2FP32Ty, 5); -#endif + +// Built in types: simd16 + +mSimd16Int1Ty = VectorType::get(mInt1Ty, mVWidth16); +mSimd16Int16Ty = VectorType::get(mInt16Ty, mVWidth16); +mSimd16Int32Ty = VectorType::get(mInt32Ty, mVWidth16); +mSimd16Int64Ty = VectorType::get(mInt64Ty, mVWidth16); +mSimd16FP16Ty = VectorType::get(mFP16Ty, 
mVWidth16); +mSimd16FP32Ty = VectorType::get(mFP32Ty, mVWidth16); +mSimd16VectorTy = ArrayType::get(mSimd16FP32Ty, 4); +mSimd16VectorTRTy = ArrayType::get(mSimd16FP32Ty, 5); if (sizeof(uint3
Mesa (master): swr/rast: SIMD16 fetch shader jitter cleanup
Module: Mesa Branch: master Commit: 04d0bfde3962ee76ee7310b3dee5e0f72d2b4c17 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=04d0bfde3962ee76ee7310b3dee5e0f72d2b4c17 Author: Tim Rowley <timothy.o.row...@intel.com> Date: Fri Dec 22 13:58:08 2017 -0600 swr/rast: SIMD16 fetch shader jitter cleanup Bake in USE_SIMD16_BUILDER code paths (for USE_SIMD16_SHADER defined), remove USE_SIMD16_BUILDER define, remove deprecated pseudo-SIMD16 code paths. Reviewed-by: Bruce Cherniak <bruce.chern...@intel.com> --- .../drivers/swr/rasterizer/jitter/fetch_jit.cpp| 1118 +++- 1 file changed, 383 insertions(+), 735 deletions(-) Diff: http://cgit.freedesktop.org/mesa/mesa/diff/?id=04d0bfde3962ee76ee7310b3dee5e0f72d2b4c17 ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): swr/rast: autogenerate named structs instead of literal structs
Module: Mesa Branch: master Commit: f5f1bbcb5c66c55a45e47c71685ca6709b714390 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=f5f1bbcb5c66c55a45e47c71685ca6709b714390 Author: Tim Rowley <timothy.o.row...@intel.com> Date: Thu Dec 28 17:56:03 2017 -0600 swr/rast: autogenerate named structs instead of literal structs Results in far smaller and useful IR output. Reviewed-by: Bruce Cherniak <bruce.chern...@intel.com> --- .../swr/rasterizer/codegen/templates/gen_llvm.hpp | 23 ++ 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_llvm.hpp b/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_llvm.hpp index 18ea781713..574ee5aaa7 100644 --- a/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_llvm.hpp +++ b/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_llvm.hpp @@ -40,15 +40,22 @@ namespace SwrJit INLINE static StructType *Gen_${type['name']}(JitManager* pJitMgr) { LLVMContext& ctx = pJitMgr->mContext; -std::vector<Type*> members; -<% -(max_type_len, max_name_len) = calc_max_len(type['members']) -%> -%for member in type['members']: -/* ${member['name']} ${pad(len(member['name']), max_name_len)}*/ members.push_back( ${member['type']} ); -%endfor -return StructType::get(ctx, members, false); +StructType* pRetType = pJitMgr->mpCurrentModule->getTypeByName("${type['name']}"); +if (pRetType == nullptr) +{ +std::vector<Type*> members; +<% +(max_type_len, max_name_len) = calc_max_len(type['members']) +%> +%for member in type['members']: +/* ${member['name']} ${pad(len(member['name']), max_name_len)}*/ members.push_back(${ member['type'] }); +%endfor + +pRetType = StructType::create(members, "${type['name']}", false); +} + +return pRetType; } %for member in type['members']: ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): swr/rast: shuffle header files for msvc pre-compiled header usage
Module: Mesa Branch: master Commit: d3a4c8057dfd31b562a8007a511f1de88a153528 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=d3a4c8057dfd31b562a8007a511f1de88a153528 Author: Tim Rowley <timothy.o.row...@intel.com> Date: Thu Dec 21 11:01:37 2017 -0600 swr/rast: shuffle header files for msvc pre-compiled header usage Reviewed-by: Bruce Cherniak <bruce.chern...@intel.com> --- src/gallium/drivers/swr/Makefile.sources | 1 + .../drivers/swr/rasterizer/jitter/JitManager.cpp | 36 +- .../drivers/swr/rasterizer/jitter/JitManager.h | 46 +-- .../drivers/swr/rasterizer/jitter/blend_jit.cpp| 3 +- .../drivers/swr/rasterizer/jitter/builder.cpp | 1 + .../drivers/swr/rasterizer/jitter/builder_misc.cpp | 1 + .../drivers/swr/rasterizer/jitter/fetch_jit.cpp| 3 +- .../drivers/swr/rasterizer/jitter/jit_api.h| 1 - .../drivers/swr/rasterizer/jitter/jit_pch.hpp | 134 + .../swr/rasterizer/jitter/streamout_jit.cpp| 5 +- 10 files changed, 143 insertions(+), 88 deletions(-) diff --git a/src/gallium/drivers/swr/Makefile.sources b/src/gallium/drivers/swr/Makefile.sources index 53f8bf011b..cd2040e137 100644 --- a/src/gallium/drivers/swr/Makefile.sources +++ b/src/gallium/drivers/swr/Makefile.sources @@ -145,6 +145,7 @@ JITTER_CXX_SOURCES := \ rasterizer/jitter/fetch_jit.cpp \ rasterizer/jitter/fetch_jit.h \ rasterizer/jitter/jit_api.h \ + rasterizer/jitter/jit_pch.hpp \ rasterizer/jitter/JitManager.cpp \ rasterizer/jitter/JitManager.h \ rasterizer/jitter/streamout_jit.cpp \ diff --git a/src/gallium/drivers/swr/rasterizer/jitter/JitManager.cpp b/src/gallium/drivers/swr/rasterizer/jitter/JitManager.cpp index 59672bb545..883ac77482 100644 --- a/src/gallium/drivers/swr/rasterizer/jitter/JitManager.cpp +++ b/src/gallium/drivers/swr/rasterizer/jitter/JitManager.cpp @@ -27,41 +27,7 @@ * Notes: * **/ -#if defined(_WIN32) -#pragma warning(disable: 4800 4146 4244 4267 4355 4996) -#endif - -#pragma push_macro("DEBUG") -#undef DEBUG - -#if defined(_WIN32) -#include "llvm/ADT/Triple.h" -#endif -#include 
"llvm/IR/Function.h" - -#include "llvm/Support/MemoryBuffer.h" -#include "llvm/Support/SourceMgr.h" - -#include "llvm/Analysis/CFGPrinter.h" -#include "llvm/IRReader/IRReader.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Support/FormattedStream.h" -#include "llvm/Support/Path.h" -#include "llvm/Support/MemoryBuffer.h" -#include "llvm/Config/llvm-config.h" - -#if LLVM_VERSION_MAJOR < 4 -#include "llvm/Bitcode/ReaderWriter.h" -#else -#include "llvm/Bitcode/BitcodeWriter.h" -#include "llvm/Bitcode/BitcodeReader.h" -#endif - -#if LLVM_USE_INTEL_JITEVENTS -#include "llvm/ExecutionEngine/JITEventListener.h" -#endif - -#pragma pop_macro("DEBUG") +#include "jit_pch.hpp" #include "JitManager.h" #include "jit_api.h" diff --git a/src/gallium/drivers/swr/rasterizer/jitter/JitManager.h b/src/gallium/drivers/swr/rasterizer/jitter/JitManager.h index c30a807222..9e5e4cf2b6 100644 --- a/src/gallium/drivers/swr/rasterizer/jitter/JitManager.h +++ b/src/gallium/drivers/swr/rasterizer/jitter/JitManager.h @@ -29,52 +29,9 @@ **/ #pragma once -#if defined(_WIN32) -#pragma warning(disable : 4146 4244 4267 4800 4996) -#endif - -// llvm 3.7+ reuses "DEBUG" as an enum value -#pragma push_macro("DEBUG") -#undef DEBUG - -#include "llvm/IR/DataLayout.h" -#include "llvm/IR/Instructions.h" -#include "llvm/IR/LLVMContext.h" -#include "llvm/IR/Module.h" -#include "llvm/IR/Type.h" -#include "llvm/IR/IRBuilder.h" -#include "llvm/IR/IntrinsicInst.h" -#include "llvm/ExecutionEngine/ObjectCache.h" - -#include "llvm/Config/llvm-config.h" - -#include "llvm/IR/Verifier.h" -#include "llvm/ExecutionEngine/MCJIT.h" -#include "llvm/Support/FileSystem.h" -#define LLVM_F_NONE sys::fs::F_None - -#include "llvm/Analysis/Passes.h" - -#include "llvm/IR/LegacyPassManager.h" -using FunctionPassManager = llvm::legacy::FunctionPassManager; -using PassManager = llvm::legacy::PassManager; - -#include "llvm/CodeGen/Passes.h" -#include "llvm/ExecutionEngine/ExecutionEngine.h" -#include 
"llvm/Support/raw_ostream.h" -#include "llvm/Support/TargetSelect.h" -#include "llvm/Transforms/IPO.h" -#include "llvm/Transforms/Scalar.h" -#include "llvm/Support/Host.h" -#include "llvm/Support/DynamicLibrary.h" - - -#includ
Mesa (master): swr/rast: don't use 32-bit gathers for elements < 32-bits in size
Module: Mesa Branch: master Commit: 3d4d34e380f33e9daa86ff3aa4c06a56c5fa1318 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=3d4d34e380f33e9daa86ff3aa4c06a56c5fa1318 Author: Tim Rowley <timothy.o.row...@intel.com> Date: Wed Jan 3 11:58:50 2018 -0600 swr/rast: don't use 32-bit gathers for elements < 32-bits in size Using a gather for elements less than 32-bits in size can cause pagefaults when loading the last elements in a page-aligned-sized buffer. Reviewed-by: Bruce Cherniak <bruce.chern...@intel.com> --- .../drivers/swr/rasterizer/jitter/fetch_jit.cpp| 61 +- 1 file changed, 60 insertions(+), 1 deletion(-) diff --git a/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp b/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp index 99a936d176..ad70cbe95d 100644 --- a/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp +++ b/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp @@ -741,7 +741,66 @@ void FetchJit::CreateGatherOddFormats(SWR_FORMAT format, Value* pMask, Value* pB // only works if pixel size is <= 32bits SWR_ASSERT(info.bpp <= 32); -Value *pGather = GATHERDD(VIMMED1(0), pBase, pOffsets, pMask); +Value *pGather; +if (info.bpp == 32) +{ +pGather = GATHERDD(VIMMED1(0), pBase, pOffsets, pMask); +} +else +{ +// Can't use 32-bit gather for items less than 32-bits, could cause page faults. 
+Value *pMem = ALLOCA(mSimdInt32Ty); +STORE(VIMMED1(0u), pMem); + +pBase = BITCAST(pBase, PointerType::get(mInt8Ty, 0)); +Value* pDstMem = BITCAST(pMem, mInt32PtrTy); + +for (uint32_t lane = 0; lane < mVWidth; ++lane) +{ +// Get index +Value* index = VEXTRACT(pOffsets, C(lane)); +Value* mask = VEXTRACT(pMask, C(lane)); +switch (info.bpp) +{ +case 8: +{ +Value* pDst = BITCAST(GEP(pDstMem, C(lane)), PointerType::get(mInt8Ty, 0)); +Value* pSrc = BITCAST(GEP(pBase, index), PointerType::get(mInt8Ty, 0)); +STORE(LOAD(SELECT(mask, pSrc, pDst)), pDst); +break; +} + +case 16: +{ +Value* pDst = BITCAST(GEP(pDstMem, C(lane)), PointerType::get(mInt16Ty, 0)); +Value* pSrc = BITCAST(GEP(pBase, index), PointerType::get(mInt16Ty, 0)); +STORE(LOAD(SELECT(mask, pSrc, pDst)), pDst); +break; +} +break; + +case 24: +{ +// First 16-bits of data +Value* pDst = BITCAST(GEP(pDstMem, C(lane)), PointerType::get(mInt16Ty, 0)); +Value* pSrc = BITCAST(GEP(pBase, index), PointerType::get(mInt16Ty, 0)); +STORE(LOAD(SELECT(mask, pSrc, pDst)), pDst); + +// Last 8-bits of data +pDst = BITCAST(GEP(pDst, C(1)), PointerType::get(mInt8Ty, 0)); +pSrc = BITCAST(GEP(pSrc, C(1)), PointerType::get(mInt8Ty, 0)); +STORE(LOAD(SELECT(mask, pSrc, pDst)), pDst); +break; +} + +default: +SWR_INVALID("Shouldn't have BPP = %d now", info.bpp); +break; +} +} + +pGather = LOAD(pMem); +} for (uint32_t comp = 0; comp < 4; ++comp) { ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): swr/rast: switch win32 jit format to COFF
Module: Mesa Branch: master Commit: c259888c52a3cd9f6dd39cc33e919540435e5f5a URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=c259888c52a3cd9f6dd39cc33e919540435e5f5a Author: Tim Rowley <timothy.o.row...@intel.com> Date: Thu Jan 4 10:27:13 2018 -0600 swr/rast: switch win32 jit format to COFF Allows for call-stack and exception handling for jitted functions. Reviewed-by: Bruce Cherniak <bruce.chern...@intel.com> --- src/gallium/drivers/swr/rasterizer/jitter/JitManager.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/gallium/drivers/swr/rasterizer/jitter/JitManager.cpp b/src/gallium/drivers/swr/rasterizer/jitter/JitManager.cpp index 883ac77482..508bc027dd 100644 --- a/src/gallium/drivers/swr/rasterizer/jitter/JitManager.cpp +++ b/src/gallium/drivers/swr/rasterizer/jitter/JitManager.cpp @@ -92,7 +92,7 @@ JitManager::JitManager(uint32_t simdWidth, const char *arch, const char* core) #if defined(_WIN32) // Needed for MCJIT on windows Triple hostTriple(sys::getProcessTriple()); -hostTriple.setObjectFormat(Triple::ELF); +hostTriple.setObjectFormat(Triple::COFF); mpCurrentModule->setTargetTriple(hostTriple.getTriple()); #endif // _WIN32 @@ -486,4 +486,4 @@ std::unique_ptr JitCache::getObject(const llvm::Module* M) fclose(fpIn); return pBuf; -} +} \ No newline at end of file ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): swr/rast: fix invalid sign masks in avx512 simdlib code
Module: Mesa Branch: master Commit: 396c006d907b023f9b187db618ee2a6e4e1b8a85 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=396c006d907b023f9b187db618ee2a6e4e1b8a85 Author: Tim Rowley <timothy.o.row...@intel.com> Date: Thu Jan 4 10:08:48 2018 -0600 swr/rast: fix invalid sign masks in avx512 simdlib code Should be 0x80000000 instead of 0x8000000. Cc: mesa-sta...@lists.freedesktop.org Reviewed-by: Bruce Cherniak <bruce.chern...@intel.com> --- src/gallium/drivers/swr/rasterizer/common/simdlib_128_avx512.inl | 2 +- src/gallium/drivers/swr/rasterizer/common/simdlib_256_avx512.inl | 2 +- src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512.inl | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/gallium/drivers/swr/rasterizer/common/simdlib_128_avx512.inl b/src/gallium/drivers/swr/rasterizer/common/simdlib_128_avx512.inl index 66e8309610..b70a7691e2 100644 --- a/src/gallium/drivers/swr/rasterizer/common/simdlib_128_avx512.inl +++ b/src/gallium/drivers/swr/rasterizer/common/simdlib_128_avx512.inl @@ -270,7 +270,7 @@ static SIMDINLINE Float SIMDCALL mask_i32gather_ps(Float old, float const* p, In { __mmask16 m = 0xf; m = _mm512_mask_test_epi32_mask(m, _mm512_castps_si512(__conv(mask)), -_mm512_set1_epi32(0x8000000)); +_mm512_set1_epi32(0x80000000)); return __conv(_mm512_mask_i32gather_ps( __conv(old), m, diff --git a/src/gallium/drivers/swr/rasterizer/common/simdlib_256_avx512.inl b/src/gallium/drivers/swr/rasterizer/common/simdlib_256_avx512.inl index 3f93cfbd7f..3fcfd250f9 100644 --- a/src/gallium/drivers/swr/rasterizer/common/simdlib_256_avx512.inl +++ b/src/gallium/drivers/swr/rasterizer/common/simdlib_256_avx512.inl @@ -271,7 +271,7 @@ static SIMDINLINE Float SIMDCALL mask_i32gather_ps(Float old, float const* p, In { __mmask16 m = 0xff; m = _mm512_mask_test_epi32_mask(m, _mm512_castps_si512(__conv(mask)), -_mm512_set1_epi32(0x8000000)); +_mm512_set1_epi32(0x80000000)); return __conv(_mm512_mask_i32gather_ps( __conv(old), m, diff --git 
a/src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512.inl b/src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512.inl index c13b9f616a..8de62f2a7e 100644 --- a/src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512.inl +++ b/src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512.inl @@ -540,7 +540,7 @@ static SIMDINLINE uint32_t SIMDCALL movemask_pd(Double a) } static SIMDINLINE uint32_t SIMDCALL movemask_ps(Float a) { -__mmask16 m = _mm512_test_epi32_mask(castps_si(a), set1_epi32(0x8000000)); +__mmask16 m = _mm512_test_epi32_mask(castps_si(a), set1_epi32(0x80000000)); return static_cast(m); } ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): swr/rast: fix MemoryBuffer build break for llvm-6
Module: Mesa Branch: master Commit: ad218754c79e0af61d5ba225a4b195cb55c2cac9 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=ad218754c79e0af61d5ba225a4b195cb55c2cac9 Author: Tim Rowley <timothy.o.row...@intel.com> Date: Tue Jan 2 10:48:21 2018 -0600 swr/rast: fix MemoryBuffer build break for llvm-6 LLVM api change. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=104381 Tested-by: Laurent Carlier <lordhea...@gmail.com> Reviewed-By: Bruce Cherniak <bruce.chern...@intel.com> --- src/gallium/drivers/swr/rasterizer/jitter/JitManager.cpp | 4 1 file changed, 4 insertions(+) diff --git a/src/gallium/drivers/swr/rasterizer/jitter/JitManager.cpp b/src/gallium/drivers/swr/rasterizer/jitter/JitManager.cpp index 3f0772c942..59672bb545 100644 --- a/src/gallium/drivers/swr/rasterizer/jitter/JitManager.cpp +++ b/src/gallium/drivers/swr/rasterizer/jitter/JitManager.cpp @@ -498,7 +498,11 @@ std::unique_ptr JitCache::getObject(const llvm::Module* M) break; } +#if LLVM_VERSION_MAJOR < 6 pBuf = llvm::MemoryBuffer::getNewUninitMemBuffer(size_t(header.GetBufferSize())); +#else +pBuf = llvm::WritableMemoryBuffer::getNewUninitMemBuffer(size_t(header.GetBufferSize())); +#endif if (!fread(const_cast<char*>(pBuf->getBufferStart()), header.GetBufferSize(), 1, fpIn)) { pBuf = nullptr; ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): swr/rast: Binner fixes for viewport index offset handling
Module: Mesa Branch: master Commit: 0e9e24768785a4e09a785c1f3ab9c0117e82da4e URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=0e9e24768785a4e09a785c1f3ab9c0117e82da4e Author: Tim Rowley <timothy.o.row...@intel.com> Date: Wed Nov 29 10:46:49 2017 -0600 swr/rast: Binner fixes for viewport index offset handling Reviewed-by: Bruce Cherniak <bruce.chern...@intel.com> --- src/gallium/drivers/swr/rasterizer/core/binner.cpp | 9 - src/gallium/drivers/swr/rasterizer/core/clip.h | 5 - 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/src/gallium/drivers/swr/rasterizer/core/binner.cpp b/src/gallium/drivers/swr/rasterizer/core/binner.cpp index 9d1f0d8799..52375f8956 100644 --- a/src/gallium/drivers/swr/rasterizer/core/binner.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/binner.cpp @@ -470,6 +470,10 @@ void SIMDCALL BinTrianglesImpl( typename SIMD_T::Integer vClearMask = SIMD_T::cmplt_epi32(vpai, vNumViewports); viewportIdx = SIMD_T::and_si(vClearMask, vpai); } +else +{ +viewportIdx = vpai; +} if (feState.vpTransformDisable) { @@ -1326,6 +1330,10 @@ void BinPointsImpl( typename SIMD_T::Integer vClearMask = SIMD_T::cmplt_epi32(vpai, vNumViewports); viewportIdx = SIMD_T::and_si(vClearMask, vpai); } +else +{ +viewportIdx = vpai; +} if (!feState.vpTransformDisable) { @@ -1647,7 +1655,6 @@ void SIMDCALL BinLinesImpl( if (state.backendState.readViewportArrayIndex) { pa.Assemble(VERTEX_SGV_SLOT, vpiAttrib); - vpai = SIMD_T::castps_si(vpiAttrib[0][VERTEX_SGV_VAI_COMP]); } diff --git a/src/gallium/drivers/swr/rasterizer/core/clip.h b/src/gallium/drivers/swr/rasterizer/core/clip.h index 0d3d78057f..9d8bbc19e6 100644 --- a/src/gallium/drivers/swr/rasterizer/core/clip.h +++ b/src/gallium/drivers/swr/rasterizer/core/clip.h @@ -694,7 +694,6 @@ public: if (state.backendState.readViewportArrayIndex) { pa.Assemble(VERTEX_SGV_SLOT, vpiAttrib); - vpai = SIMD_T::castps_si(vpiAttrib[0][VERTEX_SGV_VAI_COMP]); } @@ -707,6 +706,10 @@ public: typename SIMD_T::Integer vClearMask = 
SIMD_T::cmplt_epi32(vpai, vNumViewports); viewportIdx = SIMD_T::and_si(vClearMask, vpai); } +else +{ +viewportIdx = vpai; +} ComputeClipCodes(prim, viewportIdx); ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): swr/rast: Pass prim to ClipSimd
Module: Mesa Branch: master Commit: fbc27ff0279c76542fd8e3c61562ca69fa539272 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=fbc27ff0279c76542fd8e3c61562ca69fa539272 Author: Tim Rowley <timothy.o.row...@intel.com> Date: Thu Dec 7 17:54:40 2017 -0600 swr/rast: Pass prim to ClipSimd Reviewed-by: Bruce Cherniak <bruce.chern...@intel.com> --- src/gallium/drivers/swr/rasterizer/core/clip.h | 10 +- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/gallium/drivers/swr/rasterizer/core/clip.h b/src/gallium/drivers/swr/rasterizer/core/clip.h index 148f661ab4..8b947668d3 100644 --- a/src/gallium/drivers/swr/rasterizer/core/clip.h +++ b/src/gallium/drivers/swr/rasterizer/core/clip.h @@ -437,7 +437,7 @@ public: return SIMD_T::movemask_ps(vClipCullMask); } -void ClipSimd(const typename SIMD_T::Float , const typename SIMD_T::Float , PA_STATE , const typename SIMD_T::Integer , const typename SIMD_T::Integer ) +void ClipSimd(const typename SIMD_T::Vec4 prim[], const typename SIMD_T::Float , const typename SIMD_T::Float , PA_STATE , const typename SIMD_T::Integer , const typename SIMD_T::Integer ) { // input/output vertex store for clipper SIMDVERTEX_T vertices[7]; // maximum 7 verts generated per triangle @@ -452,10 +452,9 @@ public: // assemble pos typename SIMD_T::Vec4 tmpVector[NumVertsPerPrim]; -pa.Assemble(VERTEX_POSITION_SLOT, tmpVector); for (uint32_t i = 0; i < NumVertsPerPrim; ++i) { -vertices[i].attrib[VERTEX_POSITION_SLOT] = tmpVector[i]; +vertices[i].attrib[VERTEX_POSITION_SLOT] = prim[i]; } // assemble attribs @@ -568,7 +567,8 @@ public: SIMDVERTEX_T transposedPrims[2]; #endif -for (uint32_t inputPrim = 0; inputPrim < pa.NumPrims(); ++inputPrim) +uint32_t numInputPrims = pa.NumPrims(); +for (uint32_t inputPrim = 0; inputPrim < numInputPrims; ++inputPrim) { uint32_t numEmittedVerts = pVertexCount[inputPrim]; if (numEmittedVerts < NumVertsPerPrim) @@ -716,7 +716,7 @@ public: AR_BEGIN(FEGuardbandClip, pa.pDC->drawId); // we have to clip tris, execute 
the clipper, which will also // call the binner -ClipSimd(SIMD_T::vmask_ps(primMask), SIMD_T::vmask_ps(clipMask), pa, primId, viewportIdx); +ClipSimd(prim, SIMD_T::vmask_ps(primMask), SIMD_T::vmask_ps(clipMask), pa, primId, viewportIdx); AR_END(FEGuardbandClip, 1); } else if (validMask) ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): swr/rast: Remove unneeded copy of gather mask
Module: Mesa Branch: master Commit: f2e3900a1e7b48b640bd9fa32d2e1285e397fad0 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=f2e3900a1e7b48b640bd9fa32d2e1285e397fad0 Author: Tim Rowley <timothy.o.row...@intel.com> Date: Tue Nov 21 11:05:08 2017 -0600 swr/rast: Remove unneeded copy of gather mask Reviewed-by: Bruce Cherniak <bruce.chern...@intel.com> --- .../drivers/swr/rasterizer/jitter/builder_misc.cpp | 22 +- .../drivers/swr/rasterizer/jitter/fetch_jit.cpp| 80 ++ 2 files changed, 23 insertions(+), 79 deletions(-) diff --git a/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp b/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp index 8ffe05b41c..0221106664 100644 --- a/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp +++ b/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp @@ -1107,23 +1107,19 @@ namespace SwrJit } void Builder::GATHER4PS(const SWR_FORMAT_INFO , Value* pSrcBase, Value* byteOffsets, -Value* mask, Value* vGatherComponents[], bool bPackedOutput) +Value* vMask, Value* vGatherComponents[], bool bPackedOutput) { switch(info.bpp / info.numComps) { case 16: { Value* vGatherResult[2]; -Value *vMask; // TODO: vGatherMaskedVal Value* vGatherMaskedVal = VIMMED1((float)0); // always have at least one component out of x or y to fetch -// save mask as it is zero'd out after each gather -vMask = mask; - vGatherResult[0] = GATHERPS(vGatherMaskedVal, pSrcBase, byteOffsets, vMask); // e.g. result of first 8x32bit integer gather for 16bit components // 256i - 01234567 @@ -1135,7 +1131,6 @@ namespace SwrJit { // offset base to the next components(zw) in the vertex to gather pSrcBase = GEP(pSrcBase, C((char)4)); -vMask = mask; vGatherResult[1] = GATHERPS(vGatherMaskedVal, pSrcBase, byteOffsets, vMask); // e.g. 
result of second 8x32bit integer gather for 16bit components @@ -1164,9 +1159,6 @@ namespace SwrJit { uint32_t swizzleIndex = info.swizzle[i]; -// save mask as it is zero'd out after each gather -Value *vMask = mask; - // Gather a SIMD of components vGatherComponents[swizzleIndex] = GATHERPS(vGatherComponents[swizzleIndex], pSrcBase, byteOffsets, vMask); @@ -1182,14 +1174,14 @@ namespace SwrJit } void Builder::GATHER4DD(const SWR_FORMAT_INFO , Value* pSrcBase, Value* byteOffsets, -Value* mask, Value* vGatherComponents[], bool bPackedOutput) +Value* vMask, Value* vGatherComponents[], bool bPackedOutput) { switch (info.bpp / info.numComps) { case 8: { Value* vGatherMaskedVal = VIMMED1((int32_t)0); -Value* vGatherResult = GATHERDD(vGatherMaskedVal, pSrcBase, byteOffsets, mask); +Value* vGatherResult = GATHERDD(vGatherMaskedVal, pSrcBase, byteOffsets, vMask); // e.g. result of an 8x32bit integer gather for 8bit components // 256i - 01234567 //xyzw xyzw xyzw xyzw xyzw xyzw xyzw xyzw @@ -1200,16 +1192,12 @@ namespace SwrJit case 16: { Value* vGatherResult[2]; -Value *vMask; // TODO: vGatherMaskedVal Value* vGatherMaskedVal = VIMMED1((int32_t)0); // always have at least one component out of x or y to fetch -// save mask as it is zero'd out after each gather -vMask = mask; - vGatherResult[0] = GATHERDD(vGatherMaskedVal, pSrcBase, byteOffsets, vMask); // e.g. result of first 8x32bit integer gather for 16bit components // 256i - 01234567 @@ -1221,7 +1209,6 @@ namespace SwrJit { // offset base to the next components(zw) in the vertex to gather pSrcBase = GEP(pSrcBase, C((char)4)); -vMask = mask; vGatherResult[1] = GATHERDD(vGatherMaskedVal, pSrcBase, byteOffsets, vMask); // e.g. result of second 8x32bit integer gather for 16bit components @@ -1251,9 +1238,6 @@ namespace SwrJit { uint32_t swizzleIndex = info.swizzle[i]; -
Mesa (master): swr/rast: WIP - Widen fetch shader to SIMD16
Module: Mesa Branch: master Commit: 36e276b6b03da852c78e314640b3822be263def2 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=36e276b6b03da852c78e314640b3822be263def2 Author: Tim Rowley <timothy.o.row...@intel.com> Date: Sun Dec 3 18:49:29 2017 -0600 swr/rast: WIP - Widen fetch shader to SIMD16 Widen vertex gather/storage to SIMD16 for all component types. Reviewed-by: Bruce Cherniak <bruce.chern...@intel.com> --- .../drivers/swr/rasterizer/jitter/fetch_jit.cpp| 716 - 1 file changed, 689 insertions(+), 27 deletions(-) diff --git a/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp b/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp index 337bb7f660..6c0e658e68 100644 --- a/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp +++ b/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp @@ -70,6 +70,9 @@ struct FetchJit : public Builder #else void Shuffle8bpcGatherd(Shuffle8bpcArgs ); #endif +#if USE_SIMD16_BUILDER +void Shuffle8bpcGatherd2(Shuffle8bpcArgs ); +#endif typedef std::tuple<Value*(&)[2], Value*, const Instruction::CastOps, const ConversionType, uint32_t&, uint32_t&, const ComponentEnable, const ComponentControl(&)[4], Value*(&)[4]> Shuffle16bpcArgs; @@ -78,6 +81,9 @@ struct FetchJit : public Builder #else void Shuffle16bpcGather(Shuffle16bpcArgs ); #endif +#if USE_SIMD16_BUILDER +void Shuffle16bpcGather2(Shuffle16bpcArgs ); +#endif void StoreVertexElements(Value* pVtxOut, const uint32_t outputElt, const uint32_t numEltsToStore, Value* ()[4]); #if USE_SIMD16_BUILDER @@ -726,7 +732,7 @@ void FetchJit::CreateGatherOddFormats(SWR_FORMAT format, Value* pMask, Value* pB // only works if pixel size is <= 32bits SWR_ASSERT(info.bpp <= 32); - Value* pGather = GATHERDD(VIMMED1(0), pBase, pOffsets, pMask); +Value *pGather = GATHERDD(VIMMED1(0), pBase, pOffsets, pMask); for (uint32_t comp = 0; comp < 4; ++comp) { @@ -825,6 +831,9 @@ void FetchJit::JitGatherVertices(const FETCH_COMPILE_STATE , Value* vVertexElements[4]; #if USE_SIMD16_GATHERS 
Value* vVertexElements2[4]; +#if USE_SIMD16_BUILDER +Value *pVtxSrc2[4]; +#endif #endif Value* startVertex = LOAD(mpFetchInfo, {0, SWR_FETCH_CONTEXT_StartVertex}); @@ -961,6 +970,7 @@ void FetchJit::JitGatherVertices(const FETCH_COMPILE_STATE , #if USE_SIMD16_GATHERS // override cur indices with 0 if pitch is 0 Value* pZeroPitchMask = ICMP_EQ(vStride, VIMMED1(0)); +vCurIndices = SELECT(pZeroPitchMask, VIMMED1(0), vCurIndices); vCurIndices2 = SELECT(pZeroPitchMask, VIMMED1(0), vCurIndices2); // are vertices partially OOB? @@ -983,7 +993,7 @@ void FetchJit::JitGatherVertices(const FETCH_COMPILE_STATE , // only fetch lanes that pass both tests vGatherMask = AND(vMaxGatherMask, vMinGatherMask); -vGatherMask2 = AND(vMaxGatherMask, vMinGatherMask2); +vGatherMask2 = AND(vMaxGatherMask2, vMinGatherMask2); } else { @@ -1074,15 +1084,32 @@ void FetchJit::JitGatherVertices(const FETCH_COMPILE_STATE , { if (isComponentEnabled(compMask, c)) { -vVertexElements[currentVertexElement] = pResults[c]; +#if USE_SIMD16_BUILDER +// pack adjacent pairs of SIMD8s into SIMD16s +pVtxSrc2[currentVertexElement] = VUNDEF2_F(); +pVtxSrc2[currentVertexElement] = INSERT2_F(pVtxSrc2[currentVertexElement], pResults[c], 0); +pVtxSrc2[currentVertexElement] = INSERT2_F(pVtxSrc2[currentVertexElement], pResults2[c], 1); + +#else +vVertexElements[currentVertexElement] = pResults[c]; vVertexElements2[currentVertexElement] = pResults2[c]; -currentVertexElement++; + +#endif +currentVertexElement += 1; if (currentVertexElement > 3) { +#if USE_SIMD16_BUILDER +// store SIMD16s +Value *pVtxOut2 = BITCAST(pVtxOut, PointerType::get(VectorType::get(mFP32Ty, mVWidth2), 0)); + +StoreVertexElements2(pVtxOut2, outputElt, 4, pVtxSrc2); + +#else StoreVertexElements(pVtxOut, outputElt, 4, vVertexElements); StoreVertexElements(GEP(pVtxOut, C(1)), outputElt, 4, vVertexElements2); +#endif outputElt += 1; // reset to the next vVertexElement to output @@ -1113,9 +1140,12 @@ void FetchJit::JitGatherVertices(const 
FETCH_COMPILE_STATE , else if(info.type[0] == SWR_TYPE_FLOAT) { ///@todo: support 64 bit vb accesses -Value* gatherSrc = VIMMED1(0.0f); +Value *gatherSrc = VIMMED1(0.0f); #if
Mesa (master): swr/rast: SIMD16 Fetch - Fully widen 16-bit float vertex components
Module: Mesa Branch: master Commit: b38ac9dca1536062d5167e6c3c1f587a27ea3d58 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=b38ac9dca1536062d5167e6c3c1f587a27ea3d58 Author: Tim Rowley <timothy.o.row...@intel.com> Date: Fri Dec 8 13:59:19 2017 -0600 swr/rast: SIMD16 Fetch - Fully widen 16-bit float vertex components Reviewed-by: Bruce Cherniak <bruce.chern...@intel.com> --- .../drivers/swr/rasterizer/jitter/fetch_jit.cpp| 55 +++--- 1 file changed, 48 insertions(+), 7 deletions(-) diff --git a/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp b/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp index 2065db3475..c960dc77fb 100644 --- a/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp +++ b/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp @@ -1277,6 +1277,43 @@ void FetchJit::JitGatherVertices(const FETCH_COMPILE_STATE , case 16: { #if USE_SIMD16_GATHERS +#if USE_SIMD16_BUILDER +Value *gatherResult[2]; + +// if we have at least one component out of x or y to fetch +if (isComponentEnabled(compMask, 0) || isComponentEnabled(compMask, 1)) +{ +gatherResult[0] = GATHERPS_16(gatherSrc16, pStreamBase, vOffsets16, vGatherMask16); + +// e.g. result of first 8x32bit integer gather for 16bit components +// 256i - 01234567 +//xyxy xyxy xyxy xyxy xyxy xyxy xyxy xyxy +// +} +else +{ +gatherResult[0] = VUNDEF2_I(); +} + +// if we have at least one component out of z or w to fetch +if (isComponentEnabled(compMask, 2) || isComponentEnabled(compMask, 3)) +{ +// offset base to the next components(zw) in the vertex to gather +pStreamBase = GEP(pStreamBase, C((char)4)); + +gatherResult[1] = GATHERPS_16(gatherSrc16, pStreamBase, vOffsets16, vGatherMask16); + +// e.g. 
result of second 8x32bit integer gather for 16bit components +// 256i - 01234567 +//zwzw zwzw zwzw zwzw zwzw zwzw zwzw zwzw +// +} +else +{ +gatherResult[1] = VUNDEF2_I(); +} + +#else Value *vGatherResult[2]; Value *vGatherResult2[2]; @@ -1315,10 +1352,13 @@ void FetchJit::JitGatherVertices(const FETCH_COMPILE_STATE , vGatherResult2[1] = VUNDEF_I(); } +#endif // if we have at least one component to shuffle into place if (compMask) { #if USE_SIMD16_BUILDER +#if USE_SIMD16_BUILDER +#else Value *gatherResult[2]; gatherResult[0] = VUNDEF2_I(); @@ -1330,6 +1370,7 @@ void FetchJit::JitGatherVertices(const FETCH_COMPILE_STATE , gatherResult[1] = INSERT2_I(gatherResult[1], vGatherResult[1], 0); gatherResult[1] = INSERT2_I(gatherResult[1], vGatherResult2[1], 1); +#endif Value *pVtxOut2 = BITCAST(pVtxOut, PointerType::get(VectorType::get(mFP32Ty, mVWidth2), 0)); Shuffle16bpcArgs args = std::forward_as_tuple(gatherResult, pVtxOut2, Instruction::CastOps::FPExt, CONVERT_NONE, @@ -1511,21 +1552,21 @@ void FetchJit::JitGatherVertices(const FETCH_COMPILE_STATE , // if we need to gather the component if (compCtrl[i] == StoreSrc) { -Value *vMaskLo = VSHUFFLE(vGatherMask, VUNDEF(mInt1Ty, 8), C({ 0, 1, 2, 3 })); +Value *vMaskLo = VSHUFFLE(vGatherMask, VUNDEF(mInt1Ty, 8), C({ 0, 1, 2, 3 })); Value *vMaskLo2 = VSHUFFLE(vGatherMask2, VUNDEF(mInt1Ty, 8), C({ 0, 1, 2, 3 })); -Value *vMaskHi = VSHUFFLE(vGatherMask, VUNDEF(mInt1Ty, 8), C({ 4, 5, 6, 7 })); +Value *vMaskHi = VSHUFFLE(vGatherMask, VUNDEF(mInt1Ty, 8), C({ 4, 5, 6, 7 })); Value *vMaskHi2 = VSHUFFLE(vGatherMask2, VUNDEF(mInt1Ty, 8), C({ 4, 5, 6, 7 })); -Value *vOffsetsLo = VEXTRACTI128(vOffsets, C(0)); +Value *vOffsetsLo = VEXTRACTI128(vOffsets, C(0)); Value *vOffsetsLo2 = VEXTRACTI128(vOf
Mesa (master): swr/rast: SIMD16 Fetch - Fully widen 32-bit integer vertex components
Module: Mesa Branch: master Commit: 01a57c11cb7fe85196b9cb4b5a1555e6eb239297 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=01a57c11cb7fe85196b9cb4b5a1555e6eb239297 Author: Tim Rowley <timothy.o.row...@intel.com> Date: Sun Dec 10 23:54:30 2017 -0600 swr/rast: SIMD16 Fetch - Fully widen 32-bit integer vertex components Also widen the 16-bit and 8-bit integer vertex component gathers to SIMD16. Reviewed-by: Bruce Cherniak <bruce.chern...@intel.com> --- .../swr/rasterizer/codegen/gen_llvm_ir_macros.py | 1 + .../drivers/swr/rasterizer/jitter/builder_misc.cpp | 36 + .../drivers/swr/rasterizer/jitter/builder_misc.h | 3 + .../drivers/swr/rasterizer/jitter/fetch_jit.cpp| 86 +- 4 files changed, 109 insertions(+), 17 deletions(-) diff --git a/src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_ir_macros.py b/src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_ir_macros.py index ac8b3badf6..8bbf36d9b8 100644 --- a/src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_ir_macros.py +++ b/src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_ir_macros.py @@ -46,6 +46,7 @@ intrinsics = [ ['VGATHERPS', 'x86_avx2_gather_d_ps_256', ['src', 'pBase', 'indices', 'mask', 'scale']], ['VGATHERPS_16', 'x86_avx512_gather_dps_512', ['src', 'pBase', 'indices', 'mask', 'scale']], ['VGATHERDD', 'x86_avx2_gather_d_d_256', ['src', 'pBase', 'indices', 'mask', 'scale']], +['VGATHERDD_16', 'x86_avx512_gather_dpi_512', ['src', 'pBase', 'indices', 'mask', 'scale']], ['VPSRLI', 'x86_avx2_psrli_d', ['src', 'imm']], ['VPSRLI_16', 'x86_avx512_psrli_d_512', ['src', 'imm']], ['VSQRTPS', 'x86_avx_sqrt_ps_256', ['a']], diff --git a/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp b/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp index 3a486e4c1e..684c9fac54 100644 --- a/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp +++ b/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp @@ -723,6 +723,42 @@ namespace SwrJit return vGather; } +#if USE_SIMD16_BUILDER +Value 
*Builder::GATHERDD_16(Value *vSrc, Value *pBase, Value *vIndices, Value *vMask, uint8_t scale) +{ +Value *vGather = VUNDEF2_F(); + +// use avx512 gather instruction if available +if (JM()->mArch.AVX512F()) +{ +// force mask to , required by vgather2 +Value *mask = BITCAST(vMask, mInt16Ty); + +vGather = VGATHERDD_16(vSrc, pBase, vIndices, mask, C((uint32_t)scale)); +} +else +{ +Value *src0 = EXTRACT2_F(vSrc, 0); +Value *src1 = EXTRACT2_F(vSrc, 1); + +Value *indices0 = EXTRACT2_I(vIndices, 0); +Value *indices1 = EXTRACT2_I(vIndices, 1); + +Value *vmask16 = VMASK2(vMask); + +Value *mask0 = MASK(EXTRACT2_I(vmask16, 0)); // TODO: do this better.. +Value *mask1 = MASK(EXTRACT2_I(vmask16, 1)); + +Value *gather0 = GATHERDD(src0, pBase, indices0, mask0, scale); +Value *gather1 = GATHERDD(src1, pBase, indices1, mask1, scale); + +vGather = JOIN2(gather0, gather1); +} + +return vGather; +} + +#endif // /// @brief Generate a masked gather operation in LLVM IR. If not /// supported on the underlying platform, emulate it with loads diff --git a/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.h b/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.h index 231bd6ad85..6c883d8f52 100644 --- a/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.h +++ b/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.h @@ -135,6 +135,9 @@ void GATHER4PS(const SWR_FORMAT_INFO , Value* pSrcBase, Value* byteOffsets, Value* mask, Value* vGatherComponents[], bool bPackedOutput); Value *GATHERDD(Value* src, Value* pBase, Value* indices, Value* mask, uint8_t scale = 1); +#if USE_SIMD16_BUILDER +Value *GATHERDD_16(Value *src, Value *pBase, Value *indices, Value *mask, uint8_t scale = 1); +#endif void GATHER4DD(const SWR_FORMAT_INFO , Value* pSrcBase, Value* byteOffsets, Value* mask, Value* vGatherComponents[], bool bPackedOutput); diff --git a/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp b/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp index e0a0770560..ec3b5eafcc 
100644 --- a/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp +++ b/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp @@ -1349,14 +1349,6 @@ void FetchJit::JitGatherVertices(const FETCH_COMPILE_STATE , if (compMask) { #if USE_SIMD16_BUILDER -#if USE_SIMD16_BUILDER -#else -Value *gatherResult[2]; - -gatherResult[0] = JOIN2(vGatherResult[0], v
Mesa (master): swr/rast: Move GatherScissors to header
Module: Mesa Branch: master Commit: f88289168470873ba47a51b331178cf265c155e5 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=f88289168470873ba47a51b331178cf265c155e5 Author: Tim Rowley <timothy.o.row...@intel.com> Date: Wed Dec 6 12:07:59 2017 -0600 swr/rast: Move GatherScissors to header Reviewed-by: Bruce Cherniak <bruce.chern...@intel.com> --- src/gallium/drivers/swr/rasterizer/core/binner.cpp | 127 - src/gallium/drivers/swr/rasterizer/core/binner.h | 127 + 2 files changed, 127 insertions(+), 127 deletions(-) diff --git a/src/gallium/drivers/swr/rasterizer/core/binner.cpp b/src/gallium/drivers/swr/rasterizer/core/binner.cpp index 8a5356b168..22996c5a5d 100644 --- a/src/gallium/drivers/swr/rasterizer/core/binner.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/binner.cpp @@ -212,133 +212,6 @@ INLINE void ProcessAttributes( } } -// -/// @brief Gather scissor rect data based on per-prim viewport indices. -/// @param pScissorsInFixedPoint - array of scissor rects in 16.8 fixed point. -/// @param pViewportIndex - array of per-primitive vewport indexes. -/// @param scisXmin - output vector of per-prmitive scissor rect Xmin data. -/// @param scisYmin - output vector of per-prmitive scissor rect Ymin data. -/// @param scisXmax - output vector of per-prmitive scissor rect Xmax data. -/// @param scisYmax - output vector of per-prmitive scissor rect Ymax data. -// -/// @todo: Look at speeding this up -- weigh against corresponding costs in rasterizer. 
-static void GatherScissors(const SWR_RECT *pScissorsInFixedPoint, const uint32_t *pViewportIndex, -simdscalari &scisXmin, simdscalari &scisYmin, simdscalari &scisXmax, simdscalari &scisYmax) -{ -scisXmin = _simd_set_epi32( -pScissorsInFixedPoint[pViewportIndex[7]].xmin, -pScissorsInFixedPoint[pViewportIndex[6]].xmin, -pScissorsInFixedPoint[pViewportIndex[5]].xmin, -pScissorsInFixedPoint[pViewportIndex[4]].xmin, -pScissorsInFixedPoint[pViewportIndex[3]].xmin, -pScissorsInFixedPoint[pViewportIndex[2]].xmin, -pScissorsInFixedPoint[pViewportIndex[1]].xmin, -pScissorsInFixedPoint[pViewportIndex[0]].xmin); -scisYmin = _simd_set_epi32( -pScissorsInFixedPoint[pViewportIndex[7]].ymin, -pScissorsInFixedPoint[pViewportIndex[6]].ymin, -pScissorsInFixedPoint[pViewportIndex[5]].ymin, -pScissorsInFixedPoint[pViewportIndex[4]].ymin, -pScissorsInFixedPoint[pViewportIndex[3]].ymin, -pScissorsInFixedPoint[pViewportIndex[2]].ymin, -pScissorsInFixedPoint[pViewportIndex[1]].ymin, -pScissorsInFixedPoint[pViewportIndex[0]].ymin); -scisXmax = _simd_set_epi32( -pScissorsInFixedPoint[pViewportIndex[7]].xmax, -pScissorsInFixedPoint[pViewportIndex[6]].xmax, -pScissorsInFixedPoint[pViewportIndex[5]].xmax, -pScissorsInFixedPoint[pViewportIndex[4]].xmax, -pScissorsInFixedPoint[pViewportIndex[3]].xmax, -pScissorsInFixedPoint[pViewportIndex[2]].xmax, -pScissorsInFixedPoint[pViewportIndex[1]].xmax, -pScissorsInFixedPoint[pViewportIndex[0]].xmax); -scisYmax = _simd_set_epi32( -pScissorsInFixedPoint[pViewportIndex[7]].ymax, -pScissorsInFixedPoint[pViewportIndex[6]].ymax, -pScissorsInFixedPoint[pViewportIndex[5]].ymax, -pScissorsInFixedPoint[pViewportIndex[4]].ymax, -pScissorsInFixedPoint[pViewportIndex[3]].ymax, -pScissorsInFixedPoint[pViewportIndex[2]].ymax, -pScissorsInFixedPoint[pViewportIndex[01]].ymax, -pScissorsInFixedPoint[pViewportIndex[00]].ymax); -} - -static void GatherScissors(const SWR_RECT *pScissorsInFixedPoint, const uint32_t *pViewportIndex, -simd16scalari &scisXmin, simd16scalari &scisYmin, simd16scalari &scisXmax, simd16scalari &scisYmax) -{ -scisXmin
= _simd16_set_epi32( -pScissorsInFixedPoint[pViewportIndex[15]].xmin, -pScissorsInFixedPoint[pViewportIndex[14]].xmin, -pScissorsInFixedPoint[pViewportIndex[13]].xmin, -pScissorsInFixedPoint[pViewportIndex[12]].xmin, -pScissorsInFixedPoint[pViewportIndex[11]].xmin, -pScissorsInFixedPoint[pViewportIndex[10]].xmin, -pScissorsInFixedPoint[pViewportIndex[ 9]].xmin, -pScissorsInFixedPoint[pViewportIndex[ 8]].xmin, -pScissorsInFixedPoint[pViewportIndex[ 7]].xmin, -pScissorsInFixedPoint[pViewportIndex[ 6]].xmin, -pScissorsInFixedPoint[pViewportIndex[ 5]].xmin, -pScissorsInFixedPoint[pViewportIndex[ 4]].xmin, -pScissorsInFixedPoint[pViewportIndex[ 3]].xmin, -pScissorsInFixedPoint[pViewportIndex[ 2]].xmin, -pScissorsInFixedPoint[pViewportIndex[ 1]].xmin, -pScissorsInFixedPoint[pViewportIndex[ 0]].xmin); - -scisYmin = _simd16_set_epi32( -pScissorsInFixedPoint[pViewportIndex[15]].ymin, -pScissorsInFixedPoint[pViewportInd
Mesa (master): swr/rast: Rework thread binding parameters for machine partitioning
Module: Mesa Branch: master Commit: 20f9006603139a479b756c593c04a540041e3471 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=20f9006603139a479b756c593c04a540041e3471 Author: Tim Rowley <timothy.o.row...@intel.com> Date: Mon Dec 11 17:45:58 2017 -0600 swr/rast: Rework thread binding parameters for machine partitioning Add BASE_NUMA_NODE, BASE_CORE, BASE_THREAD parameters to SwrCreateContext. Add optional SWR_API_THREADING_INFO parameter to SwrCreateContext to control reservation of API threads. Add SwrBindApiThread() function to allow binding of API threads to reserved HW threads. Reviewed-by: Bruce Cherniak <bruce.chern...@intel.com> --- .../drivers/swr/rasterizer/codegen/knob_defs.py| 29 +- src/gallium/drivers/swr/rasterizer/core/api.cpp| 40 ++- src/gallium/drivers/swr/rasterizer/core/api.h | 33 +++ src/gallium/drivers/swr/rasterizer/core/context.h | 1 + .../drivers/swr/rasterizer/core/threads.cpp| 299 +++-- src/gallium/drivers/swr/rasterizer/core/threads.h | 4 + .../drivers/swr/rasterizer/core/tilemgr.cpp| 4 +- 7 files changed, 322 insertions(+), 88 deletions(-) diff --git a/src/gallium/drivers/swr/rasterizer/codegen/knob_defs.py b/src/gallium/drivers/swr/rasterizer/codegen/knob_defs.py index 09e3124602..30803927e3 100644 --- a/src/gallium/drivers/swr/rasterizer/codegen/knob_defs.py +++ b/src/gallium/drivers/swr/rasterizer/codegen/knob_defs.py @@ -62,15 +62,33 @@ KNOBS = [ 'category' : 'perf', }], -['MAX_NUMA_NODES', { +['BASE_NUMA_NODE', { 'type' : 'uint32_t', 'default' : '0', +'desc' : ['Starting NUMA node index to use when allocating compute resources.', + 'Setting this to a non-zero value will reduce the maximum # of NUMA nodes used.'], +'category' : 'perf', +'advanced' : True, +}], + +['MAX_NUMA_NODES', { +'type' : 'uint32_t', +'default' : '1' if sys.platform == 'win32' else '0', 'desc' : ['Maximum # of NUMA-nodes per system used for worker threads', ' 0 == ALL NUMA-nodes in the system', ' N == Use at most N NUMA-nodes for rendering'], 'category' : 
'perf', }], +['BASE_CORE', { +'type' : 'uint32_t', +'default' : '0', +'desc' : ['Starting core index to use when allocating compute resources.', + 'Setting this to a non-zero value will reduce the maximum # of cores used.'], +'category' : 'perf', +'advanced' : True, +}], + ['MAX_CORES_PER_NUMA_NODE', { 'type' : 'uint32_t', 'default' : '0', @@ -80,6 +98,15 @@ KNOBS = [ 'category' : 'perf', }], +['BASE_THREAD', { +'type' : 'uint32_t', +'default' : '0', +'desc' : ['Starting thread index to use when allocating compute resources.', + 'Setting this to a non-zero value will reduce the maximum # of threads used.'], +'category' : 'perf', +'advanced' : True, +}], + ['MAX_THREADS_PER_CORE', { 'type' : 'uint32_t', 'default' : '1', diff --git a/src/gallium/drivers/swr/rasterizer/core/api.cpp b/src/gallium/drivers/swr/rasterizer/core/api.cpp index 9265440904..25a3f34841 100644 --- a/src/gallium/drivers/swr/rasterizer/core/api.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/api.cpp @@ -95,16 +95,32 @@ HANDLE SwrCreateContext( pContext->dsRing[dc].pArena = new CachingArena(pContext->cachingArenaAllocator); } -pContext->threadInfo.MAX_WORKER_THREADS= KNOB_MAX_WORKER_THREADS; -pContext->threadInfo.MAX_NUMA_NODES= KNOB_MAX_NUMA_NODES; -pContext->threadInfo.MAX_CORES_PER_NUMA_NODE = KNOB_MAX_CORES_PER_NUMA_NODE; -pContext->threadInfo.MAX_THREADS_PER_CORE = KNOB_MAX_THREADS_PER_CORE; -pContext->threadInfo.SINGLE_THREADED = KNOB_SINGLE_THREADED; - if (pCreateInfo->pThreadInfo) { pContext->threadInfo = *pCreateInfo->pThreadInfo; } +else +{ +pContext->threadInfo.MAX_WORKER_THREADS = KNOB_MAX_WORKER_THREADS; +pContext->threadInfo.BASE_NUMA_NODE = KNOB_BASE_NUMA_NODE; +pContext->threadInfo.BASE_CORE = KNOB_BASE_CORE; +pContext->threadInfo.BASE_THREAD= KNOB_BASE_THREAD; +pContext->threadInfo.MAX_NUMA_NODES = KNOB_MAX_NUMA_NODES; +pContext->threadInfo.MAX_CORES_PER_NUMA_NODE= KNOB_MAX_CORES_PER_NUMA_NODE; +pContext->threadInfo.MAX_THREADS_PER_CORE = KNOB_MAX_THREADS_PER_CORE; 
+pContext->threadInfo.SINGLE_THREADED= KNOB_SINGLE_THREADED; +} + +if (pCreateInfo->pApiThreadInfo) +{ +pCo
Mesa (master): swr/rast: Remove no-op VBROADCAST of vID
Module: Mesa Branch: master Commit: ca59b2e75ccb0de2ef7f72751a52b035d060d1bc URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=ca59b2e75ccb0de2ef7f72751a52b035d060d1bc Author: Tim Rowley <timothy.o.row...@intel.com> Date: Mon Dec 11 08:38:46 2017 -0600 swr/rast: Remove no-op VBROADCAST of vID Reviewed-by: Bruce Cherniak <bruce.chern...@intel.com> --- src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp b/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp index ec3b5eafcc..1312ac0009 100644 --- a/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp +++ b/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp @@ -3101,7 +3101,7 @@ Value* FetchJit::GenerateCompCtrlVector(const ComponentControl ctrl) #else Value* pId = BITCAST(LOAD(GEP(mpFetchInfo, { 0, SWR_FETCH_CONTEXT_VertexID })), mSimdFP32Ty); #endif -return VBROADCAST(pId); +return pId; } case StoreInstanceId: { @@ -3129,7 +3129,7 @@ Value* FetchJit::GenerateCompCtrlVector2(const ComponentControl ctrl) Value *pId = JOIN2(pId_lo, pId_hi); -return VBROADCAST2(pId); +return pId; } case StoreInstanceId: { ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): swr/rast: Move more RTAI handling out of binner
Module: Mesa Branch: master Commit: f475ac3c40c6204ef73ad5d07d9ae6932822cc2f URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=f475ac3c40c6204ef73ad5d07d9ae6932822cc2f Author: Tim Rowley <timothy.o.row...@intel.com> Date: Thu Dec 14 13:49:56 2017 -0600 swr/rast: Move more RTAI handling out of binner Reviewed-by: Bruce Cherniak <bruce.chern...@intel.com> --- src/gallium/drivers/swr/rasterizer/core/binner.cpp | 13 + src/gallium/drivers/swr/rasterizer/core/clip.h | 1 + 2 files changed, 2 insertions(+), 12 deletions(-) diff --git a/src/gallium/drivers/swr/rasterizer/core/binner.cpp b/src/gallium/drivers/swr/rasterizer/core/binner.cpp index 7ef87c4443..9aa9f9e79b 100644 --- a/src/gallium/drivers/swr/rasterizer/core/binner.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/binner.cpp @@ -1023,18 +1023,7 @@ void BinPostSetupPointsImpl( SIMD_T::store_si(reinterpret_cast(aMTBottom), bbox.ymax); // store render target array index -OSALIGNSIMD16(uint32_t) aRTAI[SIMD_WIDTH]; -if (state.backendState.readRenderTargetArrayIndex) -{ -typename SIMD_T::Vec4 vRtai[2]; -pa.Assemble(VERTEX_SGV_SLOT, vRtai); -typename SIMD_T::Integer vRtaii = SIMD_T::castps_si(vRtai[0][VERTEX_SGV_RTAI_COMP]); -SIMD_T::store_si(reinterpret_cast(aRTAI), vRtaii); -} -else -{ -SIMD_T::store_si(reinterpret_cast(aRTAI), SIMD_T::setzero_si()); -} +const uint32_t *aRTAI = reinterpret_cast(); OSALIGNSIMD16(float) aPointSize[SIMD_WIDTH]; SIMD_T::store_ps(reinterpret_cast(aPointSize), vPointSize); diff --git a/src/gallium/drivers/swr/rasterizer/core/clip.h b/src/gallium/drivers/swr/rasterizer/core/clip.h index e5e00d49b0..592c9bfa73 100644 --- a/src/gallium/drivers/swr/rasterizer/core/clip.h +++ b/src/gallium/drivers/swr/rasterizer/core/clip.h @@ -646,6 +646,7 @@ public: PA_STATE_OPT clipPA(pDC, numEmittedPrims, reinterpret_cast([0]), numEmittedVerts, SWR_VTX_NUM_SLOTS, true, NumVertsPerPrim, clipTopology); clipPA.viewportArrayActive = pa.viewportArrayActive; +clipPA.rtArrayActive = pa.rtArrayActive; static 
const uint32_t primMaskMap[] = { 0x0, 0x1, 0x3, 0x7, 0xf, 0x1f, 0x3f, 0x7f }; ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): swr/rast: Fix cache of API thread event manager
Module: Mesa Branch: master Commit: 12adf2c8152d0500b2e1149ad0f5397c4955df86 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=12adf2c8152d0500b2e1149ad0f5397c4955df86 Author: Tim Rowley <timothy.o.row...@intel.com> Date: Wed Dec 13 17:52:52 2017 -0600 swr/rast: Fix cache of API thread event manager Reviewed-by: Bruce Cherniak <bruce.chern...@intel.com> --- src/gallium/drivers/swr/rasterizer/core/api.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gallium/drivers/swr/rasterizer/core/api.cpp b/src/gallium/drivers/swr/rasterizer/core/api.cpp index 25a3f34841..09b482dcc0 100644 --- a/src/gallium/drivers/swr/rasterizer/core/api.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/api.cpp @@ -166,7 +166,7 @@ HANDLE SwrCreateContext( #if defined(KNOB_ENABLE_AR) // cache the API thread event manager, for use with sim layer -pCreateInfo->hArEventManager = pContext->pArContext[16]; +pCreateInfo->hArEventManager = pContext->pArContext[pContext->NumWorkerThreads + 1]; #endif // State setup AFTER context is fully initialized ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): swr/rast: Pull most of the VPAI manipulation out of the binner/clipper
Module: Mesa Branch: master Commit: 8b069207965b8cbfcb9de0e06ff03dadc8dbd291 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=8b069207965b8cbfcb9de0e06ff03dadc8dbd291 Author: Tim Rowley <timothy.o.row...@intel.com> Date: Thu Dec 7 11:59:45 2017 -0600 swr/rast: Pull most of the VPAI manipulation out of the binner/clipper Move out of binner/clipper; hand them down from the frontend code instead. Reviewed-by: Bruce Cherniak <bruce.chern...@intel.com> --- src/gallium/drivers/swr/rasterizer/core/binner.cpp | 124 ++--- src/gallium/drivers/swr/rasterizer/core/clip.cpp | 25 ++--- src/gallium/drivers/swr/rasterizer/core/clip.h | 58 +++--- src/gallium/drivers/swr/rasterizer/core/context.h | 4 +- .../drivers/swr/rasterizer/core/frontend.cpp | 112 ++- src/gallium/drivers/swr/rasterizer/core/frontend.h | 8 +- src/gallium/drivers/swr/rasterizer/core/pa.h | 4 +- 7 files changed, 177 insertions(+), 158 deletions(-) diff --git a/src/gallium/drivers/swr/rasterizer/core/binner.cpp b/src/gallium/drivers/swr/rasterizer/core/binner.cpp index 22996c5a5d..a664ed812f 100644 --- a/src/gallium/drivers/swr/rasterizer/core/binner.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/binner.cpp @@ -307,7 +307,8 @@ void SIMDCALL BinTrianglesImpl( uint32_t workerId, typename SIMD_T::Vec4 tri[3], uint32_t triMask, -typename SIMD_T::Integer const ) +typename SIMD_T::Integer const , +typename SIMD_T::Integer const ) { SWR_CONTEXT *pContext = pDC->pContext; @@ -323,31 +324,6 @@ void SIMDCALL BinTrianglesImpl( typename SIMD_T::Float vRecipW1 = SIMD_T::set1_ps(1.0f); typename SIMD_T::Float vRecipW2 = SIMD_T::set1_ps(1.0f); -typename SIMD_T::Integer viewportIdx = SIMD_T::setzero_si(); -typename SIMD_T::Vec4 vpiAttrib[3]; -typename SIMD_T::Integer vpai = SIMD_T::setzero_si(); - -if (state.backendState.readViewportArrayIndex) -{ -pa.Assemble(VERTEX_SGV_SLOT, vpiAttrib); - -vpai = SIMD_T::castps_si(vpiAttrib[0][VERTEX_SGV_VAI_COMP]); -} - - -if (state.backendState.readViewportArrayIndex) // VPAIOffsets 
are guaranteed 0-15 -- no OOB issues if they are offsets from 0 -{ -// OOB indices => forced to zero. -vpai = SIMD_T::max_epi32(vpai, SIMD_T::setzero_si()); -typename SIMD_T::Integer vNumViewports = SIMD_T::set1_epi32(KNOB_NUM_VIEWPORTS_SCISSORS); -typename SIMD_T::Integer vClearMask = SIMD_T::cmplt_epi32(vpai, vNumViewports); -viewportIdx = SIMD_T::and_si(vClearMask, vpai); -} -else -{ -viewportIdx = vpai; -} - if (feState.vpTransformDisable) { // RHW is passed in directly when VP transform is disabled @@ -375,7 +351,7 @@ void SIMDCALL BinTrianglesImpl( tri[2].v[2] = SIMD_T::mul_ps(tri[2].v[2], vRecipW2); // Viewport transform to screen space coords -if (state.backendState.readViewportArrayIndex) +if (pa.viewportArrayActive) { viewportTransform<3>(tri, state.vpMatrices, viewportIdx); } @@ -568,8 +544,8 @@ void SIMDCALL BinTrianglesImpl( /// @todo: Look at speeding this up -- weigh against corresponding costs in rasterizer. { typename SIMD_T::Integer scisXmin, scisYmin, scisXmax, scisYmax; +if (pa.viewportArrayActive) -if (state.backendState.readViewportArrayIndex) { GatherScissors([0], pViewportIndex, scisXmin, scisYmin, scisXmax, scisYmax); } @@ -786,9 +762,10 @@ void BinTriangles( uint32_t workerId, simdvector tri[3], uint32_t triMask, -simdscalari const ) +simdscalari const , +simdscalari const ) { -BinTrianglesImpl<SIMD256, KNOB_SIMD_WIDTH, CT>(pDC, pa, workerId, tri, triMask, primID); +BinTrianglesImpl<SIMD256, KNOB_SIMD_WIDTH, CT>(pDC, pa, workerId, tri, triMask, primID, viewportIdx); } #if USE_SIMD16_FRONTEND @@ -799,9 +776,10 @@ void SIMDCALL BinTriangles_simd16( uint32_t workerId, simd16vector tri[3], uint32_t triMask, -simd16scalari const ) +simd16scalari const , +simd16scalari const ) { -BinTrianglesImpl<SIMD512, KNOB_SIMD16_WIDTH, CT>(pDC, pa, workerId, tri, triMask, primID); +BinTrianglesImpl<SIMD512, KNOB_SIMD16_WIDTH, CT>(pDC, pa, workerId, tri, triMask, primID, viewportIdx); } #endif @@ -1026,7 +1004,7 @@ void BinPostSetupPointsImpl( { typename 
SIMD_T::Integer scisXmin, scisYmin, scisXmax, scisYmax; -if (state.backendState.readViewportArrayIndex) +if (pa.viewportArrayActive) { GatherScissors([0], pViewportIndex, scisXmin, scisYmin, scisXmax, scisYmax); } @@ -1176,38 +1154,13 @@ void BinPointsImpl( uint32_t workerId, typename SIMD_T::Vec4 prim[3], uint32_t primMask, -typename SIMD_T::Int
Mesa (master): swr/rast: Replace VPSRL with LSHR
Module: Mesa Branch: master Commit: c68b2d5c79239e721d8825e373a02fc843d15f6a URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=c68b2d5c79239e721d8825e373a02fc843d15f6a Author: Tim Rowley <timothy.o.row...@intel.com> Date: Tue Dec 12 14:23:50 2017 -0600 swr/rast: Replace VPSRL with LSHR Replace use of x86 intrinsic with general llvm IR instruction. Generates the same final assembly. Reviewed-by: Bruce Cherniak <bruce.chern...@intel.com> --- .../swr/rasterizer/codegen/gen_llvm_ir_macros.py | 2 -- .../drivers/swr/rasterizer/jitter/builder_misc.cpp | 30 -- .../drivers/swr/rasterizer/jitter/builder_misc.h | 5 .../drivers/swr/rasterizer/jitter/fetch_jit.cpp| 8 +++--- 4 files changed, 4 insertions(+), 41 deletions(-) diff --git a/src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_ir_macros.py b/src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_ir_macros.py index 8bbf36d9b8..9544353eb9 100644 --- a/src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_ir_macros.py +++ b/src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_ir_macros.py @@ -47,8 +47,6 @@ intrinsics = [ ['VGATHERPS_16', 'x86_avx512_gather_dps_512', ['src', 'pBase', 'indices', 'mask', 'scale']], ['VGATHERDD', 'x86_avx2_gather_d_d_256', ['src', 'pBase', 'indices', 'mask', 'scale']], ['VGATHERDD_16', 'x86_avx512_gather_dpi_512', ['src', 'pBase', 'indices', 'mask', 'scale']], -['VPSRLI', 'x86_avx2_psrli_d', ['src', 'imm']], -['VPSRLI_16', 'x86_avx512_psrli_d_512', ['src', 'imm']], ['VSQRTPS', 'x86_avx_sqrt_ps_256', ['a']], ['VRSQRTPS', 'x86_avx_rsqrt_ps_256', ['a']], ['VRCPPS', 'x86_avx_rcp_ps_256', ['a']], diff --git a/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp b/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp index 684c9fac54..bdcafd28a3 100644 --- a/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp +++ b/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp @@ -809,36 +809,6 @@ namespace SwrJit } #if USE_SIMD16_BUILDER -Value *Builder::PSRLI(Value *a, Value *imm) -{ 
-return VPSRLI(a, imm); -} - -Value *Builder::PSRLI_16(Value *a, Value *imm) -{ -Value *result = VUNDEF2_I(); - -// use avx512 shift right instruction if available -if (JM()->mArch.AVX512F()) -{ -result = VPSRLI_16(a, imm); -} -else -{ -Value *a0 = EXTRACT2_I(a, 0); -Value *a1 = EXTRACT2_I(a, 1); - -Value *result0 = PSRLI(a0, imm); -Value *result1 = PSRLI(a1, imm); - -result = JOIN2(result0, result1); -} - -return result; -} - -#endif -#if USE_SIMD16_BUILDER // /// @brief Value *Builder::EXTRACT2_F(Value *a2, uint32_t imm) diff --git a/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.h b/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.h index 6c883d8f52..98bc563351 100644 --- a/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.h +++ b/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.h @@ -143,11 +143,6 @@ void GATHER4DD(const SWR_FORMAT_INFO , Value* pSrcBase, Value* byteOffsets, Value *GATHERPD(Value* src, Value* pBase, Value* indices, Value* mask, uint8_t scale = 1); -#if USE_SIMD16_BUILDER -Value *PSRLI(Value *a, Value *imm); -Value *PSRLI_16(Value *a, Value *imm); - -#endif void SCATTERPS(Value* pDst, Value* vSrc, Value* vOffsets, Value* vMask); void Shuffle8bpcGather4(const SWR_FORMAT_INFO , Value* vGatherInput, Value* vGatherOutput[], bool bPackedOutput); diff --git a/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp b/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp index 1312ac0009..8d97ddfdc9 100644 --- a/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp +++ b/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp @@ -1422,12 +1422,12 @@ void FetchJit::JitGatherVertices(const FETCH_COMPILE_STATE , // But, we know that elements must be aligned for FETCH. :) // Right shift the offset by a bit and then scale by 2 to remove the sign extension. 
#if USE_SIMD16_BUILDER -Value *shiftedOffsets = VPSRLI_16(vOffsets16, C(1)); +Value *shiftedOffsets = LSHR(vOffsets16, 1); pVtxSrc2[currentVertexElement] = GATHERPS_16(gatherSrc16, pStreamBase, shiftedOffsets, vGatherMask16, 2); #else -Value *vShiftedOffsets = VPSRLI(vOffsets, C(1)); -Value *vShiftedOffsets2 = VPSRLI(vOffsets2, C(1)); +Value *vShiftedOffsets = LSHR(vOffsets, 1); +Value *vShiftedOffsets2 = LS
Mesa (master): swr/rast: Corrections to multi-scissor handling
Module: Mesa Branch: master Commit: 6d5275498a9a8e571048ca3dd6c99f693b49a7ed URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=6d5275498a9a8e571048ca3dd6c99f693b49a7ed Author: Tim Rowley <timothy.o.row...@intel.com> Date: Wed Nov 29 15:14:20 2017 -0600 swr/rast: Corrections to multi-scissor handling binner's GatherScissors() will be turned into a real gather in the not too distant future. Reviewed-by: Bruce Cherniak <bruce.chern...@intel.com> --- src/gallium/drivers/swr/rasterizer/core/binner.cpp | 176 ++--- 1 file changed, 88 insertions(+), 88 deletions(-) diff --git a/src/gallium/drivers/swr/rasterizer/core/binner.cpp b/src/gallium/drivers/swr/rasterizer/core/binner.cpp index 52375f8956..8a5356b168 100644 --- a/src/gallium/drivers/swr/rasterizer/core/binner.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/binner.cpp @@ -226,117 +226,117 @@ static void GatherScissors(const SWR_RECT *pScissorsInFixedPoint, const uint32_t simdscalari , simdscalari , simdscalari , simdscalari ) { scisXmin = _simd_set_epi32( -pScissorsInFixedPoint[pViewportIndex[0]].xmin, -pScissorsInFixedPoint[pViewportIndex[1]].xmin, -pScissorsInFixedPoint[pViewportIndex[2]].xmin, -pScissorsInFixedPoint[pViewportIndex[3]].xmin, -pScissorsInFixedPoint[pViewportIndex[4]].xmin, -pScissorsInFixedPoint[pViewportIndex[5]].xmin, +pScissorsInFixedPoint[pViewportIndex[7]].xmin, pScissorsInFixedPoint[pViewportIndex[6]].xmin, -pScissorsInFixedPoint[pViewportIndex[7]].xmin); +pScissorsInFixedPoint[pViewportIndex[5]].xmin, +pScissorsInFixedPoint[pViewportIndex[4]].xmin, +pScissorsInFixedPoint[pViewportIndex[3]].xmin, +pScissorsInFixedPoint[pViewportIndex[2]].xmin, +pScissorsInFixedPoint[pViewportIndex[1]].xmin, +pScissorsInFixedPoint[pViewportIndex[0]].xmin); scisYmin = _simd_set_epi32( -pScissorsInFixedPoint[pViewportIndex[0]].ymin, -pScissorsInFixedPoint[pViewportIndex[1]].ymin, -pScissorsInFixedPoint[pViewportIndex[2]].ymin, -pScissorsInFixedPoint[pViewportIndex[3]].ymin, 
-pScissorsInFixedPoint[pViewportIndex[4]].ymin, -pScissorsInFixedPoint[pViewportIndex[5]].ymin, +pScissorsInFixedPoint[pViewportIndex[7]].ymin, pScissorsInFixedPoint[pViewportIndex[6]].ymin, -pScissorsInFixedPoint[pViewportIndex[7]].ymin); +pScissorsInFixedPoint[pViewportIndex[5]].ymin, +pScissorsInFixedPoint[pViewportIndex[4]].ymin, +pScissorsInFixedPoint[pViewportIndex[3]].ymin, +pScissorsInFixedPoint[pViewportIndex[2]].ymin, +pScissorsInFixedPoint[pViewportIndex[1]].ymin, +pScissorsInFixedPoint[pViewportIndex[0]].ymin); scisXmax = _simd_set_epi32( -pScissorsInFixedPoint[pViewportIndex[0]].xmax, -pScissorsInFixedPoint[pViewportIndex[1]].xmax, -pScissorsInFixedPoint[pViewportIndex[2]].xmax, -pScissorsInFixedPoint[pViewportIndex[3]].xmax, -pScissorsInFixedPoint[pViewportIndex[4]].xmax, -pScissorsInFixedPoint[pViewportIndex[5]].xmax, +pScissorsInFixedPoint[pViewportIndex[7]].xmax, pScissorsInFixedPoint[pViewportIndex[6]].xmax, -pScissorsInFixedPoint[pViewportIndex[7]].xmax); +pScissorsInFixedPoint[pViewportIndex[5]].xmax, +pScissorsInFixedPoint[pViewportIndex[4]].xmax, +pScissorsInFixedPoint[pViewportIndex[3]].xmax, +pScissorsInFixedPoint[pViewportIndex[2]].xmax, +pScissorsInFixedPoint[pViewportIndex[1]].xmax, +pScissorsInFixedPoint[pViewportIndex[0]].xmax); scisYmax = _simd_set_epi32( -pScissorsInFixedPoint[pViewportIndex[0]].ymax, -pScissorsInFixedPoint[pViewportIndex[1]].ymax, -pScissorsInFixedPoint[pViewportIndex[2]].ymax, -pScissorsInFixedPoint[pViewportIndex[3]].ymax, -pScissorsInFixedPoint[pViewportIndex[4]].ymax, -pScissorsInFixedPoint[pViewportIndex[5]].ymax, +pScissorsInFixedPoint[pViewportIndex[7]].ymax, pScissorsInFixedPoint[pViewportIndex[6]].ymax, -pScissorsInFixedPoint[pViewportIndex[7]].ymax); +pScissorsInFixedPoint[pViewportIndex[5]].ymax, +pScissorsInFixedPoint[pViewportIndex[4]].ymax, +pScissorsInFixedPoint[pViewportIndex[3]].ymax, +pScissorsInFixedPoint[pViewportIndex[2]].ymax, +pScissorsInFixedPoint[pViewportIndex[01]].ymax, 
+pScissorsInFixedPoint[pViewportIndex[00]].ymax); } static void GatherScissors(const SWR_RECT *pScissorsInFixedPoint, const uint32_t *pViewportIndex, simd16scalari &scisXmin, simd16scalari &scisYmin, simd16scalari &scisXmax, simd16scalari &scisYmax) { scisXmin = _simd16_set_epi32( -pScissorsInFixedPoint[pViewportIndex[0]].xmin, -pScissorsInFixedPoint[pViewportIndex[1]].xmin, -pScissorsInFixedPoint[pViewportIndex[2]].xmin, -pScissorsInFixedPoint[pViewportIn
Mesa (master): swr/rast: Rewrite Shuffle8bpcGatherd using shuffle
Module: Mesa Branch: master Commit: cdb61d45cd0ca80c3545c1942933abdfbcf7683b URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=cdb61d45cd0ca80c3545c1942933abdfbcf7683b Author: Tim Rowley <timothy.o.row...@intel.com> Date: Wed Dec 6 10:37:41 2017 -0600 swr/rast: Rewrite Shuffle8bpcGatherd using shuffle Ease future code maintenance, prepare for folding simd8 and simd16 versions. Reviewed-by: Bruce Cherniak <bruce.chern...@intel.com> --- .../drivers/swr/rasterizer/jitter/fetch_jit.cpp| 244 ++--- 1 file changed, 62 insertions(+), 182 deletions(-) diff --git a/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp b/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp index 67a4a04072..a847cb74da 100644 --- a/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp +++ b/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp @@ -2014,206 +2014,86 @@ void FetchJit::Shuffle8bpcGatherd(Shuffle8bpcArgs ) const uint32_t ()[4] = std::get<9>(args); // cast types -Type* vGatherTy = mSimdInt32Ty; Type* v32x8Ty = VectorType::get(mInt8Ty, mVWidth * 4 ); // vwidth is units of 32 bits -// have to do extra work for sign extending -if ((extendType == Instruction::CastOps::SExt) || (extendType == Instruction::CastOps::SIToFP)){ -Type* v16x8Ty = VectorType::get(mInt8Ty, mVWidth * 2); // 8x16bit ints in a 128bit lane -Type* v128Ty = VectorType::get(IntegerType::getIntNTy(JM()->mContext, 128), mVWidth / 4); // vwidth is units of 32 bits - -// shuffle mask, including any swizzling -const char x = (char)swizzle[0]; const char y = (char)swizzle[1]; -const char z = (char)swizzle[2]; const char w = (char)swizzle[3]; -Value* vConstMask = C({char(x), char(x+4), char(x+8), char(x+12), -char(y), char(y+4), char(y+8), char(y+12), -char(z), char(z+4), char(z+8), char(z+12), -char(w), char(w+4), char(w+8), char(w+12), -char(x), char(x+4), char(x+8), char(x+12), -char(y), char(y+4), char(y+8), char(y+12), -char(z), char(z+4), char(z+8), char(z+12), -char(w), char(w+4), char(w+8), char(w+12)}); 
- -Value* vShufResult = BITCAST(PSHUFB(BITCAST(vGatherResult, v32x8Ty), vConstMask), vGatherTy); -// after pshufb: group components together in each 128bit lane -// 256i - 01234567 -// - -Value* vi128XY = nullptr; -if(isComponentEnabled(compMask, 0) || isComponentEnabled(compMask, 1)){ -vi128XY = BITCAST(PERMD(vShufResult, C({0, 4, 0, 0, 1, 5, 0, 0})), v128Ty); -// after PERMD: move and pack xy and zw components in low 64 bits of each 128bit lane -// 256i - 01234567 -// dcdc dcdc dcdc dcdc (dc - don't care) -} - -// do the same for zw components -Value* vi128ZW = nullptr; -if(isComponentEnabled(compMask, 2) || isComponentEnabled(compMask, 3)){ -vi128ZW = BITCAST(PERMD(vShufResult, C({2, 6, 0, 0, 3, 7, 0, 0})), v128Ty); -} - -// init denormalize variables if needed -Instruction::CastOps fpCast; -Value* conversionFactor; - -switch (conversionType) -{ -case CONVERT_NORMALIZED: -fpCast = Instruction::CastOps::SIToFP; -conversionFactor = VIMMED1((float)(1.0 / 127.0)); -break; -case CONVERT_SSCALED: -fpCast = Instruction::CastOps::SIToFP; -conversionFactor = VIMMED1((float)(1.0)); -break; -case CONVERT_USCALED: -SWR_INVALID("Type should not be sign extended!"); -conversionFactor = nullptr; -break; -default: -SWR_ASSERT(conversionType == CONVERT_NONE); -conversionFactor = nullptr; -break; -} +for (uint32_t i = 0; i < 4; i++) +{ +if (!isComponentEnabled(compMask, i)) +continue; -// sign extend all enabled components. If we have a fill vVertexElements, output to current simdvertex -for (uint32_t i = 0; i < 4; i++) +if (compCtrl[i] == ComponentControl::StoreSrc) { -if (isComponentEnabled(compMask, i)) -{ -if (compCtrl[i] == ComponentControl::StoreSrc) -{ -// if x or z, extract 128bits from lane 0, else for y or w, extract from lane 1 -uint32_t lane = ((i == 0) || (i == 2)) ? 0 : 1; -// if x or y, use vi128XY permute result, else use vi128ZW -Value* selectedPermute = (i < 2) ? vi128XY : vi128ZW; - -// sign extend -vVertexEl
Mesa (master): swr/rast: SIMD16 Fetch - Fully widen 32-bit float vertex components
Module: Mesa Branch: master Commit: df54678ba0733380961947d25830ae9695c77d7e URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=df54678ba0733380961947d25830ae9695c77d7e Author: Tim Rowley <timothy.o.row...@intel.com> Date: Thu Dec 7 18:37:07 2017 -0600 swr/rast: SIMD16 Fetch - Fully widen 32-bit float vertex components Reviewed-by: Bruce Cherniak <bruce.chern...@intel.com> --- .../swr/rasterizer/codegen/gen_llvm_ir_macros.py | 3 +- .../drivers/swr/rasterizer/jitter/builder_misc.cpp | 41 - .../drivers/swr/rasterizer/jitter/builder_misc.h | 7 +- .../drivers/swr/rasterizer/jitter/fetch_jit.cpp| 175 ++--- 4 files changed, 194 insertions(+), 32 deletions(-) diff --git a/src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_ir_macros.py b/src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_ir_macros.py index 44fc857371..ac8b3badf6 100644 --- a/src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_ir_macros.py +++ b/src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_ir_macros.py @@ -44,9 +44,10 @@ inst_aliases = { intrinsics = [ ['VGATHERPD', 'x86_avx2_gather_d_pd_256', ['src', 'pBase', 'indices', 'mask', 'scale']], ['VGATHERPS', 'x86_avx2_gather_d_ps_256', ['src', 'pBase', 'indices', 'mask', 'scale']], -['VGATHERPS2', 'x86_avx512_gather_dps_512', ['src', 'pBase', 'indices', 'mask', 'scale']], +['VGATHERPS_16', 'x86_avx512_gather_dps_512', ['src', 'pBase', 'indices', 'mask', 'scale']], ['VGATHERDD', 'x86_avx2_gather_d_d_256', ['src', 'pBase', 'indices', 'mask', 'scale']], ['VPSRLI', 'x86_avx2_psrli_d', ['src', 'imm']], +['VPSRLI_16', 'x86_avx512_psrli_d_512', ['src', 'imm']], ['VSQRTPS', 'x86_avx_sqrt_ps_256', ['a']], ['VRSQRTPS', 'x86_avx_rsqrt_ps_256', ['a']], ['VRCPPS', 'x86_avx_rcp_ps_256', ['a']], diff --git a/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp b/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp index 04092541e5..b2210db717 100644 --- a/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp +++ 
b/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp @@ -639,7 +639,7 @@ namespace SwrJit } #if USE_SIMD16_BUILDER -Value *Builder::GATHERPS2(Value *vSrc, Value *pBase, Value *vIndices, Value *vMask, uint8_t scale) +Value *Builder::GATHERPS_16(Value *vSrc, Value *pBase, Value *vIndices, Value *vMask, uint8_t scale) { Value *vGather = VUNDEF2_F(); @@ -649,7 +649,7 @@ namespace SwrJit // force mask to , required by vgather2 Value *mask = BITCAST(vMask, mInt16Ty); -vGather = VGATHERPS2(vSrc, pBase, vIndices, mask, C((uint32_t)scale)); +vGather = VGATHERPS_16(vSrc, pBase, vIndices, mask, C((uint32_t)scale)); } else { @@ -659,8 +659,10 @@ namespace SwrJit Value *indices0 = EXTRACT2_I(vIndices, 0); Value *indices1 = EXTRACT2_I(vIndices, 1); -Value *mask0 = EXTRACT2_I(vMask, 0); -Value *mask1 = EXTRACT2_I(vMask, 1); +Value *vmask16 = VMASK2(vMask); + +Value *mask0 = MASK(EXTRACT2_I(vmask16, 0)); // TODO: do this better.. +Value *mask1 = MASK(EXTRACT2_I(vmask16, 1)); Value *gather0 = GATHERPS(src0, pBase, indices0, mask0, scale); Value *gather1 = GATHERPS(src1, pBase, indices1, mask1, scale); @@ -772,6 +774,37 @@ namespace SwrJit } #if USE_SIMD16_BUILDER +Value *Builder::PSRLI(Value *a, Value *imm) +{ +return VPSRLI(a, imm); +} + +Value *Builder::PSRLI_16(Value *a, Value *imm) +{ +Value *result = VUNDEF2_I(); + +// use avx512 shift right instruction if available +if (JM()->mArch.AVX512F()) +{ +result = VPSRLI_16(a, imm); +} +else +{ +Value *a0 = EXTRACT2_I(a, 0); +Value *a1 = EXTRACT2_I(a, 1); + +Value *result0 = PSRLI(a0, imm); +Value *result1 = PSRLI(a1, imm); + +result = INSERT2_I(result, result0, 0); +result = INSERT2_I(result, result1, 1); +} + +return result; +} + +#endif +#if USE_SIMD16_BUILDER // /// @brief Value *Builder::EXTRACT2_F(Value *a2, uint32_t imm) diff --git a/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.h b/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.h index d858a827db..62360a3ad7 100644 --- 
a/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.h +++ b/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.h @@ -130,7 +130,7 @@ void Gather4(const SWR_FORMAT format, Value* pSrcBase, Value* byteOffsets, Value *GATHERPS(Value *src, Value *pBase, Value *indices, Value *mask, uint8_t scale = 1); #if USE_SIMD16_BUILDER -Value *GATHERPS2(Value *src, Value *pBase, Value
Mesa (master): swr/rast: Pull of RTAI gather & offset out of clip/bin code
Module: Mesa Branch: master Commit: 182cc51a50492926ebf72d4cd38f1e574c768e72 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=182cc51a50492926ebf72d4cd38f1e574c768e72 Author: Tim Rowley <timothy.o.row...@intel.com> Date: Mon Dec 11 15:51:46 2017 -0600 swr/rast: Pull of RTAI gather & offset out of clip/bin code Reviewed-by: Bruce Cherniak <bruce.chern...@intel.com> --- src/gallium/drivers/swr/rasterizer/core/binner.cpp | 118 +++- src/gallium/drivers/swr/rasterizer/core/clip.cpp | 30 ++-- src/gallium/drivers/swr/rasterizer/core/clip.h | 35 +++-- src/gallium/drivers/swr/rasterizer/core/context.h | 4 +- .../drivers/swr/rasterizer/core/frontend.cpp | 153 +++-- src/gallium/drivers/swr/rasterizer/core/frontend.h | 8 +- src/gallium/drivers/swr/rasterizer/core/pa.h | 1 + 7 files changed, 203 insertions(+), 146 deletions(-) diff --git a/src/gallium/drivers/swr/rasterizer/core/binner.cpp b/src/gallium/drivers/swr/rasterizer/core/binner.cpp index a664ed812f..7ef87c4443 100644 --- a/src/gallium/drivers/swr/rasterizer/core/binner.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/binner.cpp @@ -45,7 +45,8 @@ void BinPostSetupLinesImpl( typename SIMD_T::Float recipW[], uint32_t primMask, typename SIMD_T::Integer const , -typename SIMD_T::Integer const ); +typename SIMD_T::Integer const , +typename SIMD_T::Integer const ); template void BinPostSetupPointsImpl( @@ -55,7 +56,8 @@ void BinPostSetupPointsImpl( typename SIMD_T::Vec4 prim[], uint32_t primMask, typename SIMD_T::Integer const , -typename SIMD_T::Integer const ); +typename SIMD_T::Integer const , +typename SIMD_T::Integer const ); // /// @brief Processes attributes for the backend based on linkage mask and @@ -308,9 +310,11 @@ void SIMDCALL BinTrianglesImpl( typename SIMD_T::Vec4 tri[3], uint32_t triMask, typename SIMD_T::Integer const , -typename SIMD_T::Integer const ) +typename SIMD_T::Integer const , +typename SIMD_T::Integer const ) { SWR_CONTEXT *pContext = pDC->pContext; +const uint32_t *aRTAI = 
reinterpret_cast(); AR_BEGIN(FEBinTriangles, pDC->drawId); @@ -604,21 +608,21 @@ endBinTriangles: recipW[0] = vRecipW0; recipW[1] = vRecipW1; -BinPostSetupLinesImpl<SIMD_T, SIMD_WIDTH>(pDC, pa, workerId, line, recipW, triMask, primID, viewportIdx); +BinPostSetupLinesImpl<SIMD_T, SIMD_WIDTH>(pDC, pa, workerId, line, recipW, triMask, primID, viewportIdx, rtIdx); line[0] = tri[1]; line[1] = tri[2]; recipW[0] = vRecipW1; recipW[1] = vRecipW2; -BinPostSetupLinesImpl<SIMD_T, SIMD_WIDTH>(pDC, pa, workerId, line, recipW, triMask, primID, viewportIdx); +BinPostSetupLinesImpl<SIMD_T, SIMD_WIDTH>(pDC, pa, workerId, line, recipW, triMask, primID, viewportIdx, rtIdx); line[0] = tri[2]; line[1] = tri[0]; recipW[0] = vRecipW2; recipW[1] = vRecipW0; -BinPostSetupLinesImpl<SIMD_T, SIMD_WIDTH>(pDC, pa, workerId, line, recipW, triMask, primID, viewportIdx); +BinPostSetupLinesImpl<SIMD_T, SIMD_WIDTH>(pDC, pa, workerId, line, recipW, triMask, primID, viewportIdx, rtIdx); AR_END(FEBinTriangles, 1); return; @@ -626,9 +630,9 @@ endBinTriangles: else if (rastState.fillMode == SWR_FILLMODE_POINT) { // Bin 3 points -BinPostSetupPointsImpl<SIMD_T, SIMD_WIDTH>(pDC, pa, workerId, [0], triMask, primID, viewportIdx); -BinPostSetupPointsImpl<SIMD_T, SIMD_WIDTH>(pDC, pa, workerId, [1], triMask, primID, viewportIdx); -BinPostSetupPointsImpl<SIMD_T, SIMD_WIDTH>(pDC, pa, workerId, [2], triMask, primID, viewportIdx); +BinPostSetupPointsImpl<SIMD_T, SIMD_WIDTH>(pDC, pa, workerId, [0], triMask, primID, viewportIdx, rtIdx); +BinPostSetupPointsImpl<SIMD_T, SIMD_WIDTH>(pDC, pa, workerId, [1], triMask, primID, viewportIdx, rtIdx); +BinPostSetupPointsImpl<SIMD_T, SIMD_WIDTH>(pDC, pa, workerId, [2], triMask, primID, viewportIdx, rtIdx); AR_END(FEBinTriangles, 1); return; @@ -659,22 +663,6 @@ endBinTriangles: TransposeVertices(vHorizZ, tri[0].z, tri[1].z, tri[2].z); TransposeVertices(vHorizW, vRecipW0, vRecipW1, vRecipW2); -// store render target array index -OSALIGNSIMD16(uint32_t) aRTAI[SIMD_WIDTH]; -if 
(state.backendState.readRenderTargetArrayIndex) -{ -typename SIMD_T::Vec4 vRtai[3]; -pa.Assemble(VERTEX_SGV_SLOT, vRtai); -typename SIMD_T::Integer vRtaii; -vRtaii = SIMD_T::castps_si(vRtai[0][VERTEX_SGV_RTAI_COMP]); -SIMD_T::store_si(reinterpret_cast(aRTAI), vRtaii); -} -else -{ -SIMD_T::store_si(reinterpret_
Mesa (master): swr/rast: Replace INSERT2 vextract/vinsert with JOIN2 vshuffle
Module: Mesa Branch: master Commit: fa3105cdb54415d7b93be932351966d3108511e4 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=fa3105cdb54415d7b93be932351966d3108511e4 Author: Tim Rowley <timothy.o.row...@intel.com> Date: Fri Dec 8 17:33:23 2017 -0600 swr/rast: Replace INSERT2 vextract/vinsert with JOIN2 vshuffle Reviewed-by: Bruce Cherniak <bruce.chern...@intel.com> --- .../drivers/swr/rasterizer/jitter/builder_misc.cpp | 38 ++--- .../drivers/swr/rasterizer/jitter/builder_misc.h | 5 +- .../drivers/swr/rasterizer/jitter/fetch_jit.cpp| 92 ++ 3 files changed, 30 insertions(+), 105 deletions(-) diff --git a/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp b/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp index b2210db717..3a486e4c1e 100644 --- a/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp +++ b/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp @@ -667,8 +667,7 @@ namespace SwrJit Value *gather0 = GATHERPS(src0, pBase, indices0, mask0, scale); Value *gather1 = GATHERPS(src1, pBase, indices1, mask1, scale); -vGather = INSERT2_F(vGather, gather0, 0); -vGather = INSERT2_F(vGather, gather1, 1); +vGather = JOIN2(gather0, gather1); } return vGather; @@ -796,8 +795,7 @@ namespace SwrJit Value *result0 = PSRLI(a0, imm); Value *result1 = PSRLI(a1, imm); -result = INSERT2_I(result, result0, 0); -result = INSERT2_I(result, result1, 1); +result = JOIN2(result0, result1); } return result; @@ -835,37 +833,13 @@ namespace SwrJit return BITCAST(EXTRACT2_F(a2, imm), mSimdInt32Ty); } -// -/// @brief -Value *Builder::INSERT2_F(Value *a2, Value *b, uint32_t imm) +Value *Builder::JOIN2(Value *a, Value *b) { -const uint32_t i0 = (imm > 0) ? 
mVWidth : 0; - -Value *result = BITCAST(a2, mSimd2FP32Ty); - -for (uint32_t i = 0; i < mVWidth; i += 1) -{ -#if 1 -if (!b->getType()->getScalarType()->isFloatTy()) -{ -b = BITCAST(b, mSimdFP32Ty); -} - -#endif -Value *temp = VEXTRACT(b, C(i)); - -result = VINSERT(result, temp, C(i0 + i)); -} - -return result; +return VSHUFFLE(a, b, +{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}); } - -Value *Builder::INSERT2_I(Value *a2, Value *b, uint32_t imm) -{ -return BITCAST(INSERT2_F(a2, b, imm), mSimd2Int32Ty); -} - #endif + // /// @brief convert x86 mask to llvm mask Value *Builder::MASK(Value *vmask) diff --git a/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.h b/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.h index 62360a3ad7..231bd6ad85 100644 --- a/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.h +++ b/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.h @@ -119,10 +119,9 @@ Value *VMASK2(Value *mask); #if USE_SIMD16_BUILDER Value *EXTRACT2_F(Value *a2, uint32_t imm); Value *EXTRACT2_I(Value *a2, uint32_t imm); -Value *INSERT2_F(Value *a2, Value *b, uint32_t imm); -Value *INSERT2_I(Value *a2, Value *b, uint32_t imm); - +Value *JOIN2(Value *a, Value *b); #endif + Value *MASKLOADD(Value* src, Value* mask); void Gather4(const SWR_FORMAT format, Value* pSrcBase, Value* byteOffsets, diff --git a/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp b/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp index c960dc77fb..e0a0770560 100644 --- a/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp +++ b/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp @@ -960,10 +960,7 @@ void FetchJit::JitGatherVertices(const FETCH_COMPILE_STATE , // offset indices by baseVertex #if USE_SIMD16_GATHERS #if USE_SIMD16_BUILDER -Value *vIndices16 = VUNDEF2_I(); - -vIndices16 = INSERT2_I(vIndices16, vIndices, 0); -vIndices16 = INSERT2_I(vIndices16, vIndices2, 1); +Value *vIndices16 = JOIN2(vIndices, vIndices2); vCurIndices16 = ADD(vIndices16, 
vBaseVertex16); #else @@ -982,10 +979,7 @@ void FetchJit::JitGatherVertices(const FETCH_COMPILE_STATE , // offset indices by baseVertex #if USE_SIMD16_GATHERS #if USE_SIMD16_BUILDER -Value *vIndices16 = VUNDEF2_I(); - -vIndices16 = INSERT2_I(vIndices16, vIndices, 0); -vIndices16 = INSERT2_I(vIndices16, vIndices2, 1); +Value *vIndices16 = JOIN2(vIndices, vIndices2); vCurIndices16 = ADD(vIndices16, vBaseVertex16); #else @@ -1206,9 +1200,7 @@ void FetchJit::JitGatherVertices
Mesa (master): swr/rast: EXTRACT2 changed from vextract/vinsert to vshuffle
Module: Mesa Branch: master Commit: 11a9d4f9b53722a491d9f23e848a02b741febd44 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=11a9d4f9b53722a491d9f23e848a02b741febd44 Author: Tim Rowley <timothy.o.row...@intel.com> Date: Thu Dec 14 13:39:29 2017 -0600 swr/rast: EXTRACT2 changed from vextract/vinsert to vshuffle Reviewed-by: Bruce Cherniak <bruce.chern...@intel.com> --- .../drivers/swr/rasterizer/jitter/builder_misc.cpp | 60 ++ .../drivers/swr/rasterizer/jitter/builder_misc.h | 3 +- .../drivers/swr/rasterizer/jitter/fetch_jit.cpp| 30 +-- 3 files changed, 32 insertions(+), 61 deletions(-) diff --git a/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp b/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp index bdcafd28a3..0774889af1 100644 --- a/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp +++ b/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp @@ -653,16 +653,14 @@ namespace SwrJit } else { -Value *src0 = EXTRACT2_F(vSrc, 0); -Value *src1 = EXTRACT2_F(vSrc, 1); +Value *src0 = EXTRACT2(vSrc, 0); +Value *src1 = EXTRACT2(vSrc, 1); -Value *indices0 = EXTRACT2_I(vIndices, 0); -Value *indices1 = EXTRACT2_I(vIndices, 1); +Value *indices0 = EXTRACT2(vIndices, 0); +Value *indices1 = EXTRACT2(vIndices, 1); -Value *vmask16 = VMASK2(vMask); - -Value *mask0 = MASK(EXTRACT2_I(vmask16, 0)); // TODO: do this better.. 
-Value *mask1 = MASK(EXTRACT2_I(vmask16, 1)); +Value *mask0 = EXTRACT2(vMask, 0); +Value *mask1 = EXTRACT2(vMask, 1); Value *gather0 = GATHERPS(src0, pBase, indices0, mask0, scale); Value *gather1 = GATHERPS(src1, pBase, indices1, mask1, scale); @@ -738,16 +736,14 @@ namespace SwrJit } else { -Value *src0 = EXTRACT2_F(vSrc, 0); -Value *src1 = EXTRACT2_F(vSrc, 1); - -Value *indices0 = EXTRACT2_I(vIndices, 0); -Value *indices1 = EXTRACT2_I(vIndices, 1); +Value *src0 = EXTRACT2(vSrc, 0); +Value *src1 = EXTRACT2(vSrc, 1); -Value *vmask16 = VMASK2(vMask); +Value *indices0 = EXTRACT2(vIndices, 0); +Value *indices1 = EXTRACT2(vIndices, 1); -Value *mask0 = MASK(EXTRACT2_I(vmask16, 0)); // TODO: do this better.. -Value *mask1 = MASK(EXTRACT2_I(vmask16, 1)); +Value *mask0 = EXTRACT2(vMask, 0); +Value *mask1 = EXTRACT2(vMask, 1); Value *gather0 = GATHERDD(src0, pBase, indices0, mask0, scale); Value *gather1 = GATHERDD(src1, pBase, indices1, mask1, scale); @@ -809,34 +805,12 @@ namespace SwrJit } #if USE_SIMD16_BUILDER -// -/// @brief -Value *Builder::EXTRACT2_F(Value *a2, uint32_t imm) -{ -const uint32_t i0 = (imm > 0) ? 
mVWidth : 0; - -Value *result = VUNDEF_F(); - -for (uint32_t i = 0; i < mVWidth; i += 1) -{ -#if 1 -if (!a2->getType()->getScalarType()->isFloatTy()) -{ -a2 = BITCAST(a2, mSimd2FP32Ty); -} - -#endif -Value *temp = VEXTRACT(a2, C(i0 + i)); - -result = VINSERT(result, temp, C(i)); -} - -return result; -} - -Value *Builder::EXTRACT2_I(Value *a2, uint32_t imm) +Value *Builder::EXTRACT2(Value *x, uint32_t imm) { -return BITCAST(EXTRACT2_F(a2, imm), mSimdInt32Ty); +if (imm == 0) +return VSHUFFLE(x, UndefValue::get(x->getType()), {0, 1, 2, 3, 4, 5, 6, 7}); +else +return VSHUFFLE(x, UndefValue::get(x->getType()), {8, 9, 10, 11, 12, 13, 14, 15}); } Value *Builder::JOIN2(Value *a, Value *b) diff --git a/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.h b/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.h index 98bc563351..646ed0efb2 100644 --- a/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.h +++ b/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.h @@ -117,8 +117,7 @@ Value *VMASK2(Value *mask); // #if USE_SIMD16_BUILDER -Value *EXTRACT2_F(Value *a2, uint32_t imm); -Value *EXTRACT2_I(Value *a2, uint32_t imm); +Value *EXTRACT2(Value *x, uint32_t imm); Value *JOIN2(Value *a, Value *b); #endif diff --git a/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp b/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp index 8d97ddfdc9..aa911b58f3 100644 --- a/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp +++ b/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp @@ -1078,14 +1078,12 @@ void FetchJit::JitGatherVertices(const FETCH_COMPILE_STATE ,
Mesa (master): swr/rast: Convert gather masks to Nx1bit
Module: Mesa Branch: master Commit: 3ec98ab5d4fc9d53948fc9280caac83c70d9dc09 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=3ec98ab5d4fc9d53948fc9280caac83c70d9dc09 Author: Tim Rowley <timothy.o.row...@intel.com> Date: Mon Dec 4 15:16:13 2017 -0600 swr/rast: Convert gather masks to Nx1bit Simplifies calling code, gets gather function interface closer to llvm's masked_gather. Reviewed-by: Bruce Cherniak <bruce.chern...@intel.com> --- .../drivers/swr/rasterizer/jitter/builder_misc.cpp | 20 + .../drivers/swr/rasterizer/jitter/fetch_jit.cpp| 34 +- 2 files changed, 14 insertions(+), 40 deletions(-) diff --git a/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp b/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp index 0221106664..04092541e5 100644 --- a/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp +++ b/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp @@ -602,7 +602,7 @@ namespace SwrJit if(JM()->mArch.AVX2()) { // force mask to , required by vgather -Value *mask = BITCAST(vMask, mSimdFP32Ty); +Value *mask = BITCAST(VMASK(vMask), mSimdFP32Ty); vGather = VGATHERPS(vSrc, pBase, vIndices, mask, C(scale)); } @@ -617,7 +617,6 @@ namespace SwrJit vGather = VUNDEF_F(); Value *vScaleVec = VIMMED1((uint32_t)scale); Value *vOffsets = MUL(vIndices,vScaleVec); -Value *mask = MASK(vMask); for(uint32_t i = 0; i < mVWidth; ++i) { // single component byte index @@ -627,7 +626,7 @@ namespace SwrJit loadAddress = BITCAST(loadAddress,PointerType::get(mFP32Ty,0)); // pointer to the value to load if we're masking off a component Value *maskLoadAddress = GEP(vSrcPtr,{C(0), C(i)}); -Value *selMask = VEXTRACT(mask,C(i)); +Value *selMask = VEXTRACT(vMask,C(i)); // switch in a safe address to load if we're trying to access a vertex Value *validAddress = SELECT(selMask, loadAddress, maskLoadAddress); Value *val = LOAD(validAddress); @@ -648,7 +647,7 @@ namespace SwrJit if (JM()->mArch.AVX512F()) { // force mask to , required by vgather2 -Value *mask 
= BITCAST(MASK2(vMask), mInt16Ty); +Value *mask = BITCAST(vMask, mInt16Ty); vGather = VGATHERPS2(vSrc, pBase, vIndices, mask, C((uint32_t)scale)); } @@ -689,7 +688,7 @@ namespace SwrJit // use avx2 gather instruction if available if(JM()->mArch.AVX2()) { -vGather = VGATHERDD(vSrc, pBase, vIndices, vMask, C(scale)); +vGather = VGATHERDD(vSrc, pBase, vIndices, VMASK(vMask), C(scale)); } else { @@ -702,7 +701,6 @@ namespace SwrJit vGather = VUNDEF_I(); Value *vScaleVec = VIMMED1((uint32_t)scale); Value *vOffsets = MUL(vIndices, vScaleVec); -Value *mask = MASK(vMask); for(uint32_t i = 0; i < mVWidth; ++i) { // single component byte index @@ -712,7 +710,7 @@ namespace SwrJit loadAddress = BITCAST(loadAddress, PointerType::get(mInt32Ty, 0)); // pointer to the value to load if we're masking off a component Value *maskLoadAddress = GEP(vSrcPtr, {C(0), C(i)}); -Value *selMask = VEXTRACT(mask, C(i)); +Value *selMask = VEXTRACT(vMask, C(i)); // switch in a safe address to load if we're trying to access a vertex Value *validAddress = SELECT(selMask, loadAddress, maskLoadAddress); Value *val = LOAD(validAddress, C(0)); @@ -739,6 +737,7 @@ namespace SwrJit // use avx2 gather instruction if available if(JM()->mArch.AVX2()) { +vMask = BITCAST(S_EXT(vMask, VectorType::get(mInt64Ty, mVWidth/2)), VectorType::get(mDoubleTy, mVWidth/2)); vGather = VGATHERPD(vSrc, pBase, vIndices, vMask, C(scale)); } else @@ -752,7 +751,6 @@ namespace SwrJit vGather = UndefValue::get(VectorType::get(mDoubleTy, 4)); Value *vScaleVec = VECTOR_SPLAT(4, C((uint32_t)scale)); Value *vOffsets = MUL(vIndices,vScaleVec); -Value *mask = MASK(vMask); for(uint32_t i = 0; i < mVWidth/2; ++i) { // single component byte index @@ -762,7 +760,7 @@ namespace SwrJit loadAddress = BITCAST(loadAddress,PointerType::get(mDoubleTy,0)); // pointer to the value to load if we're masking off a component Value *maskLoadAddress = GEP(vSrcPtr,{C(0), C(i)}); -Value *selMask = VEXTRACT(mask,C(i)); +
Mesa (master): swr/rast: Add alignment to transpose targets
Module: Mesa Branch: master Commit: 44025def06a8b8d1c019f611079a003964ea7511 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=44025def06a8b8d1c019f611079a003964ea7511 Author: Tim Rowley <timothy.o.row...@intel.com> Date: Tue Nov 7 15:24:25 2017 -0600 swr/rast: Add alignment to transpose targets Needed to ensure alignment for avx512. Fixes address sanitizer crash. Reviewed-by: Bruce Cherniak <bruce.chern...@intel.com> --- src/gallium/drivers/swr/rasterizer/core/binner.cpp | 16 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/gallium/drivers/swr/rasterizer/core/binner.cpp b/src/gallium/drivers/swr/rasterizer/core/binner.cpp index b624ae69b3..9d1f0d8799 100644 --- a/src/gallium/drivers/swr/rasterizer/core/binner.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/binner.cpp @@ -796,10 +796,10 @@ endBinTriangles: // transpose verts needed for backend /// @todo modify BE to take non-transformed verts -simd4scalar vHorizX[SIMD_WIDTH]; -simd4scalar vHorizY[SIMD_WIDTH]; -simd4scalar vHorizZ[SIMD_WIDTH]; -simd4scalar vHorizW[SIMD_WIDTH]; +OSALIGNSIMD16(simd4scalar) vHorizX[SIMD_WIDTH]; +OSALIGNSIMD16(simd4scalar) vHorizY[SIMD_WIDTH]; +OSALIGNSIMD16(simd4scalar) vHorizZ[SIMD_WIDTH]; +OSALIGNSIMD16(simd4scalar) vHorizW[SIMD_WIDTH]; TransposeVertices(vHorizX, tri[0].x, tri[1].x, tri[2].x); TransposeVertices(vHorizY, tri[0].y, tri[1].y, tri[2].y); @@ -1510,10 +1510,10 @@ void BinPostSetupLinesImpl( // transpose verts needed for backend /// @todo modify BE to take non-transformed verts -simd4scalar vHorizX[SIMD_WIDTH]; -simd4scalar vHorizY[SIMD_WIDTH]; -simd4scalar vHorizZ[SIMD_WIDTH]; -simd4scalar vHorizW[SIMD_WIDTH]; +OSALIGNSIMD16(simd4scalar) vHorizX[SIMD_WIDTH]; +OSALIGNSIMD16(simd4scalar) vHorizY[SIMD_WIDTH]; +OSALIGNSIMD16(simd4scalar) vHorizZ[SIMD_WIDTH]; +OSALIGNSIMD16(simd4scalar) vHorizW[SIMD_WIDTH]; if (!primMask) { ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): swr/rast: Enable AVX-512 targets in the jitter
Module: Mesa Branch: master Commit: 395a298fa52adf04062b9fee98258b25c0f047e9 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=395a298fa52adf04062b9fee98258b25c0f047e9 Author: Tim Rowley <timothy.o.row...@intel.com> Date: Tue Oct 31 16:46:59 2017 -0500 swr/rast: Enable AVX-512 targets in the jitter Reviewed-by: Bruce Cherniak <bruce.chern...@intel.com> --- src/gallium/drivers/swr/rasterizer/core/knobs.h| 8 src/gallium/drivers/swr/rasterizer/jitter/JitManager.h | 2 -- 2 files changed, 10 deletions(-) diff --git a/src/gallium/drivers/swr/rasterizer/core/knobs.h b/src/gallium/drivers/swr/rasterizer/core/knobs.h index fe0a044ae8..e00e2da650 100644 --- a/src/gallium/drivers/swr/rasterizer/core/knobs.h +++ b/src/gallium/drivers/swr/rasterizer/core/knobs.h @@ -61,18 +61,10 @@ #define KNOB_SIMD_WIDTH 8 #define KNOB_SIMD_BYTES 32 #elif (KNOB_ARCH == KNOB_ARCH_AVX512) -#if 0 -// not ready to enable this globally, enabled on the side (below) #define KNOB_ARCH_ISA AVX512F #define KNOB_ARCH_STR "AVX512" -#define KNOB_SIMD_WIDTH 16 -#define KNOB_SIMD_BYTES 64 -#else -#define KNOB_ARCH_ISA AVX2 -#define KNOB_ARCH_STR "AVX2" #define KNOB_SIMD_WIDTH 8 #define KNOB_SIMD_BYTES 32 -#endif #else #error "Unknown architecture" #endif diff --git a/src/gallium/drivers/swr/rasterizer/jitter/JitManager.h b/src/gallium/drivers/swr/rasterizer/jitter/JitManager.h index 46ffe276a0..c30a807222 100644 --- a/src/gallium/drivers/swr/rasterizer/jitter/JitManager.h +++ b/src/gallium/drivers/swr/rasterizer/jitter/JitManager.h @@ -102,14 +102,12 @@ public: bForceAVX2 = true; bForceAVX512 = false; } -#if 0 else if(isaRequest == "avx512") { bForceAVX = false; bForceAVX2 = false; bForceAVX512 = true; } -#endif }; bool AVX2(void) { return bForceAVX ? 0 : InstructionSet::AVX2(); } ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): swr/rast: Points with clipdistance can't go through simplepoints path
Module: Mesa Branch: master Commit: 37bb69fb88d632b4c50162c5d6b0ccd96f23d533 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=37bb69fb88d632b4c50162c5d6b0ccd96f23d533 Author: Tim Rowley <timothy.o.row...@intel.com> Date: Tue Oct 31 09:41:02 2017 -0500 swr/rast: Points with clipdistance can't go through simplepoints path Fixes piglit glsl-1.20:vs-clip-vertex-primitives and glsl-1.30:vs-clip-distance-primitives. Reviewed-by: Bruce Cherniak <bruce.chern...@intel.com> --- src/gallium/drivers/swr/rasterizer/core/frontend.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/gallium/drivers/swr/rasterizer/core/frontend.h b/src/gallium/drivers/swr/rasterizer/core/frontend.h index 5cb2f87c15..11099d6449 100644 --- a/src/gallium/drivers/swr/rasterizer/core/frontend.h +++ b/src/gallium/drivers/swr/rasterizer/core/frontend.h @@ -352,7 +352,8 @@ bool CanUseSimplePoints(DRAW_CONTEXT *pDC) return (state.rastState.sampleCount == SWR_MULTISAMPLE_1X && state.rastState.pointSize == 1.0f && !state.rastState.pointParam && -!state.rastState.pointSpriteEnable); +!state.rastState.pointSpriteEnable && +!state.backendState.clipDistanceMask); } INLINE ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): swr/rast: Support flexible vertex layout for DS output
Module: Mesa Branch: master Commit: e612231f20883aa31a6ed5b260872f1cdb84c223 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=e612231f20883aa31a6ed5b260872f1cdb84c223 Author: Tim Rowley <timothy.o.row...@intel.com> Date: Wed Oct 18 16:51:07 2017 -0500 swr/rast: Support flexible vertex layout for DS output Reviewed-by: Bruce Cherniak <bruce.chern...@intel.com> --- src/gallium/drivers/swr/rasterizer/core/frontend.cpp | 1 + src/gallium/drivers/swr/rasterizer/core/state.h | 2 ++ 2 files changed, 3 insertions(+) diff --git a/src/gallium/drivers/swr/rasterizer/core/frontend.cpp b/src/gallium/drivers/swr/rasterizer/core/frontend.cpp index 211e9e4b07..e15b300979 100644 --- a/src/gallium/drivers/swr/rasterizer/core/frontend.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/frontend.cpp @@ -1237,6 +1237,7 @@ static void TessellationStages( dsContext.pDomainU = (simdscalar*)tsData.pDomainPointsU; dsContext.pDomainV = (simdscalar*)tsData.pDomainPointsV; dsContext.pOutputData = gt_pTessellationThreadData->pDSOutput; +dsContext.outVertexAttribOffset = tsState.dsOutVtxAttribOffset; #if USE_SIMD16_FRONTEND dsContext.vectorStride = RoundUpEven(requiredDSVectorInvocations); // simd8 -> simd16 #else diff --git a/src/gallium/drivers/swr/rasterizer/core/state.h b/src/gallium/drivers/swr/rasterizer/core/state.h index 2af384fd90..d11ffc69b0 100644 --- a/src/gallium/drivers/swr/rasterizer/core/state.h +++ b/src/gallium/drivers/swr/rasterizer/core/state.h @@ -288,6 +288,7 @@ struct SWR_DS_CONTEXT uint32_tPrimitiveID;// IN: (SCALAR) PrimitiveID for the patch associated with the DS invocation uint32_tvectorOffset; // IN: (SCALAR) vector index offset into SIMD data. uint32_tvectorStride; // IN: (SCALAR) stride (in vectors) of output data per attribute-component +uint32_toutVertexAttribOffset; // IN: (SCALAR) Offset to the attributes as processed by the next shader stage. 
ScalarPatch*pCpIn; // IN: (SCALAR) Control patch simdscalar* pDomainU; // IN: (SIMD) Domain Point U coords simdscalar* pDomainV; // IN: (SIMD) Domain Point V coords @@ -819,6 +820,7 @@ struct SWR_TS_STATE uint32_tnumHsOutputAttribs; uint32_tnumDsOutputAttribs; uint32_tdsAllocationSize; +uint32_tdsOutVtxAttribOffset; // Offset to the start of the attributes of the input vertices, in simdvector units uint32_tvertexAttribOffset; ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): swr/rast: Widen fetch shader to SIMD16
Module: Mesa Branch: master Commit: 08512c52de783233fd2292951095e2456da843a4 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=08512c52de783233fd2292951095e2456da843a4 Author: Tim Rowley <timothy.o.row...@intel.com> Date: Thu Oct 19 17:33:37 2017 -0500 swr/rast: Widen fetch shader to SIMD16 Widen fetch shader to SIMD16, enable SIMD16 types in the jitter, and provide utility EXTRACT/INSERT SIMD8 <-> SIMD16 utility functions. Reviewed-by: Bruce Cherniak <bruce.chern...@intel.com> --- .../drivers/swr/rasterizer/jitter/builder.cpp | 20 .../drivers/swr/rasterizer/jitter/builder.h| 16 ++ .../drivers/swr/rasterizer/jitter/builder_misc.cpp | 52 .../drivers/swr/rasterizer/jitter/builder_misc.h | 9 .../drivers/swr/rasterizer/jitter/fetch_jit.cpp| 57 -- 5 files changed, 151 insertions(+), 3 deletions(-) diff --git a/src/gallium/drivers/swr/rasterizer/jitter/builder.cpp b/src/gallium/drivers/swr/rasterizer/jitter/builder.cpp index 6a33ec265f..4b83a3204c 100644 --- a/src/gallium/drivers/swr/rasterizer/jitter/builder.cpp +++ b/src/gallium/drivers/swr/rasterizer/jitter/builder.cpp @@ -41,6 +41,9 @@ namespace SwrJit : mpJitMgr(pJitMgr) { mVWidth = pJitMgr->mVWidth; +#if USE_SIMD16_BUILDER +mVWidth2 = pJitMgr->mVWidth * 2; +#endif mpIRBuilder = >mBuilder; @@ -65,17 +68,34 @@ namespace SwrJit mSimdFP32Ty = VectorType::get(mFP32Ty, mVWidth); mSimdVectorTy = ArrayType::get(mSimdFP32Ty, 4); mSimdVectorTRTy = ArrayType::get(mSimdFP32Ty, 5); +#if USE_SIMD16_BUILDER +mSimd2Int1Ty = VectorType::get(mInt1Ty, mVWidth2); +mSimd2Int16Ty = VectorType::get(mInt16Ty, mVWidth2); +mSimd2Int32Ty = VectorType::get(mInt32Ty, mVWidth2); +mSimd2Int64Ty = VectorType::get(mInt64Ty, mVWidth2); +mSimd2FP16Ty = VectorType::get(mFP16Ty, mVWidth2); +mSimd2FP32Ty = VectorType::get(mFP32Ty, mVWidth2); +mSimd2VectorTy = ArrayType::get(mSimd2FP32Ty, 4); +mSimd2VectorTRTy = ArrayType::get(mSimd2FP32Ty, 5); +#endif if (sizeof(uint32_t*) == 4) { mIntPtrTy = mInt32Ty; mSimdIntPtrTy = mSimdInt32Ty; +#if 
USE_SIMD16_BUILDER +mSimd2IntPtrTy = mSimd2Int32Ty; +#endif } else { SWR_ASSERT(sizeof(uint32_t*) == 8); + mIntPtrTy = mInt64Ty; mSimdIntPtrTy = mSimdInt64Ty; +#if USE_SIMD16_BUILDER +mSimd2IntPtrTy = mSimd2Int64Ty; +#endif } } } diff --git a/src/gallium/drivers/swr/rasterizer/jitter/builder.h b/src/gallium/drivers/swr/rasterizer/jitter/builder.h index 8210e49b18..c6ab64e06e 100644 --- a/src/gallium/drivers/swr/rasterizer/jitter/builder.h +++ b/src/gallium/drivers/swr/rasterizer/jitter/builder.h @@ -32,6 +32,8 @@ #include "JitManager.h" #include "common/formats.h" +#define USE_SIMD16_BUILDER 0 + namespace SwrJit { using namespace llvm; @@ -45,6 +47,9 @@ namespace SwrJit IRBuilder<>* mpIRBuilder; uint32_t mVWidth; +#if USE_SIMD16_BUILDER +uint32_t mVWidth2; +#endif // Built in types. Type*mVoidTy; @@ -70,6 +75,17 @@ namespace SwrJit Type*mSimdIntPtrTy; Type*mSimdVectorTy; Type*mSimdVectorTRTy; +#if USE_SIMD16_BUILDER +Type*mSimd2FP16Ty; +Type*mSimd2FP32Ty; +Type*mSimd2Int1Ty; +Type*mSimd2Int16Ty; +Type*mSimd2Int32Ty; +Type*mSimd2Int64Ty; +Type*mSimd2IntPtrTy; +Type*mSimd2VectorTy; +Type*mSimd2VectorTRTy; +#endif #include "gen_builder.hpp" #include "gen_builder_x86.hpp" diff --git a/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp b/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp index 9ca36b2467..daa9cb1ec1 100644 --- a/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp +++ b/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp @@ -231,6 +231,13 @@ namespace SwrJit return UndefValue::get(VectorType::get(mFP32Ty, mVWidth)); } +#if USE_SIMD16_BUILDER +Value *Builder::VUNDEF2_F() +{ +return UndefValue::get(VectorType::get(mFP32Ty, mVWidth2)); +} + +#endif Value *Builder::VUNDEF(Type* t) { return UndefValue::get(VectorType::get(t, mVWidth)); @@ -690,6 +697,51 @@ namespace SwrJit return vGather; } +#if USE_SIMD16_BUILDER +// +/// @brief +Value *Builder::EXTRACT(Value *a2, uint32_t imm) +{ +const uint32_t i0 = (imm > 0) ? mV
Mesa (master): swr/rast: Repair simd8 frontend code rot
Module: Mesa Branch: master Commit: 34838c221260f961140040416b1a84b490448ac1 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=34838c221260f961140040416b1a84b490448ac1 Author: Tim Rowley <timothy.o.row...@intel.com> Date: Fri Nov 10 16:45:38 2017 -0600 swr/rast: Repair simd8 frontend code rot Keep non-default simd8 frontend code running for comparison purposes. Reviewed-by: Bruce Cherniak <bruce.chern...@intel.com> --- src/gallium/drivers/swr/rasterizer/core/frontend.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gallium/drivers/swr/rasterizer/core/frontend.cpp b/src/gallium/drivers/swr/rasterizer/core/frontend.cpp index 2fe6cfcf69..5a61dc33a0 100644 --- a/src/gallium/drivers/swr/rasterizer/core/frontend.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/frontend.cpp @@ -956,7 +956,7 @@ static void GeometryShaderStage( PA_STATE_CUT gsPa(pDC, (uint8_t*)pGsBuffers->pGsTransposed, numEmittedVerts, pState->outputVertexSize, reinterpret_cast(pCutBuffer), numEmittedVerts, numAttribs, pState->outputTopology, processCutVerts, pa.numVertsPerPrim); #else -PA_STATE_CUT gsPa(pDC, (uint8_t*)pGsBuffers->pGsTransposed, numEmittedVerts, pState->outputVertexSize, pCutBuffer, numEmittedVerts, numAttribs, pState->outputTopology, processCutVerts); +PA_STATE_CUT gsPa(pDC, (uint8_t*)pGsBuffers->pGsTransposed, numEmittedVerts, pState->outputVertexSize, pCutBuffer, numEmittedVerts, numAttribs, pState->outputTopology, processCutVerts, pa.numVertsPerPrim); #endif while (gsPa.GetNextStreamOutput()) ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): swr/rast: Cache eventmanager
Module: Mesa Branch: master Commit: bc356b0fc0839b19eadbd96018f23c486ff00e84 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=bc356b0fc0839b19eadbd96018f23c486ff00e84 Author: Tim Rowley <timothy.o.row...@intel.com> Date: Tue Nov 7 13:50:11 2017 -0600 swr/rast: Cache eventmanager Reviewed-by: Bruce Cherniak <bruce.chern...@intel.com> --- src/gallium/drivers/swr/rasterizer/archrast/archrast.h | 1 + src/gallium/drivers/swr/rasterizer/core/api.cpp| 5 + src/gallium/drivers/swr/rasterizer/core/api.h | 3 +++ 3 files changed, 9 insertions(+) diff --git a/src/gallium/drivers/swr/rasterizer/archrast/archrast.h b/src/gallium/drivers/swr/rasterizer/archrast/archrast.h index fa88a4948c..c74d6ad909 100644 --- a/src/gallium/drivers/swr/rasterizer/archrast/archrast.h +++ b/src/gallium/drivers/swr/rasterizer/archrast/archrast.h @@ -29,6 +29,7 @@ #include "common/os.h" #include "gen_ar_event.hpp" +#include "eventmanager.h" namespace ArchRast { diff --git a/src/gallium/drivers/swr/rasterizer/core/api.cpp b/src/gallium/drivers/swr/rasterizer/core/api.cpp index 20eeb29681..9265440904 100644 --- a/src/gallium/drivers/swr/rasterizer/core/api.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/api.cpp @@ -143,6 +143,11 @@ HANDLE SwrCreateContext( #endif } +#if defined(KNOB_ENABLE_AR) +// cache the API thread event manager, for use with sim layer +pCreateInfo->hArEventManager = pContext->pArContext[16]; +#endif + // State setup AFTER context is fully initialized SetupDefaultState(pContext); diff --git a/src/gallium/drivers/swr/rasterizer/core/api.h b/src/gallium/drivers/swr/rasterizer/core/api.h index 60f56c6d76..c032b0bb10 100644 --- a/src/gallium/drivers/swr/rasterizer/core/api.h +++ b/src/gallium/drivers/swr/rasterizer/core/api.h @@ -213,6 +213,9 @@ struct SWR_CREATECONTEXT_INFO // Output: size required memory passed to for SwrSaveState / SwrRestoreState size_t contextSaveSize; +// ArchRast event manager. 
+HANDLE hArEventManager; + // Input (optional): Threading info that overrides any set KNOB values. SWR_THREADING_INFO* pThreadInfo; ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): swr/rast: Code style change (NFC)
Module: Mesa Branch: master Commit: d9de8f3122737517352eeaa4d1f2e79360526eff URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=d9de8f3122737517352eeaa4d1f2e79360526eff Author: Tim Rowley <timothy.o.row...@intel.com> Date: Mon Oct 23 15:10:35 2017 -0500 swr/rast: Code style change (NFC) Reviewed-by: Bruce Cherniak <bruce.chern...@intel.com> --- src/gallium/drivers/swr/rasterizer/core/frontend.cpp | 9 +++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/gallium/drivers/swr/rasterizer/core/frontend.cpp b/src/gallium/drivers/swr/rasterizer/core/frontend.cpp index e15b300979..2fe6cfcf69 100644 --- a/src/gallium/drivers/swr/rasterizer/core/frontend.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/frontend.cpp @@ -39,6 +39,7 @@ #include "tilemgr.h" #include "tessellator.h" #include +#include // /// @brief Helper macro to generate a bitmask @@ -770,6 +771,7 @@ void TransposeSOAtoAOS(uint8_t* pDst, uint8_t* pSrc, uint32_t numVerts, uint32_t } } + // /// @brief Implements GS stage. /// @param pDC - pointer to draw context. @@ -1335,8 +1337,11 @@ static void TessellationStages( SWR_ASSERT(pfnClipFunc); #if USE_SIMD16_FRONTEND -tessPa.useAlternateOffset = false; -pfnClipFunc(pDC, tessPa, workerId, prim_simd16, GenMask(numPrims), primID); + +{ +tessPa.useAlternateOffset = false; +pfnClipFunc(pDC, tessPa, workerId, prim_simd16, GenMask(numPrims), primID); +} #else pfnClipFunc(pDC, tessPa, workerId, prim, GenMask(tessPa.NumPrims()), _simd_set1_epi32(dsContext.PrimitiveID)); ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): swr/rast: Implement AVX-512 GATHERPS in SIMD16 fetch shader
Module: Mesa Branch: master Commit: 005d937e1533521e87f0119c400298c02f365bf1 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=005d937e1533521e87f0119c400298c02f365bf1 Author: Tim Rowley <timothy.o.row...@intel.com> Date: Wed Nov 8 19:17:24 2017 -0600 swr/rast: Implement AVX-512 GATHERPS in SIMD16 fetch shader Disabled for now. Reviewed-by: Bruce Cherniak <bruce.chern...@intel.com> --- .../swr/rasterizer/codegen/gen_llvm_ir_macros.py | 1 + .../drivers/swr/rasterizer/jitter/builder_misc.cpp | 126 +++-- .../drivers/swr/rasterizer/jitter/builder_misc.h | 31 - .../drivers/swr/rasterizer/jitter/fetch_jit.cpp| 91 --- 4 files changed, 220 insertions(+), 29 deletions(-) diff --git a/src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_ir_macros.py b/src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_ir_macros.py index ce892a9abe..44fc857371 100644 --- a/src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_ir_macros.py +++ b/src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_ir_macros.py @@ -44,6 +44,7 @@ inst_aliases = { intrinsics = [ ['VGATHERPD', 'x86_avx2_gather_d_pd_256', ['src', 'pBase', 'indices', 'mask', 'scale']], ['VGATHERPS', 'x86_avx2_gather_d_ps_256', ['src', 'pBase', 'indices', 'mask', 'scale']], +['VGATHERPS2', 'x86_avx512_gather_dps_512', ['src', 'pBase', 'indices', 'mask', 'scale']], ['VGATHERDD', 'x86_avx2_gather_d_d_256', ['src', 'pBase', 'indices', 'mask', 'scale']], ['VPSRLI', 'x86_avx2_psrli_d', ['src', 'imm']], ['VSQRTPS', 'x86_avx_sqrt_ps_256', ['a']], diff --git a/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp b/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp index bd3a52566d..8ffe05b41c 100644 --- a/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp +++ b/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp @@ -211,6 +211,28 @@ namespace SwrJit return ConstantVector::getSplat(mVWidth, cast(C(i))); } +#if USE_SIMD16_BUILDER +Value *Builder::VIMMED2_1(int i) +{ +return ConstantVector::getSplat(mVWidth2, 
cast(C(i))); +} + +Value *Builder::VIMMED2_1(uint32_t i) +{ +return ConstantVector::getSplat(mVWidth2, cast(C(i))); +} + +Value *Builder::VIMMED2_1(float i) +{ +return ConstantVector::getSplat(mVWidth2, cast(C(i))); +} + +Value *Builder::VIMMED2_1(bool i) +{ +return ConstantVector::getSplat(mVWidth2, cast(C(i))); +} + +#endif Value *Builder::VUNDEF_IPTR() { return UndefValue::get(VectorType::get(mInt32PtrTy,mVWidth)); @@ -237,6 +259,11 @@ namespace SwrJit return UndefValue::get(VectorType::get(mFP32Ty, mVWidth2)); } +Value *Builder::VUNDEF2_I() +{ +return UndefValue::get(VectorType::get(mInt32Ty, mVWidth2)); +} + #endif Value *Builder::VUNDEF(Type* t) { @@ -254,6 +281,19 @@ namespace SwrJit return VECTOR_SPLAT(mVWidth, src); } +#if USE_SIMD16_BUILDER +Value *Builder::VBROADCAST2(Value *src) +{ +// check if src is already a vector +if (src->getType()->isVectorTy()) +{ +return src; +} + +return VECTOR_SPLAT(mVWidth2, src); +} + +#endif uint32_t Builder::IMMED(Value* v) { SWR_ASSERT(isa(v)); @@ -554,16 +594,17 @@ namespace SwrJit /// @param vIndices - SIMD wide value of VB byte offsets /// @param vMask - SIMD wide mask that controls whether to access memory or the src values /// @param scale - value to scale indices by -Value *Builder::GATHERPS(Value* vSrc, Value* pBase, Value* vIndices, Value* vMask, uint8_t scale) +Value *Builder::GATHERPS(Value *vSrc, Value *pBase, Value *vIndices, Value *vMask, uint8_t scale) { -Value* vGather; +Value *vGather; // use avx2 gather instruction if available if(JM()->mArch.AVX2()) { // force mask to , required by vgather -vMask = BITCAST(vMask, mSimdFP32Ty); -vGather = VGATHERPS(vSrc,pBase,vIndices,vMask,C(scale)); +Value *mask = BITCAST(vMask, mSimdFP32Ty); + +vGather = VGATHERPS(vSrc, pBase, vIndices, mask, C(scale)); } else { @@ -598,6 +639,41 @@ namespace SwrJit return vGather; } +#if USE_SIMD16_BUILDER +Value *Builder::GATHERPS2(Value *vSrc, Value *pBase, Value *vIndices, Value *vMask, uint8_t scale) +{ +Value *vGather = 
VUNDEF2_F(); + +// use avx512 gather instruction if available +if (JM()->mArch.AVX512F()) +{ +// force mask to , required by vgather2 +Value *mask = BITCAST(MASK2(vMask), mInt16Ty); + +vGather = VGATHERPS2(vSrc, pBase, vIndices, mask, C((uint32_t)scale)); +} +else +{ +Value *src0 = EXTRACT2_F(vSrc, 0);
Mesa (master): swr/rast: Simplify GATHER* jit builder api
Module: Mesa Branch: master Commit: 2e244c7168a1130a18c8d8a901161db9b6cbaac3 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=2e244c7168a1130a18c8d8a901161db9b6cbaac3 Author: Tim Rowley <timothy.o.row...@intel.com> Date: Wed Nov 8 14:07:33 2017 -0600 swr/rast: Simplify GATHER* jit builder api General cleanup, and prep work for possibly moving to llvm masked gather intrinsic. Reviewed-by: Bruce Cherniak <bruce.chern...@intel.com> --- .../drivers/swr/rasterizer/jitter/builder_misc.cpp | 32 ++--- .../drivers/swr/rasterizer/jitter/builder_misc.h | 6 +-- .../drivers/swr/rasterizer/jitter/fetch_jit.cpp| 56 +++--- src/gallium/drivers/swr/swr_shader.cpp | 2 +- 4 files changed, 48 insertions(+), 48 deletions(-) diff --git a/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp b/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp index daa9cb1ec1..bd3a52566d 100644 --- a/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp +++ b/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp @@ -554,7 +554,7 @@ namespace SwrJit /// @param vIndices - SIMD wide value of VB byte offsets /// @param vMask - SIMD wide mask that controls whether to access memory or the src values /// @param scale - value to scale indices by -Value *Builder::GATHERPS(Value* vSrc, Value* pBase, Value* vIndices, Value* vMask, Value* scale) +Value *Builder::GATHERPS(Value* vSrc, Value* pBase, Value* vIndices, Value* vMask, uint8_t scale) { Value* vGather; @@ -563,7 +563,7 @@ namespace SwrJit { // force mask to , required by vgather vMask = BITCAST(vMask, mSimdFP32Ty); -vGather = VGATHERPS(vSrc,pBase,vIndices,vMask,scale); +vGather = VGATHERPS(vSrc,pBase,vIndices,vMask,C(scale)); } else { @@ -574,7 +574,7 @@ namespace SwrJit STORE(vSrc, vSrcPtr); vGather = VUNDEF_F(); -Value *vScaleVec = VBROADCAST(Z_EXT(scale,mInt32Ty)); +Value *vScaleVec = VIMMED1((uint32_t)scale); Value *vOffsets = MUL(vIndices,vScaleVec); Value *mask = MASK(vMask); for(uint32_t i = 0; i < mVWidth; ++i) @@ -606,14 
+606,14 @@ namespace SwrJit /// @param vIndices - SIMD wide value of VB byte offsets /// @param vMask - SIMD wide mask that controls whether to access memory or the src values /// @param scale - value to scale indices by -Value *Builder::GATHERDD(Value* vSrc, Value* pBase, Value* vIndices, Value* vMask, Value* scale) +Value *Builder::GATHERDD(Value* vSrc, Value* pBase, Value* vIndices, Value* vMask, uint8_t scale) { Value* vGather; // use avx2 gather instruction if available if(JM()->mArch.AVX2()) { -vGather = VGATHERDD(vSrc, pBase, vIndices, vMask, scale); +vGather = VGATHERDD(vSrc, pBase, vIndices, vMask, C(scale)); } else { @@ -624,7 +624,7 @@ namespace SwrJit STORE(vSrc, vSrcPtr); vGather = VUNDEF_I(); -Value *vScaleVec = VBROADCAST(Z_EXT(scale, mInt32Ty)); +Value *vScaleVec = VIMMED1((uint32_t)scale); Value *vOffsets = MUL(vIndices, vScaleVec); Value *mask = MASK(vMask); for(uint32_t i = 0; i < mVWidth; ++i) @@ -656,14 +656,14 @@ namespace SwrJit /// @param vIndices - SIMD wide value of VB byte offsets /// @param vMask - SIMD wide mask that controls whether to access memory or the src values /// @param scale - value to scale indices by -Value *Builder::GATHERPD(Value* vSrc, Value* pBase, Value* vIndices, Value* vMask, Value* scale) +Value *Builder::GATHERPD(Value* vSrc, Value* pBase, Value* vIndices, Value* vMask, uint8_t scale) { Value* vGather; // use avx2 gather instruction if available if(JM()->mArch.AVX2()) { -vGather = VGATHERPD(vSrc, pBase, vIndices, vMask, scale); +vGather = VGATHERPD(vSrc, pBase, vIndices, vMask, C(scale)); } else { @@ -674,7 +674,7 @@ namespace SwrJit STORE(vSrc, vSrcPtr); vGather = UndefValue::get(VectorType::get(mDoubleTy, 4)); -Value *vScaleVec = VECTOR_SPLAT(4, Z_EXT(scale,mInt32Ty)); +Value *vScaleVec = VECTOR_SPLAT(4, C((uint32_t)scale)); Value *vOffsets = MUL(vIndices,vScaleVec); Value *mask = MASK(vMask); for(uint32_t i = 0; i < mVWidth/2; ++i) @@ -1016,7 +1016,7 @@ namespace SwrJit // save mask as it is zero'd out after each 
gather vMask = mask; -vGatherResult[0] = GATHERPS(vGatherMaskedVal, pSrcBase, byteOffsets, vMask, C((char)1)); +vGatherResult[0] = GATHERPS(vGatherMaskedVal, pS
Mesa (master): swr/rast: Faster emulated simd16 permute
Module: Mesa Branch: master Commit: d8489517a572c7e5c5405ebf510db9d20b1e2591 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=d8489517a572c7e5c5405ebf510db9d20b1e2591 Author: Tim Rowley <timothy.o.row...@intel.com> Date: Mon Nov 13 18:39:38 2017 -0600 swr/rast: Faster emulated simd16 permute Speed up simd16 frontend (default) on avx/avx2 platforms; fixes performance regression caused by switch to simdlib. Reviewed-by: Bruce Cherniak <bruce.chern...@intel.com> Cc: mesa-sta...@lists.freedesktop.org --- .../swr/rasterizer/common/simdlib_512_emu.inl | 34 +++--- 1 file changed, 11 insertions(+), 23 deletions(-) diff --git a/src/gallium/drivers/swr/rasterizer/common/simdlib_512_emu.inl b/src/gallium/drivers/swr/rasterizer/common/simdlib_512_emu.inl index d6af7b1c64..44eba0b126 100644 --- a/src/gallium/drivers/swr/rasterizer/common/simdlib_512_emu.inl +++ b/src/gallium/drivers/swr/rasterizer/common/simdlib_512_emu.inl @@ -521,36 +521,24 @@ SIMD_IWRAPPER_2(packus_epi32); // See documentation for _mm256_packus_epi32 static SIMDINLINE Integer SIMDCALL permute_epi32(Integer const , Integer const ) // return a[swiz[i]] for each 32-bit lane i (int32) { -Integer result; - -// Ugly slow implementation -uint32_t const *pA = reinterpret_cast(); -uint32_t const *pSwiz = reinterpret_cast(); -uint32_t *pResult = reinterpret_cast(); - -for (uint32_t i = 0; i < SIMD_WIDTH; ++i) -{ -pResult[i] = pA[0xF & pSwiz[i]]; -} - -return result; +return castps_si(permute_ps(castsi_ps(a), swiz)); } static SIMDINLINE Float SIMDCALL permute_ps(Float const , Integer const )// return a[swiz[i]] for each 32-bit lane i (float) { -Float result; +const auto mask = SIMD256T::set1_epi32(7); -// Ugly slow implementation -float const *pA = reinterpret_cast(); -uint32_t const *pSwiz = reinterpret_cast(); -float *pResult = reinterpret_cast(); +auto lolo = SIMD256T::permute_ps(a.v8[0], SIMD256T::and_si(swiz.v8[0], mask)); +auto lohi = SIMD256T::permute_ps(a.v8[1], SIMD256T::and_si(swiz.v8[0], mask)); -for 
(uint32_t i = 0; i < SIMD_WIDTH; ++i) -{ -pResult[i] = pA[0xF & pSwiz[i]]; -} +auto hilo = SIMD256T::permute_ps(a.v8[0], SIMD256T::and_si(swiz.v8[1], mask)); +auto hihi = SIMD256T::permute_ps(a.v8[1], SIMD256T::and_si(swiz.v8[1], mask)); -return result; +return Float +{ +SIMD256T::blendv_ps(lolo, lohi, SIMD256T::castsi_ps(SIMD256T::cmpgt_epi32(swiz.v8[0], mask))), +SIMD256T::blendv_ps(hilo, hihi, SIMD256T::castsi_ps(SIMD256T::cmpgt_epi32(swiz.v8[1], mask))), +}; } // All of the 512-bit permute2f128_XX intrinsics do the following: ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): swr/rast: Use gather instruction for i32gather_ps on simd16/avx512
Module: Mesa Branch: master Commit: 439904847e9c2970494c18e8c47bd6c38c0ed8ab URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=439904847e9c2970494c18e8c47bd6c38c0ed8ab Author: Tim Rowley <timothy.o.row...@intel.com> Date: Mon Nov 13 15:11:21 2017 -0600 swr/rast: Use gather instruction for i32gather_ps on simd16/avx512 Speed up avx512 platforms; fixes performance regression caused by switch to simdlib. Reviewed-by: Bruce Cherniak <bruce.chern...@intel.com> Cc: mesa-sta...@lists.freedesktop.org --- .../drivers/swr/rasterizer/common/simdlib_512_avx512.inl | 12 +--- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512.inl b/src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512.inl index 95e4c31909..c13b9f616a 100644 --- a/src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512.inl +++ b/src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512.inl @@ -484,17 +484,7 @@ SIMD_WRAPPER_2(unpacklo_ps); template static SIMDINLINE Float SIMDCALL i32gather_ps(float const* p, Integer idx) // return *(float*)(((int8*)p) + (idx * ScaleT)) { -uint32_t *pOffsets = (uint32_t*) -Float vResult; -float* pResult = (float*) -for (uint32_t i = 0; i < SIMD_WIDTH; ++i) -{ -uint32_t offset = pOffsets[i]; -offset = offset * static_cast(ScaleT); -pResult[i] = *(float const*)(((uint8_t const*)p + offset)); -} - -return vResult; +return _mm512_i32gather_ps(idx, p, static_cast(ScaleT)); } static SIMDINLINE Float SIMDCALL load1_ps(float const *p) // return *p (broadcast 1 value to all elements) ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): gallivm: allow arch rounding with avx512
Module: Mesa Branch: master Commit: 0023b5ae67255000e6de93f6e17f74895e7677e0 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=0023b5ae67255000e6de93f6e17f74895e7677e0 Author: Tim Rowley <timothy.o.row...@intel.com> Date: Wed Nov 1 13:22:47 2017 -0500 gallivm: allow arch rounding with avx512 Fixes piglit vs-roundeven-{float,vec[234]} with simd16 VS. Reviewed-by: Roland Scheidegger <srol...@vmware.com> --- src/gallium/auxiliary/gallivm/lp_bld_arit.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c b/src/gallium/auxiliary/gallivm/lp_bld_arit.c index cf1958b3b6..a1edd349f1 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c @@ -1953,7 +1953,8 @@ arch_rounding_available(const struct lp_type type) { if ((util_cpu_caps.has_sse4_1 && (type.length == 1 || type.width*type.length == 128)) || - (util_cpu_caps.has_avx && type.width*type.length == 256)) + (util_cpu_caps.has_avx && type.width*type.length == 256) || + (util_cpu_caps.has_avx512f && type.width*type.length == 512)) return TRUE; else if ((util_cpu_caps.has_altivec && (type.width == 32 && type.length == 4))) ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): swr/rast: Fix indentation
Module: Mesa Branch: master Commit: 04ea03d99d8810a7df5aca059ff00c26ecaa71ee URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=04ea03d99d8810a7df5aca059ff00c26ecaa71ee Author: Tim Rowley <timothy.o.row...@intel.com> Date: Fri Oct 6 13:50:14 2017 -0500 swr/rast: Fix indentation Reviewed-by: Bruce Cherniak <bruce.chern...@intel.com> --- src/gallium/drivers/swr/rasterizer/core/state.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gallium/drivers/swr/rasterizer/core/state.h b/src/gallium/drivers/swr/rasterizer/core/state.h index f7c9308be0..d9450fcbd7 100644 --- a/src/gallium/drivers/swr/rasterizer/core/state.h +++ b/src/gallium/drivers/swr/rasterizer/core/state.h @@ -820,7 +820,7 @@ struct SWR_TS_STATE uint32_tnumDsOutputAttribs; // Offset to the start of the attributes of the input vertices, in simdvector units -uint32_t vertexAttribOffset; +uint32_tvertexAttribOffset; }; // output merger state ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): swr/rast: Add api to override draws in flight
Module: Mesa Branch: master Commit: 028ffa5e1820707ef0cab52853e36a259b00c849 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=028ffa5e1820707ef0cab52853e36a259b00c849 Author: Tim Rowley <timothy.o.row...@intel.com> Date: Tue Oct 17 15:02:53 2017 -0500 swr/rast: Add api to override draws in flight Allow draws in flight to be overridden via SWR_CREATECONTEXT_INFO. Patch by Jan Zielinski. Reviewed-by: Bruce Cherniak <bruce.chern...@intel.com> --- src/gallium/drivers/swr/rasterizer/core/api.cpp| 26 +- src/gallium/drivers/swr/rasterizer/core/api.h | 4 src/gallium/drivers/swr/rasterizer/core/context.h | 2 ++ .../drivers/swr/rasterizer/core/threads.cpp| 18 +++ 4 files changed, 31 insertions(+), 19 deletions(-) diff --git a/src/gallium/drivers/swr/rasterizer/core/api.cpp b/src/gallium/drivers/swr/rasterizer/core/api.cpp index 632309821f..20eeb29681 100644 --- a/src/gallium/drivers/swr/rasterizer/core/api.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/api.cpp @@ -74,13 +74,19 @@ HANDLE SwrCreateContext( pContext->privateStateSize = pCreateInfo->privateStateSize; -pContext->dcRing.Init(KNOB_MAX_DRAWS_IN_FLIGHT); -pContext->dsRing.Init(KNOB_MAX_DRAWS_IN_FLIGHT); +pContext->MAX_DRAWS_IN_FLIGHT = KNOB_MAX_DRAWS_IN_FLIGHT; +if (pCreateInfo->MAX_DRAWS_IN_FLIGHT != 0) +{ +pContext->MAX_DRAWS_IN_FLIGHT = pCreateInfo->MAX_DRAWS_IN_FLIGHT; +} + +pContext->dcRing.Init(pContext->MAX_DRAWS_IN_FLIGHT); +pContext->dsRing.Init(pContext->MAX_DRAWS_IN_FLIGHT); -pContext->pMacroTileManagerArray = (MacroTileMgr*)AlignedMalloc(sizeof(MacroTileMgr) * KNOB_MAX_DRAWS_IN_FLIGHT, 64); -pContext->pDispatchQueueArray = (DispatchQueue*)AlignedMalloc(sizeof(DispatchQueue) * KNOB_MAX_DRAWS_IN_FLIGHT, 64); +pContext->pMacroTileManagerArray = (MacroTileMgr*)AlignedMalloc(sizeof(MacroTileMgr) * pContext->MAX_DRAWS_IN_FLIGHT, 64); +pContext->pDispatchQueueArray = (DispatchQueue*)AlignedMalloc(sizeof(DispatchQueue) * pContext->MAX_DRAWS_IN_FLIGHT, 64); -for (uint32_t dc = 0; dc < 
KNOB_MAX_DRAWS_IN_FLIGHT; ++dc) +for (uint32_t dc = 0; dc < pContext->MAX_DRAWS_IN_FLIGHT; ++dc) { pContext->dcRing[dc].pArena = new CachingArena(pContext->cachingArenaAllocator); new (>pMacroTileManagerArray[dc]) MacroTileMgr(*pContext->dcRing[dc].pArena); @@ -173,7 +179,7 @@ template void QueueWork(SWR_CONTEXT *pContext) { DRAW_CONTEXT* pDC = pContext->pCurDrawContext; -uint32_t dcIndex = pDC->drawId % KNOB_MAX_DRAWS_IN_FLIGHT; +uint32_t dcIndex = pDC->drawId % pContext->MAX_DRAWS_IN_FLIGHT; if (IsDraw) { @@ -257,7 +263,7 @@ DRAW_CONTEXT* GetDrawContext(SWR_CONTEXT *pContext, bool isSplitDraw = false) } uint64_t curDraw = pContext->dcRing.GetHead(); -uint32_t dcIndex = curDraw % KNOB_MAX_DRAWS_IN_FLIGHT; +uint32_t dcIndex = curDraw % pContext->MAX_DRAWS_IN_FLIGHT; if ((pContext->frameCount - pContext->lastFrameChecked) > 2 || (curDraw - pContext->lastDrawChecked) > 0x1) @@ -273,7 +279,7 @@ DRAW_CONTEXT* GetDrawContext(SWR_CONTEXT *pContext, bool isSplitDraw = false) pContext->pCurDrawContext = pCurDrawContext; // Assign next available entry in DS ring to this DC. -uint32_t dsIndex = pContext->curStateId % KNOB_MAX_DRAWS_IN_FLIGHT; +uint32_t dsIndex = pContext->curStateId % pContext->MAX_DRAWS_IN_FLIGHT; pCurDrawContext->pState = >dsRing[dsIndex]; // Copy previous state to current state. 
@@ -361,7 +367,7 @@ void SwrDestroyContext(HANDLE hContext) DestroyThreadPool(pContext, >threadPool); // free the fifos -for (uint32_t i = 0; i < KNOB_MAX_DRAWS_IN_FLIGHT; ++i) +for (uint32_t i = 0; i < pContext->MAX_DRAWS_IN_FLIGHT; ++i) { AlignedFree(pContext->dcRing[i].dynState.pStats); delete pContext->dcRing[i].pArena; @@ -1481,7 +1487,7 @@ void SwrDispatch( pTaskData->threadGroupCountZ = threadGroupCountZ; uint32_t totalThreadGroups = threadGroupCountX * threadGroupCountY * threadGroupCountZ; -uint32_t dcIndex = pDC->drawId % KNOB_MAX_DRAWS_IN_FLIGHT; +uint32_t dcIndex = pDC->drawId % pContext->MAX_DRAWS_IN_FLIGHT; pDC->pDispatch = >pDispatchQueueArray[dcIndex]; pDC->pDispatch->initialize(totalThreadGroups, pTaskData, ); diff --git a/src/gallium/drivers/swr/rasterizer/core/api.h b/src/gallium/drivers/swr/rasterizer/core/api.h index 577cfb157a..60f56c6d76 100644 --- a/src/gallium/drivers/swr/rasterizer/core/api.h +++ b/src/gallium/drivers/swr/rasterizer/core/api.h @@ -215,6 +215,10 @@ struct SWR_CREATECONTEXT_INFO // Input (optional): Threading info that overrides any set KNOB values.
Mesa (master): swr: knob overrides for Intel Xeon Phi
Module: Mesa Branch: master Commit: bfda35c8dd4bc602a3b174377dfea92319438e2b URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=bfda35c8dd4bc602a3b174377dfea92319438e2b Author: Tim Rowley <timothy.o.row...@intel.com> Date: Tue Oct 17 15:11:19 2017 -0500 swr: knob overrides for Intel Xeon Phi Architecture benefits from having more threads/work outstanding. Patch by Jan Zielinski. Reviewed-by: Bruce Cherniak <bruce.chern...@intel.com> --- src/gallium/drivers/swr/swr_context.cpp | 27 +++ src/gallium/drivers/swr/swr_context.h | 2 ++ src/gallium/drivers/swr/swr_loader.cpp | 4 src/gallium/drivers/swr/swr_scratch.cpp | 2 +- src/gallium/drivers/swr/swr_screen.h| 3 +++ 5 files changed, 37 insertions(+), 1 deletion(-) diff --git a/src/gallium/drivers/swr/swr_context.cpp b/src/gallium/drivers/swr/swr_context.cpp index 34d9a259fe..b61720cd30 100644 --- a/src/gallium/drivers/swr/swr_context.cpp +++ b/src/gallium/drivers/swr/swr_context.cpp @@ -39,6 +39,7 @@ #include "api.h" #include "backend.h" +#include "knobs.h" static struct pipe_surface * swr_create_surface(struct pipe_context *pipe, @@ -483,6 +484,8 @@ swr_create_context(struct pipe_screen *p_screen, void *priv, unsigned flags) ctx->blendJIT = new std::unordered_map<BLEND_COMPILE_STATE, PFN_BLEND_JIT_FUNC>; + ctx->max_draws_in_flight = KNOB_MAX_DRAWS_IN_FLIGHT; + SWR_CREATECONTEXT_INFO createInfo; memset(, 0, sizeof(createInfo)); createInfo.privateStateSize = sizeof(swr_draw_context); @@ -491,6 +494,30 @@ swr_create_context(struct pipe_screen *p_screen, void *priv, unsigned flags) createInfo.pfnClearTile = swr_StoreHotTileClear; createInfo.pfnUpdateStats = swr_UpdateStats; createInfo.pfnUpdateStatsFE = swr_UpdateStatsFE; + + SWR_THREADING_INFO threadingInfo {0}; + + threadingInfo.MAX_WORKER_THREADS= KNOB_MAX_WORKER_THREADS; + threadingInfo.MAX_NUMA_NODES= KNOB_MAX_NUMA_NODES; + threadingInfo.MAX_CORES_PER_NUMA_NODE = KNOB_MAX_CORES_PER_NUMA_NODE; + threadingInfo.MAX_THREADS_PER_CORE = KNOB_MAX_THREADS_PER_CORE; + 
threadingInfo.SINGLE_THREADED = KNOB_SINGLE_THREADED; + + // Use non-standard settings for KNL + if (swr_screen(p_screen)->is_knl) + { + if (nullptr == getenv("KNOB_MAX_THREADS_PER_CORE")) + threadingInfo.MAX_THREADS_PER_CORE = 2; + + if (nullptr == getenv("KNOB_MAX_DRAWS_IN_FLIGHT")) + { + ctx->max_draws_in_flight = 2048; + createInfo.MAX_DRAWS_IN_FLIGHT = ctx->max_draws_in_flight; + } + } + + createInfo.pThreadInfo = + ctx->swrContext = ctx->api.pfnSwrCreateContext(); ctx->api.pfnSwrInit(); diff --git a/src/gallium/drivers/swr/swr_context.h b/src/gallium/drivers/swr/swr_context.h index 8bed78f869..5c280ee365 100644 --- a/src/gallium/drivers/swr/swr_context.h +++ b/src/gallium/drivers/swr/swr_context.h @@ -173,6 +173,8 @@ struct swr_context { unsigned dirty; /**< Mask of SWR_NEW_x flags */ SWR_INTERFACE api; + + uint32_t max_draws_in_flight; }; static INLINE struct swr_context * diff --git a/src/gallium/drivers/swr/swr_loader.cpp b/src/gallium/drivers/swr/swr_loader.cpp index e205fe2d7e..9d6f918e34 100644 --- a/src/gallium/drivers/swr/swr_loader.cpp +++ b/src/gallium/drivers/swr/swr_loader.cpp @@ -38,11 +38,14 @@ swr_create_screen(struct sw_winsys *winsys) util_cpu_detect(); + bool is_knl = false; + if (!strlen(filename) && util_cpu_caps.has_avx512f && util_cpu_caps.has_avx512er) { #if HAVE_SWR_KNL fprintf(stderr, "KNL "); sprintf(filename, "%s%s%s", UTIL_DL_PREFIX, "swrKNL", UTIL_DL_EXT); + is_knl = true; #else fprintf(stderr, "KNL (not built) "); #endif @@ -99,6 +102,7 @@ swr_create_screen(struct sw_winsys *winsys) struct pipe_screen *screen = swr_create_screen_internal(winsys); swr_screen(screen)->pfnSwrGetInterface = (PFNSwrGetInterface)pApiProc; + swr_screen(screen)->is_knl = is_knl; return screen; } diff --git a/src/gallium/drivers/swr/swr_scratch.cpp b/src/gallium/drivers/swr/swr_scratch.cpp index d298a48dc0..8afe73c30e 100644 --- a/src/gallium/drivers/swr/swr_scratch.cpp +++ b/src/gallium/drivers/swr/swr_scratch.cpp @@ -45,7 +45,7 @@ 
swr_copy_to_scratch_space(struct swr_context *ctx, ptr = ctx->api.pfnSwrAllocDrawContextMemory(ctx->swrContext, size, 4); } else { /* Allocate enough so that MAX_DRAWS_IN_FLIGHT sets fit. */ - unsigned int max_size_in_flight = size * KNOB_MAX_DRAWS_IN_FLIGHT; + uint32_t max_size_in_flight = size * ctx->max_draws_in_flight; /* Need to grow space */ if (max_size_in_flight > space->current_size) { diff --git a/src/gallium/drivers/swr/swr_screen.h b/src/gallium/drivers/swr/swr_scre
Mesa (master): swr/rast: Miscellaneous viewport array code changes
Module: Mesa Branch: master Commit: 62e2d657c868ee7c7ad6a24269c81a9827c66b8f URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=62e2d657c868ee7c7ad6a24269c81a9827c66b8f Author: Tim Rowley <timothy.o.row...@intel.com> Date: Fri Sep 29 14:45:16 2017 -0500 swr/rast: Miscellaneous viewport array code changes Reviewed-by: Bruce Cherniak <bruce.chern...@intel.com> --- src/gallium/drivers/swr/rasterizer/core/binner.cpp | 45 -- src/gallium/drivers/swr/rasterizer/core/clip.h | 14 +-- .../drivers/swr/rasterizer/core/frontend.cpp | 22 ++- src/gallium/drivers/swr/rasterizer/core/pa.h | 24 ++-- src/gallium/drivers/swr/rasterizer/core/pa_avx.cpp | 4 +- 5 files changed, 71 insertions(+), 38 deletions(-) diff --git a/src/gallium/drivers/swr/rasterizer/core/binner.cpp b/src/gallium/drivers/swr/rasterizer/core/binner.cpp index e08e4896f3..b624ae69b3 100644 --- a/src/gallium/drivers/swr/rasterizer/core/binner.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/binner.cpp @@ -450,16 +450,22 @@ void SIMDCALL BinTrianglesImpl( typename SIMD_T::Float vRecipW1 = SIMD_T::set1_ps(1.0f); typename SIMD_T::Float vRecipW2 = SIMD_T::set1_ps(1.0f); -typename SIMD_T::Integer viewportIdx = SIMD_T::set1_epi32(0); +typename SIMD_T::Integer viewportIdx = SIMD_T::setzero_si(); +typename SIMD_T::Vec4 vpiAttrib[3]; +typename SIMD_T::Integer vpai = SIMD_T::setzero_si(); if (state.backendState.readViewportArrayIndex) { -typename SIMD_T::Vec4 vpiAttrib[3]; pa.Assemble(VERTEX_SGV_SLOT, vpiAttrib); +vpai = SIMD_T::castps_si(vpiAttrib[0][VERTEX_SGV_VAI_COMP]); +} + + +if (state.backendState.readViewportArrayIndex) // VPAIOffsets are guaranteed 0-15 -- no OOB issues if they are offsets from 0 +{ // OOB indices => forced to zero. 
-typename SIMD_T::Integer vpai = SIMD_T::castps_si(vpiAttrib[0][VERTEX_SGV_VAI_COMP]); -vpai = SIMD_T::max_epi32(SIMD_T::setzero_si(), vpai); +vpai = SIMD_T::max_epi32(vpai, SIMD_T::setzero_si()); typename SIMD_T::Integer vNumViewports = SIMD_T::set1_epi32(KNOB_NUM_VIEWPORTS_SCISSORS); typename SIMD_T::Integer vClearMask = SIMD_T::cmplt_epi32(vpai, vNumViewports); viewportIdx = SIMD_T::and_si(vClearMask, vpai); @@ -815,6 +821,7 @@ endBinTriangles: SIMD_T::store_si(reinterpret_cast(aRTAI), SIMD_T::setzero_si()); } + // scan remaining valid triangles and bin each separately while (_BitScanForward(, triMask)) { @@ -1299,15 +1306,22 @@ void BinPointsImpl( const SWR_RASTSTATE& rastState = state.rastState; // Read back viewport index if required -typename SIMD_T::Integer viewportIdx = SIMD_T::set1_epi32(0); +typename SIMD_T::Integer viewportIdx = SIMD_T::setzero_si(); +typename SIMD_T::Vec4 vpiAttrib[1]; +typename SIMD_T::Integer vpai = SIMD_T::setzero_si(); + if (state.backendState.readViewportArrayIndex) { -typename SIMD_T::Vec4 vpiAttrib[1]; pa.Assemble(VERTEX_SGV_SLOT, vpiAttrib); +vpai = SIMD_T::castps_si(vpiAttrib[0][VERTEX_SGV_VAI_COMP]); +} + + +if (state.backendState.readViewportArrayIndex) // VPAIOffsets are guaranteed 0-15 -- no OOB issues if they are offsets from 0 +{ // OOB indices => forced to zero. 
-typename SIMD_T::Integer vpai = SIMD_T::castps_si(vpiAttrib[0][VERTEX_SGV_VAI_COMP]); -vpai = SIMD_T::max_epi32(SIMD_T::setzero_si(), vpai); +vpai = SIMD_T::max_epi32(vpai, SIMD_T::setzero_si()); typename SIMD_T::Integer vNumViewports = SIMD_T::set1_epi32(KNOB_NUM_VIEWPORTS_SCISSORS); typename SIMD_T::Integer vClearMask = SIMD_T::cmplt_epi32(vpai, vNumViewports); viewportIdx = SIMD_T::and_si(vClearMask, vpai); @@ -1626,15 +1640,22 @@ void SIMDCALL BinLinesImpl( typename SIMD_T::Float vRecipW[2] = { SIMD_T::set1_ps(1.0f), SIMD_T::set1_ps(1.0f) }; -typename SIMD_T::Integer viewportIdx = SIMD_T::set1_epi32(0); +typename SIMD_T::Integer viewportIdx = SIMD_T::setzero_si(); +typename SIMD_T::Vec4 vpiAttrib[2]; +typename SIMD_T::Integer vpai = SIMD_T::setzero_si(); + if (state.backendState.readViewportArrayIndex) { -typename SIMD_T::Vec4 vpiAttrib[2]; pa.Assemble(VERTEX_SGV_SLOT, vpiAttrib); +vpai = SIMD_T::castps_si(vpiAttrib[0][VERTEX_SGV_VAI_COMP]); +} + + +if (state.backendState.readViewportArrayIndex) // VPAIOffsets are guaranteed 0-15 -- no OOB issues if they are offsets from 0 +{ // OOB indices => forced to zero. -typename SIMD_T::Integer vpai = SIMD_T::castps_si(vpiAttrib[0][VERTEX_SGV_VAI_COMP]); -vpai = SIMD_T::max_epi32(SIMD_T::setzero_si(), vpai); +vpai = SIMD_T::max_epi32(vpai, SIMD_T::setzero_si()); typename SIMD_T::Integer vNumViewports = SIMD_T::set1_epi32(KNOB_NUM_VIEWPORTS_SCI
Mesa (master): swr/rast: Change DS memory allocation
Module: Mesa Branch: master Commit: 49090ccf54798f7c9081f9b20d0ed0d0433ec026 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=49090ccf54798f7c9081f9b20d0ed0d0433ec026 Author: Tim Rowley <timothy.o.row...@intel.com> Date: Wed Oct 11 16:21:21 2017 -0500 swr/rast: Change DS memory allocation Reviewed-by: Bruce Cherniak <bruce.chern...@intel.com> --- src/gallium/drivers/swr/rasterizer/core/frontend.cpp | 4 ++-- src/gallium/drivers/swr/rasterizer/core/state.h | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/gallium/drivers/swr/rasterizer/core/frontend.cpp b/src/gallium/drivers/swr/rasterizer/core/frontend.cpp index a803512b7c..211e9e4b07 100644 --- a/src/gallium/drivers/swr/rasterizer/core/frontend.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/frontend.cpp @@ -1212,9 +1212,9 @@ static void TessellationStages( // Allocate DS Output memory uint32_t requiredDSVectorInvocations = AlignUp(tsData.NumDomainPoints, KNOB_SIMD_WIDTH) / KNOB_SIMD_WIDTH; #if USE_SIMD16_FRONTEND -size_t requiredAllocSize = sizeof(simdvector) * RoundUpEven(requiredDSVectorInvocations) * tsState.numDsOutputAttribs; // simd8 -> simd16, padding +size_t requiredAllocSize = sizeof(simdvector) * RoundUpEven(requiredDSVectorInvocations) * tsState.dsAllocationSize; // simd8 -> simd16, padding #else -size_t requiredDSOutputVectors = requiredDSVectorInvocations * tsState.numDsOutputAttribs; +size_t requiredDSOutputVectors = requiredDSVectorInvocations * tsState.dsAllocationSize; size_t requiredAllocSize = sizeof(simdvector) * requiredDSOutputVectors; #endif if (requiredAllocSize > gt_pTessellationThreadData->dsOutputAllocSize) diff --git a/src/gallium/drivers/swr/rasterizer/core/state.h b/src/gallium/drivers/swr/rasterizer/core/state.h index d9450fcbd7..2af384fd90 100644 --- a/src/gallium/drivers/swr/rasterizer/core/state.h +++ b/src/gallium/drivers/swr/rasterizer/core/state.h @@ -818,6 +818,7 @@ struct SWR_TS_STATE uint32_tnumHsInputAttribs; uint32_tnumHsOutputAttribs; 
uint32_t numDsOutputAttribs; +uint32_t dsAllocationSize; // Offset to the start of the attributes of the input vertices, in simdvector units uint32_t vertexAttribOffset; ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): gallium: allow 512-bit vectors
Module: Mesa Branch: master Commit: 9cad9cbaf89b50ec9e15a7e0fef35fc2e4270550 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=9cad9cbaf89b50ec9e15a7e0fef35fc2e4270550 Author: Tim Rowley <timothy.o.row...@intel.com> Date: Tue Oct 10 11:07:11 2017 -0500 gallium: allow 512-bit vectors Increase the max allowed vector size from 256 to 512. No piglit llvmpipe regressions running on avx2. Reviewed-by: Jose Fonseca <jfons...@vmware.com> Reviewed-by: Roland Scheidegger <srol...@vmware.com> --- src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c | 14 +++--- src/gallium/auxiliary/gallivm/lp_bld_type.h | 4 ++-- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c index de18f629cd..97efc3a399 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c @@ -1272,9 +1272,9 @@ emit_fetch_constant( /** * Fetch 64-bit values from two separate channels. * 64-bit values are stored split across two channels, like xy and zw. - * This function creates a set of 16 floats, + * This function creates a set of vec_length*2 floats, * extracts the values from the two channels, - * puts them in the correct place, then casts to 8 64-bits. + * puts them in the correct place, then casts to vec_length 64-bits. 
*/ static LLVMValueRef emit_fetch_64bit( @@ -1289,9 +1289,9 @@ emit_fetch_64bit( LLVMValueRef res; struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype); int i; - LLVMValueRef shuffles[16]; + LLVMValueRef shuffles[2 * (LP_MAX_VECTOR_WIDTH/32)]; int len = bld_base->base.type.length * 2; - assert(len <= 16); + assert(len <= (2 * (LP_MAX_VECTOR_WIDTH/32))); for (i = 0; i < bld_base->base.type.length * 2; i+=2) { shuffles[i] = lp_build_const_int32(gallivm, i / 2); @@ -1691,7 +1691,7 @@ emit_fetch_deriv( } /** - * store an array of 8 64-bit into two arrays of 8 floats + * store an array of vec-length 64-bit into two arrays of vec_length floats * i.e. * value is d0, d1, d2, d3 etc. * each 64-bit has high and low pieces x, y @@ -1710,8 +1710,8 @@ emit_store_64bit_chan(struct lp_build_tgsi_context *bld_base, struct lp_build_context *float_bld = &bld_base->base; unsigned i; LLVMValueRef temp, temp2; - LLVMValueRef shuffles[8]; - LLVMValueRef shuffles2[8]; + LLVMValueRef shuffles[LP_MAX_VECTOR_WIDTH/32]; + LLVMValueRef shuffles2[LP_MAX_VECTOR_WIDTH/32]; for (i = 0; i < bld_base->base.type.length; i++) { shuffles[i] = lp_build_const_int32(gallivm, i * 2); diff --git a/src/gallium/auxiliary/gallivm/lp_bld_type.h b/src/gallium/auxiliary/gallivm/lp_bld_type.h index afe8722b05..62f1f85461 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_type.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_type.h @@ -59,7 +59,7 @@ extern unsigned lp_native_vector_width; * Should only be used when lp_native_vector_width isn't available, * i.e. sizing/alignment of non-malloced variables. */ -#define LP_MAX_VECTOR_WIDTH 256 +#define LP_MAX_VECTOR_WIDTH 512 /** * Minimum vector alignment for static variable alignment @@ -67,7 +67,7 @@ extern unsigned lp_native_vector_width; * It should always be a constant equal to LP_MAX_VECTOR_WIDTH/8. An * expression is non-portable.
*/ -#define LP_MIN_VECTOR_ALIGN 32 +#define LP_MIN_VECTOR_ALIGN 64 /** * Several functions can only cope with vectors of length up to this value. ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): swr: simd16 shaders work in progress
Module: Mesa Branch: master Commit: e4848053528ee108755652acc9763f904677bfd3 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=e4848053528ee108755652acc9763f904677bfd3 Author: Tim Rowley <timothy.o.row...@intel.com> Date: Tue Oct 10 11:08:29 2017 -0500 swr: simd16 shaders work in progress Start building vertex shaders as simd16. Disabled by default, set USE_SIMD16_SHADERS in knobs.h to experiment. Reviewed-by: Bruce Cherniak <bruce.chern...@intel.com> --- src/gallium/drivers/swr/swr_screen.cpp | 6 ++ src/gallium/drivers/swr/swr_screen.h | 3 +++ src/gallium/drivers/swr/swr_shader.cpp | 14 -- 3 files changed, 21 insertions(+), 2 deletions(-) diff --git a/src/gallium/drivers/swr/swr_screen.cpp b/src/gallium/drivers/swr/swr_screen.cpp index 639b18f930..46b3a003c6 100644 --- a/src/gallium/drivers/swr/swr_screen.cpp +++ b/src/gallium/drivers/swr/swr_screen.cpp @@ -1058,6 +1058,9 @@ swr_destroy_screen(struct pipe_screen *p_screen) swr_fence_reference(p_screen, &screen->flush_fence, NULL); JitDestroyContext(screen->hJitMgr); +#if USE_SIMD16_SHADERS + JitDestroyContext(screen->hJitMgr16); +#endif if (winsys->destroy) winsys->destroy(winsys); @@ -1141,6 +1144,9 @@ swr_create_screen_internal(struct sw_winsys *winsys) // Pass in "" for architecture for run-time determination screen->hJitMgr = JitCreateContext(KNOB_SIMD_WIDTH, "", "swr"); +#if USE_SIMD16_SHADERS + screen->hJitMgr16 = JitCreateContext(16, "", "swr"); +#endif swr_fence_init(&screen->base); diff --git a/src/gallium/drivers/swr/swr_screen.h b/src/gallium/drivers/swr/swr_screen.h index a11ea9f41d..1c4e331583 100644 --- a/src/gallium/drivers/swr/swr_screen.h +++ b/src/gallium/drivers/swr/swr_screen.h @@ -49,6 +49,9 @@ struct swr_screen { uint32_t client_copy_limit; HANDLE hJitMgr; +#if USE_SIMD16_SHADERS + HANDLE hJitMgr16; +#endif PFNSwrGetInterface pfnSwrGetInterface; }; diff --git a/src/gallium/drivers/swr/swr_shader.cpp b/src/gallium/drivers/swr/swr_shader.cpp index 510bc0e457..732e08dae7 100644 ---
a/src/gallium/drivers/swr/swr_shader.cpp +++ b/src/gallium/drivers/swr/swr_shader.cpp @@ -693,7 +693,7 @@ swr_compile_gs(struct swr_context *ctx, swr_jit_gs_key &key) void BuilderSWR::WriteVS(Value *pVal, Value *pVsContext, Value *pVtxOutput, unsigned slot, unsigned channel) { -#if USE_SIMD16_FRONTEND +#if USE_SIMD16_FRONTEND && !USE_SIMD16_SHADERS // interleave the simdvertex components into the dest simd16vertex // slot16offset = slot8offset * 2 // comp16offset = comp8offset * 2 + alternateOffset @@ -756,6 +756,9 @@ BuilderSWR::CompileVS(struct swr_context *ctx, swr_jit_vs_key &key) const_sizes_ptr->setName("num_vs_constants"); Value *vtxInput = LOAD(pVsCtx, {0, SWR_VS_CONTEXT_pVin}); +#if USE_SIMD16_SHADERS + vtxInput = BITCAST(vtxInput, PointerType::get(Gen_simd16vertex(JM()), 0)); +#endif for (uint32_t attrib = 0; attrib < PIPE_MAX_SHADER_INPUTS; attrib++) { const unsigned mask = swr_vs->info.base.input_usage_mask[attrib]; @@ -777,7 +780,7 @@ BuilderSWR::CompileVS(struct swr_context *ctx, swr_jit_vs_key &key) lp_build_tgsi_soa(gallivm, swr_vs->pipe.tokens, - lp_type_float_vec(32, 32 * 8), + lp_type_float_vec(32, 32 * mVWidth), NULL, // mask wrap(consts_ptr), wrap(const_sizes_ptr), @@ -795,6 +798,9 @@ BuilderSWR::CompileVS(struct swr_context *ctx, swr_jit_vs_key &key) IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder))); Value *vtxOutput = LOAD(pVsCtx, {0, SWR_VS_CONTEXT_pVout}); +#if USE_SIMD16_SHADERS + vtxOutput = BITCAST(vtxOutput, PointerType::get(Gen_simd16vertex(JM()), 0)); +#endif for (uint32_t channel = 0; channel < TGSI_NUM_CHANNELS; channel++) { for (uint32_t attrib = 0; attrib < PIPE_MAX_SHADER_OUTPUTS; attrib++) { @@ -905,7 +911,11 @@ swr_compile_vs(struct swr_context *ctx, swr_jit_vs_key &key) return NULL; BuilderSWR builder( +#if USE_SIMD16_SHADERS + reinterpret_cast<JitManager *>(swr_screen(ctx->pipe.screen)->hJitMgr16), +#else reinterpret_cast<JitManager *>(swr_screen(ctx->pipe.screen)->hJitMgr), +#endif "VS"); PFN_VERTEX_FUNC func = builder.CompileVS(ctx, key); ___
mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): swr/rast: use proper alignment for debug transposedPrims
Module: Mesa Branch: master Commit: 9716c69e22613229bdc78c0a28491f39bec2520d URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=9716c69e22613229bdc78c0a28491f39bec2520d Author: Tim Rowley <timothy.o.row...@intel.com> Date: Tue Oct 3 15:23:44 2017 -0500 swr/rast: use proper alignment for debug transposedPrims Causing a crash in ParaView waveletcontour.py test when _DEBUG defined due to vector aligned copy with unaligned address. Reviewed-by: Bruce Cherniak <bruce.chern...@intel.com> --- src/gallium/drivers/swr/rasterizer/core/clip.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/gallium/drivers/swr/rasterizer/core/clip.h b/src/gallium/drivers/swr/rasterizer/core/clip.h index cde5261521..e9a410daa3 100644 --- a/src/gallium/drivers/swr/rasterizer/core/clip.h +++ b/src/gallium/drivers/swr/rasterizer/core/clip.h @@ -561,7 +561,7 @@ public: #if defined(_DEBUG) // TODO: need to increase stack size, allocating SIMD16-widened transposedPrims causes stack overflow in debug builds -SIMDVERTEX_T *transposedPrims = reinterpret_cast<SIMDVERTEX_T *>(malloc(sizeof(SIMDVERTEX_T) * 2)); +SIMDVERTEX_T *transposedPrims = reinterpret_cast<SIMDVERTEX_T *>(AlignedMalloc(sizeof(SIMDVERTEX_T) * 2, 64)); #else SIMDVERTEX_T transposedPrims[2]; @@ -667,7 +667,7 @@ public: } #if defined(_DEBUG) -free(transposedPrims); +AlignedFree(transposedPrims); #endif // update global pipeline stat ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): swr/rast: Add support for R10G10B10_FLOAT_A2_UNORM pixel format
Module: Mesa Branch: master Commit: 41565ddf7a7f8986d232b5619ac80233251d0900 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=41565ddf7a7f8986d232b5619ac80233251d0900 Author: Tim Rowley <timothy.o.row...@intel.com> Date: Mon Sep 11 16:07:32 2017 -0500 swr/rast: Add support for R10G10B10_FLOAT_A2_UNORM pixel format Reviewed-by: Bruce Cherniak <bruce.chern...@intel.com> --- .../drivers/swr/rasterizer/common/formats.cpp | 27 +++--- .../drivers/swr/rasterizer/core/format_traits.h| 2 +- .../drivers/swr/rasterizer/jitter/builder_misc.cpp | 16 ++--- 3 files changed, 28 insertions(+), 17 deletions(-) diff --git a/src/gallium/drivers/swr/rasterizer/common/formats.cpp b/src/gallium/drivers/swr/rasterizer/common/formats.cpp index 263dec649a..1c086ff188 100644 --- a/src/gallium/drivers/swr/rasterizer/common/formats.cpp +++ b/src/gallium/drivers/swr/rasterizer/common/formats.cpp @@ -2729,16 +2729,27 @@ const SWR_FORMAT_INFO gFormatInfo[] = { { 0.0f, 0.0f, 0.0f, 0.0f }, 1, 1 }, -// padding (0xD5) + +// R10G10B10_FLOAT_A2_UNORM (0xD5) { -nullptr, -{ SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN }, -{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 }, -0, 0, 0, false, false, false, false, -{ false, false, false, false }, -{ 0.0f, 0.0f, 0.0f, 0.0f }, -1, 1 +"R10G10B10_FLOAT_A2_UNORM", +{ SWR_TYPE_FLOAT, SWR_TYPE_FLOAT, SWR_TYPE_FLOAT, SWR_TYPE_UNORM }, +{ 0, 0, 0, 0x3f80 }, // Defaults for missing components +{ 0, 1, 2, 3 }, // Swizzle +{ 10, 10, 10, 2 }, // Bits per component +32, // Bits per element +4, // Bytes per element +4, // Num components +false, // isSRGB +false, // isBC +false, // isSubsampled +false, // isLuminance +{ false, false, false, false }, // Is normalized? 
+{ 1.0f, 1.0f, 1.0f, 1.0f / 3.0f }, // To float scale factor +1, // bcWidth +1, // bcHeight }, + // R32_SINT (0xD6) { "R32_SINT", diff --git a/src/gallium/drivers/swr/rasterizer/core/format_traits.h b/src/gallium/drivers/swr/rasterizer/core/format_traits.h index c04ea5f8ee..bc585dd175 100644 --- a/src/gallium/drivers/swr/rasterizer/core/format_traits.h +++ b/src/gallium/drivers/swr/rasterizer/core/format_traits.h @@ -1237,7 +1237,7 @@ template<> struct FormatTraits : /// FormatTraits - Format traits specialization for R10G10B10_FLOAT_A2_UNORM // template<> struct FormatTraits : -ComponentTraits<SWR_TYPE_FLOAT, 10, SWR_TYPE_FLOAT, 10, SWR_TYPE_FLOAT, 10, SWR_TYPE_FLOAT, 2>, +ComponentTraits<SWR_TYPE_FLOAT, 10, SWR_TYPE_FLOAT, 10, SWR_TYPE_FLOAT, 10, SWR_TYPE_UNORM, 2>, FormatSwizzle<0, 1, 2, 3>, Defaults<0, 0, 0, 0x3f80> { diff --git a/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp b/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp index 402fd2652f..b943909a57 100644 --- a/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp +++ b/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp @@ -42,7 +42,7 @@ namespace SwrJit ///number of mantissa bits. /// @param val - 32-bit float /// @todo Maybe move this outside of this file into a header? -static uint16_t Convert32To16Float(float val) +static uint16_t ConvertFloat32ToFloat16(float val) { uint32_t sign, exp, mant; uint32_t roundBits; @@ -112,7 +112,7 @@ namespace SwrJit ///float /// @param val - 16-bit float /// @todo Maybe move this outside of this file into a header? 
-static float ConvertSmallFloatTo32(uint32_t val) +static float ConvertFloat16ToFloat32(uint32_t val) { uint32_t result; if ((val & 0x7fff) == 0) @@ -888,11 +888,11 @@ namespace SwrJit else { FunctionType* pFuncTy = FunctionType::get(mFP32Ty, mInt16Ty); -Function* pCvtPh2Ps = cast<Function>(JM()->mpCurrentModule->getOrInsertFunction("ConvertSmallFloatTo32", pFuncTy)); +Function* pCvtPh2Ps = cast<Function>(JM()->mpCurrentModule->getOrInsertFunction("ConvertFloat16ToFloat32", pFuncTy)); -if (sys::DynamicLibrary::SearchForAddressOfSymbol("ConvertSmallFloatTo32") == nullptr) +if (sys::DynamicLibrary::SearchForAddressOfSymbol("ConvertFloat16ToFloat32") == nullptr) { -sys::DynamicLibrary::AddSymbol("ConvertSmallFloatTo32", (void *)&ConvertSmallFloatTo32); +sys::DynamicLibrary::AddSymbol("ConvertFloat16ToFloat32", (void *)&ConvertFloat16ToFloat32); } Value* pResult = UndefValue::get(mSimdFP32Ty); @@ -921,11 +921,11 @@ namespace SwrJit {
Mesa (master): swr/rast: Properly sized null GS buffer
Module: Mesa Branch: master Commit: 5033d49d5d04efd01f9f4957e3b3dce0250908ad URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=5033d49d5d04efd01f9f4957e3b3dce0250908ad Author: Tim Rowley <timothy.o.row...@intel.com> Date: Tue Sep 12 15:11:07 2017 -0500 swr/rast: Properly sized null GS buffer Reviewed-by: Bruce Cherniak <bruce.chern...@intel.com> --- src/gallium/drivers/swr/rasterizer/core/frontend.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gallium/drivers/swr/rasterizer/core/frontend.cpp b/src/gallium/drivers/swr/rasterizer/core/frontend.cpp index 15bc93db63..22a5705c48 100644 --- a/src/gallium/drivers/swr/rasterizer/core/frontend.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/frontend.cpp @@ -798,7 +798,7 @@ static void GeometryShaderStage( const SWR_GS_STATE* pState = &state.gsState; SWR_GS_CONTEXT gsContext; -static uint8_t sNullBuffer[1024] = { 0 }; +static uint8_t sNullBuffer[128] = { 0 }; for (uint32_t i = 0; i < KNOB_SIMD_WIDTH; ++i) { ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): swr/rast: Slightly more efficient blend jit
Module: Mesa Branch: master Commit: d18c2a1fa415b660244b25081c6597ea0439565c URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=d18c2a1fa415b660244b25081c6597ea0439565c Author: Tim Rowley <timothy.o.row...@intel.com> Date: Wed Sep 13 19:16:45 2017 -0500 swr/rast: Slightly more efficient blend jit Reviewed-by: Bruce Cherniak <bruce.chern...@intel.com> --- .../drivers/swr/rasterizer/jitter/blend_jit.cpp| 30 -- 1 file changed, 10 insertions(+), 20 deletions(-) diff --git a/src/gallium/drivers/swr/rasterizer/jitter/blend_jit.cpp b/src/gallium/drivers/swr/rasterizer/jitter/blend_jit.cpp index f2e6e532bb..3258639d38 100644 --- a/src/gallium/drivers/swr/rasterizer/jitter/blend_jit.cpp +++ b/src/gallium/drivers/swr/rasterizer/jitter/blend_jit.cpp @@ -581,13 +581,13 @@ struct BlendJit : public Builder // load src1 src1[i] = LOAD(pSrc1, { i }); } -Value* currentMask = VIMMED1(-1); +Value* currentSampleMask = VIMMED1(-1); if (state.desc.alphaToCoverageEnable) { Value* pClampedSrc = FCLAMP(src[3], 0.0f, 1.0f); uint32_t bits = (1 << state.desc.numSamples) - 1; -currentMask = FMUL(pClampedSrc, VBROADCAST(C((float)bits))); -currentMask = FP_TO_SI(FADD(currentMask, VIMMED1(0.5f)), mSimdInt32Ty); +currentSampleMask = FMUL(pClampedSrc, VBROADCAST(C((float)bits))); +currentSampleMask = FP_TO_SI(FADD(currentSampleMask, VIMMED1(0.5f)), mSimdInt32Ty); } // alpha test @@ -766,34 +766,24 @@ struct BlendJit : public Builder assert(!(state.desc.alphaToCoverageEnable)); // load current mask Value* oMask = LOAD(ppoMask); -Value* sampleMasked = VBROADCAST(SHL(C(1), sampleNum)); -oMask = AND(oMask, sampleMasked); -currentMask = AND(oMask, currentMask); +currentSampleMask = AND(oMask, currentSampleMask); } if(state.desc.sampleMaskEnable) { Value* sampleMask = LOAD(pBlendState, { 0, SWR_BLEND_STATE_sampleMask}); -Value* sampleMasked = SHL(C(1), sampleNum); -sampleMask = AND(sampleMask, sampleMasked); -sampleMask = VBROADCAST(ICMP_SGT(sampleMask, C(0))); -sampleMask = S_EXT(sampleMask, 
mSimdInt32Ty); -currentMask = AND(sampleMask, currentMask); -} - -if (state.desc.alphaToCoverageEnable) -{ -Value* sampleMasked = SHL(C(1), sampleNum); -currentMask = AND(currentMask, VBROADCAST(sampleMasked)); +currentSampleMask = AND(VBROADCAST(sampleMask), currentSampleMask); } if(state.desc.sampleMaskEnable || state.desc.alphaToCoverageEnable || state.desc.oMaskEnable) { -// load coverage mask +// load coverage mask and mask off any lanes with no samples Value* pMask = LOAD(ppMask); -currentMask = S_EXT(ICMP_UGT(currentMask, VBROADCAST(C(0))), mSimdInt32Ty); -Value* outputMask = AND(pMask, currentMask); +Value* sampleMasked = SHL(C(1), sampleNum); +currentSampleMask = AND(currentSampleMask, VBROADCAST(sampleMasked)); +currentSampleMask = S_EXT(ICMP_UGT(currentSampleMask, VBROADCAST(C(0))), mSimdInt32Ty); +Value* outputMask = AND(pMask, currentSampleMask); // store new mask STORE(outputMask, GEP(ppMask, C(0))); } ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): swr/rast: Fix allocation of DS output data for USE_SIMD16_FRONTEND
Module: Mesa Branch: master Commit: 9c468c775b666f6da7468a795a98e2fd021c23bf URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=9c468c775b666f6da7468a795a98e2fd021c23bf Author: Tim Rowley <timothy.o.row...@intel.com> Date: Fri Sep 15 18:53:47 2017 -0500 swr/rast: Fix allocation of DS output data for USE_SIMD16_FRONTEND Reviewed-by: Bruce Cherniak <bruce.chern...@intel.com> --- src/gallium/drivers/swr/rasterizer/core/frontend.cpp | 16 ++-- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/src/gallium/drivers/swr/rasterizer/core/frontend.cpp b/src/gallium/drivers/swr/rasterizer/core/frontend.cpp index 22a5705c48..aea8e88de4 100644 --- a/src/gallium/drivers/swr/rasterizer/core/frontend.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/frontend.cpp @@ -1062,7 +1062,7 @@ struct TessellationThreadLocalData size_t tsCtxSize; simdscalar* pDSOutput; -size_t numDSOutputVectors; +size_t dsOutputAllocSize; }; THREAD TessellationThreadLocalData* gt_pTessellationThreadData = nullptr; @@ -1210,24 +1210,20 @@ static void TessellationStages( // Allocate DS Output memory uint32_t requiredDSVectorInvocations = AlignUp(tsData.NumDomainPoints, KNOB_SIMD_WIDTH) / KNOB_SIMD_WIDTH; -size_t requiredDSOutputVectors = requiredDSVectorInvocations * tsState.numDsOutputAttribs; #if USE_SIMD16_FRONTEND size_t requiredAllocSize = sizeof(simdvector) * RoundUpEven(requiredDSVectorInvocations) * tsState.numDsOutputAttribs; // simd8 -> simd16, padding #else +size_t requiredDSOutputVectors = requiredDSVectorInvocations * tsState.numDsOutputAttribs; size_t requiredAllocSize = sizeof(simdvector) * requiredDSOutputVectors; #endif -if (requiredDSOutputVectors > gt_pTessellationThreadData->numDSOutputVectors) +if (requiredAllocSize > gt_pTessellationThreadData->dsOutputAllocSize) { AlignedFree(gt_pTessellationThreadData->pDSOutput); gt_pTessellationThreadData->pDSOutput = (simdscalar*)AlignedMalloc(requiredAllocSize, 64); -#if USE_SIMD16_FRONTEND 
-gt_pTessellationThreadData->numDSOutputVectors = RoundUpEven(requiredDSVectorInvocations) * tsState.numDsOutputAttribs; // simd8 -> simd16, padding -#else -gt_pTessellationThreadData->numDSOutputVectors = requiredDSOutputVectors; -#endif +gt_pTessellationThreadData->dsOutputAllocSize = requiredAllocSize; } SWR_ASSERT(gt_pTessellationThreadData->pDSOutput); -SWR_ASSERT(gt_pTessellationThreadData->numDSOutputVectors >= requiredDSOutputVectors); +SWR_ASSERT(gt_pTessellationThreadData->dsOutputAllocSize >= requiredAllocSize); #if defined(_DEBUG) memset(gt_pTessellationThreadData->pDSOutput, 0x90, requiredAllocSize); @@ -1356,7 +1352,7 @@ static void TessellationStages( AlignedFree(gt_pTessellationThreadData->pDSOutput); gt_pTessellationThreadData->pDSOutput = nullptr; } -gt_pTessellationThreadData->numDSOutputVectors = 0; +gt_pTessellationThreadData->dsOutputAllocSize = 0; #endif TSDestroyCtx(tsCtx); ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): swr/rast: Fetch compile state changes
Module: Mesa Branch: master Commit: efe7fa4384f89ba909c7a5a303658a6442f4f787 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=efe7fa4384f89ba909c7a5a303658a6442f4f787 Author: Tim Rowley <timothy.o.row...@intel.com> Date: Tue Sep 12 13:38:31 2017 -0500 swr/rast: Fetch compile state changes Add ForceSequentialAccessEnable and InstanceIDOffsetEnable bools to FETCH_COMPILE_STATE. Reviewed-by: Bruce Cherniak <bruce.chern...@intel.com> --- src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp | 6 ++ src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.h | 7 ++- 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp b/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp index f3a4b27d9a..906129829c 100644 --- a/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp +++ b/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp @@ -275,6 +275,12 @@ Function* FetchJit::Create(const FETCH_COMPILE_STATE& fetchState) : JitGatherVertices(fetchState, streams, vIndices, pVtxOut); #endif +if (fetchState.bInstanceIDOffsetEnable) +{ +// TODO: +SWR_ASSERT((0), "Add support for handling InstanceID Offset Enable."); +} + RET_VOID(); JitManager::DumpToFile(fetch, "src"); diff --git a/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.h b/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.h index 0dd6de759a..18fa96357b 100644 --- a/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.h +++ b/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.h @@ -107,6 +107,9 @@ struct FETCH_COMPILE_STATE bool bVertexIDOffsetEnable{ false };// Offset vertexID by StartVertex for non-indexed draws or BaseVertex for indexed draws bool bPartialVertexBuffer{ false }; // for indexed draws, map illegal indices to a known resident vertex +bool bForceSequentialAccessEnable{ false }; +bool bInstanceIDOffsetEnable{ false }; + FETCH_COMPILE_STATE(bool disableVGATHER = false, bool diableIndexOOBCheck = false): bDisableVGATHER(disableVGATHER), 
bDisableIndexOOBCheck(diableIndexOOBCheck){ }; @@ -120,11 +123,13 @@ struct FETCH_COMPILE_STATE if (cutIndex != other.cutIndex) return false; if (bVertexIDOffsetEnable != other.bVertexIDOffsetEnable) return false; if (bPartialVertexBuffer != other.bPartialVertexBuffer) return false; +if (bForceSequentialAccessEnable != other.bForceSequentialAccessEnable) return false; +if (bInstanceIDOffsetEnable != other.bInstanceIDOffsetEnable) return false; for(uint32_t i = 0; i < numAttribs; ++i) { if((layout[i].bits != other.layout[i].bits) || - ((layout[i].InstanceEnable == 1) && + (((layout[i].InstanceEnable == 1) || (layout[i].InstanceStrideEnable == 1)) && (layout[i].InstanceAdvancementState != other.layout[i].InstanceAdvancementState))){ return false; } ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): swr/rast: Remove code supporting legacy llvm (<3.9)
Module: Mesa Branch: master Commit: 68d8dd1fb5a0c28e4f6dfd8512ff6c3550458b46 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=68d8dd1fb5a0c28e4f6dfd8512ff6c3550458b46 Author: Tim Rowley <timothy.o.row...@intel.com> Date: Tue Sep 19 18:19:53 2017 -0500 swr/rast: Remove code supporting legacy llvm (<3.9) Reviewed-by: Bruce Cherniak <bruce.chern...@intel.com> --- .../drivers/swr/rasterizer/jitter/JitManager.cpp | 11 ++- .../drivers/swr/rasterizer/jitter/JitManager.h | 7 -- .../drivers/swr/rasterizer/jitter/builder_misc.cpp | 102 ++--- 3 files changed, 15 insertions(+), 105 deletions(-) diff --git a/src/gallium/drivers/swr/rasterizer/jitter/JitManager.cpp b/src/gallium/drivers/swr/rasterizer/jitter/JitManager.cpp index e4281f8e92..3f0772c942 100644 --- a/src/gallium/drivers/swr/rasterizer/jitter/JitManager.cpp +++ b/src/gallium/drivers/swr/rasterizer/jitter/JitManager.cpp @@ -48,8 +48,9 @@ #include "llvm/Support/FormattedStream.h" #include "llvm/Support/Path.h" #include "llvm/Support/MemoryBuffer.h" +#include "llvm/Config/llvm-config.h" -#if HAVE_LLVM < 0x400 +#if LLVM_VERSION_MAJOR < 4 #include "llvm/Bitcode/ReaderWriter.h" #else #include "llvm/Bitcode/BitcodeWriter.h" @@ -231,8 +232,8 @@ void JitManager::DumpAsm(Function* pFunction, const char* fileName) #if defined(_WIN32) DWORD pid = GetCurrentProcessId(); -TCHAR procname[MAX_PATH]; -GetModuleFileName(NULL, procname, MAX_PATH); +char procname[MAX_PATH]; +GetModuleFileNameA(NULL, procname, MAX_PATH); const char* pBaseName = strrchr(procname, '\\'); std::stringstream outDir; outDir << JITTER_OUTPUT_DIR << pBaseName << "_" << pid << std::ends; @@ -269,8 +270,8 @@ void JitManager::DumpToFile(Function *f, const char *fileName) { #if defined(_WIN32) DWORD pid = GetCurrentProcessId(); -TCHAR procname[MAX_PATH]; -GetModuleFileName(NULL, procname, MAX_PATH); +char procname[MAX_PATH]; +GetModuleFileNameA(NULL, procname, MAX_PATH); const char* pBaseName = strrchr(procname, '\\'); std::stringstream outDir; outDir << 
JITTER_OUTPUT_DIR << pBaseName << "_" << pid << std::ends; diff --git a/src/gallium/drivers/swr/rasterizer/jitter/JitManager.h b/src/gallium/drivers/swr/rasterizer/jitter/JitManager.h index 4bc543b560..46ffe276a0 100644 --- a/src/gallium/drivers/swr/rasterizer/jitter/JitManager.h +++ b/src/gallium/drivers/swr/rasterizer/jitter/JitManager.h @@ -47,13 +47,6 @@ #include "llvm/ExecutionEngine/ObjectCache.h" #include "llvm/Config/llvm-config.h" -#ifndef LLVM_VERSION_MAJOR -#include "llvm/Config/config.h" -#endif - -#ifndef HAVE_LLVM -#define HAVE_LLVM ((LLVM_VERSION_MAJOR << 8) | LLVM_VERSION_MINOR) -#endif #include "llvm/IR/Verifier.h" #include "llvm/ExecutionEngine/MCJIT.h" diff --git a/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp b/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp index b943909a57..9ca36b2467 100644 --- a/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp +++ b/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp @@ -763,22 +763,10 @@ namespace SwrJit /// lower 8 values are used. Value *Builder::PMOVSXBD(Value* a) { -// llvm-3.9 removed the pmovsxbd intrinsic -#if HAVE_LLVM < 0x309 -// use avx2 byte sign extend instruction if available -if(JM()->mArch.AVX2()) -{ -Function *pmovsxbd = Intrinsic::getDeclaration(JM()->mpCurrentModule, Intrinsic::x86_avx2_pmovsxbd); -return CALL(pmovsxbd, std::initializer_list<Value*>{a}); -} -else -#endif -{ -// VPMOVSXBD output type -Type* v8x32Ty = VectorType::get(mInt32Ty, 8); -// Extract 8 values from 128bit lane and sign extend -return S_EXT(VSHUFFLE(a, a, C({0, 1, 2, 3, 4, 5, 6, 7})), v8x32Ty); -} +// VPMOVSXBD output type +Type* v8x32Ty = VectorType::get(mInt32Ty, 8); +// Extract 8 values from 128bit lane and sign extend +return S_EXT(VSHUFFLE(a, a, C({0, 1, 2, 3, 4, 5, 6, 7})), v8x32Ty); } // @@ -787,22 +775,10 @@ namespace SwrJit /// @param a - 128bit SIMD lane(8x16bit) of 16bit integer values. 
Value *Builder::PMOVSXWD(Value* a) { -// llvm-3.9 removed the pmovsxwd intrinsic -#if HAVE_LLVM < 0x309 -// use avx2 word sign extend if available -if(JM()->mArch.AVX2()) -{ -Function *pmovsxwd = Intrinsic::getDeclaration(JM()->mpCurrentModule, Intrinsic::x86_avx2_pmovsxwd); -retur
Mesa (master): swr/rast: Handle instanceID offset / Instance Stride enable
Module: Mesa Branch: master Commit: 5a2bca5db5e025f0884487f590feac0c33db48fd URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=5a2bca5db5e025f0884487f590feac0c33db48fd Author: Tim Rowley <timothy.o.row...@intel.com> Date: Wed Sep 20 11:50:32 2017 -0500 swr/rast: Handle instanceID offset / Instance Stride enable Supported in JitGatherVertices(); FetchJit::JitLoadVertices() may require similar changes, will need address this if it is determined that this path is still in use. Handle Force Sequential Access in FetchJit::Create. Reviewed-by: Bruce Cherniak <bruce.chern...@intel.com> --- .../drivers/swr/rasterizer/jitter/fetch_jit.cpp| 46 ++ 1 file changed, 39 insertions(+), 7 deletions(-) diff --git a/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp b/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp index 906129829c..1e3db902bb 100644 --- a/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp +++ b/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp @@ -222,6 +222,18 @@ Function* FetchJit::Create(const FETCH_COMPILE_STATE& fetchState) default: SWR_INVALID("Unsupported index type"); vIndices = nullptr; break; } +if(fetchState.bForceSequentialAccessEnable) +{ +Value* pOffsets = C({ 0, 1, 2, 3, 4, 5, 6, 7 }); + +// VertexData buffers are accessed sequentially, the index is equal to the vertex number +vIndices = VBROADCAST(LOAD(mpFetchInfo, { 0, SWR_FETCH_CONTEXT_StartVertex })); +vIndices = ADD(vIndices, pOffsets); +#if USE_SIMD16_SHADERS +vIndices2 = ADD(vIndices, VIMMED1(8)); +#endif +} + Value* vVertexId = vIndices; #if USE_SIMD16_SHADERS Value* vVertexId2 = vIndices2; @@ -275,12 +287,6 @@ Function* FetchJit::Create(const FETCH_COMPILE_STATE& fetchState) : JitGatherVertices(fetchState, streams, vIndices, pVtxOut); #endif -if (fetchState.bInstanceIDOffsetEnable) -{ -// TODO: -SWR_ASSERT((0), "Add support for handling InstanceID Offset Enable."); -} - RET_VOID(); JitManager::DumpToFile(fetch, "src"); @@ -362,6 +368,11 @@ void 
FetchJit::JitLoadVertices(const FETCH_COMPILE_STATE , Value* str vectors.clear(); +if (fetchState.bInstanceIDOffsetEnable) +{ +SWR_ASSERT((0), "TODO: Fill out more once driver sends this down"); +} + Value *vCurIndices; Value *startOffset; if(ied.InstanceEnable) @@ -831,8 +842,16 @@ void FetchJit::JitGatherVertices(const FETCH_COMPILE_STATE , minVertex = LOAD(minVertex); } +if (fetchState.bInstanceIDOffsetEnable) +{ +// the InstanceID (curInstance) value is offset by StartInstanceLocation +curInstance = ADD(curInstance, startInstance); +} + Value *vCurIndices; Value *startOffset; +Value *vInstanceStride = VIMMED1(0); + if(ied.InstanceEnable) { Value* stepRate = C(ied.InstanceAdvancementState); @@ -853,11 +872,19 @@ void FetchJit::JitGatherVertices(const FETCH_COMPILE_STATE , } else if (ied.InstanceStrideEnable) { +// grab the instance advancement state, determines stride in bytes from one instance to the next +Value* stepRate = C(ied.InstanceAdvancementState); +vInstanceStride = VBROADCAST(MUL(curInstance, stepRate)); + +// offset indices by baseVertex +vCurIndices = ADD(vIndices, vBaseVertex); + +startOffset = startVertex; SWR_ASSERT((0), "TODO: Fill out more once driver sends this down."); } else { -// offset indices by baseVertex +// offset indices by baseVertex vCurIndices = ADD(vIndices, vBaseVertex); startOffset = startVertex; @@ -925,6 +952,11 @@ void FetchJit::JitGatherVertices(const FETCH_COMPILE_STATE , Value* vOffsets = MUL(vCurIndices, vStride); vOffsets = ADD(vOffsets, vAlignmentOffsets); +// if instance stride enable is: +// true - add product of the instanceID and advancement state to the offst into the VB +// false - value of vInstanceStride has been initialialized to zero +vOffsets = ADD(vOffsets, vInstanceStride); + // Packing and component control ComponentEnable compMask = (ComponentEnable)ied.ComponentPacking; const ComponentControl compCtrl[4] { (ComponentControl)ied.ComponentControl0, (ComponentControl)ied.ComponentControl1, ___ mesa-commit 
mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): swr/rast: New GS state/context API
Module: Mesa Branch: master Commit: cd6e91d3a2012d2177732f27795e66c8c38e0aba URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=cd6e91d3a2012d2177732f27795e66c8c38e0aba Author: Tim Rowley <timothy.o.row...@intel.com> Date: Mon Sep 11 17:29:12 2017 -0500 swr/rast: New GS state/context API One piglit regression, which was a false pass: spec@glsl-1.50@execution@geometry@dynamic_input_array_index Reviewed-by: Bruce Cherniak <bruce.chern...@intel.com> --- .../drivers/swr/rasterizer/core/frontend.cpp | 227 - src/gallium/drivers/swr/rasterizer/core/state.h| 55 +++-- src/gallium/drivers/swr/swr_shader.cpp | 183 - 3 files changed, 253 insertions(+), 212 deletions(-) diff --git a/src/gallium/drivers/swr/rasterizer/core/frontend.cpp b/src/gallium/drivers/swr/rasterizer/core/frontend.cpp index f882869eb7..26e76a92ef 100644 --- a/src/gallium/drivers/swr/rasterizer/core/frontend.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/frontend.cpp @@ -710,45 +710,67 @@ void ProcessStreamIdBuffer(uint32_t stream, uint8_t* pStreamIdBase, uint32_t num THREAD SWR_GS_CONTEXT tlsGsContext; -template -struct GsBufferInfo +// Buffers that are allocated if GS is enabled +struct GsBuffers { -GsBufferInfo(const SWR_GS_STATE ) -{ -const uint32_t vertexCount = gsState.maxNumVerts; -const uint32_t vertexStride = sizeof(SIMDVERTEX); -const uint32_t numSimdBatches = (vertexCount + SIMD_WIDTH - 1) / SIMD_WIDTH; +uint8_t* pGsIn; +uint8_t* pGsOut[KNOB_SIMD_WIDTH]; +uint8_t* pGsTransposed; +void* pStreamCutBuffer; +}; -vertexPrimitiveStride = vertexStride * numSimdBatches; -vertexInstanceStride = vertexPrimitiveStride * SIMD_WIDTH; +// +/// @brief Transposes GS output from SOA to AOS to feed the primitive assembler +/// @param pDst - Destination buffer in AOS form for the current SIMD width, fed into the primitive assembler +/// @param pSrc - Buffer of vertices in SOA form written by the geometry shader +/// @param numVerts - Number of vertices outputted by the GS +/// @param numAttribs - Number of 
attributes per vertex +template +void TransposeSOAtoAOS(uint8_t* pDst, uint8_t* pSrc, uint32_t numVerts, uint32_t numAttribs) +{ +uint32_t srcVertexStride = numAttribs * sizeof(float) * 4; +uint32_t dstVertexStride = numAttribs * sizeof(typename SIMD_T::Float) * 4; -if (gsState.isSingleStream) -{ -cutPrimitiveStride = (vertexCount + 7) / 8; -cutInstanceStride = cutPrimitiveStride * SIMD_WIDTH; +OSALIGNSIMD16(uint32_t) gatherOffsets[SimdWidth]; -streamCutPrimitiveStride = 0; -streamCutInstanceStride = 0; -} -else -{ -cutPrimitiveStride = AlignUp(vertexCount * 2 / 8, 4); -cutInstanceStride = cutPrimitiveStride * SIMD_WIDTH; - -streamCutPrimitiveStride = (vertexCount + 7) / 8; -streamCutInstanceStride = streamCutPrimitiveStride * SIMD_WIDTH; -} +for (uint32_t i = 0; i < SimdWidth; ++i) +{ +gatherOffsets[i] = srcVertexStride * i; } +auto vGatherOffsets = SIMD_T::load_si((typename SIMD_T::Integer*)[0]); -uint32_t vertexPrimitiveStride; -uint32_t vertexInstanceStride; +uint32_t numSimd = AlignUp(numVerts, SimdWidth) / SimdWidth; +uint32_t remainingVerts = numVerts; -uint32_t cutPrimitiveStride; -uint32_t cutInstanceStride; +for (uint32_t s = 0; s < numSimd; ++s) +{ +uint8_t* pSrcBase = pSrc + s * srcVertexStride * SimdWidth; +uint8_t* pDstBase = pDst + s * dstVertexStride; -uint32_t streamCutPrimitiveStride; -uint32_t streamCutInstanceStride; -}; +// Compute mask to prevent src overflow +uint32_t mask = std::min(remainingVerts, SimdWidth); +mask = GenMask(mask); +auto vMask = SIMD_T::vmask_ps(mask); +auto viMask = SIMD_T::castps_si(vMask); + +for (uint32_t a = 0; a < numAttribs; ++a) +{ +auto attribGatherX = SIMD_T::template mask_i32gather_ps(SIMD_T::setzero_ps(), (const float*)pSrcBase, vGatherOffsets, vMask); +auto attribGatherY = SIMD_T::template mask_i32gather_ps(SIMD_T::setzero_ps(), (const float*)(pSrcBase + sizeof(float)), vGatherOffsets, vMask); +auto attribGatherZ = SIMD_T::template mask_i32gather_ps(SIMD_T::setzero_ps(), (const float*)(pSrcBase + sizeof(float) 
* 2), vGatherOffsets, vMask); +auto attribGatherW = SIMD_T::template mask_i32gather_ps(SIMD_T::setzero_ps(), (const float*)(pSrcBase + sizeof(float) * 3), vGatherOffsets, vMask); + +SIMD_T::maskstore_ps((float*)pDstBase, viMask, attribGatherX); +SIMD_T::maskstore_ps((float*)(pDstBase + sizeof(typename SIMD_T::Float)), viMask, attribGatherY); +SIMD_T::maskstore
Mesa (master): swr/rast: Move SWR_GS_CONTEXT from thread local storage to stack
Module: Mesa Branch: master Commit: 9c82cf0f1e2b0496d135dc35dbb512e67b4e23f5 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=9c82cf0f1e2b0496d135dc35dbb512e67b4e23f5 Author: Tim Rowley <timothy.o.row...@intel.com> Date: Tue Sep 12 14:37:36 2017 -0500 swr/rast: Move SWR_GS_CONTEXT from thread local storage to stack Move structure, as the size is significantly reduced due to dynamic allocation of the GS buffers. Reviewed-by: Bruce Cherniak <bruce.chern...@intel.com> --- .../drivers/swr/rasterizer/core/frontend.cpp | 23 +++--- 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/src/gallium/drivers/swr/rasterizer/core/frontend.cpp b/src/gallium/drivers/swr/rasterizer/core/frontend.cpp index 26e76a92ef..15bc93db63 100644 --- a/src/gallium/drivers/swr/rasterizer/core/frontend.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/frontend.cpp @@ -708,8 +708,6 @@ void ProcessStreamIdBuffer(uint32_t stream, uint8_t* pStreamIdBase, uint32_t num } } -THREAD SWR_GS_CONTEXT tlsGsContext; - // Buffers that are allocated if GS is enabled struct GsBuffers { @@ -798,21 +796,22 @@ static void GeometryShaderStage( const API_STATE& state = GetApiState(pDC); const SWR_GS_STATE* pState = +SWR_GS_CONTEXT gsContext; static uint8_t sNullBuffer[1024] = { 0 }; for (uint32_t i = 0; i < KNOB_SIMD_WIDTH; ++i) { -tlsGsContext.pStreams[i] = pGsBuffers->pGsOut[i]; +gsContext.pStreams[i] = pGsBuffers->pGsOut[i]; } -tlsGsContext.pVerts = (simdvector*)pGsBuffers->pGsIn; -tlsGsContext.PrimitiveID = primID; +gsContext.pVerts = (simdvector*)pGsBuffers->pGsIn; +gsContext.PrimitiveID = primID; uint32_t numVertsPerPrim = NumVertsPerPrim(pa.binTopology, true); simdvector attrib[MAX_NUM_VERTS_PER_PRIM]; // assemble all attributes for the input primitive -tlsGsContext.inputVertStride = pState->inputVertStride; +gsContext.inputVertStride = pState->inputVertStride; for (uint32_t slot = 0; slot < pState->numInputAttribs; ++slot) { uint32_t srcAttribSlot = pState->srcVertexAttribOffset + slot; @@ 
-821,7 +820,7 @@ static void GeometryShaderStage( for (uint32_t i = 0; i < numVertsPerPrim; ++i) { -tlsGsContext.pVerts[attribSlot + pState->inputVertStride * i] = attrib[i]; +gsContext.pVerts[attribSlot + pState->inputVertStride * i] = attrib[i]; } } @@ -829,7 +828,7 @@ static void GeometryShaderStage( pa.Assemble(VERTEX_POSITION_SLOT, attrib); for (uint32_t i = 0; i < numVertsPerPrim; ++i) { -tlsGsContext.pVerts[VERTEX_POSITION_SLOT + pState->inputVertStride * i] = attrib[i]; +gsContext.pVerts[VERTEX_POSITION_SLOT + pState->inputVertStride * i] = attrib[i]; } // record valid prims from the frontend to avoid over binning the newly generated @@ -842,15 +841,15 @@ static void GeometryShaderStage( for (uint32_t instance = 0; instance < pState->instanceCount; ++instance) { -tlsGsContext.InstanceID = instance; -tlsGsContext.mask = GenerateMask(numInputPrims); +gsContext.InstanceID = instance; +gsContext.mask = GenerateMask(numInputPrims); // execute the geometry shader -state.pfnGsFunc(GetPrivateState(pDC), ); +state.pfnGsFunc(GetPrivateState(pDC), ); for (uint32_t i = 0; i < KNOB_SIMD_WIDTH; ++i) { -tlsGsContext.pStreams[i] += pState->allocationSize; +gsContext.pStreams[i] += pState->allocationSize; } } ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): swr/rast: remove llvm fence/atomics from generated files
Module: Mesa Branch: master Commit: 066d1dc951d3a0833de6abd8e004bf467e6e50eb URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=066d1dc951d3a0833de6abd8e004bf467e6e50eb Author: Tim Rowley <timothy.o.row...@intel.com> Date: Tue Sep 19 14:04:20 2017 -0500 swr/rast: remove llvm fence/atomics from generated files We currently don't use these instructions, and since their API changed in llvm-5.0 having them in the autogen files broke the mesa release tarballs which ship with generated autogen files. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=102847 CC: mesa-sta...@lists.freedesktop.org Tested-by: Laurent Carlier <lordhea...@gmail.com> Reviewed-by: Bruce Cherniak <bruce.chern...@intel.com> --- src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_ir_macros.py | 8 1 file changed, 8 insertions(+) diff --git a/src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_ir_macros.py b/src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_ir_macros.py index 025d38ab33..ce892a9abe 100644 --- a/src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_ir_macros.py +++ b/src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_ir_macros.py @@ -140,6 +140,14 @@ def parse_ir_builder(input_file): ignore = False +# The following functions need to be ignored in openswr. +# API change in llvm-5.0 breaks baked autogen files +if ( +(func_name == 'CreateFence' or + func_name == 'CreateAtomicCmpXchg' or + func_name == 'CreateAtomicRMW')): +ignore = True + # The following functions need to be ignored. if (func_name == 'CreateInsertNUWNSWBinOp' or func_name == 'CreateMaskedIntrinsic' or ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): swr/rast: Missed conversion to SIMD_T
Module: Mesa Branch: master Commit: 1ccf9ad280415536056095314b470156e29b057e URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=1ccf9ad280415536056095314b470156e29b057e Author: Tim Rowley <timothy.o.row...@intel.com> Date: Wed Aug 30 11:02:16 2017 -0500 swr/rast: Missed conversion to SIMD_T Reviewed-by: Bruce Cherniak <bruce.chern...@intel.com> --- src/gallium/drivers/swr/rasterizer/core/binner.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gallium/drivers/swr/rasterizer/core/binner.cpp b/src/gallium/drivers/swr/rasterizer/core/binner.cpp index a6713e8c5d..e08e4896f3 100644 --- a/src/gallium/drivers/swr/rasterizer/core/binner.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/binner.cpp @@ -1195,7 +1195,7 @@ void BinPostSetupPointsImpl( } OSALIGNSIMD16(float) aPointSize[SIMD_WIDTH]; -_simd16_store_ps(reinterpret_cast(aPointSize), vPointSize); +SIMD_T::store_ps(reinterpret_cast(aPointSize), vPointSize); uint32_t *pPrimID = (uint32_t *) ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): swr/rast: Remove hardcoded clip/cull slot from clipper
Module: Mesa Branch: master Commit: ae2412dbbdcff6583d7e4cf0430a409b86cb9e80 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=ae2412dbbdcff6583d7e4cf0430a409b86cb9e80 Author: Tim Rowley <timothy.o.row...@intel.com> Date: Tue Aug 22 17:46:14 2017 -0500 swr/rast: Remove hardcoded clip/cull slot from clipper Reviewed-by: Bruce Cherniak <bruce.chern...@intel.com> --- src/gallium/drivers/swr/rasterizer/core/clip.h | 35 +++--- 1 file changed, 21 insertions(+), 14 deletions(-) diff --git a/src/gallium/drivers/swr/rasterizer/core/clip.h b/src/gallium/drivers/swr/rasterizer/core/clip.h index e0aaf81541..cde5261521 100644 --- a/src/gallium/drivers/swr/rasterizer/core/clip.h +++ b/src/gallium/drivers/swr/rasterizer/core/clip.h @@ -372,13 +372,15 @@ public: int ComputeUserClipCullMask(PA_STATE , typename SIMD_T::Vec4 prim[]) { uint8_t cullMask = state.backendState.cullDistanceMask; +uint32_t vertexClipCullOffset = state.backendState.vertexClipCullOffset; + typename SIMD_T::Float vClipCullMask = SIMD_T::setzero_ps(); typename SIMD_T::Vec4 vClipCullDistLo[3]; typename SIMD_T::Vec4 vClipCullDistHi[3]; -pa.Assemble(VERTEX_CLIPCULL_DIST_LO_SLOT, vClipCullDistLo); -pa.Assemble(VERTEX_CLIPCULL_DIST_HI_SLOT, vClipCullDistHi); +pa.Assemble(vertexClipCullOffset, vClipCullDistLo); +pa.Assemble(vertexClipCullOffset + 1, vClipCullDistHi); DWORD index; while (_BitScanForward(, cullMask)) @@ -488,21 +490,22 @@ public: } // assemble user clip distances if enabled +uint32_t vertexClipCullSlot = state.backendState.vertexClipCullOffset; if (state.backendState.clipDistanceMask & 0xf) { -pa.Assemble(VERTEX_CLIPCULL_DIST_LO_SLOT, tmpVector); +pa.Assemble(vertexClipCullSlot, tmpVector); for (uint32_t i = 0; i < NumVertsPerPrim; ++i) { -vertices[i].attrib[VERTEX_CLIPCULL_DIST_LO_SLOT] = tmpVector[i]; +vertices[i].attrib[vertexClipCullSlot] = tmpVector[i]; } } if (state.backendState.clipDistanceMask & 0xf0) { -pa.Assemble(VERTEX_CLIPCULL_DIST_HI_SLOT, tmpVector); +pa.Assemble(vertexClipCullSlot + 
1, tmpVector); for (uint32_t i = 0; i < NumVertsPerPrim; ++i) { -vertices[i].attrib[VERTEX_CLIPCULL_DIST_HI_SLOT] = tmpVector[i]; +vertices[i].attrib[vertexClipCullSlot + 1] = tmpVector[i]; } } @@ -613,26 +616,27 @@ public: } // transpose user clip distances if enabled +uint32_t vertexClipCullSlot = backendState.vertexClipCullOffset; if (state.backendState.clipDistanceMask & 0x0f) { -pBase = reinterpret_cast([0].attrib[VERTEX_CLIPCULL_DIST_LO_SLOT]) + sizeof(float) * inputPrim; +pBase = reinterpret_cast([0].attrib[vertexClipCullSlot]) + sizeof(float) * inputPrim; for (uint32_t c = 0; c < 4; ++c) { SIMD256::Float temp = SIMD256::template mask_i32gather_ps(SIMD256::setzero_ps(), reinterpret_cast(pBase), vOffsets, vMask); -transposedPrims[0].attrib[VERTEX_CLIPCULL_DIST_LO_SLOT][c] = SimdHelper::insert_lo_ps(temp); +transposedPrims[0].attrib[vertexClipCullSlot][c] = SimdHelper::insert_lo_ps(temp); pBase += sizeof(typename SIMD_T::Float); } } if (state.backendState.clipDistanceMask & 0xf0) { -pBase = reinterpret_cast([0].attrib[VERTEX_CLIPCULL_DIST_HI_SLOT]) + sizeof(float) * inputPrim; +pBase = reinterpret_cast([0].attrib[vertexClipCullSlot + 1]) + sizeof(float) * inputPrim; for (uint32_t c = 0; c < 4; ++c) { SIMD256::Float temp = SIMD256::template mask_i32gather_ps(SIMD256::setzero_ps(), reinterpret_cast(pBase), vOffsets, vMask); -transposedPrims[0].attrib[VERTEX_CLIPCULL_DIST_HI_SLOT][c] = SimdHelper::insert_lo_ps(temp); +transposedPrims[0].attrib[vertexClipCullSlot + 1][c] = SimdHelper::insert_lo_ps(temp); pBase += sizeof(typename SIMD_T::Float); } } @@ -692,6 +696,7 @@ public: // OOB indices => forced to zero. typename SIMD_T::Integer vpai = SIMD_T::castps_si(vpiAttrib[0][VERTEX_SGV_VAI_COMP]); +vpai = SIMD_T::max_epi32(vpai, SIMD_T::setzero_si()); typename SIMD_T::Integer vNumViewports = SIMD_T::set1_epi32(KNOB_NUM_VIEWPORTS_SCISSORS); typename SIMD_T::Integer vClearMask = SIMD_T::cmplt_epi32(vpai, vNumViewports); viewportIdx = SIMD_T::and_si(vCl
Mesa (master): swr/rast: Add new API SwrStallBE
Module: Mesa Branch: master Commit: f5031fb9521ecf3be4af8584a80516c7307ad61a URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=f5031fb9521ecf3be4af8584a80516c7307ad61a Author: Tim Rowley <timothy.o.row...@intel.com> Date: Fri Aug 18 12:34:48 2017 -0500 swr/rast: Add new API SwrStallBE SwrStallBE stalls the backend threads until all work submitted before the stall has finished. The frontend threads can continue to make forward progress. Reviewed-by: Bruce Cherniak <bruce.chern...@intel.com> --- src/gallium/drivers/swr/rasterizer/core/api.cpp | 9 + src/gallium/drivers/swr/rasterizer/core/api.h | 8 2 files changed, 17 insertions(+) diff --git a/src/gallium/drivers/swr/rasterizer/core/api.cpp b/src/gallium/drivers/swr/rasterizer/core/api.cpp index ccb6dfb7a1..632309821f 100644 --- a/src/gallium/drivers/swr/rasterizer/core/api.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/api.cpp @@ -458,6 +458,14 @@ void SwrSync(HANDLE hContext, PFN_CALLBACK_FUNC pfnFunc, uint64_t userData, uint AR_API_END(APISync, 1); } +void SwrStallBE(HANDLE hContext) +{ +SWR_CONTEXT* pContext = GetContext(hContext); +DRAW_CONTEXT* pDC = GetDrawContext(pContext); + +pDC->dependent = true; +} + void SwrWaitForIdle(HANDLE hContext) { SWR_CONTEXT *pContext = GetContext(hContext); @@ -1672,6 +1680,7 @@ void SwrGetInterface(SWR_INTERFACE _funcs) out_funcs.pfnSwrSaveState = SwrSaveState; out_funcs.pfnSwrRestoreState = SwrRestoreState; out_funcs.pfnSwrSync = SwrSync; +out_funcs.pfnSwrStallBE = SwrStallBE; out_funcs.pfnSwrWaitForIdle = SwrWaitForIdle; out_funcs.pfnSwrWaitForIdleFE = SwrWaitForIdleFE; out_funcs.pfnSwrSetVertexBuffers = SwrSetVertexBuffers; diff --git a/src/gallium/drivers/swr/rasterizer/core/api.h b/src/gallium/drivers/swr/rasterizer/core/api.h index a39420552b..577cfb157a 100644 --- a/src/gallium/drivers/swr/rasterizer/core/api.h +++ b/src/gallium/drivers/swr/rasterizer/core/api.h @@ -263,6 +263,13 @@ SWR_FUNC(void, SwrSync, uint64_t userData3); // +/// @brief Stall cmd. 
Stalls the backend until all previous work has been completed. +///Frontend work can continue to make progress +/// @param hContext - Handle passed back from SwrCreateContext +SWR_FUNC(void, SwrStallBE, +HANDLE hContext); + +// /// @brief Blocks until all rendering has been completed. /// @param hContext - Handle passed back from SwrCreateContext SWR_FUNC(void, SwrWaitForIdle, @@ -709,6 +716,7 @@ struct SWR_INTERFACE PFNSwrSaveState pfnSwrSaveState; PFNSwrRestoreState pfnSwrRestoreState; PFNSwrSync pfnSwrSync; +PFNSwrStallBE pfnSwrStallBE; PFNSwrWaitForIdle pfnSwrWaitForIdle; PFNSwrWaitForIdleFE pfnSwrWaitForIdleFE; PFNSwrSetVertexBuffers pfnSwrSetVertexBuffers; ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): swr/rast: add graph write to jit debug output
Module: Mesa Branch: master Commit: 6b9e801832c2691b311ab2429fda1f9ec774f021 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=6b9e801832c2691b311ab2429fda1f9ec774f021 Author: Tim Rowley <timothy.o.row...@intel.com> Date: Thu Sep 7 15:18:08 2017 -0500 swr/rast: add graph write to jit debug output Reviewed-by: Bruce Cherniak <bruce.chern...@intel.com> --- src/gallium/drivers/swr/rasterizer/jitter/JitManager.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/gallium/drivers/swr/rasterizer/jitter/JitManager.cpp b/src/gallium/drivers/swr/rasterizer/jitter/JitManager.cpp index fc32b627bd..e4281f8e92 100644 --- a/src/gallium/drivers/swr/rasterizer/jitter/JitManager.cpp +++ b/src/gallium/drivers/swr/rasterizer/jitter/JitManager.cpp @@ -296,10 +296,10 @@ void JitManager::DumpToFile(Function *f, const char *fileName) #endif fd.flush(); -//raw_fd_ostream fd_cfg(fName, EC, llvm::sys::fs::F_Text); -//WriteGraph(fd_cfg, (const Function*)f); +raw_fd_ostream fd_cfg(fName, EC, llvm::sys::fs::F_Text); +WriteGraph(fd_cfg, (const Function*)f); -//fd_cfg.flush(); +fd_cfg.flush(); } } ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): swr/rast: adjust linux cpu topology identification code
Module: Mesa Branch: master Commit: ead0dfe31ec7a1b1928e4abbfa99d59e0e5e929a URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=ead0dfe31ec7a1b1928e4abbfa99d59e0e5e929a Author: Tim Rowley <timothy.o.row...@intel.com> Date: Wed Sep 6 14:59:33 2017 -0500 swr/rast: adjust linux cpu topology identification code Make more robust to handle strange strange configurations like a vmware exported 4-way numa X 1-core configuration. Reviewed-by: Bruce Cherniak <bruce.chern...@intel.com> --- .../drivers/swr/rasterizer/core/threads.cpp| 81 ++ 1 file changed, 38 insertions(+), 43 deletions(-) diff --git a/src/gallium/drivers/swr/rasterizer/core/threads.cpp b/src/gallium/drivers/swr/rasterizer/core/threads.cpp index b704d23f54..4bb395dec3 100644 --- a/src/gallium/drivers/swr/rasterizer/core/threads.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/threads.cpp @@ -169,37 +169,16 @@ void CalculateProcessorTopology(CPUNumaNodes& out_nodes, uint32_t& out_numThread std::ifstream input("/proc/cpuinfo"); std::string line; char* c; -uint32_t threadId = uint32_t(-1); +uint32_t procId = uint32_t(-1); uint32_t coreId = uint32_t(-1); -uint32_t numaId = uint32_t(-1); +uint32_t physId = uint32_t(-1); while (std::getline(input, line)) { if (line.find("processor") != std::string::npos) { -if (threadId != uint32_t(-1)) -{ -// Save information. 
-if (out_nodes.size() <= numaId) -{ -out_nodes.resize(numaId + 1); -} - -auto& numaNode = out_nodes[numaId]; -if (numaNode.cores.size() <= coreId) -{ -numaNode.cores.resize(coreId + 1); -} - -auto& core = numaNode.cores[coreId]; -core.procGroup = coreId; -core.threadIds.push_back(threadId); - -out_numThreadsPerProcGroup++; -} - auto data_start = line.find(": ") + 2; -threadId = std::strtoul(_str()[data_start], , 10); +procId = std::strtoul(_str()[data_start], , 10); continue; } if (line.find("core id") != std::string::npos) @@ -211,29 +190,32 @@ void CalculateProcessorTopology(CPUNumaNodes& out_nodes, uint32_t& out_numThread if (line.find("physical id") != std::string::npos) { auto data_start = line.find(": ") + 2; -numaId = std::strtoul(_str()[data_start], , 10); +physId = std::strtoul(_str()[data_start], , 10); continue; } +if (line.length() == 0) +{ +if (physId + 1 > out_nodes.size()) +out_nodes.resize(physId + 1); +auto& numaNode = out_nodes[physId]; +numaNode.numaId = physId; + +if (coreId + 1 > numaNode.cores.size()) +numaNode.cores.resize(coreId + 1); +auto& core = numaNode.cores[coreId]; +core.procGroup = coreId; +core.threadIds.push_back(procId); +} } -if (threadId != uint32_t(-1)) +out_numThreadsPerProcGroup = 0; +for (auto : out_nodes) { -// Save information. 
-if (out_nodes.size() <= numaId) +for (auto : node.cores) { -out_nodes.resize(numaId + 1); +out_numThreadsPerProcGroup = std::max((size_t)out_numThreadsPerProcGroup, + core.threadIds.size()); } -auto& numaNode = out_nodes[numaId]; -numaNode.numaId = numaId; -if (numaNode.cores.size() <= coreId) -{ -numaNode.cores.resize(coreId + 1); -} -auto& core = numaNode.cores[coreId]; - -core.procGroup = coreId; -core.threadIds.push_back(threadId); -out_numThreadsPerProcGroup++; } #else @@ -316,7 +298,11 @@ void bindThread(SWR_CONTEXT* pContext, uint32_t threadId, uint32_t procGroupId = CPU_ZERO(); CPU_SET(threadId, ); -pthread_setaffinity_np(thread, sizeof(cpu_set_t), ); +int err = pthread_setaffinity_np(thread, sizeof(cpu_set_t), ); +if (err != 0) +{ +fprintf(stderr, "pthread_setaffinity_np failure for tid %u: %s\n", threadId, strerror(err)); +} #endif } @@ -1031,7 +1017,16 @@ void CreateThreadPool(SWR_CONTEXT* pContext, THREAD_POOL* pPool) } else { -pPool->numaMask = numNodes - 1; // Only works for 2**n numa nodes (1, 2, 4, etc.) +// numa distribution assumes workers on all nodes +bool useNuma = true; +if (numCoresPerNode * numHyperThreads == 1) +useNuma = false; + +if (useNuma) { +
Mesa (master): swr/rast: Migrate memory pointers to gfxptr_t type
Module: Mesa Branch: master Commit: 6f0fcec07a16eb48ebdafffd0b4ae0bb5ac611a4 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=6f0fcec07a16eb48ebdafffd0b4ae0bb5ac611a4 Author: Tim Rowley <timothy.o.row...@intel.com> Date: Thu Sep 7 15:17:23 2017 -0500 swr/rast: Migrate memory pointers to gfxptr_t type Reviewed-by: Bruce Cherniak <bruce.chern...@intel.com> --- .../swr/rasterizer/codegen/gen_llvm_types.py| 2 +- src/gallium/drivers/swr/rasterizer/core/state.h | 5 +++-- .../drivers/swr/rasterizer/memory/StoreTile.h | 4 ++-- .../drivers/swr/rasterizer/memory/TilingFunctions.h | 2 +- src/gallium/drivers/swr/swr_context.cpp | 18 +- src/gallium/drivers/swr/swr_draw.cpp| 8 src/gallium/drivers/swr/swr_resource.h | 2 +- src/gallium/drivers/swr/swr_screen.cpp | 21 ++--- src/gallium/drivers/swr/swr_state.cpp | 10 +- 9 files changed, 36 insertions(+), 36 deletions(-) diff --git a/src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_types.py b/src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_types.py index 94f3f9feff..ccf2bde1ed 100644 --- a/src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_types.py +++ b/src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_types.py @@ -42,7 +42,7 @@ def gen_llvm_type(type, name, is_pointer, is_pointer_pointer, is_array, is_array else: if type == 'BYTE' or type == 'char' or type == 'uint8_t' or type == 'int8_t' or type == 'bool': llvm_type = 'Type::getInt8Ty(ctx)' -elif type == 'UINT64' or type == 'INT64' or type == 'uint64_t' or type == 'int64_t': +elif type == 'UINT64' or type == 'INT64' or type == 'uint64_t' or type == 'int64_t' or type == 'gfxptr_t': llvm_type = 'Type::getInt64Ty(ctx)' elif type == 'UINT16' or type == 'int16_t' or type == 'uint16_t': llvm_type = 'Type::getInt16Ty(ctx)' diff --git a/src/gallium/drivers/swr/rasterizer/core/state.h b/src/gallium/drivers/swr/rasterizer/core/state.h index b0af663d50..13c1d8b7e9 100644 --- a/src/gallium/drivers/swr/rasterizer/core/state.h +++ b/src/gallium/drivers/swr/rasterizer/core/state.h 
@@ -29,6 +29,7 @@ #include "common/formats.h" #include "common/intrin.h" +using gfxptr_t = unsigned long long; #include #include @@ -513,7 +514,7 @@ enum SWR_AUX_MODE // struct SWR_SURFACE_STATE { -uint8_t *pBaseAddress; +gfxptr_t xpBaseAddress; SWR_SURFACE_TYPE type; // @llvm_enum SWR_FORMAT format; // @llvm_enum uint32_t width; @@ -536,7 +537,7 @@ struct SWR_SURFACE_STATE uint32_t lodOffsets[2][15]; // lod offsets for sampled surfaces -uint8_t *pAuxBaseAddress; // Used for compression, append/consume counter, etc. +gfxptr_t xpAuxBaseAddress; // Used for compression, append/consume counter, etc. SWR_AUX_MODE auxMode; // @llvm_enum diff --git a/src/gallium/drivers/swr/rasterizer/memory/StoreTile.h b/src/gallium/drivers/swr/rasterizer/memory/StoreTile.h index c3d14e9509..512c338027 100644 --- a/src/gallium/drivers/swr/rasterizer/memory/StoreTile.h +++ b/src/gallium/drivers/swr/rasterizer/memory/StoreTile.h @@ -1179,7 +1179,7 @@ struct StoreRasterTile resolveColor[3] *= oneOverNumSamples; // Use the resolve surface state -SWR_SURFACE_STATE* pResolveSurface = (SWR_SURFACE_STATE*)pDstSurface->pAuxBaseAddress; +SWR_SURFACE_STATE* pResolveSurface = (SWR_SURFACE_STATE*)pDstSurface->xpAuxBaseAddress; uint8_t *pDst = (uint8_t*)ComputeSurfaceAddress<false, false>((x + rx), (y + ry), pResolveSurface->arrayIndex + renderTargetArrayIndex, pResolveSurface->arrayIndex + renderTargetArrayIndex, 0, pResolveSurface->lod, pResolveSurface); @@ -2390,7 +2390,7 @@ struct StoreMacroTile } } -if (pDstSurface->pAuxBaseAddress) +if (pDstSurface->xpAuxBaseAddress) { uint32_t sampleOffset = KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * (FormatTraits::bpp / 8); // Store each raster tile from the hot tile to the destination surface. 
diff --git a/src/gallium/drivers/swr/rasterizer/memory/TilingFunctions.h b/src/gallium/drivers/swr/rasterizer/memory/TilingFunctions.h index 9222d3edfb..6c801c7ff6 100644 --- a/src/gallium/drivers/swr/rasterizer/memory/TilingFunctions.h +++ b/src/gallium/drivers/swr/rasterizer/memory/TilingFunctions.h @@ -694,5 +694,5 @@ template INLINE void* ComputeSurfaceAddress(uint32_t x, uint32_t y, uint32_t z, uint32_t array, uint32_t sampleNum, uint32_t lod, const SWR_SURFACE_STATE *pState) { -return pState->pBaseAddress + ComputeSurfaceOffset(x, y, z, array, samp
Mesa (master): swr/rast: Start to remove hardcoded clipcull_dist vertex attrib slot
Module: Mesa Branch: master Commit: 5471f65976f39299b9fec7e98fd3b122fa86b499 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=5471f65976f39299b9fec7e98fd3b122fa86b499 Author: Tim Rowley <timothy.o.row...@intel.com> Date: Tue Aug 22 16:42:57 2017 -0500 swr/rast: Start to remove hardcoded clipcull_dist vertex attrib slot Add new field in SWR_BACKEND_STATE::vertexClipCullOffset to specify the start of the clip/cull section of the vertex header. Removed use of hardcoded slot from binner. Reviewed-by: Bruce Cherniak <bruce.chern...@intel.com> --- src/gallium/drivers/swr/rasterizer/core/binner.cpp | 11 ++- src/gallium/drivers/swr/rasterizer/core/state.h| 9 ++--- src/gallium/drivers/swr/swr_state.cpp | 3 +++ 3 files changed, 15 insertions(+), 8 deletions(-) diff --git a/src/gallium/drivers/swr/rasterizer/core/binner.cpp b/src/gallium/drivers/swr/rasterizer/core/binner.cpp index 19afd1f292..a6713e8c5d 100644 --- a/src/gallium/drivers/swr/rasterizer/core/binner.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/binner.cpp @@ -366,16 +366,17 @@ PFN_PROCESS_ATTRIBUTES GetProcessAttributesFunc(uint32_t NumVerts, bool IsSwizzl /// @param clipDistMask - mask of enabled clip distances /// @param pUserClipBuffer - buffer to store results template -void ProcessUserClipDist(PA_STATE& pa, uint32_t primIndex, uint8_t clipDistMask, float *pRecipW, float* pUserClipBuffer) +void ProcessUserClipDist(const SWR_BACKEND_STATE& state, PA_STATE& pa, uint32_t primIndex, float *pRecipW, float* pUserClipBuffer) { DWORD clipDist; +uint32_t clipDistMask = state.clipDistanceMask; while (_BitScanForward(, clipDistMask)) { clipDistMask &= ~(1 << clipDist); uint32_t clipSlot = clipDist >> 2; uint32_t clipComp = clipDist & 0x3; uint32_t clipAttribSlot = clipSlot == 0 ? 
-VERTEX_CLIPCULL_DIST_LO_SLOT : VERTEX_CLIPCULL_DIST_HI_SLOT; +state.vertexClipCullOffset : state.vertexClipCullOffset + 1; simd4scalar primClipDist[3]; pa.AssembleSingle(clipAttribSlot, primIndex, primClipDist); @@ -872,7 +873,7 @@ endBinTriangles: { uint32_t numClipDist = _mm_popcnt_u32(state.backendState.clipDistanceMask); desc.pUserClipBuffer = (float*)pArena->Alloc(numClipDist * 3 * sizeof(float)); -ProcessUserClipDist<3>(pa, triIndex, state.backendState.clipDistanceMask, [12], desc.pUserClipBuffer); +ProcessUserClipDist<3>(state.backendState, pa, triIndex, [12], desc.pUserClipBuffer); } for (uint32_t y = aMTTop[triIndex]; y <= aMTBottom[triIndex]; ++y) @@ -1248,7 +1249,7 @@ void BinPostSetupPointsImpl( desc.pUserClipBuffer = (float*)pArena->Alloc(numClipDist * 3 * sizeof(float)); float dists[8]; float one = 1.0f; -ProcessUserClipDist<1>(pa, primIndex, backendState.clipDistanceMask, , dists); +ProcessUserClipDist<1>(backendState, pa, primIndex, , dists); for (uint32_t i = 0; i < numClipDist; i++) { desc.pUserClipBuffer[3 * i + 0] = 0.0f; desc.pUserClipBuffer[3 * i + 1] = 0.0f; @@ -1577,7 +1578,7 @@ void BinPostSetupLinesImpl( { uint32_t numClipDist = _mm_popcnt_u32(state.backendState.clipDistanceMask); desc.pUserClipBuffer = (float*)pArena->Alloc(numClipDist * 2 * sizeof(float)); -ProcessUserClipDist<2>(pa, primIndex, state.backendState.clipDistanceMask, [12], desc.pUserClipBuffer); +ProcessUserClipDist<2>(state.backendState, pa, primIndex, [12], desc.pUserClipBuffer); } MacroTileMgr *pTileMgr = pDC->pTileMgr; diff --git a/src/gallium/drivers/swr/rasterizer/core/state.h b/src/gallium/drivers/swr/rasterizer/core/state.h index 284c523eba..b0af663d50 100644 --- a/src/gallium/drivers/swr/rasterizer/core/state.h +++ b/src/gallium/drivers/swr/rasterizer/core/state.h @@ -1070,12 +1070,15 @@ struct SWR_BACKEND_STATE bool readRenderTargetArrayIndex;// Forward render target array index from last FE stage to the backend bool readViewportArrayIndex;// Read viewport array 
index from last FE stage during binning -// user clip/cull distance enables + // Offset to the start of the attributes of the input vertices, in simdvector units +uint32_t vertexAttribOffset; + +// User clip/cull distance enables uint8_t cullDistanceMask; uint8_t clipDistanceMask; - // Offset to the start of the attributes of the input vertices, in simdvector units -uint32_t vertexAttribOffset; +// Offset to clip/cull attrib section of the vertex, in simdvector units +uint32_t vertexClipCullOffset; }; diff --git a/src/gallium/drivers/swr/swr_state.cpp b/src/gallium/drivers
Mesa (master): swr/rast: Fetch compile state changes
Module: Mesa Branch: master Commit: 000e2958f59a8d8e07f06e384546aa942d49b15f URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=000e2958f59a8d8e07f06e384546aa942d49b15f Author: Tim Rowley <timothy.o.row...@intel.com> Date: Thu Sep 7 18:53:03 2017 -0500 swr/rast: Fetch compile state changes Add InstanceStrideEnable field and rename InstanceDataStepRate to InstanceAdvancementState in INPUT_ELEMENT_DESC structure. Add stubs for handling InstanceStrideEnable in FetchJit::JitLoadVertices() and FetchJit::JitGatherVertices() and assert if they are triggered. Reviewed-by: Bruce Cherniak <bruce.chern...@intel.com> --- src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp | 12 ++-- src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.h | 7 --- src/gallium/drivers/swr/swr_state.cpp | 2 +- 3 files changed, 15 insertions(+), 6 deletions(-) diff --git a/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp b/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp index 761c58ca27..f3a4b27d9a 100644 --- a/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp +++ b/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp @@ -360,7 +360,7 @@ void FetchJit::JitLoadVertices(const FETCH_COMPILE_STATE , Value* str Value *startOffset; if(ied.InstanceEnable) { -Value* stepRate = C(ied.InstanceDataStepRate); +Value* stepRate = C(ied.InstanceAdvancementState); // prevent a div by 0 for 0 step rate Value* isNonZeroStep = ICMP_UGT(stepRate, C(0)); @@ -376,6 +376,10 @@ void FetchJit::JitLoadVertices(const FETCH_COMPILE_STATE , Value* str startOffset = startInstance; } +else if (ied.InstanceStrideEnable) +{ +SWR_ASSERT((0), "TODO: Fill out more once driver sends this down."); +} else { // offset indices by baseVertex @@ -825,7 +829,7 @@ void FetchJit::JitGatherVertices(const FETCH_COMPILE_STATE , Value *startOffset; if(ied.InstanceEnable) { -Value* stepRate = C(ied.InstanceDataStepRate); +Value* stepRate = C(ied.InstanceAdvancementState); // prevent a div by 0 for 0 step rate Value* 
isNonZeroStep = ICMP_UGT(stepRate, C(0)); @@ -841,6 +845,10 @@ void FetchJit::JitGatherVertices(const FETCH_COMPILE_STATE , startOffset = startInstance; } +else if (ied.InstanceStrideEnable) +{ +SWR_ASSERT((0), "TODO: Fill out more once driver sends this down."); +} else { // offset indices by baseVertex diff --git a/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.h b/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.h index 4f456afffc..0dd6de759a 100644 --- a/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.h +++ b/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.h @@ -45,16 +45,17 @@ struct INPUT_ELEMENT_DESC uint32_tFormat : 10; uint32_tStreamIndex : 6; uint32_tInstanceEnable : 1; +uint32_tInstanceStrideEnable : 1; uint32_tComponentControl0 : 3; uint32_tComponentControl1 : 3; uint32_tComponentControl2 : 3; uint32_tComponentControl3 : 3; uint32_tComponentPacking : 4; -uint32_t_reserved : 19; +uint32_t_reserved : 18; }; uint64_t bits; }; -uint32_t InstanceDataStepRate; +uint32_t InstanceAdvancementState; }; // used to set ComponentPacking @@ -124,7 +125,7 @@ struct FETCH_COMPILE_STATE { if((layout[i].bits != other.layout[i].bits) || ((layout[i].InstanceEnable == 1) && -(layout[i].InstanceDataStepRate != other.layout[i].InstanceDataStepRate))){ +(layout[i].InstanceAdvancementState != other.layout[i].InstanceAdvancementState))){ return false; } } diff --git a/src/gallium/drivers/swr/swr_state.cpp b/src/gallium/drivers/swr/swr_state.cpp index 1491868eae..93108de065 100644 --- a/src/gallium/drivers/swr/swr_state.cpp +++ b/src/gallium/drivers/swr/swr_state.cpp @@ -531,7 +531,7 @@ swr_create_vertex_elements_state(struct pipe_context *pipe, ? ComponentControl::StoreSrc : ComponentControl::Store1Fp; velems->fsState.layout[i].ComponentPacking = ComponentEnable::XYZW; - velems->fsState.layout[i].InstanceDataStepRate = + velems->fsState.layout[i].InstanceAdvancementState = attribs[i].instance_divisor; /* Calculate the pitch of each stream */ ___ mesa-commit mai
Mesa (master): swr/rast: Move clip/cull enables in API
Module: Mesa Branch: master Commit: 966997269278d5eeeb6baf7d70fb99df0038b081 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=966997269278d5eeeb6baf7d70fb99df0038b081 Author: Tim Rowley <timothy.o.row...@intel.com> Date: Mon Aug 21 17:11:34 2017 -0500 swr/rast: Move clip/cull enables in API Moved from from SWR_RASTSTATE to SWR_BACKEND_STATE. Reviewed-by: Bruce Cherniak <bruce.chern...@intel.com> --- .../drivers/swr/rasterizer/core/backend.cpp| 4 ++-- .../drivers/swr/rasterizer/core/backend_impl.h | 2 +- .../drivers/swr/rasterizer/core/backend_sample.cpp | 4 ++-- .../swr/rasterizer/core/backend_singlesample.cpp | 4 ++-- src/gallium/drivers/swr/rasterizer/core/binner.cpp | 18 +- src/gallium/drivers/swr/rasterizer/core/clip.h | 22 +++--- .../drivers/swr/rasterizer/core/rasterizer.cpp | 2 +- src/gallium/drivers/swr/rasterizer/core/state.h| 8 src/gallium/drivers/swr/swr_state.cpp | 16 9 files changed, 40 insertions(+), 40 deletions(-) diff --git a/src/gallium/drivers/swr/rasterizer/core/backend.cpp b/src/gallium/drivers/swr/rasterizer/core/backend.cpp index 363349f6c8..6282e87f31 100644 --- a/src/gallium/drivers/swr/rasterizer/core/backend.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/backend.cpp @@ -272,9 +272,9 @@ void BackendNullPS(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t y, AR_END(BEBarycentric, 0); // interpolate user clip distance if available -if (state.rastState.clipDistanceMask) +if (state.backendState.clipDistanceMask) { -coverageMask &= ~ComputeUserClipMask(state.rastState.clipDistanceMask, work.pUserClipBuffer, psContext.vI.sample, psContext.vJ.sample); +coverageMask &= ~ComputeUserClipMask(state.backendState.clipDistanceMask, work.pUserClipBuffer, psContext.vI.sample, psContext.vJ.sample); } simdscalar vCoverageMask = _simd_vmask_ps(coverageMask); diff --git a/src/gallium/drivers/swr/rasterizer/core/backend_impl.h b/src/gallium/drivers/swr/rasterizer/core/backend_impl.h index 0f430ef3ab..593082bd7d 100644 --- 
a/src/gallium/drivers/swr/rasterizer/core/backend_impl.h +++ b/src/gallium/drivers/swr/rasterizer/core/backend_impl.h @@ -886,7 +886,7 @@ void BackendPixelRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t AR_END(BESetup, 0); -PixelRateZTestLoop PixelRateZTest(pDC, workerId, work, coeffs, state, pDepthBuffer, pStencilBuffer, state.rastState.clipDistanceMask); +PixelRateZTestLoop PixelRateZTest(pDC, workerId, work, coeffs, state, pDepthBuffer, pStencilBuffer, state.backendState.clipDistanceMask); psContext.vY.UL = _simd_add_ps(vULOffsetsY, _simd_set1_ps(static_cast(y))); psContext.vY.center = _simd_add_ps(vCenterOffsetsY, _simd_set1_ps(static_cast(y))); diff --git a/src/gallium/drivers/swr/rasterizer/core/backend_sample.cpp b/src/gallium/drivers/swr/rasterizer/core/backend_sample.cpp index bb2e9a9f63..04e34aa264 100644 --- a/src/gallium/drivers/swr/rasterizer/core/backend_sample.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/backend_sample.cpp @@ -128,9 +128,9 @@ void BackendSampleRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_ AR_END(BEBarycentric, 0); // interpolate user clip distance if available -if (state.rastState.clipDistanceMask) +if (state.backendState.clipDistanceMask) { -coverageMask &= ~ComputeUserClipMask(state.rastState.clipDistanceMask, work.pUserClipBuffer, psContext.vI.sample, psContext.vJ.sample); +coverageMask &= ~ComputeUserClipMask(state.backendState.clipDistanceMask, work.pUserClipBuffer, psContext.vI.sample, psContext.vJ.sample); } simdscalar vCoverageMask = _simd_vmask_ps(coverageMask); diff --git a/src/gallium/drivers/swr/rasterizer/core/backend_singlesample.cpp b/src/gallium/drivers/swr/rasterizer/core/backend_singlesample.cpp index 18f4299f51..686b97912c 100644 --- a/src/gallium/drivers/swr/rasterizer/core/backend_singlesample.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/backend_singlesample.cpp @@ -112,9 +112,9 @@ void BackendSingleSample(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint3 
AR_END(BEBarycentric, 1); // interpolate user clip distance if available -if (state.rastState.clipDistanceMask) +if (state.backendState.clipDistanceMask) { -coverageMask &= ~ComputeUserClipMask(state.rastState.clipDistanceMask, work.pUserClipBuffer, psContext.vI.center, psContext.vJ.center); +
Mesa (master): swr/rast: whitespace changes
Module: Mesa Branch: master Commit: c0ce5c4422a8a49124196da00577196ab22ab89c URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=c0ce5c4422a8a49124196da00577196ab22ab89c Author: Tim Rowley <timothy.o.row...@intel.com> Date: Thu Sep 7 15:18:35 2017 -0500 swr/rast: whitespace changes Reviewed-by: Bruce Cherniak <bruce.chern...@intel.com> --- src/gallium/drivers/swr/rasterizer/jitter/jit_api.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/gallium/drivers/swr/rasterizer/jitter/jit_api.h b/src/gallium/drivers/swr/rasterizer/jitter/jit_api.h index 9f69669735..e589d2c6a7 100644 --- a/src/gallium/drivers/swr/rasterizer/jitter/jit_api.h +++ b/src/gallium/drivers/swr/rasterizer/jitter/jit_api.h @@ -51,6 +51,7 @@ struct ShaderInfo; + // /// Jit Compile Info Input // @@ -63,6 +64,7 @@ struct JIT_COMPILE_INPUT size_t irLength; bool enableJitSampler; + }; extern "C" ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): swr: set caps for VB 4-byte alignment
Module: Mesa Branch: master Commit: 4edc5d830550355681df2147fd25dae4c77bccc0 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=4edc5d830550355681df2147fd25dae4c77bccc0 Author: Tim Rowley <timothy.o.row...@intel.com> Date: Fri Aug 18 11:51:59 2017 -0500 swr: set caps for VB 4-byte alignment Needed to compensate for change to fetch jit requiring alignment. Fixes regressions in piglit: vertex-buffer-offsets and about another hundred of the vs-input*byte* tests. Reviewed-by: Bruce Cherniak <bruce.chern...@intel.com> --- src/gallium/drivers/swr/swr_screen.cpp | 9 ++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/gallium/drivers/swr/swr_screen.cpp b/src/gallium/drivers/swr/swr_screen.cpp index cc8d9955b8..85bf765841 100644 --- a/src/gallium/drivers/swr/swr_screen.cpp +++ b/src/gallium/drivers/swr/swr_screen.cpp @@ -263,6 +263,12 @@ swr_get_param(struct pipe_screen *screen, enum pipe_cap param) case PIPE_CAP_FAKE_SW_MSAA: return (swr_screen(screen)->msaa_max_count > 1) ? 0 : 1; + /* fetch jit change for 2-4GB buffers requires alignment */ + case PIPE_CAP_VERTEX_BUFFER_OFFSET_4BYTE_ALIGNED_ONLY: + case PIPE_CAP_VERTEX_BUFFER_STRIDE_4BYTE_ALIGNED_ONLY: + case PIPE_CAP_VERTEX_ELEMENT_SRC_OFFSET_4BYTE_ALIGNED_ONLY: + return 1; + /* unsupported features */ case PIPE_CAP_ANISOTROPIC_FILTER: case PIPE_CAP_TEXTURE_BORDER_COLOR_QUIRK: @@ -274,9 +280,6 @@ swr_get_param(struct pipe_screen *screen, enum pipe_cap param) case PIPE_CAP_COMPUTE: case PIPE_CAP_TGSI_VS_LAYER_VIEWPORT: case PIPE_CAP_TGSI_CAN_COMPACT_CONSTANTS: - case PIPE_CAP_VERTEX_BUFFER_OFFSET_4BYTE_ALIGNED_ONLY: - case PIPE_CAP_VERTEX_BUFFER_STRIDE_4BYTE_ALIGNED_ONLY: - case PIPE_CAP_VERTEX_ELEMENT_SRC_OFFSET_4BYTE_ALIGNED_ONLY: case PIPE_CAP_TGSI_TEXCOORD: case PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER: case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS: ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): swr/rast: FE/Clipper - unify SIMD8/16 functions using simdlib types
Module: Mesa Branch: master Commit: dad32fc61c21601e3700b88914cd6b9c1271aa85 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=dad32fc61c21601e3700b88914cd6b9c1271aa85 Author: Tim Rowley <timothy.o.row...@intel.com> Date: Tue Aug 15 18:51:45 2017 -0500 swr/rast: FE/Clipper - unify SIMD8/16 functions using simdlib types Reviewed-by: Bruce Cherniak <bruce.chern...@intel.com> --- src/gallium/drivers/swr/rasterizer/core/clip.cpp | 16 +- src/gallium/drivers/swr/rasterizer/core/clip.h | 1650 ++ src/gallium/drivers/swr/rasterizer/core/state.h |7 + 3 files changed, 465 insertions(+), 1208 deletions(-) Diff: http://cgit.freedesktop.org/mesa/mesa/diff/?id=dad32fc61c21601e3700b88914cd6b9c1271aa85 ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): swr/rast: FE/Binner - unify SIMD8/16 functions using simdlib types
Module: Mesa Branch: master Commit: 6cb20c9f3a327fe3c1a99d6824632aea238d7d72 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=6cb20c9f3a327fe3c1a99d6824632aea238d7d72 Author: Tim Rowley <timothy.o.row...@intel.com> Date: Fri Aug 4 18:07:01 2017 -0500 swr/rast: FE/Binner - unify SIMD8/16 functions using simdlib types Reviewed-by: Bruce Cherniak <bruce.chern...@intel.com> --- src/gallium/drivers/swr/rasterizer/core/binner.cpp | 2366 ++-- src/gallium/drivers/swr/rasterizer/core/binner.h | 186 +- .../drivers/swr/rasterizer/core/conservativeRast.h |1 + src/gallium/drivers/swr/rasterizer/core/pa.h | 16 + src/gallium/drivers/swr/rasterizer/core/utils.h|8 + 5 files changed, 767 insertions(+), 1810 deletions(-) Diff: http://cgit.freedesktop.org/mesa/mesa/diff/?id=6cb20c9f3a327fe3c1a99d6824632aea238d7d72 ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): swr/rast: Allow gather of floats from fetch shader with 2-4GB offsets
Module: Mesa Branch: master Commit: 4475583f5ea44c3585e0ffea6118ba3a32fddd72 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=4475583f5ea44c3585e0ffea6118ba3a32fddd72 Author: Tim Rowley <timothy.o.row...@intel.com> Date: Wed Aug 9 17:32:28 2017 -0500 swr/rast: Allow gather of floats from fetch shader with 2-4GB offsets Reviewed-by: Bruce Cherniak <bruce.chern...@intel.com> --- src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_ir_macros.py | 1 + src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp | 7 ++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_ir_macros.py b/src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_ir_macros.py index 2ed2b2f61e..025d38ab33 100644 --- a/src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_ir_macros.py +++ b/src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_ir_macros.py @@ -45,6 +45,7 @@ intrinsics = [ ['VGATHERPD', 'x86_avx2_gather_d_pd_256', ['src', 'pBase', 'indices', 'mask', 'scale']], ['VGATHERPS', 'x86_avx2_gather_d_ps_256', ['src', 'pBase', 'indices', 'mask', 'scale']], ['VGATHERDD', 'x86_avx2_gather_d_d_256', ['src', 'pBase', 'indices', 'mask', 'scale']], +['VPSRLI', 'x86_avx2_psrli_d', ['src', 'imm']], ['VSQRTPS', 'x86_avx_sqrt_ps_256', ['a']], ['VRSQRTPS', 'x86_avx_rsqrt_ps_256', ['a']], ['VRCPPS', 'x86_avx_rcp_ps_256', ['a']], diff --git a/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp b/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp index dcfe8970f5..761c58ca27 100644 --- a/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp +++ b/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp @@ -1005,7 +1005,12 @@ void FetchJit::JitGatherVertices(const FETCH_COMPILE_STATE , Value *vMask = vGatherMask; // Gather a SIMD of vertices -vVertexElements[currentVertexElement++] = GATHERPS(gatherSrc, pStreamBase, vOffsets, vMask, C((char)1)); +// APIs allow a 4GB range for offsets +// However, GATHERPS uses signed 32-bit offsets, so only 
a 2GB range :( +// But, we know that elements must be aligned for FETCH. :) +// Right shift the offset by a bit and then scale by 2 to remove the sign extension. +Value* vShiftedOffsets = VPSRLI(vOffsets, C(1)); +vVertexElements[currentVertexElement++] = GATHERPS(gatherSrc, pStreamBase, vShiftedOffsets, vMask, C((char)2)); } else { ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): swr/rast: SIMD16 FE remove templated immediates workaround
Module: Mesa Branch: master Commit: 9df5691fffafdc31b82eb18f3cd5ce7d45eb83a2 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=9df5691fffafdc31b82eb18f3cd5ce7d45eb83a2 Author: Tim Rowley <timothy.o.row...@intel.com> Date: Wed Aug 9 18:48:58 2017 -0500 swr/rast: SIMD16 FE remove templated immediates workaround Fixed properly in gcc-compatible fashion. Reviewed-by: Bruce Cherniak <bruce.chern...@intel.com> --- src/gallium/drivers/swr/rasterizer/core/binner.cpp | 110 - 1 file changed, 20 insertions(+), 90 deletions(-) diff --git a/src/gallium/drivers/swr/rasterizer/core/binner.cpp b/src/gallium/drivers/swr/rasterizer/core/binner.cpp index e09ff7a399..832c47d6e4 100644 --- a/src/gallium/drivers/swr/rasterizer/core/binner.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/binner.cpp @@ -404,35 +404,6 @@ void ProcessUserClipDist(PA_STATE& pa, uint32_t primIndex, uint8_t clipDistMask, } } -// WA linux compiler issue with SIMDLIB and shift immediates -#define SIMD_WA_SXXI_EPI32 1 - -#if SIMD_WA_SXXI_EPI32 -template -simdscalari simd_wa_slli_epi32(simdscalari a) -{ -return SIMD256::slli_epi32(a); -} - -template -simd16scalari simd_wa_slli_epi32(simd16scalari a) -{ -return SIMD512::slli_epi32(a); -} - -template -simdscalari simd_wa_srai_epi32(simdscalari a) -{ -return SIMD256::srai_epi32(a); -} - -template -simd16scalari simd_wa_srai_epi32(simd16scalari a) -{ -return SIMD512::srai_epi32(a); -} - -#endif INLINE void TransposeVertices(simd4scalar()[8], const simdscalar , const simdscalar , const simdscalar ) { @@ -804,17 +775,10 @@ endBinTriangles: } // Convert triangle bbox to macrotile units. 
-#if SIMD_WA_SXXI_EPI32 -bbox.xmin = simd_wa_srai_epi32(bbox.xmin); -bbox.ymin = simd_wa_srai_epi32(bbox.ymin); -bbox.xmax = simd_wa_srai_epi32(bbox.xmax); -bbox.ymax = simd_wa_srai_epi32(bbox.ymax); -#else -bbox.xmin = SIMD_T::srai_epi32(bbox.xmin); -bbox.ymin = SIMD_T::srai_epi32(bbox.ymin); -bbox.xmax = SIMD_T::srai_epi32(bbox.xmax); -bbox.ymax = SIMD_T::srai_epi32(bbox.ymax); -#endif +bbox.xmin = SIMD_T::template srai_epi32(bbox.xmin); +bbox.ymin = SIMD_T::template srai_epi32(bbox.ymin); +bbox.xmax = SIMD_T::template srai_epi32(bbox.xmax); +bbox.ymax = SIMD_T::template srai_epi32(bbox.ymax); OSALIGNSIMD16(uint32_t) aMTLeft[SIMD_WIDTH], aMTRight[SIMD_WIDTH], aMTTop[SIMD_WIDTH], aMTBottom[SIMD_WIDTH]; @@ -1034,13 +998,8 @@ void BinPostSetupPointsImpl( primMask &= ~SIMD_T::movemask_ps(SIMD_T::castsi_ps(vYi)); // compute macro tile coordinates -#if SIMD_WA_SXXI_EPI32 -typename SIMD_T::Integer macroX = simd_wa_srai_epi32(vXi); -typename SIMD_T::Integer macroY = simd_wa_srai_epi32(vYi); -#else -typename SIMD_T::Integer macroX = SIMD_T::srai_epi32(vXi); -typename SIMD_T::Integer macroY = SIMD_T::srai_epi32(vYi); -#endif +typename SIMD_T::Integer macroX = SIMD_T::template srai_epi32(vXi); +typename SIMD_T::Integer macroY = SIMD_T::template srai_epi32(vYi); OSALIGNSIMD16(uint32_t) aMacroX[SIMD_WIDTH], aMacroY[SIMD_WIDTH]; @@ -1048,30 +1007,15 @@ void BinPostSetupPointsImpl( SIMD_T::store_si(reinterpret_cast(aMacroY), macroY); // compute raster tile coordinates -#if SIMD_WA_SXXI_EPI32 -typename SIMD_T::Integer rasterX = simd_wa_srai_epi32(vXi); -typename SIMD_T::Integer rasterY = simd_wa_srai_epi32(vYi); -#else -typename SIMD_T::Integer rasterX = SIMD_T::srai_epi32(vXi); -typename SIMD_T::Integer rasterY = SIMD_T::srai_epi32(vYi); -#endif +typename SIMD_T::Integer rasterX = SIMD_T::template srai_epi32(vXi); +typename SIMD_T::Integer rasterY = SIMD_T::template srai_epi32(vYi); // compute raster tile relative x,y for coverage mask -#if SIMD_WA_SXXI_EPI32 -typename 
SIMD_T::Integer tileAlignedX = simd_wa_slli_epi32(rasterX); -typename SIMD_T::Integer tileAlignedY = simd_wa_slli_epi32(rasterY); -#else -typename SIMD_T::Integer tileAlignedX = SIMD_T::slli_epi32(rasterX); -typename SIMD_T::Integer tileAlignedY = SIMD_T::slli_epi32(rasterY); -#endif +typename SIMD_T::Integer tileAlignedX = SIMD_T::template slli_epi32(rasterX); +typename SIMD_T::Integer tileAlignedY = SIMD_T::template slli_epi32(rasterY); -#if SIMD_WA_SXXI_EPI32 -typename SIMD_T::Integer tileRelativeX = SIMD_T::sub_epi32(simd_wa_srai_epi32(vXi), tileAlignedX); -typename SIMD_T::Integer tileRelativeY = SIMD_T::sub_epi32(simd_wa_srai_epi32(vYi), tileAlignedY); -#else -typename SIMD_T::Integer tileRelativeX = SIMD_T::sub_epi32(SIMD_T::srai_epi32(vXi), tileAlignedX); -typename SIMD_T::Integer tileRelativeY = SIMD_T::sub_epi32(SIMD_T::srai_epi32(vYi), tileAlignedY); -#endif +typename SIMD_T::Integer tileRelativeX = SIMD_T::sub_epi32(SIMD_T:
Mesa (master): swr/rast: Removed some trailing whitespace caught during review
Module: Mesa Branch: master Commit: 6afdc8732c4fca735803b6cbacf9723bbd02afa4 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=6afdc8732c4fca735803b6cbacf9723bbd02afa4 Author: Tim Rowley <timothy.o.row...@intel.com> Date: Tue Aug 1 15:21:04 2017 -0500 swr/rast: Removed some trailing whitespace caught during review Reviewed-by: Bruce Cherniak <bruce.chern...@intel.com> --- .../rasterizer/codegen/templates/gen_ar_eventhandlerfile.hpp | 4 ++-- src/gallium/drivers/swr/rasterizer/core/fifo.hpp | 4 ++-- src/gallium/drivers/swr/rasterizer/core/pa.h | 12 ++-- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_ar_eventhandlerfile.hpp b/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_ar_eventhandlerfile.hpp index 0ca9a7828d..d1852b35fd 100644 --- a/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_ar_eventhandlerfile.hpp +++ b/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_ar_eventhandlerfile.hpp @@ -23,7 +23,7 @@ * @file ${filename} * * @brief Event handler interface. auto-generated file -* +* * DO NOT EDIT * * Generation Command Line: @@ -57,7 +57,7 @@ namespace ArchRast std::stringstream outDir; outDir << KNOB_DEBUG_OUTPUT_DIR << pBaseName << "_" << pid << std::ends; CreateDirectory(outDir.str().c_str(), NULL); - + // There could be multiple threads creating thread pools. We // want to make sure they are uniquly identified by adding in // the creator's thread id into the filename. 
diff --git a/src/gallium/drivers/swr/rasterizer/core/fifo.hpp b/src/gallium/drivers/swr/rasterizer/core/fifo.hpp index 3be72f37cd..43d3a83226 100644 --- a/src/gallium/drivers/swr/rasterizer/core/fifo.hpp +++ b/src/gallium/drivers/swr/rasterizer/core/fifo.hpp @@ -79,7 +79,7 @@ struct QUEUE long initial = InterlockedCompareExchange(, 1, 0); return (initial == 0); } - + void unlock() { mLock = 0; @@ -112,7 +112,7 @@ struct QUEUE __m256 vSrc = _mm256_load_ps(pSrc + i*KNOB_SIMD_WIDTH); _mm256_stream_ps(pDst + i*KNOB_SIMD_WIDTH, vSrc); }; - + const uint32_t numSimdLines = sizeof(T) / (KNOB_SIMD_WIDTH*4); static_assert(numSimdLines * KNOB_SIMD_WIDTH * 4 == sizeof(T), "FIFO element size should be multiple of SIMD width."); diff --git a/src/gallium/drivers/swr/rasterizer/core/pa.h b/src/gallium/drivers/swr/rasterizer/core/pa.h index cb3470ff6b..87dba22bf8 100644 --- a/src/gallium/drivers/swr/rasterizer/core/pa.h +++ b/src/gallium/drivers/swr/rasterizer/core/pa.h @@ -162,7 +162,7 @@ struct PA_STATE_OPT : public PA_STATE bool isStreaming{ false }; SIMDMASK junkIndices { 0 }; // temporary index store for unused virtual function - + PA_STATE_OPT() {} PA_STATE_OPT(DRAW_CONTEXT* pDC, uint32_t numPrims, uint8_t* pStream, uint32_t streamSizeInVerts, uint32_t vertexStride, bool in_isStreaming, PRIMITIVE_TOPOLOGY topo = TOP_UNKNOWN); @@ -412,7 +412,7 @@ struct PA_STATE_CUT : public PA_STATE uint32_t vertsPerPrim{ 0 }; bool processCutVerts{ false }; // vertex indices with cuts should be processed as normal, otherwise they // are ignored. 
Fetch shader sends invalid verts on cuts that should be ignored - // while the GS sends valid verts for every index + // while the GS sends valid verts for every index simdvector junkVector; // junk simdvector for unimplemented API #if ENABLE_AVX512_SIMD16 @@ -575,7 +575,7 @@ struct PA_STATE_CUT : public PA_STATE return CheckBit(this->pCutIndices[vertexIndex], vertexOffset); } -// iterates across the unprocessed verts until we hit the end or we +// iterates across the unprocessed verts until we hit the end or we // have assembled SIMD prims void ProcessVerts() { @@ -583,7 +583,7 @@ struct PA_STATE_CUT : public PA_STATE this->numRemainingVerts > 0 && this->curVertex != this->headVertex) { -// if cut index, restart topology +// if cut index, restart topology if (IsCutIndex(this->curVertex)) { if (this->processCutVerts) @@ -923,7 +923,7 @@ struct PA_STATE_CUT : public PA_STATE case 6: SWR_ASSERT(this->adjExtraVert != -1, "Algorith failure!"); AssembleTriStripAdj(); - + uint32_t nextTri[6]; if (this->reverseWinding) { @@ -939,7 +939,7 @@ struct PA_STATE_CUT : p
Mesa (master): swr/rast: SIMD16 PA - rename Assemble_simd16 to Assemble
Module: Mesa Branch: master Commit: 404ac6da9e7eadd62c38e20f382b5280b29fa8bb URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=404ac6da9e7eadd62c38e20f382b5280b29fa8bb Author: Tim Rowley <timothy.o.row...@intel.com> Date: Mon Aug 7 18:13:54 2017 -0500 swr/rast: SIMD16 PA - rename Assemble_simd16 to Assemble For consistency and to support overloading. Reviewed-by: Bruce Cherniak <bruce.chern...@intel.com> --- src/gallium/drivers/swr/rasterizer/core/clip.h | 18 +- .../drivers/swr/rasterizer/core/frontend.cpp | 6 +++--- src/gallium/drivers/swr/rasterizer/core/pa.h | 22 +++--- 3 files changed, 15 insertions(+), 31 deletions(-) diff --git a/src/gallium/drivers/swr/rasterizer/core/clip.h b/src/gallium/drivers/swr/rasterizer/core/clip.h index ffc69c4229..5238284e32 100644 --- a/src/gallium/drivers/swr/rasterizer/core/clip.h +++ b/src/gallium/drivers/swr/rasterizer/core/clip.h @@ -399,8 +399,8 @@ public: simd16vector vClipCullDistLo[3]; simd16vector vClipCullDistHi[3]; -pa.Assemble_simd16(VERTEX_CLIPCULL_DIST_LO_SLOT, vClipCullDistLo); -pa.Assemble_simd16(VERTEX_CLIPCULL_DIST_HI_SLOT, vClipCullDistHi); +pa.Assemble(VERTEX_CLIPCULL_DIST_LO_SLOT, vClipCullDistLo); +pa.Assemble(VERTEX_CLIPCULL_DIST_HI_SLOT, vClipCullDistHi); DWORD index; while (_BitScanForward(, cullMask)) @@ -680,7 +680,7 @@ public: { #if USE_SIMD16_FRONTEND simd16vector attrib_simd16[NumVertsPerPrim]; -bool assemble = clipPa.Assemble_simd16(VERTEX_POSITION_SLOT, attrib_simd16); +bool assemble = clipPa.Assemble(VERTEX_POSITION_SLOT, attrib_simd16); if (assemble) { @@ -731,7 +731,7 @@ public: // assemble pos simd16vector tmpVector[NumVertsPerPrim]; -pa.Assemble_simd16(VERTEX_POSITION_SLOT, tmpVector); +pa.Assemble(VERTEX_POSITION_SLOT, tmpVector); for (uint32_t i = 0; i < NumVertsPerPrim; ++i) { vertices[i].attrib[VERTEX_POSITION_SLOT] = tmpVector[i]; @@ -748,7 +748,7 @@ public: maxSlot = std::max(maxSlot, mapSlot); uint32_t inputSlot = backendState.vertexAttribOffset + mapSlot; 
-pa.Assemble_simd16(inputSlot, tmpVector); +pa.Assemble(inputSlot, tmpVector); // if constant interpolation enabled for this attribute, assign the provoking // vertex values to all edges @@ -771,7 +771,7 @@ public: // assemble user clip distances if enabled if (this->state.rastState.clipDistanceMask & 0xf) { -pa.Assemble_simd16(VERTEX_CLIPCULL_DIST_LO_SLOT, tmpVector); +pa.Assemble(VERTEX_CLIPCULL_DIST_LO_SLOT, tmpVector); for (uint32_t i = 0; i < NumVertsPerPrim; ++i) { vertices[i].attrib[VERTEX_CLIPCULL_DIST_LO_SLOT] = tmpVector[i]; @@ -780,7 +780,7 @@ public: if (this->state.rastState.clipDistanceMask & 0xf0) { -pa.Assemble_simd16(VERTEX_CLIPCULL_DIST_HI_SLOT, tmpVector); +pa.Assemble(VERTEX_CLIPCULL_DIST_HI_SLOT, tmpVector); for (uint32_t i = 0; i < NumVertsPerPrim; ++i) { vertices[i].attrib[VERTEX_CLIPCULL_DIST_HI_SLOT] = tmpVector[i]; @@ -919,7 +919,7 @@ public: do { simd16vector attrib[NumVertsPerPrim]; -bool assemble = clipPa.Assemble_simd16(VERTEX_POSITION_SLOT, attrib); +bool assemble = clipPa.Assemble(VERTEX_POSITION_SLOT, attrib); if (assemble) { @@ -1060,7 +1060,7 @@ public: if (state.backendState.readViewportArrayIndex) { simd16vector vpiAttrib[NumVertsPerPrim]; -pa.Assemble_simd16(VERTEX_SGV_SLOT, vpiAttrib); +pa.Assemble(VERTEX_SGV_SLOT, vpiAttrib); // OOB indices => forced to zero. 
simd16scalari vpai = _simd16_castps_si(vpiAttrib[0][VERTEX_SGV_VAI_COMP]); diff --git a/src/gallium/drivers/swr/rasterizer/core/frontend.cpp b/src/gallium/drivers/swr/rasterizer/core/frontend.cpp index 406a0e0bec..f882869eb7 100644 --- a/src/gallium/drivers/swr/rasterizer/core/frontend.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/frontend.cpp @@ -929,7 +929,7 @@ static void GeometryShaderStage( #if USE_SIMD16_FRONTEND simd16vector attrib_simd16[3]; -bool assemble = gsPa.Assemble_simd16(VERTEX_POSITION_SLOT, attrib_simd16); +bool assemble = gsPa.Assemble(VERTEX_POSITION_SLOT, attrib_simd16); #else bool assemble = gsPa.Assemble(VERTEX_POSITION_SLOT, attrib); @@ -1297,7 +1297,7 @@ static void TessellationStages(
Mesa (master): swr/rast: Remove use of C++14 template variable
Module: Mesa Branch: master Commit: 1ebf6fc86556669fbb7b30e560119622497a5051 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=1ebf6fc86556669fbb7b30e560119622497a5051 Author: Tim Rowley <timothy.o.row...@intel.com> Date: Thu Aug 10 16:11:35 2017 -0500 swr/rast: Remove use of C++14 template variable SWR rasterizer must remain C++11 compliant. Reviewed-by: Bruce Cherniak <bruce.chern...@intel.com> --- src/gallium/drivers/swr/rasterizer/core/binner.cpp | 6 +++--- src/gallium/drivers/swr/rasterizer/core/binner.h | 14 +++--- 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/src/gallium/drivers/swr/rasterizer/core/binner.cpp b/src/gallium/drivers/swr/rasterizer/core/binner.cpp index 832c47d6e4..01c2f8f7a3 100644 --- a/src/gallium/drivers/swr/rasterizer/core/binner.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/binner.cpp @@ -502,7 +502,7 @@ void SIMDCALL BinTrianglesImpl( } // Adjust for pixel center location -typename SIMD_T::Float offset = g_pixelOffsets[rastState.pixelLocation]; +typename SIMD_T::Float offset = SwrPixelOffsets::GetOffset(rastState.pixelLocation); tri[0].x = SIMD_T::add_ps(tri[0].x, offset); tri[0].y = SIMD_T::add_ps(tri[0].y, offset); @@ -1332,7 +1332,7 @@ void BinPointsImpl( } } -typename SIMD_T::Float offset = g_pixelOffsets[rastState.pixelLocation]; +typename SIMD_T::Float offset = SwrPixelOffsets::GetOffset(rastState.pixelLocation); prim[0].x = SIMD_T::add_ps(prim[0].x, offset); prim[0].y = SIMD_T::add_ps(prim[0].y, offset); @@ -1666,7 +1666,7 @@ void SIMDCALL BinLinesImpl( } // adjust for pixel center location -typename SIMD_T::Float offset = g_pixelOffsets[rastState.pixelLocation]; +typename SIMD_T::Float offset = SwrPixelOffsets::GetOffset(rastState.pixelLocation); prim[0].x = SIMD_T::add_ps(prim[0].x, offset); prim[0].y = SIMD_T::add_ps(prim[0].y, offset); diff --git a/src/gallium/drivers/swr/rasterizer/core/binner.h b/src/gallium/drivers/swr/rasterizer/core/binner.h index e842aa663b..97e113f7f2 100644 --- 
a/src/gallium/drivers/swr/rasterizer/core/binner.h +++ b/src/gallium/drivers/swr/rasterizer/core/binner.h @@ -31,11 +31,19 @@ // /// @brief Offsets added to post-viewport vertex positions based on /// raster state. +/// +/// Can't use templated variable because we must stick with C++11 features. +/// Template variables were introduced with C++14 template -static const typename SIMD_T::Float g_pixelOffsets[SWR_PIXEL_LOCATION_UL + 1] = +struct SwrPixelOffsets { -SIMD_T::set1_ps(0.0f), // SWR_PIXEL_LOCATION_CENTER -SIMD_T::set1_ps(0.5f), // SWR_PIXEL_LOCATION_UL +public: +INLINE static typename SIMD_T::Float GetOffset(uint32_t loc) +{ +SWR_ASSERT(loc <= 1); + +return SIMD_T::set1_ps(loc ? 0.5f : 0.0f); +} }; // ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): swr: limit pipe_draw_info->restart_index usage
Module: Mesa Branch: master Commit: f0602dc92044ea6d738d0e539e52f938a41f6093 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=f0602dc92044ea6d738d0e539e52f938a41f6093 Author: Tim Rowley <timothy.o.row...@intel.com> Date: Tue Aug 22 10:39:57 2017 -0500 swr: limit pipe_draw_info->restart_index usage Only copy this value when in restart drawing mode. Eliminates valgrind errors when running trivial programs. Reviewed-by: Bruce Cherniak <bruce.chern...@intel.com> --- src/gallium/drivers/swr/swr_draw.cpp | 5 - 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/gallium/drivers/swr/swr_draw.cpp b/src/gallium/drivers/swr/swr_draw.cpp index df1c11abeb..2363800f80 100644 --- a/src/gallium/drivers/swr/swr_draw.cpp +++ b/src/gallium/drivers/swr/swr_draw.cpp @@ -107,7 +107,10 @@ swr_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) } struct swr_vertex_element_state *velems = ctx->velems; - velems->fsState.cutIndex = info->restart_index; + if (info->primitive_restart) + velems->fsState.cutIndex = info->restart_index; + else + velems->fsState.cutIndex = 0; velems->fsState.bEnableCutIndex = info->primitive_restart; velems->fsState.bPartialVertexBuffer = (info->min_index > 0); ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): swr/rast: Fix invalid casting for calls to Interlocked* functions
Module: Mesa Branch: master Commit: b333bc753e2dd1ed1a676606046a4289e7d58187 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=b333bc753e2dd1ed1a676606046a4289e7d58187 Author: Tim Rowley <timothy.o.row...@intel.com> Date: Mon Aug 7 20:33:24 2017 -0500 swr/rast: Fix invalid casting for calls to Interlocked* functions CID: 1416243, 1416244, 1416255 CC: mesa-sta...@lists.freedesktop.org Reviewed-by: Bruce Cherniak <bruce.chern...@intel.com> --- src/gallium/drivers/swr/rasterizer/core/api.cpp | 2 +- src/gallium/drivers/swr/rasterizer/core/context.h | 8 src/gallium/drivers/swr/rasterizer/core/threads.cpp | 4 ++-- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/gallium/drivers/swr/rasterizer/core/api.cpp b/src/gallium/drivers/swr/rasterizer/core/api.cpp index 8dc9ac24a7..ccb6dfb7a1 100644 --- a/src/gallium/drivers/swr/rasterizer/core/api.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/api.cpp @@ -189,7 +189,7 @@ void QueueWork(SWR_CONTEXT *pContext) if (IsDraw) { -InterlockedIncrement((volatile long*)>drawsOutstandingFE); +InterlockedIncrement(>drawsOutstandingFE); } _ReadWriteBarrier(); diff --git a/src/gallium/drivers/swr/rasterizer/core/context.h b/src/gallium/drivers/swr/rasterizer/core/context.h index 131b3cbbb0..bcd5801a3b 100644 --- a/src/gallium/drivers/swr/rasterizer/core/context.h +++ b/src/gallium/drivers/swr/rasterizer/core/context.h @@ -409,12 +409,12 @@ struct DRAW_CONTEXT booldependent; // Backend work is dependent on all previous BE boolisCompute; // Is this DC a compute context? boolcleanupState; // True if this is the last draw using an entry in the state ring. -volatile bool doneFE; // Is FE work done for this draw? FE_WORK FeWork; +volatile OSALIGNLINE(bool) doneFE; // Is FE work done for this draw? volatile OSALIGNLINE(uint32_t) FeLock; -volatile int32_tthreadsDone; +volatile OSALIGNLINE(uint32_t) threadsDone; SYNC_DESC retireCallback; // Call this func when this DC is retired. 
}; @@ -503,9 +503,9 @@ struct SWR_CONTEXT // Scratch space for workers. uint8_t** ppScratch; -volatile int32_t drawsOutstandingFE; +volatile OSALIGNLINE(uint32_t) drawsOutstandingFE; -CachingAllocator cachingArenaAllocator; +OSALIGNLINE(CachingAllocator) cachingArenaAllocator; uint32_t frameCount; uint32_t lastFrameChecked; diff --git a/src/gallium/drivers/swr/rasterizer/core/threads.cpp b/src/gallium/drivers/swr/rasterizer/core/threads.cpp index 70bde027ee..b704d23f54 100644 --- a/src/gallium/drivers/swr/rasterizer/core/threads.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/threads.cpp @@ -393,7 +393,7 @@ INLINE void ExecuteCallbacks(SWR_CONTEXT* pContext, uint32_t workerId, DRAW_CONT // inlined-only version INLINE int32_t CompleteDrawContextInl(SWR_CONTEXT* pContext, uint32_t workerId, DRAW_CONTEXT* pDC) { -int32_t result = InterlockedDecrement((volatile long*)>threadsDone); +int32_t result = static_cast(InterlockedDecrement(>threadsDone)); SWR_ASSERT(result >= 0); AR_FLUSH(pDC->drawId); @@ -639,7 +639,7 @@ INLINE void CompleteDrawFE(SWR_CONTEXT* pContext, uint32_t workerId, DRAW_CONTEX _mm_mfence(); pDC->doneFE = true; -InterlockedDecrement((volatile long*)>drawsOutstandingFE); +InterlockedDecrement(>drawsOutstandingFE); } void WorkOnFifoFE(SWR_CONTEXT *pContext, uint32_t workerId, uint32_t ) ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): configure: remove trailing "-a" in swr architecture test
Module: Mesa Branch: master Commit: 4d9b0dcccb81ad10113d9aef52b4c84496e879f1 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=4d9b0dcccb81ad10113d9aef52b4c84496e879f1 Author: Tim Rowley <timothy.o.row...@intel.com> Date: Thu Aug 10 12:58:57 2017 -0500 configure: remove trailing "-a" in swr architecture test Fixes "configure: line 27326: test: argument expected" CC: mesa-sta...@lists.freedesktop.org Reviewed-by: Matt Turner <matts...@gmail.com> --- configure.ac | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configure.ac b/configure.ac index d2704bce05..f131788e3a 100644 --- a/configure.ac +++ b/configure.ac @@ -2545,7 +2545,7 @@ if test -n "$with_gallium_drivers"; then if test "x$HAVE_SWR_AVX" != xyes -a \ "x$HAVE_SWR_AVX2" != xyes -a \ "x$HAVE_SWR_KNL" != xyes -a \ -"x$HAVE_SWR_SKX" != xyes -a; then +"x$HAVE_SWR_SKX" != xyes; then AC_MSG_ERROR([swr enabled but no swr architectures selected]) fi ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): st/osmesa: add osmesa framebuffer iface hash table per st manager
Module: Mesa Branch: master Commit: 9966c85e01a4344d2a6bb76e432e0bed70d52ff6 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=9966c85e01a4344d2a6bb76e432e0bed70d52ff6 Author: Bruce Cherniak Date: Wed Aug 2 18:14:19 2017 -0500 st/osmesa: add osmesa framebuffer iface hash table per st manager Commit bbc29393d3 didn't include osmesa state_tracker. This patch adds necessary initialization. Fixes crash in OSMesa initialization. Created-by: Charmaine Lee Tested-by: Bruce Cherniak Reviewed-by: Charmaine Lee Cc: 17.2 --- src/gallium/state_trackers/osmesa/osmesa.c | 11 +++ 1 file changed, 11 insertions(+) diff --git a/src/gallium/state_trackers/osmesa/osmesa.c b/src/gallium/state_trackers/osmesa/osmesa.c index 18f1b88128..751d255c54 100644 --- a/src/gallium/state_trackers/osmesa/osmesa.c +++ b/src/gallium/state_trackers/osmesa/osmesa.c @@ -439,6 +439,7 @@ osmesa_st_framebuffer_validate(struct st_context_iface *stctx, return TRUE; } +static uint32_t osmesa_fb_ID = 0; static struct st_framebuffer_iface * osmesa_create_st_framebuffer(void) @@ -448,6 +449,8 @@ osmesa_create_st_framebuffer(void) stfbi->flush_front = osmesa_st_framebuffer_flush_front; stfbi->validate = osmesa_st_framebuffer_validate; p_atomic_set(&stfbi->stamp, 1); + stfbi->ID = p_atomic_inc_return(&osmesa_fb_ID); + stfbi->state_manager = get_st_manager(); } return stfbi; } @@ -508,6 +511,14 @@ osmesa_find_buffer(enum pipe_format color_format, static void osmesa_destroy_buffer(struct osmesa_buffer *osbuffer) { + struct st_api *stapi = get_st_api(); + + /* +* Notify the state manager that the associated framebuffer interface +* is no longer valid. +*/ + stapi->destroy_drawable(stapi, osbuffer->stfb); + FREE(osbuffer->stfb); FREE(osbuffer); } ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): swr: Add arch flags to support Cray and PGI compilers
Module: Mesa Branch: master Commit: f0da70a96432dff8f9ebf054b352ce9db45f3ad6 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=f0da70a96432dff8f9ebf054b352ce9db45f3ad6 Author: Chuck Atkins <chuck.atk...@kitware.com> Date: Mon Jul 31 15:53:13 2017 -0400 swr: Add arch flags to support Cray and PGI compilers Note that the Cray flags (-target-cpu=) need to come first since the cray programming environment uses wrappers around other compilers. By checking the wrapper flags first, you can be sure to match the wrapper flag instead of the underlying compiler (gcc, intel, pgi, etc.) flags. Signed-off-by: Chuck Atkins <chuck.atk...@kitware.com> Reviewed-by: Tim Rowley <timothy.o.row...@intel.com> --- configure.ac | 8 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/configure.ac b/configure.ac index 6302aa2b0c..3b45baf6d0 100644 --- a/configure.ac +++ b/configure.ac @@ -2511,7 +2511,7 @@ if test -n "$with_gallium_drivers"; then AC_SUBST([SWR_CXX11_CXXFLAGS]) swr_require_cxx_feature_flags "AVX" "defined(__AVX__)" \ -",-mavx,-march=core-avx" \ + ",-target-cpu=sandybridge,-mavx,-march=core-avx,-tp=sandybridge" \ SWR_AVX_CXXFLAGS AC_SUBST([SWR_AVX_CXXFLAGS]) @@ -2523,21 +2523,21 @@ if test -n "$with_gallium_drivers"; then ;; xavx2) swr_require_cxx_feature_flags "AVX2" "defined(__AVX2__)" \ -",-mavx2 -mfma -mbmi2 -mf16c,-march=core-avx2" \ +",-target-cpu=haswell,-mavx2 -mfma -mbmi2 -mf16c,-march=core-avx2,-tp=haswell" \ SWR_AVX2_CXXFLAGS AC_SUBST([SWR_AVX2_CXXFLAGS]) HAVE_SWR_AVX2=yes ;; xknl) swr_require_cxx_feature_flags "KNL" "defined(__AVX512F__) && defined(__AVX512ER__)" \ -",-march=knl,-xMIC-AVX512" \ +",-target-cpu=mic-knl,-march=knl,-xMIC-AVX512" \ SWR_KNL_CXXFLAGS AC_SUBST([SWR_KNL_CXXFLAGS]) HAVE_SWR_KNL=yes ;; xskx) swr_require_cxx_feature_flags "SKX" "defined(__AVX512F__) && defined(__AVX512BW__)" \ -",-march=skylake-avx512,-xCORE-AVX512" \ + ",-target-cpu=x86-skylake,-march=skylake-avx512,-xCORE-AVX512" \ SWR_SKX_CXXFLAGS AC_SUBST([SWR_SKX_CXXFLAGS]) 
HAVE_SWR_SKX=yes ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): swr/rast: fix scons gen_knobs.h dependency
Module: Mesa Branch: master Commit: e4a6ae06cf01a21d7fe32e3ff2fc441102d68f82 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=e4a6ae06cf01a21d7fe32e3ff2fc441102d68f82 Author: Tim Rowley <timothy.o.row...@intel.com> Date: Mon Jul 31 16:59:06 2017 -0500 swr/rast: fix scons gen_knobs.h dependency Copy/paste error was duplicating a gen_knobs.cpp rule. Fixes: 5079c277b57 ("swr: [scons] Fix windows build") Reviewed-by: Emil Velikov <emil.veli...@collabora.com> Reviewed-by: Bruce Cherniak <bruce.chern...@intel.com> --- src/gallium/drivers/swr/SConscript | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gallium/drivers/swr/SConscript b/src/gallium/drivers/swr/SConscript index a32807d36b..c578d7a648 100644 --- a/src/gallium/drivers/swr/SConscript +++ b/src/gallium/drivers/swr/SConscript @@ -53,7 +53,7 @@ env.CodeGenerate( source = '', command = python_cmd + ' $SCRIPT --output $TARGET --gen_h' ) -Depends('rasterizer/codegen/gen_knobs.cpp', +Depends('rasterizer/codegen/gen_knobs.h', swrroot + 'rasterizer/codegen/templates/gen_knobs.cpp') env.CodeGenerate( ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): swr/rast: fix movemask_ps / movemask_pd on AVX512
Module: Mesa Branch: master Commit: eddbd781af15f655a1dba6949e7c6b214f47e2f8 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=eddbd781af15f655a1dba6949e7c6b214f47e2f8 Author: Tim Rowley <timothy.o.row...@intel.com> Date: Thu Jul 20 17:06:14 2017 -0500 swr/rast: fix movemask_ps / movemask_pd on AVX512 Reviewed-by: Bruce Cherniak <bruce.chern...@intel.com> --- src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512.inl | 9 +++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512.inl b/src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512.inl index 1001417704..1dbfff8c9c 100644 --- a/src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512.inl +++ b/src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512.inl @@ -554,15 +554,20 @@ static SIMDINLINE uint64_t SIMDCALL movemask_epi8(Integer a) static SIMDINLINE uint32_t SIMDCALL movemask_pd(Double a) { -__mmask8 m = _mm512_test_epi64_mask(castpd_si(a), set1_epi32(-1)); +__mmask8 m = _mm512_test_epi64_mask(castpd_si(a), set1_epi64(0x8000000000000000LL)); return static_cast<uint32_t>(m); } static SIMDINLINE uint32_t SIMDCALL movemask_ps(Float a) { -__mmask16 m = _mm512_test_epi32_mask(castps_si(a), set1_epi32(-1)); +__mmask16 m = _mm512_test_epi32_mask(castps_si(a), set1_epi32(0x80000000)); return static_cast<uint32_t>(m); } +static SIMDINLINE Integer SIMDCALL set1_epi64(long long i) // return i (all elements are same value) +{ +return _mm512_set1_epi64(i); +} + static SIMDINLINE Integer SIMDCALL set1_epi32(int i) // return i (all elements are same value) { return _mm512_set1_epi32(i); ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): swr/rast: split gen_knobs templates into .h/.cpp
Module: Mesa Branch: master Commit: 844be91e70413c1c3871d5f93b1e4766eb495df9 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=844be91e70413c1c3871d5f93b1e4766eb495df9 Author: Tim Rowley <timothy.o.row...@intel.com> Date: Mon Jul 31 17:22:12 2017 -0500 swr/rast: split gen_knobs templates into .h/.cpp Switch to a 1:1 mapping template:generated for future maintenance. Reviewed-by: Emil Velikov <emil.veli...@collabora.com> Reviewed-by: Bruce Cherniak <bruce.chern...@intel.com> --- src/gallium/drivers/swr/Makefile.am| 3 +- src/gallium/drivers/swr/SConscript | 2 +- .../drivers/swr/rasterizer/codegen/gen_knobs.py| 14 +- .../swr/rasterizer/codegen/templates/gen_knobs.cpp | 108 -- .../swr/rasterizer/codegen/templates/gen_knobs.h | 157 + 5 files changed, 166 insertions(+), 118 deletions(-) diff --git a/src/gallium/drivers/swr/Makefile.am b/src/gallium/drivers/swr/Makefile.am index 73fe904a7d..b20f128bd2 100644 --- a/src/gallium/drivers/swr/Makefile.am +++ b/src/gallium/drivers/swr/Makefile.am @@ -115,7 +115,7 @@ rasterizer/codegen/gen_knobs.cpp: rasterizer/codegen/gen_knobs.py rasterizer/cod --output rasterizer/codegen/gen_knobs.cpp \ --gen_cpp -rasterizer/codegen/gen_knobs.h: rasterizer/codegen/gen_knobs.py rasterizer/codegen/knob_defs.py rasterizer/codegen/templates/gen_knobs.cpp rasterizer/codegen/gen_common.py +rasterizer/codegen/gen_knobs.h: rasterizer/codegen/gen_knobs.py rasterizer/codegen/knob_defs.py rasterizer/codegen/templates/gen_knobs.h rasterizer/codegen/gen_common.py $(MKDIR_GEN) $(PYTHON_GEN) \ $(srcdir)/rasterizer/codegen/gen_knobs.py \ @@ -347,5 +347,6 @@ EXTRA_DIST = \ rasterizer/codegen/templates/gen_builder.hpp \ rasterizer/codegen/templates/gen_header_init.hpp \ rasterizer/codegen/templates/gen_knobs.cpp \ + rasterizer/codegen/templates/gen_knobs.h \ rasterizer/codegen/templates/gen_llvm.hpp \ rasterizer/codegen/templates/gen_rasterizer.cpp diff --git a/src/gallium/drivers/swr/SConscript b/src/gallium/drivers/swr/SConscript index 
c578d7a648..b394cbc17e 100644 --- a/src/gallium/drivers/swr/SConscript +++ b/src/gallium/drivers/swr/SConscript @@ -54,7 +54,7 @@ env.CodeGenerate( command = python_cmd + ' $SCRIPT --output $TARGET --gen_h' ) Depends('rasterizer/codegen/gen_knobs.h', -swrroot + 'rasterizer/codegen/templates/gen_knobs.cpp') +swrroot + 'rasterizer/codegen/templates/gen_knobs.h') env.CodeGenerate( target = 'rasterizer/jitter/gen_state_llvm.h', diff --git a/src/gallium/drivers/swr/rasterizer/codegen/gen_knobs.py b/src/gallium/drivers/swr/rasterizer/codegen/gen_knobs.py index 2c271c7f5c..33f62a28ce 100644 --- a/src/gallium/drivers/swr/rasterizer/codegen/gen_knobs.py +++ b/src/gallium/drivers/swr/rasterizer/codegen/gen_knobs.py @@ -37,27 +37,25 @@ def main(args=sys.argv[1:]): args = parser.parse_args() cur_dir = os.path.dirname(os.path.abspath(__file__)) -template_file = os.path.join(cur_dir, 'templates', 'gen_knobs.cpp') +template_cpp = os.path.join(cur_dir, 'templates', 'gen_knobs.cpp') +template_h = os.path.join(cur_dir, 'templates', 'gen_knobs.h') if args.gen_h: MakoTemplateWriter.to_file( -template_file, +template_h, args.output, cmdline=sys.argv, filename='gen_knobs', -knobs=knob_defs.KNOBS, -includes=['core/knobs_init.h', 'common/os.h', 'sstream', 'iomanip'], -gen_header=True) +knobs=knob_defs.KNOBS) if args.gen_cpp: MakoTemplateWriter.to_file( -template_file, +template_cpp, args.output, cmdline=sys.argv, filename='gen_knobs', knobs=knob_defs.KNOBS, -includes=['core/knobs_init.h', 'common/os.h', 'sstream', 'iomanip'], -gen_header=False) +includes=['core/knobs_init.h', 'common/os.h', 'sstream', 'iomanip']) return 0 diff --git a/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_knobs.cpp b/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_knobs.cpp index a9506434c6..2f4c47a92e 100644 --- a/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_knobs.cpp +++ b/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_knobs.cpp @@ -20,11 +20,7 @@ * FROM, OUT OF OR IN 
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS * IN THE SOFTWARE. * -% if gen_header: -* @file ${filename}.h -% else: * @file ${filename}.cpp -% endif * * @brief Dynamic Knobs for Core. * @@ -35,105 +31,6 @@ * **/ <% calc_max_knob_len(knobs) %> -%if gen_header: -#pragma once -#include - -struct KnobBase -{ -private: -// Update the input
Mesa (master): swr/rast: enable USE_SIMD16_FRONTEND by default
Module: Mesa Branch: master Commit: 030cfa8eed9a91fe5b5ae59670a3001ac0b0f339 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=030cfa8eed9a91fe5b5ae59670a3001ac0b0f339 Author: Tim Rowley <timothy.o.row...@intel.com> Date: Wed Jul 19 17:49:17 2017 -0500 swr/rast: enable USE_SIMD16_FRONTEND by default Reviewed-by: Bruce Cherniak <bruce.chern...@intel.com> --- src/gallium/drivers/swr/rasterizer/core/knobs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gallium/drivers/swr/rasterizer/core/knobs.h b/src/gallium/drivers/swr/rasterizer/core/knobs.h index 7ad6fe33f0..10bd4a5e70 100644 --- a/src/gallium/drivers/swr/rasterizer/core/knobs.h +++ b/src/gallium/drivers/swr/rasterizer/core/knobs.h @@ -40,7 +40,7 @@ #define ENABLE_AVX512_SIMD161 #define USE_8x2_TILE_BACKEND1 -#define USE_SIMD16_FRONTEND 0 +#define USE_SIMD16_FRONTEND 1 /// // Architecture validation ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): swr/rast: switch gen_knobs.cpp license
Module: Mesa Branch: master Commit: fb3e50a351b52014479a9a81226b7c51b176afed URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=fb3e50a351b52014479a9a81226b7c51b176afed Author: Tim Rowley <timothy.o.row...@intel.com> Date: Mon Jul 31 17:48:12 2017 -0500 swr/rast: switch gen_knobs.cpp license Unintentionally added with an apache2 license; relicense to match the rest of the tree. Reviewed-by: Bruce Cherniak <bruce.chern...@intel.com> --- .../swr/rasterizer/codegen/templates/gen_knobs.cpp | 29 +- 1 file changed, 17 insertions(+), 12 deletions(-) diff --git a/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_knobs.cpp b/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_knobs.cpp index 06b93bd72b..e6fe16533a 100644 --- a/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_knobs.cpp +++ b/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_knobs.cpp @@ -1,19 +1,24 @@ /** +* Copyright (C) 2015-2017 Intel Corporation. All Rights Reserved. * -* Copyright 2015-2017 -* Intel Corporation +* Permission is hereby granted, free of charge, to any person obtaining a +* copy of this software and associated documentation files (the "Software"), +* to deal in the Software without restriction, including without limitation +* the rights to use, copy, modify, merge, publish, distribute, sublicense, +* and/or sell copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following conditions: * -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. -* You may obtain a copy of the License at +* The above copyright notice and this permission notice (including the next +* paragraph) shall be included in all copies or substantial portions of the +* Software. 
* -* http ://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +* IN THE SOFTWARE. * % if gen_header: * @file ${filename}.h ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): swr/rast: stop using MSFT types in platform independent code
Module: Mesa Branch: master Commit: f253798205a3ce7f577867a96ce487bf20e10909 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=f253798205a3ce7f577867a96ce487bf20e10909 Author: Tim Rowley <timothy.o.row...@intel.com> Date: Thu Jul 20 13:48:28 2017 -0500 swr/rast: stop using MSFT types in platform independent code Reviewed-by: Bruce Cherniak <bruce.chern...@intel.com> --- src/gallium/drivers/swr/rasterizer/common/os.h | 6 -- src/gallium/drivers/swr/rasterizer/core/api.cpp| 2 +- src/gallium/drivers/swr/rasterizer/core/api.h | 4 ++-- src/gallium/drivers/swr/rasterizer/core/binner.cpp | 4 ++-- src/gallium/drivers/swr/rasterizer/core/blend.h| 2 +- src/gallium/drivers/swr/rasterizer/core/clip.h | 8 src/gallium/drivers/swr/rasterizer/core/fifo.hpp | 2 +- src/gallium/drivers/swr/rasterizer/core/format_traits.h| 4 ++-- src/gallium/drivers/swr/rasterizer/core/pa.h | 2 +- src/gallium/drivers/swr/rasterizer/core/threads.cpp| 4 ++-- src/gallium/drivers/swr/rasterizer/core/tilemgr.h | 12 ++-- src/gallium/drivers/swr/rasterizer/core/utils.h| 10 ++ src/gallium/drivers/swr/rasterizer/jitter/blend_jit.cpp| 2 +- src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp | 4 ++-- 14 files changed, 35 insertions(+), 31 deletions(-) diff --git a/src/gallium/drivers/swr/rasterizer/common/os.h b/src/gallium/drivers/swr/rasterizer/common/os.h index dc90fca750..4ed6b88e45 100644 --- a/src/gallium/drivers/swr/rasterizer/common/os.h +++ b/src/gallium/drivers/swr/rasterizer/common/os.h @@ -220,12 +220,6 @@ void *AlignedMalloc(unsigned int size, unsigned int alignment) return ret; } -inline -unsigned char _bittest(const LONG *a, LONG b) -{ -return ((*(unsigned *)(a) & (1 << b)) != 0); -} - static inline void AlignedFree(void* p) { diff --git a/src/gallium/drivers/swr/rasterizer/core/api.cpp b/src/gallium/drivers/swr/rasterizer/core/api.cpp index 855d133920..8dc9ac24a7 100644 --- a/src/gallium/drivers/swr/rasterizer/core/api.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/api.cpp @@ -189,7 
+189,7 @@ void QueueWork(SWR_CONTEXT *pContext) if (IsDraw) { -InterlockedIncrement((volatile LONG*)>drawsOutstandingFE); +InterlockedIncrement((volatile long*)>drawsOutstandingFE); } _ReadWriteBarrier(); diff --git a/src/gallium/drivers/swr/rasterizer/core/api.h b/src/gallium/drivers/swr/rasterizer/core/api.h index 236e0fcd66..a39420552b 100644 --- a/src/gallium/drivers/swr/rasterizer/core/api.h +++ b/src/gallium/drivers/swr/rasterizer/core/api.h @@ -697,8 +697,8 @@ SWR_FUNC(void, SwrStoreHotTileToSurface, SWR_FUNC(void, SwrStoreHotTileClear, SWR_SURFACE_STATE *pDstSurface, SWR_RENDERTARGET_ATTACHMENT renderTargetIndex, - UINT x, - UINT y, + uint32_t x, + uint32_t y, uint32_t renderTargetArrayIndex, const float* pClearColor); diff --git a/src/gallium/drivers/swr/rasterizer/core/binner.cpp b/src/gallium/drivers/swr/rasterizer/core/binner.cpp index de6691b4cf..c1f0f07804 100644 --- a/src/gallium/drivers/swr/rasterizer/core/binner.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/binner.cpp @@ -64,7 +64,7 @@ INLINE void ProcessAttributes( static_assert(NumVertsT::value > 0 && NumVertsT::value <= 3, "Invalid value for NumVertsT"); const SWR_BACKEND_STATE& backendState = pDC->pState->state.backendState; // Conservative Rasterization requires degenerate tris to have constant attribute interpolation -LONG constantInterpMask = IsDegenerate::value ? 0x : backendState.constantInterpolationMask; +uint32_t constantInterpMask = IsDegenerate::value ? 
0x : backendState.constantInterpolationMask; const uint32_t provokingVertex = pDC->pState->state.frontendState.topologyProvokingVertex; const PRIMITIVE_TOPOLOGY topo = pDC->pState->state.topology; @@ -93,7 +93,7 @@ INLINE void ProcessAttributes( if (HasConstantInterpT::value || IsDegenerate::value) { -if (_bittest(, i)) +if (CheckBit(constantInterpMask, i)) { uint32_t vid; uint32_t adjustedTriIndex; diff --git a/src/gallium/drivers/swr/rasterizer/core/blend.h b/src/gallium/drivers/swr/rasterizer/core/blend.h index 1b98e442fd..c89c47646a 100644 --- a/src/gallium/drivers/swr/rasterizer/core/blend.h +++ b/src/gallium/drivers/swr/rasterizer/core/blend.h @@ -278,7 +278,7 @@ INLINE void Clamp(simdvector ) } template -void Blend(const SWR_BLEND_STATE *pBlendState, const SWR_RENDER_TARGET_BLEND_STATE *pState, simdvector , simdvector& src1, BYTE *pDst, simdvector ) +void Blend(const SWR_BLEND_STATE *pBlendState, const SWR_RENDER_TARGET_BLEND_STATE *pState, simd
Mesa (master): swr/rast: fix USE_SIMD16_FRONTEND issues
Module: Mesa Branch: master Commit: d08493f9cef236af57538d4dd3087277f3a65ad2 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=d08493f9cef236af57538d4dd3087277f3a65ad2 Author: Tim Rowley <timothy.o.row...@intel.com> Date: Tue Jul 18 23:52:38 2017 -0500 swr/rast: fix USE_SIMD16_FRONTEND issues Fix problems found when enabling USE_SIMD16_FRONTEND, mostly related to vMask / movemask_ps(pd). Reviewed-by: Bruce Cherniak <bruce.chern...@intel.com> --- .../drivers/swr/rasterizer/common/simd16intrin.h| 14 ++ .../drivers/swr/rasterizer/common/simdintrin.h | 21 + .../swr/rasterizer/common/simdlib_128_avx.inl | 15 +++ .../swr/rasterizer/common/simdlib_256_avx.inl | 10 ++ .../swr/rasterizer/common/simdlib_512_avx512.inl| 4 ++-- .../common/simdlib_512_avx512_knights.inl | 21 - .../swr/rasterizer/common/simdlib_512_emu.inl | 12 +--- src/gallium/drivers/swr/rasterizer/core/backend.cpp | 2 +- .../drivers/swr/rasterizer/core/backend_impl.h | 8 .../drivers/swr/rasterizer/core/backend_sample.cpp | 2 +- .../swr/rasterizer/core/backend_singlesample.cpp| 2 +- src/gallium/drivers/swr/rasterizer/core/clip.h | 6 +++--- .../drivers/swr/rasterizer/core/frontend.cpp| 2 +- src/gallium/drivers/swr/rasterizer/core/pa.h| 4 +++- 14 files changed, 49 insertions(+), 74 deletions(-) diff --git a/src/gallium/drivers/swr/rasterizer/common/simd16intrin.h b/src/gallium/drivers/swr/rasterizer/common/simd16intrin.h index a160ca2c5e..019b26d8cf 100644 --- a/src/gallium/drivers/swr/rasterizer/common/simd16intrin.h +++ b/src/gallium/drivers/swr/rasterizer/common/simd16intrin.h @@ -159,20 +159,10 @@ typedef SIMD512 SIMD16; #define _simd16_packus_epi32SIMD16::packus_epi32 #define _simd16_packs_epi32 SIMD16::packs_epi32 #define _simd16_cmplt_ps_mask SIMD16::cmp_ps_mask +#define _simd16_cmpeq_ps_mask SIMD16::cmp_ps_mask #define _simd16_int2mask(mask) simd16mask(mask) #define _simd16_mask2int(mask) int(mask) - -// convert bitmask to vector mask -SIMDINLINE simd16scalar vMask16(int32_t mask) -{ -simd16scalari 
temp = _simd16_set1_epi32(mask); - -simd16scalari bits = _simd16_set_epi32(0x8000, 0x4000, 0x2000, 0x1000, 0x0800, 0x0400, 0x0200, 0x0100, 0x0080, 0x0040, 0x0020, 0x0010, 0x0008, 0x0004, 0x0002, 0x0001); - -simd16scalari result = _simd16_cmplt_epi32(_simd16_setzero_si(), _simd16_and_si(temp, bits)); - -return _simd16_castsi_ps(result); -} +#define _simd16_vmask_psSIMD16::vmask_ps #endif//ENABLE_AVX512_SIMD16 diff --git a/src/gallium/drivers/swr/rasterizer/common/simdintrin.h b/src/gallium/drivers/swr/rasterizer/common/simdintrin.h index f95c109e6f..f4b9e1055c 100644 --- a/src/gallium/drivers/swr/rasterizer/common/simdintrin.h +++ b/src/gallium/drivers/swr/rasterizer/common/simdintrin.h @@ -181,6 +181,7 @@ typedef SIMD256 SIMD; #define _simd_storeu2_siSIMD::storeu2_si #define _simd_blendv_epi32 SIMD::blendv_epi32 +#define _simd_vmask_ps SIMD::vmask_ps template SIMDINLINE SIMD128::Integer _simd_blend4_epi32(SIMD128::Integer a, SIMD128::Integer b) @@ -188,26 +189,6 @@ SIMD128::Integer _simd_blend4_epi32(SIMD128::Integer a, SIMD128::Integer b) return SIMD128::castps_si(SIMD128::blend_ps(SIMD128::castsi_ps(a), SIMD128::castsi_ps(b))); } -// convert bitmask to vector mask -SIMDINLINE -SIMD256::Float vMask(int32_t mask) -{ -SIMD256::Integer vec = SIMD256::set1_epi32(mask); -const SIMD256::Integer bit = SIMD256::set_epi32(0x80, 0x40, 0x20, 0x10, 0x08, 0x04, 0x02, 0x01); -vec = SIMD256::and_si(vec, bit); -vec = SIMD256::cmplt_epi32(SIMD256::setzero_si(), vec); -return SIMD256::castsi_ps(vec); -} - -SIMDINLINE -SIMD256::Integer vMaski(int32_t mask) -{ -SIMD256::Integer vec = SIMD256::set1_epi32(mask); -const SIMD256::Integer bit = SIMD256::set_epi32(0x80, 0x40, 0x20, 0x10, 0x08, 0x04, 0x02, 0x01); -vec = SIMD256::and_si(vec, bit); -return SIMD256::cmplt_epi32(SIMD256::setzero_si(), vec); -} - SIMDINLINE void _simd_mov(simdscalar , unsigned int rlane, simdscalar& s, unsigned int slane) { diff --git a/src/gallium/drivers/swr/rasterizer/common/simdlib_128_avx.inl 
b/src/gallium/drivers/swr/rasterizer/common/simdlib_128_avx.inl index 5bcedf3971..7232791893 100644 --- a/src/gallium/drivers/swr/rasterizer/common/simdlib_128_avx.inl +++ b/src/gallium/drivers/swr/rasterizer/common/simdlib_128_avx.inl @@ -519,6 +519,11 @@ static SIMDINLINE Float SIMDCALL set_ps(float in3, float in2, float in1, float i return _mm_set_ps(in3, in2, in1, in0); } +static SIMDINLINE Integer SIMDCALL set_epi32(int in3, int in2, int in1, int in0) +{ +return _
Mesa (master): swr/rast: disable AVX512 optimization of SSE / AVX code
Module: Mesa Branch: master Commit: f8a572cdf0cf7fb52348adc7862a7ffc612180ef URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=f8a572cdf0cf7fb52348adc7862a7ffc612180ef Author: Tim Rowley <timothy.o.row...@intel.com> Date: Wed Jul 19 16:16:57 2017 -0500 swr/rast: disable AVX512 optimization of SSE / AVX code Disable an optimization which implemented sse/avx operations on avx512 using avx512 intrinsics (to avoid switching between lane widths). Compile with SIMD_OPT_128_AVX512 / SIMD_OPT_256_AVX512 defined to enable these optimizations. Reviewed-by: Bruce Cherniak <bruce.chern...@intel.com> --- src/gallium/drivers/swr/rasterizer/common/simdlib.hpp | 4 1 file changed, 4 insertions(+) diff --git a/src/gallium/drivers/swr/rasterizer/common/simdlib.hpp b/src/gallium/drivers/swr/rasterizer/common/simdlib.hpp index 0c79cdd660..a4b5854d00 100644 --- a/src/gallium/drivers/swr/rasterizer/common/simdlib.hpp +++ b/src/gallium/drivers/swr/rasterizer/common/simdlib.hpp @@ -53,6 +53,7 @@ namespace SIMDImpl #if SIMD_ARCH >= SIMD_ARCH_AVX512 struct AVX512Impl : AVX2Impl { +#if defined(SIMD_OPT_128_AVX512) #define __SIMD_LIB_AVX512_HPP__ #include "simdlib_128_avx512.inl" #if defined(SIMD_ARCH_KNIGHTS) @@ -61,6 +62,7 @@ namespace SIMDImpl #include "simdlib_128_avx512_core.inl" #endif // defined(SIMD_ARCH_KNIGHTS) #undef __SIMD_LIB_AVX512_HPP__ +#endif // SIMD_OPT_128_AVX512 }; // struct AVX2Impl #endif // #if SIMD_ARCH >= SIMD_ARCH_AVX512 @@ -108,6 +110,7 @@ namespace SIMDImpl #if SIMD_ARCH >= SIMD_ARCH_AVX512 struct AVX512Impl : AVX2Impl { +#if defined(SIMD_OPT_256_AVX512) #define __SIMD_LIB_AVX512_HPP__ #include "simdlib_256_avx512.inl" #if defined(SIMD_ARCH_KNIGHTS) @@ -116,6 +119,7 @@ namespace SIMDImpl #include "simdlib_256_avx512_core.inl" #endif // defined(SIMD_ARCH_KNIGHTS) #undef __SIMD_LIB_AVX512_HPP__ +#endif // SIMD_OPT_256_AVX512 }; // struct AVX2Impl #endif // #if SIMD_ARCH >= SIMD_ARCH_AVX512 ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org 
https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): swr/rast: constify swr rasterizer
Module: Mesa Branch: master Commit: 08e3c369550be2842d32fd05d2e9ba68fb1b08f8 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=08e3c369550be2842d32fd05d2e9ba68fb1b08f8 Author: Tim Rowley <timothy.o.row...@intel.com> Date: Wed Jul 26 12:27:44 2017 -0500 swr/rast: constify swr rasterizer Add "const" as appropriate in method/function signatures. Reviewed-by: Bruce Cherniak <bruce.chern...@intel.com> --- .../drivers/swr/rasterizer/common/simdintrin.h | 14 +- .../drivers/swr/rasterizer/common/simdlib.hpp | 10 +- .../swr/rasterizer/common/simdlib_256_avx.inl | 130 +-- .../swr/rasterizer/common/simdlib_256_avx2.inl | 32 ++--- .../swr/rasterizer/common/simdlib_512_emu.inl | 143 +++-- .../swr/rasterizer/common/simdlib_types.hpp| 76 +-- .../drivers/swr/rasterizer/core/backend_impl.h | 12 +- src/gallium/drivers/swr/rasterizer/core/binner.cpp | 36 +++--- src/gallium/drivers/swr/rasterizer/core/binner.h | 4 +- src/gallium/drivers/swr/rasterizer/core/clip.cpp | 12 +- src/gallium/drivers/swr/rasterizer/core/clip.h | 66 +- src/gallium/drivers/swr/rasterizer/core/context.h | 8 +- .../drivers/swr/rasterizer/core/depthstencil.h | 12 +- .../swr/rasterizer/core/format_conversion.h| 18 ++- .../drivers/swr/rasterizer/core/format_types.h | 71 +- .../drivers/swr/rasterizer/core/frontend.cpp | 4 +- src/gallium/drivers/swr/rasterizer/core/frontend.h | 12 +- src/gallium/drivers/swr/rasterizer/core/state.h| 2 +- 18 files changed, 339 insertions(+), 323 deletions(-) Diff: http://cgit.freedesktop.org/mesa/mesa/diff/?id=08e3c369550be2842d32fd05d2e9ba68fb1b08f8 ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): swr/rast: fix core / knights split of AVX512 intrinsics
Module: Mesa Branch: master Commit: 7cd50b9e47a8ad131795da270039da87e0175143 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=7cd50b9e47a8ad131795da270039da87e0175143 Author: Tim Rowley <timothy.o.row...@intel.com> Date: Thu Jul 27 15:33:10 2017 -0500 swr/rast: fix core / knights split of AVX512 intrinsics Move AVX512BW specific intrinsics to be Core-only. Move some AVX512F intrinsics back to common implementation file. Reviewed-by: Bruce Cherniak <bruce.chern...@intel.com> --- .../drivers/swr/rasterizer/common/simdlib.hpp | 2 + .../swr/rasterizer/common/simdlib_512_avx512.inl | 53 + .../rasterizer/common/simdlib_512_avx512_core.inl | 54 ++ .../common/simdlib_512_avx512_knights.inl | 15 -- 4 files changed, 69 insertions(+), 55 deletions(-) diff --git a/src/gallium/drivers/swr/rasterizer/common/simdlib.hpp b/src/gallium/drivers/swr/rasterizer/common/simdlib.hpp index 22d7da42d0..500cf8a87e 100644 --- a/src/gallium/drivers/swr/rasterizer/common/simdlib.hpp +++ b/src/gallium/drivers/swr/rasterizer/common/simdlib.hpp @@ -214,6 +214,8 @@ struct SIMDBase : Traits::IsaImpl using Vec4 = typename Traits::Vec4; using Mask = typename Traits::Mask; +static const size_t VECTOR_BYTES = sizeof(Float); + // Populates a SIMD Vec4 from a non-simd vector. So p = xyzw becomes . 
static SIMDINLINE void vec4_load1_ps(Vec4& r, const float *p) diff --git a/src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512.inl b/src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512.inl index 1dbfff8c9c..95e4c31909 100644 --- a/src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512.inl +++ b/src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512.inl @@ -158,6 +158,11 @@ private: return _mm512_maskz_set1_epi32(m, -1); } +static SIMDINLINE Integer vmask(__mmask8 m) +{ +return _mm512_maskz_set1_epi64(m, -1LL); +} + public: //--- // Single precision floating point arithmetic operations @@ -187,8 +192,8 @@ static SIMDINLINE Float SIMDCALL floor_ps(Float a) { return round_ps 0xff) ? 0xff : (a + b) (uint8) +//SIMD_IWRAPPER_2(add_epi8); // return a + b (int8) +//SIMD_IWRAPPER_2(adds_epu8); // return ((a + b) > 0xff) ? 0xff : (a + b) (uint8) SIMD_IWRAPPER_2(max_epi32); // return (a > b) ? a : b (int32) SIMD_IWRAPPER_2(max_epu32); // return (a > b) ? a : b (uint32) SIMD_IWRAPPER_2(min_epi32); // return (a < b) ? a : b (int32) @@ -202,7 +207,7 @@ SIMD_IWRAPPER_2(mul_epi32); // return a * b (int32) SIMD_IWRAPPER_2(mullo_epi32); SIMD_IWRAPPER_2(sub_epi32); // return a - b (int32) SIMD_IWRAPPER_2(sub_epi64); // return a - b (int64) -SIMD_IWRAPPER_2(subs_epu8); // return (b > a) ? 0 : (a - b) (uint8) +//SIMD_IWRAPPER_2(subs_epu8); // return (b > a) ? 
0 : (a - b) (uint8) //--- // Logical operations @@ -276,7 +281,7 @@ static SIMDINLINE Float SIMDCALL cvtepi32_ps(Integer a) // return (float)a(i return _mm512_cvtepi32_ps(a); } -SIMD_IWRAPPER_1_8(cvtepu8_epi16); // return (int16)a(uint8 --> int16) +//SIMD_IWRAPPER_1_8(cvtepu8_epi16); // return (int16)a(uint8 --> int16) SIMD_IWRAPPER_1_4(cvtepu8_epi32); // return (int32)a(uint8 --> int32) SIMD_IWRAPPER_1_8(cvtepu16_epi32);// return (int32)a(uint16 --> int32) SIMD_IWRAPPER_1_4(cvtepu16_epi64);// return (int64)a(uint16 --> int64) @@ -317,20 +322,6 @@ static SIMDINLINE Float SIMDCALL cmpge_ps(Float a, Float b) { return cmp_ps(a, b); } template -static SIMDINLINE Integer SIMDCALL cmp_epi8(Integer a, Integer b) -{ -// Legacy vector mask generator -__mmask64 result = _mm512_cmp_epi8_mask(a, b, static_cast(CmpTypeT)); -return vmask(result); -} -template -static SIMDINLINE Integer SIMDCALL cmp_epi16(Integer a, Integer b) -{ -// Legacy vector mask generator -__mmask32 result = _mm512_cmp_epi16_mask(a, b, static_cast(CmpTypeT)); -return vmask(result); -} -template static SIMDINLINE Integer SIMDCALL cmp_epi32(Integer a, Integer b) { // Legacy vector mask generator @@ -345,12 +336,12 @@ static SIMDINLINE Integer SIMDCALL cmp_epi64(Integer a, Integer b) return vmask(result); } -SIMD_IWRAPPER_2_CMP(cmpeq_epi8, cmp_epi8);// return a == b (int8) -SIMD_IWRAPPER_2_CMP(cmpeq_epi16, cmp_epi16); // return a == b (int16) +//SIMD_IWRAPPER_2_CMP(cmpeq_epi8, cmp_epi8);// return a == b (int8) +//SIMD_IWRAPPER_2_CMP(cmpeq_epi16, cmp_epi16); // return a == b (int16) SIMD_IWRAPPER_2_CMP(cmpeq_epi32, cmp_epi32); // return a == b (int32) SIMD_IWRAPPER_2_CMP(cmpeq_epi64, cmp_epi64); // return a == b (int64) -SIMD_IWRAPPER_2_CMP(cmpgt_epi8, cmp_epi8);// return a > b (int8) -SIMD_IWRAPPER_2_CM
Mesa (master): swr/rast: simdlib better separation of core vs knights avx512
Module: Mesa Branch: master Commit: 07062daae93b146458db55ba22a2e27d3d59552b URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=07062daae93b146458db55ba22a2e27d3d59552b Author: Tim Rowley <timothy.o.row...@intel.com> Date: Mon Jul 24 16:13:12 2017 -0500 swr/rast: simdlib better separation of core vs knights avx512 Reviewed-by: Bruce Cherniak <bruce.chern...@intel.com> --- src/gallium/drivers/swr/Makefile.am| 2 +- src/gallium/drivers/swr/Makefile.sources | 8 + .../drivers/swr/rasterizer/common/simdlib.hpp | 21 ++- .../swr/rasterizer/common/simdlib_128_avx512.inl | 108 +++- .../rasterizer/common/simdlib_128_avx512_core.inl | 193 + .../common/simdlib_128_avx512_knights.inl | 35 .../swr/rasterizer/common/simdlib_256_avx512.inl | 128 +++--- .../rasterizer/common/simdlib_256_avx512_core.inl | 127 ++ .../common/simdlib_256_avx512_knights.inl | 35 .../swr/rasterizer/common/simdlib_512_avx512.inl | 79 +++-- .../rasterizer/common/simdlib_512_avx512_core.inl | 181 +++ .../common/simdlib_512_avx512_knights.inl | 183 +++ .../common/simdlib_512_avx512_masks_core.inl | 27 +++ .../common/simdlib_512_avx512_masks_knights.inl| 27 +++ .../swr/rasterizer/common/simdlib_types.hpp| 2 +- 15 files changed, 911 insertions(+), 245 deletions(-) Diff: http://cgit.freedesktop.org/mesa/mesa/diff/?id=07062daae93b146458db55ba22a2e27d3d59552b ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit