Use the LLVM masked.gather intrinsic instead of a manual unroll for the cases
where we have a vector of pointers. This improves the LLVM IR debugging
experience by reducing a large amount of IR to a single intrinsic call. It
also seems to reduce overall stack use considerably.
---
 src/gallium/drivers/swr/rasterizer/jitter/builder_mem.cpp | 12 ++++++++++++
 src/gallium/drivers/swr/rasterizer/jitter/builder_mem.h   |  2 ++
 2 files changed, 14 insertions(+)

diff --git a/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.cpp 
b/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.cpp
index 0c9e279..a8c2f2c 100644
--- a/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.cpp
+++ b/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.cpp
@@ -346,6 +346,18 @@ namespace SwrJit
         return vGather;
     }
 
+    //////////////////////////////////////////////////////////////////////////
+    /// @brief Masked gather, emitted as a call to the llvm masked.gather intrinsic, where the source is a vector of pointers
+    /// @param pVecSrcPtr   - SIMD wide vector of pointers to gather from
+    /// @param pVecMask     - SIMD active lanes
+    /// @param pVecPassthru - SIMD wide vector of values passed through for inactive lanes; its type selects the intrinsic overload
+    Value* Builder::GATHER_PTR(Value* pVecSrcPtr, Value* pVecMask, Value* 
pVecPassthru)
+    {
+        Function* pMaskedGather = 
llvm::Intrinsic::getDeclaration(JM()->mpCurrentModule, 
Intrinsic::masked_gather, { pVecPassthru->getType() });
+
+        return CALL(pMaskedGather, { pVecSrcPtr, C(0), pVecMask, pVecPassthru 
});
+    }
+
     void Builder::Gather4(const SWR_FORMAT format, Value* pSrcBase, Value* 
byteOffsets,
         Value* mask, Value* vGatherComponents[], bool bPackedOutput)
     {
diff --git a/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.h 
b/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.h
index 88ea37f..14dc22d 100644
--- a/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.h
+++ b/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.h
@@ -58,6 +58,8 @@ virtual void GATHER4DD(const SWR_FORMAT_INFO &info, Value* 
pSrcBase, Value* byte
 
 Value *GATHERPD(Value* src, Value* pBase, Value* indices, Value* mask, uint8_t 
scale = 1);
 
+Value *GATHER_PTR(Value* pVecSrcPtr, Value* pVecMask, Value* pVecPassthru);
+
 void SCATTERPS(Value* pDst, Value* vSrc, Value* vOffsets, Value* vMask);
 
 void Shuffle8bpcGather4(const SWR_FORMAT_INFO &info, Value* vGatherInput, 
Value* vGatherOutput[], bool bPackedOutput);
-- 
2.7.4

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to