[commit: ghc] simd: Add primops for packing and unpacking FloatX4#'s. (340816b)

Geoffrey Mainland Thu, 03 Nov 2011 01:01:58 -0700

Repository : ssh://darcs.haskell.org//srv/darcs/ghc

On branch  : simd


http://hackage.haskell.org/trac/ghc/changeset/340816bc8bb6ec86d8949371ef586f0079306c6a

>---------------------------------------------------------------

commit 340816bc8bb6ec86d8949371ef586f0079306c6a
Author: Geoffrey Mainland <[email protected]>
Date:   Wed Nov 2 16:20:35 2011 +0000

    Add primops for packing and unpacking FloatX4#'s.

>---------------------------------------------------------------

 compiler/cmm/CmmMachOp.hs               |    7 ++++
 compiler/codeGen/CgPrimOp.hs            |   56 +++++++++++++++++++++++++++++++
 compiler/llvmGen/LlvmCodeGen/CodeGen.hs |   16 +++++++++
 compiler/nativeGen/X86/CodeGen.hs       |    6 ++-
 compiler/prelude/primops.txt.pp         |   10 +++++
 5 files changed, 93 insertions(+), 2 deletions(-)

diff --git a/compiler/cmm/CmmMachOp.hs b/compiler/cmm/CmmMachOp.hs
index a9e8699..52f335e 100644
--- a/compiler/cmm/CmmMachOp.hs
+++ b/compiler/cmm/CmmMachOp.hs
@@ -103,6 +103,10 @@ data MachOp
   | MO_UU_Conv Width Width      -- unsigned int -> unsigned int
   | MO_FF_Conv Width Width      -- Float -> Float
 
+  -- Vector element insertion and extraction operations
+  | MO_V_Insert Length Width
+  | MO_V_Extract Length Width
+
   -- Float vector operations
   | MO_VF_Add Length Width  
   deriving (Eq, Show)
@@ -341,6 +345,9 @@ machOpResultType mop tys =
     MO_SF_Conv _ to     -> cmmFloat to
     MO_FF_Conv _ to     -> cmmFloat to
 
+    MO_V_Insert {}      -> ty1
+    MO_V_Extract {}     -> vecType ty1
+
     MO_VF_Add {}        -> ty1
   where
     (ty1:_) = tys
diff --git a/compiler/codeGen/CgPrimOp.hs b/compiler/codeGen/CgPrimOp.hs
index 37f6068..51c3f5b 100644
--- a/compiler/codeGen/CgPrimOp.hs
+++ b/compiler/codeGen/CgPrimOp.hs
@@ -387,6 +387,13 @@ emitPrimOp [res] PopCnt32Op [w] live = emitPopCntCall res 
w W32 live
 emitPrimOp [res] PopCnt64Op [w] live = emitPopCntCall res w W64 live
 emitPrimOp [res] PopCntOp [w] live = emitPopCntCall res w wordWidth live
 
+-- SIMD vector packing and unpacking
+emitPrimOp [res] FloatX4PackOp es@[_,_,_,_] _ =
+    doVecPack vec4f32 es res
+
+emitPrimOp res@[_,_,_,_] FloatX4UnpackOp [arg] _ =
+    doVecUnpack vec4f32 arg res
+
 -- The rest just translate straightforwardly
 emitPrimOp [res] op [arg] _
    | nopOp op
@@ -662,6 +669,55 @@ mkBasicIndexedWrite off Nothing write_rep base idx val
 mkBasicIndexedWrite off (Just cast) write_rep base idx val
    = stmtC (CmmStore (cmmIndexOffExpr off write_rep base idx) (CmmMachOp cast 
[val]))
 
+------------------------------------------------------------------------------
+-- Helpers for translating vector packing and unpacking.
+
+doVecPack :: CmmType -> [CmmExpr] -> CmmFormal -> Code
+doVecPack ty es res = do
+    dst <- newTemp ty
+    vecPack dst es 0
+  where
+    vecPack :: CmmFormal -> [CmmExpr] -> Int -> Code
+    vecPack src [] _ =
+        stmtC (CmmAssign (CmmLocal res) (CmmReg (CmmLocal src)))
+
+    vecPack src (e : es) i = do
+        dst <- newTemp ty
+        stmtC $ CmmAssign (CmmLocal dst)
+                          (CmmMachOp (MO_V_Insert len wid)
+                                     [CmmReg (CmmLocal src), e, iLit])
+        vecPack dst es (i + 1)
+      where
+        iLit = CmmLit (mkIntCLit i)
+
+    len :: Length
+    len = vecLength ty 
+
+    wid :: Width
+    wid = typeWidth (vecType ty)
+
+doVecUnpack :: CmmType -> CmmExpr -> [CmmFormal] -> Code
+doVecUnpack ty e res =
+    vecUnpack res 0
+  where
+    vecUnpack :: [CmmFormal] -> Int -> Code
+    vecUnpack [] _ =
+        return ()
+
+    vecUnpack (r : rs) i = do
+        stmtC $ CmmAssign (CmmLocal r)
+                          (CmmMachOp (MO_V_Extract len wid)
+                                     [e, iLit])
+        vecUnpack rs (i + 1)
+      where
+        iLit = CmmLit (mkIntCLit i)
+
+    len :: Length
+    len = vecLength ty 
+
+    wid :: Width
+    wid = typeWidth (vecType ty)
+
 -- ----------------------------------------------------------------------------
 -- Misc utils
 
diff --git a/compiler/llvmGen/LlvmCodeGen/CodeGen.hs 
b/compiler/llvmGen/LlvmCodeGen/CodeGen.hs
index f3fbdd2..50c326e 100644
--- a/compiler/llvmGen/LlvmCodeGen/CodeGen.hs
+++ b/compiler/llvmGen/LlvmCodeGen/CodeGen.hs
@@ -814,6 +814,22 @@ genMachOp_fast env opt op r n e
 -- This handles all the cases not handle by the specialised genMachOp_fast.
 genMachOp_slow :: LlvmEnv -> EOption -> MachOp -> [CmmExpr] -> UniqSM ExprData
 
+-- Element extraction
+genMachOp_slow env _ (MO_V_Extract {}) [val, CmmLit (CmmInt idx _)] = do
+    (env1, vval, stmts, top) <- exprToVar env val
+    let (LMVector _ ty)      =  getVarType vval
+    (v1, s1) <- doExpr ty $ Extract vval (fromInteger idx)
+    return (env1, v1, stmts `snocOL` s1, top)
+
+-- Element insertion
+genMachOp_slow env _ (MO_V_Insert {}) [val, elt, CmmLit (CmmInt idx _)] = do
+    (env1, vval, stmts1, top1) <- exprToVar env val
+    (env2, velt, stmts2, top2) <- exprToVar env1 elt
+    let ty                     =  getVarType vval
+    (v1, s1) <- doExpr ty $ Insert vval velt (fromInteger idx)
+    return (env2, v1, stmts1 `appOL` stmts2 `snocOL` s1,
+            top1 ++ top2)
+    
 -- Binary MachOp
 genMachOp_slow env opt op [x, y] = case op of
 
diff --git a/compiler/nativeGen/X86/CodeGen.hs 
b/compiler/nativeGen/X86/CodeGen.hs
index e7e8459..8c8f75c 100644
--- a/compiler/nativeGen/X86/CodeGen.hs
+++ b/compiler/nativeGen/X86/CodeGen.hs
@@ -173,8 +173,10 @@ stmtToInstrs stmt = do
       panic "stmtToInstrs: return statement should have been cps'd away"
   where
     isVecExpr :: CmmExpr -> Bool
-    isVecExpr (CmmMachOp (MO_VF_Add {}) _) = True
-    isVecExpr _                            = False
+    isVecExpr (CmmMachOp (MO_V_Insert {}) _)  = True
+    isVecExpr (CmmMachOp (MO_V_Extract {}) _) = True
+    isVecExpr (CmmMachOp (MO_VF_Add {}) _)    = True
+    isVecExpr _                               = False
 
 
 
--------------------------------------------------------------------------------
diff --git a/compiler/prelude/primops.txt.pp b/compiler/prelude/primops.txt.pp
index 957acf0..fe7a283 100644
--- a/compiler/prelude/primops.txt.pp
+++ b/compiler/prelude/primops.txt.pp
@@ -1939,6 +1939,16 @@ section "SIMD"
 
 primtype FloatX4#
 
+primop FloatX4PackOp "packFloatX4#" GenPrimOp         
+   Float# -> Float# -> Float# -> Float# -> FloatX4#
+   with
+   code_size = 4
+
+primop FloatX4UnpackOp "unpackFloatX4#" GenPrimOp         
+   FloatX4# -> (# Float#, Float#, Float#, Float# #)
+   with
+   code_size = 4
+
 primop FloatX4AddOp "plusFloatX4#" Dyadic            
    FloatX4# -> FloatX4# -> FloatX4#
    with commutable = True



_______________________________________________
Cvs-ghc mailing list
[email protected]
http://www.haskell.org/mailman/listinfo/cvs-ghc

[commit: ghc] simd: Add primops for packing and unpacking FloatX4#'s. (340816b)

Reply via email to