Repository : ssh://darcs.haskell.org//srv/darcs/ghc On branch : simd
http://hackage.haskell.org/trac/ghc/changeset/340816bc8bb6ec86d8949371ef586f0079306c6a >--------------------------------------------------------------- commit 340816bc8bb6ec86d8949371ef586f0079306c6a Author: Geoffrey Mainland <[email protected]> Date: Wed Nov 2 16:20:35 2011 +0000 Add primops for packing and unpacking FloatX4#'s. >--------------------------------------------------------------- compiler/cmm/CmmMachOp.hs | 7 ++++ compiler/codeGen/CgPrimOp.hs | 56 +++++++++++++++++++++++++++++++ compiler/llvmGen/LlvmCodeGen/CodeGen.hs | 16 +++++++++ compiler/nativeGen/X86/CodeGen.hs | 6 ++- compiler/prelude/primops.txt.pp | 10 +++++ 5 files changed, 93 insertions(+), 2 deletions(-) diff --git a/compiler/cmm/CmmMachOp.hs b/compiler/cmm/CmmMachOp.hs index a9e8699..52f335e 100644 --- a/compiler/cmm/CmmMachOp.hs +++ b/compiler/cmm/CmmMachOp.hs @@ -103,6 +103,10 @@ data MachOp | MO_UU_Conv Width Width -- unsigned int -> unsigned int | MO_FF_Conv Width Width -- Float -> Float + -- Vector element insertion and extraction operations + | MO_V_Insert Length Width + | MO_V_Extract Length Width + -- Float vector operations | MO_VF_Add Length Width deriving (Eq, Show) @@ -341,6 +345,9 @@ machOpResultType mop tys = MO_SF_Conv _ to -> cmmFloat to MO_FF_Conv _ to -> cmmFloat to + MO_V_Insert {} -> ty1 + MO_V_Extract {} -> vecType ty1 + MO_VF_Add {} -> ty1 where (ty1:_) = tys diff --git a/compiler/codeGen/CgPrimOp.hs b/compiler/codeGen/CgPrimOp.hs index 37f6068..51c3f5b 100644 --- a/compiler/codeGen/CgPrimOp.hs +++ b/compiler/codeGen/CgPrimOp.hs @@ -387,6 +387,13 @@ emitPrimOp [res] PopCnt32Op [w] live = emitPopCntCall res w W32 live emitPrimOp [res] PopCnt64Op [w] live = emitPopCntCall res w W64 live emitPrimOp [res] PopCntOp [w] live = emitPopCntCall res w wordWidth live +-- SIMD vector packing and unpacking +emitPrimOp [res] FloatX4PackOp es@[_,_,_,_] _ = + doVecPack vec4f32 es res + +emitPrimOp res@[_,_,_,_] FloatX4UnpackOp [arg] _ = + doVecUnpack vec4f32 arg res + -- The rest just translate straightforwardly emitPrimOp [res] op [arg] _ | nopOp op @@ -662,6 +669,55 @@ mkBasicIndexedWrite off Nothing write_rep base idx val mkBasicIndexedWrite off (Just cast) write_rep base idx val = stmtC (CmmStore (cmmIndexOffExpr off write_rep base idx) (CmmMachOp cast [val])) +------------------------------------------------------------------------------ +-- Helpers for translating vector packing and unpacking. + +doVecPack :: CmmType -> [CmmExpr] -> CmmFormal -> Code +doVecPack ty es res = do + dst <- newTemp ty + vecPack dst es 0 + where + vecPack :: CmmFormal -> [CmmExpr] -> Int -> Code + vecPack src [] _ = + stmtC (CmmAssign (CmmLocal res) (CmmReg (CmmLocal src))) + + vecPack src (e : es) i = do + dst <- newTemp ty + stmtC $ CmmAssign (CmmLocal dst) + (CmmMachOp (MO_V_Insert len wid) + [CmmReg (CmmLocal src), e, iLit]) + vecPack dst es (i + 1) + where + iLit = CmmLit (mkIntCLit i) + + len :: Length + len = vecLength ty + + wid :: Width + wid = typeWidth (vecType ty) + +doVecUnpack :: CmmType -> CmmExpr -> [CmmFormal] -> Code +doVecUnpack ty e res = + vecUnpack res 0 + where + vecUnpack :: [CmmFormal] -> Int -> Code + vecUnpack [] _ = + return () + + vecUnpack (r : rs) i = do + stmtC $ CmmAssign (CmmLocal r) + (CmmMachOp (MO_V_Extract len wid) + [e, iLit]) + vecUnpack rs (i + 1) + where + iLit = CmmLit (mkIntCLit i) + + len :: Length + len = vecLength ty + + wid :: Width + wid = typeWidth (vecType ty) + -- ---------------------------------------------------------------------------- -- Misc utils diff --git a/compiler/llvmGen/LlvmCodeGen/CodeGen.hs b/compiler/llvmGen/LlvmCodeGen/CodeGen.hs index f3fbdd2..50c326e 100644 --- a/compiler/llvmGen/LlvmCodeGen/CodeGen.hs +++ b/compiler/llvmGen/LlvmCodeGen/CodeGen.hs @@ -814,6 +814,22 @@ genMachOp_fast env opt op r n e -- This handles all the cases not handle by the specialised genMachOp_fast. genMachOp_slow :: LlvmEnv -> EOption -> MachOp -> [CmmExpr] -> UniqSM ExprData +-- Element extraction +genMachOp_slow env _ (MO_V_Extract {}) [val, CmmLit (CmmInt idx _)] = do + (env1, vval, stmts, top) <- exprToVar env val + let (LMVector _ ty) = getVarType vval + (v1, s1) <- doExpr ty $ Extract vval (fromInteger idx) + return (env1, v1, stmts `snocOL` s1, top) + +-- Element insertion +genMachOp_slow env _ (MO_V_Insert {}) [val, elt, CmmLit (CmmInt idx _)] = do + (env1, vval, stmts1, top1) <- exprToVar env val + (env2, velt, stmts2, top2) <- exprToVar env1 elt + let ty = getVarType vval + (v1, s1) <- doExpr ty $ Insert vval velt (fromInteger idx) + return (env2, v1, stmts1 `appOL` stmts2 `snocOL` s1, + top1 ++ top2) + -- Binary MachOp genMachOp_slow env opt op [x, y] = case op of diff --git a/compiler/nativeGen/X86/CodeGen.hs b/compiler/nativeGen/X86/CodeGen.hs index e7e8459..8c8f75c 100644 --- a/compiler/nativeGen/X86/CodeGen.hs +++ b/compiler/nativeGen/X86/CodeGen.hs @@ -173,8 +173,10 @@ stmtToInstrs stmt = do panic "stmtToInstrs: return statement should have been cps'd away" where isVecExpr :: CmmExpr -> Bool - isVecExpr (CmmMachOp (MO_VF_Add {}) _) = True - isVecExpr _ = False + isVecExpr (CmmMachOp (MO_V_Insert {}) _) = True + isVecExpr (CmmMachOp (MO_V_Extract {}) _) = True + isVecExpr (CmmMachOp (MO_VF_Add {}) _) = True + isVecExpr _ = False -------------------------------------------------------------------------------- diff --git a/compiler/prelude/primops.txt.pp b/compiler/prelude/primops.txt.pp index 957acf0..fe7a283 100644 --- a/compiler/prelude/primops.txt.pp +++ b/compiler/prelude/primops.txt.pp @@ -1939,6 +1939,16 @@ section "SIMD" primtype FloatX4# +primop FloatX4PackOp "packFloatX4#" GenPrimOp + Float# -> Float# -> Float# -> Float# -> FloatX4# + with + code_size = 4 + +primop FloatX4UnpackOp "unpackFloatX4#" GenPrimOp + FloatX4# -> (# Float#, Float#, Float#, Float# #) + with + code_size = 4 + primop FloatX4AddOp "plusFloatX4#" Dyadic FloatX4# -> FloatX4# -> FloatX4# with commutable = True _______________________________________________ Cvs-ghc mailing list [email protected] http://www.haskell.org/mailman/listinfo/cvs-ghc
