jxy - I did look at that, and perhaps I am reading the code wrong, but I think 
that one makes a compile-time decision about which SIMD feature set is 
available, not a runtime one. Is that correct?

I'm looking to be able to build one exe, send it to a computer with SSE or AVX 
or AVX512 and have it use the appropriate instructions at runtime.

I have a working prototype now that might illustrate what I am after, but 
perhaps there are much better ways of going about it. Also, I haven't measured 
runtime performance at all yet:
    
    
    import rdstdin, strutils,x86_sse,x86_avx
    
    # Hard-coded stand-ins for CPU feature detection; `simd_block` reads
    # `has_avx` and `has_sse` at runtime to pick a vector width.
    var
        has_sse = true
        has_sse2 = true   # not read anywhere in the visible code
        has_avx = false
    
    proc load(a: var m128,s: var seq[float32],index: int) {.inline.} =
        ## 128-bit overload: stores into `a` the result of `loadu_ps` applied to
        ## the address of `s[index]`.
        ## NOTE(review): presumably reads 4 consecutive float32 values starting
        ## at `index` (unaligned load) - confirm against the x86_sse wrapper.
        a = s[index].addr.loadu_ps()
    
    proc load(a: var m256,s: var seq[float32],index: int) {.inline.} =
        ## 256-bit overload: stores into `a` the result of `loadu_ps_256`
        ## applied to the address of `s[index]`.
        ## NOTE(review): presumably reads 8 consecutive float32 values starting
        ## at `index` (unaligned load) - confirm against the x86_avx wrapper.
        a = s[index].addr.loadu_ps_256()
    
    template simd_block(s:seq[float32], a:untyped,count:untyped,body:untyped) =
        ## Runtime dispatch over vector width. `body` is instantiated once per
        ## branch, each time with fresh locals named by the `a` and `count`
        ## arguments:
        ##   * `has_avx` true  -> `a` is an `m256`, `count` is 8
        ##   * `has_sse` true  -> `a` is an `m128`, `count` is 4
        ## If neither flag is set, `body` is not executed at all.
        ## The `s` parameter is not used by the template itself.
        if has_avx:
            var
                count = 8
                a: m256
            body
        elif has_sse:
            var
                count = 4
                a: m128
            body
    
    
    var s = @[1.0'f32,2.0'f32,3.0'f32,4.0'f32,1.0'f32,2.0'f32,3.0'f32,4.0'f32]

    # Double every element of `s` in place, one vector of `count` lanes at a
    # time; `simd_block` chooses the vector type and `count` at runtime.
    simd_block(s,a,count):
        # Stop at the last index where a full vector still fits. The vector
        # load/store are expected to touch `count` elements starting at `i`,
        # so iterating up to `s.len - 1` would run past the end of `s` whenever
        # `s.len` is not a multiple of `count`. Leftover tail elements (if any)
        # are left unchanged. This also avoids the deprecated unary `<`.
        for i in countup(0,s.len - count,count):
            a.load(s,i)
            a = add_ps(a,a)
            storeu_ps(addr s[i],a)

    echo s #result is correct!
    

Reply via email to