Author: Richard Plangger <r...@pasra.at>
Branch: vecopt
Changeset: r77290:ddd27b50021e
Date: 2015-05-11 10:48 +0200
http://bitbucket.org/pypy/pypy/changeset/ddd27b50021e/

Log:    renamed detect_sse2.py to detect_feature.py; added checks for
        SSE 4.1, 4.2 and 4a

diff --git a/rpython/jit/backend/detect_cpu.py b/rpython/jit/backend/detect_cpu.py
--- a/rpython/jit/backend/detect_cpu.py
+++ b/rpython/jit/backend/detect_cpu.py
@@ -73,12 +73,12 @@
             result = MODEL_X86_64
         else:
             assert sys.maxint == 2**31-1
-            from rpython.jit.backend.x86 import detect_sse2
-            if detect_sse2.detect_sse2():
+            from rpython.jit.backend.x86 import detect_feature
+            if detect_feature.detect_sse2():
                 result = MODEL_X86
             else:
                 result = MODEL_X86_NO_SSE2
-            if detect_sse2.detect_x32_mode():
+            if detect_feature.detect_x32_mode():
                 raise ProcessorAutodetectError(
                     'JITting in x32 mode is not implemented')
     #
diff --git a/rpython/jit/backend/x86/detect_feature.py b/rpython/jit/backend/x86/detect_feature.py
new file mode 100644
--- /dev/null
+++ b/rpython/jit/backend/x86/detect_feature.py
@@ -0,0 +1,74 @@
+import sys
+import struct
+from rpython.rtyper.lltypesystem import lltype, rffi
+from rpython.rlib.rmmap import alloc, free
+
+def cpu_info(instr):
+    """Copy the machine code `instr` into a 4096-byte alloc() buffer, call
+    it as a no-argument function and return its Signed result."""
+    page = alloc(4096)
+    for offset, byte in enumerate(instr):
+        page[offset] = byte
+    entry = rffi.cast(lltype.Ptr(lltype.FuncType([], lltype.Signed)), page)
+    result = entry()
+    free(page, 4096)
+    return result
+
+def detect_sse2():
+    sse_and_sse2 = (1<<25) | (1<<26)  # both flag bits must be set in EDX
+    return (cpu_id(eax=1) & sse_and_sse2) == sse_and_sse2
+
+def cpu_id(eax = 1, ret_edx=True, ret_ecx = False):
+    """Execute CPUID with EAX set to `eax` and return one result register.
+
+    Returns EDX if `ret_edx` is true, otherwise ECX if `ret_ecx` is true,
+    otherwise EAX.
+    """
+    # '<I': the imm32 operand of MOV is a 4-byte little-endian value
+    asm = "\xB8" + struct.pack('<I', eax) # MOV EAX, $eax
+    # the literals are parenthesized so that they form a single expression
+    asm += ("\x53"                    # PUSH EBX
+            "\x0F\xA2"                # CPUID
+            "\x5B")                   # POP EBX
+    if ret_edx:
+        asm += "\x92"                 # XCHG EAX, EDX
+    elif ret_ecx:
+        asm += "\x91"                 # XCHG EAX, ECX
+    asm += "\xC3"                     # RET
+    return cpu_info(asm)
+
+def detect_sse4_1(code=-1):
+    """ `code` is ECX of CPUID leaf 1; -1 means query it via cpu_id() """
+    if code == -1:
+        code = cpu_id(eax=1, ret_edx=False, ret_ecx=True)
+    return bool(code & (1<<19))
+
+def detect_sse4_2(code=-1):
+    """ `code` is ECX of CPUID leaf 1; -1 means query it via cpu_id() """
+    if code == -1:
+        code = cpu_id(eax=1, ret_edx=False, ret_ecx=True)
+    return bool(code & (1<<20))
+
+def detect_sse4a(code=-1):
+    """ `code` is ECX of CPUID leaf 0x80000001; -1 means query it here """
+    if code == -1:
+        code = cpu_id(eax=0x80000001, ret_edx=False, ret_ecx=True)
+    return bool(code & (1<<6))        # SSE4A is ECX bit 6, not bit 20
+
+def detect_x32_mode():
+    # 32-bit         64-bit / x32
+    code = cpu_info("\x48"                # DEC EAX
+                    "\xB8\xC8\x00\x00\x00"# MOV EAX, 200   MOV RAX, 0x40404040000000C8
+                    "\x40\x40\x40\x40"    # 4x INC EAX
+                    "\xC3")               # RET            RET
+    assert code in (200, 204, 0x40404040000000C8)
+    return code == 200    # presumably the 64-bit result cut to 32 bits, i.e. x32
+
+
+if __name__ == '__main__':
+    # manual check: report what the detection functions find on this machine
+    sse2_msg = ('Processor supports sse2.' if detect_sse2()
+                else 'Missing processor support for sse2.')
+    print sse2_msg
+    if detect_x32_mode():
+        print 'Process is running in "x32" mode.'
diff --git a/rpython/jit/backend/x86/detect_sse2.py b/rpython/jit/backend/x86/detect_sse2.py
deleted file mode 100644
--- a/rpython/jit/backend/x86/detect_sse2.py
+++ /dev/null
@@ -1,51 +0,0 @@
-import sys
-from rpython.rtyper.lltypesystem import lltype, rffi
-from rpython.rlib.rmmap import alloc, free
-
-def cpu_info(instr):
-    data = alloc(4096)
-    pos = 0
-    for c in instr:
-        data[pos] = c
-        pos += 1
-    fnptr = rffi.cast(lltype.Ptr(lltype.FuncType([], lltype.Signed)), data)
-    code = fnptr()
-    free(data, 4096)
-    return code
-
-def detect_sse2():
-    code = cpu_info("\xB8\x01\x00\x00\x00"     # MOV EAX, 1
-                    "\x53"                     # PUSH EBX
-                    "\x0F\xA2"                 # CPUID
-                    "\x5B"                     # POP EBX
-                    "\x92"                     # XCHG EAX, EDX
-                    "\xC3"                     # RET
-                   )
-    return bool(code & (1<<25)) and bool(code & (1<<26))
-
-def byte_size_for_vector_registers(sse2, avx, avxbw):
-    if avx:
-        if avxbw:
-            return 64
-        return 32
-    if sse2:
-        return 16
-    assert False, "No vector extention supported"
-
-def detect_x32_mode():
-    # 32-bit         64-bit / x32
-    code = cpuinfo("\x48"                # DEC EAX
-                   "\xB8\xC8\x00\x00\x00"# MOV EAX, 200   MOV RAX, 0x40404040000000C8
-                   "\x40\x40\x40\x40"    # 4x INC EAX
-                   "\xC3")               # RET            RET
-    assert code in (200, 204, 0x40404040000000C8)
-    return code == 200
-
-
-if __name__ == '__main__':
-    if detect_sse2():
-        print 'Processor supports sse2.'
-    else:
-        print 'Missing processor support for sse2.'
-    if detect_x32_mode():
-        print 'Process is running in "x32" mode.'
diff --git a/rpython/jit/backend/x86/runner.py b/rpython/jit/backend/x86/runner.py
--- a/rpython/jit/backend/x86/runner.py
+++ b/rpython/jit/backend/x86/runner.py
@@ -24,7 +24,10 @@
     with_threads = False
     frame_reg = regloc.ebp
 
+    vector_extension = False
     vector_register_size = 0 # in bytes
+    vector_horizontal_operations = False
+    vector_pack_slots = False
 
     from rpython.jit.backend.x86.arch import JITFRAME_FIXED_SIZE
     all_reg_indexes = gpr_reg_mgr_cls.all_reg_indexes
@@ -48,6 +51,16 @@
 
         self.profile_agent = profile_agent
 
+        if self.supports_floats and self.supports_longlong:
+            # has sse 2 at least
+            from rpython.jit.backend.x86 import detect_feature as feature
+            if feature.detect_sse4_1():
+                self.vector_extension = True
+                self.vector_register_size = 16
+                self.vector_horizontal_operations = True
+                if feature.detect_sse4a():
+                    self.vector_pack_slots = True
+
     def set_debug(self, flag):
         return self.assembler.set_debug(flag)
 
@@ -147,8 +160,6 @@
 
     IS_64_BIT = False
 
-    vector_register_size = 16
-
     def __init__(self, *args, **kwargs):
         assert sys.maxint == (2**31 - 1)
         super(CPU386, self).__init__(*args, **kwargs)
@@ -165,6 +176,4 @@
     IS_64_BIT = True
     HAS_CODEMAP = True
 
-    vector_register_size = 16
-
 CPU = CPU386
_______________________________________________
pypy-commit mailing list
pypy-commit@python.org
https://mail.python.org/mailman/listinfo/pypy-commit

Reply via email to