Author: Richard Plangger <r...@pasra.at> Branch: vecopt Changeset: r77290:ddd27b50021e Date: 2015-05-11 10:48 +0200 http://bitbucket.org/pypy/pypy/changeset/ddd27b50021e/
Log: renamed detect_sse2.py to detect_feature.py added checks for sse 4.1, 4.2 and 4a diff --git a/rpython/jit/backend/detect_cpu.py b/rpython/jit/backend/detect_cpu.py --- a/rpython/jit/backend/detect_cpu.py +++ b/rpython/jit/backend/detect_cpu.py @@ -73,12 +73,12 @@ result = MODEL_X86_64 else: assert sys.maxint == 2**31-1 - from rpython.jit.backend.x86 import detect_sse2 - if detect_sse2.detect_sse2(): + from rpython.jit.backend.x86 import detect_feature + if detect_feature.detect_sse2(): result = MODEL_X86 else: result = MODEL_X86_NO_SSE2 - if detect_sse2.detect_x32_mode(): + if detect_feature.detect_x32_mode(): raise ProcessorAutodetectError( 'JITting in x32 mode is not implemented') # diff --git a/rpython/jit/backend/x86/detect_feature.py b/rpython/jit/backend/x86/detect_feature.py new file mode 100644 --- /dev/null +++ b/rpython/jit/backend/x86/detect_feature.py @@ -0,0 +1,74 @@ +import sys +import struct +from rpython.rtyper.lltypesystem import lltype, rffi +from rpython.rlib.rmmap import alloc, free + +def cpu_info(instr): + data = alloc(4096) + pos = 0 + for c in instr: + data[pos] = c + pos += 1 + fnptr = rffi.cast(lltype.Ptr(lltype.FuncType([], lltype.Signed)), data) + code = fnptr() + free(data, 4096) + return code + +def detect_sse2(): + code = cpu_id(eax=1) + return bool(code & (1<<25)) and bool(code & (1<<26)) + +def cpu_id(eax = 1, ret_edx=True, ret_ecx = False): + asm = "\xB8" + struct.pack('I', eax) # MOV EAX, $eax + asm += "\x53" # PUSH EBX + "\x0F\xA2" # CPUID + "\x5B" # POP EBX + if ret_edx: + asm += "\x92" # XCHG EAX, EDX + elif ret_ecx: + asm += "\x91" # XCHG EAX, ECX + asm += "\xC3" # RET + #code = cpu_info("\xB8\x01\x00\x00\x00" # MOV EAX, 1 + # "\x53" # PUSH EBX + # "\x0F\xA2" # CPUID + # "\x5B" # POP EBX + # "\x92" # XCHG EAX, EDX + # "\xC3" # RET + # ) + return cpu_info(asm) + +def detect_sse4_1(code=-1): + """ use cpu_id_eax_1_ecx() to get code parameter """ + if code == -1: + code = cpu_id(eax=1, ret_edx=False, ret_ecx=False) + return bool(code & (1<<19)) + +def detect_sse4_2(code=-1): + """ use cpu_id_eax_1_ecx() to get code parameter """ + if code == -1: + code = cpu_id(eax=1, ret_edx=False, ret_ecx=False) + return bool(code & (1<<20)) + +def detect_sse4a(code=-1): + """ use cpu_id_eax_1_ecx() to get code parameter """ + if code == -1: + code = feature.cpu_id(eax=0x80000001, ret_edx=False, ret_ecx=True) + return bool(code & (1<<20)) + +def detect_x32_mode(): + # 32-bit 64-bit / x32 + code = cpu_info("\x48" # DEC EAX + "\xB8\xC8\x00\x00\x00"# MOV EAX, 200 MOV RAX, 0x40404040000000C8 + "\x40\x40\x40\x40" # 4x INC EAX + "\xC3") # RET RET + assert code in (200, 204, 0x40404040000000C8) + return code == 200 + + +if __name__ == '__main__': + if detect_sse2(): + print 'Processor supports sse2.' + else: + print 'Missing processor support for sse2.' + if detect_x32_mode(): + print 'Process is running in "x32" mode.' diff --git a/rpython/jit/backend/x86/detect_sse2.py b/rpython/jit/backend/x86/detect_sse2.py deleted file mode 100644 --- a/rpython/jit/backend/x86/detect_sse2.py +++ /dev/null @@ -1,51 +0,0 @@ -import sys -from rpython.rtyper.lltypesystem import lltype, rffi -from rpython.rlib.rmmap import alloc, free - -def cpu_info(instr): - data = alloc(4096) - pos = 0 - for c in instr: - data[pos] = c - pos += 1 - fnptr = rffi.cast(lltype.Ptr(lltype.FuncType([], lltype.Signed)), data) - code = fnptr() - free(data, 4096) - return code - -def detect_sse2(): - code = cpu_info("\xB8\x01\x00\x00\x00" # MOV EAX, 1 - "\x53" # PUSH EBX - "\x0F\xA2" # CPUID - "\x5B" # POP EBX - "\x92" # XCHG EAX, EDX - "\xC3" # RET - ) - return bool(code & (1<<25)) and bool(code & (1<<26)) - -def byte_size_for_vector_registers(sse2, avx, avxbw): - if avx: - if avxbw: - return 64 - return 32 - if sse2: - return 16 - assert False, "No vector extention supported" - -def detect_x32_mode(): - # 32-bit 64-bit / x32 - code = cpuinfo("\x48" # DEC EAX - "\xB8\xC8\x00\x00\x00"# MOV EAX, 200 MOV RAX, 0x40404040000000C8 - "\x40\x40\x40\x40" # 4x INC EAX - "\xC3") # RET RET - assert code in (200, 204, 0x40404040000000C8) - return code == 200 - - -if __name__ == '__main__': - if detect_sse2(): - print 'Processor supports sse2.' - else: - print 'Missing processor support for sse2.' - if detect_x32_mode(): - print 'Process is running in "x32" mode.' diff --git a/rpython/jit/backend/x86/runner.py b/rpython/jit/backend/x86/runner.py --- a/rpython/jit/backend/x86/runner.py +++ b/rpython/jit/backend/x86/runner.py @@ -24,7 +24,10 @@ with_threads = False frame_reg = regloc.ebp + vector_extension = False vector_register_size = 0 # in bytes + vector_horizontal_operations = False + vector_pack_slots = False from rpython.jit.backend.x86.arch import JITFRAME_FIXED_SIZE all_reg_indexes = gpr_reg_mgr_cls.all_reg_indexes @@ -48,6 +51,16 @@ self.profile_agent = profile_agent + if self.supports_floats and self.supports_longlong: + # has sse 2 at least + from rpython.jit.backend.x86 import detect_feature as feature + if feature.detect_sse4_1(): + self.vector_extension = True + self.vector_register_size = 16 + self.vector_horizontal_operations = True + if feature.detect_sse4a(): + self.vector_pack_slots = True + def set_debug(self, flag): return self.assembler.set_debug(flag) @@ -147,8 +160,6 @@ IS_64_BIT = False - vector_register_size = 16 - def __init__(self, *args, **kwargs): assert sys.maxint == (2**31 - 1) super(CPU386, self).__init__(*args, **kwargs) @@ -165,6 +176,4 @@ IS_64_BIT = True HAS_CODEMAP = True - vector_register_size = 16 - CPU = CPU386 _______________________________________________ pypy-commit mailing list pypy-commit@python.org https://mail.python.org/mailman/listinfo/pypy-commit