Author: Richard Plangger <planri...@gmail.com> Branch: s390x-backend Changeset: r82042:037ac225f6c1 Date: 2016-02-02 17:17 +0100 http://bitbucket.org/pypy/pypy/changeset/037ac225f6c1/
Log: reading level=2 cache for estimation size of nursery diff --git a/rpython/jit/backend/ppc/callbuilder.py b/rpython/jit/backend/ppc/callbuilder.py --- a/rpython/jit/backend/ppc/callbuilder.py +++ b/rpython/jit/backend/ppc/callbuilder.py @@ -98,7 +98,7 @@ # We must also copy fnloc into FNREG non_float_locs.append(self.fnloc) - non_float_regs.append(self.mc.RAW_CALL_REG) # r2 or r12 + non_float_regs.append(self.mc.RAW_CALL_REG) if float_locs: assert len(float_locs) <= len(self.FPR_ARGS) diff --git a/rpython/jit/backend/zarch/callbuilder.py b/rpython/jit/backend/zarch/callbuilder.py --- a/rpython/jit/backend/zarch/callbuilder.py +++ b/rpython/jit/backend/zarch/callbuilder.py @@ -62,7 +62,6 @@ # called function will in turn call further functions (which must be passed the # address of the new frame). This stack grows downwards from high addresses # """ - self.subtracted_to_sp = 0 gpr_regs = 0 fpr_regs = 0 @@ -88,11 +87,6 @@ if self.is_call_release_gil: self.subtracted_to_sp += 8*WORD base += 8*WORD - # one additional word for remap frame layout - # regalloc_push will overwrite -8(r.SP) and destroy - # a parameter if we would not reserve that space - # base += WORD - # TODO self.subtracted_to_sp += WORD for idx,i in enumerate(stack_params): loc = arglocs[i] offset = STD_FRAME_SIZE_IN_BYTES - base + 8 * idx @@ -149,7 +143,7 @@ def emit_raw_call(self): # always allocate a stack frame for the new function # save the SP back chain - #self.mc.STG(r.SP, l.addr(-self.subtracted_to_sp, r.SP)) + self.mc.STG(r.SP, l.addr(-self.subtracted_to_sp, r.SP)) # move the frame pointer if self.subtracted_to_sp != 0: self.mc.LAY(r.SP, l.addr(-self.subtracted_to_sp, r.SP)) @@ -194,8 +188,6 @@ # pos = STD_FRAME_SIZE_IN_BYTES - 7*WORD self.mc.STMG(r.r8, r.r13, l.addr(pos, r.SP)) - # 6 registers, 1 for a floating point return value! - # registered by prepare_arguments! # # Save this thread's shadowstack pointer into r8, for later comparison gcrootmap = self.asm.cpu.gc_ll_descr.gcrootmap @@ -266,19 +258,17 @@ PARAM_SAVE_AREA_OFFSET = 0 if reg is not None: # save 1 word below the stack pointer - pos = STD_FRAME_SIZE_IN_BYTES if reg.is_core_reg(): self.mc.LGR(RSAVEDRES, reg) elif reg.is_fp_reg(): - self.mc.STD(reg, l.addr(pos-1*WORD, r.SP)) + self.mc.STD(reg, l.addr(16*WORD, r.SP)) self.mc.load_imm(self.mc.RAW_CALL_REG, self.asm.reacqgil_addr) self.mc.raw_call() if reg is not None: - pos = STD_FRAME_SIZE_IN_BYTES if reg.is_core_reg(): self.mc.LGR(reg, RSAVEDRES) elif reg.is_fp_reg(): - self.mc.LD(reg, l.addr(pos-1*WORD, r.SP)) + self.mc.LD(reg, l.addr(16*WORD, r.SP)) # replace b1_location with BEQ(here) pmc = OverwritingBuilder(self.mc, b1_location, 1) diff --git a/rpython/jit/backend/zarch/codebuilder.py b/rpython/jit/backend/zarch/codebuilder.py --- a/rpython/jit/backend/zarch/codebuilder.py +++ b/rpython/jit/backend/zarch/codebuilder.py @@ -189,11 +189,7 @@ return diff def sync(self): - # see sync. section of the zarch manual! - # 0xf creates a checkpoint which is not needed. - # we never want to restore the checkpoint, we only - # want to create a memory fence (i.e. serialization) - self.BCR_rr(0xe,0) + self.BCR_rr(0xf,0) def raw_call(self, call_reg=r.RETURN): """Emit a call to the address stored in the register 'call_reg', diff --git a/rpython/jit/backend/zarch/instruction_builder.py b/rpython/jit/backend/zarch/instruction_builder.py --- a/rpython/jit/backend/zarch/instruction_builder.py +++ b/rpython/jit/backend/zarch/instruction_builder.py @@ -191,6 +191,13 @@ self.write_i32(imm32 & BIT_MASK_32) return encode_ri +def build_s(mnemonic, (opcode1,opcode2)): + @builder.arguments('bd') + def encode_s(self, base_displace): + self.writechar(opcode1) + self.writechar(opcode2) + encode_base_displace(self, base_displace) + return encode_s def build_si(mnemonic, (opcode,)): @builder.arguments('bd,u8') diff --git a/rpython/jit/backend/zarch/instructions.py b/rpython/jit/backend/zarch/instructions.py --- a/rpython/jit/backend/zarch/instructions.py +++ b/rpython/jit/backend/zarch/instructions.py @@ -288,6 +288,8 @@ 'SVC': ('i', ['\x0A']), 'TRAP2': ('e', ['\x01','\xFF']), + + 'STFLE': ('s', ['\xB2','\xB0']), } all_mnemonic_codes.update(arith_mnemonic_codes) all_mnemonic_codes.update(logic_mnemonic_codes) diff --git a/rpython/jit/backend/zarch/test/test_assembler.py b/rpython/jit/backend/zarch/test/test_assembler.py --- a/rpython/jit/backend/zarch/test/test_assembler.py +++ b/rpython/jit/backend/zarch/test/test_assembler.py @@ -144,6 +144,19 @@ assert self.mc.BRC_byte_count == 4 assert self.mc.LG_byte_count == 6 + def test_facility(self): + adr = self.a.datablockwrapper.malloc_aligned(16, 16) + self.a.mc.load_imm(r.r2, adr) + self.a.mc.STFLE(loc.addr(0,r.r2)) + self.a.mc.BCR(con.ANY, r.r14) + run_asm(self.a) + fac_data = rffi.cast(rffi.CArrayPtr(rffi.ULONG), adr) + f64 = bin(fac_data[0])[2:] + s64 = bin(fac_data[1])[2:] + print(f64) + print(s64) + assert f64[18] == '1' # long displacement facility + def test_load_small_int_to_reg(self): self.a.mc.LGHI(r.r2, loc.imm(123)) self.a.jmpto(r.r14) diff --git a/rpython/memory/gc/env.py b/rpython/memory/gc/env.py --- a/rpython/memory/gc/env.py +++ b/rpython/memory/gc/env.py @@ -137,6 +137,8 @@ return get_L2cache_linux2_cpuinfo() if arch in ('alpha', 'ppc'): return get_L2cache_linux2_cpuinfo(label='L2 cache') + if arch in ('s390x'): + return get_L2cache_linux2_cpuinfo_s390x() if arch == 'ia64': return get_L2cache_linux2_ia64() if arch in ('parisc', 'parisc64'): @@ -208,6 +210,67 @@ "Warning: cannot find your CPU L2 cache size in /proc/cpuinfo") return -1 +def get_L2cache_linux2_cpuinfo_s390x(filename="/proc/cpuinfo", label='cache3'): + debug_start("gc-hardware") + L2cache = sys.maxint + try: + fd = os.open(filename, os.O_RDONLY, 0644) + try: + data = [] + while True: + buf = os.read(fd, 4096) + if not buf: + break + data.append(buf) + finally: + os.close(fd) + except OSError: + pass + else: + data = ''.join(data) + linepos = 0 + while True: + start = _findend(data, '\n' + label, linepos) + if start < 0: + break # done + linepos = _findend(data, '\n', start) + if linepos < 0: + break # no end-of-line?? + # *** data[start:linepos] == " : level=2 type=Instruction scope=Private size=2048K ..." + start = _skipspace(data, start) + if data[start] != ':': + continue + # *** data[start:linepos] == ": level=2 type=Instruction scope=Private size=2048K ..." + start = _skipspace(data, start + 1) + # *** data[start:linepos] == "level=2 type=Instruction scope=Private size=2048K ..." + start += 44 + end = start + while '0' <= data[end] <= '9': + end += 1 + # *** data[start:end] == "2048" + if start == end: + continue + number = int(data[start:end]) + # *** data[end:linepos] == " KB\n" + end = _skipspace(data, end) + if data[end] not in ('K', 'k'): # assume kilobytes for now + continue + number = number * 1024 + # for now we look for the smallest of the L2 caches of the CPUs + if number < L2cache: + L2cache = number + + debug_print("L2cache =", L2cache) + debug_stop("gc-hardware") + + if L2cache < sys.maxint: + return L2cache + else: + # Print a top-level warning even in non-debug builds + llop.debug_print(lltype.Void, + "Warning: cannot find your CPU L2 cache size in /proc/cpuinfo") + return -1 + def get_L2cache_linux2_sparc(): debug_start("gc-hardware") cpu = 0 diff --git a/rpython/memory/gc/test/test_env.py b/rpython/memory/gc/test/test_env.py --- a/rpython/memory/gc/test/test_env.py +++ b/rpython/memory/gc/test/test_env.py @@ -161,3 +161,22 @@ """) result = env.get_L2cache_linux2_cpuinfo(str(filepath)) assert result == 3072 * 1024 + +def test_estimate_best_nursery_size_linux2_s390x(): + filepath = udir.join('estimate_best_nursery_size_linux2') + filepath.write("""\ +vendor_id : IBM/S390 +# processors : 2 +bogomips per cpu: 20325.00 +features : esan3 zarch stfle msa ldisp eimm dfp etf3eh highgprs +cache0 : level=1 type=Data scope=Private size=128K line_size=256 associativity=8 +cache1 : level=1 type=Instruction scope=Private size=96K line_size=256 associativity=6 +cache2 : level=2 type=Data scope=Private size=2048K line_size=256 associativity=8 +cache3 : level=2 type=Instruction scope=Private size=2048K line_size=256 associativity=8 +cache4 : level=3 type=Unified scope=Shared size=65536K line_size=256 associativity=16 +cache5 : level=4 type=Unified scope=Shared size=491520K line_size=256 associativity=30 +processor 0: version = FF, identification = 026A77, machine = 2964 +processor 1: version = FF, identification = 026A77, machine = 2964 +""") + result = env.get_L2cache_linux2_cpuinfo_s390x(str(filepath)) + assert result == 2048 * 1024 _______________________________________________ pypy-commit mailing list pypy-commit@python.org https://mail.python.org/mailman/listinfo/pypy-commit