On 8/18/20 8:50 AM, Taylor Simpson wrote: > +/* > + * Section 5.5 of the Hexagon V67 Programmer's Reference Manual > + * > + * Slot 1 store with slot 0 load > + * A slot 1 store operation with a slot 0 load operation can appear in a > packet. > + * The packet attribute :mem_noshuf inhibits the instruction reordering that > + * would otherwise be done by the assembler. For example: > + * { > + * memw(R5) = R2 // slot 1 store > + * R3 = memh(R6) // slot 0 load > + * }:mem_noshuf > + * Unlike most packetized operations, these memory operations are not > executed > + * in parallel (Section 3.3.1). Instead, the store instruction in Slot 1 > + * effectively executes first, followed by the load instruction in Slot 0. If > + * the addresses of the two operations are overlapping, the load will receive > + * the newly stored data. This feature is supported in processor versions > + * V65 or greater. > + * > + * > + * For qemu, we look for a load in slot 0 when there is a store in slot 1 > + * in the same packet. When we see this, we call a helper that merges the > + * bytes from the store buffer with the value loaded from memory. > + */ > +#define CHECK_NOSHUF(DST, VA, SZ, SIGN) \ > + do { \ > + if (insn->slot == 0 && pkt->pkt_has_store_s1) { \ > + gen_helper_merge_inflight_store##SZ##SIGN(DST, cpu_env, VA, > DST); \ > + } \ > + } while (0)
Ah, so I see what you're trying to do with the merge thing, which had the host-endian problems. I think the merge stuff is a mistake. I think you can get the semantics that you want with probe_read(ld_addr, ld_len) qemu_st(st_value, st_addr) qemu_ld(ld_value, ld_addr) In this way, all exceptions are recognized before the store is complete, the normal memory operations handle any possible overlap. > +#define fINSERT_BITS(REG, WIDTH, OFFSET, INVAL) \ > + do { \ > + REG = ((REG) & ~(((fCONSTLL(1) << (WIDTH)) - 1) << (OFFSET))) | \ > + (((INVAL) & ((fCONSTLL(1) << (WIDTH)) - 1)) << (OFFSET)); \ > + } while (0) reg = deposit32(reg, offset, width, inval) > +#define fEXTRACTU_BITS(INREG, WIDTH, OFFSET) \ > + (fZXTN(WIDTH, 32, (INREG >> OFFSET))) > +#define fEXTRACTU_BIDIR(INREG, WIDTH, OFFSET) \ > + (fZXTN(WIDTH, 32, fBIDIR_LSHIFTR((INREG), (OFFSET), 4_8))) > +#define fEXTRACTU_RANGE(INREG, HIBIT, LOWBIT) \ > + (fZXTN((HIBIT - LOWBIT + 1), 32, (INREG >> LOWBIT))) extract32(inreg, offset, width) > +#define fZXTN(N, M, VAL) ((VAL) & ((1LL << (N)) - 1)) extract32(VAL, 0, n) > +#define fSXTN(N, M, VAL) \ > + ((fZXTN(N, M, VAL) ^ (1LL << ((N) - 1))) - (1LL << ((N) - 1))) sextract32(val, 0, n) > +#define fRND(A) (((A) + 1) >> 1) Does A have a type that won't overflow? For Arm we write this as (A >> 1) + (A & 1) > +#define fDCFETCH(REG) do { REG = REG; } while (0) /* Nothing to do in qemu */ > +#define fICINVA(REG) do { REG = REG; } while (0) /* Nothing to do in qemu */ > +#define fL2FETCH(ADDR, HEIGHT, WIDTH, STRIDE, FLAGS) > +#define fDCCLEANA(REG) do { REG = REG; } while (0) /* Nothing to do in qemu > */ > +#define fDCCLEANINVA(REG) \ > + do { REG = REG; } while (0) /* Nothing to do in qemu */ Is this "R=R" thing trying to avoid a compiler warning? Perhaps "(void)R" would be sufficient to avoid that? > -static inline void log_store32(CPUHexagonState *env, target_ulong addr, > - target_ulong val, int width, int slot) > -{ > - HEX_DEBUG_LOG("log_store%d(0x" TARGET_FMT_lx ", " TARGET_FMT_ld > - " [0x" TARGET_FMT_lx "])\n", > - width, addr, val, val); > - env->mem_log_stores[slot].va = addr; > - env->mem_log_stores[slot].width = width; > - env->mem_log_stores[slot].data32 = val; > -} > - > -static inline void log_store64(CPUHexagonState *env, target_ulong addr, > - int64_t val, int width, int slot) > -{ > - HEX_DEBUG_LOG("log_store%d(0x" TARGET_FMT_lx ", %ld [0x%lx])\n", > - width, addr, val, val); > - env->mem_log_stores[slot].va = addr; > - env->mem_log_stores[slot].width = width; > - env->mem_log_stores[slot].data64 = val; > -} > - Fold this back to wherever it came from. Clearly no need to introduce it in the first place. r~