Re: [PATCH 11/14] target/arm: Implement the SETG* instructions
On 9/11/23 07:17, Peter Maydell wrote: I think it would be a little better if set_tags was visible to the compiler, via inlining, so that all of the conditions can be folded away. Do you mean having a separate triplet of helper functions for setg, which then call an inline function shared with the normal setp/setm/sete to do the actual work, rather than passing "is this setg" via the syndrome? Yes. r~
Re: [PATCH 11/14] target/arm: Implement the SETG* instructions
On Sat, 9 Sept 2023 at 17:38, Richard Henderson wrote: > > On 9/7/23 09:03, Peter Maydell wrote: > > The FEAT_MOPS SETG* instructions are very similar to the SET* > > instructions, but as well as setting memory contents they also > > set the MTE tags. They are architecturally required to operate > > on tag-granule aligned regions only. > > > > Signed-off-by: Peter Maydell > > --- > > target/arm/internals.h | 10 > > target/arm/tcg/a64.decode | 5 ++ > > target/arm/tcg/helper-a64.c| 92 +++--- > > target/arm/tcg/mte_helper.c| 40 +++ > > target/arm/tcg/translate-a64.c | 20 +--- > > 5 files changed, 155 insertions(+), 12 deletions(-) > > > > diff --git a/target/arm/internals.h b/target/arm/internals.h > > index a70a7fd50f6..642f77df29b 100644 > > --- a/target/arm/internals.h > > +++ b/target/arm/internals.h > > @@ -1300,6 +1300,16 @@ uint64_t mte_mops_probe(CPUARMState *env, uint64_t > > ptr, uint64_t size, > > void mte_check_fail(CPUARMState *env, uint32_t desc, > > uint64_t dirty_ptr, uintptr_t ra); > > > > +/** > > + * mte_mops_set_tags: Set MTE tags for a portion of a FEAT_MOPS operation > > + * @env: CPU env > > + * @dirty_ptr: Start address of memory region (dirty pointer) > > + * @size: length of region (guaranteed not to cross page boundary) > > + * @desc: MTEDESC descriptor word > > + */ > > +void mte_mops_set_tags(CPUARMState *env, uint64_t dirty_ptr, uint64_t size, > > + uint32_t desc); > > + > > static inline int allocation_tag_from_addr(uint64_t ptr) > > { > > return extract64(ptr, 56, 4); > > diff --git a/target/arm/tcg/a64.decode b/target/arm/tcg/a64.decode > > index 8cddc207a6f..46caeb59fe5 100644 > > --- a/target/arm/tcg/a64.decode > > +++ b/target/arm/tcg/a64.decode > > @@ -569,3 +569,8 @@ STZ2G 11011001 11 1 . 11 . . > > @ldst_tag p=0 w=1 > > SETP00 011001110 . 00 . . 01 . . @set > > SETM00 011001110 . 01 . . 01 . . @set > > SETE00 011001110 . 10 . . 01 . . @set > > + > > +# Like SET, but also setting MTE tags > > +SETGP 00 011101110 . 00 . . 01 . . 
@set > > +SETGM 00 011101110 . 01 . . 01 . . @set > > +SETGE 00 011101110 . 10 . . 01 . . @set > > diff --git a/target/arm/tcg/helper-a64.c b/target/arm/tcg/helper-a64.c > > index 73169eb0b56..96780067262 100644 > > --- a/target/arm/tcg/helper-a64.c > > +++ b/target/arm/tcg/helper-a64.c > > @@ -1103,6 +1103,54 @@ static uint64_t set_step(CPUARMState *env, uint64_t > > toaddr, > > return setsize; > > } > > > > +/* > > + * Similar, but setting tags. The architecture requires us to do this > > + * in 16-byte chunks. SETP accesses are not tag checked; they set > > + * the tags. > > + */ > > +static uint64_t set_step_tags(CPUARMState *env, uint64_t toaddr, > > + uint64_t setsize, uint32_t data, int memidx, > > + uint32_t *mtedesc, uintptr_t ra) > > +{ > > +void *mem; > > +uint64_t cleanaddr; > > + > > +setsize = MIN(setsize, page_limit(toaddr)); > > + > > +cleanaddr = useronly_clean_ptr(toaddr); > > +/* > > + * Trapless lookup: returns NULL for invalid page, I/O, > > + * watchpoints, clean pages, etc. > > + */ > > +mem = tlb_vaddr_to_host(env, cleanaddr, MMU_DATA_STORE, memidx); > > + > > +#ifndef CONFIG_USER_ONLY > > +if (unlikely(!mem)) { > > +/* > > + * Slow-path: just do one write. This will handle the > > + * watchpoint, invalid page, etc handling correctly. > > + * The architecture requires that we do 16 bytes at a time, > > + * and we know both ptr and size are 16 byte aligned. > > + * For clean code pages, the next iteration will see > > + * the page dirty and will use the fast path. > > + */ > > +uint64_t repldata = data * 0x0101010101010101ULL; > > +cpu_stq_mmuidx_ra(env, toaddr, repldata, memidx, ra); > > +cpu_stq_mmuidx_ra(env, toaddr + 8, repldata, memidx, ra); > > +mte_mops_set_tags(env, toaddr, 16, *mtedesc); > > +return 16; > > You can use cpu_st16_mmu for one store. 
> > > @@ -1154,20 +1219,27 @@ void HELPER(setp)(CPUARMState *env, uint32_t > > syndrome, uint32_t mtedesc) > > int rd = mops_destreg(syndrome); > > int rs = mops_srcreg(syndrome); > > int rn = mops_sizereg(syndrome); > > +bool set_tags = mops_is_setg(syndrome); > > uint8_t data = env->xregs[rs]; > > uint32_t memidx = FIELD_EX32(mtedesc, MTEDESC, MIDX); > > uint64_t toaddr = env->xregs[rd]; > > uint64_t setsize = env->xregs[rn]; > > uint64_t stagesetsize, step; > > uintptr_t ra = GETPC(); > > +StepFn *stepfn = set_tags ? set_step_tags : set_step; > > > > check_mops_enabled(env, ra); > > > >
Re: [PATCH 11/14] target/arm: Implement the SETG* instructions
On 9/7/23 09:03, Peter Maydell wrote: The FEAT_MOPS SETG* instructions are very similar to the SET* instructions, but as well as setting memory contents they also set the MTE tags. They are architecturally required to operate on tag-granule aligned regions only. Signed-off-by: Peter Maydell --- target/arm/internals.h | 10 target/arm/tcg/a64.decode | 5 ++ target/arm/tcg/helper-a64.c| 92 +++--- target/arm/tcg/mte_helper.c| 40 +++ target/arm/tcg/translate-a64.c | 20 +--- 5 files changed, 155 insertions(+), 12 deletions(-) diff --git a/target/arm/internals.h b/target/arm/internals.h index a70a7fd50f6..642f77df29b 100644 --- a/target/arm/internals.h +++ b/target/arm/internals.h @@ -1300,6 +1300,16 @@ uint64_t mte_mops_probe(CPUARMState *env, uint64_t ptr, uint64_t size, void mte_check_fail(CPUARMState *env, uint32_t desc, uint64_t dirty_ptr, uintptr_t ra); +/** + * mte_mops_set_tags: Set MTE tags for a portion of a FEAT_MOPS operation + * @env: CPU env + * @dirty_ptr: Start address of memory region (dirty pointer) + * @size: length of region (guaranteed not to cross page boundary) + * @desc: MTEDESC descriptor word + */ +void mte_mops_set_tags(CPUARMState *env, uint64_t dirty_ptr, uint64_t size, + uint32_t desc); + static inline int allocation_tag_from_addr(uint64_t ptr) { return extract64(ptr, 56, 4); diff --git a/target/arm/tcg/a64.decode b/target/arm/tcg/a64.decode index 8cddc207a6f..46caeb59fe5 100644 --- a/target/arm/tcg/a64.decode +++ b/target/arm/tcg/a64.decode @@ -569,3 +569,8 @@ STZ2G 11011001 11 1 . 11 . . @ldst_tag p=0 w=1 SETP00 011001110 . 00 . . 01 . . @set SETM00 011001110 . 01 . . 01 . . @set SETE00 011001110 . 10 . . 01 . . @set + +# Like SET, but also setting MTE tags +SETGP 00 011101110 . 00 . . 01 . . @set +SETGM 00 011101110 . 01 . . 01 . . @set +SETGE 00 011101110 . 10 . . 01 . . 
@set diff --git a/target/arm/tcg/helper-a64.c b/target/arm/tcg/helper-a64.c index 73169eb0b56..96780067262 100644 --- a/target/arm/tcg/helper-a64.c +++ b/target/arm/tcg/helper-a64.c @@ -1103,6 +1103,54 @@ static uint64_t set_step(CPUARMState *env, uint64_t toaddr, return setsize; } +/* + * Similar, but setting tags. The architecture requires us to do this + * in 16-byte chunks. SETP accesses are not tag checked; they set + * the tags. + */ +static uint64_t set_step_tags(CPUARMState *env, uint64_t toaddr, + uint64_t setsize, uint32_t data, int memidx, + uint32_t *mtedesc, uintptr_t ra) +{ +void *mem; +uint64_t cleanaddr; + +setsize = MIN(setsize, page_limit(toaddr)); + +cleanaddr = useronly_clean_ptr(toaddr); +/* + * Trapless lookup: returns NULL for invalid page, I/O, + * watchpoints, clean pages, etc. + */ +mem = tlb_vaddr_to_host(env, cleanaddr, MMU_DATA_STORE, memidx); + +#ifndef CONFIG_USER_ONLY +if (unlikely(!mem)) { +/* + * Slow-path: just do one write. This will handle the + * watchpoint, invalid page, etc handling correctly. + * The architecture requires that we do 16 bytes at a time, + * and we know both ptr and size are 16 byte aligned. + * For clean code pages, the next iteration will see + * the page dirty and will use the fast path. + */ +uint64_t repldata = data * 0x0101010101010101ULL; +cpu_stq_mmuidx_ra(env, toaddr, repldata, memidx, ra); +cpu_stq_mmuidx_ra(env, toaddr + 8, repldata, memidx, ra); +mte_mops_set_tags(env, toaddr, 16, *mtedesc); +return 16; You can use cpu_st16_mmu for one store. 
@@ -1154,20 +1219,27 @@ void HELPER(setp)(CPUARMState *env, uint32_t syndrome, uint32_t mtedesc) int rd = mops_destreg(syndrome); int rs = mops_srcreg(syndrome); int rn = mops_sizereg(syndrome); +bool set_tags = mops_is_setg(syndrome); uint8_t data = env->xregs[rs]; uint32_t memidx = FIELD_EX32(mtedesc, MTEDESC, MIDX); uint64_t toaddr = env->xregs[rd]; uint64_t setsize = env->xregs[rn]; uint64_t stagesetsize, step; uintptr_t ra = GETPC(); +StepFn *stepfn = set_tags ? set_step_tags : set_step; check_mops_enabled(env, ra); if (setsize > INT64_MAX) { setsize = INT64_MAX; +if (set_tags) { +setsize &= ~0xf; +} } I think it would be a little better if set_tags was visible to the compiler, via inlining, so that all of the conditions can be folded away. That said, all of the above is optimization not bugs. Reviewed-by: Richard Henderson r~
[PATCH 11/14] target/arm: Implement the SETG* instructions
The FEAT_MOPS SETG* instructions are very similar to the SET* instructions, but as well as setting memory contents they also set the MTE tags. They are architecturally required to operate on tag-granule aligned regions only. Signed-off-by: Peter Maydell --- target/arm/internals.h | 10 target/arm/tcg/a64.decode | 5 ++ target/arm/tcg/helper-a64.c| 92 +++--- target/arm/tcg/mte_helper.c| 40 +++ target/arm/tcg/translate-a64.c | 20 +--- 5 files changed, 155 insertions(+), 12 deletions(-) diff --git a/target/arm/internals.h b/target/arm/internals.h index a70a7fd50f6..642f77df29b 100644 --- a/target/arm/internals.h +++ b/target/arm/internals.h @@ -1300,6 +1300,16 @@ uint64_t mte_mops_probe(CPUARMState *env, uint64_t ptr, uint64_t size, void mte_check_fail(CPUARMState *env, uint32_t desc, uint64_t dirty_ptr, uintptr_t ra); +/** + * mte_mops_set_tags: Set MTE tags for a portion of a FEAT_MOPS operation + * @env: CPU env + * @dirty_ptr: Start address of memory region (dirty pointer) + * @size: length of region (guaranteed not to cross page boundary) + * @desc: MTEDESC descriptor word + */ +void mte_mops_set_tags(CPUARMState *env, uint64_t dirty_ptr, uint64_t size, + uint32_t desc); + static inline int allocation_tag_from_addr(uint64_t ptr) { return extract64(ptr, 56, 4); diff --git a/target/arm/tcg/a64.decode b/target/arm/tcg/a64.decode index 8cddc207a6f..46caeb59fe5 100644 --- a/target/arm/tcg/a64.decode +++ b/target/arm/tcg/a64.decode @@ -569,3 +569,8 @@ STZ2G 11011001 11 1 . 11 . . @ldst_tag p=0 w=1 SETP00 011001110 . 00 . . 01 . . @set SETM00 011001110 . 01 . . 01 . . @set SETE00 011001110 . 10 . . 01 . . @set + +# Like SET, but also setting MTE tags +SETGP 00 011101110 . 00 . . 01 . . @set +SETGM 00 011101110 . 01 . . 01 . . @set +SETGE 00 011101110 . 10 . . 01 . . 
@set diff --git a/target/arm/tcg/helper-a64.c b/target/arm/tcg/helper-a64.c index 73169eb0b56..96780067262 100644 --- a/target/arm/tcg/helper-a64.c +++ b/target/arm/tcg/helper-a64.c @@ -1103,6 +1103,54 @@ static uint64_t set_step(CPUARMState *env, uint64_t toaddr, return setsize; } +/* + * Similar, but setting tags. The architecture requires us to do this + * in 16-byte chunks. SETP accesses are not tag checked; they set + * the tags. + */ +static uint64_t set_step_tags(CPUARMState *env, uint64_t toaddr, + uint64_t setsize, uint32_t data, int memidx, + uint32_t *mtedesc, uintptr_t ra) +{ +void *mem; +uint64_t cleanaddr; + +setsize = MIN(setsize, page_limit(toaddr)); + +cleanaddr = useronly_clean_ptr(toaddr); +/* + * Trapless lookup: returns NULL for invalid page, I/O, + * watchpoints, clean pages, etc. + */ +mem = tlb_vaddr_to_host(env, cleanaddr, MMU_DATA_STORE, memidx); + +#ifndef CONFIG_USER_ONLY +if (unlikely(!mem)) { +/* + * Slow-path: just do one write. This will handle the + * watchpoint, invalid page, etc handling correctly. + * The architecture requires that we do 16 bytes at a time, + * and we know both ptr and size are 16 byte aligned. + * For clean code pages, the next iteration will see + * the page dirty and will use the fast path. 
+ */ +uint64_t repldata = data * 0x0101010101010101ULL; +cpu_stq_mmuidx_ra(env, toaddr, repldata, memidx, ra); +cpu_stq_mmuidx_ra(env, toaddr + 8, repldata, memidx, ra); +mte_mops_set_tags(env, toaddr, 16, *mtedesc); +return 16; +} +#endif +/* Easy case: just memset the host memory */ +memset(mem, data, setsize); +mte_mops_set_tags(env, toaddr, setsize, *mtedesc); +return setsize; +} + +typedef uint64_t StepFn(CPUARMState *env, uint64_t toaddr, +uint64_t setsize, uint32_t data, +int memidx, uint32_t *mtedesc, uintptr_t ra); + /* Extract register numbers from a MOPS exception syndrome value */ static int mops_destreg(uint32_t syndrome) { @@ -1119,6 +1167,11 @@ static int mops_sizereg(uint32_t syndrome) return extract32(syndrome, 0, 5); } +static bool mops_is_setg(uint32_t syndrome) +{ +return extract32(syndrome, 23, 1); +} + /* * Return true if TCMA and TBI bits mean we need to do MTE checks. * We only need to do this once per MOPS insn, not for every page. @@ -1137,6 +1190,18 @@ static bool mte_checks_needed(uint64_t ptr, uint32_t desc) return !tcma_check(desc, bit55, allocation_tag_from_addr(ptr)); } +/* Take an exception if the SETG addr/size are not granule aligned */ +static void check_setg_alignment(CPUARMState *env, uint64_t ptr, uint64_t size, + uint32_t memidx, uintptr_t