On 22.05.2024 15:17, Andrew Cooper wrote: > trace_shadow_fixup() and trace_not_shadow_fault() both write out identical > trace records. Reimplement them in terms of a common sh_trace_gl1e_va(). > > There's no need to pack the trace record, even in the case of PAE paging.
Isn't this altering the generated trace record for the 4-level case, with its size changing from 20 to 24 bytes (tail padding appears once the struct is no longer packed)? > --- a/xen/arch/x86/mm/shadow/multi.c > +++ b/xen/arch/x86/mm/shadow/multi.c > @@ -1987,51 +1987,26 @@ static void sh_trace_va(uint32_t event, guest_va_t va) > sh_trace(event, sizeof(va), &va); > } > > -static inline void trace_shadow_fixup(guest_l1e_t gl1e, > - guest_va_t va) > +/* Shadow trace event with a gl1e, linear address and flags. */ > +static void sh_trace_gl1e_va(uint32_t event, guest_l1e_t gl1e, guest_va_t va) > { > if ( tb_init_done ) > { > - struct __packed { > - /* for PAE, guest_l1e may be 64 while guest_va may be 32; > - so put it first for alignment sake. */ > - guest_l1e_t gl1e; > - guest_va_t va; > - u32 flags; > - } d; > - u32 event; > - > - event = TRC_SHADOW_FIXUP | ((GUEST_PAGING_LEVELS-2)<<8); > - > - d.gl1e = gl1e; > - d.va = va; > - d.flags = this_cpu(trace_shadow_path_flags); > - > - trace(event, sizeof(d), &d); > - } > -} > - > -static inline void trace_not_shadow_fault(guest_l1e_t gl1e, > - guest_va_t va) > -{ > - if ( tb_init_done ) > - { > - struct __packed { > - /* for PAE, guest_l1e may be 64 while guest_va may be 32; > - so put it first for alignment sake. */ > + struct { > + /* > + * For GUEST_PAGING_LEVELS=3 (PAE paging), guest_l1e is 64 while > + * guest_va is 32. Put it first to avoid padding. > + */ > guest_l1e_t gl1e; > guest_va_t va; > - u32 flags; > - } d; > - u32 event; > - > - event = TRC_SHADOW_NOT_SHADOW | ((GUEST_PAGING_LEVELS-2)<<8); > - > - d.gl1e = gl1e; > - d.va = va; > - d.flags = this_cpu(trace_shadow_path_flags); > - > - trace(event, sizeof(d), &d); > + uint32_t flags; > + } d = { > + .gl1e = gl1e, > + .va = va, > + .flags = this_cpu(trace_shadow_path_flags), > + }; > + > + sh_trace(event, sizeof(d), &d); > } > } Unlike in patch 1, it's less clear here whether the tb_init_done check is actually better left where it is. 
In principle the compiler should be able to re-arrange code enough to make it identical no matter which way it's written, at which point it might again be more desirable to have the check solely in sh_trace(). Jan