On 22.05.2024 15:17, Andrew Cooper wrote:
> trace_shadow_fixup() and trace_not_shadow_fault() both write out identical
> trace records.  Reimplement them in terms of a common sh_trace_gl1e_va().
> 
> There's no need to pack the trace record, even in the case of PAE paging.

Isn't this altering the generated trace record for the 4-level case, with
its size changing from 20 to 24 bytes (as without __packed the structure
gets padded to a multiple of 8 bytes)?

> --- a/xen/arch/x86/mm/shadow/multi.c
> +++ b/xen/arch/x86/mm/shadow/multi.c
> @@ -1987,51 +1987,26 @@ static void sh_trace_va(uint32_t event, guest_va_t va)
>          sh_trace(event, sizeof(va), &va);
>  }
>  
> -static inline void trace_shadow_fixup(guest_l1e_t gl1e,
> -                                      guest_va_t va)
> +/* Shadow trace event with a gl1e, linear address and flags. */
> +static void sh_trace_gl1e_va(uint32_t event, guest_l1e_t gl1e, guest_va_t va)
>  {
>      if ( tb_init_done )
>      {
> -        struct __packed {
> -            /* for PAE, guest_l1e may be 64 while guest_va may be 32;
> -               so put it first for alignment sake. */
> -            guest_l1e_t gl1e;
> -            guest_va_t va;
> -            u32 flags;
> -        } d;
> -        u32 event;
> -
> -        event = TRC_SHADOW_FIXUP | ((GUEST_PAGING_LEVELS-2)<<8);
> -
> -        d.gl1e = gl1e;
> -        d.va = va;
> -        d.flags = this_cpu(trace_shadow_path_flags);
> -
> -        trace(event, sizeof(d), &d);
> -    }
> -}
> -
> -static inline void trace_not_shadow_fault(guest_l1e_t gl1e,
> -                                          guest_va_t va)
> -{
> -    if ( tb_init_done )
> -    {
> -        struct __packed {
> -            /* for PAE, guest_l1e may be 64 while guest_va may be 32;
> -               so put it first for alignment sake. */
> +        struct {
> +            /*
> +             * For GUEST_PAGING_LEVELS=3 (PAE paging), guest_l1e is 64 while
> +             * guest_va is 32.  Put it first to avoid padding.
> +             */
>              guest_l1e_t gl1e;
>              guest_va_t va;
> -            u32 flags;
> -        } d;
> -        u32 event;
> -
> -        event = TRC_SHADOW_NOT_SHADOW | ((GUEST_PAGING_LEVELS-2)<<8);
> -
> -        d.gl1e = gl1e;
> -        d.va = va;
> -        d.flags = this_cpu(trace_shadow_path_flags);
> -
> -        trace(event, sizeof(d), &d);
> +            uint32_t flags;
> +        } d = {
> +            .gl1e = gl1e,
> +            .va = va,
> +            .flags = this_cpu(trace_shadow_path_flags),
> +        };
> +
> +        sh_trace(event, sizeof(d), &d);
>      }
>  }

Unlike in patch 1, it's less clear here whether it is actually better to
keep the tb_init_done check where it is. In principle the compiler should
be able to re-arrange code enough to make it identical no matter which way
it's written, at which point it might again be more desirable to have the
check solely in sh_trace().

Jan

Reply via email to