On Fri Mar 8, 2024 at 9:19 PM AEST, Harsh Prateek Bora wrote: > Introduce the nested PAPR hcalls: > - H_GUEST_GET_STATE which is used to get state of a nested guest or > a guest VCPU. The value field for each element in the request is > destination to be updated to reflect current state on success. > - H_GUEST_SET_STATE which is used to modify the state of a guest or > a guest VCPU. On success, guest (or its VCPU) state shall be > updated as per the value field for the requested element(s). > > Signed-off-by: Michael Neuling <mi...@neuling.org> > Signed-off-by: Harsh Prateek Bora <hars...@linux.ibm.com>
Reviewed-by: Nicholas Piggin <npig...@gmail.com> > --- > include/hw/ppc/spapr.h | 3 + > include/hw/ppc/spapr_nested.h | 23 +++ > hw/ppc/spapr_nested.c | 268 ++++++++++++++++++++++++++++++++++ > 3 files changed, 294 insertions(+) > > diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h > index 070135793a..6223873641 100644 > --- a/include/hw/ppc/spapr.h > +++ b/include/hw/ppc/spapr.h > @@ -366,6 +366,7 @@ struct SpaprMachineState { > #define H_OVERLAP -68 > #define H_STATE -75 > #define H_IN_USE -77 > +#define H_INVALID_ELEMENT_VALUE -81 > #define H_UNSUPPORTED_FLAG -256 > #define H_MULTI_THREADS_ACTIVE -9005 > > @@ -589,6 +590,8 @@ struct SpaprMachineState { > #define H_GUEST_SET_CAPABILITIES 0x464 > #define H_GUEST_CREATE 0x470 > #define H_GUEST_CREATE_VCPU 0x474 > +#define H_GUEST_GET_STATE 0x478 > +#define H_GUEST_SET_STATE 0x47C > #define H_GUEST_DELETE 0x488 > > #define MAX_HCALL_OPCODE H_GUEST_DELETE > diff --git a/include/hw/ppc/spapr_nested.h b/include/hw/ppc/spapr_nested.h > index 433d93c480..bd43c6b6ef 100644 > --- a/include/hw/ppc/spapr_nested.h > +++ b/include/hw/ppc/spapr_nested.h > @@ -224,6 +224,10 @@ typedef struct SpaprMachineStateNestedGuest { > #define HVMASK_MSR 0xEBFFFFFFFFBFEFFF > #define HVMASK_HDEXCR 0x00000000FFFFFFFF > #define HVMASK_TB_OFFSET 0x000000FFFFFFFFFF > +#define GSB_MAX_BUF_SIZE (1024 * 1024) > +#define H_GUEST_GETSET_STATE_FLAG_GUEST_WIDE 0x8000000000000000 > +#define GUEST_STATE_REQUEST_GUEST_WIDE 0x1 > +#define GUEST_STATE_REQUEST_SET 0x2 > > /* As per ISA v3.1B, following bits are reserved: > * 0:2 > @@ -321,6 +325,25 @@ typedef struct SpaprMachineStateNestedGuest { > #define GSE_ENV_DWM(i, f, m) \ > GUEST_STATE_ELEMENT_MSK(i, 8, f, copy_state_8to8, m) > > +struct guest_state_element { > + uint16_t id; > + uint16_t size; > + uint8_t value[]; > +} QEMU_PACKED; > + > +struct guest_state_buffer { > + uint32_t num_elements; > + struct guest_state_element elements[]; > +} QEMU_PACKED; > + > +/* Actual buffer plus some metadata about the request */ > +struct guest_state_request { > + struct guest_state_buffer *gsb; > + int64_t buf; > + int64_t len; > + uint16_t flags; > +}; > + > /* > * Register state for entering a nested guest with H_ENTER_NESTED. > * New member must be added at the end. > diff --git a/hw/ppc/spapr_nested.c b/hw/ppc/spapr_nested.c > index 07dc294c5a..ca99805ce8 100644 > --- a/hw/ppc/spapr_nested.c > +++ b/hw/ppc/spapr_nested.c > @@ -1028,6 +1028,140 @@ void spapr_nested_gsb_init(void) > } > } > > +static struct guest_state_element *guest_state_element_next( > + struct guest_state_element *element, > + int64_t *len, > + int64_t *num_elements) > +{ > + uint16_t size; > + > + /* size is of element->value[] only. Not whole guest_state_element */ > + size = be16_to_cpu(element->size); > + > + if (len) { > + *len -= size + offsetof(struct guest_state_element, value); > + } > + > + if (num_elements) { > + *num_elements -= 1; > + } > + > + return (struct guest_state_element *)(element->value + size); > +} > + > +static > +struct guest_state_element_type *guest_state_element_type_find(uint16_t id) > +{ > + int i; > + > + for (i = 0; i < ARRAY_SIZE(guest_state_element_types); i++) > + if (id == guest_state_element_types[i].id) { > + return &guest_state_element_types[i]; > + } > + > + return NULL; > +} > + > +static void log_element(struct guest_state_element *element, > + struct guest_state_request *gsr) > +{ > + qemu_log_mask(LOG_GUEST_ERROR, "h_guest_%s_state id:0x%04x size:0x%04x", > + gsr->flags & GUEST_STATE_REQUEST_SET ? "set" : "get", > + be16_to_cpu(element->id), be16_to_cpu(element->size)); > + qemu_log_mask(LOG_GUEST_ERROR, "buf:0x%016lx ...\n", > + be64_to_cpu(*(uint64_t *)element->value)); > +} > + > +static bool guest_state_request_check(struct guest_state_request *gsr) > +{ > + int64_t num_elements, len = gsr->len; > + struct guest_state_buffer *gsb = gsr->gsb; > + struct guest_state_element *element; > + struct guest_state_element_type *type; > + uint16_t id, size; > + > + /* gsb->num_elements = 0 == 32 bits long */ > + assert(len >= 4); > + > + num_elements = be32_to_cpu(gsb->num_elements); > + element = gsb->elements; > + len -= sizeof(gsb->num_elements); > + > + /* Walk the buffer to validate the length */ > + while (num_elements) { > + > + id = be16_to_cpu(element->id); > + size = be16_to_cpu(element->size); > + > + if (false) { > + log_element(element, gsr); > + } > + /* buffer size too small */ > + if (len < 0) { > + return false; > + } > + > + type = guest_state_element_type_find(id); > + if (!type) { > + qemu_log_mask(LOG_GUEST_ERROR,"Element ID %04x unknown\n", id); > + log_element(element, gsr); > + return false; > + } > + > + if (id == GSB_HV_VCPU_IGNORED_ID) { > + goto next_element; > + } > + > + if (size != type->size) { > + qemu_log_mask(LOG_GUEST_ERROR,"Size mismatch. Element ID:%04x." > + "Size Exp:%i Got:%i\n", id, type->size, size); > + log_element(element, gsr); > + return false; > + } > + > + if ((type->flags & GUEST_STATE_ELEMENT_TYPE_FLAG_READ_ONLY) && > + (gsr->flags & GUEST_STATE_REQUEST_SET)) { > + qemu_log_mask(LOG_GUEST_ERROR,"trying to set a read-only Element > " > + "ID:%04x.\n", id); > + return false; > + } > + > + if (type->flags & GUEST_STATE_ELEMENT_TYPE_FLAG_GUEST_WIDE) { > + /* guest wide element type */ > + if (!(gsr->flags & GUEST_STATE_REQUEST_GUEST_WIDE)) { > + qemu_log_mask(LOG_GUEST_ERROR, "trying to set a guest wide " > + "Element ID:%04x.\n", id); > + return false; > + } > + } else { > + /* thread wide element type */ > + if (gsr->flags & GUEST_STATE_REQUEST_GUEST_WIDE) { > + qemu_log_mask(LOG_GUEST_ERROR, "trying to set a thread wide " > + "Element ID:%04x.\n", id); > + return false; > + } > + } > +next_element: > + element = guest_state_element_next(element, &len, &num_elements); > + > + } > + return true; > +} > + > +static bool is_gsr_invalid(struct guest_state_request *gsr, > + struct guest_state_element *element, > + struct guest_state_element_type *type) > +{ > + if ((gsr->flags & GUEST_STATE_REQUEST_SET) && > + (*(uint64_t *)(element->value) & ~(type->mask))) { > + log_element(element, gsr); > + qemu_log_mask(LOG_GUEST_ERROR, "L1 can't set reserved bits i" > + "(allowed mask: 0x%08lx)\n", type->mask); > + return true; > + } > + return false; > +} > + > static target_ulong h_guest_get_capabilities(PowerPCCPU *cpu, > SpaprMachineState *spapr, > target_ulong opcode, > @@ -1261,6 +1395,136 @@ static target_ulong h_guest_create_vcpu(PowerPCCPU > *cpu, > return H_SUCCESS; > } > > +static target_ulong getset_state(SpaprMachineStateNestedGuest *guest, > + uint64_t vcpuid, > + struct guest_state_request *gsr) > +{ > + void *ptr; > + uint16_t id; > + struct guest_state_element *element; > + struct guest_state_element_type *type; > + int64_t lenleft, num_elements; > + > + lenleft = gsr->len; > + > + if (!guest_state_request_check(gsr)) { > + return H_P3; > + } > + > + num_elements = be32_to_cpu(gsr->gsb->num_elements); > + element = gsr->gsb->elements; > + /* Process the elements */ > + while (num_elements) { > + type = NULL; > + /* log_element(element, gsr); */ > + > + id = be16_to_cpu(element->id); > + if (id == GSB_HV_VCPU_IGNORED_ID) { > + goto next_element; > + } > + > + type = guest_state_element_type_find(id); > + assert(type); > + > + /* Get pointer to guest data to get/set */ > + if (type->location && type->copy) { > + ptr = type->location(guest, vcpuid); > + assert(ptr); > + if (!~(type->mask) && is_gsr_invalid(gsr, element, type)) { > + return H_INVALID_ELEMENT_VALUE; > + } > + type->copy(ptr + type->offset, element->value, > + gsr->flags & GUEST_STATE_REQUEST_SET ? true : false); > + } > + > +next_element: > + element = guest_state_element_next(element, &lenleft, &num_elements); > + } > + > + return H_SUCCESS; > +} > + > +static target_ulong map_and_getset_state(PowerPCCPU *cpu, > + SpaprMachineStateNestedGuest *guest, > + uint64_t vcpuid, > + struct guest_state_request *gsr) > +{ > + target_ulong rc; > + int64_t len; > + bool is_write; > + > + len = gsr->len; > + /* only get_state would require write access to the provided buffer */ > + is_write = (gsr->flags & GUEST_STATE_REQUEST_SET) ? false : true; > + gsr->gsb = address_space_map(CPU(cpu)->as, gsr->buf, (uint64_t *)&len, > + is_write, MEMTXATTRS_UNSPECIFIED); > + if (!gsr->gsb) { > + rc = H_P3; > + goto out1; > + } > + > + if (len != gsr->len) { > + rc = H_P3; > + goto out1; > + } > + > + rc = getset_state(guest, vcpuid, gsr); > + > +out1: > + address_space_unmap(CPU(cpu)->as, gsr->gsb, len, is_write, len); > + return rc; > +} > + > +static target_ulong h_guest_getset_state(PowerPCCPU *cpu, > + SpaprMachineState *spapr, > + target_ulong *args, > + bool set) > +{ > + target_ulong flags = args[0]; > + target_ulong lpid = args[1]; > + target_ulong vcpuid = args[2]; > + target_ulong buf = args[3]; > + target_ulong buflen = args[4]; > + struct guest_state_request gsr; > + SpaprMachineStateNestedGuest *guest; > + > + guest = spapr_get_nested_guest(spapr, lpid); > + if (!guest) { > + return H_P2; > + } > + gsr.buf = buf; > + assert(buflen <= GSB_MAX_BUF_SIZE); > + gsr.len = buflen; > + gsr.flags = 0; > + if (flags & H_GUEST_GETSET_STATE_FLAG_GUEST_WIDE) { > + gsr.flags |= GUEST_STATE_REQUEST_GUEST_WIDE; > + } > + if (flags & !H_GUEST_GETSET_STATE_FLAG_GUEST_WIDE) { > + return H_PARAMETER; /* flag not supported yet */ > + } > + > + if (set) { > + gsr.flags |= GUEST_STATE_REQUEST_SET; > + } > + return map_and_getset_state(cpu, guest, vcpuid, &gsr); > +} > + > +static target_ulong h_guest_set_state(PowerPCCPU *cpu, > + SpaprMachineState *spapr, > + target_ulong opcode, > + target_ulong *args) > +{ > + return h_guest_getset_state(cpu, spapr, args, true); > +} > + > +static target_ulong h_guest_get_state(PowerPCCPU *cpu, > + SpaprMachineState *spapr, > + target_ulong opcode, > + target_ulong *args) > +{ > + return h_guest_getset_state(cpu, spapr, args, false); > +} > + > void spapr_register_nested_hv(void) > { > spapr_register_hypercall(KVMPPC_H_SET_PARTITION_TABLE, h_set_ptbl); > @@ -1284,6 +1548,8 @@ void spapr_register_nested_papr(void) > spapr_register_hypercall(H_GUEST_CREATE , h_guest_create); > spapr_register_hypercall(H_GUEST_DELETE , h_guest_delete); > spapr_register_hypercall(H_GUEST_CREATE_VCPU , h_guest_create_vcpu); > + spapr_register_hypercall(H_GUEST_SET_STATE , h_guest_set_state); > + spapr_register_hypercall(H_GUEST_GET_STATE , h_guest_get_state); > } > > void spapr_unregister_nested_papr(void) > @@ -1293,6 +1559,8 @@ void spapr_unregister_nested_papr(void) > spapr_unregister_hypercall(H_GUEST_CREATE); > spapr_unregister_hypercall(H_GUEST_DELETE); > spapr_unregister_hypercall(H_GUEST_CREATE_VCPU); > + spapr_unregister_hypercall(H_GUEST_SET_STATE); > + spapr_unregister_hypercall(H_GUEST_GET_STATE); > } > > #else