On 3/9/20 2:37 PM, David Hildenbrand wrote: > On 09.03.20 12:21, Janosch Frank wrote: >> The unpack facility provides the means to set up a protected guest. A >> protected guest cannot be introspected by the hypervisor or any >> user/administrator of the machine it is running on. >> >> Protected guests are encrypted at rest and need a special boot >> mechanism via diag308 subcode 8 and 10. >> >> Code 8 sets the PV specific IPLB which is retained separately from >> those set via code 5. >> >> Code 10 is used to unpack the VM into protected memory, verify its >> integrity and start it. >> >> Signed-off-by: Janosch Frank <fran...@linux.ibm.com> >> Signed-off-by: Christian Borntraeger <borntrae...@de.ibm.com> [Changes >> to machine] > > As you signed this patch off, maybe this should rather be a Co-developed-by: >
Ack > [...] > >> { >> S390IPLState *ipl = get_ipl_device(); >> @@ -561,7 +581,8 @@ void s390_ipl_reset_request(CPUState *cs, enum >> s390_reset reset_type) >> { >> S390IPLState *ipl = get_ipl_device(); >> >> - if (reset_type == S390_RESET_EXTERNAL || reset_type == >> S390_RESET_REIPL) { >> + if (reset_type == S390_RESET_EXTERNAL || reset_type == S390_RESET_REIPL >> || >> + reset_type == S390_RESET_PV) { >> /* use CPU 0 for full resets */ >> ipl->reset_cpu_index = 0; > > This looks wrong. In case of an error, you modify the registers of a > theoretically unrelated CPU. It does indeed, will fix. > > >> } else { >> @@ -635,6 +656,38 @@ static void s390_ipl_prepare_qipl(S390CPU *cpu) >> cpu_physical_memory_unmap(addr, len, 1, len); >> } >> >> +int s390_ipl_prepare_pv_header(void) >> +{ >> + S390IPLState *ipl = get_ipl_device(); >> + IPLBlockPV *ipib_pv = &ipl->iplb_pv.pv; >> + void *hdr = g_malloc(ipib_pv->pv_header_len); > > Should there be an upper limit? The guest can allocate quite some memory > this way and theoretically crash the VM. Yes, it's currently two pages max. > >> + int rc; >> + >> + cpu_physical_memory_read(ipib_pv->pv_header_addr, hdr, >> + ipib_pv->pv_header_len); > > Shouldn't we validate if this memory is accessible at all? Yes, I moved the check into iplb_valid so we don't need to worry about that here. > >> + rc = s390_pv_set_sec_parms((uint64_t)hdr, >> + ipib_pv->pv_header_len); >> + g_free(hdr); >> + return rc; >> +} >> + >> +int s390_ipl_pv_unpack(void) >> +{ >> + int i, rc = 0; > > NIT: These declarations last. Ack > >> + S390IPLState *ipl = get_ipl_device(); >> + IPLBlockPV *ipib_pv = &ipl->iplb_pv.pv; > > use s390_ipl_get_iplb_pv() and assert that we don't get NULL? We already check for NULL in the diag code before the DIAG308_PV_START reset is set. I've used your other suggestion though. 
> >> + >> + for (i = 0; i < ipib_pv->num_comp; i++) { >> + rc = s390_pv_unpack(ipib_pv->components[i].addr, >> + TARGET_PAGE_ALIGN(ipib_pv->components[i].size), >> + ipib_pv->components[i].tweak_pref); >> + if (rc) { >> + break; >> + } >> + } >> + return rc; >> +} >> + >> void s390_ipl_prepare_cpu(S390CPU *cpu) >> { >> S390IPLState *ipl = get_ipl_device(); >> diff --git a/hw/s390x/ipl.h b/hw/s390x/ipl.h >> index d4813105db..b2ccdd9dae 100644 >> --- a/hw/s390x/ipl.h >> +++ b/hw/s390x/ipl.h >> @@ -15,6 +15,24 @@ >> #include "cpu.h" >> #include "hw/qdev-core.h" >> >> +struct IPLBlockPVComp { >> + uint64_t tweak_pref; >> + uint64_t addr; >> + uint64_t size; >> +} QEMU_PACKED; >> +typedef struct IPLBlockPVComp IPLBlockPVComp; >> + >> +struct IPLBlockPV { >> + uint8_t reserved18[87]; /* 0x18 */ >> + uint8_t version; /* 0x6f */ >> + uint32_t reserved70; /* 0x70 */ >> + uint32_t num_comp; /* 0x70 */ >> + uint64_t pv_header_addr; /* 0x74 */ >> + uint64_t pv_header_len; /* 0x7c */ >> + struct IPLBlockPVComp components[]; >> +} QEMU_PACKED; >> +typedef struct IPLBlockPV IPLBlockPV; >> + >> struct IplBlockCcw { >> uint8_t reserved0[85]; >> uint8_t ssid; >> @@ -71,6 +89,7 @@ union IplParameterBlock { >> union { >> IplBlockCcw ccw; >> IplBlockFcp fcp; >> + IPLBlockPV pv; >> IplBlockQemuScsi scsi; >> }; >> } QEMU_PACKED; >> @@ -85,8 +104,11 @@ typedef union IplParameterBlock IplParameterBlock; >> >> int s390_ipl_set_loadparm(uint8_t *loadparm); >> void s390_ipl_update_diag308(IplParameterBlock *iplb); >> +int s390_ipl_prepare_pv_header(void); >> +int s390_ipl_pv_unpack(void); >> void s390_ipl_prepare_cpu(S390CPU *cpu); >> IplParameterBlock *s390_ipl_get_iplb(void); >> +IplParameterBlock *s390_ipl_get_iplb_pv(void); >> >> enum s390_reset { >> /* default is a reset not triggered by a CPU e.g. 
issued by QMP */ >> @@ -94,6 +116,7 @@ enum s390_reset { >> S390_RESET_REIPL, >> S390_RESET_MODIFIED_CLEAR, >> S390_RESET_LOAD_NORMAL, >> + S390_RESET_PV, >> }; >> void s390_ipl_reset_request(CPUState *cs, enum s390_reset reset_type); >> void s390_ipl_get_reset_request(CPUState **cs, enum s390_reset *reset_type); >> @@ -133,6 +156,7 @@ struct S390IPLState { >> /*< private >*/ >> DeviceState parent_obj; >> IplParameterBlock iplb; >> + IplParameterBlock iplb_pv; >> QemuIplParameters qipl; >> uint64_t start_addr; >> uint64_t compat_start_addr; >> @@ -140,6 +164,7 @@ struct S390IPLState { >> uint64_t compat_bios_start_addr; >> bool enforce_bios; >> bool iplb_valid; >> + bool iplb_valid_pv; >> bool netboot; >> /* reset related properties don't have to be migrated or reset */ >> enum s390_reset reset_type; >> @@ -161,9 +186,11 @@ QEMU_BUILD_BUG_MSG(offsetof(S390IPLState, iplb) & 3, >> "alignment of iplb wrong"); >> >> #define S390_IPL_TYPE_FCP 0x00 >> #define S390_IPL_TYPE_CCW 0x02 >> +#define S390_IPL_TYPE_PV 0x05 >> #define S390_IPL_TYPE_QEMU_SCSI 0xff >> >> #define S390_IPLB_HEADER_LEN 8 >> +#define S390_IPLB_MIN_PV_LEN 148 >> #define S390_IPLB_MIN_CCW_LEN 200 >> #define S390_IPLB_MIN_FCP_LEN 384 >> #define S390_IPLB_MIN_QEMU_SCSI_LEN 200 >> @@ -173,16 +200,49 @@ static inline bool iplb_valid_len(IplParameterBlock >> *iplb) >> return be32_to_cpu(iplb->len) <= sizeof(IplParameterBlock); >> } >> >> -static inline bool iplb_valid_ccw(IplParameterBlock *iplb) >> +static inline bool s390_ipl_pv_check_components(IplParameterBlock *iplb) > > Still don't like the function name :) > > ipl_valid_pv_components() ? 
If it makes you happier > >> { >> - return be32_to_cpu(iplb->len) >= S390_IPLB_MIN_CCW_LEN && >> - iplb->pbt == S390_IPL_TYPE_CCW; >> + IPLBlockPV *ipib_pv = &iplb->pv; >> + int i; >> + >> + if (ipib_pv->num_comp == 0) { >> + return false; >> + } >> + >> + for (i = 0; i < ipib_pv->num_comp; i++) { >> + /* Addr must be 4k aligned */ >> + if (ipib_pv->components[i].addr & ~TARGET_PAGE_MASK) { >> + return false; >> + } >> + >> + /* Tweak prefix is monotonically increasing with each component */ >> + if (i < ipib_pv->num_comp - 1 && >> + ipib_pv->components[i].tweak_pref >= >> + ipib_pv->components[i + 1].tweak_pref) { >> + return false; >> + } >> + } >> + return true; >> } >> >> -static inline bool iplb_valid_fcp(IplParameterBlock *iplb) >> +static inline bool iplb_valid(IplParameterBlock *iplb) >> { >> - return be32_to_cpu(iplb->len) >= S390_IPLB_MIN_FCP_LEN && >> - iplb->pbt == S390_IPL_TYPE_FCP; >> + switch (iplb->pbt) { >> + case S390_IPL_TYPE_FCP: >> + return (be32_to_cpu(iplb->len) >= S390_IPLB_MIN_FCP_LEN && >> + iplb->pbt == S390_IPL_TYPE_FCP); >> + case S390_IPL_TYPE_CCW: >> + return (be32_to_cpu(iplb->len) >= S390_IPLB_MIN_CCW_LEN && >> + iplb->pbt == S390_IPL_TYPE_CCW); > > That's a refactoring you could have split out. Sure > >> + case S390_IPL_TYPE_PV: >> + if(be32_to_cpu(iplb->len) < S390_IPLB_MIN_PV_LEN || >> + iplb->pbt != S390_IPL_TYPE_PV) { >> + return false; >> + } >> + return s390_ipl_pv_check_components(iplb); >> + default: >> + return false; >> + } >> } >> >> #endif >> diff --git a/hw/s390x/pv.c b/hw/s390x/pv.c >> new file mode 100644 >> index 0000000000..ba6409246e >> --- /dev/null >> +++ b/hw/s390x/pv.c >> @@ -0,0 +1,104 @@ >> +/* >> + * Secure execution functions >> + * >> + * Copyright IBM Corp. 2020 >> + * Author(s): >> + * Janosch Frank <fran...@linux.ibm.com> >> + * >> + * This work is licensed under the terms of the GNU GPL, version 2 or (at >> + * your option) any later version. See the COPYING file in the top-level >> + * directory. 
>> + */ >> +#include "qemu/osdep.h" >> + >> +#include <linux/kvm.h> >> + >> +#include "qemu/error-report.h" >> +#include "sysemu/kvm.h" >> +#include "pv.h" >> + >> +const char *cmd_names[] = { >> + "VM_ENABLE", >> + "VM_DISABLE", >> + "VM_SET_SEC_PARAMS", >> + "VM_UNPACK", >> + "VM_VERIFY", >> + "VM_PREP_RESET", >> + "VM_UNSHARE_ALL", >> +}; >> + >> +static int s390_pv_cmd(uint32_t cmd, void *data) >> +{ >> + int rc; >> + struct kvm_pv_cmd pv_cmd = { >> + .cmd = cmd, >> + .data = (uint64_t)data, >> + }; >> + >> + rc = kvm_vm_ioctl(kvm_state, KVM_S390_PV_COMMAND, &pv_cmd); >> + if (rc) { >> + error_report("KVM PV command %d (%s) failed: header rc %x rrc %x " >> + "IOCTL rc: %d", cmd, cmd_names[cmd], pv_cmd.rc, >> pv_cmd.rrc, >> + rc); >> + } >> + return rc; >> +} >> + >> +static void s390_pv_cmd_exit(uint32_t cmd, void *data) >> +{ >> + int rc; >> + >> + rc = s390_pv_cmd(cmd, data); >> + if (rc) { >> + exit(1); >> + } >> +} >> + >> +int s390_pv_vm_enable(void) >> +{ >> + return s390_pv_cmd(KVM_PV_ENABLE, NULL); >> +} >> + >> +void s390_pv_vm_disable(void) >> +{ >> + s390_pv_cmd_exit(KVM_PV_DISABLE, NULL); >> +} >> + >> +int s390_pv_set_sec_parms(uint64_t origin, uint64_t length) >> +{ >> + struct kvm_s390_pv_sec_parm args = { >> + .origin = origin, >> + .length = length, >> + }; >> + >> + return s390_pv_cmd(KVM_PV_VM_SET_SEC_PARMS, &args); >> +} >> + >> +/* >> + * Called for each component in the SE type IPL parameter block 0. 
>> + */ >> +int s390_pv_unpack(uint64_t addr, uint64_t size, uint64_t tweak) >> +{ >> + struct kvm_s390_pv_unp args = { >> + .addr = addr, >> + .size = size, >> + .tweak = tweak, >> + }; >> + >> + return s390_pv_cmd(KVM_PV_VM_UNPACK, &args); >> +} >> + >> +void s390_pv_perf_clear_reset(void) >> +{ >> + s390_pv_cmd_exit(KVM_PV_VM_PREP_RESET, NULL); >> +} >> + >> +int s390_pv_verify(void) >> +{ >> + return s390_pv_cmd(KVM_PV_VM_VERIFY, NULL); >> +} >> + >> +void s390_pv_unshare(void) >> +{ >> + s390_pv_cmd_exit(KVM_PV_VM_UNSHARE_ALL, NULL); >> +} >> diff --git a/hw/s390x/pv.h b/hw/s390x/pv.h >> new file mode 100644 >> index 0000000000..e58fbca96a >> --- /dev/null >> +++ b/hw/s390x/pv.h >> @@ -0,0 +1,34 @@ >> +/* >> + * Protected Virtualization header >> + * >> + * Copyright IBM Corp. 2020 >> + * Author(s): >> + * Janosch Frank <fran...@linux.ibm.com> >> + * >> + * This work is licensed under the terms of the GNU GPL, version 2 or (at >> + * your option) any later version. See the COPYING file in the top-level >> + * directory. 
>> + */ >> + >> +#ifndef HW_S390_PV_H >> +#define HW_S390_PV_H >> + >> +#ifdef CONFIG_KVM >> +int s390_pv_vm_enable(void); >> +void s390_pv_vm_disable(void); >> +int s390_pv_set_sec_parms(uint64_t origin, uint64_t length); >> +int s390_pv_unpack(uint64_t addr, uint64_t size, uint64_t tweak); >> +void s390_pv_perf_clear_reset(void); >> +int s390_pv_verify(void); >> +void s390_pv_unshare(void); >> +#else >> +static inline int s390_pv_vm_enable(void) { return 0; } >> +static inline void s390_pv_vm_disable(void) {} >> +static inline int s390_pv_set_sec_parms(uint64_t origin, uint64_t length) { >> return 0; } >> +static inline int s390_pv_unpack(uint64_t addr, uint64_t size, uint64_t >> tweak) { return 0; } >> +static inline void s390_pv_perf_clear_reset(void) {} >> +static inline int s390_pv_verify(void) { return 0; } >> +static inline void s390_pv_unshare(void) {} >> +#endif >> + >> +#endif /* HW_S390_PV_H */ >> diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c >> index a89cf4c129..f718cfc591 100644 >> --- a/hw/s390x/s390-virtio-ccw.c >> +++ b/hw/s390x/s390-virtio-ccw.c >> @@ -41,6 +41,8 @@ >> #include "hw/qdev-properties.h" >> #include "hw/s390x/tod.h" >> #include "sysemu/sysemu.h" >> +#include "hw/s390x/pv.h" >> +#include <linux/kvm.h> >> >> S390CPU *s390_cpu_addr2state(uint16_t cpu_addr) >> { >> @@ -238,9 +240,11 @@ static void s390_create_sclpconsole(const char *type, >> Chardev *chardev) >> static void ccw_init(MachineState *machine) >> { >> int ret; >> + S390CcwMachineState *ms = S390_CCW_MACHINE(machine); >> VirtualCssBus *css_bus; >> DeviceState *dev; >> >> + ms->pv = false; >> s390_sclp_init(); >> /* init memory + setup max page size. 
Required for the CPU model */ >> s390_memory_init(machine->ram); >> @@ -316,10 +320,90 @@ static inline void s390_do_cpu_ipl(CPUState *cs, >> run_on_cpu_data arg) >> s390_cpu_set_state(S390_CPU_STATE_OPERATING, cpu); >> } >> >> +static void s390_machine_unprotect(S390CcwMachineState *ms) >> +{ >> + CPUState *t; >> + >> + if (ms->pv) { > > This is always the case. The check for ms->pv dropped (and if other > patches require this, it is to be handled in the caller). Ack > >> + s390_pv_vm_disable(); >> + CPU_FOREACH(t) { >> + S390_CPU(t)->env.pv = false; >> + } >> + ms->pv = false; >> + } >> +} >> + >> +static int s390_machine_protect(S390CcwMachineState *ms) >> +{ >> + CPUState *t; >> + int rc; >> + >> + /* Create SE VM */ >> + rc = s390_pv_vm_enable(); >> + if (rc) { >> + return rc; >> + } >> + >> + CPU_FOREACH(t) { >> + S390_CPU(t)->env.pv = true; >> + } >> + ms->pv = true; >> + >> + /* Set SE header and unpack */ >> + rc = s390_ipl_prepare_pv_header(); >> + if (rc) { >> + goto out_err; >> + } >> + >> + /* Decrypt image */ >> + rc = s390_ipl_pv_unpack(); >> + if (rc) { >> + goto out_err; >> + } >> + >> + /* Verify integrity */ >> + rc = s390_pv_verify(); >> + if (rc) { >> + goto out_err; >> + } >> + return rc; >> + >> +out_err: >> + s390_machine_unprotect(ms); >> + return rc; >> +} >> + >> +#define DIAG_308_RC_INVAL_FOR_PV 0x0a02 >> +static void s390_machine_inject_pv_error(CPUState *cs) >> +{ >> + int r1 = (cs->kvm_run->s390_sieic.ipa & 0x00f0) >> 4; >> + CPUS390XState *env = &S390_CPU(cs)->env; >> + >> + /* Report that we are unable to enter protected mode */ >> + env->regs[r1 + 1] = DIAG_308_RC_INVAL_FOR_PV; >> +} >> + >> +static void s390_pv_prepare_reset(CPUS390XState *env) >> +{ >> + CPUState *cs; >> + >> + if (!env->pv) { >> + return; >> + } >> + CPU_FOREACH(cs) { >> + s390_cpu_set_state(S390_CPU_STATE_STOPPED, S390_CPU(cs)); >> + } > > This is done by the CPU reset in all cases? Why is that needed? 
(->comment) > >> + s390_pv_unshare(); >> + s390_pv_perf_clear_reset(); >> +} >> + >> static void s390_machine_reset(MachineState *machine) >> { >> enum s390_reset reset_type; >> CPUState *cs, *t; >> + S390CPU *cpu; >> + S390CcwMachineState *ms = S390_CCW_MACHINE(machine); > > Nit: Move this to the very top. Ack > >> + CPUS390XState *env; >> >> /* get the reset parameters, reset them once done */ >> s390_ipl_get_reset_request(&cs, &reset_type); >> @@ -327,9 +411,16 @@ static void s390_machine_reset(MachineState *machine) >> /* all CPUs are paused and synchronized at this point */ >> s390_cmma_reset(); >> >> + cpu = S390_CPU(cs); >> + env = &cpu->env; > > Can you just pass "cpu" to s390_pv_prepare_reset() and handle it in there? Wouldn't it make more sense to test the machine state here anyway? > >> + >> switch (reset_type) { >> case S390_RESET_EXTERNAL: >> case S390_RESET_REIPL: >> + if (ms->pv) { >> + s390_machine_unprotect(ms); >> + } >> + >> qemu_devices_reset(); >> s390_crypto_reset(); >> >> @@ -337,22 +428,52 @@ static void s390_machine_reset(MachineState *machine) >> run_on_cpu(cs, s390_do_cpu_ipl, RUN_ON_CPU_NULL); >> break; >> case S390_RESET_MODIFIED_CLEAR: >> + /* >> + * Subsystem reset needs to be done before we unshare memory >> + * and lose access to VIRTIO structures in guest memory. >> + */ >> + subsystem_reset(); >> + s390_crypto_reset(); >> + s390_pv_prepare_reset(env); >> CPU_FOREACH(t) { >> run_on_cpu(t, s390_do_cpu_full_reset, RUN_ON_CPU_NULL); >> } >> - subsystem_reset(); >> - s390_crypto_reset(); >> run_on_cpu(cs, s390_do_cpu_load_normal, RUN_ON_CPU_NULL); >> break; >> case S390_RESET_LOAD_NORMAL: >> + /* >> + * Subsystem reset needs to be done before we unshare memory >> + * and lose access to VIRTIO structures in guest memory. 
>> + */ >> + subsystem_reset(); >> + s390_pv_prepare_reset(env); >> CPU_FOREACH(t) { >> if (t == cs) { >> continue; >> } >> run_on_cpu(t, s390_do_cpu_reset, RUN_ON_CPU_NULL); >> } >> - subsystem_reset(); >> run_on_cpu(cs, s390_do_cpu_initial_reset, RUN_ON_CPU_NULL); >> + run_on_cpu(cs, s390_do_cpu_load_normal, RUN_ON_CPU_NULL); >> + break; >> + case S390_RESET_PV: /* Subcode 10 */ >> + subsystem_reset(); >> + s390_crypto_reset(); >> + >> + CPU_FOREACH(t) { >> + if (t == cs) { >> + continue; >> + } >> + run_on_cpu(t, s390_do_cpu_full_reset, RUN_ON_CPU_NULL); >> + } >> + run_on_cpu(cs, s390_do_cpu_reset, RUN_ON_CPU_NULL); >> + >> + if (s390_machine_protect(ms)) { >> + s390_machine_inject_pv_error(cs); >> + s390_cpu_set_state(S390_CPU_STATE_OPERATING, cpu); >> + return; >> + } >> + >> run_on_cpu(cs, s390_do_cpu_load_normal, RUN_ON_CPU_NULL); > > [...] > >> >> +#if !defined(CONFIG_USER_ONLY) >> +static bool machine_is_pv(MachineState *ms) >> +{ >> + Object *obj; >> + >> + /* we have to bail out for the "none" machine */ >> + obj = object_dynamic_cast(OBJECT(ms), TYPE_S390_CCW_MACHINE); >> + if (!obj) { >> + return false; >> + } >> + return S390_CCW_MACHINE(obj)->pv; > > Maybe you want to cache the machine, so you can avoid the > lookup+conversion on every new CPU. Christian has provided me with a fix to this code, I'll squash it. > >> +} >> +#endif > > [...] >> static int diag308_parm_check(CPUS390XState *env, uint64_t r1, uint64_t >> addr, >> uintptr_t ra, bool write) >> { >> + /* Handled by the Ultravisor */ >> + if (env->pv) { >> + return 0; >> + } >> if ((r1 & 1) || (addr & ~TARGET_PAGE_MASK)) { >> s390_program_interrupt(env, PGM_SPECIFICATION, ra); >> return -1; >> @@ -93,6 +101,11 @@ void handle_diag_308(CPUS390XState *env, uint64_t r1, >> uint64_t r3, uintptr_t ra) >> return; >> } >> >> + if (subcode > 7 && !s390_has_feat(S390_FEAT_UNPACK)) { > >> = DIAG308_PV_SET ? > >> + s390_program_interrupt(env, PGM_SPECIFICATION, ra); >> + return; >> + } >> + > >
signature.asc
Description: OpenPGP digital signature