);
>
> - memory_region_set_readonly(isa_bios, true);
> +if (!machine_require_guest_memfd(current_machine)) {
> +memory_region_set_readonly(isa_bios, true);
> +}
> }
>
> static PFlashCFI01 *pc_pflash_create(PCMachineState *pcms,
> --
> 2.25.1
>
>
--
Isaku Yamahata
gt; +
> +typedef struct __attribute__((__packed__)) OvmfSevMetadata {
> +uint8_t signature[4];
> +uint32_t len;
> +uint32_t version;
> +uint32_t num_desc;
> +OvmfSevMetadataDesc descs[];
> +} OvmfSevMetadata;
> +
> +OvmfSevMetadata *pc_system_get_ovmf_sev_metadata_ptr(void);
>
> void pc_pci_as_mapping_init(MemoryRegion *system_memory,
> MemoryRegion *pci_address_space);
> --
> 2.25.1
>
>
--
Isaku Yamahata
t;Invalid tdx_ioctl_level %d", level);
> +exit(1);
> +}
> +
> +return r;
> +}
> +
> +static inline int tdx_vm_ioctl(int cmd_id, __u32 flags, void *data)
> +{
> +return tdx_ioctl_internal(NULL, TDX_VM_IOCTL, cmd_id, flags, data);
> +}
> +
> +static inline int tdx_vcpu_ioctl(void *vcpu_fd, int cmd_id, __u32 flags,
> + void *data)
As kvm_vcpu_ioctl(CPUState *cpu, int type, ...) takes CPUState *, this can be
tdx_vcpu_ioctl(CPUState *cpu, ) instead of void *.
I struggled to find my mistake of passing "int vcpu_fd" to this function.
--
Isaku Yamahata
On Tue, Mar 12, 2024 at 03:44:32PM +0800,
Xiaoyao Li wrote:
> On 3/11/2024 5:27 PM, Daniel P. Berrangé wrote:
> > On Thu, Feb 29, 2024 at 01:37:10AM -0500, Xiaoyao Li wrote:
> > > From: Isaku Yamahata
> > >
> > > Add property "quote-generation-
is supposed to
> be
> +* pre-allocated and doesn't need to be discarded
> +*/
> +return 0;
The reference count leaks. A memory_region_unref() call is needed.
Otherwise looks good to me.
Reviewed-by: Isaku Yamahata
--
Isaku Yamahata
+#endif
> +
> #include "exec/confidential-guest-support.h"
>
> #define TYPE_TDX_GUEST "tdx-guest"
> @@ -16,6 +20,12 @@ typedef struct TdxGuest {
> uint64_t attributes; /* TD attributes */
> } TdxGuest;
>
> +#ifdef CONFIG_TDX
> +bool is_tdx_vm(void);
> +#else
> +#define is_tdx_vm() 0
> +#endif /* CONFIG_TDX */
> +
> int tdx_kvm_init(MachineState *ms, Error **errp);
>
> #endif /* QEMU_I386_TDX_H */
> --
> 2.34.1
>
>
Reviewed-by: Isaku Yamahata
--
Isaku Yamahata
+
> +static int get_tdx_capabilities(Error **errp)
> +{
> +struct kvm_tdx_capabilities *caps;
> +/* 1st generation of TDX reports 6 cpuid configs */
> +int nr_cpuid_configs = 6;
> +size_t size;
> +int r;
> +
> +do {
> +size = sizeof(struct kvm_tdx_capabilities) +
> + nr_cpuid_configs * sizeof(struct kvm_tdx_cpuid_config);
> +caps = g_malloc0(size);
> +caps->nr_cpuid_configs = nr_cpuid_configs;
> +
> +r = tdx_vm_ioctl(KVM_TDX_CAPABILITIES, 0, caps);
> +if (r == -E2BIG) {
> +g_free(caps);
> +nr_cpuid_configs *= 2;
g_realloc()? Maybe a matter of preference.
Other than this, it looks good to me.
--
Isaku Yamahata
ge();
>
> When page is converted from private to shared, the original private
> memory is back'ed by guest_memfd. Introduce
> ram_block_discard_guest_memfd_range() for discarding memory in
> guest_memfd.
>
> Originally-from: Isaku Yamahata
> Codeveloped-by: Xiaoyao Li
> S
+} else {
> +error_report("%s: KVM_SET_USER_MEMORY_REGION failed,
> slot=%d,"
> +" start=0x%" PRIx64 ", size=0x%" PRIx64 ": %s",
> + __func__, mem.slot, slot->start_addr,
> +(uint64_t)mem.memory_size, strerror(errno));
> +}
> }
> return ret;
> }
> @@ -477,6 +511,9 @@ static int kvm_mem_flags(MemoryRegion *mr)
> if (readonly && kvm_readonly_mem_allowed) {
> flags |= KVM_MEM_READONLY;
> }
> +if (memory_region_has_guest_memfd(mr)) {
> +flags |= KVM_MEM_PRIVATE;
> +}
Nitpick: it was renamed to KVM_MEM_GUEST_MEMFD
As long as the value is defined to same value, it doesn't matter, though.
--
Isaku Yamahata
tex_unlock_ramlist();
> +return;
> +}
> +}
> +#endif
> +
We should define kvm_create_guest_memfd() stub in accel/stub/kvm-stub.c.
We can remove this #ifdef.
--
Isaku Yamahata
he _weak_ function, it needs to implement the empty
> function (justing return 0) in all the other arches just as the placeholder.
> If QEMU community prefers this approach, I can change to it in next version.
Alternative is to move the hook to x86 specific function, not common kvm
function. With my quick grepping, x86_cpus_init() or x86_cpu_realizefn().
--
Isaku Yamahata
include unaccepted memory.
[1] UEFI Specification Version 2.10 (released August 2022)
[2] UEFI Platform Initialization Distribution Packaging Specification Version
1.1)
--
Isaku Yamahata
ck: enum TdxRamType. and related function arguments.
--
Isaku Yamahata
On Mon, Aug 21, 2023 at 10:25:35AM +0100,
"Daniel P. Berrangé" wrote:
> On Fri, Aug 18, 2023 at 05:50:02AM -0400, Xiaoyao Li wrote:
> > From: Isaku Yamahata
> >
> > Implement property_add_sha384() which converts hex string <-> uint8_t[48]
> >
break;
> +case R_EBX:
> +ret |= ebx;
> +break;
> +case R_ECX:
> +ret |= ecx;
> +break;
> +case R_EDX:
> +ret |= edx;
Nitpick: "|" isn't needed as we initialize ret = 0 above. Just '='.
--
Isaku Yamahata
On Fri, Aug 18, 2023 at 05:50:28AM -0400,
Xiaoyao Li wrote:
> From: Isaku Yamahata
>
> If the range for TDG.VP.VMCALL is too large, process the limited
> size and return retry error. It's bad for VMM to take too long time,
> e.g. second order, with blocking vcpu execution. It
eax = CACHE_TYPE(cache->type) |
> CACHE_LEVEL(cache->level) |
> (cache->self_init ? CACHE_SELF_INIT_LEVEL : 0) |
> - ((num_cores - 1) << 26) |
> + ((MIN(num_cores, 64) - 1) << 26) |
> ((num_apic_ids - 1)
commit: 0d52116fd82cdd1f4a88837336af5b6290c364a4
> --
> 2.25.1
>
The patch itself looks good. Can we add test cases?
We have some in qemu/tests/unit/test-x86-cpuid.c.
--
Isaku Yamahata
cores * cs->nr_threads) << 16;
> +threads_per_socket = cs->nr_cores * cs->nr_threads;
> + if (threads_per_socket > 1) {
> +*ebx |= MIN(threads_per_socket, 255) << 16;
> *edx |= CPUID_HT;
> }
> if (!cpu->enable_pmu) {
> --
> 2.25.1
>
>
Reviewed-by: Isaku Yamahata
--
Isaku Yamahata
ration_completion_postcopy "void" as it doesn't return a
> value.
Reviewed-by: Isaku Yamahata
--
Isaku Yamahata
uff, like scan for ROM regions in areas that
> guest kernels might have mapped as encrypted in guest page table. You
> can consider them to be guest bugs, but even current SNP-capable
> kernels exhibit this behavior and if the guest wants to do dumb stuff
> QEMU should let it.
>
> But for these latter 2 cases, it doesn't make sense to attempt to do
> any sort of discarding of backing pages since it doesn't make sense to
> discard ROM pages.
>
> So I think it makes sense to just set up the gmemfd automatically across
> the board internally, and keep memory-backend-memfd-private around
> purely as a way to control/configure discardable memory.
I'm looking at the repo and
31a7c7e36684 ("*hostmem-memfd-private: Initial discard manager support")
Do we have to implement RAM_DISCARD_MANGER at memory-backend-memfd-private?
Can't we implement it at host_mem? The interface callbacks can have check
"if (!private) return". Then we can support any host-mem backend.
--
Isaku Yamahata
\
> } \
> static const TypeInfo pc_machine_type_##suffix = { \
> .name = namestr TYPE_MACHINE_SUFFIX, \
> --
> 2.34.1
>
It seems strange for MachineClass to have kvm_type(). Probably AccelClass.
(struct KVMAccelClass?)
Anyway this is independent clean up.
Reviewed-by: Isaku Yamahata
--
Isaku Yamahata
On Wed, Aug 02, 2023 at 04:14:29PM +0200,
David Hildenbrand wrote:
> On 02.08.23 10:03, Xiaoyao Li wrote:
> > On 8/2/2023 1:21 AM, David Hildenbrand wrote:
> > > On 31.07.23 18:21, Xiaoyao Li wrote:
> > > > From: Isaku Yamahata
> > > >
> > >
INVAL;
> +}
> }
>
If we use confidential guest support, this check should go to there.
--
Isaku Yamahata
on;
> +rb = qemu_ram_block_from_host(addr, false, );
Here we already have the section, so we can use section.mr->ram_block. We don't
have to scan the existing RAMBlocks.
Except that, looks good to me.
Reviewed-by: Isaku Yamahata
--
Isaku Yamahata
some may not. Leave the decision to its vm type backend. They can use qemu
memory listener.
--
Isaku Yamahata
> -}
> +if (close_return_path_on_source(s) < 0) {
> +goto fail;
> }
>
> if (qemu_file_get_error(s->to_dst_file)) {
> @@ -2384,26 +2434,7 @@ static void migration_completion(MigrationState *s)
> return;
>
> fail:
> -if (s->
Add MEMORY_LISTENER_PRIORITY_ACCEL as the symbolic value for the memory
listener priority, to replace the hard-coded value 10 for accel.
No functional change intended.
Signed-off-by: Isaku Yamahata
---
accel/hvf/hvf-accel-ops.c | 2 +-
accel/kvm/kvm-all.c | 2 +-
hw/arm/xen_arm.c
Add MEMORY_LISTENER_PRIORITY_DEV_BACKEND as the symbolic value for the memory
listener priority, to replace the hard-coded value 10 for the device backend.
No functional change intended.
Signed-off-by: Isaku Yamahata
---
accel/kvm/kvm-all.c | 2 +-
hw/remote/proxy-memory-listener.c | 2 +-
hw
Add MEMORY_LISTENER_PRIORITY_MIN as the symbolic value for the minimum priority
of the memory listener, instead of the hard-coded magic value 0. Add explicit
initialization.
No functional change intended.
Signed-off-by: Isaku Yamahata
---
accel/kvm/kvm-all.c | 1 +
include/exec/memory.h | 1
the KVM memory listener. And I
don't want to hard-code 10 + 1.
[1] KVM gmem patches
https://github.com/sean-jc/linux/tree/x86/kvm_gmem_solo
Isaku Yamahata (3):
exec/memory: Add symbolic value for memory listener priority for accel
exec/memory: Add symbol for memory listener priority for dev
| 24 +-
> .../testing/selftests/kvm/guest_memfd_test.c | 33 +-
> .../testing/selftests/kvm/include/test_util.h | 14 +
> tools/testing/selftests/kvm/lib/test_util.c | 74 +++
> .../kvm/x86_64/private_mem_conversions_test.c | 38 +-
> virt/kvm/guest_mem.c | 488 ++
> 11 files changed, 882 insertions(+), 327 deletions(-)
>
> --
> 2.41.0.rc0.172.g3f132b7071-goog
--
Isaku Yamahata
; > > +{
> > > + if (!offset)
> > > + return true;
> > > + if (!gpa)
> > > + return false;
> > > +
> > > + return !!(count_trailing_zeros(offset) >= count_trailing_zeros(gpa));
This check doesn't work expected. For exam
On Fri, Feb 10, 2023 at 12:35:30AM +,
Sean Christopherson wrote:
> On Wed, Feb 08, 2023, Isaku Yamahata wrote:
> > On Fri, Dec 02, 2022 at 02:13:40PM +0800,
> > Chao Peng wrote:
> >
> > > +static int kvm_vm_ioctl_set_
kvm_for_each_memslot_in_gfn_range(, slots, start,
end) {
+ if (!kvm_slot_can_be_private(iter.slot)) {
+ mutex_unlock(>slots_lock);
+ return -EINVAL;
+ }
+ }
+ }
+ }
+
--
Isaku Yamahata
On Tue, Jan 24, 2023 at 01:27:50AM +,
Sean Christopherson wrote:
> On Thu, Jan 19, 2023, Isaku Yamahata wrote:
> > On Thu, Jan 19, 2023 at 03:25:08PM +,
> > Sean Christopherson wrote:
> >
> > > On Thu, Jan 19, 2023, Isaku Yamahata wrote:
> > > >
On Thu, Jan 19, 2023 at 03:25:08PM +,
Sean Christopherson wrote:
> On Thu, Jan 19, 2023, Isaku Yamahata wrote:
> > On Sat, Jan 14, 2023 at 12:37:59AM +,
> > Sean Christopherson wrote:
> >
> > > On Fri, Dec 02, 2022, Chao Peng wrote:
> > > &
ATTRIBUTES
static inline unsigned long kvm_get_memory_attributes(struct kvm *kvm, gfn_t
gfn)
{
- lockdep_assert_held(>mmu_lock);
+ // lockdep_assert_held(>mmu_lock);
return xa_to_value(xa_load(>mem_attr_array, gfn));
}
--
Isaku Yamahata
alias into system memory(Slot#0) in QEMU
> > > and slot#2 fails due to below exclusive check.
> > >
> > > Currently I changed QEMU code to mark these alias slots as shared
> > > instead of private but I'm not 100% confident this is correct fix.
> >
> > That's a QEMU bug of sorts. SMM is mutually exclusive with TDX, QEMU
> > shouldn't
> > be configuring SMRAM (or any SMM memslots for that matter) for TDX guests.
>
> Thanks for the confirmation. As long as we only bind one notifier for
> each address, using xarray does make things simple.
In the past, I had patches for qemu to disable PAM and SMRAM, but they were
dropped for simplicity because SMRAM/PAM are disabled as reset state with unused
memslot registered. TDX guest bios(TDVF or EDK2) doesn't enable them.
Now we can revive them.
--
Isaku Yamahata
On Tue, Dec 06, 2022 at 08:02:24PM +0800,
Chao Peng wrote:
> On Mon, Dec 05, 2022 at 02:49:59PM -0800, Isaku Yamahata wrote:
> > On Fri, Dec 02, 2022 at 02:13:45PM +0800,
> > Chao Peng wrote:
> >
> > > A large page with mixed private/shared subpages can't be ma
00% confident this is
> correct choice for other architectures, but after search it has not been
> used for other architectures, so should be safe to make it common.
INVALID_GPA is defined as all bit 1. Please notice "~" (tilde).
#define INVALID_GPA (~(gpa_t)0)
--
Isaku Yamahata
gfn_t end)
> +static void kvm_unmap_mem_range(struct kvm *kvm, gfn_t start, gfn_t end,
> + unsigned long attrs)
> {
> struct kvm_gfn_range gfn_range;
> struct kvm_memory_slot *slot;
> @@ -2378,6 +2379,10 @@ static void kvm_unmap_mem_range(struct kvm *kvm, gfn_t
> start, gfn_t end)
> gfn_range.slot = slot;
>
> r |= kvm_unmap_gfn_range(kvm, _range);
> +
> + kvm_arch_set_memory_attributes(kvm, slot, attrs,
> +gfn_range.start,
> +gfn_range.end);
> }
> }
>
> @@ -2427,7 +2432,7 @@ static int kvm_vm_ioctl_set_mem_attributes(struct kvm
> *kvm,
> idx = srcu_read_lock(>srcu);
> KVM_MMU_LOCK(kvm);
> if (i > start)
> - kvm_unmap_mem_range(kvm, start, i);
> + kvm_unmap_mem_range(kvm, start, i, attrs->attributes);
> kvm_mmu_invalidate_end(kvm);
> KVM_MMU_UNLOCK(kvm);
> srcu_read_unlock(>srcu, idx);
> --
> 2.25.1
>
--
Isaku Yamahata
acking guest private memory?
mce_register_decode_chain() can be used. MCE physical address(p->mce_addr)
includes host key id in addition to real physical address. By searching used
hkid by KVM, we can determine if the page is assigned to guest TD or not. If
yes, send SIGBUS.
kvm_machine_check() can be enhanced for KVM specific use. This is before
memory_failure() is called, though.
any other ideas?
--
Isaku Yamahata
On Fri, Oct 28, 2022 at 02:55:45PM +0800,
Chao Peng wrote:
> On Wed, Oct 26, 2022 at 02:54:25PM -0700, Isaku Yamahata wrote:
> > On Tue, Oct 25, 2022 at 11:13:43PM +0800,
> > Chao Peng wrote:
> >
> > > A memslot with KVM_MEM_PRIVATE being set can include both
*rsvd_check, u64
> pte,
> int level)
> {
> diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
> index 672f0432d777..9f97aac90606 100644
> --- a/arch/x86/kvm/mmu/tdp_mmu.c
> +++ b/arch/x86/kvm/mmu/tdp_mmu.c
> @@ -1768,7 +1768,8 @@ static void zap_collapsible_spte_range(struct kvm *kvm,
> continue;
>
> max_mapping_level = kvm_mmu_max_mapping_level(kvm, slot,
> - iter.gfn,
> PG_LEVEL_NUM);
> + iter.gfn, PG_LEVEL_NUM,
> + is_private_spte(iter.old_spte));
> if (max_mapping_level < iter.level)
> continue;
This is to merge pages into a large page on the next KVM page fault. Large page
support is not yet implemented. Let's skip the private slot until large page
support is done.
--
Isaku Yamahata
slot, level, attr,
> + lpage_start, start));
> +
> + if (lpage_start == lpage_end)
> + return;
> +
> + for (gfn = lpage_start + pages; gfn < lpage_end; gfn += pages)
> + linfo_update_mixed(lpage_info_slot(gfn, slot, level),
> +false);
> +
> + linfo_update_mixed(lpage_info_slot(lpage_end, slot, level),
> +mem_attr_is_mixed(kvm, slot, level, attr,
> + end, lpage_end + pages));
> + }
> +}
--
Isaku Yamahata
;
if (ret)
return ret;
+ page = folio_file_page(folio, offset);
*pagep = page;
if (order)
*order = thp_order(compound_head(page));
--
Isaku Yamahata
- sizeof(kvm_userspace_mem)))
> + if (copy_from_user(, argp, size);
> + goto out;
> +
> + r = -EINVAL;
> + if (mem.flags & KVM_MEM_PRIVATE)
> goto out;
Nit: It's better to check if padding is zero. Maybe rename it to reserved.
+ if (mem.pad1 || memchr_inv(mem.pad2, 0, sizeof(mem.pad2)))
+ goto out;
--
Isaku Yamahata
ED)),
"Unsupported mem attribute.\n");
idx = srcu_read_lock(>srcu);
for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) {
slots = __kvm_memslots(kvm, i);
kvm_for_each_memslot_in_gfn_range(, slots, start, end) {
slot = iter.slot;
start = max(start, slot->base_gfn);
end = min(end, slot->base_gfn + slot->npages);
if (WARN_ON_ONCE(start >= end))
continue;
update_mem_lpage_info(kvm, slot, attr, start, end);
}
}
srcu_read_unlock(>srcu, idx);
}
#endif
--
Isaku Yamahata
itial encrypted
> payload, and the KVM_MEMORY_ENCRYPT_{REG,UNREG}_REGION ioctls would
> still be used for conversions post-boot?
Yes. It is called before running any vcpu. At run time (after running vcpus),
KVM_MEMORY_ENCRYPT_{REG,UNREG}_REGION is used.
--
Isaku Yamahata
ec_notifier_count(struct kvm *kvm, unsigned long start,
- unsigned long end)
+void kvm_dec_notifier_count(struct kvm *kvm, gfn_t start, gfn_t end)
{
/*
* This sequence increase will notify the kvm page fault that
--
Isaku Yamahata
gt; 0) {
> +head_start = e->address;
> + tdx_add_ram_entry(head_start, head_length, TDX_RAM_UNACCEPTED);
tdx_add_ram_entry() increments tdx_guest->nr_ram_entries. I think it's worth
adding a comment explaining why this is safe with respect to this for-loop.
--
Isaku Yamahata
add that to the tdx-stubs.c file you already created in one of the
> previous patches and drop this #ifdef mess ;)
This is for consistency with SEV. Anyway, either way is okay.
From target/i386/sev.h
...
#ifdef CONFIG_SEV
bool sev_enabled(void);
bool sev_es_enabled(void);
#else
#define sev_enabled() 0
#define sev_es_enabled() 0
#endif
--
Isaku Yamahata
fault VM into confidential VM after KVM_CREATE_VM.
Thanks,
>
> Acked-by: Gerd Hoffmann
>
> take care,
> Gerd
>
>
--
Isaku Yamahata
cpu);
> if (r) {
> goto out;
> @@ -221,6 +238,7 @@ int tdx_pre_create_vcpu(CPUState *cpu)
>
> init_vm.attributes = tdx_guest->attributes;
> init_vm.max_vcpus = ms->smp.cpus;
> +init_vm.tsc_khz = env->tsc_khz;
>
> r = tdx_vm_ioctl(KVM_TDX_INIT_VM, 0, _vm);
> if (r < 0) {
> --
> 2.27.0
>
>
--
Isaku Yamahata
TD HOB in TDX's machine_init_done callback.
Because HOB is introduced first time, please expand HOB.
> Co-developed-by: Isaku Yamahata
> Signed-off-by: Isaku Yamahata
> Co-developed-by: Sean Christopherson
> Signed-off-by: Sean Christopherson
> Signed-off-by: Xiaoyao Li
> -
ps://www.intel.com/content/www/us/en/developer/articles/technical/intel-trust-domain-extensions.html>`__
> diff --git a/docs/system/target-i386.rst b/docs/system/target-i386.rst
> index 96bf54889a82..16dd4f1a8c80 100644
> --- a/docs/system/target-i386.rst
> +++ b/docs/system/target-i386.rst
> @@ -29,6 +29,7 @@ Architectural features
> i386/kvm-pv
> i386/sgx
> i386/amd-memory-encryption
> + i386/tdx
>
> .. _pcsys_005freq:
>
> --
> 2.27.0
>
>
--
Isaku Yamahata
86_arch_cpuid(CPUX86State *env, struct kvm_cpuid_entry2
> *entries,
> +uint32_t cpuid_i);
> +
> #else
>
> #define kvm_pit_in_kernel() 0
> --
> 2.27.0
>
>
--
Isaku Yamahata
On Thu, May 12, 2022 at 11:17:51AM +0800,
Xiaoyao Li wrote:
> From: Isaku Yamahata
>
> TDVF firmware (CODE and VARS) needs to be added/copied to TD's private
> memory via KVM_TDX_INIT_MEM_REGION, as well as TD HOB and TEMP memory.
>
> Signed-off-by: Isaku Yamahata
> Sign
loc0(size);
> +caps->nr_cpuid_configs = max_ent;
> +
> +r = tdx_platform_ioctl(KVM_TDX_CAPABILITIES, 0, caps);
> +if (r == -E2BIG) {
> +g_free(caps);
> +max_ent *= 2;
> +} else if (r < 0) {
> + error_report("KVM_TDX_CAPABILITIES failed: %s\n", strerror(-r));
> +exit(1);
> +}
> +}
> +while (r == -E2BIG);
> +
> +tdx_caps = caps;
> +}
> +
> int tdx_kvm_init(MachineState *ms, Error **errp)
> {
> +if (!tdx_caps) {
> +get_tdx_capabilities();
> +}
> +
> return 0;
> }
>
> --
> 2.27.0
>
>
--
Isaku Yamahata
f --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h
> index a783c7886811..0e94031ab7c7 100644
> --- a/include/sysemu/kvm.h
> +++ b/include/sysemu/kvm.h
> @@ -373,6 +373,7 @@ int kvm_arch_put_registers(CPUState *cpu, int level);
>
> int kvm_arch_init(MachineState *ms, KVMSt
pects, the guest can stop working itself.
--
Isaku Yamahata
ARS.fd
>
> to create TD guest.
For compatibility with the qemu command line, you don't have to modify the pflash
device. Don't instantiate pflash at pc_system_flash_create(), and at
pc_system_firmware_init(), you can retrieve the necessary parameters and then
populate memory. Although it's still hacky, it would be a bit cleaner.
--
Isaku Yamahata
Only configure MSR_IA32_UCODE_REV for TDs.
non-TDs?
--
Isaku Yamahata
tifier: GPL-2.0-or-later
> +
> + * Copyright (c) 2020 Intel Corporation
> + * Author: Isaku Yamahata
> + *
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License as published by
> +
U.
> + *
> + * Thus, just mark readonly memory not supported for simplicity.
> + */
> +kvm_readonly_mem_allowed = false;
> +
> tdx_guest = tdx;
>
> return 0;
> --
> 2.27.0
>
>
--
Isaku Yamahata
eturn kvm_arm_init_cpreg_list(cpu);
> }
>
> +int kvm_arch_pre_create_vcpu(CPUState *cpu)
> +{
> +return 0;
> +}
> +
A weak symbol can be used to avoid updating all the architectures.
Thanks,
--
Isaku Yamahata
; OBJECT_DEFINE_TYPE_WITH_INTERFACES(TdxGuest,
> tdx_guest,
> diff --git a/target/i386/kvm/tdx.h b/target/i386/kvm/tdx.h
> index 4036ca2f3f99..06599b65b827 100644
> --- a/target/i386/kvm/tdx.h
> +++ b/target/i386/kvm/tdx.h
> @@ -27,5 +27,7 @@ bool is_tdx_vm(void);
> #endif /* CONFIG_TDX */
>
> int tdx_kvm_init(MachineState *ms, Error **errp);
> +void tdx_get_supported_cpuid(uint32_t function, uint32_t index, int reg,
> + uint32_t *ret);
>
> #endif /* QEMU_I386_TDX_H */
> --
> 2.27.0
>
>
--
Isaku Yamahata
data, data) \
> +__tdx_ioctl(NULL, TDX_VM_IOCTL, cmd_id, metadata, data)
> +
> +#define tdx_vcpu_ioctl(cpu, cmd_id, metadata, data) \
> +__tdx_ioctl(cpu, TDX_VCPU_IOCTL, cmd_id, metadata, data)
There is no point in using a macro. A normal (inline) function works.
--
Isaku Yamahata
; --- a/target/i386/kvm/tdx.c
> +++ b/target/i386/kvm/tdx.c
> @@ -12,10 +12,23 @@
> */
>
> #include "qemu/osdep.h"
> +#include "qapi/error.h"
> #include "qom/object_interfaces.h"
>
> +#include "hw/i386/x86.h"
> #include "tdx.h"
>
> +int tdx_kvm_init(MachineState *ms, Error **errp)
> +{
> +TdxGuest *tdx = (TdxGuest *)object_dynamic_cast(OBJECT(ms->cgs),
> +TYPE_TDX_GUEST);
The caller already checks it. This is redundant. Maybe assert?
--
Isaku Yamahata
From: Isaku Yamahata
In mch_realize(), process PAM initialization before SMRAM initialization so
that a later patch can skip all the SMRAM-related code with a single check.
Signed-off-by: Isaku Yamahata
---
hw/pci-host/q35.c | 19 ++-
1 file changed, 10 insertions(+), 9 deletions
From: Isaku Yamahata
Specify the initial value for RCX/R8 to be the address of the HOB.
Don't propagate the value to Qemu's cache of the registers so as to
avoid implying that the register state is valid, e.g. Qemu doesn't model
TDX-SEAM behavior for initializing other GPRs.
Signed-off
From: Isaku Yamahata
TDX doesn't allow level interrupt and SMI/INIT/SIPI interrupt delivery
mode. So disallow them.
Signed-off-by: Isaku Yamahata
---
hw/i386/x86.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/hw/i386/x86.c b/hw/i386/x86.c
index 24af05c313..c372403b87 100644
id routing entry.
Signed-off-by: Sean Christopherson
Signed-off-by: Isaku Yamahata
---
hw/intc/apic_common.c | 12
include/hw/i386/apic.h | 1 +
include/hw/i386/apic_internal.h | 1 +
target/i386/kvm/tdx.c | 7 +++
4 files changed, 21 insertion
From: Isaku Yamahata
Add a new flag to X86Machine to disallow INIT/SIPI delivery mode of
interrupt and pass it to ioapic creation so that ioapic disallows INIT/SIPI
delivery mode.
Signed-off-by: Isaku Yamahata
---
hw/i386/microvm.c | 4 ++--
hw/i386/pc_piix.c | 2 +-
hw/i386/pc_q35
From: Isaku Yamahata
Add a q35 property to check whether or not SMM ranges, e.g. SMRAM, TSEG,
etc... exist for the target platform. TDX doesn't support SMM and doesn't
play nice with QEMU modifying related guest memory ranges.
Signed-off-by: Isaku Yamahata
Co-developed-by: Sean Christopherson
From: Sean Christopherson
Process PCIe BAR before PAM so that a future patch can skip all the SMM
related crud with a single check-and-return.
Signed-off-by: Sean Christopherson
Signed-off-by: Isaku Yamahata
---
hw/pci-host/q35.c | 10 +-
1 file changed, 5 insertions(+), 5 deletions
From: Isaku Yamahata
Despite advertising MCE support to the guest, TDX-SEAM doesn't support
injecting #MCs into the guest. All of the associated setup is thus
rejected by KVM.
Signed-off-by: Isaku Yamahata
---
target/i386/kvm/kvm.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion
From: Isaku Yamahata
Build the TD HOB during machine late initialization, i.e. once guest
memory is fully defined.
Signed-off-by: Isaku Yamahata
Co-developed-by: Sean Christopherson
Signed-off-by: Sean Christopherson
---
hw/i386/meson.build | 2 +-
hw/i386/tdvf-hob.c| 166
From: Sean Christopherson
Add MMIO HOB entries, which are needed to enumerate legal MMIO ranges to
early TDVF.
Note, the attribute absolutely must include UNCACHEABLE, else TDVF will
effectively consider it a bad HOB entry and ignore it.
Signed-off-by: Sean Christopherson
Signed-off-by: Isaku
From: Isaku Yamahata
Add, and optionally measure, TDVF memory via KVM_TDX_INIT_MEM_REGION as
part of finalizing the TD.
Signed-off-by: Isaku Yamahata
Co-developed-by: Sean Christopherson
Signed-off-by: Sean Christopherson
---
target/i386/kvm/tdx.c | 17 -
1 file changed, 16
From: Isaku Yamahata
Signed-off-by: Sean Christopherson
Signed-off-by: Isaku Yamahata
---
include/sysemu/tdx.h | 1 +
target/i386/kvm/kvm.c | 8
target/i386/kvm/tdx-stub.c | 4
target/i386/kvm/tdx.c | 20
4 files changed, 29 insertions
From: Isaku Yamahata
Add a property to prevent ioapic from setting INIT/SIPI delivery mode.
Without this guard, qemu can result in unexpected behavior.
Signed-off-by: Isaku Yamahata
---
hw/intc/ioapic.c | 19 +++
hw/intc/ioapic_common.c | 21
From: Isaku Yamahata
Add support for loading TDX's Trusted Domain Virtual Firmware (TDVF) via
the generic loader. Prioritize the TDVF above plain hex to avoid false
positives with hex (TDVF has explicit metadata to confirm it's a TDVF).
Enumerate TempMem as added, private memory, i.e
From: Isaku Yamahata
Disable S3/S4 unconditionally when TDX is enabled. Because cpu state is
protected, it's not allowed to reset cpu state. So S3/S4 can't be
supported.
Signed-off-by: Isaku Yamahata
---
target/i386/kvm/tdx.c | 20
1 file changed, 20 insertions(+)
diff
From: Isaku Yamahata
Add a property to prevent ioapic from setting SMI delivery mode. Without
this guard, qemu can result in unexpected behavior.
Signed-off-by: Isaku Yamahata
---
hw/intc/ioapic.c | 18 ++
hw/intc/ioapic_common.c | 20
From: Isaku Yamahata
Introduce a new notifier, machine_init_done_late, that is notified after
machine_init_done. This will be used by TDX to generate the HOB for its
virtual firmware, which needs to be done after all guest memory has been
added, i.e. after machine_init_done notifiers have run
From: Isaku Yamahata
The following patch will utilize this refactoring.
Signed-off-by: Isaku Yamahata
---
hw/i386/e820_memory_layout.c | 42
1 file changed, 28 insertions(+), 14 deletions(-)
diff --git a/hw/i386/e820_memory_layout.c b/hw/i386
From: Isaku Yamahata
Add a new flag to X86Machine to disallow SMI and pass it to ioapic creation
so that ioapic disallows delivery mode of SMI.
Signed-off-by: Isaku Yamahata
---
hw/i386/microvm.c | 6 --
hw/i386/pc_piix.c | 3 ++-
hw/i386/pc_q35.c | 3 ++-
hw/i386/x86.c
From: Sean Christopherson
Add support for grabbing KVM_TDX_CAPABILITIES and use the new
kvm_get_supported_cpuid() hook to adjust the supported XCR0 bits.
Add TODOs for the remaining work.
Signed-off-by: Sean Christopherson
Signed-off-by: Isaku Yamahata
---
target/i386/kvm/kvm.c | 2
From: Isaku Yamahata
Introduce a helper function, e820_change_type(), that change
the type of subregion of e820 entry.
The following patch uses it.
Signed-off-by: Isaku Yamahata
---
hw/i386/e820_memory_layout.c | 72
hw/i386/e820_memory_layout.h | 1 +
2
From: Isaku Yamahata
When level trigger isn't supported on x86 platform, forcibly report edge
trigger in acpi tables.
Signed-off-by: Isaku Yamahata
---
hw/i386/acpi-build.c | 103 --
hw/i386/acpi-common.c | 74 ++
2 files
From: Isaku Yamahata
In TDX CPU state is also protected, thus vcpu state can't be reset by VMM.
It assumes -action reboot=shutdown instead of silently ignoring vcpu reset.
TDX module spec version 344425-002US doesn't support vcpu reset by VMM. VM
needs to be destroyed and created again
From: Isaku Yamahata
Add constants and structs for the TD Virtual Firmware metadata, which
describes how the TDVF must be built to ensure correct functionality and
measurement. They are defined in TDVF Design Guide [1].
[1] TDVF Design Guide
https://software.intel.com/content/dam/develop
-by: Sean Christopherson
Signed-off-by: Isaku Yamahata
---
target/i386/kvm/kvm.c | 18 +++---
1 file changed, 15 insertions(+), 3 deletions(-)
diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
index a3d5b334d1..27b64dedc2 100644
--- a/target/i386/kvm/kvm.c
+++ b/target/i386/kvm
From: Sean Christopherson
Add a hook for TDX to denote that the TD Virtual Firmware must be
provided via the "generic" device loader. Error out if pflash is used
in conjunction with TDX.
Suggested-by: Isaku Yamahata
Signed-off-by: Sean Christopherson
Signed-off-by: Isaku Yamahat
From: Isaku Yamahata
When x86machine doesn't support eoi intercept, set
level_trigger_unsupported property of ioapic to true so that ioapic doesn't
accept configuration to use level trigger.
Signed-off-by: Isaku Yamahata
---
hw/i386/microvm.c | 5 +++--
hw/i386/pc_piix.c | 2 +-
hw
From: Isaku Yamahata
Signed-off-by: Isaku Yamahata
---
include/sysemu/tdx.h | 1 +
target/i386/kvm/kvm.c | 5 +
2 files changed, 6 insertions(+)
diff --git a/include/sysemu/tdx.h b/include/sysemu/tdx.h
index 70eb01348f..f3eced10f9 100644
--- a/include/sysemu/tdx.h
+++ b/include/sysemu
From: Isaku Yamahata
Add a new bool member, eoi_intercept_unsupported, to X86MachineState with
default value false. Set true when tdx kvm type. Inability to intercept
eoi causes impossibility to emulate level triggered interrupt to be
re-injected when level is still kept active. which affects
From: Isaku Yamahata
Add definitions for literals, enums, structs, GUIDs, etc... that will be
used by TDX to build the UEFI Hand-Off Block (HOB) that is passed to the
Trusted Domain Virtual Firmware (TDVF). All values come from the UEFI
specification and TDVF design guide. [1]
Note
1 - 100 of 1486 matches
Mail list logo