[PATCH] ui/cocoa: Use the standard about panel

2021-07-07 Thread Akihiko Odaki
This provides standard look and feel for the about panel and reduces
code.

Signed-off-by: Akihiko Odaki 
---
 ui/cocoa.m | 111 +++--
 1 file changed, 23 insertions(+), 88 deletions(-)

diff --git a/ui/cocoa.m b/ui/cocoa.m
index 9f72844b079..3e1ae24739a 100644
--- a/ui/cocoa.m
+++ b/ui/cocoa.m
@@ -83,7 +83,7 @@ static void cocoa_switch(DisplayChangeListener *dcl,
 
 static void cocoa_refresh(DisplayChangeListener *dcl);
 
-static NSWindow *normalWindow, *about_window;
+static NSWindow *normalWindow;
 static const DisplayChangeListenerOps dcl_ops = {
 .dpy_name  = "cocoa",
 .dpy_gfx_update = cocoa_update,
@@ -1115,7 +1115,6 @@ - (void)changeDeviceMedia:(id)sender;
 - (BOOL)verifyQuit;
 - (void)openDocumentation:(NSString *)filename;
 - (IBAction) do_about_menu_item: (id) sender;
-- (void)make_about_window;
 - (void)adjustSpeed:(id)sender;
 @end
 
@@ -1166,7 +1165,6 @@ - (id) init
 supportedImageFileTypes = [NSArray arrayWithObjects: @"img", @"iso", 
@"dmg",
  @"qcow", @"qcow2", @"cloop", @"vmdk", @"cdr",
   @"toast", nil];
-[self make_about_window];
 }
 return self;
 }
@@ -1451,92 +1449,29 @@ - (BOOL)verifyQuit
 /* The action method for the About menu item */
 - (IBAction) do_about_menu_item: (id) sender
 {
-[about_window makeKeyAndOrderFront: nil];
-}
-
-/* Create and display the about dialog */
-- (void)make_about_window
-{
-/* Make the window */
-int x = 0, y = 0, about_width = 400, about_height = 200;
-NSRect window_rect = NSMakeRect(x, y, about_width, about_height);
-about_window = [[NSWindow alloc] initWithContentRect:window_rect
-styleMask:NSWindowStyleMaskTitled | 
NSWindowStyleMaskClosable |
-NSWindowStyleMaskMiniaturizable
-backing:NSBackingStoreBuffered
-defer:NO];
-[about_window setTitle: @"About"];
-[about_window setReleasedWhenClosed: NO];
-[about_window center];
-NSView *superView = [about_window contentView];
-
-/* Create the dimensions of the picture */
-int picture_width = 80, picture_height = 80;
-x = (about_width - picture_width)/2;
-y = about_height - picture_height - 10;
-NSRect picture_rect = NSMakeRect(x, y, picture_width, picture_height);
-
-/* Make the picture of QEMU */
-NSImageView *picture_view = [[NSImageView alloc] initWithFrame:
- picture_rect];
-char *qemu_image_path_c = get_relocated_path(CONFIG_QEMU_ICONDIR 
"/hicolor/512x512/apps/qemu.png");
-NSString *qemu_image_path = [NSString 
stringWithUTF8String:qemu_image_path_c];
-g_free(qemu_image_path_c);
-NSImage *qemu_image = [[NSImage alloc] 
initWithContentsOfFile:qemu_image_path];
-[picture_view setImage: qemu_image];
-[picture_view setImageScaling: NSImageScaleProportionallyUpOrDown];
-[superView addSubview: picture_view];
-
-/* Make the name label */
-NSBundle *bundle = [NSBundle mainBundle];
-if (bundle) {
-x = 0;
-y = y - 25;
-int name_width = about_width, name_height = 20;
-NSRect name_rect = NSMakeRect(x, y, name_width, name_height);
-NSTextField *name_label = [[NSTextField alloc] initWithFrame: 
name_rect];
-[name_label setEditable: NO];
-[name_label setBezeled: NO];
-[name_label setDrawsBackground: NO];
-[name_label setAlignment: NSTextAlignmentCenter];
-NSString *qemu_name = [[bundle executablePath] lastPathComponent];
-[name_label setStringValue: qemu_name];
-[superView addSubview: name_label];
+NSAutoreleasePool * pool = [[NSAutoreleasePool alloc] init];
+char *icon_path_c = get_relocated_path(CONFIG_QEMU_ICONDIR 
"/hicolor/512x512/apps/qemu.png");
+NSString *icon_path = [NSString stringWithUTF8String:icon_path_c];
+g_free(icon_path_c);
+NSImage *icon = [[NSImage alloc] initWithContentsOfFile:icon_path];
+NSString *version = @"QEMU emulator version " QEMU_FULL_VERSION;
+NSString *copyright = @QEMU_COPYRIGHT;
+NSDictionary *options;
+if (icon) {
+options = @{
+NSAboutPanelOptionApplicationIcon : icon,
+NSAboutPanelOptionApplicationVersion : version,
+@"Copyright" : copyright,
+};
+[icon release];
+} else {
+options = @{
+NSAboutPanelOptionApplicationVersion : version,
+@"Copyright" : copyright,
+};
 }
-
-/* Set the version label's attributes */
-x = 0;
-y = 50;
-int version_width = about_width, version_height = 20;
-NSRect version_rect = NSMakeRect(x, y, version_width, version_height);
-NSTextField *version_label = [[NSTextField alloc] initWithFrame:
-  version_rect];
-[version_label setEditable: NO];
-

[PATCH] ui/cocoa: Do not perform unsafe cast of argv

2021-07-07 Thread Akihiko Odaki
Signed-off-by: Akihiko Odaki 
---
 ui/cocoa.m | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/ui/cocoa.m b/ui/cocoa.m
index 9f72844b079..68a6302184a 100644
--- a/ui/cocoa.m
+++ b/ui/cocoa.m
@@ -1888,12 +1888,12 @@ static void cocoa_clipboard_request(QemuClipboardInfo 
*info,
 exit(status);
 }
 
-int main (int argc, const char * argv[]) {
+int main (int argc, char **argv) {
 QemuThread thread;
 
 COCOA_DEBUG("Entered main()\n");
 gArgc = argc;
-gArgv = (char **)argv;
+gArgv = argv;
 
 qemu_sem_init(&display_init_sem, 0);
 qemu_sem_init(&app_started_sem, 0);
-- 
2.30.1 (Apple Git-130)




[PATCH v3] net/macos: implement vmnet-based netdev

2021-07-07 Thread Akihiko Odaki
From: Phillip Tennen 

This patch implements a new netdev device, reachable via -netdev
vmnet-macos, that’s backed by macOS’s vmnet framework.

The vmnet framework provides native bridging support, and its usage in
this patch is intended as a replacement for attempts to use a tap device
via the tuntaposx kernel extension. Notably, the tap/tuntaposx approach
never would have worked in the first place, as QEMU interacts with the
tap device via poll(), and macOS does not support polling device files.

vmnet requires either a special entitlement, granted via a provisioning
profile, or root access. Otherwise attempts to create the virtual
interface will fail with a “generic error” status code. QEMU may not
currently be signed with an entitlement granted in a provisioning
profile, as this would necessitate pre-signed binary build distribution,
rather than source-code distribution. As such, using this netdev
currently requires that qemu be run with root access. I’ve opened a
feedback report with Apple to allow the use of the relevant entitlement
with this use case:
https://openradar.appspot.com/radar?id=5007417364447232

vmnet offers three operating modes, all of which are supported by this
patch via the “mode=host|shared|bridge” option:

* "Host" mode: Allows the vmnet interface to communicate with other vmnet
interfaces that are in host mode and also with the native host.
* "Shared" mode: Allows traffic originating from the vmnet interface to
reach the Internet through a NAT. The vmnet interface can also
communicate with the native host.
* "Bridged" mode: Bridges the vmnet interface with a physical network
interface.

Each of these modes also provides some extra configuration that’s
supported by this patch:

* "Bridged" mode: The user may specify the physical interface to bridge
with. Defaults to en0.
* "Host" mode / "Shared" mode: The user may specify the DHCP range and
subnet. Allocated by vmnet if not provided.

vmnet also offers some extra configuration options that are not
supported by this patch:

* Enable isolation from other VMs using vmnet
* Port forwarding rules
* Enabling TCP segmentation offload
* Only applicable in "shared" mode: specifying the NAT IPv6 prefix
* Only available in "host" mode: specifying the IP address for the VM
within an isolated network

Note that this patch requires macOS 10.15 as a minimum, as this is when
bridging support was implemented in vmnet.framework.

Rebased to commit 9aef0954195cc592e86846dbbe7f3c2c5603690a by Akihiko
Odaki.

Signed-off-by: Phillip Tennen 
Signed-off-by: Akihiko Odaki 
Message-Id: <20210315103209.20870-1-akihiko.od...@gmail.com>
---
 meson.build   |   3 +
 net/clients.h |   5 +
 net/meson.build   |   1 +
 net/net.c |   3 +
 net/vmnet-macos.c | 447 ++
 qapi/net.json | 120 -
 qemu-options.hx   |   9 +
 7 files changed, 586 insertions(+), 2 deletions(-)
 create mode 100644 net/vmnet-macos.c

diff --git a/meson.build b/meson.build
index b9a9b2120fe..0d2ceaa880e 100644
--- a/meson.build
+++ b/meson.build
@@ -178,6 +178,7 @@ socket = []
 version_res = []
 coref = []
 iokit = []
+vmnet = not_found
 emulator_link_args = []
 nvmm =not_found
 hvf = not_found
@@ -192,6 +193,7 @@ if targetos == 'windows'
 elif targetos == 'darwin'
   coref = dependency('appleframeworks', modules: 'CoreFoundation')
   iokit = dependency('appleframeworks', modules: 'IOKit', required: false)
+  vmnet = dependency('appleframeworks', modules: 'vmnet')
 elif targetos == 'sunos'
   socket = [cc.find_library('socket'),
 cc.find_library('nsl'),
@@ -1259,6 +1261,7 @@ config_host_data.set('CONFIG_FUSE', fuse.found())
 config_host_data.set('CONFIG_FUSE_LSEEK', fuse_lseek.found())
 config_host_data.set('CONFIG_X11', x11.found())
 config_host_data.set('CONFIG_CFI', get_option('cfi'))
+config_host_data.set('CONFIG_VMNET', vmnet.found())
 config_host_data.set('QEMU_VERSION', '"@0@"'.format(meson.project_version()))
 config_host_data.set('QEMU_VERSION_MAJOR', 
meson.project_version().split('.')[0])
 config_host_data.set('QEMU_VERSION_MINOR', 
meson.project_version().split('.')[1])
diff --git a/net/clients.h b/net/clients.h
index 92f9b59aedc..2c2af67f82a 100644
--- a/net/clients.h
+++ b/net/clients.h
@@ -63,4 +63,9 @@ int net_init_vhost_user(const Netdev *netdev, const char 
*name,
 
 int net_init_vhost_vdpa(const Netdev *netdev, const char *name,
 NetClientState *peer, Error **errp);
+
+#ifdef CONFIG_VMNET
+int net_init_vmnet_macos(const Netdev *netdev, const char *name,
+NetClientState *peer, Error **errp);
+#endif
 #endif /* QEMU_NET_CLIENTS_H */
diff --git a/net/meson.build b/net/meson.build
index 1076b0a7ab4..ba6a5b7fa0b 100644
--- a/net/meson.build
+++ b/net/meson.build
@@ -37,5 +37,6 @@ endif
 softmmu_ss.add(when: 'CONFIG_POSIX', if_true: files(tap_posix))
 softmmu_ss.add(when: 'CONFIG_WIN32', if_true: files('tap-win32.c'))
 softmmu_ss.add(when: 

Re: Picture missing in About dialog on cocoa ui

2021-07-07 Thread Akihiko Odaki
Hi,

My installation correctly shows the picture. Please make sure you
install it and run the installed binary (not the binary located in the
build directory). The new code should work reliably once you install
it while the old code may or may not work depending on how you execute
the binary.

Regards,
Akihiko Odaki

On Thu, Jul 8, 2021 at 2:00 AM Programmingkid  wrote:
>
> Hello, now that the code to add the icon to QEMU has been restored for Mac 
> OS, I think it is time to revert patch 
> e31746ecf8dd2f25f687c94ac14016a3ba5debfc. This patch was made to fix the 
> missing icon in the About dialog but it doesn't seem to work any more. I do 
> not see a picture in the About dialog on Mac OS 11.1. The easiest fix for 
> this issue is to revert the mentioned patch. When I ran
> 'git revert e31746ecf8dd2f25f687c94ac14016a3ba5debfc' the picture in the 
> About dialog appeared again.



Re: [PATCH v2 4/4] hw/nvme: fix controller hot unplugging

2021-07-07 Thread Klaus Jensen

On Jul  7 18:56, Klaus Jensen wrote:

On Jul  7 17:57, Hannes Reinecke wrote:

On 7/7/21 5:49 PM, Klaus Jensen wrote:

From: Klaus Jensen 

Prior to this patch the nvme-ns devices are always children of the
NvmeBus owned by the NvmeCtrl. This causes the namespaces to be
unrealized when the parent device is removed. However, when subsystems
are involved, this is not what we want since the namespaces may be
attached to other controllers as well.

This patch adds an additional NvmeBus on the subsystem device. When
nvme-ns devices are realized, if the parent controller device is linked
to a subsystem, the parent bus is set to the subsystem one instead. This
makes sure that namespaces are kept alive and not unrealized.

Signed-off-by: Klaus Jensen 
---
hw/nvme/nvme.h   | 15 ---
hw/nvme/ctrl.c   | 14 ++
hw/nvme/ns.c | 18 ++
hw/nvme/subsys.c |  3 +++
4 files changed, 35 insertions(+), 15 deletions(-)

diff --git a/hw/nvme/nvme.h b/hw/nvme/nvme.h
index c4065467d877..83ffabade4cf 100644
--- a/hw/nvme/nvme.h
+++ b/hw/nvme/nvme.h
@@ -33,12 +33,20 @@ QEMU_BUILD_BUG_ON(NVME_MAX_NAMESPACES > NVME_NSID_BROADCAST 
- 1);
typedef struct NvmeCtrl NvmeCtrl;
typedef struct NvmeNamespace NvmeNamespace;
+#define TYPE_NVME_BUS "nvme-bus"
+OBJECT_DECLARE_SIMPLE_TYPE(NvmeBus, NVME_BUS)
+
+typedef struct NvmeBus {
+BusState parent_bus;
+} NvmeBus;
+
#define TYPE_NVME_SUBSYS "nvme-subsys"
#define NVME_SUBSYS(obj) \
OBJECT_CHECK(NvmeSubsystem, (obj), TYPE_NVME_SUBSYS)
typedef struct NvmeSubsystem {
DeviceState parent_obj;
+NvmeBus bus;
uint8_t subnqn[256];
NvmeCtrl  *ctrls[NVME_MAX_CONTROLLERS];
@@ -365,13 +373,6 @@ typedef struct NvmeCQueue {
QTAILQ_HEAD(, NvmeRequest) req_list;
} NvmeCQueue;
-#define TYPE_NVME_BUS "nvme-bus"
-#define NVME_BUS(obj) OBJECT_CHECK(NvmeBus, (obj), TYPE_NVME_BUS)
-
-typedef struct NvmeBus {
-BusState parent_bus;
-} NvmeBus;
-
#define TYPE_NVME "nvme"
#define NVME(obj) \
OBJECT_CHECK(NvmeCtrl, (obj), TYPE_NVME)
diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
index 90e3ee2b70ee..9a3b3a27c293 100644
--- a/hw/nvme/ctrl.c
+++ b/hw/nvme/ctrl.c
@@ -6514,16 +6514,14 @@ static void nvme_exit(PCIDevice *pci_dev)
nvme_ctrl_reset(n);
-for (i = 1; i <= NVME_MAX_NAMESPACES; i++) {
-ns = nvme_ns(n, i);
-if (!ns) {
-continue;
+if (n->subsys) {
+for (i = 1; i <= NVME_MAX_NAMESPACES; i++) {
+ns = nvme_ns(n, i);
+if (ns) {
+ns->attached--;
+}
}
-nvme_ns_cleanup(ns);


So who is removing the namespaces, then?
I would have expected some cleanup action from the subsystem, seeing 
that we reparent to that ...




Since we "move" the namespaces to the subsystem, and since the 
subsystem is non-hotpluggable, they will (and can) not be removed. In 
the case that there is no subsystem, nvme_ns_unrealize() will be 
called for each child namespace on the controller NvmeBus.



-}
-
-if (n->subsys) {
nvme_subsys_unregister_ctrl(n->subsys, n);
}
diff --git a/hw/nvme/ns.c b/hw/nvme/ns.c
index 3c4f5b8c714a..b7cf1494e75b 100644
--- a/hw/nvme/ns.c
+++ b/hw/nvme/ns.c
@@ -441,6 +441,15 @@ void nvme_ns_cleanup(NvmeNamespace *ns)
}
}
+static void nvme_ns_unrealize(DeviceState *dev)
+{
+NvmeNamespace *ns = NVME_NS(dev);
+
+nvme_ns_drain(ns);
+nvme_ns_shutdown(ns);
+nvme_ns_cleanup(ns);
+}
+
static void nvme_ns_realize(DeviceState *dev, Error **errp)
{
NvmeNamespace *ns = NVME_NS(dev);
@@ -462,6 +471,14 @@ static void nvme_ns_realize(DeviceState *dev, Error **errp)
   "linked to an nvme-subsys device");
return;
}
+} else {
+/*
+ * If this namespace belongs to a subsystem (through a link on the
+ * controller device), reparent the device.
+ */
+if (!qdev_set_parent_bus(dev, &subsys->bus.parent_bus, errp)) {
+return;
+}


What happens if that fails?
Will we abort? Not create the namespace?



Good point!

It can actually only fail if the bus implements check_address(), which 
it does not, so it always succeeds, so it should assert instead.




Nah, the 'if' is fine. If check_address() should be implemented at some 
point, errp will be set and invocation of qemu will stop with an error. 
So I think the error handling is fine as-is.


signature.asc
Description: PGP signature


Re: [External] Re: [PATCH] target/i386: Fix cpuid level for AMD

2021-07-07 Thread Michael Roth
Quoting Eduardo Habkost (2021-07-02 12:35:34)
> On Fri, Jul 02, 2021 at 10:43:22AM -0500, Michael Roth wrote:
> > On Fri, Jul 02, 2021 at 01:14:56PM +0800, zhenwei pi wrote:
> > > On 7/2/21 4:35 AM, Michael Roth wrote:
> > > > Quoting Igor Mammedov (2021-07-01 03:43:13)
> > > > > On Wed, 30 Jun 2021 14:18:09 -0500
> > > > > Michael Roth  wrote:
> > > > > 
> > > > > > Quoting Dr. David Alan Gilbert (2021-06-29 09:06:02)
> > > > > > > * zhenwei pi (pizhen...@bytedance.com) wrote:
> > > > > > > > An AMD server typically has cpuid level 0x10 (tested on 
> > > > > > > > Rome/Milan), it
> > > > > > > > should not be changed to 0x1f in multi-dies case.
> > > > > > > > 
> > > > > > > > Fixes: a94e1428991 (target/i386: Add CPUID.1F generation support
> > > > > > > > for multi-dies PCMachine)
> > > > > > > > Signed-off-by: zhenwei pi 
> > > > > > > 
> > > > > > > (Copying in Babu)
> > > > > > > 
> > > > > > > Hmm I think you're right.  I've cc'd in Babu and Wei.
> > > > > > > 
> > > > > > > Eduardo: What do we need to do about compatibility, do we need to 
> > > > > > > wire
> > > > > > > this to machine type or CPU version?
> > > > > > 
> > > > > > FWIW, there are some other CPUID entries like leaves 2 and 4 that 
> > > > > > are
> > > > > > also Intel-specific. With SEV-SNP CPUID enforcement, advertising 
> > > > > > them to
> > > > > > guests will result in failures when host SNP firmware checks the
> > > > > > hypervisor-provided CPUID values against the host-supported ones.
> > > > > > 
> > > > > > To address this we've been planning to add an 'amd-cpuid-only' 
> > > > > > property
> > > > > > to suppress them:
> > > > > > 
> > > > > >
> > > > > > https://github.com/mdroth/qemu/commit/28d0553fe748d30a8af09e5e58a7da3eff03e21b
> > > > > > 
> > > > > > My thinking is this property should be off by default, and only 
> > > > > > defined
> > > > > > either via explicit command-line option, or via new CPU types. 
> > > > > > We're also
> > > > > > planning to add new CPU versions for EPYC* CPU types that set this
> > > > > > 'amd-cpuid-only' property by default:
> > > > > > 
> > > > > >https://github.com/mdroth/qemu/commits/new-cpu-types-upstream
> > > > > > It looks like having new cpu versions is enough to change behavior,
> > > > > maybe keep 'amd-cpuid-only' as internal field and not expose it to 
> > > > > users
> > > > > as a property.
> > > > 
> > > > Hmm, I defined it as a property mainly to make use of
> > > > X86CPUVersionDefinition.props to create new versions of the CPU types
> > > > with those properties set.
> > > > 
> > > > There's a patch there that adds X86CPUVersionDefinition.cache_info so
> > > > that new cache definitions can be provided for new CPU versions. So
> > > > would you suggest a similar approach here, e.g. adding an
> > > > X86CPUVersionDefinition.amd_cpuid_only field that could be used directly
> > > > rather than going through X86CPUVersionDefinition.props?
> > > > 
> > > > There's also another new "amd-xsave" prop in that series that does 
> > > > something
> > > > > similar to "amd-cpuid-only", so a little worried about tacking too much 
> > > > extra
> > > > into X86CPUVersionDefinition. But maybe that one could just be rolled 
> > > > into
> > > > "amd-cpuid-only" since it is basically fixing up xsave-related cpuid
> > > > entries for AMD...
> > > > 
> > > Hi, this patch wants to fix the issue:
> > > AMD CPU (Rome/Milan) should get the cpuid level 0x10, not 0x1F in the 
> > > guest.
> > > If QEMU reports a 0x1F to guest, guest(Linux) would use leaf 0x1F instead 
> > > of
> > > leaf 0xB to get extended topology:
> > > 
> > > https://github.com/torvalds/linux/blob/master/arch/x86/kernel/cpu/topology.c#L49
> > > 
> > > static int detect_extended_topology_leaf(struct cpuinfo_x86 *c)
> > > {
> > > if (c->cpuid_level >= 0x1f) {
> > > if (check_extended_topology_leaf(0x1f) == 0)
> > > return 0x1f;
> > > }
> > > 
> > > if (c->cpuid_level >= 0xb) {
> > > if (check_extended_topology_leaf(0xb) == 0)
> > > return 0xb;
> > > }
> > > 
> > > return -1;
> > > }
> > > 
> > > Because of the wrong cpuid level, the guest gets unexpected topology from
> > > leaf 0x1F.
> > > 
> > > I tested https://github.com/mdroth/qemu/commits/new-cpu-types-upstream, 
> > > and
> > > it seems that these patches could not fix this issue.
> > 
> > Yes, I think your patch would still be needed. The question is whether it's
> > okay to change it for existing CPU types, e.g. EPYC-Milan, or only for new 
> > ones
> > when they set a certain flag/property, like the proposed "amd-cpuid-only" 
> > (which
> > the proposed EPYC-Milan-v2 would set).
> 
> I tried to answer this in a separate reply in this thread, but
> answering here for visibility:
> 
> You can safely do it on existing CPU types, because the new
> behavior doesn't introduce host software or hardware requirements
> when enabled.  You just need to disable the new behavior in
> 

Re: [PATCH] docs/system: riscv: Update Microchip Icicle Kit for direct kernel boot

2021-07-07 Thread Alistair Francis
On Tue, Jul 6, 2021 at 7:50 PM Bin Meng  wrote:
>
> From: Bin Meng 
>
> This adds a new section in the documentation to demonstrate how to
> use the new direct kernel boot feature for Microchip Icicle Kit,
> other than the HSS bootflow, using an upstream U-Boot v2021.07 image
> as an example.
>
> It also updates the truth table to have a new '-dtb' column which is
> required by direct kernel boot.
>
> Signed-off-by: Bin Meng 

Reviewed-by: Alistair Francis 

Alistair

> ---
>
>  docs/system/riscv/microchip-icicle-kit.rst | 54 +++---
>  1 file changed, 47 insertions(+), 7 deletions(-)
>
> diff --git a/docs/system/riscv/microchip-icicle-kit.rst 
> b/docs/system/riscv/microchip-icicle-kit.rst
> index 54ced661e3..817d2aec9c 100644
> --- a/docs/system/riscv/microchip-icicle-kit.rst
> +++ b/docs/system/riscv/microchip-icicle-kit.rst
> @@ -47,13 +47,13 @@ The user provided DTB should have the following 
> requirements:
>
>  QEMU follows below truth table to select which payload to execute:
>
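> -===== ========== =======
> --bios    -kernel payload
> -===== ========== =======
> -    N          N     HSS
> -    Y don't care     HSS
> -    N          Y  kernel
> -===== ========== =======
> +===== ========== ========== =======
> +-bios    -kernel       -dtb payload
> +===== ========== ========== =======
> +    N          N don't care     HSS
> +    Y don't care don't care     HSS
> +    N          Y          Y  kernel
> +===== ========== ========== =======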
>
>  The memory is set to 1537 MiB by default which is the minimum required high
>  memory size by HSS. A sanity check on ram size is performed in the machine
> @@ -106,4 +106,44 @@ HSS output is on the first serial port (stdio) and 
> U-Boot outputs on the
>  second serial port. U-Boot will automatically load the Linux kernel from
>  the SD card image.
>
> +Direct Kernel Boot
> +------------------
> +
> +Sometimes we just want to test booting a new kernel, and transforming the
> +kernel image to the format required by the HSS bootflow is tedious. We can
> +use '-kernel' for direct kernel booting just like other RISC-V machines do.
> +
> +In this mode, the OpenSBI fw_dynamic BIOS image for 'generic' platform is
> +used to boot an S-mode payload like U-Boot or OS kernel directly.
> +
> +For example, the following commands show building a U-Boot image from U-Boot
> +mainline v2021.07 for the Microchip Icicle Kit board:
> +
> +.. code-block:: bash
> +
> +  $ export CROSS_COMPILE=riscv64-linux-
> +  $ make microchip_mpfs_icicle_defconfig
> +
> +Then we can boot the machine by:
> +
> +.. code-block:: bash
> +
> +  $ qemu-system-riscv64 -M microchip-icicle-kit -smp 5 -m 2G \
> +  -sd path/to/sdcard.img \
> +  -nic user,model=cadence_gem \
> +  -nic tap,ifname=tap,model=cadence_gem,script=no \
> +  -display none -serial stdio \
> +  -kernel path/to/u-boot/build/dir/u-boot.bin \
> +  -dtb path/to/u-boot/build/dir/u-boot.dtb
> +
> +CAVEATS:
> +
> +* Check the "stdout-path" property in the /chosen node in the DTB to 
> determine
> +  which serial port is used for the serial console, e.g.: if the console is 
> set
> +  to the second serial port, change to use "-serial null -serial stdio".
> +* The default U-Boot configuration uses CONFIG_OF_SEPARATE hence the ELF 
> image
> +  ``u-boot`` cannot be passed to "-kernel" as it does not contain the DTB 
> hence
> +  ``u-boot.bin`` has to be used which does contain one. To use the ELF image,
> +  we need to change to CONFIG_OF_EMBED or CONFIG_OF_PRIOR_STAGE.
> +
>  .. _HSS: https://github.com/polarfire-soc/hart-software-services
> --
> 2.25.1
>
>



Re: [PATCH] Fix libpmem configuration option

2021-07-07 Thread Pankaj Gupta
> For some reason, libpmem option setting was set to work in an opposite
> way (--enable-libpmem disabled it and vice versa). Fixing this so
> configuration works properly.
>
> Signed-off-by: Miroslav Rezanina 
> ---
>  configure | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
>
> diff --git a/configure b/configure
> index 7994bdee92..ffa93cc5fd 100755
> --- a/configure
> +++ b/configure
> @@ -1501,9 +1501,9 @@ for opt do
>;;
>--disable-debug-mutex) debug_mutex=no
>;;
> -  --enable-libpmem) libpmem=disabled
> +  --enable-libpmem) libpmem="enabled"
>;;
> -  --disable-libpmem) libpmem=enabled
> +  --disable-libpmem) libpmem="disabled"
>;;
>--enable-xkbcommon) xkbcommon="enabled"
>;;

Reviewed-by: Pankaj Gupta 



Re: [PATCH] hw/riscv: sifive_u: Correct the CLINT timebase frequency

2021-07-07 Thread Alistair Francis
On Tue, Jul 6, 2021 at 8:48 PM Bin Meng  wrote:
>
> From: Bin Meng 
>
> At present the CLINT timebase frequency is set to 10MHz on sifive_u,
> but on the real hardware the timebase frequency is 1MHz.
>
> Signed-off-by: Bin Meng 

Reviewed-by: Alistair Francis 

Alistair

> ---
>
>  hw/riscv/sifive_u.c | 7 +--
>  1 file changed, 5 insertions(+), 2 deletions(-)
>
> diff --git a/hw/riscv/sifive_u.c b/hw/riscv/sifive_u.c
> index 273c86418c..e75ca38783 100644
> --- a/hw/riscv/sifive_u.c
> +++ b/hw/riscv/sifive_u.c
> @@ -62,6 +62,9 @@
>
>  #include <libfdt.h>
>
> +/* CLINT timebase frequency */
> +#define CLINT_TIMEBASE_FREQ 1000000
> +
>  static const MemMapEntry sifive_u_memmap[] = {
>  [SIFIVE_U_DEV_DEBUG] ={0x0,  0x100 },
>  [SIFIVE_U_DEV_MROM] = { 0x1000, 0xf000 },
> @@ -165,7 +168,7 @@ static void create_fdt(SiFiveUState *s, const MemMapEntry 
> *memmap,
>
>  qemu_fdt_add_subnode(fdt, "/cpus");
>  qemu_fdt_setprop_cell(fdt, "/cpus", "timebase-frequency",
> -SIFIVE_CLINT_TIMEBASE_FREQ);
> +CLINT_TIMEBASE_FREQ);
>  qemu_fdt_setprop_cell(fdt, "/cpus", "#size-cells", 0x0);
>  qemu_fdt_setprop_cell(fdt, "/cpus", "#address-cells", 0x1);
>
> @@ -847,7 +850,7 @@ static void sifive_u_soc_realize(DeviceState *dev, Error 
> **errp)
>  sifive_clint_create(memmap[SIFIVE_U_DEV_CLINT].base,
>  memmap[SIFIVE_U_DEV_CLINT].size, 0, ms->smp.cpus,
>  SIFIVE_SIP_BASE, SIFIVE_TIMECMP_BASE, SIFIVE_TIME_BASE,
> -SIFIVE_CLINT_TIMEBASE_FREQ, false);
> +CLINT_TIMEBASE_FREQ, false);
>
>  if (!sysbus_realize(SYS_BUS_DEVICE(&s->prci), errp)) {
>  return;
> --
> 2.25.1
>
>



Re: [PATCH 0/4] ppc/Pegasos2: Firmware replacement using VOF

2021-07-07 Thread David Gibson
On Sun, Jun 27, 2021 at 06:27:13PM +0200, BALATON Zoltan wrote:
> Based-on: <20210625055155.2252896-1-...@ozlabs.ru>
> ^ That is v22 of Alexey's VOF patch
> 
> With this series on top of VOF v22 I can now boot Linux and MorphOS on
> pegasos2 without a firmware blob so I hope this is enough to get this
> board in 6.1 and also have it enabled so people can start using it
> eventually (a lot of people don't compile their QEMU but rely on
> binaries from distros and other sources). Provided that VOF will also
> be merged by then. This gives VOF another use case that may help it
> getting merged at last.
> 
> Further info and example command lines can be found at
> https://osdn.net/projects/qmiga/wiki/SubprojectPegasos2

Applied to ppc-for-6.1, thanks.

> 
> Regards,
> BALATON Zoltan
> 
> BALATON Zoltan (4):
>   ppc/pegasos2: Introduce Pegasos2MachineState structure
>   target/ppc: Allow virtual hypervisor on CPU without HV
>   ppc/pegasos2: Use Virtual Open Firmware as firmware replacement
>   ppc/pegasos2: Implement some RTAS functions with VOF
> 
>  default-configs/devices/ppc-softmmu.mak |   2 +-
>  hw/ppc/Kconfig  |   1 +
>  hw/ppc/pegasos2.c   | 783 +++-
>  target/ppc/cpu.c|   2 +-
>  4 files changed, 771 insertions(+), 17 deletions(-)
> 

-- 
David Gibson| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson


signature.asc
Description: PGP signature


Re: [PATCH] target/riscv: hardwire bits in hideleg and hedeleg

2021-07-07 Thread Alistair Francis
On Thu, Jun 24, 2021 at 11:48 PM Jose Martins  wrote:
>
> > > +static const target_ulong vs_delegable_excps = delegable_excps &
> > > +~((1ULL << (RISCV_EXCP_S_ECALL)) |
> >
> > > +(1ULL << (RISCV_EXCP_VS_ECALL)) |
> > > +(1ULL << (RISCV_EXCP_M_ECALL)) |
> >
> > These two are both read only 0, shouldn't they not be included in this list?
> >
> > >  static int write_hedeleg(CPURISCVState *env, int csrno, target_ulong val)
> > >  {
> > > -env->hedeleg = val;
> > > +env->hedeleg = val & vs_delegable_excps;
> >
> > Because we then allow a write to occur here.
>
> Note that the list is being bitwise negated, so both of these are
> actually not writable (ie read-only 0). There is still the question
> regarding the VS_ECALL (exception 10) bit raised by Zhiwei, since
> table 5.2 in the spec does not explicitly classify it. However, I
> believe it is safe to assume that exception 10 is non-delegable.

Ah, I see.

Reviewed-by: Alistair Francis 

Applied to riscv-to-apply.next

I improved the indentation and rebased this on the latest master.

Alistair

>
> José



Re: [PATCH qemu v22] spapr: Implement Open Firmware client interface

2021-07-07 Thread David Gibson
On Thu, Jul 08, 2021 at 01:15:10PM +1000, Alexey Kardashevskiy wrote:
> 
> 
> On 08/07/2021 12:40, David Gibson wrote:
> > On Fri, Jun 25, 2021 at 03:51:55PM +1000, Alexey Kardashevskiy wrote:
[snip]
> > > +void spapr_vof_client_dt_finalize(SpaprMachineState *spapr, void *fdt)
> > > +{
> > > +char *stdout_path = spapr_vio_stdout_path(spapr->vio_bus);
> > > +int chosen;
> > > +
> > > +vof_build_dt(fdt, spapr->vof);
> > > +
> > > +_FDT(chosen = fdt_path_offset(fdt, "/chosen"));
> > > +_FDT(fdt_setprop_string(fdt, chosen, "bootargs",
> > > +spapr->vof->bootargs ? : ""));
> > 
> > You do several things with vof->bootargs, but if you've initialized it
> > from machine->kernel_cmdline, I didn't spot it.
> 
> 
> GRUB initializes it and updates via spapr_vof_setprop().

Right, but my point is if an OF client doesn't poke it, it should have
the value from qemu's -append option which is in
machine->kernel_cmdline.

[snip]
> > > +static int path_offset(const void *fdt, const char *path)
> > > +{
> > > +g_autofree char *p = NULL;
> > > +char *at;
> > > +
> > > +/*
> > > + * 
> > > https://www.devicetree.org/open-firmware/bindings/ppc/release/ppc-2_1.html#HDR16
> > > + *
> > > + * "Conversion from numeric representation to text representation 
> > > shall use
> > > + * the lower case forms of the hexadecimal digits in the range a..f,
> > > + * suppressing leading zeros".
> > 
> > Huh... that suggests that Zoltan's firmware which passes a caps hex
> > and expects it to work is doing the wrong thing.  We still need to
> > > accommodate it, though.
> > 
> > > + */
> > > +at = strchr(path, '@');
> > > +if (!at) {
> > > +return fdt_path_offset(fdt, path);
> > > +}
> > > +
> > > +p = g_strdup(path);
> > > +for (at = at - path + p + 1; *at; ++at) {
> > > +*at = tolower(*at);
> > > +}
> > 
> > This isn't quite right, though we might get away with it in practice.
> > You're taking a whole path here, and each path component could
> > potentially have a unit address.  This will tolower() everything after
> > the first @, potentially case mangling the base names of later
> > components.
> 
> Ah. I need the last "@" here, at least. But I do not think we need to go any
> further than this here.

That's closer to correct, and will probably work in practice.  That
will fail, though, if we find a client that uses bad caps for an
intermediate path component.

[snip]
> > > +static uint32_t vof_setprop(MachineState *ms, void *fdt, Vof *vof,
> > > +uint32_t nodeph, uint32_t pname,
> > > +uint32_t valaddr, uint32_t vallen)
> > > +{
> > > +char propname[OF_PROPNAME_LEN_MAX + 1];
> > > +uint32_t ret = -1;
> > > +int offset;
> > > +char trval[64] = "";
> > > +char nodepath[VOF_MAX_PATH] = "";
> > > +Object *vmo = object_dynamic_cast(OBJECT(ms), TYPE_VOF_MACHINE_IF);
> > > +g_autofree char *val = NULL;
> > > +
> > > +if (vallen > VOF_MAX_SETPROPLEN) {
> > > +goto trace_exit;
> > > +}
> > > +if (readstr(pname, propname, sizeof(propname))) {
> > > +goto trace_exit;
> > > +}
> > > +offset = fdt_node_offset_by_phandle(fdt, nodeph);
> > > +if (offset < 0) {
> > > +goto trace_exit;
> > > +}
> > > +ret = get_path(fdt, offset, nodepath, sizeof(nodepath));
> > > +if (ret <= 0) {
> > > +goto trace_exit;
> > > +}
> > > +
> > > +val = g_malloc0(vallen);
> > > +if (VOF_MEM_READ(valaddr, val, vallen) != MEMTX_OK) {
> > > +goto trace_exit;
> > > +}
> > > +
> > > +if (vmo) {
> > > +VofMachineIfClass *vmc = VOF_MACHINE_GET_CLASS(vmo);
> > > +
> > > +if (vmc->setprop &&
> > > +!vmc->setprop(ms, nodepath, propname, val, vallen)) {
> > > +goto trace_exit;
> > 
> > This defaults to allowing the setprop if the machine doesn't provide a
> > setprop callback.  I think it would be safer to default to prohibiting
> > all setprops except those the machine explicitly allows.
> 
> 
> Mmmm... I can imagine the client using the device tree as a temporary
> storage. I'd rather add a trace for such cases.

If they do, I think that's something we'll need to consider and
account for on that platform, rather than something we want to allow to
begin with.

[snip]
> > > +static uint32_t vof_write(Vof *vof, uint32_t ihandle, uint32_t buf,
> > > +  uint32_t len)
> > > +{
> > > +char tmp[VOF_VTY_BUF_SIZE];
> > > +unsigned cb;
> > > +OfInstance *inst = (OfInstance *)
> > > +g_hash_table_lookup(vof->of_instances, GINT_TO_POINTER(ihandle));
> > > +
> > > +if (!inst) {
> > > +trace_vof_error_write(ihandle);
> > > +return -1;
> > > +}
> > > +
> > > +for ( ; len > 0; len -= cb) {
> > > +cb = MIN(len, sizeof(tmp) - 1);
> > > +if (VOF_MEM_READ(buf, tmp, cb) != MEMTX_OK) {
> 

Re: [PATCH 4/4] ppc/pegasos2: Implement some RTAS functions with VOF

2021-07-07 Thread David Gibson
On Sun, Jun 27, 2021 at 06:27:13PM +0200, BALATON Zoltan wrote:
> Linux uses RTAS functions to access PCI devices so we need to provide
> these with VOF. Implement some of the most important functions to
> allow booting Linux with VOF. With this the board is now usable
> without a binary ROM image and we can enable it by default as other
> boards.
> 
> Signed-off-by: BALATON Zoltan 

Applied, but...

> @@ -687,6 +795,29 @@ static void *build_fdt(MachineState *machine, int 
> *fdt_size)
>  qemu_fdt_setprop_string(fdt, "/failsafe", "device_type", "serial");
>  qemu_fdt_setprop_string(fdt, "/failsafe", "name", "failsafe");
>  
> +qemu_fdt_add_subnode(fdt, "/rtas");
> +qemu_fdt_setprop_cell(fdt, "/rtas", "system-reboot", 20);
> +qemu_fdt_setprop_cell(fdt, "/rtas", "hibernate", 19);
> +qemu_fdt_setprop_cell(fdt, "/rtas", "suspend", 18);
> +qemu_fdt_setprop_cell(fdt, "/rtas", "power-off", 17);
> +qemu_fdt_setprop_cell(fdt, "/rtas", "set-indicator", 11);
> +qemu_fdt_setprop_cell(fdt, "/rtas", "display-character", 10);
> +qemu_fdt_setprop_cell(fdt, "/rtas", "write-pci-config", 9);
> +qemu_fdt_setprop_cell(fdt, "/rtas", "read-pci-config", 8);
> +/* Pegasos2 firmware misspells check-exception and guests use that */
> +qemu_fdt_setprop_cell(fdt, "/rtas", "check-execption", 7);
> +qemu_fdt_setprop_cell(fdt, "/rtas", "event-scan", 6);
> +qemu_fdt_setprop_cell(fdt, "/rtas", "set-time-of-day", 4);
> +qemu_fdt_setprop_cell(fdt, "/rtas", "get-time-of-day", 3);
> +qemu_fdt_setprop_cell(fdt, "/rtas", "nvram-store", 2);
> +qemu_fdt_setprop_cell(fdt, "/rtas", "nvram-fetch", 1);
> +qemu_fdt_setprop_cell(fdt, "/rtas", "restart-rtas", 0);
> +qemu_fdt_setprop_cell(fdt, "/rtas", "rtas-error-log-max", 0);
> +qemu_fdt_setprop_cell(fdt, "/rtas", "rtas-event-scan-rate", 0);
> +qemu_fdt_setprop_cell(fdt, "/rtas", "rtas-display-device", 0);
> +qemu_fdt_setprop_cell(fdt, "/rtas", "rtas-size", 20);
> +qemu_fdt_setprop_cell(fdt, "/rtas", "rtas-version", 1);

..you really want to be using your symbolic names here as well.

-- 
David Gibson| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson


signature.asc
Description: PGP signature


[Bug 1679358] Re: ARM: RES0/RES1 SCTLR fields not read-only

2021-07-07 Thread Launchpad Bug Tracker
[Expired for QEMU because there has been no activity for 60 days.]

** Changed in: qemu
   Status: Incomplete => Expired

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1679358

Title:
  ARM: RES0/RES1 SCTLR fields not read-only

Status in QEMU:
  Expired

Bug description:
  There are fields in SCTLR that are RAO/SBOP or WI or in the case of
  the RR field, accessible only in secure mode. Currently it seems that
  qemu just propagates any write to SCTLR to the register and this
  screwed up in a bootloader that I am debugging.

To manage notifications about this bug go to:
https://bugs.launchpad.net/qemu/+bug/1679358/+subscriptions



[Bug 1892533] Re: Meson: Missing config-host.mak

2021-07-07 Thread Launchpad Bug Tracker
[Expired for QEMU because there has been no activity for 60 days.]

** Changed in: qemu
   Status: Incomplete => Expired

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1892533

Title:
  Meson: Missing config-host.mak

Status in QEMU:
  Expired

Bug description:
  Wanted to give a try to the new build system, but a simple "meson
  build" gives that error:

  meson.build:15:0: ERROR: Failed to load
  /home/xclaesse/programmation/qemu/build/config-host.mak: [Errno 2] No
  such file or directory: '/home/xclaesse/programmation/qemu/build
  /config-host.mak'

To manage notifications about this bug go to:
https://bugs.launchpad.net/qemu/+bug/1892533/+subscriptions



[Bug 1890395] Re: qmp/hmp: crash if client closes socket too early

2021-07-07 Thread Launchpad Bug Tracker
[Expired for QEMU because there has been no activity for 60 days.]

** Changed in: qemu
   Status: Incomplete => Expired

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1890395

Title:
  qmp/hmp: crash if client closes socket too early

Status in QEMU:
  Expired

Bug description:
  Qemu crashes on qmp/hmp command if client closes connection before
  reading the whole response from the socket.

  Reproducer:

  1. Start arbitrary vm via qemu
  2. Send e.g. hmp command 'info mem'
  3. Abort before whole response came back

  
  Stack Trace:

  Stack trace of thread 6493:
  #0  0x559902fd2d30 object_get_class (qemu-system-x86_64)
  #1  0x559903071020 qio_channel_create_watch (qemu-system-x86_64)
  #2  0x55990305f437 qemu_chr_fe_add_watch (qemu-system-x86_64)
  #3  0x559902f7340d monitor_flush_locked (qemu-system-x86_64)
  #4  0x559902f7360e monitor_flush_locked (qemu-system-x86_64)
  #5  0x559902f74342 qmp_send_response (qemu-system-x86_64)
  #6  0x559902f74409 monitor_qmp_respond (qemu-system-x86_64)
  #7  0x559902f74bc0 monitor_qmp_bh_dispatcher (qemu-system-x86_64)
  #8  0x5599030c37be aio_bh_call (qemu-system-x86_64)
  #9  0x5599030c6dd0 aio_dispatch (qemu-system-x86_64)
  #10 0x5599030c369e aio_ctx_dispatch (qemu-system-x86_64)
  #11 0x7f5b6d37f417 g_main_context_dispatch (libglib-2.0.so.0)
  #12 0x5599030c5e0a glib_pollfds_poll (qemu-system-x86_64)
  #13 0x559902dd75df main_loop (qemu-system-x86_64)
  #14 0x559902c59f49 main (qemu-system-x86_64)
  #15 0x7f5b6bfeab97 __libc_start_main (libc.so.6)
  #16 0x559902c5d38a _start (qemu-system-x86_64)

  #0  0x559902fd2d30 in object_get_class (obj=obj@entry=0x0) at 
./qom/object.c:909
  #1  0x559903071020 in qio_channel_create_watch (ioc=0x0, 
condition=(G_IO_OUT | G_IO_HUP)) at ./io/channel.c:281
  klass = 
  __func__ = "qio_channel_create_watch"
  ret = 
  #2  0x55990305f437 in qemu_chr_fe_add_watch (be=be@entry=0x559905a7f460, 
cond=cond@entry=(G_IO_OUT | G_IO_HUP), func=func@entry=0x559902f734b0 
, user_data=user_data@entry=0x559905a7f460) at 
./chardev/char-fe.c:367
  s = 0x5599055782c0
  src = 
  tag = 
  __func__ = "qemu_chr_fe_add_watch"
  #3  0x559902f7340d in monitor_flush_locked (mon=mon@entry=0x559905a7f460) 
at ./monitor/monitor.c:140
  rc = 219264
  len = 3865832
  buf = 0x7f5afc00e480 "{\"return\": 
\"9eb48000-9eb480099000 ", '0' , "99000 
-rw\\r\\n9eb480099000-9eb48009b000 ", '0' , "2000 
-r-\\r\\n9eb48009b000-9eb48680 06765000 
-rw\\r\\n9eb4868000"...
  #4  0x559902f7360e in monitor_flush_locked (mon=0x559905a7f460) at 
./monitor/monitor.c:160
  i = 3865830
  c = 
  #5  0x559902f7360e in monitor_puts (mon=mon@entry=0x559905a7f460, 
str=0x7f5aa1eda010 "{\"return\": \"9eb48000-9eb480099000 ", '0' 
, "99000 -rw\\r\\n9eb480099000-9eb48009b000 ", '0' 
, "2000 -r-\\r\\n9eb48009b000-9eb48680 
06765000 -rw\\r\\n9eb4868000"...) at ./monitor/monitor.c:167
  i = 3865830
  c = 
  #6  0x559902f74342 in qmp_send_response (mon=0x559905a7f460, 
rsp=) at ./monitor/qmp.c:119
  data = 
  json = 0x559906c88380
  #7  0x559902f74409 in monitor_qmp_respond (rsp=0x559905bbf740, 
mon=0x559905a7f460) at ./monitor/qmp.c:132
  old_mon = 
  rsp = 0x559905bbf740
  error = 
  #8  0x559902f74409 in monitor_qmp_dispatch (mon=0x559905a7f460, 
req=) at ./monitor/qmp.c:161
  old_mon = 
  rsp = 0x559905bbf740
  error = 
  #9  0x559902f74bc0 in monitor_qmp_bh_dispatcher (data=) at 
./monitor/qmp.c:234
  id = 
  rsp = 
  need_resume = true
  mon = 0x559905a7f460
  __PRETTY_FUNCTION__ = "monitor_qmp_bh_dispatcher"
  #10 0x5599030c37be in aio_bh_call (bh=0x559905571b40) at ./util/async.c:89
  bh = 0x559905571b40
  bhp = 
  next = 0x5599055718f0
  ret = 1
  deleted = false
  #11 0x5599030c37be in aio_bh_poll (ctx=ctx@entry=0x5599055706f0) at 
./util/async.c:117
  bh = 0x559905571b40
  bhp = 
  next = 0x5599055718f0
  ret = 1
  deleted = false
  #12 0x5599030c6dd0 in aio_dispatch (ctx=0x5599055706f0) at 
./util/aio-posix.c:459
  #13 0x5599030c369e in aio_ctx_dispatch (source=, 
callback=, user_data=) at ./util/async.c:260
  ctx = 
  #14 0x7f5b6d37f417 in g_main_context_dispatch () at 
/usr/lib/x86_64-linux-gnu/libglib-2.0.so.0
  #15 0x5599030c5e0a in glib_pollfds_poll () at ./util/main-loop.c:219
  context = 0x559905652420
  pfds = 
  context = 0x559905652420
  ret = 1
  mlpoll = {state = 0, timeout = 4294967295, pollfds = 

[Bug 1892541] Re: qemu 5.1 on windows 10 with whpx can not install Windows 7 guest

2021-07-07 Thread Launchpad Bug Tracker
[Expired for QEMU because there has been no activity for 60 days.]

** Changed in: qemu
   Status: Incomplete => Expired

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1892541

Title:
  qemu 5.1 on windows 10 with whpx can not install Windows 7 guest

Status in QEMU:
  Expired

Bug description:
  Command install and start win7

  qemu-system-x86_64  -smbios type=1,uuid=e77aacd6-0acb-4a5c-9a83-a80d029b36f1 
-smp 2,sockets=1,cores=2,maxcpus=2 -nodefaults -boot 
menu=on,strict=on,reboot-timeout=1000 -m 8192 ^
  -readconfig pve-q35-4.0.cfg ^
  -device vmgenid,guid=6d4865f5-353e-4cf1-b8ca-f5abbd062736 -device 
usb-tablet,id=tablet,bus=ehci.0,port=1 -device VGA,id=vga,bus=pcie.0,addr=0x1 ^
  -device virtio-balloon-pci,id=balloon0,bus=pci.0,addr=0x3 ^
  -drive 
file=en_windows_7_ultimate_with_sp1_x64_dvd_u_677332.iso,if=none,id=drive-ide2,media=cdrom,aio=threads
 ^
  -device ide-cd,bus=ide.1,unit=0,drive=drive-ide2,id=ide2,bootindex=200 
-device ahci,id=ahci0,multifunction=on,bus=pci.0,addr=0x7 ^
  -drive 
id=drive-sata0,if=none,file=win7.qcow2,format=qcow2,cache=none,aio=native,detect-zeroes=on
 ^
  -device ide-hd,bus=ahci0.0,drive=drive-sata0,id=sata0,bootindex=100 ^
  -netdev type=tap,id=mynet0,ifname=tap1,script=no,downscript=no ^
  -device 
e1000,netdev=mynet0,mac=52:55:00:d1:55:10,bus=pci.0,addr=0x12,id=net0,bootindex=300
 ^
  -machine type=q35,accel=whpx

To manage notifications about this bug go to:
https://bugs.launchpad.net/qemu/+bug/1892541/+subscriptions



[Bug 1892684] Re: curl and wget segfaults when link has redirects

2021-07-07 Thread Launchpad Bug Tracker
[Expired for QEMU because there has been no activity for 60 days.]

** Changed in: qemu
   Status: Incomplete => Expired

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1892684

Title:
  curl and wget segfaults when link has redirects

Status in QEMU:
  Expired

Bug description:
  Hello,

  I've been using qemu-user-static with aarch64 docker images and faced the 
problem
  using binaries from the following release: 
https://github.com/multiarch/qemu-user-static/releases/tag/v5.0.0-2.

  curl and wget fail with segmentation fault when trying to fetch something 
from the link
  that has some redirects.

  In order to reproduce you can run the following:

  1) Register qemu on x86_64 machine
 docker run --rm --privileged multiarch/qemu-user-static --reset -p yes
  2) Run arm64v8 docker image and try to run wget or curl
 docker run --rm -it arm64v8/ubuntu bash
 $ apt update
 $ apt install curl wget
 $ curl -L http://erratique.ch/software/astring/releases/astring-0.8.3.tbz
 $ wget  http://erratique.ch/software/astring/releases/astring-0.8.3.tbz

  This error cannot be reproduced with binaries from the earlier release 
https://github.com/multiarch/qemu-user-static/releases/tag/v4.2.0-7.
  curl and wget work fine with the given link and don't fail with segfault when 
using
  older qemu-user-static binaries

To manage notifications about this bug go to:
https://bugs.launchpad.net/qemu/+bug/1892684/+subscriptions



[Bug 1893634] Re: blk_get_max_transfer() works only with sg

2021-07-07 Thread Launchpad Bug Tracker
[Expired for QEMU because there has been no activity for 60 days.]

** Changed in: qemu
   Status: Incomplete => Expired

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1893634

Title:
  blk_get_max_transfer() works only with sg

Status in QEMU:
  Expired

Bug description:
  blk_get_max_transfer() is supposed to be able to get the max_sectors
  queue limit of the scsi device on the host side and is used in both
  scsi-generic.c (for scsi-generic and scsi-block) and scsi-disk.c (for
  scsi-hd) to set/change the max_xfer_len (and opt_xfer_len in the case
  of scsi-generic.c).

  However, it only works with the sg driver in doing so. It cannot get
  the queue limit with the sd driver and simply returns MAX_INT.

  qemu version 5.1.0
  kernel version 5.8.5

  Btw, is there a particular reason that it doesn't MIN_NON_ZERO against
  the original max_xfer_len:
  https://github.com/qemu/qemu/blob/v5.1.0/hw/scsi/scsi-generic.c#L172?

To manage notifications about this bug go to:
https://bugs.launchpad.net/qemu/+bug/1893634/+subscriptions



[Bug 1890775] Re: Aten USB to Serial bridge does not work with qemu under Windows 10

2021-07-07 Thread Launchpad Bug Tracker
[Expired for QEMU because there has been no activity for 60 days.]

** Changed in: qemu
   Status: Incomplete => Expired

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1890775

Title:
  Aten USB to Serial bridge does not work with qemu under Windows 10

Status in QEMU:
  Expired

Bug description:
  I would like to use MSDOS 6.22 with qemu (unfortunately a lot of our test 
programs have been written in DOS).
  I tried to connect two laptops via RS232 port; one of the machines has a 
built-in serial port and runs native MSDOS 6.22 with Norton Commander 4.0. 
The other machine has only USB ports, and I tried to use a new Aten USB to Serial 
device. OK. qemu was started with -serial and -chardev parameters; at 
startup a window appeared with serial port settings such as baud rate, start bit, 
etc...

  QEMU started successfully, but the serial port could not be used
  because there was no activity on the USB serial adapter :(

  I tried the same configuration with VirtualBox and everything worked
  fine (the serial connection was established and several files were copied
  from one machine to the other), so the emulated serial port seems to
  have worked fine.

  I would like to use qemu; I just thought qemu would be better, simpler and
  faster...

  Is there a solution, or is this a qemu bug?

  Thank you!

To manage notifications about this bug go to:
https://bugs.launchpad.net/qemu/+bug/1890775/+subscriptions



[Bug 1893807] Re: Crash when launching windows qemu version from WSL2

2021-07-07 Thread Launchpad Bug Tracker
[Expired for QEMU because there has been no activity for 60 days.]

** Changed in: qemu
   Status: Incomplete => Expired

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1893807

Title:
  Crash when launching windows qemu version from WSL2

Status in QEMU:
  Expired

Bug description:
  Version: 5.1.0
  Command line from WSL2:
  qemu-system-x86_64.exe -hdd 
/home/jesus/proyectos/RWivOS/bin/RELEASE/image.hdd -m 4G -smp 4 -machine q35 
-debugcon stdio

  OS: Windows 10(64 bits) from WSL2 Ubuntu 18.04

  The error:
  
ERROR:/home/stefan/src/qemu/repo.or.cz/qemu/ar7/block.c:1325:bdrv_open_driver: 
assertion
   failed: (is_power_of_2(bs->bl.request_alignment))

  The problem I'm seeing when I launch from WSL2 only occurs when
  launched with the argument -hdd from WSL2; if I launch it from Windows
  pointing to the WSL path where the file is stored, it works.

  It occurs on other versions, i tried 4.1.0 too.

To manage notifications about this bug go to:
https://bugs.launchpad.net/qemu/+bug/1893807/+subscriptions



[Bug 1894804] Re: Second DEVICE_DELETED event missing during virtio-blk disk device detach

2021-07-07 Thread Launchpad Bug Tracker
[Expired for QEMU because there has been no activity for 60 days.]

** Changed in: qemu
   Status: Incomplete => Expired

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1894804

Title:
  Second DEVICE_DELETED event missing during virtio-blk disk device
  detach

Status in QEMU:
  Expired
Status in qemu package in Ubuntu:
  Expired

Bug description:
  We are in the process of moving OpenStack CI across to use 20.04 Focal
  as the underlying OS and encountering the following issue in any test
  attempting to detach disk devices from running QEMU instances.

  We can see a single DEVICE_DELETED event raised to libvirtd for the
  /machine/peripheral/virtio-disk1/virtio-backend device but we do not
  see a second event for the actual disk. As a result the device is
  still marked as present in libvirt but QEMU reports it as missing in
  subsequent attempts to remove the device.

  The following log snippets can also be found in the following pastebin
  that's slightly easier to grok:

  http://paste.openstack.org/show/797564/

  https://review.opendev.org/#/c/746981/ libvirt: Bump
  MIN_{LIBVIRT,QEMU}_VERSION and NEXT_MIN_{LIBVIRT,QEMU}_VERSION

  https://zuul.opendev.org/t/openstack/build/4c56def513884c5eb3ba7b0adf7fa260
  nova-ceph-multistore

  
https://zuul.opendev.org/t/openstack/build/4c56def513884c5eb3ba7b0adf7fa260/log/controller/logs/dpkg-l.txt

  ii  libvirt-daemon   6.0.0-0ubuntu8.3 
 amd64Virtualization daemon
  ii  libvirt-daemon-driver-qemu   6.0.0-0ubuntu8.3 
 amd64Virtualization daemon QEMU connection driver
  ii  libvirt-daemon-system6.0.0-0ubuntu8.3 
 amd64Libvirt daemon configuration files
  ii  libvirt-daemon-system-systemd6.0.0-0ubuntu8.3 
 amd64Libvirt daemon configuration files (systemd)
  ii  libvirt-dev:amd646.0.0-0ubuntu8.3 
 amd64development files for the libvirt library
  ii  libvirt0:amd64   6.0.0-0ubuntu8.3 
 amd64library for interfacing with different virtualization systems
  [..]
  ii  qemu-block-extra:amd64   1:4.2-3ubuntu6.4 
 amd64extra block backend modules for qemu-system and qemu-utils
  ii  qemu-slof20191209+dfsg-1  
 all  Slimline Open Firmware -- QEMU PowerPC version
  ii  qemu-system  1:4.2-3ubuntu6.4 
 amd64QEMU full system emulation binaries
  ii  qemu-system-arm  1:4.2-3ubuntu6.4 
 amd64QEMU full system emulation binaries (arm)
  ii  qemu-system-common   1:4.2-3ubuntu6.4 
 amd64QEMU full system emulation binaries (common files)
  ii  qemu-system-data 1:4.2-3ubuntu6.4 
 all  QEMU full system emulation (data files)
  ii  qemu-system-mips 1:4.2-3ubuntu6.4 
 amd64QEMU full system emulation binaries (mips)
  ii  qemu-system-misc 1:4.2-3ubuntu6.4 
 amd64QEMU full system emulation binaries (miscellaneous)
  ii  qemu-system-ppc  1:4.2-3ubuntu6.4 
 amd64QEMU full system emulation binaries (ppc)
  ii  qemu-system-s390x1:4.2-3ubuntu6.4 
 amd64QEMU full system emulation binaries (s390x)
  ii  qemu-system-sparc1:4.2-3ubuntu6.4 
 amd64QEMU full system emulation binaries (sparc)
  ii  qemu-system-x86  1:4.2-3ubuntu6.4 
 amd64QEMU full system emulation binaries (x86)
  ii  qemu-utils   1:4.2-3ubuntu6.4 
 amd64QEMU utilities

  
https://zuul.opendev.org/t/openstack/build/4c56def513884c5eb3ba7b0adf7fa260/log/controller/logs/libvirt/qemu
  /instance-003a_log.txt

  2020-09-07 19:29:55.021+: starting up libvirt version: 6.0.0, package: 
0ubuntu8.3 (Marc Deslauriers  Thu, 30 Jul 2020 
06:40:28 -0400), qemu version: 4.2.0Debian 1:4.2-3ubuntu6.4, kernel: 
5.4.0-45-generic, hostname: ubuntu-focal-ovh-bhs1-0019682147
  LC_ALL=C \
  PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin \
  HOME=/var/lib/libvirt/qemu/domain-86-instance-003a \
  XDG_DATA_HOME=/var/lib/libvirt/qemu/domain-86-instance-003a/.local/share \
  XDG_CACHE_HOME=/var/lib/libvirt/qemu/domain-86-instance-003a/.cache \
  XDG_CONFIG_HOME=/var/lib/libvirt/qemu/domain-86-instance-003a/.config \
  QEMU_AUDIO_DRV=none \
  /usr/bin/qemu-system-x86_64 \
  -name guest=instance-003a,debug-threads=on \
  -S \
  -object 

[Bug 1895053] Re: Cannot nspawn raspbian 10 [FAILED] Failed to start Journal Service.

2021-07-07 Thread Launchpad Bug Tracker
[Expired for QEMU because there has been no activity for 60 days.]

** Changed in: qemu
   Status: Incomplete => Expired

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1895053

Title:
  Cannot nspawn raspbian 10 [FAILED] Failed to start Journal Service.

Status in QEMU:
  Expired

Bug description:
  Hi, I'm using nspawn and asked the question @systemd-devel. They redirected 
me to you, guessing that nspawn calls a syscall or ioctl qemu isn't aware of and 
can't implement properly?
  They were like: "Sorry, that's not my department." ^^

  Maybe you can reproduce the issue or help me investigate what's wrong
  or put the ball right back into their court? :D

  Testscript:
  wget https://downloads.raspberrypi.org/raspios_lite_armhf_latest -o r.zip
  unzip r.zip
  LOOP=$(losetup --show -Pf *raspios-buster-armhf-lite.img)
  mount ${LOOP}p2 /mnt
  mount ${LOOP}p1 /mnt/boot
  systemd-nspawn --bind /usr/bin/qemu-arm-static --boot --directory=/mnt -- 
systemd.log_level=debug

  Output:
  see attachment

  System:
  uname -a
  Linux MArch 5.8.7-arch1-1 #1 SMP PREEMPT Sat, 05 Sep 2020 12:31:32 +
  x86_64 GNU/Linux

  qemu-arm-static --version
  qemu-arm version 5.1.0

  systemd-nspawn --version
  systemd 246 (246.4-1-arch)
  +PAM +AUDIT -SELINUX -IMA -APPARMOR +SMACK -SYSVINIT +UTMP +LIBCRYPTSETUP
  +GCRYPT +GNUTLS +ACL +XZ +LZ4 +ZSTD +SECCOMP +BLKID +ELFUTILS +KMOD +IDN2 -IDN
  +PCRE2 default-hierarchy=hybrid

To manage notifications about this bug go to:
https://bugs.launchpad.net/qemu/+bug/1895053/+subscriptions



[Bug 1891829] Re: High bit(s) sometimes set high on rcvd serial bytes when char size < 8 bits

2021-07-07 Thread Launchpad Bug Tracker
[Expired for QEMU because there has been no activity for 60 days.]

** Changed in: qemu
   Status: Incomplete => Expired

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1891829

Title:
  High bit(s) sometimes set high on rcvd serial bytes when char size < 8
  bits

Status in QEMU:
  Expired

Bug description:
  I *believe* (not confirmed) that the old standard PC serial ports,
  when configured with a character size of 7 bits or less, should set
  non-data bits to 0 when the CPU reads received chars from the read
  register.  qemu doesn't do this.

  Windows 1.01 will not make use of a serial mouse when bit 7 is 1.  The
  ID byte that the mouse sends on reset is ignored.  I added a temporary
  hack to set bit 7 to 0 on all incoming bytes, and this convinced
  windows 1.01 to use the mouse.

  note 1:  This was using a real serial mouse through a passed-through
  serial port.  The emulated msmouse doesn't work for other reasons.

  note 2:  The USB serial port I am passing through to the guest sets
  non-data bits to 1.  Not sure if this is the USB hardware or linux.

  note 3:  I also needed to add an -icount line to slow down the guest
  CPU, so that certain cpu-sensitive timing code in the guest didn't
  give up too quickly.

To manage notifications about this bug go to:
https://bugs.launchpad.net/qemu/+bug/1891829/+subscriptions



[Bug 1894617] Re: qemu-i386 mmap but offset greater than 32 bits

2021-07-07 Thread Launchpad Bug Tracker
[Expired for QEMU because there has been no activity for 60 days.]

** Changed in: qemu
   Status: Incomplete => Expired

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1894617

Title:
  qemu-i386 mmap but offset greater than 32 bits

Status in QEMU:
  Expired

Bug description:
  I don't know if it's a problem, but I did, and it bothered me for a long time.
  When I use qemu-i386 and interact with the video card device, an error 
occurs:

  18534 ioctl(4,DRM_IOCTL_MODE_GETENCODER,{39,0,0,0,0}) = 0 ({39,4,34,3,0})
  18534 ioctl(4,DRM_IOCTL_MODE_CREATE_DUMB,{1080,1920,32,0,0,0,0}) = 0 
({1080,1920,32,0,1,7680,8294400})
  18534 ioctl(4,DRM_IOCTL_MODE_ADDFB,{0,1920,1080,7680,32,24,1}) = 0 
({66,1920,1080,7680,32,24,1})
  18534 ioctl(4,DRM_IOCTL_MODE_MAP_DUMB,{1,0,0}) = 0 ({1,0,5543018496})
  18534 mmap2(NULL,8294400,PROT_READ|PROT_WRITE,MAP_SHARED,4,0x14a63c) = -1 
errno=22 (Invalid argument)

  "5543018496" is the offset through ioctl() and it is "0x14a63c000".
  In qemu:
  ret = target_mmap(arg1, arg2, arg3,
target_to_host_bitmask(arg4, mmap_flags_tbl),
arg5, arg6 << MMAP_SHIFT);

  The type of "arg6" is ulong. When using qemu-i386, arg6 can't be set to
  "0x14a63c000", so the call fails for my program.

  I want to find a good way to deal with this kind of problem, but I'm not very 
familiar with QEMU,
  so I came to ask how to deal with this problem.

  Thank you!

To manage notifications about this bug go to:
https://bugs.launchpad.net/qemu/+bug/1894617/+subscriptions



[Bug 1895219] Re: qemu git -vnc fails due to missing en-us keymap

2021-07-07 Thread Launchpad Bug Tracker
[Expired for QEMU because there has been no activity for 60 days.]

** Changed in: qemu
   Status: Incomplete => Expired

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1895219

Title:
  qemu git -vnc fails due to missing en-us keymap

Status in QEMU:
  Expired

Bug description:
  If trying to run qemu with -vnc :0, it will fail with:
  ./qemu-system-x86_64 -vnc :2
  qemu-system-x86_64: -vnc :2: could not read keymap file: 'en-us'

  share/keymaps is missing en-us keymap and only has sl and sv,
  confirmed previous stable versions had en-us.

  Tried with multiple targets, on arm64 and amd64

  Git commit hash: 9435a8b3dd35f1f926f1b9127e8a906217a5518a (head)

To manage notifications about this bug go to:
https://bugs.launchpad.net/qemu/+bug/1895219/+subscriptions



[Bug 1894804] Re: Second DEVICE_DELETED event missing during virtio-blk disk device detach

2021-07-07 Thread Launchpad Bug Tracker
[Expired for qemu (Ubuntu) because there has been no activity for 60
days.]

** Changed in: qemu (Ubuntu)
   Status: Incomplete => Expired

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1894804

Title:
  Second DEVICE_DELETED event missing during virtio-blk disk device
  detach

Status in QEMU:
  Expired
Status in qemu package in Ubuntu:
  Expired

Bug description:
  We are in the process of moving OpenStack CI across to use 20.04 Focal
  as the underlying OS and encountering the following issue in any test
  attempting to detach disk devices from running QEMU instances.

  We can see a single DEVICE_DELETED event raised to libvirtd for the
  /machine/peripheral/virtio-disk1/virtio-backend device but we do not
  see a second event for the actual disk. As a result the device is
  still marked as present in libvirt but QEMU reports it as missing in
  subsequent attempts to remove the device.

  The following log snippets can also be found in the following pastebin
  that's slightly easier to grok:

  http://paste.openstack.org/show/797564/

  https://review.opendev.org/#/c/746981/ libvirt: Bump
  MIN_{LIBVIRT,QEMU}_VERSION and NEXT_MIN_{LIBVIRT,QEMU}_VERSION

  https://zuul.opendev.org/t/openstack/build/4c56def513884c5eb3ba7b0adf7fa260
  nova-ceph-multistore

  
https://zuul.opendev.org/t/openstack/build/4c56def513884c5eb3ba7b0adf7fa260/log/controller/logs/dpkg-l.txt

  ii  libvirt-daemon   6.0.0-0ubuntu8.3 
 amd64Virtualization daemon
  ii  libvirt-daemon-driver-qemu   6.0.0-0ubuntu8.3 
 amd64Virtualization daemon QEMU connection driver
  ii  libvirt-daemon-system6.0.0-0ubuntu8.3 
 amd64Libvirt daemon configuration files
  ii  libvirt-daemon-system-systemd6.0.0-0ubuntu8.3 
 amd64Libvirt daemon configuration files (systemd)
  ii  libvirt-dev:amd646.0.0-0ubuntu8.3 
 amd64development files for the libvirt library
  ii  libvirt0:amd64   6.0.0-0ubuntu8.3 
 amd64library for interfacing with different virtualization systems
  [..]
  ii  qemu-block-extra:amd64   1:4.2-3ubuntu6.4 
 amd64extra block backend modules for qemu-system and qemu-utils
  ii  qemu-slof20191209+dfsg-1  
 all  Slimline Open Firmware -- QEMU PowerPC version
  ii  qemu-system  1:4.2-3ubuntu6.4 
 amd64QEMU full system emulation binaries
  ii  qemu-system-arm  1:4.2-3ubuntu6.4 
 amd64QEMU full system emulation binaries (arm)
  ii  qemu-system-common   1:4.2-3ubuntu6.4 
 amd64QEMU full system emulation binaries (common files)
  ii  qemu-system-data 1:4.2-3ubuntu6.4 
 all  QEMU full system emulation (data files)
  ii  qemu-system-mips 1:4.2-3ubuntu6.4 
 amd64QEMU full system emulation binaries (mips)
  ii  qemu-system-misc 1:4.2-3ubuntu6.4 
 amd64QEMU full system emulation binaries (miscellaneous)
  ii  qemu-system-ppc  1:4.2-3ubuntu6.4 
 amd64QEMU full system emulation binaries (ppc)
  ii  qemu-system-s390x1:4.2-3ubuntu6.4 
 amd64QEMU full system emulation binaries (s390x)
  ii  qemu-system-sparc1:4.2-3ubuntu6.4 
 amd64QEMU full system emulation binaries (sparc)
  ii  qemu-system-x86  1:4.2-3ubuntu6.4 
 amd64QEMU full system emulation binaries (x86)
  ii  qemu-utils   1:4.2-3ubuntu6.4 
 amd64QEMU utilities

  
https://zuul.opendev.org/t/openstack/build/4c56def513884c5eb3ba7b0adf7fa260/log/controller/logs/libvirt/qemu
  /instance-003a_log.txt

  2020-09-07 19:29:55.021+: starting up libvirt version: 6.0.0, package: 
0ubuntu8.3 (Marc Deslauriers  Thu, 30 Jul 2020 
06:40:28 -0400), qemu version: 4.2.0Debian 1:4.2-3ubuntu6.4, kernel: 
5.4.0-45-generic, hostname: ubuntu-focal-ovh-bhs1-0019682147
  LC_ALL=C \
  PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin \
  HOME=/var/lib/libvirt/qemu/domain-86-instance-003a \
  XDG_DATA_HOME=/var/lib/libvirt/qemu/domain-86-instance-003a/.local/share \
  XDG_CACHE_HOME=/var/lib/libvirt/qemu/domain-86-instance-003a/.cache \
  XDG_CONFIG_HOME=/var/lib/libvirt/qemu/domain-86-instance-003a/.config \
  QEMU_AUDIO_DRV=none \
  /usr/bin/qemu-system-x86_64 \
  -name guest=instance-003a,debug-threads=on \
  

[Bug 1895122] Re: qemu on wsl tests failed, this configured with debug

2021-07-07 Thread Launchpad Bug Tracker
[Expired for QEMU because there has been no activity for 60 days.]

** Changed in: qemu
   Status: Incomplete => Expired

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1895122

Title:
  qemu on wsl tests failed, this configured with debug

Status in QEMU:
  Expired

Bug description:
  
  ../configure --enable-debug-info --enable-debug

  **
  ERROR:../tests/test-util-filemonitor.c:704:test_file_monitor_events: 
assertion failed: (err == 0)
  Aborted (core dumped)

  
TESTiotest-qcow2: 271 [fail]
  QEMU  -- 
"/home/lygstate/work/qemu/build/tests/qemu-iotests/../../qemu-system-x86_64" 
-nodefaults -display none -accel qtest
  QEMU_IMG  -- 
"/home/lygstate/work/qemu/build/tests/qemu-iotests/../../qemu-img" 
  QEMU_IO   -- 
"/home/lygstate/work/qemu/build/tests/qemu-iotests/../../qemu-io"  --cache 
writeback --aio threads -f qcow2
  QEMU_NBD  -- 
"/home/lygstate/work/qemu/build/tests/qemu-iotests/../../qemu-nbd" 
  IMGFMT-- qcow2 (compat=1.1)
  IMGPROTO  -- file
  PLATFORM  -- Linux/x86_64 DESKTOP-BLLJ03T 4.4.0-19041-Microsoft
  TEST_DIR  -- /home/lygstate/work/qemu/build/tests/qemu-iotests/scratch
  SOCK_DIR  -- /tmp/tmp.eyVcw8nLNQ
  SOCKET_SCM_HELPER -- 
/home/lygstate/work/qemu/build/tests/qemu-iotests/socket_scm_helper

  --- /home/lygstate/work/qemu/tests/qemu-iotests/271.out   2020-09-10 
15:00:58.190763400 +0800
  +++ /home/lygstate/work/qemu/build/tests/qemu-iotests/271.out.bad 
2020-09-10 18:38:25.625090800 +0800
  @@ -37,6 +37,7 @@
   write -q -P PATTERN 0 64k
   L2 entry #0: 0x8005 
   discard -q 0 64k
  +Content mismatch at offset 0!
   L2 entry #0: 0x 
   write -q -c -P PATTERN 0 64k
   L2 entry #0: 0x4005 
  @@ -79,6 +80,7 @@
   write -q -P PATTERN 0 64k
   L2 entry #0: 0x8005 
   discard -q 0 64k
  +Content mismatch at offset 0!
   L2 entry #0: 0x 
   write -q -c -P PATTERN 0 64k
   L2 entry #0: 0x4005 
TESTiotest-qcow2: 283
TESTiotest-qcow2: 287
TESTiotest-qcow2: 290
TESTiotest-qcow2: 292
TESTiotest-qcow2: 299
  Not run: 060 181 220 259
  Failures: 271
  Failed 1 of 118 iotests
  make: [/home/lygstate/work/qemu/tests/Makefile.include:144: check-block] 
Error 1 (ignored)

To manage notifications about this bug go to:
https://bugs.launchpad.net/qemu/+bug/1895122/+subscriptions



[Bug 1894818] Re: COLO's guest VNC client hang after failover

2021-07-07 Thread Launchpad Bug Tracker
[Expired for QEMU because there has been no activity for 60 days.]

** Changed in: qemu
   Status: Incomplete => Expired

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1894818

Title:
  COLO's guest VNC client hang after failover

Status in QEMU:
  Expired

Bug description:
  Hello,

  After setting up COLO's primary and secondary VMs,
  I installed the vncserver and xrdp (apt install tightvncserver xrdp) inside 
the VM.

  I access the VM from another PC via VNC/RDP client, and everything is OK.
  Then, kill the primary VM and issue the failover commands.

  The expected result is that the VNC/RDP client can reconnect and
  resume automatically after failover. (I've confirmed the VNC/RDP
  client can reconnect automatically.)

  But in my test, the VNC client's screen hangs and can no longer be
  recovered. I need to restart the VNC client myself.

  BTW, it works well after killing SVM.

  Here is my QEMU networking device
  ```
  -device virtio-net-pci,id=e0,netdev=hn0 \
  -netdev 
tap,id=hn0,br=br0,vhost=off,helper=/usr/local/libexec/qemu-bridge-helper \
  ```

  Thanks.

  Regards,
  Derek

To manage notifications about this bug go to:
https://bugs.launchpad.net/qemu/+bug/1894818/+subscriptions



Re: [PATCH 2/2] docs/system: ppc: Update ppce500 documentation with eTSEC support

2021-07-07 Thread David Gibson
On Thu, Jul 08, 2021 at 09:22:48AM +0800, Bin Meng wrote:
> Hi David,
> 
> On Thu, Jul 8, 2021 at 9:08 AM David Gibson  
> wrote:
> >
> > On Tue, Jul 06, 2021 at 12:31:24PM +0800, Bin Meng wrote:
> > > This adds eTSEC support to the PowerPC `ppce500` machine documentation.
> > >
> > > Signed-off-by: Bin Meng 
> >
> > Applied to ppc-for-6.1, thanks.
> 
> Thanks!
> 
> Are both 2 patches applied, or only this one?

Both, sorry I wasn't clear.

-- 
David Gibson| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson


signature.asc
Description: PGP signature


Re: [PATCH 2/4] target/ppc: Allow virtual hypervisor on CPU without HV

2021-07-07 Thread David Gibson
On Sun, Jun 27, 2021 at 06:27:13PM +0200, BALATON Zoltan wrote:
> Change the assert in ppc_store_sdr1() to allow vhyp to be set on CPUs
> without HV bit. This allows using the vhyp interface for firmware
> emulation on pegasos2.
> 
> Signed-off-by: BALATON Zoltan 

Kind of a hack, but a simple one, so applied to ppc-for-6.1.

> ---
>  target/ppc/cpu.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/target/ppc/cpu.c b/target/ppc/cpu.c
> index 19d67b5b07..a29299882a 100644
> --- a/target/ppc/cpu.c
> +++ b/target/ppc/cpu.c
> @@ -72,7 +72,7 @@ void ppc_store_sdr1(CPUPPCState *env, target_ulong value)
>  {
>  PowerPCCPU *cpu = env_archcpu(env);
>  qemu_log_mask(CPU_LOG_MMU, "%s: " TARGET_FMT_lx "\n", __func__, value);
> -assert(!cpu->vhyp);
> +assert(!cpu->env.has_hv_mode || !cpu->vhyp);
>  #if defined(TARGET_PPC64)
>  if (mmu_is_64bit(env->mmu_model)) {
>  target_ulong sdr_mask = SDR_64_HTABORG | SDR_64_HTABSIZE;

-- 
David Gibson| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson


signature.asc
Description: PGP signature


Re: [PATCH] target/ppc: mtmsrd is an illegal instruction on BookE

2021-07-07 Thread David Gibson
On Tue, Jul 06, 2021 at 03:13:21PM +1000, Nicholas Piggin wrote:
> MSR is a 32-bit register in BookE and there is no mtmsrd instruction.
> 
> Cc: Christian Zigotzky 
> Signed-off-by: Nicholas Piggin 

Applied to ppc-for-6.1, thanks.

> ---
>  target/ppc/translate.c | 5 +
>  1 file changed, 5 insertions(+)
> 
> diff --git a/target/ppc/translate.c b/target/ppc/translate.c
> index f65d1e81ea..d1f482b0f3 100644
> --- a/target/ppc/translate.c
> +++ b/target/ppc/translate.c
> @@ -4940,6 +4940,11 @@ static void gen_mtcrf(DisasContext *ctx)
>  #if defined(TARGET_PPC64)
>  static void gen_mtmsrd(DisasContext *ctx)
>  {
> +if (unlikely(!is_book3s_arch2x(ctx))) {
> +gen_invalid(ctx);
> +return;
> +}
> +
>  CHK_SV;
>  
>  #if !defined(CONFIG_USER_ONLY)

-- 
David Gibson| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson


signature.asc
Description: PGP signature


Re: [PATCH] target/ppc/spapr: Update H_GET_CPU_CHARACTERISTICS L1D cache flush bits

2021-07-07 Thread David Gibson
On Tue, Jun 15, 2021 at 02:41:07PM +1000, Nicholas Piggin wrote:
> There are several new L1D cache flush bits added to the hcall which reflect
> hardware security features for speculative cache access issues.
> 
> These behaviours are now being specified as negative in order to simplify
> patched kernel compatibility with older firmware (a new problem found in
> existing systems would automatically be vulnerable).
> 
> Signed-off-by: Nicholas Piggin 

After our discussion, I'm convinced that the small behaviour change
for old machine types is safe.  I've added an explanatory note to the
commit message and merged to ppc-for-6.1.

> ---
>  hw/ppc/spapr_hcall.c   | 2 ++
>  include/hw/ppc/spapr.h | 3 +++
>  2 files changed, 5 insertions(+)
> 
> diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c
> index f25014afda..dfd9df469d 100644
> --- a/hw/ppc/spapr_hcall.c
> +++ b/hw/ppc/spapr_hcall.c
> @@ -1299,6 +1299,8 @@ static target_ulong 
> h_get_cpu_characteristics(PowerPCCPU *cpu,
>  behaviour |= H_CPU_BEHAV_L1D_FLUSH_PR;
>  break;
>  case SPAPR_CAP_FIXED:
> +behaviour |= H_CPU_BEHAV_NO_L1D_FLUSH_ENTRY;
> +behaviour |= H_CPU_BEHAV_NO_L1D_FLUSH_UACCESS;
>  break;
>  default: /* broken */
>  assert(safe_cache == SPAPR_CAP_BROKEN);
> diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
> index f05219f75e..0f25d081a8 100644
> --- a/include/hw/ppc/spapr.h
> +++ b/include/hw/ppc/spapr.h
> @@ -398,10 +398,13 @@ struct SpaprMachineState {
>  #define H_CPU_CHAR_THR_RECONF_TRIG  PPC_BIT(6)
>  #define H_CPU_CHAR_CACHE_COUNT_DIS  PPC_BIT(7)
>  #define H_CPU_CHAR_BCCTR_FLUSH_ASSIST   PPC_BIT(9)
> +
>  #define H_CPU_BEHAV_FAVOUR_SECURITY PPC_BIT(0)
>  #define H_CPU_BEHAV_L1D_FLUSH_PRPPC_BIT(1)
>  #define H_CPU_BEHAV_BNDS_CHK_SPEC_BAR   PPC_BIT(2)
>  #define H_CPU_BEHAV_FLUSH_COUNT_CACHE   PPC_BIT(5)
> +#define H_CPU_BEHAV_NO_L1D_FLUSH_ENTRY  PPC_BIT(7)
> +#define H_CPU_BEHAV_NO_L1D_FLUSH_UACCESSPPC_BIT(8)
>  
>  /* Each control block has to be on a 4K boundary */
>  #define H_CB_ALIGNMENT 4096

-- 
David Gibson| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson


signature.asc
Description: PGP signature


Re: [PATCH 1/4] ppc/pegasos2: Introduce Pegasos2MachineState structure

2021-07-07 Thread David Gibson
On Sun, Jun 27, 2021 at 06:27:13PM +0200, BALATON Zoltan wrote:
> Add own machine state structure which will be used to store state
> needed for firmware emulation.
> 
> Signed-off-by: BALATON Zoltan 
> Reviewed-by: Philippe Mathieu-Daudé 

Applied to ppc-for-6.1.

> ---
>  hw/ppc/pegasos2.c | 50 +++
>  1 file changed, 37 insertions(+), 13 deletions(-)
> 
> diff --git a/hw/ppc/pegasos2.c b/hw/ppc/pegasos2.c
> index 0bfd0928aa..07971175c9 100644
> --- a/hw/ppc/pegasos2.c
> +++ b/hw/ppc/pegasos2.c
> @@ -1,7 +1,7 @@
>  /*
>   * QEMU PowerPC CHRP (Genesi/bPlan Pegasos II) hardware System Emulator
>   *
> - * Copyright (c) 2018-2020 BALATON Zoltan
> + * Copyright (c) 2018-2021 BALATON Zoltan
>   *
>   * This work is licensed under the GNU GPL license version 2 or later.
>   *
> @@ -41,6 +41,15 @@
>  
>  #define BUS_FREQ_HZ 1
>  
> +#define TYPE_PEGASOS2_MACHINE  MACHINE_TYPE_NAME("pegasos2")
> +OBJECT_DECLARE_TYPE(Pegasos2MachineState, MachineClass, PEGASOS2_MACHINE)
> +
> +struct Pegasos2MachineState {
> +MachineState parent_obj;
> +PowerPCCPU *cpu;
> +DeviceState *mv;
> +};
> +
>  static void pegasos2_cpu_reset(void *opaque)
>  {
>  PowerPCCPU *cpu = opaque;
> @@ -51,9 +60,9 @@ static void pegasos2_cpu_reset(void *opaque)
>  
>  static void pegasos2_init(MachineState *machine)
>  {
> -PowerPCCPU *cpu = NULL;
> +Pegasos2MachineState *pm = PEGASOS2_MACHINE(machine);
> +CPUPPCState *env;
>  MemoryRegion *rom = g_new(MemoryRegion, 1);
> -DeviceState *mv;
>  PCIBus *pci_bus;
>  PCIDevice *dev;
>  I2CBus *i2c_bus;
> @@ -63,15 +72,16 @@ static void pegasos2_init(MachineState *machine)
>  uint8_t *spd_data;
>  
>  /* init CPU */
> -cpu = POWERPC_CPU(cpu_create(machine->cpu_type));
> -if (PPC_INPUT(>env) != PPC_FLAGS_INPUT_6xx) {
> +pm->cpu = POWERPC_CPU(cpu_create(machine->cpu_type));
> +env = >cpu->env;
> +if (PPC_INPUT(env) != PPC_FLAGS_INPUT_6xx) {
>  error_report("Incompatible CPU, only 6xx bus supported");
>  exit(1);
>  }
>  
>  /* Set time-base frequency */
> -cpu_ppc_tb_init(>env, BUS_FREQ_HZ / 4);
> -qemu_register_reset(pegasos2_cpu_reset, cpu);
> +cpu_ppc_tb_init(env, BUS_FREQ_HZ / 4);
> +qemu_register_reset(pegasos2_cpu_reset, pm->cpu);
>  
>  /* RAM */
>  memory_region_add_subregion(get_system_memory(), 0, machine->ram);
> @@ -96,16 +106,16 @@ static void pegasos2_init(MachineState *machine)
>  g_free(filename);
>  
>  /* Marvell Discovery II system controller */
> -mv = DEVICE(sysbus_create_simple(TYPE_MV64361, -1,
> -((qemu_irq 
> *)cpu->env.irq_inputs)[PPC6xx_INPUT_INT]));
> -pci_bus = mv64361_get_pci_bus(mv, 1);
> +pm->mv = DEVICE(sysbus_create_simple(TYPE_MV64361, -1,
> + ((qemu_irq 
> *)env->irq_inputs)[PPC6xx_INPUT_INT]));
> +pci_bus = mv64361_get_pci_bus(pm->mv, 1);
>  
>  /* VIA VT8231 South Bridge (multifunction PCI device) */
>  /* VT8231 function 0: PCI-to-ISA Bridge */
>  dev = pci_create_simple_multifunction(pci_bus, PCI_DEVFN(12, 0), true,
>TYPE_VT8231_ISA);
>  qdev_connect_gpio_out(DEVICE(dev), 0,
> -  qdev_get_gpio_in_named(mv, "gpp", 31));
> +  qdev_get_gpio_in_named(pm->mv, "gpp", 31));
>  
>  /* VT8231 function 1: IDE Controller */
>  dev = pci_create_simple(pci_bus, PCI_DEVFN(12, 1), "via-ide");
> @@ -129,8 +139,10 @@ static void pegasos2_init(MachineState *machine)
>  pci_vga_init(pci_bus);
>  }
>  
> -static void pegasos2_machine(MachineClass *mc)
> +static void pegasos2_machine_class_init(ObjectClass *oc, void *data)
>  {
> +MachineClass *mc = MACHINE_CLASS(oc);
> +
>  mc->desc = "Genesi/bPlan Pegasos II";
>  mc->init = pegasos2_init;
>  mc->block_default_type = IF_IDE;
> @@ -141,4 +153,16 @@ static void pegasos2_machine(MachineClass *mc)
>  mc->default_ram_size = 512 * MiB;
>  }
>  
> -DEFINE_MACHINE("pegasos2", pegasos2_machine)
> +static const TypeInfo pegasos2_machine_info = {
> +.name  = TYPE_PEGASOS2_MACHINE,
> +.parent= TYPE_MACHINE,
> +.class_init= pegasos2_machine_class_init,
> +.instance_size = sizeof(Pegasos2MachineState),
> +};
> +
> +static void pegasos2_machine_register_types(void)
> +{
> +type_register_static(_machine_info);
> +}
> +
> +type_init(pegasos2_machine_register_types)

-- 
David Gibson| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson


signature.asc
Description: PGP signature


Re: [PATCH] python: Configure tox to skip missing interpreters

2021-07-07 Thread John Snow
On Wed, Jun 30, 2021 at 5:00 PM Willian Rampazzo 
wrote:

> On Wed, Jun 30, 2021 at 3:46 PM Wainer dos Santos Moschetta
>  wrote:
> >
> > Currently tox tests against the installed interpreters, however if any
> > supported interpreter is absent then it will fail. It does not seem
> > reasonable to expect developers to have all supported interpreters
> > installed on their systems. Luckily tox can be configured to skip
> > missing interpreters.
> >
> > This changes the tox setup so that missing interpreters are skipped by
> > default. On the CI, however, we still want to enforce that it tests
> > against all supported interpreters, so on CI the
> > --skip-missing-interpreters=false option is passed to tox.
> >
> > Signed-off-by: Wainer dos Santos Moschetta 
> > ---
> > Tested locally with `make check-tox` on a system where I only have
> > Python 3.6 and 3.9 installed.
> > Tested on CI: https://gitlab.com/wainersm/qemu/-/jobs/1390010988
> > Still on CI, but I deliberately removed Python 3.8:
> https://gitlab.com/wainersm/qemu/-/jobs/1390046531
> >
> >  .gitlab-ci.d/static_checks.yml | 1 +
> >  python/Makefile| 5 -
> >  python/setup.cfg   | 1 +
> >  3 files changed, 6 insertions(+), 1 deletion(-)
> >
>
> Seems reasonable.
>
> Reviewed-by: Willian Rampazzo 
>
>
Nice, I get to have my cake and eat it too :)

Reviewed-by: John Snow 


Re: [PATCH 3/6] python/aqmp-tui: Add AQMP TUI draft

2021-07-07 Thread John Snow
On Fri, Jul 2, 2021 at 5:26 PM G S Niteesh Babu 
wrote:

> Added a draft of AQMP TUI.
>
> Implements the following basic features:
> 1) Command transmission/reception.
> 2) Shows events asynchronously.
> 3) Shows server status in the bottom status bar.
>
> Also added necessary pylint, mypy configurations
>
> Signed-off-by: G S Niteesh Babu 
> ---
>  python/qemu/aqmp/aqmp_tui.py | 246 +++
>  python/setup.cfg |  16 ++-
>  2 files changed, 261 insertions(+), 1 deletion(-)
>  create mode 100644 python/qemu/aqmp/aqmp_tui.py
>
> diff --git a/python/qemu/aqmp/aqmp_tui.py b/python/qemu/aqmp/aqmp_tui.py
> new file mode 100644
> index 00..8e9e8ac8ff
> --- /dev/null
> +++ b/python/qemu/aqmp/aqmp_tui.py
> @@ -0,0 +1,246 @@
> +# Copyright (c) 2021
> +#
> +# Authors:
> +#  Niteesh Babu G S 
> +#
> +# This work is licensed under the terms of the GNU GPL, version 2 or
> +# later.  See the COPYING file in the top-level directory.
> +
> +import argparse
> +import asyncio
> +import logging
> +import signal
> +
> +import urwid
> +import urwid_readline
> +
> +from .protocol import ConnectError
> +from .qmp_protocol import QMP, ExecInterruptedError, ExecuteError
> +from .util import create_task, pretty_traceback
> +
> +
> +UPDATE_MSG = 'UPDATE_MSG'
>
+
> +
> +class StatusBar(urwid.Text):
> +"""
> +A simple Text widget that currently only shows connection status.
> +"""
> +def __init__(self, text=''):
> +super().__init__(text, align='right')
> +
> +
> +class Editor(urwid_readline.ReadlineEdit):
> +"""
> +Support urwid_readline features along with
> +history support which lacks in urwid_readline
> +"""
> +def __init__(self, master):
> +super().__init__(caption='> ', multiline=True)
> +self.master = master
> +self.history = []
> +self.last_index = -1
> +self.show_history = False
> +
> +def keypress(self, size, key):
> +# TODO: Add some logic for down key and clean up logic if
> possible.
> +# Returning None means the key has been handled by this widget
> +# which otherwise is propogated to the parent widget to be
> +# handled
> +msg = self.get_edit_text()
> +if key == 'up' and not msg:
> +# Show the history when 'up arrow' is pressed with no input
> text.
> +# NOTE: The show_history logic is necessary because in
> 'multiline'
> +# mode (which we use) 'up arrow' is used to move between
> lines.
> +self.show_history = True
> +last_msg = self.history[self.last_index] if self.history else
> ''
> +self.set_edit_text(last_msg)
> +self.edit_pos = len(last_msg)
> +self.last_index += 1
> +elif key == 'up' and self.show_history:
> +if self.last_index < len(self.history):
> +self.set_edit_text(self.history[self.last_index])
> +self.edit_pos = len(self.history[self.last_index])
> +self.last_index += 1
> +elif key == 'meta enter':
> +# When using multiline, enter inserts a new line into the
> editor
> +# send the input to the server on alt + enter
> +self.master.cb_send_to_server(msg)
> +self.history.insert(0, msg)
> +self.set_edit_text('')
> +self.last_index = 0
> +self.show_history = False
> +else:
> +self.show_history = False
> +self.last_index = 0
> +return super().keypress(size, key)
> +return None
> +
> +
> +class EditorWidget(urwid.Filler):
> +"""
> +Wraps CustomEdit
> +"""
> +def __init__(self, master):
> +super().__init__(Editor(master), valign='top')
> +
> +
> +class HistoryBox(urwid.ListBox):
> +"""
> +Shows all the QMP message transmitted/received
> +"""
> +def __init__(self, master):
> +self.master = master
> +self.history = urwid.SimpleFocusListWalker([])
> +super().__init__(self.history)
> +
> +def add_to_history(self, history):
> +self.history.append(urwid.Text(history))
> +if self.history:
> +self.history.set_focus(len(self.history) - 1)
> +
> +
> +class HistoryWindow(urwid.Frame):
> +"""
> +Composes the HistoryBox and EditorWidget
> +"""
> +def __init__(self, master):
> +self.master = master
> +self.editor = EditorWidget(master)
> +self.editor_widget = urwid.LineBox(self.editor)
> +self.history = HistoryBox(master)
> +self.body = urwid.Pile([('weight', 80, self.history),
> +('weight', 10, self.editor_widget)])
> +super().__init__(self.body)
> +urwid.connect_signal(self.master, UPDATE_MSG,
> self.cb_add_to_history)
> +
> +def cb_add_to_history(self, msg):
> +self.history.add_to_history(msg)
> +
> +
> +class Window(urwid.Frame):
> +  

Re: [PATCH qemu v22] spapr: Implement Open Firmware client interface

2021-07-07 Thread Alexey Kardashevskiy




On 08/07/2021 12:40, David Gibson wrote:

On Fri, Jun 25, 2021 at 03:51:55PM +1000, Alexey Kardashevskiy wrote:

The PAPR platform describes an OS environment that's presented by
a combination of a hypervisor and firmware. The features it specifies
require collaboration between the firmware and the hypervisor.

Since the beginning, the runtime component of the firmware (RTAS) has
been implemented as a 20 byte shim which simply forwards it to
a hypercall implemented in qemu. The boot time firmware component is
SLOF - but a build that's specific to qemu, and has always needed to be
updated in sync with it. Even though we've managed to limit the amount
of runtime communication we need between qemu and SLOF, there's some,
and it has become increasingly awkward to handle as we've implemented
new features.

This implements a boot time OF client interface (CI) which is
enabled by a new "x-vof" pseries machine option (stands for "Virtual Open
Firmware"). When enabled, QEMU implements the custom H_OF_CLIENT hcall
which implements Open Firmware Client Interface (OF CI). This allows
using a smaller stateless firmware which does not have to manage
the device tree.

The new "vof.bin" firmware image is included with source code under
pc-bios/. It also includes RTAS blob.

This implements a handful of CI methods just to get -kernel/-initrd
working. In particular, this implements the device tree fetching and
simple memory allocator - "claim" (an OF CI memory allocator) and updates
"/memory@0/available" to report the client about available memory.

This implements changing some device tree properties which we know how
to deal with, the rest is ignored. To allow changes, this skips
fdt_pack() when x-vof=on as not packing the blob leaves some room for
appending.

In absence of SLOF, this assigns phandles to device tree nodes to make
device tree traversing work.

When x-vof=on, this adds "/chosen" every time QEMU (re)builds a tree.

This adds basic instances support which are managed by a hash map
ihandle -> [phandle].

Before the guest started, the used memory is:
0..e60 - the initial firmware
8000..1 - stack
40.. - kernel
3ea.. - initramdisk

This OF CI does not implement "interpret".

Unlike SLOF, this does not format uninitialized nvram. Instead, this
includes a disk image with pre-formatted nvram.

With this basic support, this can only boot into a kernel directly.
However, this is just enough for the petitboot kernel and initramdisk to
boot from any possible source. Note this requires a reasonably recent guest
kernel with:
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=df5be5be8735

The immediate benefit is a much faster boot time, which is especially
crucial with fully emulated early CPU bring-up environments. Also this
may come in handy when/if GRUB-in-the-userspace sees the light of day.

This separates VOF and sPAPR in a hope that VOF bits may be reused by
other POWERPC boards which do not support pSeries.

This assumes potential support for booting from QEMU backends
such as blockdev or netdev without devices/drivers used.

Signed-off-by: Alexey Kardashevskiy 


Applied to ppc-for-6.1.  However, I'm going to make a number of
comments on minor nits.  If they can be addressed as follow up
patches, I'd appreciate it.

[snip]

diff --git a/pc-bios/vof/vof.h b/pc-bios/vof/vof.h
new file mode 100644
index ..2d8958076907
--- /dev/null
+++ b/pc-bios/vof/vof.h
@@ -0,0 +1,43 @@
+/*
+ * Virtual Open Firmware
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+#include 
+
+typedef unsigned char uint8_t;
+typedef unsigned short uint16_t;
+typedef unsigned long uint32_t;
+typedef unsigned long long uint64_t;
+#define NULL (0)
+#define PROM_ERROR (-1u)
+typedef unsigned long ihandle;
+typedef unsigned long phandle;
+typedef int size_t;
+typedef void client(void);


AFAICT 'client' is only used in one place, might as well just open
code it.


+
+/* globals */
+extern void _prom_entry(void); /* OF CI entry point (i.e. this firmware) */
+
+void do_boot(unsigned long addr, unsigned long r3, unsigned long r4);
+
+/* libc */
+int strlen(const char *s);
+int strcmp(const char *s1, const char *s2);
+void *memcpy(void *dest, const void *src, size_t n);
+int memcmp(const void *ptr1, const void *ptr2, size_t n);
+void *memmove(void *dest, const void *src, size_t n);
+void *memset(void *dest, int c, size_t size);
+
+/* CI wrappers */
+void ci_panic(const char *str);
+phandle ci_finddevice(const char *path);
+uint32_t ci_getprop(phandle ph, const char *propname, void *prop, int len);
+
+/* booting from -kernel */
+void boot_from_memory(uint64_t initrd, uint64_t initrdsize);
+
+/* Entry points for CI and RTAS */
+extern uint32_t ci_entry(uint32_t params);
+extern unsigned long hv_rtas(unsigned long params);
+extern unsigned int hv_rtas_size;
diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 9e19c570327e..e9b6d0f58756 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -101,6 +101,7 @@
  

[PATCH REBASED v5 2/2] spapr: nvdimm: Introduce spapr-nvdimm device

2021-07-07 Thread Shivaprasad G Bhat
If the device backend is not persistent memory for the nvdimm, there is a
need for explicit IO flushes on the backend to ensure persistence.

On SPAPR, the issue is addressed by adding a new hcall to request for
an explicit flush from the guest when the backend is not pmem. So, the
approach here is to convey when the hcall flush is required in a device
tree property. The guest once it knows the device backend is not pmem,
makes the hcall whenever flush is required.

To set the device tree property, the patch introduces a new papr specific
device type inheriting the nvdimm device. When the backend doesn't have
pmem="yes", the device tree property "ibm,hcall-flush-required" is set,
and the guest makes hcall H_SCM_FLUSH requesting for an explicit flush.

Signed-off-by: Shivaprasad G Bhat 
---
 hw/ppc/spapr_nvdimm.c |   46 +
 include/hw/ppc/spapr_nvdimm.h |4 
 2 files changed, 50 insertions(+)

diff --git a/hw/ppc/spapr_nvdimm.c b/hw/ppc/spapr_nvdimm.c
index 4f8931ab15..4dc7c3f147 100644
--- a/hw/ppc/spapr_nvdimm.c
+++ b/hw/ppc/spapr_nvdimm.c
@@ -54,6 +54,8 @@ bool spapr_nvdimm_validate(HotplugHandler *hotplug_dev, 
NVDIMMDevice *nvdimm,
 {
 const MachineClass *mc = MACHINE_GET_CLASS(hotplug_dev);
 const MachineState *ms = MACHINE(hotplug_dev);
+PCDIMMDevice *dimm = PC_DIMM(nvdimm);
+MemoryRegion *mr = host_memory_backend_get_memory(dimm->hostmem);
 g_autofree char *uuidstr = NULL;
 QemuUUID uuid;
 int ret;
@@ -91,6 +93,14 @@ bool spapr_nvdimm_validate(HotplugHandler *hotplug_dev, 
NVDIMMDevice *nvdimm,
 return false;
 }
 
+if (object_dynamic_cast(OBJECT(nvdimm), TYPE_SPAPR_NVDIMM) &&
+(memory_region_get_fd(mr) < 0)) {
+error_setg(errp, "spapr-nvdimm device requires the "
+   "memdev %s to be of memory-backend-file type",
+   object_get_canonical_path_component(OBJECT(dimm->hostmem)));
+return false;
+}
+
 return true;
 }
 
@@ -162,6 +172,21 @@ static int spapr_dt_nvdimm(SpaprMachineState *spapr, void 
*fdt,
  "operating-system")));
 _FDT(fdt_setprop(fdt, child_offset, "ibm,cache-flush-required", NULL, 0));
 
+if (object_dynamic_cast(OBJECT(nvdimm), TYPE_SPAPR_NVDIMM)) {
+bool is_pmem = false;
+#ifdef CONFIG_LIBPMEM
+PCDIMMDevice *dimm = PC_DIMM(nvdimm);
+HostMemoryBackend *hostmem = dimm->hostmem;
+
+is_pmem = object_property_get_bool(OBJECT(hostmem), "pmem",
+   _abort);
+#endif
+if (!is_pmem) {
+_FDT(fdt_setprop(fdt, child_offset, "ibm,hcall-flush-required",
+ NULL, 0));
+}
+}
+
 return child_offset;
 }
 
@@ -585,7 +610,16 @@ static target_ulong h_scm_flush(PowerPCCPU *cpu, 
SpaprMachineState *spapr,
 }
 
 dimm = PC_DIMM(drc->dev);
+if (!object_dynamic_cast(OBJECT(dimm), TYPE_SPAPR_NVDIMM)) {
+return H_PARAMETER;
+}
+
 backend = MEMORY_BACKEND(dimm->hostmem);
+#ifdef CONFIG_LIBPMEM
+if (object_property_get_bool(OBJECT(backend), "pmem", _abort)) {
+return H_UNSUPPORTED;
+}
+#endif
 fd = memory_region_get_fd(>mr);
 
 if (fd < 0) {
@@ -766,3 +800,15 @@ static void spapr_scm_register_types(void)
 }
 
 type_init(spapr_scm_register_types)
+
+static TypeInfo spapr_nvdimm_info = {
+.name  = TYPE_SPAPR_NVDIMM,
+.parent= TYPE_NVDIMM,
+};
+
+static void spapr_nvdimm_register_types(void)
+{
+type_register_static(_nvdimm_info);
+}
+
+type_init(spapr_nvdimm_register_types)
diff --git a/include/hw/ppc/spapr_nvdimm.h b/include/hw/ppc/spapr_nvdimm.h
index 24d8e37b33..fb4e56418e 100644
--- a/include/hw/ppc/spapr_nvdimm.h
+++ b/include/hw/ppc/spapr_nvdimm.h
@@ -13,6 +13,10 @@
 #include "hw/mem/nvdimm.h"
 #include "migration/vmstate.h"
 
+#define TYPE_SPAPR_NVDIMM "spapr-nvdimm"
+OBJECT_DECLARE_SIMPLE_TYPE(SpaprNVDIMMDevice, SPAPR_NVDIMM)
+
+typedef struct SpaprNVDIMMDevice  SpaprNVDIMMDevice;
 typedef struct SpaprDrc SpaprDrc;
 typedef struct SpaprMachineState SpaprMachineState;
 





Re: [PATCH v5 2/3] fuzz: add an instrumentation filter

2021-07-07 Thread Alexander Bulekov
On 210629 2341, Alexander Bulekov wrote:
> By default, -fsanitize=fuzzer instruments all code with coverage
> information. However, this means that libfuzzer will track coverage over
> hundreds of source files that are unrelated to virtual-devices. This
> means that libfuzzer will optimize inputs for coverage observed in timer
> code, memory APIs etc. This slows down the fuzzer and stores many inputs
> that are not relevant to the actual virtual-devices.
> 
> With this change, clang versions that support the
> "-fsanitize-coverage-allowlist" will only instrument a subset of the
> compiled code, that is directly related to virtual-devices.
> 
> Signed-off-by: Alexander Bulekov 

Ping?

Are there any other device-related subdirectories we should add to the
filter besides slirp? I'm considering net/, since it does not seem to
generate much coverage noise, and there might be some tricky codepaths
there that benefit from the coverage guidance.

-Alex

> ---
>  configure| 13 +
>  scripts/oss-fuzz/instrumentation-filter-template | 14 ++
>  2 files changed, 27 insertions(+)
>  create mode 100644 scripts/oss-fuzz/instrumentation-filter-template
> 
> diff --git a/configure b/configure
> index 38704b4e11..3b6ca054b9 100755
> --- a/configure
> +++ b/configure
> @@ -5189,6 +5189,11 @@ if test "$fuzzing" = "yes" && test -z 
> "${LIB_FUZZING_ENGINE+xxx}"; then
>  error_exit "Your compiler doesn't support -fsanitize=fuzzer"
>  exit 1
>fi
> +  have_clang_coverage_filter=no
> +  echo > $TMPTXT
> +  if compile_prog "$CPU_CFLAGS -Werror -fsanitize=fuzzer 
> -fsanitize-coverage-allowlist=$TMPTXT" ""; then
> +  have_clang_coverage_filter=yes
> +  fi
>  fi
>  
>  # Thread sanitizer is, for now, much noisier than the other sanitizers;
> @@ -6120,6 +6125,14 @@ if test "$fuzzing" = "yes" ; then
>  # rule for the fuzzer adds these to the link_args. They need to be
>  # configurable, to support OSS-Fuzz
>  FUZZ_EXE_LDFLAGS="-fsanitize=fuzzer"
> +
> +# Specify a filter to only instrument code that is directly related to
> +# virtual-devices.
> +if test "$have_clang_coverage_filter" = "yes" ; then
> +cp "$source_path/scripts/oss-fuzz/instrumentation-filter-template" \
> +instrumentation-filter
> +QEMU_CFLAGS="$QEMU_CFLAGS 
> -fsanitize-coverage-allowlist=instrumentation-filter"
> +fi
>else
>  FUZZ_EXE_LDFLAGS="$LIB_FUZZING_ENGINE"
>fi
> diff --git a/scripts/oss-fuzz/instrumentation-filter-template 
> b/scripts/oss-fuzz/instrumentation-filter-template
> new file mode 100644
> index 00..44e853159c
> --- /dev/null
> +++ b/scripts/oss-fuzz/instrumentation-filter-template
> @@ -0,0 +1,14 @@
> +# Code that we actually want the fuzzer to target
> +# See: 
> https://clang.llvm.org/docs/SanitizerCoverage.html#disabling-instrumentation-without-source-modification
> +#
> +src:*/hw/*
> +src:*/include/hw/*
> +src:*/slirp/*
> +
> +# We don't care about coverage over fuzzer-specific code, however we should
> +# instrument the fuzzer entry-point so libFuzzer always sees at least some
> +# coverage - otherwise it will exit after the first input
> +src:*/tests/qtest/fuzz/fuzz.c
> +
> +# Enable instrumentation for all functions in those files
> +fun:*
> -- 
> 2.28.0
> 



[PATCH REBASED v5 1/2] spapr: nvdimm: Implement H_SCM_FLUSH hcall

2021-07-07 Thread Shivaprasad G Bhat
The patch adds support for the SCM flush hcall for the nvdimm devices.
It is made available for exploitation by the guest through the next patch.

The hcall semantics are such that the flush returns one of the
H_LONG_BUSY codes, along with a continue_token, when the operation is
expected to take a longer time. The hcall is then called again, providing
the continue_token, to get the status. So, all fresh requests are put into
a 'pending' list and a flush worker is submitted to the thread pool. The
thread pool completion callbacks move the requests to 'completed' list,
which are cleaned up after reporting to guest in subsequent hcalls to
get the status.

The semantics makes it necessary to preserve the continue_tokens and
their return status across migrations. So, the completed flush states
are forwarded to the destination and the pending ones are restarted
at the destination in post_load. The necessary nvdimm flush specific
vmstate structures are added to the spapr machine vmstate.

Signed-off-by: Shivaprasad G Bhat 
---
 hw/ppc/spapr.c|6 +
 hw/ppc/spapr_nvdimm.c |  240 +
 include/hw/ppc/spapr.h|   11 ++
 include/hw/ppc/spapr_nvdimm.h |   13 ++
 4 files changed, 269 insertions(+), 1 deletion(-)

diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 4dd90b75cc..546d825dde 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -1622,6 +1622,8 @@ static void spapr_machine_reset(MachineState *machine)
 spapr->ov5_cas = spapr_ovec_clone(spapr->ov5);
 }
 
+spapr_nvdimm_finish_flushes(spapr);
+
 /* DRC reset may cause a device to be unplugged. This will cause troubles
  * if this device is used by another device (eg, a running vhost backend
  * will crash QEMU if the DIMM holding the vring goes away). To avoid such
@@ -2018,6 +2020,7 @@ static const VMStateDescription vmstate_spapr = {
 _spapr_cap_ccf_assist,
 _spapr_cap_fwnmi,
 _spapr_fwnmi,
+_spapr_nvdimm_states,
 NULL
 }
 };
@@ -3014,6 +3017,9 @@ static void spapr_machine_init(MachineState *machine)
 }
 
 qemu_cond_init(>fwnmi_machine_check_interlock_cond);
+
+QLIST_INIT(>pending_flush_states);
+QLIST_INIT(>completed_flush_states);
 }
 
 #define DEFAULT_KVM_TYPE "auto"
diff --git a/hw/ppc/spapr_nvdimm.c b/hw/ppc/spapr_nvdimm.c
index 91de1052f2..4f8931ab15 100644
--- a/hw/ppc/spapr_nvdimm.c
+++ b/hw/ppc/spapr_nvdimm.c
@@ -22,6 +22,7 @@
  * THE SOFTWARE.
  */
 #include "qemu/osdep.h"
+#include "qemu/cutils.h"
 #include "qapi/error.h"
 #include "hw/ppc/spapr_drc.h"
 #include "hw/ppc/spapr_nvdimm.h"
@@ -30,6 +31,7 @@
 #include "hw/ppc/fdt.h"
 #include "qemu/range.h"
 #include "hw/ppc/spapr_numa.h"
+#include "block/thread-pool.h"
 
 /* DIMM health bitmap bitmap indicators. Taken from kernel's papr_scm.c */
 /* SCM device is unable to persist memory contents */
@@ -375,6 +377,243 @@ static target_ulong h_scm_bind_mem(PowerPCCPU *cpu, 
SpaprMachineState *spapr,
 return H_SUCCESS;
 }
 
+static uint64_t flush_token;
+
+static int flush_worker_cb(void *opaque)
+{
+int ret = H_SUCCESS;
+SpaprNVDIMMDeviceFlushState *state = opaque;
+
+/* flush raw backing image */
+if (qemu_fdatasync(state->backend_fd) < 0) {
+error_report("papr_scm: Could not sync nvdimm to backend file: %s",
+ strerror(errno));
+ret = H_HARDWARE;
+}
+
+return ret;
+}
+
+static void spapr_nvdimm_flush_completion_cb(void *opaque, int hcall_ret)
+{
+SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
+SpaprNVDIMMDeviceFlushState *state = opaque;
+
+state->hcall_ret = hcall_ret;
+QLIST_REMOVE(state, node);
+QLIST_INSERT_HEAD(>completed_flush_states, state, node);
+}
+
+static const VMStateDescription vmstate_spapr_nvdimm_flush_state = {
+ .name = "spapr_nvdimm_flush_state",
+ .version_id = 1,
+ .minimum_version_id = 1,
+ .fields = (VMStateField[]) {
+ VMSTATE_UINT64(continue_token, SpaprNVDIMMDeviceFlushState),
+ VMSTATE_INT64(hcall_ret, SpaprNVDIMMDeviceFlushState),
+ VMSTATE_UINT32(drcidx, SpaprNVDIMMDeviceFlushState),
+ VMSTATE_END_OF_LIST()
+ },
+};
+
+static bool spapr_nvdimm_states_needed(void *opaque)
+{
+ SpaprMachineState *spapr = (SpaprMachineState *)opaque;
+
+ return (!QLIST_EMPTY(>pending_flush_states) ||
+ !QLIST_EMPTY(>completed_flush_states));
+}
+
+static int spapr_nvdimm_post_load(void *opaque, int version_id)
+{
+SpaprMachineState *spapr = (SpaprMachineState *)opaque;
+SpaprNVDIMMDeviceFlushState *state, *next;
+PCDIMMDevice *dimm;
+HostMemoryBackend *backend = NULL;
+ThreadPool *pool = aio_get_thread_pool(qemu_get_aio_context());
+SpaprDrc *drc;
+
+QLIST_FOREACH_SAFE(state, >completed_flush_states, node, next) {
+if (flush_token < state->continue_token) {
+flush_token = state->continue_token;
+}
+}
+
+

[PATCH REBASED v5 0/3] spapr: nvdimm: Introduce spapr-nvdimm device

2021-07-07 Thread Shivaprasad G Bhat
If the device backend is not persistent memory for the nvdimm, there
is a need for explicit IO flushes to ensure persistence.
is need for explicit IO flushes to ensure persistence.

On SPAPR, the issue is addressed by adding a new hcall to request for
an explicit flush from the guest when the backend is not pmem.
So, the approach here is to convey when the hcall flush is required
in a device tree property. The guest once it knows the device needs
explicit flushes, makes the hcall as and when required.

It was suggested to create a new device type to address the
explicit flush for such backends on PPC instead of extending the
generic nvdimm device with new property. So, the patch introduces
the spapr-nvdimm device. The new device inherits the nvdimm device
with the new behaviour such that if the backend has pmem=no, the
device tree property is set.

The below demonstration shows the map_sync behavior for non-pmem
backends.
(https://github.com/avocado-framework-tests/avocado-misc-tests/blob/master/memory/ndctl.py.data/map_sync.c)

The pmem0 is from spapr-nvdimm with backend pmem=yes, and pmem1 is
from spapr-nvdimm with pmem=no, mounted as
/dev/pmem0 on /mnt1 type xfs 
(rw,relatime,attr2,dax=always,inode64,logbufs=8,logbsize=32k,noquota)
/dev/pmem1 on /mnt2 type xfs 
(rw,relatime,attr2,dax=always,inode64,logbufs=8,logbsize=32k,noquota)

[root@atest-guest ~]# ./mapsync /mnt1/newfile > When pmem=yes
[root@atest-guest ~]# ./mapsync /mnt2/newfile > when pmem=no
Failed to mmap  with Operation not supported

First patch implements the hcall, adds the necessary
vmstate properties to spapr machine structure for carrying the hcall
status during save-restore. The nature of the hcall being asynchronous,
the patch uses aio utilities to offload the flush. The second patch
introduces the spapr-nvdimm device, adds the device tree property
for the guest when spapr-nvdimm is used with pmem="no" on the backend.

The kernel changes to exploit this hcall are at
https://github.com/linuxppc/linux/commit/75b7c05ebf9026.patch

---
v4 - https://lists.gnu.org/archive/html/qemu-devel/2021-04/msg05982.html
Changes from v4:
  - Introduce spapr-nvdimm device with nvdimm device as the parent.
  - The new spapr-nvdimm has no new properties. As this is a new
device and there is no migration related dependencies to be
taken care of, the device behavior is made to set the device tree
property and enable hcall when the device type spapr-nvdimm is
used with pmem="no"
  - Fixed commit messages
  - Added checks to ensure the backend is actually a file and not memory
  - Addressed things pointed out by Eric

v3 - https://lists.gnu.org/archive/html/qemu-devel/2021-03/msg07916.html
Changes from v3:
  - Fixed the forward declaration coding guideline violations in 1st patch.
  - Removed the code waiting for the flushes to complete during migration,
instead restart the flush worker on destination qemu in post load.
  - Got rid of the randomization of the flush tokens, using simple
counter.
  - Got rid of the redundant flush state lock, relying on the BQL now.
  - Handling the memory-backend-ram usage
  - Changed the sync-dax semantics from on/off to 'unsafe','writeback' and 
'direct'.
Added prevention code using 'writeback' on arm and x86_64.
  - Fixed all the miscellaneous comments.

v2 - https://lists.gnu.org/archive/html/qemu-devel/2020-11/msg07031.html
Changes from v2:
  - Using the thread pool based approach as suggested
  - Moved the async hcall handling code to spapr_nvdimm.c along
with some simplifications
  - Added vmstate to preserve the hcall status during save-restore
along with pre_save handler code to complete all ongoing flushes.
  - Added hw_compat magic for sync-dax 'on' on previous machines.
  - Miscellaneous minor fixes.

v1 - https://lists.gnu.org/archive/html/qemu-devel/2020-11/msg06330.html
Changes from v1
  - Fixed a missed-out unlock
  - using QLIST_FOREACH instead of QLIST_FOREACH_SAFE while generating token

Shivaprasad G Bhat (2):
  spapr: nvdimm: Implement H_SCM_FLUSH hcall
  spapr: nvdimm: Introduce spapr-nvdimm device


 hw/ppc/spapr.c|6 +
 hw/ppc/spapr_nvdimm.c |  286 +
 include/hw/ppc/spapr.h|   11 +-
 include/hw/ppc/spapr_nvdimm.h |   17 ++
 4 files changed, 319 insertions(+), 1 deletion(-)

--
Signature




RE: [PATCH] migration: Move bitmap_mutex out of migration_bitmap_clear_dirty()

2021-07-07 Thread Wang, Wei W
On Thursday, July 8, 2021 12:55 AM, Peter Xu wrote:
> On Wed, Jul 07, 2021 at 08:34:50AM +, Wang, Wei W wrote:
> > On Wednesday, July 7, 2021 1:47 AM, Peter Xu wrote:
> > > On Sat, Jul 03, 2021 at 02:53:27AM +, Wang, Wei W wrote:
> > > > +   do {
> > > > +page_to_clear = start + (i++ <<
> > > > + block->clear_bmap_shift);
> > >
> > > Why "i" needs to be shifted?
> >
> > Just move to the next clear chunk, no?
> > For example, (1 << 18) pages chunk (i.e. 1GB).
> 
> But migration_clear_memory_region_dirty_bitmap() has done the shifting?
> 

Please see this example: start=0, npages = 2 * (1 <<18), i.e. we have 2 chunks 
of pages to clear, and start from 0.
First chunk: from 0 to (1 <<18);
Second chunk: from (1 << 18) to 2*(1<<18).

To clear the second chunk, we need to pass (start + "1 << 18") to 
migration_clear_memory_region_dirty_bitmap(),
and clear_bmap_test_and_clear() there will do ">>18" to transform it into the 
id of clear_bitmap, which is 1.

Best,
Wei
 


RE: [PATCH] migration: Move bitmap_mutex out of migration_bitmap_clear_dirty()

2021-07-07 Thread Wang, Wei W
On Thursday, July 8, 2021 12:44 AM, Peter Xu wrote:
> > > Not to mention the hard migration issues are mostly with non-idle
> > > guest, in that case having the balloon in the guest will be
> > > disastrous from this pov since it'll start to take mutex for each
> > > page, while balloon would hardly report anything valid since most guest 
> > > pages
> are being used.
> >
> > If no pages are reported, migration thread wouldn't wait on the lock then.
> 
> Yes I think this is the place I didn't make myself clear.  It's not about 
> sleeping, it's
> about the cmpxchg being expensive already when the vm is huge.

OK.
How did you root cause that it's caused by cmpxchg, instead of lock contention 
(i.e. syscall and sleep) or
some other code inside pthread_mutex_lock()? Do you have cycles about cmpxchg 
v.s. cycles of pthread_mutex_lock()?

I checked the implementation of pthread_mutex_lock(). The code path for lock 
acquire is long. QemuSpin looks more efficient.
(probably we also don’t want migration thread to sleep in any case)

I think it's also better to see the comparison of migration throughput data 
(i.e. pages per second) in the following cases, before we make a decision:
- per-page mutex
- per-page spinlock
- 50-ms mutex

Best,
Wei



Re: [PATCH qemu v22] spapr: Implement Open Firmware client interface

2021-07-07 Thread David Gibson
On Fri, Jun 25, 2021 at 03:51:55PM +1000, Alexey Kardashevskiy wrote:
> The PAPR platform describes an OS environment that's presented by
> a combination of a hypervisor and firmware. The features it specifies
> require collaboration between the firmware and the hypervisor.
> 
> Since the beginning, the runtime component of the firmware (RTAS) has
> been implemented as a 20 byte shim which simply forwards it to
> a hypercall implemented in qemu. The boot time firmware component is
> SLOF - but a build that's specific to qemu, and has always needed to be
> updated in sync with it. Even though we've managed to limit the amount
> of runtime communication we need between qemu and SLOF, there's some,
> and it has become increasingly awkward to handle as we've implemented
> new features.
> 
> This implements a boot time OF client interface (CI) which is
> enabled by a new "x-vof" pseries machine option (stands for "Virtual Open
> Firmware"). When enabled, QEMU implements the custom H_OF_CLIENT hcall
> which implements Open Firmware Client Interface (OF CI). This allows
> using a smaller stateless firmware which does not have to manage
> the device tree.
> 
> The new "vof.bin" firmware image is included with source code under
> pc-bios/. It also includes RTAS blob.
> 
> This implements a handful of CI methods just to get -kernel/-initrd
> working. In particular, this implements the device tree fetching and
> simple memory allocator - "claim" (an OF CI memory allocator) and updates
> "/memory@0/available" to report the client about available memory.
> 
> This implements changing some device tree properties which we know how
> to deal with, the rest is ignored. To allow changes, this skips
> fdt_pack() when x-vof=on as not packing the blob leaves some room for
> appending.
> 
> In absence of SLOF, this assigns phandles to device tree nodes to make
> device tree traversing work.
> 
> When x-vof=on, this adds "/chosen" every time QEMU (re)builds a tree.
> 
> This adds basic instances support which are managed by a hash map
> ihandle -> [phandle].
> 
> Before the guest started, the used memory is:
> 0..e60 - the initial firmware
> 8000..1 - stack
> 40.. - kernel
> 3ea.. - initramdisk
> 
> This OF CI does not implement "interpret".
> 
> Unlike SLOF, this does not format uninitialized nvram. Instead, this
> includes a disk image with pre-formatted nvram.
> 
> With this basic support, this can only boot into kernel directly.
> However this is just enough for the petitboot kernel and initramdisk to
> boot from any possible source. Note this requires reasonably recent guest
> kernel with:
> https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=df5be5be8735
> 
> The immediate benefit is much faster booting time which is especially
> crucial with fully emulated early CPU bring up environments. Also this
> may come in handy when/if GRUB-in-the-userspace sees the light of day.
> 
> This separates VOF and sPAPR in a hope that VOF bits may be reused by
> other POWERPC boards which do not support pSeries.
> 
> This assumes potential support for booting from QEMU backends
> such as blockdev or netdev without devices/drivers used.
> 
> Signed-off-by: Alexey Kardashevskiy 

Applied to ppc-for-6.1.  However, I'm going to make a number of
comments on minor nits.  If they can be addressed as follow up
patches, I'd appreciate it.

[snip]
> diff --git a/pc-bios/vof/vof.h b/pc-bios/vof/vof.h
> new file mode 100644
> index ..2d8958076907
> --- /dev/null
> +++ b/pc-bios/vof/vof.h
> @@ -0,0 +1,43 @@
> +/*
> + * Virtual Open Firmware
> + *
> + * SPDX-License-Identifier: GPL-2.0-or-later
> + */
> +#include 
> +
> +typedef unsigned char uint8_t;
> +typedef unsigned short uint16_t;
> +typedef unsigned long uint32_t;
> +typedef unsigned long long uint64_t;
> +#define NULL (0)
> +#define PROM_ERROR (-1u)
> +typedef unsigned long ihandle;
> +typedef unsigned long phandle;
> +typedef int size_t;
> +typedef void client(void);

AFAICT 'client' is only used in one place, might as well just open
code it.

> +
> +/* globals */
> +extern void _prom_entry(void); /* OF CI entry point (i.e. this firmware) */
> +
> +void do_boot(unsigned long addr, unsigned long r3, unsigned long r4);
> +
> +/* libc */
> +int strlen(const char *s);
> +int strcmp(const char *s1, const char *s2);
> +void *memcpy(void *dest, const void *src, size_t n);
> +int memcmp(const void *ptr1, const void *ptr2, size_t n);
> +void *memmove(void *dest, const void *src, size_t n);
> +void *memset(void *dest, int c, size_t size);
> +
> +/* CI wrappers */
> +void ci_panic(const char *str);
> +phandle ci_finddevice(const char *path);
> +uint32_t ci_getprop(phandle ph, const char *propname, void *prop, int len);
> +
> +/* booting from -kernel */
> +void boot_from_memory(uint64_t initrd, uint64_t initrdsize);
> +
> +/* Entry points for CI and RTAS */
> +extern uint32_t ci_entry(uint32_t params);
> +extern unsigned long hv_rtas(unsigned long params);
> 

Re: pipe2 & configure script

2021-07-07 Thread Richard Zak
În mie., 7 iul. 2021 la 10:32, Paolo Bonzini  a scris:

> On 07/07/21 05:24, Richard Zak wrote:
> > What conditions are required for "#define CONFIG_PIPE2" to be set in
> > build/config-host.h? It prevents building for Haiku as pipe2() doesn't
> > exist. I didn't see anything in the configure script regarding pipe2. I
> > also updated my code to the latest in the repository and this issue just
> > popped up.
>
> Does this help?
>
> diff --git a/meson.build b/meson.build
> index 660e294b7e..32d5bd3685 100644
> --- a/meson.build
> +++ b/meson.build
> @@ -1339,7 +1339,7 @@ config_host_data.set('HAVE_STRUCT_STAT_ST_ATIM',
>   config_host_data.set('CONFIG_EVENTFD', cc.compiles('''
> #include 
> int main(void) { return eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC); }'''))
> -config_host_data.set('CONFIG_FDATASYNC', cc.compiles(gnu_source_prefix +
> '''
> +config_host_data.set('CONFIG_FDATASYNC', cc.links(gnu_source_prefix + '''
> #include 
> int main(void) {
> #if defined(_POSIX_SYNCHRONIZED_IO) && _POSIX_SYNCHRONIZED_IO > 0
> @@ -1356,14 +1356,14 @@ config_host_data.set('CONFIG_MADVISE',
> cc.compiles(gnu_source_prefix + '''
>   config_host_data.set('CONFIG_MEMFD', cc.compiles(gnu_source_prefix + '''
> #include 
> int main(void) { return memfd_create("foo", MFD_ALLOW_SEALING); }'''))
> -config_host_data.set('CONFIG_OPEN_BY_HANDLE',
> cc.compiles(gnu_source_prefix + '''
> +config_host_data.set('CONFIG_OPEN_BY_HANDLE', cc.links(gnu_source_prefix
> + '''
> #include 
> #if !defined(AT_EMPTY_PATH)
> # error missing definition
> #else
> int main(void) { struct file_handle fh; return open_by_handle_at(0,
> , 0); }
> #endif'''))
> -config_host_data.set('CONFIG_PIPE2', cc.compiles(gnu_source_prefix + '''
> +config_host_data.set('CONFIG_PIPE2', cc.links(gnu_source_prefix + '''
> #include 
> #include 
>
> ?
>
> Paolo
>
>
That did it! build/config-host.h now has "#undef CONFIG_PIPE2" and the code
compiles.

-- 
Regards,

Richard J. Zak
Professional Genius
PGP Key: https://keybase.io/rjzak/key.asc


Re: [PATCH v8 3/3] target/i386: Correct implementation for FCS, FIP, FDS and FDP

2021-07-07 Thread Ziqiao Kong
Hello!

Thanks for applying fixes for me. Is there anything else I should do?

Ziqiao


On Thu, Jul 8, 2021 at 6:24 AM Richard Henderson
 wrote:
>
> From: Ziqiao Kong 
>
> Update FCS:FIP and FDS:FDP according to the Intel Manual Vol.1 8.1.8.
> Note that CPUID.(EAX=07H,ECX=0H):EBX[bit 13] is not implemented by
> design in this patch and will be added along with TCG features flag
> in a separate patch later.
>
> Signed-off-by: Ziqiao Kong 
> Message-Id: <20210530150112.74411-2-ziqiaok...@gmail.com>
> [rth: Push FDS/FDP handling down into mod != 3 case; free last_addr.]
> Signed-off-by: Richard Henderson 
> ---
>  target/i386/cpu.h|  2 ++
>  target/i386/tcg/fpu_helper.c | 20 ++---
>  target/i386/tcg/translate.c  | 43 +++-
>  3 files changed, 56 insertions(+), 9 deletions(-)
>
> diff --git a/target/i386/cpu.h b/target/i386/cpu.h
> index 0f7ddbfeae..8921bf5318 100644
> --- a/target/i386/cpu.h
> +++ b/target/i386/cpu.h
> @@ -1435,6 +1435,8 @@ typedef struct CPUX86State {
>  FPReg fpregs[8];
>  /* KVM-only so far */
>  uint16_t fpop;
> +uint16_t fpcs;
> +uint16_t fpds;
>  uint64_t fpip;
>  uint64_t fpdp;
>
> diff --git a/target/i386/tcg/fpu_helper.c b/target/i386/tcg/fpu_helper.c
> index beb63be432..cdd8e9f947 100644
> --- a/target/i386/tcg/fpu_helper.c
> +++ b/target/i386/tcg/fpu_helper.c
> @@ -731,6 +731,10 @@ static void do_fninit(CPUX86State *env)
>  {
>  env->fpus = 0;
>  env->fpstt = 0;
> +env->fpcs = 0;
> +env->fpds = 0;
> +env->fpip = 0;
> +env->fpdp = 0;
>  cpu_set_fpuc(env, 0x37f);
>  env->fptags[0] = 1;
>  env->fptags[1] = 1;
> @@ -2378,19 +2382,19 @@ static void do_fstenv(CPUX86State *env, target_ulong 
> ptr, int data32,
>  cpu_stl_data_ra(env, ptr, env->fpuc, retaddr);
>  cpu_stl_data_ra(env, ptr + 4, fpus, retaddr);
>  cpu_stl_data_ra(env, ptr + 8, fptag, retaddr);
> -cpu_stl_data_ra(env, ptr + 12, 0, retaddr); /* fpip */
> -cpu_stl_data_ra(env, ptr + 16, 0, retaddr); /* fpcs */
> -cpu_stl_data_ra(env, ptr + 20, 0, retaddr); /* fpoo */
> -cpu_stl_data_ra(env, ptr + 24, 0, retaddr); /* fpos */
> +cpu_stl_data_ra(env, ptr + 12, env->fpip, retaddr); /* fpip */
> +cpu_stl_data_ra(env, ptr + 16, env->fpcs, retaddr); /* fpcs */
> +cpu_stl_data_ra(env, ptr + 20, env->fpdp, retaddr); /* fpoo */
> +cpu_stl_data_ra(env, ptr + 24, env->fpds, retaddr); /* fpos */
>  } else {
>  /* 16 bit */
>  cpu_stw_data_ra(env, ptr, env->fpuc, retaddr);
>  cpu_stw_data_ra(env, ptr + 2, fpus, retaddr);
>  cpu_stw_data_ra(env, ptr + 4, fptag, retaddr);
> -cpu_stw_data_ra(env, ptr + 6, 0, retaddr);
> -cpu_stw_data_ra(env, ptr + 8, 0, retaddr);
> -cpu_stw_data_ra(env, ptr + 10, 0, retaddr);
> -cpu_stw_data_ra(env, ptr + 12, 0, retaddr);
> +cpu_stw_data_ra(env, ptr + 6, env->fpip, retaddr);
> +cpu_stw_data_ra(env, ptr + 8, env->fpcs, retaddr);
> +cpu_stw_data_ra(env, ptr + 10, env->fpdp, retaddr);
> +cpu_stw_data_ra(env, ptr + 12, env->fpds, retaddr);
>  }
>  }
>
> diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
> index 886d3d80f3..888ead0d49 100644
> --- a/target/i386/tcg/translate.c
> +++ b/target/i386/tcg/translate.c
> @@ -5930,6 +5930,8 @@ static target_ulong disas_insn(DisasContext *s, 
> CPUState *cpu)
>  /* floats */
>  case 0xd8 ... 0xdf:
>  {
> +bool update_fip = true;
> +
>  if (s->flags & (HF_EM_MASK | HF_TS_MASK)) {
>  /* if CR0.EM or CR0.TS are set, generate an FPU exception */
>  /* XXX: what to do if illegal op ? */
> @@ -5942,7 +5944,14 @@ static target_ulong disas_insn(DisasContext *s, 
> CPUState *cpu)
>  op = ((b & 7) << 3) | ((modrm >> 3) & 7);
>  if (mod != 3) {
>  /* memory op */
> -gen_lea_modrm(env, s, modrm);
> +AddressParts a = gen_lea_modrm_0(env, s, modrm);
> +TCGv ea = gen_lea_modrm_1(s, a);
> +TCGv last_addr = tcg_temp_new();
> +bool update_fdp = true;
> +
> +tcg_gen_mov_tl(last_addr, ea);
> +gen_lea_v_seg(s, s->aflag, ea, a.def_seg, s->override);
> +
>  switch (op) {
>  case 0x00 ... 0x07: /* fxxxs */
>  case 0x10 ... 0x17: /* fixxxl */
> @@ -6070,20 +6079,24 @@ static target_ulong disas_insn(DisasContext *s, 
> CPUState *cpu)
>  case 0x0c: /* fldenv mem */
>  gen_helper_fldenv(cpu_env, s->A0,
>tcg_const_i32(dflag - 1));
> +update_fip = update_fdp = false;
>  break;
>  case 0x0d: /* fldcw mem */
>  tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
>   

Re: [PATCH 6/6] python: add entry point for aqmp-tui

2021-07-07 Thread John Snow
On Fri, Jul 2, 2021 at 5:26 PM G S Niteesh Babu 
wrote:

> Add an entry point for aqmp-tui. This will allow it to be run from
> the command line using "aqmp-tui -a localhost:1234"
>
> Signed-off-by: G S Niteesh Babu 
> ---
>  python/setup.cfg | 1 +
>  1 file changed, 1 insertion(+)
>
> diff --git a/python/setup.cfg b/python/setup.cfg
> index 4782fe5241..23e30185f4 100644
> --- a/python/setup.cfg
> +++ b/python/setup.cfg
> @@ -68,6 +68,7 @@ console_scripts =
>  qom-fuse = qemu.qmp.qom_fuse:QOMFuse.entry_point [fuse]
>  qemu-ga-client = qemu.qmp.qemu_ga_client:main
>  qmp-shell = qemu.qmp.qmp_shell:main
> +aqmp-tui = qemu.aqmp.aqmp_tui:main
>

I was going to suggest that you could use [tui] at the end here to protect
the script from being run when we don't have the optional dependency group
installed, but even with it, I get a pretty nasty error:

Traceback (most recent call last):
  File "/home/jsnow/src/qemu/python/.pyvenv/bin/aqmp-tui", line 33, in

sys.exit(load_entry_point('qemu==0.6.1.0a1', 'console_scripts',
'aqmp-tui')())
  File "/home/jsnow/src/qemu/python/.pyvenv/bin/aqmp-tui", line 25, in
importlib_load_entry_point
return next(matches).load()
  File "/usr/lib64/python3.9/importlib/metadata.py", line 77, in load
module = import_module(match.group('module'))
  File "/usr/lib64/python3.9/importlib/__init__.py", line 127, in
import_module
return _bootstrap._gcd_import(name[level:], package, level)
  File "", line 1030, in _gcd_import
  File "", line 1007, in _find_and_load
  File "", line 986, in _find_and_load_unlocked
  File "", line 680, in _load_unlocked
  File "", line 855, in exec_module
  File "", line 228, in
_call_with_frames_removed
  File
"/home/jsnow/src/qemu/python/.pyvenv/lib64/python3.9/site-packages/qemu/aqmp/aqmp_tui.py",
line 14, in 
from pygments import lexers
ModuleNotFoundError: No module named 'pygments'

It looks like this feature isn't working for me ... I'm not sure I know why.

In theory it should work:
https://setuptools.readthedocs.io/en/latest/userguide/entry_point.html#dependency-management

We might have to make our own custom entry point script that guards this a
little bit better if we can't solve this mystery. The goal is either to:

(1) Do not install an aqmp-tui script at all if we don't select the
optional TUI group, or
(2) Have the script error out early with a nice error message explaining
what optional dependencies it requires.


>
>  [flake8]
>  extend-ignore = E722  # Prefer pylint's bare-except checks to flake8's
> --
> 2.17.1
>
>


Re: [PATCH 2/6] python: Add dependencies for AQMP TUI

2021-07-07 Thread John Snow
On Fri, Jul 2, 2021 at 5:26 PM G S Niteesh Babu 
wrote:

> Added dependencies for the upcoming AQMP TUI under the optional
> 'tui' group.
>
> The same dependencies have also been added under the devel group
> since no work around has been found for optional groups to imply
> other optional groups.
>
> Signed-off-by: G S Niteesh Babu 
>

One thing that I notice is that if you already have a .tox environment,
adding new dependencies won't cause .tox to regenerate its environments,
forcing me to delete .tox and try again.
Maybe it's possible to update the Makefile so that changes to 'setup.cfg'
will coerce Tox to rebuild?

Looks good otherwise, thank you for taking the time to re-order your
patches for the list, and congrats on your first submission :)

--js


> ---
>  python/Pipfile.lock | 12 
>  python/setup.cfg|  7 +++
>  2 files changed, 19 insertions(+)
>
> diff --git a/python/Pipfile.lock b/python/Pipfile.lock
> index 8ab41a3f60..76cf1e4930 100644
> --- a/python/Pipfile.lock
> +++ b/python/Pipfile.lock
> @@ -289,6 +289,18 @@
>  "markers": "python_version < '3.8'",
>  "version": "==3.10.0.0"
>  },
> +"urwid": {
> +"hashes": [
> +
> "sha256:588bee9c1cb208d0906a9f73c613d2bd32c3ed3702012f51efe318a3f2127eae"
> +],
> +"version": "==2.1.2"
> +},
> +"urwid-readline": {
> +"hashes": [
> +
> "sha256:018020cbc864bb5ed87be17dc26b069eae2755cb29f3a9c569aac3bded1efaf4"
> +],
> +"version": "==0.13"
> +},
>  "virtualenv": {
>  "hashes": [
>
>  "sha256:14fdf849f80dbb29a4eb6caa9875d476ee2a5cf76a5f5415fa2f1606010ab467",
> diff --git a/python/setup.cfg b/python/setup.cfg
> index 1a552d672a..c62803bffc 100644
> --- a/python/setup.cfg
> +++ b/python/setup.cfg
> @@ -44,11 +44,18 @@ devel =
>  mypy >= 0.770
>  pylint >= 2.8.0
>  tox >= 3.18.0
> +urwid >= 2.1.2
> +urwid-readline >= 0.13
>
>  # Provides qom-fuse functionality
>  fuse =
>  fusepy >= 2.0.4
>
> +# AQMP TUI dependencies
> +tui =
> +urwid >= 2.1.2
> +urwid-readline >= 0.13
> +
>  [options.entry_points]
>  console_scripts =
>  qom = qemu.qmp.qom:main
> --
> 2.17.1
>
>


[PATCH 1/2] iotests: Improve and rename test 291 to qemu-img-bitmap

2021-07-07 Thread Eric Blake
Enhance the test to demonstrate behavior of qemu-img with a qcow2
image containing an inconsistent bitmap, and rename it now that we
support useful iotest names.

While at it, fix a missing newline in the error message thus exposed.

Signed-off-by: Eric Blake 
---
 block/dirty-bitmap.c  |  2 +-
 .../{291 => tests/qemu-img-bitmaps}   | 13 +++-
 .../{291.out => tests/qemu-img-bitmaps.out}   | 32 ++-
 3 files changed, 44 insertions(+), 3 deletions(-)
 rename tests/qemu-iotests/{291 => tests/qemu-img-bitmaps} (92%)
 rename tests/qemu-iotests/{291.out => tests/qemu-img-bitmaps.out} (82%)

diff --git a/block/dirty-bitmap.c b/block/dirty-bitmap.c
index 68d295d6e3ed..0ef46163e3ea 100644
--- a/block/dirty-bitmap.c
+++ b/block/dirty-bitmap.c
@@ -193,7 +193,7 @@ int bdrv_dirty_bitmap_check(const BdrvDirtyBitmap *bitmap, 
uint32_t flags,
 error_setg(errp, "Bitmap '%s' is inconsistent and cannot be used",
bitmap->name);
 error_append_hint(errp, "Try block-dirty-bitmap-remove to delete"
-  " this bitmap from disk");
+  " this bitmap from disk\n");
 return -1;
 }

diff --git a/tests/qemu-iotests/291 b/tests/qemu-iotests/tests/qemu-img-bitmaps
similarity index 92%
rename from tests/qemu-iotests/291
rename to tests/qemu-iotests/tests/qemu-img-bitmaps
index 20efb080a6c0..76cd9e31e850 100755
--- a/tests/qemu-iotests/291
+++ b/tests/qemu-iotests/tests/qemu-img-bitmaps
@@ -3,7 +3,7 @@
 #
 # Test qemu-img bitmap handling
 #
-# Copyright (C) 2018-2020 Red Hat, Inc.
+# Copyright (C) 2018-2021 Red Hat, Inc.
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
@@ -32,6 +32,7 @@ _cleanup()
 trap "_cleanup; exit \$status" 0 1 2 3 15

 # get standard environment, filters and checks
+cd ..
 . ./common.rc
 . ./common.filter
 . ./common.nbd
@@ -129,6 +130,16 @@ $QEMU_IMG map --output=json --image-opts \

 nbd_server_stop

+echo
+echo "=== Check handling of inconsistent bitmap ==="
+echo
+
+$QEMU_IO -c abort "$TEST_IMG" 2>/dev/null
+$QEMU_IMG bitmap --add "$TEST_IMG" b4
+$QEMU_IMG bitmap --remove "$TEST_IMG" b1
+_img_info --format-specific | _filter_irrelevant_img_info
+$QEMU_IMG convert --bitmaps -O qcow2 "$TEST_IMG" "$TEST_IMG.copy"
+
 # success, all done
 echo '*** done'
 rm -f $seq.full
diff --git a/tests/qemu-iotests/291.out 
b/tests/qemu-iotests/tests/qemu-img-bitmaps.out
similarity index 82%
rename from tests/qemu-iotests/291.out
rename to tests/qemu-iotests/tests/qemu-img-bitmaps.out
index 018d6b103f87..17b34eaed30f 100644
--- a/tests/qemu-iotests/291.out
+++ b/tests/qemu-iotests/tests/qemu-img-bitmaps.out
@@ -1,4 +1,4 @@
-QA output created by 291
+QA output created by qemu-img-bitmaps

 === Initial image setup ===

@@ -115,4 +115,34 @@ Format specific information:
 [{ "start": 0, "length": 2097152, "depth": 0, "present": true, "zero": false, 
"data": true, "offset": OFFSET},
 { "start": 2097152, "length": 1048576, "depth": 0, "present": false, "zero": 
false, "data": false},
 { "start": 3145728, "length": 7340032, "depth": 0, "present": true, "zero": 
false, "data": true, "offset": OFFSET}]
+
+=== Check handling of inconsistent bitmap ===
+
+image: TEST_DIR/t.IMGFMT
+file format: IMGFMT
+virtual size: 10 MiB (10485760 bytes)
+cluster_size: 65536
+backing file: TEST_DIR/t.IMGFMT.base
+backing file format: IMGFMT
+Format specific information:
+bitmaps:
+[0]:
+flags:
+[0]: in-use
+[1]: auto
+name: b2
+granularity: 65536
+[1]:
+flags:
+[0]: in-use
+name: b0
+granularity: 65536
+[2]:
+flags:
+[0]: auto
+name: b4
+granularity: 65536
+corrupt: false
+qemu-img: Failed to populate bitmap b0: Bitmap 'b0' is inconsistent and cannot 
be used
+Try block-dirty-bitmap-remove to delete this bitmap from disk
 *** done
-- 
2.31.1




[PATCH 0/2] Let 'qemu-img convert --bitmaps' skip inconsistent bitmaps

2021-07-07 Thread Eric Blake
This is mostly a convenience factor as one could already use 'qemu-img
info' to learn which bitmaps are broken and then 'qemu-img bitmap
--remove' to nuke them before calling 'qemu-img convert --bitmaps',
but it does have the advantage that the copied file is usable without
extra efforts and the broken bitmap is not deleted from the source
file.

Eric Blake (2):
  iotests: Improve and rename test 291 to qemu-img-bitmap
  qemu-img: Add --skip-broken for 'convert --bitmaps'

 docs/tools/qemu-img.rst   |  8 +++-
 block/dirty-bitmap.c  |  2 +-
 qemu-img.c| 20 +++-
 .../{291 => tests/qemu-img-bitmaps}   | 17 ++-
 .../{291.out => tests/qemu-img-bitmaps.out}   | 46 ++-
 5 files changed, 87 insertions(+), 6 deletions(-)
 rename tests/qemu-iotests/{291 => tests/qemu-img-bitmaps} (88%)
 rename tests/qemu-iotests/{291.out => tests/qemu-img-bitmaps.out} (76%)

-- 
2.31.1




[PATCH 2/2] qemu-img: Add --skip-broken for 'convert --bitmaps'

2021-07-07 Thread Eric Blake
The point of 'qemu-img convert --bitmaps' is to be a convenience for
actions that are already possible through a string of smaller
'qemu-img bitmap' sub-commands.  One situation not accounted for
already is that if a source image contains an inconsistent bitmap (for
example, because a qemu process died abruptly before flushing bitmap
state), the user MUST delete those inconsistent bitmaps before
anything else useful can be done with the image.

We don't want to delete inconsistent bitmaps by default: although a
corrupt bitmap is only a loss of optimization rather than a corruption
of user-visible data, it is still nice to require the user to opt in
to the fact that they are aware of the loss of the bitmap.  Still,
requiring the user to check 'qemu-img info' to see whether bitmaps are
consistent, then use 'qemu-img bitmap --remove' to remove offenders,
all before using 'qemu-img convert', is a lot more work than just
adding a knob 'qemu-img convert --bitmaps --skip-broken' which opts in
to skipping the broken bitmaps.

Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=1946084
Signed-off-by: Eric Blake 
---
 docs/tools/qemu-img.rst   |  8 +++-
 qemu-img.c| 20 +--
 tests/qemu-iotests/tests/qemu-img-bitmaps |  4 
 tests/qemu-iotests/tests/qemu-img-bitmaps.out | 14 +
 4 files changed, 43 insertions(+), 3 deletions(-)

diff --git a/docs/tools/qemu-img.rst b/docs/tools/qemu-img.rst
index 1d8470eada0e..5cf1c764597b 100644
--- a/docs/tools/qemu-img.rst
+++ b/docs/tools/qemu-img.rst
@@ -414,7 +414,7 @@ Command description:
   4
 Error on reading data

-.. option:: convert [--object OBJECTDEF] [--image-opts] [--target-image-opts] 
[--target-is-zero] [--bitmaps] [-U] [-C] [-c] [-p] [-q] [-n] [-f FMT] [-t 
CACHE] [-T SRC_CACHE] [-O OUTPUT_FMT] [-B BACKING_FILE] [-o OPTIONS] [-l 
SNAPSHOT_PARAM] [-S SPARSE_SIZE] [-r RATE_LIMIT] [-m NUM_COROUTINES] [-W] 
FILENAME [FILENAME2 [...]] OUTPUT_FILENAME
+.. option:: convert [--object OBJECTDEF] [--image-opts] [--target-image-opts] 
[--target-is-zero] [--bitmaps [--skip-broken]] [-U] [-C] [-c] [-p] [-q] [-n] 
[-f FMT] [-t CACHE] [-T SRC_CACHE] [-O OUTPUT_FMT] [-B BACKING_FILE] [-o 
OPTIONS] [-l SNAPSHOT_PARAM] [-S SPARSE_SIZE] [-r RATE_LIMIT] [-m 
NUM_COROUTINES] [-W] FILENAME [FILENAME2 [...]] OUTPUT_FILENAME

   Convert the disk image *FILENAME* or a snapshot *SNAPSHOT_PARAM*
   to disk image *OUTPUT_FILENAME* using format *OUTPUT_FMT*. It can
@@ -456,6 +456,12 @@ Command description:
   *NUM_COROUTINES* specifies how many coroutines work in parallel during
   the convert process (defaults to 8).

+  Use of ``--bitmaps`` requests that any persistent bitmaps present in
+  the original are also copied to the destination.  If any bitmap is
+  inconsistent in the source, the conversion will fail unless
+  ``--skip-broken`` is also specified to copy only the consistent
+  bitmaps.
+
 .. option:: create [--object OBJECTDEF] [-q] [-f FMT] [-b BACKING_FILE] [-F 
BACKING_FMT] [-u] [-o OPTIONS] FILENAME [SIZE]

   Create the new disk image *FILENAME* of size *SIZE* and format
diff --git a/qemu-img.c b/qemu-img.c
index 68a4d298098f..e8b012f39c0c 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -82,6 +82,7 @@ enum {
 OPTION_MERGE = 274,
 OPTION_BITMAPS = 275,
 OPTION_FORCE = 276,
+OPTION_SKIP_BROKEN = 277,
 };

 typedef enum OutputFormat {
@@ -2101,7 +2102,8 @@ static int convert_do_copy(ImgConvertState *s)
 return s->ret;
 }

-static int convert_copy_bitmaps(BlockDriverState *src, BlockDriverState *dst)
+static int convert_copy_bitmaps(BlockDriverState *src, BlockDriverState *dst,
+bool skip_broken)
 {
 BdrvDirtyBitmap *bm;
 Error *err = NULL;
@@ -2113,6 +2115,10 @@ static int convert_copy_bitmaps(BlockDriverState *src, 
BlockDriverState *dst)
 continue;
 }
 name = bdrv_dirty_bitmap_name(bm);
+if (skip_broken && bdrv_dirty_bitmap_inconsistent(bm)) {
+warn_report("Skipping inconsistent bitmap %s", name);
+continue;
+}
 qmp_block_dirty_bitmap_add(dst->node_name, name,
true, bdrv_dirty_bitmap_granularity(bm),
true, true,
@@ -2167,6 +2173,7 @@ static int img_convert(int argc, char **argv)
 bool force_share = false;
 bool explict_min_sparse = false;
 bool bitmaps = false;
+bool skip_broken = false;
 int64_t rate_limit = 0;

 ImgConvertState s = (ImgConvertState) {
@@ -2188,6 +2195,7 @@ static int img_convert(int argc, char **argv)
 {"salvage", no_argument, 0, OPTION_SALVAGE},
 {"target-is-zero", no_argument, 0, OPTION_TARGET_IS_ZERO},
 {"bitmaps", no_argument, 0, OPTION_BITMAPS},
+{"skip-broken", no_argument, 0, OPTION_SKIP_BROKEN},
 {0, 0, 0, 0}
 };
 c = getopt_long(argc, argv, 

Re: [PATCH 2/2] docs/system: ppc: Update ppce500 documentation with eTSEC support

2021-07-07 Thread Bin Meng
Hi David,

On Thu, Jul 8, 2021 at 9:08 AM David Gibson  wrote:
>
> On Tue, Jul 06, 2021 at 12:31:24PM +0800, Bin Meng wrote:
> > This adds eTSEC support to the PowerPC `ppce500` machine documentation.
> >
> > Signed-off-by: Bin Meng 
>
> Applied to ppc-for-6.1, thanks.

Thanks!

Are both patches applied, or only this one?

Regards,
Bin



Re: [PATCH v5 2/4] Acceptance Tests: Add default kernel params and pxeboot url to the KNOWN_DISTROS collection

2021-07-07 Thread Cleber Rosa



On 7/6/21 9:17 AM, Eric Auger wrote:

From: Willian Rampazzo 

When running LinuxTests we may need to run the guest with
custom params. It is practical to store the pxeboot URL
and the default kernel params so that the
tests just need to fetch those and augment the kernel params.

Signed-off-by: Eric Auger 
Reviewed-by: Willian Rampazzo 

---

v4 -> v5:
- fixed some indentation issues (Willian)
- added Willian's R-b
---
  tests/acceptance/avocado_qemu/__init__.py | 53 ++-
  1 file changed, 51 insertions(+), 2 deletions(-)

diff --git a/tests/acceptance/avocado_qemu/__init__.py 
b/tests/acceptance/avocado_qemu/__init__.py
index af93cd63ea..ee354eb458 100644
--- a/tests/acceptance/avocado_qemu/__init__.py
+++ b/tests/acceptance/avocado_qemu/__init__.py
@@ -309,12 +309,51 @@ class LinuxDistro:
  'fedora': {
  '31': {
  'x86_64':
-{'checksum': 
'e3c1b309d9203604922d6e255c2c5d098a309c2d46215d8fc026954f3c5c27a0'},
+{'checksum': 
'e3c1b309d9203604922d6e255c2c5d098a309c2d46215d8fc026954f3c5c27a0',
+'pxeboot_url': 'https://archives.fedoraproject.org/'
+   'pub/archive/fedora/linux/releases/31/'
+   'Everything/x86_64/os/images/pxeboot/',
+'kernel_params': 
'root=UUID=b1438b9b-2cab-4065-a99a-08a96687f73c'
+ ' ro no_timer_check net.ifnames=0'
+ ' console=tty1 console=ttyS0,115200n8'
+},
+'aarch64':
+{'checksum': 
'1e18d9c0cf734940c4b5d5ec592facaed2af0ad0329383d5639c997fdf16fe49',



So this is what made it easy for me to catch the issue on the previous 
patch.  The aarch64 info should be present on the previous patch too, 
with this one adding the pxeboot_url, kernel_params, etc.



But, just to be extra clear, the actual issue of `self._info` being set 
to `None` on the previous patch still needs to be addressed.





+'pxeboot_url': 'https://archives.fedoraproject.org/'
+   'pub/archive/fedora/linux/releases/31/'
+   'Everything/aarch64/os/images/pxeboot/',
+'kernel_params': 
'root=UUID=b6950a44-9f3c-4076-a9c2-355e8475b0a7'
+ ' ro earlyprintk=pl011,0x900'
+ ' ignore_loglevel no_timer_check'
+ ' printk.time=1 rd_NO_PLYMOUTH'
+ ' console=ttyAMA0'
+},
  'ppc64':
  {'checksum': 
'7c3528b85a3df4b2306e892199a9e1e43f991c506f2cc390dc4efa2026ad2f58'},
  's390x':
  {'checksum': 
'4caaab5a434fd4d1079149a072fdc7891e354f834d355069ca982fdcaf5a122d'},
-}
+},
+'32': {
+'aarch64':
+{'checksum': 
'b367755c664a2d7a26955bbfff985855adfa2ca15e908baf15b4b176d68d3967',
+'pxeboot_url': 'http://dl.fedoraproject.org/pub/fedora/linux/'
+   'releases/32/Server/aarch64/os/images/pxeboot/',
+'kernel_params': 
'root=UUID=3df75b65-be8d-4db4-8655-14d95c0e90c5'
+ ' ro no_timer_check net.ifnames=0'
+ ' console=tty1 console=ttyS0,115200n8',
+},
+},
+'33': {
+'aarch64':
+{'checksum': 
'e7f75cdfd523fe5ac2ca9eeece68edc1a81f386a17f969c1d1c7c87031008a6b',
+'pxeboot_url': 'http://dl.fedoraproject.org/pub/fedora/linux/'
+   'releases/33/Server/aarch64/os/images/pxeboot/',
+'kernel_params': 
'root=UUID=d20b3ffa-6397-4a63-a734-1126a0208f8a'
+ ' ro no_timer_check net.ifnames=0'
+ ' console=tty1 console=ttyS0,115200n8'
+ ' console=tty0',
+ },
+},
  }
  }
  
@@ -337,6 +376,16 @@ def checksum(self):

  def checksum(self, value):
  self._info['checksum'] = value
  
+@property

+def pxeboot_url(self):
+"""Gets the repository url where pxeboot files can be found"""
+return self._info.get('pxeboot_url', None)
+
+@property
+def default_kernel_params(self):
+"""Gets the default kernel parameters"""
+return self._info.get('kernel_params', None)
+
  
  class LinuxTest(Test, LinuxSSHMixIn):

  """Facilitates having a cloud-image Linux based available.



Everything else looks good to me.


Thanks,

- Cleber.




[RFC PATCH v2 26/44] pci-host/q35: Move PAM initialization above SMRAM initialization

2021-07-07 Thread isaku . yamahata
From: Isaku Yamahata 

In mch_realize(), process PAM initialization before SMRAM initialization so
that a later patch can skip all the SMRAM-related code with a single check.

Signed-off-by: Isaku Yamahata 
---
 hw/pci-host/q35.c | 19 ++-
 1 file changed, 10 insertions(+), 9 deletions(-)

diff --git a/hw/pci-host/q35.c b/hw/pci-host/q35.c
index 9a2be237d7..68234d209c 100644
--- a/hw/pci-host/q35.c
+++ b/hw/pci-host/q35.c
@@ -571,6 +571,16 @@ static void mch_realize(PCIDevice *d, Error **errp)
 pc_pci_as_mapping_init(OBJECT(mch), mch->system_memory,
mch->pci_address_space);
 
+/* PAM */
+init_pam(DEVICE(mch), mch->ram_memory, mch->system_memory,
+ mch->pci_address_space, &mch->pam_regions[0],
+ PAM_BIOS_BASE, PAM_BIOS_SIZE);
+for (i = 0; i < ARRAY_SIZE(mch->pam_regions) - 1; ++i) {
+init_pam(DEVICE(mch), mch->ram_memory, mch->system_memory,
+ mch->pci_address_space, &mch->pam_regions[i+1],
+ PAM_EXPAN_BASE + i * PAM_EXPAN_SIZE, PAM_EXPAN_SIZE);
+}
+
 /* if *disabled* show SMRAM to all CPUs */
 memory_region_init_alias(&mch->smram_region, OBJECT(mch), "smram-region",
  mch->pci_address_space, 
MCH_HOST_BRIDGE_SMRAM_C_BASE,
@@ -637,15 +647,6 @@ static void mch_realize(PCIDevice *d, Error **errp)
 
 object_property_add_const_link(qdev_get_machine(), "smram",
 OBJECT(&mch->smram));
-
-init_pam(DEVICE(mch), mch->ram_memory, mch->system_memory,
- mch->pci_address_space, &mch->pam_regions[0],
- PAM_BIOS_BASE, PAM_BIOS_SIZE);
-for (i = 0; i < ARRAY_SIZE(mch->pam_regions) - 1; ++i) {
-init_pam(DEVICE(mch), mch->ram_memory, mch->system_memory,
- mch->pci_address_space, &mch->pam_regions[i+1],
- PAM_EXPAN_BASE + i * PAM_EXPAN_SIZE, PAM_EXPAN_SIZE);
-}
 }
 
 uint64_t mch_mcfg_base(void)
-- 
2.25.1




Re: [PATCH v5 1/4] avocado_qemu: Fix KNOWN_DISTROS map into the LinuxDistro class

2021-07-07 Thread Cleber Rosa



On 7/6/21 9:17 AM, Eric Auger wrote:

From: Willian Rampazzo 

As KNOWN_DISTROS grows, more loose methods will be created in
the avocado_qemu/__init__.py file.

Let's refactor the code so that KNOWN_DISTROS and related methods are
packaged in a class

Signed-off-by: Wainer dos Santos Moschetta 
Signed-off-by: Eric Auger 
---
  tests/acceptance/avocado_qemu/__init__.py | 74 +--
  1 file changed, 42 insertions(+), 32 deletions(-)

diff --git a/tests/acceptance/avocado_qemu/__init__.py 
b/tests/acceptance/avocado_qemu/__init__.py
index 81ac90bebb..af93cd63ea 100644
--- a/tests/acceptance/avocado_qemu/__init__.py
+++ b/tests/acceptance/avocado_qemu/__init__.py
@@ -299,29 +299,43 @@ def ssh_command(self, command):
   f'Guest command failed: {command}')
  return stdout_lines, stderr_lines
  
+class LinuxDistro:

+"""Represents a Linux distribution
  



I definitely like the idea.



-#: A collection of known distros and their respective image checksum
-KNOWN_DISTROS = {
-'fedora': {
-'31': {
-'x86_64':
-{'checksum': 
'e3c1b309d9203604922d6e255c2c5d098a309c2d46215d8fc026954f3c5c27a0'},
-'aarch64':
-{'checksum': 
'1e18d9c0cf734940c4b5d5ec592facaed2af0ad0329383d5639c997fdf16fe49'},
-'ppc64':
-{'checksum': 
'7c3528b85a3df4b2306e892199a9e1e43f991c506f2cc390dc4efa2026ad2f58'},
-'s390x':
-{'checksum': 
'4caaab5a434fd4d1079149a072fdc7891e354f834d355069ca982fdcaf5a122d'},
+Holds information of known distros.
+"""
+#: A collection of known distros and their respective image checksum
+KNOWN_DISTROS = {
+'fedora': {
+'31': {
+'x86_64':
+{'checksum': 
'e3c1b309d9203604922d6e255c2c5d098a309c2d46215d8fc026954f3c5c27a0'},
+'ppc64':
+{'checksum': 
'7c3528b85a3df4b2306e892199a9e1e43f991c506f2cc390dc4efa2026ad2f58'},
+'s390x':
+{'checksum': 
'4caaab5a434fd4d1079149a072fdc7891e354f834d355069ca982fdcaf5a122d'},
  }
  }
  }
  
+def __init__(self, name, version, arch):

+self.name = name
+self.version = version
+self.arch = arch



This looks a lot like 
https://github.com/avocado-framework/avocado/blob/f0996dafefa412c77c221c2d1a6fafdcba1c97b7/avocado/utils/distro.py#L34 
, although admittedly, their goals are very different.



As a next step, in the future, I'd consider separating the data from the 
actual class and having it in the LinuxDistro instances, helped by a 
registry.  Something like:



class LinuxDistroRegistry:

   def __init__(self):
  self.distros = set()

   def register(self, linux_distro):

  self.distros.add(linux_distro)

   def query(self, **kwargs):

  ...


registry = LinuxDistroRegistry()

registry.register(LinuxDistro('fedora', '31', 'x86_64', 'deadbeefdeadbeef'))

registry.register(LinuxDistro('fedora', '31', 'aarch64', 
'beefdeadbeefdead'))


checksum = registry.query(name='fedora', version='31', 
arch='x86_64').checksum




+try:
+self._info = self.KNOWN_DISTROS.get(name).get(version).get(arch)



The `AttributeError` that could be caught at the removed 
`get_known_distro_checksum()` function, could come from any of the 
`.get()`s returning `None`, which in turn would not have a `.get()` 
attribute.


But now, if there's a "name", then a "version", but no "arch" entry, 
this line will set `self._info` to `None`.  This is manifested if you 
try to run a test that tries to find an aarch64 distro, such as:


./tests/venv/bin/avocado run 
tests/acceptance/boot_linux.py:BootLinuxAarch64.test_virt_tcg_gicv2



It will result in:


20:38:18 ERROR| Reproduced traceback from: 
/var/lib/users/cleber/build/qemu/tests/venv/lib64/python3.9/site-packages/avocado/core/test.py:756

20:38:18 ERROR| Traceback (most recent call last):
20:38:18 ERROR|   File 
"/var/lib/users/cleber/build/qemu/tests/acceptance/avocado_qemu/__init__.py", 
line 426, in download_boot

20:38:18 ERROR| checksum=self.distro.checksum,
20:38:18 ERROR|   File 
"/var/lib/users/cleber/build/qemu/tests/acceptance/avocado_qemu/__init__.py", 
line 334, in checksum

20:38:18 ERROR| return self._info.get('checksum', None)
20:38:18 ERROR| AttributeError: 'NoneType' object has no attribute 'get'
20:38:18 ERROR|
20:38:18 ERROR| During handling of the above exception, another 
exception occurred:

20:38:18 ERROR|
20:38:18 ERROR| Traceback (most recent call last):
20:38:18 ERROR|   File 
"/var/lib/users/cleber/build/qemu/tests/acceptance/avocado_qemu/__init__.py", 
line 387, in setUp

20:38:18 ERROR| self.set_up_boot()
20:38:18 ERROR|   File 
"/var/lib/users/cleber/build/qemu/tests/acceptance/avocado_qemu/__init__.py", 
line 455, in set_up_boot

20:38:18 ERROR| path = self.download_boot()
20:38:18 ERROR|   File 

[RFC PATCH v2 23/44] i386/tdx: Use KVM_TDX_INIT_VCPU to pass HOB to TDVF

2021-07-07 Thread isaku . yamahata
From: Isaku Yamahata 

Specify the initial value for RCX/R8 to be the address of the HOB.
Don't propagate the value to Qemu's cache of the registers so as to
avoid implying that the register state is valid, e.g. Qemu doesn't model
TDX-SEAM behavior for initializing other GPRs.

Signed-off-by: Isaku Yamahata 
---
 target/i386/kvm/tdx.c | 13 ++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/target/i386/kvm/tdx.c b/target/i386/kvm/tdx.c
index 0cd649dd01..c348626dbf 100644
--- a/target/i386/kvm/tdx.c
+++ b/target/i386/kvm/tdx.c
@@ -285,10 +285,17 @@ out:
 
 void tdx_post_init_vcpu(CPUState *cpu)
 {
-CPUX86State *env = &X86_CPU(cpu)->env;
+MachineState *ms = MACHINE(qdev_get_machine());
+TdxGuest *tdx = (TdxGuest *)object_dynamic_cast(OBJECT(ms->cgs),
+TYPE_TDX_GUEST);
+TdxFirmwareEntry *hob;
+
+if (!tdx) {
+return;
+}
 
-_tdx_ioctl(cpu, KVM_TDX_INIT_VCPU, 0,
-   (void *)(unsigned long)env->regs[R_ECX]);
+hob = tdx_get_hob_entry(tdx);
+_tdx_ioctl(cpu, KVM_TDX_INIT_VCPU, 0, (void *)hob->address);
 }
 
 static bool tdx_guest_get_debug(Object *obj, Error **errp)
-- 
2.25.1




Re: [PATCH 2/2] docs/system: ppc: Update ppce500 documentation with eTSEC support

2021-07-07 Thread David Gibson
On Tue, Jul 06, 2021 at 12:31:24PM +0800, Bin Meng wrote:
> This adds eTSEC support to the PowerPC `ppce500` machine documentation.
> 
> Signed-off-by: Bin Meng 

Applied to ppc-for-6.1, thanks.

> ---
> 
>  docs/system/ppc/ppce500.rst | 10 +-
>  1 file changed, 9 insertions(+), 1 deletion(-)
> 
> diff --git a/docs/system/ppc/ppce500.rst b/docs/system/ppc/ppce500.rst
> index 7a815c1881..afc58f60f5 100644
> --- a/docs/system/ppc/ppce500.rst
> +++ b/docs/system/ppc/ppce500.rst
> @@ -19,6 +19,7 @@ The ``ppce500`` machine supports the following devices:
>  * Power-off functionality via one GPIO pin
>  * 1 Freescale MPC8xxx PCI host controller
>  * VirtIO devices via PCI bus
> +* 1 Freescale Enhanced Triple Speed Ethernet controller (eTSEC)
>  
>  Hardware configuration information
>  --
> @@ -121,7 +122,7 @@ To boot the 32-bit Linux kernel:
>  Running U-Boot
>  --
>  
> -U-Boot mainline v2021.04 release is tested at the time of writing. To build a
> +U-Boot mainline v2021.07 release is tested at the time of writing. To build a
>  U-Boot mainline bootloader that can be booted by the ``ppce500`` machine, use
>  the qemu-ppce500_defconfig with similar commands as described above for 
> Linux:
>  
> @@ -154,3 +155,10 @@ interface at PCI address 0.1.0, but we can switch that 
> to an e1000 NIC by:
>  -display none -serial stdio \
>  -bios u-boot \
>  -nic tap,ifname=tap0,script=no,downscript=no,model=e1000
> +
> +The QEMU ``ppce500`` machine can also dynamically instantiate an eTSEC device
> +if “-device eTSEC” is given to QEMU:
> +
> +.. code-block:: bash
> +
> +  -netdev tap,ifname=tap0,script=no,downscript=no,id=net0 -device 
> eTSEC,netdev=net0

-- 
David Gibson| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson


signature.asc
Description: PGP signature


[RFC PATCH v2 43/44] i386/tdx: disallow level interrupt and SMI/INIT/SIPI delivery mode

2021-07-07 Thread isaku . yamahata
From: Isaku Yamahata 

TDX doesn't allow level interrupt and SMI/INIT/SIPI interrupt delivery
mode.  So disallow them.

Signed-off-by: Isaku Yamahata 
---
 hw/i386/x86.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/hw/i386/x86.c b/hw/i386/x86.c
index 24af05c313..c372403b87 100644
--- a/hw/i386/x86.c
+++ b/hw/i386/x86.c
@@ -1307,6 +1307,9 @@ static int x86_kvm_type(MachineState *ms, const char 
*vm_type)
 kvm_type = KVM_X86_LEGACY_VM;
 } else if (!g_ascii_strcasecmp(vm_type, "tdx")) {
 kvm_type = KVM_X86_TDX_VM;
+X86_MACHINE(ms)->eoi_intercept_unsupported = true;
+X86_MACHINE(ms)->smi_unsupported = true;
+X86_MACHINE(ms)->init_sipi_unsupported = true;
 } else {
 error_report("Unknown kvm-type specified '%s'", vm_type);
 exit(1);
-- 
2.25.1




Re: [PATCH v4 0/3] DEVICE_UNPLUG_ERROR QAPI event

2021-07-07 Thread David Gibson
On Tue, Jul 06, 2021 at 09:33:11PM -0300, Daniel Henrique Barboza wrote:
> Hi,
> 
> This new version is rebased with current master (9aef0954195cc),
> hopefully an adequate format of patch 1, and David's R-b on all
> patches.

Markus do you want to take this, or will you ack and I'll take it
through the ppc tree?

If you want to take it then, for the ppc parts:
Acked-by: David Gibson 

> 
> changes from v3:
> - patch 1:
>   * fixed format
> - all patches:
>   * rebased with master
>   * added David's R-b
> - v3 link: https://lists.gnu.org/archive/html/qemu-devel/2021-06/msg05842.html
> 
> changes from v2:
> - patch 1:
>   * moved DEVICE_UNPLUG_ERROR declaration to qapi/qdev.json
>   * updated 'device_del' description
>   * added 'deprecated' notice on MEM_UNPLUG_ERROR
>   * added MEM_UNPLUG_ERROR 'deprecated' info in docs/system/deprecated.rst
> - patch 2:
>   * send both MEM_UNPLUG_ERROR and DEVICE_UNPLUG_ERROR
> - patch 3 (new):
>   * send DEVICE_UNPLUG_ERROR in acpi/memory_hotplug.c
> - v2 link: https://lists.gnu.org/archive/html/qemu-devel/2021-06/msg01304.html
> 
> changes from v1:
> - former patches 1 and 2: dropped
> - patch 1 (former 3): changed the version to '6.1'
> - patch 2 (former 4): add a DEVICE_UNPLUG_ERROR event in the device
>   unplug error path of CPUs and DIMMs
> - v1 link: https://lists.gnu.org/archive/html/qemu-devel/2021-03/msg04682.html
> 
> *** BLURB HERE ***
> 
> Daniel Henrique Barboza (3):
>   qapi/qdev.json: add DEVICE_UNPLUG_ERROR QAPI event
>   spapr: use DEVICE_UNPLUG_ERROR to report unplug errors
>   memory_hotplug.c: send DEVICE_UNPLUG_ERROR in
> acpi_memory_hotplug_write()
> 
>  docs/system/deprecated.rst | 10 ++
>  hw/acpi/memory_hotplug.c   | 13 +++--
>  hw/ppc/spapr.c |  8 
>  hw/ppc/spapr_drc.c | 15 +--
>  qapi/machine.json  |  6 +-
>  qapi/qdev.json | 27 ++-
>  6 files changed, 69 insertions(+), 10 deletions(-)
> 

-- 
David Gibson| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson


signature.asc
Description: PGP signature


[RFC PATCH v2 28/44] i386/tdx: Force x2apic mode and routing for TDs

2021-07-07 Thread isaku . yamahata
From: Sean Christopherson 

TDX requires x2apic and "resets" vCPUs to have x2apic enabled.  Model
this in QEMU and unconditionally enable x2apic interrupt routing.

This fixes issues where interrupts from IRQFD would not get forwarded to
the guest due to KVM silently dropping the invalid routing entry.

Signed-off-by: Sean Christopherson 
Signed-off-by: Isaku Yamahata 
---
 hw/intc/apic_common.c   | 12 
 include/hw/i386/apic.h  |  1 +
 include/hw/i386/apic_internal.h |  1 +
 target/i386/kvm/tdx.c   |  7 +++
 4 files changed, 21 insertions(+)

diff --git a/hw/intc/apic_common.c b/hw/intc/apic_common.c
index 2a20982066..b95fed95da 100644
--- a/hw/intc/apic_common.c
+++ b/hw/intc/apic_common.c
@@ -262,6 +262,15 @@ void apic_designate_bsp(DeviceState *dev, bool bsp)
 }
 }
 
+void apic_force_x2apic(DeviceState *dev)
+{
+if (dev == NULL) {
+return;
+}
+
+APIC_COMMON(dev)->force_x2apic = true;
+}
+
 static void apic_reset_common(DeviceState *dev)
 {
 APICCommonState *s = APIC_COMMON(dev);
@@ -270,6 +279,9 @@ static void apic_reset_common(DeviceState *dev)
 
 bsp = s->apicbase & MSR_IA32_APICBASE_BSP;
 s->apicbase = APIC_DEFAULT_ADDRESS | bsp | MSR_IA32_APICBASE_ENABLE;
+if (s->force_x2apic) {
+s->apicbase |= MSR_IA32_APICBASE_EXTD;
+}
 s->id = s->initial_apic_id;
 
 apic_reset_irq_delivered();
diff --git a/include/hw/i386/apic.h b/include/hw/i386/apic.h
index da1d2fe155..7d05abd7e0 100644
--- a/include/hw/i386/apic.h
+++ b/include/hw/i386/apic.h
@@ -19,6 +19,7 @@ void apic_init_reset(DeviceState *s);
 void apic_sipi(DeviceState *s);
 void apic_poll_irq(DeviceState *d);
 void apic_designate_bsp(DeviceState *d, bool bsp);
+void apic_force_x2apic(DeviceState *d);
 int apic_get_highest_priority_irr(DeviceState *dev);
 
 /* pc.c */
diff --git a/include/hw/i386/apic_internal.h b/include/hw/i386/apic_internal.h
index c175e7e718..eda0b5a587 100644
--- a/include/hw/i386/apic_internal.h
+++ b/include/hw/i386/apic_internal.h
@@ -187,6 +187,7 @@ struct APICCommonState {
 DeviceState *vapic;
 hwaddr vapic_paddr; /* note: persistence via kvmvapic */
 bool legacy_instance_id;
+bool force_x2apic;
 };
 
 typedef struct VAPICState {
diff --git a/target/i386/kvm/tdx.c b/target/i386/kvm/tdx.c
index c348626dbf..47a502051c 100644
--- a/target/i386/kvm/tdx.c
+++ b/target/i386/kvm/tdx.c
@@ -139,6 +139,11 @@ int tdx_kvm_init(ConfidentialGuestSupport *cgs, Error 
**errp)
 tdx_caps->nr_cpuid_configs = TDX1_MAX_NR_CPUID_CONFIGS;
 tdx_ioctl(KVM_TDX_CAPABILITIES, 0, tdx_caps);
 
+if (!kvm_enable_x2apic()) {
+error_report("Failed to enable x2apic in KVM");
+exit(1);
+}
+
 qemu_add_machine_init_done_late_notifier(&tdx_machine_done_late_notify);
 
 return 0;
@@ -296,6 +301,8 @@ void tdx_post_init_vcpu(CPUState *cpu)
 
 hob = tdx_get_hob_entry(tdx);
 _tdx_ioctl(cpu, KVM_TDX_INIT_VCPU, 0, (void *)hob->address);
+
+apic_force_x2apic(X86_CPU(cpu)->apic_state);
 }
 
 static bool tdx_guest_get_debug(Object *obj, Error **errp)
-- 
2.25.1




[RFC PATCH v2 42/44] hw/i386: add a flag to disable init/sipi delivery mode of interrupt

2021-07-07 Thread isaku . yamahata
From: Isaku Yamahata 

Add a new flag to X86Machine to disallow INIT/SIPI delivery mode of
interrupt and pass it to ioapic creation so that ioapic disallows INIT/SIPI
delivery mode.

Signed-off-by: Isaku Yamahata 
---
 hw/i386/microvm.c |  4 ++--
 hw/i386/pc_piix.c |  2 +-
 hw/i386/pc_q35.c  |  2 +-
 hw/i386/x86.c | 11 +--
 include/hw/i386/x86.h |  7 +--
 5 files changed, 18 insertions(+), 8 deletions(-)

diff --git a/hw/i386/microvm.c b/hw/i386/microvm.c
index 7504324891..c790adecfb 100644
--- a/hw/i386/microvm.c
+++ b/hw/i386/microvm.c
@@ -176,11 +176,11 @@ static void microvm_devices_init(MicrovmMachineState *mms)
 isa_bus_irqs(isa_bus, x86ms->gsi);
 
 ioapic_init_gsi(gsi_state, "machine", x86ms->eoi_intercept_unsupported,
-x86ms->smi_unsupported);
+x86ms->smi_unsupported, x86ms->init_sipi_unsupported);
 if (ioapics > 1) {
 x86ms->ioapic2 = ioapic_init_secondary(
 gsi_state, x86ms->eoi_intercept_unsupported,
-x86ms->smi_unsupported);
+x86ms->smi_unsupported, x86ms->init_sipi_unsupported);
 }
 
 kvmclock_create(true);
diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
index 0958035bf8..940cd0f47b 100644
--- a/hw/i386/pc_piix.c
+++ b/hw/i386/pc_piix.c
@@ -224,7 +224,7 @@ static void pc_init1(MachineState *machine,
 
 if (pcmc->pci_enabled) {
 ioapic_init_gsi(gsi_state, "i440fx", x86ms->eoi_intercept_unsupported,
-x86ms->smi_unsupported);
+x86ms->smi_unsupported, x86ms->init_sipi_unsupported);
 }
 
 if (tcg_enabled()) {
diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
index 1ab8a6a78b..8f677ec136 100644
--- a/hw/i386/pc_q35.c
+++ b/hw/i386/pc_q35.c
@@ -257,7 +257,7 @@ static void pc_q35_init(MachineState *machine)
 
 if (pcmc->pci_enabled) {
 ioapic_init_gsi(gsi_state, "q35", x86ms->eoi_intercept_unsupported,
-x86ms->smi_unsupported);
+x86ms->smi_unsupported, x86ms->init_sipi_unsupported);
 }
 
 if (tcg_enabled()) {
diff --git a/hw/i386/x86.c b/hw/i386/x86.c
index 3dc36e3590..24af05c313 100644
--- a/hw/i386/x86.c
+++ b/hw/i386/x86.c
@@ -610,7 +610,8 @@ void gsi_handler(void *opaque, int n, int level)
 
 void ioapic_init_gsi(GSIState *gsi_state, const char *parent_name,
  bool level_trigger_unsupported,
- bool smi_unsupported)
+ bool smi_unsupported,
+ bool init_sipi_unsupported)
 {
 DeviceState *dev;
 SysBusDevice *d;
@@ -628,6 +629,8 @@ void ioapic_init_gsi(GSIState *gsi_state, const char 
*parent_name,
  level_trigger_unsupported, NULL);
 object_property_set_bool(OBJECT(dev), "smi_unsupported",
  smi_unsupported, NULL);
+object_property_set_bool(OBJECT(dev), "init_sipi_unsupported",
+ init_sipi_unsupported, NULL);
 d = SYS_BUS_DEVICE(dev);
 sysbus_realize_and_unref(d, &error_fatal);
 sysbus_mmio_map(d, 0, IO_APIC_DEFAULT_ADDRESS);
@@ -639,7 +642,8 @@ void ioapic_init_gsi(GSIState *gsi_state, const char 
*parent_name,
 
 DeviceState *ioapic_init_secondary(GSIState *gsi_state,
bool level_trigger_unsupported,
-   bool smi_unsupported)
+   bool smi_unsupported,
+   bool init_sipi_unsupported)
 {
 DeviceState *dev;
 SysBusDevice *d;
@@ -650,6 +654,8 @@ DeviceState *ioapic_init_secondary(GSIState *gsi_state,
  level_trigger_unsupported, NULL);
 object_property_set_bool(OBJECT(dev), "smi_unsupported",
  smi_unsupported, NULL);
+object_property_set_bool(OBJECT(dev), "init_sipi_unsupported",
+ init_sipi_unsupported, NULL);
 d = SYS_BUS_DEVICE(dev);
 sysbus_realize_and_unref(d, &error_fatal);
 sysbus_mmio_map(d, 0, IO_APIC_SECONDARY_ADDRESS);
@@ -1325,6 +1331,7 @@ static void x86_machine_initfn(Object *obj)
 x86ms->bus_lock_ratelimit = 0;
 x86ms->eoi_intercept_unsupported = false;
 x86ms->smi_unsupported = false;
+x86ms->init_sipi_unsupported = false;
 
 object_property_add_str(obj, "kvm-type",
 x86_get_kvm_type, x86_set_kvm_type);
diff --git a/include/hw/i386/x86.h b/include/hw/i386/x86.h
index 3d1d74d171..bca8c2b57d 100644
--- a/include/hw/i386/x86.h
+++ b/include/hw/i386/x86.h
@@ -65,6 +65,7 @@ struct X86MachineState {
 uint16_t boot_cpus;
 bool eoi_intercept_unsupported;
 bool smi_unsupported;
+bool init_sipi_unsupported;
 
 OnOffAuto smm;
 OnOffAuto acpi;
@@ -143,9 +144,11 @@ qemu_irq x86_allocate_cpu_irq(void);
 void gsi_handler(void *opaque, int n, int level);
 void ioapic_init_gsi(GSIState *gsi_state, const char *parent_name,
  bool 

[RFC PATCH v2 27/44] q35: Introduce smm_ranges property for q35-pci-host

2021-07-07 Thread isaku . yamahata
From: Isaku Yamahata 

Add a q35 property to check whether or not SMM ranges, e.g. SMRAM, TSEG,
etc... exist for the target platform.  TDX doesn't support SMM and doesn't
play nice with QEMU modifying related guest memory ranges.

Signed-off-by: Isaku Yamahata 
Co-developed-by: Sean Christopherson 
Signed-off-by: Sean Christopherson 
Signed-off-by: Isaku Yamahata 
---
 hw/i386/pc_q35.c  |  2 ++
 hw/pci-host/q35.c | 42 +++
 include/hw/i386/pc.h  |  1 +
 include/hw/pci-host/q35.h |  1 +
 4 files changed, 33 insertions(+), 13 deletions(-)

diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
index 46a0f196f4..1718aa94d9 100644
--- a/hw/i386/pc_q35.c
+++ b/hw/i386/pc_q35.c
@@ -219,6 +219,8 @@ static void pc_q35_init(MachineState *machine)
 x86ms->below_4g_mem_size, NULL);
 object_property_set_int(OBJECT(q35_host), PCI_HOST_ABOVE_4G_MEM_SIZE,
 x86ms->above_4g_mem_size, NULL);
+object_property_set_bool(OBJECT(q35_host), PCI_HOST_PROP_SMM_RANGES,
+ x86_machine_is_smm_enabled(x86ms), NULL);
 /* pci */
 sysbus_realize_and_unref(SYS_BUS_DEVICE(q35_host), &error_fatal);
 phb = PCI_HOST_BRIDGE(q35_host);
diff --git a/hw/pci-host/q35.c b/hw/pci-host/q35.c
index 68234d209c..ba28d969ba 100644
--- a/hw/pci-host/q35.c
+++ b/hw/pci-host/q35.c
@@ -183,6 +183,8 @@ static Property q35_host_props[] = {
  mch.below_4g_mem_size, 0),
 DEFINE_PROP_SIZE(PCI_HOST_ABOVE_4G_MEM_SIZE, Q35PCIHost,
  mch.above_4g_mem_size, 0),
+DEFINE_PROP_BOOL(PCI_HOST_PROP_SMM_RANGES, Q35PCIHost,
+ mch.has_smm_ranges, true),
 DEFINE_PROP_BOOL("x-pci-hole64-fix", Q35PCIHost, pci_hole64_fix, true),
 DEFINE_PROP_END_OF_LIST(),
 };
@@ -218,6 +220,7 @@ static void q35_host_initfn(Object *obj)
 /* mch's object_initialize resets the default value, set it again */
 qdev_prop_set_uint64(DEVICE(s), PCI_HOST_PROP_PCI_HOLE64_SIZE,
  Q35_PCI_HOST_HOLE64_SIZE_DEFAULT);
+
 object_property_add(obj, PCI_HOST_PROP_PCI_HOLE_START, "uint32",
 q35_host_get_pci_hole_start,
 NULL, NULL, NULL);
@@ -478,6 +481,10 @@ static void mch_write_config(PCIDevice *d,
 mch_update_pam(mch);
 }
 
+if (!mch->has_smm_ranges) {
+return;
+}
+
 if (ranges_overlap(address, len, MCH_HOST_BRIDGE_SMRAM,
MCH_HOST_BRIDGE_SMRAM_SIZE)) {
 mch_update_smram(mch);
@@ -496,10 +503,13 @@ static void mch_write_config(PCIDevice *d,
 static void mch_update(MCHPCIState *mch)
 {
 mch_update_pciexbar(mch);
+
 mch_update_pam(mch);
-mch_update_smram(mch);
-mch_update_ext_tseg_mbytes(mch);
-mch_update_smbase_smram(mch);
+if (mch->has_smm_ranges) {
+mch_update_smram(mch);
+mch_update_ext_tseg_mbytes(mch);
+mch_update_smbase_smram(mch);
+}
 
 /*
  * pci hole goes from end-of-low-ram to io-apic.
@@ -540,18 +550,20 @@ static void mch_reset(DeviceState *qdev)
 pci_set_quad(d->config + MCH_HOST_BRIDGE_PCIEXBAR,
  MCH_HOST_BRIDGE_PCIEXBAR_DEFAULT);
 
-d->config[MCH_HOST_BRIDGE_SMRAM] = MCH_HOST_BRIDGE_SMRAM_DEFAULT;
-d->config[MCH_HOST_BRIDGE_ESMRAMC] = MCH_HOST_BRIDGE_ESMRAMC_DEFAULT;
-d->wmask[MCH_HOST_BRIDGE_SMRAM] = MCH_HOST_BRIDGE_SMRAM_WMASK;
-d->wmask[MCH_HOST_BRIDGE_ESMRAMC] = MCH_HOST_BRIDGE_ESMRAMC_WMASK;
+if (mch->has_smm_ranges) {
+d->config[MCH_HOST_BRIDGE_SMRAM] = MCH_HOST_BRIDGE_SMRAM_DEFAULT;
+d->config[MCH_HOST_BRIDGE_ESMRAMC] = MCH_HOST_BRIDGE_ESMRAMC_DEFAULT;
+d->wmask[MCH_HOST_BRIDGE_SMRAM] = MCH_HOST_BRIDGE_SMRAM_WMASK;
+d->wmask[MCH_HOST_BRIDGE_ESMRAMC] = MCH_HOST_BRIDGE_ESMRAMC_WMASK;
 
-if (mch->ext_tseg_mbytes > 0) {
-pci_set_word(d->config + MCH_HOST_BRIDGE_EXT_TSEG_MBYTES,
- MCH_HOST_BRIDGE_EXT_TSEG_MBYTES_QUERY);
-}
+if (mch->ext_tseg_mbytes > 0) {
+pci_set_word(d->config + MCH_HOST_BRIDGE_EXT_TSEG_MBYTES,
+MCH_HOST_BRIDGE_EXT_TSEG_MBYTES_QUERY);
+}
 
-d->config[MCH_HOST_BRIDGE_F_SMBASE] = 0;
-d->wmask[MCH_HOST_BRIDGE_F_SMBASE] = 0xff;
+d->config[MCH_HOST_BRIDGE_F_SMBASE] = 0;
+d->wmask[MCH_HOST_BRIDGE_F_SMBASE] = 0xff;
+}
 
 mch_update(mch);
 }
@@ -581,6 +593,10 @@ static void mch_realize(PCIDevice *d, Error **errp)
  PAM_EXPAN_BASE + i * PAM_EXPAN_SIZE, PAM_EXPAN_SIZE);
 }
 
+if (!mch->has_smm_ranges) {
+return;
+}
+
 /* if *disabled* show SMRAM to all CPUs */
 memory_region_init_alias(&mch->smram_region, OBJECT(mch), "smram-region",
  mch->pci_address_space, 
MCH_HOST_BRIDGE_SMRAM_C_BASE,
diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
index 87294f2632..cd2113c763 100644
--- a/include/hw/i386/pc.h
+++ 

[RFC PATCH v2 25/44] q35: Move PCIe BAR check above PAM check in mch_write_config()

2021-07-07 Thread isaku . yamahata
From: Sean Christopherson 

Process PCIe BAR before PAM so that a future patch can skip all the SMM
related crud with a single check-and-return.

Signed-off-by: Sean Christopherson 
Signed-off-by: Isaku Yamahata 
---
 hw/pci-host/q35.c | 10 +-
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/hw/pci-host/q35.c b/hw/pci-host/q35.c
index 2eb729dff5..9a2be237d7 100644
--- a/hw/pci-host/q35.c
+++ b/hw/pci-host/q35.c
@@ -468,16 +468,16 @@ static void mch_write_config(PCIDevice *d,
 
 pci_default_write_config(d, address, val, len);
 
-if (ranges_overlap(address, len, MCH_HOST_BRIDGE_PAM0,
-   MCH_HOST_BRIDGE_PAM_SIZE)) {
-mch_update_pam(mch);
-}
-
 if (ranges_overlap(address, len, MCH_HOST_BRIDGE_PCIEXBAR,
MCH_HOST_BRIDGE_PCIEXBAR_SIZE)) {
 mch_update_pciexbar(mch);
 }
 
+if (ranges_overlap(address, len, MCH_HOST_BRIDGE_PAM0,
+   MCH_HOST_BRIDGE_PAM_SIZE)) {
+mch_update_pam(mch);
+}
+
 if (ranges_overlap(address, len, MCH_HOST_BRIDGE_SMRAM,
MCH_HOST_BRIDGE_SMRAM_SIZE)) {
 mch_update_smram(mch);
-- 
2.25.1




[RFC PATCH v2 08/44] i386/kvm: Skip KVM_X86_SETUP_MCE for TDX guests

2021-07-07 Thread isaku . yamahata
From: Isaku Yamahata 

Despite advertising MCE support to the guest, TDX-SEAM doesn't support
injecting #MCs into the guest.   All of the associated setup is thus
rejected by KVM.

Signed-off-by: Isaku Yamahata 
---
 target/i386/kvm/kvm.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
index 27b64dedc2..c29cb420a1 100644
--- a/target/i386/kvm/kvm.c
+++ b/target/i386/kvm/kvm.c
@@ -1825,7 +1825,8 @@ int kvm_arch_init_vcpu(CPUState *cs)
 if (((env->cpuid_version >> 8)&0xF) >= 6
 && (env->features[FEAT_1_EDX] & (CPUID_MCE | CPUID_MCA)) ==
(CPUID_MCE | CPUID_MCA)
-&& kvm_check_extension(cs->kvm_state, KVM_CAP_MCE) > 0) {
+&& kvm_check_extension(cs->kvm_state, KVM_CAP_MCE) > 0
+&& vm_type != KVM_X86_TDX_VM) {
 uint64_t mcg_cap, unsupported_caps;
 int banks;
 int ret;
-- 
2.25.1




[RFC PATCH v2 21/44] i386/tdx: Create the TD HOB list upon machine init done

2021-07-07 Thread isaku . yamahata
From: Isaku Yamahata 

Build the TD HOB during machine late initialization, i.e. once guest
memory is fully defined.

Signed-off-by: Isaku Yamahata 
Co-developed-by: Sean Christopherson 
Signed-off-by: Sean Christopherson 
---
 hw/i386/meson.build   |   2 +-
 hw/i386/tdvf-hob.c| 166 ++
 hw/i386/tdvf-hob.h|  20 +
 target/i386/kvm/tdx.c |  19 +
 4 files changed, 206 insertions(+), 1 deletion(-)
 create mode 100644 hw/i386/tdvf-hob.c
 create mode 100644 hw/i386/tdvf-hob.h

diff --git a/hw/i386/meson.build b/hw/i386/meson.build
index 945e805525..8175c3c638 100644
--- a/hw/i386/meson.build
+++ b/hw/i386/meson.build
@@ -24,7 +24,7 @@ i386_ss.add(when: 'CONFIG_PC', if_true: files(
   'pc_sysfw.c',
   'acpi-build.c',
   'port92.c'))
-i386_ss.add(when: 'CONFIG_TDX', if_true: files('tdvf.c'))
+i386_ss.add(when: 'CONFIG_TDX', if_true: files('tdvf.c', 'tdvf-hob.c'))
 
 subdir('kvm')
 subdir('xen')
diff --git a/hw/i386/tdvf-hob.c b/hw/i386/tdvf-hob.c
new file mode 100644
index 00..5e0bf807f7
--- /dev/null
+++ b/hw/i386/tdvf-hob.c
@@ -0,0 +1,166 @@
+/*
+ * SPDX-License-Identifier: GPL-2.0-or-later
+
+ * Copyright (c) 2020 Intel Corporation
+ * Author: Isaku Yamahata 
+ *
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/log.h"
+#include "e820_memory_layout.h"
+#include "hw/i386/x86.h"
+#include "sysemu/tdx.h"
+#include "tdvf-hob.h"
+#include "uefi.h"
+
+typedef struct TdvfHob {
+hwaddr hob_addr;
+void *ptr;
+int size;
+
+/* working area */
+void *current;
+void *end;
+} TdvfHob;
+
+static uint64_t tdvf_current_guest_addr(const TdvfHob *hob)
+{
+return hob->hob_addr + (hob->current - hob->ptr);
+}
+
+static void tdvf_align(TdvfHob *hob, size_t align)
+{
+hob->current = QEMU_ALIGN_PTR_UP(hob->current, align);
+}
+
+static void *tdvf_get_area(TdvfHob *hob, uint64_t size)
+{
+void *ret;
+
+if (hob->current + size > hob->end) {
+error_report("TD_HOB overrun, size = 0x%" PRIx64, size);
+exit(1);
+}
+
+ret = hob->current;
+hob->current += size;
+tdvf_align(hob, 8);
+return ret;
+}
+
+static int tdvf_e820_compare(const void *lhs_, const void* rhs_)
+{
+const struct e820_entry *lhs = lhs_;
+const struct e820_entry *rhs = rhs_;
+
+if (lhs->address == rhs->address) {
+return 0;
+}
+if (le64_to_cpu(lhs->address) > le64_to_cpu(rhs->address)) {
+return 1;
+}
+return -1;
+}
+
+static void tdvf_hob_add_memory_resources(TdvfHob *hob)
+{
+EFI_HOB_RESOURCE_DESCRIPTOR *region;
+EFI_RESOURCE_ATTRIBUTE_TYPE attr;
+EFI_RESOURCE_TYPE resource_type;
+
+struct e820_entry *e820_entries, *e820_entry;
+int nr_e820_entries, i;
+
+nr_e820_entries = e820_get_num_entries();
+e820_entries = g_new(struct e820_entry, nr_e820_entries);
+
+/* Copy and sort the e820 tables to add them to the HOB. */
+memcpy(e820_entries, e820_table,
+   nr_e820_entries * sizeof(struct e820_entry));
+qsort(e820_entries, nr_e820_entries, sizeof(struct e820_entry),
+  &tdvf_e820_compare);
+
+for (i = 0; i < nr_e820_entries; i++) {
+e820_entry = &e820_entries[i];
+
+if (le32_to_cpu(e820_entry->type) == E820_RAM) {
+resource_type = EFI_RESOURCE_SYSTEM_MEMORY;
+attr = EFI_RESOURCE_ATTRIBUTE_TDVF_UNACCEPTED;
+} else {
+resource_type = EFI_RESOURCE_MEMORY_RESERVED;
+attr = EFI_RESOURCE_ATTRIBUTE_TDVF_PRIVATE;
+}
+
+region = tdvf_get_area(hob, sizeof(*region));
+*region = (EFI_HOB_RESOURCE_DESCRIPTOR) {
+.Header = {
+.HobType = EFI_HOB_TYPE_RESOURCE_DESCRIPTOR,
+.HobLength = cpu_to_le16(sizeof(*region)),
+.Reserved = cpu_to_le32(0),
+},
+.Owner = EFI_HOB_OWNER_ZERO,
+.ResourceType = cpu_to_le32(resource_type),
+.ResourceAttribute = cpu_to_le32(attr),
+.PhysicalStart = e820_entry->address,
+.ResourceLength = e820_entry->length,
+};
+}
+
+g_free(e820_entries);
+}
+
+void tdvf_hob_create(TdxGuest *tdx, TdxFirmwareEntry *hob_entry)
+{
+TdvfHob hob = {
+.hob_addr = hob_entry->address,
+.ptr = hob_entry->mem_ptr,
+.size = 

[RFC PATCH v2 24/44] i386/tdx: Add MMIO HOB entries

2021-07-07 Thread isaku . yamahata
From: Sean Christopherson 

Add MMIO HOB entries, which are needed to enumerate legal MMIO ranges to
early TDVF.

Note, the attribute absolutely must include UNCACHEABLE, else TDVF will
effectively consider it a bad HOB entry and ignore it.

Signed-off-by: Sean Christopherson 
Signed-off-by: Isaku Yamahata 
---
 hw/i386/tdvf-hob.c | 69 ++
 hw/i386/tdvf-hob.h |  5 
 2 files changed, 74 insertions(+)

diff --git a/hw/i386/tdvf-hob.c b/hw/i386/tdvf-hob.c
index 5e0bf807f7..60c5ed0e03 100644
--- a/hw/i386/tdvf-hob.c
+++ b/hw/i386/tdvf-hob.c
@@ -22,7 +22,10 @@
 #include "qemu/osdep.h"
 #include "qemu/log.h"
 #include "e820_memory_layout.h"
+#include "hw/i386/pc.h"
 #include "hw/i386/x86.h"
+#include "hw/pci/pci_host.h"
+#include "hw/pci/pcie_host.h"
 #include "sysemu/tdx.h"
 #include "tdvf-hob.h"
 #include "uefi.h"
@@ -62,6 +65,70 @@ static void *tdvf_get_area(TdvfHob *hob, uint64_t size)
 return ret;
 }
 
+static void tdvf_hob_add_mmio_resource(TdvfHob *hob, uint64_t start,
+   uint64_t end)
+{
+EFI_HOB_RESOURCE_DESCRIPTOR *region;
+
+if (!start) {
+return;
+}
+
+region = tdvf_get_area(hob, sizeof(*region));
+*region = (EFI_HOB_RESOURCE_DESCRIPTOR) {
+.Header = {
+.HobType = EFI_HOB_TYPE_RESOURCE_DESCRIPTOR,
+.HobLength = cpu_to_le16(sizeof(*region)),
+.Reserved = cpu_to_le32(0),
+},
+.Owner = EFI_HOB_OWNER_ZERO,
+.ResourceType = cpu_to_le32(EFI_RESOURCE_MEMORY_MAPPED_IO),
+.ResourceAttribute = cpu_to_le32(EFI_RESOURCE_ATTRIBUTE_TDVF_MMIO),
+.PhysicalStart = cpu_to_le64(start),
+.ResourceLength = cpu_to_le64(end - start),
+};
+}
+
+static void tdvf_hob_add_mmio_resources(TdvfHob *hob)
+{
+MachineState *ms = MACHINE(qdev_get_machine());
+X86MachineState *x86ms = X86_MACHINE(ms);
+PCIHostState *pci_host;
+uint64_t start, end;
+uint64_t mcfg_base, mcfg_size;
+Object *host;
+
+/* Effectively PCI hole + other MMIO devices. */
+tdvf_hob_add_mmio_resource(hob, x86ms->below_4g_mem_size,
+   APIC_DEFAULT_ADDRESS);
+
+/* Stolen from acpi_get_i386_pci_host(), there's gotta be an easier way. */
+pci_host = OBJECT_CHECK(PCIHostState,
+object_resolve_path("/machine/i440fx", NULL),
+TYPE_PCI_HOST_BRIDGE);
+if (!pci_host) {
+pci_host = OBJECT_CHECK(PCIHostState,
+object_resolve_path("/machine/q35", NULL),
+TYPE_PCI_HOST_BRIDGE);
+}
+g_assert(pci_host);
+
+host = OBJECT(pci_host);
+
+/* PCI hole above 4gb. */
+start = object_property_get_uint(host, PCI_HOST_PROP_PCI_HOLE64_START,
+ NULL);
+end = object_property_get_uint(host, PCI_HOST_PROP_PCI_HOLE64_END, NULL);
+tdvf_hob_add_mmio_resource(hob, start, end);
+
+/* MMCFG region */
+mcfg_base = object_property_get_uint(host, PCIE_HOST_MCFG_BASE, NULL);
+mcfg_size = object_property_get_uint(host, PCIE_HOST_MCFG_SIZE, NULL);
+if (mcfg_base && mcfg_base != PCIE_BASE_ADDR_UNMAPPED && mcfg_size) {
+tdvf_hob_add_mmio_resource(hob, mcfg_base, mcfg_base + mcfg_size);
+}
+}
+
 static int tdvf_e820_compare(const void *lhs_, const void* rhs_)
 {
 const struct e820_entry *lhs = lhs_;
@@ -156,6 +223,8 @@ void tdvf_hob_create(TdxGuest *tdx, TdxFirmwareEntry 
*hob_entry)
 
 tdvf_hob_add_memory_resources(&hob);
 
+tdvf_hob_add_mmio_resources(&hob);
+
 last_hob = tdvf_get_area(&hob, sizeof(*last_hob));
 *last_hob =  (EFI_HOB_GENERIC_HEADER) {
 .HobType = EFI_HOB_TYPE_END_OF_HOB_LIST,
diff --git a/hw/i386/tdvf-hob.h b/hw/i386/tdvf-hob.h
index c6c5c1d564..9967dbfe5a 100644
--- a/hw/i386/tdvf-hob.h
+++ b/hw/i386/tdvf-hob.h
@@ -17,4 +17,9 @@ void tdvf_hob_create(TdxGuest *tdx, TdxFirmwareEntry 
*hob_entry);
  EFI_RESOURCE_ATTRIBUTE_INITIALIZED |   \
  EFI_RESOURCE_ATTRIBUTE_UNACCEPTED)
 
+#define EFI_RESOURCE_ATTRIBUTE_TDVF_MMIO\
+(EFI_RESOURCE_ATTRIBUTE_PRESENT |   \
+ EFI_RESOURCE_ATTRIBUTE_INITIALIZED |   \
+ EFI_RESOURCE_ATTRIBUTE_UNCACHEABLE)
+
 #endif
-- 
2.25.1




[RFC PATCH v2 22/44] i386/tdx: Add TDVF memory via INIT_MEM_REGION

2021-07-07 Thread isaku . yamahata
From: Isaku Yamahata 

Add, and optionally measure, TDVF memory via KVM_TDX_INIT_MEM_REGION as
part of finalizing the TD.

Signed-off-by: Isaku Yamahata 
Co-developed-by: Sean Christopherson 
Signed-off-by: Sean Christopherson 
---
 target/i386/kvm/tdx.c | 17 -
 1 file changed, 16 insertions(+), 1 deletion(-)

diff --git a/target/i386/kvm/tdx.c b/target/i386/kvm/tdx.c
index 12b2e02fa2..0cd649dd01 100644
--- a/target/i386/kvm/tdx.c
+++ b/target/i386/kvm/tdx.c
@@ -85,10 +85,26 @@ static void tdx_finalize_vm(Notifier *notifier, void 
*unused)
 {
 MachineState *ms = MACHINE(qdev_get_machine());
 TdxGuest *tdx = TDX_GUEST(ms->cgs);
+TdxFirmwareEntry *entry;
 
 tdvf_hob_create(tdx, tdx_get_hob_entry(tdx));
 
+for_each_fw_entry(>fw, entry) {
+struct kvm_tdx_init_mem_region mem_region = {
+.source_addr = (__u64)entry->mem_ptr,
+.gpa = entry->address,
+.nr_pages = entry->size / 4096,
+};
+
+__u32 metadata = entry->attributes & TDVF_SECTION_ATTRIBUTES_EXTENDMR ?
+ KVM_TDX_MEASURE_MEMORY_REGION : 0;
+
+tdx_ioctl(KVM_TDX_INIT_MEM_REGION, metadata, &mem_region);
+}
+
 tdx_ioctl(KVM_TDX_FINALIZE_VM, 0, NULL);
+
+tdx->parent_obj.ready = true;
 }
 
 static Notifier tdx_machine_done_late_notify = {
@@ -301,7 +317,6 @@ static void tdx_guest_init(Object *obj)
 {
 TdxGuest *tdx = TDX_GUEST(obj);
 
-tdx->parent_obj.ready = true;
 qemu_mutex_init(>lock);
 
 tdx->debug = false;
-- 
2.25.1




[RFC PATCH v2 14/44] i386/tdx: Frame in the call for KVM_TDX_INIT_VCPU

2021-07-07 Thread isaku . yamahata
From: Isaku Yamahata 

Signed-off-by: Sean Christopherson 
Signed-off-by: Isaku Yamahata 
---
 include/sysemu/tdx.h   |  1 +
 target/i386/kvm/kvm.c  |  8 
 target/i386/kvm/tdx-stub.c |  4 
 target/i386/kvm/tdx.c  | 20 
 4 files changed, 29 insertions(+), 4 deletions(-)

diff --git a/include/sysemu/tdx.h b/include/sysemu/tdx.h
index 36a901e723..03461b6ae8 100644
--- a/include/sysemu/tdx.h
+++ b/include/sysemu/tdx.h
@@ -8,5 +8,6 @@ bool kvm_has_tdx(KVMState *s);
 #endif
 
 void tdx_pre_create_vcpu(CPUState *cpu);
+void tdx_post_init_vcpu(CPUState *cpu);
 
 #endif
diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
index 25dcecd60c..af6b5f350e 100644
--- a/target/i386/kvm/kvm.c
+++ b/target/i386/kvm/kvm.c
@@ -4122,6 +4122,14 @@ int kvm_arch_put_registers(CPUState *cpu, int level)
 
 assert(cpu_is_stopped(cpu) || qemu_cpu_is_self(cpu));
 
+/*
+ * level == KVM_PUT_FULL_STATE is only set by
+ * kvm_cpu_synchronize_post_init() after initialization
+ */
+if (vm_type == KVM_X86_TDX_VM && level == KVM_PUT_FULL_STATE) {
+tdx_post_init_vcpu(cpu);
+}
+
 /* TODO: Allow accessing guest state for debug TDs. */
 if (vm_type == KVM_X86_TDX_VM) {
 return 0;
diff --git a/target/i386/kvm/tdx-stub.c b/target/i386/kvm/tdx-stub.c
index 93d5913c89..93afe07ddb 100644
--- a/target/i386/kvm/tdx-stub.c
+++ b/target/i386/kvm/tdx-stub.c
@@ -12,3 +12,7 @@ bool kvm_has_tdx(KVMState *s)
 void tdx_pre_create_vcpu(CPUState *cpu)
 {
 }
+
+void tdx_post_init_vcpu(CPUState *cpu)
+{
+}
diff --git a/target/i386/kvm/tdx.c b/target/i386/kvm/tdx.c
index b1e4f27c9a..67fb03b4b5 100644
--- a/target/i386/kvm/tdx.c
+++ b/target/i386/kvm/tdx.c
@@ -38,7 +38,7 @@ bool kvm_has_tdx(KVMState *s)
 return !!(kvm_check_extension(s, KVM_CAP_VM_TYPES) & BIT(KVM_X86_TDX_VM));
 }
 
-static void __tdx_ioctl(int ioctl_no, const char *ioctl_name,
+static void __tdx_ioctl(void *state, int ioctl_no, const char *ioctl_name,
 __u32 metadata, void *data)
 {
 struct kvm_tdx_cmd tdx_cmd;
@@ -51,17 +51,21 @@ static void __tdx_ioctl(int ioctl_no, const char 
*ioctl_name,
 tdx_cmd.data = (__u64)(unsigned long)data;
 
 if (ioctl_no == KVM_TDX_CAPABILITIES) {
-r = kvm_ioctl(kvm_state, KVM_MEMORY_ENCRYPT_OP, &tdx_cmd);
+r = kvm_ioctl(state, KVM_MEMORY_ENCRYPT_OP, &tdx_cmd);
+} else if (ioctl_no == KVM_TDX_INIT_VCPU) {
+r = kvm_vcpu_ioctl(state, KVM_MEMORY_ENCRYPT_OP, &tdx_cmd);
 } else {
-r = kvm_vm_ioctl(kvm_state, KVM_MEMORY_ENCRYPT_OP, &tdx_cmd);
+r = kvm_vm_ioctl(state, KVM_MEMORY_ENCRYPT_OP, &tdx_cmd);
 }
 if (r) {
 error_report("%s failed: %s", ioctl_name, strerror(-r));
 exit(1);
 }
 }
+#define _tdx_ioctl(cpu, ioctl_no, metadata, data) \
+__tdx_ioctl(cpu, ioctl_no, stringify(ioctl_no), metadata, data)
 #define tdx_ioctl(ioctl_no, metadata, data) \
-__tdx_ioctl(ioctl_no, stringify(ioctl_no), metadata, data)
+_tdx_ioctl(kvm_state, ioctl_no, metadata, data)
 
 static void tdx_finalize_vm(Notifier *notifier, void *unused)
 {
@@ -219,6 +223,14 @@ out:
 qemu_mutex_unlock(>lock);
 }
 
+void tdx_post_init_vcpu(CPUState *cpu)
+{
+CPUX86State *env = _CPU(cpu)->env;
+
+_tdx_ioctl(cpu, KVM_TDX_INIT_VCPU, 0,
+   (void *)(unsigned long)env->regs[R_ECX]);
+}
+
 static bool tdx_guest_get_debug(Object *obj, Error **errp)
 {
 TdxGuest *tdx = TDX_GUEST(obj);
-- 
2.25.1




[RFC PATCH v2 41/44] ioapic: add property to disallow INIT/SIPI delivery mode

2021-07-07 Thread isaku . yamahata
From: Isaku Yamahata 

Add a property that prevents the IOAPIC from being programmed with the
INIT/SIPI delivery mode.  Without this guard, QEMU can exhibit unexpected
behavior.

Signed-off-by: Isaku Yamahata 
---
 hw/intc/ioapic.c  | 19 +++
 hw/intc/ioapic_common.c   | 21 +
 include/hw/i386/ioapic_internal.h |  1 +
 3 files changed, 41 insertions(+)

diff --git a/hw/intc/ioapic.c b/hw/intc/ioapic.c
index 1815fbd282..f7eb9f7146 100644
--- a/hw/intc/ioapic.c
+++ b/hw/intc/ioapic.c
@@ -396,6 +396,22 @@ ioapic_fix_smi_unsupported(uint64_t *entry)
 }
 }
 
+static inline void
+ioapic_fix_init_sipi_unsupported(uint64_t *entry)
+{
+uint64_t delmode = *entry & IOAPIC_LVT_DELIV_MODE;
+if (delmode == IOAPIC_DM_INIT << IOAPIC_LVT_DELIV_MODE_SHIFT ||
+delmode == IOAPIC_DM_SIPI << IOAPIC_LVT_DELIV_MODE_SHIFT) {
+/*
+ * ignore a request for INIT/SIPI delivery mode; fall back to fixed
+ * delivery mode instead
+ */
+warn_report_once("attempting to set delivery mode to INIT/SIPI"
+ "which is not supported");
+*entry &= ~IOAPIC_LVT_DELIV_MODE;
+*entry |= IOAPIC_DM_FIXED << IOAPIC_LVT_DELIV_MODE_SHIFT;
+}
+}
+
 static void
 ioapic_mem_write(void *opaque, hwaddr addr, uint64_t val,
  unsigned int size)
@@ -442,6 +458,9 @@ ioapic_mem_write(void *opaque, hwaddr addr, uint64_t val,
 if (s->smi_unsupported) {
 ioapic_fix_smi_unsupported(>ioredtbl[index]);
 }
+if (s->init_sipi_unsupported) {
+ioapic_fix_init_sipi_unsupported(>ioredtbl[index]);
+}
 ioapic_fix_edge_remote_irr(>ioredtbl[index]);
 ioapic_service(s);
 }
diff --git a/hw/intc/ioapic_common.c b/hw/intc/ioapic_common.c
index b8ef7efbad..018bacbf96 100644
--- a/hw/intc/ioapic_common.c
+++ b/hw/intc/ioapic_common.c
@@ -185,6 +185,23 @@ static void ioapic_common_set_smi_unsupported(Object *obj, 
bool value,
 s->smi_unsupported = value;
 }
 
+static bool ioapic_common_get_init_sipi_unsupported(Object *obj, Error **errp)
+{
+IOAPICCommonState *s = IOAPIC_COMMON(obj);
+return s->init_sipi_unsupported;
+}
+
+static void ioapic_common_set_init_sipi_unsupported(Object *obj, bool value,
+   Error **errp)
+{
+DeviceState *dev = DEVICE(obj);
+IOAPICCommonState *s = IOAPIC_COMMON(obj);
+/* only disabling before realize is allowed */
+assert(!dev->realized);
+assert(!s->init_sipi_unsupported);
+s->init_sipi_unsupported = value;
+}
+
 static void ioapic_common_init(Object *obj)
 {
 object_property_add_bool(obj, "level_trigger_unsupported",
@@ -194,6 +211,10 @@ static void ioapic_common_init(Object *obj)
 object_property_add_bool(obj, "smi_unsupported",
  ioapic_common_get_smi_unsupported,
  ioapic_common_set_smi_unsupported);
+
+object_property_add_bool(obj, "init_sipi_unsupported",
+ ioapic_common_get_init_sipi_unsupported,
+ ioapic_common_set_init_sipi_unsupported);
 }
 
 static void ioapic_common_realize(DeviceState *dev, Error **errp)
diff --git a/include/hw/i386/ioapic_internal.h 
b/include/hw/i386/ioapic_internal.h
index 46f22a4f85..634b97426d 100644
--- a/include/hw/i386/ioapic_internal.h
+++ b/include/hw/i386/ioapic_internal.h
@@ -105,6 +105,7 @@ struct IOAPICCommonState {
 Notifier machine_done;
 bool level_trigger_unsupported;
 bool smi_unsupported;
+bool init_sipi_unsupported;
 uint8_t version;
 uint64_t irq_count[IOAPIC_NUM_PINS];
 int irq_level[IOAPIC_NUM_PINS];
-- 
2.25.1




[RFC PATCH v2 20/44] i386/tdx: Parse tdx metadata and store the result into TdxGuestState

2021-07-07 Thread isaku . yamahata
From: Isaku Yamahata 

Add support for loading TDX's Trusted Domain Virtual Firmware (TDVF) via
the generic loader.  Prioritize the TDVF above plain hex to avoid false
positives with hex (TDVF has explicit metadata to confirm it's a TDVF).

Enumerate TempMem as added, private memory, i.e. E820_RESERVED,
otherwise TDVF will interpret the whole shebang as MMIO and complain
that the aperture overlaps other MMIO regions.

Signed-off-by: Isaku Yamahata 
Co-developed-by: Sean Christopherson 
Signed-off-by: Sean Christopherson 
Reported-by: Min M. Xu 
---
 hw/core/generic-loader.c |   5 +
 hw/core/meson.build  |   3 +
 hw/core/tdvf-stub.c  |   6 +
 hw/i386/meson.build  |   1 +
 hw/i386/tdvf.c   | 312 +++
 include/sysemu/tdvf.h|   6 +
 target/i386/kvm/tdx.h|  26 
 7 files changed, 359 insertions(+)
 create mode 100644 hw/core/tdvf-stub.c
 create mode 100644 hw/i386/tdvf.c
 create mode 100644 include/sysemu/tdvf.h

diff --git a/hw/core/generic-loader.c b/hw/core/generic-loader.c
index d14f932eea..ee2f49b47a 100644
--- a/hw/core/generic-loader.c
+++ b/hw/core/generic-loader.c
@@ -34,6 +34,7 @@
 #include "hw/core/cpu.h"
 #include "sysemu/dma.h"
 #include "sysemu/reset.h"
+#include "sysemu/tdvf.h"
 #include "hw/boards.h"
 #include "hw/loader.h"
 #include "hw/qdev-properties.h"
@@ -147,6 +148,10 @@ static void generic_loader_realize(DeviceState *dev, Error 
**errp)
   as);
 }
 
+if (size < 0) {
+size = load_tdvf(s->file);
+}
+
 if (size < 0) {
 size = load_targphys_hex_as(s->file, , as);
 }
diff --git a/hw/core/meson.build b/hw/core/meson.build
index 18f44fb7c2..ec943debf1 100644
--- a/hw/core/meson.build
+++ b/hw/core/meson.build
@@ -24,6 +24,9 @@ common_ss.add(when: 'CONFIG_REGISTER', if_true: 
files('register.c'))
 common_ss.add(when: 'CONFIG_SPLIT_IRQ', if_true: files('split-irq.c'))
 common_ss.add(when: 'CONFIG_XILINX_AXI', if_true: files('stream.c'))
 
+common_ss.add(when: 'CONFIG_TDX', if_false: files('tdvf-stub.c'))
+common_ss.add(when: 'CONFIG_ALL', if_true: files('tdvf-stub.c'))
+
 softmmu_ss.add(files(
   'cpu-sysemu.c',
   'fw-path-provider.c',
diff --git a/hw/core/tdvf-stub.c b/hw/core/tdvf-stub.c
new file mode 100644
index 00..5f2586dd70
--- /dev/null
+++ b/hw/core/tdvf-stub.c
@@ -0,0 +1,6 @@
+#include "sysemu/tdvf.h"
+
+int load_tdvf(const char *filename)
+{
+return -1;
+}
diff --git a/hw/i386/meson.build b/hw/i386/meson.build
index e5d109f5c6..945e805525 100644
--- a/hw/i386/meson.build
+++ b/hw/i386/meson.build
@@ -24,6 +24,7 @@ i386_ss.add(when: 'CONFIG_PC', if_true: files(
   'pc_sysfw.c',
   'acpi-build.c',
   'port92.c'))
+i386_ss.add(when: 'CONFIG_TDX', if_true: files('tdvf.c'))
 
 subdir('kvm')
 subdir('xen')
diff --git a/hw/i386/tdvf.c b/hw/i386/tdvf.c
new file mode 100644
index 00..9b0065d656
--- /dev/null
+++ b/hw/i386/tdvf.c
@@ -0,0 +1,312 @@
+/*
+ * SPDX-License-Identifier: GPL-2.0-or-later
+
+ * Copyright (c) 2020 Intel Corporation
+ * Author: Isaku Yamahata 
+ *
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "qemu/error-report.h"
+#include "qemu/units.h"
+#include "cpu.h"
+#include "exec/hwaddr.h"
+#include "hw/boards.h"
+#include "hw/i386/e820_memory_layout.h"
+#include "hw/i386/tdvf.h"
+#include "hw/i386/x86.h"
+#include "hw/loader.h"
+#include "sysemu/tdx.h"
+#include "sysemu/tdvf.h"
+#include "target/i386/kvm/tdx.h"
+
+static void tdvf_init_ram_memory(MachineState *ms, TdxFirmwareEntry *entry)
+{
+void *ram_ptr = memory_region_get_ram_ptr(ms->ram);
+X86MachineState *x86ms = X86_MACHINE(ms);
+
+if (entry->type == TDVF_SECTION_TYPE_BFV ||
+entry->type == TDVF_SECTION_TYPE_CFV) {
+error_report("TDVF type %u addr 0x%" PRIx64 " in RAM (disallowed)",
+ entry->type, entry->address);
+exit(1);
+}
+
+if (entry->address < 4 * GiB) {
+entry->mem_ptr = ram_ptr + entry->address;
+} else {
+/*
+ * If the TDVF temp memory described in the TDVF metadata lies in RAM,
+ * reserve the region properly.
+ */
+if (entry->address >= 4 * GiB + x86ms->above_4g_mem_size ||
+

[RFC PATCH v2 44/44] i386/tdx: disable S3/S4 unconditionally

2021-07-07 Thread isaku . yamahata
From: Isaku Yamahata 

Disable S3/S4 unconditionally when TDX is enabled.  Because the CPU state
is protected, QEMU is not allowed to reset it, so S3/S4 can't be
supported.

Signed-off-by: Isaku Yamahata 
---
 target/i386/kvm/tdx.c | 20 
 1 file changed, 20 insertions(+)

diff --git a/target/i386/kvm/tdx.c b/target/i386/kvm/tdx.c
index 0621317b0a..0dd6d94c2a 100644
--- a/target/i386/kvm/tdx.c
+++ b/target/i386/kvm/tdx.c
@@ -31,6 +31,9 @@
 #include "sysemu/tdx.h"
 #include "tdx.h"
 
+#include "hw/southbridge/piix.h"
+#include "hw/i386/ich9.h"
+
 #define TDX1_TD_ATTRIBUTE_DEBUG BIT_ULL(0)
 #define TDX1_TD_ATTRIBUTE_PERFMON BIT_ULL(63)
 #define TDX1_MIN_TSC_FREQUENCY_KHZ (100 * 1000)
@@ -103,10 +106,27 @@ static TdxFirmwareEntry *tdx_get_hob_entry(TdxGuest *tdx)
 
 static void tdx_finalize_vm(Notifier *notifier, void *unused)
 {
+Object *pm;
+bool ambig;
 MachineState *ms = MACHINE(qdev_get_machine());
 TdxGuest *tdx = TDX_GUEST(ms->cgs);
 TdxFirmwareEntry *entry;
 
+/*
+ * object look up logic is copied from acpi_get_pm_info()
+ * @ hw/i386/acpi-build.c
+ * This property override needs to be done after machine initialization
+ * as there is no ordering of creation of objects/properties.
+ */
+pm = object_resolve_path_type("", TYPE_PIIX4_PM, &ambig);
+if (ambig || !pm) {
+pm = object_resolve_path_type("", TYPE_ICH9_LPC_DEVICE, &ambig);
+}
+if (!ambig && pm) {
+object_property_set_uint(pm, ACPI_PM_PROP_S3_DISABLED, 1, NULL);
+object_property_set_uint(pm, ACPI_PM_PROP_S4_DISABLED, 1, NULL);
+}
+
 tdvf_hob_create(tdx, tdx_get_hob_entry(tdx));
 
 for_each_fw_entry(>fw, entry) {
-- 
2.25.1




[RFC PATCH v2 39/44] ioapic: add property to disallow SMI delivery mode

2021-07-07 Thread isaku . yamahata
From: Isaku Yamahata 

Add a property that prevents the IOAPIC from being programmed with the SMI
delivery mode.  Without this guard, QEMU can exhibit unexpected behavior.

Signed-off-by: Isaku Yamahata 
---
 hw/intc/ioapic.c  | 18 ++
 hw/intc/ioapic_common.c   | 20 
 include/hw/i386/ioapic_internal.h |  1 +
 3 files changed, 39 insertions(+)

diff --git a/hw/intc/ioapic.c b/hw/intc/ioapic.c
index 6d61744961..1815fbd282 100644
--- a/hw/intc/ioapic.c
+++ b/hw/intc/ioapic.c
@@ -381,6 +381,21 @@ ioapic_fix_level_trigger_unsupported(uint64_t *entry)
 }
 }
 
+static inline void
+ioapic_fix_smi_unsupported(uint64_t *entry)
+{
+if ((*entry & IOAPIC_LVT_DELIV_MODE) ==
+IOAPIC_DM_PMI << IOAPIC_LVT_DELIV_MODE_SHIFT) {
+/*
+ * ignore a request for SMI delivery mode; fall back to fixed
+ * delivery mode instead
+ */
+warn_report_once("attempting to set delivery mode to SMI"
+ "which is not supported");
+*entry &= ~IOAPIC_LVT_DELIV_MODE;
+*entry |= IOAPIC_DM_FIXED << IOAPIC_LVT_DELIV_MODE_SHIFT;
+}
+}
+
 static void
 ioapic_mem_write(void *opaque, hwaddr addr, uint64_t val,
  unsigned int size)
@@ -424,6 +439,9 @@ ioapic_mem_write(void *opaque, hwaddr addr, uint64_t val,
 if (s->level_trigger_unsupported) {
 ioapic_fix_level_trigger_unsupported(>ioredtbl[index]);
 }
+if (s->smi_unsupported) {
+ioapic_fix_smi_unsupported(>ioredtbl[index]);
+}
 ioapic_fix_edge_remote_irr(>ioredtbl[index]);
 ioapic_service(s);
 }
diff --git a/hw/intc/ioapic_common.c b/hw/intc/ioapic_common.c
index 07ee142470..b8ef7efbad 100644
--- a/hw/intc/ioapic_common.c
+++ b/hw/intc/ioapic_common.c
@@ -168,12 +168,32 @@ static void 
ioapic_common_set_level_trigger_unsupported(Object *obj, bool value,
 s->level_trigger_unsupported = value;
 }
 
+static bool ioapic_common_get_smi_unsupported(Object *obj, Error **errp)
+{
+IOAPICCommonState *s = IOAPIC_COMMON(obj);
+return s->smi_unsupported;
+}
+
+static void ioapic_common_set_smi_unsupported(Object *obj, bool value,
+   Error **errp)
+{
+DeviceState *dev = DEVICE(obj);
+IOAPICCommonState *s = IOAPIC_COMMON(obj);
+/* only disabling before realize is allowed */
+assert(!dev->realized);
+assert(!s->smi_unsupported);
+s->smi_unsupported = value;
+}
+
 static void ioapic_common_init(Object *obj)
 {
 object_property_add_bool(obj, "level_trigger_unsupported",
  ioapic_common_get_level_trigger_unsupported,
  ioapic_common_set_level_trigger_unsupported);
 
+object_property_add_bool(obj, "smi_unsupported",
+ ioapic_common_get_smi_unsupported,
+ ioapic_common_set_smi_unsupported);
 }
 
 static void ioapic_common_realize(DeviceState *dev, Error **errp)
diff --git a/include/hw/i386/ioapic_internal.h 
b/include/hw/i386/ioapic_internal.h
index 20f2fc7897..46f22a4f85 100644
--- a/include/hw/i386/ioapic_internal.h
+++ b/include/hw/i386/ioapic_internal.h
@@ -104,6 +104,7 @@ struct IOAPICCommonState {
 uint64_t ioredtbl[IOAPIC_NUM_PINS];
 Notifier machine_done;
 bool level_trigger_unsupported;
+bool smi_unsupported;
 uint8_t version;
 uint64_t irq_count[IOAPIC_NUM_PINS];
 int irq_level[IOAPIC_NUM_PINS];
-- 
2.25.1




[RFC PATCH v2 04/44] vl: Introduce machine_init_done_late notifier

2021-07-07 Thread isaku . yamahata
From: Isaku Yamahata 

Introduce a new notifier, machine_init_done_late, that is notified after
machine_init_done.  This will be used by TDX to generate the HOB for its
virtual firmware, which needs to be done after all guest memory has been
added, i.e. after machine_init_done notifiers have run, because some code
registers memory from its machine_init_done notifier.

Signed-off-by: Isaku Yamahata 
---
 hw/core/machine.c   | 26 ++
 include/sysemu/sysemu.h |  2 ++
 2 files changed, 28 insertions(+)

diff --git a/hw/core/machine.c b/hw/core/machine.c
index ffc076ae84..66c39cf72a 100644
--- a/hw/core/machine.c
+++ b/hw/core/machine.c
@@ -1278,6 +1278,31 @@ void qemu_remove_machine_init_done_notifier(Notifier 
*notify)
 notifier_remove(notify);
 }
 
+static NotifierList machine_init_done_late_notifiers =
+NOTIFIER_LIST_INITIALIZER(machine_init_done_late_notifiers);
+
+static bool machine_init_done_late;
+
+void qemu_add_machine_init_done_late_notifier(Notifier *notify)
+{
+notifier_list_add(&machine_init_done_late_notifiers, notify);
+if (machine_init_done_late) {
+notify->notify(notify, NULL);
+}
+}
+
+void qemu_remove_machine_init_done_late_notifier(Notifier *notify)
+{
+notifier_remove(notify);
+}
+
+
+static void qemu_run_machine_init_done_late_notifiers(void)
+{
+machine_init_done_late = true;
+notifier_list_notify(&machine_init_done_late_notifiers, NULL);
+}
+
 void qdev_machine_creation_done(void)
 {
 cpu_synchronize_all_post_init();
@@ -1311,6 +1336,7 @@ void qdev_machine_creation_done(void)
 if (rom_check_and_register_reset() != 0) {
 exit(1);
 }
+qemu_run_machine_init_done_late_notifiers();
 
 replay_start();
 
diff --git a/include/sysemu/sysemu.h b/include/sysemu/sysemu.h
index 8fae667172..d44f8cf778 100644
--- a/include/sysemu/sysemu.h
+++ b/include/sysemu/sysemu.h
@@ -19,6 +19,8 @@ void qemu_remove_exit_notifier(Notifier *notify);
 void qemu_run_machine_init_done_notifiers(void);
 void qemu_add_machine_init_done_notifier(Notifier *notify);
 void qemu_remove_machine_init_done_notifier(Notifier *notify);
+void qemu_add_machine_init_done_late_notifier(Notifier *notify);
+void qemu_remove_machine_init_done_late_notifier(Notifier *notify);
 
 void configure_rtc(QemuOpts *opts);
 
-- 
2.25.1




[RFC PATCH v2 18/44] hw/i386: refactor e820_add_entry()

2021-07-07 Thread isaku . yamahata
From: Isaku Yamahata 

The following patch will utilize this refactoring.

Signed-off-by: Isaku Yamahata 
---
 hw/i386/e820_memory_layout.c | 42 
 1 file changed, 28 insertions(+), 14 deletions(-)

diff --git a/hw/i386/e820_memory_layout.c b/hw/i386/e820_memory_layout.c
index bcf9eaf837..d9bb11c02a 100644
--- a/hw/i386/e820_memory_layout.c
+++ b/hw/i386/e820_memory_layout.c
@@ -14,31 +14,45 @@ static size_t e820_entries;
 struct e820_table e820_reserve;
 struct e820_entry *e820_table;
 
-int e820_add_entry(uint64_t address, uint64_t length, uint32_t type)
+static int e820_append_reserve(uint64_t address, uint64_t length, uint32_t 
type)
 {
 int index = le32_to_cpu(e820_reserve.count);
 struct e820_entry *entry;
 
-if (type != E820_RAM) {
-/* old FW_CFG_E820_TABLE entry -- reservations only */
-if (index >= E820_NR_ENTRIES) {
-return -EBUSY;
-}
-entry = _reserve.entry[index++];
+/* old FW_CFG_E820_TABLE entry -- reservations only */
+if (index >= E820_NR_ENTRIES) {
+return -EBUSY;
+}
+entry = _reserve.entry[index++];
 
-entry->address = cpu_to_le64(address);
-entry->length = cpu_to_le64(length);
-entry->type = cpu_to_le32(type);
+entry->address = cpu_to_le64(address);
+entry->length = cpu_to_le64(length);
+entry->type = cpu_to_le32(type);
 
-e820_reserve.count = cpu_to_le32(index);
-}
+e820_reserve.count = cpu_to_le32(index);
+return 0;
+}
 
-/* new "etc/e820" file -- include ram too */
-e820_table = g_renew(struct e820_entry, e820_table, e820_entries + 1);
+static void e820_append_entry(uint64_t address, uint64_t length, uint32_t type)
+{
 e820_table[e820_entries].address = cpu_to_le64(address);
 e820_table[e820_entries].length = cpu_to_le64(length);
 e820_table[e820_entries].type = cpu_to_le32(type);
 e820_entries++;
+}
+
+int e820_add_entry(uint64_t address, uint64_t length, uint32_t type)
+{
+if (type != E820_RAM) {
+int ret = e820_append_reserve(address, length, type);
+if (ret) {
+return ret;
+}
+}
+
+/* new "etc/e820" file -- include ram too */
+e820_table = g_renew(struct e820_entry, e820_table, e820_entries + 1);
+e820_append_entry(address, length, type);
 
 return e820_entries;
 }
-- 
2.25.1




[RFC PATCH v2 40/44] hw/i386: add a flag to disallow SMI

2021-07-07 Thread isaku . yamahata
From: Isaku Yamahata 

Add a new flag to X86Machine to disallow SMI and pass it to ioapic creation
so that ioapic disallows delivery mode of SMI.

Signed-off-by: Isaku Yamahata 
---
 hw/i386/microvm.c |  6 --
 hw/i386/pc_piix.c |  3 ++-
 hw/i386/pc_q35.c  |  3 ++-
 hw/i386/x86.c | 11 +--
 include/hw/i386/x86.h |  7 +--
 5 files changed, 22 insertions(+), 8 deletions(-)

diff --git a/hw/i386/microvm.c b/hw/i386/microvm.c
index 9b03d051ca..7504324891 100644
--- a/hw/i386/microvm.c
+++ b/hw/i386/microvm.c
@@ -175,10 +175,12 @@ static void microvm_devices_init(MicrovmMachineState *mms)
   _abort);
 isa_bus_irqs(isa_bus, x86ms->gsi);
 
-ioapic_init_gsi(gsi_state, "machine", x86ms->eoi_intercept_unsupported);
+ioapic_init_gsi(gsi_state, "machine", x86ms->eoi_intercept_unsupported,
+x86ms->smi_unsupported);
 if (ioapics > 1) {
 x86ms->ioapic2 = ioapic_init_secondary(
-gsi_state, x86ms->eoi_intercept_unsupported);
+gsi_state, x86ms->eoi_intercept_unsupported,
+x86ms->smi_unsupported);
 }
 
 kvmclock_create(true);
diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
index a601c4a916..0958035bf8 100644
--- a/hw/i386/pc_piix.c
+++ b/hw/i386/pc_piix.c
@@ -223,7 +223,8 @@ static void pc_init1(MachineState *machine,
 }
 
 if (pcmc->pci_enabled) {
-ioapic_init_gsi(gsi_state, "i440fx", x86ms->eoi_intercept_unsupported);
+ioapic_init_gsi(gsi_state, "i440fx", x86ms->eoi_intercept_unsupported,
+x86ms->smi_unsupported);
 }
 
 if (tcg_enabled()) {
diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
index 464463766c..1ab8a6a78b 100644
--- a/hw/i386/pc_q35.c
+++ b/hw/i386/pc_q35.c
@@ -256,7 +256,8 @@ static void pc_q35_init(MachineState *machine)
 }
 
 if (pcmc->pci_enabled) {
-ioapic_init_gsi(gsi_state, "q35", x86ms->eoi_intercept_unsupported);
+ioapic_init_gsi(gsi_state, "q35", x86ms->eoi_intercept_unsupported,
+x86ms->smi_unsupported);
 }
 
 if (tcg_enabled()) {
diff --git a/hw/i386/x86.c b/hw/i386/x86.c
index 88c365b72d..3dc36e3590 100644
--- a/hw/i386/x86.c
+++ b/hw/i386/x86.c
@@ -609,7 +609,8 @@ void gsi_handler(void *opaque, int n, int level)
 }
 
 void ioapic_init_gsi(GSIState *gsi_state, const char *parent_name,
- bool level_trigger_unsupported)
+ bool level_trigger_unsupported,
+ bool smi_unsupported)
 {
 DeviceState *dev;
 SysBusDevice *d;
@@ -625,6 +626,8 @@ void ioapic_init_gsi(GSIState *gsi_state, const char 
*parent_name,
   "ioapic", OBJECT(dev));
 object_property_set_bool(OBJECT(dev), "level_trigger_unsupported",
  level_trigger_unsupported, NULL);
+object_property_set_bool(OBJECT(dev), "smi_unsupported",
+ smi_unsupported, NULL);
 d = SYS_BUS_DEVICE(dev);
 sysbus_realize_and_unref(d, _fatal);
 sysbus_mmio_map(d, 0, IO_APIC_DEFAULT_ADDRESS);
@@ -635,7 +638,8 @@ void ioapic_init_gsi(GSIState *gsi_state, const char 
*parent_name,
 }
 
 DeviceState *ioapic_init_secondary(GSIState *gsi_state,
-   bool level_trigger_unsupported)
+   bool level_trigger_unsupported,
+   bool smi_unsupported)
 {
 DeviceState *dev;
 SysBusDevice *d;
@@ -644,6 +648,8 @@ DeviceState *ioapic_init_secondary(GSIState *gsi_state,
 dev = qdev_new(TYPE_IOAPIC);
 object_property_set_bool(OBJECT(dev), "level_trigger_unsupported",
  level_trigger_unsupported, NULL);
+object_property_set_bool(OBJECT(dev), "smi_unsupported",
+ smi_unsupported, NULL);
 d = SYS_BUS_DEVICE(dev);
 sysbus_realize_and_unref(d, _fatal);
 sysbus_mmio_map(d, 0, IO_APIC_SECONDARY_ADDRESS);
@@ -1318,6 +1324,7 @@ static void x86_machine_initfn(Object *obj)
 x86ms->oem_table_id = g_strndup(ACPI_BUILD_APPNAME8, 8);
 x86ms->bus_lock_ratelimit = 0;
 x86ms->eoi_intercept_unsupported = false;
+x86ms->smi_unsupported = false;
 
 object_property_add_str(obj, "kvm-type",
 x86_get_kvm_type, x86_set_kvm_type);
diff --git a/include/hw/i386/x86.h b/include/hw/i386/x86.h
index 7536e5fb8c..3d1d74d171 100644
--- a/include/hw/i386/x86.h
+++ b/include/hw/i386/x86.h
@@ -64,6 +64,7 @@ struct X86MachineState {
 unsigned apic_id_limit;
 uint16_t boot_cpus;
 bool eoi_intercept_unsupported;
+bool smi_unsupported;
 
 OnOffAuto smm;
 OnOffAuto acpi;
@@ -141,8 +142,10 @@ typedef struct GSIState {
 qemu_irq x86_allocate_cpu_irq(void);
 void gsi_handler(void *opaque, int n, int level);
 void ioapic_init_gsi(GSIState *gsi_state, const char *parent_name,
- bool eoi_intercept_unsupported);
+ bool 

[RFC PATCH v2 13/44] i386/tdx: Frame in tdx_get_supported_cpuid with KVM_TDX_CAPABILITIES

2021-07-07 Thread isaku . yamahata
From: Sean Christopherson 

Add support for grabbing KVM_TDX_CAPABILITIES and use the new
kvm_get_supported_cpuid() hook to adjust the supported XCR0 bits.

Add TODOs for the remaining work.

Signed-off-by: Sean Christopherson 
Signed-off-by: Isaku Yamahata 
---
 target/i386/kvm/kvm.c |  2 ++
 target/i386/kvm/tdx.c | 79 ---
 target/i386/kvm/tdx.h |  2 ++
 3 files changed, 78 insertions(+), 5 deletions(-)

diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
index 5742fa4806..25dcecd60c 100644
--- a/target/i386/kvm/kvm.c
+++ b/target/i386/kvm/kvm.c
@@ -448,6 +448,8 @@ uint32_t kvm_arch_get_supported_cpuid(KVMState *s, uint32_t 
function,
 ret |= 1U << KVM_HINTS_REALTIME;
 }
 
+tdx_get_supported_cpuid(s, function, index, reg, );
+
 return ret;
 }
 
diff --git a/target/i386/kvm/tdx.c b/target/i386/kvm/tdx.c
index f8c7560fc8..b1e4f27c9a 100644
--- a/target/i386/kvm/tdx.c
+++ b/target/i386/kvm/tdx.c
@@ -21,6 +21,7 @@
 #include "hw/boards.h"
 #include "qapi/error.h"
 #include "qom/object_interfaces.h"
+#include "standard-headers/asm-x86/kvm_para.h"
 #include "sysemu/sysemu.h"
 #include "sysemu/kvm.h"
 #include "sysemu/kvm_int.h"
@@ -49,7 +50,11 @@ static void __tdx_ioctl(int ioctl_no, const char *ioctl_name,
 tdx_cmd.metadata = metadata;
 tdx_cmd.data = (__u64)(unsigned long)data;
 
-r = kvm_vm_ioctl(kvm_state, KVM_MEMORY_ENCRYPT_OP, _cmd);
+if (ioctl_no == KVM_TDX_CAPABILITIES) {
+r = kvm_ioctl(kvm_state, KVM_MEMORY_ENCRYPT_OP, _cmd);
+} else {
+r = kvm_vm_ioctl(kvm_state, KVM_MEMORY_ENCRYPT_OP, _cmd);
+}
 if (r) {
 error_report("%s failed: %s", ioctl_name, strerror(-r));
 exit(1);
@@ -67,6 +72,18 @@ static Notifier tdx_machine_done_late_notify = {
 .notify = tdx_finalize_vm,
 };
 
+#define TDX1_MAX_NR_CPUID_CONFIGS 6
+
+static struct {
+struct kvm_tdx_capabilities __caps;
+struct kvm_tdx_cpuid_config __cpuid_configs[TDX1_MAX_NR_CPUID_CONFIGS];
+} __tdx_caps;
+
+static struct kvm_tdx_capabilities *tdx_caps = (void *)&__tdx_caps;
+
+#define XCR0_MASK (MAKE_64BIT_MASK(0, 8) | BIT_ULL(9))
+#define XSS_MASK (~XCR0_MASK)
+
 int tdx_kvm_init(ConfidentialGuestSupport *cgs, Error **errp)
 {
 TdxGuest *tdx = (TdxGuest *)object_dynamic_cast(OBJECT(cgs),
@@ -75,10 +92,65 @@ int tdx_kvm_init(ConfidentialGuestSupport *cgs, Error 
**errp)
 return 0;
 }
 
+QEMU_BUILD_BUG_ON(sizeof(__tdx_caps) !=
+  sizeof(struct kvm_tdx_capabilities) +
+  sizeof(struct kvm_tdx_cpuid_config) *
+  TDX1_MAX_NR_CPUID_CONFIGS);
+
+tdx_caps->nr_cpuid_configs = TDX1_MAX_NR_CPUID_CONFIGS;
+tdx_ioctl(KVM_TDX_CAPABILITIES, 0, tdx_caps);
+
 qemu_add_machine_init_done_late_notifier(_machine_done_late_notify);
+
 return 0;
 }
 
+void tdx_get_supported_cpuid(KVMState *s, uint32_t function,
+ uint32_t index, int reg, uint32_t *ret)
+{
+MachineState *ms = MACHINE(qdev_get_machine());
+TdxGuest *tdx = (TdxGuest *)object_dynamic_cast(OBJECT(ms->cgs),
+TYPE_TDX_GUEST);
+
+if (!tdx) {
+return;
+}
+
+switch (function) {
+case 1:
+if (reg == R_ECX) {
+*ret &= ~CPUID_EXT_VMX;
+}
+break;
+case 0xd:
+if (index == 0) {
+if (reg == R_EAX) {
+*ret &= (uint32_t)tdx_caps->xfam_fixed0 & XCR0_MASK;
+*ret |= (uint32_t)tdx_caps->xfam_fixed1 & XCR0_MASK;
+} else if (reg == R_EDX) {
+*ret &= (tdx_caps->xfam_fixed0 & XCR0_MASK) >> 32;
+*ret |= (tdx_caps->xfam_fixed1 & XCR0_MASK) >> 32;
+}
+} else if (index == 1) {
+/* TODO: Adjust XSS when it's supported. */
+}
+break;
+case KVM_CPUID_FEATURES:
+if (reg == R_EAX) {
+*ret &= ~((1ULL << KVM_FEATURE_CLOCKSOURCE) |
+  (1ULL << KVM_FEATURE_CLOCKSOURCE2) |
+  (1ULL << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT) |
+  (1ULL << KVM_FEATURE_ASYNC_PF) |
+  (1ULL << KVM_FEATURE_ASYNC_PF_VMEXIT) |
+  (1ULL << KVM_FEATURE_ASYNC_PF_INT));
+}
+break;
+default:
+/* TODO: Use tdx_caps to adjust CPUID leafs. */
+break;
+}
+}
+
 void tdx_pre_create_vcpu(CPUState *cpu)
 {
 struct {
@@ -105,10 +177,7 @@ void tdx_pre_create_vcpu(CPUState *cpu)
 return;
 }
 
-/* HACK: Remove MPX support, which is not allowed by TDX. */
-env->features[FEAT_XSAVE_COMP_LO] &= ~(XSTATE_BNDREGS_MASK |
-   XSTATE_BNDCSR_MASK);
-
+/* TODO: Use tdx_caps to validate the config. */
 if (!(env->features[FEAT_1_ECX] & CPUID_EXT_XSAVE)) {
 error_report("TDX VM must support XSAVE features");
 exit(1);
diff --git 

[RFC PATCH v2 19/44] hw/i386/e820: introduce a helper function to change type of e820

2021-07-07 Thread isaku . yamahata
From: Isaku Yamahata 

Introduce a helper function, e820_change_type(), that changes
the type of a subregion of an e820 entry.
The following patch uses it.

Signed-off-by: Isaku Yamahata 
---
 hw/i386/e820_memory_layout.c | 72 
 hw/i386/e820_memory_layout.h |  1 +
 2 files changed, 73 insertions(+)

diff --git a/hw/i386/e820_memory_layout.c b/hw/i386/e820_memory_layout.c
index d9bb11c02a..109c4f715a 100644
--- a/hw/i386/e820_memory_layout.c
+++ b/hw/i386/e820_memory_layout.c
@@ -57,6 +57,78 @@ int e820_add_entry(uint64_t address, uint64_t length, 
uint32_t type)
 return e820_entries;
 }
 
+int e820_change_type(uint64_t address, uint64_t length, uint32_t type)
+{
+size_t i;
+
+if (type != E820_RAM) {
+int ret = e820_append_reserve(address, length, type);
+if (ret) {
+return ret;
+}
+}
+
+/* new "etc/e820" file -- include ram too */
+for (i = 0; i < e820_entries; i++) {
+struct e820_entry *e = _table[i];
+struct e820_entry tmp = {
+.address = le64_to_cpu(e->address),
+.length = le64_to_cpu(e->length),
+.type = le32_to_cpu(e->type),
+};
+/* overlap? */
+if (address + length < tmp.address ||
+tmp.address + tmp.length < address) {
+continue;
+}
+/*
+ * partial-overlap is not allowed.
+ * It is assumed that the region is completely contained within
+ * other region.
+ */
+if (address < tmp.address ||
+tmp.address + tmp.length < address + length) {
+return -EINVAL;
+}
+/* only real type change is allowed. */
+if (tmp.type == type) {
+return -EINVAL;
+}
+
+if (tmp.address == address &&
+tmp.address + tmp.length == address + length) {
+e->type = cpu_to_le32(type);
+return e820_entries;
+} else if (tmp.address == address) {
+e820_table = g_renew(struct e820_entry,
+ e820_table, e820_entries + 1);
+e = _table[i];
+e->address = cpu_to_le64(tmp.address + length);
+e820_append_entry(address, length, type);
+return e820_entries;
+} else if (tmp.address + tmp.length == address + length) {
+e820_table = g_renew(struct e820_entry,
+ e820_table, e820_entries + 1);
+e = _table[i];
+e->length = cpu_to_le64(tmp.length - length);
+e820_append_entry(address, length, type);
+return e820_entries;
+} else {
+e820_table = g_renew(struct e820_entry,
+ e820_table, e820_entries + 2);
+e = _table[i];
+e->length = cpu_to_le64(address - tmp.address);
+e820_append_entry(address, length, type);
+e820_append_entry(address + length,
+  tmp.address + tmp.length - (address + length),
+  tmp.type);
+return e820_entries;
+}
+}
+
+return -EINVAL;
+}
+
 int e820_get_num_entries(void)
 {
 return e820_entries;
diff --git a/hw/i386/e820_memory_layout.h b/hw/i386/e820_memory_layout.h
index 2a0ceb8b9c..5f27cee476 100644
--- a/hw/i386/e820_memory_layout.h
+++ b/hw/i386/e820_memory_layout.h
@@ -33,6 +33,7 @@ extern struct e820_table e820_reserve;
 extern struct e820_entry *e820_table;
 
 int e820_add_entry(uint64_t address, uint64_t length, uint32_t type);
+int e820_change_type(uint64_t address, uint64_t length, uint32_t type);
 int e820_get_num_entries(void);
 bool e820_get_entry(int index, uint32_t type,
 uint64_t *address, uint64_t *length);
-- 
2.25.1




[RFC PATCH v2 37/44] hw/i386: add option to forcibly report edge trigger in acpi tables

2021-07-07 Thread isaku . yamahata
From: Isaku Yamahata 

When level trigger isn't supported on x86 platform, forcibly report edge
trigger in acpi tables.

Signed-off-by: Isaku Yamahata 
---
 hw/i386/acpi-build.c  | 103 --
 hw/i386/acpi-common.c |  74 ++
 2 files changed, 124 insertions(+), 53 deletions(-)

diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
index 796ffc6f5c..d0d52258b9 100644
--- a/hw/i386/acpi-build.c
+++ b/hw/i386/acpi-build.c
@@ -866,7 +866,8 @@ static void build_dbg_aml(Aml *table)
 aml_append(table, scope);
 }
 
-static Aml *build_link_dev(const char *name, uint8_t uid, Aml *reg)
+static Aml *build_link_dev(const char *name, uint8_t uid, Aml *reg,
+   bool level_trigger_unsupported)
 {
 Aml *dev;
 Aml *crs;
@@ -878,7 +879,10 @@ static Aml *build_link_dev(const char *name, uint8_t uid, 
Aml *reg)
 aml_append(dev, aml_name_decl("_UID", aml_int(uid)));
 
 crs = aml_resource_template();
-aml_append(crs, aml_interrupt(AML_CONSUMER, AML_LEVEL, AML_ACTIVE_HIGH,
+aml_append(crs, aml_interrupt(AML_CONSUMER,
+  level_trigger_unsupported ?
+  AML_EDGE : AML_LEVEL,
+  AML_ACTIVE_HIGH,
   AML_SHARED, irqs, ARRAY_SIZE(irqs)));
 aml_append(dev, aml_name_decl("_PRS", crs));
 
@@ -902,7 +906,8 @@ static Aml *build_link_dev(const char *name, uint8_t uid, 
Aml *reg)
 return dev;
  }
 
-static Aml *build_gsi_link_dev(const char *name, uint8_t uid, uint8_t gsi)
+static Aml *build_gsi_link_dev(const char *name, uint8_t uid,
+   uint8_t gsi, bool level_trigger_unsupported)
 {
 Aml *dev;
 Aml *crs;
@@ -915,7 +920,10 @@ static Aml *build_gsi_link_dev(const char *name, uint8_t 
uid, uint8_t gsi)
 
 crs = aml_resource_template();
 irqs = gsi;
-aml_append(crs, aml_interrupt(AML_CONSUMER, AML_LEVEL, AML_ACTIVE_HIGH,
+aml_append(crs, aml_interrupt(AML_CONSUMER,
+  level_trigger_unsupported ?
+  AML_EDGE : AML_LEVEL,
+  AML_ACTIVE_HIGH,
   AML_SHARED, , 1));
 aml_append(dev, aml_name_decl("_PRS", crs));
 
@@ -934,7 +942,7 @@ static Aml *build_gsi_link_dev(const char *name, uint8_t 
uid, uint8_t gsi)
 }
 
 /* _CRS method - get current settings */
-static Aml *build_iqcr_method(bool is_piix4)
+static Aml *build_iqcr_method(bool is_piix4, bool level_trigger_unsupported)
 {
 Aml *if_ctx;
 uint32_t irqs;
@@ -942,7 +950,9 @@ static Aml *build_iqcr_method(bool is_piix4)
 Aml *crs = aml_resource_template();
 
 irqs = 0;
-aml_append(crs, aml_interrupt(AML_CONSUMER, AML_LEVEL,
+aml_append(crs, aml_interrupt(AML_CONSUMER,
+  level_trigger_unsupported ?
+  AML_EDGE : AML_LEVEL,
   AML_ACTIVE_HIGH, AML_SHARED, , 1));
 aml_append(method, aml_name_decl("PRR0", crs));
 
@@ -976,7 +986,7 @@ static Aml *build_irq_status_method(void)
 return method;
 }
 
-static void build_piix4_pci0_int(Aml *table)
+static void build_piix4_pci0_int(Aml *table, bool level_trigger_unsupported)
 {
 Aml *dev;
 Aml *crs;
@@ -997,12 +1007,16 @@ static void build_piix4_pci0_int(Aml *table)
 aml_append(sb_scope, field);
 
 aml_append(sb_scope, build_irq_status_method());
-aml_append(sb_scope, build_iqcr_method(true));
+aml_append(sb_scope, build_iqcr_method(true, level_trigger_unsupported));
 
-aml_append(sb_scope, build_link_dev("LNKA", 0, aml_name("PRQ0")));
-aml_append(sb_scope, build_link_dev("LNKB", 1, aml_name("PRQ1")));
-aml_append(sb_scope, build_link_dev("LNKC", 2, aml_name("PRQ2")));
-aml_append(sb_scope, build_link_dev("LNKD", 3, aml_name("PRQ3")));
+aml_append(sb_scope, build_link_dev("LNKA", 0, aml_name("PRQ0"),
+level_trigger_unsupported));
+aml_append(sb_scope, build_link_dev("LNKB", 1, aml_name("PRQ1"),
+level_trigger_unsupported));
+aml_append(sb_scope, build_link_dev("LNKC", 2, aml_name("PRQ2"),
+level_trigger_unsupported));
+aml_append(sb_scope, build_link_dev("LNKD", 3, aml_name("PRQ3"),
+level_trigger_unsupported));
 
 dev = aml_device("LNKS");
 {
@@ -1011,7 +1025,9 @@ static void build_piix4_pci0_int(Aml *table)
 
 crs = aml_resource_template();
 irqs = 9;
-aml_append(crs, aml_interrupt(AML_CONSUMER, AML_LEVEL,
+aml_append(crs, aml_interrupt(AML_CONSUMER,
+  level_trigger_unsupported ?
+  AML_EDGE : AML_LEVEL,
   AML_ACTIVE_HIGH, AML_SHARED,

[RFC PATCH v2 34/44] target/i386/tdx: set reboot action to shutdown when tdx

2021-07-07 Thread isaku . yamahata
From: Isaku Yamahata 

In TDX, CPU state is also protected, thus vcpu state can't be reset by the
VMM.  Assume -action reboot=shutdown instead of silently ignoring vcpu reset.

TDX module spec version 344425-002US doesn't support vcpu reset by the VMM.
The VM needs to be destroyed and created again to emulate
REBOOT_ACTION_RESET.  For simplicity, leave that responsibility to a
management system like libvirt, because it's difficult for the current qemu
implementation to destroy and re-create KVM VM resources while keeping other
resources.

If management system wants reboot behavior for its users, it needs to
 - set reboot_action to REBOOT_ACTION_SHUTDOWN,
 - set shutdown_action to SHUTDOWN_ACTION_PAUSE optionally and,
 - subscribe VM state change and on reboot, (destroy qemu if
   SHUTDOWN_ACTION_PAUSE and) start new qemu.

Signed-off-by: Isaku Yamahata 
---
 target/i386/kvm/tdx.c | 14 ++
 1 file changed, 14 insertions(+)

diff --git a/target/i386/kvm/tdx.c b/target/i386/kvm/tdx.c
index 1316d95209..0621317b0a 100644
--- a/target/i386/kvm/tdx.c
+++ b/target/i386/kvm/tdx.c
@@ -25,6 +25,7 @@
 #include "qapi/qapi-types-misc-target.h"
 #include "standard-headers/asm-x86/kvm_para.h"
 #include "sysemu/sysemu.h"
+#include "sysemu/runstate-action.h"
 #include "sysemu/kvm.h"
 #include "sysemu/kvm_int.h"
 #include "sysemu/tdx.h"
@@ -363,6 +364,19 @@ static void tdx_guest_init(Object *obj)
 
 qemu_mutex_init(>lock);
 
+/*
+ * TDX module spec version 344425-002US doesn't support reset of vcpu by
+ * VMM.  VM needs to be destroyed and created again to emulate
+ * REBOOT_ACTION_RESET.  For simplicity, put its responsibility to
+ * management system like libvirt.
+ *
+ * Management system should
+ *  - set reboot_action to REBOOT_ACTION_SHUTDOWN
+ *  - set shutdown_action to SHUTDOWN_ACTION_PAUSE
+ *  - subscribe VM state and on reboot, destroy qemu and start new qemu
+ */
+reboot_action = REBOOT_ACTION_SHUTDOWN;
+
 tdx->debug = false;
 object_property_add_bool(obj, "debug", tdx_guest_get_debug,
  tdx_guest_set_debug);
-- 
2.25.1




[RFC PATCH v2 17/44] i386/tdx: Add definitions for TDVF metadata

2021-07-07 Thread isaku . yamahata
From: Isaku Yamahata 

Add constants and structs for the TD Virtual Firmware metadata, which
describes how the TDVF must be built to ensure correct functionality and
measurement.  They are defined in TDVF Design Guide [1].

[1] TDVF Design Guide
https://software.intel.com/content/dam/develop/external/us/en/documents/tdx-virtual-firmware-design-guide-rev-1.pdf

Signed-off-by: Isaku Yamahata 
Co-developed-by: Sean Christopherson 
Signed-off-by: Sean Christopherson 
---
 include/hw/i386/tdvf.h | 55 ++
 1 file changed, 55 insertions(+)
 create mode 100644 include/hw/i386/tdvf.h

diff --git a/include/hw/i386/tdvf.h b/include/hw/i386/tdvf.h
new file mode 100644
index 00..5c78e2affb
--- /dev/null
+++ b/include/hw/i386/tdvf.h
@@ -0,0 +1,55 @@
+/*
+ * SPDX-License-Identifier: GPL-2.0-or-later
+
+ * Copyright (c) 2020 Intel Corporation
+ * Author: Isaku Yamahata 
+ *
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef HW_I386_TDVF_H
+#define HW_I386_TDVF_H
+
+#include "qemu/osdep.h"
+
+#define TDVF_METDATA_OFFSET_FROM_END0x20
+
+#define TDVF_SECTION_TYPE_BFV   0
+#define TDVF_SECTION_TYPE_CFV   1
+#define TDVF_SECTION_TYPE_TD_HOB2
+#define TDVF_SECTION_TYPE_TEMP_MEM  3
+
+#define TDVF_SECTION_ATTRIBUTES_EXTENDMR(1U << 0)
+
+typedef struct {
+uint32_t DataOffset;
+uint32_t RawDataSize;
+uint64_t MemoryAddress;
+uint64_t MemoryDataSize;
+uint32_t Type;
+uint32_t Attributes;
+} TdvfSectionEntry;
+
+#define TDVF_SIGNATURE_LE32 0x46564454 /* TDVF as little endian */
+
+typedef struct {
+uint8_t Signature[4];
+uint32_t Length;
+uint32_t Version;
+uint32_t NumberOfSectionEntries;
+TdvfSectionEntry SectionEntries[];
+} TdvfMetadata;
+
+#endif /* HW_I386_TDVF_H */
-- 
2.25.1




[RFC PATCH v2 07/44] i386/kvm: Squash getting/putting guest state for TDX VMs

2021-07-07 Thread isaku . yamahata
From: Sean Christopherson 

Ignore get/put state of TDX VMs as accessing/mutating guest state of
production TDs is not supported.
Allow kvm_arch_get_registers() to run as normal, except for MSRs, for
debug TDs, and silently ignore attempts to read guest state for
non-debug TDs.

Signed-off-by: Sean Christopherson 
Signed-off-by: Isaku Yamahata 
---
 target/i386/kvm/kvm.c | 18 +++---
 1 file changed, 15 insertions(+), 3 deletions(-)

diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
index a3d5b334d1..27b64dedc2 100644
--- a/target/i386/kvm/kvm.c
+++ b/target/i386/kvm/kvm.c
@@ -2641,6 +2641,11 @@ void kvm_put_apicbase(X86CPU *cpu, uint64_t value)
 {
 int ret;
 
+/* TODO: Allow accessing guest state for debug TDs. */
+if (vm_type == KVM_X86_TDX_VM) {
+return;
+}
+
 ret = kvm_put_one_msr(cpu, MSR_IA32_APICBASE, value);
 assert(ret == 1);
 }
@@ -4099,6 +4104,11 @@ int kvm_arch_put_registers(CPUState *cpu, int level)
 
 assert(cpu_is_stopped(cpu) || qemu_cpu_is_self(cpu));
 
+/* TODO: Allow accessing guest state for debug TDs. */
+if (vm_type == KVM_X86_TDX_VM) {
+return 0;
+}
+
 /* must be before kvm_put_nested_state so that EFER.SVME is set */
 ret = kvm_put_sregs(x86_cpu);
 if (ret < 0) {
@@ -4209,9 +4219,11 @@ int kvm_arch_get_registers(CPUState *cs)
 if (ret < 0) {
 goto out;
 }
-ret = kvm_get_msrs(cpu);
-if (ret < 0) {
-goto out;
+if (vm_type != KVM_X86_TDX_VM) {
+ret = kvm_get_msrs(cpu);
+if (ret < 0) {
+goto out;
+}
 }
 ret = kvm_get_apic(cpu);
 if (ret < 0) {
-- 
2.25.1




[RFC PATCH v2 15/44] i386/tdx: Add hook to require generic device loader

2021-07-07 Thread isaku . yamahata
From: Sean Christopherson 

Add a hook for TDX to denote that the TD Virtual Firmware must be
provided via the "generic" device loader.  Error out if pflash is used
in conjunction with TDX.

Suggested-by: Isaku Yamahata 
Signed-off-by: Sean Christopherson 
Signed-off-by: Isaku Yamahata 
---
 hw/i386/pc_sysfw.c |  6 ++
 include/sysemu/tdx.h   |  2 ++
 target/i386/kvm/tdx-stub.c |  5 +
 target/i386/kvm/tdx.c  | 25 +
 4 files changed, 38 insertions(+)

diff --git a/hw/i386/pc_sysfw.c b/hw/i386/pc_sysfw.c
index 6ce37a2b05..5ff571af36 100644
--- a/hw/i386/pc_sysfw.c
+++ b/hw/i386/pc_sysfw.c
@@ -38,6 +38,7 @@
 #include "hw/block/flash.h"
 #include "sysemu/kvm.h"
 #include "sysemu/sev.h"
+#include "sysemu/tdx.h"
 
 #define FLASH_SECTOR_SIZE 4096
 
@@ -328,6 +329,11 @@ void pc_system_firmware_init(PCMachineState *pcms,
 int i;
 BlockBackend *pflash_blk[ARRAY_SIZE(pcms->flash)];
 
+if (!tdx_system_firmware_init(pcms, rom_memory)) {
+pc_system_flash_cleanup_unused(pcms);
+return;
+}
+
 if (!pcmc->pci_enabled) {
 x86_bios_rom_init(MACHINE(pcms), "bios.bin", rom_memory, true);
 return;
diff --git a/include/sysemu/tdx.h b/include/sysemu/tdx.h
index 03461b6ae8..70eb01348f 100644
--- a/include/sysemu/tdx.h
+++ b/include/sysemu/tdx.h
@@ -3,8 +3,10 @@
 
 #ifndef CONFIG_USER_ONLY
 #include "sysemu/kvm.h"
+#include "hw/i386/pc.h"
 
 bool kvm_has_tdx(KVMState *s);
+int tdx_system_firmware_init(PCMachineState *pcms, MemoryRegion *rom_memory);
 #endif
 
 void tdx_pre_create_vcpu(CPUState *cpu);
diff --git a/target/i386/kvm/tdx-stub.c b/target/i386/kvm/tdx-stub.c
index 93afe07ddb..4e1a0a4280 100644
--- a/target/i386/kvm/tdx-stub.c
+++ b/target/i386/kvm/tdx-stub.c
@@ -7,6 +7,11 @@ bool kvm_has_tdx(KVMState *s)
 {
 return false;
 }
+
+int tdx_system_firmware_init(PCMachineState *pcms, MemoryRegion *rom_memory)
+{
+return -ENOSYS;
+}
 #endif
 
 void tdx_pre_create_vcpu(CPUState *cpu)
diff --git a/target/i386/kvm/tdx.c b/target/i386/kvm/tdx.c
index 67fb03b4b5..48c04d344d 100644
--- a/target/i386/kvm/tdx.c
+++ b/target/i386/kvm/tdx.c
@@ -109,6 +109,31 @@ int tdx_kvm_init(ConfidentialGuestSupport *cgs, Error 
**errp)
 return 0;
 }
 
+int tdx_system_firmware_init(PCMachineState *pcms, MemoryRegion *rom_memory)
+{
+MachineState *ms = MACHINE(pcms);
+TdxGuest *tdx = (TdxGuest *)object_dynamic_cast(OBJECT(ms->cgs),
+TYPE_TDX_GUEST);
+int i;
+
+if (!tdx) {
+return -ENOSYS;
+}
+
+/*
+ * Sanity check for tdx:
+ * TDX uses generic loader to load bios instead of pflash.
+ */
+for (i = 0; i < ARRAY_SIZE(pcms->flash); i++) {
+if (drive_get(IF_PFLASH, 0, i)) {
+error_report("pflash not supported by VM type, "
+ "use -device loader,file=");
+exit(1);
+}
+}
+return 0;
+}
+
 void tdx_get_supported_cpuid(KVMState *s, uint32_t function,
  uint32_t index, int reg, uint32_t *ret)
 {
-- 
2.25.1




[RFC PATCH v2 38/44] hw/i386: plug eoi_intercept_unsupported to ioapic

2021-07-07 Thread isaku . yamahata
From: Isaku Yamahata 

When x86machine doesn't support eoi intercept, set
level_trigger_unsupported property of ioapic to true so that ioapic doesn't
accept configuration to use level trigger.

Signed-off-by: Isaku Yamahata 
---
 hw/i386/microvm.c |  5 +++--
 hw/i386/pc_piix.c |  2 +-
 hw/i386/pc_q35.c  |  2 +-
 hw/i386/x86.c | 10 --
 include/hw/i386/x86.h |  6 --
 5 files changed, 17 insertions(+), 8 deletions(-)

diff --git a/hw/i386/microvm.c b/hw/i386/microvm.c
index aba0c83219..9b03d051ca 100644
--- a/hw/i386/microvm.c
+++ b/hw/i386/microvm.c
@@ -175,9 +175,10 @@ static void microvm_devices_init(MicrovmMachineState *mms)
   _abort);
 isa_bus_irqs(isa_bus, x86ms->gsi);
 
-ioapic_init_gsi(gsi_state, "machine");
+ioapic_init_gsi(gsi_state, "machine", x86ms->eoi_intercept_unsupported);
 if (ioapics > 1) {
-x86ms->ioapic2 = ioapic_init_secondary(gsi_state);
+x86ms->ioapic2 = ioapic_init_secondary(
+gsi_state, x86ms->eoi_intercept_unsupported);
 }
 
 kvmclock_create(true);
diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
index 4c1e31f180..a601c4a916 100644
--- a/hw/i386/pc_piix.c
+++ b/hw/i386/pc_piix.c
@@ -223,7 +223,7 @@ static void pc_init1(MachineState *machine,
 }
 
 if (pcmc->pci_enabled) {
-ioapic_init_gsi(gsi_state, "i440fx");
+ioapic_init_gsi(gsi_state, "i440fx", x86ms->eoi_intercept_unsupported);
 }
 
 if (tcg_enabled()) {
diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
index 106f5726cc..464463766c 100644
--- a/hw/i386/pc_q35.c
+++ b/hw/i386/pc_q35.c
@@ -256,7 +256,7 @@ static void pc_q35_init(MachineState *machine)
 }
 
 if (pcmc->pci_enabled) {
-ioapic_init_gsi(gsi_state, "q35");
+ioapic_init_gsi(gsi_state, "q35", x86ms->eoi_intercept_unsupported);
 }
 
 if (tcg_enabled()) {
diff --git a/hw/i386/x86.c b/hw/i386/x86.c
index 9862fe5bc9..88c365b72d 100644
--- a/hw/i386/x86.c
+++ b/hw/i386/x86.c
@@ -608,7 +608,8 @@ void gsi_handler(void *opaque, int n, int level)
 }
 }
 
-void ioapic_init_gsi(GSIState *gsi_state, const char *parent_name)
+void ioapic_init_gsi(GSIState *gsi_state, const char *parent_name,
+ bool level_trigger_unsupported)
 {
 DeviceState *dev;
 SysBusDevice *d;
@@ -622,6 +623,8 @@ void ioapic_init_gsi(GSIState *gsi_state, const char 
*parent_name)
 }
 object_property_add_child(object_resolve_path(parent_name, NULL),
   "ioapic", OBJECT(dev));
+object_property_set_bool(OBJECT(dev), "level_trigger_unsupported",
+ level_trigger_unsupported, NULL);
 d = SYS_BUS_DEVICE(dev);
 sysbus_realize_and_unref(d, _fatal);
 sysbus_mmio_map(d, 0, IO_APIC_DEFAULT_ADDRESS);
@@ -631,13 +634,16 @@ void ioapic_init_gsi(GSIState *gsi_state, const char 
*parent_name)
 }
 }
 
-DeviceState *ioapic_init_secondary(GSIState *gsi_state)
+DeviceState *ioapic_init_secondary(GSIState *gsi_state,
+   bool level_trigger_unsupported)
 {
 DeviceState *dev;
 SysBusDevice *d;
 unsigned int i;
 
 dev = qdev_new(TYPE_IOAPIC);
+object_property_set_bool(OBJECT(dev), "level_trigger_unsupported",
+ level_trigger_unsupported, NULL);
 d = SYS_BUS_DEVICE(dev);
 sysbus_realize_and_unref(d, _fatal);
 sysbus_mmio_map(d, 0, IO_APIC_SECONDARY_ADDRESS);
diff --git a/include/hw/i386/x86.h b/include/hw/i386/x86.h
index 6eff42550f..7536e5fb8c 100644
--- a/include/hw/i386/x86.h
+++ b/include/hw/i386/x86.h
@@ -140,7 +140,9 @@ typedef struct GSIState {
 
 qemu_irq x86_allocate_cpu_irq(void);
 void gsi_handler(void *opaque, int n, int level);
-void ioapic_init_gsi(GSIState *gsi_state, const char *parent_name);
-DeviceState *ioapic_init_secondary(GSIState *gsi_state);
+void ioapic_init_gsi(GSIState *gsi_state, const char *parent_name,
+ bool eoi_intercept_unsupported);
+DeviceState *ioapic_init_secondary(GSIState *gsi_state,
+   bool eoi_intercept_unsupported);
 
 #endif
-- 
2.25.1




[RFC PATCH v2 32/44] tdx: add kvm_tdx_enabled() accessor for later use

2021-07-07 Thread isaku . yamahata
From: Isaku Yamahata 

Signed-off-by: Isaku Yamahata 
---
 include/sysemu/tdx.h  | 1 +
 target/i386/kvm/kvm.c | 5 +
 2 files changed, 6 insertions(+)

diff --git a/include/sysemu/tdx.h b/include/sysemu/tdx.h
index 70eb01348f..f3eced10f9 100644
--- a/include/sysemu/tdx.h
+++ b/include/sysemu/tdx.h
@@ -6,6 +6,7 @@
 #include "hw/i386/pc.h"
 
 bool kvm_has_tdx(KVMState *s);
+bool kvm_tdx_enabled(void);
 int tdx_system_firmware_init(PCMachineState *pcms, MemoryRegion *rom_memory);
 #endif
 
diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
index af6b5f350e..76c3ea9fac 100644
--- a/target/i386/kvm/kvm.c
+++ b/target/i386/kvm/kvm.c
@@ -152,6 +152,11 @@ int kvm_set_vm_type(MachineState *ms, int kvm_type)
 return -ENOTSUP;
 }
 
+bool kvm_tdx_enabled(void)
+{
+return vm_type == KVM_X86_TDX_VM;
+}
+
 int kvm_has_pit_state2(void)
 {
 return has_pit_state2;
-- 
2.25.1




[RFC PATCH v2 36/44] hw/i386: add eoi_intercept_unsupported member to X86MachineState

2021-07-07 Thread isaku . yamahata
From: Isaku Yamahata 

Add a new bool member, eoi_intercept_unsupported, to X86MachineState with
default value false.  Set it to true for the tdx kvm type.  The inability to
intercept EOI makes it impossible to emulate re-injection of a
level-triggered interrupt while the level is still kept active, which affects
interrupt controller emulation.  Such new behavior will be introduced later.

Signed-off-by: Isaku Yamahata 
---
 hw/i386/x86.c | 1 +
 include/hw/i386/x86.h | 1 +
 2 files changed, 2 insertions(+)

diff --git a/hw/i386/x86.c b/hw/i386/x86.c
index ed15f6f2cf..9862fe5bc9 100644
--- a/hw/i386/x86.c
+++ b/hw/i386/x86.c
@@ -1311,6 +1311,7 @@ static void x86_machine_initfn(Object *obj)
 x86ms->oem_id = g_strndup(ACPI_BUILD_APPNAME6, 6);
 x86ms->oem_table_id = g_strndup(ACPI_BUILD_APPNAME8, 8);
 x86ms->bus_lock_ratelimit = 0;
+x86ms->eoi_intercept_unsupported = false;
 
 object_property_add_str(obj, "kvm-type",
 x86_get_kvm_type, x86_set_kvm_type);
diff --git a/include/hw/i386/x86.h b/include/hw/i386/x86.h
index a450b5e226..6eff42550f 100644
--- a/include/hw/i386/x86.h
+++ b/include/hw/i386/x86.h
@@ -63,6 +63,7 @@ struct X86MachineState {
 unsigned pci_irq_mask;
 unsigned apic_id_limit;
 uint16_t boot_cpus;
+bool eoi_intercept_unsupported;
 
 OnOffAuto smm;
 OnOffAuto acpi;
-- 
2.25.1




[RFC PATCH v2 16/44] hw/i386: Add definitions from UEFI spec for volumes, resources, etc...

2021-07-07 Thread isaku . yamahata
From: Isaku Yamahata 

Add definitions for literals, enums, structs, GUIDs, etc... that will be
used by TDX to build the UEFI Hand-Off Block (HOB) that is passed to the
Trusted Domain Virtual Firmware (TDVF).  All values come from the UEFI
specification and TDVF design guide. [1]

Note: EFI_RESOURCE_ATTRIBUTE_{ENCRYPTED, UNACCEPTED} will be added
in a future UEFI spec.

[1] 
https://software.intel.com/content/dam/develop/external/us/en/documents/tdx-virtual-firmware-design-guide-rev-1.pdf

Signed-off-by: Isaku Yamahata 
---
 hw/i386/uefi.h | 496 +
 1 file changed, 496 insertions(+)
 create mode 100644 hw/i386/uefi.h

diff --git a/hw/i386/uefi.h b/hw/i386/uefi.h
new file mode 100644
index 00..72bfc2f6a9
--- /dev/null
+++ b/hw/i386/uefi.h
@@ -0,0 +1,496 @@
+/*
+ * Copyright (C) 2020 Intel Corporation
+ *
+ * Author: Isaku Yamahata 
+ *
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#ifndef HW_I386_UEFI_H
+#define HW_I386_UEFI_H
+
+/***/
+/*
+ * basic EFI definitions
+ * supplemented with UEFI Specification Version 2.8 (Errata A)
+ * released February 2020
+ */
+/* UEFI integer is little endian */
+
+typedef struct {
+uint32_t Data1;
+uint16_t Data2;
+uint16_t Data3;
+uint8_t Data4[8];
+} EFI_GUID;
+
+typedef uint64_t EFI_PHYSICAL_ADDRESS;
+typedef uint32_t EFI_BOOT_MODE;
+
+typedef enum {
+EfiReservedMemoryType,
+EfiLoaderCode,
+EfiLoaderData,
+EfiBootServicesCode,
+EfiBootServicesData,
+EfiRuntimeServicesCode,
+EfiRuntimeServicesData,
+EfiConventionalMemory,
+EfiUnusableMemory,
+EfiACPIReclaimMemory,
+EfiACPIMemoryNVS,
+EfiMemoryMappedIO,
+EfiMemoryMappedIOPortSpace,
+EfiPalCode,
+EfiPersistentMemory,
+EfiMaxMemoryType
+} EFI_MEMORY_TYPE;
+
+
+/*
+ * data structure firmware volume/file
+ * based on
+ * UEFI Platform Initialization Specification Version 1.7. vol 3, 3.2.1
+ */
+
+#define SIGNATURE_16(A, B)(((A) | (B << 8)))
+#define SIGNATURE_32(A, B, C, D)  (((A) | (B << 8) | (C << 16) | (D << 24)))
+#define SIGNATURE_64(A, B, C, D, E, F, G, H)\
+(SIGNATURE_32(A, B, C, D) | ((uint64_t)(SIGNATURE_32(E, F, G, H)) << 32))
+
+/***/
+/* Firmware Volume format */
+
+typedef uint32_t EFI_FV_FILE_ATTRIBUTES;
+
+
+#define EFI_FV_FILE_ATTRIB_ALIGNMENT 0x001F
+#define EFI_FV_FILE_ATTRIB_FIXED 0x0100
+#define EFI_FV_FILE_ATTRIB_MEMORY_MAPPED 0x0200
+
+typedef uint32_t EFI_FVB_ATTRIBUTES_2;
+
+
+#define EFI_FVB2_READ_DISABLED_CAP  0x0001
+#define EFI_FVB2_READ_ENABLED_CAP   0x0002
+#define EFI_FVB2_READ_STATUS0x0004
+#define EFI_FVB2_WRITE_DISABLED_CAP 0x0008
+#define EFI_FVB2_WRITE_ENABLED_CAP  0x0010
+#define EFI_FVB2_WRITE_STATUS   0x0020
+#define EFI_FVB2_LOCK_CAP   0x0040
+#define EFI_FVB2_LOCK_STATUS0x0080
+#define EFI_FVB2_STICKY_WRITE   0x0200
+#define EFI_FVB2_MEMORY_MAPPED  0x0400
+#define EFI_FVB2_ERASE_POLARITY 0x0800
+#define EFI_FVB2_READ_LOCK_CAP  0x1000
+#define EFI_FVB2_READ_LOCK_STATUS   0x2000
+#define EFI_FVB2_WRITE_LOCK_CAP 0x4000
+#define EFI_FVB2_WRITE_LOCK_STATUS  0x8000
+#define EFI_FVB2_ALIGNMENT  0x001F
+#define EFI_FVB2_WEAK_ALIGNMENT 0x8000
+#define EFI_FVB2_ALIGNMENT_10x
+#define EFI_FVB2_ALIGNMENT_20x0001
+#define EFI_FVB2_ALIGNMENT_40x0002
+#define EFI_FVB2_ALIGNMENT_80x0003
+#define EFI_FVB2_ALIGNMENT_16   0x0004
+#define EFI_FVB2_ALIGNMENT_32   0x0005
+#define EFI_FVB2_ALIGNMENT_64   0x0006
+#define EFI_FVB2_ALIGNMENT_128  0x0007
+#define EFI_FVB2_ALIGNMENT_256  0x0008
+#define EFI_FVB2_ALIGNMENT_512  0x0009
+#define EFI_FVB2_ALIGNMENT_1K   0x000A
+#define EFI_FVB2_ALIGNMENT_2K   0x000B
+#define EFI_FVB2_ALIGNMENT_4K   0x000C
+#define EFI_FVB2_ALIGNMENT_8K   0x000D
+#define EFI_FVB2_ALIGNMENT_16K  0x000E
+#define EFI_FVB2_ALIGNMENT_32K  0x000F
+#define EFI_FVB2_ALIGNMENT_64K  0x0010
+#define EFI_FVB2_ALIGNMENT_128K 0x0011
+#define 

[RFC PATCH v2 35/44] ioapic: add property to disable level interrupt

2021-07-07 Thread isaku . yamahata
From: Isaku Yamahata 

According to TDX module spec version 344425-002US [1], the VMM can inject
virtual interrupts only via posted interrupts, and the VMM can't get a TDEXIT
on guest EOI to the virtual x2APIC.  Because posted interrupts are
edge-triggered and the VMM needs to hook guest EOI to re-inject a
level-triggered interrupt if the level is still active, level-triggered
interrupts aren't supported for TD guest VMs.

Prevent the trigger mode from being set to level-triggered, and emit a warning.
Without this guard, qemu can run into unexpected behavior later.

[1] 
https://software.intel.com/content/dam/develop/external/us/en/documents/tdx-module-1eas-v0.85.039.pdf

Signed-off-by: Isaku Yamahata 
---
 hw/intc/ioapic.c  | 20 
 hw/intc/ioapic_common.c   | 27 +++
 include/hw/i386/ioapic_internal.h |  1 +
 3 files changed, 48 insertions(+)

diff --git a/hw/intc/ioapic.c b/hw/intc/ioapic.c
index 264262959d..6d61744961 100644
--- a/hw/intc/ioapic.c
+++ b/hw/intc/ioapic.c
@@ -364,6 +364,23 @@ ioapic_fix_edge_remote_irr(uint64_t *entry)
 }
 }
 
+static inline void
+ioapic_fix_level_trigger_unsupported(uint64_t *entry)
+{
+if ((*entry & IOAPIC_LVT_TRIGGER_MODE) !=
+IOAPIC_TRIGGER_EDGE << IOAPIC_LVT_TRIGGER_MODE_SHIFT) {
+/*
+ * ignore a request for level trigger because
+ * level trigger requires eoi intercept to re-inject
+ * interrupt when the level is still active.
+ */
+warn_report_once("attempting to set level-trigger mode "
+ "while eoi intercept isn't supported");
+*entry &= ~IOAPIC_LVT_TRIGGER_MODE;
+*entry |= IOAPIC_TRIGGER_EDGE << IOAPIC_LVT_TRIGGER_MODE_SHIFT;
+}
+}
+
 static void
 ioapic_mem_write(void *opaque, hwaddr addr, uint64_t val,
  unsigned int size)
@@ -404,6 +421,9 @@ ioapic_mem_write(void *opaque, hwaddr addr, uint64_t val,
 s->ioredtbl[index] &= IOAPIC_RW_BITS;
 s->ioredtbl[index] |= ro_bits;
 s->irq_eoi[index] = 0;
+if (s->level_trigger_unsupported) {
+ioapic_fix_level_trigger_unsupported(>ioredtbl[index]);
+}
 ioapic_fix_edge_remote_irr(>ioredtbl[index]);
 ioapic_service(s);
 }
diff --git a/hw/intc/ioapic_common.c b/hw/intc/ioapic_common.c
index 3cccfc1556..07ee142470 100644
--- a/hw/intc/ioapic_common.c
+++ b/hw/intc/ioapic_common.c
@@ -150,6 +150,32 @@ static int ioapic_dispatch_post_load(void *opaque, int 
version_id)
 return 0;
 }
 
+static bool ioapic_common_get_level_trigger_unsupported(Object *obj,
+Error **errp)
+{
+IOAPICCommonState *s = IOAPIC_COMMON(obj);
+return s->level_trigger_unsupported;
+}
+
+static void ioapic_common_set_level_trigger_unsupported(Object *obj, bool 
value,
+   Error **errp)
+{
+DeviceState *dev = DEVICE(obj);
+IOAPICCommonState *s = IOAPIC_COMMON(obj);
+/* only disabling before realize is allowed */
+assert(!dev->realized);
+assert(!s->level_trigger_unsupported);
+s->level_trigger_unsupported = value;
+}
+
+static void ioapic_common_init(Object *obj)
+{
+object_property_add_bool(obj, "level_trigger_unsupported",
+ ioapic_common_get_level_trigger_unsupported,
+ ioapic_common_set_level_trigger_unsupported);
+
+}
+
 static void ioapic_common_realize(DeviceState *dev, Error **errp)
 {
 IOAPICCommonState *s = IOAPIC_COMMON(dev);
@@ -207,6 +233,7 @@ static const TypeInfo ioapic_common_type = {
 .name = TYPE_IOAPIC_COMMON,
 .parent = TYPE_SYS_BUS_DEVICE,
 .instance_size = sizeof(IOAPICCommonState),
+.instance_init = ioapic_common_init,
 .class_size = sizeof(IOAPICCommonClass),
 .class_init = ioapic_common_class_init,
 .abstract = true,
diff --git a/include/hw/i386/ioapic_internal.h 
b/include/hw/i386/ioapic_internal.h
index 021e715f11..20f2fc7897 100644
--- a/include/hw/i386/ioapic_internal.h
+++ b/include/hw/i386/ioapic_internal.h
@@ -103,6 +103,7 @@ struct IOAPICCommonState {
 uint32_t irr;
 uint64_t ioredtbl[IOAPIC_NUM_PINS];
 Notifier machine_done;
+bool level_trigger_unsupported;
 uint8_t version;
 uint64_t irq_count[IOAPIC_NUM_PINS];
 int irq_level[IOAPIC_NUM_PINS];
-- 
2.25.1




[RFC PATCH v2 31/44] target/i386/tdx: Allows mrconfigid/mrowner/mrownerconfig for TDX_INIT_VM

2021-07-07 Thread isaku . yamahata
From: Isaku Yamahata 

When creating VM with TDX_INIT_VM, three sha384 hash values are accepted
for TDX attestation.
So far they were hard coded as 0. Now allow user to specify those values
via property mrconfigid, mrowner and mrownerconfig.
The string for each of those properties is a hex string of length 48 * 2.

example
-device tdx-guest, \
  
mrconfigid=0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef,
 \
  
mrowner=fedcba9876543210fedcba9876543210fedcba9876543210fedcba9876543210fedcba9876543210fedcba9876543210,
 \
  
mrownerconfig=0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef

Signed-off-by: Isaku Yamahata 
---
 qapi/qom.json | 11 ++-
 target/i386/kvm/tdx.c | 17 +
 target/i386/kvm/tdx.h |  3 +++
 3 files changed, 30 insertions(+), 1 deletion(-)

diff --git a/qapi/qom.json b/qapi/qom.json
index 70c70e3efe..8f8b7828b3 100644
--- a/qapi/qom.json
+++ b/qapi/qom.json
@@ -767,10 +767,19 @@
 #
 # @debug: enable debug mode (default: off)
 #
+# @mrconfigid: MRCONFIGID SHA384 hex string of 48 * 2 length (default: 0)
+#
+# @mrowner: MROWNER SHA384 hex string of 48 * 2 length (default: 0)
+#
+# @mrownerconfig: MROWNERCONFIG SHA384 hex string of 48 * 2 length (default: 0)
+#
 # Since: 6.0
 ##
 { 'struct': 'TdxGuestProperties',
-  'data': { '*debug': 'bool' } }
+  'data': { '*debug': 'bool',
+'*mrconfigid': 'str',
+'*mrowner': 'str',
+'*mrownerconfig': 'str' } }
 
 ##
 # @ObjectType:
diff --git a/target/i386/kvm/tdx.c b/target/i386/kvm/tdx.c
index 47a502051c..6b560c1c0b 100644
--- a/target/i386/kvm/tdx.c
+++ b/target/i386/kvm/tdx.c
@@ -282,6 +282,17 @@ void tdx_pre_create_vcpu(CPUState *cpu)
 init_vm.attributes |= tdx->debug ? TDX1_TD_ATTRIBUTE_DEBUG : 0;
 init_vm.attributes |= x86cpu->enable_pmu ? TDX1_TD_ATTRIBUTE_PERFMON : 0;
 
+QEMU_BUILD_BUG_ON(sizeof(init_vm.mrconfigid) != sizeof(tdx->mrconfigid));
+memcpy(init_vm.mrconfigid, tdx->mrconfigid, sizeof(init_vm.mrconfigid));
+QEMU_BUILD_BUG_ON(sizeof(init_vm.mrowner) != sizeof(tdx->mrowner));
+memcpy(init_vm.mrowner, tdx->mrowner, sizeof(init_vm.mrowner));
+QEMU_BUILD_BUG_ON(sizeof(init_vm.mrownerconfig) !=
+  sizeof(tdx->mrownerconfig));
+memcpy(init_vm.mrownerconfig, tdx->mrownerconfig,
+   sizeof(init_vm.mrownerconfig));
+
+memset(init_vm.reserved, 0, sizeof(init_vm.reserved));
+
 init_vm.cpuid = (__u64)(_data);
 tdx_ioctl(KVM_TDX_INIT_VM, 0, _vm);
 out:
@@ -336,6 +347,12 @@ static void tdx_guest_init(Object *obj)
 tdx->debug = false;
 object_property_add_bool(obj, "debug", tdx_guest_get_debug,
  tdx_guest_set_debug);
+object_property_add_sha384(obj, "mrconfigid", tdx->mrconfigid,
+   OBJ_PROP_FLAG_READWRITE);
+object_property_add_sha384(obj, "mrowner", tdx->mrowner,
+   OBJ_PROP_FLAG_READWRITE);
+object_property_add_sha384(obj, "mrownerconfig", tdx->mrownerconfig,
+   OBJ_PROP_FLAG_READWRITE);
 }
 
 static void tdx_guest_finalize(Object *obj)
diff --git a/target/i386/kvm/tdx.h b/target/i386/kvm/tdx.h
index 2fed27b3fb..4132d1be30 100644
--- a/target/i386/kvm/tdx.h
+++ b/target/i386/kvm/tdx.h
@@ -44,6 +44,9 @@ typedef struct TdxGuest {
 
 bool initialized;
 bool debug;
+uint8_t mrconfigid[48]; /* sha384 digest */
+uint8_t mrowner[48];/* sha384 digest */
+uint8_t mrownerconfig[48];  /* sha384 digest */
 
 TdxFirmware fw;
 } TdxGuest;
-- 
2.25.1




[RFC PATCH v2 33/44] qmp: add query-tdx-capabilities query-tdx command

2021-07-07 Thread isaku . yamahata
From: Chenyi Qiang 

Add QMP commands that can be used by libvirt to query the TDX capabilities
and TDX info.  At the moment the only capability reported is "enabled",
which means TDX is enabled.

Signed-off-by: Chenyi Qiang 
Co-developed-by: Isaku Yamahata 
Signed-off-by: Isaku Yamahata 
---
 include/sysemu/tdx.h   |  6 
 qapi/misc-target.json  | 59 ++
 target/i386/kvm/tdx-stub.c | 10 +++
 target/i386/kvm/tdx.c  | 19 
 target/i386/monitor.c  | 23 +++
 5 files changed, 117 insertions(+)

diff --git a/include/sysemu/tdx.h b/include/sysemu/tdx.h
index f3eced10f9..756f46d2de 100644
--- a/include/sysemu/tdx.h
+++ b/include/sysemu/tdx.h
@@ -13,4 +13,10 @@ int tdx_system_firmware_init(PCMachineState *pcms, 
MemoryRegion *rom_memory);
 void tdx_pre_create_vcpu(CPUState *cpu);
 void tdx_post_init_vcpu(CPUState *cpu);
 
+struct TDXInfo;
+struct TDXInfo *tdx_get_info(void);
+
+struct TDXCapability;
+struct TDXCapability *tdx_get_capabilities(void);
+
 #endif
diff --git a/qapi/misc-target.json b/qapi/misc-target.json
index 5573dcf8f0..c1de95c082 100644
--- a/qapi/misc-target.json
+++ b/qapi/misc-target.json
@@ -323,3 +323,62 @@
 { 'command': 'query-sev-attestation-report', 'data': { 'mnonce': 'str' },
   'returns': 'SevAttestationReport',
   'if': 'defined(TARGET_I386)' }
+
+##
+# @TDXInfo:
+#
+# Information about Trust Domain Extensions (TDX) support
+#
+# @enabled: true if TDX is active
+#
+##
+{ 'struct': 'TDXInfo',
+'data': { 'enabled': 'bool' },
+  'if': 'defined(TARGET_I386)'
+}
+
+##
+# @query-tdx:
+#
+# Returns information about TDX
+#
+# Returns: @TDXInfo
+#
+#
+# Example:
+#
+# -> { "execute": "query-tdx" }
+# <- { "return": { "enabled": true } }
+#
+##
+{ 'command': 'query-tdx', 'returns': 'TDXInfo',
+  'if': 'defined(TARGET_I386)' }
+
+##
+# @TDXCapability:
+#
+# The struct describes capability for a TDX
+# feature.
+#
+##
+{ 'struct': 'TDXCapability',
+  'data': { 'enabled': 'bool' },
+  'if': 'defined(TARGET_I386)' }
+
+##
+# @query-tdx-capabilities:
+#
+# This command is used to get the TDX capabilities, and is supported on Intel
+# X86 platforms only.
+#
+# Returns: @TDXCapability.
+#
+#
+# Example:
+#
+# -> { "execute": "query-tdx-capabilities" }
+# <- { "return": { "enabled": true } }
+#
+##
+{ 'command': 'query-tdx-capabilities', 'returns': 'TDXCapability',
+  'if': 'defined(TARGET_I386)' }
diff --git a/target/i386/kvm/tdx-stub.c b/target/i386/kvm/tdx-stub.c
index 4e1a0a4280..5d8faf0716 100644
--- a/target/i386/kvm/tdx-stub.c
+++ b/target/i386/kvm/tdx-stub.c
@@ -21,3 +21,13 @@ void tdx_pre_create_vcpu(CPUState *cpu)
 void tdx_post_init_vcpu(CPUState *cpu)
 {
 }
+
+struct TDXInfo *tdx_get_info(void)
+{
+return NULL;
+}
+
+struct TDXCapability *tdx_get_capabilities(void)
+{
+return NULL;
+}
diff --git a/target/i386/kvm/tdx.c b/target/i386/kvm/tdx.c
index 6b560c1c0b..1316d95209 100644
--- a/target/i386/kvm/tdx.c
+++ b/target/i386/kvm/tdx.c
@@ -22,6 +22,7 @@
 #include "hw/i386/tdvf-hob.h"
 #include "qapi/error.h"
 #include "qom/object_interfaces.h"
+#include "qapi/qapi-types-misc-target.h"
 #include "standard-headers/asm-x86/kvm_para.h"
 #include "sysemu/sysemu.h"
 #include "sysemu/kvm.h"
@@ -39,6 +40,24 @@ bool kvm_has_tdx(KVMState *s)
 return !!(kvm_check_extension(s, KVM_CAP_VM_TYPES) & BIT(KVM_X86_TDX_VM));
 }
 
+TDXInfo *tdx_get_info(void)
+{
+TDXInfo *info;
+
+info = g_new0(TDXInfo, 1);
+info->enabled = kvm_enabled() && kvm_tdx_enabled();
+return info;
+}
+
+TDXCapability *tdx_get_capabilities(void)
+{
+TDXCapability *cap;
+
+cap = g_new0(TDXCapability, 1);
+cap->enabled = kvm_enabled() && kvm_has_tdx(kvm_state);
+return cap;
+}
+
 static void __tdx_ioctl(void *state, int ioctl_no, const char *ioctl_name,
 __u32 metadata, void *data)
 {
diff --git a/target/i386/monitor.c b/target/i386/monitor.c
index 119211f0b0..c0be99d13d 100644
--- a/target/i386/monitor.c
+++ b/target/i386/monitor.c
@@ -30,6 +30,7 @@
 #include "qapi/qmp/qdict.h"
 #include "sysemu/kvm.h"
 #include "sysemu/sev.h"
+#include "sysemu/tdx.h"
 #include "qapi/error.h"
 #include "sev_i386.h"
 #include "qapi/qapi-commands-misc-target.h"
@@ -763,3 +764,25 @@ qmp_query_sev_attestation_report(const char *mnonce, Error 
**errp)
 {
 return sev_get_attestation_report(mnonce, errp);
 }
+
+TDXInfo *qmp_query_tdx(Error **errp)
+{
+TDXInfo *info;
+
+info = tdx_get_info();
+if (!info) {
+error_setg(errp, "TDX is not available.");
+}
+return info;
+}
+
+TDXCapability *qmp_query_tdx_capabilities(Error **errp)
+{
+TDXCapability *cap;
+
+cap = tdx_get_capabilities();
+if (!cap) {
+error_setg(errp, "TDX is not available.");
+}
+return cap;
+}
-- 
2.25.1




[RFC PATCH v2 05/44] linux-headers: Update headers to pull in TDX API changes

2021-07-07 Thread isaku . yamahata
From: Xiaoyao Li 

Pull in recent TDX updates, which are not backwards compatible.

Signed-off-by: Xiaoyao Li 
Co-developed-by: Sean Christopherson 
Signed-off-by: Sean Christopherson 
Signed-off-by: Isaku Yamahata 
---
 linux-headers/asm-x86/kvm.h | 60 +
 linux-headers/linux/kvm.h   |  2 ++
 2 files changed, 62 insertions(+)

diff --git a/linux-headers/asm-x86/kvm.h b/linux-headers/asm-x86/kvm.h
index 0662f644aa..dbcb590fb8 100644
--- a/linux-headers/asm-x86/kvm.h
+++ b/linux-headers/asm-x86/kvm.h
@@ -490,4 +490,64 @@ struct kvm_pmu_event_filter {
 #define KVM_PMU_EVENT_ALLOW 0
 #define KVM_PMU_EVENT_DENY 1
 
+#define KVM_X86_LEGACY_VM  0
+#define KVM_X86_SW_PROTECTED_VM1
+#define KVM_X86_TDX_VM 2
+
+/* Trust Domain eXtension command*/
+enum kvm_tdx_cmd_id {
+   KVM_TDX_CAPABILITIES = 0,
+   KVM_TDX_INIT_VM,
+   KVM_TDX_INIT_VCPU,
+   KVM_TDX_INIT_MEM_REGION,
+   KVM_TDX_FINALIZE_VM,
+
+   KVM_TDX_CMD_NR_MAX,
+};
+
+struct kvm_tdx_cmd {
+   __u32 id;
+   __u32 metadata;
+   __u64 data;
+};
+
+struct kvm_tdx_cpuid_config {
+   __u32 leaf;
+   __u32 sub_leaf;
+   __u32 eax;
+   __u32 ebx;
+   __u32 ecx;
+   __u32 edx;
+};
+
+struct kvm_tdx_capabilities {
+   __u64 attrs_fixed0;
+   __u64 attrs_fixed1;
+   __u64 xfam_fixed0;
+   __u64 xfam_fixed1;
+
+   __u32 nr_cpuid_configs;
+   __u32 padding;
+   struct kvm_tdx_cpuid_config cpuid_configs[0];
+};
+
+struct kvm_tdx_init_vm {
+   __u32 max_vcpus;
+   __u32 tsc_khz;
+   __u64 attributes;
+   __u64 cpuid;
+   __u64 mrconfigid[6];/* sha384 digest */
+   __u64 mrowner[6];   /* sha384 digest */
+   __u64 mrownerconfig[6]; /* sha348 digest */
+   __u64 reserved[43]; /* must be zero for future extensibility */
+};
+
+#define KVM_TDX_MEASURE_MEMORY_REGION  (1UL << 0)
+
+struct kvm_tdx_init_mem_region {
+   __u64 source_addr;
+   __u64 gpa;
+   __u64 nr_pages;
+};
+
 #endif /* _ASM_X86_KVM_H */
diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h
index 20d6a263bb..65ac70d6fd 100644
--- a/linux-headers/linux/kvm.h
+++ b/linux-headers/linux/kvm.h
@@ -1084,6 +1084,8 @@ struct kvm_ppc_resize_hpt {
 #define KVM_CAP_VM_COPY_ENC_CONTEXT_FROM 197
 #define KVM_CAP_PTP_KVM 198
 
+#define KVM_CAP_VM_TYPES 1000
+
 #ifdef KVM_CAP_IRQ_ROUTING
 
 struct kvm_irq_routing_irqchip {
-- 
2.25.1




[RFC PATCH v2 12/44] target/i386/tdx: Finalize the TD's measurement when machine is done

2021-07-07 Thread isaku . yamahata
From: Xiaoyao Li 

Invoke KVM_TDX_FINALIZE_VM to finalize the TD's measurement and make
the TD vCPUs runnable once machine initialization is complete.

Signed-off-by: Xiaoyao Li 
Signed-off-by: Isaku Yamahata 
---
 target/i386/kvm/kvm.c |  7 +++
 target/i386/kvm/tdx.c | 21 +
 target/i386/kvm/tdx.h |  3 +++
 3 files changed, 31 insertions(+)

diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
index be0b96b120..5742fa4806 100644
--- a/target/i386/kvm/kvm.c
+++ b/target/i386/kvm/kvm.c
@@ -53,6 +53,7 @@
 #include "migration/blocker.h"
 #include "exec/memattrs.h"
 #include "trace.h"
+#include "tdx.h"
 
 //#define DEBUG_KVM
 
@@ -2246,6 +2247,12 @@ int kvm_arch_init(MachineState *ms, KVMState *s)
 return ret;
 }
 
+ret = tdx_kvm_init(ms->cgs, _err);
+if (ret < 0) {
+error_report_err(local_err);
+return ret;
+}
+
 if (!kvm_check_extension(s, KVM_CAP_IRQ_ROUTING)) {
 error_report("kvm: KVM_CAP_IRQ_ROUTING not supported by KVM");
 return -ENOTSUP;
diff --git a/target/i386/kvm/tdx.c b/target/i386/kvm/tdx.c
index c50a0dcf11..f8c7560fc8 100644
--- a/target/i386/kvm/tdx.c
+++ b/target/i386/kvm/tdx.c
@@ -58,6 +58,27 @@ static void __tdx_ioctl(int ioctl_no, const char *ioctl_name,
 #define tdx_ioctl(ioctl_no, metadata, data) \
 __tdx_ioctl(ioctl_no, stringify(ioctl_no), metadata, data)
 
+static void tdx_finalize_vm(Notifier *notifier, void *unused)
+{
+tdx_ioctl(KVM_TDX_FINALIZE_VM, 0, NULL);
+}
+
+static Notifier tdx_machine_done_late_notify = {
+.notify = tdx_finalize_vm,
+};
+
+int tdx_kvm_init(ConfidentialGuestSupport *cgs, Error **errp)
+{
+TdxGuest *tdx = (TdxGuest *)object_dynamic_cast(OBJECT(cgs),
+TYPE_TDX_GUEST);
+if (!tdx) {
+return 0;
+}
+
+qemu_add_machine_init_done_late_notifier(_machine_done_late_notify);
+return 0;
+}
+
 void tdx_pre_create_vcpu(CPUState *cpu)
 {
 struct {
diff --git a/target/i386/kvm/tdx.h b/target/i386/kvm/tdx.h
index 6ad6c9a313..e15657d272 100644
--- a/target/i386/kvm/tdx.h
+++ b/target/i386/kvm/tdx.h
@@ -2,6 +2,7 @@
 #define QEMU_I386_TDX_H
 
 #include "qom/object.h"
+#include "qapi/error.h"
 #include "exec/confidential-guest-support.h"
 
 #define TYPE_TDX_GUEST "tdx-guest"
@@ -21,4 +22,6 @@ typedef struct TdxGuest {
 bool debug;
 } TdxGuest;
 
+int tdx_kvm_init(ConfidentialGuestSupport *cgs, Error **errp);
+
 #endif
-- 
2.25.1




[RFC PATCH v2 10/44] hw/i386: Initialize TDX via KVM ioctl() when kvm_type is TDX

2021-07-07 Thread isaku . yamahata
From: Xiaoyao Li 

Introduce tdx_ioctl() to invoke TDX specific sub-ioctls of
KVM_MEMORY_ENCRYPT_OP.  Use tdx_ioctl() to invoke KVM_TDX_INIT_VM, by way
of tdx_pre_create_vcpu(), during kvm_init_vcpu().  KVM_TDX_INIT_VM
configures global TD state, e.g. the canonical CPUID config, and must be
executed prior to creating vCPUs.

Note, this doesn't address the fact that Qemu may change the CPUID
configuration when creating vCPUs, i.e. punts on refactoring Qemu to
provide a stable CPUID config prior to kvm_arch_init().

Also explicitly set the subleaf index and flags when adding a CPUID entry,
to avoid propagating stale state from a removed entry; e.g. when the
CPUID 0x4 loop bails, it can leave a non-zero index and flags in the
array.

Signed-off-by: Xiaoyao Li 
Co-developed-by: Sean Christopherson 
Signed-off-by: Sean Christopherson 
Signed-off-by: Isaku Yamahata 
---
 accel/kvm/kvm-all.c|   2 +
 include/sysemu/tdx.h   |   2 +
 qapi/qom.json  |  14 +
 target/i386/kvm/tdx-stub.c |   4 ++
 target/i386/kvm/tdx.c  | 126 +
 target/i386/kvm/tdx.h  |  24 +++
 6 files changed, 172 insertions(+)
 create mode 100644 target/i386/kvm/tdx.h

diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
index fdbe24bf59..6475f15d5f 100644
--- a/accel/kvm/kvm-all.c
+++ b/accel/kvm/kvm-all.c
@@ -38,6 +38,7 @@
 #include "qemu/main-loop.h"
 #include "trace.h"
 #include "hw/irq.h"
+#include "sysemu/tdx.h"
 #include "qapi/visitor.h"
 #include "qapi/qapi-types-common.h"
 #include "qapi/qapi-visit-common.h"
@@ -459,6 +460,7 @@ int kvm_init_vcpu(CPUState *cpu, Error **errp)
 
 trace_kvm_init_vcpu(cpu->cpu_index, kvm_arch_vcpu_id(cpu));
 
+tdx_pre_create_vcpu(cpu);
 ret = kvm_get_vcpu(s, kvm_arch_vcpu_id(cpu));
 if (ret < 0) {
 error_setg_errno(errp, -ret, "kvm_init_vcpu: kvm_get_vcpu failed 
(%lu)",
diff --git a/include/sysemu/tdx.h b/include/sysemu/tdx.h
index 60ebded851..36a901e723 100644
--- a/include/sysemu/tdx.h
+++ b/include/sysemu/tdx.h
@@ -7,4 +7,6 @@
 bool kvm_has_tdx(KVMState *s);
 #endif
 
+void tdx_pre_create_vcpu(CPUState *cpu);
+
 #endif
diff --git a/qapi/qom.json b/qapi/qom.json
index 652be317b8..70c70e3efe 100644
--- a/qapi/qom.json
+++ b/qapi/qom.json
@@ -760,6 +760,18 @@
 '*cbitpos': 'uint32',
 'reduced-phys-bits': 'uint32' } }
 
+##
+# @TdxGuestProperties:
+#
+# Properties for tdx-guest objects.
+#
+# @debug: enable debug mode (default: off)
+#
+# Since: 6.0
+##
+{ 'struct': 'TdxGuestProperties',
+  'data': { '*debug': 'bool' } }
+
 ##
 # @ObjectType:
 #
@@ -802,6 +814,7 @@
 'secret_keyring',
 'sev-guest',
 's390-pv-guest',
+'tdx-guest',
 'throttle-group',
 'tls-creds-anon',
 'tls-creds-psk',
@@ -858,6 +871,7 @@
   'secret': 'SecretProperties',
   'secret_keyring': 'SecretKeyringProperties',
   'sev-guest':  'SevGuestProperties',
+  'tdx-guest':  'TdxGuestProperties',
   'throttle-group': 'ThrottleGroupProperties',
   'tls-creds-anon': 'TlsCredsAnonProperties',
   'tls-creds-psk':  'TlsCredsPskProperties',
diff --git a/target/i386/kvm/tdx-stub.c b/target/i386/kvm/tdx-stub.c
index e1eb09cae1..93d5913c89 100644
--- a/target/i386/kvm/tdx-stub.c
+++ b/target/i386/kvm/tdx-stub.c
@@ -8,3 +8,7 @@ bool kvm_has_tdx(KVMState *s)
 return false;
 }
 #endif
+
+void tdx_pre_create_vcpu(CPUState *cpu)
+{
+}
diff --git a/target/i386/kvm/tdx.c b/target/i386/kvm/tdx.c
index e62a570f75..e8c70f241d 100644
--- a/target/i386/kvm/tdx.c
+++ b/target/i386/kvm/tdx.c
@@ -14,8 +14,10 @@
 #include "qemu/osdep.h"
 
 #include 
+#include 
 
 #include "cpu.h"
+#include "kvm_i386.h"
 #include "hw/boards.h"
 #include "qapi/error.h"
 #include "qom/object_interfaces.h"
@@ -23,8 +25,132 @@
 #include "sysemu/kvm.h"
 #include "sysemu/kvm_int.h"
 #include "sysemu/tdx.h"
+#include "tdx.h"
+
+#define TDX1_TD_ATTRIBUTE_DEBUG BIT_ULL(0)
+#define TDX1_TD_ATTRIBUTE_PERFMON BIT_ULL(63)
 
 bool kvm_has_tdx(KVMState *s)
 {
 return !!(kvm_check_extension(s, KVM_CAP_VM_TYPES) & BIT(KVM_X86_TDX_VM));
 }
+
+static void __tdx_ioctl(int ioctl_no, const char *ioctl_name,
+__u32 metadata, void *data)
+{
+struct kvm_tdx_cmd tdx_cmd;
+int r;
+
+memset(_cmd, 0x0, sizeof(tdx_cmd));
+
+tdx_cmd.id = ioctl_no;
+tdx_cmd.metadata = metadata;
+tdx_cmd.data = (__u64)(unsigned long)data;
+
+r = kvm_vm_ioctl(kvm_state, KVM_MEMORY_ENCRYPT_OP, _cmd);
+if (r) {
+error_report("%s failed: %s", ioctl_name, strerror(-r));
+exit(1);
+}
+}
+#define tdx_ioctl(ioctl_no, metadata, data) \
+__tdx_ioctl(ioctl_no, stringify(ioctl_no), metadata, data)
+
+void tdx_pre_create_vcpu(CPUState *cpu)
+{
+struct {
+struct kvm_cpuid2 cpuid;
+struct kvm_cpuid_entry2 entries[KVM_MAX_CPUID_ENTRIES];
+} 

[RFC PATCH v2 11/44] i386/tdx: Implement user specified tsc frequency

2021-07-07 Thread isaku . yamahata
From: Xiaoyao Li 

Reuse -cpu,tsc-frequency= to get the TSC frequency requested by the user and
pass it to KVM_TDX_INIT_VM.

Also sanity check that the TSC frequency is within the legal range and has
the legal granularity (required by the SEAM module).

Signed-off-by: Xiaoyao Li 
Signed-off-by: Isaku Yamahata 
---
 target/i386/kvm/kvm.c |  8 
 target/i386/kvm/tdx.c | 16 
 2 files changed, 24 insertions(+)

diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
index ecb1714920..be0b96b120 100644
--- a/target/i386/kvm/kvm.c
+++ b/target/i386/kvm/kvm.c
@@ -763,6 +763,14 @@ static int kvm_arch_set_tsc_khz(CPUState *cs)
 int r, cur_freq;
 bool set_ioctl = false;
 
+/*
+ * TD guest's TSC is immutable, it cannot be set/changed via
+ * KVM_SET_TSC_KHZ, but only be initialized via KVM_TDX_INIT_VM
+ */
+if (vm_type == KVM_X86_TDX_VM) {
+return 0;
+}
+
 if (!env->tsc_khz) {
 return 0;
 }
diff --git a/target/i386/kvm/tdx.c b/target/i386/kvm/tdx.c
index e8c70f241d..c50a0dcf11 100644
--- a/target/i386/kvm/tdx.c
+++ b/target/i386/kvm/tdx.c
@@ -29,6 +29,8 @@
 
 #define TDX1_TD_ATTRIBUTE_DEBUG BIT_ULL(0)
 #define TDX1_TD_ATTRIBUTE_PERFMON BIT_ULL(63)
+#define TDX1_MIN_TSC_FREQUENCY_KHZ (100 * 1000)
+#define TDX1_MAX_TSC_FREQUENCY_KHZ (10 * 1000 * 1000)
 
 bool kvm_has_tdx(KVMState *s)
 {
@@ -91,6 +93,19 @@ void tdx_pre_create_vcpu(CPUState *cpu)
 exit(1);
 }
 
+if (env->tsc_khz && (env->tsc_khz < TDX1_MIN_TSC_FREQUENCY_KHZ ||
+ env->tsc_khz > TDX1_MAX_TSC_FREQUENCY_KHZ)) {
+error_report("Invalid TSC %ld KHz, must specify cpu_frequecy between 
[%d, %d] kHz\n",
+  env->tsc_khz, TDX1_MIN_TSC_FREQUENCY_KHZ,
+  TDX1_MAX_TSC_FREQUENCY_KHZ);
+exit(1);
+}
+
+if (env->tsc_khz % (25 * 1000)) {
+error_report("Invalid TSC %ld KHz, it must be multiple of 25MHz\n", 
env->tsc_khz);
+exit(1);
+}
+
 qemu_mutex_lock(>lock);
 if (tdx->initialized) {
 goto out;
@@ -103,6 +118,7 @@ void tdx_pre_create_vcpu(CPUState *cpu)
 cpuid_data.cpuid.padding = 0;
 
 init_vm.max_vcpus = ms->smp.cpus;
+init_vm.tsc_khz = env->tsc_khz;
 init_vm.attributes = 0;
 init_vm.attributes |= tdx->debug ? TDX1_TD_ATTRIBUTE_DEBUG : 0;
 init_vm.attributes |= x86cpu->enable_pmu ? TDX1_TD_ATTRIBUTE_PERFMON : 0;
-- 
2.25.1




[RFC PATCH v2 01/44] target/i386: Expose x86_cpu_get_supported_feature_word() for TDX

2021-07-07 Thread isaku . yamahata
From: Sean Christopherson 

Expose x86_cpu_get_supported_feature_word() outside of cpu.c so that it
can be used by TDX to setup the VM-wide CPUID configuration.

Signed-off-by: Sean Christopherson 
Signed-off-by: Isaku Yamahata 
---
 target/i386/cpu.c | 4 ++--
 target/i386/cpu.h | 3 +++
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index d8f3ab3192..45b81a63df 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -4894,8 +4894,8 @@ CpuDefinitionInfoList *qmp_query_cpu_definitions(Error 
**errp)
 return cpu_list;
 }
 
-static uint64_t x86_cpu_get_supported_feature_word(FeatureWord w,
-   bool migratable_only)
+uint64_t x86_cpu_get_supported_feature_word(FeatureWord w,
+bool migratable_only)
 {
 FeatureWordInfo *wi = _word_info[w];
 uint64_t r = 0;
diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index f7fa5870b1..ff8f9532b9 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -1838,6 +1838,9 @@ void cpu_clear_ignne(void);
 /* mpx_helper.c */
 void cpu_sync_bndcs_hflags(CPUX86State *env);
 
+uint64_t x86_cpu_get_supported_feature_word(FeatureWord w,
+bool migratable_only);
+
 /* this function must always be used to load data in the segment
cache: it synchronizes the hflags with the segment cache values */
 static inline void cpu_x86_load_seg_cache(CPUX86State *env,
-- 
2.25.1




[RFC PATCH v2 29/44] target/i386: Add machine option to disable PIC/8259

2021-07-07 Thread isaku . yamahata
From: Sean Christopherson 

Add a machine option to disable the legacy PIC (8259), which cannot be
supported for TDX guests as TDX-SEAM doesn't allow direct interrupt
injection.  Using posted interrupts for the PIC is not a viable option
as the guest BIOS/kernel will not do EOI for PIC IRQs, i.e. will leave
the vIRR bit set.

Signed-off-by: Sean Christopherson 
Signed-off-by: Isaku Yamahata 
---
 hw/i386/pc.c | 18 ++
 hw/i386/pc_piix.c|  4 +++-
 hw/i386/pc_q35.c |  4 +++-
 include/hw/i386/pc.h |  2 ++
 4 files changed, 26 insertions(+), 2 deletions(-)

diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index 8e1220db72..f4590df231 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -1522,6 +1522,20 @@ static void pc_machine_set_hpet(Object *obj, bool value, 
Error **errp)
 pcms->hpet_enabled = value;
 }
 
+static bool pc_machine_get_pic(Object *obj, Error **errp)
+{
+PCMachineState *pcms = PC_MACHINE(obj);
+
+return pcms->pic_enabled;
+}
+
+static void pc_machine_set_pic(Object *obj, bool value, Error **errp)
+{
+PCMachineState *pcms = PC_MACHINE(obj);
+
+pcms->pic_enabled = value;
+}
+
 static void pc_machine_get_max_ram_below_4g(Object *obj, Visitor *v,
 const char *name, void *opaque,
 Error **errp)
@@ -1617,6 +1631,7 @@ static void pc_machine_initfn(Object *obj)
 pcms->smbus_enabled = true;
 pcms->sata_enabled = true;
 pcms->pit_enabled = true;
+pcms->pic_enabled = true;
 pcms->max_fw_size = 8 * MiB;
 #ifdef CONFIG_HPET
 pcms->hpet_enabled = true;
@@ -1742,6 +1757,9 @@ static void pc_machine_class_init(ObjectClass *oc, void 
*data)
 object_class_property_add_bool(oc, PC_MACHINE_PIT,
 pc_machine_get_pit, pc_machine_set_pit);
 
+object_class_property_add_bool(oc, PC_MACHINE_PIC,
+pc_machine_get_pic, pc_machine_set_pic);
+
 object_class_property_add_bool(oc, "hpet",
 pc_machine_get_hpet, pc_machine_set_hpet);
 
diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
index 30b8bd6ea9..4c1e31f180 100644
--- a/hw/i386/pc_piix.c
+++ b/hw/i386/pc_piix.c
@@ -218,7 +218,9 @@ static void pc_init1(MachineState *machine,
 }
 isa_bus_irqs(isa_bus, x86ms->gsi);
 
-pc_i8259_create(isa_bus, gsi_state->i8259_irq);
+if (pcms->pic_enabled) {
+pc_i8259_create(isa_bus, gsi_state->i8259_irq);
+}
 
 if (pcmc->pci_enabled) {
 ioapic_init_gsi(gsi_state, "i440fx");
diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
index 1718aa94d9..106f5726cc 100644
--- a/hw/i386/pc_q35.c
+++ b/hw/i386/pc_q35.c
@@ -251,7 +251,9 @@ static void pc_q35_init(MachineState *machine)
 pci_bus_set_route_irq_fn(host_bus, ich9_route_intx_pin_to_irq);
 isa_bus = ich9_lpc->isa_bus;
 
-pc_i8259_create(isa_bus, gsi_state->i8259_irq);
+if (pcms->pic_enabled) {
+pc_i8259_create(isa_bus, gsi_state->i8259_irq);
+}
 
 if (pcmc->pci_enabled) {
 ioapic_init_gsi(gsi_state, "q35");
diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
index cd2113c763..9cede7a260 100644
--- a/include/hw/i386/pc.h
+++ b/include/hw/i386/pc.h
@@ -44,6 +44,7 @@ typedef struct PCMachineState {
 bool sata_enabled;
 bool pit_enabled;
 bool hpet_enabled;
+bool pic_enabled;
 uint64_t max_fw_size;
 
 /* NUMA information: */
@@ -61,6 +62,7 @@ typedef struct PCMachineState {
 #define PC_MACHINE_SMBUS"smbus"
 #define PC_MACHINE_SATA "sata"
 #define PC_MACHINE_PIT  "pit"
+#define PC_MACHINE_PIC  "pic"
 #define PC_MACHINE_MAX_FW_SIZE  "max-fw-size"
 /**
  * PCMachineClass:
-- 
2.25.1




[RFC PATCH v2 06/44] hw/i386: Introduce kvm-type for TDX guest

2021-07-07 Thread isaku . yamahata
From: Xiaoyao Li 

Introduce a machine property, kvm-type, to allow the user to create a
Trusted Domain eXtensions (TDX) VM, a.k.a. a Trusted Domain (TD), e.g.:

 # $QEMU \
-machine ...,kvm-type=tdx \
...

Only two types are supported: "legacy" and "tdx", with "legacy" being
the default.

Signed-off-by: Xiaoyao Li 
Co-developed-by: Sean Christopherson 
Signed-off-by: Sean Christopherson 
Signed-off-by: Isaku Yamahata 
---
 default-configs/devices/i386-softmmu.mak |  1 +
 hw/i386/Kconfig  |  5 +++
 hw/i386/x86.c| 44 
 include/hw/i386/x86.h|  1 +
 include/sysemu/tdx.h | 10 ++
 target/i386/kvm/kvm-stub.c   |  5 +++
 target/i386/kvm/kvm.c| 16 +
 target/i386/kvm/kvm_i386.h   |  1 +
 target/i386/kvm/meson.build  |  1 +
 target/i386/kvm/tdx-stub.c   | 10 ++
 target/i386/kvm/tdx.c| 30 
 11 files changed, 124 insertions(+)
 create mode 100644 include/sysemu/tdx.h
 create mode 100644 target/i386/kvm/tdx-stub.c
 create mode 100644 target/i386/kvm/tdx.c

diff --git a/default-configs/devices/i386-softmmu.mak 
b/default-configs/devices/i386-softmmu.mak
index 84d1a2487c..6e805407b8 100644
--- a/default-configs/devices/i386-softmmu.mak
+++ b/default-configs/devices/i386-softmmu.mak
@@ -18,6 +18,7 @@
 #CONFIG_QXL=n
 #CONFIG_SEV=n
 #CONFIG_SGA=n
+#CONFIG_TDX=n
 #CONFIG_TEST_DEVICES=n
 #CONFIG_TPM_CRB=n
 #CONFIG_TPM_TIS_ISA=n
diff --git a/hw/i386/Kconfig b/hw/i386/Kconfig
index aacb6f6d96..01633123e0 100644
--- a/hw/i386/Kconfig
+++ b/hw/i386/Kconfig
@@ -2,6 +2,10 @@ config SEV
 bool
 depends on KVM
 
+config TDX
+bool
+depends on KVM
+
 config PC
 bool
 imply APPLESMC
@@ -17,6 +21,7 @@ config PC
 imply PVPANIC_ISA
 imply QXL
 imply SEV
+imply TDX
 imply SGA
 imply TEST_DEVICES
 imply TPM_CRB
diff --git a/hw/i386/x86.c b/hw/i386/x86.c
index 00448ed55a..ed15f6f2cf 100644
--- a/hw/i386/x86.c
+++ b/hw/i386/x86.c
@@ -21,6 +21,7 @@
  * THE SOFTWARE.
  */
 #include "qemu/osdep.h"
+#include <linux/kvm.h>
 #include "qemu/error-report.h"
 #include "qemu/option.h"
 #include "qemu/cutils.h"
@@ -31,6 +32,7 @@
 #include "qapi/qmp/qerror.h"
 #include "qapi/qapi-visit-common.h"
 #include "qapi/visitor.h"
+#include "sysemu/kvm_int.h"
 #include "sysemu/qtest.h"
 #include "sysemu/whpx.h"
 #include "sysemu/numa.h"
@@ -1263,6 +1265,42 @@ static void x86_machine_set_bus_lock_ratelimit(Object 
*obj, Visitor *v,
 visit_type_uint64(v, name, &x86ms->bus_lock_ratelimit, errp);
 }
 
+static char *x86_get_kvm_type(Object *obj, Error **errp)
+{
+X86MachineState *x86ms = X86_MACHINE(obj);
+
+return g_strdup(x86ms->kvm_type);
+}
+
+static void x86_set_kvm_type(Object *obj, const char *value, Error **errp)
+{
+X86MachineState *x86ms = X86_MACHINE(obj);
+
+g_free(x86ms->kvm_type);
+x86ms->kvm_type = g_strdup(value);
+}
+
+static int x86_kvm_type(MachineState *ms, const char *vm_type)
+{
+int kvm_type;
+
+if (!vm_type || !strcmp(vm_type, "") ||
+!g_ascii_strcasecmp(vm_type, "legacy")) {
+kvm_type = KVM_X86_LEGACY_VM;
+} else if (!g_ascii_strcasecmp(vm_type, "tdx")) {
+kvm_type = KVM_X86_TDX_VM;
+} else {
+error_report("Unknown kvm-type specified '%s'", vm_type);
+exit(1);
+}
+if (kvm_set_vm_type(ms, kvm_type)) {
+error_report("kvm-type '%s' not supported by KVM", vm_type);
+exit(1);
+}
+
+return kvm_type;
+}
+
 static void x86_machine_initfn(Object *obj)
 {
 X86MachineState *x86ms = X86_MACHINE(obj);
@@ -1273,6 +1311,11 @@ static void x86_machine_initfn(Object *obj)
 x86ms->oem_id = g_strndup(ACPI_BUILD_APPNAME6, 6);
 x86ms->oem_table_id = g_strndup(ACPI_BUILD_APPNAME8, 8);
 x86ms->bus_lock_ratelimit = 0;
+
+object_property_add_str(obj, "kvm-type",
+x86_get_kvm_type, x86_set_kvm_type);
+object_property_set_description(obj, "kvm-type",
+"KVM guest type (legacy, tdx)");
 }
 
 static void x86_machine_class_init(ObjectClass *oc, void *data)
@@ -1284,6 +1327,7 @@ static void x86_machine_class_init(ObjectClass *oc, void 
*data)
 mc->cpu_index_to_instance_props = x86_cpu_index_to_props;
 mc->get_default_cpu_node_id = x86_get_default_cpu_node_id;
 mc->possible_cpu_arch_ids = x86_possible_cpu_arch_ids;
+mc->kvm_type = x86_kvm_type;
 x86mc->compat_apic_id_mode = false;
 x86mc->save_tsc_khz = true;
 nc->nmi_monitor_handler = x86_nmi;
diff --git a/include/hw/i386/x86.h b/include/hw/i386/x86.h
index 6e9244a82c..a450b5e226 100644
--- a/include/hw/i386/x86.h
+++ b/include/hw/i386/x86.h
@@ -56,6 +56,7 @@ struct X86MachineState {
 
 /* RAM information (sizes, addresses, configuration): */
 ram_addr_t below_4g_mem_size, above_4g_mem_size;
+char *kvm_type;
 

[RFC PATCH v2 30/44] qom: implement property helper for sha384

2021-07-07 Thread isaku . yamahata
From: Isaku Yamahata 

Implement property_add_sha384() which converts hex string <-> uint8_t[48]
It will be used for TDX which uses sha384 for measurement.

Signed-off-by: Isaku Yamahata 
---
 include/qom/object.h | 17 ++
 qom/object.c | 76 
 2 files changed, 93 insertions(+)

diff --git a/include/qom/object.h b/include/qom/object.h
index 6721cd312e..594d0ec52c 100644
--- a/include/qom/object.h
+++ b/include/qom/object.h
@@ -1853,6 +1853,23 @@ ObjectProperty *object_property_add_alias(Object *obj, 
const char *name,
 ObjectProperty *object_property_add_const_link(Object *obj, const char *name,
Object *target);
 
+
+/**
+ * object_property_add_sha384:
+ * @obj: the object to add a property to
+ * @name: the name of the property
+ * @v: pointer to value
+ * @flags: bitwise-or'd ObjectPropertyFlags
+ *
+ * Add an sha384 property in memory.  This function will add a
+ * property of type 'sha384'.
+ *
+ * Returns: The newly added property on success, or %NULL on failure.
+ */
+ObjectProperty * object_property_add_sha384(Object *obj, const char *name,
+const uint8_t *v,
+ObjectPropertyFlags flags);
+
 /**
  * object_property_set_description:
  * @obj: the object owning the property
diff --git a/qom/object.c b/qom/object.c
index 6a01d56546..e33a0b8c5d 100644
--- a/qom/object.c
+++ b/qom/object.c
@@ -15,6 +15,7 @@
 #include "qapi/error.h"
 #include "qom/object.h"
 #include "qom/object_interfaces.h"
+#include "qemu/ctype.h"
 #include "qemu/cutils.h"
 #include "qapi/visitor.h"
 #include "qapi/string-input-visitor.h"
@@ -2749,6 +2750,81 @@ object_property_add_alias(Object *obj, const char *name,
 return op;
 }
 
+#define SHA384_DIGEST_SIZE  48
+static void property_get_sha384(Object *obj, Visitor *v, const char *name,
+void *opaque, Error **errp)
+{
+uint8_t *value = (uint8_t *)opaque;
+char str[SHA384_DIGEST_SIZE * 2 + 1];
+char *str_ = (char*)str;
+size_t i;
+
+for (i = 0; i < SHA384_DIGEST_SIZE; i++) {
+char *buf;
+buf = &str[i * 2];
+
+sprintf(buf, "%02hhx", value[i]);
+}
+str[SHA384_DIGEST_SIZE * 2] = '\0';
+
+visit_type_str(v, name, &str_, errp);
+}
+
+static void property_set_sha384(Object *obj, Visitor *v, const char *name,
+void *opaque, Error **errp)
+{
+uint8_t *value = (uint8_t *)opaque;
+char* str;
+size_t len;
+size_t i;
+
+if (!visit_type_str(v, name, &str, errp)) {
+goto err;
+}
+
+len = strlen(str);
+if (len != SHA384_DIGEST_SIZE * 2) {
+error_setg(errp, "invalid length for sha384 hex string %s. "
+   "it must be 48 * 2 hex", name);
+goto err;
+}
+
+for (i = 0; i < SHA384_DIGEST_SIZE; i++) {
+if (!qemu_isxdigit(str[i * 2]) || !qemu_isxdigit(str[i * 2 + 1])) {
+error_setg(errp, "invalid char for sha384 hex string %s at %c%c",
+   name, str[i * 2], str[i * 2 + 1]);
+goto err;
+}
+
+if (sscanf(str + i * 2, "%02hhx", &value[i]) != 1) {
+error_setg(errp, "invalid format for sha384 hex string %s", name);
+goto err;
+}
+}
+
+err:
+g_free(str);
+}
+
+ObjectProperty *
+object_property_add_sha384(Object *obj, const char *name,
+   const uint8_t *v, ObjectPropertyFlags flags)
+{
+ObjectPropertyAccessor *getter = NULL;
+ObjectPropertyAccessor *setter = NULL;
+
+if ((flags & OBJ_PROP_FLAG_READ) == OBJ_PROP_FLAG_READ) {
+getter = property_get_sha384;
+}
+
+if ((flags & OBJ_PROP_FLAG_WRITE) == OBJ_PROP_FLAG_WRITE) {
+setter = property_set_sha384;
+}
+
+return object_property_add(obj, name, "sha384",
+   getter, setter, NULL, (void *)v);
+}
+
 void object_property_set_description(Object *obj, const char *name,
  const char *description)
 {
-- 
2.25.1




[RFC PATCH v2 03/44] i386/kvm: Move architectural CPUID leaf generation to separate helper

2021-07-07 Thread isaku . yamahata
From: Sean Christopherson 

Move the architectural (for lack of a better term) CPUID leaf generation
to a separate helper so that the generation code can be reused by TDX,
which needs to generate a canonical VM-scoped configuration.

Signed-off-by: Sean Christopherson 
Signed-off-by: Isaku Yamahata 
---
 target/i386/kvm/kvm.c  | 186 +++--
 target/i386/kvm/kvm_i386.h |   4 +
 2 files changed, 102 insertions(+), 88 deletions(-)

diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
index 04e4ec063f..0558e4b506 100644
--- a/target/i386/kvm/kvm.c
+++ b/target/i386/kvm/kvm.c
@@ -1507,90 +1507,12 @@ static int hyperv_init_vcpu(X86CPU *cpu)
 
 static Error *invtsc_mig_blocker;
 
-#define KVM_MAX_CPUID_ENTRIES  100
-
-int kvm_arch_init_vcpu(CPUState *cs)
+uint32_t kvm_x86_arch_cpuid(CPUX86State *env, struct kvm_cpuid_entry2 *entries,
+uint32_t cpuid_i)
 {
-struct {
-struct kvm_cpuid2 cpuid;
-struct kvm_cpuid_entry2 entries[KVM_MAX_CPUID_ENTRIES];
-} cpuid_data;
-/*
- * The kernel defines these structs with padding fields so there
- * should be no extra padding in our cpuid_data struct.
- */
-QEMU_BUILD_BUG_ON(sizeof(cpuid_data) !=
-  sizeof(struct kvm_cpuid2) +
-  sizeof(struct kvm_cpuid_entry2) * KVM_MAX_CPUID_ENTRIES);
-
-X86CPU *cpu = X86_CPU(cs);
-CPUX86State *env = &cpu->env;
-uint32_t limit, i, j, cpuid_i;
+uint32_t limit, i, j;
 uint32_t unused;
 struct kvm_cpuid_entry2 *c;
-uint32_t signature[3];
-int kvm_base = KVM_CPUID_SIGNATURE;
-int max_nested_state_len;
-int r;
-Error *local_err = NULL;
-
-memset(&cpuid_data, 0, sizeof(cpuid_data));
-
-cpuid_i = 0;
-
-r = kvm_arch_set_tsc_khz(cs);
-if (r < 0) {
-return r;
-}
-
-/* vcpu's TSC frequency is either specified by user, or following
- * the value used by KVM if the former is not present. In the
- * latter case, we query it from KVM and record in env->tsc_khz,
- * so that vcpu's TSC frequency can be migrated later via this field.
- */
-if (!env->tsc_khz) {
-r = kvm_check_extension(cs->kvm_state, KVM_CAP_GET_TSC_KHZ) ?
-kvm_vcpu_ioctl(cs, KVM_GET_TSC_KHZ) :
--ENOTSUP;
-if (r > 0) {
-env->tsc_khz = r;
-}
-}
-
-env->apic_bus_freq = KVM_APIC_BUS_FREQUENCY;
-
-/* Paravirtualization CPUIDs */
-hyperv_expand_features(cs, &local_err);
-if (local_err) {
-error_report_err(local_err);
-return -ENOSYS;
-}
-
-if (hyperv_enabled(cpu)) {
-r = hyperv_init_vcpu(cpu);
-if (r) {
-return r;
-}
-
-cpuid_i = hyperv_fill_cpuids(cs, cpuid_data.entries);
-kvm_base = KVM_CPUID_SIGNATURE_NEXT;
-has_msr_hv_hypercall = true;
-}
-
-if (cpu->expose_kvm) {
-memcpy(signature, "KVMKVMKVM\0\0\0", 12);
-c = &cpuid_data.entries[cpuid_i++];
-c->function = KVM_CPUID_SIGNATURE | kvm_base;
-c->eax = KVM_CPUID_FEATURES | kvm_base;
-c->ebx = signature[0];
-c->ecx = signature[1];
-c->edx = signature[2];
-
-c = &cpuid_data.entries[cpuid_i++];
-c->function = KVM_CPUID_FEATURES | kvm_base;
-c->eax = env->features[FEAT_KVM];
-c->edx = env->features[FEAT_KVM_HINTS];
-}
 
 cpu_x86_cpuid(env, 0, 0, &limit, &unused, &unused, &unused);
 
@@ -1599,7 +1521,7 @@ int kvm_arch_init_vcpu(CPUState *cs)
 fprintf(stderr, "unsupported level value: 0x%x\n", limit);
 abort();
 }
-c = &cpuid_data.entries[cpuid_i++];
+c = &entries[cpuid_i++];
 
 switch (i) {
 case 2: {
@@ -1618,7 +1540,7 @@ int kvm_arch_init_vcpu(CPUState *cs)
 "cpuid(eax:2):eax & 0xf = 0x%x\n", times);
 abort();
 }
-c = &cpuid_data.entries[cpuid_i++];
+c = &entries[cpuid_i++];
 c->function = i;
 c->flags = KVM_CPUID_FLAG_STATEFUL_FUNC;
 cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx);
@@ -1664,7 +1586,7 @@ int kvm_arch_init_vcpu(CPUState *cs)
 "cpuid(eax:0x%x,ecx:0x%x)\n", i, j);
 abort();
 }
-c = &cpuid_data.entries[cpuid_i++];
+c = &entries[cpuid_i++];
 }
 break;
 case 0x7:
@@ -1683,7 +1605,7 @@ int kvm_arch_init_vcpu(CPUState *cs)
 "cpuid(eax:0x%x,ecx:0x%x)\n", i, j);
 abort();
 }
-c = &cpuid_data.entries[cpuid_i++];
+c = &entries[cpuid_i++];
 c->function = i;
 c->index = j;
 c->flags = KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
@@ -1740,7 +1662,7 @@ int kvm_arch_init_vcpu(CPUState *cs)
 fprintf(stderr, "unsupported xlevel value: 0x%x\n", limit);
 abort();
 

[RFC PATCH v2 00/44] TDX support

2021-07-07 Thread isaku . yamahata
From: Isaku Yamahata 

This patch series enables TDX support.  It needs the corresponding KVM patch
series for TDX [1], which itself requires more patches to be functional, so
this series is an RFC.  For those who want to try it, a GitHub repo is available at [2].

Patch series is organized as follows.
 1- 5 code refactoring and simple hooks that will be used later
 6- 9 introduce kvm type and tdx type. disallow non-usable operations
10-15 wire up necessary TDX kvm ioctl to initialize TD guest
16-24 load TDVF and setup necessary info for TDVF
25-26 prohibit unsupported operations related to SMM
28-29 force x2apic and disable PIC
30-31 allows user to specify sha384 value for TD guest
32-33 add qmp operation to query KVM capability and TD info
34    make reboot action shutdown
35-43 suppress level-trigger/SMI/INIT/SIPI
44    suppress S3/S4

TODO:
- gdb support
- sanity check of CPUID

Changes from v1:
- suppress level trigger/SMI/INIT/SIPI related to IOAPIC.
- add VM attribute sha384 to TD measurement.
- guest TSC Hz specification.

Links:
[1] KVM TDX patch series v2
https://patchwork.kernel.org/project/kvm/list/?series=510271
[2] intel public github
   kvm TDX branch: https://github.com/intel/tdx/tree/kvm
   TDX guest branch: https://github.com/intel/tdx/tree/guest
   qemu TDX https://github.com/intel/qemu-tdx
[3] TDVF
https://github.com/tianocore/edk2-staging/tree/TDVF
[4] TDX specs
https://software.intel.com/content/www/us/en/develop/articles/intel-trust-domain-extensions.html

Chenyi Qiang (1):
  qmp: add query-tdx-capabilities query-tdx command

Isaku Yamahata (29):
  kvm: Switch KVM_CAP_READONLY_MEM to a per-VM ioctl()
  vl: Introduce machine_init_done_late notifier
  i386/kvm: Skip KVM_X86_SETUP_MCE for TDX guests
  target/i386: kvm: don't synchronize guest tsc for TD guest
  i386/tdx: Frame in the call for KVM_TDX_INIT_VCPU
  hw/i386: Add definitions from UEFI spec for volumes, resources, etc...
  i386/tdx: Add definitions for TDVF metadata
  hw/i386: refactor e820_add_entry()
  hw/i386/e820: introduce a helper function to change type of e820
  i386/tdx: Parse tdx metadata and store the result into TdxGuestState
  i386/tdx: Create the TD HOB list upon machine init done
  i386/tdx: Add TDVF memory via INIT_MEM_REGION
  i386/tdx: Use KVM_TDX_INIT_VCPU to pass HOB to TDVF
  pci-host/q35: Move PAM initialization above SMRAM initialization
  q35: Introduce smm_ranges property for q35-pci-host
  qom: implement property helper for sha384
  target/i386/tdx: Allows mrconfigid/mrowner/mrownerconfig for
TDX_INIT_VM
  tdx: add kvm_tdx_enabled() accessor for later use
  target/i386/tdx: set reboot action to shutdown when tdx
  ioapic: add property to disable level interrupt
  hw/i386: add eoi_intercept_unsupported member to X86MachineState
  hw/i386: add option to forcibly report edge trigger in acpi tables
  hw/i386: plug eoi_intercept_unsupported to ioapic
  ioapic: add property to disallow SMI delivery mode
  hw/i386: add a flag to disallow SMI
  ioapic: add property to disallow INIT/SIPI delivery mode
  hw/i386: add a flag to disable init/sipi delivery mode of interrupt
  i386/tdx: disallow level interrupt and SMI/INIT/SIPI delivery mode
  i386/tdx: disable S3/S4 unconditionally

Sean Christopherson (9):
  target/i386: Expose x86_cpu_get_supported_feature_word() for TDX
  i386/kvm: Move architectural CPUID leaf generation to separate helper
  i386/kvm: Squash getting/putting guest state for TDX VMs
  i386/tdx: Frame in tdx_get_supported_cpuid with KVM_TDX_CAPABILITIES
  i386/tdx: Add hook to require generic device loader
  i386/tdx: Add MMIO HOB entries
  q35: Move PCIe BAR check above PAM check in mch_write_config()
  i386/tdx: Force x2apic mode and routing for TDs
  target/i386: Add machine option to disable PIC/8259

Xiaoyao Li (5):
  linux-headers: Update headers to pull in TDX API changes
  hw/i386: Introduce kvm-type for TDX guest
  hw/i386: Initialize TDX via KVM ioctl() when kvm_type is TDX
  i386/tdx: Implement user specified tsc frequency
  target/i386/tdx: Finalize the TD's measurement when machine is done

 accel/kvm/kvm-all.c  |   4 +-
 default-configs/devices/i386-softmmu.mak |   1 +
 hw/core/generic-loader.c |   5 +
 hw/core/machine.c|  26 ++
 hw/core/meson.build  |   3 +
 hw/core/tdvf-stub.c  |   6 +
 hw/i386/Kconfig  |   5 +
 hw/i386/acpi-build.c | 103 +++--
 hw/i386/acpi-common.c|  74 +++-
 hw/i386/e820_memory_layout.c | 114 +-
 hw/i386/e820_memory_layout.h |   1 +
 hw/i386/meson.build  |   1 +
 hw/i386/microvm.c|   7 +-
 hw/i386/pc.c |  18 +
 hw/i386/pc_piix.c|   7 +-
 hw/i386/pc_q35.c |   9 +-
 hw/i386/pc_sysfw.c   |   6 +
 hw/i386/tdvf-hob.c

[RFC PATCH v2 09/44] target/i386: kvm: don't synchronize guest tsc for TD guest

2021-07-07 Thread isaku . yamahata
From: Isaku Yamahata 

Make kvm_synchronize_all_tsc() a no-op for TD guests.

TDX module specification, 9.11.1 TSC Virtualization
"Virtual TSC values are consistent among all the TD's VCPUs at the
level supported by the CPU".
There is no need for QEMU to synchronize the TSC, and the VMM can't access
the guest TSC. In fact, do_kvm_synchronize_tsc() hits an assertion due to
the failure to write to the guest TSC.

> qemu/target/i386/kvm.c:235: kvm_get_tsc: Assertion `ret == 1' failed.

Signed-off-by: Isaku Yamahata 
---
 target/i386/kvm/kvm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
index c29cb420a1..ecb1714920 100644
--- a/target/i386/kvm/kvm.c
+++ b/target/i386/kvm/kvm.c
@@ -254,7 +254,7 @@ void kvm_synchronize_all_tsc(void)
 {
 CPUState *cpu;
 
-if (kvm_enabled()) {
+if (kvm_enabled() && vm_type != KVM_X86_TDX_VM) {
 CPU_FOREACH(cpu) {
 run_on_cpu(cpu, do_kvm_synchronize_tsc, RUN_ON_CPU_NULL);
 }
-- 
2.25.1




  1   2   3   4   >