[PATCH v2] hvf: use standard CR0 and CR4 register definitions
Signed-off-by: Cameron Esfahani --- v2: + Fix duplicate line Roman Bolshakov found in review. --- target/i386/cpu.h | 2 ++ target/i386/hvf/hvf.c | 2 +- target/i386/hvf/vmx.h | 15 --- target/i386/hvf/x86.c | 6 +++--- target/i386/hvf/x86.h | 34 -- target/i386/hvf/x86_mmu.c | 2 +- target/i386/hvf/x86_task.c | 3 ++- 7 files changed, 17 insertions(+), 47 deletions(-) diff --git a/target/i386/cpu.h b/target/i386/cpu.h index 60d797d594..1286ec6e7a 100644 --- a/target/i386/cpu.h +++ b/target/i386/cpu.h @@ -225,6 +225,8 @@ typedef enum X86Seg { #define CR0_NE_MASK (1U << 5) #define CR0_WP_MASK (1U << 16) #define CR0_AM_MASK (1U << 18) +#define CR0_NW_MASK (1U << 29) +#define CR0_CD_MASK (1U << 30) #define CR0_PG_MASK (1U << 31) #define CR4_VME_MASK (1U << 0) diff --git a/target/i386/hvf/hvf.c b/target/i386/hvf/hvf.c index d72543dc31..48f3ef050c 100644 --- a/target/i386/hvf/hvf.c +++ b/target/i386/hvf/hvf.c @@ -455,7 +455,7 @@ void hvf_reset_vcpu(CPUState *cpu) { wvmcs(cpu->hvf_fd, VMCS_GUEST_PDPTE0 + i * 2, pdpte[i]); } -macvm_set_cr0(cpu->hvf_fd, 0x6010); +macvm_set_cr0(cpu->hvf_fd, CR0_CD_MASK | CR0_NW_MASK | CR0_ET_MASK); wvmcs(cpu->hvf_fd, VMCS_CR4_MASK, CR4_VMXE_MASK); wvmcs(cpu->hvf_fd, VMCS_CR4_SHADOW, 0x0); diff --git a/target/i386/hvf/vmx.h b/target/i386/hvf/vmx.h index 03d2c79b9c..8ec2e6414e 100644 --- a/target/i386/hvf/vmx.h +++ b/target/i386/hvf/vmx.h @@ -121,9 +121,10 @@ static inline void macvm_set_cr0(hv_vcpuid_t vcpu, uint64_t cr0) uint64_t pdpte[4] = {0, 0, 0, 0}; uint64_t efer = rvmcs(vcpu, VMCS_GUEST_IA32_EFER); uint64_t old_cr0 = rvmcs(vcpu, VMCS_GUEST_CR0); -uint64_t mask = CR0_PG | CR0_CD | CR0_NW | CR0_NE | CR0_ET; +uint64_t mask = CR0_PG_MASK | CR0_CD_MASK | CR0_NW_MASK | +CR0_NE_MASK | CR0_ET_MASK; -if ((cr0 & CR0_PG) && (rvmcs(vcpu, VMCS_GUEST_CR4) & CR4_PAE) && +if ((cr0 & CR0_PG_MASK) && (rvmcs(vcpu, VMCS_GUEST_CR4) & CR4_PAE_MASK) && !(efer & MSR_EFER_LME)) { address_space_read(&address_space_memory, rvmcs(vcpu, VMCS_GUEST_CR3) & ~0x1f, @@ 
-138,17 +139,17 @@ static inline void macvm_set_cr0(hv_vcpuid_t vcpu, uint64_t cr0) wvmcs(vcpu, VMCS_CR0_SHADOW, cr0); if (efer & MSR_EFER_LME) { -if (!(old_cr0 & CR0_PG) && (cr0 & CR0_PG)) { +if (!(old_cr0 & CR0_PG_MASK) && (cr0 & CR0_PG_MASK)) { enter_long_mode(vcpu, cr0, efer); } -if (/*(old_cr0 & CR0_PG) &&*/ !(cr0 & CR0_PG)) { +if (!(cr0 & CR0_PG_MASK)) { exit_long_mode(vcpu, cr0, efer); } } /* Filter new CR0 after we are finished examining it above. */ -cr0 = (cr0 & ~(mask & ~CR0_PG)); -wvmcs(vcpu, VMCS_GUEST_CR0, cr0 | CR0_NE | CR0_ET); +cr0 = (cr0 & ~(mask & ~CR0_PG_MASK)); +wvmcs(vcpu, VMCS_GUEST_CR0, cr0 | CR0_NE_MASK | CR0_ET_MASK); hv_vcpu_invalidate_tlb(vcpu); hv_vcpu_flush(vcpu); @@ -156,7 +157,7 @@ static inline void macvm_set_cr0(hv_vcpuid_t vcpu, uint64_t cr0) static inline void macvm_set_cr4(hv_vcpuid_t vcpu, uint64_t cr4) { -uint64_t guest_cr4 = cr4 | CR4_VMXE; +uint64_t guest_cr4 = cr4 | CR4_VMXE_MASK; wvmcs(vcpu, VMCS_GUEST_CR4, guest_cr4); wvmcs(vcpu, VMCS_CR4_SHADOW, cr4); diff --git a/target/i386/hvf/x86.c b/target/i386/hvf/x86.c index 3afcedc7fc..668c02de6e 100644 --- a/target/i386/hvf/x86.c +++ b/target/i386/hvf/x86.c @@ -119,7 +119,7 @@ bool x86_read_call_gate(struct CPUState *cpu, struct x86_call_gate *idt_desc, bool x86_is_protected(struct CPUState *cpu) { uint64_t cr0 = rvmcs(cpu->hvf_fd, VMCS_GUEST_CR0); -return cr0 & CR0_PE; +return cr0 & CR0_PE_MASK; } bool x86_is_real(struct CPUState *cpu) @@ -150,13 +150,13 @@ bool x86_is_long64_mode(struct CPUState *cpu) bool x86_is_paging_mode(struct CPUState *cpu) { uint64_t cr0 = rvmcs(cpu->hvf_fd, VMCS_GUEST_CR0); -return cr0 & CR0_PG; +return cr0 & CR0_PG_MASK; } bool x86_is_pae_enabled(struct CPUState *cpu) { uint64_t cr4 = rvmcs(cpu->hvf_fd, VMCS_GUEST_CR4); -return cr4 & CR4_PAE; +return cr4 & CR4_PAE_MASK; } target_ulong linear_addr(struct CPUState *cpu, target_ulong addr, X86Seg seg) diff --git a/target/i386/hvf/x86.h b/target/i386/hvf/x86.h index c95d5b2116..bc0170b2a8 100644 --- 
a/target/i386/hvf/x86.h +++ b/target/i386/hvf/x86.h @@ -100,40 +100,6 @@ typedef struct x86_reg_flags { }; } __attribute__ ((__packed__)) x86_reg_flags; -typedef enum x86_reg_cr0 { -CR0_PE =(1L << 0), -CR0_MP =(1L << 1), -CR0_EM =(1L << 2), -CR0_TS =(1L << 3), -CR0_ET =(1L << 4), -CR0_NE =(1L << 5), -CR0_WP =(1L << 16), -CR0_AM =(1L << 18), -CR0_NW =(1L << 29), -CR0_CD =(1L << 30), -
[PATCH v2] nrf51: Fix last GPIO CNF address
NRF51_GPIO_REG_CNF_END doesn't actually refer to the start of the last valid CNF register: it's referring to the last byte of the last valid CNF register. This hasn't been a problem up to now, as current implementation in memory.c turns an unaligned 4-byte read from 0x77f to a single byte read and the qtest only looks at the least-significant byte of the register. But when running with patches which fix unaligned accesses in memory.c, the qtest breaks. Considering NRF51 doesn't support unaligned accesses, the simplest fix is to actually set NRF51_GPIO_REG_CNF_END to the start of the last valid CNF register: 0x77c. Now, qtests work with or without the unaligned access patches. Reviewed-by: Cédric Le Goater Tested-by: Cédric Le Goater Reviewed-by: Joel Stanley Signed-off-by: Cameron Esfahani --- include/hw/gpio/nrf51_gpio.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/hw/gpio/nrf51_gpio.h b/include/hw/gpio/nrf51_gpio.h index 337ee534bb..1d62bbc928 100644 --- a/include/hw/gpio/nrf51_gpio.h +++ b/include/hw/gpio/nrf51_gpio.h @@ -42,7 +42,7 @@ #define NRF51_GPIO_REG_DIRSET 0x518 #define NRF51_GPIO_REG_DIRCLR 0x51C #define NRF51_GPIO_REG_CNF_START0x700 -#define NRF51_GPIO_REG_CNF_END 0x77F +#define NRF51_GPIO_REG_CNF_END 0x77C #define NRF51_GPIO_PULLDOWN 1 #define NRF51_GPIO_PULLUP 3 -- 2.24.0
Re: [PATCH v2 07/12] acpi: move aml builder code for rtc device
I'm curious why there's two ranges as well. In our branch of QEMU, I've had to modify this RTC creation code to have only one range instead of two ranges. Traditionally Macs have had one range for RTC and we have incompatibility with two ranges. If you could change it to one range without losing any compatibility, you'd get my thumbs up. Cameron Esfahani di...@apple.com "The cake is a lie." Common wisdom > On Apr 8, 2020, at 5:59 AM, Gerd Hoffmann wrote: > > Hi, > >>> +crs = aml_resource_template(); >>> +aml_append(crs, aml_io(AML_DECODE16, 0x0070, 0x0070, 0x10, 0x02)); >> maybe replace magic 0x0070 with macro >> RTC_BASE_ADDR > > Yes, that sounds better. > >>> +aml_append(crs, aml_irq_no_flags(8)); >>> +aml_append(crs, aml_io(AML_DECODE16, 0x0072, 0x0072, 0x02, 0x06)); >> >> one more comment, is this last io record correct? >> (looking at realize it maps only 2 bytes at 0x70) > > No idea, I've just moved around the code. > > Good question though. Also why this splitted in two ranges the first > place. Looking at physical hardware (my workstation) I see this: > >Device (RTC) >{ >Name (_HID, EisaId ("PNP0B00") /* AT Real-Time Clock */) // _HID: > Hardware ID >Name (_CRS, ResourceTemplate () // _CRS: Current Resource Settings >{ >IO (Decode16, >0x0070, // Range Minimum >0x0070, // Range Maximum >0x01, // Alignment >0x08, // Length >) >IRQNoFlags () >{8} >}) >} > > Clues anyone? > > thanks, > Gerd > >
Re: [PATCH v1 2/3] hvf: Make long mode enter and exit code clearer.
I'll update with your feedback. Cameron Esfahani di...@apple.com "We do what we must because we can." Aperture Science > On Apr 5, 2020, at 11:51 AM, Roman Bolshakov wrote: > > On Mon, Mar 30, 2020 at 05:16:05PM -0700, Cameron Esfahani wrote: >> Signed-off-by: Cameron Esfahani >> --- >> target/i386/hvf/vmx.h | 12 +++- >> 1 file changed, 7 insertions(+), 5 deletions(-) >> >> diff --git a/target/i386/hvf/vmx.h b/target/i386/hvf/vmx.h >> index 8ec2e6414e..1a1b150c97 100644 >> --- a/target/i386/hvf/vmx.h >> +++ b/target/i386/hvf/vmx.h >> @@ -121,6 +121,7 @@ static inline void macvm_set_cr0(hv_vcpuid_t vcpu, >> uint64_t cr0) >> uint64_t pdpte[4] = {0, 0, 0, 0}; >> uint64_t efer = rvmcs(vcpu, VMCS_GUEST_IA32_EFER); >> uint64_t old_cr0 = rvmcs(vcpu, VMCS_GUEST_CR0); >> +uint64_t changed_cr0 = old_cr0 ^ cr0; >> uint64_t mask = CR0_PG_MASK | CR0_CD_MASK | CR0_NW_MASK | >> CR0_NE_MASK | CR0_ET_MASK; >> >> @@ -139,11 +140,12 @@ static inline void macvm_set_cr0(hv_vcpuid_t vcpu, >> uint64_t cr0) >> wvmcs(vcpu, VMCS_CR0_SHADOW, cr0); >> >> if (efer & MSR_EFER_LME) { >> -if (!(old_cr0 & CR0_PG_MASK) && (cr0 & CR0_PG_MASK)) { >> -enter_long_mode(vcpu, cr0, efer); >> -} >> -if (!(cr0 & CR0_PG_MASK)) { >> -exit_long_mode(vcpu, cr0, efer); >> +if (changed_cr0 & CR0_PG_MASK) { >> +if (cr0 & CR0_PG_MASK) { >> +enter_long_mode(vcpu, cr0, efer); >> +} else { >> +exit_long_mode(vcpu, cr0, efer); >> +} >> } >> } >> >> -- >> 2.24.0 >> > > The changes look good but I have a few nitpicks. > > The summary line should not have "." at the end, please see > (https://wiki.qemu.org/Contribute/SubmitAPatch#Write_a_meaningful_commit_message): >> Whether the "single line summary of change" starts with a capital is a >> matter of taste, but we prefer that the summary does not end in "." 
> > Also, it would be good to mention in the title/commit message that with the > change QEMU is skipping unconditional writes to the guest IA32_EFER.LMA > and VMCS Entry Controls in compatibility mode, instead it does so only > when the actual switch out of long mode happens. (It's worth to mention > any other issues the patch helps to address, if any). > > The comment in the previous patch may be dropped here IMO. > > Besides that, > Reviewed-by: Roman Bolshakov > > Thanks, > Roman
Re: [PATCH v1 1/3] hvf: use standard CR0 and CR4 register definitions
Responses inline Cameron Esfahani di...@apple.com "We do what we must because we can." Aperture Science > On Apr 5, 2020, at 10:58 AM, Roman Bolshakov wrote: > > On Mon, Mar 30, 2020 at 05:16:04PM -0700, Cameron Esfahani wrote: >> Signed-off-by: Cameron Esfahani >> --- >> target/i386/cpu.h | 2 ++ >> target/i386/hvf/hvf.c | 1 + >> target/i386/hvf/vmx.h | 15 --- >> target/i386/hvf/x86.c | 6 +++--- >> target/i386/hvf/x86.h | 34 -- >> target/i386/hvf/x86_mmu.c | 2 +- >> target/i386/hvf/x86_task.c | 3 ++- >> 7 files changed, 17 insertions(+), 46 deletions(-) >> > > Hi Cameron, > > I'm sorry for delay. > > This is fun, I had very similar changeset I forgot to send quite a while > ago: > https://github.com/roolebo/qemu/commits/hvf-common-cr-constants > >> diff --git a/target/i386/hvf/hvf.c b/target/i386/hvf/hvf.c >> index d72543dc31..fef1ee7d70 100644 >> --- a/target/i386/hvf/hvf.c >> +++ b/target/i386/hvf/hvf.c >> @@ -455,6 +455,7 @@ void hvf_reset_vcpu(CPUState *cpu) { >> wvmcs(cpu->hvf_fd, VMCS_GUEST_PDPTE0 + i * 2, pdpte[i]); >> } >> >> +macvm_set_cr0(cpu->hvf_fd, CR0_CD_MASK | CR0_NW_MASK | CR0_ET_MASK); >> macvm_set_cr0(cpu->hvf_fd, 0x6010); > > The second macvm_set_cr0() is a duplicate of the first one and should be > dropped. > I'll fix it in next patch update, pending feedback from next issue. 
>> >> wvmcs(cpu->hvf_fd, VMCS_CR4_MASK, CR4_VMXE_MASK); >> diff --git a/target/i386/hvf/vmx.h b/target/i386/hvf/vmx.h >> index 03d2c79b9c..8ec2e6414e 100644 >> --- a/target/i386/hvf/vmx.h >> +++ b/target/i386/hvf/vmx.h >> @@ -138,17 +139,17 @@ static inline void macvm_set_cr0(hv_vcpuid_t vcpu, >> uint64_t cr0) >> wvmcs(vcpu, VMCS_CR0_SHADOW, cr0); >> >> if (efer & MSR_EFER_LME) { >> -if (!(old_cr0 & CR0_PG) && (cr0 & CR0_PG)) { >> +if (!(old_cr0 & CR0_PG_MASK) && (cr0 & CR0_PG_MASK)) { >> enter_long_mode(vcpu, cr0, efer); >> } >> -if (/*(old_cr0 & CR0_PG) &&*/ !(cr0 & CR0_PG)) { >> +if (!(cr0 & CR0_PG_MASK)) { > > IMO the patch should only change CR0_PG to CR0_PG_MASK without removal > of the commented condition. > > In the next patch you're improving how long mode exit is done and > replacement of the comment with an implementation fits better there. > The reason I removed that code was because checkpatch.pl scolded me for a patch with code commented out. I assumed that I'd get a similar warning from patchew.org about some erroneous coding styles. So I thought the easiest thing would be to remove that code as well. But I'll defer to you or Paolo: should I remove that commented code with this patch? >> exit_long_mode(vcpu, cr0, efer); >> } >> } >> >> /* Filter new CR0 after we are finished examining it above. */ >> -cr0 = (cr0 & ~(mask & ~CR0_PG)); >> -wvmcs(vcpu, VMCS_GUEST_CR0, cr0 | CR0_NE | CR0_ET); >> +cr0 = (cr0 & ~(mask & ~CR0_PG_MASK)); >> +wvmcs(vcpu, VMCS_GUEST_CR0, cr0 | CR0_NE_MASK | CR0_ET_MASK); >> >> hv_vcpu_invalidate_tlb(vcpu); >> hv_vcpu_flush(vcpu); >> -- >> 2.24.0 >> > > Best regards, > Roman
Re: [PATCH v1] nrf51: Fix last GPIO CNF address
I'm not burying anything. This patch is stand alone and all the tests do work. They work with or without Cedric's nee Andrew's patch. But, if some derivative of that patch is eventually implemented, something needs to be done for this NRF51 gpio qtest to work. There are two possibilities for the following qtest in microbit-test.c: > actual = qtest_readl(qts, NRF51_GPIO_BASE + NRF51_GPIO_REG_CNF_END) & > 0x01; > g_assert_cmpuint(actual, ==, 0x01); 1 - The code is purposefully reading 32-bits from an unaligned address which only partially refers to a documented register. And the only reason this code works is because that 32-bit value is turned into a 8-bit read. And if that's the case, then someone should update a comment in the test to indicate that this is being done purposefully. 2 - NRF51_GPIO_REG_CNF_END is incorrectly set to be 0x77F. Looking at the documentation for this chip, the last defined CNF register starts at 0x77C. The NRF51 doesn't support unaligned accesses, so I assume that possibility 1 isn't true. So, is NRF51_GPIO_REG_CNF_END supposed to refer to a valid register, or the end of the implementation space? If it's the former, then it should be adjusted to 0x77c and possibly update the below code in nrf51_gpio.c (line 156): > case NRF51_GPIO_REG_CNF_START ... NRF51_GPIO_REG_CNF_END: to become > case NRF51_GPIO_REG_CNF_START ... NRF51_GPIO_REG_CNF_END + 3: if unaligned access are expected to work. But, considering the NRF51 doesn't support unaligned accesses, I don't think this appropriate. If it's the latter, then the test cases in microbit-test.c should be updated to something like the following: > actual = qtest_readl(qts, NRF51_GPIO_BASE + NRF51_GPIO_REG_CNF_END - 3) & > 0x01; > g_assert_cmpuint(actual, ==, 0x01); Cameron Esfahani di...@apple.com "Americans are very skilled at creating a custom meaning from something that's mass-produced." 
Ann Powers > On Apr 7, 2020, at 1:44 AM, Philippe Mathieu-Daudé wrote: > >> Considering NRF51 doesn't support unaligned accesses, the simplest fix >> is to actually set NRF51_GPIO_REG_CNF_END to the start of the last valid >> CNF register: 0x77c. > > NAck. You are burying bugs deeper. This test has to work. > > What would be helpful is qtests with unaligned accesses (expected to work) > such your USB XHCI VERSION example.
[PATCH v1] nrf51: Fix last GPIO CNF address
NRF51_GPIO_REG_CNF_END doesn't actually refer to the start of the last valid CNF register: it's referring to the last byte of the last valid CNF register. This hasn't been a problem up to now, as current implementation in memory.c turns an unaligned 4-byte read from 0x77f to a single byte read and the qtest only looks at the least-significant byte of the register. But, when running with Cedric Le Goater's pending fix for unaligned accesses in memory.c, the qtest breaks. Considering NRF51 doesn't support unaligned accesses, the simplest fix is to actually set NRF51_GPIO_REG_CNF_END to the start of the last valid CNF register: 0x77c. Now, qtests work with or without Cedric's patch. Signed-off-by: Cameron Esfahani --- include/hw/gpio/nrf51_gpio.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/hw/gpio/nrf51_gpio.h b/include/hw/gpio/nrf51_gpio.h index 337ee534bb..1d62bbc928 100644 --- a/include/hw/gpio/nrf51_gpio.h +++ b/include/hw/gpio/nrf51_gpio.h @@ -42,7 +42,7 @@ #define NRF51_GPIO_REG_DIRSET 0x518 #define NRF51_GPIO_REG_DIRCLR 0x51C #define NRF51_GPIO_REG_CNF_START0x700 -#define NRF51_GPIO_REG_CNF_END 0x77F +#define NRF51_GPIO_REG_CNF_END 0x77C #define NRF51_GPIO_PULLDOWN 1 #define NRF51_GPIO_PULLUP 3 -- 2.24.0
Re: [PATCH v1] usb: Add read support for HCIVERSION register to XHCI
I had a look at this failing test case after applying Cedric's patch (https://github.com/legoater/qemu/commit/d57ac950c4be47a2bafd6c6a96dec2922c2ecd65). It looks like this is just a bug in the test case. The test case is attempting to verify that the CNF registers are programmed correctly by sampling the first and last CNF register. The bug is that NRF51_GPIO_REG_CNF_END doesn't actually refer to the start of the last valid CNF register. It refers to the last byte of the last valid CNF register. So either NRF51_GPIO_REG_CNF_END needs to be adjusted to 0x77C or we need to subtract 3 to get to the start of the register. Considering the NRF51 doesn't appear to support unaligned access, it seems like changing NRF51_GPIO_REG_CNF_END to 0x77C is reasonable. Here's a patch which seems to fix it for me: diff --git a/include/hw/gpio/nrf51_gpio.h b/include/hw/gpio/nrf51_gpio.h index 337ee53..1d62bbc 100644 --- a/include/hw/gpio/nrf51_gpio.h +++ b/include/hw/gpio/nrf51_gpio.h @@ -42,7 +42,7 @@ #define NRF51_GPIO_REG_DIRSET 0x518 #define NRF51_GPIO_REG_DIRCLR 0x51C #define NRF51_GPIO_REG_CNF_START0x700 -#define NRF51_GPIO_REG_CNF_END 0x77F +#define NRF51_GPIO_REG_CNF_END 0x77C #define NRF51_GPIO_PULLDOWN 1 #define NRF51_GPIO_PULLUP 3 Considering this change works for pre-Cedric patch and post, I'll post an official version shortly. And hopefully this unblocks review of Cedric's patch... Cameron Esfahani di...@apple.com "The cake is a lie." Common wisdom > On Apr 1, 2020, at 4:23 AM, Cédric Le Goater wrote: > > Hello, > > On 3/31/20 1:02 PM, Philippe Mathieu-Daudé wrote: >> On 3/31/20 11:57 AM, Cameron Esfahani wrote: >>> Philippe - >>> From what I've seen, access size has nothing to do with alignment. >> >> Yes, I was wondering if you were using unaligned accesses. 
>> >> I *think* the correct fix is in the "memory: Support unaligned accesses on >> aligned-only models" patch from Andrew Jeffery: >> https://www.mail-archive.com/qemu-devel@nongnu.org/msg461247.html > > Here is an updated version I have been keeping since : > > > https://github.com/legoater/qemu/commit/d57ac950c4be47a2bafd6c6a96dec2922c2ecd65 > > which breaks qtest : > > microbit-test.c:307:test_nrf51_gpio: assertion failed (actual == 0x01): > (0 == 1) > > I haven't had time to look at this closely but it would be nice to get this > patch merged. It's a requirement for the Aspeed ADC model. > > Thanks, > > c. > >>> >>> What the code in access_with_adjusted_size() will do is make sure that >>> "size" is >= min_access_size and <= max_access_size. >>> >>> So reading 2-bytes from address 2 turns into reading 4-bytes from address >>> 2: xhci_cap_read() is called with reg 2, size 4. >>> >>> But, due to the fact our change to support reg 2 only returns back 2-bytes, >>> and how the loops work in access_with_adjusted_size(), we only call >>> xhci_cap_read() once. >>> >>> It seems like we should also change impl.min_access_size for xhci_cap_ops >>> to be 2. >>> >>> But, after that, to support people doing strange things like reading >>> traditionally 4-byte values as 2 2-byte values, we probably need to change >>> xhci_cap_read() to handle every memory range in steps of 2-bytes. >>> >>> But I'll defer to Gerd on this... >>> >>> Cameron Esfahani >>> di...@apple.com >>> >>> "Americans are very skilled at creating a custom meaning from something >>> that's mass-produced." >>> >>> Ann Powers >>> >>> >>>> On Mar 31, 2020, at 12:52 AM, Philippe Mathieu-Daudé >>>> wrote: >>>> >>>> On 3/30/20 11:44 PM, Cameron Esfahani via wrote: >>>>> macOS will read HCIVERSION separate from CAPLENGTH. Add a distinct >>>>> handler for that register. >>>> >>>> Apparently a fix for https://bugs.launchpad.net/qemu/+bug/1693050. 
>>>> >>>>> Signed-off-by: Cameron Esfahani >>>>> --- >>>>> hw/usb/hcd-xhci.c | 3 +++ >>>>> 1 file changed, 3 insertions(+) >>>>> diff --git a/hw/usb/hcd-xhci.c b/hw/usb/hcd-xhci.c >>>>> index b330e36fe6..061f8438de 100644 >>>>> --- a/hw/usb/hcd-xhci.c >>>>> +++ b/hw/usb/hcd-xhci.c >>>>> @@ -2739,6 +2739,9 @@ static uint64_t xhci_cap_read(void *ptr, hwaddr >>>>> reg, unsigned size) >>>
Re: [PATCH v1] usb: Add read support for HCIVERSION register to XHCI
Actually, reading the specification a little more, the only fields which are documented as being smaller than 4-bytes are CAPLENGTH (1-byte) and HCIVERSION (2-bytes). So maybe change impl.min_access_size to 1 and rely on the fact that traditionally callers haven't read less than 4-bytes for any of the 4-byte fields... Cameron Esfahani di...@apple.com "In the elder days of Art, Builders wrought with greatest care each minute and unseen part; For the gods see everywhere." "The Builders", H. W. Longfellow > On Mar 31, 2020, at 2:57 AM, Cameron Esfahani via > wrote: > > Philippe - > From what I've seen, access size has nothing to do with alignment. > > What the code in access_with_adjusted_size() will do is make sure that "size" > is >= min_access_size and <= max_access_size. > > So reading 2-bytes from address 2 turns into reading 4-bytes from address 2: > xhci_cap_read() is called with reg 2, size 4. > > But, due to the fact our change to support reg 2 only returns back 2-bytes, > and how the loops work in access_with_adjusted_size(), we only call > xhci_cap_read() once. > > It seems like we should also change impl.min_access_size for xhci_cap_ops to > be 2. > > But, after that, to support people doing strange things like reading > traditionally 4-byte values as 2 2-byte values, we probably need to change > xhci_cap_read() to handle every memory range in steps of 2-bytes. > > But I'll defer to Gerd on this... >
Re: [PATCH v1] usb: Add read support for HCIVERSION register to XHCI
Philippe - From what I've seen, access size has nothing to do with alignment. What the code in access_with_adjusted_size() will do is make sure that "size" is >= min_access_size and <= max_access_size. So reading 2-bytes from address 2 turns into reading 4-bytes from address 2: xhci_cap_read() is called with reg 2, size 4. But, due to the fact our change to support reg 2 only returns back 2-bytes, and how the loops work in access_with_adjusted_size(), we only call xhci_cap_read() once. It seems like we should also change impl.min_access_size for xhci_cap_ops to be 2. But, after that, to support people doing strange things like reading traditionally 4-byte values as 2 2-byte values, we probably need to change xhci_cap_read() to handle every memory range in steps of 2-bytes. But I'll defer to Gerd on this... Cameron Esfahani di...@apple.com "Americans are very skilled at creating a custom meaning from something that's mass-produced." Ann Powers > On Mar 31, 2020, at 12:52 AM, Philippe Mathieu-Daudé > wrote: > > On 3/30/20 11:44 PM, Cameron Esfahani via wrote: >> macOS will read HCIVERSION separate from CAPLENGTH. Add a distinct >> handler for that register. > > Apparently a fix for https://bugs.launchpad.net/qemu/+bug/1693050. 
> >> Signed-off-by: Cameron Esfahani >> --- >> hw/usb/hcd-xhci.c | 3 +++ >> 1 file changed, 3 insertions(+) >> diff --git a/hw/usb/hcd-xhci.c b/hw/usb/hcd-xhci.c >> index b330e36fe6..061f8438de 100644 >> --- a/hw/usb/hcd-xhci.c >> +++ b/hw/usb/hcd-xhci.c >> @@ -2739,6 +2739,9 @@ static uint64_t xhci_cap_read(void *ptr, hwaddr reg, >> unsigned size) >> case 0x00: /* HCIVERSION, CAPLENGTH */ >> ret = 0x0100 | LEN_CAP; >> break; >> +case 0x02: /* HCIVERSION */ >> +ret = 0x0100; >> +break; > > But we have: > > static const MemoryRegionOps xhci_cap_ops = { >.read = xhci_cap_read, >.write = xhci_cap_write, >.valid.min_access_size = 1, >.valid.max_access_size = 4, >.impl.min_access_size = 4, >.impl.max_access_size = 4, >.endianness = DEVICE_LITTLE_ENDIAN, > }; > > IIUC ".impl.min_access_size = 4" means the case 'reg == 2' can not happen. It > seems we have a bug in memory.c elsewhere. > > How can we reproduce? > > If not easy, can you share the backtrace of: > > -- >8 -- > diff --git a/hw/usb/hcd-xhci.c b/hw/usb/hcd-xhci.c > index b330e36fe6..d021129f3f 100644 > --- a/hw/usb/hcd-xhci.c > +++ b/hw/usb/hcd-xhci.c > @@ -2735,6 +2735,7 @@ static uint64_t xhci_cap_read(void *ptr, hwaddr reg, > unsigned size) > XHCIState *xhci = ptr; > uint32_t ret; > > +assert(reg != 2); > switch (reg) { > case 0x00: /* HCIVERSION, CAPLENGTH */ > ret = 0x0100 | LEN_CAP; > --- > >> case 0x04: /* HCSPARAMS 1 */ >> ret = ((xhci->numports_2+xhci->numports_3)<<24) >> | (xhci->numintrs<<8) | xhci->numslots;
Re: [PATCH] i386: hvf: Reset IRQ inhibition after moving RIP
Reviewed-by: Cameron Esfahani LGTM. Cameron Esfahani di...@apple.com "There are times in the life of a nation when the only place a decent man can find himself is in prison." > On Mar 28, 2020, at 10:44 AM, Roman Bolshakov wrote: > > The sequence of instructions exposes an issue: > sti > hlt > > Interrupts cannot be delivered to hvf after hlt instruction cpu because > HF_INHIBIT_IRQ_MASK is set just before hlt is handled and never reset > after moving instruction pointer beyond hlt. > > So, after hvf_vcpu_exec() returns, CPU thread gets locked up forever in > qemu_wait_io_event() (cpu_thread_is_idle() evaluates inhibition > flag and considers the CPU idle if the flag is set). > > Cc: Cameron Esfahani > Signed-off-by: Roman Bolshakov > --- > target/i386/hvf/vmx.h | 3 +++ > 1 file changed, 3 insertions(+) > > diff --git a/target/i386/hvf/vmx.h b/target/i386/hvf/vmx.h > index 03d2c79b9c..ce2a1532d5 100644 > --- a/target/i386/hvf/vmx.h > +++ b/target/i386/hvf/vmx.h > @@ -167,6 +167,8 @@ static inline void macvm_set_cr4(hv_vcpuid_t vcpu, > uint64_t cr4) > > static inline void macvm_set_rip(CPUState *cpu, uint64_t rip) > { > +X86CPU *x86_cpu = X86_CPU(cpu); > +CPUX86State *env = &x86_cpu->env; > uint64_t val; > > /* BUG, should take considering overlap.. */ > @@ -176,6 +178,7 @@ static inline void macvm_set_rip(CPUState *cpu, uint64_t > rip) >val = rvmcs(cpu->hvf_fd, VMCS_GUEST_INTERRUPTIBILITY); >if (val & (VMCS_INTERRUPTIBILITY_STI_BLOCKING | >VMCS_INTERRUPTIBILITY_MOVSS_BLOCKING)) { > +env->hflags &= ~HF_INHIBIT_IRQ_MASK; > wvmcs(cpu->hvf_fd, VMCS_GUEST_INTERRUPTIBILITY, >val & ~(VMCS_INTERRUPTIBILITY_STI_BLOCKING | >VMCS_INTERRUPTIBILITY_MOVSS_BLOCKING)); > -- > 2.24.1 > >
[PATCH v1 3/3] hvf: Support AVX512 guests on capable hardware
macOS lazily enables AVX512. Explicitly enable it if the processor supports it. cpu_x86_cpuid() tries to handle OSXSAVE but refers to env->cr[4] for the guest copy of CR4. HVF doesn't support caching CPUID values like KVM, so we need to track it ourselves. Signed-off-by: Cameron Esfahani --- target/i386/cpu.h| 1 + target/i386/hvf/hvf.c| 68 ++-- target/i386/hvf/vmx.h| 9 +- target/i386/hvf/x86hvf.c | 2 +- 4 files changed, 76 insertions(+), 4 deletions(-) diff --git a/target/i386/cpu.h b/target/i386/cpu.h index 1286ec6e7a..f3864d0fac 100644 --- a/target/i386/cpu.h +++ b/target/i386/cpu.h @@ -1591,6 +1591,7 @@ typedef struct CPUX86State { struct kvm_nested_state *nested_state; #endif #if defined(CONFIG_HVF) +bool osxsave_enabled; HVFX86EmulatorState *hvf_emul; #endif diff --git a/target/i386/hvf/hvf.c b/target/i386/hvf/hvf.c index fef1ee7d70..68a85c3b9b 100644 --- a/target/i386/hvf/hvf.c +++ b/target/i386/hvf/hvf.c @@ -65,6 +65,7 @@ #include #include +#include #include "exec/address-spaces.h" #include "hw/i386/apic_internal.h" @@ -458,7 +459,7 @@ void hvf_reset_vcpu(CPUState *cpu) { macvm_set_cr0(cpu->hvf_fd, CR0_CD_MASK | CR0_NW_MASK | CR0_ET_MASK); macvm_set_cr0(cpu->hvf_fd, 0x6010); -wvmcs(cpu->hvf_fd, VMCS_CR4_MASK, CR4_VMXE_MASK); +wvmcs(cpu->hvf_fd, VMCS_CR4_MASK, CR4_VMXE_MASK | CR4_OSXSAVE_MASK); wvmcs(cpu->hvf_fd, VMCS_CR4_SHADOW, 0x0); wvmcs(cpu->hvf_fd, VMCS_GUEST_CR4, CR4_VMXE_MASK); @@ -541,6 +542,55 @@ static void dummy_signal(int sig) { } +static bool enable_avx512_thread_state(void) +{ +x86_avx512_state_t state; +uint32_t ebx; +kern_return_t ret; +unsigned int count; + +/* + * macOS lazily enables AVX512 support. Enable it explicitly if the + * processor supports it. 
+ */ + +host_cpuid(7, 0, NULL, &ebx, NULL, NULL); +if ((ebx & CPUID_7_0_EBX_AVX512F) == 0) { +return false; +} + +memset(&state, 0, sizeof(x86_avx512_state_t)); + +/* Get AVX state */ +count = x86_AVX_STATE_COUNT; +ret = thread_get_state(mach_thread_self(), + x86_AVX_STATE, + (thread_state_t) &state, + &count); +if (ret != KERN_SUCCESS) { +return false; +} +if (count != x86_AVX_STATE_COUNT) { +return false; +} +if (state.ash.flavor != x86_AVX_STATE64) { +return false; +} +state.ash.flavor = x86_AVX512_STATE64; +state.ash.count = x86_AVX512_STATE64_COUNT; + +/* Now set as AVX512 */ +ret = thread_set_state(mach_thread_self(), + state.ash.flavor, + (thread_state_t) &state.ufs.as64, + state.ash.count); +if (ret != KERN_SUCCESS) { +return false; +} + +return true; +} + int hvf_init_vcpu(CPUState *cpu) { @@ -826,6 +876,18 @@ int hvf_vcpu_exec(CPUState *cpu) cpu_x86_cpuid(env, rax, rcx, &rax, &rbx, &rcx, &rdx); +if (rax == 1) { +/* + * cpu_x86_cpuid tries to handle OSXSAVE but refers to + * env->cr[4] for the guest copy of CR4. This isn't + * updated regularly so we track it ourselves in + * env->osxsave_enabled. 
+ */ +if ((rcx & CPUID_EXT_XSAVE) && env->osxsave_enabled) { +rcx |= CPUID_EXT_OSXSAVE; +} +} + wreg(cpu->hvf_fd, HV_X86_RAX, rax); wreg(cpu->hvf_fd, HV_X86_RBX, rbx); wreg(cpu->hvf_fd, HV_X86_RCX, rcx); @@ -889,7 +951,7 @@ int hvf_vcpu_exec(CPUState *cpu) break; } case 4: { -macvm_set_cr4(cpu->hvf_fd, RRX(env, reg)); +macvm_set_cr4(env, cpu->hvf_fd, RRX(env, reg)); break; } case 8: { @@ -966,6 +1028,8 @@ static int hvf_accel_init(MachineState *ms) hv_return_t ret; HVFState *s; +enable_avx512_thread_state(); + ret = hv_vm_create(HV_VM_DEFAULT); assert_hvf_ok(ret); diff --git a/target/i386/hvf/vmx.h b/target/i386/hvf/vmx.h index 1a1b150c97..dccd5ceb0f 100644 --- a/target/i386/hvf/vmx.h +++ b/target/i386/hvf/vmx.h @@ -157,13 +157,20 @@ static inline void macvm_set_cr0(hv_vcpuid_t vcpu, uint64_t cr0) hv_vcpu_flush(vcpu); } -static inline void macvm_set_cr4(hv_vcpuid_t vcpu, uint64_t cr4) +static inline void macvm_set_cr4(CPUX86State *env, hv_vcpuid_t vcpu, + uint64_t cr4) { uint64_t guest_cr4 = cr4 | CR4_VMXE_MASK; wvmcs(vcpu, VMCS_GUEST_CR4, guest_cr4); wvmcs(vcpu, VMCS_CR4_SHADOW, cr4); +/* + * Track whether OSXSAVE is enabled so we can properly return it + * for CPUID 1. + */ +env->osxsave_enabled = ((cr4 & CR4_OSXSAVE_MASK) != 0); +
[PATCH v1 1/3] hvf: use standard CR0 and CR4 register definitions
Signed-off-by: Cameron Esfahani --- target/i386/cpu.h | 2 ++ target/i386/hvf/hvf.c | 1 + target/i386/hvf/vmx.h | 15 --- target/i386/hvf/x86.c | 6 +++--- target/i386/hvf/x86.h | 34 -- target/i386/hvf/x86_mmu.c | 2 +- target/i386/hvf/x86_task.c | 3 ++- 7 files changed, 17 insertions(+), 46 deletions(-) diff --git a/target/i386/cpu.h b/target/i386/cpu.h index 60d797d594..1286ec6e7a 100644 --- a/target/i386/cpu.h +++ b/target/i386/cpu.h @@ -225,6 +225,8 @@ typedef enum X86Seg { #define CR0_NE_MASK (1U << 5) #define CR0_WP_MASK (1U << 16) #define CR0_AM_MASK (1U << 18) +#define CR0_NW_MASK (1U << 29) +#define CR0_CD_MASK (1U << 30) #define CR0_PG_MASK (1U << 31) #define CR4_VME_MASK (1U << 0) diff --git a/target/i386/hvf/hvf.c b/target/i386/hvf/hvf.c index d72543dc31..fef1ee7d70 100644 --- a/target/i386/hvf/hvf.c +++ b/target/i386/hvf/hvf.c @@ -455,6 +455,7 @@ void hvf_reset_vcpu(CPUState *cpu) { wvmcs(cpu->hvf_fd, VMCS_GUEST_PDPTE0 + i * 2, pdpte[i]); } +macvm_set_cr0(cpu->hvf_fd, CR0_CD_MASK | CR0_NW_MASK | CR0_ET_MASK); macvm_set_cr0(cpu->hvf_fd, 0x6010); wvmcs(cpu->hvf_fd, VMCS_CR4_MASK, CR4_VMXE_MASK); diff --git a/target/i386/hvf/vmx.h b/target/i386/hvf/vmx.h index 03d2c79b9c..8ec2e6414e 100644 --- a/target/i386/hvf/vmx.h +++ b/target/i386/hvf/vmx.h @@ -121,9 +121,10 @@ static inline void macvm_set_cr0(hv_vcpuid_t vcpu, uint64_t cr0) uint64_t pdpte[4] = {0, 0, 0, 0}; uint64_t efer = rvmcs(vcpu, VMCS_GUEST_IA32_EFER); uint64_t old_cr0 = rvmcs(vcpu, VMCS_GUEST_CR0); -uint64_t mask = CR0_PG | CR0_CD | CR0_NW | CR0_NE | CR0_ET; +uint64_t mask = CR0_PG_MASK | CR0_CD_MASK | CR0_NW_MASK | +CR0_NE_MASK | CR0_ET_MASK; -if ((cr0 & CR0_PG) && (rvmcs(vcpu, VMCS_GUEST_CR4) & CR4_PAE) && +if ((cr0 & CR0_PG_MASK) && (rvmcs(vcpu, VMCS_GUEST_CR4) & CR4_PAE_MASK) && !(efer & MSR_EFER_LME)) { address_space_read(&address_space_memory, rvmcs(vcpu, VMCS_GUEST_CR3) & ~0x1f, @@ -138,17 +139,17 @@ static inline void macvm_set_cr0(hv_vcpuid_t vcpu, uint64_t cr0) wvmcs(vcpu, 
VMCS_CR0_SHADOW, cr0); if (efer & MSR_EFER_LME) { -if (!(old_cr0 & CR0_PG) && (cr0 & CR0_PG)) { +if (!(old_cr0 & CR0_PG_MASK) && (cr0 & CR0_PG_MASK)) { enter_long_mode(vcpu, cr0, efer); } -if (/*(old_cr0 & CR0_PG) &&*/ !(cr0 & CR0_PG)) { +if (!(cr0 & CR0_PG_MASK)) { exit_long_mode(vcpu, cr0, efer); } } /* Filter new CR0 after we are finished examining it above. */ -cr0 = (cr0 & ~(mask & ~CR0_PG)); -wvmcs(vcpu, VMCS_GUEST_CR0, cr0 | CR0_NE | CR0_ET); +cr0 = (cr0 & ~(mask & ~CR0_PG_MASK)); +wvmcs(vcpu, VMCS_GUEST_CR0, cr0 | CR0_NE_MASK | CR0_ET_MASK); hv_vcpu_invalidate_tlb(vcpu); hv_vcpu_flush(vcpu); @@ -156,7 +157,7 @@ static inline void macvm_set_cr0(hv_vcpuid_t vcpu, uint64_t cr0) static inline void macvm_set_cr4(hv_vcpuid_t vcpu, uint64_t cr4) { -uint64_t guest_cr4 = cr4 | CR4_VMXE; +uint64_t guest_cr4 = cr4 | CR4_VMXE_MASK; wvmcs(vcpu, VMCS_GUEST_CR4, guest_cr4); wvmcs(vcpu, VMCS_CR4_SHADOW, cr4); diff --git a/target/i386/hvf/x86.c b/target/i386/hvf/x86.c index 3afcedc7fc..668c02de6e 100644 --- a/target/i386/hvf/x86.c +++ b/target/i386/hvf/x86.c @@ -119,7 +119,7 @@ bool x86_read_call_gate(struct CPUState *cpu, struct x86_call_gate *idt_desc, bool x86_is_protected(struct CPUState *cpu) { uint64_t cr0 = rvmcs(cpu->hvf_fd, VMCS_GUEST_CR0); -return cr0 & CR0_PE; +return cr0 & CR0_PE_MASK; } bool x86_is_real(struct CPUState *cpu) @@ -150,13 +150,13 @@ bool x86_is_long64_mode(struct CPUState *cpu) bool x86_is_paging_mode(struct CPUState *cpu) { uint64_t cr0 = rvmcs(cpu->hvf_fd, VMCS_GUEST_CR0); -return cr0 & CR0_PG; +return cr0 & CR0_PG_MASK; } bool x86_is_pae_enabled(struct CPUState *cpu) { uint64_t cr4 = rvmcs(cpu->hvf_fd, VMCS_GUEST_CR4); -return cr4 & CR4_PAE; +return cr4 & CR4_PAE_MASK; } target_ulong linear_addr(struct CPUState *cpu, target_ulong addr, X86Seg seg) diff --git a/target/i386/hvf/x86.h b/target/i386/hvf/x86.h index c95d5b2116..bc0170b2a8 100644 --- a/target/i386/hvf/x86.h +++ b/target/i386/hvf/x86.h @@ -100,40 +100,6 @@ typedef struct x86_reg_flags 
{ }; } __attribute__ ((__packed__)) x86_reg_flags; -typedef enum x86_reg_cr0 { -CR0_PE =(1L << 0), -CR0_MP =(1L << 1), -CR0_EM =(1L << 2), -CR0_TS =(1L << 3), -CR0_ET =(1L << 4), -CR0_NE =(1L << 5), -CR0_WP =(1L << 16), -CR0_AM =(1L << 18), -CR0_NW =(1L << 29), -CR0_CD =(1L << 30), -CR0_PG =(1L << 31), -} x86_reg_cr0; - -typedef enum x86_reg_cr4 { -CR4_VME =(1L <
[PATCH v1 0/3] hvf: Support AVX512 guests and cleanup
HVF had its own copy of the CR0 and CR4 register definitions. Remove them in favor of the definitions in target/i386/cpu.h. Change long mode enter and exit code to be clearer. Support AVX512 guests on capable hardware. This involves two separate changes: - Correctly manage the OSXSAVE bit in CPUID[0x01]. cpu_x86_cpuid() attempts to account for OSXSAVE, but it refers to env->cr[4] for the guest copy of CR4. That field isn't up to date under HVF. Instead, we track OSXSAVE separately, by adding OSXSAVE to CR4 mask and saving the state. Then, when handling CPUID[0x01] in EXIT_REASON_CPUID, we reflect the current state of CR4[OSXSAVE]. - macOS lazily enables AVX512 for processes. Explicitly enable AVX512 for QEMU. With these two changes, guests can correctly detect and enable AVX512. Cameron Esfahani (3): hvf: use standard CR0 and CR4 register definitions hvf: Make long mode enter and exit code clearer. hvf: Support AVX512 guests on capable hardware target/i386/cpu.h | 3 ++ target/i386/hvf/hvf.c | 69 -- target/i386/hvf/vmx.h | 32 -- target/i386/hvf/x86.c | 6 ++-- target/i386/hvf/x86.h | 34 --- target/i386/hvf/x86_mmu.c | 2 +- target/i386/hvf/x86_task.c | 3 +- target/i386/hvf/x86hvf.c | 2 +- 8 files changed, 98 insertions(+), 53 deletions(-) -- 2.24.0
[PATCH v1 2/3] hvf: Make long mode enter and exit code clearer.
Signed-off-by: Cameron Esfahani --- target/i386/hvf/vmx.h | 12 +++- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/target/i386/hvf/vmx.h b/target/i386/hvf/vmx.h index 8ec2e6414e..1a1b150c97 100644 --- a/target/i386/hvf/vmx.h +++ b/target/i386/hvf/vmx.h @@ -121,6 +121,7 @@ static inline void macvm_set_cr0(hv_vcpuid_t vcpu, uint64_t cr0) uint64_t pdpte[4] = {0, 0, 0, 0}; uint64_t efer = rvmcs(vcpu, VMCS_GUEST_IA32_EFER); uint64_t old_cr0 = rvmcs(vcpu, VMCS_GUEST_CR0); +uint64_t changed_cr0 = old_cr0 ^ cr0; uint64_t mask = CR0_PG_MASK | CR0_CD_MASK | CR0_NW_MASK | CR0_NE_MASK | CR0_ET_MASK; @@ -139,11 +140,12 @@ static inline void macvm_set_cr0(hv_vcpuid_t vcpu, uint64_t cr0) wvmcs(vcpu, VMCS_CR0_SHADOW, cr0); if (efer & MSR_EFER_LME) { -if (!(old_cr0 & CR0_PG_MASK) && (cr0 & CR0_PG_MASK)) { -enter_long_mode(vcpu, cr0, efer); -} -if (!(cr0 & CR0_PG_MASK)) { -exit_long_mode(vcpu, cr0, efer); +if (changed_cr0 & CR0_PG_MASK) { +if (cr0 & CR0_PG_MASK) { +enter_long_mode(vcpu, cr0, efer); +} else { +exit_long_mode(vcpu, cr0, efer); +} } } -- 2.24.0
[PATCH v1] usb: Add read support for HCIVERSION register to XHCI
macOS will read HCIVERSION separately from CAPLENGTH. Add a distinct handler for that register. Signed-off-by: Cameron Esfahani --- hw/usb/hcd-xhci.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/hw/usb/hcd-xhci.c b/hw/usb/hcd-xhci.c index b330e36fe6..061f8438de 100644 --- a/hw/usb/hcd-xhci.c +++ b/hw/usb/hcd-xhci.c @@ -2739,6 +2739,9 @@ static uint64_t xhci_cap_read(void *ptr, hwaddr reg, unsigned size) case 0x00: /* HCIVERSION, CAPLENGTH */ ret = 0x0100 | LEN_CAP; break; +case 0x02: /* HCIVERSION */ +ret = 0x0100; +break; case 0x04: /* HCSPARAMS 1 */ ret = ((xhci->numports_2+xhci->numports_3)<<24) | (xhci->numintrs<<8) | xhci->numslots; -- 2.24.0
Re: [PATCH 04/11] MAINTAINERS: Add an entry for the HVF accelerator
Please add me to the HVF maintainers as well. Cameron Esfahani di...@apple.com "In the elder days of Art, Builders wrought with greatest care each minute and unseen part; For the gods see everywhere." "The Builders", H. W. Longfellow > On Mar 16, 2020, at 5:00 AM, Philippe Mathieu-Daudé wrote: > > Signed-off-by: Philippe Mathieu-Daudé > --- > Cc: Reviewed-by: Nikita Leshenko > Cc: Sergio Andres Gomez Del Real > Cc: Roman Bolshakov > Cc: Patrick Colp > Cc: Cameron Esfahani > Cc: Liran Alon > Cc: Heiher > --- > MAINTAINERS | 6 ++ > 1 file changed, 6 insertions(+) > > diff --git a/MAINTAINERS b/MAINTAINERS > index 7ec42a18f7..bcf40afb85 100644 > --- a/MAINTAINERS > +++ b/MAINTAINERS > @@ -420,6 +420,12 @@ F: accel/stubs/hax-stub.c > F: target/i386/hax-all.c > F: include/sysemu/hax.h > > +HVF Accelerator > +S: Orphan > +F: accel/stubs/hvf-stub.c > +F: target/i386/hvf/hvf.c > +F: include/sysemu/hvf.h > + > WHPX CPUs > M: Sunil Muthuswamy > S: Supported > -- > 2.21.1 > >
Re: [PATCH 04/11] MAINTAINERS: Add an entry for the HVF accelerator
Sorry I didn't see this yesterday. We've (Apple) signed up for taking over HVF ownership. I didn't realize I needed to add to the MAINTAINERS list. Roman, we also have a bunch of pending fixes for some of the issues you've listed. We're in the process of upstreaming them. Cameron Esfahani di...@apple.com "All that is necessary for the triumph of evil is that good men do nothing." Edmund Burke > On Mar 16, 2020, at 5:12 AM, Roman Bolshakov wrote: > > Hi Philippe, > > I can take the ownership if nobody wants it. At the moment I'm working > on APIC for HVF to get kvm-unit-tests fixed. > > Next items on the list (in no particular order): > * MMX emulation > * SSE emulation > * qxl display > * gdb stub > * virtio-gpu/virgil running on metal > * VFIO-PCI based on macOS user-space DriverKit framework > > Best regards, > Roman > > On Mon, Mar 16, 2020 at 01:00:42PM +0100, Philippe Mathieu-Daudé wrote: >> Signed-off-by: Philippe Mathieu-Daudé >> --- >> Cc: Reviewed-by: Nikita Leshenko >> Cc: Sergio Andres Gomez Del Real >> Cc: Roman Bolshakov >> Cc: Patrick Colp >> Cc: Cameron Esfahani >> Cc: Liran Alon >> Cc: Heiher >> --- >> MAINTAINERS | 6 ++ >> 1 file changed, 6 insertions(+) >> >> diff --git a/MAINTAINERS b/MAINTAINERS >> index 7ec42a18f7..bcf40afb85 100644 >> --- a/MAINTAINERS >> +++ b/MAINTAINERS >> @@ -420,6 +420,12 @@ F: accel/stubs/hax-stub.c >> F: target/i386/hax-all.c >> F: include/sysemu/hax.h >> >> +HVF Accelerator >> +S: Orphan >> +F: accel/stubs/hvf-stub.c >> +F: target/i386/hvf/hvf.c >> +F: include/sysemu/hvf.h >> + >> WHPX CPUs >> M: Sunil Muthuswamy >> S: Supported >> -- >> 2.21.1 >> >
[PATCH v2 2/2] vnc: prioritize ZRLE compression over ZLIB
In my investigation, ZRLE always compresses better than ZLIB so prioritize ZRLE over ZLIB, even if the client hints that ZLIB is preferred. zlib buffer is always reset in zrle_compress_data(), so using offset to calculate next_out and avail_out is useless. Signed-off-by: Cameron Esfahani --- ui/vnc-enc-zrle.c | 4 ++-- ui/vnc.c | 11 +-- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/ui/vnc-enc-zrle.c b/ui/vnc-enc-zrle.c index 17fd28a2e2..b4f71e32cf 100644 --- a/ui/vnc-enc-zrle.c +++ b/ui/vnc-enc-zrle.c @@ -98,8 +98,8 @@ static int zrle_compress_data(VncState *vs, int level) /* set pointers */ zstream->next_in = vs->zrle->zrle.buffer; zstream->avail_in = vs->zrle->zrle.offset; -zstream->next_out = vs->zrle->zlib.buffer + vs->zrle->zlib.offset; -zstream->avail_out = vs->zrle->zlib.capacity - vs->zrle->zlib.offset; +zstream->next_out = vs->zrle->zlib.buffer; +zstream->avail_out = vs->zrle->zlib.capacity; zstream->data_type = Z_BINARY; /* start encoding */ diff --git a/ui/vnc.c b/ui/vnc.c index 3e8d1f1207..1d7138a3a0 100644 --- a/ui/vnc.c +++ b/ui/vnc.c @@ -2071,8 +2071,15 @@ static void set_encodings(VncState *vs, int32_t *encodings, size_t n_encodings) break; #endif case VNC_ENCODING_ZLIB: -vs->features |= VNC_FEATURE_ZLIB_MASK; -vs->vnc_encoding = enc; +/* + * VNC_ENCODING_ZRLE compresses better than VNC_ENCODING_ZLIB. + * So prioritize ZRLE, even if the client hints that it prefers + * ZLIB. + */ +if ((vs->features & VNC_FEATURE_ZRLE_MASK) == 0) { +vs->features |= VNC_FEATURE_ZLIB_MASK; +vs->vnc_encoding = enc; +} break; case VNC_ENCODING_ZRLE: vs->features |= VNC_FEATURE_ZRLE_MASK; -- 2.24.0
[PATCH v2 0/2] vnc: fix VNC artifacts
Remove VNC optimization to reencode framebuffer update as raw if it's smaller than the default encoding. QEMU's implementation was naive and didn't account for the ZLIB z_stream mutating with each compression. Just saving and restoring the output buffer offset wasn't sufficient to "rewind" the previous encoding. Considering that ZRLE is never larger than raw and even though ZLIB can occasionally be fractionally larger than raw, the overhead of implementing this optimization correctly isn't worth it. While debugging this, I noticed ZRLE always compresses better than ZLIB. Prioritize ZRLE over ZLIB, even if the client hints that ZLIB is preferred. Cameron Esfahani (2): vnc: fix VNC artifacts vnc: prioritize ZRLE compression over ZLIB ui/vnc-enc-zrle.c | 4 ++-- ui/vnc.c | 31 +++ 2 files changed, 13 insertions(+), 22 deletions(-) -- 2.24.0
[PATCH v2 1/2] vnc: fix VNC artifacts
Patch de3f7de7f4e257ce44cdabb90f5f17ee99624557 was too simplistic in its implementation: it didn't account for the ZLIB z_stream mutating with each compression. Because of the mutation, simply resetting the output buffer's offset wasn't sufficient to "rewind" the operation. The mutated z_stream would generate future zlib blocks which referred to symbols in past blocks which weren't sent. This would lead to artifacting. This reverts commit de3f7de7f4e257ce44cdabb90f5f17ee99624557. Fixes: de3f7de7f4e2 ("vnc: allow fall back to RAW encoding") Signed-off-by: Cameron Esfahani --- ui/vnc.c | 20 ++-- 1 file changed, 2 insertions(+), 18 deletions(-) diff --git a/ui/vnc.c b/ui/vnc.c index 4100d6e404..3e8d1f1207 100644 --- a/ui/vnc.c +++ b/ui/vnc.c @@ -898,8 +898,6 @@ int vnc_raw_send_framebuffer_update(VncState *vs, int x, int y, int w, int h) int vnc_send_framebuffer_update(VncState *vs, int x, int y, int w, int h) { int n = 0; -bool encode_raw = false; -size_t saved_offs = vs->output.offset; switch(vs->vnc_encoding) { case VNC_ENCODING_ZLIB: @@ -922,24 +920,10 @@ int vnc_send_framebuffer_update(VncState *vs, int x, int y, int w, int h) n = vnc_zywrle_send_framebuffer_update(vs, x, y, w, h); break; default: -encode_raw = true; +vnc_framebuffer_update(vs, x, y, w, h, VNC_ENCODING_RAW); +n = vnc_raw_send_framebuffer_update(vs, x, y, w, h); break; } - -/* If the client has the same pixel format as our internal buffer and - * a RAW encoding would need less space fall back to RAW encoding to - * save bandwidth and processing power in the client. */ -if (!encode_raw && vs->write_pixels == vnc_write_pixels_copy && -12 + h * w * VNC_SERVER_FB_BYTES <= (vs->output.offset - saved_offs)) { -vs->output.offset = saved_offs; -encode_raw = true; -} - -if (encode_raw) { -vnc_framebuffer_update(vs, x, y, w, h, VNC_ENCODING_RAW); -n = vnc_raw_send_framebuffer_update(vs, x, y, w, h); -} - return n; } -- 2.24.0
Re: [PATCH v1] vnc: fix VNC artifacts
I’m new to this process, what are the next steps? Cameron Esfahani di...@apple.com > On Jan 16, 2020, at 11:47 PM, Gerd Hoffmann wrote: > > On Thu, Jan 16, 2020 at 07:50:58PM -0800, Cameron Esfahani wrote: >> Remove VNC optimization to reencode framebuffer update as raw if it's >> smaller than the default encoding. QEMU's implementation was naive and >> didn't account for the ZLIB z_stream mutating with each compression. Just >> saving and restoring the output buffer offset wasn't sufficient to "rewind" >> the previous encoding. Considering that ZRLE is never larger than raw and >> even though ZLIB can occasionally be fractionally larger than raw, the >> overhead of implementing this optimization correctly isn't worth it. > > So just revert de3f7de7f4e257 then ... > >> In my investigation, ZRLE always compresses better than ZLIB so >> prioritize ZRLE over ZLIB, even if the client hints that ZLIB is >> preferred. > > ... and make this a separate patch? > > cheers, > Gerd > >
Re: [PATCH v1] vnc: fix VNC artifacts
Yes. Personally, I'd also take the change to vnc-enc-zrle.c: because vs->zrle->zlib is reset at the top of the function, using vs->zrle->zlib.offset in determining zstream->next_out and zstream->avail_out is useless. Cameron Esfahani di...@apple.com "All that is necessary for the triumph of evil is that good men do nothing." Edmund Burke > On Jan 16, 2020, at 11:45 PM, Gerd Hoffmann wrote: > > On Thu, Jan 16, 2020 at 07:50:58PM -0800, Cameron Esfahani wrote: >> Remove VNC optimization to reencode framebuffer update as raw if it's >> smaller than the default encoding. QEMU's implementation was naive and >> didn't account for the ZLIB z_stream mutating with each compression. Just >> saving and restoring the output buffer offset wasn't sufficient to "rewind" >> the previous encoding. Considering that ZRLE is never larger than raw and >> even though ZLIB can occasionally be fractionally larger than raw, the >> overhead of implementing this optimization correctly isn't worth it. > > So just revert de3f7de7f4e257 then ... > >> In my investigation, ZRLE always compresses better than ZLIB so >> prioritize ZRLE over ZLIB, even if the client hints that ZLIB is >> preferred. > > ... and make this a separate patch? > > cheers, > Gerd >
[PATCH v1] vnc: fix VNC artifacts
Remove VNC optimization to reencode framebuffer update as raw if it's smaller than the default encoding. QEMU's implementation was naive and didn't account for the ZLIB z_stream mutating with each compression. Just saving and restoring the output buffer offset wasn't sufficient to "rewind" the previous encoding. Considering that ZRLE is never larger than raw and even though ZLIB can occasionally be fractionally larger than raw, the overhead of implementing this optimization correctly isn't worth it. In my investigation, ZRLE always compresses better than ZLIB so prioritize ZRLE over ZLIB, even if the client hints that ZLIB is preferred. Fixes: ("vnc: allow fall back to RAW encoding") Signed-off-by: Cameron Esfahani --- ui/vnc-enc-zrle.c | 4 ++-- ui/vnc.c | 30 +++--- 2 files changed, 13 insertions(+), 21 deletions(-) diff --git a/ui/vnc-enc-zrle.c b/ui/vnc-enc-zrle.c index 17fd28a2e2..b4f71e32cf 100644 --- a/ui/vnc-enc-zrle.c +++ b/ui/vnc-enc-zrle.c @@ -98,8 +98,8 @@ static int zrle_compress_data(VncState *vs, int level) /* set pointers */ zstream->next_in = vs->zrle->zrle.buffer; zstream->avail_in = vs->zrle->zrle.offset; -zstream->next_out = vs->zrle->zlib.buffer + vs->zrle->zlib.offset; -zstream->avail_out = vs->zrle->zlib.capacity - vs->zrle->zlib.offset; +zstream->next_out = vs->zrle->zlib.buffer; +zstream->avail_out = vs->zrle->zlib.capacity; zstream->data_type = Z_BINARY; /* start encoding */ diff --git a/ui/vnc.c b/ui/vnc.c index 4100d6e404..f085e1b747 100644 --- a/ui/vnc.c +++ b/ui/vnc.c @@ -898,8 +898,6 @@ int vnc_raw_send_framebuffer_update(VncState *vs, int x, int y, int w, int h) int vnc_send_framebuffer_update(VncState *vs, int x, int y, int w, int h) { int n = 0; -bool encode_raw = false; -size_t saved_offs = vs->output.offset; switch(vs->vnc_encoding) { case VNC_ENCODING_ZLIB: @@ -922,24 +920,11 @@ int vnc_send_framebuffer_update(VncState *vs, int x, int y, int w, int h) n = vnc_zywrle_send_framebuffer_update(vs, x, y, w, h); break; default: 
-encode_raw = true; +vnc_framebuffer_update(vs, x, y, w, h, VNC_ENCODING_RAW); +n = vnc_raw_send_framebuffer_update(vs, x, y, w, h); break; } -/* If the client has the same pixel format as our internal buffer and - * a RAW encoding would need less space fall back to RAW encoding to - * save bandwidth and processing power in the client. */ -if (!encode_raw && vs->write_pixels == vnc_write_pixels_copy && -12 + h * w * VNC_SERVER_FB_BYTES <= (vs->output.offset - saved_offs)) { -vs->output.offset = saved_offs; -encode_raw = true; -} - -if (encode_raw) { -vnc_framebuffer_update(vs, x, y, w, h, VNC_ENCODING_RAW); -n = vnc_raw_send_framebuffer_update(vs, x, y, w, h); -} - return n; } @@ -2087,8 +2072,15 @@ static void set_encodings(VncState *vs, int32_t *encodings, size_t n_encodings) break; #endif case VNC_ENCODING_ZLIB: -vs->features |= VNC_FEATURE_ZLIB_MASK; -vs->vnc_encoding = enc; +/* + * VNC_ENCODING_ZRLE compresses better than VNC_ENCODING_ZLIB. + * So prioritize ZRLE, even if the client hints that it prefers + * ZLIB. + */ +if ((vs->features & VNC_FEATURE_ZRLE_MASK) == 0) { +vs->features |= VNC_FEATURE_ZLIB_MASK; +vs->vnc_encoding = enc; +} break; case VNC_ENCODING_ZRLE: vs->features |= VNC_FEATURE_ZRLE_MASK; -- 2.24.0
Re: [Bug 1818937] Crash with HV_ERROR on macOS host
Try against 4.2. Cameron Esfahani di...@apple.com "In the elder days of Art, Builders wrought with greatest care each minute and unseen part; For the gods see everywhere." "The Builders", H. W. Longfellow > On Dec 30, 2019, at 8:41 AM, Alex Fliker > wrote: > > Are there any updates? Trying to run the IE11 image from Microsoft > (based on Windows 8.1) and it is crashing with this error sporadically > :-\ > > -- > You received this bug notification because you are a member of qemu- > devel-ml, which is subscribed to QEMU. > https://bugs.launchpad.net/bugs/1818937 > > Title: > Crash with HV_ERROR on macOS host > > Status in QEMU: > New > > Bug description: > On macOS host running Windows 10 guest, qemu crashed with error > message: Error: HV_ERROR. > > Host: macOS Mojave 10.14.3 (18D109) Late 2014 Mac mini presumably Core i5 > 4278U. > QEMU: git commit a3e3b0a7bd5de211a62cdf2d6c12b96d3c403560 > QEMU parameter: qemu-system-x86_64 -m 3000 -drive > file=disk.img,if=virtio,discard=unmap -accel hvf -soundhw hda -smp 3 > > thread list > Process 56054 stopped >thread #1: tid = 0x2ffec8, 0x7fff48d0805a vImage`vLookupTable_Planar16 > + 970, queue = 'com.apple.main-thread' >thread #2: tid = 0x2ffecc, 0x7fff79d6d7de > libsystem_kernel.dylib`__psynch_cvwait + 10 >thread #3: tid = 0x2ffecd, 0x7fff79d715aa > libsystem_kernel.dylib`__select + 10 >thread #4: tid = 0x2ffece, 0x7fff79d71d9a > libsystem_kernel.dylib`__sigwait + 10 > * thread #6: tid = 0x2ffed0, 0x7fff79d7023e > libsystem_kernel.dylib`__pthread_kill + 10, stop reason = signal SIGABRT >thread #7: tid = 0x2ffed1, 0x7fff79d6d7de > libsystem_kernel.dylib`__psynch_cvwait + 10 >thread #8: tid = 0x2ffed2, 0x7fff79d6d7de > libsystem_kernel.dylib`__psynch_cvwait + 10 >thread #11: tid = 0x2fff34, 0x7fff79d6a17a > libsystem_kernel.dylib`mach_msg_trap + 10, name = 'com.apple.NSEventThread' >thread #30: tid = 0x300c04, 0x7fff79e233f8 > libsystem_pthread.dylib`start_wqthread >thread #31: tid = 0x300c16, 0x7fff79e233f8 > 
libsystem_pthread.dylib`start_wqthread >thread #32: tid = 0x300c17, 0x >thread #33: tid = 0x300c93, 0x7fff79d6d7de > libsystem_kernel.dylib`__psynch_cvwait + 10 > > > Crashed thread: > > * thread #6, stop reason = signal SIGABRT >* frame #0: 0x7fff79d7023e libsystem_kernel.dylib`__pthread_kill + 10 > frame #1: 0x7fff79e26c1c libsystem_pthread.dylib`pthread_kill + 285 > frame #2: 0x7fff79cd91c9 libsystem_c.dylib`abort + 127 > frame #3: 0x00010baa476d > qemu-system-x86_64`assert_hvf_ok(ret=) at hvf.c:106 [opt] > frame #4: 0x00010baa4c8f > qemu-system-x86_64`hvf_vcpu_exec(cpu=0x7f8e5283de00) at hvf.c:681 [opt] > frame #5: 0x00010b988423 > qemu-system-x86_64`qemu_hvf_cpu_thread_fn(arg=0x7f8e5283de00) at > cpus.c:1636 [opt] > frame #6: 0x00010bd9dfce > qemu-system-x86_64`qemu_thread_start(args=) at > qemu-thread-posix.c:502 [opt] > frame #7: 0x7fff79e24305 libsystem_pthread.dylib`_pthread_body + 126 > frame #8: 0x7fff79e2726f libsystem_pthread.dylib`_pthread_start + 70 > frame #9: 0x7fff79e23415 libsystem_pthread.dylib`thread_start + 13 > > To manage notifications about this bug go to: > https://bugs.launchpad.net/qemu/+bug/1818937/+subscriptions >
Re: [PATCH v3 0/1] Fix bochs memory leak
Ping. Cameron Esfahani di...@apple.com "Americans are very skilled at creating a custom meaning from something that's mass-produced." Ann Powers > On Dec 12, 2019, at 12:30 AM, Cameron Esfahani via > wrote: > > Fix a small memory leak in the Bochs display driver. > > Each frame would leak about 304 bytes. > > v2: Add missing signed-off-by line. > v3: Add reviewed-by and fixes lines. > > Cameron Esfahani (1): > display/bochs-display: fix memory leak > > hw/display/bochs-display.c | 2 ++ > 1 file changed, 2 insertions(+) > > -- > 2.24.0 > >
[PATCH v3 0/1] Fix bochs memory leak
Fix a small memory leak in the Bochs display driver. Each frame would leak about 304 bytes. v2: Add missing signed-off-by line. v3: Add reviewed-by and fixes lines. Cameron Esfahani (1): display/bochs-display: fix memory leak hw/display/bochs-display.c | 2 ++ 1 file changed, 2 insertions(+) -- 2.24.0
[PATCH v3 1/1] display/bochs-display: fix memory leak
Fix memory leak in bochs_display_update(). Leaks 304 bytes per frame. Fixes: 33ebad54056 Signed-off-by: Cameron Esfahani Reviewed-by: Philippe Mathieu-Daudé --- hw/display/bochs-display.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/hw/display/bochs-display.c b/hw/display/bochs-display.c index dc1bd1641d..215db9a231 100644 --- a/hw/display/bochs-display.c +++ b/hw/display/bochs-display.c @@ -252,6 +252,8 @@ static void bochs_display_update(void *opaque) dpy_gfx_update(s->con, 0, ys, mode.width, y - ys); } + +g_free(snap); } } -- 2.24.0
[PATCH v2 0/1] Fix bochs memory leak
Fix a small memory leak in the Bochs display driver. Each frame would leak about 304 bytes. v2: Add missing signed-off-by line. Cameron Esfahani (1): display/bochs-display: fix memory leak hw/display/bochs-display.c | 2 ++ 1 file changed, 2 insertions(+) -- 2.24.0
[PATCH v2 1/1] display/bochs-display: fix memory leak
Fix memory leak in bochs_display_update(). Leaks 304 bytes per frame. Signed-off-by: Cameron Esfahani --- hw/display/bochs-display.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/hw/display/bochs-display.c b/hw/display/bochs-display.c index dc1bd1641d..215db9a231 100644 --- a/hw/display/bochs-display.c +++ b/hw/display/bochs-display.c @@ -252,6 +252,8 @@ static void bochs_display_update(void *opaque) dpy_gfx_update(s->con, 0, ys, mode.width, y - ys); } + +g_free(snap); } } -- 2.24.0
[PATCH v1 1/1] display/bochs-display: fix memory leak
Fix memory leak in bochs_display_update(). Leaks 304 bytes per frame. --- hw/display/bochs-display.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/hw/display/bochs-display.c b/hw/display/bochs-display.c index dc1bd1641d..215db9a231 100644 --- a/hw/display/bochs-display.c +++ b/hw/display/bochs-display.c @@ -252,6 +252,8 @@ static void bochs_display_update(void *opaque) dpy_gfx_update(s->con, 0, ys, mode.width, y - ys); } + +g_free(snap); } } -- 2.24.0
[PATCH v1 0/1] Fix bochs memory leak
Fix a small memory leak in the Bochs display driver. Each frame would leak about 304 bytes. Cameron Esfahani (1): display/bochs-display: fix memory leak hw/display/bochs-display.c | 2 ++ 1 file changed, 2 insertions(+) -- 2.24.0
[PATCH v2] Fix some comment spelling errors.
Signed-off-by: Cameron Esfahani Reviewed-by: Stefan Weil --- target/i386/machine.c | 8 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/target/i386/machine.c b/target/i386/machine.c index 2699eed94e..ee342ddd50 100644 --- a/target/i386/machine.c +++ b/target/i386/machine.c @@ -261,7 +261,7 @@ static int cpu_pre_save(void *opaque) * intercepted anymore. * * Furthermore, when a L2 exception is intercepted by L1 - * hypervisor, it's exception payload (CR2/DR6 on #PF/#DB) + * hypervisor, its exception payload (CR2/DR6 on #PF/#DB) * should not be set yet in the respective vCPU register. * Thus, in case an exception is pending, it is * important to save the exception payload seperately. @@ -271,9 +271,9 @@ static int cpu_pre_save(void *opaque) * distinguish between a pending and injected exception * and we don't need to store seperately the exception payload. * - * In order to preserve better backwards-compatabile migration, + * In order to preserve better backwards-compatible migration, * convert a pending exception to an injected exception in - * case it is not important to distingiush between them + * case it is not important to distinguish between them * as described above. */ if (env->exception_pending && !(env->hflags & HF_GUEST_MASK)) { @@ -415,7 +415,7 @@ static bool exception_info_needed(void *opaque) /* * It is important to save exception-info only in case - * we need to distingiush between a pending and injected + * we need to distinguish between a pending and injected * exception. Which is only required in case there is a * pending exception and vCPU is running L2. * For more info, refer to comment in cpu_pre_save(). -- 2.24.0
[PATCH] Fix some comment spelling errors.
Signed-off-by: Cameron Esfahani --- target/i386/machine.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/target/i386/machine.c b/target/i386/machine.c index 2699eed94e..f21823f179 100644 --- a/target/i386/machine.c +++ b/target/i386/machine.c @@ -261,7 +261,7 @@ static int cpu_pre_save(void *opaque) * intercepted anymore. * * Furthermore, when a L2 exception is intercepted by L1 - * hypervisor, it's exception payload (CR2/DR6 on #PF/#DB) + * hypervisor, its exception payload (CR2/DR6 on #PF/#DB) * should not be set yet in the respective vCPU register. * Thus, in case an exception is pending, it is * important to save the exception payload seperately. @@ -273,7 +273,7 @@ static int cpu_pre_save(void *opaque) * * In order to preserve better backwards-compatabile migration, * convert a pending exception to an injected exception in - * case it is not important to distingiush between them + * case it is not important to distinguish between them * as described above. */ if (env->exception_pending && !(env->hflags & HF_GUEST_MASK)) { @@ -415,7 +415,7 @@ static bool exception_info_needed(void *opaque) /* * It is important to save exception-info only in case - * we need to distingiush between a pending and injected + * we need to distinguish between a pending and injected * exception. Which is only required in case there is a * pending exception and vCPU is running L2. * For more info, refer to comment in cpu_pre_save(). -- 2.24.0
[PATCH v3 1/5] hvf: non-RAM, non-ROMD memory ranges are now correctly mapped in
If an area is non-RAM and non-ROMD, then remove mappings so accesses will trap and can be emulated. Change hvf_find_overlap_slot() to take a size instead of an end address: it wouldn't return a slot because callers would pass the same address for start and end. Don't always map area as read/write/execute, respect area flags. Signed-off-by: Cameron Esfahani Signed-off-by: Paolo Bonzini --- target/i386/hvf/hvf.c | 50 ++- 1 file changed, 35 insertions(+), 15 deletions(-) diff --git a/target/i386/hvf/hvf.c b/target/i386/hvf/hvf.c index 231732aaf7..0b50cfcbc6 100644 --- a/target/i386/hvf/hvf.c +++ b/target/i386/hvf/hvf.c @@ -107,14 +107,14 @@ static void assert_hvf_ok(hv_return_t ret) } /* Memory slots */ -hvf_slot *hvf_find_overlap_slot(uint64_t start, uint64_t end) +hvf_slot *hvf_find_overlap_slot(uint64_t start, uint64_t size) { hvf_slot *slot; int x; for (x = 0; x < hvf_state->num_slots; ++x) { slot = &hvf_state->slots[x]; if (slot->size && start < (slot->start + slot->size) && -end > slot->start) { +(start + size) > slot->start) { return slot; } } @@ -129,12 +129,10 @@ struct mac_slot { }; struct mac_slot mac_slots[32]; -#define ALIGN(x, y) (((x) + (y) - 1) & ~((y) - 1)) -static int do_hvf_set_memory(hvf_slot *slot) +static int do_hvf_set_memory(hvf_slot *slot, hv_memory_flags_t flags) { struct mac_slot *macslot; -hv_memory_flags_t flags; hv_return_t ret; macslot = &mac_slots[slot->slot_id]; @@ -151,8 +149,6 @@ static int do_hvf_set_memory(hvf_slot *slot) return 0; } -flags = HV_MEMORY_READ | HV_MEMORY_WRITE | HV_MEMORY_EXEC; - macslot->present = 1; macslot->gpa_start = slot->start; macslot->size = slot->size; @@ -165,14 +161,24 @@ void hvf_set_phys_mem(MemoryRegionSection *section, bool add) { hvf_slot *mem; MemoryRegion *area = section->mr; +bool writeable = !area->readonly && !area->rom_device; +hv_memory_flags_t flags; if (!memory_region_is_ram(area)) { -return; +if (writeable) { +return; +} else if (!memory_region_is_romd(area)) { +/* + * If the memory device 
is not in romd_mode, then we actually want + * to remove the hvf memory slot so all accesses will trap. + */ + add = false; +} } mem = hvf_find_overlap_slot( section->offset_within_address_space, -section->offset_within_address_space + int128_get64(section->size)); +int128_get64(section->size)); if (mem && add) { if (mem->size == int128_get64(section->size) && @@ -186,7 +192,7 @@ void hvf_set_phys_mem(MemoryRegionSection *section, bool add) /* Region needs to be reset. set the size to 0 and remap it. */ if (mem) { mem->size = 0; -if (do_hvf_set_memory(mem)) { +if (do_hvf_set_memory(mem, 0)) { error_report("Failed to reset overlapping slot"); abort(); } @@ -196,6 +202,13 @@ void hvf_set_phys_mem(MemoryRegionSection *section, bool add) return; } +if (area->readonly || +(!memory_region_is_ram(area) && memory_region_is_romd(area))) { +flags = HV_MEMORY_READ | HV_MEMORY_EXEC; +} else { +flags = HV_MEMORY_READ | HV_MEMORY_WRITE | HV_MEMORY_EXEC; +} + /* Now make a new slot. */ int x; @@ -216,7 +229,7 @@ void hvf_set_phys_mem(MemoryRegionSection *section, bool add) mem->start = section->offset_within_address_space; mem->region = area; -if (do_hvf_set_memory(mem)) { +if (do_hvf_set_memory(mem, flags)) { error_report("Error registering new memory slot"); abort(); } @@ -345,7 +358,14 @@ static bool ept_emulation_fault(hvf_slot *slot, uint64_t gpa, uint64_t ept_qual) return false; } -return !slot; +if (!slot) { +return true; +} +if (!memory_region_is_ram(slot->region) && +!(read && memory_region_is_romd(slot->region))) { +return true; +} +return false; } static void hvf_set_dirty_tracking(MemoryRegionSection *section, bool on) @@ -354,7 +374,7 @@ static void hvf_set_dirty_tracking(MemoryRegionSection *section, bool on) slot = hvf_find_overlap_slot( section->offset_within_address_space, -section->offset_within_address_space + int128_get64(section->size)); +int128_get64(section->size)); /* protect region against writes; begin tracking it */ if (on) { @@ -720,7 +740,7 @@ int 
hvf_vcpu_exec(CPUState *cpu) ret = EXCP_INTERRUPT; break; } -/* Need to check if MMIO or unmmaped fault */ +/* Need to check if MMIO or unmapped fault */ case EXIT_REASON_EPT_FAULT: { hvf_slot *slot; @@ -731,7 +75
[PATCH v3 2/5] hvf: remove TSC synchronization code because it isn't fully complete
The existing code in QEMU's HVF support to attempt to synchronize TSC across multiple cores is not sufficient. TSC value on other cores can go backwards. Until implementation is fixed, remove calls to hv_vm_sync_tsc(). Pass through TSC to guest OS. Signed-off-by: Cameron Esfahani Signed-off-by: Paolo Bonzini --- target/i386/hvf/hvf.c | 3 +-- target/i386/hvf/x86_emu.c | 3 --- target/i386/hvf/x86hvf.c | 4 3 files changed, 1 insertion(+), 9 deletions(-) diff --git a/target/i386/hvf/hvf.c b/target/i386/hvf/hvf.c index 0b50cfcbc6..90fd50acfc 100644 --- a/target/i386/hvf/hvf.c +++ b/target/i386/hvf/hvf.c @@ -518,7 +518,6 @@ void hvf_reset_vcpu(CPUState *cpu) { wreg(cpu->hvf_fd, HV_X86_R8 + i, 0x0); } -hv_vm_sync_tsc(0); hv_vcpu_invalidate_tlb(cpu->hvf_fd); hv_vcpu_flush(cpu->hvf_fd); } @@ -612,7 +611,7 @@ int hvf_init_vcpu(CPUState *cpu) hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_GSBASE, 1); hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_KERNELGSBASE, 1); hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_TSC_AUX, 1); -/*hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_IA32_TSC, 1);*/ +hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_IA32_TSC, 1); hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_IA32_SYSENTER_CS, 1); hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_IA32_SYSENTER_EIP, 1); hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_IA32_SYSENTER_ESP, 1); diff --git a/target/i386/hvf/x86_emu.c b/target/i386/hvf/x86_emu.c index 1b04bd7e94..3df767209d 100644 --- a/target/i386/hvf/x86_emu.c +++ b/target/i386/hvf/x86_emu.c @@ -772,9 +772,6 @@ void simulate_wrmsr(struct CPUState *cpu) switch (msr) { case MSR_IA32_TSC: -/* if (!osx_is_sierra()) - wvmcs(cpu->hvf_fd, VMCS_TSC_OFFSET, data - rdtscp()); -hv_vm_sync_tsc(data);*/ break; case MSR_IA32_APICBASE: cpu_set_apic_base(X86_CPU(cpu)->apic_state, data); diff --git a/target/i386/hvf/x86hvf.c b/target/i386/hvf/x86hvf.c index e0ea02d631..1485b95776 100644 --- a/target/i386/hvf/x86hvf.c +++ b/target/i386/hvf/x86hvf.c @@ -152,10 +152,6 @@ void hvf_put_msrs(CPUState 
*cpu_state) hv_vcpu_write_msr(cpu_state->hvf_fd, MSR_GSBASE, env->segs[R_GS].base); hv_vcpu_write_msr(cpu_state->hvf_fd, MSR_FSBASE, env->segs[R_FS].base); - -/* if (!osx_is_sierra()) - wvmcs(cpu_state->hvf_fd, VMCS_TSC_OFFSET, env->tsc - rdtscp());*/ -hv_vm_sync_tsc(env->tsc); } -- 2.24.0
[PATCH v3 4/5] hvf: more accurately match SDM when setting CR0 and PDPTE registers
More accurately match SDM when setting CR0 and PDPTE registers. Clear PDPTE registers when resetting vcpus. Signed-off-by: Cameron Esfahani Signed-off-by: Paolo Bonzini --- target/i386/hvf/hvf.c | 8 target/i386/hvf/vmx.h | 18 ++ 2 files changed, 18 insertions(+), 8 deletions(-) diff --git a/target/i386/hvf/hvf.c b/target/i386/hvf/hvf.c index 90fd50acfc..784e67d77e 100644 --- a/target/i386/hvf/hvf.c +++ b/target/i386/hvf/hvf.c @@ -441,12 +441,20 @@ static MemoryListener hvf_memory_listener = { }; void hvf_reset_vcpu(CPUState *cpu) { +uint64_t pdpte[4] = {0, 0, 0, 0}; +int i; /* TODO: this shouldn't be needed; there is already a call to * cpu_synchronize_all_post_reset in vl.c */ wvmcs(cpu->hvf_fd, VMCS_ENTRY_CTLS, 0); wvmcs(cpu->hvf_fd, VMCS_GUEST_IA32_EFER, 0); + +/* Initialize PDPTE */ +for (i = 0; i < 4; i++) { +wvmcs(cpu->hvf_fd, VMCS_GUEST_PDPTE0 + i * 2, pdpte[i]); +} + macvm_set_cr0(cpu->hvf_fd, 0x6010); wvmcs(cpu->hvf_fd, VMCS_CR4_MASK, CR4_VMXE_MASK); diff --git a/target/i386/hvf/vmx.h b/target/i386/hvf/vmx.h index 5dc52ecad6..eb8894cd58 100644 --- a/target/i386/hvf/vmx.h +++ b/target/i386/hvf/vmx.h @@ -121,6 +121,7 @@ static inline void macvm_set_cr0(hv_vcpuid_t vcpu, uint64_t cr0) uint64_t pdpte[4] = {0, 0, 0, 0}; uint64_t efer = rvmcs(vcpu, VMCS_GUEST_IA32_EFER); uint64_t old_cr0 = rvmcs(vcpu, VMCS_GUEST_CR0); +uint64_t mask = CR0_PG | CR0_CD | CR0_NW | CR0_NE | CR0_ET; if ((cr0 & CR0_PG) && (rvmcs(vcpu, VMCS_GUEST_CR4) & CR4_PAE) && !(efer & MSR_EFER_LME)) { @@ -128,18 +129,15 @@ static inline void macvm_set_cr0(hv_vcpuid_t vcpu, uint64_t cr0) rvmcs(vcpu, VMCS_GUEST_CR3) & ~0x1f, MEMTXATTRS_UNSPECIFIED, (uint8_t *)pdpte, 32, 0); +/* Only set PDPTE when appropriate. 
*/ +for (i = 0; i < 4; i++) { +wvmcs(vcpu, VMCS_GUEST_PDPTE0 + i * 2, pdpte[i]); +} } -for (i = 0; i < 4; i++) { -wvmcs(vcpu, VMCS_GUEST_PDPTE0 + i * 2, pdpte[i]); -} - -wvmcs(vcpu, VMCS_CR0_MASK, CR0_CD | CR0_NE | CR0_PG); +wvmcs(vcpu, VMCS_CR0_MASK, mask); wvmcs(vcpu, VMCS_CR0_SHADOW, cr0); -cr0 &= ~CR0_CD; -wvmcs(vcpu, VMCS_GUEST_CR0, cr0 | CR0_NE | CR0_ET); - if (efer & MSR_EFER_LME) { if (!(old_cr0 & CR0_PG) && (cr0 & CR0_PG)) { enter_long_mode(vcpu, cr0, efer); @@ -149,6 +147,10 @@ static inline void macvm_set_cr0(hv_vcpuid_t vcpu, uint64_t cr0) } } +/* Filter new CR0 after we are finished examining it above. */ +cr0 = (cr0 & ~(mask & ~CR0_PG)); +wvmcs(vcpu, VMCS_GUEST_CR0, cr0 | CR0_NE | CR0_ET); + hv_vcpu_invalidate_tlb(vcpu); hv_vcpu_flush(vcpu); } -- 2.24.0
[PATCH v3 0/5] hvf: stability fixes for HVF
The following patches fix stability issues with running QEMU on Apple Hypervisor Framework (HVF): - non-RAM, non-ROMD areas need to trap so accesses can be correctly emulated. - Current TSC synchronization implementation is insufficient: when running with more than 1 core, TSC values can go backwards. Until a correct implementation can be written, remove calls to hv_vm_sync_tsc(). Pass through TSC to guest OS. - Fix REX emulation in relation to legacy prefixes. - More correctly match SDM when setting CR0 and PDPTE registers. - Previous implementation in hvf_inject_interrupts() would always inject VMCS_INTR_T_SWINTR even when VMCS_INTR_T_HWINTR was required. Now correctly determine when VMCS_INTR_T_HWINTR is appropriate versus VMCS_INTR_T_SWINTR. Under heavy loads, interrupts got misrouted. Changes in v3: - Change previous code which saved interrupt/exception type in hvf_store_events() to inject later in hvf_inject_interrupts(). Now, hvf_inject_interrupts() will correctly determine when it's appropriate to inject VMCS_INTR_T_HWINTR versus VMCS_INTR_T_SWINTR. From feedback by Paolo Bonzini to make code more similar to KVM model. Changes in v2: - Fix code style errors. Cameron Esfahani (5): hvf: non-RAM, non-ROMD memory ranges are now correctly mapped in hvf: remove TSC synchronization code because it isn't fully complete hvf: correctly handle REX prefix in relation to legacy prefixes hvf: more accurately match SDM when setting CR0 and PDPTE registers hvf: correctly inject VMCS_INTR_T_HWINTR versus VMCS_INTR_T_SWINTR. target/i386/hvf/hvf.c| 65 ++-- target/i386/hvf/vmx.h| 18 +- target/i386/hvf/x86_decode.c | 64 +++ target/i386/hvf/x86_decode.h | 20 +-- target/i386/hvf/x86_emu.c| 3 -- target/i386/hvf/x86hvf.c | 18 +- 6 files changed, 112 insertions(+), 76 deletions(-) -- 2.24.0
[PATCH v3 5/5] hvf: correctly inject VMCS_INTR_T_HWINTR versus VMCS_INTR_T_SWINTR.
Previous implementation in hvf_inject_interrupts() would always inject VMCS_INTR_T_SWINTR even when VMCS_INTR_T_HWINTR was required. Now correctly determine when VMCS_INTR_T_HWINTR is appropriate versus VMCS_INTR_T_SWINTR. Make sure to clear ins_len and has_error_code when ins_len isn't valid and error_code isn't set. Signed-off-by: Cameron Esfahani --- target/i386/hvf/hvf.c| 4 +++- target/i386/hvf/x86hvf.c | 14 +- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/target/i386/hvf/hvf.c b/target/i386/hvf/hvf.c index 784e67d77e..d72543dc31 100644 --- a/target/i386/hvf/hvf.c +++ b/target/i386/hvf/hvf.c @@ -637,6 +637,8 @@ static void hvf_store_events(CPUState *cpu, uint32_t ins_len, uint64_t idtvec_in env->exception_injected = 0; env->interrupt_injected = -1; env->nmi_injected = false; +env->ins_len = 0; +env->has_error_code = false; if (idtvec_info & VMCS_IDT_VEC_VALID) { switch (idtvec_info & VMCS_IDT_VEC_TYPE) { case VMCS_IDT_VEC_HWINTR: @@ -659,7 +661,7 @@ static void hvf_store_events(CPUState *cpu, uint32_t ins_len, uint64_t idtvec_in (idtvec_info & VMCS_IDT_VEC_TYPE) == VMCS_IDT_VEC_SWINTR) { env->ins_len = ins_len; } -if (idtvec_info & VMCS_INTR_DEL_ERRCODE) { +if (idtvec_info & VMCS_IDT_VEC_ERRCODE_VALID) { env->has_error_code = true; env->error_code = rvmcs(cpu->hvf_fd, VMCS_IDT_VECTORING_ERROR); } diff --git a/target/i386/hvf/x86hvf.c b/target/i386/hvf/x86hvf.c index 1485b95776..edefe5319a 100644 --- a/target/i386/hvf/x86hvf.c +++ b/target/i386/hvf/x86hvf.c @@ -345,8 +345,6 @@ void vmx_clear_int_window_exiting(CPUState *cpu) ~VMCS_PRI_PROC_BASED_CTLS_INT_WINDOW_EXITING); } -#define NMI_VEC 2 - bool hvf_inject_interrupts(CPUState *cpu_state) { X86CPU *x86cpu = X86_CPU(cpu_state); @@ -357,7 +355,11 @@ bool hvf_inject_interrupts(CPUState *cpu_state) bool have_event = true; if (env->interrupt_injected != -1) { vector = env->interrupt_injected; -intr_type = VMCS_INTR_T_SWINTR; +if (env->ins_len) { +intr_type = VMCS_INTR_T_SWINTR; +} else { +intr_type 
= VMCS_INTR_T_HWINTR; +} } else if (env->exception_nr != -1) { vector = env->exception_nr; if (vector == EXCP03_INT3 || vector == EXCP04_INTO) { @@ -366,7 +368,7 @@ bool hvf_inject_interrupts(CPUState *cpu_state) intr_type = VMCS_INTR_T_HWEXCEPTION; } } else if (env->nmi_injected) { -vector = NMI_VEC; +vector = EXCP02_NMI; intr_type = VMCS_INTR_T_NMI; } else { have_event = false; @@ -390,6 +392,8 @@ bool hvf_inject_interrupts(CPUState *cpu_state) if (env->has_error_code) { wvmcs(cpu_state->hvf_fd, VMCS_ENTRY_EXCEPTION_ERROR, env->error_code); +/* Indicate that VMCS_ENTRY_EXCEPTION_ERROR is valid */ +info |= VMCS_INTR_DEL_ERRCODE; } /*printf("reinject %lx err %d\n", info, err);*/ wvmcs(cpu_state->hvf_fd, VMCS_ENTRY_INTR_INFO, info); @@ -399,7 +403,7 @@ bool hvf_inject_interrupts(CPUState *cpu_state) if (cpu_state->interrupt_request & CPU_INTERRUPT_NMI) { if (!(env->hflags2 & HF2_NMI_MASK) && !(info & VMCS_INTR_VALID)) { cpu_state->interrupt_request &= ~CPU_INTERRUPT_NMI; -info = VMCS_INTR_VALID | VMCS_INTR_T_NMI | NMI_VEC; +info = VMCS_INTR_VALID | VMCS_INTR_T_NMI | EXCP02_NMI; wvmcs(cpu_state->hvf_fd, VMCS_ENTRY_INTR_INFO, info); } else { vmx_set_nmi_window_exiting(cpu_state); -- 2.24.0
[PATCH v3 3/5] hvf: correctly handle REX prefix in relation to legacy prefixes
In real x86 processors, the REX prefix must come after legacy prefixes. REX before legacy is ignored. Update the HVF emulation code to properly handle this. Fix some spelling errors in constants. Fix some decoder table initialization issues found by Coverity. Signed-off-by: Cameron Esfahani Signed-off-by: Paolo Bonzini --- target/i386/hvf/x86_decode.c | 64 target/i386/hvf/x86_decode.h | 20 +-- 2 files changed, 46 insertions(+), 38 deletions(-) diff --git a/target/i386/hvf/x86_decode.c b/target/i386/hvf/x86_decode.c index 822fa1866e..77c346605f 100644 --- a/target/i386/hvf/x86_decode.c +++ b/target/i386/hvf/x86_decode.c @@ -122,7 +122,8 @@ static void decode_rax(CPUX86State *env, struct x86_decode *decode, { op->type = X86_VAR_REG; op->reg = R_EAX; -op->ptr = get_reg_ref(env, op->reg, decode->rex.rex, 0, +/* Since reg is always AX, REX prefix has no impact. */ +op->ptr = get_reg_ref(env, op->reg, false, 0, decode->operand_size); } @@ -1687,40 +1688,37 @@ calc_addr: } } -target_ulong get_reg_ref(CPUX86State *env, int reg, int rex, int is_extended, - int size) +target_ulong get_reg_ref(CPUX86State *env, int reg, int rex_present, + int is_extended, int size) { target_ulong ptr = 0; -int which = 0; if (is_extended) { reg |= R_R8; } - switch (size) { case 1: -if (is_extended || reg < 4 || rex) { -which = 1; +if (is_extended || reg < 4 || rex_present) { ptr = (target_ulong)&RL(env, reg); } else { -which = 2; ptr = (target_ulong)&RH(env, reg - 4); } break; default: -which = 3; ptr = (target_ulong)&RRX(env, reg); break; } return ptr; } -target_ulong get_reg_val(CPUX86State *env, int reg, int rex, int is_extended, - int size) +target_ulong get_reg_val(CPUX86State *env, int reg, int rex_present, + int is_extended, int size) { target_ulong val = 0; -memcpy(&val, (void *)get_reg_ref(env, reg, rex, is_extended, size), size); +memcpy(&val, + (void *)get_reg_ref(env, reg, rex_present, is_extended, size), + size); return val; } @@ -1853,28 +1851,38 @@ void 
calc_modrm_operand(CPUX86State *env, struct x86_decode *decode, static void decode_prefix(CPUX86State *env, struct x86_decode *decode) { while (1) { +/* + * REX prefix must come after legacy prefixes. + * REX before legacy is ignored. + * Clear rex to simulate this. + */ uint8_t byte = decode_byte(env, decode); switch (byte) { case PREFIX_LOCK: decode->lock = byte; +decode->rex.rex = 0; break; case PREFIX_REPN: case PREFIX_REP: decode->rep = byte; +decode->rex.rex = 0; break; -case PREFIX_CS_SEG_OVEERIDE: -case PREFIX_SS_SEG_OVEERIDE: -case PREFIX_DS_SEG_OVEERIDE: -case PREFIX_ES_SEG_OVEERIDE: -case PREFIX_FS_SEG_OVEERIDE: -case PREFIX_GS_SEG_OVEERIDE: +case PREFIX_CS_SEG_OVERRIDE: +case PREFIX_SS_SEG_OVERRIDE: +case PREFIX_DS_SEG_OVERRIDE: +case PREFIX_ES_SEG_OVERRIDE: +case PREFIX_FS_SEG_OVERRIDE: +case PREFIX_GS_SEG_OVERRIDE: decode->segment_override = byte; +decode->rex.rex = 0; break; case PREFIX_OP_SIZE_OVERRIDE: decode->op_size_override = byte; +decode->rex.rex = 0; break; case PREFIX_ADDR_SIZE_OVERRIDE: decode->addr_size_override = byte; +decode->rex.rex = 0; break; case PREFIX_REX ... 
(PREFIX_REX + 0xf): if (x86_is_long_mode(env_cpu(env))) { @@ -2111,14 +2119,14 @@ void init_decoder() { int i; -for (i = 0; i < ARRAY_SIZE(_decode_tbl2); i++) { -memcpy(_decode_tbl1, &invl_inst, sizeof(invl_inst)); +for (i = 0; i < ARRAY_SIZE(_decode_tbl1); i++) { +memcpy(&_decode_tbl1[i], &invl_inst, sizeof(invl_inst)); } for (i = 0; i < ARRAY_SIZE(_decode_tbl2); i++) { -memcpy(_decode_tbl2, &invl_inst, sizeof(invl_inst)); +memcpy(&_decode_tbl2[i], &invl_inst, sizeof(invl_inst)); } for (i = 0; i < ARRAY_SIZE(_decode_tbl3); i++) { -memcpy(_decode_tbl3, &invl_inst, sizeof(invl_inst_x87)); +memcpy(&_decode_tbl3[i], &invl_inst_x87, sizeof(invl_inst_x87)); } for (i = 0; i < ARRAY_SIZE(_1op_inst); i++) { @@ -2167,22 +2175,22 @@ target_ulong decode_linear_addr(CPUX86State *env, struct x86_decode *decode, target_ulong addr, X86Seg seg) { switch (decode->segment_override) { -case PREFIX_CS_SEG_OVEERIDE: +
Re: [PATCH v2 5/5] hvf: save away type as well as vector so we can reinject them
So far so good. Without any workaround, I could get it to fail within a few seconds. With your change, I've been running for a few minutes without a problem. But, this is on my laptop, so I'll wait until I can test it on a wider range of machines at work next week. If it continues to work, I'll update my patch to include this fix. Now, can you help me understand why this approach is better than what I had written? When we're in hvf_store_events(), we have vector type and number. All the information we need to reinject later. So why not save vector type away, instead of attempting to reconstruct it from other information (env->ins_len) in hvf_inject_interrupts()? Cameron Esfahani di...@apple.com "There are times in the life of a nation when the only place a decent man can find himself is in prison." > On Nov 28, 2019, at 5:56 AM, Paolo Bonzini wrote: > > On 26/11/19 21:04, Cameron Esfahani wrote: >> Our test case was booting many concurrent macOS VMs under heavy >> system load. I don't know if I could create one to replicate that. > > Does this work? > > diff --git a/target/i386/hvf/x86hvf.c b/target/i386/hvf/x86hvf.c > index 1485b95776..26c6c3a49f 100644 > --- a/target/i386/hvf/x86hvf.c > +++ b/target/i386/hvf/x86hvf.c > @@ -357,7 +357,11 @@ bool hvf_inject_interrupts(CPUState *cpu_state) > bool have_event = true; > if (env->interrupt_injected != -1) { > vector = env->interrupt_injected; > -intr_type = VMCS_INTR_T_SWINTR; > +if (env->ins_len) { > +intr_type = VMCS_INTR_T_SWINTR; > +} else { > +intr_type = VMCS_INTR_T_HWINTR; > +} > } else if (env->exception_nr != -1) { > vector = env->exception_nr; > if (vector == EXCP03_INT3 || vector == EXCP04_INTO) { > > Thanks, > > Paolo >
Re: [PATCH v2 5/5] hvf: save away type as well as vector so we can reinject them
I added some asserts to our internal version of QEMU. It's a few months off of master and, specifically, doesn't have fd13f23b8c95311eff74426921557eee592b0ed3. With the previous version of hvf_inject_interrupts(), before our fix, the code looked like the following: > if (env->interrupt_injected != -1) { > vector = env->interrupt_injected; > intr_type = VMCS_INTR_T_SWINTR; > } else if (env->exception_injected != -1) { What we were seeing was, under heavy loads, running many concurrent macOS VMs, that we would get spurious interrupts. Tracking it down, we discovered that VMCS_INTR_T_SWINTR was getting injected when VMCS_INTR_T_HWINTR was expected. If I take our proposed patch code, which built on top of master from a few days ago, and has fd13f23b8c95311eff74426921557eee592b0ed3, and add an assert, like the following: > if (env->interrupt_injected != -1) { > /* Type and vector are both saved in interrupt_injected. */ > vector = env->interrupt_injected & VMCS_IDT_VEC_VECNUM; > intr_type = env->interrupt_injected & VMCS_IDT_VEC_TYPE; > if (VMCS_INTR_T_SWINTR != intr_type) { > printf("VMCS_INTR_T_SWINTR (%x) != intr_type (%llx)\n", > VMCS_INTR_T_SWINTR, intr_type); > assert(VMCS_INTR_T_SWINTR == intr_type); > } > } else if (env->exception_nr != -1) { Then we will see the assert trigger and get the following output: > VMCS_INTR_T_SWINTR (400) != intr_type (0) > Assertion failed: (VMCS_INTR_T_SWINTR == intr_type), function > hvf_inject_interrupts, file qemu_upstream/target/i386/hvf/x86hvf.c, line 362. So, as far as I can see, the proposed changes are still necessary. Cameron Esfahani di...@apple.com "Americans are very skilled at creating a custom meaning from something that's mass-produced." Ann Powers > On Nov 26, 2019, at 12:04 PM, Cameron Esfahani via > wrote: > > Our test case was booting many concurrent macOS VMs under heavy system load. > I don't know if I could create one to replicate that. 
> > Cameron Esfahani > di...@apple.com > > "In the elder days of Art, Builders wrought with greatest care each minute > and unseen part; For the gods see everywhere." > > "The Builders", H. W. Longfellow > > > >> On Nov 25, 2019, at 2:26 AM, Paolo Bonzini wrote: >> >> On 24/11/19 21:05, Cameron Esfahani wrote: >>> Save away type as well as vector in hvf_store_events() so we can >>> correctly reinject both in hvf_inject_interrupts(). >>> >>> Make sure to clear ins_len and has_error_code when ins_len isn't >>> valid and error_code isn't set. >> >> Do you have a testcase for this? (I could guess it's about the INT1 >> instruction). >> >> Paolo >> > >
Re: [PATCH v2 0/5] hvf: stability fixes for HVF
Let me see if I can add some assertions. Cameron Esfahani di...@apple.com "The cake is a lie." Common wisdom > On Nov 25, 2019, at 2:28 AM, Paolo Bonzini wrote: > > Certainly no doubt about patches 1-4, while for patch 5 I'm wondering if > it's masking another bug; I'd prefer to have also some assertions that > interrupt_injected is never an exception and exception_nr is never an > interrupt. >
Re: [PATCH v2 5/5] hvf: save away type as well as vector so we can reinject them
Our test case was booting many concurrent macOS VMs under heavy system load. I don't know if I could create one to replicate that. Cameron Esfahani di...@apple.com "In the elder days of Art, Builders wrought with greatest care each minute and unseen part; For the gods see everywhere." "The Builders", H. W. Longfellow > On Nov 25, 2019, at 2:26 AM, Paolo Bonzini wrote: > > On 24/11/19 21:05, Cameron Esfahani wrote: >> Save away type as well as vector in hvf_store_events() so we can >> correctly reinject both in hvf_inject_interrupts(). >> >> Make sure to clear ins_len and has_error_code when ins_len isn't >> valid and error_code isn't set. > > Do you have a testcase for this? (I could guess it's about the INT1 > instruction). > > Paolo >
[PATCH v2 4/5] hvf: more accurately match SDM when setting CR0 and PDPTE registers
More accurately match SDM when setting CR0 and PDPTE registers. Clear PDPTE registers when resetting vcpus. Signed-off-by: Cameron Esfahani --- target/i386/hvf/hvf.c | 8 target/i386/hvf/vmx.h | 18 ++ 2 files changed, 18 insertions(+), 8 deletions(-) diff --git a/target/i386/hvf/hvf.c b/target/i386/hvf/hvf.c index 90fd50acfc..784e67d77e 100644 --- a/target/i386/hvf/hvf.c +++ b/target/i386/hvf/hvf.c @@ -441,12 +441,20 @@ static MemoryListener hvf_memory_listener = { }; void hvf_reset_vcpu(CPUState *cpu) { +uint64_t pdpte[4] = {0, 0, 0, 0}; +int i; /* TODO: this shouldn't be needed; there is already a call to * cpu_synchronize_all_post_reset in vl.c */ wvmcs(cpu->hvf_fd, VMCS_ENTRY_CTLS, 0); wvmcs(cpu->hvf_fd, VMCS_GUEST_IA32_EFER, 0); + +/* Initialize PDPTE */ +for (i = 0; i < 4; i++) { +wvmcs(cpu->hvf_fd, VMCS_GUEST_PDPTE0 + i * 2, pdpte[i]); +} + macvm_set_cr0(cpu->hvf_fd, 0x6010); wvmcs(cpu->hvf_fd, VMCS_CR4_MASK, CR4_VMXE_MASK); diff --git a/target/i386/hvf/vmx.h b/target/i386/hvf/vmx.h index 5dc52ecad6..eb8894cd58 100644 --- a/target/i386/hvf/vmx.h +++ b/target/i386/hvf/vmx.h @@ -121,6 +121,7 @@ static inline void macvm_set_cr0(hv_vcpuid_t vcpu, uint64_t cr0) uint64_t pdpte[4] = {0, 0, 0, 0}; uint64_t efer = rvmcs(vcpu, VMCS_GUEST_IA32_EFER); uint64_t old_cr0 = rvmcs(vcpu, VMCS_GUEST_CR0); +uint64_t mask = CR0_PG | CR0_CD | CR0_NW | CR0_NE | CR0_ET; if ((cr0 & CR0_PG) && (rvmcs(vcpu, VMCS_GUEST_CR4) & CR4_PAE) && !(efer & MSR_EFER_LME)) { @@ -128,18 +129,15 @@ static inline void macvm_set_cr0(hv_vcpuid_t vcpu, uint64_t cr0) rvmcs(vcpu, VMCS_GUEST_CR3) & ~0x1f, MEMTXATTRS_UNSPECIFIED, (uint8_t *)pdpte, 32, 0); +/* Only set PDPTE when appropriate. 
*/ +for (i = 0; i < 4; i++) { +wvmcs(vcpu, VMCS_GUEST_PDPTE0 + i * 2, pdpte[i]); +} } -for (i = 0; i < 4; i++) { -wvmcs(vcpu, VMCS_GUEST_PDPTE0 + i * 2, pdpte[i]); -} - -wvmcs(vcpu, VMCS_CR0_MASK, CR0_CD | CR0_NE | CR0_PG); +wvmcs(vcpu, VMCS_CR0_MASK, mask); wvmcs(vcpu, VMCS_CR0_SHADOW, cr0); -cr0 &= ~CR0_CD; -wvmcs(vcpu, VMCS_GUEST_CR0, cr0 | CR0_NE | CR0_ET); - if (efer & MSR_EFER_LME) { if (!(old_cr0 & CR0_PG) && (cr0 & CR0_PG)) { enter_long_mode(vcpu, cr0, efer); @@ -149,6 +147,10 @@ static inline void macvm_set_cr0(hv_vcpuid_t vcpu, uint64_t cr0) } } +/* Filter new CR0 after we are finished examining it above. */ +cr0 = (cr0 & ~(mask & ~CR0_PG)); +wvmcs(vcpu, VMCS_GUEST_CR0, cr0 | CR0_NE | CR0_ET); + hv_vcpu_invalidate_tlb(vcpu); hv_vcpu_flush(vcpu); } -- 2.24.0
[PATCH v2 1/5] hvf: non-RAM, non-ROMD memory ranges are now correctly mapped in
If an area is non-RAM and non-ROMD, then remove mappings so accesses will trap and can be emulated. Change hvf_find_overlap_slot() to take a size instead of an end address: it wouldn't return a slot because callers would pass the same address for start and end. Don't always map area as read/write/execute, respect area flags. Signed-off-by: Cameron Esfahani --- target/i386/hvf/hvf.c | 50 ++- 1 file changed, 35 insertions(+), 15 deletions(-) diff --git a/target/i386/hvf/hvf.c b/target/i386/hvf/hvf.c index 231732aaf7..0b50cfcbc6 100644 --- a/target/i386/hvf/hvf.c +++ b/target/i386/hvf/hvf.c @@ -107,14 +107,14 @@ static void assert_hvf_ok(hv_return_t ret) } /* Memory slots */ -hvf_slot *hvf_find_overlap_slot(uint64_t start, uint64_t end) +hvf_slot *hvf_find_overlap_slot(uint64_t start, uint64_t size) { hvf_slot *slot; int x; for (x = 0; x < hvf_state->num_slots; ++x) { slot = &hvf_state->slots[x]; if (slot->size && start < (slot->start + slot->size) && -end > slot->start) { +(start + size) > slot->start) { return slot; } } @@ -129,12 +129,10 @@ struct mac_slot { }; struct mac_slot mac_slots[32]; -#define ALIGN(x, y) (((x) + (y) - 1) & ~((y) - 1)) -static int do_hvf_set_memory(hvf_slot *slot) +static int do_hvf_set_memory(hvf_slot *slot, hv_memory_flags_t flags) { struct mac_slot *macslot; -hv_memory_flags_t flags; hv_return_t ret; macslot = &mac_slots[slot->slot_id]; @@ -151,8 +149,6 @@ static int do_hvf_set_memory(hvf_slot *slot) return 0; } -flags = HV_MEMORY_READ | HV_MEMORY_WRITE | HV_MEMORY_EXEC; - macslot->present = 1; macslot->gpa_start = slot->start; macslot->size = slot->size; @@ -165,14 +161,24 @@ void hvf_set_phys_mem(MemoryRegionSection *section, bool add) { hvf_slot *mem; MemoryRegion *area = section->mr; +bool writeable = !area->readonly && !area->rom_device; +hv_memory_flags_t flags; if (!memory_region_is_ram(area)) { -return; +if (writeable) { +return; +} else if (!memory_region_is_romd(area)) { +/* + * If the memory device is not in romd_mode, then we 
actually want + * to remove the hvf memory slot so all accesses will trap. + */ + add = false; +} } mem = hvf_find_overlap_slot( section->offset_within_address_space, -section->offset_within_address_space + int128_get64(section->size)); +int128_get64(section->size)); if (mem && add) { if (mem->size == int128_get64(section->size) && @@ -186,7 +192,7 @@ void hvf_set_phys_mem(MemoryRegionSection *section, bool add) /* Region needs to be reset. set the size to 0 and remap it. */ if (mem) { mem->size = 0; -if (do_hvf_set_memory(mem)) { +if (do_hvf_set_memory(mem, 0)) { error_report("Failed to reset overlapping slot"); abort(); } @@ -196,6 +202,13 @@ void hvf_set_phys_mem(MemoryRegionSection *section, bool add) return; } +if (area->readonly || +(!memory_region_is_ram(area) && memory_region_is_romd(area))) { +flags = HV_MEMORY_READ | HV_MEMORY_EXEC; +} else { +flags = HV_MEMORY_READ | HV_MEMORY_WRITE | HV_MEMORY_EXEC; +} + /* Now make a new slot. */ int x; @@ -216,7 +229,7 @@ void hvf_set_phys_mem(MemoryRegionSection *section, bool add) mem->start = section->offset_within_address_space; mem->region = area; -if (do_hvf_set_memory(mem)) { +if (do_hvf_set_memory(mem, flags)) { error_report("Error registering new memory slot"); abort(); } @@ -345,7 +358,14 @@ static bool ept_emulation_fault(hvf_slot *slot, uint64_t gpa, uint64_t ept_qual) return false; } -return !slot; +if (!slot) { +return true; +} +if (!memory_region_is_ram(slot->region) && +!(read && memory_region_is_romd(slot->region))) { +return true; +} +return false; } static void hvf_set_dirty_tracking(MemoryRegionSection *section, bool on) @@ -354,7 +374,7 @@ static void hvf_set_dirty_tracking(MemoryRegionSection *section, bool on) slot = hvf_find_overlap_slot( section->offset_within_address_space, -section->offset_within_address_space + int128_get64(section->size)); +int128_get64(section->size)); /* protect region against writes; begin tracking it */ if (on) { @@ -720,7 +740,7 @@ int hvf_vcpu_exec(CPUState *cpu) ret 
= EXCP_INTERRUPT; break; } -/* Need to check if MMIO or unmmaped fault */ +/* Need to check if MMIO or unmapped fault */ case EXIT_REASON_EPT_FAULT: { hvf_slot *slot; @@ -731,7 +751,7 @@ int hvf_vcpu_exec(CPUSt
[PATCH v2 0/5] hvf: stability fixes for HVF
The following patches fix stability issues with running QEMU on Apple Hypervisor Framework (HVF): - non-RAM, non-ROMD areas need to trap so accesses can be correctly emulated. - Current TSC synchronization implementation is insufficient: when running with more than 1 core, TSC values can go backwards. Until a correct implementation can be written, remove calls to hv_vm_sync_tsc(). Pass through TSC to guest OS. - Fix REX emulation in relation to legacy prefixes. - More correctly match SDM when setting CR0 and PDPTE registers. - Save away exception type as well as vector in hvf_store_events() so they can be correctly reinjected in hvf_inject_interrupts(). Under heavy loads, exceptions got misrouted. Changes in v2: - Fix code style errors. Cameron Esfahani (5): hvf: non-RAM, non-ROMD memory ranges are now correctly mapped in hvf: remove TSC synchronization code because it isn't fully complete hvf: correctly handle REX prefix in relation to legacy prefixes hvf: more accurately match SDM when setting CR0 and PDPTE registers hvf: save away type as well as vector so we can reinject them target/i386/hvf/hvf.c| 79 ++-- target/i386/hvf/vmx.h| 18 target/i386/hvf/x86_decode.c | 64 - target/i386/hvf/x86_decode.h | 20 - target/i386/hvf/x86_emu.c| 3 -- target/i386/hvf/x86hvf.c | 26 +--- 6 files changed, 124 insertions(+), 86 deletions(-) -- 2.24.0
[PATCH v2 2/5] hvf: remove TSC synchronization code because it isn't fully complete
The existing code in QEMU's HVF support to attempt to synchronize TSC across multiple cores is not sufficient. TSC value on other cores can go backwards. Until implementation is fixed, remove calls to hv_vm_sync_tsc(). Pass through TSC to guest OS. Signed-off-by: Cameron Esfahani --- target/i386/hvf/hvf.c | 3 +-- target/i386/hvf/x86_emu.c | 3 --- target/i386/hvf/x86hvf.c | 4 3 files changed, 1 insertion(+), 9 deletions(-) diff --git a/target/i386/hvf/hvf.c b/target/i386/hvf/hvf.c index 0b50cfcbc6..90fd50acfc 100644 --- a/target/i386/hvf/hvf.c +++ b/target/i386/hvf/hvf.c @@ -518,7 +518,6 @@ void hvf_reset_vcpu(CPUState *cpu) { wreg(cpu->hvf_fd, HV_X86_R8 + i, 0x0); } -hv_vm_sync_tsc(0); hv_vcpu_invalidate_tlb(cpu->hvf_fd); hv_vcpu_flush(cpu->hvf_fd); } @@ -612,7 +611,7 @@ int hvf_init_vcpu(CPUState *cpu) hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_GSBASE, 1); hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_KERNELGSBASE, 1); hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_TSC_AUX, 1); -/*hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_IA32_TSC, 1);*/ +hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_IA32_TSC, 1); hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_IA32_SYSENTER_CS, 1); hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_IA32_SYSENTER_EIP, 1); hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_IA32_SYSENTER_ESP, 1); diff --git a/target/i386/hvf/x86_emu.c b/target/i386/hvf/x86_emu.c index 1b04bd7e94..3df767209d 100644 --- a/target/i386/hvf/x86_emu.c +++ b/target/i386/hvf/x86_emu.c @@ -772,9 +772,6 @@ void simulate_wrmsr(struct CPUState *cpu) switch (msr) { case MSR_IA32_TSC: -/* if (!osx_is_sierra()) - wvmcs(cpu->hvf_fd, VMCS_TSC_OFFSET, data - rdtscp()); -hv_vm_sync_tsc(data);*/ break; case MSR_IA32_APICBASE: cpu_set_apic_base(X86_CPU(cpu)->apic_state, data); diff --git a/target/i386/hvf/x86hvf.c b/target/i386/hvf/x86hvf.c index e0ea02d631..1485b95776 100644 --- a/target/i386/hvf/x86hvf.c +++ b/target/i386/hvf/x86hvf.c @@ -152,10 +152,6 @@ void hvf_put_msrs(CPUState *cpu_state) 
hv_vcpu_write_msr(cpu_state->hvf_fd, MSR_GSBASE, env->segs[R_GS].base); hv_vcpu_write_msr(cpu_state->hvf_fd, MSR_FSBASE, env->segs[R_FS].base); - -/* if (!osx_is_sierra()) - wvmcs(cpu_state->hvf_fd, VMCS_TSC_OFFSET, env->tsc - rdtscp());*/ -hv_vm_sync_tsc(env->tsc); } -- 2.24.0
[PATCH v2 3/5] hvf: correctly handle REX prefix in relation to legacy prefixes
In real x86 processors, the REX prefix must come after legacy prefixes. REX before legacy is ignored. Update the HVF emulation code to properly handle this. Fix some spelling errors in constants. Fix some decoder table initialization issues found by Coverity. Signed-off-by: Cameron Esfahani --- target/i386/hvf/x86_decode.c | 64 target/i386/hvf/x86_decode.h | 20 +-- 2 files changed, 46 insertions(+), 38 deletions(-) diff --git a/target/i386/hvf/x86_decode.c b/target/i386/hvf/x86_decode.c index 822fa1866e..77c346605f 100644 --- a/target/i386/hvf/x86_decode.c +++ b/target/i386/hvf/x86_decode.c @@ -122,7 +122,8 @@ static void decode_rax(CPUX86State *env, struct x86_decode *decode, { op->type = X86_VAR_REG; op->reg = R_EAX; -op->ptr = get_reg_ref(env, op->reg, decode->rex.rex, 0, +/* Since reg is always AX, REX prefix has no impact. */ +op->ptr = get_reg_ref(env, op->reg, false, 0, decode->operand_size); } @@ -1687,40 +1688,37 @@ calc_addr: } } -target_ulong get_reg_ref(CPUX86State *env, int reg, int rex, int is_extended, - int size) +target_ulong get_reg_ref(CPUX86State *env, int reg, int rex_present, + int is_extended, int size) { target_ulong ptr = 0; -int which = 0; if (is_extended) { reg |= R_R8; } - switch (size) { case 1: -if (is_extended || reg < 4 || rex) { -which = 1; +if (is_extended || reg < 4 || rex_present) { ptr = (target_ulong)&RL(env, reg); } else { -which = 2; ptr = (target_ulong)&RH(env, reg - 4); } break; default: -which = 3; ptr = (target_ulong)&RRX(env, reg); break; } return ptr; } -target_ulong get_reg_val(CPUX86State *env, int reg, int rex, int is_extended, - int size) +target_ulong get_reg_val(CPUX86State *env, int reg, int rex_present, + int is_extended, int size) { target_ulong val = 0; -memcpy(&val, (void *)get_reg_ref(env, reg, rex, is_extended, size), size); +memcpy(&val, + (void *)get_reg_ref(env, reg, rex_present, is_extended, size), + size); return val; } @@ -1853,28 +1851,38 @@ void calc_modrm_operand(CPUX86State *env, struct x86_decode 
*decode, static void decode_prefix(CPUX86State *env, struct x86_decode *decode) { while (1) { +/* + * REX prefix must come after legacy prefixes. + * REX before legacy is ignored. + * Clear rex to simulate this. + */ uint8_t byte = decode_byte(env, decode); switch (byte) { case PREFIX_LOCK: decode->lock = byte; +decode->rex.rex = 0; break; case PREFIX_REPN: case PREFIX_REP: decode->rep = byte; +decode->rex.rex = 0; break; -case PREFIX_CS_SEG_OVEERIDE: -case PREFIX_SS_SEG_OVEERIDE: -case PREFIX_DS_SEG_OVEERIDE: -case PREFIX_ES_SEG_OVEERIDE: -case PREFIX_FS_SEG_OVEERIDE: -case PREFIX_GS_SEG_OVEERIDE: +case PREFIX_CS_SEG_OVERRIDE: +case PREFIX_SS_SEG_OVERRIDE: +case PREFIX_DS_SEG_OVERRIDE: +case PREFIX_ES_SEG_OVERRIDE: +case PREFIX_FS_SEG_OVERRIDE: +case PREFIX_GS_SEG_OVERRIDE: decode->segment_override = byte; +decode->rex.rex = 0; break; case PREFIX_OP_SIZE_OVERRIDE: decode->op_size_override = byte; +decode->rex.rex = 0; break; case PREFIX_ADDR_SIZE_OVERRIDE: decode->addr_size_override = byte; +decode->rex.rex = 0; break; case PREFIX_REX ... (PREFIX_REX + 0xf): if (x86_is_long_mode(env_cpu(env))) { @@ -2111,14 +2119,14 @@ void init_decoder() { int i; -for (i = 0; i < ARRAY_SIZE(_decode_tbl2); i++) { -memcpy(_decode_tbl1, &invl_inst, sizeof(invl_inst)); +for (i = 0; i < ARRAY_SIZE(_decode_tbl1); i++) { +memcpy(&_decode_tbl1[i], &invl_inst, sizeof(invl_inst)); } for (i = 0; i < ARRAY_SIZE(_decode_tbl2); i++) { -memcpy(_decode_tbl2, &invl_inst, sizeof(invl_inst)); +memcpy(&_decode_tbl2[i], &invl_inst, sizeof(invl_inst)); } for (i = 0; i < ARRAY_SIZE(_decode_tbl3); i++) { -memcpy(_decode_tbl3, &invl_inst, sizeof(invl_inst_x87)); +memcpy(&_decode_tbl3[i], &invl_inst_x87, sizeof(invl_inst_x87)); } for (i = 0; i < ARRAY_SIZE(_1op_inst); i++) { @@ -2167,22 +2175,22 @@ target_ulong decode_linear_addr(CPUX86State *env, struct x86_decode *decode, target_ulong addr, X86Seg seg) { switch (decode->segment_override) { -case PREFIX_CS_SEG_OVEERIDE: +case PREFIX_CS_SEG_OVERRIDE:
[PATCH v2 5/5] hvf: save away type as well as vector so we can reinject them
Save away type as well as vector in hvf_store_events() so we can correctly reinject both in hvf_inject_interrupts(). Make sure to clear ins_len and has_error_code when ins_len isn't valid and error_code isn't set. Signed-off-by: Cameron Esfahani --- target/i386/hvf/hvf.c| 18 ++ target/i386/hvf/x86hvf.c | 22 ++ 2 files changed, 24 insertions(+), 16 deletions(-) diff --git a/target/i386/hvf/hvf.c b/target/i386/hvf/hvf.c index 784e67d77e..8a8aee4495 100644 --- a/target/i386/hvf/hvf.c +++ b/target/i386/hvf/hvf.c @@ -641,14 +641,18 @@ static void hvf_store_events(CPUState *cpu, uint32_t ins_len, uint64_t idtvec_in switch (idtvec_info & VMCS_IDT_VEC_TYPE) { case VMCS_IDT_VEC_HWINTR: case VMCS_IDT_VEC_SWINTR: -env->interrupt_injected = idtvec_info & VMCS_IDT_VEC_VECNUM; +/* Save event type as well so we can inject the correct type. */ +env->interrupt_injected = +idtvec_info & (VMCS_IDT_VEC_TYPE | VMCS_IDT_VEC_VECNUM); break; case VMCS_IDT_VEC_NMI: env->nmi_injected = true; break; case VMCS_IDT_VEC_HWEXCEPTION: case VMCS_IDT_VEC_SWEXCEPTION: -env->exception_nr = idtvec_info & VMCS_IDT_VEC_VECNUM; +/* Save event type as well so we can inject the correct type. */ +env->exception_nr = +idtvec_info & (VMCS_IDT_VEC_TYPE | VMCS_IDT_VEC_VECNUM); env->exception_injected = 1; break; case VMCS_IDT_VEC_PRIV_SWEXCEPTION: @@ -658,10 +662,16 @@ static void hvf_store_events(CPUState *cpu, uint32_t ins_len, uint64_t idtvec_in if ((idtvec_info & VMCS_IDT_VEC_TYPE) == VMCS_IDT_VEC_SWEXCEPTION || (idtvec_info & VMCS_IDT_VEC_TYPE) == VMCS_IDT_VEC_SWINTR) { env->ins_len = ins_len; +} else { +/* Clear ins_len when it isn't valid. */ +env->ins_len = 0; } -if (idtvec_info & VMCS_INTR_DEL_ERRCODE) { +if (idtvec_info & VMCS_IDT_VEC_ERRCODE_VALID) { env->has_error_code = true; env->error_code = rvmcs(cpu->hvf_fd, VMCS_IDT_VECTORING_ERROR); +} else { +/* Clear has_error_code when error_code isn't valid. 
*/ +env->has_error_code = false; } } if ((rvmcs(cpu->hvf_fd, VMCS_GUEST_INTERRUPTIBILITY) & @@ -942,7 +952,7 @@ int hvf_vcpu_exec(CPUState *cpu) macvm_set_rip(cpu, rip + ins_len); break; case VMX_REASON_VMCALL: -env->exception_nr = EXCP0D_GPF; +env->exception_nr = VMCS_INTR_T_HWEXCEPTION | EXCP0D_GPF; env->exception_injected = 1; env->has_error_code = true; env->error_code = 0; diff --git a/target/i386/hvf/x86hvf.c b/target/i386/hvf/x86hvf.c index 1485b95776..d25ae4585b 100644 --- a/target/i386/hvf/x86hvf.c +++ b/target/i386/hvf/x86hvf.c @@ -345,8 +345,6 @@ void vmx_clear_int_window_exiting(CPUState *cpu) ~VMCS_PRI_PROC_BASED_CTLS_INT_WINDOW_EXITING); } -#define NMI_VEC 2 - bool hvf_inject_interrupts(CPUState *cpu_state) { X86CPU *x86cpu = X86_CPU(cpu_state); @@ -356,17 +354,15 @@ bool hvf_inject_interrupts(CPUState *cpu_state) uint64_t intr_type; bool have_event = true; if (env->interrupt_injected != -1) { -vector = env->interrupt_injected; -intr_type = VMCS_INTR_T_SWINTR; +/* Type and vector are both saved in interrupt_injected. */ +vector = env->interrupt_injected & VMCS_IDT_VEC_VECNUM; +intr_type = env->interrupt_injected & VMCS_IDT_VEC_TYPE; } else if (env->exception_nr != -1) { -vector = env->exception_nr; -if (vector == EXCP03_INT3 || vector == EXCP04_INTO) { -intr_type = VMCS_INTR_T_SWEXCEPTION; -} else { -intr_type = VMCS_INTR_T_HWEXCEPTION; -} +/* Type and vector are both saved in exception_nr. 
*/ +vector = env->exception_nr & VMCS_IDT_VEC_VECNUM; +intr_type = env->exception_nr & VMCS_IDT_VEC_TYPE; } else if (env->nmi_injected) { -vector = NMI_VEC; +vector = EXCP02_NMI; intr_type = VMCS_INTR_T_NMI; } else { have_event = false; @@ -390,6 +386,8 @@ bool hvf_inject_interrupts(CPUState *cpu_state) if (env->has_error_code) { wvmcs(cpu_state->hvf_fd, VMCS_ENTRY_EXCEPTION_ERROR, env->error_code); +/* Indicate that VMCS_ENTRY_EXCEPTION_ERROR is valid */ +info |= VMCS_INTR_DEL_ERRCODE; } /*printf("reinject %lx err %d\n", info, err);*/ wvmcs(cpu_state->hvf_fd, VMCS_ENTRY_INTR_INFO, info); @@ -399,7 +397,7 @@ bool hvf_inject_interrupts(CPUState *cpu_state) if (cpu_state->interrupt_request & CPU_INTERRUPT_NMI) { if (!(env->hflags2 & HF2_NMI_MASK) && !(info & VMCS_INT
[PATCH 4/5] hvf: more accurately match SDM when setting CR0 and PDPTE registers
More accurately match SDM when setting CR0 and PDPTE registers. Clear PDPTE registers when resetting vcpus. Signed-off-by: Cameron Esfahani --- target/i386/hvf/hvf.c | 8 target/i386/hvf/vmx.h | 18 ++ 2 files changed, 18 insertions(+), 8 deletions(-) diff --git a/target/i386/hvf/hvf.c b/target/i386/hvf/hvf.c index fda0273ba1..7f6ebd2e50 100644 --- a/target/i386/hvf/hvf.c +++ b/target/i386/hvf/hvf.c @@ -434,12 +434,20 @@ static MemoryListener hvf_memory_listener = { }; void hvf_reset_vcpu(CPUState *cpu) { +uint64_t pdpte[4] = {0, 0, 0, 0}; +int i; /* TODO: this shouldn't be needed; there is already a call to * cpu_synchronize_all_post_reset in vl.c */ wvmcs(cpu->hvf_fd, VMCS_ENTRY_CTLS, 0); wvmcs(cpu->hvf_fd, VMCS_GUEST_IA32_EFER, 0); + +/* Initialize PDPTE */ +for (i = 0; i < 4; i++) { +wvmcs(cpu->hvf_fd, VMCS_GUEST_PDPTE0 + i * 2, pdpte[i]); +} + macvm_set_cr0(cpu->hvf_fd, 0x6010); wvmcs(cpu->hvf_fd, VMCS_CR4_MASK, CR4_VMXE_MASK); diff --git a/target/i386/hvf/vmx.h b/target/i386/hvf/vmx.h index 5dc52ecad6..eb8894cd58 100644 --- a/target/i386/hvf/vmx.h +++ b/target/i386/hvf/vmx.h @@ -121,6 +121,7 @@ static inline void macvm_set_cr0(hv_vcpuid_t vcpu, uint64_t cr0) uint64_t pdpte[4] = {0, 0, 0, 0}; uint64_t efer = rvmcs(vcpu, VMCS_GUEST_IA32_EFER); uint64_t old_cr0 = rvmcs(vcpu, VMCS_GUEST_CR0); +uint64_t mask = CR0_PG | CR0_CD | CR0_NW | CR0_NE | CR0_ET; if ((cr0 & CR0_PG) && (rvmcs(vcpu, VMCS_GUEST_CR4) & CR4_PAE) && !(efer & MSR_EFER_LME)) { @@ -128,18 +129,15 @@ static inline void macvm_set_cr0(hv_vcpuid_t vcpu, uint64_t cr0) rvmcs(vcpu, VMCS_GUEST_CR3) & ~0x1f, MEMTXATTRS_UNSPECIFIED, (uint8_t *)pdpte, 32, 0); +/* Only set PDPTE when appropriate. 
*/ +for (i = 0; i < 4; i++) { +wvmcs(vcpu, VMCS_GUEST_PDPTE0 + i * 2, pdpte[i]); +} } -for (i = 0; i < 4; i++) { -wvmcs(vcpu, VMCS_GUEST_PDPTE0 + i * 2, pdpte[i]); -} - -wvmcs(vcpu, VMCS_CR0_MASK, CR0_CD | CR0_NE | CR0_PG); +wvmcs(vcpu, VMCS_CR0_MASK, mask); wvmcs(vcpu, VMCS_CR0_SHADOW, cr0); -cr0 &= ~CR0_CD; -wvmcs(vcpu, VMCS_GUEST_CR0, cr0 | CR0_NE | CR0_ET); - if (efer & MSR_EFER_LME) { if (!(old_cr0 & CR0_PG) && (cr0 & CR0_PG)) { enter_long_mode(vcpu, cr0, efer); @@ -149,6 +147,10 @@ static inline void macvm_set_cr0(hv_vcpuid_t vcpu, uint64_t cr0) } } +/* Filter new CR0 after we are finished examining it above. */ +cr0 = (cr0 & ~(mask & ~CR0_PG)); +wvmcs(vcpu, VMCS_GUEST_CR0, cr0 | CR0_NE | CR0_ET); + hv_vcpu_invalidate_tlb(vcpu); hv_vcpu_flush(vcpu); } -- 2.24.0
[PATCH 3/5] hvf: correctly handle REX prefix in relation to legacy prefixes
In real x86 processors, the REX prefix must come after legacy prefixes. REX before legacy is ignored. Update the HVF emulation code to properly handle this. Fix some spelling errors in constants. Fix some decoder table initialization issues found by Coverity. Signed-off-by: Cameron Esfahani --- target/i386/hvf/x86_decode.c | 55 +++- target/i386/hvf/x86_decode.h | 16 +-- 2 files changed, 37 insertions(+), 34 deletions(-) diff --git a/target/i386/hvf/x86_decode.c b/target/i386/hvf/x86_decode.c index 822fa1866e..e2806f7967 100644 --- a/target/i386/hvf/x86_decode.c +++ b/target/i386/hvf/x86_decode.c @@ -122,7 +122,8 @@ static void decode_rax(CPUX86State *env, struct x86_decode *decode, { op->type = X86_VAR_REG; op->reg = R_EAX; -op->ptr = get_reg_ref(env, op->reg, decode->rex.rex, 0, +/* Since reg is always AX, REX prefix has no impact. */ +op->ptr = get_reg_ref(env, op->reg, false, 0, decode->operand_size); } @@ -1687,40 +1688,35 @@ calc_addr: } } -target_ulong get_reg_ref(CPUX86State *env, int reg, int rex, int is_extended, +target_ulong get_reg_ref(CPUX86State *env, int reg, int rex_present, int is_extended, int size) { target_ulong ptr = 0; -int which = 0; if (is_extended) { reg |= R_R8; } - switch (size) { case 1: -if (is_extended || reg < 4 || rex) { -which = 1; +if (is_extended || reg < 4 || rex_present) { ptr = (target_ulong)&RL(env, reg); } else { -which = 2; ptr = (target_ulong)&RH(env, reg - 4); } break; default: -which = 3; ptr = (target_ulong)&RRX(env, reg); break; } return ptr; } -target_ulong get_reg_val(CPUX86State *env, int reg, int rex, int is_extended, +target_ulong get_reg_val(CPUX86State *env, int reg, int rex_present, int is_extended, int size) { target_ulong val = 0; -memcpy(&val, (void *)get_reg_ref(env, reg, rex, is_extended, size), size); +memcpy(&val, (void *)get_reg_ref(env, reg, rex_present, is_extended, size), size); return val; } @@ -1853,28 +1849,35 @@ void calc_modrm_operand(CPUX86State *env, struct x86_decode *decode, static void 
decode_prefix(CPUX86State *env, struct x86_decode *decode) { while (1) { +/* REX prefix must come after legacy prefixes. REX before legacy is ignored. + Clear rex to simulate this. */ uint8_t byte = decode_byte(env, decode); switch (byte) { case PREFIX_LOCK: decode->lock = byte; +decode->rex.rex = 0; break; case PREFIX_REPN: case PREFIX_REP: decode->rep = byte; +decode->rex.rex = 0; break; -case PREFIX_CS_SEG_OVEERIDE: -case PREFIX_SS_SEG_OVEERIDE: -case PREFIX_DS_SEG_OVEERIDE: -case PREFIX_ES_SEG_OVEERIDE: -case PREFIX_FS_SEG_OVEERIDE: -case PREFIX_GS_SEG_OVEERIDE: +case PREFIX_CS_SEG_OVERRIDE: +case PREFIX_SS_SEG_OVERRIDE: +case PREFIX_DS_SEG_OVERRIDE: +case PREFIX_ES_SEG_OVERRIDE: +case PREFIX_FS_SEG_OVERRIDE: +case PREFIX_GS_SEG_OVERRIDE: decode->segment_override = byte; +decode->rex.rex = 0; break; case PREFIX_OP_SIZE_OVERRIDE: decode->op_size_override = byte; +decode->rex.rex = 0; break; case PREFIX_ADDR_SIZE_OVERRIDE: decode->addr_size_override = byte; +decode->rex.rex = 0; break; case PREFIX_REX ... 
(PREFIX_REX + 0xf): if (x86_is_long_mode(env_cpu(env))) { @@ -2111,14 +2114,14 @@ void init_decoder() { int i; -for (i = 0; i < ARRAY_SIZE(_decode_tbl2); i++) { -memcpy(_decode_tbl1, &invl_inst, sizeof(invl_inst)); +for (i = 0; i < ARRAY_SIZE(_decode_tbl1); i++) { +memcpy(&_decode_tbl1[i], &invl_inst, sizeof(invl_inst)); } for (i = 0; i < ARRAY_SIZE(_decode_tbl2); i++) { -memcpy(_decode_tbl2, &invl_inst, sizeof(invl_inst)); +memcpy(&_decode_tbl2[i], &invl_inst, sizeof(invl_inst)); } for (i = 0; i < ARRAY_SIZE(_decode_tbl3); i++) { -memcpy(_decode_tbl3, &invl_inst, sizeof(invl_inst_x87)); +memcpy(&_decode_tbl3[i], &invl_inst_x87, sizeof(invl_inst_x87)); } for (i = 0; i < ARRAY_SIZE(_1op_inst); i++) { @@ -2167,22 +2170,22 @@ target_ulong decode_linear_addr(CPUX86State *env, struct x86_decode *decode, target_ulong addr, X86Seg seg) { switch (decode->segment_override) { -case PREFIX_CS_SEG_OVEERIDE: +case PREFIX_CS_SEG_OVERRIDE: seg = R_CS; break; -case PREFIX_SS_SEG_OVEERIDE: +case PREFIX_SS_SEG_OVERRIDE: seg = R_SS;
[PATCH 1/5] hvf: non-RAM, non-ROMD memory ranges are now correctly mapped in
If an area is non-RAM and non-ROMD, then remove mappings so accesses will trap and can be emulated. Change hvf_find_overlap_slot() to take a size instead of an end address: it wouldn't return a slot because callers would pass the same address for start and end. Don't always map area as read/write/execute, respect area flags. Signed-off-by: Cameron Esfahani --- target/i386/hvf/hvf.c | 47 +++ 1 file changed, 30 insertions(+), 17 deletions(-) diff --git a/target/i386/hvf/hvf.c b/target/i386/hvf/hvf.c index 231732aaf7..60c995470b 100644 --- a/target/i386/hvf/hvf.c +++ b/target/i386/hvf/hvf.c @@ -107,14 +107,14 @@ static void assert_hvf_ok(hv_return_t ret) } /* Memory slots */ -hvf_slot *hvf_find_overlap_slot(uint64_t start, uint64_t end) +hvf_slot *hvf_find_overlap_slot(uint64_t start, uint64_t size) { hvf_slot *slot; int x; for (x = 0; x < hvf_state->num_slots; ++x) { slot = &hvf_state->slots[x]; if (slot->size && start < (slot->start + slot->size) && -end > slot->start) { +(start + size) > slot->start) { return slot; } } @@ -129,12 +129,10 @@ struct mac_slot { }; struct mac_slot mac_slots[32]; -#define ALIGN(x, y) (((x) + (y) - 1) & ~((y) - 1)) -static int do_hvf_set_memory(hvf_slot *slot) +static int do_hvf_set_memory(hvf_slot *slot, hv_memory_flags_t flags) { struct mac_slot *macslot; -hv_memory_flags_t flags; hv_return_t ret; macslot = &mac_slots[slot->slot_id]; @@ -151,8 +149,6 @@ static int do_hvf_set_memory(hvf_slot *slot) return 0; } -flags = HV_MEMORY_READ | HV_MEMORY_WRITE | HV_MEMORY_EXEC; - macslot->present = 1; macslot->gpa_start = slot->start; macslot->size = slot->size; @@ -165,14 +161,22 @@ void hvf_set_phys_mem(MemoryRegionSection *section, bool add) { hvf_slot *mem; MemoryRegion *area = section->mr; +bool writeable = !area->readonly && !area->rom_device; +hv_memory_flags_t flags; if (!memory_region_is_ram(area)) { -return; +if (writeable) { +return; +} else if (!memory_region_is_romd(area)) { +/* If the memory device is not in romd_mode, then we 
actually want + * to remove the hvf memory slot so all accesses will trap. */ + add = false; +} } mem = hvf_find_overlap_slot( section->offset_within_address_space, -section->offset_within_address_space + int128_get64(section->size)); +int128_get64(section->size)); if (mem && add) { if (mem->size == int128_get64(section->size) && @@ -186,8 +190,8 @@ void hvf_set_phys_mem(MemoryRegionSection *section, bool add) /* Region needs to be reset. set the size to 0 and remap it. */ if (mem) { mem->size = 0; -if (do_hvf_set_memory(mem)) { -error_report("Failed to reset overlapping slot"); +if (do_hvf_set_memory(mem, 0)) { +error_report("Failed to reset overlapping slot\n"); abort(); } } @@ -196,6 +200,11 @@ void hvf_set_phys_mem(MemoryRegionSection *section, bool add) return; } +if (area->readonly || (!memory_region_is_ram(area) && memory_region_is_romd(area))) +flags = HV_MEMORY_READ | HV_MEMORY_EXEC; +else +flags = HV_MEMORY_READ | HV_MEMORY_WRITE | HV_MEMORY_EXEC; + /* Now make a new slot. */ int x; @@ -216,8 +225,8 @@ void hvf_set_phys_mem(MemoryRegionSection *section, bool add) mem->start = section->offset_within_address_space; mem->region = area; -if (do_hvf_set_memory(mem)) { -error_report("Error registering new memory slot"); +if (do_hvf_set_memory(mem, flags)) { +error_report("Error registering new memory slot\n"); abort(); } } @@ -345,7 +354,11 @@ static bool ept_emulation_fault(hvf_slot *slot, uint64_t gpa, uint64_t ept_qual) return false; } -return !slot; +if (!slot) +return true; +if (!memory_region_is_ram(slot->region) && !(read && memory_region_is_romd(slot->region))) +return true; +return false; } static void hvf_set_dirty_tracking(MemoryRegionSection *section, bool on) @@ -354,7 +367,7 @@ static void hvf_set_dirty_tracking(MemoryRegionSection *section, bool on) slot = hvf_find_overlap_slot( section->offset_within_address_space, -section->offset_within_address_space + int128_get64(section->size)); +int128_get64(section->size)); /* protect region against 
writes; begin tracking it */ if (on) { @@ -720,7 +733,7 @@ int hvf_vcpu_exec(CPUState *cpu) ret = EXCP_INTERRUPT; break; } -/* Need to check if MMIO or unmmaped fault */ +/* Need to check if MMIO or unmapped fault */ case EXIT_REASON_EPT_FAULT: {
[PATCH 5/5] hvf: save away type as well as vector so we can reinject them
Save away type as well as vector in hvf_store_events() so we can correctly reinject both in hvf_inject_interrupts(). Make sure to clear ins_len and has_error_code when ins_len isn't valid and error_code isn't set. Signed-off-by: Cameron Esfahani --- target/i386/hvf/hvf.c| 16 target/i386/hvf/x86hvf.c | 22 ++ 2 files changed, 22 insertions(+), 16 deletions(-) diff --git a/target/i386/hvf/hvf.c b/target/i386/hvf/hvf.c index 7f6ebd2e50..818591ceee 100644 --- a/target/i386/hvf/hvf.c +++ b/target/i386/hvf/hvf.c @@ -634,14 +634,16 @@ static void hvf_store_events(CPUState *cpu, uint32_t ins_len, uint64_t idtvec_in switch (idtvec_info & VMCS_IDT_VEC_TYPE) { case VMCS_IDT_VEC_HWINTR: case VMCS_IDT_VEC_SWINTR: -env->interrupt_injected = idtvec_info & VMCS_IDT_VEC_VECNUM; +/* Save away the event type as well so we can inject the correct type. */ +env->interrupt_injected = idtvec_info & (VMCS_IDT_VEC_TYPE | VMCS_IDT_VEC_VECNUM); break; case VMCS_IDT_VEC_NMI: env->nmi_injected = true; break; case VMCS_IDT_VEC_HWEXCEPTION: case VMCS_IDT_VEC_SWEXCEPTION: -env->exception_nr = idtvec_info & VMCS_IDT_VEC_VECNUM; +/* Save away the event type as well so we can inject the correct type. */ +env->exception_nr = idtvec_info & (VMCS_IDT_VEC_TYPE | VMCS_IDT_VEC_VECNUM); env->exception_injected = 1; break; case VMCS_IDT_VEC_PRIV_SWEXCEPTION: @@ -651,10 +653,16 @@ static void hvf_store_events(CPUState *cpu, uint32_t ins_len, uint64_t idtvec_in if ((idtvec_info & VMCS_IDT_VEC_TYPE) == VMCS_IDT_VEC_SWEXCEPTION || (idtvec_info & VMCS_IDT_VEC_TYPE) == VMCS_IDT_VEC_SWINTR) { env->ins_len = ins_len; +} else { +/* Make sure to clear ins_len when it isn't valid. */ +env->ins_len = 0; } -if (idtvec_info & VMCS_INTR_DEL_ERRCODE) { +if (idtvec_info & VMCS_IDT_VEC_ERRCODE_VALID) { env->has_error_code = true; env->error_code = rvmcs(cpu->hvf_fd, VMCS_IDT_VECTORING_ERROR); +} else { +/* Make sure to clear has_error_code when error_code isn't valid. 
*/ +env->has_error_code = false; } } if ((rvmcs(cpu->hvf_fd, VMCS_GUEST_INTERRUPTIBILITY) & @@ -935,7 +943,7 @@ int hvf_vcpu_exec(CPUState *cpu) macvm_set_rip(cpu, rip + ins_len); break; case VMX_REASON_VMCALL: -env->exception_nr = EXCP0D_GPF; +env->exception_nr = VMCS_INTR_T_HWEXCEPTION | EXCP0D_GPF; env->exception_injected = 1; env->has_error_code = true; env->error_code = 0; diff --git a/target/i386/hvf/x86hvf.c b/target/i386/hvf/x86hvf.c index 1485b95776..f9187cee3f 100644 --- a/target/i386/hvf/x86hvf.c +++ b/target/i386/hvf/x86hvf.c @@ -345,8 +345,6 @@ void vmx_clear_int_window_exiting(CPUState *cpu) ~VMCS_PRI_PROC_BASED_CTLS_INT_WINDOW_EXITING); } -#define NMI_VEC 2 - bool hvf_inject_interrupts(CPUState *cpu_state) { X86CPU *x86cpu = X86_CPU(cpu_state); @@ -356,17 +354,15 @@ bool hvf_inject_interrupts(CPUState *cpu_state) uint64_t intr_type; bool have_event = true; if (env->interrupt_injected != -1) { -vector = env->interrupt_injected; -intr_type = VMCS_INTR_T_SWINTR; +/* Type and vector are both saved in interrupt_injected. */ +vector = env->interrupt_injected & VMCS_IDT_VEC_VECNUM; +intr_type = env->interrupt_injected & VMCS_IDT_VEC_TYPE; } else if (env->exception_nr != -1) { -vector = env->exception_nr; -if (vector == EXCP03_INT3 || vector == EXCP04_INTO) { -intr_type = VMCS_INTR_T_SWEXCEPTION; -} else { -intr_type = VMCS_INTR_T_HWEXCEPTION; -} +/* Type and vector are both saved in exception_nr. 
*/ +vector = env->exception_nr & VMCS_IDT_VEC_VECNUM; +intr_type = env->exception_nr & VMCS_IDT_VEC_TYPE; } else if (env->nmi_injected) { -vector = NMI_VEC; +vector = EXCP02_NMI; intr_type = VMCS_INTR_T_NMI; } else { have_event = false; @@ -390,6 +386,8 @@ bool hvf_inject_interrupts(CPUState *cpu_state) if (env->has_error_code) { wvmcs(cpu_state->hvf_fd, VMCS_ENTRY_EXCEPTION_ERROR, env->error_code); +/* Make sure to indicate that VMCS_ENTRY_EXCEPTION_ERROR is valid */ +info |= VMCS_INTR_DEL_ERRCODE; } /*printf("reinject %lx err %d\n", info, err);*/ wvmcs(cpu_state->hvf_fd, VMCS_ENTRY_INTR_INFO, info); @@ -399,7 +397,7 @@ bool hvf_inject_interrupts(CPUState *cpu_state) if (cpu_state->interrupt_request & CPU_INTERRUPT_NMI) { if (!(env->hflags2 & HF2_NMI
[PATCH 0/5] hvf: stability fixes for HVF
The following patches fix stability issues with running QEMU on Apple Hypervisor Framework (HVF): - non-RAM, non-ROMD areas need to trap so accesses can be correctly emulated. - Current TSC synchronization implementation is insufficient: when running with more than 1 core, TSC values can go backwards. Until a correct implementation can be written, remove calls to hv_vm_sync_tsc(). Pass through TSC to guest OS. - Fix REX emulation in relation to legacy prefixes. - More correctly match SDM when setting CR0 and PDPTE registers. - Save away exception type as well as vector in hvf_store_events() so they can be correctly reinjected in hvf_inject_interrupts(). Under heavy loads, exceptions got misrouted. Cameron Esfahani (5): hvf: non-RAM, non-ROMD memory ranges are now correctly mapped in hvf: remove TSC synchronization code because it isn't fully complete hvf: correctly handle REX prefix in relation to legacy prefixes hvf: more accurately match SDM when setting CR0 and PDPTE registers hvf: save away type as well as vector so we can reinject them target/i386/hvf/hvf.c| 74 +--- target/i386/hvf/vmx.h| 18 + target/i386/hvf/x86_decode.c | 55 ++- target/i386/hvf/x86_decode.h | 16 target/i386/hvf/x86_emu.c| 3 -- target/i386/hvf/x86hvf.c | 26 + 6 files changed, 108 insertions(+), 84 deletions(-) -- 2.24.0
[PATCH 2/5] hvf: remove TSC synchronization code because it isn't fully complete
The existing code in QEMU's HVF support to attempt to synchronize TSC across multiple cores is not sufficient. TSC value on other cores can go backwards. Until implementation is fixed, remove calls to hv_vm_sync_tsc(). Pass through TSC to guest OS. Signed-off-by: Cameron Esfahani --- target/i386/hvf/hvf.c | 3 +-- target/i386/hvf/x86_emu.c | 3 --- target/i386/hvf/x86hvf.c | 4 3 files changed, 1 insertion(+), 9 deletions(-) diff --git a/target/i386/hvf/hvf.c b/target/i386/hvf/hvf.c index 60c995470b..fda0273ba1 100644 --- a/target/i386/hvf/hvf.c +++ b/target/i386/hvf/hvf.c @@ -511,7 +511,6 @@ void hvf_reset_vcpu(CPUState *cpu) { wreg(cpu->hvf_fd, HV_X86_R8 + i, 0x0); } -hv_vm_sync_tsc(0); hv_vcpu_invalidate_tlb(cpu->hvf_fd); hv_vcpu_flush(cpu->hvf_fd); } @@ -605,7 +604,7 @@ int hvf_init_vcpu(CPUState *cpu) hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_GSBASE, 1); hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_KERNELGSBASE, 1); hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_TSC_AUX, 1); -/*hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_IA32_TSC, 1);*/ +hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_IA32_TSC, 1); hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_IA32_SYSENTER_CS, 1); hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_IA32_SYSENTER_EIP, 1); hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_IA32_SYSENTER_ESP, 1); diff --git a/target/i386/hvf/x86_emu.c b/target/i386/hvf/x86_emu.c index 1b04bd7e94..3df767209d 100644 --- a/target/i386/hvf/x86_emu.c +++ b/target/i386/hvf/x86_emu.c @@ -772,9 +772,6 @@ void simulate_wrmsr(struct CPUState *cpu) switch (msr) { case MSR_IA32_TSC: -/* if (!osx_is_sierra()) - wvmcs(cpu->hvf_fd, VMCS_TSC_OFFSET, data - rdtscp()); -hv_vm_sync_tsc(data);*/ break; case MSR_IA32_APICBASE: cpu_set_apic_base(X86_CPU(cpu)->apic_state, data); diff --git a/target/i386/hvf/x86hvf.c b/target/i386/hvf/x86hvf.c index e0ea02d631..1485b95776 100644 --- a/target/i386/hvf/x86hvf.c +++ b/target/i386/hvf/x86hvf.c @@ -152,10 +152,6 @@ void hvf_put_msrs(CPUState *cpu_state) 
hv_vcpu_write_msr(cpu_state->hvf_fd, MSR_GSBASE, env->segs[R_GS].base); hv_vcpu_write_msr(cpu_state->hvf_fd, MSR_FSBASE, env->segs[R_FS].base); - -/* if (!osx_is_sierra()) - wvmcs(cpu_state->hvf_fd, VMCS_TSC_OFFSET, env->tsc - rdtscp());*/ -hv_vm_sync_tsc(env->tsc); } -- 2.24.0