Re: [PATCH v5 10/16] audio/coreaudio: Remove a deprecation warning on macOS 12

2022-02-14 Thread Philippe Mathieu-Daudé via

On 15/2/22 07:53, Akihiko Odaki wrote:

On Tue, Feb 15, 2022 at 3:57 AM Philippe Mathieu-Daudé  wrote:


When building on macOS 12 we get:

   audio/coreaudio.c:50:5: error: 'kAudioObjectPropertyElementMaster' is 
deprecated: first deprecated in macOS 12.0 [-Werror,-Wdeprecated-declarations]
   kAudioObjectPropertyElementMaster
   ^
   kAudioObjectPropertyElementMain
   
/Library/Developer/CommandLineTools/SDKs/MacOSX.sdk/System/Library/Frameworks/CoreAudio.framework/Headers/AudioHardwareBase.h:208:5:
 note: 'kAudioObjectPropertyElementMaster' has been explicitly marked 
deprecated here
   kAudioObjectPropertyElementMaster 
API_DEPRECATED_WITH_REPLACEMENT("kAudioObjectPropertyElementMain", macos(10.0, 
12.0), ios(2.0, 15.0), watchos(1.0, 8.0), tvos(9.0, 15.0)) = 
kAudioObjectPropertyElementMain
   ^

Replace by kAudioObjectPropertyElementMain, redefining it to
kAudioObjectPropertyElementMaster if not available.

Suggested-by: Akihiko Odaki 
Suggested-by: Christian Schoenebeck 
Suggested-by: Roman Bolshakov 
Reviewed-by: Christian Schoenebeck 
Signed-off-by: Philippe Mathieu-Daudé 
---
  audio/coreaudio.c | 17 +++--
  1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/audio/coreaudio.c b/audio/coreaudio.c
index d8a21d3e50..5b3aeaced0 100644
--- a/audio/coreaudio.c
+++ b/audio/coreaudio.c
@@ -44,10 +44,15 @@ typedef struct coreaudioVoiceOut {
  bool enabled;
  } coreaudioVoiceOut;

+#if !defined(MAC_OS_VERSION_12_0) \
+|| (MAC_OS_X_VERSION_MAX_ALLOWED < MAC_OS_VERSION_12_0)
+#define kAudioObjectPropertyElementMain kAudioObjectPropertyElementMaster
+#endif
+


This still uses MAC_OS_X_VERSION_MAX_ALLOWED. Apparently the change to
use MAC_OS_X_VERSION_MIN_REQUIRED went to "[PATCH v5 16/16] gitlab-ci:
Support macOS 12 via cirrus-run".


Yet another failed rebase... Thanks for noticing it!




Re: [PATCH v5 10/16] audio/coreaudio: Remove a deprecation warning on macOS 12

2022-02-14 Thread Akihiko Odaki
On Tue, Feb 15, 2022 at 3:57 AM Philippe Mathieu-Daudé  wrote:
>
> When building on macOS 12 we get:
>
>   audio/coreaudio.c:50:5: error: 'kAudioObjectPropertyElementMaster' is 
> deprecated: first deprecated in macOS 12.0 [-Werror,-Wdeprecated-declarations]
>   kAudioObjectPropertyElementMaster
>   ^
>   kAudioObjectPropertyElementMain
>   
> /Library/Developer/CommandLineTools/SDKs/MacOSX.sdk/System/Library/Frameworks/CoreAudio.framework/Headers/AudioHardwareBase.h:208:5:
>  note: 'kAudioObjectPropertyElementMaster' has been explicitly marked 
> deprecated here
>   kAudioObjectPropertyElementMaster 
> API_DEPRECATED_WITH_REPLACEMENT("kAudioObjectPropertyElementMain", 
> macos(10.0, 12.0), ios(2.0, 15.0), watchos(1.0, 8.0), tvos(9.0, 15.0)) = 
> kAudioObjectPropertyElementMain
>   ^
>
> Replace by kAudioObjectPropertyElementMain, redefining it to
> kAudioObjectPropertyElementMaster if not available.
>
> Suggested-by: Akihiko Odaki 
> Suggested-by: Christian Schoenebeck 
> Suggested-by: Roman Bolshakov 
> Reviewed-by: Christian Schoenebeck 
> Signed-off-by: Philippe Mathieu-Daudé 
> ---
>  audio/coreaudio.c | 17 +++--
>  1 file changed, 11 insertions(+), 6 deletions(-)
>
> diff --git a/audio/coreaudio.c b/audio/coreaudio.c
> index d8a21d3e50..5b3aeaced0 100644
> --- a/audio/coreaudio.c
> +++ b/audio/coreaudio.c
> @@ -44,10 +44,15 @@ typedef struct coreaudioVoiceOut {
>  bool enabled;
>  } coreaudioVoiceOut;
>
> +#if !defined(MAC_OS_VERSION_12_0) \
> +|| (MAC_OS_X_VERSION_MAX_ALLOWED < MAC_OS_VERSION_12_0)
> +#define kAudioObjectPropertyElementMain kAudioObjectPropertyElementMaster
> +#endif
> +

This still uses MAC_OS_X_VERSION_MAX_ALLOWED. Apparently the change to
use MAC_OS_X_VERSION_MIN_REQUIRED went to "[PATCH v5 16/16] gitlab-ci:
Support macOS 12 via cirrus-run".

>  static const AudioObjectPropertyAddress voice_addr = {
>  kAudioHardwarePropertyDefaultOutputDevice,
>  kAudioObjectPropertyScopeGlobal,
> -kAudioObjectPropertyElementMaster
> +kAudioObjectPropertyElementMain
>  };
>
>  static OSStatus coreaudio_get_voice(AudioDeviceID *id)
> @@ -69,7 +74,7 @@ static OSStatus coreaudio_get_framesizerange(AudioDeviceID 
> id,
>  AudioObjectPropertyAddress addr = {
>  kAudioDevicePropertyBufferFrameSizeRange,
>  kAudioDevicePropertyScopeOutput,
> -kAudioObjectPropertyElementMaster
> +kAudioObjectPropertyElementMain
>  };
>
>  return AudioObjectGetPropertyData(id,
> @@ -86,7 +91,7 @@ static OSStatus coreaudio_get_framesize(AudioDeviceID id, 
> UInt32 *framesize)
>  AudioObjectPropertyAddress addr = {
>  kAudioDevicePropertyBufferFrameSize,
>  kAudioDevicePropertyScopeOutput,
> -kAudioObjectPropertyElementMaster
> +kAudioObjectPropertyElementMain
>  };
>
>  return AudioObjectGetPropertyData(id,
> @@ -103,7 +108,7 @@ static OSStatus coreaudio_set_framesize(AudioDeviceID id, 
> UInt32 *framesize)
>  AudioObjectPropertyAddress addr = {
>  kAudioDevicePropertyBufferFrameSize,
>  kAudioDevicePropertyScopeOutput,
> -kAudioObjectPropertyElementMaster
> +kAudioObjectPropertyElementMain
>  };
>
>  return AudioObjectSetPropertyData(id,
> @@ -121,7 +126,7 @@ static OSStatus coreaudio_set_streamformat(AudioDeviceID 
> id,
>  AudioObjectPropertyAddress addr = {
>  kAudioDevicePropertyStreamFormat,
>  kAudioDevicePropertyScopeOutput,
> -kAudioObjectPropertyElementMaster
> +kAudioObjectPropertyElementMain
>  };
>
>  return AudioObjectSetPropertyData(id,
> @@ -138,7 +143,7 @@ static OSStatus coreaudio_get_isrunning(AudioDeviceID id, 
> UInt32 *result)
>  AudioObjectPropertyAddress addr = {
>  kAudioDevicePropertyDeviceIsRunning,
>  kAudioDevicePropertyScopeOutput,
> -kAudioObjectPropertyElementMaster
> +kAudioObjectPropertyElementMain
>  };
>
>  return AudioObjectGetPropertyData(id,
> --
> 2.34.1
>



Re: [PULL 0/3] Block layer patches

2022-02-14 Thread Peter Maydell
On Fri, 11 Feb 2022 at 17:59, Kevin Wolf  wrote:
>
> The following changes since commit 0a301624c2f4ced3331ffd5bce85b4274fe132af:
>
>   Merge remote-tracking branch 
> 'remotes/pmaydell/tags/pull-target-arm-20220208' into staging (2022-02-08 
> 11:40:08 +)
>
> are available in the Git repository at:
>
>   https://gitlab.com/kmwolf/qemu.git tags/for-upstream
>
> for you to fetch changes up to fdb8541b2e4f6ff60f435fbb5a5e1df20c275a86:
>
>   hw/block/fdc-isa: Respect QOM properties when building AML (2022-02-11 
> 17:37:26 +0100)
>
> 
> Block layer patches
>
> - Fix crash in blockdev-reopen with iothreads
> - fdc-isa: Respect QOM properties when building AML
>
> 


Applied, thanks.

Please update the changelog at https://wiki.qemu.org/ChangeLog/7.0
for any user-visible changes.

-- PMM



Re: [PULL 10/24] iotest 065: explicit compression type

2022-02-14 Thread Thomas Huth

On 01/02/2022 15.42, Hanna Reitz wrote:

From: Vladimir Sementsov-Ogievskiy 

The test checks different options. It of course fails if set
IMGOPTS='compression_type=zstd'. So, let's be explicit in what
compression type we want and independent of IMGOPTS. Test both existing
compression types.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
Reviewed-by: Hanna Reitz 
Message-Id: <20211223160144.1097696-9-vsement...@virtuozzo.com>
Signed-off-by: Hanna Reitz 
---
  tests/qemu-iotests/065 | 16 
  1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/tests/qemu-iotests/065 b/tests/qemu-iotests/065
index dc7716275f..f7c1b68dad 100755
--- a/tests/qemu-iotests/065
+++ b/tests/qemu-iotests/065
@@ -88,7 +88,7 @@ class TestQMP(TestImageInfoSpecific):
  
  class TestQCow2(TestQemuImgInfo):

  '''Testing a qcow2 version 2 image'''
-img_options = 'compat=0.10'
+img_options = 'compat=0.10,compression_type=zlib'
  json_compare = { 'compat': '0.10', 'refcount-bits': 16,
   'compression-type': 'zlib' }
  human_compare = [ 'compat: 0.10', 'compression type: zlib',
@@ -96,17 +96,17 @@ class TestQCow2(TestQemuImgInfo):
  
  class TestQCow3NotLazy(TestQemuImgInfo):

  '''Testing a qcow2 version 3 image with lazy refcounts disabled'''
-img_options = 'compat=1.1,lazy_refcounts=off'
+img_options = 'compat=1.1,lazy_refcounts=off,compression_type=zstd'
  json_compare = { 'compat': '1.1', 'lazy-refcounts': False,
   'refcount-bits': 16, 'corrupt': False,
- 'compression-type': 'zlib', 'extended-l2': False }
-human_compare = [ 'compat: 1.1', 'compression type: zlib',
+ 'compression-type': 'zstd', 'extended-l2': False }
+human_compare = [ 'compat: 1.1', 'compression type: zstd',
'lazy refcounts: false', 'refcount bits: 16',
'corrupt: false', 'extended l2: false' ]


iotest 065 is currently failing on my laptop - and I think this patch here 
is the reason (thanks to jsnow for helping me with the debugging): I don't 
have libzstd-devel installed on my system, so my QEMU does not feature the 
zstd compression algorithm. But this tests wants to use it, without checking 
its availability first. Could you please add such a check so that it works 
without zstd, too?


 Thomas




[PATCH v5 13/16] ui/cocoa: Add Services menu

2022-02-14 Thread Philippe Mathieu-Daudé via
From: Akihiko Odaki 

Services menu functionality of Cocoa is described at:
https://developer.apple.com/design/human-interface-guidelines/macos/extensions/services/

Signed-off-by: Akihiko Odaki 
Reviewed-by: Philippe Mathieu-Daudé 
Tested-by: Philippe Mathieu-Daudé 
Message-Id: <20220214091320.51750-1-akihiko.od...@gmail.com>
Signed-off-by: Philippe Mathieu-Daudé 
---
 ui/cocoa.m | 4 
 1 file changed, 4 insertions(+)

diff --git a/ui/cocoa.m b/ui/cocoa.m
index 7a1ddd4075..becca58cb7 100644
--- a/ui/cocoa.m
+++ b/ui/cocoa.m
@@ -1591,11 +1591,15 @@ static void create_initial_menus(void)
 NSMenuItem  *menuItem;
 
 [NSApp setMainMenu:[[NSMenu alloc] init]];
+[NSApp setServicesMenu:[[NSMenu alloc] initWithTitle:@"Services"]];
 
 // Application menu
 menu = [[NSMenu alloc] initWithTitle:@""];
 [menu addItemWithTitle:@"About QEMU" action:@selector(do_about_menu_item:) 
keyEquivalent:@""]; // About QEMU
 [menu addItem:[NSMenuItem separatorItem]]; //Separator
+menuItem = [menu addItemWithTitle:@"Services" action:nil 
keyEquivalent:@""];
+[menuItem setSubmenu:[NSApp servicesMenu]];
+[menu addItem:[NSMenuItem separatorItem]];
 [menu addItemWithTitle:@"Hide QEMU" action:@selector(hide:) 
keyEquivalent:@"h"]; //Hide QEMU
 menuItem = (NSMenuItem *)[menu addItemWithTitle:@"Hide Others" 
action:@selector(hideOtherApplications:) keyEquivalent:@"h"]; // Hide Others
 [menuItem 
setKeyEquivalentModifierMask:(NSEventModifierFlagOption|NSEventModifierFlagCommand)];
-- 
2.34.1




[PATCH v5 11/16] audio/dbus: Fix building with modules on macOS

2022-02-14 Thread Philippe Mathieu-Daudé via
When configuring QEMU with --enable-modules we get on macOS:

  --- stderr ---
  Dependency ui-dbus cannot be satisfied

ui-dbus depends on pixman and opengl, so add these dependencies
to audio-dbus.

Fixes: 739362d420 ("audio: add "dbus" audio backend")
Reviewed-by: Li Zhang 
Signed-off-by: Philippe Mathieu-Daudé 
---
 audio/meson.build | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/audio/meson.build b/audio/meson.build
index 0ac3791d0b..d9b295514f 100644
--- a/audio/meson.build
+++ b/audio/meson.build
@@ -28,7 +28,7 @@ endforeach
 
 if dbus_display
 module_ss = ss.source_set()
-module_ss.add(when: gio, if_true: files('dbusaudio.c'))
+module_ss.add(when: [gio, pixman, opengl, 'CONFIG_GIO'], if_true: 
files('dbusaudio.c'))
 audio_modules += {'dbus': module_ss}
 endif
 
-- 
2.34.1




[PATCH v5 05/16] hvf: Fix OOB write in RDTSCP instruction decode

2022-02-14 Thread Philippe Mathieu-Daudé via
From: Cameron Esfahani 

A guest could craft a specific stream of instructions that will have QEMU
write 0xF9 to inappropriate locations in memory.  Add additional asserts
to check for this.  Generate a #UD if there are more than 14 prefix bytes.

Found by Julian Stecklina 

Signed-off-by: Cameron Esfahani 
Signed-off-by: Philippe Mathieu-Daudé 
---
 target/i386/hvf/x86_decode.c | 11 +--
 target/i386/hvf/x86hvf.c |  8 
 target/i386/hvf/x86hvf.h |  1 +
 3 files changed, 18 insertions(+), 2 deletions(-)

diff --git a/target/i386/hvf/x86_decode.c b/target/i386/hvf/x86_decode.c
index 062713b1a4..fbaf1813e8 100644
--- a/target/i386/hvf/x86_decode.c
+++ b/target/i386/hvf/x86_decode.c
@@ -24,6 +24,7 @@
 #include "vmx.h"
 #include "x86_mmu.h"
 #include "x86_descr.h"
+#include "x86hvf.h"
 
 #define OPCODE_ESCAPE   0xf
 
@@ -541,7 +542,8 @@ static void decode_lidtgroup(CPUX86State *env, struct 
x86_decode *decode)
 };
 decode->cmd = group[decode->modrm.reg];
 if (0xf9 == decode->modrm.modrm) {
-decode->opcode[decode->len++] = decode->modrm.modrm;
+VM_PANIC_ON(decode->opcode_len >= sizeof(decode->opcode));
+decode->opcode[decode->opcode_len++] = decode->modrm.modrm;
 decode->cmd = X86_DECODE_CMD_RDTSCP;
 }
 }
@@ -1847,7 +1849,8 @@ void calc_modrm_operand(CPUX86State *env, struct 
x86_decode *decode,
 
 static void decode_prefix(CPUX86State *env, struct x86_decode *decode)
 {
-while (1) {
+/* At most 14 prefix bytes. */
+for (int i = 0; i < 14; i++) {
 /*
  * REX prefix must come after legacy prefixes.
  * REX before legacy is ignored.
@@ -1892,6 +1895,8 @@ static void decode_prefix(CPUX86State *env, struct 
x86_decode *decode)
 return;
 }
 }
+/* Too many prefixes!  Generate #UD. */
+hvf_inject_ud(env);
 }
 
 void set_addressing_size(CPUX86State *env, struct x86_decode *decode)
@@ -2090,11 +2095,13 @@ static void decode_opcodes(CPUX86State *env, struct 
x86_decode *decode)
 uint8_t opcode;
 
 opcode = decode_byte(env, decode);
+VM_PANIC_ON(decode->opcode_len >= sizeof(decode->opcode));
 decode->opcode[decode->opcode_len++] = opcode;
 if (opcode != OPCODE_ESCAPE) {
 decode_opcode_1(env, decode, opcode);
 } else {
 opcode = decode_byte(env, decode);
+VM_PANIC_ON(decode->opcode_len >= sizeof(decode->opcode));
 decode->opcode[decode->opcode_len++] = opcode;
 decode_opcode_2(env, decode, opcode);
 }
diff --git a/target/i386/hvf/x86hvf.c b/target/i386/hvf/x86hvf.c
index 05ec1bddc4..a805c125d9 100644
--- a/target/i386/hvf/x86hvf.c
+++ b/target/i386/hvf/x86hvf.c
@@ -425,6 +425,14 @@ bool hvf_inject_interrupts(CPUState *cpu_state)
 & (CPU_INTERRUPT_INIT | CPU_INTERRUPT_TPR));
 }
 
+void hvf_inject_ud(CPUX86State *env)
+{
+env->exception_nr = EXCP06_ILLOP;
+env->exception_injected = 1;
+env->has_error_code = false;
+env->error_code = 0;
+}
+
 int hvf_process_events(CPUState *cpu_state)
 {
 X86CPU *cpu = X86_CPU(cpu_state);
diff --git a/target/i386/hvf/x86hvf.h b/target/i386/hvf/x86hvf.h
index 99ed8d608d..ef472a32f9 100644
--- a/target/i386/hvf/x86hvf.h
+++ b/target/i386/hvf/x86hvf.h
@@ -22,6 +22,7 @@
 
 int hvf_process_events(CPUState *);
 bool hvf_inject_interrupts(CPUState *);
+void hvf_inject_ud(CPUX86State *);
 void hvf_set_segment(struct CPUState *cpu, struct vmx_segment *vmx_seg,
  SegmentCache *qseg, bool is_tr);
 void hvf_get_segment(SegmentCache *qseg, struct vmx_segment *vmx_seg);
-- 
2.34.1




[PATCH v5 04/16] hvf: Use standard CR0 and CR4 register definitions

2022-02-14 Thread Philippe Mathieu-Daudé via
From: Cameron Esfahani 

No need to have our own definitions of these registers.

Signed-off-by: Cameron Esfahani 
Reviewed-by: Philippe Mathieu-Daudé 
Signed-off-by: Philippe Mathieu-Daudé 
---
 target/i386/hvf/vmx.h  | 17 +
 target/i386/hvf/x86.c  |  6 +++---
 target/i386/hvf/x86.h  | 34 --
 target/i386/hvf/x86_mmu.c  |  2 +-
 target/i386/hvf/x86_task.c |  3 ++-
 5 files changed, 15 insertions(+), 47 deletions(-)

diff --git a/target/i386/hvf/vmx.h b/target/i386/hvf/vmx.h
index 6df87116f6..29b7deed3c 100644
--- a/target/i386/hvf/vmx.h
+++ b/target/i386/hvf/vmx.h
@@ -124,10 +124,11 @@ static inline void macvm_set_cr0(hv_vcpuid_t vcpu, 
uint64_t cr0)
 uint64_t efer = rvmcs(vcpu, VMCS_GUEST_IA32_EFER);
 uint64_t old_cr0 = rvmcs(vcpu, VMCS_GUEST_CR0);
 uint64_t changed_cr0 = old_cr0 ^ cr0;
-uint64_t mask = CR0_PG | CR0_CD | CR0_NW | CR0_NE | CR0_ET;
+uint64_t mask = CR0_PG_MASK | CR0_CD_MASK | CR0_NW_MASK |
+CR0_NE_MASK | CR0_ET_MASK;
 uint64_t entry_ctls;
 
-if ((cr0 & CR0_PG) && (rvmcs(vcpu, VMCS_GUEST_CR4) & CR4_PAE) &&
+if ((cr0 & CR0_PG_MASK) && (rvmcs(vcpu, VMCS_GUEST_CR4) & CR4_PAE_MASK) &&
 !(efer & MSR_EFER_LME)) {
 address_space_read(&address_space_memory,
rvmcs(vcpu, VMCS_GUEST_CR3) & ~0x1f,
@@ -142,8 +143,8 @@ static inline void macvm_set_cr0(hv_vcpuid_t vcpu, uint64_t 
cr0)
 wvmcs(vcpu, VMCS_CR0_SHADOW, cr0);
 
 if (efer & MSR_EFER_LME) {
-if (changed_cr0 & CR0_PG) {
-if (cr0 & CR0_PG) {
+if (changed_cr0 & CR0_PG_MASK) {
+if (cr0 & CR0_PG_MASK) {
 enter_long_mode(vcpu, cr0, efer);
 } else {
 exit_long_mode(vcpu, cr0, efer);
@@ -155,8 +156,8 @@ static inline void macvm_set_cr0(hv_vcpuid_t vcpu, uint64_t 
cr0)
 }
 
 /* Filter new CR0 after we are finished examining it above. */
-cr0 = (cr0 & ~(mask & ~CR0_PG));
-wvmcs(vcpu, VMCS_GUEST_CR0, cr0 | CR0_NE | CR0_ET);
+cr0 = (cr0 & ~(mask & ~CR0_PG_MASK));
+wvmcs(vcpu, VMCS_GUEST_CR0, cr0 | CR0_NE_MASK | CR0_ET_MASK);
 
 hv_vcpu_invalidate_tlb(vcpu);
 hv_vcpu_flush(vcpu);
@@ -164,11 +165,11 @@ static inline void macvm_set_cr0(hv_vcpuid_t vcpu, 
uint64_t cr0)
 
 static inline void macvm_set_cr4(hv_vcpuid_t vcpu, uint64_t cr4)
 {
-uint64_t guest_cr4 = cr4 | CR4_VMXE;
+uint64_t guest_cr4 = cr4 | CR4_VMXE_MASK;
 
 wvmcs(vcpu, VMCS_GUEST_CR4, guest_cr4);
 wvmcs(vcpu, VMCS_CR4_SHADOW, cr4);
-wvmcs(vcpu, VMCS_CR4_MASK, CR4_VMXE);
+wvmcs(vcpu, VMCS_CR4_MASK, CR4_VMXE_MASK);
 
 hv_vcpu_invalidate_tlb(vcpu);
 hv_vcpu_flush(vcpu);
diff --git a/target/i386/hvf/x86.c b/target/i386/hvf/x86.c
index 2898bb70a8..91a3fe002c 100644
--- a/target/i386/hvf/x86.c
+++ b/target/i386/hvf/x86.c
@@ -119,7 +119,7 @@ bool x86_read_call_gate(struct CPUState *cpu, struct 
x86_call_gate *idt_desc,
 bool x86_is_protected(struct CPUState *cpu)
 {
 uint64_t cr0 = rvmcs(cpu->hvf->fd, VMCS_GUEST_CR0);
-return cr0 & CR0_PE;
+return cr0 & CR0_PE_MASK;
 }
 
 bool x86_is_real(struct CPUState *cpu)
@@ -150,13 +150,13 @@ bool x86_is_long64_mode(struct CPUState *cpu)
 bool x86_is_paging_mode(struct CPUState *cpu)
 {
 uint64_t cr0 = rvmcs(cpu->hvf->fd, VMCS_GUEST_CR0);
-return cr0 & CR0_PG;
+return cr0 & CR0_PG_MASK;
 }
 
 bool x86_is_pae_enabled(struct CPUState *cpu)
 {
 uint64_t cr4 = rvmcs(cpu->hvf->fd, VMCS_GUEST_CR4);
-return cr4 & CR4_PAE;
+return cr4 & CR4_PAE_MASK;
 }
 
 target_ulong linear_addr(struct CPUState *cpu, target_ulong addr, X86Seg seg)
diff --git a/target/i386/hvf/x86.h b/target/i386/hvf/x86.h
index 782664c2ea..947b98da41 100644
--- a/target/i386/hvf/x86.h
+++ b/target/i386/hvf/x86.h
@@ -42,40 +42,6 @@ typedef struct x86_register {
 };
 } __attribute__ ((__packed__)) x86_register;
 
-typedef enum x86_reg_cr0 {
-CR0_PE =(1L << 0),
-CR0_MP =(1L << 1),
-CR0_EM =(1L << 2),
-CR0_TS =(1L << 3),
-CR0_ET =(1L << 4),
-CR0_NE =(1L << 5),
-CR0_WP =(1L << 16),
-CR0_AM =(1L << 18),
-CR0_NW =(1L << 29),
-CR0_CD =(1L << 30),
-CR0_PG =(1L << 31),
-} x86_reg_cr0;
-
-typedef enum x86_reg_cr4 {
-CR4_VME =(1L << 0),
-CR4_PVI =(1L << 1),
-CR4_TSD =(1L << 2),
-CR4_DE  =(1L << 3),
-CR4_PSE =(1L << 4),
-CR4_PAE =(1L << 5),
-CR4_MSE =(1L << 6),
-CR4_PGE =(1L << 7),
-CR4_PCE =(1L << 8),
-CR4_OSFXSR = (1L << 9),
-CR4_OSXMMEXCPT = (1L << 10),
-CR4_VMXE =   (1L << 13),
-CR4_SMXE =   (1L << 14),
-CR4_FSGSBASE =   (1L << 16),
-CR4_PCIDE =  (1L << 17),
-CR4_OSXSAVE =(1L << 18

[PATCH v5 15/16] lcitool: refresh

2022-02-14 Thread Philippe Mathieu-Daudé via
Signed-off-by: Philippe Mathieu-Daudé 
---
 tests/docker/dockerfiles/ubuntu1804.docker | 2 --
 tests/docker/dockerfiles/ubuntu2004.docker | 2 --
 2 files changed, 4 deletions(-)

diff --git a/tests/docker/dockerfiles/ubuntu1804.docker 
b/tests/docker/dockerfiles/ubuntu1804.docker
index 699f2dfc6a..040938277a 100644
--- a/tests/docker/dockerfiles/ubuntu1804.docker
+++ b/tests/docker/dockerfiles/ubuntu1804.docker
@@ -65,7 +65,6 @@ RUN export DEBIAN_FRONTEND=noninteractive && \
 libpam0g-dev \
 libpcre2-dev \
 libpixman-1-dev \
-libpmem-dev \
 libpng-dev \
 libpulse-dev \
 librbd-dev \
@@ -89,7 +88,6 @@ RUN export DEBIAN_FRONTEND=noninteractive && \
 libvdeplug-dev \
 libvirglrenderer-dev \
 libvte-2.91-dev \
-libxen-dev \
 libzstd-dev \
 llvm \
 locales \
diff --git a/tests/docker/dockerfiles/ubuntu2004.docker 
b/tests/docker/dockerfiles/ubuntu2004.docker
index 87513125b8..159e7f60c9 100644
--- a/tests/docker/dockerfiles/ubuntu2004.docker
+++ b/tests/docker/dockerfiles/ubuntu2004.docker
@@ -66,7 +66,6 @@ RUN export DEBIAN_FRONTEND=noninteractive && \
 libpam0g-dev \
 libpcre2-dev \
 libpixman-1-dev \
-libpmem-dev \
 libpng-dev \
 libpulse-dev \
 librbd-dev \
@@ -91,7 +90,6 @@ RUN export DEBIAN_FRONTEND=noninteractive && \
 libvdeplug-dev \
 libvirglrenderer-dev \
 libvte-2.91-dev \
-libxen-dev \
 libzstd-dev \
 llvm \
 locales \
-- 
2.34.1




[PATCH v5 16/16] gitlab-ci: Support macOS 12 via cirrus-run

2022-02-14 Thread Philippe Mathieu-Daudé via
Add support for macOS 12 build on Cirrus-CI, similarly to commit
0e103a65ba1 ("gitlab: support for ... macOS 11 via cirrus-run"),
but with the following differences:
 - Enable modules (configure --enable-modules)
 - Do not run softfloat3 tests (make check-softfloat)
 - Run Aarch64 qtests instead of x86_64 ones

Generate the vars file by calling 'make lcitool-refresh'.

Signed-off-by: Philippe Mathieu-Daudé 
---
 .gitlab-ci.d/cirrus.yml   | 16 
 .gitlab-ci.d/cirrus/macos-12.vars | 16 
 audio/coreaudio.c |  2 +-
 tests/lcitool/refresh |  1 +
 4 files changed, 34 insertions(+), 1 deletion(-)
 create mode 100644 .gitlab-ci.d/cirrus/macos-12.vars

diff --git a/.gitlab-ci.d/cirrus.yml b/.gitlab-ci.d/cirrus.yml
index b96b22e269..be1dce5d4e 100644
--- a/.gitlab-ci.d/cirrus.yml
+++ b/.gitlab-ci.d/cirrus.yml
@@ -87,6 +87,22 @@ x64-macos-11-base-build:
 PKG_CONFIG_PATH: 
/usr/local/opt/curl/lib/pkgconfig:/usr/local/opt/ncurses/lib/pkgconfig:/usr/local/opt/readline/lib/pkgconfig
 TEST_TARGETS: check-unit check-block check-qapi-schema check-softfloat 
check-qtest-x86_64
 
+x64-macos-12-base-build:
+  extends: .cirrus_build_job
+  variables:
+NAME: macos-12
+CIRRUS_VM_INSTANCE_TYPE: osx_instance
+CIRRUS_VM_IMAGE_SELECTOR: image
+CIRRUS_VM_IMAGE_NAME: monterey-base
+CIRRUS_VM_CPUS: 12
+CIRRUS_VM_RAM: 24G
+UPDATE_COMMAND: brew update
+INSTALL_COMMAND: brew install
+PATH_EXTRA: /usr/local/opt/ccache/libexec:/usr/local/opt/gettext/bin
+PKG_CONFIG_PATH: 
/usr/local/opt/curl/lib/pkgconfig:/usr/local/opt/ncurses/lib/pkgconfig:/usr/local/opt/readline/lib/pkgconfig
+CONFIGURE_ARGS: --enable-modules
+TEST_TARGETS: check-unit check-block check-qapi-schema check-qtest-aarch64
+
 
 # The following jobs run VM-based tests via KVM on a Linux-based Cirrus-CI job
 .cirrus_kvm_job:
diff --git a/.gitlab-ci.d/cirrus/macos-12.vars 
b/.gitlab-ci.d/cirrus/macos-12.vars
new file mode 100644
index 00..a793258c64
--- /dev/null
+++ b/.gitlab-ci.d/cirrus/macos-12.vars
@@ -0,0 +1,16 @@
+# THIS FILE WAS AUTO-GENERATED
+#
+#  $ lcitool variables macos-12 qemu
+#
+# https://gitlab.com/libvirt/libvirt-ci
+
+CCACHE='/usr/local/bin/ccache'
+CPAN_PKGS='Test::Harness'
+CROSS_PKGS=''
+MAKE='/usr/local/bin/gmake'
+NINJA='/usr/local/bin/ninja'
+PACKAGING_COMMAND='brew'
+PIP3='/usr/local/bin/pip3'
+PKGS='bash bc bzip2 capstone ccache cpanminus ctags curl dbus diffutils dtc 
gcovr gettext git glib gnu-sed gnutls gtk+3 jemalloc jpeg-turbo libepoxy libffi 
libgcrypt libiscsi libnfs libpng libslirp libssh libtasn1 libusb llvm lzo make 
meson ncurses nettle ninja perl pixman pkg-config python3 rpm2cpio sdl2 
sdl2_image snappy sparse spice-protocol tesseract texinfo usbredir vde vte3 
zlib zstd'
+PYPI_PKGS='PyYAML numpy pillow sphinx sphinx-rtd-theme virtualenv'
+PYTHON='/usr/local/bin/python3'
diff --git a/audio/coreaudio.c b/audio/coreaudio.c
index 5b3aeaced0..1faef7fa7a 100644
--- a/audio/coreaudio.c
+++ b/audio/coreaudio.c
@@ -45,7 +45,7 @@ typedef struct coreaudioVoiceOut {
 } coreaudioVoiceOut;
 
 #if !defined(MAC_OS_VERSION_12_0) \
-|| (MAC_OS_X_VERSION_MAX_ALLOWED < MAC_OS_VERSION_12_0)
+|| (MAC_OS_X_VERSION_MIN_REQUIRED < MAC_OS_VERSION_12_0)
 #define kAudioObjectPropertyElementMain kAudioObjectPropertyElementMaster
 #endif
 
diff --git a/tests/lcitool/refresh b/tests/lcitool/refresh
index 4ab90a310a..a714e2851d 100755
--- a/tests/lcitool/refresh
+++ b/tests/lcitool/refresh
@@ -89,6 +89,7 @@ try:
generate_cirrus("freebsd-12")
generate_cirrus("freebsd-13")
generate_cirrus("macos-11")
+   generate_cirrus("macos-12")
 
sys.exit(0)
 except Exception as ex:
-- 
2.34.1




[PATCH v5 14/16] ui/cocoa: Do not alert even without block devices

2022-02-14 Thread Philippe Mathieu-Daudé via
From: Akihiko Odaki 

Signed-off-by: Akihiko Odaki 
Message-Id: <20220213021418.2155-1-akihiko.od...@gmail.com>
Signed-off-by: Philippe Mathieu-Daudé 
---
 ui/cocoa.m | 5 -
 1 file changed, 5 deletions(-)

diff --git a/ui/cocoa.m b/ui/cocoa.m
index becca58cb7..6cadd43309 100644
--- a/ui/cocoa.m
+++ b/ui/cocoa.m
@@ -1713,11 +1713,6 @@ static void addRemovableDevicesMenuItems(void)
 
 currentDevice = qmp_query_block(NULL);
 pointerToFree = currentDevice;
-if(currentDevice == NULL) {
-NSBeep();
-QEMU_Alert(@"Failed to query for block devices!");
-return;
-}
 
 menu = [[[NSApp mainMenu] itemWithTitle:@"Machine"] submenu];
 
-- 
2.34.1




[PATCH v5 08/16] hvf: Remove deprecated hv_vcpu_flush() calls

2022-02-14 Thread Philippe Mathieu-Daudé via
When building on macOS 11 [*], we get:

  In file included from ../target/i386/hvf/hvf.c:59:
  ../target/i386/hvf/vmx.h:174:5: error: 'hv_vcpu_flush' is deprecated: first 
deprecated in macOS 11.0 - This API has no effect and always returns 
HV_UNSUPPORTED [-Werror,-Wdeprecated-declarations]
  hv_vcpu_flush(vcpu);
  ^
  
/Library/Developer/CommandLineTools/SDKs/MacOSX.sdk/System/Library/Frameworks/Hypervisor.framework/Headers/hv.h:364:20:
 note: 'hv_vcpu_flush' has been explicitly marked deprecated here
  extern hv_return_t hv_vcpu_flush(hv_vcpuid_t vcpu)
 ^

Since this call "has no effect", simply remove it ¯\_(ツ)_/¯

Not very useful deprecation doc:
https://developer.apple.com/documentation/hypervisor/1441386-hv_vcpu_flush

[*] Also 10.15 (Catalina):
https://lore.kernel.org/qemu-devel/yd3dmsqz1sijw...@roolebo.dev/

Reviewed-by: Roman Bolshakov 
Tested-by: Roman Bolshakov 
Signed-off-by: Philippe Mathieu-Daudé 
---
 target/i386/hvf/vmx.h  | 2 --
 target/i386/hvf/x86_task.c | 1 -
 target/i386/hvf/x86hvf.c   | 2 --
 3 files changed, 5 deletions(-)

diff --git a/target/i386/hvf/vmx.h b/target/i386/hvf/vmx.h
index 29b7deed3c..573ddc33c0 100644
--- a/target/i386/hvf/vmx.h
+++ b/target/i386/hvf/vmx.h
@@ -160,7 +160,6 @@ static inline void macvm_set_cr0(hv_vcpuid_t vcpu, uint64_t 
cr0)
 wvmcs(vcpu, VMCS_GUEST_CR0, cr0 | CR0_NE_MASK | CR0_ET_MASK);
 
 hv_vcpu_invalidate_tlb(vcpu);
-hv_vcpu_flush(vcpu);
 }
 
 static inline void macvm_set_cr4(hv_vcpuid_t vcpu, uint64_t cr4)
@@ -172,7 +171,6 @@ static inline void macvm_set_cr4(hv_vcpuid_t vcpu, uint64_t 
cr4)
 wvmcs(vcpu, VMCS_CR4_MASK, CR4_VMXE_MASK);
 
 hv_vcpu_invalidate_tlb(vcpu);
-hv_vcpu_flush(vcpu);
 }
 
 static inline void macvm_set_rip(CPUState *cpu, uint64_t rip)
diff --git a/target/i386/hvf/x86_task.c b/target/i386/hvf/x86_task.c
index e1301599e9..d24daf6a41 100644
--- a/target/i386/hvf/x86_task.c
+++ b/target/i386/hvf/x86_task.c
@@ -182,5 +182,4 @@ void vmx_handle_task_switch(CPUState *cpu, 
x68_segment_selector tss_sel, int rea
 store_regs(cpu);
 
 hv_vcpu_invalidate_tlb(cpu->hvf->fd);
-hv_vcpu_flush(cpu->hvf->fd);
 }
diff --git a/target/i386/hvf/x86hvf.c b/target/i386/hvf/x86hvf.c
index cff59f3144..a338c207b7 100644
--- a/target/i386/hvf/x86hvf.c
+++ b/target/i386/hvf/x86hvf.c
@@ -125,8 +125,6 @@ static void hvf_put_segments(CPUState *cpu_state)
 
 hvf_set_segment(cpu_state, &seg, &env->ldt, false);
 vmx_write_segment_descriptor(cpu_state, &seg, R_LDTR);
-
-hv_vcpu_flush(cpu_state->hvf->fd);
 }
 
 void hvf_put_msrs(CPUState *cpu_state)
-- 
2.34.1




[PATCH v5 10/16] audio/coreaudio: Remove a deprecation warning on macOS 12

2022-02-14 Thread Philippe Mathieu-Daudé via
When building on macOS 12 we get:

  audio/coreaudio.c:50:5: error: 'kAudioObjectPropertyElementMaster' is 
deprecated: first deprecated in macOS 12.0 [-Werror,-Wdeprecated-declarations]
  kAudioObjectPropertyElementMaster
  ^
  kAudioObjectPropertyElementMain
  
/Library/Developer/CommandLineTools/SDKs/MacOSX.sdk/System/Library/Frameworks/CoreAudio.framework/Headers/AudioHardwareBase.h:208:5:
 note: 'kAudioObjectPropertyElementMaster' has been explicitly marked 
deprecated here
  kAudioObjectPropertyElementMaster 
API_DEPRECATED_WITH_REPLACEMENT("kAudioObjectPropertyElementMain", macos(10.0, 
12.0), ios(2.0, 15.0), watchos(1.0, 8.0), tvos(9.0, 15.0)) = 
kAudioObjectPropertyElementMain
  ^

Replace by kAudioObjectPropertyElementMain, redefining it to
kAudioObjectPropertyElementMaster if not available.

Suggested-by: Akihiko Odaki 
Suggested-by: Christian Schoenebeck 
Suggested-by: Roman Bolshakov 
Reviewed-by: Christian Schoenebeck 
Signed-off-by: Philippe Mathieu-Daudé 
---
 audio/coreaudio.c | 17 +++--
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/audio/coreaudio.c b/audio/coreaudio.c
index d8a21d3e50..5b3aeaced0 100644
--- a/audio/coreaudio.c
+++ b/audio/coreaudio.c
@@ -44,10 +44,15 @@ typedef struct coreaudioVoiceOut {
 bool enabled;
 } coreaudioVoiceOut;
 
+#if !defined(MAC_OS_VERSION_12_0) \
+|| (MAC_OS_X_VERSION_MAX_ALLOWED < MAC_OS_VERSION_12_0)
+#define kAudioObjectPropertyElementMain kAudioObjectPropertyElementMaster
+#endif
+
 static const AudioObjectPropertyAddress voice_addr = {
 kAudioHardwarePropertyDefaultOutputDevice,
 kAudioObjectPropertyScopeGlobal,
-kAudioObjectPropertyElementMaster
+kAudioObjectPropertyElementMain
 };
 
 static OSStatus coreaudio_get_voice(AudioDeviceID *id)
@@ -69,7 +74,7 @@ static OSStatus coreaudio_get_framesizerange(AudioDeviceID id,
 AudioObjectPropertyAddress addr = {
 kAudioDevicePropertyBufferFrameSizeRange,
 kAudioDevicePropertyScopeOutput,
-kAudioObjectPropertyElementMaster
+kAudioObjectPropertyElementMain
 };
 
 return AudioObjectGetPropertyData(id,
@@ -86,7 +91,7 @@ static OSStatus coreaudio_get_framesize(AudioDeviceID id, 
UInt32 *framesize)
 AudioObjectPropertyAddress addr = {
 kAudioDevicePropertyBufferFrameSize,
 kAudioDevicePropertyScopeOutput,
-kAudioObjectPropertyElementMaster
+kAudioObjectPropertyElementMain
 };
 
 return AudioObjectGetPropertyData(id,
@@ -103,7 +108,7 @@ static OSStatus coreaudio_set_framesize(AudioDeviceID id, 
UInt32 *framesize)
 AudioObjectPropertyAddress addr = {
 kAudioDevicePropertyBufferFrameSize,
 kAudioDevicePropertyScopeOutput,
-kAudioObjectPropertyElementMaster
+kAudioObjectPropertyElementMain
 };
 
 return AudioObjectSetPropertyData(id,
@@ -121,7 +126,7 @@ static OSStatus coreaudio_set_streamformat(AudioDeviceID id,
 AudioObjectPropertyAddress addr = {
 kAudioDevicePropertyStreamFormat,
 kAudioDevicePropertyScopeOutput,
-kAudioObjectPropertyElementMaster
+kAudioObjectPropertyElementMain
 };
 
 return AudioObjectSetPropertyData(id,
@@ -138,7 +143,7 @@ static OSStatus coreaudio_get_isrunning(AudioDeviceID id, 
UInt32 *result)
 AudioObjectPropertyAddress addr = {
 kAudioDevicePropertyDeviceIsRunning,
 kAudioDevicePropertyScopeOutput,
-kAudioObjectPropertyElementMaster
+kAudioObjectPropertyElementMain
 };
 
 return AudioObjectGetPropertyData(id,
-- 
2.34.1




[PATCH v5 09/16] block/file-posix: Remove a deprecation warning on macOS 12

2022-02-14 Thread Philippe Mathieu-Daudé via
When building on macOS 12 we get:

  block/file-posix.c:3335:18: warning: 'IOMasterPort' is deprecated: first 
deprecated in macOS 12.0 [-Wdeprecated-declarations]
  kernResult = IOMasterPort( MACH_PORT_NULL, &masterPort );
   ^~~~
   IOMainPort

Replace by IOMainPort, redefining it to IOMasterPort if not available.

Suggested-by: Akihiko Odaki 
Reviewed-by: Christian Schoenebeck 
Reviewed by: Cameron Esfahani 
Signed-off-by: Philippe Mathieu-Daudé 
---
 block/file-posix.c | 14 ++
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/block/file-posix.c b/block/file-posix.c
index 1f1756e192..13393ad296 100644
--- a/block/file-posix.c
+++ b/block/file-posix.c
@@ -3319,17 +3319,23 @@ BlockDriver bdrv_file = {
 #if defined(__APPLE__) && defined(__MACH__)
 static kern_return_t GetBSDPath(io_iterator_t mediaIterator, char *bsdPath,
 CFIndex maxPathSize, int flags);
+
+#if !defined(MAC_OS_VERSION_12_0) \
+|| (MAC_OS_X_VERSION_MIN_REQUIRED < MAC_OS_VERSION_12_0)
+#define IOMainPort IOMasterPort
+#endif
+
 static char *FindEjectableOpticalMedia(io_iterator_t *mediaIterator)
 {
 kern_return_t kernResult = KERN_FAILURE;
-mach_port_t masterPort;
+mach_port_t mainPort;
 CFMutableDictionaryRef  classesToMatch;
 const char *matching_array[] = {kIODVDMediaClass, kIOCDMediaClass};
 char *mediaType = NULL;
 
-kernResult = IOMasterPort( MACH_PORT_NULL, &masterPort );
+kernResult = IOMainPort(MACH_PORT_NULL, &mainPort);
 if ( KERN_SUCCESS != kernResult ) {
-printf( "IOMasterPort returned %d\n", kernResult );
+printf("IOMainPort returned %d\n", kernResult);
 }
 
 int index;
@@ -3342,7 +3348,7 @@ static char *FindEjectableOpticalMedia(io_iterator_t 
*mediaIterator)
 }
 CFDictionarySetValue(classesToMatch, CFSTR(kIOMediaEjectableKey),
  kCFBooleanTrue);
-kernResult = IOServiceGetMatchingServices(masterPort, classesToMatch,
+kernResult = IOServiceGetMatchingServices(mainPort, classesToMatch,
   mediaIterator);
 if (kernResult != KERN_SUCCESS) {
 error_report("Note: IOServiceGetMatchingServices returned %d",
-- 
2.34.1




[PATCH v5 12/16] ui/cocoa: Remove allowedFileTypes restriction in SavePanel

2022-02-14 Thread Philippe Mathieu-Daudé via
setAllowedFileTypes is deprecated in macOS 12.

Per Akihiko Odaki [*]:

  An image file, which is being chosen by the panel, can be a
  raw file and have a variety of file extensions and many are not
  covered by the provided list (e.g. "udf"). Other platforms like
  GTK can provide an option to open a file with an extension not
  listed, but Cocoa can't. It forces the user to rename the file
  to give an extension in the list. Moreover, Cocoa does not tell
  which extensions are in the list so the user needs to read the
  source code, which is pretty bad.

Since this code is harming the usability rather than improving it,
simply remove the [NSSavePanel allowedFileTypes:] call, fixing:

  [2789/6622] Compiling Objective-C object libcommon.fa.p/ui_cocoa.m.o
  ui/cocoa.m:1411:16: error: 'setAllowedFileTypes:' is deprecated: first 
deprecated in macOS 12.0 - Use -allowedContentTypes instead 
[-Werror,-Wdeprecated-declarations]
  [openPanel setAllowedFileTypes: supportedImageFileTypes];
 ^
  
/Library/Developer/CommandLineTools/SDKs/MacOSX.sdk/System/Library/Frameworks/AppKit.framework/Headers/NSSavePanel.h:215:49:
 note: property 'allowedFileTypes' is declared deprecated here
  @property (nullable, copy) NSArray *allowedFileTypes 
API_DEPRECATED("Use -allowedContentTypes instead", macos(10.3,12.0));
  ^
  
/Library/Developer/CommandLineTools/SDKs/MacOSX.sdk/System/Library/Frameworks/AppKit.framework/Headers/NSSavePanel.h:215:49:
 note: 'setAllowedFileTypes:' has been explicitly marked deprecated here
  FAILED: libcommon.fa.p/ui_cocoa.m.o

[*] 
https://lore.kernel.org/qemu-devel/4dde2e66-63cb-4390-9538-c032310db...@gmail.com/

Suggested-by: Akihiko Odaki 
Reviewed-by: Roman Bolshakov 
Tested-by: Roman Bolshakov 
Reviewed-by: Christian Schoenebeck 
Reviewed by: Cameron Esfahani 
Signed-off-by: Philippe Mathieu-Daudé 
---
 ui/cocoa.m | 6 --
 1 file changed, 6 deletions(-)

diff --git a/ui/cocoa.m b/ui/cocoa.m
index ac18e14ce0..7a1ddd4075 100644
--- a/ui/cocoa.m
+++ b/ui/cocoa.m
@@ -100,7 +100,6 @@ static int gArgc;
 static char **gArgv;
 static bool stretch_video;
 static NSTextField *pauseLabel;
-static NSArray * supportedImageFileTypes;
 
 static QemuSemaphore display_init_sem;
 static QemuSemaphore app_started_sem;
@@ -1168,10 +1167,6 @@ QemuCocoaView *cocoaView;
 [pauseLabel setTextColor: [NSColor blackColor]];
 [pauseLabel sizeToFit];
 
-// set the supported image file types that can be opened
-supportedImageFileTypes = [NSArray arrayWithObjects: @"img", @"iso", 
@"dmg",
- @"qcow", @"qcow2", @"cloop", @"vmdk", @"cdr",
-  @"toast", nil];
 [self make_about_window];
 }
 return self;
@@ -1414,7 +1409,6 @@ QemuCocoaView *cocoaView;
 openPanel = [NSOpenPanel openPanel];
 [openPanel setCanChooseFiles: YES];
 [openPanel setAllowsMultipleSelection: NO];
-[openPanel setAllowedFileTypes: supportedImageFileTypes];
 if([openPanel runModal] == NSModalResponseOK) {
 NSString * file = [[[openPanel URLs] objectAtIndex: 0] path];
 if(file == nil) {
-- 
2.34.1




[PATCH v5 07/16] hvf: Make hvf_get_segments() / hvf_put_segments() local

2022-02-14 Thread Philippe Mathieu-Daudé via
Both hvf_get_segments/hvf_put_segments() functions are only
used within x86hvf.c: do not declare them as public API.

Reviewed-by: Roman Bolshakov 
Tested-by: Roman Bolshakov 
Signed-off-by: Philippe Mathieu-Daudé 
---
 target/i386/hvf/x86hvf.c | 4 ++--
 target/i386/hvf/x86hvf.h | 2 --
 2 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/target/i386/hvf/x86hvf.c b/target/i386/hvf/x86hvf.c
index a805c125d9..cff59f3144 100644
--- a/target/i386/hvf/x86hvf.c
+++ b/target/i386/hvf/x86hvf.c
@@ -83,7 +83,7 @@ void hvf_put_xsave(CPUState *cpu_state)
 }
 }
 
-void hvf_put_segments(CPUState *cpu_state)
+static void hvf_put_segments(CPUState *cpu_state)
 {
 CPUX86State *env = &X86_CPU(cpu_state)->env;
 struct vmx_segment seg;
@@ -166,7 +166,7 @@ void hvf_get_xsave(CPUState *cpu_state)
 x86_cpu_xrstor_all_areas(X86_CPU(cpu_state), xsave, xsave_len);
 }
 
-void hvf_get_segments(CPUState *cpu_state)
+static void hvf_get_segments(CPUState *cpu_state)
 {
 CPUX86State *env = &X86_CPU(cpu_state)->env;
 
diff --git a/target/i386/hvf/x86hvf.h b/target/i386/hvf/x86hvf.h
index ef472a32f9..427cdc1c13 100644
--- a/target/i386/hvf/x86hvf.h
+++ b/target/i386/hvf/x86hvf.h
@@ -27,11 +27,9 @@ void hvf_set_segment(struct CPUState *cpu, struct 
vmx_segment *vmx_seg,
  SegmentCache *qseg, bool is_tr);
 void hvf_get_segment(SegmentCache *qseg, struct vmx_segment *vmx_seg);
 void hvf_put_xsave(CPUState *cpu_state);
-void hvf_put_segments(CPUState *cpu_state);
 void hvf_put_msrs(CPUState *cpu_state);
 void hvf_get_xsave(CPUState *cpu_state);
 void hvf_get_msrs(CPUState *cpu_state);
 void vmx_clear_int_window_exiting(CPUState *cpu);
-void hvf_get_segments(CPUState *cpu_state);
 void vmx_update_tpr(CPUState *cpu);
 #endif
-- 
2.34.1




[PATCH v5 06/16] hvf: Enable RDTSCP support

2022-02-14 Thread Philippe Mathieu-Daudé via
From: Cameron Esfahani 

Pass through RDPID and RDTSCP support in CPUID if host supports it.
Correctly detect if CPU_BASED_TSC_OFFSET and CPU_BASED2_RDTSCP would
be supported in primary and secondary processor-based VM-execution
controls.  Enable RDTSCP in secondary processor controls if RDTSCP
support is indicated in CPUID.

Signed-off-by: Cameron Esfahani 
Signed-off-by: Philippe Mathieu-Daudé 
---
 target/i386/hvf/hvf.c   | 26 +-
 target/i386/hvf/vmcs.h  |  3 ++-
 target/i386/hvf/x86_cpuid.c |  7 ---
 3 files changed, 23 insertions(+), 13 deletions(-)

diff --git a/target/i386/hvf/hvf.c b/target/i386/hvf/hvf.c
index 4ba6e82fab..4712fe66d4 100644
--- a/target/i386/hvf/hvf.c
+++ b/target/i386/hvf/hvf.c
@@ -221,6 +221,7 @@ int hvf_arch_init_vcpu(CPUState *cpu)
 {
 X86CPU *x86cpu = X86_CPU(cpu);
 CPUX86State *env = &x86cpu->env;
+uint64_t reqCap;
 
 init_emu();
 init_decoder();
@@ -257,19 +258,26 @@ int hvf_arch_init_vcpu(CPUState *cpu)
 /* set VMCS control fields */
 wvmcs(cpu->hvf->fd, VMCS_PIN_BASED_CTLS,
   cap2ctrl(hvf_state->hvf_caps->vmx_cap_pinbased,
-  VMCS_PIN_BASED_CTLS_EXTINT |
-  VMCS_PIN_BASED_CTLS_NMI |
-  VMCS_PIN_BASED_CTLS_VNMI));
+   VMCS_PIN_BASED_CTLS_EXTINT |
+   VMCS_PIN_BASED_CTLS_NMI |
+   VMCS_PIN_BASED_CTLS_VNMI));
 wvmcs(cpu->hvf->fd, VMCS_PRI_PROC_BASED_CTLS,
   cap2ctrl(hvf_state->hvf_caps->vmx_cap_procbased,
-  VMCS_PRI_PROC_BASED_CTLS_HLT |
-  VMCS_PRI_PROC_BASED_CTLS_MWAIT |
-  VMCS_PRI_PROC_BASED_CTLS_TSC_OFFSET |
-  VMCS_PRI_PROC_BASED_CTLS_TPR_SHADOW) |
+   VMCS_PRI_PROC_BASED_CTLS_HLT |
+   VMCS_PRI_PROC_BASED_CTLS_MWAIT |
+   VMCS_PRI_PROC_BASED_CTLS_TSC_OFFSET |
+   VMCS_PRI_PROC_BASED_CTLS_TPR_SHADOW) |
   VMCS_PRI_PROC_BASED_CTLS_SEC_CONTROL);
+
+reqCap = VMCS_PRI_PROC_BASED2_CTLS_APIC_ACCESSES;
+
+/* Is RDTSCP support in CPUID?  If so, enable it in the VMCS. */
+if (hvf_get_supported_cpuid(0x8001, 0, R_EDX) & CPUID_EXT2_RDTSCP) {
+reqCap |= VMCS_PRI_PROC_BASED2_CTLS_RDTSCP;
+}
+
 wvmcs(cpu->hvf->fd, VMCS_SEC_PROC_BASED_CTLS,
-  cap2ctrl(hvf_state->hvf_caps->vmx_cap_procbased2,
-   VMCS_PRI_PROC_BASED2_CTLS_APIC_ACCESSES));
+  cap2ctrl(hvf_state->hvf_caps->vmx_cap_procbased2, reqCap));
 
 wvmcs(cpu->hvf->fd, VMCS_ENTRY_CTLS, 
cap2ctrl(hvf_state->hvf_caps->vmx_cap_entry,
   0));
diff --git a/target/i386/hvf/vmcs.h b/target/i386/hvf/vmcs.h
index 42de7ebc3a..bb4c764557 100644
--- a/target/i386/hvf/vmcs.h
+++ b/target/i386/hvf/vmcs.h
@@ -354,7 +354,7 @@
 #define VMCS_PRI_PROC_BASED_CTLS_TSC_OFFSET (1 << 3)
 #define VMCS_PRI_PROC_BASED_CTLS_HLT (1 << 7)
 #define VMCS_PRI_PROC_BASED_CTLS_MWAIT (1 << 10)
-#define VMCS_PRI_PROC_BASED_CTLS_TSC   (1 << 12)
+#define VMCS_PRI_PROC_BASED_CTLS_RDTSC (1 << 12)
 #define VMCS_PRI_PROC_BASED_CTLS_CR8_LOAD  (1 << 19)
 #define VMCS_PRI_PROC_BASED_CTLS_CR8_STORE (1 << 20)
 #define VMCS_PRI_PROC_BASED_CTLS_TPR_SHADOW(1 << 21)
@@ -362,6 +362,7 @@
 #define VMCS_PRI_PROC_BASED_CTLS_SEC_CONTROL   (1 << 31)
 
 #define VMCS_PRI_PROC_BASED2_CTLS_APIC_ACCESSES (1 << 0)
+#define VMCS_PRI_PROC_BASED2_CTLS_RDTSCP(1 << 3)
 #define VMCS_PRI_PROC_BASED2_CTLS_X2APIC(1 << 4)
 
 enum task_switch_reason {
diff --git a/target/i386/hvf/x86_cpuid.c b/target/i386/hvf/x86_cpuid.c
index 32b0d131df..b11ddaa349 100644
--- a/target/i386/hvf/x86_cpuid.c
+++ b/target/i386/hvf/x86_cpuid.c
@@ -96,7 +96,8 @@ uint32_t hvf_get_supported_cpuid(uint32_t func, uint32_t idx,
 ebx &= ~CPUID_7_0_EBX_INVPCID;
 }
 
-ecx &= CPUID_7_0_ECX_AVX512_VBMI | CPUID_7_0_ECX_AVX512_VPOPCNTDQ;
+ecx &= CPUID_7_0_ECX_AVX512_VBMI | CPUID_7_0_ECX_AVX512_VPOPCNTDQ |
+   CPUID_7_0_ECX_RDPID;
 edx &= CPUID_7_0_EDX_AVX512_4VNNIW | CPUID_7_0_EDX_AVX512_4FMAPS;
 } else {
 ebx = 0;
@@ -133,11 +134,11 @@ uint32_t hvf_get_supported_cpuid(uint32_t func, uint32_t 
idx,
 CPUID_FXSR | CPUID_EXT2_FXSR | CPUID_EXT2_PDPE1GB | 
CPUID_EXT2_3DNOWEXT |
 CPUID_EXT2_3DNOW | CPUID_EXT2_LM | CPUID_EXT2_RDTSCP | 
CPUID_EXT2_NX;
 hv_vmx_read_capability(HV_VMX_CAP_PROCBASED2, &cap);
-if (!(cap & CPU_BASED2_RDTSCP)) {
+if (!(cap2ctrl(cap, CPU_BASED2_RDTSCP) & CPU_BASED2_RDTSCP)) {
 edx &= ~CPUID_EXT2_RDTSCP;
 }
 hv_vmx_read_capability(HV_VMX_CAP_PROCBASED, &cap);
-if (!(cap & CPU_BASED_TSC_OFFSET)) {
+if (!(cap2ctrl(cap, CPU_BASED_TSC_OFFSET) & CPU_BASED_TSC_OFFSET)) {
 edx &= ~CPUID_EXT2_RDTSCP;
 }
 ecx &= CPUID_EXT3_LAHF_LM | CPUID_EXT3_CMP_LEG | CPUID_EXT3_CR8LEG |
-- 
2.34.1




[PATCH v5 03/16] tests/fp/berkeley-testfloat-3: Ignore ignored #pragma directives

2022-02-14 Thread Philippe Mathieu-Daudé via
Since we already use -Wno-unknown-pragmas, we can also use
-Wno-ignored-pragmas. This silences hundred of warnings using
clang 13 on macOS Monterey:

  [409/771] Compiling C object 
tests/fp/libtestfloat.a.p/berkeley-testfloat-3_source_test_az_f128_rx.c.o
  ../tests/fp/berkeley-testfloat-3/source/test_az_f128_rx.c:49:14: warning: 
'#pragma FENV_ACCESS' is not supported on this target - ignored 
[-Wignored-pragmas]
  #pragma STDC FENV_ACCESS ON
   ^
  1 warning generated.

Having:

  $ cc -v
  Apple clang version 13.0.0 (clang-1300.0.29.30)

Signed-off-by: Philippe Mathieu-Daudé 
---
 tests/fp/meson.build | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/fp/meson.build b/tests/fp/meson.build
index 59776a00a7..5192264a08 100644
--- a/tests/fp/meson.build
+++ b/tests/fp/meson.build
@@ -30,6 +30,7 @@ tfcflags = [
   '-Wno-implicit-fallthrough',
   '-Wno-strict-prototypes',
   '-Wno-unknown-pragmas',
+  '-Wno-ignored-pragmas',
   '-Wno-uninitialized',
   '-Wno-missing-prototypes',
   '-Wno-return-type',
-- 
2.34.1




[PATCH v5 01/16] MAINTAINERS: Add Akihiko Odaki to macOS-relateds

2022-02-14 Thread Philippe Mathieu-Daudé via
From: Akihiko Odaki 

Signed-off-by: Akihiko Odaki 
Reviewed-by: Christian Schoenebeck 
Reviewed-by: Philippe Mathieu-Daudé 
Message-Id: <20220213021215.1974-1-akihiko.od...@gmail.com>
Signed-off-by: Philippe Mathieu-Daudé 
---
 MAINTAINERS | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index b0b845f445..ce6f4f9228 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2406,6 +2406,7 @@ F: audio/alsaaudio.c
 Core Audio framework backend
 M: Gerd Hoffmann 
 R: Christian Schoenebeck 
+R: Akihiko Odaki 
 S: Odd Fixes
 F: audio/coreaudio.c
 
@@ -2658,6 +2659,7 @@ F: util/drm.c
 
 Cocoa graphics
 M: Peter Maydell 
+R: Akihiko Odaki 
 S: Odd Fixes
 F: ui/cocoa.m
 
-- 
2.34.1




[PATCH v5 02/16] configure: Allow passing extra Objective C compiler flags

2022-02-14 Thread Philippe Mathieu-Daudé via
We can pass C/CPP/LD flags via CFLAGS/CXXFLAGS/LDFLAGS environment
variables, or via configure --extra-cflags / --extra-cxxflags /
--extra-ldflags options. Provide similar behavior for Objective C:
use existing flags from $OBJCFLAGS, or passed via --extra-objcflags.

Signed-off-by: Philippe Mathieu-Daudé 
---
 configure   | 8 
 meson.build | 5 +
 2 files changed, 13 insertions(+)

diff --git a/configure b/configure
index 3a29eff5cc..06c03cebd3 100755
--- a/configure
+++ b/configure
@@ -287,6 +287,7 @@ done
 
 EXTRA_CFLAGS=""
 EXTRA_CXXFLAGS=""
+EXTRA_OBJCFLAGS=""
 EXTRA_LDFLAGS=""
 
 xen_ctrl_version="$default_feature"
@@ -391,9 +392,12 @@ for opt do
   --extra-cflags=*)
 EXTRA_CFLAGS="$EXTRA_CFLAGS $optarg"
 EXTRA_CXXFLAGS="$EXTRA_CXXFLAGS $optarg"
+EXTRA_OBJCFLAGS="$EXTRA_OBJCFLAGS $optarg"
 ;;
   --extra-cxxflags=*) EXTRA_CXXFLAGS="$EXTRA_CXXFLAGS $optarg"
   ;;
+  --extra-objcflags=*) EXTRA_OBJCFLAGS="$EXTRA_OBJCFLAGS $optarg"
+  ;;
   --extra-ldflags=*) EXTRA_LDFLAGS="$EXTRA_LDFLAGS $optarg"
   ;;
   --enable-debug-info) debug_info="yes"
@@ -774,6 +778,8 @@ for opt do
   ;;
   --extra-cxxflags=*)
   ;;
+  --extra-objcflags=*)
+  ;;
   --extra-ldflags=*)
   ;;
   --enable-debug-info)
@@ -1312,6 +1318,7 @@ Advanced options (experts only):
   --objcc=OBJCCuse Objective-C compiler OBJCC [$objcc]
   --extra-cflags=CFLAGSappend extra C compiler flags CFLAGS
   --extra-cxxflags=CXXFLAGS append extra C++ compiler flags CXXFLAGS
+  --extra-objcflags=OBJCFLAGS append extra Objective C compiler flags OBJCFLAGS
   --extra-ldflags=LDFLAGS  append extra linker flags LDFLAGS
   --cross-cc-ARCH=CC   use compiler when building ARCH guest test cases
   --cross-cc-cflags-ARCH=  use compiler flags when building ARCH guest tests
@@ -3724,6 +3731,7 @@ if test "$skip_meson" = no; then
   echo "[built-in options]" >> $cross
   echo "c_args = [$(meson_quote $CFLAGS $EXTRA_CFLAGS)]" >> $cross
   echo "cpp_args = [$(meson_quote $CXXFLAGS $EXTRA_CXXFLAGS)]" >> $cross
+  test -n "$objcc" && echo "objc_args = [$(meson_quote $OBJCFLAGS 
$EXTRA_OBJCFLAGS)]" >> $cross
   echo "c_link_args = [$(meson_quote $CFLAGS $LDFLAGS $EXTRA_CFLAGS 
$EXTRA_LDFLAGS)]" >> $cross
   echo "cpp_link_args = [$(meson_quote $CXXFLAGS $LDFLAGS $EXTRA_CXXFLAGS 
$EXTRA_LDFLAGS)]" >> $cross
   echo "[binaries]" >> $cross
diff --git a/meson.build b/meson.build
index ae5f7eec6e..df25e7a5e7 100644
--- a/meson.build
+++ b/meson.build
@@ -3292,6 +3292,11 @@ if link_language == 'cpp'
+ ['-O' + 
get_option('optimization')]
+ (get_option('debug') ? ['-g'] 
: []))}
 endif
+if targetos == 'darwin'
+  summary_info += {'OBJCFLAGS':   ' '.join(get_option('objc_args')
+   + ['-O' + 
get_option('optimization')]
+   + (get_option('debug') ? ['-g'] 
: []))}
+endif
 link_args = get_option(link_language + '_link_args')
 if link_args.length() > 0
   summary_info += {'LDFLAGS': ' '.join(link_args)}
-- 
2.34.1




[PATCH v5 00/16] host: Support macOS 12

2022-02-14 Thread Philippe Mathieu-Daudé via
Few patches to be able to build QEMU on macOS 12 (Monterey).

This basically consists of adapting deprecated APIs.

CI job added to avoid bitrotting.

Since v4:
- Use MAC_OS_X_VERSION_MIN_REQUIRED definition (Akihiko)
- Include patches from Akihiko

Since v3:
- Fix --enable-modules
- Ignore #pragma on softfloat3 tests
- Addressed Akihiko Odaki comments
- Include Cameron Esfahani patches

Since v2:
- Addressed Akihiko Odaki comments:
  . use __is_identifier(),
  . remove cocoa setAllowedFileTypes()
- Addressed Daniel Berrangé comment:
  . rebased on testing/next, update libvirt-ci/lcitool

Based on Alex's testing/next

Akihiko Odaki (3):
  MAINTAINERS: Add Akihiko Odaki to macOS-relateds
  ui/cocoa: Add Services menu
  ui/cocoa: Do not alert even without block devices

Cameron Esfahani (3):
  hvf: Use standard CR0 and CR4 register definitions
  hvf: Fix OOB write in RDTSCP instruction decode
  hvf: Enable RDTSCP support

Philippe Mathieu-Daudé (10):
  configure: Allow passing extra Objective C compiler flags
  tests/fp/berkeley-testfloat-3: Ignore ignored #pragma directives
  hvf: Make hvf_get_segments() / hvf_put_segments() local
  hvf: Remove deprecated hv_vcpu_flush() calls
  block/file-posix: Remove a deprecation warning on macOS 12
  audio/coreaudio: Remove a deprecation warning on macOS 12
  audio/dbus: Fix building with modules on macOS
  ui/cocoa: Remove allowedFileTypes restriction in SavePanel
  lcitool: refresh
  gitlab-ci: Support macOS 12 via cirrus-run

 .gitlab-ci.d/cirrus.yml| 16 ++
 .gitlab-ci.d/cirrus/macos-12.vars  | 16 ++
 MAINTAINERS|  2 ++
 audio/coreaudio.c  | 17 +++
 audio/meson.build  |  2 +-
 block/file-posix.c | 14 ++---
 configure  |  8 +
 meson.build|  5 
 target/i386/hvf/hvf.c  | 26 +++--
 target/i386/hvf/vmcs.h |  3 +-
 target/i386/hvf/vmx.h  | 19 ++--
 target/i386/hvf/x86.c  |  6 ++--
 target/i386/hvf/x86.h  | 34 --
 target/i386/hvf/x86_cpuid.c|  7 +++--
 target/i386/hvf/x86_decode.c   | 11 +--
 target/i386/hvf/x86_mmu.c  |  2 +-
 target/i386/hvf/x86_task.c |  4 +--
 target/i386/hvf/x86hvf.c   | 14 ++---
 target/i386/hvf/x86hvf.h   |  3 +-
 tests/docker/dockerfiles/ubuntu1804.docker |  2 --
 tests/docker/dockerfiles/ubuntu2004.docker |  2 --
 tests/fp/meson.build   |  1 +
 tests/lcitool/refresh  |  1 +
 ui/cocoa.m | 15 +++---
 24 files changed, 133 insertions(+), 97 deletions(-)
 create mode 100644 .gitlab-ci.d/cirrus/macos-12.vars

-- 
2.34.1




Re: [PATCH] hw/ide: implement ich6 ide controller support

2022-02-14 Thread Liav Albani

Hello BALATON,

Thank you for helping keeping this patch noticeable to everyone :)

I tried to reach out to John via a private email last Saturday (two days 
ago) so I don't "spam" the mailing list for no good reason.
It might be that I should actually refrain from doing so and talk to the 
maintainer directly on the mailing list once the patch

has been submitted to the mailing list.
I've not yet seen any response from John so I assume it's a matter of 
days before he can take care of this.


Best regards,
Liav

On 2/14/22 14:26, BALATON Zoltan wrote:

On Sat, 5 Feb 2022, BALATON Zoltan wrote:

Hello,


Ping? John, do you agree with my comments? Should Liav proceed to send 
a v2?


Thanks,
BALATON Zoltan


On Sat, 5 Feb 2022, Liav Albani wrote:

On 2/5/22 17:48, BALATON Zoltan wrote:

On Sat, 5 Feb 2022, Liav Albani wrote:
This type of IDE controller has support for relocating the IO 
ports and
doesn't use IRQ 14 and 15 but one allocated PCI IRQ for the 
controller.


I haven't looked at in detail so only a few comments I've got while 
reading it. What machine needs this? In QEMU I think we only have 
piix and ich9 emulated for pc and q35 machines but maybe ich6 is 
also used by some machine I don't know about. Otherwise it looks 
odd to have ide part of ich6 but not the other parts of this chip.



Hi BALATON,

This is my first patch to QEMU and the first time I send patches 
over the mail. I sent my github tree to John Snow (the maintainer of 
the IDE code in QEMU) for advice if I should send them here and I 
was encouraged to do that.


Welcome and thanks a lot for taking time to contribute and share your 
results. In case you're not yet aware, these docs should explain how 
patches are handled on the list:


https://www.qemu.org/docs/master/devel/submitting-a-patch.html

For the next time patch I'll put a note on writing a descriptive 
cover letter as it could have put more valuable details on why I 
sent this patch.


There's no such machine type emulating the ICH6 chipset in QEMU. 
However, I wrote this emulation component as a test for the 
SerenityOS kernel because I have a machine from 2009 which has
an ICH7 southbridge, so, I wanted to emulate such device with QEMU 
to ease development on it.


I found out that Linux with libata was using the controller without 
any noticeable problems, but the SerenityOS kernel struggled to use 
this device, so I decided that
I should send this patch to get it merged and then I can use it 
locally and maybe other people will benefit from it.


In regard to other components of the ICH6 chipset - I don't think 
it's worth anybody's time to actually implement them as the ICH9 
chipset is quite close to what the ICH6 chipset offers as far as I 
can tell.
The idea of implementing ich6-ide controller was to enable the 
option of people like me and other OS developers to ensure their 
kernels operate correctly on such type of device,
which is legacy-free device in the aspect of PCI bus resource 
management but still is a legacy device which belongs to chipsets of 
late 2000s.


That's OK, maybe a short mention (just one sentence) in the commit 
message explaining this would help to understand why this device 
model was added.



Signed-off-by: Liav Albani 
---
hw/i386/Kconfig  |   2 +
hw/ide/Kconfig   |   5 +
hw/ide/bmdma.c   |  83 +++
hw/ide/ich6.c    | 211 
+++

hw/ide/meson.build   |   3 +-
hw/ide/piix.c    |  50 +-
include/hw/ide/pci.h |   5 +
include/hw/pci/pci_ids.h |   1 +
8 files changed, 311 insertions(+), 49 deletions(-)
create mode 100644 hw/ide/bmdma.c
create mode 100644 hw/ide/ich6.c

diff --git a/hw/i386/Kconfig b/hw/i386/Kconfig
index d22ac4a4b9..a18de2d962 100644
--- a/hw/i386/Kconfig
+++ b/hw/i386/Kconfig
@@ -75,6 +75,7 @@ config I440FX
    select PCI_I440FX
    select PIIX3
    select IDE_PIIX
+    select IDE_ICH6
    select DIMM
    select SMBIOS
    select FW_CFG_DMA
@@ -101,6 +102,7 @@ config Q35
    select PCI_EXPRESS_Q35
    select LPC_ICH9
    select AHCI_ICH9
+    select IDE_ICH6
    select DIMM
    select SMBIOS
    select FW_CFG_DMA
diff --git a/hw/ide/Kconfig b/hw/ide/Kconfig
index dd85fa3619..63304325a5 100644
--- a/hw/ide/Kconfig
+++ b/hw/ide/Kconfig
@@ -38,6 +38,11 @@ config IDE_VIA
    select IDE_PCI
    select IDE_QDEV

+config IDE_ICH6
+    bool
+    select IDE_PCI
+    select IDE_QDEV
+
config MICRODRIVE
    bool
    select IDE_QDEV
diff --git a/hw/ide/bmdma.c b/hw/ide/bmdma.c
new file mode 100644
index 00..979f5974fd
--- /dev/null
+++ b/hw/ide/bmdma.c
@@ -0,0 +1,83 @@
+/*
+ * QEMU IDE Emulation: PCI PIIX3/4 support.
+ *
+ * Copyright (c) 2003 Fabrice Bellard
+ * Copyright (c) 2006 Openedhand Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person 
obtaining a copy
+ * of this software and associated documentation files (the 
"Software"), to deal
+ * in the Software without restriction, including without 

[PULL 0/3] Block patches

2022-02-14 Thread Stefan Hajnoczi
The following changes since commit cc5ce8b8b6be83e5fe3b668dbd061ad97c534e3f:

  Merge remote-tracking branch 'remotes/legoater/tags/pull-ppc-20220210' into 
staging (2022-02-13 20:33:28 +)

are available in the Git repository at:

  https://gitlab.com/stefanha/qemu.git tags/block-pull-request

for you to fetch changes up to 4c41c69e05fe28c0f95f8abd2ebf407e95a4f04b:

  util: adjust coroutine pool size to virtio block queue (2022-02-14 17:11:25 
+)


Pull request

This contains coroutine poll size scaling, virtiofsd rseq seccomp for new glibc
versions, and the QEMU C virtiofsd deprecation notice.



Christian Ehrhardt (1):
  tools/virtiofsd: Add rseq syscall to the seccomp allowlist

Dr. David Alan Gilbert (1):
  Deprecate C virtiofsd

Hiroki Narukawa (1):
  util: adjust coroutine pool size to virtio block queue

 docs/about/deprecated.rst | 17 +
 include/qemu/coroutine.h  | 10 ++
 hw/block/virtio-blk.c |  5 +
 tools/virtiofsd/passthrough_seccomp.c |  3 +++
 util/qemu-coroutine.c | 20 
 5 files changed, 51 insertions(+), 4 deletions(-)

-- 
2.34.1





Re: [PATCH 5/6] test-bdrv-drain.c: remove test_detach_by_parent_cb()

2022-02-14 Thread Paolo Bonzini

On 2/14/22 16:28, Kevin Wolf wrote:

The BlockBackend could safely return false from blk_root_drained_poll()
while requests are still in their callbacks (if they do anything that
touches a node, they would increase in_flight again), it just doesn't do
it yet. It's only blk_drain(_all)() that would still have to wait for
those.


That would be very subtle, especially it's not clear to me why this 
wouldn't be "a drain completing while the callback hasn't actually 
completed yet".  The drain referred to in the commit message of 
46aaf2a56 could *not* use blk_drain, because it is not coroutine 
friendly; it must use bdrv_drained_begin.



My answer is respectively 1) it's correct, many coroutines do inc_in_flight
before creation and dec_in_flight at the end, we're just saying that it's
_always_  the case for callback-based operations; 2) no, it's not unexpected
and therefore the test is the incorrect one.

My question isn't really only about the test, though. If it is now
forbidden to call bdrv_replace_child_noperm() in a callback, how do we
verify that the test is the only incorrect one rather than just the
obvious one?

And is it better to throw away the test and find and fix all other
places that are using something that is now forbidden, or wouldn't it be
better to actually allow bdrv_replace_child_noperm() in callbacks?


The question is would you ever need bdrv_replace_child_noperm() in 
callbacks?  The AIO functions are called from any iothread and so are 
the callbacks.  We do have a usecase (in block/mirror.c) for 
bdrv_drained_begin from a coroutine; do we have a usecase for calling 
global-state functions from a callback, in such a way that:


1) going through a bottom half would not be possible

2) it's only needed in the special case of a BlockBackend homed in the 
main event loop (because otherwise you'd have to go through a bottom 
half, and we have excluded that already)?


Thanks,

Paolo



Re: [PATCH v3 1/1] util: adjust coroutine pool size to virtio block queue

2022-02-14 Thread Stefan Hajnoczi
On Mon, Feb 14, 2022 at 11:54:34AM +, Hiroki Narukawa wrote:
> > Coroutine pool size was 64 from long ago, and the basis was organized in the
> > commit message in c740ad92.
> 
> Sorry, I noticed that commit ID mentioning here was incorrect.
> The correct one is 4d68e86b.
> 
> https://gitlab.com/qemu-project/qemu/-/commit/4d68e86bb10159099da0798f74e7512955f15eec
> 
> I have resent this patch as v4 with exactly the same code as v3, just 
> changing this commit message.

Thanks!

Stefan


signature.asc
Description: PGP signature


Re: [PATCH v4 0/1] Patch to adjust coroutine pool size adaptively

2022-02-14 Thread Stefan Hajnoczi
On Mon, Feb 14, 2022 at 08:53:01PM +0900, Hiroki Narukawa wrote:
> Resending with correct commit message
> 
> Resending patch with decreasing coroutine pool size on device remove
> 
> We encountered random disk IO performance drop since qemu-5.0.0, and this 
> patch fixes it.
> 
> Commit message in 4d68e86b implied to adjust coroutine pool size adaptively, 
> so I tried to implement this.
> 
> Changes from v3:
> No code changed. Changed commit message so that first line indicates to
> correct commit ID.
> 
> Changes from v2:
> Decrease coroutine pool size on device remove
> 
> Changes from v1:
> Use qatomic_read properly
> 
> Hiroki Narukawa (1):
>   util: adjust coroutine pool size to virtio block queue
> 
>  hw/block/virtio-blk.c|  5 +
>  include/qemu/coroutine.h | 10 ++
>  util/qemu-coroutine.c| 20 
>  3 files changed, 31 insertions(+), 4 deletions(-)
> 
> -- 
> 2.17.1
> 

Thanks, applied to my block tree:
https://gitlab.com/stefanha/qemu/commits/block

Stefan


signature.asc
Description: PGP signature


Re: [PATCH 5/6] test-bdrv-drain.c: remove test_detach_by_parent_cb()

2022-02-14 Thread Kevin Wolf
Am 14.02.2022 um 12:42 hat Paolo Bonzini geschrieben:
> On 2/11/22 16:38, Kevin Wolf wrote:
> > The callback of an I/O function isn't I/O, though. It is code_after_
> > the I/O has completed. If this doesn't work any more, it feels like this
> > is a bug.
> 
> The issue is that the I/O has *not* completed yet.  blk_aio_preadv(..., cb,
> opaque) is not equivalent to
> 
>   ret = blk_co_preadv(...);
>   cb(ret, opaque);
> 
> but rather to
> 
>   blk_inc_in_flight(blk);
>   ret = blk_co_preadv(...);
>   cb(ret, opaque);
>   blk_dec_in_flight(blk);

It depends on what you call "the I/O". The request to blk_bs(blk) has
already completed, but the request to blk itself hasn't yet.

This is exactly the difference I was talking about:
bdrv_replace_child_noperm() really only cares about requests to the
BlockDriverState, but drain currently waits for attached BlockBackends,
too.

The BlockBackend could safely return false from blk_root_drained_poll()
while requests are still in their callbacks (if they do anything that
touches a node, they would increase in_flight again), it just doesn't do
it yet. It's only blk_drain(_all)() that would still have to wait for
those.

> Your own commit message (yeah I know we've all been through that :))
> explains why, and notes that it is now invalid to drain in a callback:
> 
> commit 46aaf2a566e364a62315219255099cbf1c9b990d
> Author: Kevin Wolf 
> Date:   Thu Sep 6 17:47:22 2018 +0200
> 
> block-backend: Decrease in_flight only after callback
> 
> Request callbacks can do pretty much anything, including operations
> that will yield from the coroutine (such as draining the backend).
> In that case, a decreased in_flight would be visible to other code
> and could lead to a drain completing while the callback hasn't
> actually completed yet.
> 
> Note that reordering these operations forbids calling drain directly
> inside an AIO callback.

Yes, I wasn't aware of this any more, but I've come to the same
conclusion in my previous message in this thread.

> So the questions are:
> 
> 1) is the above commit wrong though well-intentioned?
> 
> 2) is it unexpected that bdrv_replace_child_noperm() drains (thus becoming
> invalid from the callback, albeit valid through a bottom half)?
> 
> 
> My answer is respectively 1) it's correct, many coroutines do inc_in_flight
> before creation and dec_in_flight at the end, we're just saying that it's
> _always_ the case for callback-based operations; 2) no, it's not unexpected
> and therefore the test is the incorrect one.

My question isn't really only about the test, though. If it is now
forbidden to call bdrv_replace_child_noperm() in a callback, how do we
verify that the test is the only incorrect one rather than just the
obvious one?

And is it better to throw away the test and find and fix all other
places that are using something that is now forbidden, or wouldn't it be
better to actually allow bdrv_replace_child_noperm() in callbacks?

Kevin




Re: [RFC] thread-pool: Add option to fix the pool size

2022-02-14 Thread Kevin Wolf
Am 14.02.2022 um 14:10 hat Stefan Hajnoczi geschrieben:
> On Mon, Feb 14, 2022 at 12:38:22PM +0100, Kevin Wolf wrote:
> > Am 14.02.2022 um 11:09 hat Stefan Hajnoczi geschrieben:
> > > On Fri, Feb 11, 2022 at 12:32:57PM +0100, Kevin Wolf wrote:
> > > > Am 03.02.2022 um 15:19 hat Stefan Hajnoczi geschrieben:
> > > > > On Thu, Feb 03, 2022 at 10:56:49AM +, Daniel P. Berrangé wrote:
> > > > > > On Thu, Feb 03, 2022 at 10:53:07AM +, Stefan Hajnoczi wrote:
> > > > > > > On Wed, Feb 02, 2022 at 06:52:34PM +0100, Nicolas Saenz Julienne 
> > > > > > > wrote:
> > > > > 1. A global default value that all new AioContext take. The QEMU main
> > > > >loop's qemu_aio_context will use this and all IOThread AioContext
> > > > >will use it (unless they have been overridden).
> > > > > 
> > > > >I would define it on --machine because that's the "global" object 
> > > > > for
> > > > >a guest, but that's not very satisfying.
> > > > 
> > > > Semantically, -machine is about the virtual hardware where as iothreads
> > > > are about the backend, so I agree it's not a good fit.
> > > > 
> > > > For the main thread, you may want to configure all the same options that
> > > > you can configure for an iothread. So to me that sounds like we would
> > > > want to allow using an iothread object for the main thread, too.
> > > > 
> > > > That would still require us to tell QEMU which iothread object should be
> > > > used for the main thread, though.
> > > 
> > > Making the main loop thread an IOThread is an interesting direction but
> > > not an easy change to make.
> > > 
> > > The main loop thread has a custom event loop that is not interchangeable
> > > with the IOThread event loop:
> > > - The main loop has a poll notifier interface for libslirp fd monitoring
> > >   integration.
> > > - The main loop is a GLib event loop but manually polls to get
> > >   nanosecond resolution timers.
> > > - The main loop has icount integration.
> > > - The main loop has the special iohandler AioContext
> > > 
> > > The IOThread event loop runs an optimized AioContext event loop instead.
> > > It falls back to regular g_main_loop_run() if there is a GSource user.
> > > 
> > > It would definitely be nice to unify the main loop with IOThread and
> > > then use --object iothread,... to configure main loop parameters.
> > > 
> > > I'm not sure if requiring that of Nicolas is fair though. The event
> > > loops in QEMU are complex and changes are likely to introduce subtle
> > > bugs or performance regressions.
> > 
> > I'm not suggesting actually running the iothread event loop instead,
> > merely using the properties of an object to configure the main thread as
> > the external user interface.
> > Whether this uses the same main loop code as today or is moved to the
> > regular iothread event loop is an implementation detail that can be
> > changed later.
> > 
> > Or we could maybe use a different object type like 'mainthread' and
> > share the properties using QOM inheritance.
> 
> That seems cleaner than trying faking an IOThread to me since I don't
> see a concrete plan to unify the two event loops.
> 
> The main loop code is in util/main-loop.c. Maybe call it --object
> main-loop? Attempting to instantiate more than one main-loop object
> should fail.

Sounds good. And if you don't create one explicitly, we'll just
internally create a default main-loop object.

Kevin


signature.asc
Description: PGP signature


Re: [PATCH] ui/cocoa: Do not alert even without block devices

2022-02-14 Thread Markus Armbruster
Philippe Mathieu-Daudé  writes:

> On 13/2/22 03:14, Akihiko Odaki wrote:
>> Signed-off-by: Akihiko Odaki 
>> ---
>>   ui/cocoa.m | 5 -
>>   1 file changed, 5 deletions(-)
>> diff --git a/ui/cocoa.m b/ui/cocoa.m
>> index ac18e14ce01..271a2676026 100644
>> --- a/ui/cocoa.m
>> +++ b/ui/cocoa.m
>> @@ -1715,11 +1715,6 @@ static void addRemovableDevicesMenuItems(void)
>> currentDevice = qmp_query_block(NULL);
>>   pointerToFree = currentDevice;
>> -if(currentDevice == NULL) {
>> -NSBeep();
>> -QEMU_Alert(@"Failed to query for block devices!");
>> -return;
>> -}
>> menu = [[[NSApp mainMenu] itemWithTitle:@"Machine"]
>> submenu];
>>   
>
> Cc'ing qemu-block@ and Markus (QMP).
>
> I always wondered the point of this annoying warning but never
> found out.

The condition conflates "query failed" (returns null with error and "no
block devices" (returns empty list, i.e. null, with no error set).
Quite suspicious.

Goes back to

Author: John Arbuckle 
Date:   Fri Jun 19 10:53:27 2015 +0100

ui/cocoa.m: Add machine menu items to change and eject removable drive 
media

Adds all removable devices to the Machine menu as a Change and Eject 
menu
item pair. ide-cd0 would have a "Change ide-cd0..." and "Eject ide-cd0"
menu items.

Signed-off-by: John Arbuckle 
Reviewed-by: Peter Maydell 
Signed-off-by: Peter Maydell 

> Is this menu updated when removable blkdev are hot-plugged from
> the monitor or QMP?
>
> Tested-by: Philippe Mathieu-Daudé 




Re: [PATCH] ui/cocoa: Do not alert even without block devices

2022-02-14 Thread Akihiko Odaki
On Mon, Feb 14, 2022 at 9:31 PM Philippe Mathieu-Daudé  wrote:
> Is this menu updated when removable blkdev are hot-plugged from
> the monitor or QMP?

Actually no.



Re: [RFC] thread-pool: Add option to fix the pool size

2022-02-14 Thread Stefan Hajnoczi
On Mon, Feb 14, 2022 at 12:38:22PM +0100, Kevin Wolf wrote:
> Am 14.02.2022 um 11:09 hat Stefan Hajnoczi geschrieben:
> > On Fri, Feb 11, 2022 at 12:32:57PM +0100, Kevin Wolf wrote:
> > > Am 03.02.2022 um 15:19 hat Stefan Hajnoczi geschrieben:
> > > > On Thu, Feb 03, 2022 at 10:56:49AM +, Daniel P. Berrangé wrote:
> > > > > On Thu, Feb 03, 2022 at 10:53:07AM +, Stefan Hajnoczi wrote:
> > > > > > On Wed, Feb 02, 2022 at 06:52:34PM +0100, Nicolas Saenz Julienne 
> > > > > > wrote:
> > > > 1. A global default value that all new AioContext take. The QEMU main
> > > >loop's qemu_aio_context will use this and all IOThread AioContext
> > > >will use it (unless they have been overridden).
> > > > 
> > > >I would define it on --machine because that's the "global" object for
> > > >a guest, but that's not very satisfying.
> > > 
> > > Semantically, -machine is about the virtual hardware where as iothreads
> > > are about the backend, so I agree it's not a good fit.
> > > 
> > > For the main thread, you may want to configure all the same options that
> > > you can configure for an iothread. So to me that sounds like we would
> > > want to allow using an iothread object for the main thread, too.
> > > 
> > > That would still require us to tell QEMU which iothread object should be
> > > used for the main thread, though.
> > 
> > Making the main loop thread an IOThread is an interesting direction but
> > not an easy change to make.
> > 
> > The main loop thread has a custom event loop that is not interchangeable
> > with the IOThread event loop:
> > - The main loop has a poll notifier interface for libslirp fd monitoring
> >   integration.
> > - The main loop is a GLib event loop but manually polls to get
> >   nanosecond resolution timers.
> > - The main loop has icount integration.
> > - The main loop has the special iohandler AioContext
> > 
> > The IOThread event loop runs an optimized AioContext event loop instead.
> > It falls back to regular g_main_loop_run() if there is a GSource user.
> > 
> > It would definitely be nice to unify the main loop with IOThread and
> > then use --object iothread,... to configure main loop parameters.
> > 
> > I'm not sure if requiring that of Nicolas is fair though. The event
> > loops in QEMU are complex and changes are likely to introduce subtle
> > bugs or performance regressions.
> 
> I'm not suggesting actually running the iothread event loop instead,
> merely using the properties of an object to configure the main thread as
> the external user interface.
> Whether this uses the same main loop code as today or is moved to the
> regular iothread event loop is an implementation detail that can be
> changed later.
> 
> Or we could maybe use a different object type like 'mainthread' and
> share the properties using QOM inheritance.

That seems cleaner than trying faking an IOThread to me since I don't
see a concrete plan to unify the two event loops.

The main loop code is in util/main-loop.c. Maybe call it --object
main-loop? Attempting to instantiate more than one main-loop object
should fail.

Stefan


signature.asc
Description: PGP signature


[PATCH 2/6] hw/nvme: add host behavior support feature

2022-02-14 Thread Klaus Jensen
From: Naveen Nagar 

Add support for getting and setting the Host Behavior Support feature.

Signed-off-by: Naveen Nagar 
Signed-off-by: Klaus Jensen 
---
 hw/nvme/ctrl.c   | 8 
 hw/nvme/nvme.h   | 4 +++-
 include/block/nvme.h | 9 +
 3 files changed, 20 insertions(+), 1 deletion(-)

diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
index d08af3bdc1a2..71c60482c75f 100644
--- a/hw/nvme/ctrl.c
+++ b/hw/nvme/ctrl.c
@@ -196,6 +196,7 @@ static const bool nvme_feature_support[NVME_FID_MAX] = {
 [NVME_WRITE_ATOMICITY]  = true,
 [NVME_ASYNCHRONOUS_EVENT_CONF]  = true,
 [NVME_TIMESTAMP]= true,
+[NVME_HOST_BEHAVIOR_SUPPORT]= true,
 [NVME_COMMAND_SET_PROFILE]  = true,
 };
 
@@ -206,6 +207,7 @@ static const uint32_t nvme_feature_cap[NVME_FID_MAX] = {
 [NVME_NUMBER_OF_QUEUES] = NVME_FEAT_CAP_CHANGE,
 [NVME_ASYNCHRONOUS_EVENT_CONF]  = NVME_FEAT_CAP_CHANGE,
 [NVME_TIMESTAMP]= NVME_FEAT_CAP_CHANGE,
+[NVME_HOST_BEHAVIOR_SUPPORT]= NVME_FEAT_CAP_CHANGE,
 [NVME_COMMAND_SET_PROFILE]  = NVME_FEAT_CAP_CHANGE,
 };
 
@@ -5091,6 +5093,9 @@ static uint16_t nvme_get_feature(NvmeCtrl *n, NvmeRequest 
*req)
 goto out;
 case NVME_TIMESTAMP:
 return nvme_get_feature_timestamp(n, req);
+case NVME_HOST_BEHAVIOR_SUPPORT:
+return nvme_c2h(n, (uint8_t *)&n->features.hbs,
+sizeof(n->features.hbs), req);
 default:
 break;
 }
@@ -5281,6 +5286,9 @@ static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeRequest 
*req)
 break;
 case NVME_TIMESTAMP:
 return nvme_set_feature_timestamp(n, req);
+case NVME_HOST_BEHAVIOR_SUPPORT:
+return nvme_h2c(n, (uint8_t *)&n->features.hbs,
+sizeof(n->features.hbs), req);
 case NVME_COMMAND_SET_PROFILE:
 if (dw11 & 0x1ff) {
 trace_pci_nvme_err_invalid_iocsci(dw11 & 0x1ff);
diff --git a/hw/nvme/nvme.h b/hw/nvme/nvme.h
index 801176a2bd5e..103407038e74 100644
--- a/hw/nvme/nvme.h
+++ b/hw/nvme/nvme.h
@@ -468,7 +468,9 @@ typedef struct NvmeCtrl {
 uint16_t temp_thresh_hi;
 uint16_t temp_thresh_low;
 };
-uint32_tasync_config;
+
+uint32_tasync_config;
+NvmeHostBehaviorSupport hbs;
 } features;
 } NvmeCtrl;
 
diff --git a/include/block/nvme.h b/include/block/nvme.h
index cd068ac89142..e527c728f975 100644
--- a/include/block/nvme.h
+++ b/include/block/nvme.h
@@ -1216,6 +1216,7 @@ enum NvmeFeatureIds {
 NVME_WRITE_ATOMICITY= 0xa,
 NVME_ASYNCHRONOUS_EVENT_CONF= 0xb,
 NVME_TIMESTAMP  = 0xe,
+NVME_HOST_BEHAVIOR_SUPPORT  = 0x16,
 NVME_COMMAND_SET_PROFILE= 0x19,
 NVME_SOFTWARE_PROGRESS_MARKER   = 0x80,
 NVME_FID_MAX= 0x100,
@@ -1257,6 +1258,13 @@ typedef struct QEMU_PACKED NvmeRangeType {
 uint8_t rsvd48[16];
 } NvmeRangeType;
 
+typedef struct NvmeHostBehaviorSupport {
+uint8_t acre;
+uint8_t etdas;
+uint8_t lbafee;
+uint8_t rsvd3[509];
+} NvmeHostBehaviorSupport;
+
 typedef struct QEMU_PACKED NvmeLBAF {
 uint16_tms;
 uint8_t ds;
@@ -1520,6 +1528,7 @@ static inline void _nvme_check_size(void)
 QEMU_BUILD_BUG_ON(sizeof(NvmeDsmCmd) != 64);
 QEMU_BUILD_BUG_ON(sizeof(NvmeCopyCmd) != 64);
 QEMU_BUILD_BUG_ON(sizeof(NvmeRangeType) != 64);
+QEMU_BUILD_BUG_ON(sizeof(NvmeHostBehaviorSupport) != 512);
 QEMU_BUILD_BUG_ON(sizeof(NvmeErrorLog) != 64);
 QEMU_BUILD_BUG_ON(sizeof(NvmeFwSlotInfoLog) != 512);
 QEMU_BUILD_BUG_ON(sizeof(NvmeSmartLog) != 512);
-- 
2.35.1




[PATCH 6/6] hw/nvme: 64-bit pi support

2022-02-14 Thread Klaus Jensen
From: Naveen Nagar 

This adds support for one possible new protection information format
introduced in TP4068 (and integrated in NVMe 2.0): the 64-bit CRC guard
and 48-bit reference tag. This version does not support storage tags.

Like the CRC16 support already present, this uses a software
implementation of CRC64 (so it is naturally pretty slow). But its good
enough for verification purposes.

This may go nicely hand-in-hand with the support that Keith submitted
for the Linux kernel[1].

  [1]: 
https://lore.kernel.org/linux-nvme/20220126165214.ga1782...@dhcp-10-100-145-180.wdc.com/T/

Signed-off-by: Naveen Nagar 
Signed-off-by: Klaus Jensen 
---
 hw/nvme/ctrl.c   | 163 +++
 hw/nvme/dif.c| 363 +--
 hw/nvme/dif.h| 143 -
 hw/nvme/ns.c |  12 ++
 hw/nvme/nvme.h   |   3 +
 hw/nvme/trace-events |  12 +-
 include/block/nvme.h |  67 ++--
 7 files changed, 629 insertions(+), 134 deletions(-)

diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
index f1683960b87e..03760ddeae8c 100644
--- a/hw/nvme/ctrl.c
+++ b/hw/nvme/ctrl.c
@@ -2050,9 +2050,12 @@ static void nvme_verify_cb(void *opaque, int ret)
 uint8_t prinfo = NVME_RW_PRINFO(le16_to_cpu(rw->control));
 uint16_t apptag = le16_to_cpu(rw->apptag);
 uint16_t appmask = le16_to_cpu(rw->appmask);
-uint32_t reftag = le32_to_cpu(rw->reftag);
+uint64_t reftag = le32_to_cpu(rw->reftag);
+uint64_t cdw3 = le32_to_cpu(rw->cdw3);
 uint16_t status;
 
+reftag |= cdw3 << 32;
+
 trace_pci_nvme_verify_cb(nvme_cid(req), prinfo, apptag, appmask, reftag);
 
 if (ret) {
@@ -2141,7 +2144,8 @@ static void nvme_compare_mdata_cb(void *opaque, int ret)
 uint8_t prinfo = NVME_RW_PRINFO(le16_to_cpu(rw->control));
 uint16_t apptag = le16_to_cpu(rw->apptag);
 uint16_t appmask = le16_to_cpu(rw->appmask);
-uint32_t reftag = le32_to_cpu(rw->reftag);
+uint64_t reftag = le32_to_cpu(rw->reftag);
+uint64_t cdw3 = le32_to_cpu(rw->cdw3);
 struct nvme_compare_ctx *ctx = req->opaque;
 g_autofree uint8_t *buf = NULL;
 BlockBackend *blk = ns->blkconf.blk;
@@ -2149,6 +2153,8 @@ static void nvme_compare_mdata_cb(void *opaque, int ret)
 BlockAcctStats *stats = blk_get_stats(blk);
 uint16_t status = NVME_SUCCESS;
 
+reftag |= cdw3 << 32;
+
 trace_pci_nvme_compare_mdata_cb(nvme_cid(req));
 
 if (ret) {
@@ -2527,7 +2533,8 @@ typedef struct NvmeCopyAIOCB {
 QEMUBH *bh;
 int ret;
 
-NvmeCopySourceRange *ranges;
+void *ranges;
+unsigned int format;
 int nr;
 int idx;
 
@@ -2538,7 +2545,7 @@ typedef struct NvmeCopyAIOCB {
 BlockAcctCookie write;
 } acct;
 
-uint32_t reftag;
+uint64_t reftag;
 uint64_t slba;
 
 NvmeZone *zone;
@@ -2592,13 +2599,101 @@ static void nvme_copy_bh(void *opaque)
 
 static void nvme_copy_cb(void *opaque, int ret);
 
+static void nvme_copy_source_range_parse_format0(void *ranges, int idx,
+ uint64_t *slba, uint32_t *nlb,
+ uint16_t *apptag,
+ uint16_t *appmask,
+ uint64_t *reftag)
+{
+NvmeCopySourceRangeFormat0 *_ranges = ranges;
+
+if (slba) {
+*slba = le64_to_cpu(_ranges[idx].slba);
+}
+
+if (nlb) {
+*nlb = le16_to_cpu(_ranges[idx].nlb) + 1;
+}
+
+if (apptag) {
+*apptag = le16_to_cpu(_ranges[idx].apptag);
+}
+
+if (appmask) {
+*appmask = le16_to_cpu(_ranges[idx].appmask);
+}
+
+if (reftag) {
+*reftag = le32_to_cpu(_ranges[idx].reftag);
+}
+}
+
+static void nvme_copy_source_range_parse_format1(void *ranges, int idx,
+ uint64_t *slba, uint32_t *nlb,
+ uint16_t *apptag,
+ uint16_t *appmask,
+ uint64_t *reftag)
+{
+NvmeCopySourceRangeFormat1 *_ranges = ranges;
+
+if (slba) {
+*slba = le64_to_cpu(_ranges[idx].slba);
+}
+
+if (nlb) {
+*nlb = le16_to_cpu(_ranges[idx].nlb) + 1;
+}
+
+if (apptag) {
+*apptag = le16_to_cpu(_ranges[idx].apptag);
+}
+
+if (appmask) {
+*appmask = le16_to_cpu(_ranges[idx].appmask);
+}
+
+if (reftag) {
+*reftag = 0;
+
+*reftag |= (uint64_t)_ranges[idx].sr[4] << 40;
+*reftag |= (uint64_t)_ranges[idx].sr[5] << 32;
+*reftag |= (uint64_t)_ranges[idx].sr[6] << 24;
+*reftag |= (uint64_t)_ranges[idx].sr[7] << 16;
+*reftag |= (uint64_t)_ranges[idx].sr[8] << 8;
+*reftag |= (uint64_t)_ranges[idx].sr[9];
+}
+}
+
+static void nvme_copy_source_range_parse(void *ranges, int idx, uint8_t format,
+ uint64_t *s

[PATCH 4/6] hw/nvme: add support for the lbafee hbs feature

2022-02-14 Thread Klaus Jensen
From: Naveen Nagar 

Add support for up to 64 LBA formats through the LBAFEE field of the
Host Behavior Support feature.

Signed-off-by: Naveen Nagar 
Signed-off-by: Klaus Jensen 
---
 hw/nvme/ctrl.c   | 34 +++---
 hw/nvme/ns.c | 15 +--
 hw/nvme/nvme.h   |  1 +
 include/block/nvme.h |  7 +--
 4 files changed, 46 insertions(+), 11 deletions(-)

diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
index d8701ebf2fa8..52ab3450b975 100644
--- a/hw/nvme/ctrl.c
+++ b/hw/nvme/ctrl.c
@@ -5165,6 +5165,7 @@ static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeRequest 
*req)
 uint32_t nsid = le32_to_cpu(cmd->nsid);
 uint8_t fid = NVME_GETSETFEAT_FID(dw10);
 uint8_t save = NVME_SETFEAT_SAVE(dw10);
+uint16_t status;
 int i;
 
 trace_pci_nvme_setfeat(nvme_cid(req), nsid, fid, save, dw11);
@@ -5287,8 +5288,26 @@ static uint16_t nvme_set_feature(NvmeCtrl *n, 
NvmeRequest *req)
 case NVME_TIMESTAMP:
 return nvme_set_feature_timestamp(n, req);
 case NVME_HOST_BEHAVIOR_SUPPORT:
-return nvme_h2c(n, (uint8_t *)&n->features.hbs,
-sizeof(n->features.hbs), req);
+status = nvme_h2c(n, (uint8_t *)&n->features.hbs,
+  sizeof(n->features.hbs), req);
+if (status) {
+return status;
+}
+
+for (i = 1; i <= NVME_MAX_NAMESPACES; i++) {
+ns = nvme_ns(n, i);
+
+if (!ns) {
+continue;
+}
+
+ns->id_ns.nlbaf = ns->nlbaf - 1;
+if (!n->features.hbs.lbafee) {
+ns->id_ns.nlbaf = MIN(ns->id_ns.nlbaf, 15);
+}
+}
+
+return status;
 case NVME_COMMAND_SET_PROFILE:
 if (dw11 & 0x1ff) {
 trace_pci_nvme_err_invalid_iocsci(dw11 & 0x1ff);
@@ -5479,10 +5498,13 @@ static const AIOCBInfo nvme_format_aiocb_info = {
 static void nvme_format_set(NvmeNamespace *ns, uint8_t lbaf, uint8_t mset,
 uint8_t pi, uint8_t pil)
 {
+uint8_t lbafl = lbaf & 0xf;
+uint8_t lbafu = lbaf >> 4;
+
 trace_pci_nvme_format_set(ns->params.nsid, lbaf, mset, pi, pil);
 
 ns->id_ns.dps = (pil << 3) | pi;
-ns->id_ns.flbas = lbaf | (mset << 4);
+ns->id_ns.flbas = (lbafu << 5) | (mset << 4) | lbafl;
 
 nvme_ns_init_format(ns);
 }
@@ -5596,6 +5618,7 @@ static uint16_t nvme_format(NvmeCtrl *n, NvmeRequest *req)
 uint8_t mset = (dw10 >> 4) & 0x1;
 uint8_t pi = (dw10 >> 5) & 0x7;
 uint8_t pil = (dw10 >> 8) & 0x1;
+uint8_t lbafu = (dw10 >> 12) & 0x3;
 uint16_t status;
 
 iocb = qemu_aio_get(&nvme_format_aiocb_info, NULL, nvme_misc_cb, req);
@@ -5612,6 +5635,10 @@ static uint16_t nvme_format(NvmeCtrl *n, NvmeRequest 
*req)
 iocb->broadcast = (nsid == NVME_NSID_BROADCAST);
 iocb->offset = 0;
 
+if (n->features.hbs.lbafee) {
+iocb->lbaf |= lbafu << 4;
+}
+
 if (!iocb->broadcast) {
 if (!nvme_nsid_valid(n, nsid)) {
 status = NVME_INVALID_NSID | NVME_DNR;
@@ -6587,6 +6614,7 @@ static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice 
*pci_dev)
 id->cntlid = cpu_to_le16(n->cntlid);
 
 id->oaes = cpu_to_le32(NVME_OAES_NS_ATTR);
+id->ctratt |= cpu_to_le32(NVME_CTRATT_ELBAS);
 
 id->rab = 6;
 
diff --git a/hw/nvme/ns.c b/hw/nvme/ns.c
index ee673f1a5bef..8dfb55130beb 100644
--- a/hw/nvme/ns.c
+++ b/hw/nvme/ns.c
@@ -112,10 +112,11 @@ static int nvme_ns_init(NvmeNamespace *ns, Error **errp)
 [7] = { .ds = 12, .ms = 64 },
 };
 
+ns->nlbaf = 8;
+
 memcpy(&id_ns->lbaf, &lbaf, sizeof(lbaf));
-id_ns->nlbaf = 7;
 
-for (i = 0; i <= id_ns->nlbaf; i++) {
+for (i = 0; i < ns->nlbaf; i++) {
 NvmeLBAF *lbaf = &id_ns->lbaf[i];
 if (lbaf->ds == ds) {
 if (lbaf->ms == ms) {
@@ -126,12 +127,14 @@ static int nvme_ns_init(NvmeNamespace *ns, Error **errp)
 }
 
 /* add non-standard lba format */
-id_ns->nlbaf++;
-id_ns->lbaf[id_ns->nlbaf].ds = ds;
-id_ns->lbaf[id_ns->nlbaf].ms = ms;
-id_ns->flbas |= id_ns->nlbaf;
+id_ns->lbaf[ns->nlbaf].ds = ds;
+id_ns->lbaf[ns->nlbaf].ms = ms;
+ns->nlbaf++;
+
+id_ns->flbas |= i;
 
 lbaf_found:
+id_ns->nlbaf = ns->nlbaf - 1;
 nvme_ns_init_format(ns);
 
 return 0;
diff --git a/hw/nvme/nvme.h b/hw/nvme/nvme.h
index 103407038e74..e715c3255a29 100644
--- a/hw/nvme/nvme.h
+++ b/hw/nvme/nvme.h
@@ -128,6 +128,7 @@ typedef struct NvmeNamespace {
 int64_t  moff;
 NvmeIdNs id_ns;
 NvmeLBAF lbaf;
+unsigned int nlbaf;
 size_t   lbasz;
 const uint32_t *iocs;
 uint8_t  csi;
diff --git a/include/block/nvme.h b/include/block/nvme.h
index e527c728f975..37afc9be9b18 100644
--- a/include/block/nvme.h
+++ b/include/block/nvme.h
@@ -,6 +,10 @@ enum NvmeIdCtrlOaes {
 NVME_OAES_NS_ATTR   = 1 << 8,
 };
 
+enum NvmeIdCtrlCtratt {
+NVME_CTRATT_ELBAS   = 1 << 15,
+};
+
 enum NvmeIdCtrlOacs

Re: [PATCH] ui/cocoa: Do not alert even without block devices

2022-02-14 Thread Philippe Mathieu-Daudé via

On 13/2/22 03:14, Akihiko Odaki wrote:

Signed-off-by: Akihiko Odaki 
---
  ui/cocoa.m | 5 -
  1 file changed, 5 deletions(-)

diff --git a/ui/cocoa.m b/ui/cocoa.m
index ac18e14ce01..271a2676026 100644
--- a/ui/cocoa.m
+++ b/ui/cocoa.m
@@ -1715,11 +1715,6 @@ static void addRemovableDevicesMenuItems(void)
  
  currentDevice = qmp_query_block(NULL);

  pointerToFree = currentDevice;
-if(currentDevice == NULL) {
-NSBeep();
-QEMU_Alert(@"Failed to query for block devices!");
-return;
-}
  
  menu = [[[NSApp mainMenu] itemWithTitle:@"Machine"] submenu];
  


Cc'ing qemu-block@ and Markus (QMP).

I always wondered the point of this annoying warning but never
found out.

Is this menu updated when removable blkdev are hot-plugged from
the monitor or QMP?

Tested-by: Philippe Mathieu-Daudé 



[PATCH 3/6] hw/nvme: move format parameter parsing

2022-02-14 Thread Klaus Jensen
From: Klaus Jensen 

There is no need to extract the format command parameters for each
namespace. Move it to the entry point.

Signed-off-by: Klaus Jensen 
---
 hw/nvme/ctrl.c | 31 ++-
 1 file changed, 18 insertions(+), 13 deletions(-)

diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
index 71c60482c75f..d8701ebf2fa8 100644
--- a/hw/nvme/ctrl.c
+++ b/hw/nvme/ctrl.c
@@ -5452,6 +5452,11 @@ typedef struct NvmeFormatAIOCB {
 uint32_t nsid;
 bool broadcast;
 int64_t offset;
+
+uint8_t lbaf;
+uint8_t mset;
+uint8_t pi;
+uint8_t pil;
 } NvmeFormatAIOCB;
 
 static void nvme_format_bh(void *opaque);
@@ -5471,14 +5476,9 @@ static const AIOCBInfo nvme_format_aiocb_info = {
 .get_aio_context = nvme_get_aio_context,
 };
 
-static void nvme_format_set(NvmeNamespace *ns, NvmeCmd *cmd)
+static void nvme_format_set(NvmeNamespace *ns, uint8_t lbaf, uint8_t mset,
+uint8_t pi, uint8_t pil)
 {
-uint32_t dw10 = le32_to_cpu(cmd->cdw10);
-uint8_t lbaf = dw10 & 0xf;
-uint8_t pi = (dw10 >> 5) & 0x7;
-uint8_t mset = (dw10 >> 4) & 0x1;
-uint8_t pil = (dw10 >> 8) & 0x1;
-
 trace_pci_nvme_format_set(ns->params.nsid, lbaf, mset, pi, pil);
 
 ns->id_ns.dps = (pil << 3) | pi;
@@ -5490,7 +5490,6 @@ static void nvme_format_set(NvmeNamespace *ns, NvmeCmd 
*cmd)
 static void nvme_format_ns_cb(void *opaque, int ret)
 {
 NvmeFormatAIOCB *iocb = opaque;
-NvmeRequest *req = iocb->req;
 NvmeNamespace *ns = iocb->ns;
 int bytes;
 
@@ -5512,7 +5511,7 @@ static void nvme_format_ns_cb(void *opaque, int ret)
 return;
 }
 
-nvme_format_set(ns, &req->cmd);
+nvme_format_set(ns, iocb->lbaf, iocb->mset, iocb->pi, iocb->pil);
 ns->status = 0x0;
 iocb->ns = NULL;
 iocb->offset = 0;
@@ -5548,9 +5547,6 @@ static void nvme_format_bh(void *opaque)
 NvmeFormatAIOCB *iocb = opaque;
 NvmeRequest *req = iocb->req;
 NvmeCtrl *n = nvme_ctrl(req);
-uint32_t dw10 = le32_to_cpu(req->cmd.cdw10);
-uint8_t lbaf = dw10 & 0xf;
-uint8_t pi = (dw10 >> 5) & 0x7;
 uint16_t status;
 int i;
 
@@ -5572,7 +5568,7 @@ static void nvme_format_bh(void *opaque)
 goto done;
 }
 
-status = nvme_format_check(iocb->ns, lbaf, pi);
+status = nvme_format_check(iocb->ns, iocb->lbaf, iocb->pi);
 if (status) {
 req->status = status;
 goto done;
@@ -5595,6 +5591,11 @@ static uint16_t nvme_format(NvmeCtrl *n, NvmeRequest 
*req)
 {
 NvmeFormatAIOCB *iocb;
 uint32_t nsid = le32_to_cpu(req->cmd.nsid);
+uint32_t dw10 = le32_to_cpu(req->cmd.cdw10);
+uint8_t lbaf = dw10 & 0xf;
+uint8_t mset = (dw10 >> 4) & 0x1;
+uint8_t pi = (dw10 >> 5) & 0x7;
+uint8_t pil = (dw10 >> 8) & 0x1;
 uint16_t status;
 
 iocb = qemu_aio_get(&nvme_format_aiocb_info, NULL, nvme_misc_cb, req);
@@ -5604,6 +5605,10 @@ static uint16_t nvme_format(NvmeCtrl *n, NvmeRequest 
*req)
 iocb->ret = 0;
 iocb->ns = NULL;
 iocb->nsid = 0;
+iocb->lbaf = lbaf;
+iocb->mset = mset;
+iocb->pi = pi;
+iocb->pil = pil;
 iocb->broadcast = (nsid == NVME_NSID_BROADCAST);
 iocb->offset = 0;
 
-- 
2.35.1




[PATCH 5/6] hw/nvme: add pi tuple size helper

2022-02-14 Thread Klaus Jensen
From: Klaus Jensen 

A subsequent patch will introduce a new tuple size; so add a helper and
use that instead of sizeof() and magic numbers.

Signed-off-by: Klaus Jensen 
---
 hw/nvme/ctrl.c | 14 --
 hw/nvme/dif.c  | 16 
 hw/nvme/dif.h  |  5 +
 3 files changed, 21 insertions(+), 14 deletions(-)

diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
index 52ab3450b975..f1683960b87e 100644
--- a/hw/nvme/ctrl.c
+++ b/hw/nvme/ctrl.c
@@ -1068,7 +1068,8 @@ static uint16_t nvme_map_data(NvmeCtrl *n, uint32_t nlb, 
NvmeRequest *req)
 size_t len = nvme_l2b(ns, nlb);
 uint16_t status;
 
-if (nvme_ns_ext(ns) && !(pi && pract && ns->lbaf.ms == 8)) {
+if (nvme_ns_ext(ns) &&
+!(pi && pract && ns->lbaf.ms == nvme_pi_tuple_size(ns))) {
 NvmeSg sg;
 
 len += nvme_m2b(ns, nlb);
@@ -1247,7 +1248,8 @@ uint16_t nvme_bounce_data(NvmeCtrl *n, void *ptr, 
uint32_t len,
 bool pi = !!NVME_ID_NS_DPS_TYPE(ns->id_ns.dps);
 bool pract = !!(le16_to_cpu(rw->control) & NVME_RW_PRINFO_PRACT);
 
-if (nvme_ns_ext(ns) && !(pi && pract && ns->lbaf.ms == 8)) {
+if (nvme_ns_ext(ns) &&
+!(pi && pract && ns->lbaf.ms == nvme_pi_tuple_size(ns))) {
 return nvme_tx_interleaved(n, &req->sg, ptr, len, ns->lbasz,
ns->lbaf.ms, 0, dir);
 }
@@ -2184,7 +2186,7 @@ static void nvme_compare_mdata_cb(void *opaque, int ret)
  * tuple.
  */
 if (!(ns->id_ns.dps & NVME_ID_NS_DPS_FIRST_EIGHT)) {
-pil = ns->lbaf.ms - sizeof(NvmeDifTuple);
+pil = ns->lbaf.ms - nvme_pi_tuple_size(ns);
 }
 
 for (bufp = buf; mbufp < end; bufp += ns->lbaf.ms, mbufp += 
ns->lbaf.ms) {
@@ -3167,7 +3169,7 @@ static uint16_t nvme_read(NvmeCtrl *n, NvmeRequest *req)
 if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) {
 bool pract = prinfo & NVME_PRINFO_PRACT;
 
-if (pract && ns->lbaf.ms == 8) {
+if (pract && ns->lbaf.ms == nvme_pi_tuple_size(ns)) {
 mapped_size = data_size;
 }
 }
@@ -3244,7 +3246,7 @@ static uint16_t nvme_do_write(NvmeCtrl *n, NvmeRequest 
*req, bool append,
 if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) {
 bool pract = prinfo & NVME_PRINFO_PRACT;
 
-if (pract && ns->lbaf.ms == 8) {
+if (pract && ns->lbaf.ms == nvme_pi_tuple_size(ns)) {
 mapped_size -= nvme_m2b(ns, nlb);
 }
 }
@@ -5553,7 +,7 @@ static uint16_t nvme_format_check(NvmeNamespace *ns, 
uint8_t lbaf, uint8_t pi)
 return NVME_INVALID_FORMAT | NVME_DNR;
 }
 
-if (pi && (ns->id_ns.lbaf[lbaf].ms < sizeof(NvmeDifTuple))) {
+if (pi && (ns->id_ns.lbaf[lbaf].ms < nvme_pi_tuple_size(ns))) {
 return NVME_INVALID_FORMAT | NVME_DNR;
 }
 
diff --git a/hw/nvme/dif.c b/hw/nvme/dif.c
index cd0cea2b5ebd..891385f33f20 100644
--- a/hw/nvme/dif.c
+++ b/hw/nvme/dif.c
@@ -48,7 +48,7 @@ void nvme_dif_pract_generate_dif(NvmeNamespace *ns, uint8_t 
*buf, size_t len,
 int16_t pil = 0;
 
 if (!(ns->id_ns.dps & NVME_ID_NS_DPS_FIRST_EIGHT)) {
-pil = ns->lbaf.ms - sizeof(NvmeDifTuple);
+pil = ns->lbaf.ms - nvme_pi_tuple_size(ns);
 }
 
 trace_pci_nvme_dif_pract_generate_dif(len, ns->lbasz, ns->lbasz + pil,
@@ -145,7 +145,7 @@ uint16_t nvme_dif_check(NvmeNamespace *ns, uint8_t *buf, 
size_t len,
 }
 
 if (!(ns->id_ns.dps & NVME_ID_NS_DPS_FIRST_EIGHT)) {
-pil = ns->lbaf.ms - sizeof(NvmeDifTuple);
+pil = ns->lbaf.ms - nvme_pi_tuple_size(ns);
 }
 
 trace_pci_nvme_dif_check(prinfo, ns->lbasz + pil);
@@ -184,7 +184,7 @@ uint16_t nvme_dif_mangle_mdata(NvmeNamespace *ns, uint8_t 
*mbuf, size_t mlen,
 
 
 if (!(ns->id_ns.dps & NVME_ID_NS_DPS_FIRST_EIGHT)) {
-pil = ns->lbaf.ms - sizeof(NvmeDifTuple);
+pil = ns->lbaf.ms - nvme_pi_tuple_size(ns);
 }
 
 do {
@@ -210,7 +210,7 @@ uint16_t nvme_dif_mangle_mdata(NvmeNamespace *ns, uint8_t 
*mbuf, size_t mlen,
 end = mbufp + mlen;
 
 for (; mbufp < end; mbufp += ns->lbaf.ms) {
-memset(mbufp + pil, 0xff, sizeof(NvmeDifTuple));
+memset(mbufp + pil, 0xff, nvme_pi_tuple_size(ns));
 }
 }
 
@@ -284,7 +284,7 @@ static void nvme_dif_rw_check_cb(void *opaque, int ret)
 goto out;
 }
 
-if (prinfo & NVME_PRINFO_PRACT && ns->lbaf.ms == 8) {
+if (prinfo & NVME_PRINFO_PRACT && ns->lbaf.ms == nvme_pi_tuple_size(ns)) {
 goto out;
 }
 
@@ -388,7 +388,7 @@ uint16_t nvme_dif_rw(NvmeCtrl *n, NvmeRequest *req)
 
 if (pract) {
 uint8_t *mbuf, *end;
-int16_t pil = ns->lbaf.ms - sizeof(NvmeDifTuple);
+int16_t pil = ns->lbaf.ms - nvme_pi_tuple_size(ns);
 
 status = nvme_check_prinfo(ns, prinfo, slba, reftag);
 if (status) {
@@ -428,7 +428,7 @@ uint16_t nvme_dif_rw(NvmeCtrl *n, NvmeRequest *req)

[PATCH 1/6] hw/nvme: move dif/pi prototypes into dif.h

2022-02-14 Thread Klaus Jensen
From: Klaus Jensen 

Move dif/pi data structures and inlines to dif.h.

Signed-off-by: Klaus Jensen 
---
 hw/nvme/ctrl.c |  1 +
 hw/nvme/dif.c  |  1 +
 hw/nvme/dif.h  | 53 ++
 hw/nvme/nvme.h | 50 ---
 4 files changed, 55 insertions(+), 50 deletions(-)
 create mode 100644 hw/nvme/dif.h

diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
index 98aac98bef5f..d08af3bdc1a2 100644
--- a/hw/nvme/ctrl.c
+++ b/hw/nvme/ctrl.c
@@ -163,6 +163,7 @@
 #include "migration/vmstate.h"
 
 #include "nvme.h"
+#include "dif.h"
 #include "trace.h"
 
 #define NVME_MAX_IOQPAIRS 0x
diff --git a/hw/nvme/dif.c b/hw/nvme/dif.c
index 5dbd18b2a4a5..cd0cea2b5ebd 100644
--- a/hw/nvme/dif.c
+++ b/hw/nvme/dif.c
@@ -13,6 +13,7 @@
 #include "sysemu/block-backend.h"
 
 #include "nvme.h"
+#include "dif.h"
 #include "trace.h"
 
 uint16_t nvme_check_prinfo(NvmeNamespace *ns, uint8_t prinfo, uint64_t slba,
diff --git a/hw/nvme/dif.h b/hw/nvme/dif.h
new file mode 100644
index ..e36fea30e71e
--- /dev/null
+++ b/hw/nvme/dif.h
@@ -0,0 +1,53 @@
+#ifndef HW_NVME_DIF_H
+#define HW_NVME_DIF_H
+
+/* from Linux kernel (crypto/crct10dif_common.c) */
+static const uint16_t t10_dif_crc_table[256] = {
+0x, 0x8BB7, 0x9CD9, 0x176E, 0xB205, 0x39B2, 0x2EDC, 0xA56B,
+0xEFBD, 0x640A, 0x7364, 0xF8D3, 0x5DB8, 0xD60F, 0xC161, 0x4AD6,
+0x54CD, 0xDF7A, 0xC814, 0x43A3, 0xE6C8, 0x6D7F, 0x7A11, 0xF1A6,
+0xBB70, 0x30C7, 0x27A9, 0xAC1E, 0x0975, 0x82C2, 0x95AC, 0x1E1B,
+0xA99A, 0x222D, 0x3543, 0xBEF4, 0x1B9F, 0x9028, 0x8746, 0x0CF1,
+0x4627, 0xCD90, 0xDAFE, 0x5149, 0xF422, 0x7F95, 0x68FB, 0xE34C,
+0xFD57, 0x76E0, 0x618E, 0xEA39, 0x4F52, 0xC4E5, 0xD38B, 0x583C,
+0x12EA, 0x995D, 0x8E33, 0x0584, 0xA0EF, 0x2B58, 0x3C36, 0xB781,
+0xD883, 0x5334, 0x445A, 0xCFED, 0x6A86, 0xE131, 0xF65F, 0x7DE8,
+0x373E, 0xBC89, 0xABE7, 0x2050, 0x853B, 0x0E8C, 0x19E2, 0x9255,
+0x8C4E, 0x07F9, 0x1097, 0x9B20, 0x3E4B, 0xB5FC, 0xA292, 0x2925,
+0x63F3, 0xE844, 0xFF2A, 0x749D, 0xD1F6, 0x5A41, 0x4D2F, 0xC698,
+0x7119, 0xFAAE, 0xEDC0, 0x6677, 0xC31C, 0x48AB, 0x5FC5, 0xD472,
+0x9EA4, 0x1513, 0x027D, 0x89CA, 0x2CA1, 0xA716, 0xB078, 0x3BCF,
+0x25D4, 0xAE63, 0xB90D, 0x32BA, 0x97D1, 0x1C66, 0x0B08, 0x80BF,
+0xCA69, 0x41DE, 0x56B0, 0xDD07, 0x786C, 0xF3DB, 0xE4B5, 0x6F02,
+0x3AB1, 0xB106, 0xA668, 0x2DDF, 0x88B4, 0x0303, 0x146D, 0x9FDA,
+0xD50C, 0x5EBB, 0x49D5, 0xC262, 0x6709, 0xECBE, 0xFBD0, 0x7067,
+0x6E7C, 0xE5CB, 0xF2A5, 0x7912, 0xDC79, 0x57CE, 0x40A0, 0xCB17,
+0x81C1, 0x0A76, 0x1D18, 0x96AF, 0x33C4, 0xB873, 0xAF1D, 0x24AA,
+0x932B, 0x189C, 0x0FF2, 0x8445, 0x212E, 0xAA99, 0xBDF7, 0x3640,
+0x7C96, 0xF721, 0xE04F, 0x6BF8, 0xCE93, 0x4524, 0x524A, 0xD9FD,
+0xC7E6, 0x4C51, 0x5B3F, 0xD088, 0x75E3, 0xFE54, 0xE93A, 0x628D,
+0x285B, 0xA3EC, 0xB482, 0x3F35, 0x9A5E, 0x11E9, 0x0687, 0x8D30,
+0xE232, 0x6985, 0x7EEB, 0xF55C, 0x5037, 0xDB80, 0xCCEE, 0x4759,
+0x0D8F, 0x8638, 0x9156, 0x1AE1, 0xBF8A, 0x343D, 0x2353, 0xA8E4,
+0xB6FF, 0x3D48, 0x2A26, 0xA191, 0x04FA, 0x8F4D, 0x9823, 0x1394,
+0x5942, 0xD2F5, 0xC59B, 0x4E2C, 0xEB47, 0x60F0, 0x779E, 0xFC29,
+0x4BA8, 0xC01F, 0xD771, 0x5CC6, 0xF9AD, 0x721A, 0x6574, 0xEEC3,
+0xA415, 0x2FA2, 0x38CC, 0xB37B, 0x1610, 0x9DA7, 0x8AC9, 0x017E,
+0x1F65, 0x94D2, 0x83BC, 0x080B, 0xAD60, 0x26D7, 0x31B9, 0xBA0E,
+0xF0D8, 0x7B6F, 0x6C01, 0xE7B6, 0x42DD, 0xC96A, 0xDE04, 0x55B3
+};
+
+uint16_t nvme_check_prinfo(NvmeNamespace *ns, uint8_t prinfo, uint64_t slba,
+   uint32_t reftag);
+uint16_t nvme_dif_mangle_mdata(NvmeNamespace *ns, uint8_t *mbuf, size_t mlen,
+   uint64_t slba);
+void nvme_dif_pract_generate_dif(NvmeNamespace *ns, uint8_t *buf, size_t len,
+ uint8_t *mbuf, size_t mlen, uint16_t apptag,
+ uint32_t *reftag);
+uint16_t nvme_dif_check(NvmeNamespace *ns, uint8_t *buf, size_t len,
+uint8_t *mbuf, size_t mlen, uint8_t prinfo,
+uint64_t slba, uint16_t apptag,
+uint16_t appmask, uint32_t *reftag);
+uint16_t nvme_dif_rw(NvmeCtrl *n, NvmeRequest *req);
+
+#endif /* HW_NVME_DIF_H */
diff --git a/hw/nvme/nvme.h b/hw/nvme/nvme.h
index 90c0bb7ce236..801176a2bd5e 100644
--- a/hw/nvme/nvme.h
+++ b/hw/nvme/nvme.h
@@ -513,54 +513,4 @@ void nvme_rw_complete_cb(void *opaque, int ret);
 uint16_t nvme_map_dptr(NvmeCtrl *n, NvmeSg *sg, size_t len,
NvmeCmd *cmd);
 
-/* from Linux kernel (crypto/crct10dif_common.c) */
-static const uint16_t t10_dif_crc_table[256] = {
-0x, 0x8BB7, 0x9CD9, 0x176E, 0xB205, 0x39B2, 0x2EDC, 0xA56B,
-0xEFBD, 0x640A, 0x7364, 0xF8D3, 0x5DB8, 0xD60F, 0xC161, 0x4AD6,
-0x54CD, 0xDF7A, 0xC814, 0x43A3, 0xE6C8, 0x6D7F, 0x7A11, 0xF1A6,
-0xBB70, 0x30C7, 0x27A9, 0xAC1E, 0x0975, 0x82C2, 0x95AC, 0x1E1B,
-0xA99A, 0x222D, 0x3543, 0xBEF4, 

[PATCH 0/6] hw/nvme: enhanced protection information (64-bit guard)

2022-02-14 Thread Klaus Jensen
From: Klaus Jensen 

This adds support for one possible new protection information format
introduced in TP4068 (and integrated in NVMe 2.0): the 64-bit CRC guard
and 48-bit reference tag. This version does not support storage tags.

Like the CRC16 support already present, this uses a software
implementation of CRC64 (so it is naturally pretty slow). But its good
enough for verification purposes.

This goes hand-in-hand with the support that Keith submitted for the
Linux kernel[1].

[1]: 
https://lore.kernel.org/linux-nvme/20220201190128.3075065-1-kbu...@kernel.org/

Klaus Jensen (3):
  hw/nvme: move dif/pi prototypes into dif.h
  hw/nvme: move format parameter parsing
  hw/nvme: add pi tuple size helper

Naveen Nagar (3):
  hw/nvme: add host behavior support feature
  hw/nvme: add support for the lbafee hbs feature
  hw/nvme: 64-bit pi support

 hw/nvme/ctrl.c   | 235 +--
 hw/nvme/dif.c| 378 +--
 hw/nvme/dif.h| 191 ++
 hw/nvme/ns.c |  27 +++-
 hw/nvme/nvme.h   |  58 +--
 hw/nvme/trace-events |  12 +-
 include/block/nvme.h |  81 --
 7 files changed, 774 insertions(+), 208 deletions(-)
 create mode 100644 hw/nvme/dif.h

-- 
2.35.1




Re: [PATCH] hw/ide: implement ich6 ide controller support

2022-02-14 Thread BALATON Zoltan

On Sat, 5 Feb 2022, BALATON Zoltan wrote:

Hello,


Ping? John, do you agree with my comments? Should Liav proceed to send a 
v2?


Thanks,
BALATON Zoltan


On Sat, 5 Feb 2022, Liav Albani wrote:

On 2/5/22 17:48, BALATON Zoltan wrote:

On Sat, 5 Feb 2022, Liav Albani wrote:

This type of IDE controller has support for relocating the IO ports and
doesn't use IRQ 14 and 15 but one allocated PCI IRQ for the controller.


I haven't looked at in detail so only a few comments I've got while 
reading it. What machine needs this? In QEMU I think we only have piix and 
ich9 emulated for pc and q35 machines but maybe ich6 is also used by some 
machine I don't know about. Otherwise it looks odd to have ide part of 
ich6 but not the other parts of this chip.



Hi BALATON,

This is my first patch to QEMU and the first time I send patches over the 
mail. I sent my github tree to John Snow (the maintainer of the IDE code in 
QEMU) for advice if I should send them here and I was encouraged to do 
that.


Welcome and thanks a lot for taking time to contribute and share your 
results. In case you're not yet aware, these docs should explain how patches 
are handled on the list:


https://www.qemu.org/docs/master/devel/submitting-a-patch.html

For the next time patch I'll put a note on writing a descriptive cover 
letter as it could have put more valuable details on why I sent this patch.


There's no such machine type emulating the ICH6 chipset in QEMU. However, I 
wrote this emulation component as a test for the SerenityOS kernel because 
I have a machine from 2009 which has
an ICH7 southbridge, so, I wanted to emulate such device with QEMU to ease 
development on it.


I found out that Linux with libata was using the controller without any 
noticeable problems, but the SerenityOS kernel struggled to use this 
device, so I decided that
I should send this patch to get it merged and then I can use it locally and 
maybe other people will benefit from it.


In regard to other components of the ICH6 chipset - I don't think it's 
worth anybody's time to actually implement them as the ICH9 chipset is 
quite close to what the ICH6 chipset offers as far as I can tell.
The idea of implementing ich6-ide controller was to enable the option of 
people like me and other OS developers to ensure their kernels operate 
correctly on such type of device,
which is legacy-free device in the aspect of PCI bus resource management 
but still is a legacy device which belongs to chipsets of late 2000s.


That's OK, maybe a short mention (just one sentence) in the commit message 
explaining this would help to understand why this device model was added.



Signed-off-by: Liav Albani 
---
hw/i386/Kconfig  |   2 +
hw/ide/Kconfig   |   5 +
hw/ide/bmdma.c   |  83 +++
hw/ide/ich6.c    | 211 +++
hw/ide/meson.build   |   3 +-
hw/ide/piix.c    |  50 +-
include/hw/ide/pci.h |   5 +
include/hw/pci/pci_ids.h |   1 +
8 files changed, 311 insertions(+), 49 deletions(-)
create mode 100644 hw/ide/bmdma.c
create mode 100644 hw/ide/ich6.c

diff --git a/hw/i386/Kconfig b/hw/i386/Kconfig
index d22ac4a4b9..a18de2d962 100644
--- a/hw/i386/Kconfig
+++ b/hw/i386/Kconfig
@@ -75,6 +75,7 @@ config I440FX
    select PCI_I440FX
    select PIIX3
    select IDE_PIIX
+    select IDE_ICH6
    select DIMM
    select SMBIOS
    select FW_CFG_DMA
@@ -101,6 +102,7 @@ config Q35
    select PCI_EXPRESS_Q35
    select LPC_ICH9
    select AHCI_ICH9
+    select IDE_ICH6
    select DIMM
    select SMBIOS
    select FW_CFG_DMA
diff --git a/hw/ide/Kconfig b/hw/ide/Kconfig
index dd85fa3619..63304325a5 100644
--- a/hw/ide/Kconfig
+++ b/hw/ide/Kconfig
@@ -38,6 +38,11 @@ config IDE_VIA
    select IDE_PCI
    select IDE_QDEV

+config IDE_ICH6
+    bool
+    select IDE_PCI
+    select IDE_QDEV
+
config MICRODRIVE
    bool
    select IDE_QDEV
diff --git a/hw/ide/bmdma.c b/hw/ide/bmdma.c
new file mode 100644
index 00..979f5974fd
--- /dev/null
+++ b/hw/ide/bmdma.c
@@ -0,0 +1,83 @@
+/*
+ * QEMU IDE Emulation: PCI PIIX3/4 support.
+ *
+ * Copyright (c) 2003 Fabrice Bellard
+ * Copyright (c) 2006 Openedhand Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
a copy
+ * of this software and associated documentation files (the "Software"), 
to deal
+ * in the Software without restriction, including without limitation the 
rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or 
sell

+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
included in

+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
MERCHANTABILITY,
+ * FITNESS FOR A PARTICULA

Re: [PATCH 1/6] block/io.c: fix bdrv_child_cb_drained_begin invocations from a coroutine

2022-02-14 Thread Kevin Wolf
Am 14.02.2022 um 11:27 hat Emanuele Giuseppe Esposito geschrieben:
> 
> 
> On 11/02/2022 12:54, Kevin Wolf wrote:
> > Am 08.02.2022 um 16:36 hat Emanuele Giuseppe Esposito geschrieben:
> >> Using bdrv_do_drained_begin_quiesce() in bdrv_child_cb_drained_begin()
> >> is not a good idea: the callback might be called when running
> >> a drain in a coroutine, and bdrv_drained_begin_poll() does not
> >> handle that case, resulting in assertion failure.
> > 
> > I remembered that we talked about this only recently on IRC, but it
> > didn't make any sense to me again when I read this commit message. So I
> > think we need --verbose.
> > 
> > The .drained_begin callback was always meant to run outside of coroutine
> > context, so the unexpected part isn't that it calls a function that
> > can't run in coroutine context, but that it is already called itself in
> > coroutine context.
> > 
> > The problematic path is bdrv_replace_child_noperm() which then calls
> > bdrv_parent_drained_begin_single(poll=true). Polling in coroutine
> > context is dangerous, it can cause deadlocks because the caller of the
> > coroutine can't make progress. So I believe this call is already wrong
> > in coroutine context.
> 
> Ok, you added this assertion in dcf94a23, but at that time there was no
> bdrv_parent_drained_begin_single, and the polling was only done in
> bdrv_do_drained_begin. So I think that to keep the same logic, the
> assertion should be moved in bdrv_parent_drained_begin_single()? And
> even more specifically, only if the poll flag is true.

I wouldn't necessarily say move, but copying it there makes sense to me.
In order to keep the interface constraints simple, I would assert it
independent of the poll parameter.

> I triggered this by adding additional drains in the callers of
> bdrv_replace_child_noperm(), and I think some test (probably unit test)
> was failing because of either the drained_begin callback itself called
> by the drain, or as you suggested the callbacks called by
> bdrv_parent_drained_begin_single from bdrv_replace_child_noperm.
> 
> Anyways, I think that in addition to the fix in this patch, we should
> also fix bdrv_parent_drained_begin_single(poll=true) in
> bdrv_replace_child_noperm, with something similar to what is done in
> bdrv_co_yield_to_drain? ie if we are in coroutine, schedule a BH that
> runs the same logic but in the main loop, but then somehow wait that it
> finishes before continuing?
> Even though at that point we would have a coroutine waiting for the main
> loop, which I don't think it's something we want.

Coroutines are waiting for the main loop all the time, why would this be
a problem?

Yes, I think a mechanism similar to bdrv_co_yield_to_drain() is needed
if we want to allow callers to be in coroutine context.

And once we have this mechanism, it's actually not in addition to this
patch, but instead of it, because this patch isn't needed any more when
we know that we can't be in coroutine context.

> Alternatively, we would forbid polling in coroutines at all. And the
> only place I can see that is using the drain in coroutine is mirror
> (see below).

Well, my point is that it is already forbidden because it can deadlock.
Code that polls in coroutine context anyway is probably buggy, unless it
can guarantee very specific circumstances that make a deadlock
impossible.

Maybe we can actually assert this in AIO_WAIT_WHILE().

> Additional question: I also noticed that there is a bdrv_drained_begin()
> call in mirror.c in the JobDriver run() callback. How can this even
> work? If a parent uses bdrv_child_cb_drained_begin (which should not be
> so rare) it will crash because of the assertion.

bdrv_co_yield_to_drain() lets this code run in the main loop.

> Further additional question: actually I don't understand also the
> polling logic of mirror (mirror_drained_poll), as if we are draining in
> the coroutine with in_drain = true I think we can have a deadlock if
> in_flight>0?

You mean for a drain issued by the mirror job itself? The in-flight
requests are still processed by the polling loop, so eventually
in_flight should become 0.

Kevin




Re: [PATCH 1/6] block/io.c: fix bdrv_child_cb_drained_begin invocations from a coroutine

2022-02-14 Thread Paolo Bonzini

On 2/14/22 11:27, Emanuele Giuseppe Esposito wrote:

Anyways, I think that in addition to the fix in this patch, we should
also fix bdrv_parent_drained_begin_single(poll=true) in
bdrv_replace_child_noperm, with something similar to what is done in
bdrv_co_yield_to_drain? ie if we are in coroutine, schedule a BH that
runs the same logic but in the main loop, but then somehow wait that it
finishes before continuing?

Alternatively, we would forbid polling in coroutines at all. And the
only place I can see that is using the drain in coroutine is mirror (see
below).


I think you should first of all see what breaks if you forbid 
bdrv_replace_child_noperm() from coroutine context.


Drain in coroutines does not poll, it gets out of the coroutine through 
a bottom half before polling.  So if bdrv_replace_child_noperm() doesn't 
require it, polling in coroutines can still be forbidden.


This patch is correct nevertheless.

Paolo



RE: [PATCH v3 1/1] util: adjust coroutine pool size to virtio block queue

2022-02-14 Thread Hiroki Narukawa
> Coroutine pool size was 64 from long ago, and the basis was organized in the
> commit message in c740ad92.

Sorry, I noticed that commit ID mentioning here was incorrect.
The correct one is 4d68e86b.

https://gitlab.com/qemu-project/qemu/-/commit/4d68e86bb10159099da0798f74e7512955f15eec

I have resent this patch as v4 with exactly the same code as v3, just changing 
this commit message.

> 
> At that time, virtio-blk queue-size and num-queue were not configuable, and
> equivalent values were 128 and 1.
> 
> Coroutine pool size 64 was fine then.
> 
> Later queue-size and num-queue got configuable, and default values were
> increased.
> 
> Coroutine pool with size 64 exhausts frequently with random disk IO in new 
> size,
> and slows down.
> 
> This commit adjusts coroutine pool size adaptively with new values.
> 
> This commit adds 64 by default, but now coroutine is not only for block 
> devices,
> 
> and is not too much burdon comparing with new default.
> 
> pool size of 128 * vCPUs.
> 
> Signed-off-by: Hiroki Narukawa 
> ---
>  hw/block/virtio-blk.c|  5 +
>  include/qemu/coroutine.h | 10 ++
>  util/qemu-coroutine.c| 20 
>  3 files changed, 31 insertions(+), 4 deletions(-)
> 




[PATCH v4 0/1] Patch to adjust coroutine pool size adaptively

2022-02-14 Thread Hiroki Narukawa
Resending with correct commit message

Resending patch with decreasing coroutine pool size on device remove

We encountered random disk IO performance drop since qemu-5.0.0, and this patch 
fixes it.

Commit message in 4d68e86b implied to adjust coroutine pool size adaptively, so 
I tried to implement this.

Changes from v3:
No code changed. Changed commit message so that first line indicates to
correct commit ID.

Changes from v2:
Decrease coroutine pool size on device remove

Changes from v1:
Use qatomic_read properly

Hiroki Narukawa (1):
  util: adjust coroutine pool size to virtio block queue

 hw/block/virtio-blk.c|  5 +
 include/qemu/coroutine.h | 10 ++
 util/qemu-coroutine.c| 20 
 3 files changed, 31 insertions(+), 4 deletions(-)

-- 
2.17.1




[PATCH v4 1/1] util: adjust coroutine pool size to virtio block queue

2022-02-14 Thread Hiroki Narukawa
Coroutine pool size was 64 from long ago, and the basis was organized in the 
commit message in 4d68e86b.

At that time, virtio-blk queue-size and num-queue were not configuable, and 
equivalent values were 128 and 1.

Coroutine pool size 64 was fine then.

Later queue-size and num-queue got configuable, and default values were 
increased.

Coroutine pool with size 64 exhausts frequently with random disk IO in new 
size, and slows down.

This commit adjusts coroutine pool size adaptively with new values.

This commit adds 64 by default, but now coroutine is not only for block devices,

and is not too much burdon comparing with new default.

pool size of 128 * vCPUs.

Signed-off-by: Hiroki Narukawa 
---
 hw/block/virtio-blk.c|  5 +
 include/qemu/coroutine.h | 10 ++
 util/qemu-coroutine.c| 20 
 3 files changed, 31 insertions(+), 4 deletions(-)

diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c
index 82676cdd01..540c38f829 100644
--- a/hw/block/virtio-blk.c
+++ b/hw/block/virtio-blk.c
@@ -32,6 +32,7 @@
 #include "hw/virtio/virtio-bus.h"
 #include "migration/qemu-file-types.h"
 #include "hw/virtio/virtio-access.h"
+#include "qemu/coroutine.h"
 
 /* Config size before the discard support (hide associated config fields) */
 #define VIRTIO_BLK_CFG_SIZE offsetof(struct virtio_blk_config, \
@@ -1214,6 +1215,8 @@ static void virtio_blk_device_realize(DeviceState *dev, 
Error **errp)
 for (i = 0; i < conf->num_queues; i++) {
 virtio_add_queue(vdev, conf->queue_size, virtio_blk_handle_output);
 }
+qemu_coroutine_increase_pool_batch_size(conf->num_queues * conf->queue_size
+/ 2);
 virtio_blk_data_plane_create(vdev, conf, &s->dataplane, &err);
 if (err != NULL) {
 error_propagate(errp, err);
@@ -1250,6 +1253,8 @@ static void virtio_blk_device_unrealize(DeviceState *dev)
 for (i = 0; i < conf->num_queues; i++) {
 virtio_del_queue(vdev, i);
 }
+qemu_coroutine_decrease_pool_batch_size(conf->num_queues * conf->queue_size
+/ 2);
 qemu_del_vm_change_state_handler(s->change);
 blockdev_mark_auto_del(s->blk);
 virtio_cleanup(vdev);
diff --git a/include/qemu/coroutine.h b/include/qemu/coroutine.h
index 4829ff373d..c828a95ee0 100644
--- a/include/qemu/coroutine.h
+++ b/include/qemu/coroutine.h
@@ -331,6 +331,16 @@ void qemu_co_sleep_wake(QemuCoSleep *w);
  */
 void coroutine_fn yield_until_fd_readable(int fd);
 
+/**
+ * Increase coroutine pool size
+ */
+void qemu_coroutine_increase_pool_batch_size(unsigned int 
additional_pool_size);
+
+/**
+ * Devcrease coroutine pool size
+ */
+void qemu_coroutine_decrease_pool_batch_size(unsigned int 
additional_pool_size);
+
 #include "qemu/lockable.h"
 
 #endif /* QEMU_COROUTINE_H */
diff --git a/util/qemu-coroutine.c b/util/qemu-coroutine.c
index 38fb6d3084..c03b2422ff 100644
--- a/util/qemu-coroutine.c
+++ b/util/qemu-coroutine.c
@@ -20,12 +20,14 @@
 #include "qemu/coroutine_int.h"
 #include "block/aio.h"
 
+/** Initial batch size is 64, and is increased on demand */
 enum {
-POOL_BATCH_SIZE = 64,
+POOL_INITIAL_BATCH_SIZE = 64,
 };
 
 /** Free list to speed up creation */
 static QSLIST_HEAD(, Coroutine) release_pool = QSLIST_HEAD_INITIALIZER(pool);
+static unsigned int pool_batch_size = POOL_INITIAL_BATCH_SIZE;
 static unsigned int release_pool_size;
 static __thread QSLIST_HEAD(, Coroutine) alloc_pool = 
QSLIST_HEAD_INITIALIZER(pool);
 static __thread unsigned int alloc_pool_size;
@@ -49,7 +51,7 @@ Coroutine *qemu_coroutine_create(CoroutineEntry *entry, void 
*opaque)
 if (CONFIG_COROUTINE_POOL) {
 co = QSLIST_FIRST(&alloc_pool);
 if (!co) {
-if (release_pool_size > POOL_BATCH_SIZE) {
+if (release_pool_size > qatomic_read(&pool_batch_size)) {
 /* Slow path; a good place to register the destructor, too.  */
 if (!coroutine_pool_cleanup_notifier.notify) {
 coroutine_pool_cleanup_notifier.notify = 
coroutine_pool_cleanup;
@@ -86,12 +88,12 @@ static void coroutine_delete(Coroutine *co)
 co->caller = NULL;
 
 if (CONFIG_COROUTINE_POOL) {
-if (release_pool_size < POOL_BATCH_SIZE * 2) {
+if (release_pool_size < qatomic_read(&pool_batch_size) * 2) {
 QSLIST_INSERT_HEAD_ATOMIC(&release_pool, co, pool_next);
 qatomic_inc(&release_pool_size);
 return;
 }
-if (alloc_pool_size < POOL_BATCH_SIZE) {
+if (alloc_pool_size < qatomic_read(&pool_batch_size)) {
 QSLIST_INSERT_HEAD(&alloc_pool, co, pool_next);
 alloc_pool_size++;
 return;
@@ -202,3 +204,13 @@ AioContext *coroutine_fn 
qemu_coroutine_get_aio_context(Coroutine *co)
 {
 return co->ctx;
 }
+
+void qemu_coroutine_increase_pool_batch_size(unsigned int additional_pool_size)
+{
+qatomic_add(&pool_batch_size, additional_

Re: [PATCH 5/6] test-bdrv-drain.c: remove test_detach_by_parent_cb()

2022-02-14 Thread Paolo Bonzini

On 2/11/22 16:38, Kevin Wolf wrote:

The callback of an I/O function isn't I/O, though. It is code_after_
the I/O has completed. If this doesn't work any more, it feels like this
is a bug.


The issue is that the I/O has *not* completed yet.  blk_aio_preadv(..., 
cb, opaque) is not equivalent to


ret = blk_co_preadv(...);
cb(ret, opaque);

but rather to

blk_inc_in_flight(blk);
ret = blk_co_preadv(...);
cb(ret, opaque);
blk_dec_in_flight(blk);

Your own commit message (yeah I know we've all been through that :)) 
explains why, and notes that it is now invalid to drain in a callback:


commit 46aaf2a566e364a62315219255099cbf1c9b990d
Author: Kevin Wolf 
Date:   Thu Sep 6 17:47:22 2018 +0200

block-backend: Decrease in_flight only after callback

Request callbacks can do pretty much anything, including operations
that will yield from the coroutine (such as draining the backend).
In that case, a decreased in_flight would be visible to other code
and could lead to a drain completing while the callback hasn't
actually completed yet.

Note that reordering these operations forbids calling drain directly
inside an AIO callback.

So the questions are:

1) is the above commit wrong though well-intentioned?

2) is it unexpected that bdrv_replace_child_noperm() drains (thus 
becoming invalid from the callback, albeit valid through a bottom half)?



My answer is respectively 1) it's correct, many coroutines do 
inc_in_flight before creation and dec_in_flight at the end, we're just 
saying that it's _always_ the case for callback-based operations; 2) no, 
it's not unexpected and therefore the test is the incorrect one.


Paolo



Re: [RFC] thread-pool: Add option to fix the pool size

2022-02-14 Thread Kevin Wolf
Am 14.02.2022 um 11:09 hat Stefan Hajnoczi geschrieben:
> On Fri, Feb 11, 2022 at 12:32:57PM +0100, Kevin Wolf wrote:
> > Am 03.02.2022 um 15:19 hat Stefan Hajnoczi geschrieben:
> > > On Thu, Feb 03, 2022 at 10:56:49AM +, Daniel P. Berrangé wrote:
> > > > On Thu, Feb 03, 2022 at 10:53:07AM +, Stefan Hajnoczi wrote:
> > > > > On Wed, Feb 02, 2022 at 06:52:34PM +0100, Nicolas Saenz Julienne 
> > > > > wrote:
> > > 1. A global default value that all new AioContext take. The QEMU main
> > >loop's qemu_aio_context will use this and all IOThread AioContext
> > >will use it (unless they have been overridden).
> > > 
> > >I would define it on --machine because that's the "global" object for
> > >a guest, but that's not very satisfying.
> > 
> > Semantically, -machine is about the virtual hardware where as iothreads
> > are about the backend, so I agree it's not a good fit.
> > 
> > For the main thread, you may want to configure all the same options that
> > you can configure for an iothread. So to me that sounds like we would
> > want to allow using an iothread object for the main thread, too.
> > 
> > That would still require us to tell QEMU which iothread object should be
> > used for the main thread, though.
> 
> Making the main loop thread an IOThread is an interesting direction but
> not an easy change to make.
> 
> The main loop thread has a custom event loop that is not interchangeable
> with the IOThread event loop:
> - The main loop has a poll notifier interface for libslirp fd monitoring
>   integration.
> - The main loop is a GLib event loop but manually polls to get
>   nanosecond resolution timers.
> - The main loop has icount integration.
> - The main loop has the special iohandler AioContext
> 
> The IOThread event loop runs an optimized AioContext event loop instead.
> It falls back to regular g_main_loop_run() if there is a GSource user.
> 
> It would definitely be nice to unify the main loop with IOThread and
> then use --object iothread,... to configure main loop parameters.
> 
> I'm not sure if requiring that of Nicolas is fair though. The event
> loops in QEMU are complex and changes are likely to introduce subtle
> bugs or performance regressions.

I'm not suggesting actually running the iothread event loop instead,
merely using the properties of an object to configure the main thread as
the external user interface.
Whether this uses the same main loop code as today or is moved to the
regular iothread event loop is an implementation detail that can be
changed later.

Or we could maybe use a different object type like 'mainthread' and
share the properties using QOM inheritance.

Kevin


signature.asc
Description: PGP signature


Re: [PATCH 5/6] test-bdrv-drain.c: remove test_detach_by_parent_cb()

2022-02-14 Thread Emanuele Giuseppe Esposito



On 11/02/2022 16:38, Kevin Wolf wrote:
> Am 08.02.2022 um 16:36 hat Emanuele Giuseppe Esposito geschrieben:
>> This test uses a callback of an I/O function (blk_aio_preadv)
>> to modify the graph, using bdrv_attach_child.
>> This is simply not allowed anymore. I/O cannot change the graph.
> 
> The callback of an I/O function isn't I/O, though. It is code _after_
> the I/O has completed. If this doesn't work any more, it feels like this
> is a bug.
> 
> I think it becomes a bit more obvious when you translate the AIO into
> the equivalent coroutine function:
> 
> void coroutine_fn foo(...)
> {
> GLOBAL_STATE_CODE();
> 
> blk_co_preadv(...);
> detach_by_parent_aio_cb(...);
> }
> 
> This is obviously correct code that must work. Calling an I/O function
> from a GS function is allowed, and of course the GS function may
> continue to do GS stuff after the I/O function returned.
> 
> (Actually, I'm not sure if it really works when blk is not in the main
> AioContext, but your API split patches claim that it does, so let's
> assume for the moment that this is already true :-))

Uhmm why does my split claims that it is? all blk_aio_* are IO_CODE.
Also, as you said, if blk is not in the main loop, the callback is not
GS either.

> 
>> Before "block/io.c: make bdrv_do_drained_begin_quiesce static
>> and introduce bdrv_drained_begin_no_poll", the test would simply
>> be at risk of failure, because if bdrv_replace_child_noperm()
>> (called to modify the graph) would call a drain,
>> then one callback of .drained_begin() is bdrv_do_drained_begin_quiesce,
>> that specifically asserts that we are not in a coroutine.
>>
>> Now that we fixed the behavior, the drain will invoke a bh in the
>> main loop, so we don't have such problem. However, this test is still
>> illegal and fails because we forbid graph changes from I/O paths.
>>
>> Once we add the required subtree_drains to protect
>> bdrv_replace_child_noperm(), the key problem in this test is in:
> 
> Probably a question for a different patch, but why is a subtree drain
> required instead of just a normal node drain? It feels like a bigger
> hammer than what is needed for replacing a single child.
> 
>> acb = blk_aio_preadv(blk, 0, &qiov, 0, detach_by_parent_aio_cb, NULL);
>> /* Drain and check the expected result */
>> bdrv_subtree_drained_begin(parent_b);
>>
>> because the detach_by_parent_aio_cb calls detach_indirect_bh(), that
>> modifies the graph and is invoked during bdrv_subtree_drained_begin().
>> The call stack is the following:
>> 1. blk_aio_preadv() creates a coroutine, increments in_flight counter
>> and enters the coroutine running blk_aio_read_entry()
>> 2. blk_aio_read_entry() performs the read and then schedules a bh to
>>complete (blk_aio_complete)
>> 3. at this point, subtree_drained_begin() kicks in and waits for all
>>in_flight requests, polling
>> 4. polling allows the bh to be scheduled, so blk_aio_complete runs
>> 5. blk_aio_complete *first* invokes the callback
>>(detach_by_parent_aio_cb) and then decrements the in_flight counter
>> 6. Here we have the problem: detach_by_parent_aio_cb modifies the graph,
>>so both bdrv_unref_child() and bdrv_attach_child() will have
>>subtree_drains inside. And this causes a deadlock, because the
>>nested drain will wait for in_flight counter to go to zero, which
>>is only happening once the drain itself finishes.
> 
> So the problem has nothing to do with detach_by_parent_aio_cb() being
> an I/O function in the sense of locking rules (which it isn't), but with
> calling a drain operation while having the in_flight counter increased.
> 
> In other words, an AIO callback like detach_by_parent_aio_cb() must
> never call drain - and it doesn't before your changes to
> bdrv_replace_child_noperm() break it. How confident are we that this
> only breaks tests and not real code?
I am not sure. From a quick look, the AIO callback is really used pretty
much everywhere. Maybe we should really find a way to asseert
GLOBAL_STATE_CODE and friends?

> 
> Part of the problem is probably that drain is serving two slightly
> different purposes inside the block layer (just make sure that nothing
> touches the node any more) and callers outside of the block layer (make
> sure that everything has completed and no more callbacks will be
> called). This bug sits exactly in the difference between those two
> concepts - we're waiting for more than we would have to wait for, and it
> causes a deadlock in this combination.
> 
> I guess it could be fixed if BlockBackend accounted for requests that
> are already completed, but their callback hasn't yet. blk_drain() would
> then have to wait for those requests, but blk_root_drained_poll()
> wouldn't because these requests don't affect the root node any more.
> 
>> Different story is test_detach_by_driver_cb(): in this case,
>> detach_by_parent_aio_cb() does not call detach_indirect_bh(),
>> but it is instead called as a bh r

Re: [PATCH 3/6] block.c: bdrv_replace_child_noperm: first call ->attach(), and then add child

2022-02-14 Thread Emanuele Giuseppe Esposito



On 11/02/2022 13:34, Kevin Wolf wrote:
> Am 08.02.2022 um 16:36 hat Emanuele Giuseppe Esposito geschrieben:
>> Doing the opposite can make adding the child node to a non-drained node,
>> as apply_subtree_drain is only done in ->attach() and thus make
>> assert_bdrv_graph_writable fail.
>>
>> This can happen for example during a transaction rollback (test 245,
>> test_io_with_graph_changes):
>> 1. a node is removed from the graph, thus it is undrained
>> 2. then something happens, and we need to roll back the transactions
>>through tran_abort()
>> 3. at this point, the current code would first attach the undrained node
>>to the graph via QLIST_INSERT_HEAD, and then call ->attach() that
>>will take care of restoring the drain with apply_subtree_drain(),
>>leaving the node undrained between the two operations.
>>
>> Signed-off-by: Emanuele Giuseppe Esposito 
>> ---
>>  block.c | 6 --
>>  1 file changed, 4 insertions(+), 2 deletions(-)
>>
>> diff --git a/block.c b/block.c
>> index ec346a7e2e..08a6e3a4ef 100644
>> --- a/block.c
>> +++ b/block.c
>> @@ -2872,8 +2872,6 @@ static void bdrv_replace_child_noperm(BdrvChild 
>> **childp,
>>  }
>>  
>>  if (new_bs) {
>> -assert_bdrv_graph_writable(new_bs);
>> -QLIST_INSERT_HEAD(&new_bs->parents, child, next_parent);
>>  
>>  /*
>>   * Detaching the old node may have led to the new node's
>> @@ -2890,6 +2888,10 @@ static void bdrv_replace_child_noperm(BdrvChild 
>> **childp,
>>  if (child->klass->attach) {
>>  child->klass->attach(child);
>>  }
>> +
>> +assert_bdrv_graph_writable(new_bs);
>> +QLIST_INSERT_HEAD(&new_bs->parents, child, next_parent);
>> +
>>  }
> 
> Extra empty line. Looks good otherwise.
> 
> Does this also mean that the order in bdrv_child_cb_attach/detach() is
> wrong? Or maybe adding a new node to bs->children is okay even when the
> child node isn't drained.

No I don't think it's wrong. In fact, if we are just replacing a node
(so old_bs and new_bs are both != NULL), the child will be just removed
and then re-added to the same children's list of the same parent
(child->opaque).

Whether adding a new node to bs->children requires a drain or not is
still under debate in the other serie with Vladimir. We'll see about
that, but in the meanwhile this is just a safe fix that makes sure that
*if* drains are added, everything will always stay under proper drain.

Emanuele

> 
> Kevin
> 




Re: [PATCH 1/6] block/io.c: fix bdrv_child_cb_drained_begin invocations from a coroutine

2022-02-14 Thread Emanuele Giuseppe Esposito



On 11/02/2022 12:54, Kevin Wolf wrote:
> Am 08.02.2022 um 16:36 hat Emanuele Giuseppe Esposito geschrieben:
>> Using bdrv_do_drained_begin_quiesce() in bdrv_child_cb_drained_begin()
>> is not a good idea: the callback might be called when running
>> a drain in a coroutine, and bdrv_drained_begin_poll() does not
>> handle that case, resulting in assertion failure.
> 
> I remembered that we talked about this only recently on IRC, but it
> didn't make any sense to me again when I read this commit message. So I
> think we need --verbose.
> 
> The .drained_begin callback was always meant to run outside of coroutine
> context, so the unexpected part isn't that it calls a function that
> can't run in coroutine context, but that it is already called itself in
> coroutine context.
> 
> The problematic path is bdrv_replace_child_noperm() which then calls
> bdrv_parent_drained_begin_single(poll=true). Polling in coroutine
> context is dangerous, it can cause deadlocks because the caller of the
> coroutine can't make progress. So I believe this call is already wrong
> in coroutine context.

Ok, you added this assertion in dcf94a23, but at that time there was no
bdrv_parent_drained_begin_single, and the polling was only done in
bdrv_do_drained_begin. So I think that to keep the same logic, the
assertion should be moved in bdrv_parent_drained_begin_single()? And
even more specifically, only if the poll flag is true.

I triggered this by adding additional drains in the callers of
bdrv_replace_child_noperm(), and I think some test (probably unit test)
was failing because of either the drained_begin callback itself called
by the drain, or as you suggested the callbacks called by
bdrv_parent_drained_begin_single from bdrv_replace_child_noperm.

Anyways, I think that in addition to the fix in this patch, we should
also fix bdrv_parent_drained_begin_single(poll=true) in
bdrv_replace_child_noperm, with something similar to what is done in
bdrv_co_yield_to_drain? ie if we are in coroutine, schedule a BH that
runs the same logic but in the main loop, but then somehow wait that it
finishes before continuing?
Even though at that point we would have a coroutine waiting for the main
loop, which I don't think it's something we want.

Alternatively, we would forbid polling in coroutines at all. And the
only place I can see that is using the drain in coroutine is mirror (see
below).


Additional question: I also noticed that there is a bdrv_drained_begin()
call in mirror.c in the JobDriver run() callback. How can this even
work? If a parent uses bdrv_child_cb_drained_begin (which should not be
so rare) it will crash because of the assertion.

Further additional question: actually I don't understand also the
polling logic of mirror (mirror_drained_poll), as if we are draining in
the coroutine with in_drain = true I think we can have a deadlock if
in_flight>0?

Emanuele

> 
> Now I don't know the call path up to bdrv_replace_child_noperm(), but as
> far as I remember, that was another function that was originally never
> run in coroutine context. Maybe we have good reason to change this, I
> can't point to anything that would be inherently wrong with it, but I
> would still be curious in which context it does run in a coroutine now.
> 
> Anyway, whatever the specific place is, I believe we must drop out of
> coroutine context _before_ calling bdrv_parent_drained_begin_single(),
> not only in callbacks called by it.
> 
>> Instead, bdrv_do_drained_begin with no recursion and poll
>> will accomplish the same thing (invoking bdrv_do_drained_begin_quiesce)
>> but will firstly check if we are already in a coroutine, and exit
>> from that via bdrv_co_yield_to_drain().
>>
>> Signed-off-by: Emanuele Giuseppe Esposito 
> 
> Kevin
> 




Re: [RFC] thread-pool: Add option to fix the pool size

2022-02-14 Thread Stefan Hajnoczi
On Fri, Feb 11, 2022 at 12:32:57PM +0100, Kevin Wolf wrote:
> Am 03.02.2022 um 15:19 hat Stefan Hajnoczi geschrieben:
> > On Thu, Feb 03, 2022 at 10:56:49AM +, Daniel P. Berrangé wrote:
> > > On Thu, Feb 03, 2022 at 10:53:07AM +, Stefan Hajnoczi wrote:
> > > > On Wed, Feb 02, 2022 at 06:52:34PM +0100, Nicolas Saenz Julienne wrote:
> > 1. A global default value that all new AioContext take. The QEMU main
> >loop's qemu_aio_context will use this and all IOThread AioContext
> >will use it (unless they have been overridden).
> > 
> >I would define it on --machine because that's the "global" object for
> >a guest, but that's not very satisfying.
> 
> Semantically, -machine is about the virtual hardware where as iothreads
> are about the backend, so I agree it's not a good fit.
> 
> For the main thread, you may want to configure all the same options that
> you can configure for an iothread. So to me that sounds like we would
> want to allow using an iothread object for the main thread, too.
> 
> That would still require us to tell QEMU which iothread object should be
> used for the main thread, though.

Making the main loop thread an IOThread is an interesting direction but
not an easy change to make.

The main loop thread has a custom event loop that is not interchangeable
with the IOThread event loop:
- The main loop has a poll notifier interface for libslirp fd monitoring
  integration.
- The main loop is a GLib event loop but manually polls to get
  nanosecond resolution timers.
- The main loop has icount integration.
- The main loop has the special iohandler AioContext

The IOThread event loop runs an optimized AioContext event loop instead.
It falls back to regular g_main_loop_run() if there is a GSource user.

It would definitely be nice to unify the main loop with IOThread and
then use --object iothread,... to configure main loop parameters.

I'm not sure if requiring that of Nicolas is fair though. The event
loops in QEMU are complex and changes are likely to introduce subtle
bugs or performance regressions.

Stefan


signature.asc
Description: PGP signature


Re: qemu iotest 161 and make check

2022-02-14 Thread Christian Borntraeger




Am 10.02.22 um 15:47 schrieb Vladimir Sementsov-Ogievskiy:

10.02.2022 10:57, Christian Borntraeger wrote:

Hello,

I do see spurious failures of 161 in our CI, but only when I use
make check with parallelism (-j).
I have not yet figured out which other testcase could interfere

@@ -34,6 +34,8 @@
  *** Commit and then change an option on the backing file

  Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=1048576
+qemu-img: TEST_DIR/t.IMGFMT.base: Failed to get "write" lock
+Is another process using the image [TEST_DIR/t.IMGFMT.base]?
  Formatting 'TEST_DIR/t.IMGFMT.int', fmt=IMGFMT size=1048576 
backing_file=TEST_DIR/t.IMGFMT.base backing_fmt=IMGFMT
  Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1048576 
backing_file=TEST_DIR/t.IMGFMT.int backing_fmt=IMGFMT
  { 'execute': 'qmp_capabilities' }


any ideas?



Hmm, interesting.. Is it always 161 and always exactly this diff?


Seems to be always this diff.



[PULL 6/6] hw/nvme: add support for zoned random write area

2022-02-14 Thread Klaus Jensen
From: Klaus Jensen 

Add support for TP 4076 ("Zoned Random Write Area"), v2021.08.23
("Ratified").

This adds three new namespace parameters: "zoned.numzrwa" (number of
zrwa resources, i.e. number of zones that can have a zrwa),
"zoned.zrwas" (zrwa size in LBAs), "zoned.zrwafg" (granularity in LBAs
for flushes).

Reviewed-by: Keith Busch 
Signed-off-by: Klaus Jensen 
---
 hw/nvme/ctrl.c   | 171 ++-
 hw/nvme/ns.c |  58 +++
 hw/nvme/nvme.h   |  10 +++
 hw/nvme/trace-events |   1 +
 include/block/nvme.h |  17 -
 5 files changed, 237 insertions(+), 20 deletions(-)

diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
index 7cb4974c5e83..98aac98bef5f 100644
--- a/hw/nvme/ctrl.c
+++ b/hw/nvme/ctrl.c
@@ -299,26 +299,37 @@ static void nvme_assign_zone_state(NvmeNamespace *ns, 
NvmeZone *zone,
 }
 }
 
-/*
- * Check if we can open a zone without exceeding open/active limits.
- * AOR stands for "Active and Open Resources" (see TP 4053 section 2.5).
- */
-static int nvme_aor_check(NvmeNamespace *ns, uint32_t act, uint32_t opn)
+static uint16_t nvme_zns_check_resources(NvmeNamespace *ns, uint32_t act,
+ uint32_t opn, uint32_t zrwa)
 {
 if (ns->params.max_active_zones != 0 &&
 ns->nr_active_zones + act > ns->params.max_active_zones) {
 trace_pci_nvme_err_insuff_active_res(ns->params.max_active_zones);
 return NVME_ZONE_TOO_MANY_ACTIVE | NVME_DNR;
 }
+
 if (ns->params.max_open_zones != 0 &&
 ns->nr_open_zones + opn > ns->params.max_open_zones) {
 trace_pci_nvme_err_insuff_open_res(ns->params.max_open_zones);
 return NVME_ZONE_TOO_MANY_OPEN | NVME_DNR;
 }
 
+if (zrwa > ns->zns.numzrwa) {
+return NVME_NOZRWA | NVME_DNR;
+}
+
 return NVME_SUCCESS;
 }
 
+/*
+ * Check if we can open a zone without exceeding open/active limits.
+ * AOR stands for "Active and Open Resources" (see TP 4053 section 2.5).
+ */
+static uint16_t nvme_aor_check(NvmeNamespace *ns, uint32_t act, uint32_t opn)
+{
+return nvme_zns_check_resources(ns, act, opn, 0);
+}
+
 static bool nvme_addr_is_cmb(NvmeCtrl *n, hwaddr addr)
 {
 hwaddr hi, lo;
@@ -1628,9 +1639,19 @@ static uint16_t nvme_check_zone_write(NvmeNamespace *ns, 
NvmeZone *zone,
 return status;
 }
 
-if (unlikely(slba != zone->w_ptr)) {
-trace_pci_nvme_err_write_not_at_wp(slba, zone->d.zslba, zone->w_ptr);
-return NVME_ZONE_INVALID_WRITE;
+if (zone->d.za & NVME_ZA_ZRWA_VALID) {
+uint64_t ezrwa = zone->w_ptr + 2 * ns->zns.zrwas;
+
+if (slba < zone->w_ptr || slba + nlb > ezrwa) {
+trace_pci_nvme_err_zone_invalid_write(slba, zone->w_ptr);
+return NVME_ZONE_INVALID_WRITE;
+}
+} else {
+if (unlikely(slba != zone->w_ptr)) {
+trace_pci_nvme_err_write_not_at_wp(slba, zone->d.zslba,
+   zone->w_ptr);
+return NVME_ZONE_INVALID_WRITE;
+}
 }
 
 if (unlikely((slba + nlb) > zcap)) {
@@ -1710,6 +1731,14 @@ static uint16_t nvme_zrm_finish(NvmeNamespace *ns, 
NvmeZone *zone)
 /* fallthrough */
 case NVME_ZONE_STATE_CLOSED:
 nvme_aor_dec_active(ns);
+
+if (zone->d.za & NVME_ZA_ZRWA_VALID) {
+zone->d.za &= ~NVME_ZA_ZRWA_VALID;
+if (ns->params.numzrwa) {
+ns->zns.numzrwa++;
+}
+}
+
 /* fallthrough */
 case NVME_ZONE_STATE_EMPTY:
 nvme_assign_zone_state(ns, zone, NVME_ZONE_STATE_FULL);
@@ -1745,6 +1774,13 @@ static uint16_t nvme_zrm_reset(NvmeNamespace *ns, 
NvmeZone *zone)
 /* fallthrough */
 case NVME_ZONE_STATE_CLOSED:
 nvme_aor_dec_active(ns);
+
+if (zone->d.za & NVME_ZA_ZRWA_VALID) {
+if (ns->params.numzrwa) {
+ns->zns.numzrwa++;
+}
+}
+
 /* fallthrough */
 case NVME_ZONE_STATE_FULL:
 zone->w_ptr = zone->d.zslba;
@@ -1778,6 +1814,7 @@ static void nvme_zrm_auto_transition_zone(NvmeNamespace 
*ns)
 
 enum {
 NVME_ZRM_AUTO = 1 << 0,
+NVME_ZRM_ZRWA = 1 << 1,
 };
 
 static uint16_t nvme_zrm_open_flags(NvmeCtrl *n, NvmeNamespace *ns,
@@ -1796,7 +1833,8 @@ static uint16_t nvme_zrm_open_flags(NvmeCtrl *n, 
NvmeNamespace *ns,
 if (n->params.auto_transition_zones) {
 nvme_zrm_auto_transition_zone(ns);
 }
-status = nvme_aor_check(ns, act, 1);
+status = nvme_zns_check_resources(ns, act, 1,
+  (flags & NVME_ZRM_ZRWA) ? 1 : 0);
 if (status) {
 return status;
 }
@@ -1824,6 +1862,12 @@ static uint16_t nvme_zrm_open_flags(NvmeCtrl *n, 
NvmeNamespace *ns,
 /* fallthrough */
 
 case NVME_ZONE_STATE_EXPLICITLY_OPEN:
+if (flags & NVME_ZRM_ZRWA) {
+ns->zns.numzrwa--;
+
+zone->d.za |= NVME_ZA_ZRWA_VA

[PULL 4/6] hw/nvme: add struct for zone management send

2022-02-14 Thread Klaus Jensen
From: Klaus Jensen 

Add struct for Zone Management Send in preparation for more zone send
flags.

Reviewed-by: Keith Busch 
Signed-off-by: Klaus Jensen 
---
 hw/nvme/ctrl.c   | 10 --
 include/block/nvme.h | 19 +++
 2 files changed, 23 insertions(+), 6 deletions(-)

diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
index 4344405e5939..7cb4974c5e83 100644
--- a/hw/nvme/ctrl.c
+++ b/hw/nvme/ctrl.c
@@ -3616,26 +3616,24 @@ done:
 
 static uint16_t nvme_zone_mgmt_send(NvmeCtrl *n, NvmeRequest *req)
 {
-NvmeCmd *cmd = (NvmeCmd *)&req->cmd;
+NvmeZoneSendCmd *cmd = (NvmeZoneSendCmd *)&req->cmd;
 NvmeNamespace *ns = req->ns;
 NvmeZone *zone;
 NvmeZoneResetAIOCB *iocb;
 uint8_t *zd_ext;
-uint32_t dw13 = le32_to_cpu(cmd->cdw13);
 uint64_t slba = 0;
 uint32_t zone_idx = 0;
 uint16_t status;
-uint8_t action;
+uint8_t action = cmd->zsa;
 bool all;
 enum NvmeZoneProcessingMask proc_mask = NVME_PROC_CURRENT_ZONE;
 
-action = dw13 & 0xff;
-all = !!(dw13 & 0x100);
+all = cmd->zsflags & NVME_ZSFLAG_SELECT_ALL;
 
 req->status = NVME_SUCCESS;
 
 if (!all) {
-status = nvme_get_mgmt_zone_slba_idx(ns, cmd, &slba, &zone_idx);
+status = nvme_get_mgmt_zone_slba_idx(ns, &req->cmd, &slba, &zone_idx);
 if (status) {
 return status;
 }
diff --git a/include/block/nvme.h b/include/block/nvme.h
index e3bd47bf76ab..709d491c70d8 100644
--- a/include/block/nvme.h
+++ b/include/block/nvme.h
@@ -1433,6 +1433,21 @@ enum NvmeZoneType {
 NVME_ZONE_TYPE_SEQ_WRITE = 0x02,
 };
 
+typedef struct QEMU_PACKED NvmeZoneSendCmd {
+uint8_t opcode;
+uint8_t flags;
+uint16_tcid;
+uint32_tnsid;
+uint32_trsvd8[4];
+NvmeCmdDptr dptr;
+uint64_tslba;
+uint32_trsvd48;
+uint8_t zsa;
+uint8_t zsflags;
+uint8_t rsvd54[2];
+uint32_trsvd56[2];
+} NvmeZoneSendCmd;
+
 enum NvmeZoneSendAction {
 NVME_ZONE_ACTION_RSD = 0x00,
 NVME_ZONE_ACTION_CLOSE   = 0x01,
@@ -1443,6 +1458,10 @@ enum NvmeZoneSendAction {
 NVME_ZONE_ACTION_SET_ZD_EXT  = 0x10,
 };
 
+enum {
+NVME_ZSFLAG_SELECT_ALL = 1 << 0,
+};
+
 typedef struct QEMU_PACKED NvmeZoneDescr {
 uint8_t zt;
 uint8_t zs;
-- 
2.35.1




[PULL 3/6] hw/nvme/ctrl: Pass buffers as 'void *' types

2022-02-14 Thread Klaus Jensen
From: Philippe Mathieu-Daudé 

These buffers can be anything, not an array of chars,
so use the 'void *' type for them.

Signed-off-by: Philippe Mathieu-Daudé 
Reviewed-by: Klaus Jensen 
Signed-off-by: Klaus Jensen 
---
 hw/nvme/ctrl.c | 10 +-
 hw/nvme/nvme.h |  4 ++--
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
index 12e1fcda7c85..4344405e5939 100644
--- a/hw/nvme/ctrl.c
+++ b/hw/nvme/ctrl.c
@@ -1162,7 +1162,7 @@ static uint16_t nvme_tx_interleaved(NvmeCtrl *n, NvmeSg 
*sg, uint8_t *ptr,
 return NVME_SUCCESS;
 }
 
-static uint16_t nvme_tx(NvmeCtrl *n, NvmeSg *sg, uint8_t *ptr, uint32_t len,
+static uint16_t nvme_tx(NvmeCtrl *n, NvmeSg *sg, void *ptr, uint32_t len,
 NvmeTxDirection dir)
 {
 assert(sg->flags & NVME_SG_ALLOC);
@@ -1199,7 +1199,7 @@ static uint16_t nvme_tx(NvmeCtrl *n, NvmeSg *sg, uint8_t 
*ptr, uint32_t len,
 return NVME_SUCCESS;
 }
 
-static inline uint16_t nvme_c2h(NvmeCtrl *n, uint8_t *ptr, uint32_t len,
+static inline uint16_t nvme_c2h(NvmeCtrl *n, void *ptr, uint32_t len,
 NvmeRequest *req)
 {
 uint16_t status;
@@ -1212,7 +1212,7 @@ static inline uint16_t nvme_c2h(NvmeCtrl *n, uint8_t 
*ptr, uint32_t len,
 return nvme_tx(n, &req->sg, ptr, len, NVME_TX_DIRECTION_FROM_DEVICE);
 }
 
-static inline uint16_t nvme_h2c(NvmeCtrl *n, uint8_t *ptr, uint32_t len,
+static inline uint16_t nvme_h2c(NvmeCtrl *n, void *ptr, uint32_t len,
 NvmeRequest *req)
 {
 uint16_t status;
@@ -1225,7 +1225,7 @@ static inline uint16_t nvme_h2c(NvmeCtrl *n, uint8_t 
*ptr, uint32_t len,
 return nvme_tx(n, &req->sg, ptr, len, NVME_TX_DIRECTION_TO_DEVICE);
 }
 
-uint16_t nvme_bounce_data(NvmeCtrl *n, uint8_t *ptr, uint32_t len,
+uint16_t nvme_bounce_data(NvmeCtrl *n, void *ptr, uint32_t len,
   NvmeTxDirection dir, NvmeRequest *req)
 {
 NvmeNamespace *ns = req->ns;
@@ -1241,7 +1241,7 @@ uint16_t nvme_bounce_data(NvmeCtrl *n, uint8_t *ptr, 
uint32_t len,
 return nvme_tx(n, &req->sg, ptr, len, dir);
 }
 
-uint16_t nvme_bounce_mdata(NvmeCtrl *n, uint8_t *ptr, uint32_t len,
+uint16_t nvme_bounce_mdata(NvmeCtrl *n, void *ptr, uint32_t len,
NvmeTxDirection dir, NvmeRequest *req)
 {
 NvmeNamespace *ns = req->ns;
diff --git a/hw/nvme/nvme.h b/hw/nvme/nvme.h
index 83ffabade4cf..38f3ebf7f6c0 100644
--- a/hw/nvme/nvme.h
+++ b/hw/nvme/nvme.h
@@ -495,9 +495,9 @@ static inline uint16_t nvme_cid(NvmeRequest *req)
 }
 
 void nvme_attach_ns(NvmeCtrl *n, NvmeNamespace *ns);
-uint16_t nvme_bounce_data(NvmeCtrl *n, uint8_t *ptr, uint32_t len,
+uint16_t nvme_bounce_data(NvmeCtrl *n, void *ptr, uint32_t len,
   NvmeTxDirection dir, NvmeRequest *req);
-uint16_t nvme_bounce_mdata(NvmeCtrl *n, uint8_t *ptr, uint32_t len,
+uint16_t nvme_bounce_mdata(NvmeCtrl *n, void *ptr, uint32_t len,
NvmeTxDirection dir, NvmeRequest *req);
 void nvme_rw_complete_cb(void *opaque, int ret);
 uint16_t nvme_map_dptr(NvmeCtrl *n, NvmeSg *sg, size_t len,
-- 
2.35.1




[PULL 0/6] hw/nvme updates

2022-02-14 Thread Klaus Jensen
From: Klaus Jensen 

Hi Peter,

The following changes since commit 48033ad678ae2def43bf0d543a2c4c3d2a93feaf:

  Merge remote-tracking branch 'remotes/vsementsov/tags/pull-nbd-2022-02-09-v2' 
into staging (2022-02-12 22:04:07 +)

are available in the Git repository at:

  git://git.infradead.org/qemu-nvme.git tags/nvme-next-pull-request

for you to fetch changes up to e321b4cdc2dd0b5e806ecf759138be7f83774142:

  hw/nvme: add support for zoned random write area (2022-02-14 08:58:29 +0100)


hw/nvme updates

  - fix CVE-2021-3929
  - add zone random write area support
  - misc cleanups from Philippe



Klaus Jensen (4):
  hw/nvme: fix CVE-2021-3929
  hw/nvme: add struct for zone management send
  hw/nvme: add ozcs enum
  hw/nvme: add support for zoned random write area

Philippe Mathieu-Daudé (2):
  hw/nvme/ctrl: Have nvme_addr_write() take const buffer
  hw/nvme/ctrl: Pass buffers as 'void *' types

 hw/nvme/ctrl.c   | 215 ---
 hw/nvme/ns.c |  61 +++-
 hw/nvme/nvme.h   |  14 ++-
 hw/nvme/trace-events |   1 +
 include/block/nvme.h |  40 +++-
 5 files changed, 296 insertions(+), 35 deletions(-)

-- 
2.35.1




[PULL 1/6] hw/nvme: fix CVE-2021-3929

2022-02-14 Thread Klaus Jensen
From: Klaus Jensen 

This fixes CVE-2021-3929 "locally" by denying DMA to the iomem of the
device itself. This still allows DMA to MMIO regions of other devices
(e.g. doing P2P DMA to the controller memory buffer of another NVMe
device).

Fixes: CVE-2021-3929
Reported-by: Qiuhao Li 
Reviewed-by: Keith Busch 
Reviewed-by: Philippe Mathieu-Daudé 
Signed-off-by: Klaus Jensen 
---
 hw/nvme/ctrl.c | 22 ++
 1 file changed, 22 insertions(+)

diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
index 1f62116af985..37681a975986 100644
--- a/hw/nvme/ctrl.c
+++ b/hw/nvme/ctrl.c
@@ -357,6 +357,24 @@ static inline void *nvme_addr_to_pmr(NvmeCtrl *n, hwaddr 
addr)
 return memory_region_get_ram_ptr(&n->pmr.dev->mr) + (addr - n->pmr.cba);
 }
 
+static inline bool nvme_addr_is_iomem(NvmeCtrl *n, hwaddr addr)
+{
+hwaddr hi, lo;
+
+/*
+ * The purpose of this check is to guard against invalid "local" access to
+ * the iomem (i.e. controller registers). Thus, we check against the range
+ * covered by the 'bar0' MemoryRegion since that is currently composed of
+ * two subregions (the NVMe "MBAR" and the MSI-X table/pba). Note, however,
+ * that if the device model is ever changed to allow the CMB to be located
+ * in BAR0 as well, then this must be changed.
+ */
+lo = n->bar0.addr;
+hi = lo + int128_get64(n->bar0.size);
+
+return addr >= lo && addr < hi;
+}
+
 static int nvme_addr_read(NvmeCtrl *n, hwaddr addr, void *buf, int size)
 {
 hwaddr hi = addr + size - 1;
@@ -614,6 +632,10 @@ static uint16_t nvme_map_addr(NvmeCtrl *n, NvmeSg *sg, 
hwaddr addr, size_t len)
 
 trace_pci_nvme_map_addr(addr, len);
 
+if (nvme_addr_is_iomem(n, addr)) {
+return NVME_DATA_TRAS_ERROR;
+}
+
 if (nvme_addr_is_cmb(n, addr)) {
 cmb = true;
 } else if (nvme_addr_is_pmr(n, addr)) {
-- 
2.35.1




[PULL 5/6] hw/nvme: add ozcs enum

2022-02-14 Thread Klaus Jensen
From: Klaus Jensen 

Add enumeration for OZCS values.

Reviewed-by: Keith Busch 
Signed-off-by: Klaus Jensen 
---
 hw/nvme/ns.c | 3 ++-
 include/block/nvme.h | 4 
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/hw/nvme/ns.c b/hw/nvme/ns.c
index 8b5f98c76180..356b6c1c2f14 100644
--- a/hw/nvme/ns.c
+++ b/hw/nvme/ns.c
@@ -266,7 +266,8 @@ static void nvme_ns_init_zoned(NvmeNamespace *ns)
 id_ns_z->mar = cpu_to_le32(ns->params.max_active_zones - 1);
 id_ns_z->mor = cpu_to_le32(ns->params.max_open_zones - 1);
 id_ns_z->zoc = 0;
-id_ns_z->ozcs = ns->params.cross_zone_read ? 0x01 : 0x00;
+id_ns_z->ozcs = ns->params.cross_zone_read ?
+NVME_ID_NS_ZONED_OZCS_RAZB : 0x00;
 
 for (i = 0; i <= ns->id_ns.nlbaf; i++) {
 id_ns_z->lbafe[i].zsze = cpu_to_le64(ns->zone_size);
diff --git a/include/block/nvme.h b/include/block/nvme.h
index 709d491c70d8..e10ea6f0eb88 100644
--- a/include/block/nvme.h
+++ b/include/block/nvme.h
@@ -1351,6 +1351,10 @@ typedef struct QEMU_PACKED NvmeIdNsZoned {
 uint8_t vs[256];
 } NvmeIdNsZoned;
 
+enum NvmeIdNsZonedOzcs {
+NVME_ID_NS_ZONED_OZCS_RAZB= 1 << 0,
+};
+
 /*Deallocate Logical Block Features*/
 #define NVME_ID_NS_DLFEAT_GUARD_CRC(dlfeat)   ((dlfeat) & 0x10)
 #define NVME_ID_NS_DLFEAT_WRITE_ZEROES(dlfeat)((dlfeat) & 0x08)
-- 
2.35.1




[PULL 2/6] hw/nvme/ctrl: Have nvme_addr_write() take const buffer

2022-02-14 Thread Klaus Jensen
From: Philippe Mathieu-Daudé 

The 'buf' argument is not modified, so better pass it as const type.

Signed-off-by: Philippe Mathieu-Daudé 
Reviewed-by: Klaus Jensen 
Signed-off-by: Klaus Jensen 
---
 hw/nvme/ctrl.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
index 37681a975986..12e1fcda7c85 100644
--- a/hw/nvme/ctrl.c
+++ b/hw/nvme/ctrl.c
@@ -395,7 +395,7 @@ static int nvme_addr_read(NvmeCtrl *n, hwaddr addr, void 
*buf, int size)
 return pci_dma_read(&n->parent_obj, addr, buf, size);
 }
 
-static int nvme_addr_write(NvmeCtrl *n, hwaddr addr, void *buf, int size)
+static int nvme_addr_write(NvmeCtrl *n, hwaddr addr, const void *buf, int size)
 {
 hwaddr hi = addr + size - 1;
 if (hi < addr) {
-- 
2.35.1