RE: [PATCH ats_vtd v1 03/24] intel_iommu: check if the input address is canonical

2024-05-15 Thread Duan, Zhenzhong


>-Original Message-
>From: CLEMENT MATHIEU--DRIF 
>Subject: Re: [PATCH ats_vtd v1 03/24] intel_iommu: check if the input
>address is canonical
>
>Hi zhenzhong,
>
>On 14/05/2024 09:34, Duan, Zhenzhong wrote:
>> Caution: External email. Do not open attachments or click links, unless this
>email comes from a known sender and you know the content is safe.
>>
>>
>> Hi Clement,
>>
>>> -Original Message-
>>> From: CLEMENT MATHIEU--DRIF 
>>> Subject: [PATCH ats_vtd v1 03/24] intel_iommu: check if the input
>address
>>> is canonical
>>>
>>> First stage translation must fail if the address to translate is
>>> not canonical.
>>>
>>> Signed-off-by: Clément Mathieu--Drif d...@eviden.com>
>>> ---
>>> hw/i386/intel_iommu.c  | 22 ++
>>> hw/i386/intel_iommu_internal.h |  2 ++
>>> 2 files changed, 24 insertions(+)
>>>
>>> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
>>> index 80cdf37870..240ecb8f72 100644
>>> --- a/hw/i386/intel_iommu.c
>>> +++ b/hw/i386/intel_iommu.c
>>> @@ -1912,6 +1912,7 @@ static const bool vtd_qualified_faults[] = {
>>>  [VTD_FR_PASID_ENTRY_P] = true,
>>>  [VTD_FR_PASID_TABLE_ENTRY_INV] = true,
>>>  [VTD_FR_SM_INTERRUPT_ADDR] = true,
>>> +[VTD_FR_FS_NON_CANONICAL] = true,
>>>  [VTD_FR_MAX] = false,
>>> };
>>>
>>> @@ -2023,6 +2024,21 @@ static inline uint64_t
>>> vtd_get_flpte_addr(uint64_t flpte, uint8_t aw)
>>>  return flpte & VTD_FL_PT_BASE_ADDR_MASK(aw);
>>> }
>>>
>>> +/* Return true if IOVA is canonical, otherwise false. */
>>> +static bool vtd_iova_fl_check_canonical(IntelIOMMUState *s,
>>> +uint64_t iova, VTDContextEntry *ce,
>>> +uint8_t aw, uint32_t pasid)
>>> +{
>>> +uint64_t iova_limit = vtd_iova_limit(s, ce, aw, pasid);
>> According to spec:
>>
>> "Input-address in the request subjected to first-stage translation is not
>> canonical (i.e., address bits 63:N are not same value as address bits [N-
>> 1], where N is 48 bits with 4-level paging and 57 bits with 5-level paging)."
>>
>> So it looks not correct to use aw field in pasid entry to calculate 
>> iova_limit.
>> Aw can be a value configured by guest and it's used for stage-2 table. See
>spec:
>>
>> " This field is treated as Reserved(0) for implementations not supporting
>Second-stage
>> Translation (SSTS=0 in the Extended Capability Register).
>> This field indicates the adjusted guest-address-width (AGAW) to be used by
>hardware
>> for second-stage translation through paging structures referenced through
>the
>> SSPTPTR field.
>> • The following encodings are defined for this field:
>> • 001b: 39-bit AGAW (3-level page table)
>> • 010b: 48-bit AGAW (4-level page table)
>> • 011b: 57-bit AGAW (5-level page table)
>> • 000b,100b-111b: Reserved
>> When not treated as Reserved(0), hardware ignores this field for first-
>stage-only
>> (PGTT=001b) and pass-through (PGTT=100b) translations."
>>
>> Thanks
>> Zhenzhong
>>
>Not sure to understand.
>Are you talking about the aw field of Scalable-Mode PASID Table Entry?
Yes.

>The aw parameter is set to s->aw_bits in vtd_do_iommu_translate so I
>think it's safe to use it for canonical address check.
>Maybe we can just use s->aw_bits directly from
>vtd_iova_fl_check_canonical to avoid any mistake?
Agaw can be different from s->aw_bits.
Yes, I think using s->aw_bits is safe.

Thanks
Zhenzhong

>>> +uint64_t upper_bits_mask = ~(iova_limit - 1);
>>> +uint64_t upper_bits = iova & upper_bits_mask;
>>> +bool msb = ((iova & (iova_limit >> 1)) != 0);
>>> +return !(
>>> + (!msb && (upper_bits != 0)) ||
>>> + (msb && (upper_bits != upper_bits_mask))
>>> +);
>>> +}
>>> +
>>> /*
>>>   * Given the @iova, get relevant @flptep. @flpte_level will be the last
>level
>>>   * of the translation, can be used for deciding the size of large page.
>>> @@ -2038,6 +2054,12 @@ static int
>vtd_iova_to_flpte(IntelIOMMUState *s,
>>> VTDContextEntry *ce,
>>>  uint32_t offset;
>>>  uint64_t flpte;
>>>
>>> +if (!vtd_iova_fl_check_canonical(s, iova, ce, aw_bits, pasid)) {
>>> +error_report_once("%s: detected non canonical IOVA (iova=0x%"
>>> PRIx64 ","
>>> +  "pasid=0x%" PRIx32 ")", __func__, iova, pasid);
>>> +return -VTD_FR_FS_NON_CANONICAL;
>>> +}
>>> +
>>>  while (true) {
>>>  offset = vtd_iova_fl_level_offset(iova, level);
>>>  flpte = vtd_get_flpte(addr, offset);
>>> diff --git a/hw/i386/intel_iommu_internal.h
>>> b/hw/i386/intel_iommu_internal.h
>>> index 901691afb9..e9448291a4 100644
>>> --- a/hw/i386/intel_iommu_internal.h
>>> +++ b/hw/i386/intel_iommu_internal.h
>>> @@ -324,6 +324,8 @@ typedef enum VTDFaultReason {
>>>  VTD_FR_PASID_ENTRY_P = 0x59, /* The Present(P) field of pasidt-
>entry is
>>> 0 */
>>>  VTD_FR_PASID_TABLE_ENTRY_INV = 0x5b,  /*Invalid PASID table entry
>*/
>>>
>>> +VTD_FR_FS_NON_CANONICAL = 0x80, /* 

Re: [PATCH 07/20] qapi/parser: add semantic 'kind' parameter to QAPIDoc.Section

2024-05-15 Thread Markus Armbruster
John Snow  writes:

> When iterating all_sections, this is helpful to be able to distinguish
> "members" from "features"; the only other way to do so is to
> cross-reference these sections against QAPIDoc.args or QAPIDoc.features,
> but if the desired end goal for QAPIDoc is to remove everything except
> all_sections, we need *something* accessible to distinguish them.
>
> To keep types simple, add this semantic parameter to the base Section
> and not just ArgSection; we can use this to filter out paragraphs and
> tagged sections, too.
>
> Signed-off-by: John Snow 
> ---
>  scripts/qapi/parser.py | 25 -
>  1 file changed, 16 insertions(+), 9 deletions(-)
>
> diff --git a/scripts/qapi/parser.py b/scripts/qapi/parser.py
> index 161768b8b96..cf4cbca1c1f 100644
> --- a/scripts/qapi/parser.py
> +++ b/scripts/qapi/parser.py
> @@ -613,21 +613,27 @@ class QAPIDoc:
>  
>  class Section:
>  # pylint: disable=too-few-public-methods
> -def __init__(self, info: QAPISourceInfo,
> - tag: Optional[str] = None):
> +def __init__(
> +self,
> +info: QAPISourceInfo,
> +tag: Optional[str] = None,
> +kind: str = 'paragraph',
> +):
>  # section source info, i.e. where it begins
>  self.info = info
>  # section tag, if any ('Returns', '@name', ...)
>  self.tag = tag
>  # section text without tag
>  self.text = ''
> +# section type - {paragraph, feature, member, tagged}
> +self.kind = kind

Hmm.  .kind is almost redundant with .tag.

Untagged section:.kind is 'paragraph', .tag is None

Member description:  .kind is 'member', .tag matches @NAME

Feature description: .kind is 'feature', .tag matches @NAME

Tagged section:  .kind is 'tagged', .tag matches
  r'Returns|Errors|Since|Notes?|Examples?|TODO'

.kind can directly be derived from .tag except for member and feature
descriptions.  And you want to tell these two apart in a straightforward
manner in later patches, as you explain in your commit message.

If .kind is 'member' or 'feature', then self must be an ArgSection.
Worth a comment?  An assertion?

Some time back, I considered changing .tag for member and feature
descriptions to suitable strings, like your 'member' and 'feature', and
move the member / feature name into ArgSection.  I didn't, because the
benefit wasn't worth the churn at the time.  Perhaps it's worth it now.
Would it result in simpler code than your solution?

Terminology nit: the section you call 'paragraph' isn't actually a
paragraph: it could be several paragraphs.  Best to call it 'untagged',
as in .ensure_untagged_section().

>  
>  def append_line(self, line: str) -> None:
>  self.text += line + '\n'
>  
>  class ArgSection(Section):
> -def __init__(self, info: QAPISourceInfo, tag: str):
> -super().__init__(info, tag)
> +def __init__(self, info: QAPISourceInfo, tag: str, kind: str):
> +super().__init__(info, tag, kind)
>  self.member: Optional['QAPISchemaMember'] = None
>  
>  def connect(self, member: 'QAPISchemaMember') -> None:

[...]




Re: [PATCH 06/20] qapi/parser: fix comment parsing immediately following a doc block

2024-05-15 Thread Markus Armbruster
John Snow  writes:

> If a comment immediately follows a doc block, the parser doesn't ignore
> that token appropriately. Fix that.

Reproducer?

>
> Signed-off-by: John Snow 
> ---
>  scripts/qapi/parser.py | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/scripts/qapi/parser.py b/scripts/qapi/parser.py
> index 41b9319e5cb..161768b8b96 100644
> --- a/scripts/qapi/parser.py
> +++ b/scripts/qapi/parser.py
> @@ -587,7 +587,7 @@ def get_doc(self) -> 'QAPIDoc':
>  line = self.get_doc_line()
>  first = False
>  
> -self.accept(False)
> +self.accept()
>  doc.end()
>  return doc

Can't judge the fix without understanding the problem, and the problem
will be easier to understand for me with a reproducer.




Re: [PATCH 05/20] qapi/parser: adjust info location for doc body section

2024-05-15 Thread Markus Armbruster
John Snow  writes:

> Instead of using the info object for the doc block as a whole, update
> the info pointer for each call to ensure_untagged_section when the
> existing section is otherwise empty. This way, Sphinx error information
> will match precisely to where the text actually starts.
>
> Signed-off-by: John Snow 
> ---
>  scripts/qapi/parser.py | 9 +++--
>  1 file changed, 7 insertions(+), 2 deletions(-)
>
> diff --git a/scripts/qapi/parser.py b/scripts/qapi/parser.py
> index 8cdd5334ec6..41b9319e5cb 100644
> --- a/scripts/qapi/parser.py
> +++ b/scripts/qapi/parser.py
> @@ -662,8 +662,13 @@ def end(self) -> None:
>  
>  def ensure_untagged_section(self, info: QAPISourceInfo) -> None:
>  if self.all_sections and not self.all_sections[-1].tag:
> -# extend current section
> -self.all_sections[-1].text += '\n'

Before, we always append a newline.

> +section = self.all_sections[-1]
> +# Section is empty so far; update info to start *here*.
> +if not section.text:
> +section.info = info
> +else:
> +# extend current section
> +self.all_sections[-1].text += '\n'

Afterwards, we append it only when the section already has some text.

The commit message claims the patch only adjusts section.info.  That's a
lie :)

I believe the change makes no difference because .end() strips leading
and trailing newline.

>  return
>  # start new section
>  section = self.Section(info)

You could fix the commit message, but I think backing out the
no-difference change is easier.  The appended patch works in my testing.

Next one.  Your patch changes the meaning of section.info.  Here's its
initialization:

class Section:
# pylint: disable=too-few-public-methods
def __init__(self, info: QAPISourceInfo,
 tag: Optional[str] = None):
---># section source info, i.e. where it begins
self.info = info
# section tag, if any ('Returns', '@name', ...)
self.tag = tag
# section text without tag
self.text = ''

The comment is now wrong.  Calls for a thorough review of .info's uses.

The alternative to changing .info's meaning is to add another member
with the meaning you need.  Then we have to review .info's uses to find
out which ones to switch to the new one.

Left for later.


diff --git a/scripts/qapi/parser.py b/scripts/qapi/parser.py
index 8cdd5334ec..abeae1ca77 100644
--- a/scripts/qapi/parser.py
+++ b/scripts/qapi/parser.py
@@ -663,7 +663,10 @@ def end(self) -> None:
 def ensure_untagged_section(self, info: QAPISourceInfo) -> None:
 if self.all_sections and not self.all_sections[-1].tag:
 # extend current section
-self.all_sections[-1].text += '\n'
+section = self.all_sections[-1]
+if not section.text:
+section.info = info
+section.text += '\n'
 return
 # start new section
 section = self.Section(info)




[PATCH v2 2/4] accel/kvm: Introduce kvm_create_and_park_vcpu() helper

2024-05-15 Thread Harsh Prateek Bora
There are distinct helpers for creating and parking a KVM vCPU.
However, there can be cases where a platform needs to create and
immediately park the vCPU during early stages of vcpu init which
can later be reused when vcpu thread gets initialized. This would
help detect failures with kvm_create_vcpu at an early stage.

Based on api refactoring to create/park vcpus introduced in 1/8 of patch series:
https://lore.kernel.org/qemu-devel/2024031202.12992-2-salil.me...@huawei.com/

Suggested-by: Nicholas Piggin 
Signed-off-by: Harsh Prateek Bora 
---
 include/sysemu/kvm.h |  8 
 accel/kvm/kvm-all.c  | 12 
 2 files changed, 20 insertions(+)

diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h
index fa3ec74442..221e6bd55b 100644
--- a/include/sysemu/kvm.h
+++ b/include/sysemu/kvm.h
@@ -450,6 +450,14 @@ int kvm_create_vcpu(CPUState *cpu);
  */
 void kvm_park_vcpu(CPUState *cpu);
 
+/**
+ * kvm_create_and_park_vcpu - Create and park a KVM vCPU
+ * @cpu: QOM CPUState object for which KVM vCPU has to be created and parked.
+ *
+ * @returns: 0 when success, errno (<0) when failed.
+ */
+int kvm_create_and_park_vcpu(CPUState *cpu);
+
 #endif /* COMPILING_PER_TARGET */
 
 void kvm_cpu_synchronize_state(CPUState *cpu);
diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
index 30d42847de..3d7e5eaf0b 100644
--- a/accel/kvm/kvm-all.c
+++ b/accel/kvm/kvm-all.c
@@ -381,6 +381,18 @@ int kvm_create_vcpu(CPUState *cpu)
 return 0;
 }
 
+int kvm_create_and_park_vcpu(CPUState *cpu)
+{
+int ret = 0;
+
+ret = kvm_create_vcpu(cpu);
+if (!ret) {
+kvm_park_vcpu(cpu);
+}
+
+return ret;
+}
+
 static int do_kvm_destroy_vcpu(CPUState *cpu)
 {
 KVMState *s = kvm_state;
-- 
2.39.3




[PATCH v2 3/4] cpu-common.c: export cpu_get_free_index to be reused later

2024-05-15 Thread Harsh Prateek Bora
This helper provides an easy way to identify the next available free cpu
index which can be used for vcpu creation. Until now, this is being
called at a very late stage and there is a need to be able to call it
earlier (for now, with ppc64) hence the need to export.

Suggested-by: Nicholas Piggin 
Signed-off-by: Harsh Prateek Bora 
---
 include/exec/cpu-common.h | 2 ++
 cpu-common.c  | 7 ---
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/include/exec/cpu-common.h b/include/exec/cpu-common.h
index 6d5318895a..0386f1ab29 100644
--- a/include/exec/cpu-common.h
+++ b/include/exec/cpu-common.h
@@ -29,6 +29,8 @@ void cpu_list_lock(void);
 void cpu_list_unlock(void);
 unsigned int cpu_list_generation_id_get(void);
 
+int cpu_get_free_index(void);
+
 void tcg_iommu_init_notifier_list(CPUState *cpu);
 void tcg_iommu_free_notifier_list(CPUState *cpu);
 
diff --git a/cpu-common.c b/cpu-common.c
index ce78273af5..82bd1b432d 100644
--- a/cpu-common.c
+++ b/cpu-common.c
@@ -57,14 +57,12 @@ void cpu_list_unlock(void)
 qemu_mutex_unlock(&qemu_cpu_list_lock);
 }
 
-static bool cpu_index_auto_assigned;
 
-static int cpu_get_free_index(void)
+int cpu_get_free_index(void)
 {
 CPUState *some_cpu;
 int max_cpu_index = 0;
 
-cpu_index_auto_assigned = true;
 CPU_FOREACH(some_cpu) {
 if (some_cpu->cpu_index >= max_cpu_index) {
 max_cpu_index = some_cpu->cpu_index + 1;
@@ -83,8 +81,11 @@ unsigned int cpu_list_generation_id_get(void)
 
 void cpu_list_add(CPUState *cpu)
 {
+static bool cpu_index_auto_assigned;
+
 QEMU_LOCK_GUARD(&qemu_cpu_list_lock);
 if (cpu->cpu_index == UNASSIGNED_CPU_INDEX) {
+cpu_index_auto_assigned = true;
 cpu->cpu_index = cpu_get_free_index();
 assert(cpu->cpu_index != UNASSIGNED_CPU_INDEX);
 } else {
-- 
2.39.3




[PATCH v2 1/4] accel/kvm: Extract common KVM vCPU {creation, parking} code

2024-05-15 Thread Harsh Prateek Bora
From: Salil Mehta 

KVM vCPU creation is done once during the vCPU realization when Qemu vCPU thread
is spawned. This is common to all the architectures as of now.

Hot-unplug of vCPU results in destruction of the vCPU object in QOM but the
corresponding KVM vCPU object in the Host KVM is not destroyed as KVM doesn't
support vCPU removal. Therefore, its representative KVM vCPU object/context in
Qemu is parked.

Refactor architecture common logic so that some APIs could be reused by vCPU
Hotplug code of some architectures likes ARM, Loongson etc. Update new/old APIs
with trace events instead of DPRINTF. No functional change is intended here.

Signed-off-by: Salil Mehta 
Reviewed-by: Gavin Shan 
Tested-by: Vishnu Pajjuri 
Reviewed-by: Jonathan Cameron 
Tested-by: Xianglai Li 
Tested-by: Miguel Luis 
Reviewed-by: Shaoqin Huang 
[harshpb: fixed rebase failures in include/sysemu/kvm.h]
Signed-off-by: Harsh Prateek Bora 
---
 include/sysemu/kvm.h   | 15 ++
 accel/kvm/kvm-all.c| 64 --
 accel/kvm/trace-events |  5 +++-
 3 files changed, 68 insertions(+), 16 deletions(-)

diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h
index eaf801bc93..fa3ec74442 100644
--- a/include/sysemu/kvm.h
+++ b/include/sysemu/kvm.h
@@ -434,6 +434,21 @@ void kvm_set_sigmask_len(KVMState *s, unsigned int 
sigmask_len);
 
 int kvm_physical_memory_addr_from_host(KVMState *s, void *ram_addr,
hwaddr *phys_addr);
+/**
+ * kvm_create_vcpu - Gets a parked KVM vCPU or creates a KVM vCPU
+ * @cpu: QOM CPUState object for which KVM vCPU has to be fetched/created.
+ *
+ * @returns: 0 when success, errno (<0) when failed.
+ */
+int kvm_create_vcpu(CPUState *cpu);
+
+/**
+ * kvm_park_vcpu - Park QEMU KVM vCPU context
+ * @cpu: QOM CPUState object for which QEMU KVM vCPU context has to be parked.
+ *
+ * @returns: none
+ */
+void kvm_park_vcpu(CPUState *cpu);
 
 #endif /* COMPILING_PER_TARGET */
 
diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
index d7281b93f3..30d42847de 100644
--- a/accel/kvm/kvm-all.c
+++ b/accel/kvm/kvm-all.c
@@ -128,6 +128,7 @@ static QemuMutex kml_slots_lock;
 #define kvm_slots_unlock()  qemu_mutex_unlock(&kml_slots_lock)
 
 static void kvm_slot_init_dirty_bitmap(KVMSlot *mem);
+static int kvm_get_vcpu(KVMState *s, unsigned long vcpu_id);
 
 static inline void kvm_resample_fd_remove(int gsi)
 {
@@ -340,14 +341,53 @@ err:
 return ret;
 }
 
+void kvm_park_vcpu(CPUState *cpu)
+{
+struct KVMParkedVcpu *vcpu;
+
+trace_kvm_park_vcpu(cpu->cpu_index, kvm_arch_vcpu_id(cpu));
+
+vcpu = g_malloc0(sizeof(*vcpu));
+vcpu->vcpu_id = kvm_arch_vcpu_id(cpu);
+vcpu->kvm_fd = cpu->kvm_fd;
+QLIST_INSERT_HEAD(&kvm_state->kvm_parked_vcpus, vcpu, node);
+}
+
+int kvm_create_vcpu(CPUState *cpu)
+{
+unsigned long vcpu_id = kvm_arch_vcpu_id(cpu);
+KVMState *s = kvm_state;
+int kvm_fd;
+
+trace_kvm_create_vcpu(cpu->cpu_index, kvm_arch_vcpu_id(cpu));
+
+/* check if the KVM vCPU already exist but is parked */
+kvm_fd = kvm_get_vcpu(s, vcpu_id);
+if (kvm_fd < 0) {
+/* vCPU not parked: create a new KVM vCPU */
+kvm_fd = kvm_vm_ioctl(s, KVM_CREATE_VCPU, vcpu_id);
+if (kvm_fd < 0) {
+error_report("KVM_CREATE_VCPU IOCTL failed for vCPU %lu", vcpu_id);
+return kvm_fd;
+}
+}
+
+cpu->kvm_fd = kvm_fd;
+cpu->kvm_state = s;
+cpu->vcpu_dirty = true;
+cpu->dirty_pages = 0;
+cpu->throttle_us_per_full = 0;
+
+return 0;
+}
+
 static int do_kvm_destroy_vcpu(CPUState *cpu)
 {
 KVMState *s = kvm_state;
 long mmap_size;
-struct KVMParkedVcpu *vcpu = NULL;
 int ret = 0;
 
-trace_kvm_destroy_vcpu();
+trace_kvm_destroy_vcpu(cpu->cpu_index, kvm_arch_vcpu_id(cpu));
 
 ret = kvm_arch_destroy_vcpu(cpu);
 if (ret < 0) {
@@ -373,10 +413,7 @@ static int do_kvm_destroy_vcpu(CPUState *cpu)
 }
 }
 
-vcpu = g_malloc0(sizeof(*vcpu));
-vcpu->vcpu_id = kvm_arch_vcpu_id(cpu);
-vcpu->kvm_fd = cpu->kvm_fd;
-QLIST_INSERT_HEAD(&kvm_state->kvm_parked_vcpus, vcpu, node);
+kvm_park_vcpu(cpu);
 err:
 return ret;
 }
@@ -397,6 +434,8 @@ static int kvm_get_vcpu(KVMState *s, unsigned long vcpu_id)
 if (cpu->vcpu_id == vcpu_id) {
 int kvm_fd;
 
+trace_kvm_get_vcpu(vcpu_id);
+
 QLIST_REMOVE(cpu, node);
 kvm_fd = cpu->kvm_fd;
 g_free(cpu);
@@ -404,7 +443,7 @@ static int kvm_get_vcpu(KVMState *s, unsigned long vcpu_id)
 }
 }
 
-return kvm_vm_ioctl(s, KVM_CREATE_VCPU, (void *)vcpu_id);
+return -ENOENT;
 }
 
 int kvm_init_vcpu(CPUState *cpu, Error **errp)
@@ -415,19 +454,14 @@ int kvm_init_vcpu(CPUState *cpu, Error **errp)
 
 trace_kvm_init_vcpu(cpu->cpu_index, kvm_arch_vcpu_id(cpu));
 
-ret = kvm_get_vcpu(s, kvm_arch_vcpu_id(cpu));
+ret = kvm_create_vcpu(cpu);
 if (ret < 0) {
-error_setg_errno(e

[PATCH v2 4/4] target/ppc: handle vcpu hotplug failure gracefully

2024-05-15 Thread Harsh Prateek Bora
On ppc64, the PowerVM hypervisor runs with limited memory and a VCPU
creation during hotplug may fail during kvm_ioctl for KVM_CREATE_VCPU,
leading to termination of guest since errp is set to &error_fatal while
calling kvm_init_vcpu. This unexpected behaviour can be avoided by
pre-creating and parking vcpu on success or return error otherwise.
This enables graceful error delivery for any vcpu hotplug failures while
the guest can keep running.

Based on api refactoring to create/park vcpus introduced in 1/8 of patch series:
https://lore.kernel.org/qemu-devel/2024031202.12992-2-salil.me...@huawei.com/

Tested OK by repeatedly doing a hotplug/unplug of vcpus as below:

 #virsh setvcpus hotplug 40
 #virsh setvcpus hotplug 70
error: internal error: unable to execute QEMU command 'device_add':
kvmppc_cpu_realize: vcpu hotplug failed with -12

Reported-by: Anushree Mathur 
Suggested-by: Shivaprasad G Bhat 
Suggested-by: Vaibhav Jain 
Signed-off-by: Harsh Prateek Bora 
Tested-by: Anushree Mathur 
---
 target/ppc/kvm.c | 24 
 1 file changed, 24 insertions(+)

diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c
index 63930d4a77..25f0cf0ba8 100644
--- a/target/ppc/kvm.c
+++ b/target/ppc/kvm.c
@@ -48,6 +48,8 @@
 #include "qemu/mmap-alloc.h"
 #include "elf.h"
 #include "sysemu/kvm_int.h"
+#include "sysemu/kvm.h"
+#include "hw/core/accel-cpu.h"
 
 #define PROC_DEVTREE_CPU  "/proc/device-tree/cpus/"
 
@@ -2339,6 +2341,26 @@ static void alter_insns(uint64_t *word, uint64_t flags, 
bool on)
 }
 }
 
+static bool kvmppc_cpu_realize(CPUState *cs, Error **errp)
+{
+int ret;
+
+cs->cpu_index = cpu_get_free_index();
+
+POWERPC_CPU(cs)->vcpu_id = cs->cpu_index;
+
+if (cs->parent_obj.hotplugged) {
+/* create and park to fail gracefully in case vcpu hotplug fails */
+ret = kvm_create_and_park_vcpu(cs);
+if (ret) {
+error_setg(errp, "%s: vcpu hotplug failed with %d",
+ __func__, ret);
+return false;
+}
+}
+return true;
+}
+
 static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data)
 {
 PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
@@ -2958,4 +2980,6 @@ void kvmppc_set_reg_tb_offset(PowerPCCPU *cpu, int64_t 
tb_offset)
 
 void kvm_arch_accel_class_init(ObjectClass *oc)
 {
+AccelClass *ac = ACCEL_CLASS(oc);
+ac->cpu_common_realize = kvmppc_cpu_realize;
 }
-- 
2.39.3




[PATCH v2 0/4] target/ppc: vcpu hotplug failure handling fixes

2024-05-15 Thread Harsh Prateek Bora
On ppc64, the PowerVM hypervisor runs with limited memory and a VCPU
creation during hotplug may fail during kvm_ioctl for KVM_CREATE_VCPU,
leading to termination of guest since errp is set to &error_fatal while
calling kvm_init_vcpu. This unexpected behaviour can be avoided by
pre-creating and parking vcpu on success or return error otherwise.
This enables graceful error delivery for any vcpu hotplug failures while
the guest can keep running.

This series adds another helper to create and park vcpu (based on below
patch by Salil), exports cpu_get_free_index to be reused later and adds
ppc arch specific handling for vcpu hotplug failure.

Based on api refactoring to create/park vcpus introduced in 1/8 of patch series:
https://lore.kernel.org/qemu-devel/2024031202.12992-2-salil.me...@huawei.com/

PS: I have just included patch 1 of above series after fixing a rebase
failure along with this series for better review purpose only.

Changelog:

v2: Addressed review comments from Nick
v1: Initial patch

Harsh Prateek Bora (3):
  accel/kvm: Introduce kvm_create_and_park_vcpu() helper
  cpu-common.c: export cpu_get_free_index to be reused later
  target/ppc: handle vcpu hotplug failure gracefully

Salil Mehta (1):
  accel/kvm: Extract common KVM vCPU {creation, parking} code

 include/exec/cpu-common.h |  2 ++
 include/sysemu/kvm.h  | 23 
 accel/kvm/kvm-all.c   | 76 +++
 cpu-common.c  |  7 ++--
 target/ppc/kvm.c  | 24 +
 accel/kvm/trace-events|  5 ++-
 6 files changed, 118 insertions(+), 19 deletions(-)

-- 
2.39.3




Re: [PATCH] target/riscv: rvzicbo: Fixup CBO extension register calculation

2024-05-15 Thread Alistair Francis
On Tue, May 14, 2024 at 7:11 PM Daniel Henrique Barboza
 wrote:
>
>
>
> On 5/13/24 23:39, Alistair Francis wrote:
> > When running the instruction
> >
> > ```
> >  cbo.flush 0(x0)
> > ```
> >
> > QEMU would segfault.
> >
> > The issue was in cpu_gpr[a->rs1] as QEMU does not have cpu_gpr[0]
> > allocated.
> >
> > In order to fix this let's use the existing get_address()
> > helper. This also has the benefit of performing pointer mask
> > calculations on the address specified in rs1.
> >
> > The pointer masking specification specifically states:
> >
> > """
> > Cache Management Operations: All instructions in Zicbom, Zicbop and Zicboz
> > """
> >
> > So this is the correct behaviour and we previously have been incorrectly
> > not masking the address.
> >
> > Signed-off-by: Alistair Francis 
> > Reported-by: Fabian Thomas 
> > Fixes: e05da09b7cfd ("target/riscv: implement Zicbom extension")
> > ---
>
> LGTM but I wonder if this is the same fix as this one sent by Phil a month
> ago or so:
>
> https://lore.kernel.org/qemu-riscv/20240419110514.69697-1-phi...@linaro.org/
> ("[PATCH] target/riscv: Use get_address() to get address with Zicbom 
> extensions")

It is the same fix!

I somehow missed that patch at the time. Sorry Philippe!

I'm going to merge this one as it includes the details about pointer
masking, which I think is useful as that's why we are using
get_address() instead of get_gpr()

Alistair

>
>
> Thanks,
>
> Daniel
>
> >   target/riscv/insn_trans/trans_rvzicbo.c.inc | 16 
> >   1 file changed, 12 insertions(+), 4 deletions(-)
> >
> > diff --git a/target/riscv/insn_trans/trans_rvzicbo.c.inc 
> > b/target/riscv/insn_trans/trans_rvzicbo.c.inc
> > index d5d7095903..15711c3140 100644
> > --- a/target/riscv/insn_trans/trans_rvzicbo.c.inc
> > +++ b/target/riscv/insn_trans/trans_rvzicbo.c.inc
> > @@ -31,27 +31,35 @@
> >   static bool trans_cbo_clean(DisasContext *ctx, arg_cbo_clean *a)
> >   {
> >   REQUIRE_ZICBOM(ctx);
> > -gen_helper_cbo_clean_flush(tcg_env, cpu_gpr[a->rs1]);
> > +TCGv src = get_address(ctx, a->rs1, 0);
> > +
> > +gen_helper_cbo_clean_flush(tcg_env, src);
> >   return true;
> >   }
> >
> >   static bool trans_cbo_flush(DisasContext *ctx, arg_cbo_flush *a)
> >   {
> >   REQUIRE_ZICBOM(ctx);
> > -gen_helper_cbo_clean_flush(tcg_env, cpu_gpr[a->rs1]);
> > +TCGv src = get_address(ctx, a->rs1, 0);
> > +
> > +gen_helper_cbo_clean_flush(tcg_env, src);
> >   return true;
> >   }
> >
> >   static bool trans_cbo_inval(DisasContext *ctx, arg_cbo_inval *a)
> >   {
> >   REQUIRE_ZICBOM(ctx);
> > -gen_helper_cbo_inval(tcg_env, cpu_gpr[a->rs1]);
> > +TCGv src = get_address(ctx, a->rs1, 0);
> > +
> > +gen_helper_cbo_inval(tcg_env, src);
> >   return true;
> >   }
> >
> >   static bool trans_cbo_zero(DisasContext *ctx, arg_cbo_zero *a)
> >   {
> >   REQUIRE_ZICBOZ(ctx);
> > -gen_helper_cbo_zero(tcg_env, cpu_gpr[a->rs1]);
> > +TCGv src = get_address(ctx, a->rs1, 0);
> > +
> > +gen_helper_cbo_zero(tcg_env, src);
> >   return true;
> >   }



Re: [PATCH v11 08/10] virtio-gpu: Handle resource blob commands

2024-05-15 Thread Akihiko Odaki

On 2024/05/16 2:15, Dmitry Osipenko wrote:

On 5/15/24 20:04, Akihiko Odaki wrote:




VIRTIO_GPU_CMD_RESOURCE_UNREF should also call
virtio_gpu_virgl_async_unmap_resource_blob(). I guess that's the
original intention of having a function for this instead of inlining the
content of this function to virgl_cmd_resource_unmap_blob().


Correct, previous patchset versions unmapped resource on unref.

In v11 I dropped unmapping from unref to avoid adding additional
`async_unmap_in_progress` flag because normally map/unmap will be
balanced by guest anyways.

The virtio-gpu spec doesn't tell that resource have to be implicitly
unmapped on unref. In a case of Linux guest, it actually will be a bug
to unref a mapped resource because guest will continue to map and use
the destroyed resource.



Additional `async_unmap_in_progress` flag should not be necessary as 
explained earlier.


It is a valid design not to issue UNMAP_BLOB before UNREF if the VMM 
automatically performs the unmapping operation. A guest needs to ensure 
the blob is not mapped in a guest userspace virtual address space, but 
it does not require issuing UNMAP_BLOB, which is to unmap the blob from 
the guest physical address space.


In case of Linux, virtio_gpu_vram_free() calls virtio_gpu_cmd_unmap() to 
issue UNMAP_BLOB before UNREF, which is actually not necessary. Linux 
still needs to ensure that the blob is not mapped in a guest userspace 
virtual address space, but that is done before virtio_gpu_vram_free() 
gets called, and virtio_gpu_cmd_unmap() has nothing to do with that.


It is still a good practice for a guest to issue UNMAP_BLOB in such a 
case because the spec does not say the VMM will automatically unmap the 
blob for UNREF, and that's what Linux does. From the VMM perspective, 
it's better to perform the unmapping operation for UNREF because the 
spec does not say the guest always issue UNMAP_BLOB before UNREF.




Re: [PATCH v2] target/riscv: Remove experimental prefix from "B" extension

2024-05-15 Thread Alistair Francis
On Tue, May 14, 2024 at 9:04 PM Rob Bradford  wrote:
>
> This extension has now been ratified:
> https://jira.riscv.org/browse/RVS-2006 so the "x-" prefix can be
> removed.
>
> Since this is now a ratified extension add it to the list of extensions
> included in the "max" CPU variant.
>
> Signed-off-by: Rob Bradford 
> Reviewed-by: Andrew Jones 

Thanks!

Applied to riscv-to-apply.next

Alistair

> ---
>  target/riscv/cpu.c | 2 +-
>  target/riscv/tcg/tcg-cpu.c | 2 +-
>  2 files changed, 2 insertions(+), 2 deletions(-)
>
> diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
> index eb1a2e7d6d..861d9f4350 100644
> --- a/target/riscv/cpu.c
> +++ b/target/riscv/cpu.c
> @@ -1396,7 +1396,7 @@ static const MISAExtInfo misa_ext_info_arr[] = {
>  MISA_EXT_INFO(RVJ, "x-j", "Dynamic translated languages"),
>  MISA_EXT_INFO(RVV, "v", "Vector operations"),
>  MISA_EXT_INFO(RVG, "g", "General purpose (IMAFD_Zicsr_Zifencei)"),
> -MISA_EXT_INFO(RVB, "x-b", "Bit manipulation (Zba_Zbb_Zbs)")
> +MISA_EXT_INFO(RVB, "b", "Bit manipulation (Zba_Zbb_Zbs)")
>  };
>
>  static void riscv_cpu_validate_misa_mxl(RISCVCPUClass *mcc)
> diff --git a/target/riscv/tcg/tcg-cpu.c b/target/riscv/tcg/tcg-cpu.c
> index 40054a391a..164a13ad0f 100644
> --- a/target/riscv/tcg/tcg-cpu.c
> +++ b/target/riscv/tcg/tcg-cpu.c
> @@ -1281,7 +1281,7 @@ static void riscv_init_max_cpu_extensions(Object *obj)
>  const RISCVCPUMultiExtConfig *prop;
>
>  /* Enable RVG, RVJ and RVV that are disabled by default */
> -riscv_cpu_set_misa_ext(env, env->misa_ext | RVG | RVJ | RVV);
> +riscv_cpu_set_misa_ext(env, env->misa_ext | RVB | RVG | RVJ | RVV);
>
>  for (prop = riscv_cpu_extensions; prop && prop->name; prop++) {
>  isa_ext_update_enabled(cpu, prop->offset, true);
> --
> 2.44.0
>
>



Re: [PATCH v2] target/riscv: Remove experimental prefix from "B" extension

2024-05-15 Thread Alistair Francis
On Tue, May 14, 2024 at 9:04 PM Rob Bradford  wrote:
>
> This extension has now been ratified:
> https://jira.riscv.org/browse/RVS-2006 so the "x-" prefix can be
> removed.
>
> Since this is now a ratified extension add it to the list of extensions
> included in the "max" CPU variant.
>
> Signed-off-by: Rob Bradford 
> Reviewed-by: Andrew Jones 

Reviewed-by: Alistair Francis 

Alistair

> ---
>  target/riscv/cpu.c | 2 +-
>  target/riscv/tcg/tcg-cpu.c | 2 +-
>  2 files changed, 2 insertions(+), 2 deletions(-)
>
> diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
> index eb1a2e7d6d..861d9f4350 100644
> --- a/target/riscv/cpu.c
> +++ b/target/riscv/cpu.c
> @@ -1396,7 +1396,7 @@ static const MISAExtInfo misa_ext_info_arr[] = {
>  MISA_EXT_INFO(RVJ, "x-j", "Dynamic translated languages"),
>  MISA_EXT_INFO(RVV, "v", "Vector operations"),
>  MISA_EXT_INFO(RVG, "g", "General purpose (IMAFD_Zicsr_Zifencei)"),
> -MISA_EXT_INFO(RVB, "x-b", "Bit manipulation (Zba_Zbb_Zbs)")
> +MISA_EXT_INFO(RVB, "b", "Bit manipulation (Zba_Zbb_Zbs)")
>  };
>
>  static void riscv_cpu_validate_misa_mxl(RISCVCPUClass *mcc)
> diff --git a/target/riscv/tcg/tcg-cpu.c b/target/riscv/tcg/tcg-cpu.c
> index 40054a391a..164a13ad0f 100644
> --- a/target/riscv/tcg/tcg-cpu.c
> +++ b/target/riscv/tcg/tcg-cpu.c
> @@ -1281,7 +1281,7 @@ static void riscv_init_max_cpu_extensions(Object *obj)
>  const RISCVCPUMultiExtConfig *prop;
>
>  /* Enable RVG, RVJ and RVV that are disabled by default */
> -riscv_cpu_set_misa_ext(env, env->misa_ext | RVG | RVJ | RVV);
> +riscv_cpu_set_misa_ext(env, env->misa_ext | RVB | RVG | RVJ | RVV);
>
>  for (prop = riscv_cpu_extensions; prop && prop->name; prop++) {
>  isa_ext_update_enabled(cpu, prop->offset, true);
> --
> 2.44.0
>
>



Re: [PATCH] target/ppc: handle vcpu hotplug failure gracefully

2024-05-15 Thread Harsh Prateek Bora

Hi Nick,

On 5/14/24 08:39, Nicholas Piggin wrote:

On Tue Apr 23, 2024 at 4:30 PM AEST, Harsh Prateek Bora wrote:

+ qemu-devel

On 4/23/24 11:40, Harsh Prateek Bora wrote:

On ppc64, the PowerVM hypervisor runs with limited memory and a VCPU
creation during hotplug may fail during kvm_ioctl for KVM_CREATE_VCPU,
leading to termination of guest since errp is set to &error_fatal while
calling kvm_init_vcpu. This unexpected behaviour can be avoided by
pre-creating vcpu and parking it on success or return error otherwise.
This enables graceful error delivery for any vcpu hotplug failures while
the guest can keep running.


So this puts in on the park list so when kvm_init_vcpu() later runs it
will just take it off the park list instead of issuing another
KVM_CREATE_VCPU ioctl.

And kvm_init_vcpu() runs in the vcpu thread function, which does not
have a good way to indicate failure to the caller.

I'm don't know a lot about this part of qemu but it seems like a good
idea to move fail-able initialisation out of the vcpu thread in that
case. So the general idea seems good to me.



Yeh ..



Based on api refactoring to create/park vcpus introduced in 1/8 of patch series:
https://lore.kernel.org/qemu-devel/2024031202.12992-2-salil.me...@huawei.com/


So from this series AFAIKS you're just using kvm_create / kvm_park
routines? You could easily pull that patch 1 out ahead of that larger
series if progress is slow on it, it's a decent cleanup by itself by
the looks.



Yeh, patch 1 of that series is the only one we need, but the author mentioned on 
the list that he is about to post the next version soon.




Tested OK by repeatedly doing a hotplug/unplug of vcpus as below:

   #virsh setvcpus hotplug 40
   #virsh setvcpus hotplug 70
error: internal error: unable to execute QEMU command 'device_add':
kvmppc_cpu_realize: vcpu hotplug failed with -12

Reported-by: Anushree Mathur 
Suggested-by: Shivaprasad G Bhat 
Suggested-by: Vaibhav Jain 
Signed-off-by: Harsh Prateek Bora 
---
---
   target/ppc/kvm.c | 42 ++
   1 file changed, 42 insertions(+)

diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c
index 8231feb2d4..c887f6dfa0 100644
--- a/target/ppc/kvm.c
+++ b/target/ppc/kvm.c
@@ -48,6 +48,8 @@
   #include "qemu/mmap-alloc.h"
   #include "elf.h"
   #include "sysemu/kvm_int.h"
+#include "sysemu/kvm.h"
+#include "hw/core/accel-cpu.h"
   
   #define PROC_DEVTREE_CPU  "/proc/device-tree/cpus/"
   
@@ -2339,6 +2341,43 @@ static void alter_insns(uint64_t *word, uint64_t flags, bool on)

   }
   }
   
+static int max_cpu_index = 0;

+
+static bool kvmppc_cpu_realize(CPUState *cs, Error **errp)
+{
+int ret;
+
+cs->cpu_index = max_cpu_index++;
+
+POWERPC_CPU(cs)->vcpu_id = cs->cpu_index;


So you're overriding the cpu_get_free_index() allocator here.
And you need to because vcpu_id needs to be assigned before
the KVM create, I guess.



Yes ..


I guess it works. I would add a comment like s390x has.


Not sure which comment you were referring to but with exporting
cpu_get_free_index as suggested later, not sure if we still need any
comment.


+
+if (cs->parent_obj.hotplugged) {


Can _all_ kvm cpu creation go via this path? Why just limit it to
hotplugged?


For the initial bootup, we actually want to abort if the requested vCPUs
can't be allocated so that user can retry until the requested vCPUs are
allocated. For hotplug failure, bringing down entire guest isn't fair,
hence the fix.




+/* create and park to fail gracefully in case vcpu hotplug fails */
+ret = kvm_create_vcpu(cs);
+if (!ret) {
+kvm_park_vcpu(cs);


Seems like a small thing, but I would add a new core kvm function
that creates and parks the vcpu, so the target code doesn't have
to know about the parking internals, just that it needs to be
called.


Make sense, I will add another kvm helper: kvm_create_and_park_vcpu()



Unless I'm missing something, we could get all targets to move their kvm
create to here and remove it from kvm_init_vcpu(), that would
just expect it to be on the parked list. But that could be done
incrementally.


Hmm ..




+} else {
+max_cpu_index--;
+error_setg(errp, "%s: vcpu hotplug failed with %d",
+ __func__, ret);
+return false;
+}
+}
+return true;
+}
+
+static void kvmppc_cpu_unrealize(CPUState *cpu)
+{
+if (POWERPC_CPU(cpu)->vcpu_id == (max_cpu_index - 1)) {
+/* only reclaim vcpuid if its the last one assigned
+ * as reclaiming random vcpuid for parked vcpus may lead
+ * to unexpected behaviour due to an existing kernel bug
+ * when drc_index doesnt get reclaimed as expected.
+ */
+max_cpu_index--;
+}


This looks like a fairly lossy allocator. Using cpu_get_free_index()
would be the way to go I think. I would export that and call it here,
and then you don't need this. Just have to take care of the assert,
somet

Re: [PATCH v4 2/2] vhost-user: fix lost reconnect again

2024-05-15 Thread Raphael Norwitz
On Wed, May 15, 2024 at 10:58 PM Li Feng  wrote:
>
> When the vhost-user is reconnecting to the backend, and if the vhost-user 
> fails
> at the get_features in vhost_dev_init(), then the reconnect will fail
> and it will not be retriggered forever.
>
> The reason is:
> When the vhost-user fail at get_features, the vhost_dev_cleanup will be called
> immediately.
>
> vhost_dev_cleanup calls 'memset(hdev, 0, sizeof(struct vhost_dev))'.
>
> The reconnect path is:
> vhost_user_blk_event
>vhost_user_async_close(.. vhost_user_blk_disconnect ..)
>  qemu_chr_fe_set_handlers <- clear the notifier callback
>schedule vhost_user_async_close_bh
>
> The vhost->vdev is null, so the vhost_user_blk_disconnect will not be
> called, then the event fd callback will not be reinstalled.
>
> We need to ensure that even if vhost_dev_init initialization fails, the event
> handler still needs to be reinstalled when s->connected is false.
>
> All vhost-user devices have this issue, including vhost-user-blk/scsi.
>
> Fixes: 71e076a07d ("hw/virtio: generalise CHR_EVENT_CLOSED handling")
>

Reviewed-by: Raphael Norwitz 

> Signed-off-by: Li Feng 
> ---
>  hw/block/vhost-user-blk.c   |  3 ++-
>  hw/scsi/vhost-user-scsi.c   |  3 ++-
>  hw/virtio/vhost-user-base.c |  3 ++-
>  hw/virtio/vhost-user.c  | 10 +-
>  4 files changed, 7 insertions(+), 12 deletions(-)
>
> diff --git a/hw/block/vhost-user-blk.c b/hw/block/vhost-user-blk.c
> index 41d1ac3a5a..c6842ced48 100644
> --- a/hw/block/vhost-user-blk.c
> +++ b/hw/block/vhost-user-blk.c
> @@ -353,7 +353,7 @@ static void vhost_user_blk_disconnect(DeviceState *dev)
>  VHostUserBlk *s = VHOST_USER_BLK(vdev);
>
>  if (!s->connected) {
> -return;
> +goto done;
>  }
>  s->connected = false;
>
> @@ -361,6 +361,7 @@ static void vhost_user_blk_disconnect(DeviceState *dev)
>
>  vhost_dev_cleanup(&s->dev);
>
> +done:
>  /* Re-instate the event handler for new connections */
>  qemu_chr_fe_set_handlers(&s->chardev, NULL, NULL, vhost_user_blk_event,
>   NULL, dev, NULL, true);
> diff --git a/hw/scsi/vhost-user-scsi.c b/hw/scsi/vhost-user-scsi.c
> index 48a59e020e..b49a11d23b 100644
> --- a/hw/scsi/vhost-user-scsi.c
> +++ b/hw/scsi/vhost-user-scsi.c
> @@ -181,7 +181,7 @@ static void vhost_user_scsi_disconnect(DeviceState *dev)
>  VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(dev);
>
>  if (!s->connected) {
> -return;
> +goto done;
>  }
>  s->connected = false;
>
> @@ -189,6 +189,7 @@ static void vhost_user_scsi_disconnect(DeviceState *dev)
>
>  vhost_dev_cleanup(&vsc->dev);
>
> +done:
>  /* Re-instate the event handler for new connections */
>  qemu_chr_fe_set_handlers(&vs->conf.chardev, NULL, NULL,
>   vhost_user_scsi_event, NULL, dev, NULL, true);
> diff --git a/hw/virtio/vhost-user-base.c b/hw/virtio/vhost-user-base.c
> index 4b54255682..11e72b1e3b 100644
> --- a/hw/virtio/vhost-user-base.c
> +++ b/hw/virtio/vhost-user-base.c
> @@ -225,13 +225,14 @@ static void vub_disconnect(DeviceState *dev)
>  VHostUserBase *vub = VHOST_USER_BASE(vdev);
>
>  if (!vub->connected) {
> -return;
> +goto done;
>  }
>  vub->connected = false;
>
>  vub_stop(vdev);
>  vhost_dev_cleanup(&vub->vhost_dev);
>
> +done:
>  /* Re-instate the event handler for new connections */
>  qemu_chr_fe_set_handlers(&vub->chardev,
>   NULL, NULL, vub_event,
> diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c
> index c929097e87..c407ea8939 100644
> --- a/hw/virtio/vhost-user.c
> +++ b/hw/virtio/vhost-user.c
> @@ -2781,16 +2781,8 @@ typedef struct {
>  static void vhost_user_async_close_bh(void *opaque)
>  {
>  VhostAsyncCallback *data = opaque;
> -struct vhost_dev *vhost = data->vhost;
>
> -/*
> - * If the vhost_dev has been cleared in the meantime there is
> - * nothing left to do as some other path has completed the
> - * cleanup.
> - */
> -if (vhost->vdev) {
> -data->cb(data->dev);
> -}
> +data->cb(data->dev);
>
>  g_free(data);
>  }
> --
> 2.45.0
>



Re: [PATCH v4 1/2] Revert "vhost-user: fix lost reconnect"

2024-05-15 Thread Raphael Norwitz
On Wed, May 15, 2024 at 10:58 PM Li Feng  wrote:
>
> This reverts commit f02a4b8e6431598612466f76aac64ab492849abf.
>
> Since the current patch cannot completely fix the lost reconnect
> problem, there is a scenario that is not considered:
> - When the virtio-blk driver is removed from the guest os,
>   s->connected has no chance to be set to false, resulting in
>   subsequent reconnection not being executed.
>
> The next patch will completely fix this issue with a better approach.
>

Reviewed-by: Raphael Norwitz 

> Signed-off-by: Li Feng 
> ---
>  hw/block/vhost-user-blk.c  |  2 +-
>  hw/scsi/vhost-user-scsi.c  |  3 +--
>  hw/virtio/vhost-user-base.c|  2 +-
>  hw/virtio/vhost-user.c | 10 ++
>  include/hw/virtio/vhost-user.h |  3 +--
>  5 files changed, 6 insertions(+), 14 deletions(-)
>
> diff --git a/hw/block/vhost-user-blk.c b/hw/block/vhost-user-blk.c
> index 9e6bbc6950..41d1ac3a5a 100644
> --- a/hw/block/vhost-user-blk.c
> +++ b/hw/block/vhost-user-blk.c
> @@ -384,7 +384,7 @@ static void vhost_user_blk_event(void *opaque, 
> QEMUChrEvent event)
>  case CHR_EVENT_CLOSED:
>  /* defer close until later to avoid circular close */
>  vhost_user_async_close(dev, &s->chardev, &s->dev,
> -   vhost_user_blk_disconnect, 
> vhost_user_blk_event);
> +   vhost_user_blk_disconnect);
>  break;
>  case CHR_EVENT_BREAK:
>  case CHR_EVENT_MUX_IN:
> diff --git a/hw/scsi/vhost-user-scsi.c b/hw/scsi/vhost-user-scsi.c
> index a63b1f4948..48a59e020e 100644
> --- a/hw/scsi/vhost-user-scsi.c
> +++ b/hw/scsi/vhost-user-scsi.c
> @@ -214,8 +214,7 @@ static void vhost_user_scsi_event(void *opaque, 
> QEMUChrEvent event)
>  case CHR_EVENT_CLOSED:
>  /* defer close until later to avoid circular close */
>  vhost_user_async_close(dev, &vs->conf.chardev, &vsc->dev,
> -   vhost_user_scsi_disconnect,
> -   vhost_user_scsi_event);
> +   vhost_user_scsi_disconnect);
>  break;
>  case CHR_EVENT_BREAK:
>  case CHR_EVENT_MUX_IN:
> diff --git a/hw/virtio/vhost-user-base.c b/hw/virtio/vhost-user-base.c
> index a83167191e..4b54255682 100644
> --- a/hw/virtio/vhost-user-base.c
> +++ b/hw/virtio/vhost-user-base.c
> @@ -254,7 +254,7 @@ static void vub_event(void *opaque, QEMUChrEvent event)
>  case CHR_EVENT_CLOSED:
>  /* defer close until later to avoid circular close */
>  vhost_user_async_close(dev, &vub->chardev, &vub->vhost_dev,
> -   vub_disconnect, vub_event);
> +   vub_disconnect);
>  break;
>  case CHR_EVENT_BREAK:
>  case CHR_EVENT_MUX_IN:
> diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c
> index cdf9af4a4b..c929097e87 100644
> --- a/hw/virtio/vhost-user.c
> +++ b/hw/virtio/vhost-user.c
> @@ -2776,7 +2776,6 @@ typedef struct {
>  DeviceState *dev;
>  CharBackend *cd;
>  struct vhost_dev *vhost;
> -IOEventHandler *event_cb;
>  } VhostAsyncCallback;
>
>  static void vhost_user_async_close_bh(void *opaque)
> @@ -2791,10 +2790,7 @@ static void vhost_user_async_close_bh(void *opaque)
>   */
>  if (vhost->vdev) {
>  data->cb(data->dev);
> -} else if (data->event_cb) {
> -qemu_chr_fe_set_handlers(data->cd, NULL, NULL, data->event_cb,
> - NULL, data->dev, NULL, true);
> -   }
> +}
>
>  g_free(data);
>  }
> @@ -2806,8 +2802,7 @@ static void vhost_user_async_close_bh(void *opaque)
>   */
>  void vhost_user_async_close(DeviceState *d,
>  CharBackend *chardev, struct vhost_dev *vhost,
> -vu_async_close_fn cb,
> -IOEventHandler *event_cb)
> +vu_async_close_fn cb)
>  {
>  if (!runstate_check(RUN_STATE_SHUTDOWN)) {
>  /*
> @@ -2823,7 +2818,6 @@ void vhost_user_async_close(DeviceState *d,
>  data->dev = d;
>  data->cd = chardev;
>  data->vhost = vhost;
> -data->event_cb = event_cb;
>
>  /* Disable any further notifications on the chardev */
>  qemu_chr_fe_set_handlers(chardev,
> diff --git a/include/hw/virtio/vhost-user.h b/include/hw/virtio/vhost-user.h
> index d7c09ffd34..324cd8663a 100644
> --- a/include/hw/virtio/vhost-user.h
> +++ b/include/hw/virtio/vhost-user.h
> @@ -108,7 +108,6 @@ typedef void (*vu_async_close_fn)(DeviceState *cb);
>
>  void vhost_user_async_close(DeviceState *d,
>  CharBackend *chardev, struct vhost_dev *vhost,
> -vu_async_close_fn cb,
> -IOEventHandler *event_cb);
> +vu_async_close_fn cb);
>
>  #endif
> --
> 2.45.0
>



Re: [PATCH v3 2/2] vhost-user: fix lost reconnect again

2024-05-15 Thread Raphael Norwitz
OK - I'm happy with this approach then.

On Wed, May 15, 2024 at 10:48 PM Li Feng  wrote:
>
>
>
> 2024年5月15日 23:47,Raphael Norwitz  写道:
>
> The case your describing makes sense but now I have some concerns on
> the vhost_dev_cleanup bit.
>
> On Wed, May 15, 2024 at 1:47 AM Li Feng  wrote:
>
>
>
>
> 2024年5月14日 21:58,Raphael Norwitz  写道:
>
> Code looks good. Just a question on the error case you're trying to fix.
>
> On Tue, May 14, 2024 at 2:12 AM Li Feng  wrote:
>
>
> When the vhost-user is reconnecting to the backend, and if the vhost-user 
> fails
> at the get_features in vhost_dev_init(), then the reconnect will fail
> and it will not be retriggered forever.
>
> The reason is:
> When the vhost-user fail at get_features, the vhost_dev_cleanup will be called
> immediately.
>
> vhost_dev_cleanup calls 'memset(hdev, 0, sizeof(struct vhost_dev))'.
>
> The reconnect path is:
> vhost_user_blk_event
>  vhost_user_async_close(.. vhost_user_blk_disconnect ..)
>qemu_chr_fe_set_handlers <- clear the notifier callback
>  schedule vhost_user_async_close_bh
>
> The vhost->vdev is null, so the vhost_user_blk_disconnect will not be
> called, then the event fd callback will not be reinstalled.
>
> With this patch, the vhost_user_blk_disconnect will call the
> vhost_dev_cleanup() again, it's safe.
>
> In addition, the CLOSE event may occur in a scenario where connected is false.
> At this time, the event handler will be cleared. We need to ensure that the
> event handler can remain installed.
>
>
> Following on from the prior patch, why would "connected" be false when
> a CLOSE event happens?
>
>
> In OPEN event handling, vhost_user_blk_connect calls vhost_dev_init and 
> encounters
> an error such that s->connected remains false.
> Next, after the CLOSE event arrives, it is found that s->connected is false, 
> so nothing
> is done, but the event handler will be cleaned up in `vhost_user_async_close`
> before the CLOSE event is executed.
>
>
> Got it - I see why the event handler is never re-installed in the code
> as it was before if we fail at get_features. That said, how do you
> explain your comment:
>
>
> OK, I will update the commit message because this code has changed some 
> months ago.
>
>
> With this patch, the vhost_user_blk_disconnect will call the
> vhost_dev_cleanup() again, it's safe.
>
>
> I see vhost_dev_cleanup() accessing hdev without even a NULL check. In
> the case we're talking about here I don't think it's a problem because
> if vhost_dev_init() fails, connected will be false and hit the goto
> but I am concerned that there could be double-frees or use-after-frees
> in other cases.
>
>
> OK, you are right, with this patch, the vhost_dev_cleanup will not be
> called multiple times now.
>
> I think there is no need to worry about calling vhost_dev_cleanup multiple 
> times,
> because historically vhost_dev_cleanup has been allowed to be called multiple
> times, and looking at the code, it can be found that calling vhost_dev_cleanup
> multiple times is indeed safe.
>
> Look this patch:
>
> commit e0547b59dc0ead4c605d3f02d1c8829630a1311b
> Author: Marc-André Lureau 
> Date:   Wed Jul 27 01:15:02 2016 +0400
>
> vhost: make vhost_dev_cleanup() idempotent
>
> It is called on multiple code path, so make it safe to call several
> times (note: I don't remember a reproducer here, but a function called
> 'cleanup' should probably be idempotent in my book)
>
> Signed-off-by: Marc-André Lureau 
> Reviewed-by: Michael S. Tsirkin 
> Signed-off-by: Michael S. Tsirkin 
>
> Thanks,
> Li
>
>
> Thanks,
> Li
>
>
>
> All vhost-user devices have this issue, including vhost-user-blk/scsi.
>
> Fixes: 71e076a07d ("hw/virtio: generalise CHR_EVENT_CLOSED handling")
>
> Signed-off-by: Li Feng 
> ---
> hw/block/vhost-user-blk.c   |  3 ++-
> hw/scsi/vhost-user-scsi.c   |  3 ++-
> hw/virtio/vhost-user-base.c |  3 ++-
> hw/virtio/vhost-user.c  | 10 +-
> 4 files changed, 7 insertions(+), 12 deletions(-)
>
> diff --git a/hw/block/vhost-user-blk.c b/hw/block/vhost-user-blk.c
> index 41d1ac3a5a..c6842ced48 100644
> --- a/hw/block/vhost-user-blk.c
> +++ b/hw/block/vhost-user-blk.c
> @@ -353,7 +353,7 @@ static void vhost_user_blk_disconnect(DeviceState *dev)
>VHostUserBlk *s = VHOST_USER_BLK(vdev);
>
>if (!s->connected) {
> -return;
> +goto done;
>}
>s->connected = false;
>
> @@ -361,6 +361,7 @@ static void vhost_user_blk_disconnect(DeviceState *dev)
>
>vhost_dev_cleanup(&s->dev);
>
> +done:
>/* Re-instate the event handler for new connections */
>qemu_chr_fe_set_handlers(&s->chardev, NULL, NULL, vhost_user_blk_event,
> NULL, dev, NULL, true);
> diff --git a/hw/scsi/vhost-user-scsi.c b/hw/scsi/vhost-user-scsi.c
> index 48a59e020e..b49a11d23b 100644
> --- a/hw/scsi/vhost-user-scsi.c
> +++ b/hw/scsi/vhost-user-scsi.c
> @@ -181,7 +181,7 @@ static void vhost_user_scsi_disconnect(DeviceState *dev)
>VirtIOSCSICommon *

Re: [PATCH v3 5/5] virtio-gpu: fix v2 migration

2024-05-15 Thread Peter Xu
On Wed, May 15, 2024 at 06:15:48PM +0100, Daniel P. Berrangé wrote:
> On Wed, May 15, 2024 at 11:03:27AM -0600, Peter Xu wrote:
> > On Wed, May 15, 2024 at 05:03:44PM +0100, Daniel P. Berrangé wrote:
> > > Above all, I'm failing to see why there's a compelling reason
> > > for virtio_gpu to diverge from our long standing practice of
> > > adding a named property flag "virtio_scanout_vmstate_fix"
> > > on the machine class, and then setting it in machine types
> > > which need it.
> > 
> > The reason to introduce that is definitely avoid introducing fields /
> > properties in similar cases in which case all the fields may represent the
> > same thing ("return true if MC is older than xxx version").  Especially
> > when such change is not bound to a new feature so in which case it won't
> > make sense to allow user to even control that propoerty, even if we
> > exported this "x-virtio-scanout-fix" property, but now we must export it
> > because compat fields require it.
> > 
> > However I think agree that having upstream specific MC versions in VMSD
> > checks is kind of unwanted.  I think the major problem is we don't have
> > that extra machine type abstract where we can have simply a number showing
> > the release of QEMU, then we can map that number to whatever
> > upstream/downstream machine types.  E.g.:
> > 
> >   Release No. Upstream version   Downstream version
> >   50  9.0Y.0
> >   51  9.1
> >   52  9.2Y.1
> >   ...
> 
> Downstream versions do not map cleanly to individual upstream versions
> across the whole code base. If we have two distinct features in upstream
> version X, each of them may map to a different downstream release. 
> 
> This can happen when downstream skips one or more upstream releases.
> One feature from the skipped release might be backported to an earlier
> downstream release, while other feature might not arrive downstream
> until they later rebase. Version based checks are an inherently
> undesirable idea for a situation where there is any backporting taking
> place, whether its machine type versions or something else. Named feature
> / flag based checks are always the way to go.

I thought this should work better with things like this where we only want
to fix a break in ABI, and none of downstream should special case things
like such fix.. but I agree even with that in mind such case could be so
rare to bother with above scheme.  I could have raised a bad idea I
suppose. :-( Let's stick with the simple until someone has better idea.

Thanks,

-- 
Peter Xu




[PATCH v2 3/3] migration/colo: Tidy up bql_unlock() around bdrv_activate_all()

2024-05-15 Thread Li Zhijian via
Make the code more tight.

Suggested-by: Michael Tokarev 
Reviewed-by: Peter Xu 
Reviewed-by: Zhang Chen 
Signed-off-by: Li Zhijian 
---
V2: Collected reviewed-by tags
This change/comment suggested by "Michael Tokarev " came
a bit late at that time, let's update it together in these minor set
this time.
---
 migration/colo.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/migration/colo.c b/migration/colo.c
index 991806c06a..1b6d9da1c8 100644
--- a/migration/colo.c
+++ b/migration/colo.c
@@ -838,12 +838,11 @@ static void *colo_process_incoming_thread(void *opaque)
 /* Make sure all file formats throw away their mutable metadata */
 bql_lock();
 bdrv_activate_all(&local_err);
+bql_unlock();
 if (local_err) {
-bql_unlock();
 error_report_err(local_err);
 return NULL;
 }
-bql_unlock();
 
 failover_init_state();
 
-- 
2.31.1




[PATCH v2 2/3] migration/colo: make colo_incoming_co() return void

2024-05-15 Thread Li Zhijian via
Currently, it always returns 0, no need to check the return value at all.
In addition, enter colo coroutine only if migration_incoming_colo_enabled()
is true.
Once the destination side enters the COLO* state, the COLO process will
take over the remaining processes until COLO exits.

Cc: Fabiano Rosas 
Reviewed-by: Peter Xu 
Reviewed-by: Zhang Chen 
Signed-off-by: Li Zhijian 
---
V2: Fix compilation failed, reported by Fabiano Rosas 
Collected reviewed-by tags
---
 include/migration/colo.h | 2 +-
 migration/colo-stubs.c   | 3 +--
 migration/colo.c | 9 ++---
 migration/migration.c| 6 +++---
 4 files changed, 7 insertions(+), 13 deletions(-)

diff --git a/include/migration/colo.h b/include/migration/colo.h
index eaac07f26d..43222ef5ae 100644
--- a/include/migration/colo.h
+++ b/include/migration/colo.h
@@ -49,7 +49,7 @@ void colo_checkpoint_delay_set(void);
  *
  * Called with BQL locked, may temporary release BQL.
  */
-int coroutine_fn colo_incoming_co(void);
+void coroutine_fn colo_incoming_co(void);
 
 void colo_shutdown(void);
 #endif
diff --git a/migration/colo-stubs.c b/migration/colo-stubs.c
index f8c069b739..e22ce65234 100644
--- a/migration/colo-stubs.c
+++ b/migration/colo-stubs.c
@@ -9,9 +9,8 @@ void colo_shutdown(void)
 {
 }
 
-int coroutine_fn colo_incoming_co(void)
+void coroutine_fn colo_incoming_co(void)
 {
-return 0;
 }
 
 void colo_checkpoint_delay_set(void)
diff --git a/migration/colo.c b/migration/colo.c
index 5600a43d78..991806c06a 100644
--- a/migration/colo.c
+++ b/migration/colo.c
@@ -929,16 +929,13 @@ out:
 return NULL;
 }
 
-int coroutine_fn colo_incoming_co(void)
+void coroutine_fn colo_incoming_co(void)
 {
 MigrationIncomingState *mis = migration_incoming_get_current();
 QemuThread th;
 
 assert(bql_locked());
-
-if (!migration_incoming_colo_enabled()) {
-return 0;
-}
+assert(migration_incoming_colo_enabled());
 
 qemu_thread_create(&th, "COLO incoming", colo_process_incoming_thread,
mis, QEMU_THREAD_JOINABLE);
@@ -954,6 +951,4 @@ int coroutine_fn colo_incoming_co(void)
 
 /* We hold the global BQL, so it is safe here */
 colo_release_ram_cache();
-
-return 0;
 }
diff --git a/migration/migration.c b/migration/migration.c
index 0feb354e47..607fb44842 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -789,9 +789,9 @@ process_incoming_migration_co(void *opaque)
 goto fail;
 }
 
-if (colo_incoming_co() < 0) {
-error_setg(&local_err, "colo incoming failed");
-goto fail;
+if (migration_incoming_colo_enabled()) {
+/* yield until COLO exit */
+colo_incoming_co();
 }
 
 migration_bh_schedule(process_incoming_migration_bh, mis);
-- 
2.31.1




[PATCH v2 1/3] migration/colo: Minor fix for colo error message

2024-05-15 Thread Li Zhijian via
- Explicitly show the missing module name: replication
- Fix capability name to x-colo

Reviewed-by: Peter Xu 
Reviewed-by: Zhang Chen 
Signed-off-by: Li Zhijian 
---
V2: Collected reviewed-by tags
---
 migration/migration.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/migration/migration.c b/migration/migration.c
index 5cfe420a76..0feb354e47 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -517,13 +517,13 @@ void migration_incoming_disable_colo(void)
 int migration_incoming_enable_colo(void)
 {
 #ifndef CONFIG_REPLICATION
-error_report("ENABLE_COLO command come in migration stream, but COLO "
- "module is not built in");
+error_report("ENABLE_COLO command come in migration stream, but the "
+ "replication module is not built in");
 return -ENOTSUP;
 #endif
 
 if (!migrate_colo()) {
-error_report("ENABLE_COLO command come in migration stream, but c-colo 
"
+error_report("ENABLE_COLO command come in migration stream, but x-colo 
"
  "capability is not set");
 return -EINVAL;
 }
-- 
2.31.1




Re: [PATCH 2/3] migration/colo: make colo_incoming_co() return void

2024-05-15 Thread Zhijian Li (Fujitsu)


On 16/05/2024 03:04, Fabiano Rosas wrote:
> Li Zhijian via  writes:
> 
>> Currently, it always returns 0, no need to check the return value at all.
>> In addition, enter colo coroutine only if migration_incoming_colo_enabled()
>> is true.
>> Once the destination side enters the COLO* state, the COLO process will
>> take over the remaining processes until COLO exits.
>>
>> Signed-off-by: Li Zhijian 
>> ---
>>   migration/colo.c  | 9 ++---
>>   migration/migration.c | 6 +++---
>>   2 files changed, 5 insertions(+), 10 deletions(-)
>>
>> diff --git a/migration/colo.c b/migration/colo.c
>> index 5600a43d78..991806c06a 100644
>> --- a/migration/colo.c
>> +++ b/migration/colo.c
>> @@ -929,16 +929,13 @@ out:
>>   return NULL;
>>   }
>>   
>> -int coroutine_fn colo_incoming_co(void)
>> +void coroutine_fn colo_incoming_co(void)
>>   {
>>   MigrationIncomingState *mis = migration_incoming_get_current();
>>   QemuThread th;
>>   
>>   assert(bql_locked());
>> -
>> -if (!migration_incoming_colo_enabled()) {
>> -return 0;
>> -}
>> +assert(migration_incoming_colo_enabled());
> 
> FAILED: libcommon.fa.p/migration_colo.c.o
> /usr/bin/gcc-13 ... ../migration/colo.c
> ../migration/colo.c:930:19: error: conflicting types for ‘colo_incoming_co’; 
> have ‘void(void)’
>930 | void coroutine_fn colo_incoming_co(void)
>|   ^~~~
> In file included from ../migration/colo.c:20:
> ... qemu/include/migration/colo.h:52:18: note: previous declaration of 
> ‘colo_incoming_co’ with type ‘int(void)’
> 52 | int coroutine_fn colo_incoming_co(void);

My fault, will fix it soon

Thanks
Zhijian
>|  ^~~~

[PATCH v4 1/2] Revert "vhost-user: fix lost reconnect"

2024-05-15 Thread Li Feng
This reverts commit f02a4b8e6431598612466f76aac64ab492849abf.

Since the current patch cannot completely fix the lost reconnect
problem, there is a scenario that is not considered:
- When the virtio-blk driver is removed from the guest os,
  s->connected has no chance to be set to false, resulting in
  subsequent reconnection not being executed.

The next patch will completely fix this issue with a better approach.

Signed-off-by: Li Feng 
---
 hw/block/vhost-user-blk.c  |  2 +-
 hw/scsi/vhost-user-scsi.c  |  3 +--
 hw/virtio/vhost-user-base.c|  2 +-
 hw/virtio/vhost-user.c | 10 ++
 include/hw/virtio/vhost-user.h |  3 +--
 5 files changed, 6 insertions(+), 14 deletions(-)

diff --git a/hw/block/vhost-user-blk.c b/hw/block/vhost-user-blk.c
index 9e6bbc6950..41d1ac3a5a 100644
--- a/hw/block/vhost-user-blk.c
+++ b/hw/block/vhost-user-blk.c
@@ -384,7 +384,7 @@ static void vhost_user_blk_event(void *opaque, QEMUChrEvent 
event)
 case CHR_EVENT_CLOSED:
 /* defer close until later to avoid circular close */
 vhost_user_async_close(dev, &s->chardev, &s->dev,
-   vhost_user_blk_disconnect, 
vhost_user_blk_event);
+   vhost_user_blk_disconnect);
 break;
 case CHR_EVENT_BREAK:
 case CHR_EVENT_MUX_IN:
diff --git a/hw/scsi/vhost-user-scsi.c b/hw/scsi/vhost-user-scsi.c
index a63b1f4948..48a59e020e 100644
--- a/hw/scsi/vhost-user-scsi.c
+++ b/hw/scsi/vhost-user-scsi.c
@@ -214,8 +214,7 @@ static void vhost_user_scsi_event(void *opaque, 
QEMUChrEvent event)
 case CHR_EVENT_CLOSED:
 /* defer close until later to avoid circular close */
 vhost_user_async_close(dev, &vs->conf.chardev, &vsc->dev,
-   vhost_user_scsi_disconnect,
-   vhost_user_scsi_event);
+   vhost_user_scsi_disconnect);
 break;
 case CHR_EVENT_BREAK:
 case CHR_EVENT_MUX_IN:
diff --git a/hw/virtio/vhost-user-base.c b/hw/virtio/vhost-user-base.c
index a83167191e..4b54255682 100644
--- a/hw/virtio/vhost-user-base.c
+++ b/hw/virtio/vhost-user-base.c
@@ -254,7 +254,7 @@ static void vub_event(void *opaque, QEMUChrEvent event)
 case CHR_EVENT_CLOSED:
 /* defer close until later to avoid circular close */
 vhost_user_async_close(dev, &vub->chardev, &vub->vhost_dev,
-   vub_disconnect, vub_event);
+   vub_disconnect);
 break;
 case CHR_EVENT_BREAK:
 case CHR_EVENT_MUX_IN:
diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c
index cdf9af4a4b..c929097e87 100644
--- a/hw/virtio/vhost-user.c
+++ b/hw/virtio/vhost-user.c
@@ -2776,7 +2776,6 @@ typedef struct {
 DeviceState *dev;
 CharBackend *cd;
 struct vhost_dev *vhost;
-IOEventHandler *event_cb;
 } VhostAsyncCallback;
 
 static void vhost_user_async_close_bh(void *opaque)
@@ -2791,10 +2790,7 @@ static void vhost_user_async_close_bh(void *opaque)
  */
 if (vhost->vdev) {
 data->cb(data->dev);
-} else if (data->event_cb) {
-qemu_chr_fe_set_handlers(data->cd, NULL, NULL, data->event_cb,
- NULL, data->dev, NULL, true);
-   }
+}
 
 g_free(data);
 }
@@ -2806,8 +2802,7 @@ static void vhost_user_async_close_bh(void *opaque)
  */
 void vhost_user_async_close(DeviceState *d,
 CharBackend *chardev, struct vhost_dev *vhost,
-vu_async_close_fn cb,
-IOEventHandler *event_cb)
+vu_async_close_fn cb)
 {
 if (!runstate_check(RUN_STATE_SHUTDOWN)) {
 /*
@@ -2823,7 +2818,6 @@ void vhost_user_async_close(DeviceState *d,
 data->dev = d;
 data->cd = chardev;
 data->vhost = vhost;
-data->event_cb = event_cb;
 
 /* Disable any further notifications on the chardev */
 qemu_chr_fe_set_handlers(chardev,
diff --git a/include/hw/virtio/vhost-user.h b/include/hw/virtio/vhost-user.h
index d7c09ffd34..324cd8663a 100644
--- a/include/hw/virtio/vhost-user.h
+++ b/include/hw/virtio/vhost-user.h
@@ -108,7 +108,6 @@ typedef void (*vu_async_close_fn)(DeviceState *cb);
 
 void vhost_user_async_close(DeviceState *d,
 CharBackend *chardev, struct vhost_dev *vhost,
-vu_async_close_fn cb,
-IOEventHandler *event_cb);
+vu_async_close_fn cb);
 
 #endif
-- 
2.45.0




[PATCH v4 2/2] vhost-user: fix lost reconnect again

2024-05-15 Thread Li Feng
When the vhost-user is reconnecting to the backend, and if the vhost-user fails
at the get_features in vhost_dev_init(), then the reconnect will fail
and it will not be retriggered forever.

The reason is:
When the vhost-user fail at get_features, the vhost_dev_cleanup will be called
immediately.

vhost_dev_cleanup calls 'memset(hdev, 0, sizeof(struct vhost_dev))'.

The reconnect path is:
vhost_user_blk_event
   vhost_user_async_close(.. vhost_user_blk_disconnect ..)
 qemu_chr_fe_set_handlers <- clear the notifier callback
   schedule vhost_user_async_close_bh

The vhost->vdev is null, so the vhost_user_blk_disconnect will not be
called, then the event fd callback will not be reinstalled.

We need to ensure that even if vhost_dev_init initialization fails, the event
handler still needs to be reinstalled when s->connected is false.

All vhost-user devices have this issue, including vhost-user-blk/scsi.

Fixes: 71e076a07d ("hw/virtio: generalise CHR_EVENT_CLOSED handling")

Signed-off-by: Li Feng 
---
 hw/block/vhost-user-blk.c   |  3 ++-
 hw/scsi/vhost-user-scsi.c   |  3 ++-
 hw/virtio/vhost-user-base.c |  3 ++-
 hw/virtio/vhost-user.c  | 10 +-
 4 files changed, 7 insertions(+), 12 deletions(-)

diff --git a/hw/block/vhost-user-blk.c b/hw/block/vhost-user-blk.c
index 41d1ac3a5a..c6842ced48 100644
--- a/hw/block/vhost-user-blk.c
+++ b/hw/block/vhost-user-blk.c
@@ -353,7 +353,7 @@ static void vhost_user_blk_disconnect(DeviceState *dev)
 VHostUserBlk *s = VHOST_USER_BLK(vdev);
 
 if (!s->connected) {
-return;
+goto done;
 }
 s->connected = false;
 
@@ -361,6 +361,7 @@ static void vhost_user_blk_disconnect(DeviceState *dev)
 
 vhost_dev_cleanup(&s->dev);
 
+done:
 /* Re-instate the event handler for new connections */
 qemu_chr_fe_set_handlers(&s->chardev, NULL, NULL, vhost_user_blk_event,
  NULL, dev, NULL, true);
diff --git a/hw/scsi/vhost-user-scsi.c b/hw/scsi/vhost-user-scsi.c
index 48a59e020e..b49a11d23b 100644
--- a/hw/scsi/vhost-user-scsi.c
+++ b/hw/scsi/vhost-user-scsi.c
@@ -181,7 +181,7 @@ static void vhost_user_scsi_disconnect(DeviceState *dev)
 VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(dev);
 
 if (!s->connected) {
-return;
+goto done;
 }
 s->connected = false;
 
@@ -189,6 +189,7 @@ static void vhost_user_scsi_disconnect(DeviceState *dev)
 
 vhost_dev_cleanup(&vsc->dev);
 
+done:
 /* Re-instate the event handler for new connections */
 qemu_chr_fe_set_handlers(&vs->conf.chardev, NULL, NULL,
  vhost_user_scsi_event, NULL, dev, NULL, true);
diff --git a/hw/virtio/vhost-user-base.c b/hw/virtio/vhost-user-base.c
index 4b54255682..11e72b1e3b 100644
--- a/hw/virtio/vhost-user-base.c
+++ b/hw/virtio/vhost-user-base.c
@@ -225,13 +225,14 @@ static void vub_disconnect(DeviceState *dev)
 VHostUserBase *vub = VHOST_USER_BASE(vdev);
 
 if (!vub->connected) {
-return;
+goto done;
 }
 vub->connected = false;
 
 vub_stop(vdev);
 vhost_dev_cleanup(&vub->vhost_dev);
 
+done:
 /* Re-instate the event handler for new connections */
 qemu_chr_fe_set_handlers(&vub->chardev,
  NULL, NULL, vub_event,
diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c
index c929097e87..c407ea8939 100644
--- a/hw/virtio/vhost-user.c
+++ b/hw/virtio/vhost-user.c
@@ -2781,16 +2781,8 @@ typedef struct {
 static void vhost_user_async_close_bh(void *opaque)
 {
 VhostAsyncCallback *data = opaque;
-struct vhost_dev *vhost = data->vhost;
 
-/*
- * If the vhost_dev has been cleared in the meantime there is
- * nothing left to do as some other path has completed the
- * cleanup.
- */
-if (vhost->vdev) {
-data->cb(data->dev);
-}
+data->cb(data->dev);
 
 g_free(data);
 }
-- 
2.45.0




Re: [PATCH 2/2] tests: add testing of parameter=1 for SMP topology

2024-05-15 Thread Xiaoyao Li

On 5/13/2024 8:33 PM, Daniel P. Berrangé wrote:

Validate that it is possible to pass 'parameter=1' for any SMP topology
parameter, since unsupported parameters are implicitly considered to
always have a value of 1.

Signed-off-by: Daniel P. Berrangé 
---
  tests/unit/test-smp-parse.c | 8 
  1 file changed, 8 insertions(+)

diff --git a/tests/unit/test-smp-parse.c b/tests/unit/test-smp-parse.c
index 56165e6644..56ce5128f1 100644
--- a/tests/unit/test-smp-parse.c
+++ b/tests/unit/test-smp-parse.c
@@ -330,6 +330,14 @@ static const struct SMPTestData data_generic_valid[] = {
  .config = SMP_CONFIG_GENERIC(T, 8, T, 2, T, 4, T, 2, T, 16),
  .expect_prefer_sockets = CPU_TOPOLOGY_GENERIC(8, 2, 4, 2, 16),
  .expect_prefer_cores   = CPU_TOPOLOGY_GENERIC(8, 2, 4, 2, 16),
+}, {
+/*
+ * Unsupported parameters are always allowed to be set to '1'
+ * config: -smp 
8,books=1,drawers=1,sockets=2,modules=1,dies=1,cores=4,threads=2,maxcpus=8


cores=2 not 4.


+ * expect: cpus=8,sockets=2,cores=2,threads=2,maxcpus=8 */
+.config = SMP_CONFIG_WITH_FULL_TOPO(8, 1, 1, 2, 1, 1, 2, 2, 8),
+.expect_prefer_sockets = CPU_TOPOLOGY_GENERIC(8, 2, 2, 2, 8),
+.expect_prefer_cores   = CPU_TOPOLOGY_GENERIC(8, 2, 2, 2, 8),
  },
  };
  





Re: [PATCH v3 2/2] vhost-user: fix lost reconnect again

2024-05-15 Thread Li Feng


> 2024年5月15日 23:47,Raphael Norwitz  写道:
> 
> The case your describing makes sense but now I have some concerns on
> the vhost_dev_cleanup bit.
> 
> On Wed, May 15, 2024 at 1:47 AM Li Feng  > wrote:
>> 
>> 
>> 
>>> 2024年5月14日 21:58,Raphael Norwitz  写道:
>>> 
>>> Code looks good. Just a question on the error case you're trying to fix.
>>> 
>>> On Tue, May 14, 2024 at 2:12 AM Li Feng  wrote:
 
 When the vhost-user is reconnecting to the backend, and if the vhost-user 
 fails
 at the get_features in vhost_dev_init(), then the reconnect will fail
 and it will never be retriggered.
 
 The reason is:
 When the vhost-user fail at get_features, the vhost_dev_cleanup will be 
 called
 immediately.
 
 vhost_dev_cleanup calls 'memset(hdev, 0, sizeof(struct vhost_dev))'.
 
 The reconnect path is:
 vhost_user_blk_event
  vhost_user_async_close(.. vhost_user_blk_disconnect ..)
qemu_chr_fe_set_handlers <- clear the notifier callback
  schedule vhost_user_async_close_bh
 
 The vhost->vdev is null, so the vhost_user_blk_disconnect will not be
 called, then the event fd callback will not be reinstalled.
 
 With this patch, the vhost_user_blk_disconnect will call the
 vhost_dev_cleanup() again, it's safe.
 
 In addition, the CLOSE event may occur in a scenario where connected is 
 false.
 At this time, the event handler will be cleared. We need to ensure that the
 event handler can remain installed.
>>> 
>>> Following on from the prior patch, why would "connected" be false when
>>> a CLOSE event happens?
>> 
>> In OPEN event handling, vhost_user_blk_connect calls vhost_dev_init and 
>> encounters
>> an error such that s->connected remains false.
>> Next, after the CLOSE event arrives, it is found that s->connected is false, 
>> so nothing
>> is done, but the event handler will be cleaned up in `vhost_user_async_close`
>> before the CLOSE event is executed.
>> 
> 
> Got it - I see why the event handler is never re-installed in the code
> as it was before if we fail at get_features. That said, how do you
> explain your comment:

OK, I will update the commit message because this code has changed some months 
ago.

> 
 With this patch, the vhost_user_blk_disconnect will call the
 vhost_dev_cleanup() again, it's safe.
> 
> I see vhost_dev_cleanup() accessing hdev without even a NULL check. In
> the case we're talking about here I don't think it's a problem because
> if vhost_dev_init() fails, connected will be false and hit the goto
> but I am concerned that there could be double-frees or use-after-frees
> in other cases.

OK, you are right, with this patch, the vhost_dev_cleanup will not be
called multiple times now.

I think there is no need to worry about calling vhost_dev_cleanup multiple 
times,
because historically vhost_dev_cleanup has been allowed to be called multiple
times, and looking at the code, it can be found that calling vhost_dev_cleanup
multiple times is indeed safe.

Look this patch:

commit e0547b59dc0ead4c605d3f02d1c8829630a1311b
Author: Marc-André Lureau 
Date:   Wed Jul 27 01:15:02 2016 +0400

vhost: make vhost_dev_cleanup() idempotent

It is called on multiple code path, so make it safe to call several
times (note: I don't remember a reproducer here, but a function called
'cleanup' should probably be idempotent in my book)

Signed-off-by: Marc-André Lureau 
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 

Thanks,
Li

> 
>> Thanks,
>> Li
>> 
>>> 
 
 All vhost-user devices have this issue, including vhost-user-blk/scsi.
 
 Fixes: 71e076a07d ("hw/virtio: generalise CHR_EVENT_CLOSED handling")
 
 Signed-off-by: Li Feng 
 ---
 hw/block/vhost-user-blk.c   |  3 ++-
 hw/scsi/vhost-user-scsi.c   |  3 ++-
 hw/virtio/vhost-user-base.c |  3 ++-
 hw/virtio/vhost-user.c  | 10 +-
 4 files changed, 7 insertions(+), 12 deletions(-)
 
 diff --git a/hw/block/vhost-user-blk.c b/hw/block/vhost-user-blk.c
 index 41d1ac3a5a..c6842ced48 100644
 --- a/hw/block/vhost-user-blk.c
 +++ b/hw/block/vhost-user-blk.c
 @@ -353,7 +353,7 @@ static void vhost_user_blk_disconnect(DeviceState *dev)
VHostUserBlk *s = VHOST_USER_BLK(vdev);
 
if (!s->connected) {
 -return;
 +goto done;
}
s->connected = false;
 
 @@ -361,6 +361,7 @@ static void vhost_user_blk_disconnect(DeviceState *dev)
 
vhost_dev_cleanup(&s->dev);
 
 +done:
/* Re-instate the event handler for new connections */
qemu_chr_fe_set_handlers(&s->chardev, NULL, NULL, vhost_user_blk_event,
 NULL, dev, NULL, true);
 diff --git a/hw/scsi/vhost-user-scsi.c b/hw/scsi/vhost-user-scsi.c
 index 48a59e020e..b49a11d23b 100644
 --- a/hw/scsi/vhost-user-scsi

Re: [PATCH RFC v4 0/7] virtio-net: add support for SR-IOV emulation

2024-05-15 Thread Yui Washizu



On 2024/04/28 18:05, Akihiko Odaki wrote:

Based-on: <20240315-reuse-v9-0-67aa69af4...@daynix.com>
("[PATCH for 9.1 v9 00/11] hw/pci: SR-IOV related fixes and improvements")

Introduction


This series is based on the RFC series submitted by Yui Washizu[1].
See also [2] for the context.

This series enables SR-IOV emulation for virtio-net. It is useful
to test SR-IOV support on the guest, or to expose several vDPA devices
in a VM. vDPA devices can also provide L2 switching feature for
offloading though it is out of scope to allow the guest to configure
such a feature.

The PF side code resides in virtio-pci. The VF side code resides in
the PCI common infrastructure, but it is restricted to work only for
virtio-net-pci because of lack of validation.

User Interface
--

A user can configure a SR-IOV capable virtio-net device by adding
virtio-net-pci functions to a bus. Below is a command line example:
   -netdev user,id=n -netdev user,id=o
   -netdev user,id=p -netdev user,id=q
   -device pcie-root-port,id=b
   -device virtio-net-pci,bus=b,addr=0x0.0x3,netdev=q,sriov-pf=f
   -device virtio-net-pci,bus=b,addr=0x0.0x2,netdev=p,sriov-pf=f
   -device virtio-net-pci,bus=b,addr=0x0.0x1,netdev=o,sriov-pf=f
   -device virtio-net-pci,bus=b,addr=0x0.0x0,netdev=n,id=f

The VFs specify the paired PF with "sriov-pf" property. The PF must be
added after all VFs. It is user's responsibility to ensure that VFs have
function numbers larger than one of the PF, and the function numbers
have a consistent stride.



I tried to start a VM with more than 8 VFs allocated using your patch,
but the following error occured and qemu didn't work:
VF function number overflows.

I think the cause of this error is that virtio-net-pci PFs don't have ARI.
(pcie_ari_init is not added to virtio-net-pci when PFs are initialized.)
I think it is possible to add it later,
but how about adding pcie_ari_init ?

As a trial,
adding pcie_ari_init to virtio_pci_realize enabled the creation of more 
than 8 VFs.





Keeping VF instances


A problem with SR-IOV emulation is that it needs to hotplug the VFs as
the guest requests. Previously, this behavior was implemented by
realizing and unrealizing VFs at runtime. However, this strategy does
not work well for the proposed virtio-net emulation; in this proposal,
device options passed in the command line must be maintained as VFs
are hotplugged, but they are consumed when the machine starts and not
available after that, which makes realizing VFs at runtime impossible.

As a strategy alternative to runtime realization/unrealization, this
series proposes to reuse the code to power down PCI Express devices.
When a PCI Express device is powered down, it will be hidden from the
guest but will be kept realized. This effectively implements the
behavior we need for the SR-IOV emulation.

Summary
---

Patch 1 disables ROM BAR, which virtio-net-pci enables by default, for
VFs.
Patch 2 makes zero stride valid for 1 VF configuration.
Patch 3 and 4 adds validations.
Patch 5 adds user-created SR-IOV VF infrastructure.
Patch 6 makes virtio-pci work as SR-IOV PF for user-created VFs.
Patch 7 allows user to create SR-IOV VFs with virtio-net-pci.

[1] 
https://patchew.org/QEMU/1689731808-3009-1-git-send-email-yui.wash...@gmail.com/
[2] https://lore.kernel.org/all/5d46f455-f530-4e5e-9ae7-13a2297d4...@daynix.com/

Co-developed-by: Yui Washizu 
Signed-off-by: Akihiko Odaki 
---
Changes in v4:
- Added patch "hw/pci: Fix SR-IOV VF number calculation" to fix division
   by zero reported by Yui Washizu.
- Rebased.
- Link to v3: 
https://lore.kernel.org/r/20240305-sriov-v3-0-abdb75770...@daynix.com

Changes in v3:
- Rebased.
- Link to v2: 
https://lore.kernel.org/r/20231210-sriov-v2-0-b959e8a6d...@daynix.com

Changes in v2:
- Changed to keep VF instances.
- Link to v1: 
https://lore.kernel.org/r/20231202-sriov-v1-0-32b3570f7...@daynix.com

---
Akihiko Odaki (7):
   hw/pci: Do not add ROM BAR for SR-IOV VF
   hw/pci: Fix SR-IOV VF number calculation
   pcie_sriov: Ensure PF and VF are mutually exclusive
   pcie_sriov: Check PCI Express for SR-IOV PF
   pcie_sriov: Allow user to create SR-IOV device
   virtio-pci: Implement SR-IOV PF
   virtio-net: Implement SR-IOV VF

  include/hw/pci/pci_device.h |   6 +-
  include/hw/pci/pcie_sriov.h |  19 +++
  hw/pci/pci.c|  76 +++
  hw/pci/pcie_sriov.c | 298 +++-
  hw/virtio/virtio-net-pci.c  |   1 +
  hw/virtio/virtio-pci.c  |   7 ++
  6 files changed, 323 insertions(+), 84 deletions(-)
---
base-commit: 2ac5458086ab61282f30c2f8bdf2ae9a0a06a75d
change-id: 20231202-sriov-9402fb262be8

Best regards,




Re: [RFC PATCH-for-9.1 0/4] hw/i2c: Convert to spec v7 (inclusive) terminology

2024-05-15 Thread Corey Minyard via
On Mon, Apr 08, 2024 at 11:33:34PM +0200, Philippe Mathieu-Daudé wrote:
> Mechanical (mostly) conversion inspired by Wolfram [*] to
> use inclusive terminology, similarly to the other renames
> we did 3 years ago, shortly before the I2C spec v7 was
> published.

Sorry, I've been extremely busy on this.

Since the spec has been redone, I'm good with the renames.

As far as the >80 character lines, I'm fine either way.  I actually like
to keep them in 80 characters, but the Linux kernel has moved away from
that, and if it's easier to read with a longer line, that's probably
better.

-corey

> 
> Posted as RFC to get feedback, if no objection I plan to
> finish the conversion (SMBus and rest if hw/i2c/).
> 
> [*] 
> https://lore.kernel.org/all/20240322132619.6389-1-wsa+rene...@sang-engineering.com/
> 
> Philippe Mathieu-Daudé (4):
>   hw/i2c: Fix checkpatch block comment warnings
>   hw/i2c: Fix checkpatch line over 80 chars warnings
>   hw/i2c: Convert to spec v7 terminology (automatically)
>   hw/i2c: Convert to spec v7 terminology (manually)
> 
>  include/hw/display/i2c-ddc.h |   2 +-
>  include/hw/gpio/pca9552.h|   2 +-
>  include/hw/gpio/pca9554.h|   2 +-
>  include/hw/i2c/aspeed_i2c.h  |   4 +-
>  include/hw/i2c/i2c.h | 123 ++---
>  include/hw/i2c/i2c_mux_pca954x.h |   2 +-
>  include/hw/i2c/smbus_slave.h |   4 +-
>  include/hw/nvram/eeprom_at24c.h  |   8 +-
>  include/hw/sensor/tmp105.h   |   2 +-
>  hw/arm/aspeed.c  | 290 +--
>  hw/arm/bananapi_m2u.c|   2 +-
>  hw/arm/cubieboard.c  |   2 +-
>  hw/arm/musicpal.c|   6 +-
>  hw/arm/npcm7xx_boards.c  |  44 ++---
>  hw/arm/nseries.c |   6 +-
>  hw/arm/pxa2xx.c  |  36 ++--
>  hw/arm/realview.c|   2 +-
>  hw/arm/spitz.c   |  12 +-
>  hw/arm/stellaris.c   |   2 +-
>  hw/arm/tosa.c|  14 +-
>  hw/arm/versatilepb.c |   2 +-
>  hw/arm/vexpress.c|   2 +-
>  hw/arm/z2.c  |  20 +--
>  hw/audio/wm8750.c|  18 +-
>  hw/display/ati.c |   4 +-
>  hw/display/i2c-ddc.c |  10 +-
>  hw/display/sii9022.c |  16 +-
>  hw/display/sm501.c   |   2 +-
>  hw/display/ssd0303.c |  14 +-
>  hw/display/xlnx_dp.c |   2 +-
>  hw/gpio/max7310.c|  14 +-
>  hw/gpio/pca9552.c|  14 +-
>  hw/gpio/pca9554.c|  14 +-
>  hw/gpio/pcf8574.c|  12 +-
>  hw/i2c/aspeed_i2c.c  |  16 +-
>  hw/i2c/core.c|  90 +-
>  hw/i2c/i2c_mux_pca954x.c |   6 +-
>  hw/i2c/imx_i2c.c |   2 +-
>  hw/i2c/smbus_slave.c |  12 +-
>  hw/input/lm832x.c|  14 +-
>  hw/misc/axp2xx.c |  14 +-
>  hw/misc/i2c-echo.c   |  14 +-
>  hw/nvram/eeprom_at24c.c  |  26 +--
>  hw/ppc/e500.c|   2 +-
>  hw/ppc/pnv.c |   4 +-
>  hw/ppc/sam460ex.c|   2 +-
>  hw/rtc/ds1338.c  |  14 +-
>  hw/rtc/m41t80.c  |  12 +-
>  hw/rtc/twl92230.c|  16 +-
>  hw/sensor/dps310.c   |  14 +-
>  hw/sensor/emc141x.c  |  16 +-
>  hw/sensor/lsm303dlhc_mag.c   |  16 +-
>  hw/sensor/tmp105.c   |  16 +-
>  hw/sensor/tmp421.c   |  20 +--
>  hw/tpm/tpm_tis_i2c.c |  12 +-
>  55 files changed, 541 insertions(+), 506 deletions(-)
> 
> -- 
> 2.41.0
> 
> 



Re: [PATCH v5 7/8] xen: mapcache: Add support for grant mappings

2024-05-15 Thread Stefano Stabellini
On Fri, 3 May 2024, Edgar E. Iglesias wrote:
> From: "Edgar E. Iglesias" 
> 
> Add a second mapcache for grant mappings. The mapcache for
> grants needs to work with XC_PAGE_SIZE granularity since
> we can't map larger ranges than what has been granted to us.
> 
> Like with foreign mappings (xen_memory), machines using grants
> are expected to initialize the xen_grants MR and map it
> into their address-map accordingly.
> 
> Signed-off-by: Edgar E. Iglesias 

Reviewed-by: Stefano Stabellini 




Re: [PATCH v5 6/8] xen: mapcache: Pass the ram_addr offset to xen_map_cache()

2024-05-15 Thread Stefano Stabellini
On Fri, 3 May 2024, Edgar E. Iglesias wrote:
> From: "Edgar E. Iglesias" 
> 
> Pass the ram_addr offset to xen_map_cache.
> This is in preparation for adding grant mappings that need
> to compute the address within the RAMBlock.
> 
> No functional changes.
> 
> Signed-off-by: Edgar E. Iglesias 
> ---
>  hw/xen/xen-mapcache.c | 16 +++-
>  include/sysemu/xen-mapcache.h |  2 ++
>  system/physmem.c  |  9 +
>  3 files changed, 18 insertions(+), 9 deletions(-)
> 
> diff --git a/hw/xen/xen-mapcache.c b/hw/xen/xen-mapcache.c
> index ec95445696..26bc38a9e3 100644
> --- a/hw/xen/xen-mapcache.c
> +++ b/hw/xen/xen-mapcache.c
> @@ -167,7 +167,8 @@ static void xen_remap_bucket(MapCache *mc,
>   void *vaddr,
>   hwaddr size,
>   hwaddr address_index,
> - bool dummy)
> + bool dummy,
> + ram_addr_t ram_offset)
>  {
>  uint8_t *vaddr_base;
>  xen_pfn_t *pfns;
> @@ -266,6 +267,7 @@ static void xen_remap_bucket(MapCache *mc,
>  
>  static uint8_t *xen_map_cache_unlocked(MapCache *mc,
> hwaddr phys_addr, hwaddr size,
> +   ram_addr_t ram_offset,
> uint8_t lock, bool dma, bool is_write)
>  {
>  MapCacheEntry *entry, *pentry = NULL,
> @@ -337,14 +339,16 @@ tryagain:
>  if (!entry) {
>  entry = g_new0(MapCacheEntry, 1);
>  pentry->next = entry;
> -xen_remap_bucket(mc, entry, NULL, cache_size, address_index, dummy);
> +xen_remap_bucket(mc, entry, NULL, cache_size, address_index, dummy,
> + ram_offset);
>  } else if (!entry->lock) {
>  if (!entry->vaddr_base || entry->paddr_index != address_index ||
>  entry->size != cache_size ||
>  !test_bits(address_offset >> XC_PAGE_SHIFT,
>  test_bit_size >> XC_PAGE_SHIFT,
>  entry->valid_mapping)) {
> -xen_remap_bucket(mc, entry, NULL, cache_size, address_index, 
> dummy);
> +xen_remap_bucket(mc, entry, NULL, cache_size, address_index, 
> dummy,
> + ram_offset);
>  }
>  }
>  
> @@ -391,13 +395,15 @@ tryagain:
>  
>  uint8_t *xen_map_cache(MemoryRegion *mr,
> hwaddr phys_addr, hwaddr size,
> +   ram_addr_t ram_addr_offset,
> uint8_t lock, bool dma,
> bool is_write)
>  {
>  uint8_t *p;
>  
>  mapcache_lock(mapcache);
> -p = xen_map_cache_unlocked(mapcache, phys_addr, size, lock, dma, 
> is_write);
> +p = xen_map_cache_unlocked(mapcache, phys_addr, size, ram_addr_offset,
> +   lock, dma, is_write);
>  mapcache_unlock(mapcache);
>  return p;
>  }
> @@ -632,7 +638,7 @@ static uint8_t *xen_replace_cache_entry_unlocked(MapCache 
> *mc,
>  trace_xen_replace_cache_entry_dummy(old_phys_addr, new_phys_addr);
>  
>  xen_remap_bucket(mc, entry, entry->vaddr_base,
> - cache_size, address_index, false);
> + cache_size, address_index, false, new_phys_addr);

Everything else makes sense, but I don't understand how can it be that
new_phys_addr is the block->offset here?


>  if (!test_bits(address_offset >> XC_PAGE_SHIFT,
>  test_bit_size >> XC_PAGE_SHIFT,
>  entry->valid_mapping)) {
> diff --git a/include/sysemu/xen-mapcache.h b/include/sysemu/xen-mapcache.h
> index 1ec9e66752..b5e3ea1bc0 100644
> --- a/include/sysemu/xen-mapcache.h
> +++ b/include/sysemu/xen-mapcache.h
> @@ -19,6 +19,7 @@ typedef hwaddr (*phys_offset_to_gaddr_t)(hwaddr phys_offset,
>  void xen_map_cache_init(phys_offset_to_gaddr_t f,
>  void *opaque);
>  uint8_t *xen_map_cache(MemoryRegion *mr, hwaddr phys_addr, hwaddr size,
> +   ram_addr_t ram_addr_offset,
> uint8_t lock, bool dma,
> bool is_write);
>  ram_addr_t xen_ram_addr_from_mapcache(void *ptr);
> @@ -37,6 +38,7 @@ static inline void 
> xen_map_cache_init(phys_offset_to_gaddr_t f,
>  static inline uint8_t *xen_map_cache(MemoryRegion *mr,
>   hwaddr phys_addr,
>   hwaddr size,
> + ram_addr_t ram_addr_offset,
>   uint8_t lock,
>   bool dma,
>   bool is_write)
> diff --git a/system/physmem.c b/system/physmem.c
> index b7847db1a2..33d09f7571 100644
> --- a/system/physmem.c
> +++ b/system/physmem.c
> @@ -2231,13 +2231,14 @@ static void *qemu_ram_ptr_length(RAMBlock *block, 
> ram_addr_t addr,
>   */
>  if (xen_mr_

Re: [PATCH v5 5/8] softmmu: Replace check for RAMBlock offset 0 with xen_mr_is_memory

2024-05-15 Thread Stefano Stabellini
On Fri, 3 May 2024, Edgar E. Iglesias wrote:
> From: "Edgar E. Iglesias" 
> 
> For xen, when checking for the first RAM (xen_memory), use
> xen_mr_is_memory() rather than checking for a RAMBlock with
> offset 0.
> 
> All Xen machines create xen_memory first so this has no
> functional change for existing machines.
> 
> Signed-off-by: Edgar E. Iglesias 

Reviewed-by: Stefano Stabellini 


> ---
>  system/physmem.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/system/physmem.c b/system/physmem.c
> index 5e6257ef65..b7847db1a2 100644
> --- a/system/physmem.c
> +++ b/system/physmem.c
> @@ -2229,7 +2229,7 @@ static void *qemu_ram_ptr_length(RAMBlock *block, 
> ram_addr_t addr,
>   * because we don't want to map the entire memory in QEMU.
>   * In that case just map the requested area.
>   */
> -if (block->offset == 0) {
> +if (xen_mr_is_memory(block->mr)) {
>  return xen_map_cache(block->mr, block->offset + addr,
>   len, lock, lock,
>   is_write);
> -- 
> 2.40.1
> 



Re: [PATCH v5 4/8] softmmu: xen: Always pass offset + addr to xen_map_cache

2024-05-15 Thread Stefano Stabellini
On Fri, 3 May 2024, Edgar E. Iglesias wrote:
> From: "Edgar E. Iglesias" 
> 
> Always pass address with offset to xen_map_cache().
> This is in preparation for support for grant mappings.
> 
> Since this is within a block that checks for offset == 0,
> this has no functional changes.
> 
> Signed-off-by: Edgar E. Iglesias 

Reviewed-by: Stefano Stabellini 


> ---
>  system/physmem.c | 3 ++-
>  1 file changed, 2 insertions(+), 1 deletion(-)
> 
> diff --git a/system/physmem.c b/system/physmem.c
> index 342b7a8fd4..5e6257ef65 100644
> --- a/system/physmem.c
> +++ b/system/physmem.c
> @@ -2230,7 +2230,8 @@ static void *qemu_ram_ptr_length(RAMBlock *block, 
> ram_addr_t addr,
>   * In that case just map the requested area.
>   */
>  if (block->offset == 0) {
> -return xen_map_cache(block->mr, addr, len, lock, lock,
> +return xen_map_cache(block->mr, block->offset + addr,
> + len, lock, lock,
>   is_write);
>  }
>  
> -- 
> 2.40.1
> 



Re: [PATCH 1/1] vhost-vsock: add VIRTIO_F_RING_PACKED to feaure_bits

2024-05-15 Thread Halil Pasic
On Tue, 7 May 2024 21:26:30 +0200
Halil Pasic  wrote:

> > Not having VIRTIO_F_RING_PACKED in feature_bits[] is a problem when the
> > vhost-vsock device does not offer the feature bit VIRTIO_F_RING_PACKED
> > but the in QEMU device is configured to try to use the packed layout
> > (the virtio property "packed" is on).  
> 
> polite ping

ping



Re: [PATCH v2 02/11] qcow2: simplify L2 entries accounting for discard-no-unref

2024-05-15 Thread Alberto Garcia
On Mon 13 May 2024 09:31:54 AM +03, Andrey Drobyshev wrote:
> Commits 42a2890a and b2b10904 introduce handling of discard-no-unref
> option in discard_in_l2_slice() and zero_in_l2_slice().  They add even
> more if's when chosing the right l2 entry.  What we really need for this
> option is the new entry simply to contain the same host cluster offset,
> no matter whether we unmap or zeroize the cluster.  For that OR'ing with
> the old entry is enough.
>
> This patch doesn't change the logic and is pure refactoring.
>
> Signed-off-by: Andrey Drobyshev 

Reviewed-by: Alberto Garcia 

Berto



Re: [PATCH v2 01/11] qcow2: make function update_refcount_discard() global

2024-05-15 Thread Alberto Garcia
On Mon 13 May 2024 09:31:53 AM +03, Andrey Drobyshev wrote:
> We are going to need it for discarding separate subclusters.  The
> function itself doesn't do anything with the refcount tables, it simply
> adds a discard request to the queue, so rename it to qcow2_queue_discard().
>
> Signed-off-by: Andrey Drobyshev 
> Reviewed-by: Hanna Czenczek 

Reviewed-by: Alberto Garcia 

Berto



Re: [PATCH v3 5/5] virtio-gpu: fix v2 migration

2024-05-15 Thread Michael S. Tsirkin
On Wed, May 15, 2024 at 10:31:32AM -0600, Peter Xu wrote:
> On Wed, May 15, 2024 at 12:02:49PM -0400, Michael S. Tsirkin wrote:
> > On Wed, May 15, 2024 at 06:15:56PM +0400, marcandre.lur...@redhat.com wrote:
> > > From: Marc-André Lureau 
> > > 
> > > Commit dfcf74fa ("virtio-gpu: fix scanout migration post-load") broke
> > > forward/backward version migration. Versioning of nested VMSD structures
> > > is not straightforward, as the wire format doesn't have nested
> > > structures versions.
> > > 
> > > Use the previously introduced check_machine_version() function as a
> > > field test to ensure proper saving/loading based on the machine version.
> > > The VMSD.version is irrelevant now.
> > > 
> > > Fixes: dfcf74fa ("virtio-gpu: fix scanout migration post-load")
> > > Suggested-by: Peter Xu 
> > > Signed-off-by: Marc-André Lureau 
> > 
> > I don't get it. Our standard way to do it is:
> > - add a property (begin name with x- so we don't commit to an API)
> > - set from compat machinery
> > - test property value in VMSTATE macros
> > 
> > Big advantage is, it works well with any downstreams
> > which pick any properties they like.
> > Why is this not a good fit here?
> 
> I think it'll simplify upstream to avoid introducing one new field + one
> new property for each of such protocol change, which fundamentally are the
> same thing.  But it's indeed a good point that such helper can slightly
> complicate the backport a bit.. I assume a global replacement of versions
> over the helper will be needed after downstream settles on how to map
> downstream MCs to upstream's.
> 
> Thanks,

There's nothing special about this specific code. If we want to rework
how machine compat is handled we can do it, but I wouldn't start with
this virtio gpu bug.

It's a big if though, I don't like how this patch works at all.

-- 
MST




Re: [RFC PATCH v3 3/5] KVM: x86: Add notifications for Heki policy configuration and violation

2024-05-15 Thread Sean Christopherson
On Tue, May 14, 2024, Mickaël Salaün wrote:
> On Fri, May 10, 2024 at 10:07:00AM +, Nicolas Saenz Julienne wrote:
> > Development happens
> > https://github.com/vianpl/{linux,qemu,kvm-unit-tests} and the vsm-next
> > branch, but I'd advice against looking into it until we add some order
> > to the rework. Regardless, feel free to get in touch.
> 
> Thanks for the update.
> 
> Could we schedule a PUCK meeting to synchronize and help each other?
> What about June 12?

June 12th works on my end.



Re: [RFC PATCH v3 04/18] hw/arm/smmu: Use enum for SMMU stage

2024-05-15 Thread Alex Bennée
Mostafa Saleh  writes:

> Currently, translation stage is represented as an int, where 1 is stage-1 and
> 2 is stage-2, when nested is added, 3 would be confusing to represent nesting,
> so we use an enum instead.
>
> While keeping the same values, this is useful for:
>  - Doing tricks with bit masks, where BIT(0) is stage-1 and BIT(1) is
>stage-2 and both is nested.
>  - Tracing, as stage is printed as int.
>
> Signed-off-by: Mostafa Saleh 
> Reviewed-by: Eric Auger 

Reviewed-by: Alex Bennée 

-- 
Alex Bennée
Virtualisation Tech Lead @ Linaro



[PULL 03/11] plugins: prepare introduction of new inline ops

2024-05-15 Thread Alex Bennée
From: Pierrick Bouvier 

Until now, only add_u64 was available, and all functions assumed this or
were named uniquely.

Reviewed-by: Richard Henderson 
Signed-off-by: Pierrick Bouvier 
Reviewed-by: Philippe Mathieu-Daudé 
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Alex Bennée 
Message-Id: <20240514174253.694591-4-alex.ben...@linaro.org>

diff --git a/include/qemu/plugin.h b/include/qemu/plugin.h
index b535bfd5de..93da98b76c 100644
--- a/include/qemu/plugin.h
+++ b/include/qemu/plugin.h
@@ -69,7 +69,7 @@ union qemu_plugin_cb_sig {
 enum plugin_dyn_cb_type {
 PLUGIN_CB_REGULAR,
 PLUGIN_CB_MEM_REGULAR,
-PLUGIN_CB_INLINE,
+PLUGIN_CB_INLINE_ADD_U64,
 };
 
 /*
diff --git a/accel/tcg/plugin-gen.c b/accel/tcg/plugin-gen.c
index 49f5d1c2e4..4069d51daf 100644
--- a/accel/tcg/plugin-gen.c
+++ b/accel/tcg/plugin-gen.c
@@ -113,7 +113,7 @@ static void gen_udata_cb(struct qemu_plugin_dyn_cb *cb)
 tcg_temp_free_i32(cpu_index);
 }
 
-static void gen_inline_cb(struct qemu_plugin_dyn_cb *cb)
+static void gen_inline_add_u64_cb(struct qemu_plugin_dyn_cb *cb)
 {
 GArray *arr = cb->inline_insn.entry.score->data;
 size_t offset = cb->inline_insn.entry.offset;
@@ -158,8 +158,8 @@ static void inject_cb(struct qemu_plugin_dyn_cb *cb)
 case PLUGIN_CB_REGULAR:
 gen_udata_cb(cb);
 break;
-case PLUGIN_CB_INLINE:
-gen_inline_cb(cb);
+case PLUGIN_CB_INLINE_ADD_U64:
+gen_inline_add_u64_cb(cb);
 break;
 default:
 g_assert_not_reached();
diff --git a/plugins/core.c b/plugins/core.c
index 1e58a57bf1..59771eda8f 100644
--- a/plugins/core.c
+++ b/plugins/core.c
@@ -316,6 +316,16 @@ static struct qemu_plugin_dyn_cb *plugin_get_dyn_cb(GArray 
**arr)
 return &g_array_index(cbs, struct qemu_plugin_dyn_cb, cbs->len - 1);
 }
 
+static enum plugin_dyn_cb_type op_to_cb_type(enum qemu_plugin_op op)
+{
+switch (op) {
+case QEMU_PLUGIN_INLINE_ADD_U64:
+return PLUGIN_CB_INLINE_ADD_U64;
+default:
+g_assert_not_reached();
+}
+}
+
 void plugin_register_inline_op_on_entry(GArray **arr,
 enum qemu_plugin_mem_rw rw,
 enum qemu_plugin_op op,
@@ -326,7 +336,7 @@ void plugin_register_inline_op_on_entry(GArray **arr,
 
 dyn_cb = plugin_get_dyn_cb(arr);
 dyn_cb->userp = NULL;
-dyn_cb->type = PLUGIN_CB_INLINE;
+dyn_cb->type = op_to_cb_type(op);
 dyn_cb->rw = rw;
 dyn_cb->inline_insn.entry = entry;
 dyn_cb->inline_insn.op = op;
@@ -551,7 +561,7 @@ void qemu_plugin_vcpu_mem_cb(CPUState *cpu, uint64_t vaddr,
 cb->regular.f.vcpu_mem(cpu->cpu_index, make_plugin_meminfo(oi, rw),
vaddr, cb->userp);
 break;
-case PLUGIN_CB_INLINE:
+case PLUGIN_CB_INLINE_ADD_U64:
 exec_inline_op(cb, cpu->cpu_index);
 break;
 default:
-- 
2.39.2




[PULL 09/11] plugins: distinct types for callbacks

2024-05-15 Thread Alex Bennée
From: Pierrick Bouvier 

To prevent errors when writing new types of callbacks or inline
operations, we split callbacks data to distinct types.

Reviewed-by: Richard Henderson 
Signed-off-by: Pierrick Bouvier 
Message-Id: <20240502211522.346467-8-pierrick.bouv...@linaro.org>
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Alex Bennée 
Message-Id: <20240514174253.694591-10-alex.ben...@linaro.org>

diff --git a/include/qemu/plugin.h b/include/qemu/plugin.h
index c7b3b1cd66..98d27dded9 100644
--- a/include/qemu/plugin.h
+++ b/include/qemu/plugin.h
@@ -74,34 +74,40 @@ enum plugin_dyn_cb_type {
 PLUGIN_CB_INLINE_STORE_U64,
 };
 
+struct qemu_plugin_regular_cb {
+union qemu_plugin_cb_sig f;
+TCGHelperInfo *info;
+void *userp;
+enum qemu_plugin_mem_rw rw;
+};
+
+struct qemu_plugin_inline_cb {
+qemu_plugin_u64 entry;
+enum qemu_plugin_op op;
+uint64_t imm;
+enum qemu_plugin_mem_rw rw;
+};
+
+struct qemu_plugin_conditional_cb {
+union qemu_plugin_cb_sig f;
+TCGHelperInfo *info;
+void *userp;
+qemu_plugin_u64 entry;
+enum qemu_plugin_cond cond;
+uint64_t imm;
+};
+
 /*
  * A dynamic callback has an insertion point that is determined at run-time.
  * Usually the insertion point is somewhere in the code cache; think for
  * instance of a callback to be called upon the execution of a particular TB.
  */
 struct qemu_plugin_dyn_cb {
-void *userp;
 enum plugin_dyn_cb_type type;
-/* @rw applies to mem callbacks only (both regular and inline) */
-enum qemu_plugin_mem_rw rw;
-/* fields specific to each dyn_cb type go here */
 union {
-struct {
-union qemu_plugin_cb_sig f;
-TCGHelperInfo *info;
-} regular;
-struct {
-union qemu_plugin_cb_sig f;
-TCGHelperInfo *info;
-qemu_plugin_u64 entry;
-enum qemu_plugin_cond cond;
-uint64_t imm;
-} cond;
-struct {
-qemu_plugin_u64 entry;
-enum qemu_plugin_op op;
-uint64_t imm;
-} inline_insn;
+struct qemu_plugin_regular_cb regular;
+struct qemu_plugin_conditional_cb cond;
+struct qemu_plugin_inline_cb inline_insn;
 };
 };
 
diff --git a/plugins/plugin.h b/plugins/plugin.h
index 7d4b4e21f7..80d5daa917 100644
--- a/plugins/plugin.h
+++ b/plugins/plugin.h
@@ -108,7 +108,7 @@ void plugin_register_vcpu_mem_cb(GArray **arr,
  enum qemu_plugin_mem_rw rw,
  void *udata);
 
-void exec_inline_op(struct qemu_plugin_dyn_cb *cb, int cpu_index);
+void exec_inline_op(struct qemu_plugin_inline_cb *cb, int cpu_index);
 
 int plugin_num_vcpus(void);
 
diff --git a/accel/tcg/plugin-gen.c b/accel/tcg/plugin-gen.c
index f2190f3511..e018728573 100644
--- a/accel/tcg/plugin-gen.c
+++ b/accel/tcg/plugin-gen.c
@@ -101,13 +101,13 @@ static void gen_disable_mem_helper(void)
offsetof(ArchCPU, env));
 }
 
-static void gen_udata_cb(struct qemu_plugin_dyn_cb *cb)
+static void gen_udata_cb(struct qemu_plugin_regular_cb *cb)
 {
 TCGv_i32 cpu_index = tcg_temp_ebb_new_i32();
 
 tcg_gen_ld_i32(cpu_index, tcg_env,
-offsetof(ArchCPU, env) + offsetof(CPUState, cpu_index));
-tcg_gen_call2(cb->regular.f.vcpu_udata, cb->regular.info, NULL,
+tcg_gen_call2(cb->f.vcpu_udata, cb->info, NULL,
   tcgv_i32_temp(cpu_index),
   tcgv_ptr_temp(tcg_constant_ptr(cb->userp)));
 tcg_temp_free_i32(cpu_index);
@@ -153,21 +153,21 @@ static TCGCond plugin_cond_to_tcgcond(enum 
qemu_plugin_cond cond)
 }
 }
 
-static void gen_udata_cond_cb(struct qemu_plugin_dyn_cb *cb)
+static void gen_udata_cond_cb(struct qemu_plugin_conditional_cb *cb)
 {
-TCGv_ptr ptr = gen_plugin_u64_ptr(cb->cond.entry);
+TCGv_ptr ptr = gen_plugin_u64_ptr(cb->entry);
 TCGv_i32 cpu_index = tcg_temp_ebb_new_i32();
 TCGv_i64 val = tcg_temp_ebb_new_i64();
 TCGLabel *after_cb = gen_new_label();
 
 /* Condition should be negated, as calling the cb is the "else" path */
-TCGCond cond = tcg_invert_cond(plugin_cond_to_tcgcond(cb->cond.cond));
+TCGCond cond = tcg_invert_cond(plugin_cond_to_tcgcond(cb->cond));
 
 tcg_gen_ld_i64(val, ptr, 0);
-tcg_gen_brcondi_i64(cond, val, cb->cond.imm, after_cb);
+tcg_gen_brcondi_i64(cond, val, cb->imm, after_cb);
 tcg_gen_ld_i32(cpu_index, tcg_env,
-offsetof(ArchCPU, env) + offsetof(CPUState, cpu_index));
-tcg_gen_call2(cb->cond.f.vcpu_udata, cb->cond.info, NULL,
+tcg_gen_call2(cb->f.vcpu_udata, cb->info, NULL,
   tcgv_i32_temp(cpu_index),
   tcgv_ptr_temp(tcg_constant_ptr(cb->userp)));
 gen_set_label(after_cb);
@@ -177,37 +177,37 @@ static void gen_udata_cond_cb(struct qemu_plugin_dyn_cb 
*cb)
 tcg_temp_free_ptr(ptr);
 }
 
-static void gen_inline_add_u64_cb(struct qemu_plugin_dyn_cb *cb)
+static void gen_in

[PULL 07/11] plugins: conditional callbacks

2024-05-15 Thread Alex Bennée
From: Pierrick Bouvier 

Extend the plugins API to support a callback called when a given criterion
(evaluated inline) is met.

Added functions:
- qemu_plugin_register_vcpu_tb_exec_cond_cb
- qemu_plugin_register_vcpu_insn_exec_cond_cb

They expect as parameter a condition, a qemu_plugin_u64_t (op1) and an
immediate (op2). Callback is called if op1 |cond| op2 is true.

Reviewed-by: Richard Henderson 
Signed-off-by: Pierrick Bouvier 
Message-Id: <20240502211522.346467-6-pierrick.bouv...@linaro.org>
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Alex Bennée 
Message-Id: <20240514174253.694591-8-alex.ben...@linaro.org>

diff --git a/include/qemu/plugin.h b/include/qemu/plugin.h
index 6c21a30105..c7b3b1cd66 100644
--- a/include/qemu/plugin.h
+++ b/include/qemu/plugin.h
@@ -68,6 +68,7 @@ union qemu_plugin_cb_sig {
 
 enum plugin_dyn_cb_type {
 PLUGIN_CB_REGULAR,
+PLUGIN_CB_COND,
 PLUGIN_CB_MEM_REGULAR,
 PLUGIN_CB_INLINE_ADD_U64,
 PLUGIN_CB_INLINE_STORE_U64,
@@ -89,6 +90,13 @@ struct qemu_plugin_dyn_cb {
 union qemu_plugin_cb_sig f;
 TCGHelperInfo *info;
 } regular;
+struct {
+union qemu_plugin_cb_sig f;
+TCGHelperInfo *info;
+qemu_plugin_u64 entry;
+enum qemu_plugin_cond cond;
+uint64_t imm;
+} cond;
 struct {
 qemu_plugin_u64 entry;
 enum qemu_plugin_op op;
diff --git a/include/qemu/qemu-plugin.h b/include/qemu/qemu-plugin.h
index c5cac897a0..337de25ece 100644
--- a/include/qemu/qemu-plugin.h
+++ b/include/qemu/qemu-plugin.h
@@ -262,6 +262,29 @@ enum qemu_plugin_mem_rw {
 QEMU_PLUGIN_MEM_RW,
 };
 
+/**
+ * enum qemu_plugin_cond - condition to enable callback
+ *
+ * @QEMU_PLUGIN_COND_NEVER: false
+ * @QEMU_PLUGIN_COND_ALWAYS: true
+ * @QEMU_PLUGIN_COND_EQ: is equal?
+ * @QEMU_PLUGIN_COND_NE: is not equal?
+ * @QEMU_PLUGIN_COND_LT: is less than?
+ * @QEMU_PLUGIN_COND_LE: is less than or equal?
+ * @QEMU_PLUGIN_COND_GT: is greater than?
+ * @QEMU_PLUGIN_COND_GE: is greater than or equal?
+ */
+enum qemu_plugin_cond {
+QEMU_PLUGIN_COND_NEVER,
+QEMU_PLUGIN_COND_ALWAYS,
+QEMU_PLUGIN_COND_EQ,
+QEMU_PLUGIN_COND_NE,
+QEMU_PLUGIN_COND_LT,
+QEMU_PLUGIN_COND_LE,
+QEMU_PLUGIN_COND_GT,
+QEMU_PLUGIN_COND_GE,
+};
+
 /**
  * typedef qemu_plugin_vcpu_tb_trans_cb_t - translation callback
  * @id: unique plugin id
@@ -301,6 +324,32 @@ void qemu_plugin_register_vcpu_tb_exec_cb(struct 
qemu_plugin_tb *tb,
   enum qemu_plugin_cb_flags flags,
   void *userdata);
 
+/**
+ * qemu_plugin_register_vcpu_tb_exec_cond_cb() - register conditional callback
+ * @tb: the opaque qemu_plugin_tb handle for the translation
+ * @cb: callback function
+ * @cond: condition to enable callback
+ * @entry: first operand for condition
+ * @imm: second operand for condition
+ * @flags: does the plugin read or write the CPU's registers?
+ * @userdata: any plugin data to pass to the @cb?
+ *
+ * The @cb function is called when a translated unit executes if
+ * entry @cond imm is true.
+ * If condition is QEMU_PLUGIN_COND_ALWAYS, condition is never interpreted and
+ * this function is equivalent to qemu_plugin_register_vcpu_tb_exec_cb.
+ * If condition QEMU_PLUGIN_COND_NEVER, condition is never interpreted and
+ * callback is never installed.
+ */
+QEMU_PLUGIN_API
+void qemu_plugin_register_vcpu_tb_exec_cond_cb(struct qemu_plugin_tb *tb,
+   qemu_plugin_vcpu_udata_cb_t cb,
+   enum qemu_plugin_cb_flags flags,
+   enum qemu_plugin_cond cond,
+   qemu_plugin_u64 entry,
+   uint64_t imm,
+   void *userdata);
+
 /**
  * enum qemu_plugin_op - describes an inline op
  *
@@ -344,6 +393,33 @@ void qemu_plugin_register_vcpu_insn_exec_cb(struct 
qemu_plugin_insn *insn,
 enum qemu_plugin_cb_flags flags,
 void *userdata);
 
+/**
+ * qemu_plugin_register_vcpu_insn_exec_cond_cb() - conditional insn execution 
cb
+ * @insn: the opaque qemu_plugin_insn handle for an instruction
+ * @cb: callback function
+ * @flags: does the plugin read or write the CPU's registers?
+ * @cond: condition to enable callback
+ * @entry: first operand for condition
+ * @imm: second operand for condition
+ * @userdata: any plugin data to pass to the @cb?
+ *
+ * The @cb function is called when an instruction executes if
+ * entry @cond imm is true.
+ * If condition is QEMU_PLUGIN_COND_ALWAYS, condition is never interpreted and
+ * this function is equivalent to qemu_plugin_register_vcpu_insn_exec_cb.
+ * If condition QEMU_PLUGIN_COND_NEVER, condition is never interpreted and
+ * callback is ne

[PULL 10/11] plugins: extract cpu_index generate

2024-05-15 Thread Alex Bennée
From: Pierrick Bouvier 

Factorizes function to access current cpu index for a given vcpu.

Reviewed-by: Richard Henderson 
Signed-off-by: Pierrick Bouvier 
Message-Id: <20240502211522.346467-9-pierrick.bouv...@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé 
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Alex Bennée 
Message-Id: <20240514174253.694591-11-alex.ben...@linaro.org>

diff --git a/accel/tcg/plugin-gen.c b/accel/tcg/plugin-gen.c
index e018728573..c9b298667f 100644
--- a/accel/tcg/plugin-gen.c
+++ b/accel/tcg/plugin-gen.c
@@ -101,12 +101,17 @@ static void gen_disable_mem_helper(void)
offsetof(ArchCPU, env));
 }
 
-static void gen_udata_cb(struct qemu_plugin_regular_cb *cb)
+static TCGv_i32 gen_cpu_index(void)
 {
 TCGv_i32 cpu_index = tcg_temp_ebb_new_i32();
-
 tcg_gen_ld_i32(cpu_index, tcg_env,
-offsetof(ArchCPU, env) + offsetof(CPUState, cpu_index));
+return cpu_index;
+}
+
+static void gen_udata_cb(struct qemu_plugin_regular_cb *cb)
+{
+TCGv_i32 cpu_index = gen_cpu_index();
 tcg_gen_call2(cb->f.vcpu_udata, cb->info, NULL,
   tcgv_i32_temp(cpu_index),
   tcgv_ptr_temp(tcg_constant_ptr(cb->userp)));
@@ -121,9 +126,7 @@ static TCGv_ptr gen_plugin_u64_ptr(qemu_plugin_u64 entry)
 char *base_ptr = arr->data + entry.offset;
 size_t entry_size = g_array_get_element_size(arr);
 
-TCGv_i32 cpu_index = tcg_temp_ebb_new_i32();
-tcg_gen_ld_i32(cpu_index, tcg_env,
-   -offsetof(ArchCPU, env) + offsetof(CPUState, cpu_index));
+TCGv_i32 cpu_index = gen_cpu_index();
 tcg_gen_muli_i32(cpu_index, cpu_index, entry_size);
 tcg_gen_ext_i32_ptr(ptr, cpu_index);
 tcg_temp_free_i32(cpu_index);
@@ -156,7 +159,6 @@ static TCGCond plugin_cond_to_tcgcond(enum qemu_plugin_cond 
cond)
 static void gen_udata_cond_cb(struct qemu_plugin_conditional_cb *cb)
 {
 TCGv_ptr ptr = gen_plugin_u64_ptr(cb->entry);
-TCGv_i32 cpu_index = tcg_temp_ebb_new_i32();
 TCGv_i64 val = tcg_temp_ebb_new_i64();
 TCGLabel *after_cb = gen_new_label();
 
@@ -165,15 +167,14 @@ static void gen_udata_cond_cb(struct 
qemu_plugin_conditional_cb *cb)
 
 tcg_gen_ld_i64(val, ptr, 0);
 tcg_gen_brcondi_i64(cond, val, cb->imm, after_cb);
-tcg_gen_ld_i32(cpu_index, tcg_env,
-   -offsetof(ArchCPU, env) + offsetof(CPUState, cpu_index));
+TCGv_i32 cpu_index = gen_cpu_index();
 tcg_gen_call2(cb->f.vcpu_udata, cb->info, NULL,
   tcgv_i32_temp(cpu_index),
   tcgv_ptr_temp(tcg_constant_ptr(cb->userp)));
+tcg_temp_free_i32(cpu_index);
 gen_set_label(after_cb);
 
 tcg_temp_free_i64(val);
-tcg_temp_free_i32(cpu_index);
 tcg_temp_free_ptr(ptr);
 }
 
@@ -203,10 +204,7 @@ static void gen_inline_store_u64_cb(struct 
qemu_plugin_inline_cb *cb)
 static void gen_mem_cb(struct qemu_plugin_regular_cb *cb,
qemu_plugin_meminfo_t meminfo, TCGv_i64 addr)
 {
-TCGv_i32 cpu_index = tcg_temp_ebb_new_i32();
-
-tcg_gen_ld_i32(cpu_index, tcg_env,
-   -offsetof(ArchCPU, env) + offsetof(CPUState, cpu_index));
+TCGv_i32 cpu_index = gen_cpu_index();
 tcg_gen_call4(cb->f.vcpu_mem, cb->info, NULL,
   tcgv_i32_temp(cpu_index),
   tcgv_i32_temp(tcg_constant_i32(meminfo)),
-- 
2.39.2




[PULL 01/11] tests/tcg: don't append QEMU_OPTS for armv6m-undef test

2024-05-15 Thread Alex Bennée
We don't want to build on the default machine setup here but define a
custom one for the microbit.

Reviewed-by: Pierrick Bouvier 
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Alex Bennée 
Message-Id: <20240514174253.694591-2-alex.ben...@linaro.org>

diff --git a/tests/tcg/arm/Makefile.softmmu-target 
b/tests/tcg/arm/Makefile.softmmu-target
index 4c9264057f..39e01ce49d 100644
--- a/tests/tcg/arm/Makefile.softmmu-target
+++ b/tests/tcg/arm/Makefile.softmmu-target
@@ -16,7 +16,7 @@ test-armv6m-undef: test-armv6m-undef.S
$< -o $@ -nostdlib -N -static \
-T $(ARM_SRC)/$@.ld
 
-run-test-armv6m-undef: QEMU_OPTS+=-semihosting -M microbit -kernel
+run-test-armv6m-undef: QEMU_OPTS=-semihosting-config 
enable=on,target=native,chardev=output -M microbit -kernel
 
 ARM_TESTS+=test-armv6m-undef
 
-- 
2.39.2




[PULL 00/11] testing and plugin updates

2024-05-15 Thread Alex Bennée
The following changes since commit 265aad58e9cab31d0e69c374ec2efcede7fa8881:

  Merge tag 'ui-pull-request' of https://gitlab.com/marcandre.lureau/qemu into 
staging (2024-05-15 08:52:27 +0200)

are available in the Git repository at:

  https://gitlab.com/stsquad/qemu.git tags/pull-maintainer-may24-150524-1

for you to fetch changes up to 8f9df78afac40d60a0f8162aae80c90a9c58972d:

  plugins: remove op from qemu_plugin_inline_cb (2024-05-15 13:59:13 +0100)


plugin and testing updates

 - don't duplicate options for microbit test
 - don't spam the linux source tree when importing headers
 - add STORE_U64 inline op to TCG plugins
 - add conditional callback op to TCG plugins


Alex Bennée (2):
  tests/tcg: don't append QEMU_OPTS for armv6m-undef test
  scripts/update-linux-header.sh: be more src tree friendly

Pierrick Bouvier (9):
  plugins: prepare introduction of new inline ops
  plugins: extract generate ptr for qemu_plugin_u64
  plugins: add new inline op STORE_U64
  tests/plugin/inline: add test for STORE_U64 inline op
  plugins: conditional callbacks
  tests/plugin/inline: add test for conditional callback
  plugins: distinct types for callbacks
  plugins: extract cpu_index generate
  plugins: remove op from qemu_plugin_inline_cb

 include/qemu/plugin.h |  42 +++
 include/qemu/qemu-plugin.h|  80 +++-
 plugins/plugin.h  |  12 ++-
 accel/tcg/plugin-gen.c| 136 ++
 plugins/api.c |  39 ++
 plugins/core.c| 109 +++
 tests/plugin/inline.c | 130 ++--
 plugins/qemu-plugins.symbols  |   2 +
 scripts/update-linux-headers.sh   |  80 ++--
 tests/tcg/arm/Makefile.softmmu-target |   2 +-
 10 files changed, 508 insertions(+), 124 deletions(-)

-- 
2.39.2




[PULL 02/11] scripts/update-linux-header.sh: be more src tree friendly

2024-05-15 Thread Alex Bennée
Running "install_headers" in the Linux source tree is fairly
unfriendly as out-of-tree builds will start complaining about the
kernel source being non-pristine. As we have a temporary directory for
the install we should also do the build step here. So now we have:

  $tmpdir/
$blddir/
$hdrdir/

Reviewed-by: Pierrick Bouvier 
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Alex Bennée 
Message-Id: <20240514174253.694591-3-alex.ben...@linaro.org>

diff --git a/scripts/update-linux-headers.sh b/scripts/update-linux-headers.sh
index 36f3e91fe4..8963c39189 100755
--- a/scripts/update-linux-headers.sh
+++ b/scripts/update-linux-headers.sh
@@ -27,6 +27,8 @@
 #   types like "__u64".  This work is done in the cp_portable function.
 
 tmpdir=$(mktemp -d)
+hdrdir="$tmpdir/headers"
+blddir="$tmpdir/build"
 linux="$1"
 output="$2"
 
@@ -110,56 +112,56 @@ for arch in $ARCHLIST; do
 arch_var=ARCH
 fi
 
-make -C "$linux" INSTALL_HDR_PATH="$tmpdir" $arch_var=$arch headers_install
+make -C "$linux" O="$blddir" INSTALL_HDR_PATH="$hdrdir" $arch_var=$arch 
headers_install
 
 rm -rf "$output/linux-headers/asm-$arch"
 mkdir -p "$output/linux-headers/asm-$arch"
 for header in kvm.h unistd.h bitsperlong.h mman.h; do
-cp "$tmpdir/include/asm/$header" "$output/linux-headers/asm-$arch"
+cp "$hdrdir/include/asm/$header" "$output/linux-headers/asm-$arch"
 done
 
 if [ $arch = mips ]; then
-cp "$tmpdir/include/asm/sgidefs.h" "$output/linux-headers/asm-mips/"
-cp "$tmpdir/include/asm/unistd_o32.h" "$output/linux-headers/asm-mips/"
-cp "$tmpdir/include/asm/unistd_n32.h" "$output/linux-headers/asm-mips/"
-cp "$tmpdir/include/asm/unistd_n64.h" "$output/linux-headers/asm-mips/"
+cp "$hdrdir/include/asm/sgidefs.h" "$output/linux-headers/asm-mips/"
+cp "$hdrdir/include/asm/unistd_o32.h" "$output/linux-headers/asm-mips/"
+cp "$hdrdir/include/asm/unistd_n32.h" "$output/linux-headers/asm-mips/"
+cp "$hdrdir/include/asm/unistd_n64.h" "$output/linux-headers/asm-mips/"
 fi
 if [ $arch = powerpc ]; then
-cp "$tmpdir/include/asm/unistd_32.h" 
"$output/linux-headers/asm-powerpc/"
-cp "$tmpdir/include/asm/unistd_64.h" 
"$output/linux-headers/asm-powerpc/"
+cp "$hdrdir/include/asm/unistd_32.h" 
"$output/linux-headers/asm-powerpc/"
+cp "$hdrdir/include/asm/unistd_64.h" 
"$output/linux-headers/asm-powerpc/"
 fi
 
 rm -rf "$output/include/standard-headers/asm-$arch"
 mkdir -p "$output/include/standard-headers/asm-$arch"
 if [ $arch = s390 ]; then
-cp_portable "$tmpdir/include/asm/virtio-ccw.h" 
"$output/include/standard-headers/asm-s390/"
-cp "$tmpdir/include/asm/unistd_32.h" "$output/linux-headers/asm-s390/"
-cp "$tmpdir/include/asm/unistd_64.h" "$output/linux-headers/asm-s390/"
+cp_portable "$hdrdir/include/asm/virtio-ccw.h" 
"$output/include/standard-headers/asm-s390/"
+cp "$hdrdir/include/asm/unistd_32.h" "$output/linux-headers/asm-s390/"
+cp "$hdrdir/include/asm/unistd_64.h" "$output/linux-headers/asm-s390/"
 fi
 if [ $arch = arm ]; then
-cp "$tmpdir/include/asm/unistd-eabi.h" "$output/linux-headers/asm-arm/"
-cp "$tmpdir/include/asm/unistd-oabi.h" "$output/linux-headers/asm-arm/"
-cp "$tmpdir/include/asm/unistd-common.h" 
"$output/linux-headers/asm-arm/"
+cp "$hdrdir/include/asm/unistd-eabi.h" "$output/linux-headers/asm-arm/"
+cp "$hdrdir/include/asm/unistd-oabi.h" "$output/linux-headers/asm-arm/"
+cp "$hdrdir/include/asm/unistd-common.h" 
"$output/linux-headers/asm-arm/"
 fi
 if [ $arch = arm64 ]; then
-cp "$tmpdir/include/asm/sve_context.h" 
"$output/linux-headers/asm-arm64/"
+cp "$hdrdir/include/asm/sve_context.h" 
"$output/linux-headers/asm-arm64/"
 fi
 if [ $arch = x86 ]; then
-cp "$tmpdir/include/asm/unistd_32.h" "$output/linux-headers/asm-x86/"
-cp "$tmpdir/include/asm/unistd_x32.h" "$output/linux-headers/asm-x86/"
-cp "$tmpdir/include/asm/unistd_64.h" "$output/linux-headers/asm-x86/"
-cp_portable "$tmpdir/include/asm/kvm_para.h" 
"$output/include/standard-headers/asm-$arch"
+cp "$hdrdir/include/asm/unistd_32.h" "$output/linux-headers/asm-x86/"
+cp "$hdrdir/include/asm/unistd_x32.h" "$output/linux-headers/asm-x86/"
+cp "$hdrdir/include/asm/unistd_64.h" "$output/linux-headers/asm-x86/"
+cp_portable "$hdrdir/include/asm/kvm_para.h" 
"$output/include/standard-headers/asm-$arch"
 # Remove everything except the macros from bootparam.h avoiding the
 # unnecessary import of several video/ist/etc headers
 sed -e '/__ASSEMBLY__/,/__ASSEMBLY__/d' \
-   "$tmpdir/include/asm/bootparam.h" > "$tmpdir/bootparam.h"
-cp_portable "$tmpdir/bootparam.h" \
+   "$hdrdir/include/asm/bootparam.h" > "$hdrdir/bootparam.h"
+cp_port

[PULL 06/11] tests/plugin/inline: add test for STORE_U64 inline op

2024-05-15 Thread Alex Bennée
From: Pierrick Bouvier 

Reviewed-by: Richard Henderson 
Signed-off-by: Pierrick Bouvier 
Message-Id: <20240502211522.346467-5-pierrick.bouv...@linaro.org>
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Alex Bennée 
Message-Id: <20240514174253.694591-7-alex.ben...@linaro.org>

diff --git a/tests/plugin/inline.c b/tests/plugin/inline.c
index 0163e9b51c..103c3a22f6 100644
--- a/tests/plugin/inline.c
+++ b/tests/plugin/inline.c
@@ -22,6 +22,12 @@ typedef struct {
 uint64_t count_mem_inline;
 } CPUCount;
 
+typedef struct {
+uint64_t data_insn;
+uint64_t data_tb;
+uint64_t data_mem;
+} CPUData;
+
 static struct qemu_plugin_scoreboard *counts;
 static qemu_plugin_u64 count_tb;
 static qemu_plugin_u64 count_tb_inline;
@@ -29,6 +35,10 @@ static qemu_plugin_u64 count_insn;
 static qemu_plugin_u64 count_insn_inline;
 static qemu_plugin_u64 count_mem;
 static qemu_plugin_u64 count_mem_inline;
+static struct qemu_plugin_scoreboard *data;
+static qemu_plugin_u64 data_insn;
+static qemu_plugin_u64 data_tb;
+static qemu_plugin_u64 data_mem;
 
 static uint64_t global_count_tb;
 static uint64_t global_count_insn;
@@ -109,11 +119,13 @@ static void plugin_exit(qemu_plugin_id_t id, void *udata)
 stats_mem();
 
 qemu_plugin_scoreboard_free(counts);
+qemu_plugin_scoreboard_free(data);
 }
 
 static void vcpu_tb_exec(unsigned int cpu_index, void *udata)
 {
 qemu_plugin_u64_add(count_tb, cpu_index, 1);
+g_assert(qemu_plugin_u64_get(data_tb, cpu_index) == (uintptr_t) udata);
 g_mutex_lock(&tb_lock);
 max_cpu_index = MAX(max_cpu_index, cpu_index);
 global_count_tb++;
@@ -123,6 +135,7 @@ static void vcpu_tb_exec(unsigned int cpu_index, void 
*udata)
 static void vcpu_insn_exec(unsigned int cpu_index, void *udata)
 {
 qemu_plugin_u64_add(count_insn, cpu_index, 1);
+g_assert(qemu_plugin_u64_get(data_insn, cpu_index) == (uintptr_t) udata);
 g_mutex_lock(&insn_lock);
 global_count_insn++;
 g_mutex_unlock(&insn_lock);
@@ -131,9 +144,10 @@ static void vcpu_insn_exec(unsigned int cpu_index, void 
*udata)
 static void vcpu_mem_access(unsigned int cpu_index,
 qemu_plugin_meminfo_t info,
 uint64_t vaddr,
-void *userdata)
+void *udata)
 {
 qemu_plugin_u64_add(count_mem, cpu_index, 1);
+g_assert(qemu_plugin_u64_get(data_mem, cpu_index) == (uintptr_t) udata);
 g_mutex_lock(&mem_lock);
 global_count_mem++;
 g_mutex_unlock(&mem_lock);
@@ -141,20 +155,34 @@ static void vcpu_mem_access(unsigned int cpu_index,
 
 static void vcpu_tb_trans(qemu_plugin_id_t id, struct qemu_plugin_tb *tb)
 {
+void *tb_store = tb;
+qemu_plugin_register_vcpu_tb_exec_inline_per_vcpu(
+tb, QEMU_PLUGIN_INLINE_STORE_U64, data_tb, (uintptr_t) tb_store);
 qemu_plugin_register_vcpu_tb_exec_cb(
-tb, vcpu_tb_exec, QEMU_PLUGIN_CB_NO_REGS, 0);
+tb, vcpu_tb_exec, QEMU_PLUGIN_CB_NO_REGS, tb_store);
 qemu_plugin_register_vcpu_tb_exec_inline_per_vcpu(
 tb, QEMU_PLUGIN_INLINE_ADD_U64, count_tb_inline, 1);
 
 for (int idx = 0; idx < qemu_plugin_tb_n_insns(tb); ++idx) {
 struct qemu_plugin_insn *insn = qemu_plugin_tb_get_insn(tb, idx);
+void *insn_store = insn;
+void *mem_store = (char *)insn_store + 0xff;
+
+qemu_plugin_register_vcpu_insn_exec_inline_per_vcpu(
+insn, QEMU_PLUGIN_INLINE_STORE_U64, data_insn,
+(uintptr_t) insn_store);
 qemu_plugin_register_vcpu_insn_exec_cb(
-insn, vcpu_insn_exec, QEMU_PLUGIN_CB_NO_REGS, 0);
+insn, vcpu_insn_exec, QEMU_PLUGIN_CB_NO_REGS, insn_store);
 qemu_plugin_register_vcpu_insn_exec_inline_per_vcpu(
 insn, QEMU_PLUGIN_INLINE_ADD_U64, count_insn_inline, 1);
+
+qemu_plugin_register_vcpu_mem_inline_per_vcpu(
+insn, QEMU_PLUGIN_MEM_RW,
+QEMU_PLUGIN_INLINE_STORE_U64,
+data_mem, (uintptr_t) mem_store);
 qemu_plugin_register_vcpu_mem_cb(insn, &vcpu_mem_access,
  QEMU_PLUGIN_CB_NO_REGS,
- QEMU_PLUGIN_MEM_RW, 0);
+ QEMU_PLUGIN_MEM_RW, mem_store);
 qemu_plugin_register_vcpu_mem_inline_per_vcpu(
 insn, QEMU_PLUGIN_MEM_RW,
 QEMU_PLUGIN_INLINE_ADD_U64,
@@ -179,6 +207,11 @@ int qemu_plugin_install(qemu_plugin_id_t id, const 
qemu_info_t *info,
 counts, CPUCount, count_insn_inline);
 count_mem_inline = qemu_plugin_scoreboard_u64_in_struct(
 counts, CPUCount, count_mem_inline);
+data = qemu_plugin_scoreboard_new(sizeof(CPUData));
+data_insn = qemu_plugin_scoreboard_u64_in_struct(data, CPUData, data_insn);
+data_tb = qemu_plugin_scoreboard_u64_in_struct(data, CPUData, data_tb);
+data_mem = qemu_plugin_scoreboard_u64_in_struct(data, CPUData, data_mem);
+
 qemu_plugin_register_v

[PULL 04/11] plugins: extract generate ptr for qemu_plugin_u64

2024-05-15 Thread Alex Bennée
From: Pierrick Bouvier 

Plugin operations can access a scoreboard. This function factorizes code
generation for accessing entry associated to a given vcpu.

Reviewed-by: Richard Henderson 
Signed-off-by: Pierrick Bouvier 
Message-Id: <20240502211522.346467-3-pierrick.bouv...@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé 
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Alex Bennée 
Message-Id: <20240514174253.694591-5-alex.ben...@linaro.org>

diff --git a/accel/tcg/plugin-gen.c b/accel/tcg/plugin-gen.c
index 4069d51daf..97868781fe 100644
--- a/accel/tcg/plugin-gen.c
+++ b/accel/tcg/plugin-gen.c
@@ -113,24 +113,33 @@ static void gen_udata_cb(struct qemu_plugin_dyn_cb *cb)
 tcg_temp_free_i32(cpu_index);
 }
 
-static void gen_inline_add_u64_cb(struct qemu_plugin_dyn_cb *cb)
+static TCGv_ptr gen_plugin_u64_ptr(qemu_plugin_u64 entry)
 {
-GArray *arr = cb->inline_insn.entry.score->data;
-size_t offset = cb->inline_insn.entry.offset;
-TCGv_i32 cpu_index = tcg_temp_ebb_new_i32();
-TCGv_i64 val = tcg_temp_ebb_new_i64();
 TCGv_ptr ptr = tcg_temp_ebb_new_ptr();
 
+GArray *arr = entry.score->data;
+char *base_ptr = arr->data + entry.offset;
+size_t entry_size = g_array_get_element_size(arr);
+
+TCGv_i32 cpu_index = tcg_temp_ebb_new_i32();
 tcg_gen_ld_i32(cpu_index, tcg_env,
-offsetof(ArchCPU, env) + offsetof(CPUState, cpu_index));
-tcg_gen_muli_i32(cpu_index, cpu_index, g_array_get_element_size(arr));
+tcg_gen_muli_i32(cpu_index, cpu_index, entry_size);
 tcg_gen_ext_i32_ptr(ptr, cpu_index);
 tcg_temp_free_i32(cpu_index);
+tcg_gen_addi_ptr(ptr, ptr, (intptr_t) base_ptr);
+
+return ptr;
+}
+
+static void gen_inline_add_u64_cb(struct qemu_plugin_dyn_cb *cb)
+{
+TCGv_ptr ptr = gen_plugin_u64_ptr(cb->inline_insn.entry);
+TCGv_i64 val = tcg_temp_ebb_new_i64();
 
-tcg_gen_addi_ptr(ptr, ptr, (intptr_t)arr->data);
-tcg_gen_ld_i64(val, ptr, offset);
+tcg_gen_ld_i64(val, ptr, 0);
 tcg_gen_addi_i64(val, val, cb->inline_insn.imm);
-tcg_gen_st_i64(val, ptr, offset);
+tcg_gen_st_i64(val, ptr, 0);
 
 tcg_temp_free_i64(val);
 tcg_temp_free_ptr(ptr);
-- 
2.39.2




[PULL 05/11] plugins: add new inline op STORE_U64

2024-05-15 Thread Alex Bennée
From: Pierrick Bouvier 

This new operation can store an immediate u64 value to a given
scoreboard.

Reviewed-by: Richard Henderson 
Signed-off-by: Pierrick Bouvier 
Message-Id: <20240502211522.346467-4-pierrick.bouv...@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé 
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Alex Bennée 
Message-Id: <20240514174253.694591-6-alex.ben...@linaro.org>

diff --git a/include/qemu/plugin.h b/include/qemu/plugin.h
index 93da98b76c..6c21a30105 100644
--- a/include/qemu/plugin.h
+++ b/include/qemu/plugin.h
@@ -70,6 +70,7 @@ enum plugin_dyn_cb_type {
 PLUGIN_CB_REGULAR,
 PLUGIN_CB_MEM_REGULAR,
 PLUGIN_CB_INLINE_ADD_U64,
+PLUGIN_CB_INLINE_STORE_U64,
 };
 
 /*
diff --git a/include/qemu/qemu-plugin.h b/include/qemu/qemu-plugin.h
index 4fc6c3739b..c5cac897a0 100644
--- a/include/qemu/qemu-plugin.h
+++ b/include/qemu/qemu-plugin.h
@@ -305,12 +305,12 @@ void qemu_plugin_register_vcpu_tb_exec_cb(struct 
qemu_plugin_tb *tb,
  * enum qemu_plugin_op - describes an inline op
  *
  * @QEMU_PLUGIN_INLINE_ADD_U64: add an immediate value uint64_t
- *
- * Note: currently only a single inline op is supported.
+ * @QEMU_PLUGIN_INLINE_STORE_U64: store an immediate value uint64_t
  */
 
 enum qemu_plugin_op {
 QEMU_PLUGIN_INLINE_ADD_U64,
+QEMU_PLUGIN_INLINE_STORE_U64,
 };
 
 /**
diff --git a/accel/tcg/plugin-gen.c b/accel/tcg/plugin-gen.c
index 97868781fe..88976289eb 100644
--- a/accel/tcg/plugin-gen.c
+++ b/accel/tcg/plugin-gen.c
@@ -145,6 +145,16 @@ static void gen_inline_add_u64_cb(struct 
qemu_plugin_dyn_cb *cb)
 tcg_temp_free_ptr(ptr);
 }
 
+static void gen_inline_store_u64_cb(struct qemu_plugin_dyn_cb *cb)
+{
+TCGv_ptr ptr = gen_plugin_u64_ptr(cb->inline_insn.entry);
+TCGv_i64 val = tcg_constant_i64(cb->inline_insn.imm);
+
+tcg_gen_st_i64(val, ptr, 0);
+
+tcg_temp_free_ptr(ptr);
+}
+
 static void gen_mem_cb(struct qemu_plugin_dyn_cb *cb,
qemu_plugin_meminfo_t meminfo, TCGv_i64 addr)
 {
@@ -170,6 +180,9 @@ static void inject_cb(struct qemu_plugin_dyn_cb *cb)
 case PLUGIN_CB_INLINE_ADD_U64:
 gen_inline_add_u64_cb(cb);
 break;
+case PLUGIN_CB_INLINE_STORE_U64:
+gen_inline_store_u64_cb(cb);
+break;
 default:
 g_assert_not_reached();
 }
diff --git a/plugins/core.c b/plugins/core.c
index 59771eda8f..848d482fc4 100644
--- a/plugins/core.c
+++ b/plugins/core.c
@@ -321,6 +321,8 @@ static enum plugin_dyn_cb_type op_to_cb_type(enum 
qemu_plugin_op op)
 switch (op) {
 case QEMU_PLUGIN_INLINE_ADD_U64:
 return PLUGIN_CB_INLINE_ADD_U64;
+case QEMU_PLUGIN_INLINE_STORE_U64:
+return PLUGIN_CB_INLINE_STORE_U64;
 default:
 g_assert_not_reached();
 }
@@ -535,6 +537,9 @@ void exec_inline_op(struct qemu_plugin_dyn_cb *cb, int 
cpu_index)
 case QEMU_PLUGIN_INLINE_ADD_U64:
 *val += cb->inline_insn.imm;
 break;
+case QEMU_PLUGIN_INLINE_STORE_U64:
+*val = cb->inline_insn.imm;
+break;
 default:
 g_assert_not_reached();
 }
@@ -562,6 +567,7 @@ void qemu_plugin_vcpu_mem_cb(CPUState *cpu, uint64_t vaddr,
vaddr, cb->userp);
 break;
 case PLUGIN_CB_INLINE_ADD_U64:
+case PLUGIN_CB_INLINE_STORE_U64:
 exec_inline_op(cb, cpu->cpu_index);
 break;
 default:
-- 
2.39.2




[PULL 08/11] tests/plugin/inline: add test for conditional callback

2024-05-15 Thread Alex Bennée
From: Pierrick Bouvier 

Count the number of tb and insn executed using a conditional callback. We
ensure the callback has been called the expected number of times (per vcpu).

Reviewed-by: Richard Henderson 
Signed-off-by: Pierrick Bouvier 
Message-Id: <20240502211522.346467-7-pierrick.bouv...@linaro.org>
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Alex Bennée 
Message-Id: <20240514174253.694591-9-alex.ben...@linaro.org>

diff --git a/tests/plugin/inline.c b/tests/plugin/inline.c
index 103c3a22f6..cd63827b7d 100644
--- a/tests/plugin/inline.c
+++ b/tests/plugin/inline.c
@@ -20,8 +20,14 @@ typedef struct {
 uint64_t count_insn_inline;
 uint64_t count_mem;
 uint64_t count_mem_inline;
+uint64_t tb_cond_num_trigger;
+uint64_t tb_cond_track_count;
+uint64_t insn_cond_num_trigger;
+uint64_t insn_cond_track_count;
 } CPUCount;
 
+static const uint64_t cond_trigger_limit = 100;
+
 typedef struct {
 uint64_t data_insn;
 uint64_t data_tb;
@@ -35,6 +41,10 @@ static qemu_plugin_u64 count_insn;
 static qemu_plugin_u64 count_insn_inline;
 static qemu_plugin_u64 count_mem;
 static qemu_plugin_u64 count_mem_inline;
+static qemu_plugin_u64 tb_cond_num_trigger;
+static qemu_plugin_u64 tb_cond_track_count;
+static qemu_plugin_u64 insn_cond_num_trigger;
+static qemu_plugin_u64 insn_cond_track_count;
 static struct qemu_plugin_scoreboard *data;
 static qemu_plugin_u64 data_insn;
 static qemu_plugin_u64 data_tb;
@@ -56,12 +66,19 @@ static void stats_insn(void)
 const uint64_t per_vcpu = qemu_plugin_u64_sum(count_insn);
 const uint64_t inl_per_vcpu =
 qemu_plugin_u64_sum(count_insn_inline);
+const uint64_t cond_num_trigger =
+qemu_plugin_u64_sum(insn_cond_num_trigger);
+const uint64_t cond_track_left = 
qemu_plugin_u64_sum(insn_cond_track_count);
+const uint64_t conditional =
+cond_num_trigger * cond_trigger_limit + cond_track_left;
 printf("insn: %" PRIu64 "\n", expected);
 printf("insn: %" PRIu64 " (per vcpu)\n", per_vcpu);
 printf("insn: %" PRIu64 " (per vcpu inline)\n", inl_per_vcpu);
+printf("insn: %" PRIu64 " (cond cb)\n", conditional);
 g_assert(expected > 0);
 g_assert(per_vcpu == expected);
 g_assert(inl_per_vcpu == expected);
+g_assert(conditional == expected);
 }
 
 static void stats_tb(void)
@@ -70,12 +87,18 @@ static void stats_tb(void)
 const uint64_t per_vcpu = qemu_plugin_u64_sum(count_tb);
 const uint64_t inl_per_vcpu =
 qemu_plugin_u64_sum(count_tb_inline);
+const uint64_t cond_num_trigger = qemu_plugin_u64_sum(tb_cond_num_trigger);
+const uint64_t cond_track_left = qemu_plugin_u64_sum(tb_cond_track_count);
+const uint64_t conditional =
+cond_num_trigger * cond_trigger_limit + cond_track_left;
 printf("tb: %" PRIu64 "\n", expected);
 printf("tb: %" PRIu64 " (per vcpu)\n", per_vcpu);
 printf("tb: %" PRIu64 " (per vcpu inline)\n", inl_per_vcpu);
+printf("tb: %" PRIu64 " (conditional cb)\n", conditional);
 g_assert(expected > 0);
 g_assert(per_vcpu == expected);
 g_assert(inl_per_vcpu == expected);
+g_assert(conditional == expected);
 }
 
 static void stats_mem(void)
@@ -104,14 +127,35 @@ static void plugin_exit(qemu_plugin_id_t id, void *udata)
 const uint64_t insn_inline = qemu_plugin_u64_get(count_insn_inline, i);
 const uint64_t mem = qemu_plugin_u64_get(count_mem, i);
 const uint64_t mem_inline = qemu_plugin_u64_get(count_mem_inline, i);
-printf("cpu %d: tb (%" PRIu64 ", %" PRIu64 ") | "
-   "insn (%" PRIu64 ", %" PRIu64 ") | "
+const uint64_t tb_cond_trigger =
+qemu_plugin_u64_get(tb_cond_num_trigger, i);
+const uint64_t tb_cond_left =
+qemu_plugin_u64_get(tb_cond_track_count, i);
+const uint64_t insn_cond_trigger =
+qemu_plugin_u64_get(insn_cond_num_trigger, i);
+const uint64_t insn_cond_left =
+qemu_plugin_u64_get(insn_cond_track_count, i);
+printf("cpu %d: tb (%" PRIu64 ", %" PRIu64
+   ", %" PRIu64 " * %" PRIu64 " + %" PRIu64
+   ") | "
+   "insn (%" PRIu64 ", %" PRIu64
+   ", %" PRIu64 " * %" PRIu64 " + %" PRIu64
+   ") | "
"mem (%" PRIu64 ", %" PRIu64 ")"
"\n",
-   i, tb, tb_inline, insn, insn_inline, mem, mem_inline);
+   i,
+   tb, tb_inline,
+   tb_cond_trigger, cond_trigger_limit, tb_cond_left,
+   insn, insn_inline,
+   insn_cond_trigger, cond_trigger_limit, insn_cond_left,
+   mem, mem_inline);
 g_assert(tb == tb_inline);
 g_assert(insn == insn_inline);
 g_assert(mem == mem_inline);
+g_assert(tb_cond_trigger == tb / cond_trigger_limit);
+g_assert(tb_cond_left == tb % cond_trigger_limit);
+g_assert(insn_cond_trigger == insn / cond_trigger_limit);
+g_assert(insn_cond_l

[PULL 11/11] plugins: remove op from qemu_plugin_inline_cb

2024-05-15 Thread Alex Bennée
From: Pierrick Bouvier 

This field is not needed as the callback type already holds this
information.

Reviewed-by: Richard Henderson 
Signed-off-by: Pierrick Bouvier 
Message-Id: <20240502211522.346467-10-pierrick.bouv...@linaro.org>
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Alex Bennée 
Message-Id: <20240514174253.694591-12-alex.ben...@linaro.org>

diff --git a/include/qemu/plugin.h b/include/qemu/plugin.h
index 98d27dded9..796fc13706 100644
--- a/include/qemu/plugin.h
+++ b/include/qemu/plugin.h
@@ -83,7 +83,6 @@ struct qemu_plugin_regular_cb {
 
 struct qemu_plugin_inline_cb {
 qemu_plugin_u64 entry;
-enum qemu_plugin_op op;
 uint64_t imm;
 enum qemu_plugin_mem_rw rw;
 };
diff --git a/plugins/plugin.h b/plugins/plugin.h
index 80d5daa917..30e2299a54 100644
--- a/plugins/plugin.h
+++ b/plugins/plugin.h
@@ -108,7 +108,9 @@ void plugin_register_vcpu_mem_cb(GArray **arr,
  enum qemu_plugin_mem_rw rw,
  void *udata);
 
-void exec_inline_op(struct qemu_plugin_inline_cb *cb, int cpu_index);
+void exec_inline_op(enum plugin_dyn_cb_type type,
+struct qemu_plugin_inline_cb *cb,
+int cpu_index);
 
 int plugin_num_vcpus(void);
 
diff --git a/plugins/core.c b/plugins/core.c
index 1c85edc5e5..0726bc7f25 100644
--- a/plugins/core.c
+++ b/plugins/core.c
@@ -338,7 +338,6 @@ void plugin_register_inline_op_on_entry(GArray **arr,
 
 struct qemu_plugin_inline_cb inline_cb = { .rw = rw,
.entry = entry,
-   .op = op,
.imm = imm };
 dyn_cb = plugin_get_dyn_cb(arr);
 dyn_cb->type = op_to_cb_type(op);
@@ -557,7 +556,9 @@ void qemu_plugin_flush_cb(void)
 plugin_cb__simple(QEMU_PLUGIN_EV_FLUSH);
 }
 
-void exec_inline_op(struct qemu_plugin_inline_cb *cb, int cpu_index)
+void exec_inline_op(enum plugin_dyn_cb_type type,
+struct qemu_plugin_inline_cb *cb,
+int cpu_index)
 {
 char *ptr = cb->entry.score->data->data;
 size_t elem_size = g_array_get_element_size(
@@ -565,11 +566,11 @@ void exec_inline_op(struct qemu_plugin_inline_cb *cb, int 
cpu_index)
 size_t offset = cb->entry.offset;
 uint64_t *val = (uint64_t *)(ptr + offset + cpu_index * elem_size);
 
-switch (cb->op) {
-case QEMU_PLUGIN_INLINE_ADD_U64:
+switch (type) {
+case PLUGIN_CB_INLINE_ADD_U64:
 *val += cb->imm;
 break;
-case QEMU_PLUGIN_INLINE_STORE_U64:
+case PLUGIN_CB_INLINE_STORE_U64:
 *val = cb->imm;
 break;
 default:
@@ -601,7 +602,7 @@ void qemu_plugin_vcpu_mem_cb(CPUState *cpu, uint64_t vaddr,
 case PLUGIN_CB_INLINE_ADD_U64:
 case PLUGIN_CB_INLINE_STORE_U64:
 if (rw && cb->inline_insn.rw) {
-exec_inline_op(&cb->inline_insn, cpu->cpu_index);
+exec_inline_op(cb->type, &cb->inline_insn, cpu->cpu_index);
 }
 break;
 default:
-- 
2.39.2




Re: [PATCH 2/3] migration/colo: make colo_incoming_co() return void

2024-05-15 Thread Fabiano Rosas
Li Zhijian via  writes:

> Currently, it always returns 0, no need to check the return value at all.
> In addition, enter colo coroutine only if migration_incoming_colo_enabled()
> is true.
> Once the destination side enters the COLO* state, the COLO process will
> take over the remaining processes until COLO exits.
>
> Signed-off-by: Li Zhijian 
> ---
>  migration/colo.c  | 9 ++---
>  migration/migration.c | 6 +++---
>  2 files changed, 5 insertions(+), 10 deletions(-)
>
> diff --git a/migration/colo.c b/migration/colo.c
> index 5600a43d78..991806c06a 100644
> --- a/migration/colo.c
> +++ b/migration/colo.c
> @@ -929,16 +929,13 @@ out:
>  return NULL;
>  }
>  
> -int coroutine_fn colo_incoming_co(void)
> +void coroutine_fn colo_incoming_co(void)
>  {
>  MigrationIncomingState *mis = migration_incoming_get_current();
>  QemuThread th;
>  
>  assert(bql_locked());
> -
> -if (!migration_incoming_colo_enabled()) {
> -return 0;
> -}
> +assert(migration_incoming_colo_enabled());

FAILED: libcommon.fa.p/migration_colo.c.o
/usr/bin/gcc-13 ... ../migration/colo.c
../migration/colo.c:930:19: error: conflicting types for ‘colo_incoming_co’; 
have ‘void(void)’
  930 | void coroutine_fn colo_incoming_co(void)
  |   ^~~~
In file included from ../migration/colo.c:20:
... qemu/include/migration/colo.h:52:18: note: previous declaration of 
‘colo_incoming_co’ with type ‘int(void)’
   52 | int coroutine_fn colo_incoming_co(void);
  |  ^~~~



RE: [PULL 00/11] Ui patches

2024-05-15 Thread Kim, Dongwon
Hi Marc-André,

> -Original Message-
> From: Marc-André Lureau 
> Sent: Wednesday, May 15, 2024 3:44 AM
> To: Michael Tokarev 
> Cc: qemu-devel@nongnu.org; qemu-stable ;
> hikalium ; Kim, Dongwon 
> Subject: Re: [PULL 00/11] Ui patches
> 
> Hi
> 
> On Wed, May 15, 2024 at 2:29 PM Michael Tokarev  wrote:
> >
> > 14.05.2024 16:17, marcandre.lur...@redhat.com wrote:
> > > 
> > > UI: small fixes and improvements
> > >
> > > 
> > >
> > > Bernhard Beschow (1):
> > >ui/sdl2: Allow host to power down screen
> > >
> > > Dongwon Kim (7):
> > >ui/gtk: Draw guest frame at refresh cycle
> > >ui/gtk: Check if fence_fd is equal to or greater than 0
> > >ui/console: new dmabuf.h and dmabuf.c for QemuDmaBuf struct and
> > >  helpers
> > >ui/console: Use qemu_dmabuf_get_..() helpers instead
> > >ui/console: Use qemu_dmabuf_set_..() helpers instead
> > >ui/console: Use qemu_dmabuf_new() and free() helpers instead
> > >ui/console: move QemuDmaBuf struct def to dmabuf.c
> > >
> > > Sergii Zasenko (1):
> > >Allow UNIX socket option for VNC websocket
> > >
> > > hikalium (2):
> > >ui/gtk: Add gd_motion_event trace event
> > >ui/gtk: Fix mouse/motion event scaling issue with GTK display
> > > backend
> >
> >  From this list, it looks like
> >
> >ui/gtk: Draw guest frame at refresh cycle
> 
> I would allow a bit more time for this to be actually more widely tested.
> 
> Dongwon, wdyt?
[Kim, Dongwon] Ok, that sounds good to me.

> 
> >ui/gtk: Check if fence_fd is equal to or greater than 0
> > (questionable, minor issue)
> 
> minor, but fine in stable too.
> 
> >ui/gtk: Fix mouse/motion event scaling issue with GTK display
> > backend
> 
> ok for stable imho (even though I don't like that we don't support hidpi
> correctly, as I described in the patch review)


Re: [PATCH 03/20] docs/qapidoc: delint a tiny portion of the module

2024-05-15 Thread John Snow
On Wed, May 15, 2024, 1:27 PM Markus Armbruster  wrote:

> John Snow  writes:
>
> > On Wed, May 15, 2024 at 5:17 AM Markus Armbruster 
> wrote:
> >
> >> John Snow  writes:
> >>
> >> > In the coming patches, it's helpful to have a linting baseline.
> However,
> >> > there's no need to shuffle around the deck chairs too much, because
> most
> >> > of this code will be removed once the new qapidoc generator (the
> >> > "transmogrifier") is in place.
> >> >
> >> > To ease my pain: just turn off the black auto-formatter for most, but
> >> > not all, of qapidoc.py. This will help ensure that *new* code follows
> a
> >> > coding standard without bothering too much with cleaning up the
> existing
> >> > code.
> >> >
> >> > For manual checking for now, try "black --check qapidoc.py" from the
> >> > docs/sphinx directory. "pip install black" (without root permissions)
> if
> >> > you do not have it installed otherwise.
> >> >
> >> > Signed-off-by: John Snow 
> >> > ---
> >> >  docs/sphinx/qapidoc.py | 16 +---
> >> >  1 file changed, 9 insertions(+), 7 deletions(-)
> >> >
> >> > diff --git a/docs/sphinx/qapidoc.py b/docs/sphinx/qapidoc.py
> >> > index f270b494f01..1655682d4c7 100644
> >> > --- a/docs/sphinx/qapidoc.py
> >> > +++ b/docs/sphinx/qapidoc.py
> >> > @@ -28,28 +28,30 @@
> >> >  import re
> >> >
> >> >  from docutils import nodes
> >> > +from docutils.parsers.rst import Directive, directives
> >> >  from docutils.statemachine import ViewList
> >> > -from docutils.parsers.rst import directives, Directive
> >> > -from sphinx.errors import ExtensionError
> >> > -from sphinx.util.nodes import nested_parse_with_titles
> >> > -import sphinx
> >> > -from qapi.gen import QAPISchemaVisitor
> >> >  from qapi.error import QAPIError, QAPISemError
> >> > +from qapi.gen import QAPISchemaVisitor
> >> >  from qapi.schema import QAPISchema
> >> >
> >> > +import sphinx
> >> > +from sphinx.errors import ExtensionError
> >> > +from sphinx.util.nodes import nested_parse_with_titles
> >> > +
> >>
> >> Exchanges old pylint gripe
> >>
> >> docs/sphinx/qapidoc.py:45:4: C0412: Imports from package sphinx are
> >> not grouped (ungrouped-imports)
> >>
> >> for new gripes
> >>
> >> docs/sphinx/qapidoc.py:37:0: C0411: third party import "import
> sphinx"
> >> should be placed before "from qapi.error import QAPIError, QAPISemError"
> >> (wrong-import-order)
> >> docs/sphinx/qapidoc.py:38:0: C0411: third party import "from
> >> sphinx.errors import ExtensionError" should be placed before "from
> >> qapi.error import QAPIError, QAPISemError" (wrong-import-order)
> >> docs/sphinx/qapidoc.py:39:0: C0411: third party import "from
> >> sphinx.util.nodes import nested_parse_with_titles" should be placed
> before
> >> "from qapi.error import QAPIError, QAPISemError" (wrong-import-order)
> >>
> >> Easy enough to fix.
> >>
> >
> > I believe these errors are caused by the fact that the tools are confused
> > about the "sphinx" namespace - some interpret them as being the local
> > "module", the docs/sphinx/ directory, and others believe them to be the
> > third party external package.
> >
> > I have not been using pylint on docs/sphinx/ files because of the
> > difficulty of managing imports - this environment is generally beyond the
> > reaches of my python borgcube and at present I don't have plans to
> > integrate it.
> >
> > At the moment, I am using black, isort and flake8 for qapidoc.py and
> > they're happy with it. I am not using mypy because I never did the typing
> > boogaloo with qapidoc.py and I won't be bothering - except for any new
> code
> > I write, which *will* bother. By the end of the new transmogrifier,
> > qapidoc.py *will* strictly typecheck.
> >
> > pylint may prove to be an issue with the imports, though. isort also
> seems
> > to misunderstand "sphinx, the stuff in this folder" and "sphinx, the
> stuff
> > in a third party package" and so I'm afraid I don't have any good ability
> > to get pylint to play along, here.
> >
> > Pleading for "Sorry, this sucks and I can't figure out how to solve it
> > quickly". Maybe a future project, apologies.
>
> Is this pain we inflict on ourselves by naming the directory "sphinx"?
>

More or less, yeah. If you check the file from a CWD where there is no
"sphinx" directory, it behaves more normally.

Just not worth renaming it and futzing about for now. However, I did get an
invocation that lets me get a clean pylint run by abusing PYTHONPATH again,
so I have at least one standard baseline we can count on. I updated the
do-not-merge patch to include the special magick incantations.

Maybe in the future I'll make a qemu.plugins submodule instead, but that's
for quite a bit later.


> >> >
> >> >  # Sphinx up to 1.6 uses AutodocReporter; 1.7 and later
> >> >  # use switch_source_input. Check borrowed from kerneldoc.py.
> >> > -Use_SSI = sphinx.__version__[:3] >= '1.7'
> >> > +Use_SSI = sphinx.__version__[:3] >= "1.7"
> >> >  if Use_SSI:
> >> >  from sphinx.uti

[PATCH v3 2/5] ppc/pnv: Extend SPI model

2024-05-15 Thread Chalapathi V
In this commit, the SPI shift engine and sequencer logic are implemented.
Shift engine performs serialization and de-serialization according to the
control by the sequencer and according to the setup defined in the
configuration registers. Sequencer implements the main control logic and
FSM to handle data transmit and data receive control of the shift engine.

Signed-off-by: Chalapathi V 
---
 include/hw/ssi/pnv_spi.h|   28 +
 hw/ppc/pnv_spi_controller.c | 1074 +++
 hw/ppc/trace-events |   15 +
 3 files changed, 1117 insertions(+)

diff --git a/include/hw/ssi/pnv_spi.h b/include/hw/ssi/pnv_spi.h
index 244ee1cfc0..6e2bceab3b 100644
--- a/include/hw/ssi/pnv_spi.h
+++ b/include/hw/ssi/pnv_spi.h
@@ -8,6 +8,14 @@
  * This model Supports a connection to a single SPI responder.
  * Introduced for P10 to provide access to SPI seeproms, TPM, flash device
  * and an ADC controller.
+ *
+ * All SPI function control is mapped into the SPI register space to enable
+ * full control by firmware.
+ *
+ * SPI Controller has sequencer and shift engine. The SPI shift engine
+ * performs serialization and de-serialization according to the control by
+ * the sequencer and according to the setup defined in the configuration
+ * registers and the SPI sequencer implements the main control logic.
  */
 #include "hw/ssi/ssi.h"
 
@@ -29,6 +37,25 @@ typedef struct PnvSpiController {
 MemoryRegionxscom_spic_regs;
 /* SPI controller object number */
 uint32_tspic_num;
+uint8_t transfer_len;
+uint8_t responder_select;
+/* To verify if shift_n1 happens prior to shift_n2 */
+boolshift_n1_done;
+/* Loop counter for branch operation opcode Ex/Fx */
+uint8_t loop_counter_1;
+uint8_t loop_counter_2;
+/* N1/N2_bits specifies the size of the N1/N2 segment of a frame in bits.*/
+uint8_t N1_bits;
+uint8_t N2_bits;
+/* Number of bytes in a payload for the N1/N2 frame segment.*/
+uint8_t N1_bytes;
+uint8_t N2_bytes;
+/* Number of N1/N2 bytes marked for transmit */
+uint8_t N1_tx;
+uint8_t N2_tx;
+/* Number of N1/N2 bytes marked for receive */
+uint8_t N1_rx;
+uint8_t N2_rx;
 
 /* SPI Controller registers */
 uint64_terror_reg;
@@ -40,5 +67,6 @@ typedef struct PnvSpiController {
 uint64_treceive_data_reg;
 uint8_t sequencer_operation_reg[SPI_CONTROLLER_REG_SIZE];
 uint64_tstatus_reg;
+
 } PnvSpiController;
 #endif /* PPC_PNV_SPI_CONTROLLER_H */
diff --git a/hw/ppc/pnv_spi_controller.c b/hw/ppc/pnv_spi_controller.c
index 11b119cf0f..e87f583074 100644
--- a/hw/ppc/pnv_spi_controller.c
+++ b/hw/ppc/pnv_spi_controller.c
@@ -19,6 +19,1072 @@
 #include "hw/irq.h"
 #include "trace.h"
 
+/* PnvXferBuffer */
+typedef struct PnvXferBuffer {
+
+uint32_tlen;
+uint8_t*data;
+
+} PnvXferBuffer;
+
+/* pnv_spi_xfer_buffer_methods */
+static PnvXferBuffer *pnv_spi_xfer_buffer_new(void)
+{
+PnvXferBuffer *payload = g_malloc0(sizeof(*payload));
+
+return payload;
+}
+
+static void pnv_spi_xfer_buffer_free(PnvXferBuffer *payload)
+{
+free(payload->data);
+free(payload);
+}
+
+static uint8_t *pnv_spi_xfer_buffer_write_ptr(PnvXferBuffer *payload,
+uint32_t offset, uint32_t length)
+{
+if (payload->len < (offset + length)) {
+payload->len = offset + length;
+payload->data = g_realloc(payload->data, payload->len);
+}
+return &payload->data[offset];
+}
+
+static bool does_rdr_match(PnvSpiController *s)
+{
+/*
+ * According to spec, the mask bits that are 0 are compared and the
+ * bits that are 1 are ignored.
+ */
+uint16_t rdr_match_mask = GETFIELD(MEMORY_MAPPING_REG_RDR_MATCH_MASK,
+s->memory_mapping_reg);
+uint16_t rdr_match_val = GETFIELD(MEMORY_MAPPING_REG_RDR_MATCH_VAL,
+s->memory_mapping_reg);
+
+if ((~rdr_match_mask & rdr_match_val) == ((~rdr_match_mask) &
+GETFIELD(PPC_BITMASK(48, 63), s->receive_data_reg))) {
+return true;
+}
+return false;
+}
+
+static uint8_t get_from_offset(PnvSpiController *s, uint8_t offset)
+{
+uint8_t byte;
+
+/*
+ * Offset is an index between 0 and SPI_CONTROLLER_REG_SIZE - 1
+ * Check the offset before using it.
+ */
+if (offset < SPI_CONTROLLER_REG_SIZE) {
+byte = (s->transmit_data_reg >> (56 - offset * 8)) & 0xFF;
+} else {
+/*
+ * Log an error and return a 0xFF since we have to assign something
+ * to byte before returning.
+ */
+qemu_log_mask(LOG_GUEST_ERROR, "Invalid offset = %d used to get byte "
+  "from TDR\n", offset);
+byte = 0xff;
+}
+return byte;
+}
+
+static uint8_t read_from_frame(PnvSpiController *s, uint8_t *read

[PATCH v3 5/5] tests/qtest: Add pnv-spi-seeprom qtest

2024-05-15 Thread Chalapathi V
In this commit, add a qtest pnv-spi-seeprom-test to check the
SPI transactions between the SPI controller and the SEEPROM device.

Signed-off-by: Chalapathi V 
---
 tests/qtest/pnv-spi-seeprom-test.c | 129 +
 tests/qtest/meson.build|   1 +
 2 files changed, 130 insertions(+)
 create mode 100644 tests/qtest/pnv-spi-seeprom-test.c

diff --git a/tests/qtest/pnv-spi-seeprom-test.c 
b/tests/qtest/pnv-spi-seeprom-test.c
new file mode 100644
index 00..bfa57f3234
--- /dev/null
+++ b/tests/qtest/pnv-spi-seeprom-test.c
@@ -0,0 +1,129 @@
+/*
+ * QTest testcase for PowerNV 10 Seeprom Communications
+ *
+ * Copyright (c) 2024, IBM Corporation.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+#include 
+#include "qemu/osdep.h"
+#include "libqtest.h"
+#include "qemu/bswap.h"
+#include "hw/ssi/pnv_spi_regs.h"
+
+#define P10_XSCOM_BASE  0x000603fcull
+#define SPIC2_XSCOM_BASE0xc0040
+
+/* To transmit READ opcode and address */
+#define READ_OP_TDR_DATA0x03000100
+/*
+ * N1 shift - tx 4 bytes (transmit opcode and address)
+ * N2 shift - tx and rx 8 bytes.
+ */
+#define READ_OP_COUNTER_CONFIG  0x20402b00
+/* SEQ_OP_SELECT_RESPONDER - N1 Shift - N2 Shift * 5 - SEQ_OP_STOP */
+#define READ_OP_SEQUENCER   0x1130404040404010
+
+/* To transmit WREN(Set Write Enable Latch in status0 register) opcode */
+#define WRITE_OP_WREN   0x0600
+/* To transmit WRITE opcode, address and data */
+#define WRITE_OP_TDR_DATA   0x0300010012345678
+/* N1 shift - tx 8 bytes (transmit opcode, address and data) */
+#define WRITE_OP_COUNTER_CONFIG 0x40002000
+/* SEQ_OP_SELECT_RESPONDER - N1 Shift - SEQ_OP_STOP */
+#define WRITE_OP_SEQUENCER  0x11301000
+
+static uint64_t pnv_xscom_addr(uint32_t pcba)
+{
+return P10_XSCOM_BASE | ((uint64_t) pcba << 3);
+}
+
+static uint64_t pnv_spi_seeprom_xscom_addr(uint32_t reg)
+{
+return pnv_xscom_addr(SPIC2_XSCOM_BASE + reg);
+}
+
+static void pnv_spi_controller_xscom_write(QTestState *qts, uint32_t reg,
+uint64_t val)
+{
+qtest_writeq(qts, pnv_spi_seeprom_xscom_addr(reg), val);
+}
+
+static uint64_t pnv_spi_controller_xscom_read(QTestState *qts, uint32_t reg)
+{
+return qtest_readq(qts, pnv_spi_seeprom_xscom_addr(reg));
+}
+
+static void spi_seeprom_transaction(QTestState *qts)
+{
+/* SPI transactions to SEEPROM to read from SEEPROM image */
+pnv_spi_controller_xscom_write(qts, COUNTER_CONFIG_REG,
+READ_OP_COUNTER_CONFIG);
+pnv_spi_controller_xscom_write(qts, SEQUENCER_OPERATION_REG,
+READ_OP_SEQUENCER);
+pnv_spi_controller_xscom_write(qts, TRANSMIT_DATA_REG, READ_OP_TDR_DATA);
+pnv_spi_controller_xscom_write(qts, TRANSMIT_DATA_REG, 0);
+/* Read 5*8 bytes from SEEPROM at 0x100 */
+uint64_t rdr_val = pnv_spi_controller_xscom_read(qts, RECEIVE_DATA_REG);
+printf("RDR READ = 0x%lx\n", rdr_val);
+rdr_val = pnv_spi_controller_xscom_read(qts, RECEIVE_DATA_REG);
+rdr_val = pnv_spi_controller_xscom_read(qts, RECEIVE_DATA_REG);
+rdr_val = pnv_spi_controller_xscom_read(qts, RECEIVE_DATA_REG);
+rdr_val = pnv_spi_controller_xscom_read(qts, RECEIVE_DATA_REG);
+printf("RDR READ = 0x%lx\n", rdr_val);
+
+/* SPI transactions to SEEPROM to write to SEEPROM image */
+pnv_spi_controller_xscom_write(qts, COUNTER_CONFIG_REG,
+WRITE_OP_COUNTER_CONFIG);
+/* Set Write Enable Latch bit of status0 register */
+pnv_spi_controller_xscom_write(qts, SEQUENCER_OPERATION_REG,
+WRITE_OP_SEQUENCER);
+pnv_spi_controller_xscom_write(qts, TRANSMIT_DATA_REG, WRITE_OP_WREN);
+/* write 8 bytes to SEEPROM at 0x100 */
+pnv_spi_controller_xscom_write(qts, SEQUENCER_OPERATION_REG,
+WRITE_OP_SEQUENCER);
+pnv_spi_controller_xscom_write(qts, TRANSMIT_DATA_REG, WRITE_OP_TDR_DATA);
+}
+
+/* Find complete path of in_file in the current working directory */
+static void find_file(const char *in_file, char *in_path)
+{
+g_autofree char *cwd = g_get_current_dir();
+char *filepath = g_build_filename(cwd, in_file, NULL);
+if (!access(filepath, F_OK)) {
+strcpy(in_path, filepath);
+} else {
+strcpy(in_path, "");
+printf("File %s not found within %s\n", in_file, cwd);
+}
+}
+
+static void test_spi_seeprom(void)
+{
+QTestState *qts = NULL;
+char seepromfile[500];
+find_file("sbe_measurement_seeprom.bin.ecc", seepromfile);
+if (strcmp(seepromfile, "")) {
+printf("Starting QEMU with seeprom file.\n");
+qts = qtest_initf("-m 2G -machine powernv10 -smp 2,cores=2,"
+  "threads=1 -accel tcg,thread=single -nographic "
+  "-blockdev node-name=pib_spic2,driver=file,"
+ "filename=sbe_measurement_seeprom.b

[PATCH v3 4/5] hw/ppc: SPI controller wiring to P10 chip

2024-05-15 Thread Chalapathi V
In this commit, create SPI controller on p10 chip and connect cs irq.

The QOM tree of spi controller and seeprom are.
/machine (powernv10-machine)
  /chip[0] (power10_v2.0-pnv-chip)
/pib_spic[2] (pnv-spi-controller)
  /pnv-spi-bus.2 (SSI)
  /xscom-spi-controller-regs[0] (memory-region)

/machine (powernv10-machine)
  /peripheral-anon (container)
/device[0] (25csm04)
  /WP#[0] (irq)
  /ssi-gpio-cs[0] (irq)

(qemu) qom-get /machine/peripheral-anon /device[76] "parent_bus"
"/machine/chip[0]/pib_spic[2]/pnv-spi-bus.2"

Signed-off-by: Chalapathi V 
---
 include/hw/ppc/pnv_chip.h   |  3 +++
 hw/ppc/pnv.c| 21 -
 hw/ppc/pnv_spi_controller.c |  8 
 3 files changed, 31 insertions(+), 1 deletion(-)

diff --git a/include/hw/ppc/pnv_chip.h b/include/hw/ppc/pnv_chip.h
index 8589f3291e..d464858f79 100644
--- a/include/hw/ppc/pnv_chip.h
+++ b/include/hw/ppc/pnv_chip.h
@@ -6,6 +6,7 @@
 #include "hw/ppc/pnv_core.h"
 #include "hw/ppc/pnv_homer.h"
 #include "hw/ppc/pnv_n1_chiplet.h"
+#include "hw/ssi/pnv_spi.h"
 #include "hw/ppc/pnv_lpc.h"
 #include "hw/ppc/pnv_occ.h"
 #include "hw/ppc/pnv_psi.h"
@@ -118,6 +119,8 @@ struct Pnv10Chip {
 PnvSBE   sbe;
 PnvHomer homer;
 PnvN1Chiplet n1_chiplet;
+#define PNV10_CHIP_MAX_PIB_SPIC 6
+PnvSpiController pib_spic[PNV10_CHIP_MAX_PIB_SPIC];
 
 uint32_t nr_quads;
 PnvQuad  *quads;
diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c
index 6e3a5ccdec..6850592a85 100644
--- a/hw/ppc/pnv.c
+++ b/hw/ppc/pnv.c
@@ -1829,6 +1829,11 @@ static void pnv_chip_power10_instance_init(Object *obj)
 for (i = 0; i < pcc->i2c_num_engines; i++) {
 object_initialize_child(obj, "i2c[*]", &chip10->i2c[i], TYPE_PNV_I2C);
 }
+
+for (i = 0; i < PNV10_CHIP_MAX_PIB_SPIC ; i++) {
+object_initialize_child(obj, "pib_spic[*]", &chip10->pib_spic[i],
+TYPE_PNV_SPI_CONTROLLER);
+}
 }
 
 static void pnv_chip_power10_quad_realize(Pnv10Chip *chip10, Error **errp)
@@ -2043,7 +2048,21 @@ static void pnv_chip_power10_realize(DeviceState *dev, 
Error **errp)
   qdev_get_gpio_in(DEVICE(&chip10->psi),
PSIHB9_IRQ_SBE_I2C));
 }
-
+/* PIB SPI Controller */
+for (i = 0; i < PNV10_CHIP_MAX_PIB_SPIC; i++) {
+object_property_set_int(OBJECT(&chip10->pib_spic[i]), "spic_num",
+i, &error_fatal);
+/* pib_spic[2] connected to 25csm04 which implements 1 byte transfer */
+object_property_set_int(OBJECT(&chip10->pib_spic[i]), "transfer_len",
+(i == 2) ? 1 : 4, &error_fatal);
+if (!sysbus_realize(SYS_BUS_DEVICE(OBJECT
+(&chip10->pib_spic[i])), errp)) {
+return;
+}
+pnv_xscom_add_subregion(chip, PNV10_XSCOM_PIB_SPIC_BASE +
+i * PNV10_XSCOM_PIB_SPIC_SIZE,
+&chip10->pib_spic[i].xscom_spic_regs);
+}
 }
 
 static void pnv_rainier_i2c_init(PnvMachineState *pnv)
diff --git a/hw/ppc/pnv_spi_controller.c b/hw/ppc/pnv_spi_controller.c
index e87f583074..3d47e932de 100644
--- a/hw/ppc/pnv_spi_controller.c
+++ b/hw/ppc/pnv_spi_controller.c
@@ -1067,9 +1067,17 @@ static void operation_sequencer(PnvSpiController *s)
 static void do_reset(DeviceState *dev)
 {
 PnvSpiController *s = PNV_SPICONTROLLER(dev);
+DeviceState *ssi_dev;
 
 trace_pnv_spi_reset();
 
+/* Connect cs irq */
+ssi_dev = ssi_get_cs(s->ssi_bus, 0);
+if (ssi_dev) {
+qemu_irq cs_line = qdev_get_gpio_in_named(ssi_dev, SSI_GPIO_CS, 0);
+qdev_connect_gpio_out_named(DEVICE(s), "cs", 0, cs_line);
+}
+
 /* Reset all N1 and N2 counters, and other constants */
 s->N2_bits = 0;
 s->N2_bytes = 0;
-- 
2.39.3




[PATCH v3 0/5] hw/ppc: SPI model

2024-05-15 Thread Chalapathi V
Hello,

Thank You so much for reviewing patch v2.
In patch v3, most of Cedric's comments are addressed.
- PnvSPIBus model is removed and added SSIBus to PnvSpiController.
- Added trace-events and removed SPI_DEBUG macro.
- Added Microchip's 25csm04 device on top of m25p80 and create seeprom
  device from a command line.
- Use qdev_connect_gpio_out_named instead of sysbus_connect_irq to
  connect cs.

The new qom-tree looks like below.
(qemu) info qom-tree 
/machine (powernv10-machine)
  /chip[0] (power10_v2.0-pnv-chip)
/pib_spic[0] (pnv-spi-controller)
  /pnv-spi-bus.0 (SSI)
  /xscom-spi-controller-regs[0] (memory-region)
/pib_spic[1] (pnv-spi-controller)
  /pnv-spi-bus.1 (SSI)
  /xscom-spi-controller-regs[0] (memory-region)
/pib_spic[2] (pnv-spi-controller)
  /pnv-spi-bus.2 (SSI)
  /xscom-spi-controller-regs[0] (memory-region)
/pib_spic[3] (pnv-spi-controller)
  /pnv-spi-bus.3 (SSI)
  /xscom-spi-controller-regs[0] (memory-region)
/pib_spic[4] (pnv-spi-controller)
  /pnv-spi-bus.4 (SSI)
  /xscom-spi-controller-regs[0] (memory-region)
/pib_spic[5] (pnv-spi-controller)
  /pnv-spi-bus.5 (SSI)
  /xscom-spi-controller-regs[0] (memory-region)

(qemu) info qom-tree 
/machine (powernv10-machine)
  /peripheral-anon (container)
/device[0] (25csm04)
  /WP#[0] (irq)
  /ssi-gpio-cs[0] (irq)

Patches overview in v3.
PATCH1: Create a SPI controller model and implement configuration unit
to model SCOM registers. Move header files to include/hw/ssi/
PATCH2: SPI controller model: implement sequencer FSM and shift engine.
PATCH3: Add Microchip's SEEPROM 25csm04 model on top of m25p80.
PATCH4: Connect SPI controllers to p10 chip and connect cs lines.
PATCH5: Write a qtest pnv-spi-seeprom-test to check the SPI transactions
between spi controller and seeprom device.

Test covered:
Ran make check.

Thank You,
Chalapathi

Chalapathi V (5):
  ppc/pnv: Add SPI controller model
  ppc/pnv: Extend SPI model
  hw/block: Add Microchip's 25CSM04 to m25p80
  hw/ppc: SPI controller wiring to P10 chip
  tests/qtest: Add pnv-spi-seeprom qtest

 include/hw/ppc/pnv_chip.h  |3 +
 include/hw/ppc/pnv_xscom.h |3 +
 include/hw/ssi/pnv_spi.h   |   72 ++
 include/hw/ssi/pnv_spi_regs.h  |  114 +++
 hw/block/m25p80.c  |3 +
 hw/ppc/pnv.c   |   21 +-
 hw/ppc/pnv_spi_controller.c| 1310 
 tests/qtest/pnv-spi-seeprom-test.c |  129 +++
 hw/ppc/Kconfig |2 +
 hw/ppc/meson.build |1 +
 hw/ppc/trace-events|   21 +
 tests/qtest/meson.build|1 +
 12 files changed, 1679 insertions(+), 1 deletion(-)
 create mode 100644 include/hw/ssi/pnv_spi.h
 create mode 100644 include/hw/ssi/pnv_spi_regs.h
 create mode 100644 hw/ppc/pnv_spi_controller.c
 create mode 100644 tests/qtest/pnv-spi-seeprom-test.c

-- 
2.39.3




[PATCH v3 1/5] ppc/pnv: Add SPI controller model

2024-05-15 Thread Chalapathi V
SPI controller device model supports a connection to a single SPI responder.
This provides access to SPI seeproms, TPM, flash devices and an ADC controller.

All SPI function control is mapped into the SPI register space to enable full
control by firmware. In this commit, the SPI configuration component is
modelled; it contains all SPI configuration and status registers as well as
the hold registers for data to be sent or having been received.

An existing QEMU SSI framework is used and SSI_BUS is created.

Signed-off-by: Chalapathi V 
---
 include/hw/ppc/pnv_xscom.h|   3 +
 include/hw/ssi/pnv_spi.h  |  44 +++
 include/hw/ssi/pnv_spi_regs.h | 114 +
 hw/ppc/pnv_spi_controller.c   | 228 ++
 hw/ppc/Kconfig|   1 +
 hw/ppc/meson.build|   1 +
 hw/ppc/trace-events   |   6 +
 7 files changed, 397 insertions(+)
 create mode 100644 include/hw/ssi/pnv_spi.h
 create mode 100644 include/hw/ssi/pnv_spi_regs.h
 create mode 100644 hw/ppc/pnv_spi_controller.c

diff --git a/include/hw/ppc/pnv_xscom.h b/include/hw/ppc/pnv_xscom.h
index 6209e18492..a77b97f9b1 100644
--- a/include/hw/ppc/pnv_xscom.h
+++ b/include/hw/ppc/pnv_xscom.h
@@ -194,6 +194,9 @@ struct PnvXScomInterfaceClass {
 #define PNV10_XSCOM_PEC_PCI_BASE   0x8010800 /* index goes upwards ... */
 #define PNV10_XSCOM_PEC_PCI_SIZE   0x200
 
+#define PNV10_XSCOM_PIB_SPIC_BASE 0xc
+#define PNV10_XSCOM_PIB_SPIC_SIZE 0x20
+
 void pnv_xscom_init(PnvChip *chip, uint64_t size, hwaddr addr);
 int pnv_dt_xscom(PnvChip *chip, void *fdt, int root_offset,
  uint64_t xscom_base, uint64_t xscom_size,
diff --git a/include/hw/ssi/pnv_spi.h b/include/hw/ssi/pnv_spi.h
new file mode 100644
index 00..244ee1cfc0
--- /dev/null
+++ b/include/hw/ssi/pnv_spi.h
@@ -0,0 +1,44 @@
+/*
+ * QEMU PowerPC SPI Controller model
+ *
+ * Copyright (c) 2024, IBM Corporation.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ *
+ * This model Supports a connection to a single SPI responder.
+ * Introduced for P10 to provide access to SPI seeproms, TPM, flash device
+ * and an ADC controller.
+ */
+#include "hw/ssi/ssi.h"
+
+#ifndef PPC_PNV_SPI_CONTROLLER_H
+#define PPC_PNV_SPI_CONTROLLER_H
+
+#define TYPE_PNV_SPI_CONTROLLER "pnv-spi-controller"
+#define PNV_SPICONTROLLER(obj) \
+OBJECT_CHECK(PnvSpiController, (obj), TYPE_PNV_SPI_CONTROLLER)
+
+#define SPI_CONTROLLER_REG_SIZE 8
+
+#define TYPE_PNV_SPI_BUS "pnv-spi-bus"
+typedef struct PnvSpiController {
+SysBusDevice parent_obj;
+
+SSIBus *ssi_bus;
+qemu_irq *cs_line;
+MemoryRegionxscom_spic_regs;
+/* SPI controller object number */
+uint32_tspic_num;
+
+/* SPI Controller registers */
+uint64_terror_reg;
+uint64_tcounter_config_reg;
+uint64_tconfig_reg1;
+uint64_tclock_config_reset_control;
+uint64_tmemory_mapping_reg;
+uint64_ttransmit_data_reg;
+uint64_treceive_data_reg;
+uint8_t sequencer_operation_reg[SPI_CONTROLLER_REG_SIZE];
+uint64_tstatus_reg;
+} PnvSpiController;
+#endif /* PPC_PNV_SPI_CONTROLLER_H */
diff --git a/include/hw/ssi/pnv_spi_regs.h b/include/hw/ssi/pnv_spi_regs.h
new file mode 100644
index 00..6f613aca5e
--- /dev/null
+++ b/include/hw/ssi/pnv_spi_regs.h
@@ -0,0 +1,114 @@
+/*
+ * QEMU PowerPC SPI Controller model
+ *
+ * Copyright (c) 2023, IBM Corporation.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#ifndef SPI_CONTROLLER_REGS_H
+#define SPI_CONTROLLER_REGS_H
+
+/* Error Register */
+#define ERROR_REG   0x00
+
+/* counter_config_reg */
+#define COUNTER_CONFIG_REG  0x01
+#define COUNTER_CONFIG_REG_SHIFT_COUNT_N1   PPC_BITMASK(0, 7)
+#define COUNTER_CONFIG_REG_SHIFT_COUNT_N2   PPC_BITMASK(8, 15)
+#define COUNTER_CONFIG_REG_COUNT_COMPARE1   PPC_BITMASK(24, 31)
+#define COUNTER_CONFIG_REG_COUNT_COMPARE2   PPC_BITMASK(32, 39)
+#define COUNTER_CONFIG_REG_N1_COUNT_CONTROL PPC_BITMASK(48, 51)
+#define COUNTER_CONFIG_REG_N2_COUNT_CONTROL PPC_BITMASK(52, 55)
+
+/* config_reg */
+#define CONFIG_REG1 0x02
+
+/* clock_config_reset_control_ecc_enable_reg */
+#define CLOCK_CONFIG_REG0x03
+#define CLOCK_CONFIG_RESET_CONTROL_HARD_RESET   0x0084;
+#define CLOCK_CONFIG_REG_RESET_CONTROL  PPC_BITMASK(24, 27)
+#define CLOCK_CONFIG_REG_ECC_CONTROLPPC_BITMASK(28, 30)
+
+/* memory_mapping_reg */
+#define MEMORY_MAPPING_REG  0x04
+#define MEMORY_MAPPING_REG_MMSPISM_BASE_ADDRPPC_BITMASK(0, 15)
+#define MEMORY_MAPPING_REG_MMSPISM_ADDR_MASKPPC_BITMASK(16, 31)
+#define MEMORY_MAPPING_REG_RDR_MATCH_VALPPC_BITMASK(32, 47)
+#define MEMORY_MAPPING_REG_RDR_MATCH_MASK   PPC_BITMASK(48, 63)
+
+/* transmit_data_reg */
+#define TRANSMIT_DATA_REG   0x05
+
+/* receive_dat

[PATCH v3 3/5] hw/block: Add Microchip's 25CSM04 to m25p80

2024-05-15 Thread Chalapathi V
Add Microchip's 25CSM04 Serial EEPROM to m25p80.  25CSM04 provides 4 Mbits
of Serial EEPROM utilizing the Serial Peripheral Interface (SPI) compatible
bus. The device is organized as 524288 bytes of 8 bits each (512Kbyte) and
is optimized for use in consumer and industrial applications where reliable
and dependable nonvolatile memory storage is essential.

Signed-off-by: Chalapathi V 
---
 hw/block/m25p80.c | 3 +++
 hw/ppc/Kconfig| 1 +
 2 files changed, 4 insertions(+)

diff --git a/hw/block/m25p80.c b/hw/block/m25p80.c
index 8dec134832..824a6c5c60 100644
--- a/hw/block/m25p80.c
+++ b/hw/block/m25p80.c
@@ -357,6 +357,9 @@ static const FlashPartInfo known_devices[] = {
   .sfdp_read = m25p80_sfdp_w25q512jv },
 { INFO("w25q01jvq",   0xef4021,  0,  64 << 10, 2048, ER_4K),
   .sfdp_read = m25p80_sfdp_w25q01jvq },
+
+/* Microchip */
+{ INFO("25csm04",  0x29cc00,  0x100,  64 << 10,  8, 0) },
 };
 
 typedef enum {
diff --git a/hw/ppc/Kconfig b/hw/ppc/Kconfig
index 6f9670b377..a93430b734 100644
--- a/hw/ppc/Kconfig
+++ b/hw/ppc/Kconfig
@@ -40,6 +40,7 @@ config POWERNV
 select PCA9552
 select PCA9554
 select SSI
+select SSI_M25P80
 
 config PPC405
 bool
-- 
2.39.3




[PATCH 2/4] gdbstub: Add support for MTE in user mode

2024-05-15 Thread Gustavo Romero
This commit implements the stubs to handle the qIsAddressTagged,
qMemTag, and QMemTag GDB packets, allowing all GDB 'memory-tag'
subcommands to work with QEMU gdbstub on aarch64 user mode. It also
implements the get/set function for the special GDB MTE register
'tag_ctl', used to control the MTE fault type at runtime.

Signed-off-by: Gustavo Romero 
---
 configs/targets/aarch64-linux-user.mak |   2 +-
 target/arm/cpu.c   |   1 +
 target/arm/gdbstub.c   | 321 +
 target/arm/internals.h |   2 +
 4 files changed, 325 insertions(+), 1 deletion(-)

diff --git a/configs/targets/aarch64-linux-user.mak 
b/configs/targets/aarch64-linux-user.mak
index ba8bc5fe3f..8f0ed21d76 100644
--- a/configs/targets/aarch64-linux-user.mak
+++ b/configs/targets/aarch64-linux-user.mak
@@ -1,6 +1,6 @@
 TARGET_ARCH=aarch64
 TARGET_BASE_ARCH=arm
-TARGET_XML_FILES= gdb-xml/aarch64-core.xml gdb-xml/aarch64-fpu.xml 
gdb-xml/aarch64-pauth.xml
+TARGET_XML_FILES= gdb-xml/aarch64-core.xml gdb-xml/aarch64-fpu.xml 
gdb-xml/aarch64-pauth.xml gdb-xml/aarch64-mte.xml
 TARGET_HAS_BFLT=y
 CONFIG_SEMIHOSTING=y
 CONFIG_ARM_COMPATIBLE_SEMIHOSTING=y
diff --git a/target/arm/cpu.c b/target/arm/cpu.c
index 77f8c9c748..29f7b99a88 100644
--- a/target/arm/cpu.c
+++ b/target/arm/cpu.c
@@ -2479,6 +2479,7 @@ static void arm_cpu_realizefn(DeviceState *dev, Error 
**errp)
 
 register_cp_regs_for_features(cpu);
 arm_cpu_register_gdb_regs_for_features(cpu);
+arm_cpu_register_gdb_commands(cpu);
 
 init_cpreg_list(cpu);
 
diff --git a/target/arm/gdbstub.c b/target/arm/gdbstub.c
index a3bb73cfa7..f3897f75b3 100644
--- a/target/arm/gdbstub.c
+++ b/target/arm/gdbstub.c
@@ -474,6 +474,317 @@ static GDBFeature 
*arm_gen_dynamic_m_secextreg_feature(CPUState *cs,
 #endif
 #endif /* CONFIG_TCG */
 
+#ifdef TARGET_AARCH64
+#ifdef CONFIG_USER_ONLY
+static int aarch64_gdb_get_tag_ctl_reg(CPUState *cs, struct _GByteArray *buf, 
int reg)
+{
+ARMCPU *cpu = ARM_CPU(cs);
+CPUARMState *env = &cpu->env;
+uint64_t tcf0;
+
+assert(reg == 0);
+
+/* TCF0, bits [39:38]. */
+tcf0 = extract64(env->cp15.sctlr_el[1], 38, 2);
+
+return gdb_get_reg64(buf, tcf0);
+}
+
+static int aarch64_gdb_set_tag_ctl_reg(CPUState *cs, uint8_t *buf, int reg)
+{
+ARMCPU *cpu = ARM_CPU(cs);
+CPUARMState *env = &cpu->env;
+
+assert(reg == 0);
+
+/* Sanitize TCF0 bits. */
+*buf &= 0x03;
+
+if (!isar_feature_aa64_mte3(&cpu->isar) && *buf == 3) {
+/*
+ * If FEAT_MTE3 is not implemented, the value 0b11 is reserved, hence
+ * ignore setting it.
+ */
+return 0;
+}
+
+/*
+ * 'tag_ctl' register is actually a "pseudo-register" provided by GDB to
+ * expose options that can be controlled at runtime and has the same effect
+ * of prctl() with option PR_SET_TAGGED_ADDR_CTRL,
+ * i.e. prctl(PR_SET_TAGGED_ADDR_CTRL, tcf, 0, 0, 0), hence it controls
+ * the effect of Tag Check Faults (TCF) due to Loads and Stores in EL0.
+ */
+env->cp15.sctlr_el[1] = deposit64(env->cp15.sctlr_el[1], 38, 2, *buf);
+
+return 1;
+}
+
+static void handle_q_memtag(GArray *params, G_GNUC_UNUSED void *user_ctx)
+{
+uint64_t addr = get_param(params, 0)->val_ull;
+uint64_t len = get_param(params, 1)->val_ul;
+int type = get_param(params, 2)->val_ul;
+
+uint64_t clean_addr;
+uint8_t *tags;
+int granules_index;
+int granule_index;
+uint8_t addr_tag;
+
+g_autoptr(GString) str_buf = g_string_new(NULL);
+
+/*
+ * GDB does not query tags for a memory range on remote targets, so that's
+ * not supported either by gdbstub.
+ */
+if (len != 1) {
+gdb_put_packet("E02");
+}
+
+/* GDB never queries a tag different from an allocation tag (type 1). */
+if (type != 1) {
+gdb_put_packet("E02");
+}
+
+/* Remove any non-addressing bits. */
+clean_addr = useronly_clean_ptr(addr);
+
+/*
+ * Get pointer to all tags in the page where the address is. Note that tags
+ * are packed, so there are 2 tags packed in one byte.
+ */
+tags = page_get_target_data(clean_addr);
+
+/*
+ * Tags are per granule (16 bytes). 2 tags (4 bits each) are kept in a
+ * single byte for compactness, so first a page tag index for 2 packed
+ * granule tags (1 byte) is found, and then an index for a single granule
+ * tag (nibble) is found, and finally the address tag is obtained.
+ */
+granules_index = extract32(clean_addr, LOG2_TAG_GRANULE + 1,
+   TARGET_PAGE_BITS - LOG2_TAG_GRANULE - 1);
+granule_index = extract32(clean_addr, LOG2_TAG_GRANULE, 1);
+
+addr_tag = *(tags + granules_index);
+/* Extract tag from the right nibble. */
+if (granule_index == 0) {
+addr_tag &= 0xF;
+} else {
+addr_tag >>= 4;
+}
+
+g_string_printf(str_buf, "m%.2x", addr_tag);
+
+gdb_put_packet

[PATCH 4/4] tests/tcg/aarch64: Add MTE gdbstub tests

2024-05-15 Thread Gustavo Romero
Add tests to exercise the MTE stubs.

Signed-off-by: Gustavo Romero 
---
 tests/tcg/aarch64/Makefile.target |  11 ++-
 tests/tcg/aarch64/gdbstub/test-mte.py |  86 ++
 tests/tcg/aarch64/mte-8.c | 102 ++
 3 files changed, 197 insertions(+), 2 deletions(-)
 create mode 100644 tests/tcg/aarch64/gdbstub/test-mte.py
 create mode 100644 tests/tcg/aarch64/mte-8.c

diff --git a/tests/tcg/aarch64/Makefile.target 
b/tests/tcg/aarch64/Makefile.target
index 70d728ae9a..d2e3f251eb 100644
--- a/tests/tcg/aarch64/Makefile.target
+++ b/tests/tcg/aarch64/Makefile.target
@@ -62,7 +62,7 @@ AARCH64_TESTS += bti-2
 
 # MTE Tests
 ifneq ($(CROSS_CC_HAS_ARMV8_MTE),)
-AARCH64_TESTS += mte-1 mte-2 mte-3 mte-4 mte-5 mte-6 mte-7
+AARCH64_TESTS += mte-1 mte-2 mte-3 mte-4 mte-5 mte-6 mte-7 mte-8
 mte-%: CFLAGS += -march=armv8.5-a+memtag
 endif
 
@@ -127,7 +127,14 @@ run-gdbstub-sve-ioctls: sve-ioctls
--bin $< --test $(AARCH64_SRC)/gdbstub/test-sve-ioctl.py, \
basic gdbstub SVE ZLEN support)
 
-EXTRA_RUNS += run-gdbstub-sysregs run-gdbstub-sve-ioctls
+run-gdbstub-mte: mte-8
+   $(call run-test, $@, $(GDB_SCRIPT) \
+   --gdb $(GDB) \
+   --qemu $(QEMU) --qargs "$(QEMU_OPTS)" \
+   --bin "$< -s" --test $(AARCH64_SRC)/gdbstub/test-mte.py, \
+   gdbstub MTE support)
+
+EXTRA_RUNS += run-gdbstub-sysregs run-gdbstub-sve-ioctls run-gdbstub-mte
 endif
 endif
 
diff --git a/tests/tcg/aarch64/gdbstub/test-mte.py 
b/tests/tcg/aarch64/gdbstub/test-mte.py
new file mode 100644
index 00..6530f33ad8
--- /dev/null
+++ b/tests/tcg/aarch64/gdbstub/test-mte.py
@@ -0,0 +1,86 @@
+from __future__ import print_function
+#
+# Test GDB memory-tag commands that exercise the stubs for the 
qIsAddressTagged,
+# qMemTag, and QMemTag packets. Logical tag-only commands rely on local
+# operations, hence don't exercise any stub.
+#
+# The test consists in breaking just after a atag() call (which sets the
+# allocation tag -- see mte-8.c for details) and setting/getting tags in
+# different memory locations and ranges starting at the address of the array
+# 'a'.
+#
+# This is launched via tests/guest-debug/run-test.py
+#
+
+
+import gdb
+import re
+from test_gdbstub import main, report
+
+
+PATTERN_0 = "Memory tags for address 0x[0-9a-f]+ match \(0x[0-9a-f]+\)."
+PATTERN_1 = ".*(0x[0-9a-f]+)"
+
+
+def run_test():
+gdb.execute("break 94", False, True)
+gdb.execute("continue", False, True)
+try:
+# Test if we can check correctly that the allocation tag for
+# array 'a' matches the logical tag after atag() is called.
+co = gdb.execute("memory-tag check a", False, True)
+tags_match = re.findall(PATTERN_0, co, re.MULTILINE)
+if tags_match:
+report(True, "Logical and allocation tags match.")
+else:
+report(False, "Logical and allocation tags don't match!")
+
+# Test allocation tag 'set and print' commands. Commands on logical
+# tags rely on local operation and so don't exercise any stub.
+
+# Set the allocation tag for the first granule (16 bytes) of
+# address starting at 'a' address to a known value, i.e. 0x04.
+gdb.execute("memory-tag set-allocation-tag a 1 04", False, True)
+
+# Then set the allocation tag for the second granule to a known
+# value, i.e. 0x06. This tests that contiguous tag granules are
+# set correct and don't run over each other.
+gdb.execute("memory-tag set-allocation-tag a+16 1 06", False, True)
+
+# Read the known values back and check if they remain the same.
+
+co = gdb.execute("memory-tag print-allocation-tag a", False, True)
+first_tag = re.match(PATTERN_1, co)[1]
+
+co = gdb.execute("memory-tag print-allocation-tag a+16", False, True)
+second_tag = re.match(PATTERN_1, co)[1]
+
+if first_tag == "0x4" and second_tag == "0x6":
+report(True, "Allocation tags are correctly set/printed.")
+else:
+report(False, "Can't set/print allocation tags!")
+
+# Now test fill pattern by setting a whole page with a pattern.
+gdb.execute("memory-tag set-allocation-tag a 4096 0a0b", False, True)
+
+# And read back the tags of the last two granules in page so
+# we also test if the pattern is set correctly up to the end of
+# the page.
+co = gdb.execute("memory-tag print-allocation-tag a+4096-32", False, 
True)
+tag = re.match(PATTERN_1, co)[1]
+
+co = gdb.execute("memory-tag print-allocation-tag a+4096-16", False, 
True)
+last_tag = re.match(PATTERN_1, co)[1]
+
+if tag == "0xa" and last_tag == "0xb":
+report(True, "Fill pattern is ok.")
+else:
+report(False, "Fill pattern failed!")
+
+except gdb.error:
+# This usually happens because a GDB version that does not
+# suppor

[PATCH 3/4] tests: Gently exit from GDB when tests complete

2024-05-15 Thread Gustavo Romero
GDB commit a207f6b3a38 ('Rewrite "python" command exception handling')
changed how exit() called from Python scripts loaded by GDB behave,
turning it into an exception instead of a generic error code that is
returned. This change caused several QEMU tests to crash with the
following exception:

Python Exception : 0
Error occurred in Python: 0

This happens because in tests/guest-debug/test_gdbstub.py exit is
called after the tests have completed.

This commit fixes it by politely asking GDB to exit via gdb.execute,
passing the proper fail_count to be reported to 'make', instead of
abruptly calling exit() from the Python script.

Signed-off-by: Gustavo Romero 
---
 tests/guest-debug/test_gdbstub.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/guest-debug/test_gdbstub.py 
b/tests/guest-debug/test_gdbstub.py
index 7f71d34da1..46fbf98f0c 100644
--- a/tests/guest-debug/test_gdbstub.py
+++ b/tests/guest-debug/test_gdbstub.py
@@ -57,4 +57,4 @@ def main(test, expected_arch=None):
 pass
 
 print("All tests complete: {} failures".format(fail_count))
-exit(fail_count)
+gdb.execute(f"exit {fail_count}")
-- 
2.34.1




[PATCH 1/4] gdbstub: Add support for target-specific stubs

2024-05-15 Thread Gustavo Romero
Currently, it's not possible to have stubs specific to a given target,
even though there are GDB features which are target-specific, like, for
instance, memory tagging.

This commit introduces set_query_supported_arch,
set_gdb_gen_query_table_arch, and set_gdb_gen_set_table_arch functions
as interfaces to extend the qSupported string, the query handler table,
and set handler table per target, so allowing target-specific stub
implementation.

Besides that, it moves GdbCmdParseEntry struct, its related types, and
gdb_put_packet to include/exec/gdbstub.h so they are also available in
the target-specific stubs.

Signed-off-by: Gustavo Romero 
---
 gdbstub/gdbstub.c  | 108 +++--
 gdbstub/internals.h|  22 -
 gdbstub/syscalls.c |   1 +
 include/exec/gdbstub.h |  86 +++-
 4 files changed, 147 insertions(+), 70 deletions(-)

diff --git a/gdbstub/gdbstub.c b/gdbstub/gdbstub.c
index b3574997ea..4996530fde 100644
--- a/gdbstub/gdbstub.c
+++ b/gdbstub/gdbstub.c
@@ -920,43 +920,6 @@ static int cmd_parse_params(const char *data, const char 
*schema,
 return 0;
 }
 
-typedef void (*GdbCmdHandler)(GArray *params, void *user_ctx);
-
-/*
- * cmd_startswith -> cmd is compared using startswith
- *
- * allow_stop_reply -> true iff the gdbstub can respond to this command with a
- *   "stop reply" packet. The list of commands that accept such response is
- *   defined at the GDB Remote Serial Protocol documentation. see:
- *   
https://sourceware.org/gdb/onlinedocs/gdb/Stop-Reply-Packets.html#Stop-Reply-Packets.
- *
- * schema definitions:
- * Each schema parameter entry consists of 2 chars,
- * the first char represents the parameter type handling
- * the second char represents the delimiter for the next parameter
- *
- * Currently supported schema types:
- * 'l' -> unsigned long (stored in .val_ul)
- * 'L' -> unsigned long long (stored in .val_ull)
- * 's' -> string (stored in .data)
- * 'o' -> single char (stored in .opcode)
- * 't' -> thread id (stored in .thread_id)
- * '?' -> skip according to delimiter
- *
- * Currently supported delimiters:
- * '?' -> Stop at any delimiter (",;:=\0")
- * '0' -> Stop at "\0"
- * '.' -> Skip 1 char unless reached "\0"
- * Any other value is treated as the delimiter value itself
- */
-typedef struct GdbCmdParseEntry {
-GdbCmdHandler handler;
-const char *cmd;
-bool cmd_startswith;
-const char *schema;
-bool allow_stop_reply;
-} GdbCmdParseEntry;
-
 static inline int startswith(const char *string, const char *pattern)
 {
   return !strncmp(string, pattern, strlen(pattern));
@@ -1645,6 +1608,13 @@ static void handle_query_thread_extra(GArray *params, 
void *user_ctx)
 gdb_put_strbuf();
 }
 
+/* Arch-specific qSupported */
+char *query_supported_arch = NULL;
+void set_query_supported_arch(char *query_supported)
+{
+query_supported_arch = query_supported;
+}
+
 static void handle_query_supported(GArray *params, void *user_ctx)
 {
 CPUClass *cc;
@@ -1684,6 +1654,11 @@ static void handle_query_supported(GArray *params, void 
*user_ctx)
 }
 
 g_string_append(gdbserver_state.str_buf, ";vContSupported+;multiprocess+");
+
+if (query_supported_arch) {
+g_string_append(gdbserver_state.str_buf, query_supported_arch);
+}
+
 gdb_put_strbuf();
 }
 
@@ -1765,6 +1740,16 @@ static const GdbCmdParseEntry 
gdb_gen_query_set_common_table[] = {
 },
 };
 
+
+/* Arch-specific query table */
+static GdbCmdParseEntry *gdb_gen_query_table_arch = NULL ;
+static int gdb_gen_query_table_arch_size = 0;
+void set_gdb_gen_query_table_arch(GdbCmdParseEntry  *table, int size)
+{
+gdb_gen_query_table_arch = table;
+gdb_gen_query_table_arch_size = size;
+}
+
 static const GdbCmdParseEntry gdb_gen_query_table[] = {
 {
 .handler = handle_query_curr_tid,
@@ -1857,6 +1842,15 @@ static const GdbCmdParseEntry gdb_gen_query_table[] = {
 #endif
 };
 
+/* Arch-specific set table */
+static GdbCmdParseEntry *gdb_gen_set_table_arch = NULL;
+static int gdb_gen_set_table_arch_size = 0;
+void set_gdb_gen_set_table_arch(GdbCmdParseEntry *table, int size)
+{
+gdb_gen_set_table_arch = table;
+gdb_gen_set_table_arch_size = size;
+}
+
 static const GdbCmdParseEntry gdb_gen_set_table[] = {
 /* Order is important if has same prefix */
 {
@@ -1889,17 +1883,27 @@ static void handle_gen_query(GArray *params, void 
*user_ctx)
 return;
 }
 
-if (!process_string_cmd(get_param(params, 0)->data,
-gdb_gen_query_set_common_table,
-ARRAY_SIZE(gdb_gen_query_set_common_table))) {
+if (process_string_cmd(get_param(params, 0)->data,
+   gdb_gen_query_set_common_table,
+   ARRAY_SIZE(gdb_gen_query_set_common_table)) == 0) {
 return;
 }
 
 if (process_string_cmd(get_param(params, 0)->data,
gdb_gen_query_table,
- 

[PATCH 0/4] Add MTE stubs for aarch64 user mode

2024-05-15 Thread Gustavo Romero
This patchset adds the stubs necessary to support GDB memory tagging
commands on QEMU aarch64 user mode.

These new stubs handle the qIsAddressTagged, qMemTag, and QMemTag
packets, which allow GDB memory tagging subcommands 'check',
'print-allocation-tag', and 'set-allocation-tag' to work. The remaining
memory tagging commands ('print-logical-tag' and 'with-logical-tag')
will also work, but they don't rely on any stub because they perform
local operations.

Since the memory tagging stubs are not common to all architectures, this
patchset also introduces three functions: set_query_supported_arch,
set_gdb_gen_query_table_arch, and set_gdb_gen_set_table_arch. These
functions can be used to extend the target-specific 'qSupported' feature
string and the handlers for the 'q' (query) and 'Q' (set) packets. These
new functions are used to add the MTE stubs for the aarch64 gdbstub.
 
Note that this patchset requires a GDB that supports the
qIsAddressTagged packet (recently added to GDB), so the gdbstub MTE
tests introduced by it must be run using GDB's master branch, since the
GDB in the distros hasn't picked up the change yet.

Once GDB is built and installed locally, the tests can be exercised, for
example, this way:

make GDB=~/.local/bin/gdb run-tcg-tests-aarch64-linux-user -j 32


Cheers,
Gustavo

Gustavo Romero (4):
  gdbstub: Add support for target-specific stubs
  gdbstub: Add support for MTE in user mode
  tests: Gently exit from GDB when tests complete
  tests/tcg/aarch64: Add MTE gdbstub tests

 configs/targets/aarch64-linux-user.mak |   2 +-
 gdbstub/gdbstub.c  | 108 +
 gdbstub/internals.h|  22 --
 gdbstub/syscalls.c |   1 +
 include/exec/gdbstub.h |  86 ++-
 target/arm/cpu.c   |   1 +
 target/arm/gdbstub.c   | 321 +
 target/arm/internals.h |   2 +
 tests/guest-debug/test_gdbstub.py  |   2 +-
 tests/tcg/aarch64/Makefile.target  |  11 +-
 tests/tcg/aarch64/gdbstub/test-mte.py  |  86 +++
 tests/tcg/aarch64/mte-8.c  | 102 
 12 files changed, 670 insertions(+), 74 deletions(-)
 create mode 100644 tests/tcg/aarch64/gdbstub/test-mte.py
 create mode 100644 tests/tcg/aarch64/mte-8.c

-- 
2.34.1




Re: [PATCH 03/20] docs/qapidoc: delint a tiny portion of the module

2024-05-15 Thread Markus Armbruster
John Snow  writes:

> On Wed, May 15, 2024 at 5:17 AM Markus Armbruster  wrote:
>
>> John Snow  writes:
>>
>> > In the coming patches, it's helpful to have a linting baseline. However,
>> > there's no need to shuffle around the deck chairs too much, because most
>> > of this code will be removed once the new qapidoc generator (the
>> > "transmogrifier") is in place.
>> >
>> > To ease my pain: just turn off the black auto-formatter for most, but
>> > not all, of qapidoc.py. This will help ensure that *new* code follows a
>> > coding standard without bothering too much with cleaning up the existing
>> > code.
>> >
>> > For manual checking for now, try "black --check qapidoc.py" from the
>> > docs/sphinx directory. "pip install black" (without root permissions) if
>> > you do not have it installed otherwise.
>> >
>> > Signed-off-by: John Snow 
>> > ---
>> >  docs/sphinx/qapidoc.py | 16 +---
>> >  1 file changed, 9 insertions(+), 7 deletions(-)
>> >
>> > diff --git a/docs/sphinx/qapidoc.py b/docs/sphinx/qapidoc.py
>> > index f270b494f01..1655682d4c7 100644
>> > --- a/docs/sphinx/qapidoc.py
>> > +++ b/docs/sphinx/qapidoc.py
>> > @@ -28,28 +28,30 @@
>> >  import re
>> >
>> >  from docutils import nodes
>> > +from docutils.parsers.rst import Directive, directives
>> >  from docutils.statemachine import ViewList
>> > -from docutils.parsers.rst import directives, Directive
>> > -from sphinx.errors import ExtensionError
>> > -from sphinx.util.nodes import nested_parse_with_titles
>> > -import sphinx
>> > -from qapi.gen import QAPISchemaVisitor
>> >  from qapi.error import QAPIError, QAPISemError
>> > +from qapi.gen import QAPISchemaVisitor
>> >  from qapi.schema import QAPISchema
>> >
>> > +import sphinx
>> > +from sphinx.errors import ExtensionError
>> > +from sphinx.util.nodes import nested_parse_with_titles
>> > +
>>
>> Exchanges old pylint gripe
>>
>> docs/sphinx/qapidoc.py:45:4: C0412: Imports from package sphinx are
>> not grouped (ungrouped-imports)
>>
>> for new gripes
>>
>> docs/sphinx/qapidoc.py:37:0: C0411: third party import "import sphinx"
>> should be placed before "from qapi.error import QAPIError, QAPISemError"
>> (wrong-import-order)
>> docs/sphinx/qapidoc.py:38:0: C0411: third party import "from
>> sphinx.errors import ExtensionError" should be placed before "from
>> qapi.error import QAPIError, QAPISemError" (wrong-import-order)
>> docs/sphinx/qapidoc.py:39:0: C0411: third party import "from
>> sphinx.util.nodes import nested_parse_with_titles" should be placed before
>> "from qapi.error import QAPIError, QAPISemError" (wrong-import-order)
>>
>> Easy enough to fix.
>>
>
> I believe these errors are caused by the fact that the tools are confused
> about the "sphinx" namespace - some interpret them as being the local
> "module", the docs/sphinx/ directory, and others believe them to be the
> third party external package.
>
> I have not been using pylint on docs/sphinx/ files because of the
> difficulty of managing imports - this environment is generally beyond the
> reaches of my python borgcube and at present I don't have plans to
> integrate it.
>
> At the moment, I am using black, isort and flake8 for qapidoc.py and
> they're happy with it. I am not using mypy because I never did the typing
> boogaloo with qapidoc.py and I won't be bothering - except for any new code
> I write, which *will* bother. By the end of the new transmogrifier,
> qapidoc.py *will* strictly typecheck.
>
> pylint may prove to be an issue with the imports, though. isort also seems
> to misunderstand "sphinx, the stuff in this folder" and "sphinx, the stuff
> in a third party package" and so I'm afraid I don't have any good ability
> to get pylint to play along, here.
>
> Pleading for "Sorry, this sucks and I can't figure out how to solve it
> quickly". Maybe a future project, apologies.

Is this pain we inflict on ourselves by naming the directory "sphinx"?

>> >
>> >  # Sphinx up to 1.6 uses AutodocReporter; 1.7 and later
>> >  # use switch_source_input. Check borrowed from kerneldoc.py.
>> > -Use_SSI = sphinx.__version__[:3] >= '1.7'
>> > +Use_SSI = sphinx.__version__[:3] >= "1.7"
>> >  if Use_SSI:
>> >  from sphinx.util.docutils import switch_source_input
>> >  else:
>> >  from sphinx.ext.autodoc import AutodocReporter
>> >
>> >
>> > -__version__ = '1.0'
>> > +__version__ = "1.0"
>> >
>> >
>> > +# fmt: off
>>
>> I figure this tells black to keep quiet for the remainder of the file.
>> Worth a comment, I think.
>>
>> >  # Function borrowed from pydash, which is under the MIT license
>> >  def intersperse(iterable, separator):
>> >  """Yield the members of *iterable* interspersed with *separator*."""
>>
>> With my comments addressed
>> Reviewed-by: Markus Armbruster 
>>
>
> ^ Dropping this unless you're okay with the weird import orders owing to
> the strange import paradigm in the sphinx folder.

Feel free to keep it.




Re: [PATCH v3 5/5] virtio-gpu: fix v2 migration

2024-05-15 Thread Daniel P . Berrangé
On Wed, May 15, 2024 at 11:03:27AM -0600, Peter Xu wrote:
> On Wed, May 15, 2024 at 05:03:44PM +0100, Daniel P. Berrangé wrote:
> > Above all, I'm failing to see why there's a compelling reason
> > for virtio_gpu to diverge from our long standing practice of
> > adding a named property flag "virtio_scanout_vmstate_fix"
> > on the machine class, and then setting it in machine types
> > which need it.
> 
> The reason to introduce that is definitely avoid introducing fields /
> properties in similar cases in which case all the fields may represent the
> same thing ("return true if MC is older than xxx version").  Especially
> when such change is not bound to a new feature so in which case it won't
> make sense to allow user to even control that propoerty, even if we
> exported this "x-virtio-scanout-fix" property, but now we must export it
> because compat fields require it.
> 
> However I think agree that having upstream specific MC versions in VMSD
> checks is kind of unwanted.  I think the major problem is we don't have
> that extra machine type abstract where we can have simply a number showing
> the release of QEMU, then we can map that number to whatever
> upstream/downstream machine types.  E.g.:
> 
>   Release No. Upstream version   Downstream version
>   50  9.0Y.0
>   51  9.1
>   52  9.2Y.1
>   ...

Downstream versions do not map cleanly to individual upstream versions
across the whole code base. If we have two distinct features in upstream
version X, each of them may map to a different downstream release. 

This can happen when downstream skips one or more upstream releases.
One feature from the skipped release might be backported to an earlier
downstream release, while other feature might not arrive downstream
until they later rebase. Version based checks are an inherently
undesirable idea for a situation where there is any backporting taking
place, whether its machine type versions or something else. Named feature
/ flag based checks are always the way to go.


With regards,
Daniel
-- 
|: https://berrange.com  -o-https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org -o-https://fstop138.berrange.com :|
|: https://entangle-photo.org-o-https://www.instagram.com/dberrange :|




Re: [PATCH v11 08/10] virtio-gpu: Handle resource blob commands

2024-05-15 Thread Dmitry Osipenko
On 5/15/24 20:04, Akihiko Odaki wrote:
>>
> 
> VIRTIO_GPU_CMD_RESOURCE_UNREF should also call
> virtio_gpu_virgl_async_unmap_resource_blob(). I guess that's the
> original intention of having a function for this instead of inlining the
> content of this function to virgl_cmd_resource_unmap_blob().

Correct, previous patchset versions unmapped resource on unref.

In v11 I dropped unmapping from unref to avoid adding additional
`async_unmap_in_progress` flag because normally map/unmap will be
balanced by guest anyways.

The virtio-gpu spec doesn't say that a resource has to be implicitly
unmapped on unref. In the case of a Linux guest, it would actually be a
bug to unref a mapped resource, because the guest would continue to map
and use the destroyed resource.

-- 
Best regards,
Dmitry




Re: [PATCH v6 3/9] migration: Extend migration_file_set_error() with Error* argument

2024-05-15 Thread Cédric Le Goater

On 5/15/24 09:04, Eric Auger wrote:

Hi Cédric,

On 5/14/24 17:31, Cédric Le Goater wrote:

Use it to update the current error of the migration stream if
available and if not, simply print out the error. Next changes will
update with an error to report.

Reviewed-by: Avihai Horon 
Acked-by: Fabiano Rosas 
Signed-off-by: Cédric Le Goater 
---

  Changes in v6:

  - Commit log improvements (Avihai)
  
  include/migration/misc.h | 2 +-

  hw/vfio/common.c | 2 +-
  hw/vfio/migration.c  | 4 ++--
  migration/migration.c| 6 --
  4 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/include/migration/misc.h b/include/migration/misc.h
index 
bf7339cc1e6430226127fb6a878d06b458170858..bfadc5613bac614a316e5aed7da95d8c7845cf42
 100644
--- a/include/migration/misc.h
+++ b/include/migration/misc.h
@@ -97,7 +97,7 @@ void migration_add_notifier_mode(NotifierWithReturn *notify,
  
  void migration_remove_notifier(NotifierWithReturn *notify);

  bool migration_is_running(void);
-void migration_file_set_error(int err);
+void migration_file_set_error(int ret, Error *err);
  
  /* True if incoming migration entered POSTCOPY_INCOMING_DISCARD */

  bool migration_in_incoming_postcopy(void);
diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index 
b5102f54a6474a50c6366e8fbce23812d55e384e..ed5ee6349ced78b3bde68d2ee506f78ba1a9dd9c
 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -150,7 +150,7 @@ bool vfio_viommu_preset(VFIODevice *vbasedev)
  static void vfio_set_migration_error(int err)

nit: I would have renamed err into ret here to avoid any further confusion.


That was done in v5 :

  https://lore.kernel.org/qemu-devel/20240506092053.388578-11-...@redhat.com/

in the last patch, that I dropped in v6 because I believe it needs more
work. I will address these last changes, including the err->ret rename,
in a followup series if that's ok with you.


Thanks,

C.





  {
  if (migration_is_setup_or_active()) {
-migration_file_set_error(err);
+migration_file_set_error(err, NULL);
  }
  }
  
diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c

index 
06ae40969b6c19037e190008e14f28be646278cd..bf2fd0759ba6e4fb103cc5c1a43edb180a3d0de4
 100644
--- a/hw/vfio/migration.c
+++ b/hw/vfio/migration.c
@@ -726,7 +726,7 @@ static void vfio_vmstate_change_prepare(void *opaque, bool 
running,
   * Migration should be aborted in this case, but vm_state_notify()
   * currently does not support reporting failures.
   */
-migration_file_set_error(ret);
+migration_file_set_error(ret, NULL);
  }
  
  trace_vfio_vmstate_change_prepare(vbasedev->name, running,

@@ -756,7 +756,7 @@ static void vfio_vmstate_change(void *opaque, bool running, 
RunState state)
   * Migration should be aborted in this case, but vm_state_notify()
   * currently does not support reporting failures.
   */
-migration_file_set_error(ret);
+migration_file_set_error(ret, NULL);
  }
  
  trace_vfio_vmstate_change(vbasedev->name, running, RunState_str(state),

diff --git a/migration/migration.c b/migration/migration.c
index 
e88b24f1e6cbe82dad3f890c00e264d2ab6ad355..70d66a441bf04761decf91dbe57ce52c57fde58f
 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -2994,13 +2994,15 @@ static MigThrError postcopy_pause(MigrationState *s)
  }
  }
  
-void migration_file_set_error(int err)

+void migration_file_set_error(int ret, Error *err)
  {
  MigrationState *s = current_migration;
  
  WITH_QEMU_LOCK_GUARD(&s->qemu_file_lock) {

  if (s->to_dst_file) {
-qemu_file_set_error(s->to_dst_file, err);
+qemu_file_set_error_obj(s->to_dst_file, ret, err);
+} else if (err) {
+error_report_err(err);
  }
  }
  }

Reviewed-by: Eric Auger 

Eric






Re: [PATCH v6 2/9] vfio: Add Error** argument to vfio_devices_dma_logging_start()

2024-05-15 Thread Cédric Le Goater

On 5/15/24 08:53, Eric Auger wrote:

Hi Cédric,
On 5/14/24 17:31, Cédric Le Goater wrote:

This allows to update the Error argument of the VFIO log_global_start()
handler. Errors for container based logging will also be propagated to
qemu_savevm_state_setup() when the ram save_setup() handler is executed.

nit: also now collect & print errors from
vfio_container_set_dirty_page_tracking()


OK. To avoid resending, I amended the commit log with :

"Also, errors from vfio_container_set_dirty_page_tracking() are now
collected and reported."


Thanks,

C.





The vfio_set_migration_error() call becomes redundant in
vfio_listener_log_global_start(). Remove it.

Reviewed-by: Philippe Mathieu-Daudé 
Reviewed-by: Avihai Horon 
Signed-off-by: Cédric Le Goater 
---

  Changes in v6:

  - Commit log improvements (Avihai)
  
  Changes in v5:


  - Used error_setg_errno() in vfio_devices_dma_logging_start()
  
  hw/vfio/common.c | 26 +++---

  1 file changed, 15 insertions(+), 11 deletions(-)

diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index 
485e53916491f1164d29e739fb7106c0c77df737..b5102f54a6474a50c6366e8fbce23812d55e384e
 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -1027,7 +1027,8 @@ static void vfio_device_feature_dma_logging_start_destroy(
  g_free(feature);
  }
  
-static int vfio_devices_dma_logging_start(VFIOContainerBase *bcontainer)

+static int vfio_devices_dma_logging_start(VFIOContainerBase *bcontainer,
+  Error **errp)
  {
  struct vfio_device_feature *feature;
  VFIODirtyRanges ranges;
@@ -1038,6 +1039,7 @@ static int 
vfio_devices_dma_logging_start(VFIOContainerBase *bcontainer)
  feature = vfio_device_feature_dma_logging_start_create(bcontainer,
 &ranges);
  if (!feature) {
+error_setg_errno(errp, errno, "Failed to prepare DMA logging");
  return -errno;
  }
  
@@ -1049,8 +1051,8 @@ static int vfio_devices_dma_logging_start(VFIOContainerBase *bcontainer)

  ret = ioctl(vbasedev->fd, VFIO_DEVICE_FEATURE, feature);
  if (ret) {
  ret = -errno;
-error_report("%s: Failed to start DMA logging, err %d (%s)",
- vbasedev->name, ret, strerror(errno));
+error_setg_errno(errp, errno, "%s: Failed to start DMA logging",
+ vbasedev->name);
  goto out;
  }
  vbasedev->dirty_tracking = true;
@@ -1069,20 +1071,19 @@ out:
  static bool vfio_listener_log_global_start(MemoryListener *listener,
 Error **errp)
  {
+ERRP_GUARD();
  VFIOContainerBase *bcontainer = container_of(listener, VFIOContainerBase,
   listener);
  int ret;
  
  if (vfio_devices_all_device_dirty_tracking(bcontainer)) {

-ret = vfio_devices_dma_logging_start(bcontainer);
+ret = vfio_devices_dma_logging_start(bcontainer, errp);
  } else {
-ret = vfio_container_set_dirty_page_tracking(bcontainer, true, NULL);
+ret = vfio_container_set_dirty_page_tracking(bcontainer, true, errp);
  }
  
  if (ret) {

-error_report("vfio: Could not start dirty page tracking, err: %d (%s)",
- ret, strerror(-ret));
-vfio_set_migration_error(ret);
+error_prepend(errp, "vfio: Could not start dirty page tracking - ");
  }
  return !ret;
  }
@@ -1091,17 +1092,20 @@ static void 
vfio_listener_log_global_stop(MemoryListener *listener)
  {
  VFIOContainerBase *bcontainer = container_of(listener, VFIOContainerBase,
   listener);
+Error *local_err = NULL;
  int ret = 0;
  
  if (vfio_devices_all_device_dirty_tracking(bcontainer)) {

  vfio_devices_dma_logging_stop(bcontainer);
  } else {
-ret = vfio_container_set_dirty_page_tracking(bcontainer, false, NULL);
+ret = vfio_container_set_dirty_page_tracking(bcontainer, false,
+ &local_err);
  }
  
  if (ret) {

-error_report("vfio: Could not stop dirty page tracking, err: %d (%s)",
- ret, strerror(-ret));
+error_prepend(&local_err,
+  "vfio: Could not stop dirty page tracking - ");
+error_report_err(local_err);
  vfio_set_migration_error(ret);
  }
  }


Reviewed-by: Eric Auger 

Eric






Re: [PATCH 1/2] hw/core: allow parameter=1 for SMP topology on any machine

2024-05-15 Thread Daniel P . Berrangé
On Tue, May 14, 2024 at 11:49:40AM +0800, Zhao Liu wrote:
> > I'm failing to see what real world technical problems QEMU faces
> > with a parameter being set to '1' by a mgmt app, when QEMU itself
> > treats all omitted values as being '1' anyway.
> > 
> > If we're trying to faithfully model the real world, then restricting
> > the topology against machine types though still looks inherently wrong.
> > The valid topology ought to be constrained based on the named CPU model.
> > eg it doesn't make sense to allow 'dies=4' with a Skylake CPU model,
> > only an EPYC CPU model, especially if we want to model cache info in
> > a way that matches the real world silicon better.
> 
> Thanks for figuring out this. This issue is related with Intel CPU
> cache model: currently Intel code defaults L3 shared at die level.
> This could be resolved by defining the accurate default cache topology
> level for CPU model and make Intel CPU models share L3 at package level
> except only Cascadelake.
> 
> Then user could define any other topology levels (die/module) for
> Icelake and this won't change the cache topology, unless the user adds
> more sockets or further customizes the cache topology in another way [1].
> Do you agree with this solution?

Broadly speaking yes. Historically we have created trouble for
ourselves (and/or our users) by allowing creation of "weird"
guest CPU models, which don't resemble those which can be found
in real world silicon. Problems specifically have been around
unusual combinations of CPUID features eg user enabled X, but not Y,
where real silicon always has X + Y enabled, and guest OS assumed
this is always the case.

So if our named CPU models can more faithfully match what you might
see in terms of cache topology in the real world, that's likely to
be a good thing.

> > As above, I think that restrictions based on machine type, while nice and
> > simple, are incorrect long term. If we did impose restrictions based on
> > CPU model, then we could trivially expose this info to mgmt apps via the
> > existing mechanism for querying supported CPU models. Limiting based on
> > CPU model, however, has potentially greater back compat issues, though
> > it would be strictly more faithful to hardware.
> 
> I think as long as the default cache topology model is clearly defined,
> users can further customize the CPU topology and adjust the cache
> topology based on it. After all, topology is architectural, not CPU
> model-specific (linux support for topology does not take into account
> specific CPU models).
> 
> For example, x86, for simplicity, can we assume that all x86 CPU models
> support all x86 topology levels (thread/core/module/die/package) without
> making distinctions based on specific CPU models?

Hmm, true, if we have direct control over cache topology, the
CPU topology is less critical. I'd still be wary of suggesting
it is a good idea to use CPU topology configs that don't reflect
something the CPU vendor has conceivably used in real silicon.

> That way as long as the user doesn't change the default topology, then
> Guest's cache and other topology information won't be "corrupted".

> And there's one more question, does this rollback mean that smp's
> parameters must have compatible default values for all architectures?

Historically we preferred "sockets", when filling missing topology,
then more recently we switched to prefer "cores", since high core
counts are generally more common in real world than high socket
counts.

In theory at some point, one might want to fill in 'dies > 0' for
EPYC, or 'modules > 0' for appropriate Intel CPU models, but doing
the reverse while theoretically valid, would be weird as no such
topology would exist in real silicon.

Ultimately if you're allowing QEMU guest vCPUs threads to float
freely across host CPUs, there is little point in setting dies/
modules/threads to a value other than 1, because the guest OS
won't benefit from understanding cache differences for dies/
modules/threads/etc, if the vCPU can be moved between host CPUs
at any time by the host OS scheduler.

Fine grained control over dies/modules/threads only makes more
sense if you have strictly pinning vCPU threads 1:1 to host CPUs

IOW, simply preferring "cores" for everything is a reasonable
default long term plan for everything, unless the specific
architecture target has no concept of "cores".

With regards,
Daniel
-- 
|: https://berrange.com  -o-https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org -o-https://fstop138.berrange.com :|
|: https://entangle-photo.org-o-https://www.instagram.com/dberrange :|




Re: [PATCH 04/20] qapi/parser: preserve indentation in QAPIDoc sections

2024-05-15 Thread John Snow
On Wed, May 15, 2024 at 10:18 AM Markus Armbruster 
wrote:

> John Snow  writes:
>
> > On Wed, May 15, 2024, 7:50 AM Markus Armbruster 
> wrote:
> >
> >> John Snow  writes:
> >>
> >> > Prior to this patch, a section like this:
> >> >
> >> > @name: lorem ipsum
> >> >dolor sit amet
> >> >  consectetur adipiscing elit
> >> >
> >> > would be parsed as:
> >> >
> >> > "lorem ipsum\ndolor sit amet\n  consectetur adipiscing elit"
> >> >
> >> > We want to preserve the indentation for even the first body line so
> that
> >> > the entire block can be parsed directly as rST. This patch would now
> >> > parse that segment as:
> >> >
> >> > "lorem ipsum\n   dolor sit amet\n consectetur adipiscing elit"
> >>
> >> I'm afraid it's less than clear *why* we want to parse the entire block
> >> directly as rST.  I have just enough insight into what you've built on
> >> top of this series to hazard a guess.  Bear with me while I try to
> >> explain it.
> >>
> >
> > My own summary: qapidoc expects a paragraph, the new generator expects a
> > block.
> >
> >
> >> We first parse the doc comment with parser.py into an internal
> >> representation.  The structural parts become objects, and the remainder
> >> becomes text attributes of these objects.  Currently, parser.py
> >> carefully adjusts indentation for these text attributes.  Why?  I'll get
> >> to that.
> >>
> >> For your example, parser.py creates an ArgSection object, and sets its
> >> @text member to
> >>
> >> "lorem ipsum\ndolor sit amet\n  consectetur adipiscing elit"
> >>
> >> Printing this string gives us
> >>
> >> lorem ipsum
> >> dolor sit amet
> >>   consectetur adipiscing elit
> >>
> >> qapidoc.py then transforms parser.py's IR into Sphinx IR.  The objects
> >> become (more complicated) Sphinx objects, and their text attributes get
> >> re-parsed as rST text into still more Sphinx objects.
> >>
> >> This re-parse rejects your example with "Unexpected indentation."
> >>
> >
> > Specifically, it'd be an unexpected *unindent*; the indent lacking on the
> > first *two* lines is the problem.
> >
> >
> >> Let me use a slightly different one:
> >>
> >> # @name: lorem ipsum
> >> #dolor sit amet
> >> #consectetur adipiscing elit
> >>
> >> Results in this @text member
> >>
> >> lorem ipsum
> >> dolor sit amet
> >> consectetur adipiscing elit
> >>
> >> which is re-parsed as paragraph, i.e. exactly what we want.
> >>
> >
> > It's what we used to want, anyway.
>
> Yes, I'm describing the current state here.
>
> >> > This understandably breaks qapidoc.py;
> >>
> >> Without indentation adjustment, we'd get
> >>
> >> lorem ipsum
> >>dolor sit amet
> >>consectetur adipiscing elit
> >>
> >> which would be re-parsed as a definition list, I guess.  This isn't what
> >> we want.
> >>
> >> >so a new function is added
> there
> >> > to re-dedent the text.
> >>
> >> Your patch moves the indentation adjustment to another place.  No
> >> functional change.
> >>
> >> You move it so you can branch off your new rendering pipeline before the
> >> indentation adjustment, because ...
> >>
> >> >Once the new generator is merged, this function
> >> > will not be needed any longer and can be dropped.
> >>
> >> ... yours doesn't want it.
> >>
> >> I believe it doesn't want it, because it generates rST (with a QAPI
> >> extension) instead of Sphinx objects.  For your example, something like
> >>
> >> :arg name: lorem ipsum
> >>dolor sit amet
> >>  consectetur adipiscing elit
> >>
> >> For mine:
> >>
> >> :arg name: lorem ipsum
> >>dolor sit amet
> >>consectetur adipiscing elit
> >>
> >> Fair?
> >>
> >
> > Not quite;
> >
> > Old parsing, new generator:
> >
> > :arg type name: lorem ipsum
> > dolor sit amet
> >   consectetur adipiscing elit
> >
> > This is wrong - continuations of a field list must be indented. Unlike
> > paragraphs, we want indents to "keep the block".
> >
> > New parsing, new generator:
> >
> > :arg type name: lorem ipsum
> >dolor sit amet
> >  consectetur adipiscing elit
> >
> > indent is preserved, maintaining the block-level element.
> >
> > I don't have to re-add indents and any nested block elements will be
> > preserved correctly. i.e. you can use code examples, nested lists, etc.
> in
> > argument definitions.
> >
> > The goal here was "Do not treat this as a paragraph, treat it directly as
> > rST and do not modify it needlessly."
> >
> > It's a lot simpler than trying to manage the indent and injecting spaces
> > manually - and adding a temporary dedent to scheduled-for-demolition code
> > seemed the nicer place to add the hack.
>
> Understand.
>
> A bit more rationale in the commit message would be nice.  Perhaps start
> with current state ("we deintent"), then describe the patch ("move the
> deindent"), then rationale "to get it out of the way of a new thingy I
> wrote, and intend to p

Re: [PATCH v11 08/10] virtio-gpu: Handle resource blob commands

2024-05-15 Thread Akihiko Odaki

On 2024/05/16 2:01, Dmitry Osipenko wrote:

On 5/15/24 19:42, Akihiko Odaki wrote:

It may be better to actually implement unmapping instead of returning an
error for consistency with the iov operation. Apparently crosvm also
unmaps blobs with VIRTIO_GPU_CMD_RESOURCE_UNREF.


Then I'll add back `async_unmap_in_progress` because resource can be
both mapped/unmapped on unref, and we'll need flag to know whether async
unmapping has been finished to do the final unmapping of the resource.


Such a situation should be already handled since unmapping in progress
blocks all commands (not just VIRTIO_GPU_CMD_RESOURCE_UNMAP_BLOB but
literally all, including VIRTIO_GPU_CMD_RESOURCE_UNREF).


The async unmapping consists of 3 parts:

1. begin async unmapping with memory_region_del_subregion() and suspend
2. wait for res->mr to be freed and resume
3. finish the unmapping with final virgl_renderer_resource_unmap()

Parts 1 and 3 are handled by  virtio_gpu_virgl_async_unmap_resource_blob()


The VIRTIO_GPU_CMD_RESOURCE_UNMAP_BLOB is different because we know that
blob is mapped in the first place. Hence we can safely perform the part
3, assuming that parts 1/2 has been completed.

In case of VIRTIO_GPU_CMD_RESOURCE_UNREF, blob can be unmapped in the
first place and we can't do the part 3 because it will error out for
unmapped resource since parts 1/2 were not performed.



VIRTIO_GPU_CMD_RESOURCE_UNREF should also call 
virtio_gpu_virgl_async_unmap_resource_blob(). I guess that's the 
original intention of having a function for this instead of inlining the 
content of this function to virgl_cmd_resource_unmap_blob().




Re: [PATCH v3 5/5] virtio-gpu: fix v2 migration

2024-05-15 Thread Peter Xu
On Wed, May 15, 2024 at 05:03:44PM +0100, Daniel P. Berrangé wrote:
> Above all, I'm failing to see why there's a compelling reason
> for virtio_gpu to diverge from our long standing practice of
> adding a named property flag "virtio_scanout_vmstate_fix"
> on the machine class, and then setting it in machine types
> which need it.

The reason to introduce that is definitely avoid introducing fields /
properties in similar cases in which case all the fields may represent the
same thing ("return true if MC is older than xxx version").  Especially
when such change is not bound to a new feature so in which case it won't
make sense to allow user to even control that propoerty, even if we
exported this "x-virtio-scanout-fix" property, but now we must export it
because compat fields require it.

However I think agree that having upstream specific MC versions in VMSD
checks is kind of unwanted.  I think the major problem is we don't have
that extra machine type abstract where we can have simply a number showing
the release of QEMU, then we can map that number to whatever
upstream/downstream machine types.  E.g.:

  Release No. Upstream version   Downstream version
  50  9.0Y.0
  51  9.1
  52  9.2Y.1
  ...

Then downstream is not mapping to 9.0/... but the release no.  Then here
instead of hard code upstream MC versions we can already provide similar
helpers like:

  machine_type_newer_than_50()

Then device code can use it without polluting that with upstream MC
versioning.  Downstream will simply work if downstream MCs are mapped
alright to the release no. when rebase.

But I'm not sure whether it'll be even worthwhile.. the majority will still
be that the VMSD change is caused by a new feature, and exporting that
property might in most cases be wanted.

In all cases, for now I agree it's at least easier to stick with the simple
way.

Thanks,

-- 
Peter Xu




Re: [PATCH 1/5] tcg: Introduce TCG_TARGET_HAS_tst_vec

2024-05-15 Thread Philippe Mathieu-Daudé

On 15/5/24 16:58, Richard Henderson wrote:

Prelude to supporting TCG_COND_TST* in vector comparisons.

Signed-off-by: Richard Henderson 
---
  include/tcg/tcg.h| 1 +
  tcg/aarch64/tcg-target.h | 1 +
  tcg/arm/tcg-target.h | 1 +
  tcg/i386/tcg-target.h| 1 +
  tcg/loongarch64/tcg-target.h | 1 +
  tcg/ppc/tcg-target.h | 1 +
  tcg/s390x/tcg-target.h   | 1 +
  7 files changed, 7 insertions(+)


Reviewed-by: Philippe Mathieu-Daudé 




Re: [PATCH v11 08/10] virtio-gpu: Handle resource blob commands

2024-05-15 Thread Dmitry Osipenko
On 5/15/24 19:42, Akihiko Odaki wrote:
>>> It may be better to actually implement unmapping instead of returning an
>>> error for consistency with the iov operation. Apparently crosvm also
>>> unmaps blobs with VIRTIO_GPU_CMD_RESOURCE_UNREF.
>>
>> Then I'll add back `async_unmap_in_progress` because resource can be
>> both mapped/unmapped on unref, and we'll need flag to know whether async
>> unmapping has been finished to do the final unmapping of the resource.
> 
> Such a situation should be already handled since unmapping in progress
> blocks all commands (not just VIRTIO_GPU_CMD_RESOURCE_UNMAP_BLOB but
> literally all, including VIRTIO_GPU_CMD_RESOURCE_UNREF).

The async unmapping consists of 3 parts:

1. begin async unmapping with memory_region_del_subregion() and suspend
2. wait for res->mr to be freed and resume
3. finish the unmapping with final virgl_renderer_resource_unmap()

Parts 1 and 3 are handled by  virtio_gpu_virgl_async_unmap_resource_blob()


The VIRTIO_GPU_CMD_RESOURCE_UNMAP_BLOB is different because we know that
blob is mapped in the first place. Hence we can safely perform the part
3, assuming that parts 1/2 has been completed.

In case of VIRTIO_GPU_CMD_RESOURCE_UNREF, blob can be unmapped in the
first place and we can't do the part 3 because it will error out for
unmapped resource since parts 1/2 were not performed.

-- 
Best regards,
Dmitry




Re: [PATCH v6 1/9] vfio: Add Error** argument to .set_dirty_page_tracking() handler

2024-05-15 Thread Cédric Le Goater

On 5/15/24 08:40, Eric Auger wrote:

Hi Cédric,

On 5/14/24 17:31, Cédric Le Goater wrote:

We will use the Error object to improve error reporting in the
.log_global*() handlers of VFIO. Add documentation while at it.

Reviewed-by: Philippe Mathieu-Daudé 
Reviewed-by: Avihai Horon 
Signed-off-by: Cédric Le Goater 
---
  Changes in v5:

  - Fixed typo in set_dirty_page_tracking documentation
  
  include/hw/vfio/vfio-container-base.h | 18 --

  hw/vfio/common.c  |  4 ++--
  hw/vfio/container-base.c  |  4 ++--
  hw/vfio/container.c   |  6 +++---
  4 files changed, 23 insertions(+), 9 deletions(-)

diff --git a/include/hw/vfio/vfio-container-base.h 
b/include/hw/vfio/vfio-container-base.h
index 
3582d5f97a37877b2adfc0d0b06996c82403f8b7..326ceea52a2030eec9dad289a9845866c4a8c090
 100644
--- a/include/hw/vfio/vfio-container-base.h
+++ b/include/hw/vfio/vfio-container-base.h
@@ -82,7 +82,7 @@ int vfio_container_add_section_window(VFIOContainerBase 
*bcontainer,
  void vfio_container_del_section_window(VFIOContainerBase *bcontainer,
 MemoryRegionSection *section);
  int vfio_container_set_dirty_page_tracking(VFIOContainerBase *bcontainer,
-   bool start);
+   bool start, Error **errp);

I am a bit confused now wrt [PATCH v2 03/11] vfio: Make
VFIOIOMMUClass::attach_device() and its wrapper return bool & co

Shall we return a bool or a int?


It would be better to follow the best practices described in qapi/error.h.

Zhenzhong excluded some files from his cleanup series to avoid conflicts
with this series of mine. And indeed, I would prefer to merge this one
first. It should be addressed later.




Looking at ./include/qapi/error.h I have not found any stringent requirement

  * - Whenever practical, also return a value that indicates success /
  *   failure.  This can make the error checking more concise, and can
  *   avoid useless error object creation and destruction.  Note that
  *   we still have many functions returning void.  We recommend
  *   • bool-valued functions return true on success / false on failure,
  *   • pointer-valued functions return non-null / null pointer, and
  *   • integer-valued functions return non-negative / negative.




  int vfio_container_query_dirty_bitmap(const VFIOContainerBase *bcontainer,
VFIOBitmap *vbmap,
hwaddr iova, hwaddr size);
@@ -121,9 +121,23 @@ struct VFIOIOMMUClass {
  int (*attach_device)(const char *name, VFIODevice *vbasedev,
   AddressSpace *as, Error **errp);
  void (*detach_device)(VFIODevice *vbasedev);
+
  /* migration feature */
+
+/**
+ * @set_dirty_page_tracking
+ *
+ * Start or stop dirty pages tracking on VFIO container
+ *
+ * @bcontainer: #VFIOContainerBase on which to de/activate dirty
+ *  page tracking
+ * @start: indicates whether to start or stop dirty pages tracking
+ * @errp: pointer to Error*, to store an error if it happens.
+ *
+ * Returns zero to indicate success and negative for error
+ */
  int (*set_dirty_page_tracking)(const VFIOContainerBase *bcontainer,
-   bool start);
+   bool start, Error **errp);
  int (*query_dirty_bitmap)(const VFIOContainerBase *bcontainer,
VFIOBitmap *vbmap,
hwaddr iova, hwaddr size);
diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index 
8f9cbdc0264044ce587877a7d19d14b28527291b..485e53916491f1164d29e739fb7106c0c77df737
 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -1076,7 +1076,7 @@ static bool vfio_listener_log_global_start(MemoryListener 
*listener,
  if (vfio_devices_all_device_dirty_tracking(bcontainer)) {
  ret = vfio_devices_dma_logging_start(bcontainer);
  } else {
-ret = vfio_container_set_dirty_page_tracking(bcontainer, true);
+ret = vfio_container_set_dirty_page_tracking(bcontainer, true, NULL);
  }
  
  if (ret) {

@@ -1096,7 +1096,7 @@ static void vfio_listener_log_global_stop(MemoryListener 
*listener)
  if (vfio_devices_all_device_dirty_tracking(bcontainer)) {
  vfio_devices_dma_logging_stop(bcontainer);
  } else {
-ret = vfio_container_set_dirty_page_tracking(bcontainer, false);
+ret = vfio_container_set_dirty_page_tracking(bcontainer, false, NULL);
  }
  
  if (ret) {

diff --git a/hw/vfio/container-base.c b/hw/vfio/container-base.c
index 
913ae49077c4f09b7b27517c1231cfbe4befb7fb..7c0764121d24b02b6c4e66e368d7dff78a6d65aa
 100644
--- a/hw/vfio/container-base.c
+++ b/hw/vfio/container-base.c
@@ -53,14 +53,14 @@ void vfio_container_del_section_window(VFIOContainerBase 
*bcontainer,
  }
  
  int vfio_container_set_dir

Re: [PATCH 01/17] ppc64: Fix include order

2024-05-15 Thread Philippe Mathieu-Daudé

On 15/5/24 15:53, Richard Henderson wrote:

On 5/15/24 15:11, Philippe Mathieu-Daudé wrote:

Hi Richard,

On 11/5/24 13:53, Richard Henderson wrote:

Signed-off-by: Richard Henderson 
---
  risu_ppc64.c | 3 +--
  1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/risu_ppc64.c b/risu_ppc64.c
index 9df8d58..62cf6aa 100644
--- a/risu_ppc64.c
+++ b/risu_ppc64.c
@@ -11,9 +11,8 @@
   * based on Peter Maydell's risu_arm.c
   
*/

-#include 
-
  #include "risu.h"
+#include 


What is fixed exactly?


I don't remember (patch dated in 2022).
It is probably a #define namespace issue with cfarm hosts running Centos 
7.9?

I suppose I should investigate, and drop it if irrelevant.


It was just out of curiosity (I had a quick look at the headers
and couldn't see anything obvious, and other headers also include
system headers before "risu.h").

Reviewed-by: Philippe Mathieu-Daudé 




Re: [PATCH v11 01/10] virtio-gpu: Unrealize GL device

2024-05-15 Thread Dmitry Osipenko
On 5/15/24 19:22, Akihiko Odaki wrote:
> On 2024/05/16 1:18, Dmitry Osipenko wrote:
>> On 5/13/24 11:44, Akihiko Odaki wrote:
>>> On 2024/05/12 3:22, Dmitry Osipenko wrote:
 Even though GL GPU doesn't support hotplugging today, free virgl
 resources when GL device is unrealized. For consistency.

 Signed-off-by: Dmitry Osipenko 
 ---
    hw/display/virtio-gpu-gl.c | 11 +++
    hw/display/virtio-gpu-virgl.c  |  9 +
    include/hw/virtio/virtio-gpu.h |  1 +
    3 files changed, 21 insertions(+)

 diff --git a/hw/display/virtio-gpu-gl.c b/hw/display/virtio-gpu-gl.c
 index e06be60dfbfc..0c0a8d136954 100644
 --- a/hw/display/virtio-gpu-gl.c
 +++ b/hw/display/virtio-gpu-gl.c
 @@ -136,6 +136,16 @@ static Property virtio_gpu_gl_properties[] = {
    DEFINE_PROP_END_OF_LIST(),
    };
    +static void virtio_gpu_gl_device_unrealize(DeviceState *qdev)
 +{
 +    VirtIOGPU *g = VIRTIO_GPU(qdev);
 +    VirtIOGPUGL *gl = VIRTIO_GPU_GL(qdev);
 +
 +    if (gl->renderer_inited) {
 +    virtio_gpu_virgl_deinit(g);
 +    }
 +}
 +
    static void virtio_gpu_gl_class_init(ObjectClass *klass, void *data)
    {
    DeviceClass *dc = DEVICE_CLASS(klass);
 @@ -149,6 +159,7 @@ static void virtio_gpu_gl_class_init(ObjectClass
 *klass, void *data)
    vgc->update_cursor_data = virtio_gpu_gl_update_cursor_data;
      vdc->realize = virtio_gpu_gl_device_realize;
 +    vdc->unrealize = virtio_gpu_gl_device_unrealize;
    vdc->reset = virtio_gpu_gl_reset;
    device_class_set_props(dc, virtio_gpu_gl_properties);
    }
 diff --git a/hw/display/virtio-gpu-virgl.c
 b/hw/display/virtio-gpu-virgl.c
 index 9f34d0e6619c..b0500eccf8e0 100644
 --- a/hw/display/virtio-gpu-virgl.c
 +++ b/hw/display/virtio-gpu-virgl.c
 @@ -665,3 +665,12 @@ int virtio_gpu_virgl_get_num_capsets(VirtIOGPU *g)
      return capset2_max_ver ? 2 : 1;
    }
 +
 +void virtio_gpu_virgl_deinit(VirtIOGPU *g)
 +{
 +    if (g->fence_poll) {
>>>
>>> Isn't g->fence_poll always non-NULL when this function is called?
>>
>> virtio_gpu_virgl_init() is invoked when first cmd is executed, please
>> see virtio_gpu_gl_handle_ctrl() that invokes it. Hence g->fence_poll can
>> be NULL.
>>
> 
> But it already checks renderer_inited, doesn't it? And I think it's
> better to utilize one single flag to represent that virgl is enabled
> instead of checking several variables (fence_poll and cmdq_resume_bh in
> the future).

The virtio_gpu_virgl_init() will have to be changed to do that because
virgl_renderer_init() can fail before fence_poll/cmdq_resume_bh are inited.

Though, the error returned by virtio_gpu_virgl_init() isn't checked by
virtio_gpu_gl_handle_ctrl(), which leads to a further Qemu crash due to
fence_poll=NULL. I'll try to improve it all in v12, thanks.

-- 
Best regards,
Dmitry




Re: [PATCH v9] arm/kvm: Enable support for KVM_ARM_VCPU_PMU_V3_FILTER

2024-05-15 Thread Daniel P . Berrangé
On Mon, May 13, 2024 at 02:52:14PM +0800, Zhao Liu wrote:
> Hi Daniel,
> 
> > Please describe it in terms of a QAPI definition, as that's what we're
> > striving for with all QEMU public interfaces. Once the QAPI design is
> > agreed, then the -object mapping is trivial, as -object's JSON format
> > supports arbitrary QAPI structures.
> 
> Thank you for your guidance!
> 
> I rethought and and modified my previous proposal:
> 
> Let me show the command examples firstly:
>   * Add a single event:
> (x86) -object kvm-pmu-event,id=e0,action=allow,format=x86-default,\
>   select=0x3c,umask=0x00
> (arm or general) -object kvm-pmu-event,id=e1,action=deny,\
>  format=raw,code=0x01
>  
>   * Add a counter bitmap:
> (x86) -object kvm-pmu-counter,id=cnt,action=allow,type=x86-fixed,\
>   bitmap=0x
>  
>   * Add an event list (must use Json syntax format):
>(x86) -object 
> '{"qom-type":"kvm-pmu-event-list","id"="filter0","action"="allow","format"="x86-default","events=[{"select"=0x3c,"umask"=0x00},{"select"=0x2e,"umask"=0x4f}]'
>(arm) -object 
> '{"qom-type":"kvm-pmu-event-list","id"="filter1","action"="allow","format"="raw","events"=[{"code"=0x01},{"code"=0x02}]'
> 
> 
> The specific JSON definitions are as follows (IIUC, this is "in terms of
> a QAPI definition", right? ;-)): 
> * Define PMU event and counter bitmap with JSON format:
>   - basic filter action:
> 
>   { 'enum': 'KVMPMUFilterAction',
> 'prefix': 'KVM_PMU_FILTER_ACTION',
> 'data': ['deny', 'allow' ] }
> 
>   - PMU counter:
> 
>   { 'enum': 'KVMPMUCounterType',
> 'prefix': 'KVM_PMU_COUNTER_TYPE',
> 'data': [ 'x86-fixed' ] }
> 
>   { 'struct': 'KVMPMUX86FixedCounter',
> 'data': { 'bitmap': 'uint32' } }
> 
>   - PMU events (total 3 formats):
> 
>   # 3 encoding formats: "raw" is compatible with shaoqin's ARM format as
>   # well as the x86 raw format, and could support other architectures in
>   # the future.
>   { 'enum': 'KVMPMUEventEncodeFmt',
> 'prefix': 'KVM_PMU_EVENT_ENCODE_FMT',
> 'data': ['raw', 'x86-default', 'x86-masked-entry' ] }
> 
>   # A general format.
>   { 'struct': 'KVMPMURawEvent',
> 'data': { 'code': 'uint64' } }
> 
>   # x86-specific
>   { 'struct': 'KVMPMUX86DefalutEvent',
> 'data': { 'select': 'uint16',
>   'umask': 'uint16' } }
> 
>   # another x86 specific
>   { 'struct': 'KVMPMUX86MaskedEntry',
> 'data': { 'select': 'uint16',
>   'match': 'uint8',
>   'mask': 'uint8',
>   'exclude': 'bool' } }
> 
>   # And their list wrappers:
>   { 'struct': 'KVMPMURawEventList',
> 'data': { 'events': ['KVMPMURawEvent'] } }
> 
>   { 'struct': 'KVMPMUX86DefalutEventList',
> 'data': { 'events': ['KVMPMUX86DefalutEvent'] } }
> 
>   { 'struct': 'KVMPMUX86MaskedEntryList',
> 'data': { 'events': ['KVMPMUX86MaskedEntryList'] } }
> 
> 
> Based on the above basic structs, we could provide 3 new more qom-types:
>   - 'kvm-pmu-counter': 'KVMPMUFilterCounter'
> 
>   # This is a single object option to configure PMU counter
>   # bitmap filter.
>   { 'union': 'KVMPMUFilterCounter',
> 'base': { 'action': 'KVMPMUFilterAction',
>   'type': 'KVMPMUCounterType' },
> 'discriminator': 'type',
> 'data': { 'x86-fixed': 'KVMPMUX86FixedCounter' } }
> 
> 
>   - 'kvm-pmu-counter': 'KVMPMUFilterCounter'
> 
>   # This option is used to configure a single PMU event for
>   # PMU filter.
>   { 'union': 'KVMPMUFilterEvent',
> 'base': { 'action': 'KVMPMUFilterAction',
>   'format': 'KVMPMUEventEncodeFmt' },
> 'discriminator': 'format',
> 'data': { 'raw': 'KVMPMURawEvent',
>   'x86-default': 'KVMPMUX86DefalutEvent',
>   'x86-masked-entry': 'KVMPMUX86MaskedEntry' } }
> 
> 
>   - 'kvm-pmu-event-list': 'KVMPMUFilterEventList'
> 
>   # Used to configure multiple events.
>   { 'union': 'KVMPMUFilterEventList',
> 'base': { 'action': 'KVMPMUFilterAction',
>   'format': 'KVMPMUEventEncodeFmt' },
> 'discriminator': 'format',
> 'data': { 'raw': 'KVMPMURawEventList',
>   'x86-default': 'KVMPMUX86DefalutEventList',
>   'x86-masked-entry': 'KVMPMUX86MaskedEntryList' } }
> 
> 
> Compared to Shaoqin's original format, kvm-pmu-event-list is not able to
> enumerate events continuously (similar to 0x00-0x30 before), and now
> user must enumerate events one by one individually.
> 
> What do you think about the above 3 new commands?

I don't know enough about KVM PMU to give feedback on the specific
choices, but in terms of how to do QAPI design, this looks like a
good start.


With regards,
Daniel
-- 
|: https://berrange.com  -o-https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org -o-https://fstop138.berrange.com :|
|: https://entangle-photo.org-o-https://www.instagram.com/dberrange :|




Re: [PATCH v11 08/10] virtio-gpu: Handle resource blob commands

2024-05-15 Thread Akihiko Odaki

On 2024/05/16 1:39, Dmitry Osipenko wrote:

On 5/13/24 12:18, Akihiko Odaki wrote:

     static void virgl_cmd_resource_unref(VirtIOGPU *g,
- struct virtio_gpu_ctrl_command
*cmd)
+ struct virtio_gpu_ctrl_command
*cmd,
+ bool *cmd_suspended)


This parameter is not used as it returns an error if the resource is
still mapped.


Missed to remove it by accident


It may be better to actually implement unmapping instead of returning an
error for consistency with the iov operation. Apparently crosvm also
unmaps blobs with VIRTIO_GPU_CMD_RESOURCE_UNREF.


Then I'll add back `async_unmap_in_progress` because resource can be
both mapped/unmapped on unref, and we'll need flag to know whether async
unmapping has been finished to do the final unmapping of the resource.


Such a situation should be already handled since unmapping in progress 
blocks all commands (not just VIRTIO_GPU_CMD_RESOURCE_UNMAP_BLOB but 
literally all, including VIRTIO_GPU_CMD_RESOURCE_UNREF).




Re: [PATCH v11 08/10] virtio-gpu: Handle resource blob commands

2024-05-15 Thread Dmitry Osipenko
On 5/13/24 12:18, Akihiko Odaki wrote:
>>     static void virgl_cmd_resource_unref(VirtIOGPU *g,
>> - struct virtio_gpu_ctrl_command
>> *cmd)
>> + struct virtio_gpu_ctrl_command
>> *cmd,
>> + bool *cmd_suspended)
> 
> This parameter is not used as it returns an error if the resource is
> still mapped.

Missed to remove it by accident

> It may be better to actually implement unmapping instead of returning an
> error for consistency with the iov operation. Apparently crosvm also
> unmaps blobs with VIRTIO_GPU_CMD_RESOURCE_UNREF.

Then I'll add back `async_unmap_in_progress` because resource can be
both mapped/unmapped on unref, and we'll need flag to know whether async
unmapping has been finished to do the final unmapping of the resource.

...
>> +    QTAILQ_INSERT_HEAD(&g->reslist, &res->base, next);
>> +
>> +    virgl_args.res_handle = cblob.resource_id;
>> +    virgl_args.ctx_id = cblob.hdr.ctx_id;
>> +    virgl_args.blob_mem = cblob.blob_mem;
>> +    virgl_args.blob_id = cblob.blob_id;
>> +    virgl_args.blob_flags = cblob.blob_flags;
>> +    virgl_args.size = cblob.size;
>> +    virgl_args.iovecs = res->base.iov;
>> +    virgl_args.num_iovs = res->base.iov_cnt;
>> +
>> +    ret = virgl_renderer_resource_create_blob(&virgl_args);
>> +    if (ret) {
>> +    qemu_log_mask(LOG_GUEST_ERROR, "%s: virgl blob create error:
>> %s\n",
>> +  __func__, strerror(-ret));
>> +    cmd->error = VIRTIO_GPU_RESP_ERR_UNSPEC;
> 
> reslist keeps the stale res even if an error happens.

Good catch

-- 
Best regards,
Dmitry




Re: [PATCH v3 5/5] virtio-gpu: fix v2 migration

2024-05-15 Thread Peter Xu
On Wed, May 15, 2024 at 12:02:49PM -0400, Michael S. Tsirkin wrote:
> On Wed, May 15, 2024 at 06:15:56PM +0400, marcandre.lur...@redhat.com wrote:
> > From: Marc-André Lureau 
> > 
> > Commit dfcf74fa ("virtio-gpu: fix scanout migration post-load") broke
> > forward/backward version migration. Versioning of nested VMSD structures
> > is not straightforward, as the wire format doesn't have nested
> > structures versions.
> > 
> > Use the previously introduced check_machine_version() function as a
> > field test to ensure proper saving/loading based on the machine version.
> > The VMSD.version is irrelevant now.
> > 
> > Fixes: dfcf74fa ("virtio-gpu: fix scanout migration post-load")
> > Suggested-by: Peter Xu 
> > Signed-off-by: Marc-André Lureau 
> 
> I don't get it. Our standard way to do it is:
> - add a property (begin name with x- so we don't commit to an API)
> - set from compat machinery
> - test property value in VMSTATE macros
> 
> Big advantage is, it works well with any downstreams
> which pick any properties they like.
> Why is this not a good fit here?

I think it'll simplify upstream to avoid introducing one new field + one
new property for each of such protocol change, which fundamentally are the
same thing.  But it's indeed a good point that such helper can slightly
complicate the backport a bit.. I assume a global replacement of versions
over the helper will be needed after downstream settles on how to map
downstream MCs to upstream's.

Thanks,

-- 
Peter Xu




Re: [PATCH v2 03/11] vfio: Make VFIOIOMMUClass::attach_device() and its wrapper return bool

2024-05-15 Thread Cédric Le Goater

On 5/7/24 09:34, Duan, Zhenzhong wrote:




-Original Message-
From: Cédric Le Goater 
Subject: Re: [PATCH v2 03/11] vfio: Make VFIOIOMMUClass::attach_device()
and its wrapper return bool

On 5/7/24 08:42, Zhenzhong Duan wrote:

Make VFIOIOMMUClass::attach_device() and its wrapper function
vfio_attach_device() return bool.

This is to follow the coding standand to return bool if 'Error **'
is used to pass error.

Suggested-by: Cédric Le Goater 
Signed-off-by: Zhenzhong Duan 
---
   include/hw/vfio/vfio-common.h |  4 ++--
   include/hw/vfio/vfio-container-base.h |  4 ++--
   hw/vfio/ap.c  |  6 ++
   hw/vfio/ccw.c |  6 ++
   hw/vfio/common.c  |  4 ++--
   hw/vfio/container.c   | 14 +++---
   hw/vfio/iommufd.c | 11 +--
   hw/vfio/pci.c |  5 ++---
   hw/vfio/platform.c|  7 +++
   9 files changed, 27 insertions(+), 34 deletions(-)

diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-

common.h

index b9da6c08ef..a7b6fc8f46 100644
--- a/include/hw/vfio/vfio-common.h
+++ b/include/hw/vfio/vfio-common.h
@@ -198,8 +198,8 @@ void vfio_region_exit(VFIORegion *region);
   void vfio_region_finalize(VFIORegion *region);
   void vfio_reset_handler(void *opaque);
   struct vfio_device_info *vfio_get_device_info(int fd);
-int vfio_attach_device(char *name, VFIODevice *vbasedev,
-   AddressSpace *as, Error **errp);
+bool vfio_attach_device(char *name, VFIODevice *vbasedev,
+AddressSpace *as, Error **errp);
   void vfio_detach_device(VFIODevice *vbasedev);

   int vfio_kvm_device_add_fd(int fd, Error **errp);
diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-

container-base.h

index 3582d5f97a..c839cfd9cb 100644
--- a/include/hw/vfio/vfio-container-base.h
+++ b/include/hw/vfio/vfio-container-base.h
@@ -118,8 +118,8 @@ struct VFIOIOMMUClass {
   int (*dma_unmap)(const VFIOContainerBase *bcontainer,
hwaddr iova, ram_addr_t size,
IOMMUTLBEntry *iotlb);
-int (*attach_device)(const char *name, VFIODevice *vbasedev,
- AddressSpace *as, Error **errp);
+bool (*attach_device)(const char *name, VFIODevice *vbasedev,
+  AddressSpace *as, Error **errp);
   void (*detach_device)(VFIODevice *vbasedev);
   /* migration feature */
   int (*set_dirty_page_tracking)(const VFIOContainerBase *bcontainer,
diff --git a/hw/vfio/ap.c b/hw/vfio/ap.c
index 7c4caa5938..d50600b702 100644
--- a/hw/vfio/ap.c
+++ b/hw/vfio/ap.c
@@ -156,7 +156,6 @@ static void

vfio_ap_unregister_irq_notifier(VFIOAPDevice *vapdev,

   static void vfio_ap_realize(DeviceState *dev, Error **errp)
   {
   ERRP_GUARD();
-int ret;
   Error *err = NULL;
   VFIOAPDevice *vapdev = VFIO_AP_DEVICE(dev);
   VFIODevice *vbasedev = &vapdev->vdev;
@@ -165,9 +164,8 @@ static void vfio_ap_realize(DeviceState *dev, Error

**errp)

   return;
   }

-ret = vfio_attach_device(vbasedev->name, vbasedev,
- &address_space_memory, errp);
-if (ret) {
+if (!vfio_attach_device(vbasedev->name, vbasedev,
+&address_space_memory, errp)) {
   goto error;
   }

diff --git a/hw/vfio/ccw.c b/hw/vfio/ccw.c
index 90e4a53437..782bd4bed7 100644
--- a/hw/vfio/ccw.c
+++ b/hw/vfio/ccw.c
@@ -580,7 +580,6 @@ static void vfio_ccw_realize(DeviceState *dev,

Error **errp)

   S390CCWDeviceClass *cdc = S390_CCW_DEVICE_GET_CLASS(cdev);
   VFIODevice *vbasedev = &vcdev->vdev;
   Error *err = NULL;
-int ret;

   /* Call the class init function for subchannel. */
   if (cdc->realize) {
@@ -594,9 +593,8 @@ static void vfio_ccw_realize(DeviceState *dev,

Error **errp)

   return;
   }

-ret = vfio_attach_device(cdev->mdevid, vbasedev,
- &address_space_memory, errp);
-if (ret) {
+if (!vfio_attach_device(cdev->mdevid, vbasedev,
+&address_space_memory, errp)) {
   goto out_attach_dev_err;
   }

diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index 8f9cbdc026..890d30910e 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -1492,8 +1492,8 @@ retry:
   return info;
   }

-int vfio_attach_device(char *name, VFIODevice *vbasedev,
-   AddressSpace *as, Error **errp)
+bool vfio_attach_device(char *name, VFIODevice *vbasedev,
+AddressSpace *as, Error **errp)
   {
   const VFIOIOMMUClass *ops =


VFIO_IOMMU_CLASS(object_class_by_name(TYPE_VFIO_IOMMU_LEGACY));

This is still broken. No need to resend. I will update the code.


I put this series before preq v4, so you don't see that change.
See 
https://github.com/yiliu1765/qemu/commits/zhenzhong/iommufd_nesting_rfcv2_preq_

Re: [PATCH v11 09/10] virtio-gpu: Register capsets dynamically

2024-05-15 Thread Dmitry Osipenko
On 5/13/24 12:20, Akihiko Odaki wrote:
...
>>   -int virtio_gpu_virgl_get_num_capsets(VirtIOGPU *g)
>> +static void virtio_gpu_virgl_add_capset(GArray *capset_ids, uint32_t
>> capset_id)
>> +{
>> +    g_array_append_val(capset_ids, capset_id);
>> +}
> 
> Is it worthwhile to have a function for this?

It's necessary to have it because g_array_append_val() is actually a macro that 
takes &capset_id ptr internally. 

I.e. g_array_append_val(capset_ids, VIRTIO_GPU_CAPSET_VIRGL2) will fail to 
compile with:

/usr/include/glib-2.0/glib/garray.h:66:59: error: lvalue required as unary '&' 
operand
   66 | #define g_array_append_val(a,v)   g_array_append_vals (a, &(v), 1)

-- 
Best regards,
Dmitry




Re: [PATCH v11 01/10] virtio-gpu: Unrealize GL device

2024-05-15 Thread Akihiko Odaki

On 2024/05/16 1:18, Dmitry Osipenko wrote:

On 5/13/24 11:44, Akihiko Odaki wrote:

On 2024/05/12 3:22, Dmitry Osipenko wrote:

Even though GL GPU doesn't support hotplugging today, free virgl
resources when GL device is unrealized. For consistency.

Signed-off-by: Dmitry Osipenko 
---
   hw/display/virtio-gpu-gl.c | 11 +++
   hw/display/virtio-gpu-virgl.c  |  9 +
   include/hw/virtio/virtio-gpu.h |  1 +
   3 files changed, 21 insertions(+)

diff --git a/hw/display/virtio-gpu-gl.c b/hw/display/virtio-gpu-gl.c
index e06be60dfbfc..0c0a8d136954 100644
--- a/hw/display/virtio-gpu-gl.c
+++ b/hw/display/virtio-gpu-gl.c
@@ -136,6 +136,16 @@ static Property virtio_gpu_gl_properties[] = {
   DEFINE_PROP_END_OF_LIST(),
   };
   +static void virtio_gpu_gl_device_unrealize(DeviceState *qdev)
+{
+    VirtIOGPU *g = VIRTIO_GPU(qdev);
+    VirtIOGPUGL *gl = VIRTIO_GPU_GL(qdev);
+
+    if (gl->renderer_inited) {
+    virtio_gpu_virgl_deinit(g);
+    }
+}
+
   static void virtio_gpu_gl_class_init(ObjectClass *klass, void *data)
   {
   DeviceClass *dc = DEVICE_CLASS(klass);
@@ -149,6 +159,7 @@ static void virtio_gpu_gl_class_init(ObjectClass
*klass, void *data)
   vgc->update_cursor_data = virtio_gpu_gl_update_cursor_data;
     vdc->realize = virtio_gpu_gl_device_realize;
+    vdc->unrealize = virtio_gpu_gl_device_unrealize;
   vdc->reset = virtio_gpu_gl_reset;
   device_class_set_props(dc, virtio_gpu_gl_properties);
   }
diff --git a/hw/display/virtio-gpu-virgl.c
b/hw/display/virtio-gpu-virgl.c
index 9f34d0e6619c..b0500eccf8e0 100644
--- a/hw/display/virtio-gpu-virgl.c
+++ b/hw/display/virtio-gpu-virgl.c
@@ -665,3 +665,12 @@ int virtio_gpu_virgl_get_num_capsets(VirtIOGPU *g)
     return capset2_max_ver ? 2 : 1;
   }
+
+void virtio_gpu_virgl_deinit(VirtIOGPU *g)
+{
+    if (g->fence_poll) {


Isn't g->fence_poll always non-NULL when this function is called?


virtio_gpu_virgl_init() is invoked when first cmd is executed, please
see virtio_gpu_gl_handle_ctrl() that invokes it. Hence g->fence_poll can
be NULL.



But it already checks renderer_inited, doesn't it? And I think it's 
better to utilize one single flag to represent that virgl is enabled 
instead of checking several variables (fence_poll and cmdq_resume_bh in 
the future).




Re: [PATCH v3 0/5] Fix "virtio-gpu: fix scanout migration post-load"

2024-05-15 Thread Daniel P . Berrangé
On Wed, May 15, 2024 at 10:07:31AM -0600, Peter Xu wrote:
> On Wed, May 15, 2024 at 06:15:51PM +0400, marcandre.lur...@redhat.com wrote:
> > From: Marc-André Lureau 
> > 
> > Hi,
> > 
> > The aforementioned patch breaks virtio-gpu device migrations for versions
> > pre-9.0/9.0, both forwards and backwards. Versioning of `VMS_STRUCT` is more
> > complex than it may initially appear, as evidenced in the problematic commit
> > dfcf74fa68c ("virtio-gpu: fix scanout migration post-load").
> > 
> > v2:
> >  - use a manual version field test (instead of the more complex struct 
> > variant)
> > 
> > v3:
> >  - introduce machine_check_version()
> >  - drop the VMSD version, and use machine version field test
> 
> Thanks for trying this out already.
> 
> Last time I mentioned this may for the long term because I remember Dan and
> Thomas were trying to work on some machine deprecation work, and maybe such
> things may collapse with that work (and perhaps easier with that work
> landed, too?).  Just to copy them both here so we know where we are now, as
> I didn't follow that discussion.  IOW, patch 3/4 may need separate review
> from outside migration..

You'll be refering to my series here:

  https://lists.nongnu.org/archive/html/qemu-devel/2024-05/msg00084.html

Note that series very delibrately did *not* expose the version numbers
as accessible fields to code. The version number info is only accessible
within the machine type macros, and once the macros are expanded, the
version digits remains hidden within the opaque machine type name strings,
and/or method names.

With regards,
Daniel
-- 
|: https://berrange.com  -o-https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org -o-https://fstop138.berrange.com :|
|: https://entangle-photo.org-o-https://www.instagram.com/dberrange :|




Re: [PATCH v3 2/4] vfio/migration: Emit VFIO migration QAPI event

2024-05-15 Thread Cédric Le Goater

On 5/15/24 15:21, Avihai Horon wrote:

Emit VFIO migration QAPI event when a VFIO device changes its migration
state. This can be used by management applications to get updates on the
current state of the VFIO device for their own purposes.

A new per VFIO device capability, "migration-events", is added so events
can be enabled only for the required devices. It is disabled by default.

Signed-off-by: Avihai Horon 



Reviewed-by: Cédric Le Goater 

Thanks,

C.



---
  include/hw/vfio/vfio-common.h |  1 +
  hw/vfio/migration.c   | 59 +--
  hw/vfio/pci.c |  2 ++
  3 files changed, 59 insertions(+), 3 deletions(-)

diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
index b9da6c08ef..3ec5f2425e 100644
--- a/include/hw/vfio/vfio-common.h
+++ b/include/hw/vfio/vfio-common.h
@@ -115,6 +115,7 @@ typedef struct VFIODevice {
  bool no_mmap;
  bool ram_block_discard_allowed;
  OnOffAuto enable_migration;
+bool migration_events;
  VFIODeviceOps *ops;
  unsigned int num_irqs;
  unsigned int num_regions;
diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
index 06ae40969b..2e1a8f6031 100644
--- a/hw/vfio/migration.c
+++ b/hw/vfio/migration.c
@@ -24,6 +24,7 @@
  #include "migration/register.h"
  #include "migration/blocker.h"
  #include "qapi/error.h"
+#include "qapi/qapi-events-vfio.h"
  #include "exec/ramlist.h"
  #include "exec/ram_addr.h"
  #include "pci.h"
@@ -80,6 +81,58 @@ static const char *mig_state_to_str(enum 
vfio_device_mig_state state)
  }
  }
  
+static VfioMigrationState

+mig_state_to_qapi_state(enum vfio_device_mig_state state)
+{
+switch (state) {
+case VFIO_DEVICE_STATE_STOP:
+return QAPI_VFIO_MIGRATION_STATE_STOP;
+case VFIO_DEVICE_STATE_RUNNING:
+return QAPI_VFIO_MIGRATION_STATE_RUNNING;
+case VFIO_DEVICE_STATE_STOP_COPY:
+return QAPI_VFIO_MIGRATION_STATE_STOP_COPY;
+case VFIO_DEVICE_STATE_RESUMING:
+return QAPI_VFIO_MIGRATION_STATE_RESUMING;
+case VFIO_DEVICE_STATE_RUNNING_P2P:
+return QAPI_VFIO_MIGRATION_STATE_RUNNING_P2P;
+case VFIO_DEVICE_STATE_PRE_COPY:
+return QAPI_VFIO_MIGRATION_STATE_PRE_COPY;
+case VFIO_DEVICE_STATE_PRE_COPY_P2P:
+return QAPI_VFIO_MIGRATION_STATE_PRE_COPY_P2P;
+default:
+g_assert_not_reached();
+}
+}
+
+static void vfio_migration_send_event(VFIODevice *vbasedev)
+{
+VFIOMigration *migration = vbasedev->migration;
+DeviceState *dev = vbasedev->dev;
+g_autofree char *qom_path = NULL;
+Object *obj;
+
+if (!vbasedev->migration_events) {
+return;
+}
+
+g_assert(vbasedev->ops->vfio_get_object);
+obj = vbasedev->ops->vfio_get_object(vbasedev);
+g_assert(obj);
+qom_path = object_get_canonical_path(obj);
+
+qapi_event_send_vfio_migration(
+dev->id, qom_path, mig_state_to_qapi_state(migration->device_state));
+}
+
+static void vfio_migration_set_device_state(VFIODevice *vbasedev,
+enum vfio_device_mig_state state)
+{
+VFIOMigration *migration = vbasedev->migration;
+
+migration->device_state = state;
+vfio_migration_send_event(vbasedev);
+}
+
  static int vfio_migration_set_state(VFIODevice *vbasedev,
  enum vfio_device_mig_state new_state,
  enum vfio_device_mig_state recover_state)
@@ -125,12 +178,12 @@ static int vfio_migration_set_state(VFIODevice *vbasedev,
  goto reset_device;
  }
  
-migration->device_state = recover_state;

+vfio_migration_set_device_state(vbasedev, recover_state);
  
  return ret;

  }
  
-migration->device_state = new_state;

+vfio_migration_set_device_state(vbasedev, new_state);
  if (mig_state->data_fd != -1) {
  if (migration->data_fd != -1) {
  /*
@@ -156,7 +209,7 @@ reset_device:
   strerror(errno));
  }
  
-migration->device_state = VFIO_DEVICE_STATE_RUNNING;

+vfio_migration_set_device_state(vbasedev, VFIO_DEVICE_STATE_RUNNING);
  
  return ret;

  }
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index 64780d1b79..8840602c50 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -3362,6 +3362,8 @@ static Property vfio_pci_dev_properties[] = {
  VFIO_FEATURE_ENABLE_IGD_OPREGION_BIT, false),
  DEFINE_PROP_ON_OFF_AUTO("enable-migration", VFIOPCIDevice,
  vbasedev.enable_migration, ON_OFF_AUTO_AUTO),
+DEFINE_PROP_BOOL("migration-events", VFIOPCIDevice,
+ vbasedev.migration_events, false),
  DEFINE_PROP_BOOL("x-no-mmap", VFIOPCIDevice, vbasedev.no_mmap, false),
  DEFINE_PROP_BOOL("x-balloon-allowed", VFIOPCIDevice,
   vbasedev.ram_block_discard_allowed, false),





Re: [PATCH v11 01/10] virtio-gpu: Unrealize GL device

2024-05-15 Thread Dmitry Osipenko
On 5/13/24 11:44, Akihiko Odaki wrote:
> On 2024/05/12 3:22, Dmitry Osipenko wrote:
>> Even though GL GPU doesn't support hotplugging today, free virgl
>> resources when GL device is unrealized. For consistency.
>>
>> Signed-off-by: Dmitry Osipenko 
>> ---
>>   hw/display/virtio-gpu-gl.c | 11 +++
>>   hw/display/virtio-gpu-virgl.c  |  9 +
>>   include/hw/virtio/virtio-gpu.h |  1 +
>>   3 files changed, 21 insertions(+)
>>
>> diff --git a/hw/display/virtio-gpu-gl.c b/hw/display/virtio-gpu-gl.c
>> index e06be60dfbfc..0c0a8d136954 100644
>> --- a/hw/display/virtio-gpu-gl.c
>> +++ b/hw/display/virtio-gpu-gl.c
>> @@ -136,6 +136,16 @@ static Property virtio_gpu_gl_properties[] = {
>>   DEFINE_PROP_END_OF_LIST(),
>>   };
>>   +static void virtio_gpu_gl_device_unrealize(DeviceState *qdev)
>> +{
>> +    VirtIOGPU *g = VIRTIO_GPU(qdev);
>> +    VirtIOGPUGL *gl = VIRTIO_GPU_GL(qdev);
>> +
>> +    if (gl->renderer_inited) {
>> +    virtio_gpu_virgl_deinit(g);
>> +    }
>> +}
>> +
>>   static void virtio_gpu_gl_class_init(ObjectClass *klass, void *data)
>>   {
>>   DeviceClass *dc = DEVICE_CLASS(klass);
>> @@ -149,6 +159,7 @@ static void virtio_gpu_gl_class_init(ObjectClass
>> *klass, void *data)
>>   vgc->update_cursor_data = virtio_gpu_gl_update_cursor_data;
>>     vdc->realize = virtio_gpu_gl_device_realize;
>> +    vdc->unrealize = virtio_gpu_gl_device_unrealize;
>>   vdc->reset = virtio_gpu_gl_reset;
>>   device_class_set_props(dc, virtio_gpu_gl_properties);
>>   }
>> diff --git a/hw/display/virtio-gpu-virgl.c
>> b/hw/display/virtio-gpu-virgl.c
>> index 9f34d0e6619c..b0500eccf8e0 100644
>> --- a/hw/display/virtio-gpu-virgl.c
>> +++ b/hw/display/virtio-gpu-virgl.c
>> @@ -665,3 +665,12 @@ int virtio_gpu_virgl_get_num_capsets(VirtIOGPU *g)
>>     return capset2_max_ver ? 2 : 1;
>>   }
>> +
>> +void virtio_gpu_virgl_deinit(VirtIOGPU *g)
>> +{
>> +    if (g->fence_poll) {
> 
> Isn't g->fence_poll always non-NULL when this function is called?

virtio_gpu_virgl_init() is invoked when first cmd is executed, please
see virtio_gpu_gl_handle_ctrl() that invokes it. Hence g->fence_poll can
be NULL.

-- 
Best regards,
Dmitry




Re: [PATCH v3 4/4] vfio/migration: Enhance VFIO migration state tracing

2024-05-15 Thread Cédric Le Goater

On 5/15/24 15:21, Avihai Horon wrote:

Move trace_vfio_migration_set_state() to the top of the function, add
recover_state to it, and add a new trace event to
vfio_migration_set_device_state().

This improves tracing of device state changes as state changes are now
also logged when vfio_migration_set_state() fails (covering recover
state and device reset transitions) and in no-op state transitions to
the same state.

Suggested-by: Cédric Le Goater 
Signed-off-by: Avihai Horon 



Thanks for doing so,



Reviewed-by: Cédric Le Goater 


C.



---
  hw/vfio/migration.c  | 8 ++--
  hw/vfio/trace-events | 3 ++-
  2 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
index f2b7a3067b..7f0d76ab50 100644
--- a/hw/vfio/migration.c
+++ b/hw/vfio/migration.c
@@ -129,6 +129,9 @@ static void vfio_migration_set_device_state(VFIODevice 
*vbasedev,
  {
  VFIOMigration *migration = vbasedev->migration;
  
+trace_vfio_migration_set_device_state(vbasedev->name,

+  mig_state_to_str(state));
+
  migration->device_state = state;
  vfio_migration_send_event(vbasedev);
  }
@@ -146,6 +149,9 @@ static int vfio_migration_set_state(VFIODevice *vbasedev,
  (struct vfio_device_feature_mig_state *)feature->data;
  int ret;
  
+trace_vfio_migration_set_state(vbasedev->name, mig_state_to_str(new_state),

+   mig_state_to_str(recover_state));
+
  if (new_state == migration->device_state) {
  return 0;
  }
@@ -203,8 +209,6 @@ static int vfio_migration_set_state(VFIODevice *vbasedev,
  migration->data_fd = mig_state->data_fd;
  }
  
-trace_vfio_migration_set_state(vbasedev->name, mig_state_to_str(new_state));

-
  return 0;
  
  reset_device:

diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events
index f0474b244b..64161bf6f4 100644
--- a/hw/vfio/trace-events
+++ b/hw/vfio/trace-events
@@ -152,7 +152,8 @@ vfio_load_device_config_state(const char *name) " (%s)"
  vfio_load_state(const char *name, uint64_t data) " (%s) data 0x%"PRIx64
  vfio_load_state_device_data(const char *name, uint64_t data_size, int ret) " (%s) size 
0x%"PRIx64" ret %d"
  vfio_migration_realize(const char *name) " (%s)"
-vfio_migration_set_state(const char *name, const char *state) " (%s) state %s"
+vfio_migration_set_device_state(const char *name, const char *state) " (%s) state 
%s"
+vfio_migration_set_state(const char *name, const char *new_state, const char 
*recover_state) " (%s) new state %s, recover state %s"
  vfio_migration_state_notifier(const char *name, int state) " (%s) state %d"
  vfio_save_block(const char *name, int data_size) " (%s) data_size %d"
  vfio_save_cleanup(const char *name) " (%s)"





Re: [PATCH 03/20] docs/qapidoc: delint a tiny portion of the module

2024-05-15 Thread John Snow
On Wed, May 15, 2024 at 5:17 AM Markus Armbruster  wrote:

> John Snow  writes:
>
> > In the coming patches, it's helpful to have a linting baseline. However,
> > there's no need to shuffle around the deck chairs too much, because most
> > of this code will be removed once the new qapidoc generator (the
> > "transmogrifier") is in place.
> >
> > To ease my pain: just turn off the black auto-formatter for most, but
> > not all, of qapidoc.py. This will help ensure that *new* code follows a
> > coding standard without bothering too much with cleaning up the existing
> > code.
> >
> > For manual checking for now, try "black --check qapidoc.py" from the
> > docs/sphinx directory. "pip install black" (without root permissions) if
> > you do not have it installed otherwise.
> >
> > Signed-off-by: John Snow 
> > ---
> >  docs/sphinx/qapidoc.py | 16 +---
> >  1 file changed, 9 insertions(+), 7 deletions(-)
> >
> > diff --git a/docs/sphinx/qapidoc.py b/docs/sphinx/qapidoc.py
> > index f270b494f01..1655682d4c7 100644
> > --- a/docs/sphinx/qapidoc.py
> > +++ b/docs/sphinx/qapidoc.py
> > @@ -28,28 +28,30 @@
> >  import re
> >
> >  from docutils import nodes
> > +from docutils.parsers.rst import Directive, directives
> >  from docutils.statemachine import ViewList
> > -from docutils.parsers.rst import directives, Directive
> > -from sphinx.errors import ExtensionError
> > -from sphinx.util.nodes import nested_parse_with_titles
> > -import sphinx
> > -from qapi.gen import QAPISchemaVisitor
> >  from qapi.error import QAPIError, QAPISemError
> > +from qapi.gen import QAPISchemaVisitor
> >  from qapi.schema import QAPISchema
> >
> > +import sphinx
> > +from sphinx.errors import ExtensionError
> > +from sphinx.util.nodes import nested_parse_with_titles
> > +
>
> Exchanges old pylint gripe
>
> docs/sphinx/qapidoc.py:45:4: C0412: Imports from package sphinx are
> not grouped (ungrouped-imports)
>
> for new gripes
>
> docs/sphinx/qapidoc.py:37:0: C0411: third party import "import sphinx"
> should be placed before "from qapi.error import QAPIError, QAPISemError"
> (wrong-import-order)
> docs/sphinx/qapidoc.py:38:0: C0411: third party import "from
> sphinx.errors import ExtensionError" should be placed before "from
> qapi.error import QAPIError, QAPISemError" (wrong-import-order)
> docs/sphinx/qapidoc.py:39:0: C0411: third party import "from
> sphinx.util.nodes import nested_parse_with_titles" should be placed before
> "from qapi.error import QAPIError, QAPISemError" (wrong-import-order)
>
> Easy enough to fix.
>

I believe these errors are caused by the fact that the tools are confused
about the "sphinx" namespace - some interpret them as being the local
"module", the docs/sphinx/ directory, and others believe them to be the
third party external package.

I have not been using pylint on docs/sphinx/ files because of the
difficulty of managing imports - this environment is generally beyond the
reaches of my python borgcube and at present I don't have plans to
integrate it.

At the moment, I am using black, isort and flake8 for qapidoc.py and
they're happy with it. I am not using mypy because I never did the typing
boogaloo with qapidoc.py and I won't be bothering - except for any new code
I write, which *will* bother. By the end of the new transmogrifier,
qapidoc.py *will* strictly typecheck.

pylint may prove to be an issue with the imports, though. isort also seems
to misunderstand "sphinx, the stuff in this folder" and "sphinx, the stuff
in a third party package" and so I'm afraid I don't have any good ability
to get pylint to play along, here.

Pleading for "Sorry, this sucks and I can't figure out how to solve it
quickly". Maybe a future project, apologies.


> >
> >  # Sphinx up to 1.6 uses AutodocReporter; 1.7 and later
> >  # use switch_source_input. Check borrowed from kerneldoc.py.
> > -Use_SSI = sphinx.__version__[:3] >= '1.7'
> > +Use_SSI = sphinx.__version__[:3] >= "1.7"
> >  if Use_SSI:
> >  from sphinx.util.docutils import switch_source_input
> >  else:
> >  from sphinx.ext.autodoc import AutodocReporter
> >
> >
> > -__version__ = '1.0'
> > +__version__ = "1.0"
> >
> >
> > +# fmt: off
>
> I figure this tells black to keep quiet for the remainder of the file.
> Worth a comment, I think.
>
> >  # Function borrowed from pydash, which is under the MIT license
> >  def intersperse(iterable, separator):
> >  """Yield the members of *iterable* interspersed with *separator*."""
>
> With my comments addressed
> Reviewed-by: Markus Armbruster 
>

^ Dropping this unless you're okay with the weird import orders owing to
the strange import paradigm in the sphinx folder.


Re: [PATCH v3 0/5] Fix "virtio-gpu: fix scanout migration post-load"

2024-05-15 Thread Peter Xu
On Wed, May 15, 2024 at 06:15:51PM +0400, marcandre.lur...@redhat.com wrote:
> From: Marc-André Lureau 
> 
> Hi,
> 
> The aforementioned patch breaks virtio-gpu device migrations for versions
> pre-9.0/9.0, both forwards and backwards. Versioning of `VMS_STRUCT` is more
> complex than it may initially appear, as evidenced in the problematic commit
> dfcf74fa68c ("virtio-gpu: fix scanout migration post-load").
> 
> v2:
>  - use a manual version field test (instead of the more complex struct 
> variant)
> 
> v3:
>  - introduce machine_check_version()
>  - drop the VMSD version, and use machine version field test

Thanks for trying this out already.

Last time I mentioned this may for the long term because I remember Dan and
Thomas were trying to work on some machine deprecation work, and maybe such
things may collapse with that work (and perhaps easier with that work
landed, too?).  Just to copy them both here so we know where we are now, as
I didn't follow that discussion.  IOW, patch 3/4 may need separate review
from outside migration..

The simpler solution is we stick with the customized field and simple fix
to the issue first, then whenever we have that new helper later we simply
use the new helper to replace the old, alongside we can drop the new field
/ property too as long as it is declared with "x-".  Might be easier to
backport too in this case.  Marc-Andre, what do you think?

Thanks,

-- 
Peter Xu




Re: [PATCH v3 4/5] Set major/minor for PC and arm machines

2024-05-15 Thread Michael S. Tsirkin
On Wed, May 15, 2024 at 06:15:55PM +0400, marcandre.lur...@redhat.com wrote:
> From: Marc-André Lureau 
> 
> Signed-off-by: Marc-André Lureau 


I would much rather compat machinery was in one place
as opposed to being spread all over the codebase as this
new API would encourage.

> ---
>  include/hw/i386/pc.h |  4 ++-
>  hw/arm/virt.c|  2 ++
>  hw/i386/pc_piix.c| 74 ++--
>  hw/i386/pc_q35.c | 62 ++---
>  4 files changed, 73 insertions(+), 69 deletions(-)
> 
> diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
> index e52290916c..fa91bb7603 100644
> --- a/include/hw/i386/pc.h
> +++ b/include/hw/i386/pc.h
> @@ -292,12 +292,14 @@ extern const size_t pc_compat_2_1_len;
>  extern GlobalProperty pc_compat_2_0[];
>  extern const size_t pc_compat_2_0_len;
>  
> -#define DEFINE_PC_MACHINE(suffix, namestr, initfn, optsfn) \
> +#define DEFINE_PC_MACHINE(maj, min, suffix, namestr, initfn, optsfn) \
>  static void pc_machine_##suffix##_class_init(ObjectClass *oc, void 
> *data) \
>  { \
>  MachineClass *mc = MACHINE_CLASS(oc); \
>  optsfn(mc); \
>  mc->init = initfn; \
> +mc->major = maj; \
> +mc->minor = min; \
>  } \
>  static const TypeInfo pc_machine_type_##suffix = { \
>  .name   = namestr TYPE_MACHINE_SUFFIX, \
> diff --git a/hw/arm/virt.c b/hw/arm/virt.c
> index 3c93c0c0a6..7e3a03b39a 100644
> --- a/hw/arm/virt.c
> +++ b/hw/arm/virt.c
> @@ -109,6 +109,8 @@ static void arm_virt_compat_set(MachineClass *mc)
>  arm_virt_compat_set(mc); \
>  virt_machine_##major##_##minor##_options(mc); \
>  mc->desc = "QEMU " # major "." # minor " ARM Virtual Machine"; \
> +mc->ma##jor = major; \
> +mc->mi##nor = minor; \
>  if (latest) { \
>  mc->alias = "virt"; \
>  } \
> diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
> index 99efb3c45c..bb6767d8d0 100644
> --- a/hw/i386/pc_piix.c
> +++ b/hw/i386/pc_piix.c
> @@ -477,7 +477,7 @@ static void pc_xen_hvm_init(MachineState *machine)
>  }
>  #endif
>  
> -#define DEFINE_I440FX_MACHINE(suffix, name, compatfn, optionfn) \
> +#define DEFINE_I440FX_MACHINE(major, minor, suffix, name, compatfn, 
> optionfn) \
>  static void pc_init_##suffix(MachineState *machine) \
>  { \
>  void (*compat)(MachineState *m) = (compatfn); \
> @@ -486,7 +486,7 @@ static void pc_xen_hvm_init(MachineState *machine)
>  } \
>  pc_init1(machine, TYPE_I440FX_PCI_DEVICE); \
>  } \
> -DEFINE_PC_MACHINE(suffix, name, pc_init_##suffix, optionfn)
> +DEFINE_PC_MACHINE(major, minor, suffix, name, pc_init_##suffix, optionfn)
>  
>  static void pc_i440fx_machine_options(MachineClass *m)
>  {
> @@ -521,7 +521,7 @@ static void pc_i440fx_9_1_machine_options(MachineClass *m)
>  m->is_default = true;
>  }
>  
> -DEFINE_I440FX_MACHINE(v9_1, "pc-i440fx-9.1", NULL,
> +DEFINE_I440FX_MACHINE(9, 1, v9_1, "pc-i440fx-9.1", NULL,
>pc_i440fx_9_1_machine_options);
>  
>  static void pc_i440fx_9_0_machine_options(MachineClass *m)
> @@ -534,7 +534,7 @@ static void pc_i440fx_9_0_machine_options(MachineClass *m)
>  compat_props_add(m->compat_props, pc_compat_9_0, pc_compat_9_0_len);
>  }
>  
> -DEFINE_I440FX_MACHINE(v9_0, "pc-i440fx-9.0", NULL,
> +DEFINE_I440FX_MACHINE(9, 0, v9_0, "pc-i440fx-9.0", NULL,
>pc_i440fx_9_0_machine_options);
>  
>  static void pc_i440fx_8_2_machine_options(MachineClass *m)
> @@ -549,7 +549,7 @@ static void pc_i440fx_8_2_machine_options(MachineClass *m)
>  pcmc->default_smbios_ep_type = SMBIOS_ENTRY_POINT_TYPE_64;
>  }
>  
> -DEFINE_I440FX_MACHINE(v8_2, "pc-i440fx-8.2", NULL,
> +DEFINE_I440FX_MACHINE(8, 2, v8_2, "pc-i440fx-8.2", NULL,
>pc_i440fx_8_2_machine_options);
>  
>  static void pc_i440fx_8_1_machine_options(MachineClass *m)
> @@ -563,7 +563,7 @@ static void pc_i440fx_8_1_machine_options(MachineClass *m)
>  compat_props_add(m->compat_props, pc_compat_8_1, pc_compat_8_1_len);
>  }
>  
> -DEFINE_I440FX_MACHINE(v8_1, "pc-i440fx-8.1", NULL,
> +DEFINE_I440FX_MACHINE(8, 1, v8_1, "pc-i440fx-8.1", NULL,
>pc_i440fx_8_1_machine_options);
>  
>  static void pc_i440fx_8_0_machine_options(MachineClass *m)
> @@ -578,7 +578,7 @@ static void pc_i440fx_8_0_machine_options(MachineClass *m)
>  pcmc->default_smbios_ep_type = SMBIOS_ENTRY_POINT_TYPE_32;
>  }
>  
> -DEFINE_I440FX_MACHINE(v8_0, "pc-i440fx-8.0", NULL,
> +DEFINE_I440FX_MACHINE(8, 0, v8_0, "pc-i440fx-8.0", NULL,
>pc_i440fx_8_0_machine_options);
>  
>  static void pc_i440fx_7_2_machine_options(MachineClass *m)
> @@ -588,7 +588,7 @@ static void pc_i440fx_7_2_machine_options(MachineClass *m)
>  compat_props_add(m->compat_props, pc_compat_7_2, pc_compat_7_2_len);
>  }
>  
> -DEFINE_I440FX_MACHINE(v7_2, "pc-i440fx-7.2", NULL,
> +DEFINE_I440FX_MACHINE(7, 2, v7_

Re: [PATCH v3 5/5] virtio-gpu: fix v2 migration

2024-05-15 Thread Daniel P . Berrangé
On Wed, May 15, 2024 at 06:15:56PM +0400, marcandre.lur...@redhat.com wrote:
> From: Marc-André Lureau 
> 
> Commit dfcf74fa ("virtio-gpu: fix scanout migration post-load") broke
> forward/backward version migration. Versioning of nested VMSD structures
> is not straightforward, as the wire format doesn't have nested
> structures versions.
> 
> Use the previously introduced machine_check_version() function as a
> field test to ensure proper saving/loading based on the machine version.
> The VMSD.version is irrelevant now.
> 
> Fixes: dfcf74fa ("virtio-gpu: fix scanout migration post-load")
> Suggested-by: Peter Xu 
> Signed-off-by: Marc-André Lureau 
> ---
>  hw/display/virtio-gpu.c | 21 +
>  1 file changed, 13 insertions(+), 8 deletions(-)
> 
> diff --git a/hw/display/virtio-gpu.c b/hw/display/virtio-gpu.c
> index ae831b6b3e..b2d8e5faeb 100644
> --- a/hw/display/virtio-gpu.c
> +++ b/hw/display/virtio-gpu.c
> @@ -20,6 +20,7 @@
>  #include "trace.h"
>  #include "sysemu/dma.h"
>  #include "sysemu/sysemu.h"
> +#include "hw/boards.h"
>  #include "hw/virtio/virtio.h"
>  #include "migration/qemu-file-types.h"
>  #include "hw/virtio/virtio-gpu.h"
> @@ -1166,10 +1167,14 @@ static void virtio_gpu_cursor_bh(void *opaque)
>  virtio_gpu_handle_cursor(&g->parent_obj.parent_obj, g->cursor_vq);
>  }
>  
> +static bool machine_check_9_0(void *opaque, int version)
> +{
> +return machine_check_version(9, 0);
> +}

I think applying version number checks to decide machine type
compatibility is a highly undesirable direction for QEMU to
take.

Machine type compatibility is a difficult problem, but one of
the good aspects about our current solution is that it is
clear what the differences are for each version. We can see
all the compatibility properties/flags/values being set in
one place, in the declaration of each machine's class.

Sprinkling version number checks around the codebase in
arbitrary files will harm visibility of what ABI is expressd
by each machine, and thus is liable to increase the liklihood
of mistakes.

This will negatively impact downstream vendors cherry-picking
patches to their stable branches, as the version number logic
may have incorrect semantics. 

It will also create trouble for downstream vendors who define
their own machines with distinct versioning from upstream, as
there will be confusion over whether a version check is for
the base QEMU version, or the downstream version, and such
code added to the tree is less visible than the machine type
definitions.

Above all, I'm failing to see why there's a compelling reason
for virtio_gpu to diverge from our long standing practice of
adding a named property flag "virtio_scanout_vmstate_fix"
on the machine class, and then setting it in machine types
which need it.


> +
>  static const VMStateDescription vmstate_virtio_gpu_scanout = {
>  .name = "virtio-gpu-one-scanout",
> -.version_id = 2,
> -.minimum_version_id = 1,
> +.version_id = 1,
>  .fields = (const VMStateField[]) {
>  VMSTATE_UINT32(resource_id, struct virtio_gpu_scanout),
>  VMSTATE_UINT32(width, struct virtio_gpu_scanout),
> @@ -1181,12 +1186,12 @@ static const VMStateDescription 
> vmstate_virtio_gpu_scanout = {
>  VMSTATE_UINT32(cursor.hot_y, struct virtio_gpu_scanout),
>  VMSTATE_UINT32(cursor.pos.x, struct virtio_gpu_scanout),
>  VMSTATE_UINT32(cursor.pos.y, struct virtio_gpu_scanout),
> -VMSTATE_UINT32_V(fb.format, struct virtio_gpu_scanout, 2),
> -VMSTATE_UINT32_V(fb.bytes_pp, struct virtio_gpu_scanout, 2),
> -VMSTATE_UINT32_V(fb.width, struct virtio_gpu_scanout, 2),
> -VMSTATE_UINT32_V(fb.height, struct virtio_gpu_scanout, 2),
> -VMSTATE_UINT32_V(fb.stride, struct virtio_gpu_scanout, 2),
> -VMSTATE_UINT32_V(fb.offset, struct virtio_gpu_scanout, 2),
> +VMSTATE_UINT32_TEST(fb.format, struct virtio_gpu_scanout, 
> machine_check_9_0),
> +VMSTATE_UINT32_TEST(fb.bytes_pp, struct virtio_gpu_scanout, 
> machine_check_9_0),
> +VMSTATE_UINT32_TEST(fb.width, struct virtio_gpu_scanout, 
> machine_check_9_0),
> +VMSTATE_UINT32_TEST(fb.height, struct virtio_gpu_scanout, 
> machine_check_9_0),
> +VMSTATE_UINT32_TEST(fb.stride, struct virtio_gpu_scanout, 
> machine_check_9_0),
> +VMSTATE_UINT32_TEST(fb.offset, struct virtio_gpu_scanout, 
> machine_check_9_0),
>  VMSTATE_END_OF_LIST()
>  },
>  };
> -- 
> 2.41.0.28.gd7d8841f67
> 
> 

With regards,
Daniel
-- 
|: https://berrange.com  -o-https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org -o-https://fstop138.berrange.com :|
|: https://entangle-photo.org-o-https://www.instagram.com/dberrange :|




Re: [PATCH v3 5/5] virtio-gpu: fix v2 migration

2024-05-15 Thread Michael S. Tsirkin
On Wed, May 15, 2024 at 06:15:56PM +0400, marcandre.lur...@redhat.com wrote:
> From: Marc-André Lureau 
> 
> Commit dfcf74fa ("virtio-gpu: fix scanout migration post-load") broke
> forward/backward version migration. Versioning of nested VMSD structures
> is not straightforward, as the wire format doesn't have nested
> structures versions.
> 
> Use the previously introduced check_machine_version() function as a
> field test to ensure proper saving/loading based on the machine version.
> The VMSD.version is irrelevant now.
> 
> Fixes: dfcf74fa ("virtio-gpu: fix scanout migration post-load")
> Suggested-by: Peter Xu 
> Signed-off-by: Marc-André Lureau 

I don't get it. Our standard way to do it is:
- add a property (begin name with x- so we don't commit to an API)
- set from compat machinery
- test property value in VMSTATE macros

Big advantage is, it works well with any downstreams
which pick any properties they like.
Why is this not a good fit here?


> ---
>  hw/display/virtio-gpu.c | 21 +
>  1 file changed, 13 insertions(+), 8 deletions(-)
> 
> diff --git a/hw/display/virtio-gpu.c b/hw/display/virtio-gpu.c
> index ae831b6b3e..b2d8e5faeb 100644
> --- a/hw/display/virtio-gpu.c
> +++ b/hw/display/virtio-gpu.c
> @@ -20,6 +20,7 @@
>  #include "trace.h"
>  #include "sysemu/dma.h"
>  #include "sysemu/sysemu.h"
> +#include "hw/boards.h"
>  #include "hw/virtio/virtio.h"
>  #include "migration/qemu-file-types.h"
>  #include "hw/virtio/virtio-gpu.h"
> @@ -1166,10 +1167,14 @@ static void virtio_gpu_cursor_bh(void *opaque)
>  virtio_gpu_handle_cursor(&g->parent_obj.parent_obj, g->cursor_vq);
>  }
>  
> +static bool machine_check_9_0(void *opaque, int version)
> +{
> +return machine_check_version(9, 0);
> +}
> +
>  static const VMStateDescription vmstate_virtio_gpu_scanout = {
>  .name = "virtio-gpu-one-scanout",
> -.version_id = 2,
> -.minimum_version_id = 1,
> +.version_id = 1,
>  .fields = (const VMStateField[]) {
>  VMSTATE_UINT32(resource_id, struct virtio_gpu_scanout),
>  VMSTATE_UINT32(width, struct virtio_gpu_scanout),
> @@ -1181,12 +1186,12 @@ static const VMStateDescription 
> vmstate_virtio_gpu_scanout = {
>  VMSTATE_UINT32(cursor.hot_y, struct virtio_gpu_scanout),
>  VMSTATE_UINT32(cursor.pos.x, struct virtio_gpu_scanout),
>  VMSTATE_UINT32(cursor.pos.y, struct virtio_gpu_scanout),
> -VMSTATE_UINT32_V(fb.format, struct virtio_gpu_scanout, 2),
> -VMSTATE_UINT32_V(fb.bytes_pp, struct virtio_gpu_scanout, 2),
> -VMSTATE_UINT32_V(fb.width, struct virtio_gpu_scanout, 2),
> -VMSTATE_UINT32_V(fb.height, struct virtio_gpu_scanout, 2),
> -VMSTATE_UINT32_V(fb.stride, struct virtio_gpu_scanout, 2),
> -VMSTATE_UINT32_V(fb.offset, struct virtio_gpu_scanout, 2),
> +VMSTATE_UINT32_TEST(fb.format, struct virtio_gpu_scanout, 
> machine_check_9_0),
> +VMSTATE_UINT32_TEST(fb.bytes_pp, struct virtio_gpu_scanout, 
> machine_check_9_0),
> +VMSTATE_UINT32_TEST(fb.width, struct virtio_gpu_scanout, 
> machine_check_9_0),
> +VMSTATE_UINT32_TEST(fb.height, struct virtio_gpu_scanout, 
> machine_check_9_0),
> +VMSTATE_UINT32_TEST(fb.stride, struct virtio_gpu_scanout, 
> machine_check_9_0),
> +VMSTATE_UINT32_TEST(fb.offset, struct virtio_gpu_scanout, 
> machine_check_9_0),
>  VMSTATE_END_OF_LIST()
>  },
>  };
> -- 
> 2.41.0.28.gd7d8841f67




Re: [PATCH v3 2/2] vhost-user: fix lost reconnect again

2024-05-15 Thread Raphael Norwitz
The case you're describing makes sense, but now I have some concerns
about the vhost_dev_cleanup bit.

On Wed, May 15, 2024 at 1:47 AM Li Feng  wrote:
>
>
>
> > 2024年5月14日 21:58,Raphael Norwitz  写道:
> >
> > Code looks good. Just a question on the error case you're trying to fix.
> >
> > On Tue, May 14, 2024 at 2:12 AM Li Feng  wrote:
> >>
> >> When the vhost-user is reconnecting to the backend, and if the vhost-user 
> >> fails
> >> at the get_features in vhost_dev_init(), then the reconnect will fail
> >> and it will not be retriggered forever.
> >>
> >> The reason is:
> >> When the vhost-user fail at get_features, the vhost_dev_cleanup will be 
> >> called
> >> immediately.
> >>
> >> vhost_dev_cleanup calls 'memset(hdev, 0, sizeof(struct vhost_dev))'.
> >>
> >> The reconnect path is:
> >> vhost_user_blk_event
> >>   vhost_user_async_close(.. vhost_user_blk_disconnect ..)
> >> qemu_chr_fe_set_handlers <- clear the notifier callback
> >>   schedule vhost_user_async_close_bh
> >>
> >> The vhost->vdev is null, so the vhost_user_blk_disconnect will not be
> >> called, then the event fd callback will not be reinstalled.
> >>
> >> With this patch, the vhost_user_blk_disconnect will call the
> >> vhost_dev_cleanup() again, it's safe.
> >>
> >> In addition, the CLOSE event may occur in a scenario where connected is 
> >> false.
> >> At this time, the event handler will be cleared. We need to ensure that the
> >> event handler can remain installed.
> >
> > Following on from the prior patch, why would "connected" be false when
> > a CLOSE event happens?
>
> In OPEN event handling, vhost_user_blk_connect calls vhost_dev_init and 
> encounters
> an error such that s->connected remains false.
> Next, after the CLOSE event arrives, it is found that s->connected is false, 
> so nothing
> is done, but the event handler will be cleaned up in `vhost_user_async_close`
> before the CLOSE event is executed.
>

Got it - I see why the event handler is never re-installed in the code
as it was before if we fail at get_features. That said, how do you
explain your comment:

> >> With this patch, the vhost_user_blk_disconnect will call the
> >> vhost_dev_cleanup() again, it's safe.

I see vhost_dev_cleanup() accessing hdev without even a NULL check. In
the case we're talking about here I don't think it's a problem because
if vhost_dev_init() fails, connected will be false and hit the goto
but I am concerned that there could be double-frees or use-after-frees
in other cases.

> Thanks,
> Li
>
> >
> >>
> >> All vhost-user devices have this issue, including vhost-user-blk/scsi.
> >>
> >> Fixes: 71e076a07d ("hw/virtio: generalise CHR_EVENT_CLOSED handling")
> >>
> >> Signed-off-by: Li Feng 
> >> ---
> >> hw/block/vhost-user-blk.c   |  3 ++-
> >> hw/scsi/vhost-user-scsi.c   |  3 ++-
> >> hw/virtio/vhost-user-base.c |  3 ++-
> >> hw/virtio/vhost-user.c  | 10 +-
> >> 4 files changed, 7 insertions(+), 12 deletions(-)
> >>
> >> diff --git a/hw/block/vhost-user-blk.c b/hw/block/vhost-user-blk.c
> >> index 41d1ac3a5a..c6842ced48 100644
> >> --- a/hw/block/vhost-user-blk.c
> >> +++ b/hw/block/vhost-user-blk.c
> >> @@ -353,7 +353,7 @@ static void vhost_user_blk_disconnect(DeviceState *dev)
> >> VHostUserBlk *s = VHOST_USER_BLK(vdev);
> >>
> >> if (!s->connected) {
> >> -return;
> >> +goto done;
> >> }
> >> s->connected = false;
> >>
> >> @@ -361,6 +361,7 @@ static void vhost_user_blk_disconnect(DeviceState *dev)
> >>
> >> vhost_dev_cleanup(&s->dev);
> >>
> >> +done:
> >> /* Re-instate the event handler for new connections */
> >> qemu_chr_fe_set_handlers(&s->chardev, NULL, NULL, vhost_user_blk_event,
> >>  NULL, dev, NULL, true);
> >> diff --git a/hw/scsi/vhost-user-scsi.c b/hw/scsi/vhost-user-scsi.c
> >> index 48a59e020e..b49a11d23b 100644
> >> --- a/hw/scsi/vhost-user-scsi.c
> >> +++ b/hw/scsi/vhost-user-scsi.c
> >> @@ -181,7 +181,7 @@ static void vhost_user_scsi_disconnect(DeviceState 
> >> *dev)
> >> VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(dev);
> >>
> >> if (!s->connected) {
> >> -return;
> >> +goto done;
> >> }
> >> s->connected = false;
> >>
> >> @@ -189,6 +189,7 @@ static void vhost_user_scsi_disconnect(DeviceState 
> >> *dev)
> >>
> >> vhost_dev_cleanup(&vsc->dev);
> >>
> >> +done:
> >> /* Re-instate the event handler for new connections */
> >> qemu_chr_fe_set_handlers(&vs->conf.chardev, NULL, NULL,
> >>  vhost_user_scsi_event, NULL, dev, NULL, true);
> >> diff --git a/hw/virtio/vhost-user-base.c b/hw/virtio/vhost-user-base.c
> >> index 4b54255682..11e72b1e3b 100644
> >> --- a/hw/virtio/vhost-user-base.c
> >> +++ b/hw/virtio/vhost-user-base.c
> >> @@ -225,13 +225,14 @@ static void vub_disconnect(DeviceState *dev)
> >> VHostUserBase *vub = VHOST_USER_BASE(vdev);
> >>
> >> if (!vub->connected) {
> >> -return;
> >> +goto done;
> >> }
> >> vub->co

Re: [PATCH v3 1/2] Revert "vhost-user: fix lost reconnect"

2024-05-15 Thread Raphael Norwitz
On Wed, May 15, 2024 at 1:47 AM Li Feng  wrote:
>
>
>
> > 2024年5月14日 21:58,Raphael Norwitz  写道:
> >
> > The code for these two patches looks fine. Just some questions on the
> > failure case you're trying to fix.
> >
> >
> > On Tue, May 14, 2024 at 2:12 AM Li Feng  wrote:
> >>
> >> This reverts commit f02a4b8e6431598612466f76aac64ab492849abf.
> >>
> >> Since the current patch cannot completely fix the lost reconnect
> >> problem, there is a scenario that is not considered:
> >> - When the virtio-blk driver is removed from the guest os,
> >>  s->connected has no chance to be set to false, resulting in
> >
> > Why would the virtio-blk driver being removed (unloaded?) in the guest
> > effect s->connected? Isn't this variable just tracking whether Qemu is
> > connected to the backend process? What does it have to do with the
> > guest driver state?
>
> Unload the virtio-blk, it will trigger ‘vhost_user_blk_stop’, and in 
> `vhost_dev_stop`
> it will set the `hdev->vdev = NULL;`.
>
> Next if kill the backend, the CLOSE event will be triggered, and the 
> `vhost->vdev`
> has been set to null before, then the `vhost_user_blk_disconnect` will not 
> have a
> chance to execute. So the s->connected is still true.

Makes sense - basically if the driver is unloaded and then the device
is brought down s->connected will remain true when it should be false,
which will mess up a subsequent reconnect.

See my comments on the following patch though.

>
> static void vhost_user_async_close_bh(void *opaque)
> {
> VhostAsyncCallback *data = opaque;
> struct vhost_dev *vhost = data->vhost;
>
> /*
>  * If the vhost_dev has been cleared in the meantime there is
>  * nothing left to do as some other path has completed the
>  * cleanup.
>  */
> if (vhost->vdev) {  < HERE vdev is null.
> data->cb(data->dev);
> } else if (data->event_cb) {
> qemu_chr_fe_set_handlers(data->cd, NULL, NULL, data->event_cb,
>  NULL, data->dev, NULL, true);
>}
>
> g_free(data);
> }
>
> Thanks,
> Li
>
> >
> >>  subsequent reconnection not being executed.
> >>
> >> The next patch will completely fix this issue with a better approach.
> >>
> >> Signed-off-by: Li Feng 
> >> ---
> >> hw/block/vhost-user-blk.c  |  2 +-
> >> hw/scsi/vhost-user-scsi.c  |  3 +--
> >> hw/virtio/vhost-user-base.c|  2 +-
> >> hw/virtio/vhost-user.c | 10 ++
> >> include/hw/virtio/vhost-user.h |  3 +--
> >> 5 files changed, 6 insertions(+), 14 deletions(-)
> >>
> >> diff --git a/hw/block/vhost-user-blk.c b/hw/block/vhost-user-blk.c
> >> index 9e6bbc6950..41d1ac3a5a 100644
> >> --- a/hw/block/vhost-user-blk.c
> >> +++ b/hw/block/vhost-user-blk.c
> >> @@ -384,7 +384,7 @@ static void vhost_user_blk_event(void *opaque, 
> >> QEMUChrEvent event)
> >> case CHR_EVENT_CLOSED:
> >> /* defer close until later to avoid circular close */
> >> vhost_user_async_close(dev, &s->chardev, &s->dev,
> >> -   vhost_user_blk_disconnect, 
> >> vhost_user_blk_event);
> >> +   vhost_user_blk_disconnect);
> >> break;
> >> case CHR_EVENT_BREAK:
> >> case CHR_EVENT_MUX_IN:
> >> diff --git a/hw/scsi/vhost-user-scsi.c b/hw/scsi/vhost-user-scsi.c
> >> index a63b1f4948..48a59e020e 100644
> >> --- a/hw/scsi/vhost-user-scsi.c
> >> +++ b/hw/scsi/vhost-user-scsi.c
> >> @@ -214,8 +214,7 @@ static void vhost_user_scsi_event(void *opaque, 
> >> QEMUChrEvent event)
> >> case CHR_EVENT_CLOSED:
> >> /* defer close until later to avoid circular close */
> >> vhost_user_async_close(dev, &vs->conf.chardev, &vsc->dev,
> >> -   vhost_user_scsi_disconnect,
> >> -   vhost_user_scsi_event);
> >> +   vhost_user_scsi_disconnect);
> >> break;
> >> case CHR_EVENT_BREAK:
> >> case CHR_EVENT_MUX_IN:
> >> diff --git a/hw/virtio/vhost-user-base.c b/hw/virtio/vhost-user-base.c
> >> index a83167191e..4b54255682 100644
> >> --- a/hw/virtio/vhost-user-base.c
> >> +++ b/hw/virtio/vhost-user-base.c
> >> @@ -254,7 +254,7 @@ static void vub_event(void *opaque, QEMUChrEvent event)
> >> case CHR_EVENT_CLOSED:
> >> /* defer close until later to avoid circular close */
> >> vhost_user_async_close(dev, &vub->chardev, &vub->vhost_dev,
> >> -   vub_disconnect, vub_event);
> >> +   vub_disconnect);
> >> break;
> >> case CHR_EVENT_BREAK:
> >> case CHR_EVENT_MUX_IN:
> >> diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c
> >> index cdf9af4a4b..c929097e87 100644
> >> --- a/hw/virtio/vhost-user.c
> >> +++ b/hw/virtio/vhost-user.c
> >> @@ -2776,7 +2776,6 @@ typedef struct {
> >> DeviceState *dev;
> >> CharBackend *cd;
> >> struct vhost_dev *vhost;
> >> -IOEventHandler *event_cb;
> >> } Vhost

Re: [PATCH 00/41] target/sparc: Implement VIS4

2024-05-15 Thread Richard Henderson

On 4/29/24 23:02, Richard Henderson wrote:

On 4/29/24 13:52, Mark Cave-Ayland wrote:
No objections here about the remainder of the series, other than that I don't have an 
easy/obvious way to test the new instructions...


I was thinking about adding support to RISU, but the gcc compile farm sparc machines have 
been down for ages, so no way to generate the reference traces.


Update: I have successfully ported RISU to Sparc64, Solaris and Linux.  There is a 
limitation in that I cannot find how to extract %gsr from the signal frame, which is 
unfortunate, but I can work around that for now.


I have added descriptions of VIS1 instructions to RISU, and it turns out we have failures 
relative to a Sparc M8.  I have not yet analyzed these failures, but it proves the effort 
was not wasted.  :-)


I'll clean up these patches and post them here when I next get some downtime.


r~



[PATCH v3 11/28] target/i386: Convert do_xrstor_* to X86Access

2024-05-15 Thread Richard Henderson
The body of do_xrstor is now fully converted.

Reviewed-by: Paolo Bonzini 
Signed-off-by: Richard Henderson 
---
 target/i386/tcg/fpu_helper.c | 51 ++--
 1 file changed, 31 insertions(+), 20 deletions(-)

diff --git a/target/i386/tcg/fpu_helper.c b/target/i386/tcg/fpu_helper.c
index 356397a4ab..7796688514 100644
--- a/target/i386/tcg/fpu_helper.c
+++ b/target/i386/tcg/fpu_helper.c
@@ -2800,8 +2800,9 @@ static void do_clear_sse(CPUX86State *env)
 }
 }
 
-static void do_xrstor_ymmh(CPUX86State *env, target_ulong ptr, uintptr_t ra)
+static void do_xrstor_ymmh(X86Access *ac, target_ulong ptr)
 {
+CPUX86State *env = ac->env;
 int i, nb_xmm_regs;
 
 if (env->hflags & HF_CS64_MASK) {
@@ -2811,8 +2812,8 @@ static void do_xrstor_ymmh(CPUX86State *env, target_ulong 
ptr, uintptr_t ra)
 }
 
 for (i = 0; i < nb_xmm_regs; i++, ptr += 16) {
-env->xmm_regs[i].ZMM_Q(2) = cpu_ldq_data_ra(env, ptr, ra);
-env->xmm_regs[i].ZMM_Q(3) = cpu_ldq_data_ra(env, ptr + 8, ra);
+env->xmm_regs[i].ZMM_Q(2) = access_ldq(ac, ptr);
+env->xmm_regs[i].ZMM_Q(3) = access_ldq(ac, ptr + 8);
 }
 }
 
@@ -2832,29 +2833,32 @@ static void do_clear_ymmh(CPUX86State *env)
 }
 }
 
-static void do_xrstor_bndregs(CPUX86State *env, target_ulong ptr, uintptr_t ra)
+static void do_xrstor_bndregs(X86Access *ac, target_ulong ptr)
 {
+CPUX86State *env = ac->env;
 target_ulong addr = ptr + offsetof(XSaveBNDREG, bnd_regs);
 int i;
 
 for (i = 0; i < 4; i++, addr += 16) {
-env->bnd_regs[i].lb = cpu_ldq_data_ra(env, addr, ra);
-env->bnd_regs[i].ub = cpu_ldq_data_ra(env, addr + 8, ra);
+env->bnd_regs[i].lb = access_ldq(ac, addr);
+env->bnd_regs[i].ub = access_ldq(ac, addr + 8);
 }
 }
 
-static void do_xrstor_bndcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra)
+static void do_xrstor_bndcsr(X86Access *ac, target_ulong ptr)
 {
+CPUX86State *env = ac->env;
+
 /* FIXME: Extend highest implemented bit of linear address.  */
 env->bndcs_regs.cfgu
-= cpu_ldq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.cfgu), ra);
+= access_ldq(ac, ptr + offsetof(XSaveBNDCSR, bndcsr.cfgu));
 env->bndcs_regs.sts
-= cpu_ldq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.sts), ra);
+= access_ldq(ac, ptr + offsetof(XSaveBNDCSR, bndcsr.sts));
 }
 
-static void do_xrstor_pkru(CPUX86State *env, target_ulong ptr, uintptr_t ra)
+static void do_xrstor_pkru(X86Access *ac, target_ulong ptr)
 {
-env->pkru = cpu_ldq_data_ra(env, ptr, ra);
+ac->env->pkru = access_ldq(ac, ptr);
 }
 
 static void do_fxrstor(X86Access *ac, target_ulong ptr)
@@ -2892,6 +2896,7 @@ static void do_xrstor(CPUX86State *env, target_ulong ptr, 
uint64_t rfbm, uintptr
 {
 uint64_t xstate_bv, xcomp_bv, reserve0;
 X86Access ac;
+unsigned size, size_ext;
 
 rfbm &= env->xcr0;
 
@@ -2905,7 +2910,10 @@ static void do_xrstor(CPUX86State *env, target_ulong 
ptr, uint64_t rfbm, uintptr
 raise_exception_ra(env, EXCP0D_GPF, ra);
 }
 
-xstate_bv = cpu_ldq_data_ra(env, ptr + XO(header.xstate_bv), ra);
+size = sizeof(X86LegacyXSaveArea) + sizeof(X86XSaveHeader);
+access_prepare(&ac, env, ptr, size, MMU_DATA_LOAD, ra);
+
+xstate_bv = access_ldq(&ac, ptr + XO(header.xstate_bv));
 
 if ((int64_t)xstate_bv < 0) {
 /* FIXME: Compact form.  */
@@ -2924,14 +2932,17 @@ static void do_xrstor(CPUX86State *env, target_ulong 
ptr, uint64_t rfbm, uintptr
describes only XCOMP_BV, but the description of the standard form
of XRSTOR (Vol 1, Sec 13.8.1) checks bytes 23:8 for zero, which
includes the next 64-bit field.  */
-xcomp_bv = cpu_ldq_data_ra(env, ptr + XO(header.xcomp_bv), ra);
-reserve0 = cpu_ldq_data_ra(env, ptr + XO(header.reserve0), ra);
+xcomp_bv = access_ldq(&ac, ptr + XO(header.xcomp_bv));
+reserve0 = access_ldq(&ac, ptr + XO(header.reserve0));
 if (xcomp_bv || reserve0) {
 raise_exception_ra(env, EXCP0D_GPF, ra);
 }
 
-access_prepare(&ac, env, ptr, sizeof(X86LegacyXSaveArea),
-   MMU_DATA_LOAD, ra);
+size_ext = xsave_area_size(rfbm & xstate_bv, false);
+if (size < size_ext) {
+/* TODO: See if existing page probe has covered extra size. */
+access_prepare(&ac, env, ptr, size_ext, MMU_DATA_LOAD, ra);
+}
 
 if (rfbm & XSTATE_FP_MASK) {
 if (xstate_bv & XSTATE_FP_MASK) {
@@ -2953,14 +2964,14 @@ static void do_xrstor(CPUX86State *env, target_ulong 
ptr, uint64_t rfbm, uintptr
 }
 if (rfbm & XSTATE_YMM_MASK) {
 if (xstate_bv & XSTATE_YMM_MASK) {
-do_xrstor_ymmh(env, ptr + XO(avx_state), ra);
+do_xrstor_ymmh(&ac, ptr + XO(avx_state));
 } else {
 do_clear_ymmh(env);
 }
 }
 if (rfbm & XSTATE_BNDREGS_MASK) {
 if (xstate_bv & XSTATE_BNDREGS_MASK) {
-do_xrstor_bndregs(env, ptr + XO(bndreg_

[PATCH v3 14/28] target/i386: Add {hw, sw}_reserved to X86LegacyXSaveArea

2024-05-15 Thread Richard Henderson
This completes the 512 byte structure, allowing the union to
be removed.  Assert that the structure layout is as expected.

Reviewed-by: Paolo Bonzini 
Signed-off-by: Richard Henderson 
---
 target/i386/cpu.h | 39 +--
 1 file changed, 25 insertions(+), 14 deletions(-)

diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index 97014b14ce..4b3bffeb9c 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -1414,23 +1414,34 @@ typedef struct {
  */
 #define UNASSIGNED_APIC_ID 0x
 
-typedef union X86LegacyXSaveArea {
-struct {
-uint16_t fcw;
-uint16_t fsw;
-uint8_t ftw;
-uint8_t reserved;
-uint16_t fpop;
-uint64_t fpip;
-uint64_t fpdp;
-uint32_t mxcsr;
-uint32_t mxcsr_mask;
-FPReg fpregs[8];
-uint8_t xmm_regs[16][16];
+typedef struct X86LegacyXSaveArea {
+uint16_t fcw;
+uint16_t fsw;
+uint8_t ftw;
+uint8_t reserved;
+uint16_t fpop;
+union {
+struct {
+uint64_t fpip;
+uint64_t fpdp;
+};
+struct {
+uint32_t fip;
+uint32_t fcs;
+uint32_t foo;
+uint32_t fos;
+};
 };
-uint8_t data[512];
+uint32_t mxcsr;
+uint32_t mxcsr_mask;
+FPReg fpregs[8];
+uint8_t xmm_regs[16][16];
+uint32_t hw_reserved[12];
+uint32_t sw_reserved[12];
 } X86LegacyXSaveArea;
 
+QEMU_BUILD_BUG_ON(sizeof(X86LegacyXSaveArea) != 512);
+
 typedef struct X86XSaveHeader {
 uint64_t xstate_bv;
 uint64_t xcomp_bv;
-- 
2.34.1




[PATCH v3 04/28] target/i386: Convert do_fldenv to X86Access

2024-05-15 Thread Richard Henderson
Reviewed-by: Paolo Bonzini 
Signed-off-by: Richard Henderson 
---
 target/i386/tcg/fpu_helper.c | 30 ++
 1 file changed, 14 insertions(+), 16 deletions(-)

diff --git a/target/i386/tcg/fpu_helper.c b/target/i386/tcg/fpu_helper.c
index 6237cd8383..5ad6e04639 100644
--- a/target/i386/tcg/fpu_helper.c
+++ b/target/i386/tcg/fpu_helper.c
@@ -2442,20 +2442,15 @@ static void cpu_set_fpus(CPUX86State *env, uint16_t 
fpus)
 #endif
 }
 
-static void do_fldenv(CPUX86State *env, target_ulong ptr, int data32,
-  uintptr_t retaddr)
+static void do_fldenv(X86Access *ac, target_ulong ptr, int data32)
 {
 int i, fpus, fptag;
+CPUX86State *env = ac->env;
+
+cpu_set_fpuc(env, access_ldw(ac, ptr));
+fpus = access_ldw(ac, ptr + (2 << data32));
+fptag = access_ldw(ac, ptr + (4 << data32));
 
-if (data32) {
-cpu_set_fpuc(env, cpu_lduw_data_ra(env, ptr, retaddr));
-fpus = cpu_lduw_data_ra(env, ptr + 4, retaddr);
-fptag = cpu_lduw_data_ra(env, ptr + 8, retaddr);
-} else {
-cpu_set_fpuc(env, cpu_lduw_data_ra(env, ptr, retaddr));
-fpus = cpu_lduw_data_ra(env, ptr + 2, retaddr);
-fptag = cpu_lduw_data_ra(env, ptr + 4, retaddr);
-}
 cpu_set_fpus(env, fpus);
 for (i = 0; i < 8; i++) {
 env->fptags[i] = ((fptag & 3) == 3);
@@ -2465,7 +2460,10 @@ static void do_fldenv(CPUX86State *env, target_ulong 
ptr, int data32,
 
 void helper_fldenv(CPUX86State *env, target_ulong ptr, int data32)
 {
-do_fldenv(env, ptr, data32, GETPC());
+X86Access ac;
+
+access_prepare(&ac, env, ptr, 14 << data32, MMU_DATA_STORE, GETPC());
+do_fldenv(&ac, ptr, data32);
 }
 
 static void do_fsave(CPUX86State *env, target_ulong ptr, int data32,
@@ -2499,12 +2497,12 @@ static void do_frstor(CPUX86State *env, target_ulong 
ptr, int data32,
 {
 X86Access ac;
 floatx80 tmp;
-int i;
+int i, envsize = 14 << data32;
 
-do_fldenv(env, ptr, data32, retaddr);
-ptr += (target_ulong)14 << data32;
+access_prepare(&ac, env, ptr, envsize + 80, MMU_DATA_LOAD, retaddr);
 
-access_prepare(&ac, env, ptr, 80, MMU_DATA_LOAD, retaddr);
+do_fldenv(&ac, ptr, data32);
+ptr += envsize;
 
 for (i = 0; i < 8; i++) {
 tmp = do_fldt(&ac, ptr);
-- 
2.34.1




  1   2   3   4   >