date:20231007

[RFC PATCH 3/4] virtio-net: Add hash type options

2023-10-07 Thread Akihiko Odaki

By default, virtio-net limits the hash types that will be advertised to
the guest so that all hash types are covered by the offloading
capability the client provides. This change allows to override this
behavior and to advertise hash types that require user-space hash
calculation by specifying "on" for the corresponding properties.

Signed-off-by: Akihiko Odaki 
---
 include/hw/virtio/virtio-net.h |  1 +
 hw/net/virtio-net.c| 48 +++---
 2 files changed, 46 insertions(+), 3 deletions(-)

diff --git a/include/hw/virtio/virtio-net.h b/include/hw/virtio/virtio-net.h
index deba793ec2..33857dd3c2 100644
--- a/include/hw/virtio/virtio-net.h
+++ b/include/hw/virtio/virtio-net.h
@@ -142,6 +142,7 @@ typedef struct VirtioNetRssData {
 uint32_t runtime_hash_types;
 uint32_t supported_hash_types;
 uint32_t peer_hash_types;
+OnOffAuto specified_hash_types[8];
 uint8_t key[VIRTIO_NET_RSS_MAX_KEY_SIZE];
 uint16_t indirections_len;
 uint16_t *indirections_table;
diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
index 3bf1fec2ac..9146a65aa4 100644
--- a/hw/net/virtio-net.c
+++ b/hw/net/virtio-net.c
@@ -1379,16 +1379,31 @@ static bool virtio_net_configure_rss_host(VirtIONet *n, 
Error **errp)
 {
 NetVnetHashCap cap;
 
+n->rss_data.supported_hash_types = VIRTIO_NET_RSS_SUPPORTED_HASHES;
+
 if (qemu_get_vnet_hash_cap(qemu_get_queue(n->nic)->peer, ) &&
 cap.max_indirection_table_length >= VIRTIO_NET_RSS_MAX_TABLE_LEN) {
 n->rss_data.peer_hash_available = true;
-n->rss_data.supported_hash_types = cap.types;
 n->rss_data.peer_hash_types = cap.types;
 
-return true;
+for (size_t i = 0; ; i++) {
+if (i >= ARRAY_SIZE(n->rss_data.specified_hash_types)) {
+n->rss_data.supported_hash_types = cap.types;
+break;
+}
+
+if (n->rss_data.specified_hash_types[i] == ON_OFF_AUTO_ON &&
+!(cap.types & BIT(i + 1))) {
+break;
+}
+}
 }
 
-n->rss_data.supported_hash_types = VIRTIO_NET_RSS_SUPPORTED_HASHES;
+for (size_t i = 0; i < ARRAY_SIZE(n->rss_data.specified_hash_types); i++) {
+if (n->rss_data.specified_hash_types[i] == ON_OFF_AUTO_OFF) {
+n->rss_data.supported_hash_types &= ~BIT(i + 1);
+}
+}
 
 return virtio_net_commit_rss_host_config(n, errp);
 }
@@ -4013,6 +4028,33 @@ static Property virtio_net_properties[] = {
 DEFINE_PROP_INT32("speed", VirtIONet, net_conf.speed, SPEED_UNKNOWN),
 DEFINE_PROP_STRING("duplex", VirtIONet, net_conf.duplex_str),
 DEFINE_PROP_BOOL("failover", VirtIONet, failover, false),
+DEFINE_PROP_ON_OFF_AUTO("hash-ipv4", VirtIONet,
+
rss_data.specified_hash_types[VIRTIO_NET_HASH_REPORT_IPv4 - 1],
+ON_OFF_AUTO_AUTO),
+DEFINE_PROP_ON_OFF_AUTO("hash-tcp4", VirtIONet,
+
rss_data.specified_hash_types[VIRTIO_NET_HASH_REPORT_TCPv4 - 1],
+ON_OFF_AUTO_AUTO),
+DEFINE_PROP_ON_OFF_AUTO("hash-udp4", VirtIONet,
+
rss_data.specified_hash_types[VIRTIO_NET_HASH_REPORT_UDPv4 - 1],
+ON_OFF_AUTO_AUTO),
+DEFINE_PROP_ON_OFF_AUTO("hash-ipv6", VirtIONet,
+
rss_data.specified_hash_types[VIRTIO_NET_HASH_REPORT_IPv6 - 1],
+ON_OFF_AUTO_AUTO),
+DEFINE_PROP_ON_OFF_AUTO("hash-tcp6", VirtIONet,
+
rss_data.specified_hash_types[VIRTIO_NET_HASH_REPORT_TCPv6 - 1],
+ON_OFF_AUTO_AUTO),
+DEFINE_PROP_ON_OFF_AUTO("hash-udp6", VirtIONet,
+
rss_data.specified_hash_types[VIRTIO_NET_HASH_REPORT_UDPv6 - 1],
+ON_OFF_AUTO_AUTO),
+DEFINE_PROP_ON_OFF_AUTO("hash-ipv6ex", VirtIONet,
+
rss_data.specified_hash_types[VIRTIO_NET_HASH_REPORT_IPv6_EX - 1],
+ON_OFF_AUTO_AUTO),
+DEFINE_PROP_ON_OFF_AUTO("hash-tcp6ex", VirtIONet,
+
rss_data.specified_hash_types[VIRTIO_NET_HASH_REPORT_TCPv6_EX - 1],
+ON_OFF_AUTO_AUTO),
+DEFINE_PROP_ON_OFF_AUTO("hash-udp6ex", VirtIONet,
+
rss_data.specified_hash_types[VIRTIO_NET_HASH_REPORT_UDPv6_EX - 1],
+ON_OFF_AUTO_AUTO),
 DEFINE_PROP_END_OF_LIST(),
 };
 
-- 
2.42.0

[RFC PATCH 4/4] docs/devel/ebpf_rss.rst: Update for kernel RSS

2023-10-07 Thread Akihiko Odaki

eBPF RSS virtio-net support was written in assumption that there is only
one alternative RSS implementation: 'in-qemu' RSS. It is no longer true,
and we now have yet another implementation; namely the kernel RSS.

Signed-off-by: Akihiko Odaki 
---
 docs/devel/ebpf_rss.rst | 22 +++---
 1 file changed, 15 insertions(+), 7 deletions(-)

diff --git a/docs/devel/ebpf_rss.rst b/docs/devel/ebpf_rss.rst
index 4a68682b31..487123006c 100644
--- a/docs/devel/ebpf_rss.rst
+++ b/docs/devel/ebpf_rss.rst
@@ -5,9 +5,21 @@ eBPF RSS virtio-net support
 RSS(Receive Side Scaling) is used to distribute network packets to guest 
virtqueues
 by calculating packet hash. Usually every queue is processed then by a 
specific guest CPU core.
 
-For now there are 2 RSS implementations in qemu:
-- 'in-qemu' RSS (functions if qemu receives network packets, i.e. vhost=off)
-- eBPF RSS (can function with also with vhost=on)
+For now there are 3 RSS implementations in qemu:
+1. Kernel RSS
+2. eBPF RSS
+3. 'In-QEMU' RSS
+
+'In-QEMU' RSS is incompatible with vhost since the packets are not routed to
+QEMU. eBPF RSS requires Linux 5.8+. Kernel RSS requires Linux with the 
following
+patches:
+https://lore.kernel.org/all/20231008052101.144422-1-akihiko.od...@daynix.com/
+
+eBPF RSS does not support hash reporting. The hash types and indirection
+table size compatible with the kernel RSS may be limited by the kernel.
+
+virtio-net automatically chooses the RSS implementation to use. Kernel RSS is
+the most preferred, and 'in-QEMU' RSS is the least.
 
 eBPF support (CONFIG_EBPF) is enabled by 'configure' script.
 To enable eBPF RSS support use './configure --enable-bpf'.
@@ -47,9 +59,6 @@ eBPF RSS turned on by different combinations of vhost-net, 
vitrio-net and tap co
 
 tap,vhost=on & virtio-net-pci,rss=on,hash=on
 
-If CONFIG_EBPF is not set then only 'in-qemu' RSS is supported.
-Also 'in-qemu' RSS, as a fallback, is used if the eBPF program failed to load 
or set to TUN.
-
 RSS eBPF program
 
 
@@ -65,7 +74,6 @@ Prerequisites to recompile the eBPF program (regenerate 
ebpf/rss.bpf.skeleton.h)
 $ make -f Makefile.ebpf
 
 Current eBPF RSS implementation uses 'bounded loops' with 'backward jump 
instructions' which present in the last kernels.
-Overall eBPF RSS works on kernels 5.8+.
 
 eBPF RSS implementation
 ---
-- 
2.42.0

[RFC PATCH 2/4] virtio-net: Offload hashing without eBPF

2023-10-07 Thread Akihiko Odaki

Offloading virtio-net hashing to the client can improve the performance
since the performance can reuse the hash calculated for RSS for hash
reporting as well.

Signed-off-by: Akihiko Odaki 
---
 include/hw/virtio/virtio-net.h |   5 +-
 hw/net/virtio-net.c| 164 +++--
 2 files changed, 142 insertions(+), 27 deletions(-)

diff --git a/include/hw/virtio/virtio-net.h b/include/hw/virtio/virtio-net.h
index 5f5dcb4572..deba793ec2 100644
--- a/include/hw/virtio/virtio-net.h
+++ b/include/hw/virtio/virtio-net.h
@@ -138,7 +138,10 @@ typedef struct VirtioNetRssData {
 boolenabled_software_rss;
 boolredirect;
 boolpopulate_hash;
-uint32_t hash_types;
+boolpeer_hash_available;
+uint32_t runtime_hash_types;
+uint32_t supported_hash_types;
+uint32_t peer_hash_types;
 uint8_t key[VIRTIO_NET_RSS_MAX_KEY_SIZE];
 uint16_t indirections_len;
 uint16_t *indirections_table;
diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
index 3bb4bf136d..3bf1fec2ac 100644
--- a/hw/net/virtio-net.c
+++ b/hw/net/virtio-net.c
@@ -158,7 +158,7 @@ static void virtio_net_get_config(VirtIODevice *vdev, 
uint8_t *config)
  virtio_host_has_feature(vdev, VIRTIO_NET_F_RSS) ?
  VIRTIO_NET_RSS_MAX_TABLE_LEN : 1);
 virtio_stl_p(vdev, _hash_types,
- VIRTIO_NET_RSS_SUPPORTED_HASHES);
+ n->rss_data.supported_hash_types);
 memcpy(config, , n->config_size);
 
 /*
@@ -931,6 +931,10 @@ static void virtio_net_set_features(VirtIODevice *vdev, 
uint64_t features)
 features &= ~(1ULL << VIRTIO_NET_F_MTU);
 }
 
+if (!virtio_has_feature(features, VIRTIO_F_VERSION_1)) {
+features &= ~(1ULL << VIRTIO_NET_F_HASH_REPORT);
+}
+
 virtio_net_set_multiqueue(n,
   virtio_has_feature(features, VIRTIO_NET_F_RSS) ||
   virtio_has_feature(features, VIRTIO_NET_F_MQ));
@@ -1224,7 +1228,7 @@ static void rss_data_to_rss_config(struct 
VirtioNetRssData *data,
 {
 config->redirect = data->redirect;
 config->populate_hash = data->populate_hash;
-config->hash_types = data->hash_types;
+config->hash_types = data->runtime_hash_types;
 config->indirections_len = data->indirections_len;
 config->default_queue = data->default_queue;
 }
@@ -1255,18 +1259,62 @@ static void virtio_net_detach_epbf_rss(VirtIONet *n)
 static void virtio_net_commit_rss_config(VirtIONet *n)
 {
 if (n->rss_data.enabled) {
-n->rss_data.enabled_software_rss = n->rss_data.populate_hash;
-if (n->rss_data.populate_hash) {
-virtio_net_detach_epbf_rss(n);
-} else if (!virtio_net_attach_epbf_rss(n)) {
-n->rss_data.enabled_software_rss = true;
+if (n->rss_data.peer_hash_types &&
+(n->rss_data.peer_hash_types & n->rss_data.runtime_hash_types) ==
+n->rss_data.runtime_hash_types) {
+size_t indirection_table_size =
+n->rss_data.indirections_len *
+sizeof(*n->rss_data.indirections_table);
+
+size_t hash_size = sizeof(NetVnetHash) + indirection_table_size +
+   sizeof(n->rss_data.key);
+
+g_autofree struct {
+NetVnetHash header;
+uint8_t footer[];
+} *hash = g_malloc(hash_size);
+
+hash->header.flags =
+(n->rss_data.redirect ? NET_VNET_HASH_RSS : 0) |
+(n->rss_data.populate_hash ? NET_VNET_HASH_REPORT : 0);
+hash->header.types = n->rss_data.supported_hash_types;
+hash->header.indirection_table_mask =
+n->rss_data.indirections_len - 1;
+hash->header.unclassified_queue = n->rss_data.default_queue;
+
+memcpy(hash->footer, n->rss_data.indirections_table,
+   indirection_table_size);
+
+memcpy(hash->footer + indirection_table_size, n->rss_data.key,
+   sizeof(n->rss_data.key));
+
+qemu_set_vnet_hash(qemu_get_queue(n->nic)->peer, hash);
+n->rss_data.enabled_software_rss = false;
+} else {
+if (n->rss_data.peer_hash_types) {
+NetVnetHash hash = { .flags = 0 };
+qemu_set_vnet_hash(qemu_get_queue(n->nic)->peer, );
+}
+
+n->rss_data.enabled_software_rss = n->rss_data.populate_hash;
+if (n->rss_data.populate_hash) {
+virtio_net_detach_epbf_rss(n);
+} else if (!virtio_net_attach_epbf_rss(n)) {
+n->rss_data.enabled_software_rss = true;
+}
 }
 
-trace_virtio_net_rss_enable(n->rss_data.hash_types,
+trace_virtio_net_rss_enable(n->rss_data.runtime_hash_types,
 n->rss_data.indirections_len,
 sizeof(n->rss_data.key));
 } else {
-

[RFC PATCH 1/4] net: Allow to offload virtio-net hashing

2023-10-07 Thread Akihiko Odaki

Offloading virtio-net hashing to the client can improve the performance
since the performance can reuse the hash calculated for RSS for hash
reporting as well.

Signed-off-by: Akihiko Odaki 
---
 include/net/net.h | 47 +++
 net/tap_int.h |  3 +++
 net/net.c | 14 ++
 net/tap-bsd.c | 10 ++
 net/tap-linux.c   | 10 ++
 net/tap-solaris.c | 10 ++
 net/tap-stub.c| 10 ++
 net/tap.c | 14 ++
 8 files changed, 118 insertions(+)

diff --git a/include/net/net.h b/include/net/net.h
index 685ec58318..7eafef1703 100644
--- a/include/net/net.h
+++ b/include/net/net.h
@@ -35,6 +35,47 @@ typedef struct NICConf {
 int32_t bootindex;
 } NICConf;
 
+#ifdef CONFIG_LINUX
+#ifndef TUNGETVNETHASHCAP
+#define TUNGETVNETHASHCAP _IO('T', 228)
+#define TUNSETVNETHASH _IOW('T', 229, unsigned int)
+
+struct tun_vnet_hash_cap {
+uint16_t max_indirection_table_length;
+uint32_t types;
+};
+
+#define TUN_VNET_HASH_RSS 0x01
+#define TUN_VNET_HASH_REPORT 0x02
+struct tun_vnet_hash {
+uint8_t flags;
+uint32_t types;
+uint16_t indirection_table_mask;
+uint16_t unclassified_queue;
+};
+#endif
+
+typedef struct tun_vnet_hash_cap NetVnetHashCap;
+typedef struct tun_vnet_hash NetVnetHash;
+#define NET_VNET_HASH_RSS TUN_VNET_HASH_RSS
+#define NET_VNET_HASH_REPORT TUN_VNET_HASH_REPORT
+#else
+#define NET_VNET_HASH_RSS 1
+#define NET_VNET_HASH_REPORT 2
+
+typedef struct NetVnetHashCap {
+uint16_t max_indirection_table_length;
+uint32_t types;
+} NetVnetHashCap;
+
+typedef struct NetVnetHash {
+uint8_t flags;
+uint32_t types;
+uint16_t indirection_table_mask;
+uint16_t unclassified_queue;
+} NetVnetHash;
+#endif
+
 #define DEFINE_NIC_PROPERTIES(_state, _conf)\
 DEFINE_PROP_MACADDR("mac",   _state, _conf.macaddr),\
 DEFINE_PROP_NETDEV("netdev", _state, _conf.peers)
@@ -61,6 +102,8 @@ typedef void (UsingVnetHdr)(NetClientState *, bool);
 typedef void (SetOffload)(NetClientState *, int, int, int, int, int);
 typedef int (GetVnetHdrLen)(NetClientState *);
 typedef void (SetVnetHdrLen)(NetClientState *, int);
+typedef bool (GetVnetHashCap)(NetClientState *, NetVnetHashCap *);
+typedef void (SetVnetHash)(NetClientState *, const void *);
 typedef int (SetVnetLE)(NetClientState *, bool);
 typedef int (SetVnetBE)(NetClientState *, bool);
 typedef struct SocketReadState SocketReadState;
@@ -93,6 +136,8 @@ typedef struct NetClientInfo {
 SetVnetHdrLen *set_vnet_hdr_len;
 SetVnetLE *set_vnet_le;
 SetVnetBE *set_vnet_be;
+GetVnetHashCap *get_vnet_hash_cap;
+SetVnetHash *set_vnet_hash;
 NetAnnounce *announce;
 SetSteeringEBPF *set_steering_ebpf;
 NetCheckPeerType *check_peer_type;
@@ -197,6 +242,8 @@ void qemu_set_offload(NetClientState *nc, int csum, int 
tso4, int tso6,
   int ecn, int ufo);
 int qemu_get_vnet_hdr_len(NetClientState *nc);
 void qemu_set_vnet_hdr_len(NetClientState *nc, int len);
+bool qemu_get_vnet_hash_cap(NetClientState *nc, NetVnetHashCap *cap);
+void qemu_set_vnet_hash(NetClientState *nc, const void *hash);
 int qemu_set_vnet_le(NetClientState *nc, bool is_le);
 int qemu_set_vnet_be(NetClientState *nc, bool is_be);
 void qemu_macaddr_default_if_unset(MACAddr *macaddr);
diff --git a/net/tap_int.h b/net/tap_int.h
index 547f8a5a28..aa36615600 100644
--- a/net/tap_int.h
+++ b/net/tap_int.h
@@ -27,6 +27,7 @@
 #define NET_TAP_INT_H
 
 #include "qapi/qapi-types-net.h"
+#include "net/net.h"
 
 int tap_open(char *ifname, int ifname_size, int *vnet_hdr,
  int vnet_hdr_required, int mq_required, Error **errp);
@@ -36,9 +37,11 @@ ssize_t tap_read_packet(int tapfd, uint8_t *buf, int maxlen);
 void tap_set_sndbuf(int fd, const NetdevTapOptions *tap, Error **errp);
 int tap_probe_vnet_hdr(int fd, Error **errp);
 int tap_probe_vnet_hdr_len(int fd, int len);
+bool tap_probe_vnet_hash_cap(int fd, NetVnetHashCap *cap);
 int tap_probe_has_ufo(int fd);
 void tap_fd_set_offload(int fd, int csum, int tso4, int tso6, int ecn, int 
ufo);
 void tap_fd_set_vnet_hdr_len(int fd, int len);
+void tap_fd_set_vnet_hash(int fd, const void *hash);
 int tap_fd_set_vnet_le(int fd, int vnet_is_le);
 int tap_fd_set_vnet_be(int fd, int vnet_is_be);
 int tap_fd_enable(int fd);
diff --git a/net/net.c b/net/net.c
index 3523cceafc..53372dc6aa 100644
--- a/net/net.c
+++ b/net/net.c
@@ -562,6 +562,20 @@ void qemu_set_vnet_hdr_len(NetClientState *nc, int len)
 nc->info->set_vnet_hdr_len(nc, len);
 }
 
+bool qemu_get_vnet_hash_cap(NetClientState *nc, NetVnetHashCap *cap)
+{
+if (!nc || !nc->info->get_vnet_hash_cap) {
+return false;
+}
+
+return nc->info->get_vnet_hash_cap(nc, cap);
+}
+
+void qemu_set_vnet_hash(NetClientState *nc, const void *hash)
+{
+nc->info->set_vnet_hash(nc, hash);
+}
+
 int qemu_set_vnet_le(NetClientState *nc, bool is_le)
 {
 #if HOST_BIG_ENDIAN

[PATCH 4/6] virtio-net: Unify the logic to update NIC state for RSS

2023-10-07 Thread Akihiko Odaki

The code to attach or detach the eBPF program to RSS were duplicated so
unify them into one function to save some code.

Signed-off-by: Akihiko Odaki 
---
 hw/net/virtio-net.c | 90 ++---
 1 file changed, 36 insertions(+), 54 deletions(-)

diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
index 83bac9a98a..3c3440ab72 100644
--- a/hw/net/virtio-net.c
+++ b/hw/net/virtio-net.c
@@ -1212,18 +1212,6 @@ static int virtio_net_handle_announce(VirtIONet *n, 
uint8_t cmd,
 }
 }
 
-static void virtio_net_detach_epbf_rss(VirtIONet *n);
-
-static void virtio_net_disable_rss(VirtIONet *n)
-{
-if (n->rss_data.enabled) {
-trace_virtio_net_rss_disable();
-}
-n->rss_data.enabled = false;
-
-virtio_net_detach_epbf_rss(n);
-}
-
 static bool virtio_net_attach_ebpf_to_backend(NICState *nic, int prog_fd)
 {
 NetClientState *nc = qemu_get_peer(qemu_get_queue(nic), 0);
@@ -1271,6 +1259,40 @@ static void virtio_net_detach_epbf_rss(VirtIONet *n)
 virtio_net_attach_ebpf_to_backend(n->nic, -1);
 }
 
+static void virtio_net_commit_rss_config(VirtIONet *n)
+{
+if (n->rss_data.enabled) {
+n->rss_data.enabled_software_rss = n->rss_data.populate_hash;
+if (n->rss_data.populate_hash) {
+virtio_net_detach_epbf_rss(n);
+} else if (!virtio_net_attach_epbf_rss(n)) {
+if (get_vhost_net(qemu_get_queue(n->nic)->peer)) {
+warn_report("Can't use eBPF RSS for vhost");
+} else {
+warn_report("Can't use eBPF RSS - fallback to software RSS");
+n->rss_data.enabled_software_rss = true;
+}
+}
+
+trace_virtio_net_rss_enable(n->rss_data.hash_types,
+n->rss_data.indirections_len,
+sizeof(n->rss_data.key));
+} else {
+virtio_net_detach_epbf_rss(n);
+trace_virtio_net_rss_disable();
+}
+}
+
+static void virtio_net_disable_rss(VirtIONet *n)
+{
+if (!n->rss_data.enabled) {
+return;
+}
+
+n->rss_data.enabled = false;
+virtio_net_commit_rss_config(n);
+}
+
 static bool virtio_net_load_ebpf(VirtIONet *n)
 {
 if (!virtio_net_attach_ebpf_to_backend(n->nic, -1)) {
@@ -1399,28 +1421,7 @@ static uint16_t virtio_net_handle_rss(VirtIONet *n,
 goto error;
 }
 n->rss_data.enabled = true;
-
-if (!n->rss_data.populate_hash) {
-if (!virtio_net_attach_epbf_rss(n)) {
-/* EBPF must be loaded for vhost */
-if (get_vhost_net(qemu_get_queue(n->nic)->peer)) {
-warn_report("Can't load eBPF RSS for vhost");
-goto error;
-}
-/* fallback to software RSS */
-warn_report("Can't load eBPF RSS - fallback to software RSS");
-n->rss_data.enabled_software_rss = true;
-}
-} else {
-/* use software RSS for hash populating */
-/* and detach eBPF if was loaded before */
-virtio_net_detach_epbf_rss(n);
-n->rss_data.enabled_software_rss = true;
-}
-
-trace_virtio_net_rss_enable(n->rss_data.hash_types,
-n->rss_data.indirections_len,
-temp.b);
+virtio_net_commit_rss_config(n);
 return queue_pairs;
 error:
 trace_virtio_net_rss_error(err_msg, err_value);
@@ -3016,26 +3017,7 @@ static int virtio_net_post_load_device(void *opaque, int 
version_id)
 }
 }
 
-if (n->rss_data.enabled) {
-n->rss_data.enabled_software_rss = n->rss_data.populate_hash;
-if (!n->rss_data.populate_hash) {
-if (!virtio_net_attach_epbf_rss(n)) {
-if (get_vhost_net(qemu_get_queue(n->nic)->peer)) {
-warn_report("Can't post-load eBPF RSS for vhost");
-} else {
-warn_report("Can't post-load eBPF RSS - "
-"fallback to software RSS");
-n->rss_data.enabled_software_rss = true;
-}
-}
-}
-
-trace_virtio_net_rss_enable(n->rss_data.hash_types,
-n->rss_data.indirections_len,
-sizeof(n->rss_data.key));
-} else {
-trace_virtio_net_rss_disable();
-}
+virtio_net_commit_rss_config(n);
 return 0;
 }
 
-- 
2.42.0

[PATCH 1/6] tap: Fix virtio-net header buffer size

2023-10-07 Thread Akihiko Odaki

The largest possible virtio-net header is struct virtio_net_hdr_v1_hash.

Fixes: fbbdbddec0 ("tap: allow extended virtio header with hash info")
Signed-off-by: Akihiko Odaki 
---
 net/tap.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/tap.c b/net/tap.c
index c6639d9f20..ea46feeaa8 100644
--- a/net/tap.c
+++ b/net/tap.c
@@ -118,7 +118,7 @@ static ssize_t tap_receive_iov(NetClientState *nc, const 
struct iovec *iov,
 TAPState *s = DO_UPCAST(TAPState, nc, nc);
 const struct iovec *iovp = iov;
 struct iovec iov_copy[iovcnt + 1];
-struct virtio_net_hdr_mrg_rxbuf hdr = { };
+struct virtio_net_hdr_v1_hash hdr = { };
 
 if (s->host_vnet_hdr_len && !s->using_vnet_hdr) {
 iov_copy[0].iov_base = 
@@ -136,7 +136,7 @@ static ssize_t tap_receive_raw(NetClientState *nc, const 
uint8_t *buf, size_t si
 TAPState *s = DO_UPCAST(TAPState, nc, nc);
 struct iovec iov[2];
 int iovcnt = 0;
-struct virtio_net_hdr_mrg_rxbuf hdr = { };
+struct virtio_net_hdr_v1_hash hdr = { };
 
 if (s->host_vnet_hdr_len) {
 iov[iovcnt].iov_base = 
-- 
2.42.0

[PATCH 5/6] virtio-net: Return an error when vhost cannot enable RSS

2023-10-07 Thread Akihiko Odaki

vhost requires eBPF for RSS. Even when eBPF is not available, virtio-net
reported RSS availability, and raised a warning only after the
guest requested RSS, and the guest could not know that RSS is not
available.

Check RSS availability during device realization and return an error
if RSS is requested but not available. Assert RSS availability when
the guest actually requests the feature.

Signed-off-by: Akihiko Odaki 
---
 ebpf/ebpf_rss.h  |   2 +-
 ebpf/ebpf_rss-stub.c |   4 +-
 ebpf/ebpf_rss.c  |  68 +-
 hw/net/virtio-net.c  | 114 +--
 4 files changed, 82 insertions(+), 106 deletions(-)

diff --git a/ebpf/ebpf_rss.h b/ebpf/ebpf_rss.h
index bf3f2572c7..1128173572 100644
--- a/ebpf/ebpf_rss.h
+++ b/ebpf/ebpf_rss.h
@@ -36,7 +36,7 @@ bool ebpf_rss_is_loaded(struct EBPFRSSContext *ctx);
 
 bool ebpf_rss_load(struct EBPFRSSContext *ctx);
 
-bool ebpf_rss_set_all(struct EBPFRSSContext *ctx, struct EBPFRSSConfig *config,
+void ebpf_rss_set_all(struct EBPFRSSContext *ctx, struct EBPFRSSConfig *config,
   uint16_t *indirections_table, uint8_t *toeplitz_key);
 
 void ebpf_rss_unload(struct EBPFRSSContext *ctx);
diff --git a/ebpf/ebpf_rss-stub.c b/ebpf/ebpf_rss-stub.c
index e71e229190..525b358597 100644
--- a/ebpf/ebpf_rss-stub.c
+++ b/ebpf/ebpf_rss-stub.c
@@ -28,10 +28,10 @@ bool ebpf_rss_load(struct EBPFRSSContext *ctx)
 return false;
 }
 
-bool ebpf_rss_set_all(struct EBPFRSSContext *ctx, struct EBPFRSSConfig *config,
+void ebpf_rss_set_all(struct EBPFRSSContext *ctx, struct EBPFRSSConfig *config,
   uint16_t *indirections_table, uint8_t *toeplitz_key)
 {
-return false;
+g_assert_not_reached();
 }
 
 void ebpf_rss_unload(struct EBPFRSSContext *ctx)
diff --git a/ebpf/ebpf_rss.c b/ebpf/ebpf_rss.c
index cee658c158..6cdf82d059 100644
--- a/ebpf/ebpf_rss.c
+++ b/ebpf/ebpf_rss.c
@@ -74,42 +74,32 @@ error:
 return false;
 }
 
-static bool ebpf_rss_set_config(struct EBPFRSSContext *ctx,
+static void ebpf_rss_set_config(struct EBPFRSSContext *ctx,
 struct EBPFRSSConfig *config)
 {
 uint32_t map_key = 0;
 
-if (!ebpf_rss_is_loaded(ctx)) {
-return false;
-}
-if (bpf_map_update_elem(ctx->map_configuration,
-_key, config, 0) < 0) {
-return false;
-}
-return true;
+assert(ebpf_rss_is_loaded(ctx));
+assert(!bpf_map_update_elem(ctx->map_configuration, _key, config, 0));
 }
 
-static bool ebpf_rss_set_indirections_table(struct EBPFRSSContext *ctx,
+static void ebpf_rss_set_indirections_table(struct EBPFRSSContext *ctx,
 uint16_t *indirections_table,
 size_t len)
 {
 uint32_t i = 0;
 
-if (!ebpf_rss_is_loaded(ctx) || indirections_table == NULL ||
-   len > VIRTIO_NET_RSS_MAX_TABLE_LEN) {
-return false;
-}
+assert(ebpf_rss_is_loaded(ctx));
+assert(indirections_table);
+assert(len <= VIRTIO_NET_RSS_MAX_TABLE_LEN);
 
 for (; i < len; ++i) {
-if (bpf_map_update_elem(ctx->map_indirections_table, ,
-indirections_table + i, 0) < 0) {
-return false;
-}
+assert(!bpf_map_update_elem(ctx->map_indirections_table, ,
+indirections_table + i, 0));
 }
-return true;
 }
 
-static bool ebpf_rss_set_toepliz_key(struct EBPFRSSContext *ctx,
+static void ebpf_rss_set_toepliz_key(struct EBPFRSSContext *ctx,
  uint8_t *toeplitz_key)
 {
 uint32_t map_key = 0;
@@ -117,41 +107,29 @@ static bool ebpf_rss_set_toepliz_key(struct 
EBPFRSSContext *ctx,
 /* prepare toeplitz key */
 uint8_t toe[VIRTIO_NET_RSS_MAX_KEY_SIZE] = {};
 
-if (!ebpf_rss_is_loaded(ctx) || toeplitz_key == NULL) {
-return false;
-}
+assert(ebpf_rss_is_loaded(ctx));
+assert(toeplitz_key);
+
 memcpy(toe, toeplitz_key, VIRTIO_NET_RSS_MAX_KEY_SIZE);
 *(uint32_t *)toe = ntohl(*(uint32_t *)toe);
 
-if (bpf_map_update_elem(ctx->map_toeplitz_key, _key, toe,
-0) < 0) {
-return false;
-}
-return true;
+assert(!bpf_map_update_elem(ctx->map_toeplitz_key, _key, toe, 0));
 }
 
-bool ebpf_rss_set_all(struct EBPFRSSContext *ctx, struct EBPFRSSConfig *config,
+void ebpf_rss_set_all(struct EBPFRSSContext *ctx, struct EBPFRSSConfig *config,
   uint16_t *indirections_table, uint8_t *toeplitz_key)
 {
-if (!ebpf_rss_is_loaded(ctx) || config == NULL ||
-indirections_table == NULL || toeplitz_key == NULL) {
-return false;
-}
-
-if (!ebpf_rss_set_config(ctx, config)) {
-return false;
-}
+assert(ebpf_rss_is_loaded(ctx));
+assert(config);
+assert(indirections_table);
+assert(toeplitz_key);
 
-if (!ebpf_rss_set_indirections_table(ctx,

[PATCH 6/6] virtio-net: Do not clear VIRTIO_NET_F_RSS

2023-10-07 Thread Akihiko Odaki

Even if eBPF is not available, virtio-net can perform RSS on the
user-space if vhost is disabled although such a configuration results in
a warning. If vhost is enabled, the configuration will be rejected when
realizing the device. Therefore, VIRTIO_NET_F_RSS should not be cleared
even if eBPF is not loaded.

Signed-off-by: Akihiko Odaki 
---
 hw/net/virtio-net.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
index 05f9abdbcd..3bb4bf136d 100644
--- a/hw/net/virtio-net.c
+++ b/hw/net/virtio-net.c
@@ -775,9 +775,6 @@ static uint64_t virtio_net_get_features(VirtIODevice *vdev, 
uint64_t features,
 return features;
 }
 
-if (!ebpf_rss_is_loaded(>ebpf_rss)) {
-virtio_clear_feature(, VIRTIO_NET_F_RSS);
-}
 features = vhost_net_get_features(get_vhost_net(nc->peer), features);
 vdev->backend_features = features;
 
-- 
2.42.0

[PATCH 3/6] virtio-net: Disable RSS on reset

2023-10-07 Thread Akihiko Odaki

RSS is disabled by default.

Fixes: 590790297c ("virtio-net: implement RSS configuration command")
Signed-off-by: Akihiko Odaki 
---
 hw/net/virtio-net.c | 70 +++--
 1 file changed, 36 insertions(+), 34 deletions(-)

diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
index 1ba748c964..83bac9a98a 100644
--- a/hw/net/virtio-net.c
+++ b/hw/net/virtio-net.c
@@ -601,40 +601,6 @@ static void virtio_net_queue_enable(VirtIODevice *vdev, 
uint32_t queue_index)
 }
 }
 
-static void virtio_net_reset(VirtIODevice *vdev)
-{
-VirtIONet *n = VIRTIO_NET(vdev);
-int i;
-
-/* Reset back to compatibility mode */
-n->promisc = 1;
-n->allmulti = 0;
-n->alluni = 0;
-n->nomulti = 0;
-n->nouni = 0;
-n->nobcast = 0;
-/* multiqueue is disabled by default */
-n->curr_queue_pairs = 1;
-timer_del(n->announce_timer.tm);
-n->announce_timer.round = 0;
-n->status &= ~VIRTIO_NET_S_ANNOUNCE;
-
-/* Flush any MAC and VLAN filter table state */
-n->mac_table.in_use = 0;
-n->mac_table.first_multi = 0;
-n->mac_table.multi_overflow = 0;
-n->mac_table.uni_overflow = 0;
-memset(n->mac_table.macs, 0, MAC_TABLE_ENTRIES * ETH_ALEN);
-memcpy(>mac[0], >nic->conf->macaddr, sizeof(n->mac));
-qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
-memset(n->vlans, 0, MAX_VLAN >> 3);
-
-/* Flush any async TX */
-for (i = 0;  i < n->max_queue_pairs; i++) {
-flush_or_purge_queued_packets(qemu_get_subqueue(n->nic, i));
-}
-}
-
 static void peer_test_vnet_hdr(VirtIONet *n)
 {
 NetClientState *nc = qemu_get_queue(n->nic);
@@ -3789,6 +3755,42 @@ static void virtio_net_device_unrealize(DeviceState *dev)
 virtio_cleanup(vdev);
 }
 
+static void virtio_net_reset(VirtIODevice *vdev)
+{
+VirtIONet *n = VIRTIO_NET(vdev);
+int i;
+
+/* Reset back to compatibility mode */
+n->promisc = 1;
+n->allmulti = 0;
+n->alluni = 0;
+n->nomulti = 0;
+n->nouni = 0;
+n->nobcast = 0;
+/* multiqueue is disabled by default */
+n->curr_queue_pairs = 1;
+timer_del(n->announce_timer.tm);
+n->announce_timer.round = 0;
+n->status &= ~VIRTIO_NET_S_ANNOUNCE;
+
+/* Flush any MAC and VLAN filter table state */
+n->mac_table.in_use = 0;
+n->mac_table.first_multi = 0;
+n->mac_table.multi_overflow = 0;
+n->mac_table.uni_overflow = 0;
+memset(n->mac_table.macs, 0, MAC_TABLE_ENTRIES * ETH_ALEN);
+memcpy(>mac[0], >nic->conf->macaddr, sizeof(n->mac));
+qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
+memset(n->vlans, 0, MAX_VLAN >> 3);
+
+/* Flush any async TX */
+for (i = 0;  i < n->max_queue_pairs; i++) {
+flush_or_purge_queued_packets(qemu_get_subqueue(n->nic, i));
+}
+
+virtio_net_disable_rss(n);
+}
+
 static void virtio_net_instance_init(Object *obj)
 {
 VirtIONet *n = VIRTIO_NET(obj);
-- 
2.42.0

[PATCH 2/6] virtio-net: Copy header only when necessary

2023-10-07 Thread Akihiko Odaki

It is necessary to copy the header only for byte swapping. Worse, when
byte swapping is not needed, the header can be larger than the buffer
due to VIRTIO_NET_F_HASH_REPORT, which results in buffer overflow.

Copy the header only when byte swapping is needed.

Fixes: e22f0603fb ("virtio-net: reference implementation of hash report")
Signed-off-by: Akihiko Odaki 
---
 hw/net/virtio-net.c | 26 --
 1 file changed, 12 insertions(+), 14 deletions(-)

diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
index 9a93a2df01..1ba748c964 100644
--- a/hw/net/virtio-net.c
+++ b/hw/net/virtio-net.c
@@ -361,7 +361,8 @@ static void virtio_net_vnet_endian_status(VirtIONet *n, 
uint8_t status)
  * can't do it, we fallback onto fixing the headers in the core
  * virtio-net code.
  */
-n->needs_vnet_hdr_swap = virtio_net_set_vnet_endian(vdev, n->nic->ncs,
+n->needs_vnet_hdr_swap = n->has_vnet_hdr &&
+ virtio_net_set_vnet_endian(vdev, n->nic->ncs,
 queue_pairs, true);
 } else if (virtio_net_started(n, vdev->status)) {
 /* After using the device, we need to reset the network backend to
@@ -2690,7 +2691,7 @@ static int32_t virtio_net_flush_tx(VirtIONetQueue *q)
 return -EINVAL;
 }
 
-if (n->has_vnet_hdr) {
+if (n->needs_vnet_hdr_swap) {
 if (iov_to_buf(out_sg, out_num, 0, , n->guest_hdr_len) <
 n->guest_hdr_len) {
 virtio_error(vdev, "virtio-net header incorrect");
@@ -2698,19 +2699,16 @@ static int32_t virtio_net_flush_tx(VirtIONetQueue *q)
 g_free(elem);
 return -EINVAL;
 }
-if (n->needs_vnet_hdr_swap) {
-virtio_net_hdr_swap(vdev, (void *) );
-sg2[0].iov_base = 
-sg2[0].iov_len = n->guest_hdr_len;
-out_num = iov_copy([1], ARRAY_SIZE(sg2) - 1,
-   out_sg, out_num,
-   n->guest_hdr_len, -1);
-if (out_num == VIRTQUEUE_MAX_SIZE) {
-goto drop;
-}
-out_num += 1;
-out_sg = sg2;
+virtio_net_hdr_swap(vdev, (void *) );
+sg2[0].iov_base = 
+sg2[0].iov_len = n->guest_hdr_len;
+out_num = iov_copy([1], ARRAY_SIZE(sg2) - 1, out_sg, out_num,
+   n->guest_hdr_len, -1);
+if (out_num == VIRTQUEUE_MAX_SIZE) {
+goto drop;
 }
+out_num += 1;
+out_sg = sg2;
 }
 /*
  * If host wants to see the guest header as is, we can
-- 
2.42.0

Re: [PATCH] hw/net/vhost_net: Silence compiler warning when compiling with -Wshadow

2023-10-07 Thread Jason Wang

On Wed, Oct 4, 2023 at 4:57 PM Michael S. Tsirkin  wrote:
>
> On Wed, Oct 04, 2023 at 10:49:39AM +0200, Thomas Huth wrote:
> > Rename the innermost local variables to avoid compiler warnings
> > with "-Wshadow".
> >
> > Signed-off-by: Thomas Huth 
>
> Reviewed-by: Michael S. Tsirkin 

Acked-by: Jason Wang 

Thanks

>
> feel free to merge
>
> > ---
> >  hw/net/vhost_net.c | 8 
> >  1 file changed, 4 insertions(+), 4 deletions(-)
> >
> > diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c
> > index 57427a3997..e8e1661646 100644
> > --- a/hw/net/vhost_net.c
> > +++ b/hw/net/vhost_net.c
> > @@ -313,8 +313,8 @@ fail:
> >  /* Queue might not be ready for start */
> >  continue;
> >  }
> > -int r = vhost_net_set_backend(>dev, );
> > -assert(r >= 0);
> > +int ret = vhost_net_set_backend(>dev, );
> > +assert(ret >= 0);
> >  }
> >  }
> >  if (net->nc->info->poll) {
> > @@ -629,8 +629,8 @@ err_start:
> >  if (net->nc->info->type == NET_CLIENT_DRIVER_TAP) {
> >  file.fd = VHOST_FILE_UNBIND;
> >  file.index = idx;
> > -int r = vhost_net_set_backend(>dev, );
> > -assert(r >= 0);
> > +int ret = vhost_net_set_backend(>dev, );
> > +assert(ret >= 0);
> >  }
> >
> >  vhost_dev_stop(>dev, vdev, false);
> > --
> > 2.41.0
>

Re: [PATCH v2] virtio: use shadow_avail_idx while checking number of heads

2023-10-07 Thread Jason Wang

On Wed, Sep 27, 2023 at 9:51 PM Ilya Maximets  wrote:
>
> We do not need the most up to date number of heads, we only want to
> know if there is at least one.
>
> Use shadow variable as long as it is not equal to the last available
> index checked.  This avoids expensive qatomic dereference of the
> RCU-protected memory region cache as well as the memory access itself.
>
> The change improves performance of the af-xdp network backend by 2-3%.
>
> Signed-off-by: Ilya Maximets 

Acked-by: Jason Wang 

Thanks

> ---
>
> Version 2:
>   - Changed to not skip error checks and a barrier.
>   - Added comments about the need for a barrier.
>
>  hw/virtio/virtio.c | 18 +++---
>  1 file changed, 15 insertions(+), 3 deletions(-)
>
> diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
> index 4577f3f5b3..8a4c3e95d2 100644
> --- a/hw/virtio/virtio.c
> +++ b/hw/virtio/virtio.c
> @@ -999,7 +999,12 @@ void virtqueue_push(VirtQueue *vq, const 
> VirtQueueElement *elem,
>  /* Called within rcu_read_lock().  */
>  static int virtqueue_num_heads(VirtQueue *vq, unsigned int idx)
>  {
> -uint16_t num_heads = vring_avail_idx(vq) - idx;
> +uint16_t avail_idx, num_heads;
> +
> +/* Use shadow index whenever possible. */
> +avail_idx = (vq->shadow_avail_idx != idx) ? vq->shadow_avail_idx
> +  : vring_avail_idx(vq);
> +num_heads = avail_idx - idx;
>
>  /* Check it isn't doing very strange things with descriptor numbers. */
>  if (num_heads > vq->vring.num) {
> @@ -1007,8 +1012,15 @@ static int virtqueue_num_heads(VirtQueue *vq, unsigned 
> int idx)
>   idx, vq->shadow_avail_idx);
>  return -EINVAL;
>  }
> -/* On success, callers read a descriptor at vq->last_avail_idx.
> - * Make sure descriptor read does not bypass avail index read. */
> +/*
> + * On success, callers read a descriptor at vq->last_avail_idx.
> + * Make sure descriptor read does not bypass avail index read.
> + *
> + * This is necessary even if we are using a shadow index, since
> + * the shadow index could have been initialized by calling
> + * vring_avail_idx() outside of this function, i.e., by a guest
> + * memory read not accompanied by a barrier.
> + */
>  if (num_heads) {
>  smp_rmb();
>  }
> --
> 2.41.0
>

Re: [PATCH] target/riscv: deprecate capital 'Z' CPU properties

2023-10-07 Thread Tsukasa OI

That's great!

I submitted a patch set to deal with the exact problem:




But this one is simpler than mine (and also fits to the latest QEMU).

I support this patch set and want it to be merged.

Thanks,
Tsukasa

On 2023/10/08 2:14, Daniel Henrique Barboza wrote:
> At this moment there are eleven CPU extension properties that starts
> with capital 'Z': Zifencei, Zicsr, Zihintntl, Zihintpause, Zawrs, Zfa,
> Zfh, Zfhmin, Zve32f, Zve64f and Zve64d. All other extensions are named
> with lower-case letters.
> 
> We want all properties to be named with lower-case letters since it's
> consistent with the riscv-isa string that we create in the FDT. Having
> these 11 properties to be exceptions can be confusing.
> 
> Deprecate all of them. Create their lower-case counterpart to be used as
> maintained CPU properties. When trying to use any deprecated property a
> warning message will be displayed, recommending users to switch to the
> lower-case variant:
> 
> ./build/qemu-system-riscv64 -M virt -cpu rv64,Zifencei=true --nographic
> qemu-system-riscv64: warning: CPU property 'Zifencei' is deprecated. Please 
> use 'zifencei' instead
> 
> This will give users some time to change their scripts before we remove
> the capital 'Z' properties entirely.
> 
> Signed-off-by: Daniel Henrique Barboza 
> ---
>  docs/about/deprecated.rst  | 23 ++
>  target/riscv/cpu.c | 39 +++---
>  target/riscv/cpu.h |  1 +
>  target/riscv/tcg/tcg-cpu.c | 31 +-
>  4 files changed, 82 insertions(+), 12 deletions(-)
> 
> diff --git a/docs/about/deprecated.rst b/docs/about/deprecated.rst
> index 694b878f36..331f10f930 100644
> --- a/docs/about/deprecated.rst
> +++ b/docs/about/deprecated.rst
> @@ -378,6 +378,29 @@ of generic CPUs: rv32 and rv64 as default CPUs and 'max' 
> as a feature complete
>  CPU for both 32 and 64 bit builds. Users are then discouraged to use the 
> 'any'
>  CPU type starting in 8.2.
>  
> +RISC-V CPU properties which start with with capital 'Z' (since 8.2)
> +^^
> +
> +All RISC-V CPU properties which start with capital 'Z' are being deprecated
> +starting in 8.2. The reason is that they were wrongly added with capital 'Z'
> +in the past. CPU properties were later added with lower-case names, which
> +is the format we want to use from now on.
> +
> +Users which try to use these deprecated properties will receive a warning
> +recommending to switch to their stable counterparts:
> +
> +- "Zifencei" should be replaced with "zifencei"
> +- "Zicsr" should be replaced with "zicsr"
> +- "Zihintntl" should be replaced with "zihintntl"
> +- "Zihintpause" should be replaced with "zihintpause"
> +- "Zawrs" should be replaced with "zawrs"
> +- "Zfa" should be replaced with "zfa"
> +- "Zfh" should be replaced with "zfh"
> +- "Zfhmin" should be replaced with "zfhmin"
> +- "Zve32f" should be replaced with "zve32f"
> +- "Zve64f" should be replaced with "zve64f"
> +- "Zve64d" should be replaced with "zve64d"
> +
>  Block device options
>  
>  
> diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
> index 521bb88538..1cdc3d2609 100644
> --- a/target/riscv/cpu.c
> +++ b/target/riscv/cpu.c
> @@ -1246,17 +1246,17 @@ const char *riscv_get_misa_ext_description(uint32_t 
> bit)
>  const RISCVCPUMultiExtConfig riscv_cpu_extensions[] = {
>  /* Defaults for standard extensions */
>  MULTI_EXT_CFG_BOOL("sscofpmf", ext_sscofpmf, false),
> -MULTI_EXT_CFG_BOOL("Zifencei", ext_ifencei, true),
> -MULTI_EXT_CFG_BOOL("Zicsr", ext_icsr, true),
> -MULTI_EXT_CFG_BOOL("Zihintntl", ext_zihintntl, true),
> -MULTI_EXT_CFG_BOOL("Zihintpause", ext_zihintpause, true),
> -MULTI_EXT_CFG_BOOL("Zawrs", ext_zawrs, true),
> -MULTI_EXT_CFG_BOOL("Zfa", ext_zfa, true),
> -MULTI_EXT_CFG_BOOL("Zfh", ext_zfh, false),
> -MULTI_EXT_CFG_BOOL("Zfhmin", ext_zfhmin, false),
> -MULTI_EXT_CFG_BOOL("Zve32f", ext_zve32f, false),
> -MULTI_EXT_CFG_BOOL("Zve64f", ext_zve64f, false),
> -MULTI_EXT_CFG_BOOL("Zve64d", ext_zve64d, false),
> +MULTI_EXT_CFG_BOOL("zifencei", ext_ifencei, true),
> +MULTI_EXT_CFG_BOOL("zicsr", ext_icsr, true),
> +MULTI_EXT_CFG_BOOL("zihintntl", ext_zihintntl, true),
> +MULTI_EXT_CFG_BOOL("zihintpause", ext_zihintpause, true),
> +MULTI_EXT_CFG_BOOL("zawrs", ext_zawrs, true),
> +MULTI_EXT_CFG_BOOL("zfa", ext_zfa, true),
> +MULTI_EXT_CFG_BOOL("zfh", ext_zfh, false),
> +MULTI_EXT_CFG_BOOL("zfhmin", ext_zfhmin, false),
> +MULTI_EXT_CFG_BOOL("zve32f", ext_zve32f, false),
> +MULTI_EXT_CFG_BOOL("zve64f", ext_zve64f, false),
> +MULTI_EXT_CFG_BOOL("zve64d", ext_zve64d, false),
>  MULTI_EXT_CFG_BOOL("sstc", ext_sstc, true),
>  
>  MULTI_EXT_CFG_BOOL("smstateen",

Re: [PATCH] target/loongarch: fix ASXE flag conflict

2023-10-07 Thread gaosong


在 2023/9/30 下午11:46, Richard Henderson 写道:

On 9/30/23 04:28, Jiajie Chen wrote:

HW_FLAGS_EUEN_ASXE acccidentally conflicts with HW_FLAGS_CRMD_PG,
enabling LASX instructions even when CSR_EUEN.ASXE=0.

Closes: https://gitlab.com/qemu-project/qemu/-/issues/1907 >> Signed-off-by: Jiajie 
Chen 
---
  target/loongarch/cpu.h | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/target/loongarch/cpu.h b/target/loongarch/cpu.h
index f125a8e49b..79ad79a289 100644
--- a/target/loongarch/cpu.h
+++ b/target/loongarch/cpu.h
@@ -462,7 +462,7 @@ static inline void set_pc(CPULoongArchState *env, 
uint64_t value)

  #define HW_FLAGS_CRMD_PG    R_CSR_CRMD_PG_MASK   /* 0x10 */
  #define HW_FLAGS_EUEN_FPE   0x04
  #define HW_FLAGS_EUEN_SXE   0x08
-#define HW_FLAGS_EUEN_ASXE  0x10
+#define HW_FLAGS_EUEN_ASXE  0x40
  #define HW_FLAGS_VA32   0x20
  static inline void cpu_get_tb_cpu_state(CPULoongArchState *env, 
vaddr *pc,


Better to put all defines in bit order, otherwise it will be easy to 
make the same mistake again.

 > With that,
Reviewed-by: Richard Henderson 


r

Reviewed-by: Song Gao 

Thanks.
Song Gao

Re: [RFC PATCH v2 02/21] RAMBlock: Add support of KVM private gmem

2023-10-07 Thread Xiaoyao Li


On 9/22/2023 3:08 PM, David Hildenbrand wrote:

On 22.09.23 02:22, Xiaoyao Li wrote:

On 9/21/2023 4:55 PM, David Hildenbrand wrote:

On 14.09.23 05:50, Xiaoyao Li wrote:

From: Chao Peng 

Add KVM gmem support to RAMBlock so both normal hva based memory
and kvm gmem fd based private memory can be associated in one RAMBlock.

Introduce new flag RAM_KVM_GMEM. It calls KVM ioctl to create private
gmem for the RAMBlock when it's set.



But who sets RAM_KVM_GMEM and when?


The answer is in the next patch. When `private` property of memory
backend is set to true, it will pass RAM_KVM_GMEM flag to
memory_region_init_ram_*()


Okay, assuming that patch (and property) will go away, I assume this 
flag can also go away, right?




If dropping the flag RAM_KVM_GMEM, it seems we need go back to the 
approach of rfc v1[1][2], that allocating gmem inside .region_add() 
callback. Is it accepted by you?


Another option is allocating gmem inside ram_block_add() by checking the 
vm_type (it looks hacky for me). What's your opinion on this option?


One more option is, we keep the RAM_KVM_GMEM as this patch, and change 
"private" property of memory backend into "need_kvm_gmem" field (make it 
not user settable) and "need_kvm_gmem" field will be set to true in 
tdx_kvm_init() specific cgs initialization function.



[1] 
https://lore.kernel.org/qemu-devel/a154c33d-b24d-b713-0dc0-027d54f23...@redhat.com/
[2] 
https://lore.kernel.org/qemu-devel/20230731162201.271114-10-xiaoyao...@intel.com/

Re: [PATCH v4 8/8] vdpa: Send cvq state load commands in parallel

2023-10-07 Thread Hawkins Jiawei

在 2023/10/4 15:33, Eugenio Perez Martin 写道:
> On Tue, Aug 29, 2023 at 7:55 AM Hawkins Jiawei  wrote:
>>
>> This patch enables sending CVQ state load commands
>> in parallel at device startup by following steps:
>>
>>* Refactor vhost_vdpa_net_load_cmd() to iterate through
>> the control commands shadow buffers. This allows different
>> CVQ state load commands to use their own unique buffers.
>>
>>* Delay the polling and checking of buffers until either
>> the SVQ is full or control commands shadow buffers are full.
>>
>> Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1578
>> Signed-off-by: Hawkins Jiawei 
>> ---
>> v4:
>>- refactor argument `cmds_in_flight` to `len` for
>> vhost_vdpa_net_svq_full()
>>- check the return value of vhost_vdpa_net_svq_poll()
>> in vhost_vdpa_net_svq_flush() suggested by Eugenio
>>- use iov_size(), vhost_vdpa_net_load_cursor_reset()
>> and iov_discard_front() to update the cursors instead of
>> accessing it directly according to Eugenio
>>
>> v3: 
>> https://lore.kernel.org/all/3a002790e6c880af928c6470ecbf03e7c65a68bb.1689748694.git.yin31...@gmail.com/
>>
>>   net/vhost-vdpa.c | 155 +--
>>   1 file changed, 97 insertions(+), 58 deletions(-)
>>
>> diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
>> index a71e8c9090..818464b702 100644
>> --- a/net/vhost-vdpa.c
>> +++ b/net/vhost-vdpa.c
>> @@ -646,6 +646,31 @@ static void 
>> vhost_vdpa_net_load_cursor_reset(VhostVDPAState *s,
>>   in_cursor->iov_len = vhost_vdpa_net_cvq_cmd_page_len();
>>   }
>>
>> +/*
>> + * Poll SVQ for multiple pending control commands and check the device's 
>> ack.
>> + *
>> + * Caller should hold the BQL when invoking this function.
>> + *
>> + * @s: The VhostVDPAState
>> + * @len: The length of the pending status shadow buffer
>> + */
>> +static ssize_t vhost_vdpa_net_svq_flush(VhostVDPAState *s, size_t len)
>> +{
>> +/* Device uses a one-byte length ack for each control command */
>> +ssize_t dev_written = vhost_vdpa_net_svq_poll(s, len);
>> +if (unlikely(dev_written != len)) {
>> +return -EIO;
>> +}
>> +
>> +/* check the device's ack */
>> +for (int i = 0; i < len; ++i) {
>> +if (s->status[i] != VIRTIO_NET_OK) {
>> +return -EIO;
>> +}
>> +}
>> +return 0;
>> +}
>> +
>>   static ssize_t vhost_vdpa_net_load_cmd(VhostVDPAState *s,
>>  struct iovec *out_cursor,
>>  struct iovec *in_cursor, uint8_t 
>> class,
>> @@ -660,10 +685,30 @@ static ssize_t vhost_vdpa_net_load_cmd(VhostVDPAState 
>> *s,
>>  cmd_size = sizeof(ctrl) + data_size;
>>   struct iovec out, in;
>>   ssize_t r;
>> +unsigned dummy_cursor_iov_cnt;
>>
>>   assert(data_size < vhost_vdpa_net_cvq_cmd_page_len() - sizeof(ctrl));
>> +if (vhost_vdpa_net_svq_available_slots(s) < 2 ||
>> +iov_size(out_cursor, 1) < cmd_size) {
>> +/*
>> + * It is time to flush all pending control commands if SVQ is full
>> + * or control commands shadow buffers are full.
>> + *
>> + * We can poll here since we've had BQL from the time
>> + * we sent the descriptor.
>> + */
>> +r = vhost_vdpa_net_svq_flush(s, in_cursor->iov_base -
>> + (void *)s->status);
>> +if (unlikely(r < 0)) {
>> +return r;
>> +}
>> +
>> +vhost_vdpa_net_load_cursor_reset(s, out_cursor, in_cursor);
>> +}
>> +
>
> It would be great to merge this flush with the one at
> vhost_vdpa_net_load. We would need to return ENOSPC or similar and
> handle it there.
>
> But it would make it more difficult to iterate through the loading of
> the different parameters, so I think it can be done on top.
>

Hi Eugenio,

Please forgive my poor English, so do you mean the approach in my
patch is acceptable for you?

>>   /* Each CVQ command has one out descriptor and one in descriptor */
>>   assert(vhost_vdpa_net_svq_available_slots(s) >= 2);
>> +assert(iov_size(out_cursor, 1) >= cmd_size);
>>
>
> Same here, I think we can avoid the assertion, right?

You are right, I will remove this assertion.

Thanks!


>
> Apart from that,
>
> Acked-by: Eugenio Pérez 
>
>>   /* Prepare the buffer for out descriptor for the device */
>>   iov_copy(, 1, out_cursor, 1, 0, cmd_size);
>> @@ -681,11 +726,13 @@ static ssize_t vhost_vdpa_net_load_cmd(VhostVDPAState 
>> *s,
>>   return r;
>>   }
>>
>> -/*
>> - * We can poll here since we've had BQL from the time
>> - * we sent the descriptor.
>> - */
>> -return vhost_vdpa_net_svq_poll(s, 1);
>> +/* iterate the cursors */
>> +dummy_cursor_iov_cnt = 1;
>> +iov_discard_front(_cursor, _cursor_iov_cnt, cmd_size);
>> +dummy_cursor_iov_cnt = 1;
>> +iov_discard_front(_cursor, _cursor_iov_cnt, 
>> sizeof(*s->status));
>> +
>> +

Re: [PATCH v4 7/8] vdpa: Introduce cursors to vhost_vdpa_net_loadx()

2023-10-07 Thread Hawkins Jiawei

在 2023/10/4 15:21, Eugenio Perez Martin 写道:
> On Tue, Aug 29, 2023 at 7:55 AM Hawkins Jiawei  wrote:
>>
>> This patch introduces two new arugments, `out_cursor`
>> and `in_cursor`, to vhost_vdpa_net_loadx(). Addtionally,
>> it includes a helper function
>> vhost_vdpa_net_load_cursor_reset() for resetting these
>> cursors.
>>
>> Furthermore, this patch refactors vhost_vdpa_net_load_cmd()
>> so that vhost_vdpa_net_load_cmd() prepares buffers
>> for the device using the cursors arguments, instead
>> of directly accesses `s->cvq_cmd_out_buffer` and
>> `s->status` fields.
>>
>> By making these change, next patches in this series
>> can refactor vhost_vdpa_net_load_cmd() directly to
>> iterate through the control commands shadow buffers,
>> allowing QEMU to send CVQ state load commands in parallel
>> at device startup.
>>
>> Signed-off-by: Hawkins Jiawei 
>> ---
>> v4:
>>- use `struct iovec` instead of `void **` as cursor
>> suggested by Eugenio
>>- add vhost_vdpa_net_load_cursor_reset() helper function
>> to reset the cursors
>>- refactor vhost_vdpa_net_load_cmd() to prepare buffers
>> by iov_copy() instead of accessing `in` and `out` directly
>> suggested by Eugenio
>>
>> v3: 
>> https://lore.kernel.org/all/bf390934673f2b613359ea9d7ac6c89199c31384.1689748694.git.yin31...@gmail.com/
>>
>>   net/vhost-vdpa.c | 114 ---
>>   1 file changed, 77 insertions(+), 37 deletions(-)
>>
>> diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
>> index d9b8b3cf6c..a71e8c9090 100644
>> --- a/net/vhost-vdpa.c
>> +++ b/net/vhost-vdpa.c
>> @@ -633,7 +633,22 @@ static uint16_t 
>> vhost_vdpa_net_svq_available_slots(VhostVDPAState *s)
>>   return vhost_svq_available_slots(svq);
>>   }
>>
>> -static ssize_t vhost_vdpa_net_load_cmd(VhostVDPAState *s, uint8_t class,
>> +static void vhost_vdpa_net_load_cursor_reset(VhostVDPAState *s,
>> + struct iovec *out_cursor,
>> + struct iovec *in_cursor)
>> +{
>> +/* reset the cursor of the output buffer for the device */
>> +out_cursor->iov_base = s->cvq_cmd_out_buffer;
>> +out_cursor->iov_len = vhost_vdpa_net_cvq_cmd_page_len();
>> +
>> +/* reset the cursor of the in buffer for the device */
>> +in_cursor->iov_base = s->status;
>> +in_cursor->iov_len = vhost_vdpa_net_cvq_cmd_page_len();
>> +}
>> +
>> +static ssize_t vhost_vdpa_net_load_cmd(VhostVDPAState *s,
>> +   struct iovec *out_cursor,
>> +   struct iovec *in_cursor, uint8_t 
>> class,
>>  uint8_t cmd, const struct iovec 
>> *data_sg,
>>  size_t data_num)
>>   {
>> @@ -641,28 +656,25 @@ static ssize_t vhost_vdpa_net_load_cmd(VhostVDPAState 
>> *s, uint8_t class,
>>   .class = class,
>>   .cmd = cmd,
>>   };
>> -size_t data_size = iov_size(data_sg, data_num);
>> -/* Buffers for the device */
>> -const struct iovec out = {
>> -.iov_base = s->cvq_cmd_out_buffer,
>> -.iov_len = sizeof(ctrl) + data_size,
>> -};
>> -const struct iovec in = {
>> -.iov_base = s->status,
>> -.iov_len = sizeof(*s->status),
>> -};
>> +size_t data_size = iov_size(data_sg, data_num),
>> +   cmd_size = sizeof(ctrl) + data_size;
>> +struct iovec out, in;
>>   ssize_t r;
>>
>>   assert(data_size < vhost_vdpa_net_cvq_cmd_page_len() - sizeof(ctrl));
>>   /* Each CVQ command has one out descriptor and one in descriptor */
>>   assert(vhost_vdpa_net_svq_available_slots(s) >= 2);
>>
>> -/* pack the CVQ command header */
>> -memcpy(s->cvq_cmd_out_buffer, , sizeof(ctrl));
>> +/* Prepare the buffer for out descriptor for the device */
>> +iov_copy(, 1, out_cursor, 1, 0, cmd_size);
>
> I may be missing something here, but cmd_size should be the bytes
> available in "out", so we don't overrun it.
>
>> +/* Prepare the buffer for in descriptor for the device. */
>> +iov_copy(, 1, in_cursor, 1, 0, sizeof(*s->status));
>>
>
> Same here, although it is impossible for the moment to overrun it as
> all cmds only return one byte.
>

Here we just use iov_copy() to initialize the `out` and `in` variables,
something like:

/* extract the required buffer from the cursor for output */
out.iov_len = cmd_size;
out.iov_base = out_cursor->iov_base;
/* extract the required buffer from the cursor for input */
in.iov_len = sizeof(*s->status);
in.iov_base = in_cursor->iov_base;

I think iov_copy() can improve readability, what do you think?

>> +/* pack the CVQ command header */
>> +iov_from_buf(, 1, 0, , sizeof(ctrl));
>>   /* pack the CVQ command command-specific-data */
>>   iov_to_buf(data_sg, data_num, 0,
>> -   s->cvq_cmd_out_buffer + sizeof(ctrl), data_size);
>> +   out.iov_base + sizeof(ctrl), data_size);
>
>

Re: [PATCH v4 4/8] vdpa: Avoid using vhost_vdpa_net_load_*() outside vhost_vdpa_net_load()

2023-10-07 Thread Hawkins Jiawei

在 2023/10/4 01:48, Eugenio Perez Martin 写道:
> On Tue, Aug 29, 2023 at 7:55 AM Hawkins Jiawei  wrote:
>>
>> Next patches in this series will refactor vhost_vdpa_net_load_cmd()
>> to iterate through the control commands shadow buffers, allowing QEMU
>> to send CVQ state load commands in parallel at device startup.
>>
>> Considering that QEMU always forwards the CVQ command serialized
>> outside of vhost_vdpa_net_load(), it is more elegant to send the
>> CVQ commands directly without invoking vhost_vdpa_net_load_*() helpers.
>>
>> Signed-off-by: Hawkins Jiawei 
>> ---
>> v4:
>>- pack CVQ command by iov_from_buf() instead of accessing
>> `out` directly suggested by Eugenio
>>
>> v3: 
>> https://lore.kernel.org/all/428a8fac2a29b37757fa15ca747be93c0226cb1f.1689748694.git.yin31...@gmail.com/
>>
>>   net/vhost-vdpa.c | 16 +---
>>   1 file changed, 13 insertions(+), 3 deletions(-)
>>
>> diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
>> index e6342b213f..7c67063469 100644
>> --- a/net/vhost-vdpa.c
>> +++ b/net/vhost-vdpa.c
>> @@ -1097,12 +1097,14 @@ static NetClientInfo net_vhost_vdpa_cvq_info = {
>>*/
>>   static int vhost_vdpa_net_excessive_mac_filter_cvq_add(VhostVDPAState *s,
>>  VirtQueueElement 
>> *elem,
>> -   struct iovec *out)
>> +   struct iovec *out,
>> +   const struct iovec 
>> *in)
>>   {
>>   struct virtio_net_ctrl_mac mac_data, *mac_ptr;
>>   struct virtio_net_ctrl_hdr *hdr_ptr;
>>   uint32_t cursor;
>>   ssize_t r;
>> +uint8_t on = 1;
>>
>>   /* parse the non-multicast MAC address entries from CVQ command */
>>   cursor = sizeof(*hdr_ptr);
>> @@ -1150,7 +1152,15 @@ static int 
>> vhost_vdpa_net_excessive_mac_filter_cvq_add(VhostVDPAState *s,
>>* filter table to the vdpa device, it should send the
>>* VIRTIO_NET_CTRL_RX_PROMISC CVQ command to enable promiscuous mode
>>*/
>> -r = vhost_vdpa_net_load_rx_mode(s, VIRTIO_NET_CTRL_RX_PROMISC, 1);
>> +cursor = 0;
>
> I think this is redundant, as "cursor" is not used by the new code and
> it is already set to 0 before its usage, isn't it?
>

You are right, I will remove this code in the v5 patch.

>> +hdr_ptr = out->iov_base;
>> +out->iov_len = sizeof(*hdr_ptr) + sizeof(on);
>> +assert(out->iov_len < vhost_vdpa_net_cvq_cmd_page_len());
>> +
>
> I think we can assume this assertion is never hit, as out->iov_len is
> controlled by this function.
>

Thanks for your suggestion, I will remove this assertion.

Thanks!


> Apart from these nits,
>
> Acked-by: Eugenio Pérez 
>
>> +hdr_ptr->class = VIRTIO_NET_CTRL_RX;
>> +hdr_ptr->cmd = VIRTIO_NET_CTRL_RX_PROMISC;
>> +iov_from_buf(out, 1, sizeof(*hdr_ptr), , sizeof(on));
>> +r = vhost_vdpa_net_cvq_add(s, out, 1, in, 1);
>>   if (unlikely(r < 0)) {
>>   return r;
>>   }
>> @@ -1268,7 +1278,7 @@ static int 
>> vhost_vdpa_net_handle_ctrl_avail(VhostShadowVirtqueue *svq,
>>* the CVQ command direclty.
>>*/
>>   dev_written = vhost_vdpa_net_excessive_mac_filter_cvq_add(s, elem,
>> -  );
>> +, _in);
>>   if (unlikely(dev_written < 0)) {
>>   goto out;
>>   }
>> --
>> 2.25.1
>>
>

Re: [PATCH v4 3/8] vhost: Expose vhost_svq_available_slots()

2023-10-07 Thread Hawkins Jiawei

在 2023/10/4 01:44, Eugenio Perez Martin 写道:
> On Tue, Aug 29, 2023 at 7:55 AM Hawkins Jiawei  wrote:
>>
>> Next patches in this series will delay the polling
>> and checking of buffers until either the SVQ is
>> full or control commands shadow buffers are full,
>> no longer perform an immediate poll and check of
>> the device's used buffers for each CVQ state load command.
>>
>> To achieve this, this patch exposes
>> vhost_svq_available_slots() and introduces a helper function,
>> allowing QEMU to know whether the SVQ is full.
>>
>> Signed-off-by: Hawkins Jiawei 
>> Acked-by: Eugenio Pérez 
>> ---
>>   hw/virtio/vhost-shadow-virtqueue.c | 2 +-
>>   hw/virtio/vhost-shadow-virtqueue.h | 1 +
>>   net/vhost-vdpa.c   | 9 +
>>   3 files changed, 11 insertions(+), 1 deletion(-)
>>
>> diff --git a/hw/virtio/vhost-shadow-virtqueue.c 
>> b/hw/virtio/vhost-shadow-virtqueue.c
>> index e731b1d2ea..fc5f408f77 100644
>> --- a/hw/virtio/vhost-shadow-virtqueue.c
>> +++ b/hw/virtio/vhost-shadow-virtqueue.c
>> @@ -66,7 +66,7 @@ bool vhost_svq_valid_features(uint64_t features, Error 
>> **errp)
>>*
>>* @svq: The svq
>>*/
>> -static uint16_t vhost_svq_available_slots(const VhostShadowVirtqueue *svq)
>> +uint16_t vhost_svq_available_slots(const VhostShadowVirtqueue *svq)
>>   {
>>   return svq->num_free;
>>   }
>> diff --git a/hw/virtio/vhost-shadow-virtqueue.h 
>> b/hw/virtio/vhost-shadow-virtqueue.h
>> index 5bce67837b..19c842a15b 100644
>> --- a/hw/virtio/vhost-shadow-virtqueue.h
>> +++ b/hw/virtio/vhost-shadow-virtqueue.h
>> @@ -114,6 +114,7 @@ typedef struct VhostShadowVirtqueue {
>>
>>   bool vhost_svq_valid_features(uint64_t features, Error **errp);
>>
>> +uint16_t vhost_svq_available_slots(const VhostShadowVirtqueue *svq);
>>   void vhost_svq_push_elem(VhostShadowVirtqueue *svq,
>>const VirtQueueElement *elem, uint32_t len);
>>   int vhost_svq_add(VhostShadowVirtqueue *svq, const struct iovec *out_sg,
>
> I think it is ok to split this export in its own patch. If you decide
> to do it that way, you can add my Acked-by.

I will split this in its own patch, thanks for your suggestion!

>
>> diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
>> index a875767ee9..e6342b213f 100644
>> --- a/net/vhost-vdpa.c
>> +++ b/net/vhost-vdpa.c
>> @@ -620,6 +620,13 @@ static ssize_t vhost_vdpa_net_cvq_add(VhostVDPAState *s,
>>   return vhost_svq_poll(svq, 1);
>>   }
>>
>> +/* Convenience wrapper to get number of available SVQ descriptors */
>> +static uint16_t vhost_vdpa_net_svq_available_slots(VhostVDPAState *s)
>> +{
>> +VhostShadowVirtqueue *svq = g_ptr_array_index(s->vhost_vdpa.shadow_vqs, 
>> 0);
>
> This is not really generic enough for all VhostVDPAState, as dataplane
> ones have two svqs.
>
> I think the best is to just inline the function in the caller, as
> there is only one, isn't it? If not, would it work to just replace
> _net_ by _cvq_ or similar?
>

Yes, there should be only one user for this function, I will inline
the function in the caller.

>> +return vhost_svq_available_slots(svq);
>> +}
>> +
>>   static ssize_t vhost_vdpa_net_load_cmd(VhostVDPAState *s, uint8_t class,
>>  uint8_t cmd, const struct iovec 
>> *data_sg,
>>  size_t data_num)
>> @@ -640,6 +647,8 @@ static ssize_t vhost_vdpa_net_load_cmd(VhostVDPAState 
>> *s, uint8_t class,
>>   };
>>
>>   assert(data_size < vhost_vdpa_net_cvq_cmd_page_len() - sizeof(ctrl));
>> +/* Each CVQ command has one out descriptor and one in descriptor */
>> +assert(vhost_vdpa_net_svq_available_slots(s) >= 2);
>>
>
> I think we should remove this assertion. By the end of the series
> there is an "if" checks explicitly for the opposite condition, and
> flushing the queue in that case, so the code can never reach it.
>

Yes, you are right. I will remove this assertion.

Thanks!


>>   /* pack the CVQ command header */
>>   memcpy(s->cvq_cmd_out_buffer, , sizeof(ctrl));
>> --
>> 2.25.1
>>
>

Re: On integrating LoongArch EDK2 firmware into QEMU build process

2023-10-07 Thread Chao Li


Hi Xuerui,

    Sorry for late reply. In fact the EDK2 repo is ready for submit, in 
a few days I will commit the patch set in kilaterlee/edk2 repo and 
execute the EDK2 CI testing. I will notify some people to review them, 
you are also welcome to review the patch set. And then, I'll submit the 
formal version patch to the EDK2 devel community.



Thanks,
Chao
在 2023/10/1 04:16, WANG Xuerui 写道:

On 3/31/23 08:54, maobibo wrote:

Xuerui,

Thanks for your mail, it is a good suggestion. Now we are planing to 
move LoongArch uefi bios from edk2-platform to edk2 repo, so that 
uefi bios supporting LoongArch can be auto compiled and uploaded to 
qemu repo. Only that process is somwhat slow since lacking of hands, 
however we are doing this.


Pinging: a few months have passed, and it seems this work is stalled? 
Given the LoongArch Linux KVM support is about to land in v6.7, it may 
be time to prepare the firmware and QEMU side of things, so users 
would no longer have to manually acquire the firmware blobs whenever 
they fire up their VMs.




Regards
Bibo, Mao

在 2023/3/30 22:06, WANG Xuerui 写道:

Hi,

Recently there are reportedly increased general interest in trying 
out LoongArch on top of QEMU, among both end users and 
organizations; and the EDK2 firmware port is fully upstreamed since 
the stable202211 version, and a build suitable for QEMU is already 
possible with Platform/Loongson/LoongArchQemuPkg in edk2-platforms. 
I think providing pre-built LoongArch firmware would make it much 
easier to dabble in system emulation, helping those users. (They 
currently have to pull a blob from yangxiaojuan/qemu-binary, and 
remember to pair certain version of QEMU with certain revision of 
the firmware blob. I'm also one of the users who can't remember 
which version to use, but I can always build my own; imagine the 
difficulty an end user would face!)


So I tried to add a LoongArch build to the list stored in roms/, but 
discovered that edk2-platforms seems not included, because all other 
platforms' EDK2 packages are directly under the main edk2 repo.


The question is: is integrating a platform package from 
edk2-platforms okay under the current build system, so we can 
arrange to provide edk2-platforms also as a submodule and go ahead? 
Or do we (the LoongArch firmware community) have to change the code 
organization to make necessary parts available in the main edk2 repo?


CC-ing target/loongarch maintainers from Loongson too as you may 
have more information.

Re: [Virtio-fs] [PATCH v4 2/8] vhost-user.rst: Improve [GS]ET_VRING_BASE doc

2023-10-07 Thread Michael S. Tsirkin

On Fri, Oct 06, 2023 at 03:58:44PM +0200, Hanna Czenczek wrote:
> On 06.10.23 15:55, Hanna Czenczek wrote:
> > On 06.10.23 10:49, Michael S. Tsirkin wrote:
> > > On Fri, Oct 06, 2023 at 09:53:53AM +0200, Hanna Czenczek wrote:
> > > > On 05.10.23 19:38, Stefan Hajnoczi wrote:
> > > > > On Wed, Oct 04, 2023 at 02:58:58PM +0200, Hanna Czenczek wrote:
> 
> [...]
> 
> > > > >    ``VHOST_USER_GET_VRING_BASE``
> > > > >  :id: 11
> > > > >  :equivalent ioctl: ``VHOST_USER_GET_VRING_BASE``
> > > > >  :request payload: vring state description
> > > > > -  :reply payload: vring state description
> > > > > +  :reply payload: vring descriptor index/indices
> > > > > +
> > > > > +  Stops the vring and returns the current descriptor index
> > > > > or indices:
> > > > > +
> > > > > +    * For a split virtqueue, returns only the 16-bit next descriptor
> > > > > +  index in the *Available Ring*.  The index in the *Used Ring* is
> > > > > +  controlled by the guest driver and can be read from the vring
> > > > > I find "is controlled by the guest driver" confusing. The
> > > > > device writes
> > > > > the Used Ring index. The driver only reads it. The device is
> > > > > the active
> > > > > party here.
> > > > Er, good point.  That breaks the whole reasoning.  Then I don’t
> > > > understand
> > > > why we do get/set the available ring index and not the used ring
> > > > index.  Do
> > > > you know why?
> > > It's simple. used ring index in memory is controlled by the device and
> > > reflects device state.
> > 
> > Exactly, it’s device state, that’s why I thought the front-end needs to
> > ensure its read and restored around the reset we currently have in
> > vhost_dev_stop()/start().
> > 
> > > device can just read it back to restore.
> > 
> > I find it strange that the device is supposed to read its own state from
> > memory.
> > 
> > > available ring index in memory is controlled by driver and does
> > > not reflect device state.
> > 
> > Why can’t the device read the available index from memory?  That value
> > is put into memory by the driver precisely so the device can read it
> > from there.
> 
> Ah, wait, is the idea that the device may have an internal available index
> counter that reflects what descriptor it has already fetched? I.e. this
> index will lag behind the one in memory, and the difference are new
> descriptors that the device still needs to read? If that internal counter is
> the index that’s get/set here, then yes, that makes a lot of sense.
> 
> Hanna

Exactly. And this gets eventually written out as used index.

-- 
MST

Re: [Virtio-fs] [PATCH v4 2/8] vhost-user.rst: Improve [GS]ET_VRING_BASE doc

2023-10-07 Thread Michael S. Tsirkin

On Fri, Oct 06, 2023 at 03:55:56PM +0200, Hanna Czenczek wrote:
> On 06.10.23 10:49, Michael S. Tsirkin wrote:
> > On Fri, Oct 06, 2023 at 09:53:53AM +0200, Hanna Czenczek wrote:
> > > On 05.10.23 19:38, Stefan Hajnoczi wrote:
> > > > On Wed, Oct 04, 2023 at 02:58:58PM +0200, Hanna Czenczek wrote:
> > > > > GET_VRING_BASE does not mention that it stops the respective ring.  
> > > > > Fix
> > > > > that.
> > > > > 
> > > > > Furthermore, it is not fully clear what the "base offset" these
> > > > > commands' documentation refers to is; an offset could be many things.
> > > > > Be more precise and verbose about it, especially given that these
> > > > > commands use different payload structures depending on whether the 
> > > > > vring
> > > > > is split or packed.
> > > > > 
> > > > > Signed-off-by: Hanna Czenczek 
> > > > > ---
> > > > >docs/interop/vhost-user.rst | 66 
> > > > > ++---
> > > > >1 file changed, 62 insertions(+), 4 deletions(-)
> > > > > 
> > > > > diff --git a/docs/interop/vhost-user.rst b/docs/interop/vhost-user.rst
> > > > > index 2f68e67a1a..50f5acebe5 100644
> > > > > --- a/docs/interop/vhost-user.rst
> > > > > +++ b/docs/interop/vhost-user.rst
> > > > > @@ -108,6 +108,37 @@ A vring state description
> > > > >:num: a 32-bit number
> > > > > +A vring descriptor index for split virtqueues
> > > > > +^
> > > > > +
> > > > > ++-+-+
> > > > > +| vring index | index in avail ring |
> > > > > ++-+-+
> > > > > +
> > > > > +:vring index: 32-bit index of the respective virtqueue
> > > > > +
> > > > > +:index in avail ring: 32-bit value, of which currently only the 
> > > > > lower 16
> > > > > +  bits are used:
> > > > > +
> > > > > +  - Bits 0–15: Next descriptor index in the *Available Ring*
> > > > I think we need to say more to make this implementable just by reading
> > > > the spec:
> > > > 
> > > > Index of the next *Available Ring* descriptor that the back-end will
> > > > process. This is a free-running index that is not wrapped by the 
> > > > ring
> > > > size.
> > > Sure, thanks.
> > > 
> > > > Feel free to rephrase.
> > > > 
> > > > > +  - Bits 16–31: Reserved (set to zero)
> > > > > +
> > > > > +Vring descriptor indices for packed virtqueues
> > > > > +^^
> > > > > +
> > > > > ++-++
> > > > > +| vring index | descriptor indices |
> > > > > ++-++
> > > > > +
> > > > > +:vring index: 32-bit index of the respective virtqueue
> > > > > +
> > > > > +:descriptor indices: 32-bit value:
> > > > > +
> > > > > +  - Bits 0–14: Index in the *Available Ring*
> > > > Same here.
> > > > 
> > > > > +  - Bit 15: Driver (Available) Ring Wrap Counter
> > > > > +  - Bits 16–30: Index in the *Used Ring*
> > > > Same here.
> > > > 
> > > > > +  - Bit 31: Device (Used) Ring Wrap Counter
> > > > > +
> > > > >A vring address description
> > > > >^^^
> > > > > @@ -1031,18 +1062,45 @@ Front-end message types
> > > > >``VHOST_USER_SET_VRING_BASE``
> > > > >  :id: 10
> > > > >  :equivalent ioctl: ``VHOST_SET_VRING_BASE``
> > > > > -  :request payload: vring state description
> > > > > +  :request payload: vring descriptor index/indices
> > > > >  :reply payload: N/A
> > > > > -  Sets the base offset in the available vring.
> > > > > +  Sets the next index to use for descriptors in this vring:
> > > > > +
> > > > > +  * For a split virtqueue, sets only the next descriptor index in the
> > > > > +*Available Ring*.  The device is supposed to read the next index 
> > > > > in
> > > > > +the *Used Ring* from the respective vring structure in guest 
> > > > > memory.
> > > > > +
> > > > > +  * For a packed virtqueue, both indices are supplied, as they are 
> > > > > not
> > > > > +explicitly available in memory.
> > > > > +
> > > > > +  Consequently, the payload type is specific to the type of virt 
> > > > > queue
> > > > > +  (*a vring descriptor index for split virtqueues* vs. *vring 
> > > > > descriptor
> > > > > +  indices for packed virtqueues*).
> > > > >``VHOST_USER_GET_VRING_BASE``
> > > > >  :id: 11
> > > > >  :equivalent ioctl: ``VHOST_USER_GET_VRING_BASE``
> > > > >  :request payload: vring state description
> > > > > -  :reply payload: vring state description
> > > > > +  :reply payload: vring descriptor index/indices
> > > > > +
> > > > > +  Stops the vring and returns the current descriptor index or 
> > > > > indices:
> > > > > +
> > > > > +* For a split virtqueue, returns only the 16-bit next descriptor
> > > > > +  index in the *Available Ring*.  The index in the *Used Ring* is
> > > > > +  controlled by the guest driver and can be read from the vring
> > > > I find "is controlled by the guest driver" confusing. The device writes
> > > > the

Re: [PATCH v3] hw/cxl: Add QTG _DSM support for ACPI0017 device

2023-10-07 Thread Michael S. Tsirkin

On Fri, Oct 06, 2023 at 01:09:39PM +0100, Jonathan Cameron wrote:
> On Thu, 5 Oct 2023 12:32:11 -0400
> "Michael S. Tsirkin"  wrote:
> 
> > On Thu, Oct 05, 2023 at 09:11:15AM -0700, Dave Jiang wrote:
> > > 
> > > 
> > > On 10/4/23 20:36, Michael S. Tsirkin wrote:  
> > > > 
> > > > On Wed, Oct 04, 2023 at 04:09:07PM -0700, Dave Jiang wrote:  
> > > >> Add a simple _DSM call support for the ACPI0017 device to return a 
> > > >> fake QTG
> > > >> ID value of 0 in all cases. The enabling is for _DSM plumbing testing
> > > >> from the OS.  
> > > > 
> > > > 
> > > > the enabling -> this  
> > > 
> > > will update  
> > > >   
> > > >>
> > > >> Following edited for readbility only  
> > > > 
> > > > readbility only -> readability  
> > > 
> > > will update  
> > > > 
> > > >   
> > > >>
> > > >> Device (CXLM)
> > > >> {
> > > >> Name (_HID, "ACPI0017")  // _HID: Hardware ID
> > > >> ...
> > > >> Method (_DSM, 4, Serialized)  // _DSM: Device-Specific Method
> > > >> {
> > > >> If ((Arg0 == ToUUID ("f365f9a6-a7de-4071-a66a-b40c0b4f8e52")))
> > > >> {
> > > >> If ((Arg2 == Zero))
> > > >> {
> > > >> Return (Buffer (One) { 0x01 })
> > > >> }
> > > >>
> > > >> If ((Arg2 == One))  
> > > >   
> > > >> {
> > > >> Return (Package (0x02)
> > > >> {
> > > >> Buffer (0x02)
> > > >> { 0x01, 0x00 },
> > > >> Package (0x01)
> > > >> {
> > > >> Buffer (0x02)
> > > >> { 0x00, 0x00 }
> > > >> }
> > > >> })
> > > >> }
> > > >> }
> > > >> }
> > > >>
> > > >> Signed-off-by: Dave Jiang 
> > > >> Signed-off-by: Jonathan Cameron 
> > > >>
> > > >> --
> > > >> v3: Fix output assignment to be BE host friendly. Fix typo in comment.
> > > >> According to the CXL spec, the DSM output should be 1 WORD to indicate
> > > >> the max suppoted QTG ID and a package of 0 or more WORDs for the QTG 
> > > >> IDs.
> > > >> In this dummy impementation, we have first WORD with a 1 to indcate max
> > > >> supprted QTG ID of 1. And second WORD in a package to indicate the QTG
> > > >> ID of 0.
> > > >>
> > > >> v2: Minor edit to drop reference to switches in patch description.
> > > >> Message-Id: <20230904161847.18468-3-jonathan.came...@huawei.com>
> > > >> Reviewed-by: Michael S. Tsirkin 
> > > >> Signed-off-by: Michael S. Tsirkin 
> > > >> ---
> > > >>  hw/acpi/cxl.c |   55 
> > > >> +
> > > >>  hw/i386/acpi-build.c  |1 +
> > > >>  include/hw/acpi/cxl.h |1 +
> > > >>  3 files changed, 57 insertions(+)
> > > >>
> > > >> diff --git a/hw/acpi/cxl.c b/hw/acpi/cxl.c
> > > >> index 92b46bc9323b..cce12d5bc81c 100644
> > > >> --- a/hw/acpi/cxl.c
> > > >> +++ b/hw/acpi/cxl.c
> > > >> @@ -30,6 +30,61 @@
> > > >>  #include "qapi/error.h"
> > > >>  #include "qemu/uuid.h"
> > > >>  
> > > >> +void build_cxl_dsm_method(Aml *dev)
> > > >> +{
> > > >> +Aml *method, *ifctx, *ifctx2;
> > > >> +
> > > >> +method = aml_method("_DSM", 4, AML_SERIALIZED);
> > > >> +{
> > > >> +Aml *function, *uuid;
> > > >> +
> > > >> +uuid = aml_arg(0);
> > > >> +function = aml_arg(2);
> > > >> +/* CXL spec v3.0 9.17.3.1 *  
> > > > 
> > > > 
> > > > drop this * please
> > > >   
> > > >> , QTG ID _DSM  
> > > 
> > > Ooops. git format-patch mangled this and I didn't catch. Will fix
> > >   
> > > > 
> > > > 
> > > > this is not the name of this paragraph. pls make it match
> > > > exactly so people can search
> > > >   
> > > >> */
> > > >> +ifctx = aml_if(aml_equal(
> > > >> +uuid, 
> > > >> aml_touuid("F365F9A6-A7DE-4071-A66A-B40C0B4F8E52")));
> > > >> +
> > > >> +/* Function 0, standard DSM query function */
> > > >> +ifctx2 = aml_if(aml_equal(function, aml_int(0)));
> > > >> +{
> > > >> +uint8_t byte_list[1] = { 0x01 }; /* functions 1 only */  
> > > > 
> > > > function 1?  
> > > 
> > > Yes, will fix
> > >   
> > > >   
> > > >> +
> > > >> +aml_append(ifctx2,
> > > >> +   aml_return(aml_buffer(sizeof(byte_list), 
> > > >> byte_list)));
> > > >> +}
> > > >> +aml_append(ifctx, ifctx2);
> > > >> +
> > > >> +/*
> > > >> + * Function 1
> > > >> + * A return value of {1, {0}} indicates that
> > > >> + * max supported QTG ID of 1 and recommended QTG is 0.
> > > >> + * The values here are faked to simplify emulation.  
> > > > 
> > > > again pls quote spec directly do not paraphrase  
> > > 
> > > Here it's not paraphrasing from the spec. I'm just describing the dummy 
> > > value that will be provided.
> > >   
> > > >   
> > > >> + */
> > > >> +ifctx2 = aml_if(aml_equal(function, aml_int(1)));
> > > >> +

[PATCH v2 3/3] qom: Link multiple numa nodes to device using a new object

2023-10-07 Thread ankita

From: Ankit Agrawal 

NVIDIA GPU's support MIG (Mult-Instance GPUs) feature [1], which allows
partitioning of the GPU device resources (including device memory) into
several (upto 8) isolated instances. Each of the partitioned memory needs
a dedicated NUMA node to operate. The partitions are not fixed and they
can be created/deleted at runtime.

Unfortunately Linux OS does not provide a means to dynamically create/destroy
NUMA nodes and such feature implementation is not expected to be trivial. The
nodes that OS discovers at the boot time while parsing SRAT remains fixed. So
we utilize the GI Affinity structures that allows association between nodes
and devices. Multiple GI structures per BDF is possible, allowing creation of
multiple nodes by exposing unique PXM in each of these structures.

Introducing a new nvidia-acpi-generic-initiator object, which inherits from
the generic acpi-generic-initiator object to allow a BDF to be associated with
more than 1 nodes.

An admin can provide the range of nodes using numa-node-start and
numa-node-count and link it to a device by providing its id. The following
sample creates 8 nodes and link them to the device dev0:

-numa node,nodeid=2 \
-numa node,nodeid=3 \
-numa node,nodeid=4 \
-numa node,nodeid=5 \
-numa node,nodeid=6 \
-numa node,nodeid=7 \
-numa node,nodeid=8 \
-numa node,nodeid=9 \
-device 
vfio-pci-nohotplug,host=0009:01:00.0,bus=pcie.0,addr=04.0,rombar=0,id=dev0 \
-object 
nvidia-acpi-generic-initiator,id=gi0,device=dev0,numa-node-start=2,numa-node-count=8
 \

[1] https://www.nvidia.com/en-in/technologies/multi-instance-gpu

Signed-off-by: Ankit Agrawal 
---
 hw/acpi/acpi-generic-initiator.c | 61 
 include/hw/acpi/acpi-generic-initiator.h | 12 +
 qapi/qom.json| 24 +-
 3 files changed, 95 insertions(+), 2 deletions(-)

diff --git a/hw/acpi/acpi-generic-initiator.c b/hw/acpi/acpi-generic-initiator.c
index 1ae79639be..8ef887c3a4 100644
--- a/hw/acpi/acpi-generic-initiator.c
+++ b/hw/acpi/acpi-generic-initiator.c
@@ -150,3 +150,64 @@ void build_srat_generic_initiator(GArray *table_data)
 }
 g_slist_free(list);
 }
+
+static void
+nvidia_acpi_generic_initiator_set_node_start(Object *obj, Visitor *v,
+ const char *name, void *opaque,
+ Error **errp)
+{
+AcpiGenericInitiator *gi = ACPI_GENERIC_INITIATOR(obj);
+uint32_t value;
+
+if (!visit_type_uint32(v, name, , errp)) {
+return;
+}
+
+if (value >= MAX_NODES) {
+return;
+}
+
+gi->node = value;
+}
+
+static void
+nvidia_acpi_generic_initiator_set_node_count(Object *obj, Visitor *v,
+ const char *name, void *opaque,
+ Error **errp)
+{
+AcpiGenericInitiator *gi = ACPI_GENERIC_INITIATOR(obj);
+uint32_t value;
+
+if (!visit_type_uint32(v, name, , errp)) {
+return;
+}
+
+gi->node_count = value;
+}
+
+static void nvidia_acpi_generic_initiator_class_init(ObjectClass *oc, void 
*data)
+{
+object_class_property_add(oc, 
NVIDIA_ACPI_GENERIC_INITIATOR_NODE_START_PROP,
+  "uint32", NULL,
+  nvidia_acpi_generic_initiator_set_node_start,
+  NULL, NULL);
+object_class_property_add(oc, 
NVIDIA_ACPI_GENERIC_INITIATOR_NODE_COUNT_PROP,
+  "uint32", NULL,
+  nvidia_acpi_generic_initiator_set_node_count,
+  NULL, NULL);
+}
+
+static const TypeInfo nvidia_acpi_generic_initiator_info = {
+.parent = TYPE_ACPI_GENERIC_INITIATOR,
+.name = TYPE_NVIDIA_ACPI_GENERIC_INITIATOR,
+.instance_size = sizeof(NvidiaAcpiGenericInitiator),
+.class_size = sizeof(NvidiaAcpiGenericInitiatorClass),
+.class_init = nvidia_acpi_generic_initiator_class_init,
+};
+
+static void
+nvidia_acpi_generic_initiator_register_types(void)
+{
+type_register_static(_acpi_generic_initiator_info);
+}
+type_init(nvidia_acpi_generic_initiator_register_types);
diff --git a/include/hw/acpi/acpi-generic-initiator.h 
b/include/hw/acpi/acpi-generic-initiator.h
index e8e2670309..3e4cf42064 100644
--- a/include/hw/acpi/acpi-generic-initiator.h
+++ b/include/hw/acpi/acpi-generic-initiator.h
@@ -9,10 +9,14 @@
 #include "qom/object_interfaces.h"
 
 #define TYPE_ACPI_GENERIC_INITIATOR "acpi-generic-initiator"
+#define TYPE_NVIDIA_ACPI_GENERIC_INITIATOR "nvidia-acpi-generic-initiator"
 
 #define ACPI_GENERIC_INITIATOR_DEVICE_PROP "device"
 #define ACPI_GENERIC_INITIATOR_NODE_PROP "node"
 
+#define NVIDIA_ACPI_GENERIC_INITIATOR_NODE_START_PROP "numa-node-start"
+#define NVIDIA_ACPI_GENERIC_INITIATOR_NODE_COUNT_PROP "numa-node-count"
+
 typedef struct AcpiGenericInitiator {
 /* private */

[PATCH v2 2/3] hw/acpi: Implement the SRAT GI affinity structure

2023-10-07 Thread ankita

From: Ankit Agrawal 

ACPI spec provides a scheme to associate "Generic Initiators" [1]
(e.g. heterogeneous processors and accelerators, GPUs, and I/O devices with
integrated compute or DMA engines GPUs) with Proximity Domains. This is
achieved using Generic Initiator Affinity Structure in SRAT. During bootup,
Linux kernel parse the ACPI SRAT to determine the PXM ids and create a NUMA
node for each unique PXM ID encountered. Qemu currently do not implement
these structures while building SRAT.

Add GI structures while building VM ACPI SRAT. The association between
devices and PXM are stored using acpi-generic-initiator object. Lookup
presence of all such objects and use them to build these structures.

The structure needs a PCI device handle [2] that consists of the device BDF.
The vfio-pci-nohotplug device corresponding to the acpi-generic-initiator
object is located to determine the BDF.

[1] ACPI Spec 6.5, Section 5.2.16.6
[2] ACPI Spec 6.5, Table 5.66

Signed-off-by: Ankit Agrawal 
---
 hw/acpi/acpi-generic-initiator.c | 78 
 hw/arm/virt-acpi-build.c |  3 +
 hw/vfio/pci.c|  2 -
 hw/vfio/pci.h|  2 +
 include/hw/acpi/acpi-generic-initiator.h | 22 +++
 5 files changed, 105 insertions(+), 2 deletions(-)

diff --git a/hw/acpi/acpi-generic-initiator.c b/hw/acpi/acpi-generic-initiator.c
index 6406736090..1ae79639be 100644
--- a/hw/acpi/acpi-generic-initiator.c
+++ b/hw/acpi/acpi-generic-initiator.c
@@ -72,3 +72,81 @@ static void acpi_generic_initiator_class_init(ObjectClass 
*oc, void *data)
   NULL, acpi_generic_initiator_set_node, NULL,
   NULL);
 }
+
+static int acpi_generic_initiator_list(Object *obj, void *opaque)
+{
+GSList **list = opaque;
+
+if (object_dynamic_cast(obj, TYPE_ACPI_GENERIC_INITIATOR)) {
+*list = g_slist_append(*list, ACPI_GENERIC_INITIATOR(obj));
+}
+
+object_child_foreach(obj, acpi_generic_initiator_list, opaque);
+return 0;
+}
+
+/*
+ * Identify Generic Initiator objects and link them into the list which is
+ * returned to the caller.
+ *
+ * Note: it is the caller's responsibility to free the list to avoid
+ * memory leak.
+ */
+static GSList *acpi_generic_initiator_get_list(void)
+{
+GSList *list = NULL;
+
+object_child_foreach(object_get_root(), acpi_generic_initiator_list, 
);
+return list;
+}
+
+/*
+ * ACPI spec, Revision 6.5
+ * 5.2.16.6 Generic Initiator Affinity Structure
+ */
+static void build_srat_generic_initiator_affinity(GArray *table_data, int node,
+  PCIDeviceHandle *handle,
+  GenericAffinityFlags flags)
+{
+build_append_int_noprefix(table_data, 5, 1); /* Type */
+build_append_int_noprefix(table_data, 32, 1);/* Length */
+build_append_int_noprefix(table_data, 0, 1); /* Reserved */
+build_append_int_noprefix(table_data, 1, 1); /* Device Handle Type */
+build_append_int_noprefix(table_data, node, 4);  /* Proximity Domain */
+build_append_int_noprefix(table_data, handle->segment, 2);
+build_append_int_noprefix(table_data, handle->bdf, 2);
+build_append_int_noprefix(table_data, handle->res0, 4);
+build_append_int_noprefix(table_data, handle->res1, 8);
+build_append_int_noprefix(table_data, flags, 4); /* Flags */
+build_append_int_noprefix(table_data, 0, 4); /* Reserved */
+}
+
+void build_srat_generic_initiator(GArray *table_data)
+{
+GSList *gi_list, *list = acpi_generic_initiator_get_list();
+for (gi_list = list; gi_list; gi_list = gi_list->next) {
+AcpiGenericInitiator *gi = gi_list->data;
+Object *o;
+int count;
+
+if (gi->node == MAX_NODES) {
+continue;
+}
+
+o = object_resolve_path_type(gi->device, TYPE_VFIO_PCI_NOHOTPLUG, 
NULL);
+if (!o) {
+continue;
+}
+
+for (count = 0; count < gi->node_count; count++) {
+PCIDeviceHandle dev_handle = {0};
+PCIDevice *pci_dev = PCI_DEVICE(o);
+
+dev_handle.bdf = pci_dev->devfn;
+build_srat_generic_initiator_affinity(table_data,
+  gi->node + count, 
_handle,
+  GEN_AFFINITY_ENABLED);
+}
+}
+g_slist_free(list);
+}
diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c
index 6b674231c2..7337d8076b 100644
--- a/hw/arm/virt-acpi-build.c
+++ b/hw/arm/virt-acpi-build.c
@@ -58,6 +58,7 @@
 #include "migration/vmstate.h"
 #include "hw/acpi/ghes.h"
 #include "hw/acpi/viot.h"
+#include "hw/acpi/acpi-generic-initiator.h"
 
 #define ARM_SPI_BASE 32
 
@@ -558,6 +559,8 @@ build_srat(GArray *table_data, BIOSLinker *linker, 
VirtMachineState *vms)
 }
 }
 
+build_srat_generic_initiator(table_data);
+

[PATCH v2 1/3] qom: new object to associate device to numa node

2023-10-07 Thread ankita

From: Ankit Agrawal 

The CPU cache coherent device memory can be added as NUMA nodes
distinct from the system memory nodes. These nodes are associated
with the device and Qemu needs a way to maintain this link.

Introduce a new acpi-generic-initiator object to allow host admin
provide the device and the corresponding NUMA node. Qemu maintain
this association and use this object to build the requisite GI
Affinity Structure.

The admin provides the id of the device and the NUMA node id such
as in the following example.
-device vfio-pci-nohotplug,host=,bus=pcie.0,addr=04.0,rombar=0,id=dev0 \
-object acpi-generic-initiator,id=gi0,device=dev0,node=2 \

Signed-off-by: Ankit Agrawal 
---
 hw/acpi/acpi-generic-initiator.c | 74 
 hw/acpi/meson.build  |  1 +
 include/hw/acpi/acpi-generic-initiator.h | 30 ++
 qapi/qom.json| 20 ++-
 4 files changed, 123 insertions(+), 2 deletions(-)
 create mode 100644 hw/acpi/acpi-generic-initiator.c
 create mode 100644 include/hw/acpi/acpi-generic-initiator.h

diff --git a/hw/acpi/acpi-generic-initiator.c b/hw/acpi/acpi-generic-initiator.c
new file mode 100644
index 00..6406736090
--- /dev/null
+++ b/hw/acpi/acpi-generic-initiator.c
@@ -0,0 +1,74 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved
+ */
+
+#include "qemu/osdep.h"
+#include "hw/qdev-properties.h"
+#include "qapi/error.h"
+#include "qapi/visitor.h"
+#include "qom/object_interfaces.h"
+#include "qom/object.h"
+#include "hw/qdev-core.h"
+#include "hw/vfio/vfio-common.h"
+#include "hw/vfio/pci.h"
+#include "hw/pci/pci_device.h"
+#include "sysemu/numa.h"
+#include "hw/acpi/acpi-generic-initiator.h"
+
+OBJECT_DEFINE_TYPE_WITH_INTERFACES(AcpiGenericInitiator, 
acpi_generic_initiator,
+   ACPI_GENERIC_INITIATOR, OBJECT,
+   { TYPE_USER_CREATABLE },
+   { NULL })
+
+OBJECT_DECLARE_SIMPLE_TYPE(AcpiGenericInitiator, ACPI_GENERIC_INITIATOR)
+
+static void acpi_generic_initiator_init(Object *obj)
+{
+AcpiGenericInitiator *gi = ACPI_GENERIC_INITIATOR(obj);
+gi->device = NULL;
+gi->node = MAX_NODES;
+gi->node_count = 1;
+}
+
+static void acpi_generic_initiator_finalize(Object *obj)
+{
+AcpiGenericInitiator *gi = ACPI_GENERIC_INITIATOR(obj);
+
+g_free(gi->device);
+}
+
+static void acpi_generic_initiator_set_device(Object *obj, const char *value,
+  Error **errp)
+{
+AcpiGenericInitiator *gi = ACPI_GENERIC_INITIATOR(obj);
+
+gi->device = g_strdup(value);
+}
+
+static void acpi_generic_initiator_set_node(Object *obj, Visitor *v,
+const char *name, void *opaque,
+Error **errp)
+{
+AcpiGenericInitiator *gi = ACPI_GENERIC_INITIATOR(obj);
+uint32_t value;
+
+if (!visit_type_uint32(v, name, , errp)) {
+return;
+}
+
+if (value >= MAX_NODES) {
+return;
+}
+
+gi->node = value;
+}
+
+static void acpi_generic_initiator_class_init(ObjectClass *oc, void *data)
+{
+object_class_property_add_str(oc, ACPI_GENERIC_INITIATOR_DEVICE_PROP, NULL,
+  acpi_generic_initiator_set_device);
+object_class_property_add(oc, ACPI_GENERIC_INITIATOR_NODE_PROP, "uint32",
+  NULL, acpi_generic_initiator_set_node, NULL,
+  NULL);
+}
diff --git a/hw/acpi/meson.build b/hw/acpi/meson.build
index fc1b952379..22252836ed 100644
--- a/hw/acpi/meson.build
+++ b/hw/acpi/meson.build
@@ -5,6 +5,7 @@ acpi_ss.add(files(
   'bios-linker-loader.c',
   'core.c',
   'utils.c',
+  'acpi-generic-initiator.c',
 ))
 acpi_ss.add(when: 'CONFIG_ACPI_CPU_HOTPLUG', if_true: files('cpu.c', 
'cpu_hotplug.c'))
 acpi_ss.add(when: 'CONFIG_ACPI_CPU_HOTPLUG', if_false: 
files('acpi-cpu-hotplug-stub.c'))
diff --git a/include/hw/acpi/acpi-generic-initiator.h 
b/include/hw/acpi/acpi-generic-initiator.h
new file mode 100644
index 00..e67e6e23b1
--- /dev/null
+++ b/include/hw/acpi/acpi-generic-initiator.h
@@ -0,0 +1,30 @@
+#ifndef ACPI_GENERIC_INITIATOR_H
+#define ACPI_GENERIC_INITIATOR_H
+
+#include "hw/mem/pc-dimm.h"
+#include "hw/acpi/bios-linker-loader.h"
+#include "qemu/uuid.h"
+#include "hw/acpi/aml-build.h"
+#include "qom/object.h"
+#include "qom/object_interfaces.h"
+
+#define TYPE_ACPI_GENERIC_INITIATOR "acpi-generic-initiator"
+
+#define ACPI_GENERIC_INITIATOR_DEVICE_PROP "device"
+#define ACPI_GENERIC_INITIATOR_NODE_PROP "node"
+
+typedef struct AcpiGenericInitiator {
+/* private */
+Object parent;
+
+/* public */
+char *device;
+uint32_t node;
+uint32_t node_count;
+} AcpiGenericInitiator;
+
+typedef struct AcpiGenericInitiatorClass {
+ObjectClass parent_class;
+} AcpiGenericInitiatorClass;
+
+#endif
diff --git a/qapi/qom.json

[PATCH v2 0/3] acpi: report numa nodes for device memory using GI

2023-10-07 Thread ankita

From: Ankit Agrawal 

There are upcoming devices which allow CPU to cache coherently access
their memory. It is sensible to expose such memory as NUMA nodes separate
from the sysmem node to the OS. The ACPI spec provides a scheme in SRAT
called Generic Initiator Affinity Structure [1] to allow an association
between a Proximity Domain (PXM) and a Generic Initiator (GI) (e.g.
heterogeneous processors and accelerators, GPUs, and I/O devices with
integrated compute or DMA engines).

Implement the mechanism to build the GI affinity structures as Qemu
currently does not. Introduce a new acpi-generic-initiator object
that links a node to a device BDF. During SRAT creation, all such
objected are identified and used to add the GI Affinity Structures.

A single node per BDF is insufficient for a full utilization of the NVIDIA
GPUs MIG (Mult-Instance GPUs) [2] feature. The feature allows partitioning
of the GPU device resources (including device memory) into several (upto 8)
isolated instances. Each of the partitioned memory requires a dedicated NUMA
node to operate. The partitions are not fixed and they can be created/deleted
at runtime.

Linux OS does not provide a means to dynamically create/destroy NUMA nodes
and such feature implementation is expected to be non-trivial. The nodes
that OS discovers at the boot time while parsing SRAT remains fixed. So we
utilize the GI Affinity structures that allows association between nodes
and devices. Multiple GI structures per BDF is possible, allowing creation
of multiple nodes in the VM by exposing unique PXM in each of these
structures. Implement a new nvidia-acpi-generic-initiator object to associate
a range of nodes with a device.

The admin will create a range of 8 nodes and associate that with the device
using the nvidia-acpi-generic-initiator object. While a configuration of less
than 8 nodes per device is allowed, such configuration will prevent
utilization of the feature to the fullest. This setting is applicable to
all the Grace+Hopper systems. The following is an example of the Qemu command
line arguments to create 8 nodes and link them to the device 'dev0':

-numa node,nodeid=2 \
-numa node,nodeid=3 \
-numa node,nodeid=4 \
-numa node,nodeid=5 \
-numa node,nodeid=6 \
-numa node,nodeid=7 \
-numa node,nodeid=8 \
-numa node,nodeid=9 \
-device 
vfio-pci-nohotplug,host=0009:01:00.0,bus=pcie.0,addr=04.0,rombar=0,id=dev0 \
-object 
nvidia-acpi-generic-initiator,id=gi0,device=dev0,numa-node-start=2,numa-node-count=8
 \

The performance benefits can be realized by providing the NUMA node distances
appropriately (through libvirt tags or Qemu params). The admin can get the
distance among nodes in hardware using `numactl -H`.

This series goes along with the vfio-pci variant driver [3] under review.
It is expected for a vfio-pci driver to expose this feature through
sysfs. Presence of the feature is checked to enable these code changes.

Applied over v8.1.0-rc4.

Signed-off-by: Ankit Agrawal 
---

[1] ACPI Spec 6.5, Section 5.2.16.6
[2] https://www.nvidia.com/en-in/technologies/multi-instance-gpu
[3] https://lore.kernel.org/all/20230912153032.19935-1-ank...@nvidia.com/

Link for v1:
https://lore.kernel.org/all/20230915024559.6565-1-ank...@nvidia.com/

v1 -> v2
- Removed dependency on sysfs to communicate the feature with variant module.
- Use GI Affinity SRAT structure instead of Memory Affinity.
- No DSDT entries needed to communicate the PXM for the device. SRAT GI
structure is used instead.
- New objects introduced to establish link between device and nodes.

Ankit Agrawal (3):
  qom: new object to associate device to numa node
  hw/acpi: Implement the SRAT GI affinity structure
  qom: Link multiple numa nodes to device using a new object

 hw/acpi/acpi-generic-initiator.c | 213 +++
 hw/acpi/meson.build  |   1 +
 hw/arm/virt-acpi-build.c |   3 +
 hw/vfio/pci.c|   2 -
 hw/vfio/pci.h|   2 +
 include/hw/acpi/acpi-generic-initiator.h |  64 +++
 qapi/qom.json|  40 -
 7 files changed, 321 insertions(+), 4 deletions(-)
 create mode 100644 hw/acpi/acpi-generic-initiator.c
 create mode 100644 include/hw/acpi/acpi-generic-initiator.h

-- 
2.17.1

Re: [PATCH 15/19] parallels: Remove unnecessary data_end field

2023-10-07 Thread Mike Maslenkin

On Sat, Oct 7, 2023 at 5:30 PM Alexander Ivanov
 wrote:
>
>
>
> On 10/7/23 13:21, Mike Maslenkin wrote:
> > On Sat, Oct 7, 2023 at 1:18 PM Alexander Ivanov
> >   wrote:
> >>
> >> On 10/6/23 21:43, Mike Maslenkin wrote:
> >>> On Mon, Oct 2, 2023 at 12:01 PM Alexander Ivanov
> >>>   wrote:
>  Since we have used bitmap, field data_end in BDRVParallelsState is
>  redundant and can be removed.
> 
>  Add parallels_data_end() helper and remove data_end handling.
> 
>  Signed-off-by: Alexander Ivanov
>  ---
> block/parallels.c | 33 +
> block/parallels.h |  1 -
> 2 files changed, 13 insertions(+), 21 deletions(-)
> 
>  diff --git a/block/parallels.c b/block/parallels.c
>  index 48ea5b3f03..80a7171b84 100644
>  --- a/block/parallels.c
>  +++ b/block/parallels.c
>  @@ -265,6 +265,13 @@ static void 
>  parallels_free_used_bitmap(BlockDriverState *bs)
> g_free(s->used_bmap);
> }
> 
>  +static int64_t parallels_data_end(BDRVParallelsState *s)
>  +{
>  +int64_t data_end = s->data_start * BDRV_SECTOR_SIZE;
>  +data_end += s->used_bmap_size * s->cluster_size;
>  +return data_end;
>  +}
>  +
> int64_t parallels_allocate_host_clusters(BlockDriverState *bs,
>  int64_t *clusters)
> {
>  @@ -275,7 +282,7 @@ int64_t 
>  parallels_allocate_host_clusters(BlockDriverState *bs,
> 
> first_free = find_first_zero_bit(s->used_bmap, s->used_bmap_size);
> if (first_free == s->used_bmap_size) {
>  -host_off = s->data_end * BDRV_SECTOR_SIZE;
>  +host_off = parallels_data_end(s);
> prealloc_clusters = *clusters + s->prealloc_size / s->tracks;
> bytes = prealloc_clusters * s->cluster_size;
> 
>  @@ -297,9 +304,6 @@ int64_t 
>  parallels_allocate_host_clusters(BlockDriverState *bs,
> s->used_bmap = bitmap_zero_extend(s->used_bmap, 
>  s->used_bmap_size,
>   new_usedsize);
> s->used_bmap_size = new_usedsize;
>  -if (host_off + bytes > s->data_end * BDRV_SECTOR_SIZE) {
>  -s->data_end = (host_off + bytes) / BDRV_SECTOR_SIZE;
>  -}
> } else {
> next_used = find_next_bit(s->used_bmap, s->used_bmap_size, 
>  first_free);
> 
>  @@ -315,8 +319,7 @@ int64_t 
>  parallels_allocate_host_clusters(BlockDriverState *bs,
>  * branch. In the other case we are likely re-using hole. 
>  Preallocate
>  * the space if required by the prealloc_mode.
>  */
>  -if (s->prealloc_mode == PRL_PREALLOC_MODE_FALLOCATE &&
>  -host_off < s->data_end * BDRV_SECTOR_SIZE) {
>  +if (s->prealloc_mode == PRL_PREALLOC_MODE_FALLOCATE) {
> ret = bdrv_pwrite_zeroes(bs->file, host_off, bytes, 0);
> if (ret < 0) {
> return ret;
>  @@ -757,13 +760,7 @@ parallels_check_outside_image(BlockDriverState *bs, 
>  BdrvCheckResult *res,
> }
> }
> 
>  -if (high_off == 0) {
>  -res->image_end_offset = s->data_end << BDRV_SECTOR_BITS;
>  -} else {
>  -res->image_end_offset = high_off + s->cluster_size;
>  -s->data_end = res->image_end_offset >> BDRV_SECTOR_BITS;
>  -}
>  -
>  +res->image_end_offset = parallels_data_end(s);
> return 0;
> }
> 
>  @@ -806,7 +803,6 @@ parallels_check_leak(BlockDriverState *bs, 
>  BdrvCheckResult *res,
> res->check_errors++;
> return ret;
> }
>  -s->data_end = res->image_end_offset >> BDRV_SECTOR_BITS;
> 
> parallels_free_used_bitmap(bs);
> ret = parallels_fill_used_bitmap(bs);
>  @@ -1361,8 +1357,7 @@ static int parallels_open(BlockDriverState *bs, 
>  QDict *options, int flags,
> }
> 
> s->data_start = data_start;
>  -s->data_end = s->data_start;
>  -if (s->data_end < (s->header_size >> BDRV_SECTOR_BITS)) {
>  +if (s->data_start < (s->header_size >> BDRV_SECTOR_BITS)) {
> /*
>  * There is not enough unused space to fit to block align 
>  between BAT
>  * and actual data. We can't avoid read-modify-write...
>  @@ -1403,11 +1398,10 @@ static int parallels_open(BlockDriverState *bs, 
>  QDict *options, int flags,
> 
> for (i = 0; i < s->bat_size; i++) {
> sector = bat2sect(s, i);
>  -if (sector + s->tracks > s->data_end) {
>  -s->data_end = sector + s->tracks;
>  +if (sector

[PATCH] target/riscv: deprecate capital 'Z' CPU properties

2023-10-07 Thread Daniel Henrique Barboza

At this moment there are eleven CPU extension properties that starts
with capital 'Z': Zifencei, Zicsr, Zihintntl, Zihintpause, Zawrs, Zfa,
Zfh, Zfhmin, Zve32f, Zve64f and Zve64d. All other extensions are named
with lower-case letters.

We want all properties to be named with lower-case letters since it's
consistent with the riscv-isa string that we create in the FDT. Having
these 11 properties to be exceptions can be confusing.

Deprecate all of them. Create their lower-case counterpart to be used as
maintained CPU properties. When trying to use any deprecated property a
warning message will be displayed, recommending users to switch to the
lower-case variant:

./build/qemu-system-riscv64 -M virt -cpu rv64,Zifencei=true --nographic
qemu-system-riscv64: warning: CPU property 'Zifencei' is deprecated. Please use 
'zifencei' instead

This will give users some time to change their scripts before we remove
the capital 'Z' properties entirely.

Signed-off-by: Daniel Henrique Barboza 
---
 docs/about/deprecated.rst  | 23 ++
 target/riscv/cpu.c | 39 +++---
 target/riscv/cpu.h |  1 +
 target/riscv/tcg/tcg-cpu.c | 31 +-
 4 files changed, 82 insertions(+), 12 deletions(-)

diff --git a/docs/about/deprecated.rst b/docs/about/deprecated.rst
index 694b878f36..331f10f930 100644
--- a/docs/about/deprecated.rst
+++ b/docs/about/deprecated.rst
@@ -378,6 +378,29 @@ of generic CPUs: rv32 and rv64 as default CPUs and 'max' 
as a feature complete
 CPU for both 32 and 64 bit builds. Users are then discouraged to use the 'any'
 CPU type starting in 8.2.
 
+RISC-V CPU properties which start with with capital 'Z' (since 8.2)
+^^
+
+All RISC-V CPU properties which start with capital 'Z' are being deprecated
+starting in 8.2. The reason is that they were wrongly added with capital 'Z'
+in the past. CPU properties were later added with lower-case names, which
+is the format we want to use from now on.
+
+Users which try to use these deprecated properties will receive a warning
+recommending to switch to their stable counterparts:
+
+- "Zifencei" should be replaced with "zifencei"
+- "Zicsr" should be replaced with "zicsr"
+- "Zihintntl" should be replaced with "zihintntl"
+- "Zihintpause" should be replaced with "zihintpause"
+- "Zawrs" should be replaced with "zawrs"
+- "Zfa" should be replaced with "zfa"
+- "Zfh" should be replaced with "zfh"
+- "Zfhmin" should be replaced with "zfhmin"
+- "Zve32f" should be replaced with "zve32f"
+- "Zve64f" should be replaced with "zve64f"
+- "Zve64d" should be replaced with "zve64d"
+
 Block device options
 
 
diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
index 521bb88538..1cdc3d2609 100644
--- a/target/riscv/cpu.c
+++ b/target/riscv/cpu.c
@@ -1246,17 +1246,17 @@ const char *riscv_get_misa_ext_description(uint32_t bit)
 const RISCVCPUMultiExtConfig riscv_cpu_extensions[] = {
 /* Defaults for standard extensions */
 MULTI_EXT_CFG_BOOL("sscofpmf", ext_sscofpmf, false),
-MULTI_EXT_CFG_BOOL("Zifencei", ext_ifencei, true),
-MULTI_EXT_CFG_BOOL("Zicsr", ext_icsr, true),
-MULTI_EXT_CFG_BOOL("Zihintntl", ext_zihintntl, true),
-MULTI_EXT_CFG_BOOL("Zihintpause", ext_zihintpause, true),
-MULTI_EXT_CFG_BOOL("Zawrs", ext_zawrs, true),
-MULTI_EXT_CFG_BOOL("Zfa", ext_zfa, true),
-MULTI_EXT_CFG_BOOL("Zfh", ext_zfh, false),
-MULTI_EXT_CFG_BOOL("Zfhmin", ext_zfhmin, false),
-MULTI_EXT_CFG_BOOL("Zve32f", ext_zve32f, false),
-MULTI_EXT_CFG_BOOL("Zve64f", ext_zve64f, false),
-MULTI_EXT_CFG_BOOL("Zve64d", ext_zve64d, false),
+MULTI_EXT_CFG_BOOL("zifencei", ext_ifencei, true),
+MULTI_EXT_CFG_BOOL("zicsr", ext_icsr, true),
+MULTI_EXT_CFG_BOOL("zihintntl", ext_zihintntl, true),
+MULTI_EXT_CFG_BOOL("zihintpause", ext_zihintpause, true),
+MULTI_EXT_CFG_BOOL("zawrs", ext_zawrs, true),
+MULTI_EXT_CFG_BOOL("zfa", ext_zfa, true),
+MULTI_EXT_CFG_BOOL("zfh", ext_zfh, false),
+MULTI_EXT_CFG_BOOL("zfhmin", ext_zfhmin, false),
+MULTI_EXT_CFG_BOOL("zve32f", ext_zve32f, false),
+MULTI_EXT_CFG_BOOL("zve64f", ext_zve64f, false),
+MULTI_EXT_CFG_BOOL("zve64d", ext_zve64d, false),
 MULTI_EXT_CFG_BOOL("sstc", ext_sstc, true),
 
 MULTI_EXT_CFG_BOOL("smstateen", ext_smstateen, false),
@@ -1349,6 +1349,23 @@ const RISCVCPUMultiExtConfig 
riscv_cpu_experimental_exts[] = {
 DEFINE_PROP_END_OF_LIST(),
 };
 
+/* Deprecated entries marked for future removal */
+const RISCVCPUMultiExtConfig riscv_cpu_deprecated_exts[] = {
+MULTI_EXT_CFG_BOOL("Zifencei", ext_ifencei, true),
+MULTI_EXT_CFG_BOOL("Zicsr", ext_icsr, true),
+MULTI_EXT_CFG_BOOL("Zihintntl", ext_zihintntl, true),
+MULTI_EXT_CFG_BOOL("Zihintpause", ext_zihintpause, true),
+MULTI_EXT_CFG_BOOL("Zawrs", ext_zawrs, true),
+MULTI_EXT_CFG_BOOL("Zfa", ext_zfa, true),
+

Re: [PATCH v11 09/10] migration: Implement MigrateChannelList to hmp migration flow.

2023-10-07 Thread Het Gala


On 10/4/2023 8:55 PM, Fabiano Rosas wrote:

Het Gala  writes:


Integrate MigrateChannelList with all transport backends
(socket, exec and rdma) for both src and dest migration
endpoints for hmp migration.

Suggested-by: Aravind Retnakaran
Signed-off-by: Het Gala
Reviewed-by: Daniel P. Berrangé
---
  migration/migration-hmp-cmds.c | 15 +--
  migration/migration.c  |  5 ++---
  migration/migration.h  |  3 ++-
  3 files changed, 17 insertions(+), 6 deletions(-)

diff --git a/migration/migration-hmp-cmds.c b/migration/migration-hmp-cmds.c
index a2e6a5c51e..a1657f3d37 100644
--- a/migration/migration-hmp-cmds.c
+++ b/migration/migration-hmp-cmds.c
@@ -441,9 +441,14 @@ void hmp_migrate_incoming(Monitor *mon, const QDict *qdict)
  {
  Error *err = NULL;
  const char *uri = qdict_get_str(qdict, "uri");
+MigrationChannelList *caps = NULL;
+g_autoptr(MigrationChannel) channel = g_new0(MigrationChannel, 1);

Just the pointer here. If I remember correctly the g_autoptr here would
cause a double free when freeing the caps.


Yes, we'll just have 'g_autoptr(MigrationChannel) channel = NULL'.

Is it because inside QAPI_LIST_PREPEND, caps will be refrencing to the 
same memory as 'channel', we don't need to free channel ? I am still not 
sure what is the right place to use g_steal_pointer(), is this a right 
place to use (non-error paths) ?


  
-qmp_migrate_incoming(uri, false, NULL, );

+migrate_uri_parse(uri, , );
+QAPI_LIST_PREPEND(caps, channel);
  
+qmp_migrate_incoming(NULL, true, caps, );

+qapi_free_MigrationChannelList(caps);
  hmp_handle_error(mon, err);
  }
  
@@ -730,9 +735,15 @@ void hmp_migrate(Monitor *mon, const QDict *qdict)

  bool resume = qdict_get_try_bool(qdict, "resume", false);
  const char *uri = qdict_get_str(qdict, "uri");
  Error *err = NULL;
+MigrationChannelList *caps = NULL;
+g_autoptr(MigrationChannel) channel = g_new0(MigrationChannel, 1);

Same here. We free the channel via caps and we attribute it below, no
need to allocate.

Ack.
  
-qmp_migrate(uri, false, NULL, !!blk, blk, !!inc, inc,

+migrate_uri_parse(uri, , );
+QAPI_LIST_PREPEND(caps, channel);
+
+qmp_migrate(NULL, true, caps, !!blk, blk, !!inc, inc,
   false, false, true, resume, );
+qapi_free_MigrationChannelList(caps);
  if (hmp_handle_error(mon, err)) {

Regards,
Het Gala

Re: [PATCH v11 08/10] migration: Implement MigrateChannelList to qmp migration flow.

2023-10-07 Thread Het Gala


On 10/4/2023 8:51 PM, Fabiano Rosas wrote:

Het Gala  writes:


Integrate MigrateChannelList with all transport backends
(socket, exec and rdma) for both src and dest migration
endpoints for qmp migration.

For current series, limit the size of MigrateChannelList
to single element (single interface) as runtime check.

Suggested-by: Aravind Retnakaran
Signed-off-by: Het Gala
Reviewed-by: Daniel P. Berrangé
---
  migration/migration.c | 95 +++
  1 file changed, 52 insertions(+), 43 deletions(-)

diff --git a/migration/migration.c b/migration/migration.c
index 6f948988ec..3eae32e616 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -432,9 +432,10 @@ void migrate_add_address(SocketAddress *address)
  }
  
  static bool migrate_uri_parse(const char *uri,

-  MigrationAddress **channel,
+  MigrationChannel **channel,
Error **errp)
  {
+g_autoptr(MigrationChannel) val = g_new0(MigrationChannel, 1);

Here val is passed out of scope so it shouldn't be g_autoptr.
I guess, same as for 'addr' we need to go with adding 
g_steal_pointer() here too ?

  g_autoptr(MigrationAddress) addr = g_new0(MigrationAddress, 1);
  SocketAddress *saddr = >u.socket;
  InetSocketAddress *isock = >u.rdma;
@@ -471,7 +472,9 @@ static bool migrate_uri_parse(const char *uri,
  return false;
  }
  
-*channel = addr;

+val->channel_type = MIGRATION_CHANNEL_TYPE_MAIN;
+val->addr = addr;
+*channel = val;
  return true;
  }
  
@@ -479,41 +482,44 @@ static void qemu_start_incoming_migration(const char *uri, bool has_channels,

MigrationChannelList *channels,
Error **errp)
  {
-g_autoptr(MigrationAddress) channel = g_new0(MigrationAddress, 1);
+g_autoptr(MigrationChannel) channel = g_new0(MigrationChannel, 1);
+g_autoptr(MigrationAddress) addr = g_new0(MigrationAddress, 1);

Here we want just the pointer, no allocation, no freeing. For both
channel and addr.

Ack, same as channel in patch 2.
  
  /*

   * Having preliminary checks for uri and channel
   */
-if (has_channels) {
-error_setg(errp, "'channels' argument should not be set yet.");
-return;
-}
-
  if (uri && has_channels) {
  error_setg(errp, "'uri' and 'channels' arguments are mutually "
 "exclusive; exactly one of the two should be present in "
 "'migrate-incoming' qmp command ");
  return;
-}
-
-if (!uri && !has_channels) {
+} else if (channels) {
+/* To verify that Migrate channel list has only item */
+if (channels->next) {
+error_setg(errp, "Channel list has more than one entries");
+return;
+}
+channel = channels->value;
+} else if (uri) {
+/* caller uses the old URI syntax */
+if (!migrate_uri_parse(uri, , errp)) {
+return;
+}
+} else {
  error_setg(errp, "neither 'uri' or 'channels' argument are "
 "specified in 'migrate-incoming' qmp command ");
  return;
  }
-
-if (uri && !migrate_uri_parse(uri, , errp)) {
-return;
-}
+addr = channel->addr;
  
  /* transport mechanism not suitable for migration? */

-if (!migration_channels_and_transport_compatible(channel, errp)) {
+if (!migration_channels_and_transport_compatible(addr, errp)) {
  return;
  }
  
  qapi_event_send_migration(MIGRATION_STATUS_SETUP);

-if (channel->transport == MIGRATION_ADDRESS_TYPE_SOCKET) {
-SocketAddress *saddr = >u.socket;
+if (addr->transport == MIGRATION_ADDRESS_TYPE_SOCKET) {
+SocketAddress *saddr = >u.socket;
  if (saddr->type == SOCKET_ADDRESS_TYPE_INET ||
  saddr->type == SOCKET_ADDRESS_TYPE_UNIX ||
  saddr->type == SOCKET_ADDRESS_TYPE_VSOCK) {
@@ -522,11 +528,11 @@ static void qemu_start_incoming_migration(const char 
*uri, bool has_channels,
  fd_start_incoming_migration(saddr->u.fd.str, errp);
  }
  #ifdef CONFIG_RDMA
-} else if (channel->transport == MIGRATION_ADDRESS_TYPE_RDMA) {
-rdma_start_incoming_migration(>u.rdma, errp);
-#endif
-} else if (channel->transport == MIGRATION_ADDRESS_TYPE_EXEC) {
-exec_start_incoming_migration(channel->u.exec.args, errp);
+} else if (addr->transport == MIGRATION_ADDRESS_TYPE_RDMA) {
+rdma_start_incoming_migration(>u.rdma, errp);
+ #endif
+} else if (addr->transport == MIGRATION_ADDRESS_TYPE_EXEC) {
+exec_start_incoming_migration(addr->u.exec.args, errp);
  } else {
  error_setg(errp, "unknown migration protocol: %s", uri);
  }
@@ -1750,35 +1756,38 @@ void qmp_migrate(const char *uri, bool has_channels,
  bool resume_requested;
  Error

Re: [PULL 00/21] vfio queue

2023-10-07 Thread Michael Tokarev


07.10.2023 13:14, Cédric Le Goater wrote:


Please send a v5 for "Prerequisite changes for IOMMUFD support" with the
fixes we talked about. I will rebuild a PR next week.


Can you push the first bugfix at least, so it will not miss stable-8.1.2?

Thanks,

/mjt

Re: [PATCH 15/19] parallels: Remove unnecessary data_end field

2023-10-07 Thread Alexander Ivanov





On 10/7/23 13:21, Mike Maslenkin wrote:

On Sat, Oct 7, 2023 at 1:18 PM Alexander Ivanov
  wrote:


On 10/6/23 21:43, Mike Maslenkin wrote:

On Mon, Oct 2, 2023 at 12:01 PM Alexander Ivanov
  wrote:

Since we have used bitmap, field data_end in BDRVParallelsState is
redundant and can be removed.

Add parallels_data_end() helper and remove data_end handling.

Signed-off-by: Alexander Ivanov
---
   block/parallels.c | 33 +
   block/parallels.h |  1 -
   2 files changed, 13 insertions(+), 21 deletions(-)

diff --git a/block/parallels.c b/block/parallels.c
index 48ea5b3f03..80a7171b84 100644
--- a/block/parallels.c
+++ b/block/parallels.c
@@ -265,6 +265,13 @@ static void parallels_free_used_bitmap(BlockDriverState 
*bs)
   g_free(s->used_bmap);
   }

+static int64_t parallels_data_end(BDRVParallelsState *s)
+{
+int64_t data_end = s->data_start * BDRV_SECTOR_SIZE;
+data_end += s->used_bmap_size * s->cluster_size;
+return data_end;
+}
+
   int64_t parallels_allocate_host_clusters(BlockDriverState *bs,
int64_t *clusters)
   {
@@ -275,7 +282,7 @@ int64_t parallels_allocate_host_clusters(BlockDriverState 
*bs,

   first_free = find_first_zero_bit(s->used_bmap, s->used_bmap_size);
   if (first_free == s->used_bmap_size) {
-host_off = s->data_end * BDRV_SECTOR_SIZE;
+host_off = parallels_data_end(s);
   prealloc_clusters = *clusters + s->prealloc_size / s->tracks;
   bytes = prealloc_clusters * s->cluster_size;

@@ -297,9 +304,6 @@ int64_t parallels_allocate_host_clusters(BlockDriverState 
*bs,
   s->used_bmap = bitmap_zero_extend(s->used_bmap, s->used_bmap_size,
 new_usedsize);
   s->used_bmap_size = new_usedsize;
-if (host_off + bytes > s->data_end * BDRV_SECTOR_SIZE) {
-s->data_end = (host_off + bytes) / BDRV_SECTOR_SIZE;
-}
   } else {
   next_used = find_next_bit(s->used_bmap, s->used_bmap_size, 
first_free);

@@ -315,8 +319,7 @@ int64_t parallels_allocate_host_clusters(BlockDriverState 
*bs,
* branch. In the other case we are likely re-using hole. Preallocate
* the space if required by the prealloc_mode.
*/
-if (s->prealloc_mode == PRL_PREALLOC_MODE_FALLOCATE &&
-host_off < s->data_end * BDRV_SECTOR_SIZE) {
+if (s->prealloc_mode == PRL_PREALLOC_MODE_FALLOCATE) {
   ret = bdrv_pwrite_zeroes(bs->file, host_off, bytes, 0);
   if (ret < 0) {
   return ret;
@@ -757,13 +760,7 @@ parallels_check_outside_image(BlockDriverState *bs, 
BdrvCheckResult *res,
   }
   }

-if (high_off == 0) {
-res->image_end_offset = s->data_end << BDRV_SECTOR_BITS;
-} else {
-res->image_end_offset = high_off + s->cluster_size;
-s->data_end = res->image_end_offset >> BDRV_SECTOR_BITS;
-}
-
+res->image_end_offset = parallels_data_end(s);
   return 0;
   }

@@ -806,7 +803,6 @@ parallels_check_leak(BlockDriverState *bs, BdrvCheckResult 
*res,
   res->check_errors++;
   return ret;
   }
-s->data_end = res->image_end_offset >> BDRV_SECTOR_BITS;

   parallels_free_used_bitmap(bs);
   ret = parallels_fill_used_bitmap(bs);
@@ -1361,8 +1357,7 @@ static int parallels_open(BlockDriverState *bs, QDict 
*options, int flags,
   }

   s->data_start = data_start;
-s->data_end = s->data_start;
-if (s->data_end < (s->header_size >> BDRV_SECTOR_BITS)) {
+if (s->data_start < (s->header_size >> BDRV_SECTOR_BITS)) {
   /*
* There is not enough unused space to fit to block align between BAT
* and actual data. We can't avoid read-modify-write...
@@ -1403,11 +1398,10 @@ static int parallels_open(BlockDriverState *bs, QDict 
*options, int flags,

   for (i = 0; i < s->bat_size; i++) {
   sector = bat2sect(s, i);
-if (sector + s->tracks > s->data_end) {
-s->data_end = sector + s->tracks;
+if (sector + s->tracks > file_nb_sectors) {
+need_check = true;
   }
   }
-need_check = need_check || s->data_end > file_nb_sectors;

   ret = parallels_fill_used_bitmap(bs);
   if (ret == -ENOMEM) {
@@ -1461,7 +1455,6 @@ static int 
parallels_truncate_unused_clusters(BlockDriverState *bs)
   end_off = (end_off + 1) * s->cluster_size;
   }
   end_off += s->data_start * BDRV_SECTOR_SIZE;
-s->data_end = end_off / BDRV_SECTOR_SIZE;
   return bdrv_truncate(bs->file, end_off, true, PREALLOC_MODE_OFF, 0, 
NULL);
   }

diff --git a/block/parallels.h b/block/parallels.h
index 18b4f8068e..a6a048d890 100644
--- a/block/parallels.h
+++ b/block/parallels.h
@@ -79,7 +79,6 @@ typedef struct BDRVParallelsState {
   unsigned int bat_size;

   int64_t  data_start;
-int64_t  data_end;

[PATCH v8 29/29] hw/i386/pc_piix: Make PIIX4 south bridge usable in PC machine

2023-10-07 Thread Bernhard Beschow

QEMU's PIIX3 implementation actually models the real PIIX4, but with different
PCI IDs. Usually, guests deal just fine with it. Still, in order to provide a
more consistent illusion to guests, allow QEMU's PIIX4 implementation to be used
in the PC machine.

Signed-off-by: Bernhard Beschow 
---
 docs/system/target-i386-desc.rst.inc |  8 
 include/hw/i386/pc.h |  2 +
 hw/i386/pc.c |  1 +
 hw/i386/pc_piix.c| 61 +++-
 4 files changed, 71 insertions(+), 1 deletion(-)

diff --git a/docs/system/target-i386-desc.rst.inc 
b/docs/system/target-i386-desc.rst.inc
index 7d1fffacbe..5ebbcda9db 100644
--- a/docs/system/target-i386-desc.rst.inc
+++ b/docs/system/target-i386-desc.rst.inc
@@ -71,3 +71,11 @@ machine property, i.e.
|qemu_system_x86| some.img \
-audiodev ,id= \
-machine pcspk-audiodev=
+
+Machine-specific options
+
+
+It supports the following machine-specific options:
+
+- ``x-south-bridge=PIIX3|piix4-isa`` (Experimental option to select a 
particular
+  south bridge. Default: ``PIIX3``)
diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
index bec38cb92c..29a9724524 100644
--- a/include/hw/i386/pc.h
+++ b/include/hw/i386/pc.h
@@ -42,6 +42,7 @@ typedef struct PCMachineState {
 uint64_t max_ram_below_4g;
 OnOffAuto vmport;
 SmbiosEntryPointType smbios_entry_point_type;
+const char *south_bridge;
 
 bool acpi_build_enabled;
 bool smbus_enabled;
@@ -92,6 +93,7 @@ struct PCMachineClass {
 /* Device configuration: */
 bool pci_enabled;
 bool kvmclock_enabled;
+const char *default_south_bridge;
 
 /* Compat options: */
 
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index 4e844d02f2..c84d1bdf08 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -1707,6 +1707,7 @@ static void pc_machine_initfn(Object *obj)
 #endif /* CONFIG_VMPORT */
 pcms->max_ram_below_4g = 0; /* use default */
 pcms->smbios_entry_point_type = pcmc->default_smbios_ep_type;
+pcms->south_bridge = pcmc->default_south_bridge;
 
 /* acpi build is enabled by default if machine supports it */
 pcms->acpi_build_enabled = pcmc->has_acpi_build;
diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
index e38942a3c3..334d9a0299 100644
--- a/hw/i386/pc_piix.c
+++ b/hw/i386/pc_piix.c
@@ -262,7 +262,7 @@ static void pc_init1(MachineState *machine,
 DeviceState *dev;
 size_t i;
 
-pci_dev = pci_new_multifunction(-1, TYPE_PIIX3_DEVICE);
+pci_dev = pci_new_multifunction(-1, pcms->south_bridge);
 object_property_set_bool(OBJECT(pci_dev), "has-usb",
  machine_usb(machine), _abort);
 object_property_set_bool(OBJECT(pci_dev), "has-acpi",
@@ -394,6 +394,56 @@ static void pc_init1(MachineState *machine,
 }
 }
 
+typedef enum PCSouthBridgeOption {
+PC_SOUTH_BRIDGE_OPTION_PIIX3,
+PC_SOUTH_BRIDGE_OPTION_PIIX4,
+PC_SOUTH_BRIDGE_OPTION_MAX,
+} PCSouthBridgeOption;
+
+static const QEnumLookup PCSouthBridgeOption_lookup = {
+.array = (const char *const[]) {
+[PC_SOUTH_BRIDGE_OPTION_PIIX3] = TYPE_PIIX3_DEVICE,
+[PC_SOUTH_BRIDGE_OPTION_PIIX4] = TYPE_PIIX4_PCI_DEVICE,
+},
+.size = PC_SOUTH_BRIDGE_OPTION_MAX
+};
+
+#define NotifyVmexitOption_str(val) \
+qapi_enum_lookup(_lookup, (val))
+
+static int pc_get_south_bridge(Object *obj, Error **errp)
+{
+PCMachineState *pcms = PC_MACHINE(obj);
+int i;
+
+for (i = 0; i < PCSouthBridgeOption_lookup.size; i++) {
+if (g_strcmp0(PCSouthBridgeOption_lookup.array[i],
+  pcms->south_bridge) == 0) {
+return i;
+}
+}
+
+error_setg(errp, "Invalid south bridge value set");
+return 0;
+}
+
+static void pc_set_south_bridge(Object *obj, int value, Error **errp)
+{
+PCMachineState *pcms = PC_MACHINE(obj);
+
+if (value < 0) {
+error_setg(errp, "Value can't be negative");
+return;
+}
+
+if (value >= PCSouthBridgeOption_lookup.size) {
+error_setg(errp, "Value too big");
+return;
+}
+
+pcms->south_bridge = PCSouthBridgeOption_lookup.array[value];
+}
+
 /* Looking for a pc_compat_2_4() function? It doesn't exist.
  * pc_compat_*() functions that run on machine-init time and
  * change global QEMU state are deprecated. Please don't create
@@ -473,6 +523,8 @@ static void pc_xen_hvm_init(MachineState *machine)
 static void pc_i440fx_machine_options(MachineClass *m)
 {
 PCMachineClass *pcmc = PC_MACHINE_CLASS(m);
+ObjectClass *oc = OBJECT_CLASS(m);
+pcmc->default_south_bridge = TYPE_PIIX3_DEVICE;
 pcmc->pci_root_uid = 0;
 pcmc->default_cpu_version = 1;
 
@@ -484,6 +536,13 @@ static void pc_i440fx_machine_options(MachineClass *m)
 m->no_parallel = !module_object_class_by_name(TYPE_ISA_PARALLEL);
 machine_class_allow_dynamic_sysbus_dev(m, TYPE_RAMFB_DEVICE);
 machine_class_allow_dynamic_sysbus_dev(m,

[PATCH v8 22/29] hw/isa/piix: Harmonize names of reset control memory regions

2023-10-07 Thread Bernhard Beschow

There is no need for having different names here. Having the same name
further allows code to be shared between PIIX3 and PIIX4.

Signed-off-by: Bernhard Beschow 
Reviewed-by: Philippe Mathieu-Daudé 
Reviewed-by: Michael S. Tsirkin 
---
 hw/isa/piix.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/hw/isa/piix.c b/hw/isa/piix.c
index 270b8eb1f7..bd66fb7475 100644
--- a/hw/isa/piix.c
+++ b/hw/isa/piix.c
@@ -339,7 +339,7 @@ static void pci_piix3_realize(PCIDevice *dev, Error **errp)
 }
 
 memory_region_init_io(>rcr_mem, OBJECT(dev), _ops, d,
-  "piix3-reset-control", 1);
+  "piix-reset-control", 1);
 memory_region_add_subregion_overlap(pci_address_space_io(dev),
 PIIX_RCR_IOPORT, >rcr_mem, 1);
 
@@ -532,7 +532,7 @@ static void piix4_realize(PCIDevice *dev, Error **errp)
  "intr", 1);
 
 memory_region_init_io(>rcr_mem, OBJECT(dev), _ops, s,
-  "reset-control", 1);
+  "piix-reset-control", 1);
 memory_region_add_subregion_overlap(pci_address_space_io(dev),
 PIIX_RCR_IOPORT, >rcr_mem, 1);
 
-- 
2.42.0

[PATCH v8 20/29] hw/isa/piix: Allow for optional PIC creation in PIIX3

2023-10-07 Thread Bernhard Beschow

In the PC machine, the PIC is created in board code to allow it to be
virtualized with various virtualization techniques. So explicitly disable its
creation in the PC machine via a property which defaults to enabled. Once the
PIIX implementations are consolidated this default will keep Malta working
without further ado.

Signed-off-by: Bernhard Beschow 
---
 include/hw/southbridge/piix.h |  1 +
 hw/i386/pc_piix.c |  2 ++
 hw/isa/piix.c | 21 +++--
 3 files changed, 22 insertions(+), 2 deletions(-)

diff --git a/include/hw/southbridge/piix.h b/include/hw/southbridge/piix.h
index dd5f7b31c0..08491693b4 100644
--- a/include/hw/southbridge/piix.h
+++ b/include/hw/southbridge/piix.h
@@ -69,6 +69,7 @@ struct PIIXState {
 MemoryRegion rcr_mem;
 
 bool has_acpi;
+bool has_pic;
 bool has_usb;
 bool smm_enabled;
 };
diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
index 70cffcfe4f..fa39afd891 100644
--- a/hw/i386/pc_piix.c
+++ b/hw/i386/pc_piix.c
@@ -268,6 +268,8 @@ static void pc_init1(MachineState *machine,
 object_property_set_bool(OBJECT(pci_dev), "has-acpi",
  x86_machine_is_acpi_enabled(x86ms),
  _abort);
+object_property_set_bool(OBJECT(pci_dev), "has-pic", false,
+ _abort);
 qdev_prop_set_uint32(DEVICE(pci_dev), "smb_io_base", 0xb100);
 object_property_set_bool(OBJECT(pci_dev), "smm-enabled",
  x86_machine_is_smm_enabled(x86ms),
diff --git a/hw/isa/piix.c b/hw/isa/piix.c
index f6da334c6f..d6d9ac6473 100644
--- a/hw/isa/piix.c
+++ b/hw/isa/piix.c
@@ -106,7 +106,7 @@ static void piix4_set_irq(void *opaque, int irq_num, int 
level)
 }
 }
 
-static void piix4_request_i8259_irq(void *opaque, int irq, int level)
+static void piix_request_i8259_irq(void *opaque, int irq, int level)
 {
 PIIX4State *s = opaque;
 qemu_set_irq(s->cpu_intr, level);
@@ -343,6 +343,22 @@ static void pci_piix3_realize(PCIDevice *dev, Error **errp)
 memory_region_add_subregion_overlap(pci_address_space_io(dev),
 PIIX_RCR_IOPORT, >rcr_mem, 1);
 
+/* PIC */
+if (d->has_pic) {
+qemu_irq *i8259_out_irq = qemu_allocate_irqs(piix_request_i8259_irq, d,
+ 1);
+qemu_irq *i8259 = i8259_init(isa_bus, *i8259_out_irq);
+size_t i;
+
+for (i = 0; i < ISA_NUM_IRQS; i++) {
+d->isa_irqs_in[i] = i8259[i];
+}
+
+g_free(i8259);
+
+qdev_init_gpio_out_named(DEVICE(dev), >cpu_intr, "intr", 1);
+}
+
 isa_bus_register_input_irqs(isa_bus, d->isa_irqs_in);
 
 i8257_dma_init(isa_bus, 0);
@@ -419,6 +435,7 @@ static void pci_piix3_init(Object *obj)
 static Property pci_piix3_props[] = {
 DEFINE_PROP_UINT32("smb_io_base", PIIXState, smb_io_base, 0),
 DEFINE_PROP_BOOL("has-acpi", PIIXState, has_acpi, true),
+DEFINE_PROP_BOOL("has-pic", PIIXState, has_pic, true),
 DEFINE_PROP_BOOL("has-usb", PIIXState, has_usb, true),
 DEFINE_PROP_BOOL("smm-enabled", PIIXState, smm_enabled, false),
 DEFINE_PROP_END_OF_LIST(),
@@ -514,7 +531,7 @@ static void piix4_realize(PCIDevice *dev, Error **errp)
 PIIX_RCR_IOPORT, >rcr_mem, 1);
 
 /* initialize i8259 pic */
-i8259_out_irq = qemu_allocate_irqs(piix4_request_i8259_irq, s, 1);
+i8259_out_irq = qemu_allocate_irqs(piix_request_i8259_irq, s, 1);
 i8259 = i8259_init(isa_bus, *i8259_out_irq);
 
 for (i = 0; i < ISA_NUM_IRQS; i++) {
-- 
2.42.0

[PATCH v8 17/29] hw/isa/piix4: Rename reset control operations to match PIIX3

2023-10-07 Thread Bernhard Beschow

Both implementations are the same and will be shared upon merging.

Signed-off-by: Bernhard Beschow 
Reviewed-by: Philippe Mathieu-Daudé 
Reviewed-by: Michael S. Tsirkin 
---
 hw/isa/piix4.c | 14 +++---
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/hw/isa/piix4.c b/hw/isa/piix4.c
index 9c8b6c98ab..eb456622c5 100644
--- a/hw/isa/piix4.c
+++ b/hw/isa/piix4.c
@@ -148,8 +148,8 @@ static void piix4_request_i8259_irq(void *opaque, int irq, 
int level)
 qemu_set_irq(s->cpu_intr, level);
 }
 
-static void piix4_rcr_write(void *opaque, hwaddr addr, uint64_t val,
-unsigned int len)
+static void rcr_write(void *opaque, hwaddr addr, uint64_t val,
+  unsigned int len)
 {
 PIIX4State *s = opaque;
 
@@ -161,16 +161,16 @@ static void piix4_rcr_write(void *opaque, hwaddr addr, 
uint64_t val,
 s->rcr = val & 2; /* keep System Reset type only */
 }
 
-static uint64_t piix4_rcr_read(void *opaque, hwaddr addr, unsigned int len)
+static uint64_t rcr_read(void *opaque, hwaddr addr, unsigned int len)
 {
 PIIX4State *s = opaque;
 
 return s->rcr;
 }
 
-static const MemoryRegionOps piix4_rcr_ops = {
-.read = piix4_rcr_read,
-.write = piix4_rcr_write,
+static const MemoryRegionOps rcr_ops = {
+.read = rcr_read,
+.write = rcr_write,
 .endianness = DEVICE_LITTLE_ENDIAN,
 .impl = {
 .min_access_size = 1,
@@ -194,7 +194,7 @@ static void piix4_realize(PCIDevice *dev, Error **errp)
 qdev_init_gpio_out_named(DEVICE(dev), >cpu_intr,
  "intr", 1);
 
-memory_region_init_io(>rcr_mem, OBJECT(dev), _rcr_ops, s,
+memory_region_init_io(>rcr_mem, OBJECT(dev), _ops, s,
   "reset-control", 1);
 memory_region_add_subregion_overlap(pci_address_space_io(dev),
 PIIX_RCR_IOPORT, >rcr_mem, 1);
-- 
2.42.0

[PATCH v8 18/29] hw/isa/piix4: Reuse struct PIIXState from PIIX3

2023-10-07 Thread Bernhard Beschow

PIIX4 has its own, private PIIX4State structure. PIIX3 has almost the
same structure, provided in a public header. So reuse it and add a
cpu_intr attribute to it which is only used by PIIX4.

Signed-off-by: Bernhard Beschow 
---
 include/hw/southbridge/piix.h |  1 +
 hw/isa/piix4.c| 26 +++---
 2 files changed, 12 insertions(+), 15 deletions(-)

diff --git a/include/hw/southbridge/piix.h b/include/hw/southbridge/piix.h
index 0b257e1582..dd5f7b31c0 100644
--- a/include/hw/southbridge/piix.h
+++ b/include/hw/southbridge/piix.h
@@ -49,6 +49,7 @@ struct PIIXState {
 #endif
 uint64_t pic_levels;
 
+qemu_irq cpu_intr;
 qemu_irq isa_irqs_in[ISA_NUM_IRQS];
 
 /* This member isn't used. Just for save/load compatibility */
diff --git a/hw/isa/piix4.c b/hw/isa/piix4.c
index eb456622c5..71899aaa69 100644
--- a/hw/isa/piix4.c
+++ b/hw/isa/piix4.c
@@ -42,21 +42,9 @@
 #include "sysemu/runstate.h"
 #include "qom/object.h"
 
-struct PIIX4State {
-PCIDevice dev;
-qemu_irq cpu_intr;
-qemu_irq *isa_irqs_in;
-
-MC146818RtcState rtc;
-PCIIDEState ide;
-UHCIState uhci;
-PIIX4PMState pm;
-/* Reset Control Register */
-MemoryRegion rcr_mem;
-uint8_t rcr;
-};
+typedef struct PIIXState PIIX4State;
 
-OBJECT_DECLARE_SIMPLE_TYPE(PIIX4State, PIIX4_PCI_DEVICE)
+DECLARE_INSTANCE_CHECKER(PIIX4State, PIIX4_PCI_DEVICE, TYPE_PIIX4_PCI_DEVICE)
 
 static void piix4_set_irq(void *opaque, int irq_num, int level)
 {
@@ -184,6 +172,8 @@ static void piix4_realize(PCIDevice *dev, Error **errp)
 PCIBus *pci_bus = pci_get_bus(dev);
 ISABus *isa_bus;
 qemu_irq *i8259_out_irq;
+qemu_irq *i8259;
+size_t i;
 
 isa_bus = isa_bus_new(DEVICE(dev), pci_address_space(dev),
   pci_address_space_io(dev), errp);
@@ -201,7 +191,13 @@ static void piix4_realize(PCIDevice *dev, Error **errp)
 
 /* initialize i8259 pic */
 i8259_out_irq = qemu_allocate_irqs(piix4_request_i8259_irq, s, 1);
-s->isa_irqs_in = i8259_init(isa_bus, *i8259_out_irq);
+i8259 = i8259_init(isa_bus, *i8259_out_irq);
+
+for (i = 0; i < ISA_NUM_IRQS; i++) {
+s->isa_irqs_in[i] = i8259[i];
+}
+
+g_free(i8259);
 
 /* initialize ISA irqs */
 isa_bus_register_input_irqs(isa_bus, s->isa_irqs_in);
-- 
2.42.0

[PATCH v8 21/29] hw/isa/piix: Allow for optional PIT creation in PIIX3

2023-10-07 Thread Bernhard Beschow

In the PC machine, the PIT is created in board code to allow it to be
virtualized with various virtualization techniques. So explicitly disable its
creation in the PC machine via a property which defaults to enabled. Once the
PIIX implementations are consolidated this default will keep Malta working
without further ado.

Signed-off-by: Bernhard Beschow 
---
 include/hw/southbridge/piix.h | 1 +
 hw/i386/pc_piix.c | 2 ++
 hw/isa/piix.c | 6 ++
 3 files changed, 9 insertions(+)

diff --git a/include/hw/southbridge/piix.h b/include/hw/southbridge/piix.h
index 08491693b4..86709ba2e4 100644
--- a/include/hw/southbridge/piix.h
+++ b/include/hw/southbridge/piix.h
@@ -70,6 +70,7 @@ struct PIIXState {
 
 bool has_acpi;
 bool has_pic;
+bool has_pit;
 bool has_usb;
 bool smm_enabled;
 };
diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
index fa39afd891..e38942a3c3 100644
--- a/hw/i386/pc_piix.c
+++ b/hw/i386/pc_piix.c
@@ -270,6 +270,8 @@ static void pc_init1(MachineState *machine,
  _abort);
 object_property_set_bool(OBJECT(pci_dev), "has-pic", false,
  _abort);
+object_property_set_bool(OBJECT(pci_dev), "has-pit", false,
+ _abort);
 qdev_prop_set_uint32(DEVICE(pci_dev), "smb_io_base", 0xb100);
 object_property_set_bool(OBJECT(pci_dev), "smm-enabled",
  x86_machine_is_smm_enabled(x86ms),
diff --git a/hw/isa/piix.c b/hw/isa/piix.c
index d6d9ac6473..270b8eb1f7 100644
--- a/hw/isa/piix.c
+++ b/hw/isa/piix.c
@@ -361,6 +361,11 @@ static void pci_piix3_realize(PCIDevice *dev, Error **errp)
 
 isa_bus_register_input_irqs(isa_bus, d->isa_irqs_in);
 
+/* PIT */
+if (d->has_pit) {
+i8254_pit_init(isa_bus, 0x40, 0, NULL);
+}
+
 i8257_dma_init(isa_bus, 0);
 
 /* RTC */
@@ -436,6 +441,7 @@ static Property pci_piix3_props[] = {
 DEFINE_PROP_UINT32("smb_io_base", PIIXState, smb_io_base, 0),
 DEFINE_PROP_BOOL("has-acpi", PIIXState, has_acpi, true),
 DEFINE_PROP_BOOL("has-pic", PIIXState, has_pic, true),
+DEFINE_PROP_BOOL("has-pit", PIIXState, has_pit, true),
 DEFINE_PROP_BOOL("has-usb", PIIXState, has_usb, true),
 DEFINE_PROP_BOOL("smm-enabled", PIIXState, smm_enabled, false),
 DEFINE_PROP_END_OF_LIST(),
-- 
2.42.0

[PATCH v8 26/29] hw/isa/piix: Reuse PIIX3's PCI interrupt triggering in PIIX4

2023-10-07 Thread Bernhard Beschow

Speeds up PIIX4 which resolves an old TODO. Also makes PIIX4 compatible with Xen
which relies on pci_bus_fire_intx_routing_notifier() to be fired.

Signed-off-by: Bernhard Beschow 
Reviewed-by: Michael S. Tsirkin 
---
 hw/isa/piix.c | 27 +++
 1 file changed, 3 insertions(+), 24 deletions(-)

diff --git a/hw/isa/piix.c b/hw/isa/piix.c
index 449c1baaab..17677c2126 100644
--- a/hw/isa/piix.c
+++ b/hw/isa/piix.c
@@ -81,27 +81,6 @@ static void piix_set_pci_irq(void *opaque, int pirq, int 
level)
 piix_set_pci_irq_level(s, pirq, level);
 }
 
-static void piix4_set_irq(void *opaque, int irq_num, int level)
-{
-int i, pic_irq, pic_level;
-PIIXState *s = opaque;
-PCIBus *bus = pci_get_bus(>dev);
-
-/* now we change the pic irq level according to the piix irq mappings */
-/* XXX: optimize */
-pic_irq = s->dev.config[PIIX_PIRQCA + irq_num];
-if (pic_irq < ISA_NUM_IRQS) {
-/* The pic level is the logical OR of all the PCI irqs mapped to it. */
-pic_level = 0;
-for (i = 0; i < PIIX_NUM_PIRQS; i++) {
-if (pic_irq == s->dev.config[PIIX_PIRQCA + i]) {
-pic_level |= pci_bus_get_irq_level(bus, i);
-}
-}
-qemu_set_irq(s->isa_irqs_in[pic_irq], pic_level);
-}
-}
-
 static void piix_request_i8259_irq(void *opaque, int irq, int level)
 {
 PIIXState *s = opaque;
@@ -223,7 +202,7 @@ static int piix4_post_load(void *opaque, int version_id)
 s->rcr = 0;
 }
 
-return 0;
+return piix_post_load(opaque, version_id);
 }
 
 static int piix3_pre_save(void *opaque)
@@ -442,6 +421,7 @@ static void pci_piix_class_init(ObjectClass *klass, void 
*data)
 PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
 AcpiDevAmlIfClass *adevc = ACPI_DEV_AML_IF_CLASS(klass);
 
+k->config_write = piix_write_config;
 dc->reset   = piix_reset;
 dc->desc= "ISA bridge";
 dc->hotpluggable   = false;
@@ -497,7 +477,6 @@ static void piix3_class_init(ObjectClass *klass, void *data)
 DeviceClass *dc = DEVICE_CLASS(klass);
 PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
 
-k->config_write = piix_write_config;
 k->realize = piix3_realize;
 /* 82371SB PIIX3 PCI-to-ISA bridge (Step A1) */
 k->device_id = PCI_DEVICE_ID_INTEL_82371SB_0;
@@ -522,7 +501,7 @@ static void piix4_realize(PCIDevice *dev, Error **errp)
 return;
 }
 
-pci_bus_irqs(pci_bus, piix4_set_irq, s, PIIX_NUM_PIRQS);
+pci_bus_irqs(pci_bus, piix_set_pci_irq, s, PIIX_NUM_PIRQS);
 }
 
 static void piix4_init(Object *obj)
-- 
2.42.0

[PATCH v8 25/29] hw/isa/piix: Rename functions to be shared for PCI interrupt triggering

2023-10-07 Thread Bernhard Beschow

PIIX4 will get the same optimizations which are already implemented for
PIIX3.

Signed-off-by: Bernhard Beschow 
Reviewed-by: Michael S. Tsirkin 
---
 hw/isa/piix.c | 72 +--
 1 file changed, 36 insertions(+), 36 deletions(-)

diff --git a/hw/isa/piix.c b/hw/isa/piix.c
index 2ab799b95e..449c1baaab 100644
--- a/hw/isa/piix.c
+++ b/hw/isa/piix.c
@@ -38,47 +38,47 @@
 #include "migration/vmstate.h"
 #include "hw/acpi/acpi_aml_interface.h"
 
-static void piix3_set_irq_pic(PIIXState *piix3, int pic_irq)
+static void piix_set_irq_pic(PIIXState *s, int pic_irq)
 {
-qemu_set_irq(piix3->isa_irqs_in[pic_irq],
- !!(piix3->pic_levels &
+qemu_set_irq(s->isa_irqs_in[pic_irq],
+ !!(s->pic_levels &
 (((1ULL << PIIX_NUM_PIRQS) - 1) <<
  (pic_irq * PIIX_NUM_PIRQS;
 }
 
-static void piix3_set_irq_level_internal(PIIXState *piix3, int pirq, int level)
+static void piix_set_pci_irq_level_internal(PIIXState *s, int pirq, int level)
 {
 int pic_irq;
 uint64_t mask;
 
-pic_irq = piix3->dev.config[PIIX_PIRQCA + pirq];
+pic_irq = s->dev.config[PIIX_PIRQCA + pirq];
 if (pic_irq >= ISA_NUM_IRQS) {
 return;
 }
 
 mask = 1ULL << ((pic_irq * PIIX_NUM_PIRQS) + pirq);
-piix3->pic_levels &= ~mask;
-piix3->pic_levels |= mask * !!level;
+s->pic_levels &= ~mask;
+s->pic_levels |= mask * !!level;
 }
 
-static void piix3_set_irq_level(PIIXState *piix3, int pirq, int level)
+static void piix_set_pci_irq_level(PIIXState *s, int pirq, int level)
 {
 int pic_irq;
 
-pic_irq = piix3->dev.config[PIIX_PIRQCA + pirq];
+pic_irq = s->dev.config[PIIX_PIRQCA + pirq];
 if (pic_irq >= ISA_NUM_IRQS) {
 return;
 }
 
-piix3_set_irq_level_internal(piix3, pirq, level);
+piix_set_pci_irq_level_internal(s, pirq, level);
 
-piix3_set_irq_pic(piix3, pic_irq);
+piix_set_irq_pic(s, pic_irq);
 }
 
-static void piix3_set_irq(void *opaque, int pirq, int level)
+static void piix_set_pci_irq(void *opaque, int pirq, int level)
 {
-PIIXState *piix3 = opaque;
-piix3_set_irq_level(piix3, pirq, level);
+PIIXState *s = opaque;
+piix_set_pci_irq_level(s, pirq, level);
 }
 
 static void piix4_set_irq(void *opaque, int irq_num, int level)
@@ -108,10 +108,10 @@ static void piix_request_i8259_irq(void *opaque, int irq, 
int level)
 qemu_set_irq(s->cpu_intr, level);
 }
 
-static PCIINTxRoute piix3_route_intx_pin_to_irq(void *opaque, int pin)
+static PCIINTxRoute piix_route_intx_pin_to_irq(void *opaque, int pin)
 {
-PIIXState *piix3 = opaque;
-int irq = piix3->dev.config[PIIX_PIRQCA + pin];
+PCIDevice *pci_dev = opaque;
+int irq = pci_dev->config[PIIX_PIRQCA + pin];
 PCIINTxRoute route;
 
 if (irq < ISA_NUM_IRQS) {
@@ -125,29 +125,29 @@ static PCIINTxRoute piix3_route_intx_pin_to_irq(void 
*opaque, int pin)
 }
 
 /* irq routing is changed. so rebuild bitmap */
-static void piix3_update_irq_levels(PIIXState *piix3)
+static void piix_update_pci_irq_levels(PIIXState *s)
 {
-PCIBus *bus = pci_get_bus(>dev);
+PCIBus *bus = pci_get_bus(>dev);
 int pirq;
 
-piix3->pic_levels = 0;
+s->pic_levels = 0;
 for (pirq = 0; pirq < PIIX_NUM_PIRQS; pirq++) {
-piix3_set_irq_level(piix3, pirq, pci_bus_get_irq_level(bus, pirq));
+piix_set_pci_irq_level(s, pirq, pci_bus_get_irq_level(bus, pirq));
 }
 }
 
-static void piix3_write_config(PCIDevice *dev,
-   uint32_t address, uint32_t val, int len)
+static void piix_write_config(PCIDevice *dev, uint32_t address, uint32_t val,
+  int len)
 {
 pci_default_write_config(dev, address, val, len);
 if (ranges_overlap(address, len, PIIX_PIRQCA, 4)) {
-PIIXState *piix3 = PIIX_PCI_DEVICE(dev);
+PIIXState *s = PIIX_PCI_DEVICE(dev);
 int pic_irq;
 
-pci_bus_fire_intx_routing_notifier(pci_get_bus(>dev));
-piix3_update_irq_levels(piix3);
+pci_bus_fire_intx_routing_notifier(pci_get_bus(>dev));
+piix_update_pci_irq_levels(s);
 for (pic_irq = 0; pic_irq < ISA_NUM_IRQS; pic_irq++) {
-piix3_set_irq_pic(piix3, pic_irq);
+piix_set_irq_pic(s, pic_irq);
 }
 }
 }
@@ -193,9 +193,9 @@ static void piix_reset(DeviceState *dev)
 d->rcr = 0;
 }
 
-static int piix3_post_load(void *opaque, int version_id)
+static int piix_post_load(void *opaque, int version_id)
 {
-PIIXState *piix3 = opaque;
+PIIXState *s = opaque;
 int pirq;
 
 /*
@@ -207,10 +207,10 @@ static int piix3_post_load(void *opaque, int version_id)
  * Here, we update irq levels without raising the interrupt.
  * Interrupt state will be deserialized separately through the i8259.
  */
-piix3->pic_levels = 0;
+s->pic_levels = 0;
 for (pirq = 0; pirq < PIIX_NUM_PIRQS; pirq++) {
-piix3_set_irq_level_internal(piix3,

[PATCH v8 16/29] hw/isa/piix4: Rename "isa" attribute to "isa_irqs_in"

2023-10-07 Thread Bernhard Beschow

Rename the "isa" attribute to align it with PIIX3 for consolidation.

Signed-off-by: Bernhard Beschow 
---
 hw/isa/piix4.c | 10 +-
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/hw/isa/piix4.c b/hw/isa/piix4.c
index 3c3c7a094c..9c8b6c98ab 100644
--- a/hw/isa/piix4.c
+++ b/hw/isa/piix4.c
@@ -45,7 +45,7 @@
 struct PIIX4State {
 PCIDevice dev;
 qemu_irq cpu_intr;
-qemu_irq *isa;
+qemu_irq *isa_irqs_in;
 
 MC146818RtcState rtc;
 PCIIDEState ide;
@@ -75,7 +75,7 @@ static void piix4_set_irq(void *opaque, int irq_num, int 
level)
 pic_level |= pci_bus_get_irq_level(bus, i);
 }
 }
-qemu_set_irq(s->isa[pic_irq], pic_level);
+qemu_set_irq(s->isa_irqs_in[pic_irq], pic_level);
 }
 }
 
@@ -201,10 +201,10 @@ static void piix4_realize(PCIDevice *dev, Error **errp)
 
 /* initialize i8259 pic */
 i8259_out_irq = qemu_allocate_irqs(piix4_request_i8259_irq, s, 1);
-s->isa = i8259_init(isa_bus, *i8259_out_irq);
+s->isa_irqs_in = i8259_init(isa_bus, *i8259_out_irq);
 
 /* initialize ISA irqs */
-isa_bus_register_input_irqs(isa_bus, s->isa);
+isa_bus_register_input_irqs(isa_bus, s->isa_irqs_in);
 
 /* initialize pit */
 i8254_pit_init(isa_bus, 0x40, 0, NULL);
@@ -236,7 +236,7 @@ static void piix4_realize(PCIDevice *dev, Error **errp)
 if (!qdev_realize(DEVICE(>pm), BUS(pci_bus), errp)) {
 return;
 }
-qdev_connect_gpio_out(DEVICE(>pm), 0, s->isa[9]);
+qdev_connect_gpio_out(DEVICE(>pm), 0, s->isa_irqs_in[9]);
 
 pci_bus_irqs(pci_bus, piix4_set_irq, s, PIIX_NUM_PIRQS);
 }
-- 
2.42.0

[PATCH v8 28/29] hw/isa/piix: Implement multi-process QEMU support also for PIIX4

2023-10-07 Thread Bernhard Beschow

So far multi-process QEMU was only implemented for PIIX3. Move the support into
the base class to achieve feature parity between both device models.

Signed-off-by: Bernhard Beschow 
---
 hw/isa/piix.c | 9 +
 1 file changed, 1 insertion(+), 8 deletions(-)

diff --git a/hw/isa/piix.c b/hw/isa/piix.c
index cba2098ca2..04ebed5b52 100644
--- a/hw/isa/piix.c
+++ b/hw/isa/piix.c
@@ -374,6 +374,7 @@ static void pci_piix_realize(PCIDevice *dev, const char 
*uhci_type,
 }
 
 pci_bus_irqs(pci_bus, piix_set_pci_irq, d, PIIX_NUM_PIRQS);
+pci_bus_set_route_irq_fn(pci_bus, piix_route_intx_pin_to_irq);
 }
 
 static void build_pci_isa_aml(AcpiDevAmlIf *adev, Aml *scope)
@@ -454,15 +455,7 @@ static const TypeInfo piix_pci_type_info = {
 
 static void piix3_realize(PCIDevice *dev, Error **errp)
 {
-ERRP_GUARD();
-PCIBus *pci_bus = pci_get_bus(dev);
-
 pci_piix_realize(dev, TYPE_PIIX3_USB_UHCI, errp);
-if (*errp) {
-return;
-}
-
-pci_bus_set_route_irq_fn(pci_bus, piix_route_intx_pin_to_irq);
 }
 
 static void piix3_init(Object *obj)
-- 
2.42.0

[PATCH v8 14/29] hw/isa/piix3: Drop the "3" from PIIX base class name

2023-10-07 Thread Bernhard Beschow

TYPE_PIIX3_PCI_DEVICE was the former base class of the Xen and non-Xen variants
of the PIIX3 ISA device models. It will become the base class for the PIIX3 and
PIIX4 device models, so drop the "3" from the type names.

Signed-off-by: Bernhard Beschow 
Reviewed-by: Michael S. Tsirkin 
---
 include/hw/southbridge/piix.h |  6 ++--
 hw/isa/piix3.c| 56 +--
 2 files changed, 30 insertions(+), 32 deletions(-)

diff --git a/include/hw/southbridge/piix.h b/include/hw/southbridge/piix.h
index c56ce49fd3..0b257e1582 100644
--- a/include/hw/southbridge/piix.h
+++ b/include/hw/southbridge/piix.h
@@ -71,11 +71,9 @@ struct PIIXState {
 bool has_usb;
 bool smm_enabled;
 };
-typedef struct PIIXState PIIX3State;
 
-#define TYPE_PIIX3_PCI_DEVICE "pci-piix3"
-DECLARE_INSTANCE_CHECKER(PIIX3State, PIIX3_PCI_DEVICE,
- TYPE_PIIX3_PCI_DEVICE)
+#define TYPE_PIIX_PCI_DEVICE "pci-piix"
+OBJECT_DECLARE_SIMPLE_TYPE(PIIXState, PIIX_PCI_DEVICE)
 
 #define TYPE_PIIX3_DEVICE "PIIX3"
 #define TYPE_PIIX4_PCI_DEVICE "piix4-isa"
diff --git a/hw/isa/piix3.c b/hw/isa/piix3.c
index 5b867df299..c7e59249b6 100644
--- a/hw/isa/piix3.c
+++ b/hw/isa/piix3.c
@@ -35,7 +35,7 @@
 #include "migration/vmstate.h"
 #include "hw/acpi/acpi_aml_interface.h"
 
-static void piix3_set_irq_pic(PIIX3State *piix3, int pic_irq)
+static void piix3_set_irq_pic(PIIXState *piix3, int pic_irq)
 {
 qemu_set_irq(piix3->isa_irqs_in[pic_irq],
  !!(piix3->pic_levels &
@@ -43,7 +43,7 @@ static void piix3_set_irq_pic(PIIX3State *piix3, int pic_irq)
  (pic_irq * PIIX_NUM_PIRQS;
 }
 
-static void piix3_set_irq_level_internal(PIIX3State *piix3, int pirq, int 
level)
+static void piix3_set_irq_level_internal(PIIXState *piix3, int pirq, int level)
 {
 int pic_irq;
 uint64_t mask;
@@ -58,7 +58,7 @@ static void piix3_set_irq_level_internal(PIIX3State *piix3, 
int pirq, int level)
 piix3->pic_levels |= mask * !!level;
 }
 
-static void piix3_set_irq_level(PIIX3State *piix3, int pirq, int level)
+static void piix3_set_irq_level(PIIXState *piix3, int pirq, int level)
 {
 int pic_irq;
 
@@ -74,13 +74,13 @@ static void piix3_set_irq_level(PIIX3State *piix3, int 
pirq, int level)
 
 static void piix3_set_irq(void *opaque, int pirq, int level)
 {
-PIIX3State *piix3 = opaque;
+PIIXState *piix3 = opaque;
 piix3_set_irq_level(piix3, pirq, level);
 }
 
 static PCIINTxRoute piix3_route_intx_pin_to_irq(void *opaque, int pin)
 {
-PIIX3State *piix3 = opaque;
+PIIXState *piix3 = opaque;
 int irq = piix3->dev.config[PIIX_PIRQCA + pin];
 PCIINTxRoute route;
 
@@ -95,7 +95,7 @@ static PCIINTxRoute piix3_route_intx_pin_to_irq(void *opaque, 
int pin)
 }
 
 /* irq routing is changed. so rebuild bitmap */
-static void piix3_update_irq_levels(PIIX3State *piix3)
+static void piix3_update_irq_levels(PIIXState *piix3)
 {
 PCIBus *bus = pci_get_bus(>dev);
 int pirq;
@@ -111,7 +111,7 @@ static void piix3_write_config(PCIDevice *dev,
 {
 pci_default_write_config(dev, address, val, len);
 if (ranges_overlap(address, len, PIIX_PIRQCA, 4)) {
-PIIX3State *piix3 = PIIX3_PCI_DEVICE(dev);
+PIIXState *piix3 = PIIX_PCI_DEVICE(dev);
 int pic_irq;
 
 pci_bus_fire_intx_routing_notifier(pci_get_bus(>dev));
@@ -124,7 +124,7 @@ static void piix3_write_config(PCIDevice *dev,
 
 static void piix3_reset(DeviceState *dev)
 {
-PIIX3State *d = PIIX3_PCI_DEVICE(dev);
+PIIXState *d = PIIX_PCI_DEVICE(dev);
 uint8_t *pci_conf = d->dev.config;
 
 pci_conf[0x04] = 0x07; /* master, memory and I/O */
@@ -165,7 +165,7 @@ static void piix3_reset(DeviceState *dev)
 
 static int piix3_post_load(void *opaque, int version_id)
 {
-PIIX3State *piix3 = opaque;
+PIIXState *piix3 = opaque;
 int pirq;
 
 /*
@@ -188,7 +188,7 @@ static int piix3_post_load(void *opaque, int version_id)
 static int piix3_pre_save(void *opaque)
 {
 int i;
-PIIX3State *piix3 = opaque;
+PIIXState *piix3 = opaque;
 
 for (i = 0; i < ARRAY_SIZE(piix3->pci_irq_levels_vmstate); i++) {
 piix3->pci_irq_levels_vmstate[i] =
@@ -200,7 +200,7 @@ static int piix3_pre_save(void *opaque)
 
 static bool piix3_rcr_needed(void *opaque)
 {
-PIIX3State *piix3 = opaque;
+PIIXState *piix3 = opaque;
 
 return (piix3->rcr != 0);
 }
@@ -211,7 +211,7 @@ static const VMStateDescription vmstate_piix3_rcr = {
 .minimum_version_id = 1,
 .needed = piix3_rcr_needed,
 .fields = (VMStateField[]) {
-VMSTATE_UINT8(rcr, PIIX3State),
+VMSTATE_UINT8(rcr, PIIXState),
 VMSTATE_END_OF_LIST()
 }
 };
@@ -223,8 +223,8 @@ static const VMStateDescription vmstate_piix3 = {
 .post_load = piix3_post_load,
 .pre_save = piix3_pre_save,
 .fields = (VMStateField[]) {
-VMSTATE_PCI_DEVICE(dev, PIIX3State),
-VMSTATE_INT32_ARRAY_V(pci_irq_levels_vmstate, PIIX3State,
+

[PATCH v8 19/29] hw/isa/piix3: Merge hw/isa/piix4.c

2023-10-07 Thread Bernhard Beschow

Now that the PIIX3 and PIIX4 device models are sufficiently prepared, their
implementations can be merged into one file for further consolidation.

Signed-off-by: Bernhard Beschow 
Reviewed-by: Michael S. Tsirkin 
---
 MAINTAINERS|   6 +-
 hw/isa/{piix3.c => piix.c} | 190 +++-
 hw/isa/piix4.c | 290 -
 hw/i386/Kconfig|   2 +-
 hw/isa/Kconfig |  11 +-
 hw/isa/meson.build |   3 +-
 hw/mips/Kconfig|   2 +-
 7 files changed, 195 insertions(+), 309 deletions(-)
 rename hw/isa/{piix3.c => piix.c} (71%)
 delete mode 100644 hw/isa/piix4.c

diff --git a/MAINTAINERS b/MAINTAINERS
index ea91f9e804..6fb609f624 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1287,7 +1287,7 @@ Malta
 M: Philippe Mathieu-Daudé 
 R: Aurelien Jarno 
 S: Odd Fixes
-F: hw/isa/piix4.c
+F: hw/isa/piix.c
 F: hw/acpi/piix4.c
 F: hw/mips/malta.c
 F: hw/pci-host/gt64120.c
@@ -1706,7 +1706,7 @@ F: hw/pci-host/pam.c
 F: include/hw/pci-host/i440fx.h
 F: include/hw/pci-host/q35.h
 F: include/hw/pci-host/pam.h
-F: hw/isa/piix3.c
+F: hw/isa/piix.c
 F: hw/isa/lpc_ich9.c
 F: hw/i2c/smbus_ich9.c
 F: hw/acpi/piix4.c
@@ -2459,7 +2459,7 @@ PIIX4 South Bridge (i82371AB)
 M: Hervé Poussineau 
 M: Philippe Mathieu-Daudé 
 S: Maintained
-F: hw/isa/piix4.c
+F: hw/isa/piix.c
 F: include/hw/southbridge/piix.h
 
 Firmware configuration (fw_cfg)
diff --git a/hw/isa/piix3.c b/hw/isa/piix.c
similarity index 71%
rename from hw/isa/piix3.c
rename to hw/isa/piix.c
index c7e59249b6..f6da334c6f 100644
--- a/hw/isa/piix3.c
+++ b/hw/isa/piix.c
@@ -2,6 +2,7 @@
  * QEMU PIIX PCI ISA Bridge Emulation
  *
  * Copyright (c) 2006 Fabrice Bellard
+ * Copyright (c) 2018 Hervé Poussineau
  *
  * Permission is hereby granted, free of charge, to any person obtaining a copy
  * of this software and associated documentation files (the "Software"), to 
deal
@@ -27,14 +28,20 @@
 #include "qapi/error.h"
 #include "hw/dma/i8257.h"
 #include "hw/southbridge/piix.h"
+#include "hw/timer/i8254.h"
 #include "hw/irq.h"
 #include "hw/qdev-properties.h"
 #include "hw/ide/piix.h"
+#include "hw/intc/i8259.h"
 #include "hw/isa/isa.h"
 #include "sysemu/runstate.h"
 #include "migration/vmstate.h"
 #include "hw/acpi/acpi_aml_interface.h"
 
+typedef struct PIIXState PIIX4State;
+
+DECLARE_INSTANCE_CHECKER(PIIX4State, PIIX4_PCI_DEVICE, TYPE_PIIX4_PCI_DEVICE)
+
 static void piix3_set_irq_pic(PIIXState *piix3, int pic_irq)
 {
 qemu_set_irq(piix3->isa_irqs_in[pic_irq],
@@ -78,6 +85,33 @@ static void piix3_set_irq(void *opaque, int pirq, int level)
 piix3_set_irq_level(piix3, pirq, level);
 }
 
+static void piix4_set_irq(void *opaque, int irq_num, int level)
+{
+int i, pic_irq, pic_level;
+PIIX4State *s = opaque;
+PCIBus *bus = pci_get_bus(>dev);
+
+/* now we change the pic irq level according to the piix irq mappings */
+/* XXX: optimize */
+pic_irq = s->dev.config[PIIX_PIRQCA + irq_num];
+if (pic_irq < ISA_NUM_IRQS) {
+/* The pic level is the logical OR of all the PCI irqs mapped to it. */
+pic_level = 0;
+for (i = 0; i < PIIX_NUM_PIRQS; i++) {
+if (pic_irq == s->dev.config[PIIX_PIRQCA + i]) {
+pic_level |= pci_bus_get_irq_level(bus, i);
+}
+}
+qemu_set_irq(s->isa_irqs_in[pic_irq], pic_level);
+}
+}
+
+static void piix4_request_i8259_irq(void *opaque, int irq, int level)
+{
+PIIX4State *s = opaque;
+qemu_set_irq(s->cpu_intr, level);
+}
+
 static PCIINTxRoute piix3_route_intx_pin_to_irq(void *opaque, int pin)
 {
 PIIXState *piix3 = opaque;
@@ -122,9 +156,8 @@ static void piix3_write_config(PCIDevice *dev,
 }
 }
 
-static void piix3_reset(DeviceState *dev)
+static void piix_reset(PIIXState *d)
 {
-PIIXState *d = PIIX_PCI_DEVICE(dev);
 uint8_t *pci_conf = d->dev.config;
 
 pci_conf[0x04] = 0x07; /* master, memory and I/O */
@@ -163,6 +196,13 @@ static void piix3_reset(DeviceState *dev)
 d->rcr = 0;
 }
 
+static void piix3_reset(DeviceState *dev)
+{
+PIIXState *d = PIIX_PCI_DEVICE(dev);
+
+piix_reset(d);
+}
+
 static int piix3_post_load(void *opaque, int version_id)
 {
 PIIXState *piix3 = opaque;
@@ -185,6 +225,17 @@ static int piix3_post_load(void *opaque, int version_id)
 return 0;
 }
 
+static int piix4_post_load(void *opaque, int version_id)
+{
+PIIX4State *s = opaque;
+
+if (version_id == 2) {
+s->rcr = 0;
+}
+
+return 0;
+}
+
 static int piix3_pre_save(void *opaque)
 {
 int i;
@@ -234,6 +285,17 @@ static const VMStateDescription vmstate_piix3 = {
 }
 };
 
+static const VMStateDescription vmstate_piix4 = {
+.name = "PIIX4",
+.version_id = 3,
+.minimum_version_id = 2,
+.post_load = piix4_post_load,
+.fields = (VMStateField[]) {
+VMSTATE_PCI_DEVICE(dev, PIIX4State),
+VMSTATE_UINT8_V(rcr, PIIX4State, 3),
+VMSTATE_END_OF_LIST()
+}
+};
 
 static

[PATCH v8 04/29] hw/isa/piix3: Resolve redundant PIIX_NUM_PIC_IRQS

2023-10-07 Thread Bernhard Beschow

PIIX_NUM_PIC_IRQS is assumed to be the same as ISA_NUM_IRQS, otherwise
inconsistencies can occur.

Signed-off-by: Bernhard Beschow 
Reviewed-by: Michael S. Tsirkin 
---
 include/hw/southbridge/piix.h | 5 ++---
 hw/isa/piix3.c| 8 
 2 files changed, 6 insertions(+), 7 deletions(-)

diff --git a/include/hw/southbridge/piix.h b/include/hw/southbridge/piix.h
index 278171752f..2317bb7974 100644
--- a/include/hw/southbridge/piix.h
+++ b/include/hw/southbridge/piix.h
@@ -27,7 +27,6 @@
  */
 #define PIIX_RCR_IOPORT 0xcf9
 
-#define PIIX_NUM_PIC_IRQS   16  /* i8259 * 2 */
 #define PIIX_NUM_PIRQS  4ULL/* PIRQ[A-D] */
 
 struct PIIXState {
@@ -39,10 +38,10 @@ struct PIIXState {
  * So one PIC level is tracked by PIIX_NUM_PIRQS bits.
  *
  * PIRQ is mapped to PIC pins, we track it by
- * PIIX_NUM_PIRQS * PIIX_NUM_PIC_IRQS = 64 bits with
+ * PIIX_NUM_PIRQS * ISA_NUM_IRQS = 64 bits with
  * pic_irq * PIIX_NUM_PIRQS + pirq
  */
-#if PIIX_NUM_PIC_IRQS * PIIX_NUM_PIRQS > 64
+#if ISA_NUM_IRQS * PIIX_NUM_PIRQS > 64
 #error "unable to encode pic state in 64bit in pic_levels."
 #endif
 uint64_t pic_levels;
diff --git a/hw/isa/piix3.c b/hw/isa/piix3.c
index 117024e450..7240c91440 100644
--- a/hw/isa/piix3.c
+++ b/hw/isa/piix3.c
@@ -48,7 +48,7 @@ static void piix3_set_irq_level_internal(PIIX3State *piix3, 
int pirq, int level)
 uint64_t mask;
 
 pic_irq = piix3->dev.config[PIIX_PIRQCA + pirq];
-if (pic_irq >= PIIX_NUM_PIC_IRQS) {
+if (pic_irq >= ISA_NUM_IRQS) {
 return;
 }
 
@@ -62,7 +62,7 @@ static void piix3_set_irq_level(PIIX3State *piix3, int pirq, 
int level)
 int pic_irq;
 
 pic_irq = piix3->dev.config[PIIX_PIRQCA + pirq];
-if (pic_irq >= PIIX_NUM_PIC_IRQS) {
+if (pic_irq >= ISA_NUM_IRQS) {
 return;
 }
 
@@ -83,7 +83,7 @@ static PCIINTxRoute piix3_route_intx_pin_to_irq(void *opaque, 
int pin)
 int irq = piix3->dev.config[PIIX_PIRQCA + pin];
 PCIINTxRoute route;
 
-if (irq < PIIX_NUM_PIC_IRQS) {
+if (irq < ISA_NUM_IRQS) {
 route.mode = PCI_INTX_ENABLED;
 route.irq = irq;
 } else {
@@ -115,7 +115,7 @@ static void piix3_write_config(PCIDevice *dev,
 
 pci_bus_fire_intx_routing_notifier(pci_get_bus(>dev));
 piix3_update_irq_levels(piix3);
-for (pic_irq = 0; pic_irq < PIIX_NUM_PIC_IRQS; pic_irq++) {
+for (pic_irq = 0; pic_irq < ISA_NUM_IRQS; pic_irq++) {
 piix3_set_irq_pic(piix3, pic_irq);
 }
 }
-- 
2.42.0

[PATCH v8 24/29] hw/isa/piix: Reuse PIIX3 base class' realize method in PIIX4

2023-10-07 Thread Bernhard Beschow

Resolves duplicate code. Also makes PIIX4 respect the PIIX3 properties which get
added, too. This allows for using PIIX4 in the PC machine.

Signed-off-by: Bernhard Beschow 
---
 hw/isa/piix.c   | 80 ++---
 hw/mips/malta.c |  5 ++--
 2 files changed, 12 insertions(+), 73 deletions(-)

diff --git a/hw/isa/piix.c b/hw/isa/piix.c
index 8f7d6c56a8..2ab799b95e 100644
--- a/hw/isa/piix.c
+++ b/hw/isa/piix.c
@@ -315,7 +315,8 @@ static const MemoryRegionOps rcr_ops = {
 },
 };
 
-static void pci_piix3_realize(PCIDevice *dev, Error **errp)
+static void pci_piix_realize(PCIDevice *dev, const char *uhci_type,
+ Error **errp)
 {
 PIIXState *d = PIIX_PCI_DEVICE(dev);
 PCIBus *pci_bus = pci_get_bus(dev);
@@ -374,8 +375,7 @@ static void pci_piix3_realize(PCIDevice *dev, Error **errp)
 
 /* USB */
 if (d->has_usb) {
-object_initialize_child(OBJECT(dev), "uhci", >uhci,
-TYPE_PIIX3_USB_UHCI);
+object_initialize_child(OBJECT(dev), "uhci", >uhci, uhci_type);
 qdev_prop_set_int32(DEVICE(>uhci), "addr", dev->devfn + 2);
 if (!qdev_realize(DEVICE(>uhci), BUS(pci_bus), errp)) {
 return;
@@ -426,7 +426,7 @@ static void pci_piix_init(Object *obj)
 object_initialize_child(obj, "rtc", >rtc, TYPE_MC146818_RTC);
 }
 
-static Property pci_piix3_props[] = {
+static Property pci_piix_props[] = {
 DEFINE_PROP_UINT32("smb_io_base", PIIXState, smb_io_base, 0),
 DEFINE_PROP_BOOL("has-acpi", PIIXState, has_acpi, true),
 DEFINE_PROP_BOOL("has-pic", PIIXState, has_pic, true),
@@ -452,6 +452,7 @@ static void pci_piix_class_init(ObjectClass *klass, void 
*data)
  * pc_piix.c's pc_init1()
  */
 dc->user_creatable = false;
+device_class_set_props(dc, pci_piix_props);
 adevc->build_dev_aml = build_pci_isa_aml;
 }
 
@@ -475,7 +476,7 @@ static void piix3_realize(PCIDevice *dev, Error **errp)
 PIIXState *piix3 = PIIX_PCI_DEVICE(dev);
 PCIBus *pci_bus = pci_get_bus(dev);
 
-pci_piix3_realize(dev, errp);
+pci_piix_realize(dev, TYPE_PIIX3_USB_UHCI, errp);
 if (*errp) {
 return;
 }
@@ -501,7 +502,6 @@ static void piix3_class_init(ObjectClass *klass, void *data)
 /* 82371SB PIIX3 PCI-to-ISA bridge (Step A1) */
 k->device_id = PCI_DEVICE_ID_INTEL_82371SB_0;
 dc->vmsd = _piix3;
-device_class_set_props(dc, pci_piix3_props);
 }
 
 static const TypeInfo piix3_info = {
@@ -513,71 +513,14 @@ static const TypeInfo piix3_info = {
 
 static void piix4_realize(PCIDevice *dev, Error **errp)
 {
+ERRP_GUARD();
 PIIXState *s = PIIX_PCI_DEVICE(dev);
 PCIBus *pci_bus = pci_get_bus(dev);
-ISABus *isa_bus;
-qemu_irq *i8259_out_irq;
-qemu_irq *i8259;
-size_t i;
-
-isa_bus = isa_bus_new(DEVICE(dev), pci_address_space(dev),
-  pci_address_space_io(dev), errp);
-if (!isa_bus) {
-return;
-}
-
-qdev_init_gpio_out_named(DEVICE(dev), >cpu_intr,
- "intr", 1);
-
-memory_region_init_io(>rcr_mem, OBJECT(dev), _ops, s,
-  "piix-reset-control", 1);
-memory_region_add_subregion_overlap(pci_address_space_io(dev),
-PIIX_RCR_IOPORT, >rcr_mem, 1);
-
-/* initialize i8259 pic */
-i8259_out_irq = qemu_allocate_irqs(piix_request_i8259_irq, s, 1);
-i8259 = i8259_init(isa_bus, *i8259_out_irq);
-
-for (i = 0; i < ISA_NUM_IRQS; i++) {
-s->isa_irqs_in[i] = i8259[i];
-}
-
-g_free(i8259);
-
-/* initialize ISA irqs */
-isa_bus_register_input_irqs(isa_bus, s->isa_irqs_in);
-
-/* initialize pit */
-i8254_pit_init(isa_bus, 0x40, 0, NULL);
 
-/* DMA */
-i8257_dma_init(isa_bus, 0);
-
-/* RTC */
-qdev_prop_set_int32(DEVICE(>rtc), "base_year", 2000);
-if (!qdev_realize(DEVICE(>rtc), BUS(isa_bus), errp)) {
-return;
-}
-s->rtc.irq = isa_get_irq(ISA_DEVICE(>rtc), s->rtc.isairq);
-
-/* IDE */
-qdev_prop_set_int32(DEVICE(>ide), "addr", dev->devfn + 1);
-if (!qdev_realize(DEVICE(>ide), BUS(pci_bus), errp)) {
-return;
-}
-
-/* USB */
-qdev_prop_set_int32(DEVICE(>uhci), "addr", dev->devfn + 2);
-if (!qdev_realize(DEVICE(>uhci), BUS(pci_bus), errp)) {
-return;
-}
-
-/* ACPI controller */
-qdev_prop_set_int32(DEVICE(>pm), "addr", dev->devfn + 3);
-if (!qdev_realize(DEVICE(>pm), BUS(pci_bus), errp)) {
+pci_piix_realize(dev, TYPE_PIIX4_USB_UHCI, errp);
+if (*errp) {
 return;
 }
-qdev_connect_gpio_out(DEVICE(>pm), 0, s->isa_irqs_in[9]);
 
 pci_bus_irqs(pci_bus, piix4_set_irq, s, PIIX_NUM_PIRQS);
 }
@@ -587,11 +530,6 @@ static void piix4_init(Object *obj)
 PIIXState *s = PIIX_PCI_DEVICE(obj);
 
 object_initialize_child(obj, "ide", >ide, TYPE_PIIX4_IDE);
-object_initialize_child(obj, "uhci", >uhci, TYPE_PIIX4_USB_UHCI);
-
-

[PATCH v8 01/29] hw/i386/pc: Merge two if statements into one

2023-10-07 Thread Bernhard Beschow

By being the only entity assigning a non-NULL value to "rtc_irq", the first if
statement determines whether the second if statement is executed. So merge the
two statements into one.

Signed-off-by: Bernhard Beschow 
---
 hw/i386/pc.c | 7 +++
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index aad7e8ccd1..2fbdff89e0 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -1200,7 +1200,6 @@ void pc_basic_device_init(struct PCMachineState *pcms,
 DeviceState *hpet = NULL;
 int pit_isa_irq = 0;
 qemu_irq pit_alt_irq = NULL;
-qemu_irq rtc_irq = NULL;
 ISADevice *pit = NULL;
 MemoryRegion *ioport80_io = g_new(MemoryRegion, 1);
 MemoryRegion *ioportF0_io = g_new(MemoryRegion, 1);
@@ -1220,6 +1219,8 @@ void pc_basic_device_init(struct PCMachineState *pcms,
  */
 if (pcms->hpet_enabled && (!kvm_irqchip_in_kernel() ||
kvm_has_pit_state2())) {
+qemu_irq rtc_irq;
+
 hpet = qdev_try_new(TYPE_HPET);
 if (!hpet) {
 error_report("couldn't create HPET device");
@@ -1244,9 +1245,6 @@ void pc_basic_device_init(struct PCMachineState *pcms,
 pit_isa_irq = -1;
 pit_alt_irq = qdev_get_gpio_in(hpet, HPET_LEGACY_PIT_INT);
 rtc_irq = qdev_get_gpio_in(hpet, HPET_LEGACY_RTC_INT);
-}
-
-if (rtc_irq) {
 qdev_connect_gpio_out(DEVICE(rtc_state), 0, rtc_irq);
 } else {
 uint32_t irq = object_property_get_uint(OBJECT(rtc_state),
@@ -1254,6 +1252,7 @@ void pc_basic_device_init(struct PCMachineState *pcms,
 _fatal);
 isa_connect_gpio_out(rtc_state, 0, irq);
 }
+
 object_property_add_alias(OBJECT(pcms), "rtc-time", OBJECT(rtc_state),
   "date");
 
-- 
2.42.0

[PATCH v8 13/29] hw/isa/piix3: Create power management controller in host device

2023-10-07 Thread Bernhard Beschow

The power management controller is an integral part of PIIX3 (function 3). So
create it as part of the south bridge.

Note that the ACPI function is optional in QEMU. This is why it gets
object_initialize_child()'ed in realize rather than in instance_init.

Signed-off-by: Bernhard Beschow 
Reviewed-by: Michael S. Tsirkin 
---
 include/hw/southbridge/piix.h |  6 ++
 hw/i386/pc_piix.c | 24 +++-
 hw/isa/piix3.c| 15 +++
 hw/isa/Kconfig|  1 +
 4 files changed, 33 insertions(+), 13 deletions(-)

diff --git a/include/hw/southbridge/piix.h b/include/hw/southbridge/piix.h
index 5cd866f1f2..c56ce49fd3 100644
--- a/include/hw/southbridge/piix.h
+++ b/include/hw/southbridge/piix.h
@@ -13,6 +13,7 @@
 #define HW_SOUTHBRIDGE_PIIX_H
 
 #include "hw/pci/pci_device.h"
+#include "hw/acpi/piix4.h"
 #include "hw/ide/pci.h"
 #include "hw/rtc/mc146818rtc.h"
 #include "hw/usb/hcd-uhci.h"
@@ -56,6 +57,9 @@ struct PIIXState {
 MC146818RtcState rtc;
 PCIIDEState ide;
 UHCIState uhci;
+PIIX4PMState pm;
+
+uint32_t smb_io_base;
 
 /* Reset Control Register contents */
 uint8_t rcr;
@@ -63,7 +67,9 @@ struct PIIXState {
 /* IO memory region for Reset Control Register (PIIX_RCR_IOPORT) */
 MemoryRegion rcr_mem;
 
+bool has_acpi;
 bool has_usb;
+bool smm_enabled;
 };
 typedef struct PIIXState PIIX3State;
 
diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
index 8dcd6851d0..70cffcfe4f 100644
--- a/hw/i386/pc_piix.c
+++ b/hw/i386/pc_piix.c
@@ -50,7 +50,6 @@
 #include "hw/i2c/smbus_eeprom.h"
 #include "exec/memory.h"
 #include "hw/acpi/acpi.h"
-#include "hw/acpi/piix4.h"
 #include "qapi/error.h"
 #include "qemu/error-report.h"
 #include "sysemu/xen.h"
@@ -115,7 +114,7 @@ static void pc_init1(MachineState *machine,
 MemoryRegion *system_io = get_system_io();
 PCIBus *pci_bus = NULL;
 ISABus *isa_bus;
-int piix3_devfn = -1;
+Object *piix4_pm = NULL;
 qemu_irq smi_irq;
 GSIState *gsi_state;
 BusState *idebus[MAX_IDE_BUS];
@@ -266,6 +265,13 @@ static void pc_init1(MachineState *machine,
 pci_dev = pci_new_multifunction(-1, TYPE_PIIX3_DEVICE);
 object_property_set_bool(OBJECT(pci_dev), "has-usb",
  machine_usb(machine), _abort);
+object_property_set_bool(OBJECT(pci_dev), "has-acpi",
+ x86_machine_is_acpi_enabled(x86ms),
+ _abort);
+qdev_prop_set_uint32(DEVICE(pci_dev), "smb_io_base", 0xb100);
+object_property_set_bool(OBJECT(pci_dev), "smm-enabled",
+ x86_machine_is_smm_enabled(x86ms),
+ _abort);
 dev = DEVICE(pci_dev);
 for (i = 0; i < ISA_NUM_IRQS; i++) {
 qdev_connect_gpio_out_named(dev, "isa-irqs", i, x86ms->gsi[i]);
@@ -286,10 +292,10 @@ static void pc_init1(MachineState *machine,
  XEN_IOAPIC_NUM_PIRQS);
 }
 
-piix3_devfn = pci_dev->devfn;
 isa_bus = ISA_BUS(qdev_get_child_bus(DEVICE(pci_dev), "isa.0"));
 rtc_state = ISA_DEVICE(object_resolve_path_component(OBJECT(pci_dev),
  "rtc"));
+piix4_pm = object_resolve_path_component(OBJECT(pci_dev), "pm");
 dev = DEVICE(object_resolve_path_component(OBJECT(pci_dev), "ide"));
 pci_ide_create_devs(PCI_DEVICE(dev));
 idebus[0] = qdev_get_child_bus(dev, "ide.0");
@@ -360,17 +366,9 @@ static void pc_init1(MachineState *machine,
 }
 #endif
 
-if (pcmc->pci_enabled && x86_machine_is_acpi_enabled(X86_MACHINE(pcms))) {
-PCIDevice *piix4_pm;
-
+if (piix4_pm) {
 smi_irq = qemu_allocate_irq(pc_acpi_smi_interrupt, first_cpu, 0);
-piix4_pm = pci_new(piix3_devfn + 3, TYPE_PIIX4_PM);
-qdev_prop_set_uint32(DEVICE(piix4_pm), "smb_io_base", 0xb100);
-qdev_prop_set_bit(DEVICE(piix4_pm), "smm-enabled",
-  x86_machine_is_smm_enabled(x86ms));
-pci_realize_and_unref(piix4_pm, pci_bus, _fatal);
 
-qdev_connect_gpio_out(DEVICE(piix4_pm), 0, x86ms->gsi[9]);
 qdev_connect_gpio_out_named(DEVICE(piix4_pm), "smi-irq", 0, smi_irq);
 pcms->smbus = I2C_BUS(qdev_get_child_bus(DEVICE(piix4_pm), "i2c"));
 /* TODO: Populate SPD eeprom data.  */
@@ -382,7 +380,7 @@ static void pc_init1(MachineState *machine,
  object_property_allow_set_link,
  OBJ_PROP_LINK_STRONG);
 object_property_set_link(OBJECT(machine), PC_MACHINE_ACPI_DEVICE_PROP,
- OBJECT(piix4_pm), _abort);
+ piix4_pm, _abort);
 }
 
 if (machine->nvdimms_state->is_enabled) {
diff --git a/hw/isa/piix3.c b/hw/isa/piix3.c
index aebc0da23b..5b867df299 100644
--- a/hw/isa/piix3.c
+++ b/hw/isa/piix3.c

[PATCH v8 09/29] hw/isa/piix3: Wire PIC IRQs to ISA bus in host device

2023-10-07 Thread Bernhard Beschow

Thie PIIX3 south bridge implements both the PIC and the ISA bus, so wiring the
interrupts there makes the device model more self-contained. Furthermore, this
allows the ISA interrupts to be wired to internal child devices in
pci_piix3_realize() which will be performed in subsequent patches.

Signed-off-by: Bernhard Beschow 
---
 hw/i386/pc_piix.c | 2 +-
 hw/isa/piix3.c| 2 ++
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
index cd6c00c0b3..5988656279 100644
--- a/hw/i386/pc_piix.c
+++ b/hw/i386/pc_piix.c
@@ -293,6 +293,7 @@ static void pc_init1(MachineState *machine,
 } else {
 isa_bus = isa_bus_new(NULL, system_memory, system_io,
   _abort);
+isa_bus_register_input_irqs(isa_bus, x86ms->gsi);
 
 rtc_state = isa_new(TYPE_MC146818_RTC);
 qdev_prop_set_int32(DEVICE(rtc_state), "base_year", 2000);
@@ -301,7 +302,6 @@ static void pc_init1(MachineState *machine,
 i8257_dma_init(isa_bus, 0);
 pcms->hpet_enabled = false;
 }
-isa_bus_register_input_irqs(isa_bus, x86ms->gsi);
 
 if (x86ms->pic == ON_OFF_AUTO_ON || x86ms->pic == ON_OFF_AUTO_AUTO) {
 pc_i8259_create(isa_bus, gsi_state->i8259_irq);
diff --git a/hw/isa/piix3.c b/hw/isa/piix3.c
index 616f5418fa..3e7c42fa68 100644
--- a/hw/isa/piix3.c
+++ b/hw/isa/piix3.c
@@ -278,6 +278,8 @@ static void pci_piix3_realize(PCIDevice *dev, Error **errp)
 memory_region_add_subregion_overlap(pci_address_space_io(dev),
 PIIX_RCR_IOPORT, >rcr_mem, 1);
 
+isa_bus_register_input_irqs(isa_bus, d->isa_irqs_in);
+
 i8257_dma_init(isa_bus, 0);
 
 /* RTC */
-- 
2.42.0

[PATCH v8 07/29] hw/isa/piix3: Rename "pic" attribute to "isa_irqs_in"

2023-10-07 Thread Bernhard Beschow

TYPE_PIIX3_DEVICE doesn't instantiate a PIC since it relies on the board to do
so. The "pic" attribute, however, suggests that there is one. Rename the
attribute to reflect that it represents ISA interrupt lines. Use the same naming
convention as in the VIA south bridges as well as in TYPE_I82378.

Signed-off-by: Bernhard Beschow 
---
 include/hw/southbridge/piix.h | 2 +-
 hw/isa/piix3.c| 5 +++--
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/include/hw/southbridge/piix.h b/include/hw/southbridge/piix.h
index bb898c6c88..b07ff6bb26 100644
--- a/include/hw/southbridge/piix.h
+++ b/include/hw/southbridge/piix.h
@@ -46,7 +46,7 @@ struct PIIXState {
 #endif
 uint64_t pic_levels;
 
-qemu_irq pic[ISA_NUM_IRQS];
+qemu_irq isa_irqs_in[ISA_NUM_IRQS];
 
 /* This member isn't used. Just for save/load compatibility */
 int32_t pci_irq_levels_vmstate[PIIX_NUM_PIRQS];
diff --git a/hw/isa/piix3.c b/hw/isa/piix3.c
index c17547a2c0..616f5418fa 100644
--- a/hw/isa/piix3.c
+++ b/hw/isa/piix3.c
@@ -36,7 +36,7 @@
 
 static void piix3_set_irq_pic(PIIX3State *piix3, int pic_irq)
 {
-qemu_set_irq(piix3->pic[pic_irq],
+qemu_set_irq(piix3->isa_irqs_in[pic_irq],
  !!(piix3->pic_levels &
 (((1ULL << PIIX_NUM_PIRQS) - 1) <<
  (pic_irq * PIIX_NUM_PIRQS;
@@ -312,7 +312,8 @@ static void pci_piix3_init(Object *obj)
 {
 PIIX3State *d = PIIX3_PCI_DEVICE(obj);
 
-qdev_init_gpio_out_named(DEVICE(obj), d->pic, "isa-irqs", ISA_NUM_IRQS);
+qdev_init_gpio_out_named(DEVICE(obj), d->isa_irqs_in, "isa-irqs",
+ ISA_NUM_IRQS);
 
 object_initialize_child(obj, "rtc", >rtc, TYPE_MC146818_RTC);
 }
-- 
2.42.0

[PATCH v8 27/29] hw/isa/piix: Resolve duplicate code regarding PCI interrupt wiring

2023-10-07 Thread Bernhard Beschow

Now that both PIIX3 and PIIX4 use piix_set_irq() to trigger PCI IRQs the wiring
in the respective realize methods can be shared, too.

Signed-off-by: Bernhard Beschow 
Reviewed-by: Michael S. Tsirkin 
---
 hw/isa/piix.c | 13 ++---
 1 file changed, 2 insertions(+), 11 deletions(-)

diff --git a/hw/isa/piix.c b/hw/isa/piix.c
index 17677c2126..cba2098ca2 100644
--- a/hw/isa/piix.c
+++ b/hw/isa/piix.c
@@ -372,6 +372,8 @@ static void pci_piix_realize(PCIDevice *dev, const char 
*uhci_type,
 }
 qdev_connect_gpio_out(DEVICE(>pm), 0, d->isa_irqs_in[9]);
 }
+
+pci_bus_irqs(pci_bus, piix_set_pci_irq, d, PIIX_NUM_PIRQS);
 }
 
 static void build_pci_isa_aml(AcpiDevAmlIf *adev, Aml *scope)
@@ -453,7 +455,6 @@ static const TypeInfo piix_pci_type_info = {
 static void piix3_realize(PCIDevice *dev, Error **errp)
 {
 ERRP_GUARD();
-PIIXState *piix3 = PIIX_PCI_DEVICE(dev);
 PCIBus *pci_bus = pci_get_bus(dev);
 
 pci_piix_realize(dev, TYPE_PIIX3_USB_UHCI, errp);
@@ -461,7 +462,6 @@ static void piix3_realize(PCIDevice *dev, Error **errp)
 return;
 }
 
-pci_bus_irqs(pci_bus, piix_set_pci_irq, piix3, PIIX_NUM_PIRQS);
 pci_bus_set_route_irq_fn(pci_bus, piix_route_intx_pin_to_irq);
 }
 
@@ -492,16 +492,7 @@ static const TypeInfo piix3_info = {
 
 static void piix4_realize(PCIDevice *dev, Error **errp)
 {
-ERRP_GUARD();
-PIIXState *s = PIIX_PCI_DEVICE(dev);
-PCIBus *pci_bus = pci_get_bus(dev);
-
 pci_piix_realize(dev, TYPE_PIIX4_USB_UHCI, errp);
-if (*errp) {
-return;
-}
-
-pci_bus_irqs(pci_bus, piix_set_pci_irq, s, PIIX_NUM_PIRQS);
 }
 
 static void piix4_init(Object *obj)
-- 
2.42.0

[PATCH v8 12/29] hw/isa/piix3: Create USB controller in host device

2023-10-07 Thread Bernhard Beschow

The USB controller is an integral part of PIIX3 (function 2). So create
it as part of the south bridge.

Note that the USB function is optional in QEMU. This is why it gets
object_initialize_child()'ed in realize rather than in instance_init.

Signed-off-by: Bernhard Beschow 
Reviewed-by: Michael S. Tsirkin 
---
 include/hw/southbridge/piix.h |  4 
 hw/i386/pc_piix.c |  7 ++-
 hw/isa/piix3.c| 16 
 hw/isa/Kconfig|  1 +
 4 files changed, 23 insertions(+), 5 deletions(-)

diff --git a/include/hw/southbridge/piix.h b/include/hw/southbridge/piix.h
index 1daeff397c..5cd866f1f2 100644
--- a/include/hw/southbridge/piix.h
+++ b/include/hw/southbridge/piix.h
@@ -15,6 +15,7 @@
 #include "hw/pci/pci_device.h"
 #include "hw/ide/pci.h"
 #include "hw/rtc/mc146818rtc.h"
+#include "hw/usb/hcd-uhci.h"
 
 /* PIRQRC[A:D]: PIRQx Route Control Registers */
 #define PIIX_PIRQCA 0x60
@@ -54,12 +55,15 @@ struct PIIXState {
 
 MC146818RtcState rtc;
 PCIIDEState ide;
+UHCIState uhci;
 
 /* Reset Control Register contents */
 uint8_t rcr;
 
 /* IO memory region for Reset Control Register (PIIX_RCR_IOPORT) */
 MemoryRegion rcr_mem;
+
+bool has_usb;
 };
 typedef struct PIIXState PIIX3State;
 
diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
index c98a997482..8dcd6851d0 100644
--- a/hw/i386/pc_piix.c
+++ b/hw/i386/pc_piix.c
@@ -51,7 +51,6 @@
 #include "exec/memory.h"
 #include "hw/acpi/acpi.h"
 #include "hw/acpi/piix4.h"
-#include "hw/usb/hcd-uhci.h"
 #include "qapi/error.h"
 #include "qemu/error-report.h"
 #include "sysemu/xen.h"
@@ -265,6 +264,8 @@ static void pc_init1(MachineState *machine,
 size_t i;
 
 pci_dev = pci_new_multifunction(-1, TYPE_PIIX3_DEVICE);
+object_property_set_bool(OBJECT(pci_dev), "has-usb",
+ machine_usb(machine), _abort);
 dev = DEVICE(pci_dev);
 for (i = 0; i < ISA_NUM_IRQS; i++) {
 qdev_connect_gpio_out_named(dev, "isa-irqs", i, x86ms->gsi[i]);
@@ -359,10 +360,6 @@ static void pc_init1(MachineState *machine,
 }
 #endif
 
-if (pcmc->pci_enabled && machine_usb(machine)) {
-pci_create_simple(pci_bus, piix3_devfn + 2, TYPE_PIIX3_USB_UHCI);
-}
-
 if (pcmc->pci_enabled && x86_machine_is_acpi_enabled(X86_MACHINE(pcms))) {
 PCIDevice *piix4_pm;
 
diff --git a/hw/isa/piix3.c b/hw/isa/piix3.c
index 3f1dabade0..aebc0da23b 100644
--- a/hw/isa/piix3.c
+++ b/hw/isa/piix3.c
@@ -298,6 +298,16 @@ static void pci_piix3_realize(PCIDevice *dev, Error **errp)
 if (!qdev_realize(DEVICE(>ide), BUS(pci_bus), errp)) {
 return;
 }
+
+/* USB */
+if (d->has_usb) {
+object_initialize_child(OBJECT(dev), "uhci", >uhci,
+TYPE_PIIX3_USB_UHCI);
+qdev_prop_set_int32(DEVICE(>uhci), "addr", dev->devfn + 2);
+if (!qdev_realize(DEVICE(>uhci), BUS(pci_bus), errp)) {
+return;
+}
+}
 }
 
 static void build_pci_isa_aml(AcpiDevAmlIf *adev, Aml *scope)
@@ -332,6 +342,11 @@ static void pci_piix3_init(Object *obj)
 object_initialize_child(obj, "ide", >ide, TYPE_PIIX3_IDE);
 }
 
+static Property pci_piix3_props[] = {
+DEFINE_PROP_BOOL("has-usb", PIIX3State, has_usb, true),
+DEFINE_PROP_END_OF_LIST(),
+};
+
 static void pci_piix3_class_init(ObjectClass *klass, void *data)
 {
 DeviceClass *dc = DEVICE_CLASS(klass);
@@ -352,6 +367,7 @@ static void pci_piix3_class_init(ObjectClass *klass, void 
*data)
  * pc_piix.c's pc_init1()
  */
 dc->user_creatable = false;
+device_class_set_props(dc, pci_piix3_props);
 adevc->build_dev_aml = build_pci_isa_aml;
 }
 
diff --git a/hw/isa/Kconfig b/hw/isa/Kconfig
index 28345edbb3..1076df69ca 100644
--- a/hw/isa/Kconfig
+++ b/hw/isa/Kconfig
@@ -37,6 +37,7 @@ config PIIX3
 select IDE_PIIX
 select ISA_BUS
 select MC146818RTC
+select USB_UHCI
 
 config PIIX4
 bool
-- 
2.42.0

[PATCH v8 23/29] hw/isa/piix: Share PIIX3's base class with PIIX4

2023-10-07 Thread Bernhard Beschow

Having a common base class will allow for futher code sharing between PIIX3 and
PIIX4. Moreover, it makes PIIX4 implement the acpi-dev-aml-interface.

Signed-off-by: Bernhard Beschow 
Reviewed-by: Michael S. Tsirkin 
---
 hw/isa/piix.c | 85 ++-
 1 file changed, 30 insertions(+), 55 deletions(-)

diff --git a/hw/isa/piix.c b/hw/isa/piix.c
index bd66fb7475..8f7d6c56a8 100644
--- a/hw/isa/piix.c
+++ b/hw/isa/piix.c
@@ -38,10 +38,6 @@
 #include "migration/vmstate.h"
 #include "hw/acpi/acpi_aml_interface.h"
 
-typedef struct PIIXState PIIX4State;
-
-DECLARE_INSTANCE_CHECKER(PIIX4State, PIIX4_PCI_DEVICE, TYPE_PIIX4_PCI_DEVICE)
-
 static void piix3_set_irq_pic(PIIXState *piix3, int pic_irq)
 {
 qemu_set_irq(piix3->isa_irqs_in[pic_irq],
@@ -88,7 +84,7 @@ static void piix3_set_irq(void *opaque, int pirq, int level)
 static void piix4_set_irq(void *opaque, int irq_num, int level)
 {
 int i, pic_irq, pic_level;
-PIIX4State *s = opaque;
+PIIXState *s = opaque;
 PCIBus *bus = pci_get_bus(>dev);
 
 /* now we change the pic irq level according to the piix irq mappings */
@@ -108,7 +104,7 @@ static void piix4_set_irq(void *opaque, int irq_num, int 
level)
 
 static void piix_request_i8259_irq(void *opaque, int irq, int level)
 {
-PIIX4State *s = opaque;
+PIIXState *s = opaque;
 qemu_set_irq(s->cpu_intr, level);
 }
 
@@ -156,8 +152,9 @@ static void piix3_write_config(PCIDevice *dev,
 }
 }
 
-static void piix_reset(PIIXState *d)
+static void piix_reset(DeviceState *dev)
 {
+PIIXState *d = PIIX_PCI_DEVICE(dev);
 uint8_t *pci_conf = d->dev.config;
 
 pci_conf[0x04] = 0x07; /* master, memory and I/O */
@@ -196,13 +193,6 @@ static void piix_reset(PIIXState *d)
 d->rcr = 0;
 }
 
-static void piix3_reset(DeviceState *dev)
-{
-PIIXState *d = PIIX_PCI_DEVICE(dev);
-
-piix_reset(d);
-}
-
 static int piix3_post_load(void *opaque, int version_id)
 {
 PIIXState *piix3 = opaque;
@@ -227,7 +217,7 @@ static int piix3_post_load(void *opaque, int version_id)
 
 static int piix4_post_load(void *opaque, int version_id)
 {
-PIIX4State *s = opaque;
+PIIXState *s = opaque;
 
 if (version_id == 2) {
 s->rcr = 0;
@@ -291,8 +281,8 @@ static const VMStateDescription vmstate_piix4 = {
 .minimum_version_id = 2,
 .post_load = piix4_post_load,
 .fields = (VMStateField[]) {
-VMSTATE_PCI_DEVICE(dev, PIIX4State),
-VMSTATE_UINT8_V(rcr, PIIX4State, 3),
+VMSTATE_PCI_DEVICE(dev, PIIXState),
+VMSTATE_UINT8_V(rcr, PIIXState, 3),
 VMSTATE_END_OF_LIST()
 }
 };
@@ -426,7 +416,7 @@ static void build_pci_isa_aml(AcpiDevAmlIf *adev, Aml 
*scope)
 qbus_build_aml(bus, scope);
 }
 
-static void pci_piix3_init(Object *obj)
+static void pci_piix_init(Object *obj)
 {
 PIIXState *d = PIIX_PCI_DEVICE(obj);
 
@@ -434,7 +424,6 @@ static void pci_piix3_init(Object *obj)
  ISA_NUM_IRQS);
 
 object_initialize_child(obj, "rtc", >rtc, TYPE_MC146818_RTC);
-object_initialize_child(obj, "ide", >ide, TYPE_PIIX3_IDE);
 }
 
 static Property pci_piix3_props[] = {
@@ -447,27 +436,22 @@ static Property pci_piix3_props[] = {
 DEFINE_PROP_END_OF_LIST(),
 };
 
-static void pci_piix3_class_init(ObjectClass *klass, void *data)
+static void pci_piix_class_init(ObjectClass *klass, void *data)
 {
 DeviceClass *dc = DEVICE_CLASS(klass);
 PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
 AcpiDevAmlIfClass *adevc = ACPI_DEV_AML_IF_CLASS(klass);
 
-k->config_write = piix3_write_config;
-dc->reset   = piix3_reset;
+dc->reset   = piix_reset;
 dc->desc= "ISA bridge";
-dc->vmsd= _piix3;
 dc->hotpluggable   = false;
 k->vendor_id= PCI_VENDOR_ID_INTEL;
-/* 82371SB PIIX3 PCI-to-ISA bridge (Step A1) */
-k->device_id= PCI_DEVICE_ID_INTEL_82371SB_0;
 k->class_id = PCI_CLASS_BRIDGE_ISA;
 /*
- * Reason: part of PIIX3 southbridge, needs to be wired up by
+ * Reason: part of PIIX southbridge, needs to be wired up by e.g.
  * pc_piix.c's pc_init1()
  */
 dc->user_creatable = false;
-device_class_set_props(dc, pci_piix3_props);
 adevc->build_dev_aml = build_pci_isa_aml;
 }
 
@@ -475,9 +459,9 @@ static const TypeInfo piix_pci_type_info = {
 .name = TYPE_PIIX_PCI_DEVICE,
 .parent = TYPE_PCI_DEVICE,
 .instance_size = sizeof(PIIXState),
-.instance_init = pci_piix3_init,
+.instance_init = pci_piix_init,
 .abstract = true,
-.class_init = pci_piix3_class_init,
+.class_init = pci_piix_class_init,
 .interfaces = (InterfaceInfo[]) {
 { INTERFACE_CONVENTIONAL_PCI_DEVICE },
 { TYPE_ACPI_DEV_AML_IF },
@@ -500,22 +484,36 @@ static void piix3_realize(PCIDevice *dev, Error **errp)
 pci_bus_set_route_irq_fn(pci_bus, piix3_route_intx_pin_to_irq);
 }
 
+static void piix3_init(Object *obj)
+{
+PIIXState *d = PIIX_PCI_DEVICE(obj);

[PATCH v8 00/29] Consolidate PIIX south bridges

2023-10-07 Thread Bernhard Beschow

This series consolidates the implementations of the PIIX3 and PIIX4 south
bridges and makes PIIX4 usable in the PC machine via an experimental command
line parameter. The motivation is to resolve duplicate code between the device
models as well as resolving the "Frankenstein" PIIX4-PM problem in PIIX3
discussed on this list before.

The series is structured as follows:

Patches 1-8 are preparational patches necessary for moving all sub devices into
PIIX3, like was done for PIIX4. In isolation these patches can also be seen as
general x86 machine cleanup sub series which has merit in its own right -- and
could be applied to master if the remainder of the series takes longer to
review.

Patches 9-13 move PIIX3 sub devices into one device model like already
done for PIIX4. Together with the previous sub series these patches form a
bigger sub series which also has merit in its own right, and could be applied
independent of the remainder of this series as well.

The remainder of this series consolidates the PIIX3 and PIIX4 device models.
The culmination point is the last commit which makes PIIX4 usable in the PC
machine.

One challenge was dealing with optional devices where Peter already gave advice
in [1] which this series implements. Although PIIX4 is now usable in the PC
machine it still has a different binary layout in its VM state.

Testing done:
* `make check`
* `qemu-system-x86_64 -M pc -m 2G -accel kvm -cdrom
 manjaro-kde-21.3.2-220704-linux515.iso`
* `qemu-system-x86_64 -M pc,x-south-bridge=piix4-isa -m 2G -accel kvm -cdrom
 manjaro-kde-21.3.2-220704-linux515.iso`
* `qemu-system-x86_64 -M q35 -m 2G -accel kvm -cdrom
 manjaro-kde-21.3.2-220704-linux515.iso`
* `qemu-system-mips64el -M malta -cpu 5KEc -m 1G -kernel kernel -initrd initrd
 -append "root=LABEL=rootfs console=ttyS0" -drive file=image.qcow2`
* `qemu-system-mips64el -M malta -bios yamon-02.22.bin`
* Run HVM domU guest under Xen with manjaro-kde-21.3.2-220704-linux515.iso image

v8:
- Wire ISA interrupts before device realization
- Optionally allow a PIC and PIT to be instantiated in PIIX3 for compatiblity
with PIIX4
- Touch ICH9 LPC as far as required for PIIX consolidation
- Make PIIX4 usable in the PC machine via an experimental option
- Review and rework history, touching every commit and drop R-b tags when
changes became too large

v7:
- Rebase onto master
- Avoid the PIC proxy (Phil)
  The motivation for the PIC proxy was to allow for wiring up ISA interrupts in
  the south bridges. ISA interrupt wiring requires the GPIO lines to be
  populated already but pc_piix assigned the interrupts only after realizing
  PIIX3. By shifting interrupt assignment before realizing, the ISA interrupts
  are already populated during PIIX3's realize phase where the ISA interrupts
  are wired up.
- New patches:
  * hw/isa/piix4: Reuse struct PIIXState from PIIX3
  * hw/isa/piix4: Create the "intr" property during init() already
- Patches with substantial changes (Reviewed-by dropped):
  * hw/isa/piix3: Move ISA bus IRQ assignments into host device

v6:
- Fix some comments about TYPE_ISA_PIC (Mark) ... and use it consistently
  within the patch series.
- Incorporate series "[PATCH v2 0/3] Decouple INTx-to-LNKx routing from south
  bridges" [2] for maintainer convenience.
- Merge v5's 'hw/i386/pc_piix: Associate pci_map_irq_fn as soon as PCI bus is
  created' into
  https://lists.nongnu.org/archive/html/qemu-devel/2022-11/msg03312.html . Do
  similar for Malta.
- Rebase onto latest master (d6271b657286 "Merge tag 'for_upstream' of
  https://git.kernel.org/pub/scm/virt/kvm/mst/qemu into staging")

v5:
- Pick up Reviewed-by tags from 
https://lists.nongnu.org/archive/html/qemu-devel/2023-01/msg00116.html
- Add patch to make usage of the isa_pic global more type-safe
- Re-introduce isa-pic as PIC specific proxy (Mark)

v4:
- Rebase onto "[PATCH v2 0/3] Decouple INTx-to-LNKx routing from south bridges"
  since it is already queued via mips-next. This eliminates patches
  'hw/isa/piix3: Prefix pci_slot_get_pirq() with "piix3_"' and 'hw/isa/piix4:
  Prefix pci_slot_get_pirq() with "piix4_"'.
- Squash 'hw/isa/piix: Drop the "3" from the PIIX base class' into
  'hw/isa/piix3: Rename typedef PIIX3State to PIIXState'. I originally only
  split these patches since I wasn't sure whether renaming a type was allowed.
- Add new patch 'hw/i386/pc_piix: Associate pci_map_irq_fn as soon as PCI bus is
  created' for forther cleanup of INTx-to-LNKx route decoupling.

v3:
- Introduce one TYPE_ICH9_USB_UHCI(fn) rather than several TYPE_ICH9_USB_UHCIx
  (Philippe)
- Make proxy PIC generic (Philippe)
- Track Malta's PIIX dependencies through KConfig
- Rebase onto Philippe's 'hw/isa/piix4: Remove MIPS Malta specific bits' series 
[3]
- Also rebase onto latest master to resolve merge conflicts. This required
  copying Philippe's series as first three patches - please ignore.

v2:
- Introduce TYPE_ defines for IDE and USB device models (Mark)
- Omit unexporting

[PATCH v8 10/29] hw/i386/pc: Wire RTC ISA IRQs in south bridges

2023-10-07 Thread Bernhard Beschow

Makes the south bridges a bit more self-contained and aligns PIIX3 more with
PIIX4. The latter is needed for consolidating the PIIX south bridges.

Signed-off-by: Bernhard Beschow 
---
 hw/i386/pc.c  | 7 ++-
 hw/isa/lpc_ich9.c | 3 +++
 hw/isa/piix3.c| 3 +++
 3 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index 2fbdff89e0..4e844d02f2 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -1245,12 +1245,9 @@ void pc_basic_device_init(struct PCMachineState *pcms,
 pit_isa_irq = -1;
 pit_alt_irq = qdev_get_gpio_in(hpet, HPET_LEGACY_PIT_INT);
 rtc_irq = qdev_get_gpio_in(hpet, HPET_LEGACY_RTC_INT);
+
+/* overwrite connection created by south bridge */
 qdev_connect_gpio_out(DEVICE(rtc_state), 0, rtc_irq);
-} else {
-uint32_t irq = object_property_get_uint(OBJECT(rtc_state),
-"irq",
-_fatal);
-isa_connect_gpio_out(rtc_state, 0, irq);
 }
 
 object_property_add_alias(OBJECT(pcms), "rtc-time", OBJECT(rtc_state),
diff --git a/hw/isa/lpc_ich9.c b/hw/isa/lpc_ich9.c
index 3fcefc5a8a..23eba64f22 100644
--- a/hw/isa/lpc_ich9.c
+++ b/hw/isa/lpc_ich9.c
@@ -696,6 +696,7 @@ static void ich9_lpc_realize(PCIDevice *d, Error **errp)
 ICH9LPCState *lpc = ICH9_LPC_DEVICE(d);
 PCIBus *pci_bus = pci_get_bus(d);
 ISABus *isa_bus;
+uint32_t irq;
 
 if ((lpc->smi_host_features & BIT_ULL(ICH9_LPC_SMI_F_CPU_HOT_UNPLUG_BIT)) 
&&
 !(lpc->smi_host_features & BIT_ULL(ICH9_LPC_SMI_F_CPU_HOTPLUG_BIT))) {
@@ -745,6 +746,8 @@ static void ich9_lpc_realize(PCIDevice *d, Error **errp)
 if (!qdev_realize(DEVICE(>rtc), BUS(isa_bus), errp)) {
 return;
 }
+irq = object_property_get_uint(OBJECT(>rtc), "irq", _fatal);
+isa_connect_gpio_out(ISA_DEVICE(>rtc), 0, irq);
 
 pci_bus_irqs(pci_bus, ich9_lpc_set_irq, d, ICH9_LPC_NB_PIRQS);
 pci_bus_map_irqs(pci_bus, ich9_lpc_map_irq);
diff --git a/hw/isa/piix3.c b/hw/isa/piix3.c
index 3e7c42fa68..11d72ca2bb 100644
--- a/hw/isa/piix3.c
+++ b/hw/isa/piix3.c
@@ -266,6 +266,7 @@ static void pci_piix3_realize(PCIDevice *dev, Error **errp)
 {
 PIIX3State *d = PIIX3_PCI_DEVICE(dev);
 ISABus *isa_bus;
+uint32_t irq;
 
 isa_bus = isa_bus_new(DEVICE(d), pci_address_space(dev),
   pci_address_space_io(dev), errp);
@@ -287,6 +288,8 @@ static void pci_piix3_realize(PCIDevice *dev, Error **errp)
 if (!qdev_realize(DEVICE(>rtc), BUS(isa_bus), errp)) {
 return;
 }
+irq = object_property_get_uint(OBJECT(>rtc), "irq", _fatal);
+isa_connect_gpio_out(ISA_DEVICE(>rtc), 0, irq);
 }
 
 static void build_pci_isa_aml(AcpiDevAmlIf *adev, Aml *scope)
-- 
2.42.0

[PATCH v8 15/29] hw/isa/piix4: Remove unused inbound ISA interrupt lines

2023-10-07 Thread Bernhard Beschow

The Malta board, which is the only user of PIIX4, doesn't connect to the
exported interrupt lines. PIIX3 doesn't expose such interrupt lines
either, so remove them for PIIX4 for simplicity and consistency.

Signed-off-by: Bernhard Beschow 
Reviewed-by: Michael S. Tsirkin 
---
 hw/isa/piix4.c | 8 
 1 file changed, 8 deletions(-)

diff --git a/hw/isa/piix4.c b/hw/isa/piix4.c
index e0b149f8eb..3c3c7a094c 100644
--- a/hw/isa/piix4.c
+++ b/hw/isa/piix4.c
@@ -148,12 +148,6 @@ static void piix4_request_i8259_irq(void *opaque, int irq, 
int level)
 qemu_set_irq(s->cpu_intr, level);
 }
 
-static void piix4_set_i8259_irq(void *opaque, int irq, int level)
-{
-PIIX4State *s = opaque;
-qemu_set_irq(s->isa[irq], level);
-}
-
 static void piix4_rcr_write(void *opaque, hwaddr addr, uint64_t val,
 unsigned int len)
 {
@@ -197,8 +191,6 @@ static void piix4_realize(PCIDevice *dev, Error **errp)
 return;
 }
 
-qdev_init_gpio_in_named(DEVICE(dev), piix4_set_i8259_irq,
-"isa", ISA_NUM_IRQS);
 qdev_init_gpio_out_named(DEVICE(dev), >cpu_intr,
  "intr", 1);
 
-- 
2.42.0

[PATCH v8 02/29] hw/i386/pc_piix: Allow for setting properties before realizing PIIX3 south bridge

2023-10-07 Thread Bernhard Beschow

The next patches will need to take advantage of it.

Signed-off-by: Bernhard Beschow 
Reviewed-by: Peter Maydell 
Reviewed-by: Michael S. Tsirkin 
---
 hw/i386/pc_piix.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
index e36a3262b2..6d2f5509e6 100644
--- a/hw/i386/pc_piix.c
+++ b/hw/i386/pc_piix.c
@@ -264,7 +264,8 @@ static void pc_init1(MachineState *machine,
 PIIX3State *piix3;
 PCIDevice *pci_dev;
 
-pci_dev = pci_create_simple_multifunction(pci_bus, -1, 
TYPE_PIIX3_DEVICE);
+pci_dev = pci_new_multifunction(-1, TYPE_PIIX3_DEVICE);
+pci_realize_and_unref(pci_dev, pci_bus, _fatal);
 
 if (xen_enabled()) {
 pci_device_set_intx_routing_notifier(
-- 
2.42.0

[PATCH v8 08/29] hw/i386/pc_q35: Wire ICH9 LPC function's interrupts before its realize()

2023-10-07 Thread Bernhard Beschow

When the board assigns the ISA IRQs after the device's realize(), internal
devices such as the RTC can't be wired in ich9_lpc_realize() since the qemu_irqs
are still NULL. Fix that by assigning the ISA interrupts before realize().

This change is necessary for PIIX consolidation because PIIX4 wires the RTC
interrupts in its realize() method, so PIIX3 needs to do so as well. Since the
PC and Q35 boards share RTC code, and since PIIX3 needs the change, ICH9 needs
to be adapted as well.

Signed-off-by: Bernhard Beschow 
---
 hw/i386/pc_q35.c  | 14 +++---
 hw/isa/lpc_ich9.c |  6 +++---
 2 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
index a7386f2ca2..597943ff1b 100644
--- a/hw/i386/pc_q35.c
+++ b/hw/i386/pc_q35.c
@@ -242,11 +242,18 @@ static void pc_q35_init(MachineState *machine)
 host_bus = PCI_BUS(qdev_get_child_bus(DEVICE(phb), "pcie.0"));
 pcms->bus = host_bus;
 
+/* irq lines */
+gsi_state = pc_gsi_create(>gsi, pcmc->pci_enabled);
+
 /* create ISA bus */
 lpc = pci_new_multifunction(PCI_DEVFN(ICH9_LPC_DEV, ICH9_LPC_FUNC),
 TYPE_ICH9_LPC_DEVICE);
 qdev_prop_set_bit(DEVICE(lpc), "smm-enabled",
   x86_machine_is_smm_enabled(x86ms));
+lpc_dev = DEVICE(lpc);
+for (i = 0; i < IOAPIC_NUM_PINS; i++) {
+qdev_connect_gpio_out_named(lpc_dev, ICH9_GPIO_GSI, i, x86ms->gsi[i]);
+}
 pci_realize_and_unref(lpc, host_bus, _fatal);
 
 rtc_state = ISA_DEVICE(object_resolve_path_component(OBJECT(lpc), "rtc"));
@@ -273,13 +280,6 @@ static void pc_q35_init(MachineState *machine)
"true", true);
 }
 
-/* irq lines */
-gsi_state = pc_gsi_create(>gsi, pcmc->pci_enabled);
-
-lpc_dev = DEVICE(lpc);
-for (i = 0; i < IOAPIC_NUM_PINS; i++) {
-qdev_connect_gpio_out_named(lpc_dev, ICH9_GPIO_GSI, i, x86ms->gsi[i]);
-}
 isa_bus = ISA_BUS(qdev_get_child_bus(lpc_dev, "isa.0"));
 
 if (x86ms->pic == ON_OFF_AUTO_ON || x86ms->pic == ON_OFF_AUTO_AUTO) {
diff --git a/hw/isa/lpc_ich9.c b/hw/isa/lpc_ich9.c
index 3f59980aa0..3fcefc5a8a 100644
--- a/hw/isa/lpc_ich9.c
+++ b/hw/isa/lpc_ich9.c
@@ -675,6 +675,9 @@ static void ich9_lpc_initfn(Object *obj)
 
 object_initialize_child(obj, "rtc", >rtc, TYPE_MC146818_RTC);
 
+qdev_init_gpio_out_named(DEVICE(lpc), lpc->gsi, ICH9_GPIO_GSI,
+ IOAPIC_NUM_PINS);
+
 object_property_add_uint8_ptr(obj, ACPI_PM_PROP_SCI_INT,
   >sci_gsi, OBJ_PROP_FLAG_READ);
 object_property_add_uint8_ptr(OBJECT(lpc), ACPI_PM_PROP_ACPI_ENABLE_CMD,
@@ -691,7 +694,6 @@ static void ich9_lpc_initfn(Object *obj)
 static void ich9_lpc_realize(PCIDevice *d, Error **errp)
 {
 ICH9LPCState *lpc = ICH9_LPC_DEVICE(d);
-DeviceState *dev = DEVICE(d);
 PCIBus *pci_bus = pci_get_bus(d);
 ISABus *isa_bus;
 
@@ -734,8 +736,6 @@ static void ich9_lpc_realize(PCIDevice *d, Error **errp)
 ICH9_RST_CNT_IOPORT, >rst_cnt_mem,
 1);
 
-qdev_init_gpio_out_named(dev, lpc->gsi, ICH9_GPIO_GSI, IOAPIC_NUM_PINS);
-
 isa_bus_register_input_irqs(isa_bus, lpc->gsi);
 
 i8257_dma_init(isa_bus, 0);
-- 
2.42.0

[PATCH v8 11/29] hw/isa/piix3: Create IDE controller in host device

2023-10-07 Thread Bernhard Beschow

The IDE controller is an integral part of PIIX3 (function 1). So create it as
part of the south bridge.

Signed-off-by: Bernhard Beschow 
Reviewed-by: Michael S. Tsirkin 
---
 include/hw/southbridge/piix.h |  2 ++
 hw/i386/pc_piix.c | 13 ++---
 hw/isa/piix3.c|  9 +
 hw/i386/Kconfig   |  1 -
 hw/isa/Kconfig|  1 +
 5 files changed, 18 insertions(+), 8 deletions(-)

diff --git a/include/hw/southbridge/piix.h b/include/hw/southbridge/piix.h
index b07ff6bb26..1daeff397c 100644
--- a/include/hw/southbridge/piix.h
+++ b/include/hw/southbridge/piix.h
@@ -13,6 +13,7 @@
 #define HW_SOUTHBRIDGE_PIIX_H
 
 #include "hw/pci/pci_device.h"
+#include "hw/ide/pci.h"
 #include "hw/rtc/mc146818rtc.h"
 
 /* PIRQRC[A:D]: PIRQx Route Control Registers */
@@ -52,6 +53,7 @@ struct PIIXState {
 int32_t pci_irq_levels_vmstate[PIIX_NUM_PIRQS];
 
 MC146818RtcState rtc;
+PCIIDEState ide;
 
 /* Reset Control Register contents */
 uint8_t rcr;
diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
index 5988656279..c98a997482 100644
--- a/hw/i386/pc_piix.c
+++ b/hw/i386/pc_piix.c
@@ -43,7 +43,6 @@
 #include "net/net.h"
 #include "hw/ide/isa.h"
 #include "hw/ide/pci.h"
-#include "hw/ide/piix.h"
 #include "hw/irq.h"
 #include "sysemu/kvm.h"
 #include "hw/i386/kvm/clock.h"
@@ -290,6 +289,10 @@ static void pc_init1(MachineState *machine,
 isa_bus = ISA_BUS(qdev_get_child_bus(DEVICE(pci_dev), "isa.0"));
 rtc_state = ISA_DEVICE(object_resolve_path_component(OBJECT(pci_dev),
  "rtc"));
+dev = DEVICE(object_resolve_path_component(OBJECT(pci_dev), "ide"));
+pci_ide_create_devs(PCI_DEVICE(dev));
+idebus[0] = qdev_get_child_bus(dev, "ide.0");
+idebus[1] = qdev_get_child_bus(dev, "ide.1");
 } else {
 isa_bus = isa_bus_new(NULL, system_memory, system_io,
   _abort);
@@ -301,6 +304,8 @@ static void pc_init1(MachineState *machine,
 
 i8257_dma_init(isa_bus, 0);
 pcms->hpet_enabled = false;
+idebus[0] = NULL;
+idebus[1] = NULL;
 }
 
 if (x86ms->pic == ON_OFF_AUTO_ON || x86ms->pic == ON_OFF_AUTO_AUTO) {
@@ -329,12 +334,6 @@ static void pc_init1(MachineState *machine,
 pc_nic_init(pcmc, isa_bus, pci_bus);
 
 if (pcmc->pci_enabled) {
-PCIDevice *dev;
-
-dev = pci_create_simple(pci_bus, piix3_devfn + 1, TYPE_PIIX3_IDE);
-pci_ide_create_devs(dev);
-idebus[0] = qdev_get_child_bus(>qdev, "ide.0");
-idebus[1] = qdev_get_child_bus(>qdev, "ide.1");
 pc_cmos_init(pcms, idebus[0], idebus[1], rtc_state);
 }
 #ifdef CONFIG_IDE_ISA
diff --git a/hw/isa/piix3.c b/hw/isa/piix3.c
index 11d72ca2bb..3f1dabade0 100644
--- a/hw/isa/piix3.c
+++ b/hw/isa/piix3.c
@@ -29,6 +29,7 @@
 #include "hw/southbridge/piix.h"
 #include "hw/irq.h"
 #include "hw/qdev-properties.h"
+#include "hw/ide/piix.h"
 #include "hw/isa/isa.h"
 #include "sysemu/runstate.h"
 #include "migration/vmstate.h"
@@ -265,6 +266,7 @@ static const MemoryRegionOps rcr_ops = {
 static void pci_piix3_realize(PCIDevice *dev, Error **errp)
 {
 PIIX3State *d = PIIX3_PCI_DEVICE(dev);
+PCIBus *pci_bus = pci_get_bus(dev);
 ISABus *isa_bus;
 uint32_t irq;
 
@@ -290,6 +292,12 @@ static void pci_piix3_realize(PCIDevice *dev, Error **errp)
 }
 irq = object_property_get_uint(OBJECT(>rtc), "irq", _fatal);
 isa_connect_gpio_out(ISA_DEVICE(>rtc), 0, irq);
+
+/* IDE */
+qdev_prop_set_int32(DEVICE(>ide), "addr", dev->devfn + 1);
+if (!qdev_realize(DEVICE(>ide), BUS(pci_bus), errp)) {
+return;
+}
 }
 
 static void build_pci_isa_aml(AcpiDevAmlIf *adev, Aml *scope)
@@ -321,6 +329,7 @@ static void pci_piix3_init(Object *obj)
  ISA_NUM_IRQS);
 
 object_initialize_child(obj, "rtc", >rtc, TYPE_MC146818_RTC);
+object_initialize_child(obj, "ide", >ide, TYPE_PIIX3_IDE);
 }
 
 static void pci_piix3_class_init(ObjectClass *klass, void *data)
diff --git a/hw/i386/Kconfig b/hw/i386/Kconfig
index 9051083c1e..ade817f1b6 100644
--- a/hw/i386/Kconfig
+++ b/hw/i386/Kconfig
@@ -73,7 +73,6 @@ config I440FX
 select PC_ACPI
 select PCI_I440FX
 select PIIX3
-select IDE_PIIX
 select DIMM
 select SMBIOS
 select FW_CFG_DMA
diff --git a/hw/isa/Kconfig b/hw/isa/Kconfig
index c10cbc5fc1..28345edbb3 100644
--- a/hw/isa/Kconfig
+++ b/hw/isa/Kconfig
@@ -34,6 +34,7 @@ config PC87312
 config PIIX3
 bool
 select I8257
+select IDE_PIIX
 select ISA_BUS
 select MC146818RTC
 
-- 
2.42.0

[PATCH v8 03/29] hw/i386/pc_piix: Assign PIIX3's ISA interrupts before its realize()

2023-10-07 Thread Bernhard Beschow

Unlike its PIIX4 counterpart, TYPE_PIIX3_DEVICE doesn't instantiate a PIC
itself. Instead, it relies on the board to do so. This means that the board
needs to wire the ISA IRQs to the PIIX3 device model. As long as the board
assigns the ISA IRQs after PIIX3's realize(), internal devices can't be wired in
pci_piix3_realize() since the qemu_irqs are still NULL. Fix that by assigning
the ISA interrupts before realize(). This will allow for embedding child devices
into the host device as already done for PIIX4.

Signed-off-by: Bernhard Beschow 

---

Note that this avoids the "PIC proxy" of previous iterations of the PIIX
consolidation series. Assigning the IRQs before realize() has been agreed upon
at KVM forum 2023.
---
 hw/i386/pc_piix.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
index 6d2f5509e6..a003923788 100644
--- a/hw/i386/pc_piix.c
+++ b/hw/i386/pc_piix.c
@@ -265,6 +265,8 @@ static void pc_init1(MachineState *machine,
 PCIDevice *pci_dev;
 
 pci_dev = pci_new_multifunction(-1, TYPE_PIIX3_DEVICE);
+piix3 = PIIX3_PCI_DEVICE(pci_dev);
+piix3->pic = x86ms->gsi;
 pci_realize_and_unref(pci_dev, pci_bus, _fatal);
 
 if (xen_enabled()) {
@@ -281,8 +283,6 @@ static void pc_init1(MachineState *machine,
  XEN_IOAPIC_NUM_PIRQS);
 }
 
-piix3 = PIIX3_PCI_DEVICE(pci_dev);
-piix3->pic = x86ms->gsi;
 piix3_devfn = piix3->dev.devfn;
 isa_bus = ISA_BUS(qdev_get_child_bus(DEVICE(piix3), "isa.0"));
 rtc_state = ISA_DEVICE(object_resolve_path_component(OBJECT(pci_dev),
-- 
2.42.0

[PATCH v8 05/29] hw/i386/pc_piix: Wire PIIX3's ISA interrupts by new "isa-irqs" property

2023-10-07 Thread Bernhard Beschow

Avoid assigning the private member of struct PIIX3State from outside which goes
against best QOM practices. Instead, implement best QOM practice by adding an
"isa-irqs" array property to TYPE_PIIX3_DEVICE and assign it in board code, i.e.
from outside.

Signed-off-by: Bernhard Beschow 
---
 include/hw/southbridge/piix.h | 2 +-
 hw/i386/pc_piix.c | 7 ++-
 hw/isa/piix3.c| 2 ++
 3 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/include/hw/southbridge/piix.h b/include/hw/southbridge/piix.h
index 2317bb7974..bb898c6c88 100644
--- a/include/hw/southbridge/piix.h
+++ b/include/hw/southbridge/piix.h
@@ -46,7 +46,7 @@ struct PIIXState {
 #endif
 uint64_t pic_levels;
 
-qemu_irq *pic;
+qemu_irq pic[ISA_NUM_IRQS];
 
 /* This member isn't used. Just for save/load compatibility */
 int32_t pci_irq_levels_vmstate[PIIX_NUM_PIRQS];
diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
index a003923788..4dc7298c15 100644
--- a/hw/i386/pc_piix.c
+++ b/hw/i386/pc_piix.c
@@ -263,10 +263,15 @@ static void pc_init1(MachineState *machine,
 if (pcmc->pci_enabled) {
 PIIX3State *piix3;
 PCIDevice *pci_dev;
+DeviceState *dev;
+size_t i;
 
 pci_dev = pci_new_multifunction(-1, TYPE_PIIX3_DEVICE);
 piix3 = PIIX3_PCI_DEVICE(pci_dev);
-piix3->pic = x86ms->gsi;
+dev = DEVICE(pci_dev);
+for (i = 0; i < ISA_NUM_IRQS; i++) {
+qdev_connect_gpio_out_named(dev, "isa-irqs", i, x86ms->gsi[i]);
+}
 pci_realize_and_unref(pci_dev, pci_bus, _fatal);
 
 if (xen_enabled()) {
diff --git a/hw/isa/piix3.c b/hw/isa/piix3.c
index 7240c91440..c17547a2c0 100644
--- a/hw/isa/piix3.c
+++ b/hw/isa/piix3.c
@@ -312,6 +312,8 @@ static void pci_piix3_init(Object *obj)
 {
 PIIX3State *d = PIIX3_PCI_DEVICE(obj);
 
+qdev_init_gpio_out_named(DEVICE(obj), d->pic, "isa-irqs", ISA_NUM_IRQS);
+
 object_initialize_child(obj, "rtc", >rtc, TYPE_MC146818_RTC);
 }
 
-- 
2.42.0

[PATCH v8 06/29] hw/i386/pc_piix: Remove redundant "piix3" variable

2023-10-07 Thread Bernhard Beschow

The variable is never used by its declared type. Eliminate it.

Signed-off-by: Bernhard Beschow 
---
 hw/i386/pc_piix.c | 6 ++
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
index 4dc7298c15..cd6c00c0b3 100644
--- a/hw/i386/pc_piix.c
+++ b/hw/i386/pc_piix.c
@@ -261,13 +261,11 @@ static void pc_init1(MachineState *machine,
 gsi_state = pc_gsi_create(>gsi, pcmc->pci_enabled);
 
 if (pcmc->pci_enabled) {
-PIIX3State *piix3;
 PCIDevice *pci_dev;
 DeviceState *dev;
 size_t i;
 
 pci_dev = pci_new_multifunction(-1, TYPE_PIIX3_DEVICE);
-piix3 = PIIX3_PCI_DEVICE(pci_dev);
 dev = DEVICE(pci_dev);
 for (i = 0; i < ISA_NUM_IRQS; i++) {
 qdev_connect_gpio_out_named(dev, "isa-irqs", i, x86ms->gsi[i]);
@@ -288,8 +286,8 @@ static void pc_init1(MachineState *machine,
  XEN_IOAPIC_NUM_PIRQS);
 }
 
-piix3_devfn = piix3->dev.devfn;
-isa_bus = ISA_BUS(qdev_get_child_bus(DEVICE(piix3), "isa.0"));
+piix3_devfn = pci_dev->devfn;
+isa_bus = ISA_BUS(qdev_get_child_bus(DEVICE(pci_dev), "isa.0"));
 rtc_state = ISA_DEVICE(object_resolve_path_component(OBJECT(pci_dev),
  "rtc"));
 } else {
-- 
2.42.0

Re: [PATCH v11 05/10] migration: convert exec backend to accept MigrateAddress.

2023-10-07 Thread Het Gala


On 10/4/2023 8:25 PM, Fabiano Rosas wrote:

Het Gala  writes:


Exec transport backend for 'migrate'/'migrate-incoming' QAPIs accept
new wire protocol of MigrateAddress struct.

It is achived by parsing 'uri' string and storing migration parameters
required for exec connection into strList struct.

Suggested-by: Aravind Retnakaran
Signed-off-by: Het Gala
Reviewed-by: Daniel P. Berrangé
---
  migration/exec.c  | 71 +++
  migration/exec.h  |  4 +--
  migration/migration.c | 10 +++---
  3 files changed, 57 insertions(+), 28 deletions(-)

-void exec_start_outgoing_migration(MigrationState *s, const char *command, 
Error **errp)
+/* provides the length of strList */
+static int
+str_list_length(strList *list)
+{
+int len = 0;
+strList *elem;
+
+for (elem = list; elem != NULL; elem = elem->next) {
+len++;
+}
+
+return len;
+}
+
+static void
+init_exec_array(strList *command, char **argv, Error **errp)
+{
+int i = 0;
+strList *lst;
+
+for (lst = command; lst; lst = lst->next) {
+argv[i++] = lst->value;
+}
+
+argv[i] = NULL;

This will write out of bounds.
Yes, will increase the length of argv in exec_start_outgoing_migration() 
by one, that would solve the issue right.

+return;
+}
+
+void exec_start_outgoing_migration(MigrationState *s, strList *command,
+   Error **errp)
  {
  QIOChannel *ioc;
  
-#ifdef WIN32

-const char *argv[] = { exec_get_cmd_path(), "/c", command, NULL };
-#else
-const char *argv[] = { "/bin/sh", "-c", command, NULL };
-#endif
+int length = str_list_length(command);
+g_auto(GStrv) argv = (char **) g_new0(const char *, length);

This allocation does not leave space for the NULL byte.


Yes you are rigght. I assumed the user will itself have to end the list 
of argument with a 'NULL', but that's not correct. Thanks for pointing 
it out. Will make the length of argv from length -> length+1.


  
-trace_migration_exec_outgoing(command);

-ioc = QIO_CHANNEL(qio_channel_command_new_spawn(argv,
-O_RDWR,
-errp));
+init_exec_array(command, argv, errp);
+g_autofree char *new_command = g_strjoinv(" ", (char **)argv);
+
+trace_migration_exec_outgoing(new_command);
+ioc = QIO_CHANNEL(
+qio_channel_command_new_spawn((const char * const *) argv,
+  O_RDWR,
+  errp));
  if (!ioc) {
  return;
  }
@@ -71,20 +101,21 @@ static gboolean exec_accept_incoming_migration(QIOChannel 
*ioc,
  return G_SOURCE_REMOVE;
  }
  
-void exec_start_incoming_migration(const char *command, Error **errp)

+void exec_start_incoming_migration(strList *command, Error **errp)
  {
  QIOChannel *ioc;
  
-#ifdef WIN32

-const char *argv[] = { exec_get_cmd_path(), "/c", command, NULL };
-#else
-const char *argv[] = { "/bin/sh", "-c", command, NULL };
-#endif
+int length = str_list_length(command);
+g_auto(GStrv) argv = (char **) g_new0(const char *, length);

Here as well.

Ack, will increase length of argv by one while initalization.
Regards,
Het Gala

Re: [PATCH] target/riscv: Use a direct cast for better performance

2023-10-07 Thread Daniel Henrique Barboza





On 10/7/23 06:02, Richard W.M. Jones wrote:

RISCV_CPU(cs) uses a checked cast.  When QOM cast debugging is enabled
this adds about 5% total overhead when emulating RV64 on x86-64 host.

Using a RISC-V guest with 16 vCPUs, 16 GB of guest RAM, virtio-blk
disk.  The guest has a copy of the qemu source tree.  The test
involves compiling the qemu source tree with 'make clean; time make -j16'.

Before making this change the compile step took 449 & 447 seconds over
two consecutive runs.

After making this change, 428 & 422 seconds.

The saving is about 5%.

Thanks: Paolo Bonzini
Signed-off-by: Richard W.M. Jones 
---


Reviewed-by: Daniel Henrique Barboza 



  target/riscv/cpu_helper.c | 6 +-
  1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/target/riscv/cpu_helper.c b/target/riscv/cpu_helper.c
index 3a02079290..6174d99fb2 100644
--- a/target/riscv/cpu_helper.c
+++ b/target/riscv/cpu_helper.c
@@ -66,7 +66,11 @@ void cpu_get_tb_cpu_state(CPURISCVState *env, vaddr *pc,
uint64_t *cs_base, uint32_t *pflags)
  {
  CPUState *cs = env_cpu(env);
-RISCVCPU *cpu = RISCV_CPU(cs);
+/*
+ * Using the checked cast RISCV_CPU(cs) imposes ~ 5% overhead when
+ * qemu cast debugging is enabled, so use a direct cast instead.
+ */
+RISCVCPU *cpu = (RISCVCPU *)cs;
  RISCVExtStatus fs, vs;
  uint32_t flags = 0;

Re: [PATCH v11 02/10] migration: convert migration 'uri' into 'MigrateAddress'

2023-10-07 Thread Het Gala



On 10/4/2023 11:42 PM, Fabiano Rosas wrote:

Daniel P. Berrangé  writes:


On Wed, Oct 04, 2023 at 11:43:12AM -0300, Fabiano Rosas wrote:

Het Gala  writes:


This patch parses 'migrate' and 'migrate-incoming' QAPI's 'uri'
string containing migration connection related information
and stores them inside well defined 'MigrateAddress' struct.

Suggested-by: Aravind Retnakaran
Signed-off-by: Het Gala
Reviewed-by: Daniel P. Berrangé
---
  migration/exec.c  |  1 -
  migration/exec.h  |  4 
  migration/migration.c | 55 +++
  3 files changed, 59 insertions(+), 1 deletion(-)

diff --git a/migration/exec.c b/migration/exec.c
index 2bf882bbe1..32f5143dfd 100644
--- a/migration/exec.c
+++ b/migration/exec.c
@@ -27,7 +27,6 @@
  #include "qemu/cutils.h"
  
  #ifdef WIN32

-const char *exec_get_cmd_path(void);
  const char *exec_get_cmd_path(void)
  {
  g_autofree char *detected_path = g_new(char, MAX_PATH);
diff --git a/migration/exec.h b/migration/exec.h
index b210ffde7a..736cd71028 100644
--- a/migration/exec.h
+++ b/migration/exec.h
@@ -19,6 +19,10 @@
  
  #ifndef QEMU_MIGRATION_EXEC_H

  #define QEMU_MIGRATION_EXEC_H
+
+#ifdef WIN32
+const char *exec_get_cmd_path(void);
+#endif
  void exec_start_incoming_migration(const char *host_port, Error **errp);
  
  void exec_start_outgoing_migration(MigrationState *s, const char *host_port,

diff --git a/migration/migration.c b/migration/migration.c
index 6d3cf5d5cd..dcbd509d56 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -65,6 +65,7 @@
  #include "sysemu/qtest.h"
  #include "options.h"
  #include "sysemu/dirtylimit.h"
+#include "qemu/sockets.h"
  
  static NotifierList migration_state_notifiers =

  NOTIFIER_LIST_INITIALIZER(migration_state_notifiers);
@@ -427,15 +428,64 @@ void migrate_add_address(SocketAddress *address)
QAPI_CLONE(SocketAddress, address));
  }
  
+static bool migrate_uri_parse(const char *uri,

+  MigrationAddress **channel,
+  Error **errp)
+{
+g_autoptr(MigrationAddress) addr = g_new0(MigrationAddress, 1);

This cannot be g_autoptr because you're passing it out of scope at the
end of the function.

It is still good to use g_autoptr to deal with the error paths.

On the success path though you need   g_steal_pointer() to
prevent the autofree cleanup running.

Ah good point, this has been suggested in an earlier version already, I
forgot to mention. We should definitely use g_steal_pointer() whenever
the variable goes out of scope.
Okay. I get the point that g_autoptr helps to deal with freeing of 
pointer in case error occurs inside the function.
But I am still trying to figure out why we need g_steal_pointer() ? If 
you could please explain once again.
My understanding till now was that if we want to return 'addr' variable 
as return type, then we would want to make use of g_steal_pointer() 
so instead of addr, we pass a temp ptr refrencing to the same location 
as addr, and then assign addr = NULL. But we are already assigning the 
memory location where addr was refrencing to 'channel'. Let me know if I 
am missing something ?
I think the syntax follows as the second example given here: 
https://docs.gtk.org/glib/func.steal_pointer.html ?



Regards,
Het Gala

Re: [PATCH 15/19] parallels: Remove unnecessary data_end field

2023-10-07 Thread Mike Maslenkin

On Sat, Oct 7, 2023 at 1:18 PM Alexander Ivanov
 wrote:
>
>
>
> On 10/6/23 21:43, Mike Maslenkin wrote:
> > On Mon, Oct 2, 2023 at 12:01 PM Alexander Ivanov
> >  wrote:
> >> Since we have used bitmap, field data_end in BDRVParallelsState is
> >> redundant and can be removed.
> >>
> >> Add parallels_data_end() helper and remove data_end handling.
> >>
> >> Signed-off-by: Alexander Ivanov 
> >> ---
> >>   block/parallels.c | 33 +
> >>   block/parallels.h |  1 -
> >>   2 files changed, 13 insertions(+), 21 deletions(-)
> >>
> >> diff --git a/block/parallels.c b/block/parallels.c
> >> index 48ea5b3f03..80a7171b84 100644
> >> --- a/block/parallels.c
> >> +++ b/block/parallels.c
> >> @@ -265,6 +265,13 @@ static void 
> >> parallels_free_used_bitmap(BlockDriverState *bs)
> >>   g_free(s->used_bmap);
> >>   }
> >>
> >> +static int64_t parallels_data_end(BDRVParallelsState *s)
> >> +{
> >> +int64_t data_end = s->data_start * BDRV_SECTOR_SIZE;
> >> +data_end += s->used_bmap_size * s->cluster_size;
> >> +return data_end;
> >> +}
> >> +
> >>   int64_t parallels_allocate_host_clusters(BlockDriverState *bs,
> >>int64_t *clusters)
> >>   {
> >> @@ -275,7 +282,7 @@ int64_t 
> >> parallels_allocate_host_clusters(BlockDriverState *bs,
> >>
> >>   first_free = find_first_zero_bit(s->used_bmap, s->used_bmap_size);
> >>   if (first_free == s->used_bmap_size) {
> >> -host_off = s->data_end * BDRV_SECTOR_SIZE;
> >> +host_off = parallels_data_end(s);
> >>   prealloc_clusters = *clusters + s->prealloc_size / s->tracks;
> >>   bytes = prealloc_clusters * s->cluster_size;
> >>
> >> @@ -297,9 +304,6 @@ int64_t 
> >> parallels_allocate_host_clusters(BlockDriverState *bs,
> >>   s->used_bmap = bitmap_zero_extend(s->used_bmap, 
> >> s->used_bmap_size,
> >> new_usedsize);
> >>   s->used_bmap_size = new_usedsize;
> >> -if (host_off + bytes > s->data_end * BDRV_SECTOR_SIZE) {
> >> -s->data_end = (host_off + bytes) / BDRV_SECTOR_SIZE;
> >> -}
> >>   } else {
> >>   next_used = find_next_bit(s->used_bmap, s->used_bmap_size, 
> >> first_free);
> >>
> >> @@ -315,8 +319,7 @@ int64_t 
> >> parallels_allocate_host_clusters(BlockDriverState *bs,
> >>* branch. In the other case we are likely re-using hole. 
> >> Preallocate
> >>* the space if required by the prealloc_mode.
> >>*/
> >> -if (s->prealloc_mode == PRL_PREALLOC_MODE_FALLOCATE &&
> >> -host_off < s->data_end * BDRV_SECTOR_SIZE) {
> >> +if (s->prealloc_mode == PRL_PREALLOC_MODE_FALLOCATE) {
> >>   ret = bdrv_pwrite_zeroes(bs->file, host_off, bytes, 0);
> >>   if (ret < 0) {
> >>   return ret;
> >> @@ -757,13 +760,7 @@ parallels_check_outside_image(BlockDriverState *bs, 
> >> BdrvCheckResult *res,
> >>   }
> >>   }
> >>
> >> -if (high_off == 0) {
> >> -res->image_end_offset = s->data_end << BDRV_SECTOR_BITS;
> >> -} else {
> >> -res->image_end_offset = high_off + s->cluster_size;
> >> -s->data_end = res->image_end_offset >> BDRV_SECTOR_BITS;
> >> -}
> >> -
> >> +res->image_end_offset = parallels_data_end(s);
> >>   return 0;
> >>   }
> >>
> >> @@ -806,7 +803,6 @@ parallels_check_leak(BlockDriverState *bs, 
> >> BdrvCheckResult *res,
> >>   res->check_errors++;
> >>   return ret;
> >>   }
> >> -s->data_end = res->image_end_offset >> BDRV_SECTOR_BITS;
> >>
> >>   parallels_free_used_bitmap(bs);
> >>   ret = parallels_fill_used_bitmap(bs);
> >> @@ -1361,8 +1357,7 @@ static int parallels_open(BlockDriverState *bs, 
> >> QDict *options, int flags,
> >>   }
> >>
> >>   s->data_start = data_start;
> >> -s->data_end = s->data_start;
> >> -if (s->data_end < (s->header_size >> BDRV_SECTOR_BITS)) {
> >> +if (s->data_start < (s->header_size >> BDRV_SECTOR_BITS)) {
> >>   /*
> >>* There is not enough unused space to fit to block align 
> >> between BAT
> >>* and actual data. We can't avoid read-modify-write...
> >> @@ -1403,11 +1398,10 @@ static int parallels_open(BlockDriverState *bs, 
> >> QDict *options, int flags,
> >>
> >>   for (i = 0; i < s->bat_size; i++) {
> >>   sector = bat2sect(s, i);
> >> -if (sector + s->tracks > s->data_end) {
> >> -s->data_end = sector + s->tracks;
> >> +if (sector + s->tracks > file_nb_sectors) {
> >> +need_check = true;
> >>   }
> >>   }
> >> -need_check = need_check || s->data_end > file_nb_sectors;
> >>
> >>   ret = parallels_fill_used_bitmap(bs);
> >>   if (ret == -ENOMEM) {
> >> @@ -1461,7 +1455,6 @@ static int 
> >> parallels_truncate_unused_clusters(BlockDriverState *bs)
> >>   end_off

Re: [PATCH 2/3] hw/pci-host: Add emulation of Mai Logic Articia S

2023-10-07 Thread BALATON Zoltan


On Fri, 6 Oct 2023, Volker Rümelin wrote:

Am 06.10.23 um 00:13 schrieb BALATON Zoltan:

The Articia S is a generic chipset supporting several different CPUs
that were used on some PPC boards. This is a minimal emulation of the
parts needed for emulating the AmigaOne board.

Signed-off-by: BALATON Zoltan 
---
 hw/pci-host/Kconfig   |   5 +
 hw/pci-host/articia.c | 266 ++
 hw/pci-host/meson.build   |   2 +
 include/hw/pci-host/articia.h |  17 +++
 4 files changed, 290 insertions(+)
 create mode 100644 hw/pci-host/articia.c
 create mode 100644 include/hw/pci-host/articia.h
diff --git a/hw/pci-host/articia.c b/hw/pci-host/articia.c
new file mode 100644
index 00..80558e1c47
--- /dev/null
+++ b/hw/pci-host/articia.c
@@ -0,0 +1,266 @@
+/*
+ * Mai Logic Articia S emulation
+ *
+ * Copyright (c) 2023 BALATON Zoltan
+ *
+ * This work is licensed under the GNU GPL license version 2 or later.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/log.h"
+#include "qapi/error.h"
+#include "hw/pci/pci_device.h"
+#include "hw/pci/pci_host.h"
+#include "hw/irq.h"
+#include "hw/i2c/bitbang_i2c.h"
+#include "hw/intc/i8259.h"
+#include "hw/pci-host/articia.h"
+
+OBJECT_DECLARE_SIMPLE_TYPE(ArticiaState, ARTICIA)
+
+OBJECT_DECLARE_SIMPLE_TYPE(ArticiaHostState, ARTICIA_PCI_HOST)
+struct ArticiaHostState {
+PCIDevice parent_obj;
+
+ArticiaState *as;
+};
+
+/* TYPE_ARTICIA */
+
+struct ArticiaState {
+PCIHostState parent_obj;
+
+qemu_irq irq[PCI_NUM_PINS];
+MemoryRegion io;
+MemoryRegion mem;
+MemoryRegion reg;
+
+bitbang_i2c_interface smbus;
+uint32_t gpio; /* bits 0-7 in, 8-15 out, 16-23 direction (0 in, 1 out) */
+hwaddr gpio_base;
+MemoryRegion gpio_reg;
+};
+
+static uint64_t articia_gpio_read(void *opaque, hwaddr addr, unsigned int size)
+{
+ArticiaState *s = opaque;
+
+return (s->gpio >> (addr * 8)) & 0xff;
+}
+
+static void articia_gpio_write(void *opaque, hwaddr addr, uint64_t val,
+   unsigned int size)
+{
+ArticiaState *s = opaque;
+uint32_t sh = addr * 8;
+
+if (addr == 0) {
+/* in bits read only? */
+return;
+}
+
+if ((s->gpio & (0xff << sh)) != (val & 0xff) << sh) {
+s->gpio &= ~(0xff << sh | 0xff);
+s->gpio |= (val & 0xff) << sh;
+s->gpio |= bitbang_i2c_set(>smbus, BITBANG_I2C_SDA,
+   s->gpio & BIT(16) ?
+   !!(s->gpio & BIT(8)) : 1);
+if ((s->gpio & BIT(17))) {
+s->gpio &= ~BIT(0);
+s->gpio |= bitbang_i2c_set(>smbus, BITBANG_I2C_SCL,
+   !!(s->gpio & BIT(9)));
+}
+}
+}
+
+static const MemoryRegionOps articia_gpio_ops = {
+.read = articia_gpio_read,
+.write = articia_gpio_write,
+.valid.min_access_size = 1,
+.valid.max_access_size = 1,
+.endianness = DEVICE_LITTLE_ENDIAN,
+};
+
+static uint64_t articia_reg_read(void *opaque, hwaddr addr, unsigned int size)
+{
+ArticiaState *s = opaque;
+uint64_t ret = UINT_MAX;
+
+switch (addr) {
+case 0xc00cf8:
+ret = pci_host_conf_le_ops.read(PCI_HOST_BRIDGE(s), 0, size);
+break;
+case 0xe00cfc ... 0xe00cff:
+ret = pci_host_data_le_ops.read(PCI_HOST_BRIDGE(s), addr - 0xe00cfc, 
size);
+break;
+case 0xf0:
+ret = pic_read_irq(isa_pic);
+break;
+default:
+qemu_log_mask(LOG_UNIMP, "%s: Unimplemented register read 0x%"
+  HWADDR_PRIx " %d\n", __func__, addr, size);
+break;
+}
+return ret;
+}
+
+static void articia_reg_write(void *opaque, hwaddr addr, uint64_t val,
+  unsigned int size)
+{
+ArticiaState *s = opaque;
+
+switch (addr) {
+case 0xc00cf8:
+pci_host_conf_le_ops.write(PCI_HOST_BRIDGE(s), 0, val, size);
+break;
+case 0xe00cfc ... 0xe00cff:
+pci_host_data_le_ops.write(PCI_HOST_BRIDGE(s), addr, val, size);
+break;
+default:
+qemu_log_mask(LOG_UNIMP, "%s: Unimplemented register write 0x%"
+  HWADDR_PRIx " %d <- %"PRIx64"\n", __func__, addr, size, 
val);
+break;
+}
+}
+
+static const MemoryRegionOps articia_reg_ops = {
+.read = articia_reg_read,
+.write = articia_reg_write,
+.valid.min_access_size = 1,
+.valid.max_access_size = 4,
+.endianness = DEVICE_LITTLE_ENDIAN,
+};
+
+static void articia_pcihost_set_irq(void *opaque, int n, int level)
+{
+ArticiaState *s = opaque;
+qemu_set_irq(s->irq[n], level);
+}
+
+static void articia_realize(DeviceState *dev, Error **errp)
+{
+ArticiaState *s = ARTICIA(dev);
+PCIHostState *h = PCI_HOST_BRIDGE(dev);
+PCIDevice *pdev;
+
+bitbang_i2c_init(>smbus, i2c_init_bus(dev, "smbus"));
+memory_region_init_io(>gpio_reg, OBJECT(s), _gpio_ops, s,
+  TYPE_ARTICIA, 4);
+
+

Re: [PATCH 15/19] parallels: Remove unnecessary data_end field

2023-10-07 Thread Alexander Ivanov





On 10/6/23 21:43, Mike Maslenkin wrote:

On Mon, Oct 2, 2023 at 12:01 PM Alexander Ivanov
 wrote:

Since we have used bitmap, field data_end in BDRVParallelsState is
redundant and can be removed.

Add parallels_data_end() helper and remove data_end handling.

Signed-off-by: Alexander Ivanov 
---
  block/parallels.c | 33 +
  block/parallels.h |  1 -
  2 files changed, 13 insertions(+), 21 deletions(-)

diff --git a/block/parallels.c b/block/parallels.c
index 48ea5b3f03..80a7171b84 100644
--- a/block/parallels.c
+++ b/block/parallels.c
@@ -265,6 +265,13 @@ static void parallels_free_used_bitmap(BlockDriverState 
*bs)
  g_free(s->used_bmap);
  }

+static int64_t parallels_data_end(BDRVParallelsState *s)
+{
+int64_t data_end = s->data_start * BDRV_SECTOR_SIZE;
+data_end += s->used_bmap_size * s->cluster_size;
+return data_end;
+}
+
  int64_t parallels_allocate_host_clusters(BlockDriverState *bs,
   int64_t *clusters)
  {
@@ -275,7 +282,7 @@ int64_t parallels_allocate_host_clusters(BlockDriverState 
*bs,

  first_free = find_first_zero_bit(s->used_bmap, s->used_bmap_size);
  if (first_free == s->used_bmap_size) {
-host_off = s->data_end * BDRV_SECTOR_SIZE;
+host_off = parallels_data_end(s);
  prealloc_clusters = *clusters + s->prealloc_size / s->tracks;
  bytes = prealloc_clusters * s->cluster_size;

@@ -297,9 +304,6 @@ int64_t parallels_allocate_host_clusters(BlockDriverState 
*bs,
  s->used_bmap = bitmap_zero_extend(s->used_bmap, s->used_bmap_size,
new_usedsize);
  s->used_bmap_size = new_usedsize;
-if (host_off + bytes > s->data_end * BDRV_SECTOR_SIZE) {
-s->data_end = (host_off + bytes) / BDRV_SECTOR_SIZE;
-}
  } else {
  next_used = find_next_bit(s->used_bmap, s->used_bmap_size, 
first_free);

@@ -315,8 +319,7 @@ int64_t parallels_allocate_host_clusters(BlockDriverState 
*bs,
   * branch. In the other case we are likely re-using hole. Preallocate
   * the space if required by the prealloc_mode.
   */
-if (s->prealloc_mode == PRL_PREALLOC_MODE_FALLOCATE &&
-host_off < s->data_end * BDRV_SECTOR_SIZE) {
+if (s->prealloc_mode == PRL_PREALLOC_MODE_FALLOCATE) {
  ret = bdrv_pwrite_zeroes(bs->file, host_off, bytes, 0);
  if (ret < 0) {
  return ret;
@@ -757,13 +760,7 @@ parallels_check_outside_image(BlockDriverState *bs, 
BdrvCheckResult *res,
  }
  }

-if (high_off == 0) {
-res->image_end_offset = s->data_end << BDRV_SECTOR_BITS;
-} else {
-res->image_end_offset = high_off + s->cluster_size;
-s->data_end = res->image_end_offset >> BDRV_SECTOR_BITS;
-}
-
+res->image_end_offset = parallels_data_end(s);
  return 0;
  }

@@ -806,7 +803,6 @@ parallels_check_leak(BlockDriverState *bs, BdrvCheckResult 
*res,
  res->check_errors++;
  return ret;
  }
-s->data_end = res->image_end_offset >> BDRV_SECTOR_BITS;

  parallels_free_used_bitmap(bs);
  ret = parallels_fill_used_bitmap(bs);
@@ -1361,8 +1357,7 @@ static int parallels_open(BlockDriverState *bs, QDict 
*options, int flags,
  }

  s->data_start = data_start;
-s->data_end = s->data_start;
-if (s->data_end < (s->header_size >> BDRV_SECTOR_BITS)) {
+if (s->data_start < (s->header_size >> BDRV_SECTOR_BITS)) {
  /*
   * There is not enough unused space to fit to block align between BAT
   * and actual data. We can't avoid read-modify-write...
@@ -1403,11 +1398,10 @@ static int parallels_open(BlockDriverState *bs, QDict 
*options, int flags,

  for (i = 0; i < s->bat_size; i++) {
  sector = bat2sect(s, i);
-if (sector + s->tracks > s->data_end) {
-s->data_end = sector + s->tracks;
+if (sector + s->tracks > file_nb_sectors) {
+need_check = true;
  }
  }
-need_check = need_check || s->data_end > file_nb_sectors;

  ret = parallels_fill_used_bitmap(bs);
  if (ret == -ENOMEM) {
@@ -1461,7 +1455,6 @@ static int 
parallels_truncate_unused_clusters(BlockDriverState *bs)
  end_off = (end_off + 1) * s->cluster_size;
  }
  end_off += s->data_start * BDRV_SECTOR_SIZE;
-s->data_end = end_off / BDRV_SECTOR_SIZE;
  return bdrv_truncate(bs->file, end_off, true, PREALLOC_MODE_OFF, 0, NULL);
  }

diff --git a/block/parallels.h b/block/parallels.h
index 18b4f8068e..a6a048d890 100644
--- a/block/parallels.h
+++ b/block/parallels.h
@@ -79,7 +79,6 @@ typedef struct BDRVParallelsState {
  unsigned int bat_size;

  int64_t  data_start;
-int64_t  data_end;
  uint64_t prealloc_size;
  ParallelsPreallocMode prealloc_mode;

--
2.34.1


Is it intended behavior?

Run:
1. ./qemu-img create -f parallels

Re: [PULL 00/21] vfio queue

2023-10-07 Thread Cédric Le Goater


Hello Eric,


If you are ok with the above, I will squash the change in the
related patch and send a v2.


Unfortunately this is not sufficient. There is another regression
(crash) on a double free of vbasedev->name as I reported before. I was
able to hit it on a failing hotplug.  How do you want me to send the
fix? I can resend the whole series of just fixes on the related patches.


Please send a v5 for "Prerequisite changes for IOMMUFD support" with the
fixes we talked about. I will rebuild a PR next week.

Thanks,

C.

Re: [PATCH] hw/virtio/vhost: Silence compiler warnings in vhost code when using -Wshadow

2023-10-07 Thread Michael Tokarev


06.10.2023 14:24, Michael S. Tsirkin:

On Fri, Oct 06, 2023 at 01:45:51PM +0300, Michael Tokarev wrote:

06.10.2023 11:55, Markus Armbruster пишет:

Michael Tokarev  writes:


Applied to my trivial-patches tree, thanks!

Marcus, you picked up previous patches of this theme, --
you can continue this tradition if you like :)


I intend to post a pull request for the -Wshadow patches that have
R-bys.  I'm also tracking the unreviewed ones, and hope they get
reviewed.


Ahh, ok.

I've added my own R-bys for the ones I picked up, and for this one
by Thomas too:

Reviewed-By: Michael Tokarev 


it would be better to deal with all of them in one place tbh.


This is exactly why I asked Marcus about this in the first place,
being confused a bit with the stuff being especially sent to
qemu-trivial@ :)

/mjt

Re: [PATCH] target/riscv: Use a direct cast for better performance

2023-10-07 Thread Richard W.M. Jones

If you're interested in how I found this problem, it was done using
'perf report -a -g' & flamegraphs.  This is the flamegraph of qemu (on
the host) when the guest is running the parallel compile:

  http://oirase.annexia.org/tmp/qemu-riscv.svg

If you click into 'CPU_0/TCG' at the bottom left (all the vCPUs
basically act alike), and then go to 'cpu_get_tb_cpu_state' you can
see the call to 'object_dynamic_cast_assert' taking considerable time.

If you zoom out, hit Ctrl F and type 'object_dynamic_cast_assert' into
the search box then the flamegraph will tell you this call takes about
6.6% of total time (not all, but most, attributable to the call from
'cpu_get_tb_cpu_state' -> 'object_dynamic_cast_assert').

There are several other issues in the flamegraph which I'm trying to
address, but this was the simplest one.

Rich.

-- 
Richard Jones, Virtualization Group, Red Hat http://people.redhat.com/~rjones
Read my programming and virtualization blog: http://rwmj.wordpress.com
virt-builder quickly builds VMs from scratch
http://libguestfs.org/virt-builder.1.html

[PATCH] target/riscv: Use a direct cast for better performance

2023-10-07 Thread Richard W.M. Jones

RISCV_CPU(cs) uses a checked cast.  When QOM cast debugging is enabled
this adds about 5% total overhead when emulating RV64 on x86-64 host.

Using a RISC-V guest with 16 vCPUs, 16 GB of guest RAM, virtio-blk
disk.  The guest has a copy of the qemu source tree.  The test
involves compiling the qemu source tree with 'make clean; time make -j16'.

Before making this change the compile step took 449 & 447 seconds over
two consecutive runs.

After making this change, 428 & 422 seconds.

The saving is about 5%.

Thanks: Paolo Bonzini
Signed-off-by: Richard W.M. Jones 
---
 target/riscv/cpu_helper.c | 6 +-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/target/riscv/cpu_helper.c b/target/riscv/cpu_helper.c
index 3a02079290..6174d99fb2 100644
--- a/target/riscv/cpu_helper.c
+++ b/target/riscv/cpu_helper.c
@@ -66,7 +66,11 @@ void cpu_get_tb_cpu_state(CPURISCVState *env, vaddr *pc,
   uint64_t *cs_base, uint32_t *pflags)
 {
 CPUState *cs = env_cpu(env);
-RISCVCPU *cpu = RISCV_CPU(cs);
+/*
+ * Using the checked cast RISCV_CPU(cs) imposes ~ 5% overhead when
+ * qemu cast debugging is enabled, so use a direct cast instead.
+ */
+RISCVCPU *cpu = (RISCVCPU *)cs;
 RISCVExtStatus fs, vs;
 uint32_t flags = 0;
 
-- 
2.41.0

Re: [PATCH v11 02/10] migration: convert migration 'uri' into 'MigrateAddress'

2023-10-07 Thread Het Gala



On 10/4/2023 8:13 PM, Fabiano Rosas wrote:

Het Gala  writes:


This patch parses 'migrate' and 'migrate-incoming' QAPI's 'uri'
string containing migration connection related information
and stores them inside well defined 'MigrateAddress' struct.

Suggested-by: Aravind Retnakaran
Signed-off-by: Het Gala
Reviewed-by: Daniel P. Berrangé
---
  migration/exec.c  |  1 -
  migration/exec.h  |  4 
  migration/migration.c | 55 +++
  3 files changed, 59 insertions(+), 1 deletion(-)

diff --git a/migration/exec.c b/migration/exec.c
index 2bf882bbe1..32f5143dfd 100644
--- a/migration/exec.c
+++ b/migration/exec.c
@@ -27,7 +27,6 @@
  #include "qemu/cutils.h"
  
  #ifdef WIN32

-const char *exec_get_cmd_path(void);
  const char *exec_get_cmd_path(void)
  {
  g_autofree char *detected_path = g_new(char, MAX_PATH);
diff --git a/migration/exec.h b/migration/exec.h
index b210ffde7a..736cd71028 100644
--- a/migration/exec.h
+++ b/migration/exec.h
@@ -19,6 +19,10 @@
  
  #ifndef QEMU_MIGRATION_EXEC_H

  #define QEMU_MIGRATION_EXEC_H
+
+#ifdef WIN32
+const char *exec_get_cmd_path(void);
+#endif
  void exec_start_incoming_migration(const char *host_port, Error **errp);
  
  void exec_start_outgoing_migration(MigrationState *s, const char *host_port,

diff --git a/migration/migration.c b/migration/migration.c
index 6d3cf5d5cd..dcbd509d56 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -65,6 +65,7 @@
  #include "sysemu/qtest.h"
  #include "options.h"
  #include "sysemu/dirtylimit.h"
+#include "qemu/sockets.h"
  
  static NotifierList migration_state_notifiers =

  NOTIFIER_LIST_INITIALIZER(migration_state_notifiers);
@@ -427,15 +428,64 @@ void migrate_add_address(SocketAddress *address)
QAPI_CLONE(SocketAddress, address));
  }
  
+static bool migrate_uri_parse(const char *uri,

+  MigrationAddress **channel,
+  Error **errp)
+{
+g_autoptr(MigrationAddress) addr = g_new0(MigrationAddress, 1);

This cannot be g_autoptr because you're passing it out of scope at the
end of the function.

+SocketAddress *saddr = >u.socket;

This attribution is useless. Down below you overwrite it with the result
of socket_parse.

Ack. Will assign saddr to NULL here.

+InetSocketAddress *isock = >u.rdma;
+strList **tail = >u.exec.args;
+
+if (strstart(uri, "exec:", NULL)) {
+addr->transport = MIGRATION_ADDRESS_TYPE_EXEC;
+#ifdef WIN32
+QAPI_LIST_APPEND(tail, g_strdup(exec_get_cmd_path()));
+QAPI_LIST_APPEND(tail, g_strdup("/c"));
+#else
+QAPI_LIST_APPEND(tail, g_strdup("/bin/sh"));
+QAPI_LIST_APPEND(tail, g_strdup("-c"));
+#endif
+QAPI_LIST_APPEND(tail, g_strdup(uri + strlen("exec:")));
+} else if (strstart(uri, "rdma:", NULL)) {
+if (inet_parse(isock, uri + strlen("rdma:"), errp)) {
+qapi_free_InetSocketAddress(isock);
+return false;
+}
+addr->transport = MIGRATION_ADDRESS_TYPE_RDMA;
+} else if (strstart(uri, "tcp:", NULL) ||
+strstart(uri, "unix:", NULL) ||
+strstart(uri, "vsock:", NULL) ||
+strstart(uri, "fd:", NULL)) {
+addr->transport = MIGRATION_ADDRESS_TYPE_SOCKET;
+saddr = socket_parse(uri, errp);
+if (!saddr) {
+qapi_free_SocketAddress(saddr);

Shouldn't free here. socket_parse() already does so on failure.

Okay, understood. So should just return false, nothing else here.

+return false;
+}

Then here you can set the values you intended to set.

addr->u.socket.type = saddr->type;
addr->u.socket.u = saddr->u;

Ack. Will do that change.

+} else {
+error_setg(errp, "unknown migration protocol: %s", uri);
+return false;
+}
+
+*channel = addr;
+return true;
+}
+
  static void qemu_start_incoming_migration(const char *uri, Error **errp)
  {
  const char *p = NULL;
+g_autoptr(MigrationAddress) channel = g_new0(MigrationAddress, 1);

The memory is leaked here because the pointer is overwritten below. Just
set it to NULL. You can keep the g_autoptr.

Ack. You mean it is overwritten by migrate_parse_uri function.
  
  /* URI is not suitable for migration? */

  if (!migration_channels_and_uri_compatible(uri, errp)) {
  return;
  }
  
+if (uri && !migrate_uri_parse(uri, , errp)) {

+return;
+}
+
  qapi_event_send_migration(MIGRATION_STATUS_SETUP);
  if (strstart(uri, "tcp:", ) ||
  strstart(uri, "unix:", NULL) ||
@@ -1671,12 +1721,17 @@ void qmp_migrate(const char *uri, bool has_blk, bool 
blk,
  Error *local_err = NULL;
  MigrationState *s = migrate_get_current();
  const char *p = NULL;
+g_autoptr(MigrationAddress) channel = g_new0(MigrationAddress, 1);

Same here.

Ack. Will change that.
  
  /* URI is not suitable for migration? */

  if

Re: [PATCH] libvduse: Fix compiler warning with -Wshadow=local

2023-10-07 Thread Yongji Xie

On Fri, Oct 6, 2023 at 8:08 PM Thomas Huth  wrote:
>
> No need to declare a new variable with the same name here,
> we can simple re-use the one from the top of the function.
> With this change, the file now compiles fine with -Wshadow=local.
>
> Signed-off-by: Thomas Huth 
> ---

Reviewed-by: Xie Yongji 

Thanks,
Yongji

[PATCH] docs/about: Deprecate the 'next-cube' m68k machine

2023-10-07 Thread Thomas Huth

The machine is incomplete, and unfortunately the hoped-for improvements
never happened. So it's maybe best if we mark this machine as deprecated
and finally remove it again in case it gets into the way of a code
refactoring or other reasons.

Signed-off-by: Thomas Huth 
---
 docs/about/deprecated.rst | 8 
 1 file changed, 8 insertions(+)

diff --git a/docs/about/deprecated.rst b/docs/about/deprecated.rst
index 3b074b9ed4..2f6dadd12f 100644
--- a/docs/about/deprecated.rst
+++ b/docs/about/deprecated.rst
@@ -253,6 +253,14 @@ deprecated; use the new name ``dtb-randomness`` instead. 
The new name
 better reflects the way this property affects all random data within
 the device tree blob, not just the ``kaslr-seed`` node.
 
+``next-cube`` m68k machine (since 8.2)
+''
+
+The machine never got fully implemented and can only show the firmware prompt.
+Given the incomplete state and slow progress on improvements, it might get
+removed again without replacement.
+
+
 Backend options
 ---
 
-- 
2.41.0

[PATCH] i386/pc: Drop pc_machine_kvm_type()

2023-10-07 Thread Xiaoyao Li

pc_machine_kvm_type() was introduced by commit e21be724eaf5 ("i386/xen:
add pc_machine_kvm_type to initialize XEN_EMULATE mode") to do Xen
specific initialization by utilizing kvm_type method.

commit eeedfe6c6316 ("hw/xen: Simplify emulated Xen platform init")
moves the Xen specific initialization to pc_basic_device_init().

There is no need to keep the PC specific kvm_type() implementation
anymore. So we'll fallback to kvm_arch_get_default_type(), which
simply returns 0.

Signed-off-by: Xiaoyao Li 
Reviewed-by: Isaku Yamahata 
Reviewed-by: David Hildenbrand 
Acked-by: David Woodhouse 
---
 hw/i386/pc.c | 5 -
 include/hw/i386/pc.h | 3 ---
 2 files changed, 8 deletions(-)

diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index aad7e8ccd1d7..41783b137b9a 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -1730,11 +1730,6 @@ static void pc_machine_initfn(Object *obj)
 cxl_machine_init(obj, >cxl_devices_state);
 }
 
-int pc_machine_kvm_type(MachineState *machine, const char *kvm_type)
-{
-return 0;
-}
-
 static void pc_machine_reset(MachineState *machine, ShutdownCause reason)
 {
 CPUState *cs;
diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
index bec38cb92cf7..ad7149cb10b5 100644
--- a/include/hw/i386/pc.h
+++ b/include/hw/i386/pc.h
@@ -305,15 +305,12 @@ extern const size_t pc_compat_1_5_len;
 extern GlobalProperty pc_compat_1_4[];
 extern const size_t pc_compat_1_4_len;
 
-int pc_machine_kvm_type(MachineState *machine, const char *vm_type);
-
 #define DEFINE_PC_MACHINE(suffix, namestr, initfn, optsfn) \
 static void pc_machine_##suffix##_class_init(ObjectClass *oc, void *data) \
 { \
 MachineClass *mc = MACHINE_CLASS(oc); \
 optsfn(mc); \
 mc->init = initfn; \
-mc->kvm_type = pc_machine_kvm_type; \
 } \
 static const TypeInfo pc_machine_type_##suffix = { \
 .name   = namestr TYPE_MACHINE_SUFFIX, \

base-commit: 2f3913f4b2ad74baeb5a6f1d36efbd9ecdf1057d
-- 
2.34.1

Re: [PATCH v2 1/2] migration: Fix rdma migration failed

2023-10-07 Thread Zhijian Li (Fujitsu)



On 04/10/2023 02:57, Juan Quintela wrote:
> commit c638f66121ce30063fbf68c3eab4d7429cf2b209
> Author: Juan Quintela
> Date:   Tue Oct 3 20:53:38 2023 +0200
> 
>  migration: Non multifd migration don't care about multifd flushes
>  
>  RDMA was having trouble because
>  migrate_multifd_flush_after_each_section() can only be true or false,
>  but we don't want to send any flush when we are not in multifd
>  migration.
>  
>  CC: Fabiano Rosas   Reported-by: Li Zhijian
>  Signed-off-by: Juan Quintela

Looks good to me

Reviewed-by: Li Zhijian

78 matches

Mail list logo