[PATCH 2/5] migration: HMP side changes for modified 'migrate' QAPI design

2022-12-25 Thread Het Gala
From: Author Het Gala 

hmp_migrate() accepts uri (backward compatibility) and a
well-defined struct for migration parameters, and with help of
migrate_channel_from_qdict() maps QDict's 'key':'value' pair
required for migration stream into MigrateChannel struct.

Suggested-by: Daniel P. Berrange 
Suggested-by: Manish Mishra 
Suggested-by: Aravind Retnakaran 
Signed-off-by: Het Gala 
---
 migration/migration.c |  6 +--
 monitor/hmp-cmds.c| 91 ++-
 2 files changed, 93 insertions(+), 4 deletions(-)

diff --git a/migration/migration.c b/migration/migration.c
index 52b5d39244..1b6e62612a 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -2391,9 +2391,9 @@ static bool migrate_prepare(MigrationState *s, bool blk, 
bool blk_inc,
 return true;
 }
 
-void qmp_migrate(const char *uri, bool has_blk, bool blk,
- bool has_inc, bool inc, bool has_detach, bool detach,
- bool has_resume, bool resume, Error **errp)
+void qmp_migrate(const char *uri, MigrateChannel *channel, bool has_blk,
+ bool blk, bool has_inc, bool inc, bool has_detach,
+ bool detach, bool has_resume, bool resume, Error **errp)
 {
 Error *local_err = NULL;
 MigrationState *s = migrate_get_current();
diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c
index ed78a87ddd..e44d96f5dc 100644
--- a/monitor/hmp-cmds.c
+++ b/monitor/hmp-cmds.c
@@ -999,6 +999,91 @@ void hmp_announce_self(Monitor *mon, const QDict *qdict)
 qapi_free_AnnounceParameters(params);
 }
 
+static void
+migrate_channel_from_qdict(MigrateChannel **channel,
+   const QDict *qdict, Error **errp)
+{
+Error *err = NULL;
+const char *channeltype  = qdict_get_try_str(qdict, "channeltype");
+const char *transport_str = qdict_get_try_str(qdict, "transport");
+const char *socketaddr_type = qdict_get_try_str(qdict, "type");
+const char *inet_host = qdict_get_try_str(qdict, "host");
+const char *inet_port = qdict_get_try_str(qdict, "port");
+const char *unix_path = qdict_get_try_str(qdict, "path");
+const char *vsock_cid = qdict_get_try_str(qdict, "cid");
+const char *vsock_port = qdict_get_try_str(qdict, "port");
+const char *fd_str = qdict_get_try_str(qdict, "str");
+const char *exec_str = qdict_get_try_str(qdict, "exec-str");
+const char *rdma_str = qdict_get_try_str(qdict, "rdma-str");
+MigrateChannel *val = g_new0(MigrateChannel, 1);
+MigrateChannelType channel_type;
+MigrateTransport transport;
+MigrateAddress *addr = g_new0(MigrateAddress, 1);
+SocketAddress *saddr = g_new(SocketAddress, 1);
+SocketAddressType type;
+
+channel_type = qapi_enum_parse(&MigrateChannelType_lookup,
+   channeltype, -1, &err);
+if (channel_type < 0) {
+goto end;
+}
+
+transport = qapi_enum_parse(&MigrateTransport_lookup,
+transport_str, -1, &err);
+if (transport < 0) {
+goto end;
+}
+
+type = qapi_enum_parse(&SocketAddressType_lookup,
+   socketaddr_type, -1, &err);
+if (type < 0) {
+goto end;
+}
+
+addr->transport = transport;
+switch (transport) {
+case MIGRATE_TRANSPORT_SOCKET:
+switch (type) {
+case SOCKET_ADDRESS_TYPE_INET:
+saddr->type = SOCKET_ADDRESS_TYPE_INET;
+saddr->u.inet.host = (char *)inet_host;
+saddr->u.inet.port = (char *)inet_port;
+break;
+case SOCKET_ADDRESS_TYPE_UNIX:
+saddr->type = SOCKET_ADDRESS_TYPE_UNIX;
+saddr->u.q_unix.path = (char *)unix_path;
+break;
+case SOCKET_ADDRESS_TYPE_VSOCK:
+saddr->type = SOCKET_ADDRESS_TYPE_VSOCK;
+saddr->u.vsock.cid = (char *)vsock_cid;
+saddr->u.vsock.port = (char *)vsock_port;
+break;
+case SOCKET_ADDRESS_TYPE_FD:
+saddr->type = SOCKET_ADDRESS_TYPE_FD;
+saddr->u.fd.str = (char *)fd_str;
+break;
+default:
+break;
+}
+addr->u.socket.socket_type = saddr;
+break;
+case MIGRATE_TRANSPORT_EXEC:
+addr->u.exec.exec_str = (char *)exec_str;
+break;
+case MIGRATE_TRANSPORT_RDMA:
+addr->u.rdma.rdma_str = (char *)rdma_str;
+break;
+default:
+break;
+}
+
+val->channeltype = channel_type;
+val->addr = addr;
+*channel = val;
+end:
+error_propagate(errp, err);
+}
+
 void hmp_migrate_cancel(Monitor *mon, const QDict *qdict)
 {
 qmp_migrate_cancel(NULL);
@@ -1441,8 +1526,12 @@ void hmp_migrate(Monitor *mon, const QDict *qdict)
 const char *uri = qdict_get_str(qdict, "uri");
 Error *err = NULL;
 
-qmp_migrate(uri, !!blk, blk, !!inc, inc,
+MigrateChannel *channel = g_new0(MigrateChannel, 1);
+migrate_channel_from_qdict(&channel, qdict, &err);
+
+   

[PATCH 0/5] migration: Modified 'migrate' QAPI command for migration

2022-12-25 Thread Het Gala
Current QAPI 'migrate' command design (for initiating a migration
stream) contains information regarding different migrate transport mechanism
(tcp / unix / exec), dest-host IP address, and binding port number in form of
a string. Thus the design does seem to have some design issues. Some of the
issues, stated below are:

1. Use of string URIs is a data encoding scheme within a data encoding scheme.
   QEMU code should directly be able to work with the results from QAPI,
   without resorting to do a second level of parsing (eg. socket_parse()).
2. For features / parameters related to migration, the migration tunables needs
   to be defined and updated upfront. For example, 'migrate-set-capability'
   and 'migrate-set-parameter' is required to enable multifd capability and
   multifd-number of channels respectively. Instead, 'Multifd-channels' can
   directly be represented as a single additional parameter to 'migrate'
   QAPI. 'migrate-set-capability' and 'migrate-set-parameter' commands could
   be used for runtime tunables that need setting after migration has already
   started.

The current patchset focuses on solving the first problem of multi-level
encoding of URIs. The patch defines 'migrate' command as a QAPI discriminated
union for the various transport backends (like socket, exec and rdma), and on
basis of transport backends, different migration parameters are defined.

(uri) string -->  (channel) Channel-type
Transport-type
Migration parameters based on transport type

-

Author Het Gala (5):
  migration: Updated QAPI format for 'migrate' qemu monitor command
  migration: HMP side changes for modified 'migrate' QAPI design
  migration: Avoid multiple parsing of uri in migration code flow
  migration: Modified 'migrate-incoming' QAPI and HMP side changes on
the destination interface.
  migration: Established connection for listener sockets on the dest
interface

 migration/migration.c | 133 +--
 migration/socket.c|  31 +
 migration/socket.h|   5 +-
 monitor/hmp-cmds.c| 101 -
 qapi/migration.json   | 143 --
 softmmu/vl.c  |   2 +-
 6 files changed, 344 insertions(+), 71 deletions(-)

-- 
2.22.3




[PATCH 1/5] migration: Updated QAPI format for 'migrate' qemu monitor command

2022-12-25 Thread Het Gala
From: Author Het Gala 

Existing 'migrate' QAPI design enforces transport mechanism, ip address
of destination interface and corresponding port number in the form
of a unified string 'uri' parameter. This scheme does seem to have an issue
in it, i.e. double-level encoding of URIs.

The current patch maps existing QAPI design into a well-defined data
structure - 'MigrateChannel' only from the design perspective. Please note that
the existing 'uri' parameter is kept untouched for backward compatibility.

Suggested-by: Daniel P. Berrange 
Suggested-by: Manish Mishra 
Suggested-by: Aravind Retnakaran 
Signed-off-by: Het Gala 
---
 qapi/migration.json | 121 +++-
 1 file changed, 119 insertions(+), 2 deletions(-)

diff --git a/qapi/migration.json b/qapi/migration.json
index 88ecf86ac8..753e187ce2 100644
--- a/qapi/migration.json
+++ b/qapi/migration.json
@@ -1449,12 +1449,108 @@
 ##
 { 'command': 'migrate-continue', 'data': {'state': 'MigrationStatus'} }
 
+##
+# @MigrateTransport:
+#
+# The supported communication transport mechanisms for migration
+#
+# @socket: Supported communication type between two devices for migration.
+#  Socket is able to cover all of 'tcp', 'unix', 'vsock' and
+#  'fd' already
+#
+# @exec: Supported communication type to redirect migration stream into file.
+#
+# @rdma: Supported communication type to redirect rdma type migration stream.
+#
+# Since 8.0
+##
+{ 'enum': 'MigrateTransport',
+  'data': ['socket', 'exec', 'rdma'] }
+
+##
+# @MigrateSocketAddr:
+#
+# To support different type of socket.
+#
+# @socket-type: Different type of socket connections.
+#
+# Since 8.0
+##
+{ 'struct': 'MigrateSocketAddr',
+  'data': {'socket-type': 'SocketAddress' } }
+
+##
+# @MigrateExecAddr:
+ #
+ # Since 8.0
+ ##
+{ 'struct': 'MigrateExecAddr',
+   'data' : {'exec-str': 'str' } }
+
+##
+# @MigrateRdmaAddr:
+#
+# Since 8.0
+##
+{ 'struct': 'MigrateRdmaAddr',
+   'data' : {'rdma-str': 'str' } }
+
+##
+# @MigrateAddress:
+#
+# The options available for communication transport mechanisms for migration
+#
+# Since 8.0
+##
+{ 'union' : 'MigrateAddress',
+  'base' : { 'transport' : 'MigrateTransport'},
+  'discriminator' : 'transport',
+  'data' : {
+'socket' : 'MigrateSocketAddr',
+'exec' : 'MigrateExecAddr',
+'rdma': 'MigrateRdmaAddr' } }
+
+##
+# @MigrateChannelType:
+#
+# The supported options for migration channel type requests
+#
+# @main: Support request for main outbound migration control channel
+#
+# Since 8.0
+##
+{ 'enum': 'MigrateChannelType',
+  'data': [ 'main'] }
+
+##
+# @MigrateChannel:
+#
+# Information regarding migration Channel-type for transferring packets,
+# source and corresponding destination interface for socket connection
+# and number of multifd channels over the interface.
+#
+# @channeltype: Name of Channel type for transfering packet information
+#
+# @addr: SocketAddress of destination interface
+#
+# Since 8.0
+##
+{ 'struct': 'MigrateChannel',
+  'data' : {
+   'channeltype' : 'MigrateChannelType',
+   'addr' : 'MigrateAddress' } }
+
 ##
 # @migrate:
 #
 # Migrates the current running guest to another Virtual Machine.
 #
 # @uri: the Uniform Resource Identifier of the destination VM
+#   for migration thread
+#
+# @channel: Struct containing migration channel type, along with all
+#   the details of destination interface required for initiating
+#   a migration stream.
 #
 # @blk: do block migration (full disk copy)
 #
@@ -1479,15 +1575,36 @@
 # 3. The user Monitor's "detach" argument is invalid in QMP and should not
 #be used
 #
+# 4. The uri argument should have the Uniform Resource Identifier of default
+#destination VM. This connection will be bound to default network
+#
+# 5. Both 'uri' and 'channel' arguments, are mututally exclusive but, atleast
+#one of the two arguments should be present.
+#
 # Example:
 #
 # -> { "execute": "migrate", "arguments": { "uri": "tcp:0:4446" } }
 # <- { "return": {} }
 #
+# -> { "execute": "migrate",
+#  "arguments": {
+#  "channel": { "channeltype": "main",
+#"addr": { "transport": "socket",
+#  "socket-type": { "type': "inet',
+#   "host": "10.12.34.9",
+#   "port": "1050" } } } } }
+# <- { "return": {} }
+#
+# -> { "execute": "migrate",
+#  "arguments": { "channel": { "channeltype": "main",
+#  "addr": { "transport": "exec",
+#"exec-str": "/tmp/exec" } } } }
+# <- { "return": {} }
+#
 ##
 { 'command': 'migrate',
-  'data': {'uri': 'str', '*blk': 'bool', '*inc': 'bool',
-   '*detach': 'bool', '*resume': 'bool' } }
+  'data': {'*uri': 'str', '*channel': 'MigrateChannel', '*blk': 'bool',
+   '*inc': 'bool', '*detach': 'bool', '*resume': 'bool' } }
 
 ##
 # @migrate-inc

[PATCH 4/5] migration: Modified 'migrate-incoming' QAPI and HMP side changes on the destination interface.

2022-12-25 Thread Het Gala
From: Author Het Gala 

'migrate-incoming' QAPI design have been modified into a well-defined struct
'MigrateChannel'. Similarly like the source side, modified design was
introduced on destination side mainly to prevent multiple-level encoding of
uri string.

The struct contains various fields for type of migration channel, type of
transport backends and various associated migration parameters with each
backends.

Please note that the 'uri' parameter is kept for backward compatibility.
HMP side changes similar to source interface , on dest interface uses
migrate_channel_from_qdict() to redirect migration parameters from QDict
into 'MigrateChannel' struct.

Suggested-by: Daniel P. Berrange 
Suggested-by: Manish Mishra 
Suggested-by: Aravind Retnakaran 
Signed-off-by: Het Gala 
---
 migration/migration.c |  3 ++-
 monitor/hmp-cmds.c| 10 --
 qapi/migration.json   | 22 --
 softmmu/vl.c  |  2 +-
 4 files changed, 31 insertions(+), 6 deletions(-)

diff --git a/migration/migration.c b/migration/migration.c
index 36de9f6a6b..838940fd55 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -2237,7 +2237,8 @@ void migrate_del_blocker(Error *reason)
 migration_blockers = g_slist_remove(migration_blockers, reason);
 }
 
-void qmp_migrate_incoming(const char *uri, Error **errp)
+void qmp_migrate_incoming(const char *uri, MigrateChannel *channel,
+  Error **errp)
 {
 Error *local_err = NULL;
 static bool once = true;
diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c
index e44d96f5dc..7f45624c41 100644
--- a/monitor/hmp-cmds.c
+++ b/monitor/hmp-cmds.c
@@ -1106,9 +1106,15 @@ void hmp_migrate_incoming(Monitor *mon, const QDict 
*qdict)
 {
 Error *err = NULL;
 const char *uri = qdict_get_str(qdict, "uri");
+MigrateChannel *channel = g_new0(MigrateChannel, 1);
+migrate_channel_from_qdict(&channel, qdict, &err);
+if (err) {
+error_setg(&err, "error in retrieving channel from qdict");
+return;
+}
 
-qmp_migrate_incoming(uri, &err);
-
+qmp_migrate_incoming(uri, channel, &err);
+qapi_free_MigrateChannel(channel);
 hmp_handle_error(mon, err);
 }
 
diff --git a/qapi/migration.json b/qapi/migration.json
index 753e187ce2..201b085715 100644
--- a/qapi/migration.json
+++ b/qapi/migration.json
@@ -1613,7 +1613,11 @@
 # with -incoming defer
 #
 # @uri: The Uniform Resource Identifier identifying the source or
-#   address to listen on
+#   the address to listen on
+#
+# @channel: Struct containing migration channel type, along with
+#   all the details of the destination interface required
+#   for the address to listen on for migration stream.
 #
 # Returns: nothing on success
 #
@@ -1630,14 +1634,28 @@
 #
 # 3. The uri format is the same as for -incoming
 #
+# 4. The 'uri' and 'channel' arguments are mutually exclusive but, atleast
+#one of the two arguments should be present.
+#
 # Example:
 #
 # -> { "execute": "migrate-incoming",
 #  "arguments": { "uri": "tcp::4446" } }
 # <- { "return": {} }
 #
+# -> { "execute": "migrate",
+#  "arguments": {
+#  "channels": { 'channeltype': 'main',
+#'addr': { 'transport': 'socket',
+#  'socket-type': {'type': 'inet',
+#  'host': '10.12.34.9',
+#  'port': '1050' } } } } }
+# <- { "return": {} }
+#
 ##
-{ 'command': 'migrate-incoming', 'data': {'uri': 'str' } }
+{ 'command': 'migrate-incoming',
+ 'data': {'*uri': 'str',
+  '*channel': 'MigrateChannel'} }
 
 ##
 # @xen-save-devices-state:
diff --git a/softmmu/vl.c b/softmmu/vl.c
index 798e1dc933..7005be978a 100644
--- a/softmmu/vl.c
+++ b/softmmu/vl.c
@@ -2614,7 +2614,7 @@ void qmp_x_exit_preconfig(Error **errp)
 if (incoming) {
 Error *local_err = NULL;
 if (strcmp(incoming, "defer") != 0) {
-qmp_migrate_incoming(incoming, &local_err);
+qmp_migrate_incoming(incoming, NULL, &local_err);
 if (local_err) {
 error_reportf_err(local_err, "-incoming %s: ", incoming);
 exit(1);
-- 
2.22.3




[PATCH 3/5] migration: Avoid multiple parsing of uri in migration code flow

2022-12-25 Thread Het Gala
From: Author Het Gala 

Existing uri is encoded at multiple levels to extract the relevant
migration information.

The modified QAPI design maps migration parameters into MigrateChannel
struct before, thus avoiding double-level uri encoding.

socket_outgoing_migration() has been depricated as It's only purpose was
uri parsing.
Renamed socket_outgoing_migration_internal() as socket_outgoing_migration().
qemu_uri_parsing() has been introduced to parse uri string (backward
compatibility) and populate the MigrateChannel struct parameters. Note that
the function will no longer be needed once the 'uri' parameter is depricated.

Suggested-by: Daniel P. Berrange 
Suggested-by: Manish Mishra 
Suggested-by: Aravind Retnakaran 
Signed-off-by: Het Gala 
---
 migration/migration.c | 78 +++
 migration/socket.c| 15 +
 migration/socket.h|  3 +-
 3 files changed, 67 insertions(+), 29 deletions(-)

diff --git a/migration/migration.c b/migration/migration.c
index 1b6e62612a..36de9f6a6b 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -61,6 +61,7 @@
 #include "sysemu/cpus.h"
 #include "yank_functions.h"
 #include "sysemu/qtest.h"
+#include "qemu/sockets.h"
 
 #define MAX_THROTTLE  (128 << 20)  /* Migration transfer speed throttling 
*/
 
@@ -486,6 +487,39 @@ void migrate_add_address(SocketAddress *address)
   QAPI_CLONE(SocketAddress, address));
 }
 
+static void qemu_uri_parsing(const char *uri,
+ MigrateChannel **channel,
+ Error **errp)
+{
+Error *local_err = NULL;
+const char *p = NULL;
+MigrateChannel *val = g_new0(MigrateChannel, 1);
+MigrateAddress *addrs = g_new0(MigrateAddress, 1);
+SocketAddress *saddr = g_new0(SocketAddress, 1);
+
+if (strstart(uri, "exec:", &p)) {
+addrs->transport = MIGRATE_TRANSPORT_EXEC;
+addrs->u.exec.exec_str = g_strdup(p + strlen("exec:"));
+} else if (strstart(uri, "rdma:", NULL)) {
+addrs->transport = MIGRATE_TRANSPORT_RDMA;
+addrs->u.rdma.rdma_str = g_strdup(p + strlen("rdma:"));
+} else if (strstart(uri, "tcp:", NULL) ||
+strstart(uri, "unix:", NULL) ||
+strstart(uri, "vsock:", NULL) ||
+strstart(uri, "fd:", NULL)) {
+addrs->transport = MIGRATE_TRANSPORT_SOCKET;
+saddr = socket_parse(uri, &local_err);
+addrs->u.socket.socket_type = saddr;
+}
+val->channeltype = MIGRATE_CHANNEL_TYPE_MAIN;
+val->addr = addrs;
+*channel = val;
+
+if (local_err) {
+error_propagate(errp, local_err);
+}
+}
+
 static void qemu_start_incoming_migration(const char *uri, Error **errp)
 {
 const char *p = NULL;
@@ -2397,7 +2431,8 @@ void qmp_migrate(const char *uri, MigrateChannel 
*channel, bool has_blk,
 {
 Error *local_err = NULL;
 MigrationState *s = migrate_get_current();
-const char *p = NULL;
+MigrateAddress *addrs = g_new0(MigrateAddress, 1);
+SocketAddress *saddr = g_new0(SocketAddress, 1);
 
 if (!migrate_prepare(s, has_blk && blk, has_inc && inc,
  has_resume && resume, errp)) {
@@ -2411,20 +2446,35 @@ void qmp_migrate(const char *uri, MigrateChannel 
*channel, bool has_blk,
 }
 }
 
+/*
+ * motive here is just to have checks and convert uri into
+ * socketaddress struct
+ */
+if (uri && channel) {
+error_setg(errp, "uri and channels options should be"
+  "mutually exclusive");
+} else if (uri) {
+qemu_uri_parsing(uri, &channel, &local_err);
+}
+
 migrate_protocol_allow_multi_channels(false);
-if (strstart(uri, "tcp:", &p) ||
-strstart(uri, "unix:", NULL) ||
-strstart(uri, "vsock:", NULL)) {
-migrate_protocol_allow_multi_channels(true);
-socket_start_outgoing_migration(s, p ? p : uri, &local_err);
-#ifdef CONFIG_RDMA
-} else if (strstart(uri, "rdma:", &p)) {
-rdma_start_outgoing_migration(s, p, &local_err);
-#endif
-} else if (strstart(uri, "exec:", &p)) {
-exec_start_outgoing_migration(s, p, &local_err);
-} else if (strstart(uri, "fd:", &p)) {
-fd_start_outgoing_migration(s, p, &local_err);
+addrs = channel->addr;
+saddr = channel->addr->u.socket.socket_type;
+if (addrs->transport == MIGRATE_TRANSPORT_SOCKET) {
+if (saddr->type == SOCKET_ADDRESS_TYPE_INET ||
+saddr->type == SOCKET_ADDRESS_TYPE_UNIX ||
+saddr->type == SOCKET_ADDRESS_TYPE_VSOCK) {
+migrate_protocol_allow_multi_channels(true);
+socket_start_outgoing_migration(s, saddr, &local_err);
+} else if (saddr->type == SOCKET_ADDRESS_TYPE_FD) {
+fd_start_outgoing_migration(s, saddr->u.fd.str, &local_err);
+}
+#ifdef CONFIG_RDMA
+} else if (addrs->transport == MIGRATE_TRANSPORT_RDMA) {
+rdma_start_outgoing_migration(s, addr

[PATCH 5/5] migration: Established connection for listener sockets on the dest interface

2022-12-25 Thread Het Gala
From: Author Het Gala 

Modified 'migrate-incoming' QAPI supports MigrateChannel parameters to prevent
multi-level encodings of uri on the destination side.

socket_start_incoming_migration() has been depricated as it's only purpose was
uri parsing.
Renamed socket_outgoing_migration_internal() as 
socket_start_incoming_migration().
qemu_uri_parsing() is used to populate the new struct from 'uri' string
needed for migration connection. The function will no longer be used once the
'uri' parameter is depricated, as the parameters will already be mapped into
new struct.

Suggested-by: Daniel P. Berrange 
Suggested-by: Manish Mishra 
Suggested-by: Aravind Retnakaran 
Signed-off-by: Het Gala 
---
 migration/migration.c | 48 ---
 migration/socket.c| 16 ++-
 migration/socket.h|  2 +-
 3 files changed, 35 insertions(+), 31 deletions(-)

diff --git a/migration/migration.c b/migration/migration.c
index 838940fd55..c70fd0ab4f 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -520,27 +520,43 @@ static void qemu_uri_parsing(const char *uri,
 }
 }
 
-static void qemu_start_incoming_migration(const char *uri, Error **errp)
+static void qemu_start_incoming_migration(const char *uri,
+  MigrateChannel *channel,
+  Error **errp)
 {
-const char *p = NULL;
+MigrateAddress *addrs = g_new0(MigrateAddress, 1);
+SocketAddress *saddr = g_new0(SocketAddress, 1);
+
+/*
+ * motive here is just to have checks and convert uri into
+ * socketaddress struct
+ */
+if (uri && channel) {
+error_setg(errp, "uri and channels options should be used "
+  "mutually exclusive");
+} else if (uri) {
+qemu_uri_parsing(uri, &channel, errp);
+}
 
 migrate_protocol_allow_multi_channels(false); /* reset it anyway */
 qapi_event_send_migration(MIGRATION_STATUS_SETUP);
-if (strstart(uri, "tcp:", &p) ||
-strstart(uri, "unix:", NULL) ||
-strstart(uri, "vsock:", NULL)) {
-migrate_protocol_allow_multi_channels(true);
-socket_start_incoming_migration(p ? p : uri, errp);
+if (addrs->transport == MIGRATE_TRANSPORT_SOCKET) {
+if (saddr->type == SOCKET_ADDRESS_TYPE_INET ||
+saddr->type == SOCKET_ADDRESS_TYPE_UNIX ||
+saddr->type == SOCKET_ADDRESS_TYPE_VSOCK) {
+migrate_protocol_allow_multi_channels(true);
+socket_start_incoming_migration(saddr, errp);
+} else if (saddr->type == SOCKET_ADDRESS_TYPE_FD) {
+fd_start_incoming_migration(saddr->u.fd.str, errp);
+}
 #ifdef CONFIG_RDMA
-} else if (strstart(uri, "rdma:", &p)) {
-rdma_start_incoming_migration(p, errp);
+} else if (addrs->transport == MIGRATE_TRANSPORT_RDMA) {
+rdma_start_incomng_migration(addrs->u.rdma.rdma_str, errp);
 #endif
-} else if (strstart(uri, "exec:", &p)) {
-exec_start_incoming_migration(p, errp);
-} else if (strstart(uri, "fd:", &p)) {
-fd_start_incoming_migration(p, errp);
+} else if (addrs->transport == MIGRATE_TRANSPORT_EXEC) {
+exec_start_incoming_migration(addrs->u.exec.exec_str, errp);
 } else {
-error_setg(errp, "unknown migration protocol: %s", uri);
+error_setg(errp, "unknown migration protocol: %i", addrs->transport);
 }
 }
 
@@ -2256,7 +2272,7 @@ void qmp_migrate_incoming(const char *uri, MigrateChannel 
*channel,
 return;
 }
 
-qemu_start_incoming_migration(uri, &local_err);
+qemu_start_incoming_migration(uri, channel, &local_err);
 
 if (local_err) {
 yank_unregister_instance(MIGRATION_YANK_INSTANCE);
@@ -2292,7 +2308,7 @@ void qmp_migrate_recover(const char *uri, Error **errp)
  * only re-setup the migration stream and poke existing migration
  * to continue using that newly established channel.
  */
-qemu_start_incoming_migration(uri, errp);
+qemu_start_incoming_migration(uri, NULL, errp);
 }
 
 void qmp_migrate_pause(Error **errp)
diff --git a/migration/socket.c b/migration/socket.c
index ecf98b7e6b..3558821298 100644
--- a/migration/socket.c
+++ b/migration/socket.c
@@ -158,9 +158,8 @@ socket_incoming_migration_end(void *opaque)
 object_unref(OBJECT(listener));
 }
 
-static void
-socket_start_incoming_migration_internal(SocketAddress *saddr,
- Error **errp)
+void socket_start_incoming_migration(SocketAddress *saddr,
+ Error **errp)
 {
 QIONetListener *listener = qio_net_listener_new();
 MigrationIncomingState *mis = migration_incoming_get_current();
@@ -198,14 +197,3 @@ socket_start_incoming_migration_internal(SocketAddress 
*saddr,
 qapi_free_SocketAddress(address);
 }
 }
-
-void socket_start_incoming_migration(const char *str, Error **errp)
-{
-Error *err = NULL;
-SocketAdd

Re: [PATCH v3] intel-iommu: Document iova_tree

2022-12-25 Thread Jason Wang
On Sat, Dec 24, 2022 at 12:26 AM Peter Xu  wrote:
>
> On Fri, Dec 23, 2022 at 03:48:01PM +0800, Jason Wang wrote:
> > On Wed, Dec 7, 2022 at 6:13 AM Peter Xu  wrote:
> > >
> > > It seems not super clear on when iova_tree is used, and why.  Add a rich
> > > comment above iova_tree to track why we needed the iova_tree, and when we
> > > need it.
> > >
> > > Also comment for the map/unmap messages, on how they're used and
> > > implications (e.g. unmap can be larger than the mapped ranges).
> > >
> > > Suggested-by: Jason Wang 
> > > Signed-off-by: Peter Xu 
> > > ---
> > > v3:
> > > - Adjust according to Eric's comment
> > > ---
> > >  include/exec/memory.h | 28 ++
> > >  include/hw/i386/intel_iommu.h | 38 ++-
> > >  2 files changed, 65 insertions(+), 1 deletion(-)
> > >
> > > diff --git a/include/exec/memory.h b/include/exec/memory.h
> > > index 91f8a2395a..269ecb873b 100644
> > > --- a/include/exec/memory.h
> > > +++ b/include/exec/memory.h
> > > @@ -129,6 +129,34 @@ struct IOMMUTLBEntry {
> > >  /*
> > >   * Bitmap for different IOMMUNotifier capabilities. Each notifier can
> > >   * register with one or multiple IOMMU Notifier capability bit(s).
> > > + *
> > > + * Normally there're two use cases for the notifiers:
> > > + *
> > > + *   (1) When the device needs accurate synchronizations of the vIOMMU 
> > > page
> > > + *   tables, it needs to register with both MAP|UNMAP notifies (which
> > > + *   is defined as IOMMU_NOTIFIER_IOTLB_EVENTS below).
> > > + *
> > > + *   Regarding to accurate synchronization, it's when the notified
> > > + *   device maintains a shadow page table and must be notified on 
> > > each
> > > + *   guest MAP (page table entry creation) and UNMAP (invalidation)
> > > + *   events (e.g. VFIO). Both notifications must be accurate so that
> > > + *   the shadow page table is fully in sync with the guest view.
> > > + *
> > > + *   (2) When the device doesn't need accurate synchronizations of the
> > > + *   vIOMMU page tables, it needs to register only with UNMAP or
> > > + *   DEVIOTLB_UNMAP notifies.
> > > + *
> > > + *   It's when the device maintains a cache of IOMMU translations
> > > + *   (IOTLB) and is able to fill that cache by requesting 
> > > translations
> > > + *   from the vIOMMU through a protocol similar to ATS (Address
> > > + *   Translation Service).
> > > + *
> > > + *   Note that in this mode the vIOMMU will not maintain a shadowed
> > > + *   page table for the address space, and the UNMAP messages can be
> > > + *   actually larger than the real invalidations (just like how the
> > > + *   Linux IOMMU driver normally works, where an invalidation can be
> > > + *   enlarged as long as it still covers the target range).  The 
> > > IOMMU
> >
> > Just spot this when testing your fix for DSI:
> >
> > assert(entry->iova >= notifier->start && entry_end <= 
> > notifier->end);
> >
> > Do we need to remove this (but it seems a partial revert of
> > 03c7140c1a0336af3d4fca768de791b9c0e2b128)?
>
> Replied in the othe thread.
>
> I assume this documentation patch is still correct, am I right?  It's
> talking about the possibility of enlarged invalidation range sent from the
> guest and vIOMMU.  That should still not be bigger than the registered
> range in iommu notifiers (even if bigger than the actual unmapped range).

Adding Eugenio.

So I think we need to evaluate the possible side effects to all the
current nmap notifiers. For example the vfio_iommu_map_notify().

And in another thread, if we crop the size, it basically means the
notifier itself will still assume the range is valid, which is not
what is documented in this patch.

What's more interesting I see smmu had:

/* Unmap the whole notifier's range */
static void smmu_unmap_notifier_range(IOMMUNotifier *n)
{
IOMMUTLBEvent event;

event.type = IOMMU_NOTIFIER_UNMAP;
event.entry.target_as = &address_space_memory;
event.entry.iova = n->start;
event.entry.perm = IOMMU_NONE;
event.entry.addr_mask = n->end - n->start;

memory_region_notify_iommu_one(n, &event);
}

So it looks to me it's more safe to do something similar for vtd first.

Btw, I forgot the reason why we need to crop the size in the case of
device IOTLB, Eguenio do you know that?

Thanks

>
> Thanks,
>
> --
> Peter Xu
>




Re: [PATCH] hw/audio/c97: fix abort in audio_calloc()

2022-12-25 Thread Bernhard Beschow



Am 25. Dezember 2022 12:13:57 UTC schrieb Qiang Liu :

There is an 'a' missing in the topic: It should be ac97, not c97.

Best regards,
Bernhard

>Section 5.10.2 of the AC97 specification (https://hands.com/~lkcl/ac97_r23.pdf)
>shows the feasibility to support for rates other than 48kHZ. Specifically,
>AC97_PCM_Front_DAC_Rate (reg 2Ch) should be from 8kHZ to 48kHZ.
>
>Before Volker Rümelin fixed it in 12f4abf6a245 and 0cbc8bd4694f, an adversary
>could leverage this to crash QEMU.
>
>Fixes: e5c9a13e2670 ("PCI AC97 emulation by malc.")
>Reported-by: Volker Rümelin 
>Reported-by: Qiang Liu 
>Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1393
>Signed-off-by: Qiang Liu 
>---
> hw/audio/ac97.c | 11 ---
> 1 file changed, 8 insertions(+), 3 deletions(-)
>
>diff --git a/hw/audio/ac97.c b/hw/audio/ac97.c
>index be2dd701a4..826411e462 100644
>--- a/hw/audio/ac97.c
>+++ b/hw/audio/ac97.c
>@@ -625,9 +625,14 @@ static void nam_writew(void *opaque, uint32_t addr, 
>uint32_t val)
> break;
> case AC97_PCM_Front_DAC_Rate:
> if (mixer_load(s, AC97_Extended_Audio_Ctrl_Stat) & EACS_VRA) {
>-mixer_store(s, addr, val);
>-dolog("Set front DAC rate to %d\n", val);
>-open_voice(s, PO_INDEX, val);
>+if (val >= 8000 && val <= 48000) {
>+mixer_store(s, addr, val);
>+dolog("Set front DAC rate to %d\n", val);
>+open_voice(s, PO_INDEX, val);
>+} else {
>+dolog("Attempt to set front DAC rate to %d, but valid is"
>+  "8-48kHZ\n", val);
>+}
> } else {
> dolog("Attempt to set front DAC rate to %d, but VRA is not set\n",
>   val);



Re: [PATCH] hw/audio/c97: fix abort in audio_calloc()

2022-12-25 Thread Bernhard Beschow



Am 25. Dezember 2022 12:13:57 UTC schrieb Qiang Liu :
>Section 5.10.2 of the AC97 specification (https://hands.com/~lkcl/ac97_r23.pdf)
>shows the feasibility to support for rates other than 48kHZ. Specifically,
>AC97_PCM_Front_DAC_Rate (reg 2Ch) should be from 8kHZ to 48kHZ.
>
>Before Volker Rümelin fixed it in 12f4abf6a245 and 0cbc8bd4694f, an adversary
>could leverage this to crash QEMU.
>
>Fixes: e5c9a13e2670 ("PCI AC97 emulation by malc.")
>Reported-by: Volker Rümelin 
>Reported-by: Qiang Liu 
>Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1393
>Signed-off-by: Qiang Liu 
>---
> hw/audio/ac97.c | 11 ---
> 1 file changed, 8 insertions(+), 3 deletions(-)
>
>diff --git a/hw/audio/ac97.c b/hw/audio/ac97.c
>index be2dd701a4..826411e462 100644
>--- a/hw/audio/ac97.c
>+++ b/hw/audio/ac97.c
>@@ -625,9 +625,14 @@ static void nam_writew(void *opaque, uint32_t addr, 
>uint32_t val)
> break;
> case AC97_PCM_Front_DAC_Rate:
> if (mixer_load(s, AC97_Extended_Audio_Ctrl_Stat) & EACS_VRA) {
>-mixer_store(s, addr, val);
>-dolog("Set front DAC rate to %d\n", val);
>-open_voice(s, PO_INDEX, val);
>+if (val >= 8000 && val <= 48000) {
>+mixer_store(s, addr, val);
>+dolog("Set front DAC rate to %d\n", val);
>+open_voice(s, PO_INDEX, val);
>+} else {
>+dolog("Attempt to set front DAC rate to %d, but valid is"
>+  "8-48kHZ\n", val);

We probably want to log a guest error here. Not only does this communicate 
clearly where the problem is (-> in guest code) but it is also incredibly 
useful for development (think of reusing this code in other device models such 
as vt82c686b if applicable).

Best regards,
Bernhard

>+}
> } else {
> dolog("Attempt to set front DAC rate to %d, but VRA is not set\n",
>   val);



Re: [PATCH] hw/audio/c97: fix abort in audio_calloc()

2022-12-25 Thread Christian Schoenebeck
On Sunday, December 25, 2022 1:13:57 PM CET Qiang Liu wrote:
> Section 5.10.2 of the AC97 specification 
> (https://hands.com/~lkcl/ac97_r23.pdf)
> shows the feasibility to support for rates other than 48kHZ. Specifically,
> AC97_PCM_Front_DAC_Rate (reg 2Ch) should be from 8kHZ to 48kHZ.
> 
> Before Volker Rümelin fixed it in 12f4abf6a245 and 0cbc8bd4694f, an adversary
> could leverage this to crash QEMU.
> 
> Fixes: e5c9a13e2670 ("PCI AC97 emulation by malc.")
> Reported-by: Volker Rümelin 
> Reported-by: Qiang Liu 
> Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1393
> Signed-off-by: Qiang Liu 
> ---
>  hw/audio/ac97.c | 11 ---
>  1 file changed, 8 insertions(+), 3 deletions(-)
> 
> diff --git a/hw/audio/ac97.c b/hw/audio/ac97.c
> index be2dd701a4..826411e462 100644
> --- a/hw/audio/ac97.c
> +++ b/hw/audio/ac97.c
> @@ -625,9 +625,14 @@ static void nam_writew(void *opaque, uint32_t addr, 
> uint32_t val)
>  break;
>  case AC97_PCM_Front_DAC_Rate:
>  if (mixer_load(s, AC97_Extended_Audio_Ctrl_Stat) & EACS_VRA) {
> -mixer_store(s, addr, val);
> -dolog("Set front DAC rate to %d\n", val);
> -open_voice(s, PO_INDEX, val);
> +if (val >= 8000 && val <= 48000) {
> +mixer_store(s, addr, val);
> +dolog("Set front DAC rate to %d\n", val);
> +open_voice(s, PO_INDEX, val);
> +} else {
> +dolog("Attempt to set front DAC rate to %d, but valid is"
> +  "8-48kHZ\n", val);
> +}

Missing space between "is" and "8-48kHz" and it is "Hz" (lower z). Except of 
that:

Reviewed-by: Christian Schoenebeck 

>  } else {
>  dolog("Attempt to set front DAC rate to %d, but VRA is not 
> set\n",
>val);
> 





[PATCH] hw/audio/c97: fix abort in audio_calloc()

2022-12-25 Thread Qiang Liu
Section 5.10.2 of the AC97 specification (https://hands.com/~lkcl/ac97_r23.pdf)
shows the feasibility to support for rates other than 48kHZ. Specifically,
AC97_PCM_Front_DAC_Rate (reg 2Ch) should be from 8kHZ to 48kHZ.

Before Volker Rümelin fixed it in 12f4abf6a245 and 0cbc8bd4694f, an adversary
could leverage this to crash QEMU.

Fixes: e5c9a13e2670 ("PCI AC97 emulation by malc.")
Reported-by: Volker Rümelin 
Reported-by: Qiang Liu 
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1393
Signed-off-by: Qiang Liu 
---
 hw/audio/ac97.c | 11 ---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/hw/audio/ac97.c b/hw/audio/ac97.c
index be2dd701a4..826411e462 100644
--- a/hw/audio/ac97.c
+++ b/hw/audio/ac97.c
@@ -625,9 +625,14 @@ static void nam_writew(void *opaque, uint32_t addr, 
uint32_t val)
 break;
 case AC97_PCM_Front_DAC_Rate:
 if (mixer_load(s, AC97_Extended_Audio_Ctrl_Stat) & EACS_VRA) {
-mixer_store(s, addr, val);
-dolog("Set front DAC rate to %d\n", val);
-open_voice(s, PO_INDEX, val);
+if (val >= 8000 && val <= 48000) {
+mixer_store(s, addr, val);
+dolog("Set front DAC rate to %d\n", val);
+open_voice(s, PO_INDEX, val);
+} else {
+dolog("Attempt to set front DAC rate to %d, but valid is"
+  "8-48kHZ\n", val);
+}
 } else {
 dolog("Attempt to set front DAC rate to %d, but VRA is not set\n",
   val);
-- 
2.25.1




Re: [PATCH] m25p80: Add the is25wp256 SFPD table

2022-12-25 Thread Ben Dooks
On Wed, Dec 21, 2022 at 06:36:02PM +0100, Cédric Le Goater wrote:
> On 12/21/22 13:22, Guenter Roeck wrote:
> > Generated from hardware using the following command and then padding
> > with 0xff to fill out a power-of-2:
> > xxd -p /sys/bus/spi/devices/spi0.0/spi-nor/sfdp
> > 
> > Cc: Michael Walle 
> > Cc: Tudor Ambarus 
> > Signed-off-by: Guenter Roeck 
> 
> Reviewed-by: Cédric Le Goater 

If SFDP is a standard, couldn't we have an function to generate it from
the flash parameters?

-- 
Ben Dooks, b...@fluff.org, http://www.fluff.org/ben/

Large Hadron Colada: A large Pina Colada that makes the universe disappear.




[PATCH] e1000e: Introduce e1000_rx_desc_union

2022-12-25 Thread Akihiko Odaki
Before this change, e1000e_write_packet_to_guest() allocated the
receive descriptor buffer as an array of uint8_t. This does not ensure
the buffer is sufficiently aligned.

Introduce e1000_rx_desc_union type, a union type of all receive
descriptor types to correct this.

Signed-off-by: Akihiko Odaki 
---
 hw/net/e1000_regs.h  |   1 -
 hw/net/e1000e_core.c | 112 +--
 2 files changed, 56 insertions(+), 57 deletions(-)

diff --git a/hw/net/e1000_regs.h b/hw/net/e1000_regs.h
index 59e050742b..cf1412d2d8 100644
--- a/hw/net/e1000_regs.h
+++ b/hw/net/e1000_regs.h
@@ -1100,7 +1100,6 @@ union e1000_rx_desc_packet_split {
 #define E1000_RING_DESC_LEN_SHIFT (4)
 
 #define E1000_MIN_RX_DESC_LEN   E1000_RING_DESC_LEN
-#define E1000_MAX_RX_DESC_LEN   (sizeof(union e1000_rx_desc_packet_split))
 
 /* Receive Descriptor bit definitions */
 #define E1000_RXD_STAT_DD   0x01/* Descriptor Done */
diff --git a/hw/net/e1000e_core.c b/hw/net/e1000e_core.c
index fc9cdb4528..b1af807cf6 100644
--- a/hw/net/e1000e_core.c
+++ b/hw/net/e1000e_core.c
@@ -53,6 +53,12 @@
  second according to spec 10.2.4.2 */
 #define E1000E_MAX_TX_FRAGS (64)
 
+union e1000_rx_desc_union {
+struct e1000_rx_desc legacy;
+union e1000_rx_desc_extended extended;
+union e1000_rx_desc_packet_split packet_split;
+};
+
 static inline void
 e1000e_set_interrupt_cause(E1000ECore *core, uint32_t val);
 
@@ -1054,29 +1060,29 @@ e1000e_receive_filter(E1000ECore *core, const uint8_t 
*buf, int size)
 }
 
 static inline void
-e1000e_read_lgcy_rx_descr(E1000ECore *core, uint8_t *desc, hwaddr *buff_addr)
+e1000e_read_lgcy_rx_descr(E1000ECore *core, struct e1000_rx_desc *desc,
+  hwaddr *buff_addr)
 {
-struct e1000_rx_desc *d = (struct e1000_rx_desc *) desc;
-*buff_addr = le64_to_cpu(d->buffer_addr);
+*buff_addr = le64_to_cpu(desc->buffer_addr);
 }
 
 static inline void
-e1000e_read_ext_rx_descr(E1000ECore *core, uint8_t *desc, hwaddr *buff_addr)
+e1000e_read_ext_rx_descr(E1000ECore *core, union e1000_rx_desc_extended *desc,
+ hwaddr *buff_addr)
 {
 union e1000_rx_desc_extended *d = (union e1000_rx_desc_extended *) desc;
 *buff_addr = le64_to_cpu(d->read.buffer_addr);
 }
 
 static inline void
-e1000e_read_ps_rx_descr(E1000ECore *core, uint8_t *desc,
+e1000e_read_ps_rx_descr(E1000ECore *core,
+union e1000_rx_desc_packet_split *desc,
 hwaddr (*buff_addr)[MAX_PS_BUFFERS])
 {
 int i;
-union e1000_rx_desc_packet_split *d =
-(union e1000_rx_desc_packet_split *) desc;
 
 for (i = 0; i < MAX_PS_BUFFERS; i++) {
-(*buff_addr)[i] = le64_to_cpu(d->read.buffer_addr[i]);
+(*buff_addr)[i] = le64_to_cpu(desc->read.buffer_addr[i]);
 }
 
 trace_e1000e_rx_desc_ps_read((*buff_addr)[0], (*buff_addr)[1],
@@ -1084,17 +1090,17 @@ e1000e_read_ps_rx_descr(E1000ECore *core, uint8_t *desc,
 }
 
 static inline void
-e1000e_read_rx_descr(E1000ECore *core, uint8_t *desc,
+e1000e_read_rx_descr(E1000ECore *core, union e1000_rx_desc_union *desc,
  hwaddr (*buff_addr)[MAX_PS_BUFFERS])
 {
 if (e1000e_rx_use_legacy_descriptor(core)) {
-e1000e_read_lgcy_rx_descr(core, desc, &(*buff_addr)[0]);
+e1000e_read_lgcy_rx_descr(core, &desc->legacy, &(*buff_addr)[0]);
 (*buff_addr)[1] = (*buff_addr)[2] = (*buff_addr)[3] = 0;
 } else {
 if (core->mac[RCTL] & E1000_RCTL_DTYP_PS) {
-e1000e_read_ps_rx_descr(core, desc, buff_addr);
+e1000e_read_ps_rx_descr(core, &desc->packet_split, buff_addr);
 } else {
-e1000e_read_ext_rx_descr(core, desc, &(*buff_addr)[0]);
+e1000e_read_ext_rx_descr(core, &desc->extended, &(*buff_addr)[0]);
 (*buff_addr)[1] = (*buff_addr)[2] = (*buff_addr)[3] = 0;
 }
 }
@@ -1265,7 +1271,7 @@ func_exit:
 }
 
 static inline void
-e1000e_write_lgcy_rx_descr(E1000ECore *core, uint8_t *desc,
+e1000e_write_lgcy_rx_descr(E1000ECore *core, struct e1000_rx_desc *desc,
struct NetRxPkt *pkt,
const E1000E_RSSInfo *rss_info,
uint16_t length)
@@ -1273,71 +1279,66 @@ e1000e_write_lgcy_rx_descr(E1000ECore *core, uint8_t 
*desc,
 uint32_t status_flags, rss, mrq;
 uint16_t ip_id;
 
-struct e1000_rx_desc *d = (struct e1000_rx_desc *) desc;
-
 assert(!rss_info->enabled);
 
-d->length = cpu_to_le16(length);
-d->csum = 0;
+desc->length = cpu_to_le16(length);
+desc->csum = 0;
 
 e1000e_build_rx_metadata(core, pkt, pkt != NULL,
  rss_info,
  &rss, &mrq,
  &status_flags, &ip_id,
- &d->special);
-d->errors = (uint8_t) (le32_to_cpu(status_flags) >> 24);
-d->status = (uint8_t) le32_to_cpu(status_flags);
+   

[PATCH v3] linux-user: Fix brk() to release pages

2022-12-25 Thread Helge Deller
The current brk() implementation does not de-allocate pages if a lower
address is given compared to earlier brk() calls.
But according to the manpage, brk() shall deallocate memory in this case
and currently it breaks a real-world application, specifically building
the debian gcl package in qemu-user.

Fix this issue by reworking the qemu brk() implementation.

Tested with the C-code testcase included in qemu commit 4d1de87c750, and
by building debian package of gcl in a hppa-linux guest on a x86-64
host.

Signed-off-by: Helge Deller 

---
v3:
- Fixed one bug where page aligned address was returned instead
  of requested address.
- Dropped debug info which is partly outdated now
- Reduced number of changed lines in diff compared to v2 to make
  diff easier readable
- Fixed changelog of v2
v2:
- Fixed return value of brk(). The v1 version wrongly page-aligned
  the provided address, while userspace expects an unmodified
  address returned.


diff --git a/linux-user/syscall.c b/linux-user/syscall.c
index 4fee882cd7..7d28802aa6 100644
--- a/linux-user/syscall.c
+++ b/linux-user/syscall.c
@@ -838,49 +838,52 @@ static inline int host_to_target_sock_type(int host_type)
 }

 static abi_ulong target_brk;
-static abi_ulong target_original_brk;
 static abi_ulong brk_page;

 void target_set_brk(abi_ulong new_brk)
 {
-target_original_brk = target_brk = HOST_PAGE_ALIGN(new_brk);
+target_brk = new_brk;
 brk_page = HOST_PAGE_ALIGN(target_brk);
 }

-//#define DEBUGF_BRK(message, args...) do { fprintf(stderr, (message), ## 
args); } while (0)
-#define DEBUGF_BRK(message, args...)
-
 /* do_brk() must return target values and target errnos. */
-abi_long do_brk(abi_ulong new_brk)
+abi_long do_brk(abi_ulong brk_val)
 {
 abi_long mapped_addr;
 abi_ulong new_alloc_size;
+abi_ulong new_brk, new_host_brk_page;

 /* brk pointers are always untagged */

-DEBUGF_BRK("do_brk(" TARGET_ABI_FMT_lx ") -> ", new_brk);
-
-if (!new_brk) {
-DEBUGF_BRK(TARGET_ABI_FMT_lx " (!new_brk)\n", target_brk);
+/* return old brk value if brk_val unchanged or zero */
+if (!brk_val || brk_val == target_brk) {
 return target_brk;
 }
-if (new_brk < target_original_brk) {
-DEBUGF_BRK(TARGET_ABI_FMT_lx " (new_brk < target_original_brk)\n",
-   target_brk);
+
+new_brk = TARGET_PAGE_ALIGN(brk_val);
+new_host_brk_page = HOST_PAGE_ALIGN(brk_val);
+
+/* brk_val and old target_brk might be on the same page */
+if (new_brk == TARGET_PAGE_ALIGN(target_brk)) {
+if (brk_val > target_brk) {
+/* empty remaining bytes in (possibly larger) host page */
+memset(g2h_untagged(target_brk), 0, new_host_brk_page - 
target_brk);
+}
+target_brk = brk_val;
 return target_brk;
 }

-/* If the new brk is less than the highest page reserved to the
- * target heap allocation, set it and we're almost done...  */
-if (new_brk <= brk_page) {
-/* Heap contents are initialized to zero, as for anonymous
- * mapped pages.  */
-if (new_brk > target_brk) {
-memset(g2h_untagged(target_brk), 0, new_brk - target_brk);
-}
-   target_brk = new_brk;
-DEBUGF_BRK(TARGET_ABI_FMT_lx " (new_brk <= brk_page)\n", target_brk);
-   return target_brk;
+/* Release heap if necesary */
+if (new_brk < target_brk) {
+/* empty remaining bytes in (possibly larger) host page */
+memset(g2h_untagged(brk_val), 0, new_host_brk_page - brk_val);
+
+/* free unused host pages and set new brk_page */
+target_munmap(new_host_brk_page, brk_page - new_host_brk_page);
+brk_page = new_host_brk_page;
+
+target_brk = brk_val;
+return target_brk;
 }

 /* We need to allocate more memory after the brk... Note that
@@ -889,10 +892,14 @@ abi_long do_brk(abi_ulong new_brk)
  * itself); instead we treat "mapped but at wrong address" as
  * a failure and unmap again.
  */
-new_alloc_size = HOST_PAGE_ALIGN(new_brk - brk_page);
-mapped_addr = get_errno(target_mmap(brk_page, new_alloc_size,
+new_alloc_size = new_host_brk_page - brk_page;
+if (new_alloc_size) {
+mapped_addr = get_errno(target_mmap(brk_page, new_alloc_size,
 PROT_READ|PROT_WRITE,
 MAP_ANON|MAP_PRIVATE, 0, 0));
+} else {
+mapped_addr = brk_page;
+}

 if (mapped_addr == brk_page) {
 /* Heap contents are initialized to zero, as for anonymous
@@ -904,10 +911,8 @@ abi_long do_brk(abi_ulong new_brk)
  * then shrunken).  */
 memset(g2h_untagged(target_brk), 0, brk_page - target_brk);

-target_brk = new_brk;
-brk_page = HOST_PAGE_ALIGN(target_brk);
-DEBUGF_BRK(TARGET_ABI_FMT_lx " (mapped_addr == brk_page)\n",
-target_brk);
+target_brk = brk_val;
+brk_page = new_host_brk_page