date:20240204

[PATCH 06/15] qapi: Require member documentation (with loophole)

2024-02-04 Thread Markus Armbruster

The QAPI generator forces you to document your stuff.  Except for
command arguments, event data, and members of enum and object types:
these the generator silently "documents" as "Not documented".

We can't require proper documentation there without first fixing all
the offenders.  We've always had too many offenders to pull that off.
Right now, we have more than 500.  Worse, we seem to fix old ones no
faster than we add new ones: in the past year, we fixed 22 ones, but
added 26 new ones.

To help arrest the backsliding, make missing documentation an error
unless the command, type, or event is listed in new pragma
documentation-exceptions.

List all the current offenders: 117 commands and types in qapi/, and 9
in qga/.

Signed-off-by: Markus Armbruster 
---
 docs/devel/qapi-code-gen.rst  |   5 +
 qapi/pragma.json  | 119 ++
 qga/qapi-schema.json  |  13 +-
 scripts/qapi/parser.py|   7 +-
 scripts/qapi/source.py|   2 +
 .../qapi-schema/doc-bad-alternate-member.json |   2 +
 tests/qapi-schema/doc-good.json   |   4 +-
 7 files changed, 149 insertions(+), 3 deletions(-)

diff --git a/docs/devel/qapi-code-gen.rst b/docs/devel/qapi-code-gen.rst
index 69c8a1e8bd..756adc187e 100644
--- a/docs/devel/qapi-code-gen.rst
+++ b/docs/devel/qapi-code-gen.rst
@@ -167,6 +167,7 @@ Syntax::
'*doc-required': BOOL,
'*command-name-exceptions': [ STRING, ... ],
'*command-returns-exceptions': [ STRING, ... ],
+   '*documentation-exceptions': [ STRING, ... ],
'*member-name-exceptions': [ STRING, ... ] } }
 
 The pragma directive lets you control optional generator behavior.
@@ -183,6 +184,10 @@ may contain ``"_"`` instead of ``"-"``.  Default is none.
 Pragma 'command-returns-exceptions' takes a list of commands that may
 violate the rules on permitted return types.  Default is none.
 
+Pragma 'documentation-exceptions' takes a list of types, commands, and
+events whose members / arguments need not be documented.  Default is
+none.
+
 Pragma 'member-name-exceptions' takes a list of types whose member
 names may contain uppercase letters, and ``"_"`` instead of ``"-"``.
 Default is none.
diff --git a/qapi/pragma.json b/qapi/pragma.json
index 0aa4eeddd3..0fa64742b5 100644
--- a/qapi/pragma.json
+++ b/qapi/pragma.json
@@ -31,6 +31,125 @@
 'query-tpm-models',
 'query-tpm-types',
 'ringbuf-read' ],
+# Types, commands, and events with undocumented members / arguments:
+'documentation-exceptions': [
+'AbortWrapper',
+'AudiodevDriver',
+'BlkdebugEvent',
+'BlockDirtyBitmapAddWrapper',
+'BlockDirtyBitmapMergeWrapper',
+'BlockDirtyBitmapWrapper',
+'BlockExportOptions',
+'BlockStatsSpecific',
+'BlockdevBackupWrapper',
+'BlockdevDriver',
+'BlockdevQcow2Encryption',
+'BlockdevQcow2EncryptionFormat',
+'BlockdevQcowEncryption',
+'BlockdevSnapshotInternalWrapper',
+'BlockdevSnapshotSyncWrapper',
+'BlockdevSnapshotWrapper',
+'BlockdevVmdkAdapterType',
+'ChardevBackend',
+'ChardevBackendKind',
+'ChardevCommonWrapper',
+'ChardevDBusWrapper',
+'ChardevFileWrapper',
+'ChardevHostdevWrapper',
+'ChardevMuxWrapper',
+'ChardevQemuVDAgentWrapper',
+'ChardevRingbufWrapper',
+'ChardevSocketWrapper',
+'ChardevSpiceChannelWrapper',
+'ChardevSpicePortWrapper',
+'ChardevStdioWrapper',
+'ChardevUdpWrapper',
+'ChardevVCWrapper',
+'CpuS390Entitlement',
+'CpuS390Polarization',
+'CpuS390State',
+'CxlCorErrorType',
+'DisplayProtocol',
+'DriveBackupWrapper',
+'DummyBlockCoreForceArrays',
+'DummyForceArrays',
+'DummyVirtioForceArrays',
+'DumpGuestMemoryCapability',
+'GrabToggleKeys',
+'GuestPanicInformationHyperV',
+'HotKeyMod',
+'HvBalloonDeviceInfoWrapper',
+'ImageInfoSpecific',
+'ImageInfoSpecificFileWrapper',
+'ImageInfoSpecificKind',
+'ImageInfoSpecificLUKSWrapper',
+'ImageInfoSpecificQCow2Wrapper',
+'ImageInfoSpecificRbdWrapper',
+'ImageInfoSpecificVmdkWrapper',
+'InetSocketAddressWrapper',
+'InputAxis',
+'InputBtnEventWrapper',
+'InputButton',
+'InputKeyEventWrapper',
+'InputMoveEventWrapper',
+'InputMultiTouchEvent',
+'InputMultiTouchEventWrapper',
+'InputMultiTouchType',
+'IntWrapper',
+'IscsiHeaderDigest',
+'IscsiTransport',
+'JSONType',
+'KeyValue',
+'KeyValueKind',
+'MemoryDeviceInfo',
+'MemoryDeviceInfoKind',
+'MigrateSetParameters',

[PATCH 15/15] qapi: Add missing union tag documentation

2024-02-04 Thread Markus Armbruster

Low-hanging fruit, and except for StatsFilter, the only members of
these unions lacking documentation.

Signed-off-by: Markus Armbruster 
---
 qapi/block-core.json   | 12 
 qapi/block-export.json |  2 ++
 qapi/char.json |  2 ++
 qapi/crypto.json   |  2 ++
 qapi/machine.json  |  4 
 qapi/migration.json|  2 ++
 qapi/pragma.json   | 16 
 qapi/sockets.json  |  2 ++
 qapi/stats.json|  2 ++
 qapi/transaction.json  |  2 ++
 qapi/ui.json   |  2 ++
 qapi/yank.json |  2 ++
 12 files changed, 34 insertions(+), 16 deletions(-)

diff --git a/qapi/block-core.json b/qapi/block-core.json
index 55b583f079..ded6437c06 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -252,6 +252,8 @@
 # A discriminated record of image format specific information
 # structures.
 #
+# @type: block driver name
+#
 # Since: 1.7
 ##
 { 'union': 'ImageInfoSpecific',
@@ -1102,6 +1104,8 @@
 #
 # Block driver specific statistics
 #
+# @driver: block driver name
+#
 # Since: 4.2
 ##
 { 'union': 'BlockStatsSpecific',
@@ -3472,6 +3476,8 @@
 ##
 # @BlockdevQcowEncryption:
 #
+# @format: encryption format
+#
 # Since: 2.10
 ##
 { 'union': 'BlockdevQcowEncryption',
@@ -3506,6 +3512,8 @@
 ##
 # @BlockdevQcow2Encryption:
 #
+# @format: encryption format
+#
 # Since: 2.10
 ##
 { 'union': 'BlockdevQcow2Encryption',
@@ -3656,6 +3664,8 @@
 ##
 # @SshHostKeyCheck:
 #
+# @mode: How to check the host key
+#
 # Since: 2.12
 ##
 { 'union': 'SshHostKeyCheck',
@@ -4225,6 +4235,8 @@
 ##
 # @RbdEncryptionCreateOptions:
 #
+# @format: Encryption format.
+#
 # Since: 6.1
 ##
 { 'union': 'RbdEncryptionCreateOptions',
diff --git a/qapi/block-export.json b/qapi/block-export.json
index e063e9255a..d9bd376b48 100644
--- a/qapi/block-export.json
+++ b/qapi/block-export.json
@@ -346,6 +346,8 @@
 # Describes a block export, i.e. how single node should be exported on
 # an external interface.
 #
+# @type: Block export type
+#
 # @id: A unique identifier for the block export (across all export
 # types)
 #
diff --git a/qapi/char.json b/qapi/char.json
index e3e1b2c9f5..390e3ef1b9 100644
--- a/qapi/char.json
+++ b/qapi/char.json
@@ -634,6 +634,8 @@
 #
 # Configuration info for the new chardev backend.
 #
+# @type: backend type
+#
 # Since: 1.4
 ##
 { 'union': 'ChardevBackend',
diff --git a/qapi/crypto.json b/qapi/crypto.json
index fd3d46ebd1..03de66e6f6 100644
--- a/qapi/crypto.json
+++ b/qapi/crypto.json
@@ -645,6 +645,8 @@
 # The options that are available for all asymmetric key algorithms
 # when creating a new QCryptoAkCipher.
 #
+# @alg: encryption cipher algorithm
+#
 # Since: 7.1
 ##
 { 'union': 'QCryptoAkCipherOptions',
diff --git a/qapi/machine.json b/qapi/machine.json
index 6a25e39f44..d816c5c02e 100644
--- a/qapi/machine.json
+++ b/qapi/machine.json
@@ -443,6 +443,8 @@
 #
 # A discriminated record of NUMA options.  (for OptsVisitor)
 #
+# @type: NUMA option type
+#
 # Since: 2.1
 ##
 { 'union': 'NumaOptions',
@@ -1448,6 +1450,8 @@
 #
 # Union containing information about a memory device
 #
+# @type: memory device type
+#
 # Since: 2.1
 ##
 { 'union': 'MemoryDeviceInfo',
diff --git a/qapi/migration.json b/qapi/migration.json
index bf89765a26..7c8881abda 100644
--- a/qapi/migration.json
+++ b/qapi/migration.json
@@ -1630,6 +1630,8 @@
 #
 # Migration endpoint configuration.
 #
+# @transport: The migration stream transport mechanism
+#
 # Since: 8.2
 ##
 { 'union': 'MigrationAddress',
diff --git a/qapi/pragma.json b/qapi/pragma.json
index d5e3f6f142..7ac05ccc26 100644
--- a/qapi/pragma.json
+++ b/qapi/pragma.json
@@ -39,18 +39,13 @@
 'BlockDirtyBitmapAddWrapper',
 'BlockDirtyBitmapMergeWrapper',
 'BlockDirtyBitmapWrapper',
-'BlockExportOptions',
-'BlockStatsSpecific',
 'BlockdevBackupWrapper',
 'BlockdevDriver',
-'BlockdevQcow2Encryption',
 'BlockdevQcow2EncryptionFormat',
-'BlockdevQcowEncryption',
 'BlockdevSnapshotInternalWrapper',
 'BlockdevSnapshotSyncWrapper',
 'BlockdevSnapshotWrapper',
 'BlockdevVmdkAdapterType',
-'ChardevBackend',
 'ChardevBackendKind',
 'CpuS390Entitlement',
 'CpuS390Polarization',
@@ -64,7 +59,6 @@
 'GrabToggleKeys',
 'GuestPanicInformationHyperV',
 'HotKeyMod',
-'ImageInfoSpecific',
 'ImageInfoSpecificKind',
 'InputAxis',
 'InputButton',
@@ -73,38 +67,28 @@
 'IscsiHeaderDigest',
 'IscsiTransport',
 'JSONType',
-'KeyValue',
 'KeyValueKind',
-'MemoryDeviceInfo',
 'MemoryDeviceInfoKind',
 'MigrateSetParameters',
-'MigrationAddress',
 'NetClientDriver',
-'NumaOptions',
 'ObjectType',
 'PciMemoryRegion',
 'QCryptoAkCipherKeyType',
-'QCryptoAkCipherOptions',
 'QCryptodevBackendServiceType',
 'QKeyCode',

[PATCH 08/15] qga/qapi-schema: Clean up documentation of guest-set-vcpus

2024-02-04 Thread Markus Armbruster

The command's doc comment describes the argument, but it's not marked
up as such.  Easy enough to fix.

Signed-off-by: Markus Armbruster 
---
 qga/qapi-schema.json | 20 ++--
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/qga/qapi-schema.json b/qga/qapi-schema.json
index 35bde36a1f..f3d168d542 100644
--- a/qga/qapi-schema.json
+++ b/qga/qapi-schema.json
@@ -42,8 +42,7 @@
 'GuestDeviceType',
 'GuestDiskSmart',
 'GuestDiskStatsInfo',
-'GuestNVMeSmart',
-'guest-set-vcpus' ] } }
+'GuestNVMeSmart' ] } }
 
 ##
 # @guest-sync-delimited:
@@ -786,14 +785,15 @@
 # Attempt to reconfigure (currently: enable/disable) logical
 # processors inside the guest.
 #
-# The input list is processed node by node in order.  In each node
-# @logical-id is used to look up the guest VCPU, for which @online
-# specifies the requested state.  The set of distinct @logical-id's is
-# only required to be a subset of the guest-supported identifiers.
-# There's no restriction on list length or on repeating the same
-# @logical-id (with possibly different @online field). Preferably the
-# input list should describe a modified subset of @guest-get-vcpus'
-# return value.
+# @vcpus: The logical processors to be reconfigured.  This list is
+# processed node by node in order.  In each node @logical-id is
+# used to look up the guest VCPU, for which @online specifies the
+# requested state.  The set of distinct @logical-id's is only
+# required to be a subset of the guest-supported identifiers.
+# There's no restriction on list length or on repeating the same
+# @logical-id (with possibly different @online field).  Preferably
+# the input list should describe a modified subset of
+# @guest-get-vcpus' return value.
 #
 # Returns: The length of the initial sublist that has been
 # successfully processed.  The guest agent maximizes this value.
-- 
2.43.0

[PATCH 00/15] qapi: Require member documentation (with loophole)

2024-02-04 Thread Markus Armbruster

The QAPI generator forces you to document your stuff.  Except for
command arguments, event data, and members of enum and object types:
these the generator silently "documents" as "Not documented".

We can't require proper documentation there without first fixing all
the offenders.  We've always had too many offenders to pull that off.
Right now, we have more than 500.  Worse, we seem to fix old ones no
faster than we add new ones: in the past year, we fixed 22 ones, but
added 26 new ones.

PATCH 01-05 are bonus fixes & cleanups.

PATCH 06 makes missing documentation an error unless the command,
type, or event is listed in new pragma documentation-exceptions.

PATCH 07-09 improve the "QEMU Guest Agent Protocol Reference" manual:
they document eight members and arguments, reducing the number of
offending commands and types from nine to one.  The 25 members of type
GuestNVMeSmart are left undocumented.

PATCH 10-15 improve the "QEMU QMP Reference Manual", but
only a bit: they document 54 members and arguments, reducing the number of
offending commands and types from 117 to 65.  467 members and
arguments are left undocumented.  A few of them are not actually used
in QMP, and documenting them is not worthwhile; they should be elided
from the manual instead.  Example: DummyForceArrays.

Remaining definitions with undocumented members:

FILE
DEFINITION #MISSING
--
qga/qapi-schema.json
GuestNVMeSmart 25
qapi/audio.json
AudiodevDriver 12
qapi/block-core.json
BlkdebugEvent 43
BlockdevDriver 39
BlockdevQcow2EncryptionFormat 1
BlockdevVmdkAdapterType 4
DummyBlockCoreForceArrays 1
ImageInfoSpecificKind 2
IscsiHeaderDigest 4
IscsiTransport 2
Qcow2OverlapCheckFlags 8
RbdAuthMode 2
RbdImageEncryptionFormat 2
ThrottleGroupProperties 19
XDbgBlockGraph 2
blockdev-reopen 1
qapi/char.json
ChardevBackendKind 6
qapi/common.json
GrabToggleKeys 6
qapi/crypto.json
QCryptoAkCipherKeyType 2
qapi/cryptodev.json
QCryptodevBackendServiceType 5
qapi/cxl.json
CxlCorErrorType 1
qapi/introspect.json
JSONType 8
qapi/machine-common.json
CpuS390Entitlement 4
qapi/machine-target.json
CpuS390Polarization 2
query-cpu-model-baseline 2
query-cpu-model-comparison 2
query-cpu-model-expansion 2
qapi/machine.json
CpuS390State 5
DummyForceArrays 1
MemoryDeviceInfoKind 1
SysEmuTarget 29
X86CPURegister32 8
qapi/migration.json
MigrateSetParameters 1
qapi/net.json
NetClientDriver 10
String 1
qapi/pci.json
PciMemoryRegion 1
qapi/qom.json
ObjectType 45
qapi/rocker.json
query-rocker 1
query-rocker-ports 1
qapi/run-state.json
GuestPanicInformationHyperV 5
watchdog-set-action 1
qapi/stats.json
StatsFilter 1
StatsValue 1
query-stats-schemas 1
qapi/transaction.json
AbortWrapper 1
BlockDirtyBitmapAddWrapper 1
BlockDirtyBitmapMergeWrapper 1
BlockDirtyBitmapWrapper 1
BlockdevBackupWrapper 1
BlockdevSnapshotInternalWrapper 1
BlockdevSnapshotSyncWrapper 1
BlockdevSnapshotWrapper 1
DriveBackupWrapper 1
qapi/ui.json
DisplayProtocol 2
HotKeyMod 3
InputAxis 2
InputButton 7
InputMultiTouchEvent 1
InputMultiTouchType 5
KeyValueKind 2
QKeyCode 119
VncPrimaryAuth 9
VncVencryptSubAuth 9
qapi/virtio.json
DummyVirtioForceArrays 1
qapi/yank.json
YankInstanceType 3

Markus Armbruster (15):
  docs/devel/qapi-code-gen: Normalize version refs x.y.0 to just x.y
  docs/devel/qapi-code-gen: Tweak doc comment whitespace
  qapi/block-core: Fix BlockLatencyHistogramInfo doc markup
  qapi: Indent tagged doc comment sections properly
  sphinx/qapidoc: Drop code to generate doc for simple union tag
  qapi: Require member documentation (with loophole)
  qga/qapi-schema: Clean up documentation of guest-set-memory-blocks
  qga/qapi-schema: Clean up documentation of guest-set-vcpus
  qga/qapi-schema: Plug trivial documentation holes
  qapi/yank: Clean up documentation of yank
  qapi/dump: Clean up documentation of DumpGuestMemoryCapability
  qapi: Plug trivial documentation holes around former simple unions
  qapi: Improve documentation of file descriptor socket addresses
  qapi: Move @String out of common.json to discourage reuse
  qapi: Add missing union tag documentation

 docs/devel/qapi-code-gen.rst  | 14 ++--
 docs/sphinx/qapidoc.py|  6 --
 qapi/block-core.json  | 26 ++-
 qapi/block-export.json|  2 +
 qapi/char.json

[PATCH 05/15] sphinx/qapidoc: Drop code to generate doc for simple union tag

2024-02-04 Thread Markus Armbruster

QAPISchemaGenRSTVisitor._nodes_for_members() has a special case to
auto-generate documentation for a union tag member of implicit (enum)
type that lacks documentation.

This was useful for simple unions, where the tag member's type was
implicit.  The only implicit enum type left today is 'QType'.  Not
worth a special case.  Drop.  No change to generated documentation.

Signed-off-by: Markus Armbruster 
---
 docs/sphinx/qapidoc.py | 6 --
 1 file changed, 6 deletions(-)

diff --git a/docs/sphinx/qapidoc.py b/docs/sphinx/qapidoc.py
index 658c288f8f..05b809af27 100644
--- a/docs/sphinx/qapidoc.py
+++ b/docs/sphinx/qapidoc.py
@@ -168,12 +168,6 @@ def _nodes_for_members(self, doc, what, base=None, variants=None):
 # TODO drop fallbacks when undocumented members are outlawed
 if section.text:
 defn = section.text
-elif (variants and variants.tag_member == section.member
-  and not section.member.type.doc_type()):
-values = section.member.type.member_names()
-defn = [nodes.Text('One of ')]
-defn.extend(intersperse([nodes.literal('', v) for v in values],
-nodes.Text(', ')))
 else:
 defn = [nodes.Text('Not documented')]
 
-- 
2.43.0

[PATCH 11/15] qapi/dump: Clean up documentation of DumpGuestMemoryCapability

2024-02-04 Thread Markus Armbruster

The type's doc comment describes its member, but it's not marked up as
such.  Easy enough to fix.

Signed-off-by: Markus Armbruster 
---
 qapi/dump.json   | 2 +-
 qapi/pragma.json | 1 -
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/qapi/dump.json b/qapi/dump.json
index 5cbc237ad9..1997c1d1d4 100644
--- a/qapi/dump.json
+++ b/qapi/dump.json
@@ -186,7 +186,7 @@
 ##
 # @DumpGuestMemoryCapability:
 #
-# A list of the available formats for dump-guest-memory
+# @formats: the available formats for dump-guest-memory
 #
 # Since: 2.0
 ##
diff --git a/qapi/pragma.json b/qapi/pragma.json
index 544f138afa..aea6384255 100644
--- a/qapi/pragma.json
+++ b/qapi/pragma.json
@@ -74,7 +74,6 @@
 'DummyBlockCoreForceArrays',
 'DummyForceArrays',
 'DummyVirtioForceArrays',
-'DumpGuestMemoryCapability',
 'GrabToggleKeys',
 'GuestPanicInformationHyperV',
 'HotKeyMod',
-- 
2.43.0

Re: [PATCH] tests/cdrom-test: Add cdrom test for LoongArch virt machine

2024-02-04 Thread Thomas Huth


On 05/02/2024 03.13, Bibo Mao wrote:

The cdrom test skips to execute on LoongArch system with command
"make check"


Are you sure the test is marked with "skip"? ... it should at least test 
with the "none" machine...?



this patch enables cdrom test for LoongArch virt
machine platform.

With this patch, cdrom test passes to run on LoongArch virt
machine type.

Signed-off-by: Bibo Mao 
---
  tests/qtest/cdrom-test.c | 3 +++
  1 file changed, 3 insertions(+)

diff --git a/tests/qtest/cdrom-test.c b/tests/qtest/cdrom-test.c
index 0945383789..c8b97d8d9a 100644
--- a/tests/qtest/cdrom-test.c
+++ b/tests/qtest/cdrom-test.c
@@ -271,6 +271,9 @@ int main(int argc, char **argv)
  const char *virtmachine[] = { "virt", NULL };
  add_cdrom_param_tests(virtmachine);
  }
+} else if (g_str_equal(arch, "loongarch64")) {
+const char *virtmachine[] = { "virt", NULL };
+add_cdrom_param_tests(virtmachine);
  } else {
  const char *nonemachine[] = { "none", NULL };
  add_cdrom_param_tests(nonemachine);


Anyway, using the virt machine is certainly better than the "none" machine, so:
Acked-by: Thomas Huth

[PATCH 03/15] qapi/block-core: Fix BlockLatencyHistogramInfo doc markup

2024-02-04 Thread Markus Armbruster

The description of @bins ends with a literal block:

# @bins: list of io request counts corresponding to histogram
# intervals, one more element than @boundaries has.  For the
# example above, @bins may be something like [3, 1, 5, 2], and
# corresponding histogram looks like:
#
# ::
#
#5|   *

Except it actually ends *before* the block: the unindented '::' line
starts a new section.  Makes no sense.

We could fix this by indenting the '::' line.  Instead, double the
colon at the end of the preceding paragraph, and drop the '::' line.

This shifts the box for the literal block right in generated
documentation, so it lines up with the description.

Fixes: commit a0fcff383b34 (qapi: Use rST markup for literal blocks)
Signed-off-by: Markus Armbruster 
---
 qapi/block-core.json | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/qapi/block-core.json b/qapi/block-core.json
index 781c9bd03e..80ed4122f2 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -656,9 +656,7 @@
 # @bins: list of io request counts corresponding to histogram
 # intervals, one more element than @boundaries has.  For the
 # example above, @bins may be something like [3, 1, 5, 2], and
-# corresponding histogram looks like:
-#
-# ::
+# corresponding histogram looks like::
 #
 #5|   *
 #4|   *
-- 
2.43.0

[PATCH 07/15] qga/qapi-schema: Clean up documentation of guest-set-memory-blocks

2024-02-04 Thread Markus Armbruster

The command's doc comment describes the argument, but it's not marked
up as such.  Easy enough to fix.

Signed-off-by: Markus Armbruster 
---
 qga/qapi-schema.json | 19 ++-
 1 file changed, 10 insertions(+), 9 deletions(-)

diff --git a/qga/qapi-schema.json b/qga/qapi-schema.json
index b9501c8c81..35bde36a1f 100644
--- a/qga/qapi-schema.json
+++ b/qga/qapi-schema.json
@@ -43,7 +43,6 @@
 'GuestDiskSmart',
 'GuestDiskStatsInfo',
 'GuestNVMeSmart',
-'guest-set-memory-blocks',
 'guest-set-vcpus' ] } }
 
 ##
@@ -1174,14 +1173,16 @@
 # Attempt to reconfigure (currently: enable/disable) state of memory
 # blocks inside the guest.
 #
-# The input list is processed node by node in order.  In each node
-# @phys-index is used to look up the guest MEMORY BLOCK, for which
-# @online specifies the requested state.  The set of distinct
-# @phys-index's is only required to be a subset of the guest-supported
-# identifiers.  There's no restriction on list length or on repeating
-# the same @phys-index (with possibly different @online field).
-# Preferably the input list should describe a modified subset of
-# @guest-get-memory-blocks' return value.
+# @mem-blks: The memory blocks to be reconfigured.  This list is
+# processed node by node in order.  In each node @phys-index is
+# used to look up the guest MEMORY BLOCK, for which @online
+# specifies the requested state.  The set of distinct
+# @phys-index's is only required to be a subset of the
+# guest-supported identifiers.  There's no restriction on list
+# length or on repeating the same @phys-index (with possibly
+# different @online field).  Preferably the input list should
+# describe a modified subset of @guest-get-memory-blocks' return
+# value.
 #
 # Returns: The operation results, it is a list of
 # @GuestMemoryBlockResponse, which is corresponding to the input
-- 
2.43.0

[PATCH 14/15] qapi: Move @String out of common.json to discourage reuse

2024-02-04 Thread Markus Armbruster

Use of String is problematic, because it results in awkward interface
documentation.  The previous commit cleaned up one instance.

Move String out of common.json next to its remaining users in net.json
to discourage reuse elsewhere.

Signed-off-by: Markus Armbruster 
---
 qapi/common.json | 11 ---
 qapi/net.json| 12 +++-
 include/net/filter.h |  2 +-
 3 files changed, 12 insertions(+), 13 deletions(-)

diff --git a/qapi/common.json b/qapi/common.json
index 6fed9cde1a..f1bb841951 100644
--- a/qapi/common.json
+++ b/qapi/common.json
@@ -51,17 +51,6 @@
 { 'enum': 'OnOffSplit',
   'data': [ 'on', 'off', 'split' ] }
 
-##
-# @String:
-#
-# A fat type wrapping 'str', to be embedded in lists.
-#
-# Since: 1.2
-##
-{ 'struct': 'String',
-  'data': {
-'str': 'str' } }
-
 ##
 # @StrOrNull:
 #
diff --git a/qapi/net.json b/qapi/net.json
index 68493d6ac9..0a993e1a3d 100644
--- a/qapi/net.json
+++ b/qapi/net.json
@@ -6,7 +6,6 @@
 # = Net devices
 ##
 
-{ 'include': 'common.json' }
 { 'include': 'sockets.json' }
 
 ##
@@ -105,6 +104,17 @@
 '*addr':'str',
 '*vectors': 'uint32' } }
 
+##
+# @String:
+#
+# A fat type wrapping 'str', to be embedded in lists.
+#
+# Since: 1.2
+##
+{ 'struct': 'String',
+  'data': {
+'str': 'str' } }
+
 ##
 # @NetdevUserOptions:
 #
diff --git a/include/net/filter.h b/include/net/filter.h
index 27ffc630df..f15f7932b2 100644
--- a/include/net/filter.h
+++ b/include/net/filter.h
@@ -9,7 +9,7 @@
 #ifndef QEMU_NET_FILTER_H
 #define QEMU_NET_FILTER_H
 
-#include "qapi/qapi-types-net.h"
+#include "qapi/qapi-types-common.h"
 #include "qemu/queue.h"
 #include "qom/object.h"
 #include "net/queue.h"
-- 
2.43.0

[PATCH 12/15] qapi: Plug trivial documentation holes around former simple unions

2024-02-04 Thread Markus Armbruster

The conversion of simple to flat unions left the @data members
undocumented.  Add documentation where it's trivial.  Copy verbatim
from the wrapped type's description where possible.

Leftovers: String (to be taken care of in the next commit), and
TransactionAction (left for another day).

Signed-off-by: Markus Armbruster 
---
 qapi/block-core.json | 10 ++
 qapi/char.json   | 26 ++
 qapi/machine.json| 10 ++
 qapi/pragma.json | 34 --
 qapi/sockets.json|  6 ++
 qapi/tpm.json|  4 
 qapi/ui.json | 12 
 7 files changed, 68 insertions(+), 34 deletions(-)

diff --git a/qapi/block-core.json b/qapi/block-core.json
index 80ed4122f2..55b583f079 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -196,6 +196,8 @@
 ##
 # @ImageInfoSpecificQCow2Wrapper:
 #
+# @data: image information specific to QCOW2
+#
 # Since: 1.7
 ##
 { 'struct': 'ImageInfoSpecificQCow2Wrapper',
@@ -204,6 +206,8 @@
 ##
 # @ImageInfoSpecificVmdkWrapper:
 #
+# @data: image information specific to VMDK
+#
 # Since: 6.1
 ##
 { 'struct': 'ImageInfoSpecificVmdkWrapper',
@@ -212,6 +216,8 @@
 ##
 # @ImageInfoSpecificLUKSWrapper:
 #
+# @data: image information specific to LUKS
+#
 # Since: 2.7
 ##
 { 'struct': 'ImageInfoSpecificLUKSWrapper',
@@ -223,6 +229,8 @@
 ##
 # @ImageInfoSpecificRbdWrapper:
 #
+# @data: image information specific to RBD
+#
 # Since: 6.1
 ##
 { 'struct': 'ImageInfoSpecificRbdWrapper',
@@ -231,6 +239,8 @@
 ##
 # @ImageInfoSpecificFileWrapper:
 #
+# @data: image information specific to files
+#
 # Since: 8.0
 ##
 { 'struct': 'ImageInfoSpecificFileWrapper',
diff --git a/qapi/char.json b/qapi/char.json
index 6c6ad3b10c..e3e1b2c9f5 100644
--- a/qapi/char.json
+++ b/qapi/char.json
@@ -498,6 +498,8 @@
 ##
 # @ChardevFileWrapper:
 #
+# @data: Configuration info for file chardevs
+#
 # Since: 1.4
 ##
 { 'struct': 'ChardevFileWrapper',
@@ -506,6 +508,8 @@
 ##
 # @ChardevHostdevWrapper:
 #
+# @data: Configuration info for device and pipe chardevs
+#
 # Since: 1.4
 ##
 { 'struct': 'ChardevHostdevWrapper',
@@ -514,6 +518,8 @@
 ##
 # @ChardevSocketWrapper:
 #
+# @data: Configuration info for (stream) socket chardevs
+#
 # Since: 1.4
 ##
 { 'struct': 'ChardevSocketWrapper',
@@ -522,6 +528,8 @@
 ##
 # @ChardevUdpWrapper:
 #
+# @data: Configuration info for datagram socket chardevs
+#
 # Since: 1.5
 ##
 { 'struct': 'ChardevUdpWrapper',
@@ -530,6 +538,8 @@
 ##
 # @ChardevCommonWrapper:
 #
+# @data: Configuration shared across all chardev backends
+#
 # Since: 2.6
 ##
 { 'struct': 'ChardevCommonWrapper',
@@ -538,6 +548,8 @@
 ##
 # @ChardevMuxWrapper:
 #
+# @data: Configuration info for mux chardevs
+#
 # Since: 1.5
 ##
 { 'struct': 'ChardevMuxWrapper',
@@ -546,6 +558,8 @@
 ##
 # @ChardevStdioWrapper:
 #
+# @data: Configuration info for stdio chardevs
+#
 # Since: 1.5
 ##
 { 'struct': 'ChardevStdioWrapper',
@@ -554,6 +568,8 @@
 ##
 # @ChardevSpiceChannelWrapper:
 #
+# @data: Configuration info for spice vm channel chardevs
+#
 # Since: 1.5
 ##
 { 'struct': 'ChardevSpiceChannelWrapper',
@@ -563,6 +579,8 @@
 ##
 # @ChardevSpicePortWrapper:
 #
+# @data: Configuration info for spice port chardevs
+#
 # Since: 1.5
 ##
 { 'struct': 'ChardevSpicePortWrapper',
@@ -572,6 +590,8 @@
 ##
 # @ChardevQemuVDAgentWrapper:
 #
+# @data: Configuration info for qemu vdagent implementation
+#
 # Since: 6.1
 ##
 { 'struct': 'ChardevQemuVDAgentWrapper',
@@ -581,6 +601,8 @@
 ##
 # @ChardevDBusWrapper:
 #
+# @data: Configuration info for DBus chardevs
+#
 # Since: 7.0
 ##
 { 'struct': 'ChardevDBusWrapper',
@@ -590,6 +612,8 @@
 ##
 # @ChardevVCWrapper:
 #
+# @data: Configuration info for virtual console chardevs
+#
 # Since: 1.5
 ##
 { 'struct': 'ChardevVCWrapper',
@@ -598,6 +622,8 @@
 ##
 # @ChardevRingbufWrapper:
 #
+# @data: Configuration info for ring buffer chardevs
+#
 # Since: 1.5
 ##
 { 'struct': 'ChardevRingbufWrapper',
diff --git a/qapi/machine.json b/qapi/machine.json
index aa99fa333f..6a25e39f44 100644
--- a/qapi/machine.json
+++ b/qapi/machine.json
@@ -1396,6 +1396,8 @@
 ##
 # @PCDIMMDeviceInfoWrapper:
 #
+# @data: PCDIMMDevice state information
+#
 # Since: 2.1
 ##
 { 'struct': 'PCDIMMDeviceInfoWrapper',
@@ -1404,6 +1406,8 @@
 ##
 # @VirtioPMEMDeviceInfoWrapper:
 #
+# @data: VirtioPMEM state information
+#
 # Since: 2.1
 ##
 { 'struct': 'VirtioPMEMDeviceInfoWrapper',
@@ -1412,6 +1416,8 @@
 ##
 # @VirtioMEMDeviceInfoWrapper:
 #
+# @data: VirtioMEMDevice state information
+#
 # Since: 2.1
 ##
 { 'struct': 'VirtioMEMDeviceInfoWrapper',
@@ -1420,6 +1426,8 @@
 ##
 # @SgxEPCDeviceInfoWrapper:
 #
+# @data: Sgx EPC state information
+#
 # Since: 6.2
 ##
 { 'struct': 'SgxEPCDeviceInfoWrapper',
@@ -1428,6 +1436,8 @@
 ##
 # @HvBalloonDeviceInfoWrapper:
 #
+# @data: hv-balloon provided memory state information
+#
 # Since: 8.2
 ##
 { 'struct': 'HvBalloonDeviceInfoWrapper',
diff --git a/qapi/pragma.json b/qapi/pragma.json
index

[PATCH 10/15] qapi/yank: Clean up documentation of yank

2024-02-04 Thread Markus Armbruster

The command's doc comment describes the argument, but it's not marked
up as such.  Easy enough to fix.

Signed-off-by: Markus Armbruster 
---
 qapi/pragma.json | 3 +--
 qapi/yank.json   | 2 +-
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/qapi/pragma.json b/qapi/pragma.json
index 0fa64742b5..544f138afa 100644
--- a/qapi/pragma.json
+++ b/qapi/pragma.json
@@ -148,8 +148,7 @@
 'query-rocker',
 'query-rocker-ports',
 'query-stats-schemas',
-'watchdog-set-action',
-'yank' ],
+'watchdog-set-action' ],
 # Externally visible types whose member names may use uppercase
 'member-name-exceptions': [ # visible in:
 'ACPISlotType', # query-acpi-ospm-status
diff --git a/qapi/yank.json b/qapi/yank.json
index 60eda20816..bfc71a07a6 100644
--- a/qapi/yank.json
+++ b/qapi/yank.json
@@ -74,7 +74,7 @@
 # Try to recover from hanging QEMU by yanking the specified instances.
 # See @YankInstance for more information.
 #
-# Takes a list of @YankInstance as argument.
+# @instances: the instances to be yanked
 #
 # Returns:
 # - Nothing on success
-- 
2.43.0

[PATCH 13/15] qapi: Improve documentation of file descriptor socket addresses

2024-02-04 Thread Markus Armbruster

SocketAddress branch @fd is documented in enum SocketAddressType,
unlike the other branches.  That's because the branch's type is String
from common.json.

Use a local copy of String, so we can put the documentation in the
usual place.

Signed-off-by: Markus Armbruster 
---
 qapi/sockets.json  | 40 +-
 include/hw/virtio/vhost-vsock-common.h |  1 +
 chardev/char-socket.c  |  2 +-
 util/qemu-sockets.c|  3 +-
 4 files changed, 31 insertions(+), 15 deletions(-)

diff --git a/qapi/sockets.json b/qapi/sockets.json
index c3b616731d..5e6af5504d 100644
--- a/qapi/sockets.json
+++ b/qapi/sockets.json
@@ -5,8 +5,6 @@
 # = Socket data types
 ##
 
-{ 'include': 'common.json' }
-
 ##
 # @NetworkAddressFamily:
 #
@@ -116,6 +114,24 @@
 'cid': 'str',
 'port': 'str' } }
 
+##
+# @FdSocketAddress:
+#
+# A file descriptor name or number.
+#
+# @str: decimal is for file descriptor number, otherwise it's a file
+# descriptor name.  Named file descriptors are permitted in
+# monitor commands, in combination with the 'getfd' command.
+# Decimal file descriptors are permitted at startup or other
+# contexts where no monitor context is active.
+#
+#
+# Since: 1.2
+##
+{ 'struct': 'FdSocketAddress',
+  'data': {
+'str': 'str' } }
+
 ##
 # @InetSocketAddressWrapper:
 #
@@ -147,12 +163,14 @@
   'data': { 'data': 'VsockSocketAddress' } }
 
 ##
-# @StringWrapper:
+# @FdSocketAddressWrapper:
+#
+# @data: file descriptor name or number
 #
 # Since: 1.3
 ##
-{ 'struct': 'StringWrapper',
-  'data': { 'data': 'String' } }
+{ 'struct': 'FdSocketAddressWrapper',
+  'data': { 'data': 'FdSocketAddress' } }
 
 ##
 # @SocketAddressLegacy:
@@ -173,7 +191,7 @@
 'inet': 'InetSocketAddressWrapper',
 'unix': 'UnixSocketAddressWrapper',
 'vsock': 'VsockSocketAddressWrapper',
-'fd': 'StringWrapper' } }
+'fd': 'FdSocketAddressWrapper' } }
 
 ##
 # @SocketAddressType:
@@ -186,11 +204,7 @@
 #
 # @vsock: VMCI address
 #
-# @fd: decimal is for file descriptor number, otherwise a file
-# descriptor name.  Named file descriptors are permitted in
-# monitor commands, in combination with the 'getfd' command.
-# Decimal file descriptors are permitted at startup or other
-# contexts where no monitor context is active.
+# @fd: Socket file descriptor
 #
 # Since: 2.9
 ##
@@ -200,7 +214,7 @@
 ##
 # @SocketAddress:
 #
-# Captures the address of a socket, which could also be a named file
+# Captures the address of a socket, which could also be a socket file
 # descriptor
 #
 # @type: Transport type
@@ -213,4 +227,4 @@
   'data': { 'inet': 'InetSocketAddress',
 'unix': 'UnixSocketAddress',
 'vsock': 'VsockSocketAddress',
-'fd': 'String' } }
+'fd': 'FdSocketAddress' } }
diff --git a/include/hw/virtio/vhost-vsock-common.h 
b/include/hw/virtio/vhost-vsock-common.h
index 93c782101d..75a74e8a99 100644
--- a/include/hw/virtio/vhost-vsock-common.h
+++ b/include/hw/virtio/vhost-vsock-common.h
@@ -11,6 +11,7 @@
 #ifndef QEMU_VHOST_VSOCK_COMMON_H
 #define QEMU_VHOST_VSOCK_COMMON_H
 
+#include "qapi/qapi-types-common.h"
 #include "hw/virtio/virtio.h"
 #include "hw/virtio/vhost.h"
 #include "qom/object.h"
diff --git a/chardev/char-socket.c b/chardev/char-socket.c
index 73947da188..ff8f845cca 100644
--- a/chardev/char-socket.c
+++ b/chardev/char-socket.c
@@ -1504,7 +1504,7 @@ static void qemu_chr_parse_socket(QemuOpts *opts, 
ChardevBackend *backend,
 };
 } else {
 addr->type = SOCKET_ADDRESS_TYPE_FD;
-addr->u.fd.data = g_new(String, 1);
+addr->u.fd.data = g_new(FdSocketAddress, 1);
 addr->u.fd.data->str = g_strdup(fd);
 }
 sock->addr = addr;
diff --git a/util/qemu-sockets.c b/util/qemu-sockets.c
index 83e84b1186..60c44b2b56 100644
--- a/util/qemu-sockets.c
+++ b/util/qemu-sockets.c
@@ -1464,7 +1464,8 @@ SocketAddress *socket_address_flatten(SocketAddressLegacy 
*addr_legacy)
 break;
 case SOCKET_ADDRESS_TYPE_FD:
 addr->type = SOCKET_ADDRESS_TYPE_FD;
-QAPI_CLONE_MEMBERS(String, &addr->u.fd, addr_legacy->u.fd.data);
+QAPI_CLONE_MEMBERS(FdSocketAddress, &addr->u.fd,
+   addr_legacy->u.fd.data);
 break;
 default:
 abort();
-- 
2.43.0

[PATCH 09/15] qga/qapi-schema: Plug trivial documentation holes

2024-02-04 Thread Markus Armbruster

Add missing return member documentation of guest-get-disks,
guest-get-devices, guest-get-diskstats, and guest-get-cpustats.

The NVMe SMART information returned by guest-get-disks remains
undocumented.  Add a TODO there.

Signed-off-by: Markus Armbruster 
---
 qga/qapi-schema.json | 24 ++--
 1 file changed, 14 insertions(+), 10 deletions(-)

diff --git a/qga/qapi-schema.json b/qga/qapi-schema.json
index f3d168d542..b8efe31897 100644
--- a/qga/qapi-schema.json
+++ b/qga/qapi-schema.json
@@ -36,12 +36,6 @@
 'guest-sync-delimited' ],
 # Types and commands with undocumented members:
 'documentation-exceptions': [
-'GuestCpuStats',
-'GuestCpuStatsType',
-'GuestDeviceId',
-'GuestDeviceType',
-'GuestDiskSmart',
-'GuestDiskStatsInfo',
 'GuestNVMeSmart' ] } }
 
 ##
@@ -944,6 +938,8 @@
 # NVMe smart information, based on NVMe specification, section
 # 
 #
+# TODO: document members briefly
+#
 # Since: 7.1
 ##
 { 'struct': 'GuestNVMeSmart',
@@ -978,7 +974,7 @@
 #
 # Disk type related smart information.
 #
-# - @nvme: NVMe disk smart
+# @type: disk bus type
 #
 # Since: 7.1
 ##
@@ -1499,6 +1495,8 @@
 
 ##
 # @GuestDeviceType:
+#
+# @pci: PCI device
 ##
 { 'enum': 'GuestDeviceType',
   'data': [ 'pci' ] }
@@ -1518,7 +1516,9 @@
 ##
 # @GuestDeviceId:
 #
-# Id of the device - @pci: PCI ID, since: 5.2
+# Id of the device
+#
+# @type: device type
 #
 # Since: 5.2
 ##
@@ -1700,6 +1700,8 @@
 # @major: major device number of disk
 #
 # @minor: minor device number of disk
+#
+# @stats: I/O statistics
 ##
 { 'struct': 'GuestDiskStatsInfo',
   'data': {'name': 'str',
@@ -1723,7 +1725,9 @@
 ##
 # @GuestCpuStatsType:
 #
-# An enumeration of OS type
+# Guest operating systems supporting CPU statistics
+#
+# @linux: Linux
 #
 # Since: 7.1
 ##
@@ -1780,7 +1784,7 @@
 #
 # Get statistics of each CPU in millisecond.
 #
-# - @linux: Linux style CPU statistics
+# @type: guest operating system
 #
 # Since: 7.1
 ##
-- 
2.43.0

[PATCH 02/15] docs/devel/qapi-code-gen: Tweak doc comment whitespace

2024-02-04 Thread Markus Armbruster

Missed in commit a937b6aa739 (qapi: Reformat doc comments to conform
to current conventions).

Signed-off-by: Markus Armbruster 
---
 docs/devel/qapi-code-gen.rst | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/docs/devel/qapi-code-gen.rst b/docs/devel/qapi-code-gen.rst
index 13d38dbb09..69c8a1e8bd 100644
--- a/docs/devel/qapi-code-gen.rst
+++ b/docs/devel/qapi-code-gen.rst
@@ -1019,7 +1019,7 @@ For example::
  # @device: If the stats are for a virtual block device, the name
  # corresponding to the virtual block device.
  #
- # @node-name: The node name of the device. (since 2.3)
+ # @node-name: The node name of the device.  (Since 2.3)
  #
  # ... more members ...
  #
@@ -1035,7 +1035,8 @@ For example::
  # Query the @BlockStats for all virtual block devices.
  #
  # @query-nodes: If true, the command will query all the block nodes
- # ... explain, explain ...  (since 2.3)
+ # ... explain, explain ...
+ # (Since 2.3)
  #
  # Returns: A list of @BlockStats for each virtual block devices.
  #
-- 
2.43.0

[PATCH 01/15] docs/devel/qapi-code-gen: Normalize version refs x.y.0 to just x.y

2024-02-04 Thread Markus Armbruster

Missed in commit 9bc6e893b72 (qapi: Normalize version references x.y.0
to just x.y).

Signed-off-by: Markus Armbruster 
---
 docs/devel/qapi-code-gen.rst | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/docs/devel/qapi-code-gen.rst b/docs/devel/qapi-code-gen.rst
index 76be722f4c..13d38dbb09 100644
--- a/docs/devel/qapi-code-gen.rst
+++ b/docs/devel/qapi-code-gen.rst
@@ -1023,7 +1023,7 @@ For example::
  #
  # ... more members ...
  #
- # Since: 0.14.0
+ # Since: 0.14
  ##
  { 'struct': 'BlockStats',
'data': {'*device': 'str', '*node-name': 'str',
@@ -1039,7 +1039,7 @@ For example::
  #
  # Returns: A list of @BlockStats for each virtual block devices.
  #
- # Since: 0.14.0
+ # Since: 0.14
  #
  # Example:
  #
-- 
2.43.0

[PATCH 04/15] qapi: Indent tagged doc comment sections properly

2024-02-04 Thread Markus Armbruster

docs/devel/qapi-code-gen demands that the "second and subsequent lines
of sections other than "Example"/"Examples" should be indented".
Commit a937b6aa739 (qapi: Reformat doc comments to conform to current
conventions) missed a few instances, and messed up a few others.
Clean that up.

Signed-off-by: Markus Armbruster 
---
 qapi/migration.json | 46 -
 qapi/misc.json  | 12 +
 qapi/qdev.json  | 12 -
 tests/qapi-schema/doc-good.json | 10 +++
 tests/qapi-schema/doc-good.out  |  2 +-
 5 files changed, 42 insertions(+), 40 deletions(-)

diff --git a/qapi/migration.json b/qapi/migration.json
index 819708321d..bf89765a26 100644
--- a/qapi/migration.json
+++ b/qapi/migration.json
@@ -1699,24 +1699,24 @@
 #
 # Notes:
 #
-# 1. The 'query-migrate' command should be used to check migration's
-#progress and final result (this information is provided by the
-#'status' member)
+# 1. The 'query-migrate' command should be used to check
+#migration's progress and final result (this information is
+#provided by the 'status' member)
 #
-# 2. All boolean arguments default to false
+# 2. All boolean arguments default to false
 #
-# 3. The user Monitor's "detach" argument is invalid in QMP and should
-#not be used
+# 3. The user Monitor's "detach" argument is invalid in QMP and
+#should not be used
 #
-# 4. The uri argument should have the Uniform Resource Identifier of
-#default destination VM. This connection will be bound to default
-#network.
+# 4. The uri argument should have the Uniform Resource Identifier
+#of default destination VM. This connection will be bound to
+#default network.
 #
-# 5. For now, number of migration streams is restricted to one, i.e
-#number of items in 'channels' list is just 1.
+# 5. For now, number of migration streams is restricted to one,
+#i.e number of items in 'channels' list is just 1.
 #
-# 6. The 'uri' and 'channels' arguments are mutually exclusive;
-#exactly one of the two should be present.
+# 6. The 'uri' and 'channels' arguments are mutually exclusive;
+#exactly one of the two should be present.
 #
 # Example:
 #
@@ -1781,20 +1781,20 @@
 #
 # Notes:
 #
-# 1. It's a bad idea to use a string for the uri, but it needs
-#to stay compatible with -incoming and the format of the uri
-#is already exposed above libvirt.
+# 1. It's a bad idea to use a string for the uri, but it needs to
+#stay compatible with -incoming and the format of the uri is
+#already exposed above libvirt.
 #
-# 2. QEMU must be started with -incoming defer to allow
-#migrate-incoming to be used.
+# 2. QEMU must be started with -incoming defer to allow
+#migrate-incoming to be used.
 #
-# 3. The uri format is the same as for -incoming
+# 3. The uri format is the same as for -incoming
 #
-# 5. For now, number of migration streams is restricted to one, i.e
-#number of items in 'channels' list is just 1.
+# 5. For now, number of migration streams is restricted to one,
+#i.e number of items in 'channels' list is just 1.
 #
-# 4. The 'uri' and 'channels' arguments are mutually exclusive;
-#exactly one of the two should be present.
+# 4. The 'uri' and 'channels' arguments are mutually exclusive;
+#exactly one of the two should be present.
 #
 # Example:
 #
diff --git a/qapi/misc.json b/qapi/misc.json
index 2ca8c39874..4108a0c951 100644
--- a/qapi/misc.json
+++ b/qapi/misc.json
@@ -348,9 +348,10 @@
 # - If file descriptor was not received, GenericError
 # - If @fdset-id is a negative value, GenericError
 #
-# Notes: The list of fd sets is shared by all monitor connections.
+# Notes:
+# The list of fd sets is shared by all monitor connections.
 #
-# If @fdset-id is not specified, a new fd set will be created.
+# If @fdset-id is not specified, a new fd set will be created.
 #
 # Since: 1.2
 #
@@ -379,10 +380,11 @@
 #
 # Since: 1.2
 #
-# Notes: The list of fd sets is shared by all monitor connections.
+# Notes:
+# The list of fd sets is shared by all monitor connections.
 #
-# If @fd is not specified, all file descriptors in @fdset-id will be
-# removed.
+# If @fd is not specified, all file descriptors in @fdset-id will
+# be removed.
 #
 # Example:
 #
diff --git a/qapi/qdev.json b/qapi/qdev.json
index 25bac5e611..3b3ccfa413 100644
--- a/qapi/qdev.json
+++ b/qapi/qdev.json
@@ -53,14 +53,14 @@
 #
 # Notes:
 #
-# 1. Additional arguments depend on the type.
+# 1. Additional arguments depend on the type.
 #
-# 2. For detailed information about this command, please refer to the
-#'docs/qdev-device-use.txt' file.
+# 2. For detailed information about this command, please refer to
+#the 'docs/qdev-device-use.txt' file.
 #
-# 3. It's possible to list device properties by running QEMU with the
-#"-device

Re: [PATCH v1 11/15] libvhost-user: Speedup gpa_to_mem_region() and vu_gpa_to_va()

2024-02-04 Thread David Hildenbrand


On 04.02.24 23:07, Raphael Norwitz wrote:

On Sun, Feb 4, 2024 at 9:51 AM David Hildenbrand  wrote:


On 04.02.24 03:10, Raphael Norwitz wrote:

One comment on this one.

On Fri, Feb 2, 2024 at 4:56 PM David Hildenbrand  wrote:


Let's speed up GPA to memory region / virtual address lookup. Store the
memory regions ordered by guest physical addresses, and use binary
search for address translation, as well as when adding/removing memory
regions.

Most importantly, this will speed up GPA->VA address translation when we
have many memslots.

Signed-off-by: David Hildenbrand 
---
   subprojects/libvhost-user/libvhost-user.c | 49 +--
   1 file changed, 45 insertions(+), 4 deletions(-)

diff --git a/subprojects/libvhost-user/libvhost-user.c 
b/subprojects/libvhost-user/libvhost-user.c
index d036b54ed0..75e47b7bb3 100644
--- a/subprojects/libvhost-user/libvhost-user.c
+++ b/subprojects/libvhost-user/libvhost-user.c
@@ -199,19 +199,30 @@ vu_panic(VuDev *dev, const char *msg, ...)
   static VuDevRegion *
   vu_gpa_to_mem_region(VuDev *dev, uint64_t guest_addr)
   {
-unsigned int i;
+int low = 0;
+int high = dev->nregions - 1;

   /*
* Memory regions cannot overlap in guest physical address space. Each
* GPA belongs to exactly one memory region, so there can only be one
* match.
+ *
+ * We store our memory regions ordered by GPA and can simply perform a
+ * binary search.
*/
-for (i = 0; i < dev->nregions; i++) {
-VuDevRegion *cur = &dev->regions[i];
+while (low <= high) {
+unsigned int mid = low + (high - low) / 2;
+VuDevRegion *cur = &dev->regions[mid];

   if (guest_addr >= cur->gpa && guest_addr < cur->gpa + cur->size) {
   return cur;
   }
+if (guest_addr >= cur->gpa + cur->size) {
+low = mid + 1;
+}
+if (guest_addr < cur->gpa) {
+high = mid - 1;
+}
   }
   return NULL;
   }
@@ -273,9 +284,14 @@ vu_remove_all_mem_regs(VuDev *dev)
   static void
   _vu_add_mem_reg(VuDev *dev, VhostUserMemoryRegion *msg_region, int fd)
   {
+const uint64_t start_gpa = msg_region->guest_phys_addr;
+const uint64_t end_gpa = start_gpa + msg_region->memory_size;
   int prot = PROT_READ | PROT_WRITE;
   VuDevRegion *r;
   void *mmap_addr;
+int low = 0;
+int high = dev->nregions - 1;
+unsigned int idx;

   DPRINT("Adding region %d\n", dev->nregions);
   DPRINT("guest_phys_addr: 0x%016"PRIx64"\n",
@@ -295,6 +311,29 @@ _vu_add_mem_reg(VuDev *dev, VhostUserMemoryRegion 
*msg_region, int fd)
   prot = PROT_NONE;
   }

+/*
+ * We will add memory regions into the array sorted by GPA. Perform a
+ * binary search to locate the insertion point: it will be at the low
+ * index.
+ */
+while (low <= high) {
+unsigned int mid = low + (high - low)  / 2;
+VuDevRegion *cur = &dev->regions[mid];
+
+/* Overlap of GPA addresses. */


Looks like this check will only catch if the new region is fully
contained within an existing region. I think we need to check whether
either start or end region are in the range, i.e.:


That check should cover all cases of overlaps, not just fully contained.

See the QEMU implementation of range_overlaps_range() that contains a
similar logic:

return !(range2->upb < range1->lob || range1->upb < range2->lob);

 !(range2->upb < range1->lob || range1->upb < range2->lob);
=  !(range2->upb < range1->lob) && !(range1->upb < range2->lob)
=   range2->upb >= range1->lob && range1->upb >= range2->lob
=   range1->lob <= range2->upb && range2->lob <= range1->upb

In QEMU, upb is inclusive, if it were exclusive (like we have here):

=   range1->lob < range2->upb && range2->lob < range1->upb

Which is what we have here with:

range1->lob = start_gpa
range1->upb = end_gpa
range2->lob = cur->gpa
range2->upb = cur->gpa + cur->size

Also if you are interested, see

https://stackoverflow.com/questions/3269434/whats-the-most-efficient-way-to-test-if-two-ranges-overlap

Thanks!


Got it, thanks for the full explanation. With that:

Reviewed-by: Raphael Norwitz 


Thanks!

--
Cheers,

David / dhildenb

Re: [PATCH v1 01/15] libvhost-user: Fix msg_region->userspace_addr computation

2024-02-04 Thread David Hildenbrand


On 04.02.24 23:01, Raphael Norwitz wrote:

On Sun, Feb 4, 2024 at 9:36 AM David Hildenbrand  wrote:


On 04.02.24 02:35, Raphael Norwitz wrote:

As a heads up, I've left Nutanix and updated it in MAINTAINERS. Will
be updating it again shortly so tagging these with my new work email.



Thanks for the fast review! The mail server already complained to me :)

Maybe consider adding yourself as reviewer for vhost as well? (which
covers libvhost-user), I took your mail address from git history, not
get_maintainers.pl.


I don't expect I'll have much time to review code outside of
vhost-user-blk/vhost-user-scsi, but happy to add an entry if it helps
folks tag me on relevant patches.


If it helps, it might make sense to split out libvhost-user into a 
separate MAINTAINERS section.


--
Cheers,

David / dhildenb

[PATCH] hw: riscv: Allow large kernels to boot by moving the initrd further away in RAM

2024-02-04 Thread Alexandre Ghiti

Currently, the initrd is placed at 128MB, which overlaps with the kernel
when it is large (for example syzbot kernels are). From the kernel side,
there is no reason we could not push the initrd further away in memory
to accommodate large kernels, so move the initrd to 512MB when possible.

Signed-off-by: Alexandre Ghiti 
---
 hw/riscv/boot.c | 12 ++--
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/hw/riscv/boot.c b/hw/riscv/boot.c
index 0ffca05189..9a367af2fa 100644
--- a/hw/riscv/boot.c
+++ b/hw/riscv/boot.c
@@ -188,13 +188,13 @@ static void riscv_load_initrd(MachineState *machine, 
uint64_t kernel_entry)
  * kernel is uncompressed it will not clobber the initrd. However
  * on boards without much RAM we must ensure that we still leave
  * enough room for a decent sized initrd, and on boards with large
- * amounts of RAM we must avoid the initrd being so far up in RAM
- * that it is outside lowmem and inaccessible to the kernel.
- * So for boards with less  than 256MB of RAM we put the initrd
- * halfway into RAM, and for boards with 256MB of RAM or more we put
- * the initrd at 128MB.
+ * amounts of RAM, we put the initrd at 512MB to allow large kernels
+ * to boot.
+ * So for boards with less than 1GB of RAM we put the initrd
+ * halfway into RAM, and for boards with 1GB of RAM or more we put
+ * the initrd at 512MB.
  */
-start = kernel_entry + MIN(mem_size / 2, 128 * MiB);
+start = kernel_entry + MIN(mem_size / 2, 512 * MiB);
 
 size = load_ramdisk(filename, start, mem_size - start);
 if (size == -1) {
-- 
2.39.2

Re: [PULL 00/47] nic-config.for-upstream queue

2024-02-04 Thread Thomas Huth


On 02/02/2024 16.40, Peter Maydell wrote:

On Fri, 2 Feb 2024 at 15:36, David Woodhouse  wrote:


On Fri, 2024-02-02 at 15:32 +0000, Peter Maydell wrote:


This fails "make check" because some of the qom-test and
test-hmp checks fail when the QEMU binary segfaults.

https://gitlab.com/qemu-project/qemu/-/jobs/6084552256
https://gitlab.com/qemu-project/qemu/-/jobs/6084044180


Thanks.  Any idea why that didn't show up in my own pipeline?
https://gitlab.com/dwmw2/qemu/-/pipelines/1160949234


I think because the failing runners are the aarch64 and
s390 host ones, which we don't let run for anything
except real merge-pullreq test runs because they're
limited resource. I guess that perhaps we have at some point
said "we don't need to run all the guest architectures
on all jobs"


It's rather "we cannot run all the guest architectures on all jobs due to 
time constraints"



and not noticed that this leaves the
coverage for the submaintainer only-uses-the-public-runners
CI testing with gaps.

CCing Alex and Thomas for possible suggestions.


Well, not everybody has access to non-x86 machines, so there's not too much 
we can do about this, can we? The only possibility is: We still have support 
for travis-CI, so if you've got a github account, you can test aarch64, 
s390x and ppc64le there before sending a pull request. The .travis-ci.yml 
file needs some love, though, one of the jobs is currently timing out ... 
it's been on my TODO list for weeks, but I just haven't found enough spare time 
to fix it properly yet.


 Thomas

Re: [PATCH 0/5] migration/multifd: Fix channel creation vs. cleanup races

2024-02-04 Thread Peter Xu

On Fri, Feb 02, 2024 at 04:11:23PM -0300, Fabiano Rosas wrote:
> Hi,
> 
> This contains 2 patches from my previous series addressing the
> p->running misuse and the TLS thread leak and 3 new patches to fix the
> cleanup-while-creating-threads race.
> 
> For the p->running I'm keeping the idea from the other series to
> remove p->running and use a more narrow p->thread_created flag. This
> flag is used only to inform whether the thread has been created so we can
> join it.
> 
> For the cleanup race I have moved some code around and added a
> semaphore to make multifd_save_setup() only return once all channel
> creation tasks have started.
> 
> The idea is that after multifd_save_setup() returns, no new creations
> are in flight and the p->thread_created flags will never change again,
> so they're enough to cause the cleanup code to wait for the threads to
> join.
> 
> CI run: https://gitlab.com/farosas/qemu/-/pipelines/1162798843
> 
> @Peter: I can rebase this on top of your series once we decide about
> it.

I have one thing to double check with you in patch 5, besides that the
whole set looks all good to me.  Copy Dan here in case he has any input.

If you confirm both sides (my replies to last patch of both this set and
the other lockless change of mine), feel free to repost directly based on
that series for v2.

Thanks,

-- 
Peter Xu

Re: [PATCH] target/riscv: Enable xtheadsync under user mode

2024-02-04 Thread Alistair Francis

On Sun, Feb 4, 2024 at 3:53 PM LIU Zhiwei  wrote:
>
> According to xtheadsync[1][2] documentation, it can be used in user mode and
> the behavior is the same as at other privilege levels.
>
> [1]:https://github.com/T-head-Semi/thead-extension-spec/blob/master/xtheadsync/sync.adoc
> [2]:https://github.com/T-head-Semi/thead-extension-spec/blob/master/xtheadsync/sync_i.adoc
>
> Signed-off-by: LIU Zhiwei 

Thanks!

Applied to riscv-to-apply.next

Alistair

> ---
>  target/riscv/insn_trans/trans_xthead.c.inc | 10 --
>  1 file changed, 10 deletions(-)
>
> diff --git a/target/riscv/insn_trans/trans_xthead.c.inc 
> b/target/riscv/insn_trans/trans_xthead.c.inc
> index dbb6411239..22488412d4 100644
> --- a/target/riscv/insn_trans/trans_xthead.c.inc
> +++ b/target/riscv/insn_trans/trans_xthead.c.inc
> @@ -992,7 +992,6 @@ static bool trans_th_sfence_vmas(DisasContext *ctx, 
> arg_th_sfence_vmas *a)
>  #endif
>  }
>
> -#ifndef CONFIG_USER_ONLY
>  static void gen_th_sync_local(DisasContext *ctx)
>  {
>  /*
> @@ -1003,14 +1002,12 @@ static void gen_th_sync_local(DisasContext *ctx)
>  tcg_gen_exit_tb(NULL, 0);
>  ctx->base.is_jmp = DISAS_NORETURN;
>  }
> -#endif
>
>  static bool trans_th_sync(DisasContext *ctx, arg_th_sync *a)
>  {
>  (void) a;
>  REQUIRE_XTHEADSYNC(ctx);
>
> -#ifndef CONFIG_USER_ONLY
>  REQUIRE_PRIV_MSU(ctx);
>
>  /*
> @@ -1019,9 +1016,6 @@ static bool trans_th_sync(DisasContext *ctx, 
> arg_th_sync *a)
>  gen_th_sync_local(ctx);
>
>  return true;
> -#else
> -return false;
> -#endif
>  }
>
>  static bool trans_th_sync_i(DisasContext *ctx, arg_th_sync_i *a)
> @@ -1029,7 +1023,6 @@ static bool trans_th_sync_i(DisasContext *ctx, 
> arg_th_sync_i *a)
>  (void) a;
>  REQUIRE_XTHEADSYNC(ctx);
>
> -#ifndef CONFIG_USER_ONLY
>  REQUIRE_PRIV_MSU(ctx);
>
>  /*
> @@ -1038,9 +1031,6 @@ static bool trans_th_sync_i(DisasContext *ctx, 
> arg_th_sync_i *a)
>  gen_th_sync_local(ctx);
>
>  return true;
> -#else
> -return false;
> -#endif
>  }
>
>  static bool trans_th_sync_is(DisasContext *ctx, arg_th_sync_is *a)
> --
> 2.25.1
>
>

Re: [PATCH 5/5] migration/multifd: Add a synchronization point for channel creation

2024-02-04 Thread Peter Xu

On Fri, Feb 02, 2024 at 04:11:28PM -0300, Fabiano Rosas wrote:
> It is possible that one of the multifd channels fails to be created at
> multifd_new_send_channel_async() while the rest of the channel
> creation tasks are still in flight.
> 
> This could lead to multifd_save_cleanup() executing the
> qemu_thread_join() loop too early and not waiting for the threads
> which haven't been created yet, leading to the freeing of resources
> that the newly created threads will try to access and crash.
> 
> Add a synchronization point after which there will be no attempts at
> thread creation and therefore calling multifd_save_cleanup() past that
> point will ensure it properly waits for the threads.
> 
> A note about performance: Prior to this patch, if a channel took too
> long to be established, other channels could finish connecting first
> and already start taking load. Now we're bounded by the
> slowest-connecting channel.

Yes, I think this should (hopefully!) be fine.

> 
> Signed-off-by: Fabiano Rosas 
> ---
>  migration/multifd.c | 67 +
>  1 file changed, 37 insertions(+), 30 deletions(-)
> 
> diff --git a/migration/multifd.c b/migration/multifd.c
> index 1851206352..888ac8b05d 100644
> --- a/migration/multifd.c
> +++ b/migration/multifd.c
> @@ -360,6 +360,11 @@ struct {
>  MultiFDPages_t *pages;
>  /* global number of generated multifd packets */
>  uint64_t packet_num;
> +/*
> + * Synchronization point past which no more channels will be
> + * created.
> + */
> +QemuSemaphore channels_created;
>  /* send channels ready */
>  QemuSemaphore channels_ready;
>  /*
> @@ -561,6 +566,7 @@ void multifd_save_cleanup(void)
>  error_free(local_err);
>  }
>  }
> +qemu_sem_destroy(&multifd_send_state->channels_created);
>  qemu_sem_destroy(&multifd_send_state->channels_ready);
>  g_free(multifd_send_state->params);
>  multifd_send_state->params = NULL;
> @@ -787,13 +793,6 @@ static void multifd_tls_outgoing_handshake(QIOTask *task,
>  trace_multifd_tls_outgoing_handshake_error(ioc, error_get_pretty(err));
>  
>  migrate_set_error(migrate_get_current(), err);
> -/*
> - * Error happen, mark multifd_send_thread status as 'quit' although it
> - * is not created, and then tell who pay attention to me.
> - */
> -p->quit = true;
> -qemu_sem_post(&multifd_send_state->channels_ready);
> -qemu_sem_post(&p->sem_sync);
>  error_free(err);
>  }
>  
> @@ -862,39 +861,37 @@ static bool multifd_channel_connect(MultiFDSendParams 
> *p,
>  return true;
>  }
>  
> -static void multifd_new_send_channel_cleanup(MultiFDSendParams *p,
> - QIOChannel *ioc, Error *err)
> -{
> - migrate_set_error(migrate_get_current(), err);
> - /* Error happen, we need to tell who pay attention to me */
> - qemu_sem_post(&multifd_send_state->channels_ready);
> - qemu_sem_post(&p->sem_sync);
> - /*
> -  * Although multifd_send_thread is not created, but main migration
> -  * thread need to judge whether it is running, so we need to mark
> -  * its status.
> -  */
> - p->quit = true;
> - object_unref(OBJECT(ioc));
> - error_free(err);
> -}
> -
>  static void multifd_new_send_channel_async(QIOTask *task, gpointer opaque)
>  {
>  MultiFDSendParams *p = opaque;
>  QIOChannel *ioc = QIO_CHANNEL(qio_task_get_source(task));
>  Error *local_err = NULL;
> +bool ret;
>  
>  trace_multifd_new_send_channel_async(p->id);
> -if (!qio_task_propagate_error(task, &local_err)) {
> -qio_channel_set_delay(ioc, false);
> -if (multifd_channel_connect(p, ioc, &local_err)) {
> -return;
> -}
> +
> +if (qio_task_propagate_error(task, &local_err)) {
> +ret = false;
> +goto out;
> +}
> +
> +qio_channel_set_delay(ioc, false);
> +ret = multifd_channel_connect(p, ioc, &local_err);
> +
> +out:
> +/*
> + * Here we're not interested whether creation succeeded, only that
> + * it happened at all.
> + */
> +qemu_sem_post(&multifd_send_state->channels_created);
> +if (ret) {
> +return;
>  }
>  
>  trace_multifd_new_send_channel_async_error(p->id, local_err);
> -multifd_new_send_channel_cleanup(p, ioc, local_err);
> +migrate_set_error(migrate_get_current(), local_err);
> +object_unref(OBJECT(ioc));
> +error_free(local_err);
>  }
>  
>  static void multifd_new_send_channel_create(gpointer opaque)
> @@ -918,6 +915,7 @@ bool multifd_save_setup(void)
>  multifd_send_state = g_malloc0(sizeof(*multifd_send_state));
>  multifd_send_state->params = g_new0(MultiFDSendParams, thread_count);
>  multifd_send_state->pages = multifd_pages_init(page_count);
> +qemu_sem_init(&multifd_send_state->channels_created, 0);
>  qemu_sem_init(&multifd_send_state->channels_ready, 0);
>  qatomic_set(&multifd_send_state->exiting, 0);
>  multifd_send_state->ops =

Re: [PATCH] target/riscv: Enable xtheadsync under user mode

2024-02-04 Thread Alistair Francis

On Sun, Feb 4, 2024 at 3:53 PM LIU Zhiwei  wrote:
>
> According to xtheadsync[1][2] documentation, it can be used in user mode and
> the behavior is the same as at other privilege levels.
>
> [1]:https://github.com/T-head-Semi/thead-extension-spec/blob/master/xtheadsync/sync.adoc
> [2]:https://github.com/T-head-Semi/thead-extension-spec/blob/master/xtheadsync/sync_i.adoc
>
> Signed-off-by: LIU Zhiwei 

Acked-by: Alistair Francis 

Alistair

> ---
>  target/riscv/insn_trans/trans_xthead.c.inc | 10 --
>  1 file changed, 10 deletions(-)
>
> diff --git a/target/riscv/insn_trans/trans_xthead.c.inc 
> b/target/riscv/insn_trans/trans_xthead.c.inc
> index dbb6411239..22488412d4 100644
> --- a/target/riscv/insn_trans/trans_xthead.c.inc
> +++ b/target/riscv/insn_trans/trans_xthead.c.inc
> @@ -992,7 +992,6 @@ static bool trans_th_sfence_vmas(DisasContext *ctx, 
> arg_th_sfence_vmas *a)
>  #endif
>  }
>
> -#ifndef CONFIG_USER_ONLY
>  static void gen_th_sync_local(DisasContext *ctx)
>  {
>  /*
> @@ -1003,14 +1002,12 @@ static void gen_th_sync_local(DisasContext *ctx)
>  tcg_gen_exit_tb(NULL, 0);
>  ctx->base.is_jmp = DISAS_NORETURN;
>  }
> -#endif
>
>  static bool trans_th_sync(DisasContext *ctx, arg_th_sync *a)
>  {
>  (void) a;
>  REQUIRE_XTHEADSYNC(ctx);
>
> -#ifndef CONFIG_USER_ONLY
>  REQUIRE_PRIV_MSU(ctx);
>
>  /*
> @@ -1019,9 +1016,6 @@ static bool trans_th_sync(DisasContext *ctx, 
> arg_th_sync *a)
>  gen_th_sync_local(ctx);
>
>  return true;
> -#else
> -return false;
> -#endif
>  }
>
>  static bool trans_th_sync_i(DisasContext *ctx, arg_th_sync_i *a)
> @@ -1029,7 +1023,6 @@ static bool trans_th_sync_i(DisasContext *ctx, 
> arg_th_sync_i *a)
>  (void) a;
>  REQUIRE_XTHEADSYNC(ctx);
>
> -#ifndef CONFIG_USER_ONLY
>  REQUIRE_PRIV_MSU(ctx);
>
>  /*
> @@ -1038,9 +1031,6 @@ static bool trans_th_sync_i(DisasContext *ctx, 
> arg_th_sync_i *a)
>  gen_th_sync_local(ctx);
>
>  return true;
> -#else
> -return false;
> -#endif
>  }
>
>  static bool trans_th_sync_is(DisasContext *ctx, arg_th_sync_is *a)
> --
> 2.25.1
>
>

Re: [PATCH 3/6] target/riscv: add remaining named features

2024-02-04 Thread Alistair Francis

On Fri, Feb 2, 2024 at 7:44 PM Daniel Henrique Barboza
 wrote:
>
>
>
> On 2/1/24 23:14, Alistair Francis wrote:
> > On Thu, Feb 1, 2024 at 5:15 AM Daniel Henrique Barboza
> >  wrote:
> >>
> >>
> >>
> >> On 1/29/24 22:10, Alistair Francis wrote:
> >>> On Fri, Jan 26, 2024 at 5:54 AM Daniel Henrique Barboza
> >>>  wrote:
> 
>  The RVA22U64 and RVA22S64 profiles mandate certain extensions that,
>  until now, we were implying that they were available.
> 
>  We can't do this anymore since named features also have a riscv,isa
>  entry.  Let's add them to riscv_cpu_named_features[].
> 
>  They will also need to be explicitly enabled in both profile
>  descriptions. TCG will enable the named features it already implements,
>  other accelerators are free to handle it as they like.
> 
>  After this patch, here's the riscv,isa from a buildroot using the
>  'rva22s64' CPU:
> 
> # cat /proc/device-tree/cpus/cpu@0/riscv,isa
>  rv64imafdc_zic64b_zicbom_zicbop_zicboz_ziccamoa_ziccif_zicclsm_ziccrse_
>  zicntr_zicsr_zifencei_zihintpause_zihpm_za64rs_zfhmin_zca_zcd_zba_zbb_
>  zbs_zkt_sscounterenw_sstvala_sstvecd_svade_svinval_svpbmt#
> 
>  Signed-off-by: Daniel Henrique Barboza 
>  ---
> target/riscv/cpu.c | 41 +-
> target/riscv/cpu_cfg.h |  9 +
> target/riscv/tcg/tcg-cpu.c | 19 +-
> 3 files changed, 59 insertions(+), 10 deletions(-)
> 
>  diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
>  index 28d3cfa8ce..1ecd8a57ed 100644
>  --- a/target/riscv/cpu.c
>  +++ b/target/riscv/cpu.c
>  @@ -101,6 +101,10 @@ const RISCVIsaExtData isa_edata_arr[] = {
> ISA_EXT_DATA_ENTRY(zicbom, PRIV_VERSION_1_12_0, ext_zicbom),
> ISA_EXT_DATA_ENTRY(zicbop, PRIV_VERSION_1_12_0, ext_zicbop),
> ISA_EXT_DATA_ENTRY(zicboz, PRIV_VERSION_1_12_0, ext_zicboz),
>  +ISA_EXT_DATA_ENTRY(ziccamoa, PRIV_VERSION_1_11_0, ext_ziccamoa),
>  +ISA_EXT_DATA_ENTRY(ziccif, PRIV_VERSION_1_11_0, ext_ziccif),
>  +ISA_EXT_DATA_ENTRY(zicclsm, PRIV_VERSION_1_11_0, ext_zicclsm),
>  +ISA_EXT_DATA_ENTRY(ziccrse, PRIV_VERSION_1_11_0, ext_ziccrse),
> ISA_EXT_DATA_ENTRY(zicond, PRIV_VERSION_1_12_0, ext_zicond),
> ISA_EXT_DATA_ENTRY(zicntr, PRIV_VERSION_1_12_0, ext_zicntr),
> ISA_EXT_DATA_ENTRY(zicsr, PRIV_VERSION_1_10_0, ext_zicsr),
>  @@ -109,6 +113,7 @@ const RISCVIsaExtData isa_edata_arr[] = {
> ISA_EXT_DATA_ENTRY(zihintpause, PRIV_VERSION_1_10_0, 
>  ext_zihintpause),
> ISA_EXT_DATA_ENTRY(zihpm, PRIV_VERSION_1_12_0, ext_zihpm),
> ISA_EXT_DATA_ENTRY(zmmul, PRIV_VERSION_1_12_0, ext_zmmul),
>  +ISA_EXT_DATA_ENTRY(za64rs, PRIV_VERSION_1_12_0, ext_za64rs),
> ISA_EXT_DATA_ENTRY(zacas, PRIV_VERSION_1_12_0, ext_zacas),
> ISA_EXT_DATA_ENTRY(zawrs, PRIV_VERSION_1_12_0, ext_zawrs),
> ISA_EXT_DATA_ENTRY(zfa, PRIV_VERSION_1_12_0, ext_zfa),
>  @@ -170,8 +175,12 @@ const RISCVIsaExtData isa_edata_arr[] = {
> ISA_EXT_DATA_ENTRY(smepmp, PRIV_VERSION_1_12_0, ext_smepmp),
> ISA_EXT_DATA_ENTRY(smstateen, PRIV_VERSION_1_12_0, ext_smstateen),
> ISA_EXT_DATA_ENTRY(ssaia, PRIV_VERSION_1_12_0, ext_ssaia),
>  +ISA_EXT_DATA_ENTRY(ssccptr, PRIV_VERSION_1_11_0, ext_ssccptr),
> ISA_EXT_DATA_ENTRY(sscofpmf, PRIV_VERSION_1_12_0, ext_sscofpmf),
>  +ISA_EXT_DATA_ENTRY(sscounterenw, PRIV_VERSION_1_12_0, 
>  ext_sscounterenw),
> ISA_EXT_DATA_ENTRY(sstc, PRIV_VERSION_1_12_0, ext_sstc),
>  +ISA_EXT_DATA_ENTRY(sstvala, PRIV_VERSION_1_12_0, ext_sstvala),
>  +ISA_EXT_DATA_ENTRY(sstvecd, PRIV_VERSION_1_12_0, ext_sstvecd),
> ISA_EXT_DATA_ENTRY(svade, PRIV_VERSION_1_11_0, ext_svade),
> ISA_EXT_DATA_ENTRY(svadu, PRIV_VERSION_1_12_0, ext_svadu),
> ISA_EXT_DATA_ENTRY(svinval, PRIV_VERSION_1_12_0, ext_svinval),
>  @@ -1523,6 +1532,22 @@ const RISCVCPUMultiExtConfig 
>  riscv_cpu_named_features[] = {
> MULTI_EXT_CFG_BOOL("svade", ext_svade, true),
> MULTI_EXT_CFG_BOOL("zic64b", ext_zic64b, true),
> 
>  +/*
>  + * cache-related extensions that are always enabled
>  + * since QEMU RISC-V does not have a cache model.
>  + */
>  +MULTI_EXT_CFG_BOOL("za64rs", ext_za64rs, true),
>  +MULTI_EXT_CFG_BOOL("ziccif", ext_ziccif, true),
>  +MULTI_EXT_CFG_BOOL("ziccrse", ext_ziccrse, true),
>  +MULTI_EXT_CFG_BOOL("ziccamoa", ext_ziccamoa, true),
>  +MULTI_EXT_CFG_BOOL("zicclsm", ext_zicclsm, true),
>  +MULTI_EXT_CFG_BOOL("ssccptr", ext_ssccptr, true),
>  +
>  +/* Other named features that QEMU TCG always implements */
>  +MULTI_EXT_CFG_BOOL("sstvecd", ext_sstvecd,

Re: [PATCH v4 0/4] target/riscv: SMBIOS support for RISC-V virt machine

2024-02-04 Thread Alistair Francis

On Wed, Jan 24, 2024 at 4:44 AM Heinrich Schuchardt
 wrote:
>
> Generate SMBIOS tables for the RISC-V mach-virt.
> Add CONFIG_SMBIOS=y to the RISC-V default config.
>
> With the series the following firmware tables are provided:
>
> etc/smbios/smbios-anchor
> etc/smbios/smbios-tables
>
> Add processor-family to the '-smbios type=4' command line options.
>
> v4:
> remove a superfluous #ifdef
> v3:
> use misa_mxl_max to determine bitness
> v2:
> set processor family
>
> Heinrich Schuchardt (4):
>   smbios: add processor-family option
>   smbios: function to set default processor family
>   target/riscv: SMBIOS support for RISC-V virt machine
>   qemu-options: enable -smbios option on RISC-V

Thanks!

Applied to riscv-to-apply.next

Alistair

>
>  hw/riscv/Kconfig |  1 +
>  hw/riscv/virt.c  | 42 
>  hw/smbios/smbios.c   | 20 +++--
>  include/hw/firmware/smbios.h |  1 +
>  qemu-options.hx  |  6 +++---
>  5 files changed, 65 insertions(+), 5 deletions(-)
>
> --
> 2.43.0
>
>

Re: Re: [PATCH 0/2] Export debug triggers as an extension

2024-02-04 Thread Alistair Francis

On Mon, Feb 5, 2024 at 2:50 PM Anup Patel  wrote:
>
> On Mon, Feb 5, 2024 at 9:36 AM Alistair Francis  wrote:
> >
> > On Mon, Jan 22, 2024 at 7:16 PM Andrew Jones  
> > wrote:
> > >
> > > On Mon, Jan 22, 2024 at 03:42:10PM +1000, Alistair Francis wrote:
> > > > > > From memory the "debug" property is for the original debug spec:
> > > > > > https://github.com/riscv/riscv-debug-spec/releases/tag/task_group_vote
> > > > > >
> > > > > > That was ratified and is an official extension. AFAIK this is what 
> > > > > > is
> > > > > > in physical hardware as well.
> > > > > >
> > > > > > The actual PDF says draft though, I'm not sure what's going on 
> > > > > > there.
> > > > > >
> > > > > > The debug spec doesn't have a Z* name, so it's just "debug", at 
> > > > > > least
> > > > > > AFAIK.
> > > > > >
> > > > > > "sdtrig" seems to be a new backwards-incompatible extension doing
> > > > > > basically the same thing. What a mess
> > > ...
> > > > >
> > > > > I've done a bit of digging and I agree things are quite messy. Here 
> > > > > are
> > > > > my discoveries:
> > > > >
> > > > > The debug option and the code for triggers was added in these commits:
> > > > >
> > > > > c9711bd778 target/riscv: cpu: Enable native debug feature
> > > > > 38b4e781a4 target/riscv: machine: Add debug state description
> > > > > b6092544fc target/riscv: csr: Hook debug CSR read/write
> > > > > 1acdb3b013 target/riscv: cpu: Add a config option for native debug
> > > > > 95799e36c1 target/riscv: Add initial support for the Sdtrig extension
> > > > >
> > > > > In March 2022 - since the commit refers to the Sdtrig extension name
> > > > > and from the date this was an implementation not of the ratified 0.13
> > > > > debug spec (which did not have Sdtrig as a separate extension) but
> > > > > rather a version of the in development 1.0 debug spec.
> > > >
> > > > Yeah... We used the "stable" from master. That is our mistake there.
> > > >
> > > > I'm pretty sure we targeted the 0.13. The "Sdtrig" was only added in
> > > > the v4 as the changelog says: "mention Sdtrig extension in the commit"
> > > >
> > > > >
> > > > > It's not trivial to tell if it's closer to the ratified 0.13 version 
> > > > > or
> > > > > the (hopefully soon to be frozen) 1.0 version.
> > > > >
> > > > > As the only part of the debug specification to be implemented is the
> > > > > triggers then effectively the debug option is x-sdtrig.
> > > > >
> > > > > I don't think there is any way for code running on the machine to
> > > > > identify what version of the debug is implemented - the appropriate
> > > > > register is only available for external debug. Once 1.0 is frozen then
> > > > > the presence of Sdtrig isa string would indicate 1.0 trigger support 
> > > > > is
> > > > > available.
> > > > >
> > > > > According to JIRA - https://jira.riscv.org/browse/RVS-981 the debug
> > > > > specification should freeze this month.
> > > > >
> > > > > How about considering this as a solution:
> > > > >
> > > > > - Add a new x-sdtrig option that defaults to false
> > > > > - Deprecate debug option - but retain it with default on
> > > >
> > > > We can't deprecate a ratified spec. The 0.13 just seems to call it
> > > > "debug" so that's what we are stuck with
> > > >
> > > > > - Add warning if triggers are used and x-sdtrig is not enabled
> > > > > - Update the trigger implementation to match frozen spec
> > > >
> > > > We will need to support two versions, as there are two ratified specs.
> > > >
> > >
> > > We'll likely want to be allowed to deprecate ratified extensions as riscv
> > > evolves. Despite best intentions, extensions may be designed and ratified
> > > which ultimately fail to be of much utility, and new extensions will
> > > supersede old extensions. If QEMU keeps every extension it adds, then
> > > we'll slow progress on new extensions by maintaining old extension code.
> > > The old extensions will also bitrot or waste CI resources getting tested
> > > for no reason.
> >
> > I agree that we might need to deprecate extensions.
> >
> > I'm not sure the debug extension is there though. The debug extension
> > is used in current shipping hardware and has been ratified. The Sdtrig
> > isn't even ratified yet
> > (https://lists.riscv.org/g/tech-announce/message/320)
>
> Is shipping real hardware OR ratification a requirement of
> QEMU patch acceptance ?

We will accept an extension when it is ratified. The question here is,
what if two ratified extensions conflict?

The answer to me seems that we need to support both of them. *Maybe* at
some point in the future we can then drop the debug extension. That
would require the Sdtrig extension to be widely used and debug not
used (which is why I brought up shipping hardware).

Alistair

>
> Regards,
> Anup
>
> >
> > Right now I feel that we should at least wait for hardware that
> > supports Sdtrig to start to come out. Then we can look at deprecating
> > debug. Deprecating it now seems a bit premature.
> >
> > Alistair

Re: [PATCH 4/5] migration/multifd: Move multifd_save_setup into migration thread

2024-02-04 Thread Peter Xu

On Fri, Feb 02, 2024 at 04:11:27PM -0300, Fabiano Rosas wrote:
> We currently have an unfavorable situation around multifd channels
> creation and the migration thread execution.
> 
> We create the multifd channels with qio_channel_socket_connect_async
> -> qio_task_run_in_thread, but only connect them at the
> multifd_new_send_channel_async callback, called from
> qio_task_complete, which is registered as a glib event.
> 
> So at multifd_save_setup() we create the channels, but they will only
> be actually usable after the whole multifd_save_setup() calling stack
> returns back to the main loop. Which means that the migration thread
> is already up and running without any possibility for the multifd
> channels to be ready on time.
> 
> We currently rely on the channels-ready semaphore blocking
> multifd_send_sync_main() until channels start to come up and release
> it. However there have been bugs recently found when a channel's
> creation fails and multifd_save_cleanup() is allowed to run while
> other channels are still being created.
> 
> Let's start to organize this situation by moving the
> multifd_save_setup() call into the migration thread. That way we
> unblock the main-loop to dispatch the completion callbacks and
> actually have a chance of getting the multifd channels ready for when
> the migration thread needs them.
> 
> The next patches will deal with the synchronization aspects.
> 
> Note that this takes multifd_save_setup() out of the BQL.
> 
> Signed-off-by: Fabiano Rosas 

Reviewed-by: Peter Xu 

-- 
Peter Xu

Re: [PATCH 3/5] migration/multifd: Move multifd_save_setup error handling in to the function

2024-02-04 Thread Peter Xu

On Fri, Feb 02, 2024 at 04:11:26PM -0300, Fabiano Rosas wrote:
> Hide the error handling inside multifd_save_setup to make it cleaner
> for the next patch to move the function around.
> 
> Signed-off-by: Fabiano Rosas 

Reviewed-by: Peter Xu 

-- 
Peter Xu

Re: [PATCH] ppc/pnv: Add PowerPC Special Purpose Registers

2024-02-04 Thread dan tan

On Thu, 18 Jan 2024 12:27:12 +1000, Nicholas Piggin wrote:
> On Thu Jan 18, 2024 at 8:34 AM AEST, dan tan wrote:
>>The handling of the following two registers is added -
>>DAWR1  (0x0b5, 181) - Data Address Watchpoint 1
>>DAWRX1 (0x0bd, 189) - Data Address Watchpoint Extension 1
>> 
>>  Signed-off-by: dan tan 
> 
> Small nit, but there's some extra whitespace on the left here and in
> Subject header which is normally not required.
> 

I will fix that on the respin

>> ---
>> target/ppc/cpu.c | 51
>> 
>> target/ppc/cpu.h |  6 ++
>> target/ppc/cpu_init.c| 10 ++
>> target/ppc/excp_helper.c | 11 ++-
>> target/ppc/helper.h  |  2 ++
>> target/ppc/machine.c |  1 +
>> target/ppc/misc_helper.c | 10 ++
>> target/ppc/spr_common.h  |  2 ++
>> target/ppc/translate.c   | 12 
>> 9 files changed, 104 insertions(+), 1 deletion(-)
>> 
>> diff --git a/target/ppc/cpu.c b/target/ppc/cpu.c
>> index e3ad8e0..8a77328 100644
>> --- a/target/ppc/cpu.c
>> +++ b/target/ppc/cpu.c
>> @@ -188,6 +188,57 @@ void ppc_store_dawrx0(CPUPPCState *env, uint32_t val)
>> env->spr[SPR_DAWRX0] = val;
>> ppc_update_daw0(env);
>> }
>> +
>> +void ppc_update_daw1(CPUPPCState *env)
>> +{
>> +CPUState *cs = env_cpu(env);
>> +target_ulong deaw = env->spr[SPR_DAWR1] & PPC_BITMASK(0, 60);
>> +uint32_t dawrx = env->spr[SPR_DAWRX1];
>> +int mrd = extract32(dawrx, PPC_BIT_NR(48), 54 - 48);
>> +bool dw = extract32(dawrx, PPC_BIT_NR(57), 1);
>> +bool dr = extract32(dawrx, PPC_BIT_NR(58), 1);
>> +bool hv = extract32(dawrx, PPC_BIT_NR(61), 1);
>> +bool sv = extract32(dawrx, PPC_BIT_NR(62), 1);
>> +bool pr = extract32(dawrx, PPC_BIT_NR(62), 1);
>> +vaddr len;
>> +int flags;
>> +
>> +if (env->dawr1_watchpoint) {
>> +cpu_watchpoint_remove_by_ref(cs, env->dawr1_watchpoint);
>> +env->dawr1_watchpoint = NULL;
>> +}
>> +
>> +if (!dr && !dw) {
>> +return;
>> +}
>> +
>> +if (!hv && !sv && !pr) {
>> +return;
>> +}
>> +
>> +len = (mrd + 1) * 8;
>> +flags = BP_CPU | BP_STOP_BEFORE_ACCESS;
>> +if (dr) {
>> +flags |= BP_MEM_READ;
>> +}
>> +if (dw) {
>> +flags |= BP_MEM_WRITE;
>> +}
>> +
>> +cpu_watchpoint_insert(cs, deaw, len, flags, &env->dawr1_watchpoint);
>> +}
> 
> I would say this is just beyond the point where we should share
> code with daw0. You could make a function that takes DAWR(x) SPR
> numbers or values, and a pointer to the watchpoint to use.
> 

Noted. Will make the change

>> +
>> +void ppc_store_dawr1(CPUPPCState *env, target_ulong val)
>> +{
>> +env->spr[SPR_DAWR1] = val;
>> +ppc_update_daw1(env);
>> +}
>> +
>> +void ppc_store_dawrx1(CPUPPCState *env, uint32_t val)
>> +{
>> +env->spr[SPR_DAWRX1] = val;
>> +ppc_update_daw1(env);
>> +}
>> #endif
>> #endif
>> 
>> diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h
>> index f8101ff..ab34fc7 100644
>> --- a/target/ppc/cpu.h
>> +++ b/target/ppc/cpu.h
>> @@ -1237,6 +1237,7 @@ struct CPUArchState {
>> ppc_slb_t slb[MAX_SLB_ENTRIES]; /* PowerPC 64 SLB area */
>> struct CPUBreakpoint *ciabr_breakpoint;
>> struct CPUWatchpoint *dawr0_watchpoint;
>> +struct CPUWatchpoint *dawr1_watchpoint;
>> #endif
>> target_ulong sr[32];   /* segment registers */
>> uint32_t nb_BATs;  /* number of BATs */
>> @@ -1552,6 +1553,9 @@ void ppc_store_ciabr(CPUPPCState *env, target_ulong
>> value);
>> void ppc_update_daw0(CPUPPCState *env);
>> void ppc_store_dawr0(CPUPPCState *env, target_ulong value);
>> void ppc_store_dawrx0(CPUPPCState *env, uint32_t value);
>> +void ppc_update_daw1(CPUPPCState *env);
>> +void ppc_store_dawr1(CPUPPCState *env, target_ulong value);
>> +void ppc_store_dawrx1(CPUPPCState *env, uint32_t value);
>> #endif /* !defined(CONFIG_USER_ONLY) */
>> void ppc_store_msr(CPUPPCState *env, target_ulong value);
>> 
>> @@ -1737,9 +1741,11 @@ void ppc_compat_add_property(Object *obj, const char
>> *name,
>> #define SPR_PSPB  (0x09F)
>> #define SPR_DPDES (0x0B0)
>> #define SPR_DAWR0 (0x0B4)
>> +#define SPR_DAWR1 (0x0B5)
>> #define SPR_RPR   (0x0BA)
>> #define SPR_CIABR (0x0BB)
>> #define SPR_DAWRX0(0x0BC)
>> +#define SPR_DAWRX1(0x0BD)
>> #define SPR_HFSCR (0x0BE)
>> #define SPR_VRSAVE(0x100)
>> #define SPR_USPRG0(0x100)
>> diff --git a/target/ppc/cpu_init.c b/target/ppc/cpu_init.c
>> index 40fe14a..d75c359 100644
>> --- a/target/ppc/cpu_init.c
>> +++ b/target/ppc/cpu_init.c
>> @@ -5119,11 +5119,21 @@ static void register_book3s_207_dbg_sprs(CPUPPCState
>> *env)
>> SPR_NOACCESS, SPR_NOACCESS,
>> &spr_read_generic, &spr_write_dawr0,
>> KVM_REG_PPC_DAWR, 0x00000000);
>> +spr_register_kvm_hv(env,

Re: [PATCH 2/5] migration/multifd: Remove p->running

2024-02-04 Thread Peter Xu

On Fri, Feb 02, 2024 at 04:11:25PM -0300, Fabiano Rosas wrote:
> We currently only need p->running to avoid calling qemu_thread_join()
> on a non existent thread if the thread has never been created.
> 
> However, there are at least two bugs in this logic:
> 
> 1) On the sending side, p->running is set too early and
> qemu_thread_create() can be skipped due to an error during TLS
> handshake, leaving the flag set and leading to a crash when
> multifd_save_cleanup() calls qemu_thread_join().
> 
> 2) During exit, the multifd thread clears the flag while holding the
> channel lock. The counterpart at multifd_save_cleanup() reads the flag
> outside of the lock and might free the mutex while the multifd thread
> still has it locked.
> 
> Fix the first issue by setting the flag right before creating the
> thread. Rename it from p->running to p->thread_created to clarify its
> usage.
> 
> Fix the second issue by not clearing the flag at the multifd thread
> exit. We don't have any use for that.
> 
> Note that these bugs are straight-forward logic issues and not race
> conditions. There is still a gap for races to affect this code due to
> multifd_save_cleanup() being allowed to run concurrently with the
> thread creation loop. This issue is solved in the next patch.
> 

Cc: qemu-stable 

> Fixes: 29647140157a ("migration/tls: add support for multifd tls-handshake")
> Reported-by: Avihai Horon 
> Reported-by: 
> Signed-off-by: Fabiano Rosas 

Reviewed-by: Peter Xu 

-- 
Peter Xu

Re: [PATCH 1/5] migration/multifd: Join the TLS thread

2024-02-04 Thread Peter Xu

On Fri, Feb 02, 2024 at 04:11:24PM -0300, Fabiano Rosas wrote:
> We're currently leaking the resources of the TLS thread by not joining
> it and also overwriting the p->thread pointer altogether.
> 
> Signed-off-by: Fabiano Rosas 

Reviewed-by: Peter Xu 

Does this deserve the tags below?

Fixes: a1af605bd5 ("migration/multifd: fix hangup with TLS-Multifd due to blocking handshake")
Cc: qemu-stable 

-- 
Peter Xu

Re: [PATCH] linux-user/riscv: Fix sizeof_reg typo in vdso unwind info

2024-02-04 Thread Richard Henderson


On 2/5/24 14:33, Alistair Francis wrote:

> On Sat, Jan 13, 2024 at 8:04 AM Richard Henderson
>  wrote:
>>
>> Reported-by: Vineet Gupta 
>> Signed-off-by: Richard Henderson 
>
> Is there a branch with this so I can pull in the binary changes?


Already merged as

commit 1b21fe27e75a59bfe2513f5abcc6a18cfc35cfc8
Author: Richard Henderson 
Date:   Sat Jan 13 09:02:38 2024 +1100

linux-user/riscv: Adjust vdso signal frame cfa offsets


r~

Re: Re: [PATCH 0/2] Export debug triggers as an extension

2024-02-04 Thread Anup Patel

On Mon, Feb 5, 2024 at 9:36 AM Alistair Francis  wrote:
>
> On Mon, Jan 22, 2024 at 7:16 PM Andrew Jones  wrote:
> >
> > On Mon, Jan 22, 2024 at 03:42:10PM +1000, Alistair Francis wrote:
> > > > > From memory the "debug" property is for the original debug spec:
> > > > > https://github.com/riscv/riscv-debug-spec/releases/tag/task_group_vote
> > > > >
> > > > > That was ratified and is an official extension. AFAIK this is what is
> > > > > in physical hardware as well.
> > > > >
> > > > > The actual PDF says draft though, I'm not sure what's going on there.
> > > > >
> > > > > The debug spec doesn't have a Z* name, so it's just "debug", at least
> > > > > AFAIK.
> > > > >
> > > > > "sdtrig" seems to be a new backwards-incompatible extension doing
> > > > > basically the same thing. What a mess
> > ...
> > > >
> > > > I've done a bit of digging and I agree things are quite messy. Here are
> > > > my discoveries:
> > > >
> > > > The debug option and the code for triggers was added in these commits:
> > > >
> > > > c9711bd778 target/riscv: cpu: Enable native debug feature
> > > > 38b4e781a4 target/riscv: machine: Add debug state description
> > > > b6092544fc target/riscv: csr: Hook debug CSR read/write
> > > > 1acdb3b013 target/riscv: cpu: Add a config option for native debug
> > > > 95799e36c1 target/riscv: Add initial support for the Sdtrig extension
> > > >
> > > > In March 2022 - since the commit refers to the Sdtrig extension name
> > > > and from the date this was an implementation not of the ratified 0.13
> > > > debug spec (which did not have Sdtrig as a separate extension) but
> > > > rather a version of the in development 1.0 debug spec.
> > >
> > > Yeah... We used the "stable" from master. That is our mistake there.
> > >
> > > I'm pretty sure we targeted the 0.13. The "Sdtrig" was only added in
> > > the v4 as the changelog says: "mention Sdtrig extension in the commit"
> > >
> > > >
> > > > It's not trivial to tell if it's closer to the ratified 0.13 version or
> > > > the (hopefully soon to be frozen) 1.0 version.
> > > >
> > > > As the only part of the debug specification to be implemented is the
> > > > triggers then effectively the debug option is x-sdtrig.
> > > >
> > > > I don't think there is any way for code running on the machine to
> > > > identify what version of the debug is implemented - the appropriate
> > > > register is only available for external debug. Once 1.0 is frozen then
> > > > the presence of Sdtrig isa string would indicate 1.0 trigger support is
> > > > available.
> > > >
> > > > According to JIRA - https://jira.riscv.org/browse/RVS-981 the debug
> > > > specification should freeze this month.
> > > >
> > > > How about considering this as a solution:
> > > >
> > > > - Add a new x-sdtrig option that defaults to false
> > > > - Deprecate debug option - but retain it with default on
> > >
> > > We can't deprecate a ratified spec. The 0.13 just seems to call it
> > > "debug" so that's what we are stuck with
> > >
> > > > - Add warning if triggers are used and x-sdtrig is not enabled
> > > > - Update the trigger implementation to match frozen spec
> > >
> > > We will need to support two versions, as there are two ratified specs.
> > >
> >
> > We'll likely want to be allowed to deprecate ratified extensions as riscv
> > evolves. Despite best intentions, extensions may be designed and ratified
> > which ultimately fail to be of much utility, and new extensions will
> > supersede old extensions. If QEMU keeps every extension it adds, then
> > we'll slow progress on new extensions by maintaining old extension code.
> > The old extensions will also bitrot or waste CI resources getting tested
> > for no reason.
>
> I agree that we might need to deprecate extensions.
>
> I'm not sure the debug extension is there though. The debug extension
> is used in current shipping hardware and has been ratified. The Sdtrig
> isn't even ratified yet
> (https://lists.riscv.org/g/tech-announce/message/320)

Is shipping real hardware OR ratification a requirement of
QEMU patch acceptance ?

Regards,
Anup

>
> Right now I feel that we should at least wait for hardware that
> supports Sdtrig to start to come out. Then we can look at deprecating
> debug. Deprecating it now seems a bit premature.
>
> Alistair
>
> >
> > I don't know the history of 'debug' and 'sdtrig', other than what I've
> > read above, but, to me, it looks like 'debug' might be one of the first
> > extensions which should be deprecated. Assuming we have a long enough
> > deprecation period, then I think it's always safe to attempt a
> > deprecation. If somebody shouts, then it can always be taken back off the
> > chopping block.
> >
> > Thanks,
> > drew
> >
>

Re: [PATCH v2 23/23] migration/multifd: Optimize sender side to be lockless

2024-02-04 Thread Peter Xu

On Fri, Feb 02, 2024 at 06:34:08PM -0300, Fabiano Rosas wrote:
> pet...@redhat.com writes:
> 
> > From: Peter Xu 
> >
> > When reviewing my attempt to refactor send_prepare(), Fabiano suggested we
> > try out with dropping the mutex in multifd code [1].
> >
> > I thought about that before but I never tried to change the code.  Now
> > maybe it's time to give it a stab.  This only optimizes the sender side.
> >
> > The trick here is multifd has a clear provider/consumer model, that the
> > migration main thread publishes requests (either pending_job/pending_sync),
> > while the multifd sender threads are consumers.  Here we don't have a lot
> > of comlicated data sharing, and the jobs can logically be submitted 
> > lockless.
> 
> complicated
> 
> >
> > Arm the code with atomic weapons.  Two things worth mentioning:
> >
> >   - For multifd_send_pages(): we can use qatomic_load_acquire() when trying
> >   to find a free channel, but that's expensive if we attach one ACQUIRE per
> >   channel.  Instead, make it atomic_read() on the pending_job flag, but
> 
> s/make it/keep it/
> 
> The diff doesn't show the atomic_read already there so it's confusing.

Right.  I also have a trivial typo on s/atomic_read/qatomic_read/..

I tried to rephrase the last sentence:

  - For multifd_send_pages(): we can use qatomic_load_acquire() when trying
  to find a free channel, but that's expensive if we attach one ACQUIRE per
  channel.  Instead, keep the qatomic_read() on reading the pending_job
  flag as we do already, meanwhile use one smp_mb_acquire() after the loop
  to guarantee the memory ordering.

Maybe slightly clearer?

> 
> >   merge the ACQUIRE into one single smp_mb_acquire() later.
> >
> >   - For pending_sync: it doesn't have any extra data required since now
> >   p->flags are never touched, it should be safe to not use memory barrier.
> >   That's different from pending_sync.
> 
> pending_job?

Yep, all the rest fixed.

> 
> >
> > Provide rich comments for all the lockless operations to state how they are
> > paired.  With that, we can remove the mutex.
> >
> > [1] https://lore.kernel.org/r/87o7d1jlu5@suse.de
> >
> > Suggested-by: Fabiano Rosas 
> > Signed-off-by: Peter Xu 
> > ---
> >  migration/multifd.h |  2 --
> >  migration/multifd.c | 51 +++--
> >  2 files changed, 26 insertions(+), 27 deletions(-)
> >
> > diff --git a/migration/multifd.h b/migration/multifd.h
> > index 98876ff94a..78a2317263 100644
> > --- a/migration/multifd.h
> > +++ b/migration/multifd.h
> > @@ -91,8 +91,6 @@ typedef struct {
> >  /* syncs main thread and channels */
> >  QemuSemaphore sem_sync;
> >  
> > -/* this mutex protects the following parameters */
> > -QemuMutex mutex;
> >  /* is this channel thread running */
> >  bool running;
> >  /* multifd flags for each packet */
> > diff --git a/migration/multifd.c b/migration/multifd.c
> > index b317d57d61..ef13e2e781 100644
> > --- a/migration/multifd.c
> > +++ b/migration/multifd.c
> > @@ -501,19 +501,19 @@ static bool multifd_send_pages(void)
> >  }
> >  }
> >  
> > -qemu_mutex_lock(&p->mutex);
> > -assert(!p->pages->num);
> > -assert(!p->pages->block);
> >  /*
> > - * Double check on pending_job==false with the lock.  In the future if
> > - * we can have >1 requester thread, we can replace this with a "goto
> > - * retry", but that is for later.
> > + * Make sure we read p->pending_job before all the rest.  Pairs with
> > + * qatomic_store_release() in multifd_send_thread().
> >   */
> > -assert(qatomic_read(&p->pending_job) == false);
> > -qatomic_set(&p->pending_job, true);
> > +smp_mb_acquire();
> > +assert(!p->pages->num);
> >  multifd_send_state->pages = p->pages;
> >  p->pages = pages;
> > -qemu_mutex_unlock(&p->mutex);
> > +/*
> > + * Making sure p->pages is setup before marking pending_job=true. Pairs
> > + * with the qatomic_load_acquire() in multifd_send_thread().
> > + */
> > +qatomic_store_release(&p->pending_job, true);
> >  qemu_sem_post(&p->sem);
> >  
> >  return true;
> > @@ -648,7 +648,6 @@ static bool 
> > multifd_send_cleanup_channel(MultiFDSendParams *p, Error **errp)
> >  }
> >  multifd_send_channel_destroy(p->c);
> >  p->c = NULL;
> > -qemu_mutex_destroy(&p->mutex);
> >  qemu_sem_destroy(&p->sem);
> >  qemu_sem_destroy(&p->sem_sync);
> >  g_free(p->name);
> > @@ -742,14 +741,12 @@ int multifd_send_sync_main(void)
> >  
> >  trace_multifd_send_sync_main_signal(p->id);
> >  
> > -qemu_mutex_lock(&p->mutex);
> >  /*
> >   * We should be the only user so far, so not possible to be set by
> >   * others concurrently.
> >   */
> >  assert(qatomic_read(&p->pending_sync) == false);
> >  qatomic_set(&p->pending_sync, true);
> > -qemu_mutex_unlock(&p->mutex);
> >  qemu_sem_post(&p->sem);
> >  }
> >  for (i = 0; i <

Re: [PATCH] linux-user/riscv: Fix sizeof_reg typo in vdso unwind info

2024-02-04 Thread Alistair Francis

On Sat, Jan 13, 2024 at 8:04 AM Richard Henderson
 wrote:
>
> Reported-by: Vineet Gupta 
> Signed-off-by: Richard Henderson 

Is there a branch with this so I can pull in the binary changes?

Alistair

> ---
>  linux-user/riscv/vdso-32.so | Bin 2900 -> 2900 bytes
>  linux-user/riscv/vdso-64.so | Bin 3856 -> 3856 bytes
>  linux-user/riscv/vdso.S |   2 +-
>  3 files changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/linux-user/riscv/vdso-32.so b/linux-user/riscv/vdso-32.so
> index 
> 1ad1e5c8b1fe36b0fe4bcb6c06fab8219ecd..5829c1dc90f1c1aafde69f5b9f5bc9afb85251f7
>  100755
> GIT binary patch
> delta 112
> zcmV-$0FVFF7StB71OXHyD|k zhh2zViCc0VjaQCVk5!OVkyDaVlTnmVl~0yVmra;VnM;~Vn@OBVokyNVpGC7%
> S1ndF^OjB%HZ#A<;2M+}Mt1p-U
>
> delta 112
> zcmV-$0FVFF7StB71OXI7ek-E|Z8Fh9(G%V1AYqwmRqNri7y zhgOJGiBgJCi%yJ8jY^J4k4BJ0kwTI{lRlI@l{%I S1ndF^P;+@qVQaHR2M+{CwJgd2
>
> diff --git a/linux-user/riscv/vdso-64.so b/linux-user/riscv/vdso-64.so
> index 
> 83992bebe6d0182f24edfffc531015fd2f4e1cfb..de18e35537a493ba35307e93a2d33faaf489c0b3
>  100755
> GIT binary patch
> delta 118
> zcmV-+0Ez#Q9*`cepaB$miap5s7G5`$2#eqLH6 zhIof|h;)f^ig1f=jBJf+j%bf!l3 YlidYEvmgdT0|snTdTn?%vuO#Z1^ZDlW*H
>
> delta 117
> zcmV-*0E+*R9*`cepaB$}OC4Olbn*vwtO;!U^UJ?5jVB4Sv;pe_K7av|bcJw+
> zY=>xwWQkykT#HzYRE X-UULl9|lAN23SxxMKVLPX$hwVbapOM
>
> diff --git a/linux-user/riscv/vdso.S b/linux-user/riscv/vdso.S
> index a86d8fc488..4b4e34aeea 100644
> --- a/linux-user/riscv/vdso.S
> +++ b/linux-user/riscv/vdso.S
> @@ -101,7 +101,7 @@ endf __vdso_flush_icache
> .cfi_startproc simple
> .cfi_signal_frame
>
> -#define sizeof_reg (__riscv_xlen / 4)
> +#define sizeof_reg (__riscv_xlen / 8)
>  #define sizeof_freg8
>  #define B_GR   (offsetof_uc_mcontext - sizeof_rt_sigframe)
>  #define B_FR   (offsetof_uc_mcontext - sizeof_rt_sigframe + offsetof_freg0)
> --
> 2.34.1
>
>

Re: [PATCH v2 19/23] migration/multifd: Cleanup multifd_save_cleanup()

2024-02-04 Thread Peter Xu

On Fri, Feb 02, 2024 at 05:54:23PM -0300, Fabiano Rosas wrote:
> pet...@redhat.com writes:
> 
> > From: Peter Xu 
> >
> > Shrink the function by moving relevant works into helpers: move the thread
> > join()s into multifd_send_terminate_threads(), then create two more helpers
> > to cover channel/state cleanups.
> >
> > Add a TODO entry for the thread terminate process because p->running is
> > still buggy.  We need to fix it at some point but not yet covered.
> >
> > Suggested-by: Fabiano Rosas 
> > Signed-off-by: Peter Xu 
> 
> Reviewed-by: Fabiano Rosas 
> 
> minor comment below
> 
> > ---
> >  migration/multifd.c | 91 +
> >  1 file changed, 59 insertions(+), 32 deletions(-)
> >
> > diff --git a/migration/multifd.c b/migration/multifd.c
> > index 4ab8e6eff2..4cb0d2cc17 100644
> > --- a/migration/multifd.c
> > +++ b/migration/multifd.c
> > @@ -593,6 +593,11 @@ static void multifd_send_terminate_threads(void)
> >   * always set it.
> >   */
> >  qatomic_set(&multifd_send_state->exiting, 1);
> > +
> > +/*
> > + * Firstly, kick all threads out; no matter whether they are just idle,
> > + * or blocked in an IO system call.
> > + */
> >  for (i = 0; i < migrate_multifd_channels(); i++) {
> >  MultiFDSendParams *p = &multifd_send_state->params[i];
> >  
> > @@ -601,6 +606,21 @@ static void multifd_send_terminate_threads(void)
> >  qio_channel_shutdown(p->c, QIO_CHANNEL_SHUTDOWN_BOTH, NULL);
> >  }
> >  }
> > +
> > +/*
> > + * Finally recycle all the threads.
> > + *
> > + * TODO: p->running is still buggy, e.g. we can reach here without the
> > + * corresponding multifd_new_send_channel_async() get invoked yet,
> > + * then a new thread can even be created after this function returns.
> > + */
> 
> Series on the list:
> 
> https://lore.kernel.org/r/20240202191128.1901-1-faro...@suse.de

Thanks a lot.  I'll read it later today.

> 
> > +for (i = 0; i < migrate_multifd_channels(); i++) {
> > +MultiFDSendParams *p = &multifd_send_state->params[i];
> > +
> > +if (p->running) {
> > +qemu_thread_join(&p->thread);
> > +}
> > +}
> >  }
> >  
> >  static int multifd_send_channel_destroy(QIOChannel *send)
> > @@ -608,6 +628,41 @@ static int multifd_send_channel_destroy(QIOChannel 
> > *send)
> >  return socket_send_channel_destroy(send);
> >  }
> >  
> > +static bool multifd_send_cleanup_channel(MultiFDSendParams *p, Error 
> > **errp)
> > +{
> > +if (p->registered_yank) {
> > +migration_ioc_unregister_yank(p->c);
> > +}
> > +multifd_send_channel_destroy(p->c);
> > +p->c = NULL;
> > +qemu_mutex_destroy(&p->mutex);
> > +qemu_sem_destroy(&p->sem);
> > +qemu_sem_destroy(&p->sem_sync);
> > +g_free(p->name);
> > +p->name = NULL;
> > +multifd_pages_clear(p->pages);
> > +p->pages = NULL;
> > +p->packet_len = 0;
> > +g_free(p->packet);
> > +p->packet = NULL;
> > +g_free(p->iov);
> > +p->iov = NULL;
> > +multifd_send_state->ops->send_cleanup(p, errp);
> > +
> > +return *errp == NULL;
> 
> I think technically this would require the ERRP_GUARD() macro?

I normally only use ERRP_GUARD() if there can be any caller passing in
NULL, or when I am not sure it's always !NULL.

What I wanted to add here is actually assert(errp), but then I noticed
*errp==NULL plays the same role as that, because if errp==NULL, it'll crash
here when dereferencing, so it actually has an implicit assert(errp);
exactly what I wanted, but even one line less (even if not obvious).

-- 
Peter Xu

Re: Re: [PATCH 0/2] Export debug triggers as an extension

2024-02-04 Thread Alistair Francis

On Mon, Jan 22, 2024 at 7:16 PM Andrew Jones  wrote:
>
> On Mon, Jan 22, 2024 at 03:42:10PM +1000, Alistair Francis wrote:
> > > > From memory the "debug" property is for the original debug spec:
> > > > https://github.com/riscv/riscv-debug-spec/releases/tag/task_group_vote
> > > >
> > > > That was ratified and is an official extension. AFAIK this is what is
> > > > in physical hardware as well.
> > > >
> > > > The actual PDF says draft though, I'm not sure what's going on there.
> > > >
> > > > The debug spec doesn't have a Z* name, so it's just "debug", at least
> > > > AFAIK.
> > > >
> > > > "sdtrig" seems to be a new backwards-incompatible extension doing
> > > > basically the same thing. What a mess
> ...
> > >
> > > I've done a bit of digging and I agree things are quite messy. Here are
> > > my discoveries:
> > >
> > > The debug option and the code for triggers was added in these commits:
> > >
> > > c9711bd778 target/riscv: cpu: Enable native debug feature
> > > 38b4e781a4 target/riscv: machine: Add debug state description
> > > b6092544fc target/riscv: csr: Hook debug CSR read/write
> > > 1acdb3b013 target/riscv: cpu: Add a config option for native debug
> > > 95799e36c1 target/riscv: Add initial support for the Sdtrig extension
> > >
> > > In March 2022 - since the commit refers to the Sdtrig extension name
> > > and from the date this was an implementation not of the ratified 0.13
> > > debug spec (which did not have Sdtrig as a separate extension) but
> > > rather a version of the in development 1.0 debug spec.
> >
> > Yeah... We used the "stable" from master. That is our mistake there.
> >
> > I'm pretty sure we targeted the 0.13. The "Sdtrig" was only added in
> > the v4 as the changelog says: "mention Sdtrig extension in the commit"
> >
> > >
> > > It's not trivial to tell if it's closer to the ratified 0.13 version or
> > > the (hopefully soon to be frozen) 1.0 version.
> > >
> > > As the only part of the debug specification to be implemented is the
> > > triggers then effectively the debug option is x-sdtrig.
> > >
> > > I don't think there is any way for code running on the machine to
> > > identify what version of the debug is implemented - the appropriate
> > > register is only available for external debug. Once 1.0 is frozen then
> > > the presence of Sdtrig isa string would indicate 1.0 trigger support is
> > > available.
> > >
> > > According to JIRA - https://jira.riscv.org/browse/RVS-981 the debug
> > > specification should freeze this month.
> > >
> > > How about considering this as a solution:
> > >
> > > - Add a new x-sdtrig option that defaults to false
> > > - Deprecate debug option - but retain it with default on
> >
> > We can't deprecate a ratified spec. The 0.13 just seems to call it
> > "debug" so that's what we are stuck with
> >
> > > - Add warning if triggers are used and x-sdtrig is not enabled
> > > - Update the trigger implementation to match frozen spec
> >
> > We will need to support two versions, as there are two ratified specs.
> >
>
> We'll likely want to be allowed to deprecate ratified extensions as riscv
> evolves. Despite best intentions, extensions may be designed and ratified
> which ultimately fail to be of much utility, and new extensions will
> supersede old extensions. If QEMU keeps every extension it adds, then
> we'll slow progress on new extensions by maintaining old extension code.
> The old extensions will also bitrot or waste CI resources getting tested
> for no reason.

I agree that we might need to deprecate extensions.

I'm not sure the debug extension is there though. The debug extension
is used in current shipping hardware and has been ratified. The Sdtrig
isn't even ratified yet
(https://lists.riscv.org/g/tech-announce/message/320)

Right now I feel that we should at least wait for hardware that
supports Sdtrig to start to come out. Then we can look at deprecating
debug. Deprecating it now seems a bit premature.

Alistair

>
> I don't know the history of 'debug' and 'sdtrig', other than what I've
> read above, but, to me, it looks like 'debug' might be one of the first
> extensions which should be deprecated. Assuming we have a long enough
> deprecation period, then I think it's always safe to attempt a
> deprecation. If somebody shouts, then it can always be taken back off the
> chopping block.
>
> Thanks,
> drew
>

Re: [PATCH v2 22/23] migration/multifd: Fix MultiFDSendParams.packet_num race

2024-02-04 Thread Peter Xu

On Fri, Feb 02, 2024 at 06:08:22PM -0300, Fabiano Rosas wrote:
> pet...@redhat.com writes:
> 
> > From: Peter Xu 
> >
> > As reported correctly by Fabiano [1], MultiFDSendParams.packet_num is buggy
> > to be assigned and stored.  Consider two consecutive operations of: (1)
> > queue a job into multifd send thread X, then (2) queue another sync request
> > to the same send thread X.  Then the MultiFDSendParams.packet_num will be
> > assigned twice, and the first assignment can get lost already.
> >
> > To avoid that, we move the packet_num assignment from p->packet_num into
> > where the thread will fill in the packet.  Use atomic operations to protect
> > the field, making sure there's no race.
> >
> > Note that atomic fetch_add() may not be good for scaling purposes, however
> > multifd should be fine as number of threads should normally not go beyond
> > 16 threads.  Let's leave that concern for later but fix the issue first.
> >
> > There's also a trick on how to make it always work even on 32 bit hosts for
> > uint64_t packet number.  Switching to uintptr_t as of now to simplify the
> > case.  It will cause packet number to overflow easier on 32 bit, but that
> > shouldn't be a major concern for now as 32 bit systems are not the major
> > audience for any performance concerns like what multifd wants to address.
> >
> > We also need to move multifd_send_state definition upper, so that
> > multifd_send_fill_packet() can reference it.
> >
> > [1] https://lore.kernel.org/r/87o7d1jlu5@suse.de
> >
> > Reported-by: Fabiano Rosas 
> > Signed-off-by: Peter Xu 
> 
> Elena had reported this in October already.
> 
> Reported-by: Elena Ufimtseva 

Ah, I'll do the replacement.

> Reviewed-by: Fabiano Rosas 

Thanks,

-- 
Peter Xu

Re: [PATCH v2 18/23] migration/multifd: Rewrite multifd_queue_page()

2024-02-04 Thread Peter Xu

On Fri, Feb 02, 2024 at 05:47:05PM -0300, Fabiano Rosas wrote:
> pet...@redhat.com writes:
> 
> > From: Peter Xu 
> >
> > The current multifd_queue_page() is not easy to read and follow.  It is not
> > good for a few reasons:
> >
> >   - No helper at all to show what exactly does a condition mean; in short,
> >   readability is low.
> >
> >   - Rely on pages->ramblock being cleared to detect an empty queue.  It's
> >   slightly an overload of the ramblock pointer, per Fabiano [1], which I
> >   also agree.
> >
> >   - Contains a self recursion, even if not necessary..
> >
> > Rewrite this function.  We add some comments to make it even clearer on
> > what it does.
> >
> > [1] https://lore.kernel.org/r/87wmrpjzew@suse.de
> >
> > Signed-off-by: Peter Xu 
> 
> Reviewed-by: Fabiano Rosas 
> 
> Patch looks good, but I have a question below.
> 
> > ---
> >  migration/multifd.c | 56 ++---
> >  1 file changed, 37 insertions(+), 19 deletions(-)
> >
> > diff --git a/migration/multifd.c b/migration/multifd.c
> > index 35d4e8ad1f..4ab8e6eff2 100644
> > --- a/migration/multifd.c
> > +++ b/migration/multifd.c
> > @@ -506,35 +506,53 @@ static bool multifd_send_pages(void)
> >  return true;
> >  }
> >  
> > +static inline bool multifd_queue_empty(MultiFDPages_t *pages)
> > +{
> > +return pages->num == 0;
> > +}
> > +
> > +static inline bool multifd_queue_full(MultiFDPages_t *pages)
> > +{
> > +return pages->num == pages->allocated;
> > +}
> > +
> > +static inline void multifd_enqueue(MultiFDPages_t *pages, ram_addr_t 
> > offset)
> > +{
> > +pages->offset[pages->num++] = offset;
> > +}
> > +
> >  /* Returns true if enqueue successful, false otherwise */
> >  bool multifd_queue_page(RAMBlock *block, ram_addr_t offset)
> >  {
> > -MultiFDPages_t *pages = multifd_send_state->pages;
> > -bool changed = false;
> > +MultiFDPages_t *pages;
> > +
> > +retry:
> > +pages = multifd_send_state->pages;
> >  
> > -if (!pages->block) {
> > +/* If the queue is empty, we can already enqueue now */
> > +if (multifd_queue_empty(pages)) {
> >  pages->block = block;
> > +multifd_enqueue(pages, offset);
> > +return true;
> >  }
> >  
> > -if (pages->block == block) {
> > -pages->offset[pages->num] = offset;
> > -pages->num++;
> > -
> > -if (pages->num < pages->allocated) {
> > -return true;
> > +/*
> > + * Not empty, meanwhile we need a flush.  It can be because of either:
> > + *
> > + * (1) The page is not on the same ramblock of previous ones, or,
> > + * (2) The queue is full.
> > + *
> > + * After flush, always retry.
> > + */
> > +if (pages->block != block || multifd_queue_full(pages)) {
> > +if (!multifd_send_pages()) {
> > +return false;
> >  }
> > -} else {
> > -changed = true;
> > -}
> > -
> > -if (!multifd_send_pages()) {
> > -return false;
> > -}
> > -
> > -if (changed) {
> > -return multifd_queue_page(block, offset);
> > +goto retry;
> >  }
> >  
> > +/* Not empty, and we still have space, do it! */
> > +multifd_enqueue(pages, offset);
> 
> Hm, here you're missing the flush of the last group of pages of the last
> ramblock. Just like current code...
> 
> ...which means we're relying on the multifd_send_pages() at
> multifd_send_sync_main() to send the last few pages. So how can that
> work when multifd_flush_after_each_section==false? Because it skips the
> sync flag, but would also skip the last send. I'm confused.

IIUC it won't skip the final flush of the last pages.  See
find_dirty_block():

if (migrate_multifd() &&
!migrate_multifd_flush_after_each_section()) {
QEMUFile *f = rs->pss[RAM_CHANNEL_PRECOPY].pss_channel;
int ret = multifd_send_sync_main();
if (ret < 0) {
return ret;
}
qemu_put_be64(f, RAM_SAVE_FLAG_MULTIFD_FLUSH);
qemu_fflush(f);
}

IMHO this should be the last flush of the pages when we loop one more
round.

Maybe what you're talking about is this one (of ram_save_complete())?

if (migrate_multifd() && !migrate_multifd_flush_after_each_section()) {
qemu_put_be64(f, RAM_SAVE_FLAG_MULTIFD_FLUSH);
}

I remember we talked about this somewhere in your "file" series,
but.. AFAIU this last RAM_SAVE_FLAG_MULTIFD_FLUSH might be redundant, it
just needs some justifications to double check I didn't miss something.

Now multifd_queue_page() is kind of lazy-mode on flushing, I think that may
make some sense (we assign job unless required, so maybe there's higher
chance that one thread is free?), but I'm not sure whether that's a huge
deal if NIC is the bandwidth, because in that case we'll wait for sender
threads anyway, and they should all be busy at any time.

However even if we

Re: [PATCH v4 0/2] riscv: support new isa extension detection devicetree properties

2024-02-04 Thread Alistair Francis

On Wed, Jan 24, 2024 at 10:57 PM Conor Dooley  wrote:
>
> From: Conor Dooley 
>
> Making it a series to keep the standalone change to riscv_isa_string()
> that Drew reported separate.
>
> Changes in v4:
> - Other than a rebase, add a helper for the mxl_max to xlen conversion
>
> Changes in v3:
> - g_free() isa_extensions too
> - use misa_mxl_max rather than the compile target for the base isa
> - add a new patch changing riscv_isa_string() to do the same
> - drop a null check that cannot be null
> - rebased on top of Alistair's next branch
>
> Changes in v2:
> - use g_strdup() for multiletter extension string copying
> - wrap stuff in #ifndef to prevent breaking the user mode build
> - rename riscv_isa_set_props() -> riscv_isa_write_fdt()
>
> CC: Alistair Francis 
> CC: Bin Meng 
> CC: Palmer Dabbelt 
> CC: Weiwei Li 
> CC: Daniel Henrique Barboza 
> CC: Andrew Jones 
> CC: Liu Zhiwei 
> CC: qemu-ri...@nongnu.org
> CC: qemu-devel@nongnu.org
>
> Conor Dooley (2):
>   target/riscv: use misa_mxl_max to populate isa string rather than
> TARGET_LONG_BITS
>   target/riscv: support new isa extension detection devicetree
> properties

Thanks!

Applied to riscv-to-apply.next with some manual rebasing

Alistair

>
>  hw/riscv/sifive_u.c|  7 ++---
>  hw/riscv/spike.c   |  6 ++--
>  hw/riscv/virt.c|  6 ++--
>  target/riscv/cpu.c | 62 +-
>  target/riscv/cpu.h |  2 ++
>  target/riscv/gdbstub.c |  2 +-
>  6 files changed, 70 insertions(+), 15 deletions(-)
>
> --
> 2.43.0
>
>

Re: [PATCH v4 1/2] target/riscv: use misa_mxl_max to populate isa string rather than TARGET_LONG_BITS

2024-02-04 Thread Alistair Francis

On Thu, Jan 25, 2024 at 12:04 AM Conor Dooley  wrote:
>
> From: Conor Dooley 
>
> A cpu may not have the same xlen as the compile time target, and
> misa_mxl_max is the source of truth for what the hart supports.
>
> The conversion from misa_mxl_max to xlen already has one user, so
> introduce a helper and use that to populate the isa string.
>
> Link: 
> https://lore.kernel.org/qemu-riscv/20240108-efa3f83dcd3997dc0af458d7@orel/
> Signed-off-by: Conor Dooley 

Reviewed-by: Alistair Francis 

Alistair

> ---
> I dropped the tags since I added the helper
> ---
>  target/riscv/cpu.c | 9 -
>  target/riscv/cpu.h | 1 +
>  target/riscv/gdbstub.c | 2 +-
>  3 files changed, 10 insertions(+), 2 deletions(-)
>
> diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
> index ad1df2318b..4aa4b2e988 100644
> --- a/target/riscv/cpu.c
> +++ b/target/riscv/cpu.c
> @@ -307,6 +307,11 @@ void riscv_cpu_set_misa(CPURISCVState *env, RISCVMXL 
> mxl, uint32_t ext)
>  env->misa_ext_mask = env->misa_ext = ext;
>  }
>
> +int riscv_cpu_max_xlen(CPURISCVState env)
> +{
> +return 16 << env.misa_mxl_max;
> +}
> +
>  #ifndef CONFIG_USER_ONLY
>  static uint8_t satp_mode_from_str(const char *satp_mode_str)
>  {
> @@ -2332,7 +2337,9 @@ char *riscv_isa_string(RISCVCPU *cpu)
>  int i;
>  const size_t maxlen = sizeof("rv128") + sizeof(riscv_single_letter_exts);
>  char *isa_str = g_new(char, maxlen);
> -char *p = isa_str + snprintf(isa_str, maxlen, "rv%d", TARGET_LONG_BITS);
> +int xlen = riscv_cpu_max_xlen(cpu->env);
> +char *p = isa_str + snprintf(isa_str, maxlen, "rv%d", xlen);
> +
>  for (i = 0; i < sizeof(riscv_single_letter_exts) - 1; i++) {
>  if (cpu->env.misa_ext & RV(riscv_single_letter_exts[i])) {
>  *p++ = qemu_tolower(riscv_single_letter_exts[i]);
> diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
> index 05e83c4ac9..aacc031397 100644
> --- a/target/riscv/cpu.h
> +++ b/target/riscv/cpu.h
> @@ -511,6 +511,7 @@ bool riscv_cpu_tlb_fill(CPUState *cs, vaddr address, int 
> size,
>  MMUAccessType access_type, int mmu_idx,
>  bool probe, uintptr_t retaddr);
>  char *riscv_isa_string(RISCVCPU *cpu);
> +int riscv_cpu_max_xlen(CPURISCVState env);
>  bool riscv_cpu_option_set(const char *optname);
>
>  #define cpu_mmu_index riscv_cpu_mmu_index
> diff --git a/target/riscv/gdbstub.c b/target/riscv/gdbstub.c
> index 58b3ace0fe..f15980fdcf 100644
> --- a/target/riscv/gdbstub.c
> +++ b/target/riscv/gdbstub.c
> @@ -218,7 +218,7 @@ static int riscv_gen_dynamic_csr_xml(CPUState *cs, int 
> base_reg)
>  CPURISCVState *env = &cpu->env;
>  GString *s = g_string_new(NULL);
>  riscv_csr_predicate_fn predicate;
> -int bitsize = 16 << env->misa_mxl_max;
> +int bitsize = riscv_cpu_max_xlen(*env);
>  int i;
>
>  #if !defined(CONFIG_USER_ONLY)
> --
> 2.43.0
>
>

Re: [PATCH 2/2] migration: Fix return-path thread exit

2024-02-04 Thread Peter Xu

On Fri, Feb 02, 2024 at 12:11:09PM -0300, Fabiano Rosas wrote:
> Cédric Le Goater  writes:
> 
> > On 2/2/24 15:42, Fabiano Rosas wrote:
> >> Cédric Le Goater  writes:
> >> 
> >>> In case of error, close_return_path_on_source() can perform a shutdown
> >>> to exit the return-path thread.  However, in migrate_fd_cleanup(),
> >>> 'to_dst_file' is closed before calling close_return_path_on_source()
> >>> and the shutdown fails, leaving the source and destination waiting for
> >>> an event to occur.
> >> 
> >> At close_return_path_on_source, qemu_file_shutdown() and checking
> >> ms->to_dst_file are done under the qemu_file_lock, so how could
> >> migrate_fd_cleanup() have cleared the pointer but the ms->to_dst_file
> >> check have passed?
> >
> > This is not a locking issue, it's much simpler. migrate_fd_cleanup()
> > clears the ms->to_dst_file pointer and closes the QEMUFile and then
> > calls close_return_path_on_source() which then tries to use resources
> > which are not available anymore.
> 
> I'm missing something here. Which resources? I assume you're talking
> about this:
> 
> WITH_QEMU_LOCK_GUARD(&ms->qemu_file_lock) {
> if (ms->to_dst_file && ms->rp_state.from_dst_file &&
> qemu_file_get_error(ms->to_dst_file)) {
> qemu_file_shutdown(ms->rp_state.from_dst_file);
> }
> }
> 
> How do we get past the 'if (ms->to_dst_file)'?

We don't; migrate_fd_cleanup() will release ms->to_dst_file, then call
close_return_path_on_source(), find that to_dst_file==NULL and then skip
the shutdown().

One other option might be that we do close_return_path_on_source() before
the chunk of releasing to_dst_file.

This "two qemufiles share the same ioc" issue had bitten us before IIRC,
and the only concern of that workaround is we keep postponing resolution of
the real issue, then we keep getting bitten by it..

Maybe we can wait a few days to see if Dan can join the conversation and if
we can reach a consensus on a complete solution.  Otherwise I think we can
still work this around, but maybe that'll require a comment block
explaining the bits after such movement.

Thanks,

-- 
Peter Xu

Re: [PULL 06/14] ci: Add a migration compatibility test job

2024-02-04 Thread Peter Xu

On Fri, Feb 02, 2024 at 10:47:05AM -0300, Fabiano Rosas wrote:
> Peter Maydell  writes:
> 
> > On Mon, 29 Jan 2024 at 03:04,  wrote:
> >>
> >> From: Fabiano Rosas 
> >>
> >> The migration tests have support for being passed two QEMU binaries to
> >> test migration compatibility.
> >>
> >> Add a CI job that builds the latest release of QEMU and another job
> >> that uses that version plus an already present build of the current
> >> version and run the migration tests with the two, both as source and
> >> destination. I.e.:
> >>
> >>  old QEMU (n-1) -> current QEMU (development tree)
> >>  current QEMU (development tree) -> old QEMU (n-1)
> >>
> >> The purpose of this CI job is to ensure the code we're about to merge
> >> will not cause a migration compatibility problem when migrating the
> >> next release (which will contain that code) to/from the previous
> >> release.
> >>
> >> The version of migration-test used will be the one matching the older
> >> QEMU. That way we can avoid special-casing new tests that wouldn't be
> >> compatible with the older QEMU.
> >>
> >> Note: for user forks, the version tags need to be pushed to gitlab
> >> otherwise it won't be able to checkout a different version.
> >>
> >> Signed-off-by: Fabiano Rosas 
> >> Link: https://lore.kernel.org/r/20240118164951.30350-3-faro...@suse.de
> >> Signed-off-by: Peter Xu 
> >> ---
> >>  .gitlab-ci.d/buildtest.yml | 60 ++
> >>  1 file changed, 60 insertions(+)
> >>
> >> diff --git a/.gitlab-ci.d/buildtest.yml b/.gitlab-ci.d/buildtest.yml
> >> index e1c7801598..f0b0edc634 100644
> >> --- a/.gitlab-ci.d/buildtest.yml
> >> +++ b/.gitlab-ci.d/buildtest.yml
> >> @@ -167,6 +167,66 @@ build-system-centos:
> >>x86_64-softmmu rx-softmmu sh4-softmmu nios2-softmmu
> >>  MAKE_CHECK_ARGS: check-build
> >>
> >> +# Previous QEMU release. Used for cross-version migration tests.
> >> +build-previous-qemu:
> >> +  extends: .native_build_job_template
> >> +  artifacts:
> >> +when: on_success
> >> +expire_in: 2 days
> >> +paths:
> >> +  - build-previous
> >> +exclude:
> >> +  - build-previous/**/*.p
> >> +  - build-previous/**/*.a.p
> >> +  - build-previous/**/*.fa.p
> >> +  - build-previous/**/*.c.o
> >> +  - build-previous/**/*.c.o.d
> >> +  - build-previous/**/*.fa
> >> +  needs:
> >> +job: amd64-opensuse-leap-container
> >> +  variables:
> >> +IMAGE: opensuse-leap
> >> +TARGETS: x86_64-softmmu aarch64-softmmu
> >> +  before_script:
> >> +- export QEMU_PREV_VERSION="$(sed 's/\([0-9.]*\)\.[0-9]*/v\1.0/' 
> >> VERSION)"
> >> +- git checkout $QEMU_PREV_VERSION
> >> +  after_script:
> >> +- mv build build-previous
> >
> > There seems to be a problem with this new CI job. Running a CI
> > run in my local repository it fails:
> >
> > https://gitlab.com/pm215/qemu/-/jobs/6075873685
> >
> > $ export QEMU_PREV_VERSION="$(sed 's/\([0-9.]*\)\.[0-9]*/v\1.0/' VERSION)"
> > $ git checkout $QEMU_PREV_VERSION
> > error: pathspec 'v8.2.0' did not match any file(s) known to git
> > Running after_script
> > Running after script...
> > $ mv build build-previous
> > mv: cannot stat 'build': No such file or directory
> > WARNING: after_script failed, but job will continue unaffected: exit code 1
> > Saving cache for failed job
> >
> >
> > I don't think you can assume that private forks doing submaintainer CI
> > runs necessarily have the full set of tags that the main repo does.
> 
> Yes, I thought this would be rare enough not to be an issue, but it
> seems it's not. I don't know what could be done here, if there's no tag,
> then there's no way to resolve the actual commit hash I think.
> 
> > I suspect the sed run will also do the wrong thing when run on the
> > commit that updates the version, because then it will replace
> > "9.0.0" with "9.0.0".
> 
> I just ignored this completely because my initial idea was to leave this
> job disabled and only run it for migration patchsets and pull requests,
> so it wouldn't make sense to run at that commit.
> 
> This job is also not entirely fail proof by design because we could
> always be hitting bugs in the older QEMU version that were already fixed
> in the new version.
> 
> I think the simplest fix here is to leave the test disabled, possibly
> with an env variable to enable it.

However if so that'll be unfortunate.. because the goal of the "n-1" test
is to fail the exact commit that will break compatibility and make it
enforced, IMHO.

Failing for some migration guy pushing CI can be better than nothing
indeed, but it is just less ideal..  we want the developer / module
maintainer notice this issue, fix it instead of merging something wrong
already, then we try to find what is broken and ask for a fix (where there
will still be a window it's broken; and if unlucky across major releases).

Currently the coverage of n-1 test is indeed still more focused on
migration framework, but it'll also cover quite some default

Re: [PATCH 0/7] hw/riscv: fix leak, add more g_autofree

2024-02-04 Thread Alistair Francis

On Tue, Jan 23, 2024 at 9:39 AM Daniel Henrique Barboza
 wrote:
>
> Hi,
>
> First patch fixes a leak found when using Valgrind. The root cause is a
> missing g_free() in a string.
>
> In fact, I found while doing reviews that we keep repeating the same
> pattern:
>
> 
> char *name;
> name = g_strdup_printf(...);
> (...)
> g_free(name);
> 
>
> With this in mind, I ended up making this rather trivial series to
> introduce more string/array autocleaning in the 'virt' machine code. The
> advantage of doing 'g_autofree' is that we'll guarantee that we'll clean
> ourselves up when the variable goes out of scope, avoiding leaks like
> the one patch 1 fixes. We want to enforce this autoclean style in
> reviews, and for that we need to get rid of at least some of the uses we
> do it right now.
>
> I didn't bother changing the 'spike' and the 'sifive' boards for now
> because the bulk of new patches is done on top of the 'virt' machine,
> so it's more important to tidy this board first.
>
>
> Daniel Henrique Barboza (7):
>   hw/riscv/virt-acpi-build.c: fix leak in build_rhct()
>   hw/riscv/numa.c: use g_autofree in socket_fdt_write_distance_matrix()
>   hw/riscv/virt.c: use g_autofree in create_fdt_socket_cpus()
>   hw/riscv/virt.c: use g_autofree in create_fdt_sockets()
>   hw/riscv/virt.c: use g_autofree in create_fdt_virtio()
>   hw/riscv/virt.c: use g_autofree in virt_machine_init()
>   hw/riscv/virt.c: use g_autofree in create_fdt_*

Thanks!

Applied to riscv-to-apply.next

Alistair

>
>  hw/riscv/numa.c|   4 +-
>  hw/riscv/virt-acpi-build.c |   2 +-
>  hw/riscv/virt.c| 109 -
>  3 files changed, 37 insertions(+), 78 deletions(-)
>
> --
> 2.43.0
>
>

Re: [PULL 05/13] linux-user: Use walk_memory_regions for open_self_maps

2024-02-04 Thread Richard Henderson


On 1/26/24 23:52, Richard Purdie wrote:

Hi Michael,

On Fri, 2024-01-26 at 16:33 +0300, Michael Tokarev wrote:

26.01.2024 16:03, Richard Purdie wrote:

I've run into a problem with this change.

We (Yocto Project) upgraded to qemu 8.2.0 recently and after that we
started seeing errors cross compiling webkitgtk on x86_64 for x86_64
during the introspection code which runs under user mode qemu.


Besides your observations, please be aware there's quite a few issues in 8.2.0.
Please take a look at https://gitlab.com/mjt0k/qemu/-/commits/staging-8.2/
(and https://gitlab.com/qemu-project/qemu/-/commits/staging-8.2/ which is 
updated
less often) for fixes already queued up, if you haven't looked there already.
8.2.1 stable/bugfix release is scheduled for the beginning of the next week.


Thanks.

I should note that I did test the staging-8.2 branch and nothing there
helped. The issue was also present with master as of yesterday.

https://bugzilla.yoctoproject.org/show_bug.cgi?id=15367 is the Yocto
Project's tracking of the issue, which has the commits for master and
staging-8.2 that I tested.


The yocto logs referenced here are not helpful for reproducing the problem.
Please extract a binary to run, inputs, and command-line.


r~

Re: [PATCH 7/7] hw/riscv/virt.c: use g_autofree in create_fdt_*

2024-02-04 Thread Alistair Francis

On Tue, Jan 23, 2024 at 8:18 AM Daniel Henrique Barboza
 wrote:
>
> We have a lot of cases where a char or an uint32_t pointer is used once
> to alloc a string/array, read/written during the function, and then
> g_free() at the end. There's no pointer re-use - a single alloc, a
> single g_free().
>
> Use 'g_autofree' to avoid the g_free() calls.
>
> Signed-off-by: Daniel Henrique Barboza 

Reviewed-by: Alistair Francis 

Alistair

> ---
>  hw/riscv/virt.c | 78 ++---
>  1 file changed, 22 insertions(+), 56 deletions(-)
>
> diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c
> index 710fbbda2c..1c257e89d2 100644
> --- a/hw/riscv/virt.c
> +++ b/hw/riscv/virt.c
> @@ -285,7 +285,7 @@ static void create_fdt_socket_cpus(RISCVVirtState *s, int 
> socket,
>  static void create_fdt_socket_memory(RISCVVirtState *s,
>   const MemMapEntry *memmap, int socket)
>  {
> -char *mem_name;
> +g_autofree char *mem_name = NULL;
>  uint64_t addr, size;
>  MachineState *ms = MACHINE(s);
>
> @@ -297,7 +297,6 @@ static void create_fdt_socket_memory(RISCVVirtState *s,
>  addr >> 32, addr, size >> 32, size);
>  qemu_fdt_setprop_string(ms->fdt, mem_name, "device_type", "memory");
>  riscv_socket_fdt_write_id(ms, mem_name, socket);
> -g_free(mem_name);
>  }
>
>  static void create_fdt_socket_clint(RISCVVirtState *s,
> @@ -305,8 +304,8 @@ static void create_fdt_socket_clint(RISCVVirtState *s,
>  uint32_t *intc_phandles)
>  {
>  int cpu;
> -char *clint_name;
> -uint32_t *clint_cells;
> +g_autofree char *clint_name = NULL;
> +g_autofree uint32_t *clint_cells = NULL;
>  unsigned long clint_addr;
>  MachineState *ms = MACHINE(s);
>  static const char * const clint_compat[2] = {
> @@ -333,9 +332,6 @@ static void create_fdt_socket_clint(RISCVVirtState *s,
>  qemu_fdt_setprop(ms->fdt, clint_name, "interrupts-extended",
>  clint_cells, s->soc[socket].num_harts * sizeof(uint32_t) * 4);
>  riscv_socket_fdt_write_id(ms, clint_name, socket);
> -g_free(clint_name);
> -
> -g_free(clint_cells);
>  }
>
>  static void create_fdt_socket_aclint(RISCVVirtState *s,
> @@ -346,9 +342,9 @@ static void create_fdt_socket_aclint(RISCVVirtState *s,
>  char *name;
>  unsigned long addr, size;
>  uint32_t aclint_cells_size;
> -uint32_t *aclint_mswi_cells;
> -uint32_t *aclint_sswi_cells;
> -uint32_t *aclint_mtimer_cells;
> +g_autofree uint32_t *aclint_mswi_cells = NULL;
> +g_autofree uint32_t *aclint_sswi_cells = NULL;
> +g_autofree uint32_t *aclint_mtimer_cells = NULL;
>  MachineState *ms = MACHINE(s);
>
>  aclint_mswi_cells = g_new0(uint32_t, s->soc[socket].num_harts * 2);
> @@ -420,10 +416,6 @@ static void create_fdt_socket_aclint(RISCVVirtState *s,
>  riscv_socket_fdt_write_id(ms, name, socket);
>  g_free(name);
>  }
> -
> -g_free(aclint_mswi_cells);
> -g_free(aclint_mtimer_cells);
> -g_free(aclint_sswi_cells);
>  }
>
>  static void create_fdt_socket_plic(RISCVVirtState *s,
> @@ -432,8 +424,8 @@ static void create_fdt_socket_plic(RISCVVirtState *s,
> uint32_t *plic_phandles)
>  {
>  int cpu;
> -char *plic_name;
> -uint32_t *plic_cells;
> +g_autofree char *plic_name = NULL;
> +g_autofree uint32_t *plic_cells;
>  unsigned long plic_addr;
>  MachineState *ms = MACHINE(s);
>  static const char * const plic_compat[2] = {
> @@ -493,10 +485,6 @@ static void create_fdt_socket_plic(RISCVVirtState *s,
> memmap[VIRT_PLATFORM_BUS].size,
> VIRT_PLATFORM_BUS_IRQ);
>  }
> -
> -g_free(plic_name);
> -
> -g_free(plic_cells);
>  }
>
>  uint32_t imsic_num_bits(uint32_t count)
> @@ -515,11 +503,12 @@ static void create_fdt_one_imsic(RISCVVirtState *s, 
> hwaddr base_addr,
>   bool m_mode, uint32_t imsic_guest_bits)
>  {
>  int cpu, socket;
> -char *imsic_name;
> +g_autofree char *imsic_name = NULL;
>  MachineState *ms = MACHINE(s);
>  int socket_count = riscv_socket_count(ms);
> -uint32_t imsic_max_hart_per_socket;
> -uint32_t *imsic_cells, *imsic_regs, imsic_addr, imsic_size;
> +uint32_t imsic_max_hart_per_socket, imsic_addr, imsic_size;
> +g_autofree uint32_t *imsic_cells = NULL;
> +g_autofree uint32_t *imsic_regs = NULL;
>
>  imsic_cells = g_new0(uint32_t, ms->smp.cpus * 2);
>  imsic_regs = g_new0(uint32_t, socket_count * 4);
> @@ -571,10 +560,6 @@ static void create_fdt_one_imsic(RISCVVirtState *s, 
> hwaddr base_addr,
>IMSIC_MMIO_GROUP_MIN_SHIFT);
>  }
>  qemu_fdt_setprop_cell(ms->fdt, imsic_name, "phandle", msi_phandle);
> -
> -g_free(imsic_name);
> -g_free(imsic_regs);
> -g_free(imsic_cells);
>  }
>
>  static void

Re: [PATCH 6/7] hw/riscv/virt.c: use g_autofree in virt_machine_init()

2024-02-04 Thread Alistair Francis

On Tue, Jan 23, 2024 at 9:38 AM Daniel Henrique Barboza
 wrote:
>
> Move 'soc_name' to the loop, and give it g_autofree, to avoid the manual
> g_free().
>
> Signed-off-by: Daniel Henrique Barboza 

Reviewed-by: Alistair Francis 

Alistair

> ---
>  hw/riscv/virt.c | 5 ++---
>  1 file changed, 2 insertions(+), 3 deletions(-)
>
> diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c
> index f8278df83f..710fbbda2c 100644
> --- a/hw/riscv/virt.c
> +++ b/hw/riscv/virt.c
> @@ -1356,7 +1356,6 @@ static void virt_machine_init(MachineState *machine)
>  RISCVVirtState *s = RISCV_VIRT_MACHINE(machine);
>  MemoryRegion *system_memory = get_system_memory();
>  MemoryRegion *mask_rom = g_new(MemoryRegion, 1);
> -char *soc_name;
>  DeviceState *mmio_irqchip, *virtio_irqchip, *pcie_irqchip;
>  int i, base_hartid, hart_count;
>  int socket_count = riscv_socket_count(machine);
> @@ -1376,6 +1375,8 @@ static void virt_machine_init(MachineState *machine)
>  /* Initialize sockets */
>  mmio_irqchip = virtio_irqchip = pcie_irqchip = NULL;
>  for (i = 0; i < socket_count; i++) {
> +g_autofree char *soc_name = g_strdup_printf("soc%d", i);
> +
>  if (!riscv_socket_check_hartids(machine, i)) {
>  error_report("discontinuous hartids in socket%d", i);
>  exit(1);
> @@ -1393,10 +1394,8 @@ static void virt_machine_init(MachineState *machine)
>  exit(1);
>  }
>
> -soc_name = g_strdup_printf("soc%d", i);
>  object_initialize_child(OBJECT(machine), soc_name, &s->soc[i],
>  TYPE_RISCV_HART_ARRAY);
> -g_free(soc_name);
>  object_property_set_str(OBJECT(&s->soc[i]), "cpu-type",
>  machine->cpu_type, &error_abort);
>  object_property_set_int(OBJECT(&s->soc[i]), "hartid-base",
> --
> 2.43.0
>
>

Re: [PATCH 5/7] hw/riscv/virt.c: use g_autofree in create_fdt_virtio()

2024-02-04 Thread Alistair Francis

On Tue, Jan 23, 2024 at 9:38 AM Daniel Henrique Barboza
 wrote:
>
> Put 'name' declaration inside the loop, with g_autofree, to avoid
> manually doing g_free() in each iteration.
>
> Signed-off-by: Daniel Henrique Barboza 

Reviewed-by: Alistair Francis 

Alistair

> ---
>  hw/riscv/virt.c | 5 ++---
>  1 file changed, 2 insertions(+), 3 deletions(-)
>
> diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c
> index d0f402e0d5..f8278df83f 100644
> --- a/hw/riscv/virt.c
> +++ b/hw/riscv/virt.c
> @@ -820,12 +820,12 @@ static void create_fdt_virtio(RISCVVirtState *s, const 
> MemMapEntry *memmap,
>uint32_t irq_virtio_phandle)
>  {
>  int i;
> -char *name;
>  MachineState *ms = MACHINE(s);
>
>  for (i = 0; i < VIRTIO_COUNT; i++) {
> -name = g_strdup_printf("/soc/virtio_mmio@%lx",
> +g_autofree char *name =  g_strdup_printf("/soc/virtio_mmio@%lx",
>  (long)(memmap[VIRT_VIRTIO].base + i * memmap[VIRT_VIRTIO].size));
> +
>  qemu_fdt_add_subnode(ms->fdt, name);
>  qemu_fdt_setprop_string(ms->fdt, name, "compatible", "virtio,mmio");
>  qemu_fdt_setprop_cells(ms->fdt, name, "reg",
> @@ -840,7 +840,6 @@ static void create_fdt_virtio(RISCVVirtState *s, const 
> MemMapEntry *memmap,
>  qemu_fdt_setprop_cells(ms->fdt, name, "interrupts",
> VIRTIO_IRQ + i, 0x4);
>  }
> -g_free(name);
>  }
>  }
>
> --
> 2.43.0
>
>

Re: [PATCH 4/7] hw/riscv/virt.c: use g_autofree in create_fdt_sockets()

2024-02-04 Thread Alistair Francis

On Tue, Jan 23, 2024 at 8:16 AM Daniel Henrique Barboza
 wrote:
>
> Move 'clust_name' inside the loop, with g_autofree, to avoid having to
> g_free() manually in each loop iteration.
>
> 'intc_phandles' is also g_autofreed to avoid another manual g_free().
>
> Signed-off-by: Daniel Henrique Barboza 

Reviewed-by: Alistair Francis 

Alistair

> ---
>  hw/riscv/virt.c | 9 +++--
>  1 file changed, 3 insertions(+), 6 deletions(-)
>
> diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c
> index 373b1dd96b..d0f402e0d5 100644
> --- a/hw/riscv/virt.c
> +++ b/hw/riscv/virt.c
> @@ -721,11 +721,11 @@ static void create_fdt_sockets(RISCVVirtState *s, const 
> MemMapEntry *memmap,
> uint32_t *irq_virtio_phandle,
> uint32_t *msi_pcie_phandle)
>  {
> -char *clust_name;
>  int socket, phandle_pos;
>  MachineState *ms = MACHINE(s);
>  uint32_t msi_m_phandle = 0, msi_s_phandle = 0;
> -uint32_t *intc_phandles, xplic_phandles[MAX_NODES];
> +uint32_t xplic_phandles[MAX_NODES];
> +g_autofree uint32_t *intc_phandles = NULL;
>  int socket_count = riscv_socket_count(ms);
>
>  qemu_fdt_add_subnode(ms->fdt, "/cpus");
> @@ -739,6 +739,7 @@ static void create_fdt_sockets(RISCVVirtState *s, const 
> MemMapEntry *memmap,
>
>  phandle_pos = ms->smp.cpus;
>  for (socket = (socket_count - 1); socket >= 0; socket--) {
> +g_autofree char *clust_name = NULL;
>  phandle_pos -= s->soc[socket].num_harts;
>
>  clust_name = g_strdup_printf("/cpus/cpu-map/cluster%d", socket);
> @@ -749,8 +750,6 @@ static void create_fdt_sockets(RISCVVirtState *s, const 
> MemMapEntry *memmap,
>
>  create_fdt_socket_memory(s, memmap, socket);
>
> -g_free(clust_name);
> -
>  if (tcg_enabled()) {
>  if (s->have_aclint) {
>  create_fdt_socket_aclint(s, memmap, socket,
> @@ -793,8 +792,6 @@ static void create_fdt_sockets(RISCVVirtState *s, const 
> MemMapEntry *memmap,
>  }
>  }
>
> -g_free(intc_phandles);
> -
>  if (kvm_enabled() && virt_use_kvm_aia(s)) {
>  *irq_mmio_phandle = xplic_phandles[0];
>  *irq_virtio_phandle = xplic_phandles[0];
> --
> 2.43.0
>
>

Re: [PATCH 3/7] hw/riscv/virt.c: use g_autofree in create_fdt_socket_cpus()

2024-02-04 Thread Alistair Francis

On Tue, Jan 23, 2024 at 8:16 AM Daniel Henrique Barboza
 wrote:
>
> Move all char pointers to the loop. Use g_autofree in all of them to
> avoid the g_free() calls.
>
> Signed-off-by: Daniel Henrique Barboza 

Reviewed-by: Alistair Francis 

Alistair

> ---
>  hw/riscv/virt.c | 12 +---
>  1 file changed, 5 insertions(+), 7 deletions(-)
>
> diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c
> index f9fd1341fc..373b1dd96b 100644
> --- a/hw/riscv/virt.c
> +++ b/hw/riscv/virt.c
> @@ -215,12 +215,16 @@ static void create_fdt_socket_cpus(RISCVVirtState *s, 
> int socket,
>  int cpu;
>  uint32_t cpu_phandle;
>  MachineState *ms = MACHINE(s);
> -char *name, *cpu_name, *core_name, *intc_name, *sv_name;
>  bool is_32_bit = riscv_is_32bit(&s->soc[0]);
>  uint8_t satp_mode_max;
>
>  for (cpu = s->soc[socket].num_harts - 1; cpu >= 0; cpu--) {
>  RISCVCPU *cpu_ptr = &s->soc[socket].harts[cpu];
> +g_autofree char *name = NULL;
> +g_autofree char *cpu_name = NULL;
> +g_autofree char *core_name = NULL;
> +g_autofree char *intc_name = NULL;
> +g_autofree char *sv_name = NULL;
>
>  cpu_phandle = (*phandle)++;
>
> @@ -233,12 +237,10 @@ static void create_fdt_socket_cpus(RISCVVirtState *s, 
> int socket,
>  sv_name = g_strdup_printf("riscv,%s",
>satp_mode_str(satp_mode_max, 
> is_32_bit));
>  qemu_fdt_setprop_string(ms->fdt, cpu_name, "mmu-type", sv_name);
> -g_free(sv_name);
>  }
>
>  name = riscv_isa_string(cpu_ptr);
>  qemu_fdt_setprop_string(ms->fdt, cpu_name, "riscv,isa", name);
> -g_free(name);
>
>  if (cpu_ptr->cfg.ext_zicbom) {
>  qemu_fdt_setprop_cell(ms->fdt, cpu_name, "riscv,cbom-block-size",
> @@ -277,10 +279,6 @@ static void create_fdt_socket_cpus(RISCVVirtState *s, 
> int socket,
>  core_name = g_strdup_printf("%s/core%d", clust_name, cpu);
>  qemu_fdt_add_subnode(ms->fdt, core_name);
>  qemu_fdt_setprop_cell(ms->fdt, core_name, "cpu", cpu_phandle);
> -
> -g_free(core_name);
> -g_free(intc_name);
> -g_free(cpu_name);
>  }
>  }
>
> --
> 2.43.0
>
>

Re: [PATCH 2/7] hw/riscv/numa.c: use g_autofree in socket_fdt_write_distance_matrix()

2024-02-04 Thread Alistair Francis

On Tue, Jan 23, 2024 at 8:17 AM Daniel Henrique Barboza
 wrote:
>
> Use g_autofree in 'dist_matrix' to avoid the manual g_free().
>
> Signed-off-by: Daniel Henrique Barboza 

Reviewed-by: Alistair Francis 

Alistair

> ---
>  hw/riscv/numa.c | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
>
> diff --git a/hw/riscv/numa.c b/hw/riscv/numa.c
> index d319aefb45..cf686f4ff1 100644
> --- a/hw/riscv/numa.c
> +++ b/hw/riscv/numa.c
> @@ -167,7 +167,8 @@ void riscv_socket_fdt_write_id(const MachineState *ms, 
> const char *node_name,
>  void riscv_socket_fdt_write_distance_matrix(const MachineState *ms)
>  {
>  int i, j, idx;
> -uint32_t *dist_matrix, dist_matrix_size;
> +g_autofree uint32_t *dist_matrix = NULL;
> +uint32_t dist_matrix_size;
>
>  if (numa_enabled(ms) && ms->numa_state->have_numa_distance) {
>  dist_matrix_size = riscv_socket_count(ms) * riscv_socket_count(ms);
> @@ -189,7 +190,6 @@ void riscv_socket_fdt_write_distance_matrix(const 
> MachineState *ms)
>  "numa-distance-map-v1");
>  qemu_fdt_setprop(ms->fdt, "/distance-map", "distance-matrix",
>   dist_matrix, dist_matrix_size);
> -g_free(dist_matrix);
>  }
>  }
>
> --
> 2.43.0
>
>

Re: [PATCH 1/7] hw/riscv/virt-acpi-build.c: fix leak in build_rhct()

2024-02-04 Thread Alistair Francis

On Tue, Jan 23, 2024 at 8:16 AM Daniel Henrique Barboza
 wrote:
>
> The 'isa' char pointer isn't being freed after use.
>
> Issue detected by Valgrind:
>
> ==38752== 128 bytes in 1 blocks are definitely lost in loss record 3,190 of 
> 3,884
> ==38752==at 0x484280F: malloc (vg_replace_malloc.c:442)
> ==38752==by 0x5189619: g_malloc (gmem.c:130)
> ==38752==by 0x51A5BF2: g_strconcat (gstrfuncs.c:628)
> ==38752==by 0x6C1E3E: riscv_isa_string_ext (cpu.c:2321)
> ==38752==by 0x6C1E3E: riscv_isa_string (cpu.c:2343)
> ==38752==by 0x6BD2EA: build_rhct (virt-acpi-build.c:232)
> ==38752==by 0x6BD2EA: virt_acpi_build (virt-acpi-build.c:556)
> ==38752==by 0x6BDC86: virt_acpi_setup (virt-acpi-build.c:662)
> ==38752==by 0x9C8DC6: notifier_list_notify (notify.c:39)
> ==38752==by 0x4A595A: qdev_machine_creation_done (machine.c:1589)
> ==38752==by 0x61E052: qemu_machine_creation_done (vl.c:2680)
> ==38752==by 0x61E052: qmp_x_exit_preconfig.part.0 (vl.c:2709)
> ==38752==by 0x6220C6: qmp_x_exit_preconfig (vl.c:2702)
> ==38752==by 0x6220C6: qemu_init (vl.c:3758)
> ==38752==by 0x425858: main (main.c:47)
>
> Fixes: ebfd392893 ("hw/riscv/virt: virt-acpi-build.c: Add RHCT Table")
> Signed-off-by: Daniel Henrique Barboza 

Reviewed-by: Alistair Francis 

Alistair

> ---
>  hw/riscv/virt-acpi-build.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/hw/riscv/virt-acpi-build.c b/hw/riscv/virt-acpi-build.c
> index 26c7e4482d..fb8baf64f6 100644
> --- a/hw/riscv/virt-acpi-build.c
> +++ b/hw/riscv/virt-acpi-build.c
> @@ -196,7 +196,7 @@ static void build_rhct(GArray *table_data,
>  RISCVCPU *cpu = &s->soc[0].harts[0];
>  uint32_t mmu_offset = 0;
>  uint8_t satp_mode_max;
> -char *isa;
> +g_autofree char *isa = NULL;
>
>  AcpiTable table = { .sig = "RHCT", .rev = 1, .oem_id = s->oem_id,
>  .oem_table_id = s->oem_table_id };
> --
> 2.43.0
>
>

Re: [PULL 06/15] tests/qtest/migration: Don't use -cpu max for aarch64

2024-02-04 Thread Peter Xu

On Fri, Feb 02, 2024 at 10:51:36AM +0000, Peter Maydell wrote:
> On Thu, 1 Feb 2024 at 23:50, Peter Xu  wrote:
> >
> > Fabiano, I think you forgot to reply-to-all.. adding back the list and
> > people in the loop.
> >
> > On Thu, Feb 01, 2024 at 10:12:44AM -0300, Fabiano Rosas wrote:
> > > Peter Xu  writes:
> > >
> > > > On Wed, Jan 31, 2024 at 10:09:16AM -0300, Fabiano Rosas wrote:
> > > >> If we ask for KVM and it falls back to TCG, we need a cpu that supports
> > > >> both. We don't have that. I've put some command-line combinations at 
> > > >> the
> > > >> end of the email[1], take a look.
> > > >
> > > > Thanks a lot, Fabiano.  I think I have a better picture now.
> > > >
> > > > Now the question is whether it'll be worthwhile we (migration) 
> > > > explicitly
> > > > provide code to workaround such issue in qtest, or we wait for ARM side
> > > > until we have a processor that can be both stable and support KVM+TCG.
> > > >
> > > > I actually personally prefer to wait - it's not too bad after all, 
> > > > because
> > > > it only affects the new "n-1" migration test.  Most of the migration
> > > > functionality will still be covered there in CI for ARM.
> > >
> > > That's fine with me. We just need to do something about the arm CI job
> > > which is currently disabled waiting for a fix. We could remove it or add
> > > some words somewhere explaining the situation. I can do that once we
> > > reach an agreement here.
> >
> > Yes.  IMHO we can keep the test (with SKIPPED=1) but amend the message,
> > which will start to state inaccurately:
> >
> > # This job is disabled until we release 9.0. The existing
> > # migration-test in 8.2 is broken on aarch64. The fix was already
> > # commited, but it will only take effect once 9.0 is out.
> >
> > IMHO then it won't mean 9.0 will have it fixed, but we'll simply wait for a
> > cpu model that is ready for both kvm+tcg, then we replace "max".
> 
> We already have a CPU model that works for both KVM and TCG: that
> is "max". We're not going to add another one.

Thanks, but then this is pretty sad.  I'm surprised aarch64 doesn't have
such requirement to allow some VM config to run across all kinds of hosts.

> The difference is just that we provide different cross-version migration
> compatibility support levels for the two cases. (Strictly speaking, I'm
> not sure we strongly support migration compat for 'max' on KVM either --
> for instance you probably need to be doing a migration on the same host
> CPU type and the same host kernel version. It's just that the definition
> of "max" on KVM is less QEMU-dependent and more host-kernel-dependent, so
> in your particular situation running the test cases you're less likely to
> see any possible breakage.)

Yes we don't have issue for the current CI on KVM compatibilities, but QEMU
does matter for sure.

Then we can either (1) add code as Fabiano suggested to choose a different
cpu model by adding hack code in qtest, or (2) simply not support
aarch64 in the cross-binary test, like most of the other archs, and only
support x86 until an arch can provide a stable CPU model that supports all
host configurations (we can document it in the CI file).

I'd vote for (2).  Fabiano, do you have any preference?

-- 
Peter Xu

Re: [PATCH v2 2/2] target/riscv: Support xtheadmaee for thead-c906

2024-02-04 Thread Alistair Francis

On Sun, Feb 4, 2024 at 3:44 PM LIU Zhiwei  wrote:
>
> This patch set fixes the kernel regression pointed out by Björn Töpel in
> https://www.mail-archive.com/qemu-devel@nongnu.org/msg1018232.html.
>
> thead-c906 uses some flags in pte [60-63] bits. For historical reasons,
> SVPBMT didn't exist when thead-c906 came into the world. We named this feature
> xtheadmaee[1]. This feature is controlled by a custom CSR named mxstatus,
> whose maee field encodes whether the pte [60-63] bits are enabled.
>
> [1]:https://github.com/T-head-Semi/thead-extension-spec/blob/master/xtheadmaee.adoc
>
> Signed-off-by: LIU Zhiwei 
> ---
> v1->v2:
> 1) Remove mxstatus user mode access
> 2) Add reference documentation to the commit log
> ---
>  target/riscv/cpu.c |  6 
>  target/riscv/cpu.h |  9 ++
>  target/riscv/cpu_bits.h|  6 
>  target/riscv/cpu_cfg.h |  4 ++-
>  target/riscv/cpu_helper.c  | 25 ---
>  target/riscv/meson.build   |  1 +
>  target/riscv/tcg/tcg-cpu.c |  7 +++-
>  target/riscv/xthead_csr.c  | 65 ++
>  8 files changed, 110 insertions(+), 13 deletions(-)
>  create mode 100644 target/riscv/xthead_csr.c
>
> diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
> index 2dcbc9ff32..bfdbb0539a 100644
> --- a/target/riscv/cpu.c
> +++ b/target/riscv/cpu.c
> @@ -171,6 +171,7 @@ const RISCVIsaExtData isa_edata_arr[] = {
>  ISA_EXT_DATA_ENTRY(xtheadmemidx, PRIV_VERSION_1_11_0, ext_xtheadmemidx),
>  ISA_EXT_DATA_ENTRY(xtheadmempair, PRIV_VERSION_1_11_0, 
> ext_xtheadmempair),
>  ISA_EXT_DATA_ENTRY(xtheadsync, PRIV_VERSION_1_11_0, ext_xtheadsync),
> +ISA_EXT_DATA_ENTRY(xtheadmaee, PRIV_VERSION_1_11_0, ext_xtheadmaee),
>  ISA_EXT_DATA_ENTRY(xventanacondops, PRIV_VERSION_1_12_0, 
> ext_XVentanaCondOps),
>
>  DEFINE_PROP_END_OF_LIST(),
> @@ -506,6 +507,7 @@ static void rv64_thead_c906_cpu_init(Object *obj)
>
>  cpu->cfg.mvendorid = THEAD_VENDOR_ID;
>  #ifndef CONFIG_USER_ONLY
> +cpu->cfg.ext_xtheadmaee = true;
>  set_satp_mode_max_supported(cpu, VM_1_10_SV39);
>  #endif
>
> @@ -949,6 +951,9 @@ static void riscv_cpu_reset_hold(Object *obj)
>  }
>
>  pmp_unlock_entries(env);
> +if (riscv_cpu_cfg(env)->ext_xtheadmaee) {
> +env->th_mxstatus |= TH_MXSTATUS_MAEE;
> +}
>  #endif
>  env->xl = riscv_cpu_mxl(env);
>  riscv_cpu_update_mask(env);
> @@ -1439,6 +1444,7 @@ const RISCVCPUMultiExtConfig riscv_cpu_vendor_exts[] = {
>  MULTI_EXT_CFG_BOOL("xtheadmemidx", ext_xtheadmemidx, false),
>  MULTI_EXT_CFG_BOOL("xtheadmempair", ext_xtheadmempair, false),
>  MULTI_EXT_CFG_BOOL("xtheadsync", ext_xtheadsync, false),
> +MULTI_EXT_CFG_BOOL("xtheadmaee", ext_xtheadmaee, false),
>  MULTI_EXT_CFG_BOOL("xventanacondops", ext_XVentanaCondOps, false),
>
>  DEFINE_PROP_END_OF_LIST(),
> diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
> index 5f3955c38d..1bacf40355 100644
> --- a/target/riscv/cpu.h
> +++ b/target/riscv/cpu.h
> @@ -412,6 +412,14 @@ struct CPUArchState {
>  target_ulong cur_pmmask;
>  target_ulong cur_pmbase;
>
> +union {
> +/* Custom CSR for Xuantie CPU */
> +struct {
> +#ifndef CONFIG_USER_ONLY
> +target_ulong th_mxstatus;
> +#endif
> +};
> +};
>  /* Fields from here on are preserved across CPU reset. */
>  QEMUTimer *stimer; /* Internal timer for S-mode interrupt */
>  QEMUTimer *vstimer; /* Internal timer for VS-mode interrupt */
> @@ -799,6 +807,7 @@ void riscv_add_satp_mode_properties(Object *obj);
>  bool riscv_cpu_accelerator_compatible(RISCVCPU *cpu);
>
>  /* CSR function table */
> +extern riscv_csr_operations th_csr_ops[CSR_TABLE_SIZE];
>  extern riscv_csr_operations csr_ops[CSR_TABLE_SIZE];
>
>  extern const bool valid_vm_1_10_32[], valid_vm_1_10_64[];
> diff --git a/target/riscv/cpu_bits.h b/target/riscv/cpu_bits.h
> index e116f6c252..67ebb1cefe 100644
> --- a/target/riscv/cpu_bits.h
> +++ b/target/riscv/cpu_bits.h
> @@ -897,4 +897,10 @@ typedef enum RISCVException {
>  /* JVT CSR bits */
>  #define JVT_MODE   0x3F
>  #define JVT_BASE   (~0x3F)
> +
> +/* Xuantie custom CSRs */
> +#define CSR_TH_MXSTATUS 0x7c0
> +
> +#define TH_MXSTATUS_MAEE_SHIFT  21
> +#define TH_MXSTATUS_MAEE(0x1 << TH_MXSTATUS_MAEE_SHIFT)
>  #endif
> diff --git a/target/riscv/cpu_cfg.h b/target/riscv/cpu_cfg.h
> index 780ae6ef17..3735c69fd6 100644
> --- a/target/riscv/cpu_cfg.h
> +++ b/target/riscv/cpu_cfg.h
> @@ -136,6 +136,7 @@ struct RISCVCPUConfig {
>  bool ext_xtheadmemidx;
>  bool ext_xtheadmempair;
>  bool ext_xtheadsync;
> +bool ext_xtheadmaee;
>  bool ext_XVentanaCondOps;
>
>  uint32_t pmu_mask;
> @@ -176,7 +177,8 @@ static inline bool has_xthead_p(const RISCVCPUConfig *cfg)
> cfg->ext_xtheadcondmov ||
> cfg->ext_xtheadfmemidx || cfg->ext_xtheadfmv ||
> cfg->ext_xtheadmac || cfg->ext_xtheadmemidx ||

[PATCH 1/3] target/arm: Fix SVE/SME gross MTE suppression checks

2024-02-04 Thread Richard Henderson

The TBI and TCMA bits are located within mtedesc, not desc.

Signed-off-by: Richard Henderson 
---
 target/arm/tcg/sme_helper.c |  8 
 target/arm/tcg/sve_helper.c | 12 ++--
 2 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/target/arm/tcg/sme_helper.c b/target/arm/tcg/sme_helper.c
index 1ee2690ceb..904bfdac43 100644
--- a/target/arm/tcg/sme_helper.c
+++ b/target/arm/tcg/sme_helper.c
@@ -573,8 +573,8 @@ void sme_ld1_mte(CPUARMState *env, void *za, uint64_t *vg,
 desc = extract32(desc, 0, SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT);
 
 /* Perform gross MTE suppression early. */
-if (!tbi_check(desc, bit55) ||
-tcma_check(desc, bit55, allocation_tag_from_addr(addr))) {
+if (!tbi_check(mtedesc, bit55) ||
+tcma_check(mtedesc, bit55, allocation_tag_from_addr(addr))) {
 mtedesc = 0;
 }
 
@@ -750,8 +750,8 @@ void sme_st1_mte(CPUARMState *env, void *za, uint64_t *vg, 
target_ulong addr,
 desc = extract32(desc, 0, SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT);
 
 /* Perform gross MTE suppression early. */
-if (!tbi_check(desc, bit55) ||
-tcma_check(desc, bit55, allocation_tag_from_addr(addr))) {
+if (!tbi_check(mtedesc, bit55) ||
+tcma_check(mtedesc, bit55, allocation_tag_from_addr(addr))) {
 mtedesc = 0;
 }
 
diff --git a/target/arm/tcg/sve_helper.c b/target/arm/tcg/sve_helper.c
index bce4295d28..6853f58c19 100644
--- a/target/arm/tcg/sve_helper.c
+++ b/target/arm/tcg/sve_helper.c
@@ -5800,8 +5800,8 @@ void sve_ldN_r_mte(CPUARMState *env, uint64_t *vg, 
target_ulong addr,
 desc = extract32(desc, 0, SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT);
 
 /* Perform gross MTE suppression early. */
-if (!tbi_check(desc, bit55) ||
-tcma_check(desc, bit55, allocation_tag_from_addr(addr))) {
+if (!tbi_check(mtedesc, bit55) ||
+tcma_check(mtedesc, bit55, allocation_tag_from_addr(addr))) {
 mtedesc = 0;
 }
 
@@ -6156,8 +6156,8 @@ void sve_ldnfff1_r_mte(CPUARMState *env, void *vg, 
target_ulong addr,
 desc = extract32(desc, 0, SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT);
 
 /* Perform gross MTE suppression early. */
-if (!tbi_check(desc, bit55) ||
-tcma_check(desc, bit55, allocation_tag_from_addr(addr))) {
+if (!tbi_check(mtedesc, bit55) ||
+tcma_check(mtedesc, bit55, allocation_tag_from_addr(addr))) {
 mtedesc = 0;
 }
 
@@ -6410,8 +6410,8 @@ void sve_stN_r_mte(CPUARMState *env, uint64_t *vg, 
target_ulong addr,
 desc = extract32(desc, 0, SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT);
 
 /* Perform gross MTE suppression early. */
-if (!tbi_check(desc, bit55) ||
-tcma_check(desc, bit55, allocation_tag_from_addr(addr))) {
+if (!tbi_check(mtedesc, bit55) ||
+tcma_check(mtedesc, bit55, allocation_tag_from_addr(addr))) {
 mtedesc = 0;
 }
 
-- 
2.34.1

[PATCH 2/3] target/arm: Move SVE/SME MTE disable checks inward

2024-02-04 Thread Richard Henderson

Signed-off-by: Richard Henderson 
---
 target/arm/tcg/sme_helper.c | 32 +---
 target/arm/tcg/sve_helper.c | 76 +
 2 files changed, 20 insertions(+), 88 deletions(-)

diff --git a/target/arm/tcg/sme_helper.c b/target/arm/tcg/sme_helper.c
index 904bfdac43..b3e0ba9b29 100644
--- a/target/arm/tcg/sme_helper.c
+++ b/target/arm/tcg/sme_helper.c
@@ -459,14 +459,7 @@ void sme_ld1(CPUARMState *env, void *za, uint64_t *vg,
 sve_cont_ldst_watchpoints(, env, vg, addr, esize, esize,
   BP_MEM_READ, ra);
 
-/*
- * Handle mte checks for all active elements.
- * Since TBI must be set for MTE, !mtedesc => !mte_active.
- */
-if (mtedesc) {
-sve_cont_ldst_mte_check(, env, vg, addr, esize, esize,
-mtedesc, ra);
-}
+sve_cont_ldst_mte_check(, env, vg, addr, esize, esize, mtedesc, ra);
 
 flags = info.page[0].flags | info.page[1].flags;
 if (unlikely(flags != 0)) {
@@ -567,17 +560,10 @@ void sme_ld1_mte(CPUARMState *env, void *za, uint64_t *vg,
  CopyFn *cpy_fn)
 {
 uint32_t mtedesc = desc >> (SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT);
-int bit55 = extract64(addr, 55, 1);
 
 /* Remove mtedesc from the normal sve descriptor. */
 desc = extract32(desc, 0, SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT);
 
-/* Perform gross MTE suppression early. */
-if (!tbi_check(mtedesc, bit55) ||
-tcma_check(mtedesc, bit55, allocation_tag_from_addr(addr))) {
-mtedesc = 0;
-}
-
 sme_ld1(env, za, vg, addr, desc, ra, esz, mtedesc, vertical,
 host_fn, tlb_fn, clr_fn, cpy_fn);
 }
@@ -655,14 +641,7 @@ void sme_st1(CPUARMState *env, void *za, uint64_t *vg,
 sve_cont_ldst_watchpoints(, env, vg, addr, esize, esize,
   BP_MEM_WRITE, ra);
 
-/*
- * Handle mte checks for all active elements.
- * Since TBI must be set for MTE, !mtedesc => !mte_active.
- */
-if (mtedesc) {
-sve_cont_ldst_mte_check(, env, vg, addr, esize, esize,
-mtedesc, ra);
-}
+sve_cont_ldst_mte_check(, env, vg, addr, esize, esize, mtedesc, ra);
 
 flags = info.page[0].flags | info.page[1].flags;
 if (unlikely(flags != 0)) {
@@ -744,17 +723,10 @@ void sme_st1_mte(CPUARMState *env, void *za, uint64_t 
*vg, target_ulong addr,
  sve_ldst1_tlb_fn *tlb_fn)
 {
 uint32_t mtedesc = desc >> (SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT);
-int bit55 = extract64(addr, 55, 1);
 
 /* Remove mtedesc from the normal sve descriptor. */
 desc = extract32(desc, 0, SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT);
 
-/* Perform gross MTE suppression early. */
-if (!tbi_check(mtedesc, bit55) ||
-tcma_check(mtedesc, bit55, allocation_tag_from_addr(addr))) {
-mtedesc = 0;
-}
-
 sme_st1(env, za, vg, addr, desc, ra, esz, mtedesc,
 vertical, host_fn, tlb_fn);
 }
diff --git a/target/arm/tcg/sve_helper.c b/target/arm/tcg/sve_helper.c
index 6853f58c19..9fd469b00f 100644
--- a/target/arm/tcg/sve_helper.c
+++ b/target/arm/tcg/sve_helper.c
@@ -5604,6 +5604,13 @@ void sve_cont_ldst_mte_check(SVEContLdSt *info, 
CPUARMState *env,
  int msize, uint32_t mtedesc, uintptr_t ra)
 {
 intptr_t mem_off, reg_off, reg_last;
+int bit55 = extract64(addr, 55, 1);
+
+/* Perform gross MTE suppression early. */
+if (!tbi_check(mtedesc, bit55) ||
+tcma_check(mtedesc, bit55, allocation_tag_from_addr(addr))) {
+return;
+}
 
 /* Process the page only if MemAttr == Tagged. */
 if (info->page[0].tagged) {
@@ -5677,14 +5684,9 @@ void sve_ldN_r(CPUARMState *env, uint64_t *vg, const 
target_ulong addr,
 sve_cont_ldst_watchpoints(, env, vg, addr, 1 << esz, N << msz,
   BP_MEM_READ, retaddr);
 
-/*
- * Handle mte checks for all active elements.
- * Since TBI must be set for MTE, !mtedesc => !mte_active.
- */
-if (mtedesc) {
-sve_cont_ldst_mte_check(, env, vg, addr, 1 << esz, N << msz,
-mtedesc, retaddr);
-}
+/* Handle mte checks for all active elements. */
+sve_cont_ldst_mte_check(, env, vg, addr, 1 << esz, N << msz,
+mtedesc, retaddr);
 
 flags = info.page[0].flags | info.page[1].flags;
 if (unlikely(flags != 0)) {
@@ -5794,17 +5796,10 @@ void sve_ldN_r_mte(CPUARMState *env, uint64_t *vg, 
target_ulong addr,
sve_ldst1_tlb_fn *tlb_fn)
 {
 uint32_t mtedesc = desc >> (SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT);
-int bit55 = extract64(addr, 55, 1);
 
 /* Remove mtedesc from the normal sve descriptor. */
 desc = extract32(desc, 0, SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT);
 
-/* Perform gross MTE suppression early. */
-if (!tbi_check(mtedesc, bit55) ||
-tcma_check(mtedesc, bit55, allocation_tag_from_addr(addr))) {
-mtedesc

[PATCH 0/3] target/arm: Split mtedesc from vector desc

2024-02-04 Thread Richard Henderson

This aims to solve an assertion failure in simd_desc() that
is triggered by overflowing the 22-bit simd data field, when
SVE vectors are configured to be long and MTE is enabled.

Gustavo, can you please run this through your test case?
I looked back through the irc log and only see the
memory-tagging-extension.rst test case for PR_SET_TAGGED_ADDR_CTRL,
not one that tests MTE+SVE together.


r~


Richard Henderson (3):
  target/arm: Fix SVE/SME gross MTE suppression checks
  target/arm: Move SVE/SME MTE disable checks inward
  target/arm: Split mtedesc from vector desc

 target/arm/internals.h |8 +-
 target/arm/tcg/helper-sme.h|  108 +--
 target/arm/tcg/helper-sve.h| 1255 
 target/arm/tcg/sme_helper.c|  155 +---
 target/arm/tcg/sve_helper.c|  431 +++
 target/arm/tcg/translate-sme.c |   33 +-
 target/arm/tcg/translate-sve.c | 1246 +++
 7 files changed, 926 insertions(+), 2310 deletions(-)

-- 
2.34.1

Re: [PATCH] target/riscv: Use RISCVException as return type for all csr ops

2024-02-04 Thread Alistair Francis

On Tue, Jan 30, 2024 at 10:49 PM LIU Zhiwei
 wrote:
>
> The real return value type has been converted to RISCVException,
> but some function declarations still have not. This patch makes all
> csr operation declarations use RISCVException.
>
> Signed-off-by: LIU Zhiwei 

Thanks!

Applied to riscv-to-apply.next

Alistair

> ---
>  target/riscv/csr.c | 117 -
>  1 file changed, 74 insertions(+), 43 deletions(-)
>
> diff --git a/target/riscv/csr.c b/target/riscv/csr.c
> index 674ea075a4..ac9a856cc5 100644
> --- a/target/riscv/csr.c
> +++ b/target/riscv/csr.c
> @@ -242,7 +242,7 @@ static RISCVException any32(CPURISCVState *env, int csrno)
>
>  }
>
> -static int aia_any(CPURISCVState *env, int csrno)
> +static RISCVException aia_any(CPURISCVState *env, int csrno)
>  {
>  if (!riscv_cpu_cfg(env)->ext_smaia) {
>  return RISCV_EXCP_ILLEGAL_INST;
> @@ -251,7 +251,7 @@ static int aia_any(CPURISCVState *env, int csrno)
>  return any(env, csrno);
>  }
>
> -static int aia_any32(CPURISCVState *env, int csrno)
> +static RISCVException aia_any32(CPURISCVState *env, int csrno)
>  {
>  if (!riscv_cpu_cfg(env)->ext_smaia) {
>  return RISCV_EXCP_ILLEGAL_INST;
> @@ -269,7 +269,7 @@ static RISCVException smode(CPURISCVState *env, int csrno)
>  return RISCV_EXCP_ILLEGAL_INST;
>  }
>
> -static int smode32(CPURISCVState *env, int csrno)
> +static RISCVException smode32(CPURISCVState *env, int csrno)
>  {
>  if (riscv_cpu_mxl(env) != MXL_RV32) {
>  return RISCV_EXCP_ILLEGAL_INST;
> @@ -278,7 +278,7 @@ static int smode32(CPURISCVState *env, int csrno)
>  return smode(env, csrno);
>  }
>
> -static int aia_smode(CPURISCVState *env, int csrno)
> +static RISCVException aia_smode(CPURISCVState *env, int csrno)
>  {
>  if (!riscv_cpu_cfg(env)->ext_ssaia) {
>  return RISCV_EXCP_ILLEGAL_INST;
> @@ -287,7 +287,7 @@ static int aia_smode(CPURISCVState *env, int csrno)
>  return smode(env, csrno);
>  }
>
> -static int aia_smode32(CPURISCVState *env, int csrno)
> +static RISCVException aia_smode32(CPURISCVState *env, int csrno)
>  {
>  if (!riscv_cpu_cfg(env)->ext_ssaia) {
>  return RISCV_EXCP_ILLEGAL_INST;
> @@ -496,7 +496,7 @@ static RISCVException pointer_masking(CPURISCVState *env, 
> int csrno)
>  return RISCV_EXCP_ILLEGAL_INST;
>  }
>
> -static int aia_hmode(CPURISCVState *env, int csrno)
> +static RISCVException aia_hmode(CPURISCVState *env, int csrno)
>  {
>  if (!riscv_cpu_cfg(env)->ext_ssaia) {
>  return RISCV_EXCP_ILLEGAL_INST;
> @@ -505,7 +505,7 @@ static int aia_hmode(CPURISCVState *env, int csrno)
>   return hmode(env, csrno);
>  }
>
> -static int aia_hmode32(CPURISCVState *env, int csrno)
> +static RISCVException aia_hmode32(CPURISCVState *env, int csrno)
>  {
>  if (!riscv_cpu_cfg(env)->ext_ssaia) {
>  return RISCV_EXCP_ILLEGAL_INST;
> @@ -681,7 +681,8 @@ static RISCVException read_vl(CPURISCVState *env, int 
> csrno,
>  return RISCV_EXCP_NONE;
>  }
>
> -static int read_vlenb(CPURISCVState *env, int csrno, target_ulong *val)
> +static RISCVException read_vlenb(CPURISCVState *env, int csrno,
> + target_ulong *val)
>  {
>  *val = riscv_cpu_cfg(env)->vlen >> 3;
>  return RISCV_EXCP_NONE;
> @@ -742,13 +743,15 @@ static RISCVException write_vstart(CPURISCVState *env, 
> int csrno,
>  return RISCV_EXCP_NONE;
>  }
>
> -static int read_vcsr(CPURISCVState *env, int csrno, target_ulong *val)
> +static RISCVException read_vcsr(CPURISCVState *env, int csrno,
> +target_ulong *val)
>  {
>  *val = (env->vxrm << VCSR_VXRM_SHIFT) | (env->vxsat << VCSR_VXSAT_SHIFT);
>  return RISCV_EXCP_NONE;
>  }
>
> -static int write_vcsr(CPURISCVState *env, int csrno, target_ulong val)
> +static RISCVException write_vcsr(CPURISCVState *env, int csrno,
> + target_ulong val)
>  {
>  #if !defined(CONFIG_USER_ONLY)
>  env->mstatus |= MSTATUS_VS;
> @@ -798,13 +801,15 @@ static RISCVException read_timeh(CPURISCVState *env, 
> int csrno,
>  return RISCV_EXCP_NONE;
>  }
>
> -static int read_hpmcounter(CPURISCVState *env, int csrno, target_ulong *val)
> +static RISCVException read_hpmcounter(CPURISCVState *env, int csrno,
> +  target_ulong *val)
>  {
>  *val = get_ticks(false);
>  return RISCV_EXCP_NONE;
>  }
>
> -static int read_hpmcounterh(CPURISCVState *env, int csrno, target_ulong *val)
> +static RISCVException read_hpmcounterh(CPURISCVState *env, int csrno,
> +   target_ulong *val)
>  {
>  *val = get_ticks(true);
>  return RISCV_EXCP_NONE;
> @@ -812,7 +817,8 @@ static int read_hpmcounterh(CPURISCVState *env, int 
> csrno, target_ulong *val)
>
>  #else /* CONFIG_USER_ONLY */
>
> -static int read_mhpmevent(CPURISCVState *env, int csrno, target_ulong *val)
> +static RISCVException read_mhpmevent(CPURISCVState

[PATCH] tests/cdrom-test: Add cdrom test for LoongArch virt machine

2024-02-04 Thread Bibo Mao

The cdrom test is skipped on LoongArch systems when running
"make check"; this patch enables the cdrom test for the LoongArch virt
machine platform.

With this patch, the cdrom test passes on the LoongArch virt
machine type.

Signed-off-by: Bibo Mao 
---
 tests/qtest/cdrom-test.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/tests/qtest/cdrom-test.c b/tests/qtest/cdrom-test.c
index 0945383789..c8b97d8d9a 100644
--- a/tests/qtest/cdrom-test.c
+++ b/tests/qtest/cdrom-test.c
@@ -271,6 +271,9 @@ int main(int argc, char **argv)
 const char *virtmachine[] = { "virt", NULL };
 add_cdrom_param_tests(virtmachine);
 }
+} else if (g_str_equal(arch, "loongarch64")) {
+const char *virtmachine[] = { "virt", NULL };
+add_cdrom_param_tests(virtmachine);
 } else {
 const char *nonemachine[] = { "none", NULL };
 add_cdrom_param_tests(nonemachine);
-- 
2.39.3

Re: [PATCH] target/riscv: Use RISCVException as return type for all csr ops

2024-02-04 Thread Alistair Francis

On Tue, Jan 30, 2024 at 10:49 PM LIU Zhiwei
 wrote:
>
> The real return value type has been converted to RISCVException,
> but some function declarations have not been updated yet. This patch
> makes all csr operation declarations use RISCVException.
>
> Signed-off-by: LIU Zhiwei 

Reviewed-by: Alistair Francis 

Alistair

> ---
>  target/riscv/csr.c | 117 -
>  1 file changed, 74 insertions(+), 43 deletions(-)
>
> diff --git a/target/riscv/csr.c b/target/riscv/csr.c
> index 674ea075a4..ac9a856cc5 100644
> --- a/target/riscv/csr.c
> +++ b/target/riscv/csr.c
> @@ -242,7 +242,7 @@ static RISCVException any32(CPURISCVState *env, int csrno)
>
>  }
>
> -static int aia_any(CPURISCVState *env, int csrno)
> +static RISCVException aia_any(CPURISCVState *env, int csrno)
>  {
>  if (!riscv_cpu_cfg(env)->ext_smaia) {
>  return RISCV_EXCP_ILLEGAL_INST;
> @@ -251,7 +251,7 @@ static int aia_any(CPURISCVState *env, int csrno)
>  return any(env, csrno);
>  }
>
> -static int aia_any32(CPURISCVState *env, int csrno)
> +static RISCVException aia_any32(CPURISCVState *env, int csrno)
>  {
>  if (!riscv_cpu_cfg(env)->ext_smaia) {
>  return RISCV_EXCP_ILLEGAL_INST;
> @@ -269,7 +269,7 @@ static RISCVException smode(CPURISCVState *env, int csrno)
>  return RISCV_EXCP_ILLEGAL_INST;
>  }
>
> -static int smode32(CPURISCVState *env, int csrno)
> +static RISCVException smode32(CPURISCVState *env, int csrno)
>  {
>  if (riscv_cpu_mxl(env) != MXL_RV32) {
>  return RISCV_EXCP_ILLEGAL_INST;
> @@ -278,7 +278,7 @@ static int smode32(CPURISCVState *env, int csrno)
>  return smode(env, csrno);
>  }
>
> -static int aia_smode(CPURISCVState *env, int csrno)
> +static RISCVException aia_smode(CPURISCVState *env, int csrno)
>  {
>  if (!riscv_cpu_cfg(env)->ext_ssaia) {
>  return RISCV_EXCP_ILLEGAL_INST;
> @@ -287,7 +287,7 @@ static int aia_smode(CPURISCVState *env, int csrno)
>  return smode(env, csrno);
>  }
>
> -static int aia_smode32(CPURISCVState *env, int csrno)
> +static RISCVException aia_smode32(CPURISCVState *env, int csrno)
>  {
>  if (!riscv_cpu_cfg(env)->ext_ssaia) {
>  return RISCV_EXCP_ILLEGAL_INST;
> @@ -496,7 +496,7 @@ static RISCVException pointer_masking(CPURISCVState *env, 
> int csrno)
>  return RISCV_EXCP_ILLEGAL_INST;
>  }
>
> -static int aia_hmode(CPURISCVState *env, int csrno)
> +static RISCVException aia_hmode(CPURISCVState *env, int csrno)
>  {
>  if (!riscv_cpu_cfg(env)->ext_ssaia) {
>  return RISCV_EXCP_ILLEGAL_INST;
> @@ -505,7 +505,7 @@ static int aia_hmode(CPURISCVState *env, int csrno)
>   return hmode(env, csrno);
>  }
>
> -static int aia_hmode32(CPURISCVState *env, int csrno)
> +static RISCVException aia_hmode32(CPURISCVState *env, int csrno)
>  {
>  if (!riscv_cpu_cfg(env)->ext_ssaia) {
>  return RISCV_EXCP_ILLEGAL_INST;
> @@ -681,7 +681,8 @@ static RISCVException read_vl(CPURISCVState *env, int 
> csrno,
>  return RISCV_EXCP_NONE;
>  }
>
> -static int read_vlenb(CPURISCVState *env, int csrno, target_ulong *val)
> +static RISCVException read_vlenb(CPURISCVState *env, int csrno,
> + target_ulong *val)
>  {
>  *val = riscv_cpu_cfg(env)->vlen >> 3;
>  return RISCV_EXCP_NONE;
> @@ -742,13 +743,15 @@ static RISCVException write_vstart(CPURISCVState *env, 
> int csrno,
>  return RISCV_EXCP_NONE;
>  }
>
> -static int read_vcsr(CPURISCVState *env, int csrno, target_ulong *val)
> +static RISCVException read_vcsr(CPURISCVState *env, int csrno,
> +target_ulong *val)
>  {
>  *val = (env->vxrm << VCSR_VXRM_SHIFT) | (env->vxsat << VCSR_VXSAT_SHIFT);
>  return RISCV_EXCP_NONE;
>  }
>
> -static int write_vcsr(CPURISCVState *env, int csrno, target_ulong val)
> +static RISCVException write_vcsr(CPURISCVState *env, int csrno,
> + target_ulong val)
>  {
>  #if !defined(CONFIG_USER_ONLY)
>  env->mstatus |= MSTATUS_VS;
> @@ -798,13 +801,15 @@ static RISCVException read_timeh(CPURISCVState *env, 
> int csrno,
>  return RISCV_EXCP_NONE;
>  }
>
> -static int read_hpmcounter(CPURISCVState *env, int csrno, target_ulong *val)
> +static RISCVException read_hpmcounter(CPURISCVState *env, int csrno,
> +  target_ulong *val)
>  {
>  *val = get_ticks(false);
>  return RISCV_EXCP_NONE;
>  }
>
> -static int read_hpmcounterh(CPURISCVState *env, int csrno, target_ulong *val)
> +static RISCVException read_hpmcounterh(CPURISCVState *env, int csrno,
> +   target_ulong *val)
>  {
>  *val = get_ticks(true);
>  return RISCV_EXCP_NONE;
> @@ -812,7 +817,8 @@ static int read_hpmcounterh(CPURISCVState *env, int 
> csrno, target_ulong *val)
>
>  #else /* CONFIG_USER_ONLY */
>
> -static int read_mhpmevent(CPURISCVState *env, int csrno, target_ulong *val)
> +static RISCVException read_mhpmevent(CPURISCVState *env, int

Re: [PATCH] target/riscv: FCSR doesn't contain vxrm and vxsat

2024-02-04 Thread Alistair Francis

On Tue, Jan 30, 2024 at 9:10 PM LIU Zhiwei  wrote:
>
> vxrm and vxsat have been moved into a special register vcsr since
> RVV v1.0. So remove them from FCSR for vector 1.0.
>
> Signed-off-by: LIU Zhiwei 

Thanks!

Applied to riscv-to-apply.next

Alistair

> ---
>  target/riscv/cpu_bits.h | 8 
>  1 file changed, 8 deletions(-)
>
> diff --git a/target/riscv/cpu_bits.h b/target/riscv/cpu_bits.h
> index ebd7917d49..e116f6c252 100644
> --- a/target/riscv/cpu_bits.h
> +++ b/target/riscv/cpu_bits.h
> @@ -32,14 +32,6 @@
>  #define FSR_NXA (FPEXC_NX << FSR_AEXC_SHIFT)
>  #define FSR_AEXC (FSR_NVA | FSR_OFA | FSR_UFA | FSR_DZA | FSR_NXA)
>
> -/* Vector Fixed-Point round model */
> -#define FSR_VXRM_SHIFT  9
> -#define FSR_VXRM (0x3 << FSR_VXRM_SHIFT)
> -
> -/* Vector Fixed-Point saturation flag */
> -#define FSR_VXSAT_SHIFT 8
> -#define FSR_VXSAT   (0x1 << FSR_VXSAT_SHIFT)
> -
>  /* Control and Status Registers */
>
>  /* User Trap Setup */
> --
> 2.25.1
>
>

Re: [PATCH 2/3] hw/arm : Connect DM163 to STM32L4x5

2024-02-04 Thread Alistair Francis

On Sat, Jan 27, 2024 at 7:09 AM Inès Varhol
 wrote:
>
> Signed-off-by: Arnaud Minier 
> Signed-off-by: Inès Varhol 

Acked-by: Alistair Francis 

Alistair

> ---
>  hw/arm/Kconfig |  1 +
>  hw/arm/stm32l4x5_soc.c | 55 +-
>  include/hw/arm/stm32l4x5_soc.h |  3 ++
>  3 files changed, 58 insertions(+), 1 deletion(-)
>
> diff --git a/hw/arm/Kconfig b/hw/arm/Kconfig
> index 3e49b913f8..818aa2f1a2 100644
> --- a/hw/arm/Kconfig
> +++ b/hw/arm/Kconfig
> @@ -463,6 +463,7 @@ config STM32L4X5_SOC
>  select STM32L4X5_SYSCFG
>  select STM32L4X5_RCC
>  select STM32L4X5_GPIO
> +select DM163
>
>  config XLNX_ZYNQMP_ARM
>  bool
> diff --git a/hw/arm/stm32l4x5_soc.c b/hw/arm/stm32l4x5_soc.c
> index 478c6ba056..8663546901 100644
> --- a/hw/arm/stm32l4x5_soc.c
> +++ b/hw/arm/stm32l4x5_soc.c
> @@ -26,7 +26,9 @@
>  #include "qapi/error.h"
>  #include "exec/address-spaces.h"
>  #include "sysemu/sysemu.h"
> +#include "hw/core/split-irq.h"
>  #include "hw/arm/stm32l4x5_soc.h"
> +#include "hw/display/dm163.h"
>  #include "hw/qdev-clock.h"
>  #include "hw/misc/unimp.h"
>
> @@ -78,6 +80,31 @@ static const int exti_irq[NUM_EXTI_IRQ] = {
>  #define RCC_BASE_ADDRESS 0x40021000
>  #define RCC_IRQ 5
>
> +/*
> + * There are actually 14 input pins in the DM163 device.
> + * Here the DM163 input pin EN isn't connected to the STM32L4x5
> + * GPIOs as the IM120417002 colors shield doesn't actually use
> + * this pin to drive the RGB matrix.
> + */
> +#define NUM_DM163_INPUTS 13
> +
> +static const int dm163_input[NUM_DM163_INPUTS] = {
> +1 * 16 + 2,  /* ROW0  PB2   */
> +0 * 16 + 15, /* ROW1  PA15  */
> +0 * 16 + 2,  /* ROW2  PA2   */
> +0 * 16 + 7,  /* ROW3  PA7   */
> +0 * 16 + 6,  /* ROW4  PA6   */
> +0 * 16 + 5,  /* ROW5  PA5   */
> +1 * 16 + 0,  /* ROW6  PB0   */
> +0 * 16 + 3,  /* ROW7  PA3   */
> +0 * 16 + 4,  /* SIN (SDA) PA4   */
> +1 * 16 + 1,  /* DCK (SCK) PB1   */
> +2 * 16 + 3,  /* RST_B (RST) PC3 */
> +2 * 16 + 4,  /* LAT_B (LAT) PC4 */
> +2 * 16 + 5,  /* SELBK (SB)  PC5 */
> +};
> +
> +
>  static const uint32_t gpio_addr[] = {
>  0x48000000,
>  0x48000400,
> @@ -116,6 +143,8 @@ static void stm32l4x5_soc_initfn(Object *obj)
>  g_autofree char *name = g_strdup_printf("gpio%c", 'a' + i);
>  object_initialize_child(obj, name, &s->gpio[i], TYPE_STM32L4X5_GPIO);
>  }
> +
> +object_initialize_child(obj, "dm163", &s->dm163, TYPE_DM163);
>  }
>
>  static void stm32l4x5_soc_realize(DeviceState *dev_soc, Error **errp)
> @@ -124,9 +153,10 @@ static void stm32l4x5_soc_realize(DeviceState *dev_soc, 
> Error **errp)
>  Stm32l4x5SocState *s = STM32L4X5_SOC(dev_soc);
>  const Stm32l4x5SocClass *sc = STM32L4X5_SOC_GET_CLASS(dev_soc);
>  MemoryRegion *system_memory = get_system_memory();
> -DeviceState *armv7m, *dev;
> +DeviceState *armv7m, *dev, *gpio_output_fork;
>  SysBusDevice *busdev;
>  uint32_t pin_index;
> +int gpio, pin;
>
>  if (!memory_region_init_rom(&s->flash, OBJECT(dev_soc), "flash",
>  sc->flash_size, errp)) {
> @@ -166,6 +196,12 @@ static void stm32l4x5_soc_realize(DeviceState *dev_soc, 
> Error **errp)
>  return;
>  }
>
> +/* DM163 */
> +dev = DEVICE(&s->dm163);
> +if (!qdev_realize(dev, NULL, errp)) {
> +return;
> +}
> +
>  /* GPIOs */
>  for (unsigned i = 0; i < NUM_GPIOS; i++) {
>  g_autofree char *name = g_strdup_printf("%c", 'A' + i);
> @@ -204,6 +240,23 @@ static void stm32l4x5_soc_realize(DeviceState *dev_soc, 
> Error **errp)
>  }
>  }
>
> +for (unsigned i = 0; i < NUM_DM163_INPUTS; i++) {
> +gpio_output_fork = qdev_new(TYPE_SPLIT_IRQ);
> +qdev_prop_set_uint32(gpio_output_fork, "num-lines", 2);
> +qdev_realize_and_unref(gpio_output_fork, NULL, &error_fatal);
> +
> +qdev_connect_gpio_out(gpio_output_fork, 0,
> +  qdev_get_gpio_in(DEVICE(&s->syscfg),
> +   dm163_input[i]));
> +qdev_connect_gpio_out(gpio_output_fork, 1,
> +  qdev_get_gpio_in(DEVICE(&s->dm163),
> +   i));
> +gpio = dm163_input[i] / 16;
> +pin = dm163_input[i] % 16;
> +qdev_connect_gpio_out(DEVICE(&s->gpio[gpio]), pin,
> +  qdev_get_gpio_in(DEVICE(gpio_output_fork), 0));
> +}
> +
>  /* EXTI device */
>  busdev = SYS_BUS_DEVICE(&s->exti);
>  if (!sysbus_realize(busdev, errp)) {
> diff --git a/include/hw/arm/stm32l4x5_soc.h b/include/hw/arm/stm32l4x5_soc.h
> index cb4da08629..60b31d430e 100644
> --- a/include/hw/arm/stm32l4x5_soc.h
> +++ b/include/hw/arm/stm32l4x5_soc.h
> @@ -30,6 +30,7 @@
>  #include "hw/misc/stm32l4x5_exti.h"
>  #include "hw/misc/stm32l4x5_rcc.h"
>  #include "hw/gpio/stm32l4x5_gpio.h"
> +#include

Re: [PATCH 1/3] hw/display : Add device DM163

2024-02-04 Thread Alistair Francis

On Sat, Jan 27, 2024 at 5:38 AM Inès Varhol
 wrote:
>
> This device implements the IM120417002 colors shield v1.1 for Arduino
> (which relies on the DM163 8x3-channel led driving logic) and features
> a simple display of an 8x8 RGB matrix. The columns of the matrix are
> driven by the DM163 and the rows are driven externally.
>
> Signed-off-by: Arnaud Minier 
> Signed-off-by: Inès Varhol 

Acked-by: Alistair Francis 

Alistair

> ---
>  hw/display/Kconfig |   3 +
>  hw/display/dm163.c | 307 +
>  hw/display/meson.build |   1 +
>  hw/display/trace-events|  13 ++
>  include/hw/display/dm163.h |  57 +++
>  5 files changed, 381 insertions(+)
>  create mode 100644 hw/display/dm163.c
>  create mode 100644 include/hw/display/dm163.h
>
> diff --git a/hw/display/Kconfig b/hw/display/Kconfig
> index 1aafe1923d..4dbfc6e7af 100644
> --- a/hw/display/Kconfig
> +++ b/hw/display/Kconfig
> @@ -139,3 +139,6 @@ config XLNX_DISPLAYPORT
>  bool
>  # defaults to "N", enabled by specific boards
>  depends on PIXMAN
> +
> +config DM163
> +bool
> diff --git a/hw/display/dm163.c b/hw/display/dm163.c
> new file mode 100644
> index 0000000000..565fc84ddf
> --- /dev/null
> +++ b/hw/display/dm163.c
> @@ -0,0 +1,307 @@
> +/*
> + * QEMU DM163 8x3-channel constant current led driver
> + * driving columns of associated 8x8 RGB matrix.
> + *
> + * Copyright (C) 2024 Samuel Tardieu 
> + * Copyright (C) 2024 Arnaud Minier 
> + * Copyright (C) 2024 Inès Varhol 
> + *
> + * SPDX-License-Identifier: GPL-2.0-or-later
> + */
> +
> +/*
> + * The reference used for the DM163 is the following :
> + * http://www.siti.com.tw/product/spec/LED/DM163.pdf
> + */
> +
> +#include "qemu/osdep.h"
> +#include "qapi/error.h"
> +#include "migration/vmstate.h"
> +#include "hw/irq.h"
> +#include "hw/qdev-properties.h"
> +#include "hw/display/dm163.h"
> +#include "ui/console.h"
> +#include "trace.h"
> +
> +#define LED_SQUARE_SIZE 100
> +/* Number of frames a row stays visible after being turned off. */
> +#define ROW_PERSISTANCE 2
> +
> +static const VMStateDescription vmstate_dm163 = {
> +.name = TYPE_DM163,
> +.version_id = 1,
> +.minimum_version_id = 1,
> +.fields = (const VMStateField[]) {
> +VMSTATE_UINT8(activated_rows, DM163State),
> +VMSTATE_UINT64_ARRAY(bank0_shift_register, DM163State, 3),
> +VMSTATE_UINT64_ARRAY(bank1_shift_register, DM163State, 3),
> +VMSTATE_UINT16_ARRAY(latched_outputs, DM163State, DM163_NUM_LEDS),
> +VMSTATE_UINT16_ARRAY(outputs, DM163State, DM163_NUM_LEDS),
> +VMSTATE_UINT8(dck, DM163State),
> +VMSTATE_UINT8(en_b, DM163State),
> +VMSTATE_UINT8(lat_b, DM163State),
> +VMSTATE_UINT8(rst_b, DM163State),
> +VMSTATE_UINT8(selbk, DM163State),
> +VMSTATE_UINT8(sin, DM163State),
> +VMSTATE_UINT32_2DARRAY(buffer, DM163State,
> +COLOR_BUFFER_SIZE + 1, RGB_MATRIX_NUM_COLS),
> +VMSTATE_UINT8(last_buffer_idx, DM163State),
> +VMSTATE_UINT8_ARRAY(buffer_idx_of_row, DM163State, 
> RGB_MATRIX_NUM_ROWS),
> +VMSTATE_UINT8_ARRAY(age_of_row, DM163State, RGB_MATRIX_NUM_ROWS),
> +VMSTATE_END_OF_LIST()
> +}
> +};
> +
> +static void dm163_reset_hold(Object *obj)
> +{
> +DM163State *s = DM163(obj);
> +
> +/* Reset only stops the PWM. */
> +memset(s->outputs, 0, sizeof(s->outputs));
> +
> +/* The last row of the buffer stores a turned off row */
> +memset(s->buffer[COLOR_BUFFER_SIZE], 0, sizeof(s->buffer[0]));
> +}
> +
> +static void dm163_dck_gpio_handler(void *opaque, int line, int new_state)
> +{
> +DM163State *s = DM163(opaque);
> +
> +if (new_state && !s->dck) {
> +/*
> + * On raising dck, sample selbk to get the bank to use, and
> + * sample sin for the bit to enter into the bank shift buffer.
> + */
> +uint64_t *sb =
> +s->selbk ? s->bank1_shift_register : s->bank0_shift_register;
> +/* Output the outgoing bit on sout */
> +const bool sout = (s->selbk ? sb[2] & MAKE_64BIT_MASK(63, 1) :
> +   sb[2] & MAKE_64BIT_MASK(15, 1)) != 0;
> +qemu_set_irq(s->sout, sout);
> +/* Enter sin into the shift buffer */
> +sb[2] = (sb[2] << 1) | ((sb[1] >> 63) & 1);
> +sb[1] = (sb[1] << 1) | ((sb[0] >> 63) & 1);
> +sb[0] = (sb[0] << 1) | s->sin;
> +}
> +
> +s->dck = new_state;
> +trace_dm163_dck(new_state);
> +}
> +
> +static void dm163_propagate_outputs(DM163State *s)
> +{
> +s->last_buffer_idx = (s->last_buffer_idx + 1) % COLOR_BUFFER_SIZE;
> +/* Values are output when reset and enable are both high. */
> +if (s->rst_b && !s->en_b) {
> +memcpy(s->outputs, s->latched_outputs, sizeof(s->outputs));
> +} else {
> +memset(s->outputs, 0, sizeof(s->outputs));
> +}
> +for (unsigned x = 0; x < RGB_MATRIX_NUM_COLS; x++) {
> +

Re: [PATCH v11 0/3] gdbstub and TCG plugin improvements

2024-02-04 Thread Alistair Francis

On Sat, Feb 3, 2024 at 8:12 PM Akihiko Odaki  wrote:
>
> This series extracts fixes and refactorings that can be applied
> independently from "[PATCH v9 00/23] plugins: Allow to read registers".
>
> The patch "target/riscv: Move MISA limits to class" was replaced with
> patch "target/riscv: Move misa_mxl_max to class" since I found instances
> may have different misa_ext_mask.
>
> V6 -> V7:
>   Rebased.
>
> V5 -> V6:
>   Added patch "default-configs: Add TARGET_XML_FILES definition".
>   Rebased.
>
> V4 -> V5:
>   Added patch "hw/riscv: Use misa_mxl instead of misa_mxl_max".
>
> V3 -> V4:
>   Added patch "gdbstub: Check if gdb_regs is NULL".
>
> V2 -> V3:
>   Restored patch sets from the previous version.
>   Rebased to commit 800485762e6564e04e2ab315132d477069562d91.
>
> V1 -> V2:
>   Added patch "target/riscv: Do not allow MXL_RV32 for TARGET_RISCV64".
>   Added patch "target/riscv: Initialize gdb_core_xml_file only once".
>   Dropped patch "target/riscv: Remove misa_mxl validation".
>   Dropped patch "target/riscv: Move misa_mxl_max to class".
>   Dropped patch "target/riscv: Validate misa_mxl_max only once".
>
> Signed-off-by: Akihiko Odaki 
> ---
> Changes in v11:
> - Rebased on: https://github.com/alistair23/qemu/tree/riscv-to-apply.next
> - Link to v10: 
> https://lore.kernel.org/r/20240128-riscv-v10-0-fdbe59397...@daynix.com
>
> Changes in v10:
> - Dropped patch "hw/riscv: Use misa_mxl instead of misa_mxl_max" due to
>   invalid assumption that the relevant code is only used for kernel
>   loading.
> - Link to v9: 
> https://lore.kernel.org/r/20240115-riscv-v9-0-ff171e1ae...@daynix.com
>
> Changes in v9:
> - Rebased to commit 977542ded7e6b28d2bc077bcda24568c716e393c.
> - Link to v8: 
> https://lore.kernel.org/r/20231218-riscv-v8-0-c9bf2b158...@daynix.com
>
> Changes in v8:
> - Added a more detailed explanation for patch "hw/riscv: Use misa_mxl
>   instead of misa_mxl_max". (Alistair Francis)
> - Link to v7: 
> https://lore.kernel.org/r/20231213-riscv-v7-0-a760156a3...@daynix.com
>
> ---
> Akihiko Odaki (3):
>   target/riscv: Remove misa_mxl validation
>   target/riscv: Move misa_mxl_max to class
>   target/riscv: Validate misa_mxl_max only once

Thanks!

Applied to riscv-to-apply.next

Alistair

>
>  target/riscv/cpu.h |   4 +-
>  hw/riscv/boot.c|   3 +-
>  target/riscv/cpu.c | 181 
> ++---
>  target/riscv/gdbstub.c |  12 ++-
>  target/riscv/kvm/kvm-cpu.c |  10 +--
>  target/riscv/machine.c |   7 +-
>  target/riscv/tcg/tcg-cpu.c |  44 ++-
>  target/riscv/translate.c   |   3 +-
>  8 files changed, 133 insertions(+), 131 deletions(-)
> ---
> base-commit: 0c9d286cf791cdda76fd57e4562e2cb18d4a79e2
> change-id: 20231213-riscv-fcc9640986cf
>
> Best regards,
> --
> Akihiko Odaki 
>
>

Re: [PATCH v1 11/15] libvhost-user: Speedup gpa_to_mem_region() and vu_gpa_to_va()

2024-02-04 Thread Raphael Norwitz

On Sun, Feb 4, 2024 at 9:51 AM David Hildenbrand  wrote:
>
> On 04.02.24 03:10, Raphael Norwitz wrote:
> > One comment on this one.
> >
> > On Fri, Feb 2, 2024 at 4:56 PM David Hildenbrand  wrote:
> >>
> >> Let's speed up GPA to memory region / virtual address lookup. Store the
> >> memory regions ordered by guest physical addresses, and use binary
> >> search for address translation, as well as when adding/removing memory
> >> regions.
> >>
> >> Most importantly, this will speed up GPA->VA address translation when we
> >> have many memslots.
> >>
> >> Signed-off-by: David Hildenbrand 
> >> ---
> >>   subprojects/libvhost-user/libvhost-user.c | 49 +--
> >>   1 file changed, 45 insertions(+), 4 deletions(-)
> >>
> >> diff --git a/subprojects/libvhost-user/libvhost-user.c 
> >> b/subprojects/libvhost-user/libvhost-user.c
> >> index d036b54ed0..75e47b7bb3 100644
> >> --- a/subprojects/libvhost-user/libvhost-user.c
> >> +++ b/subprojects/libvhost-user/libvhost-user.c
> >> @@ -199,19 +199,30 @@ vu_panic(VuDev *dev, const char *msg, ...)
> >>   static VuDevRegion *
> >>   vu_gpa_to_mem_region(VuDev *dev, uint64_t guest_addr)
> >>   {
> >> -unsigned int i;
> >> +int low = 0;
> >> +int high = dev->nregions - 1;
> >>
> >>   /*
> >>* Memory regions cannot overlap in guest physical address space. 
> >> Each
> >>* GPA belongs to exactly one memory region, so there can only be one
> >>* match.
> >> + *
> >> + * We store our memory regions ordered by GPA and can simply perform a
> >> + * binary search.
> >>*/
> >> -for (i = 0; i < dev->nregions; i++) {
> >> -VuDevRegion *cur = &dev->regions[i];
> >> +while (low <= high) {
> >> +unsigned int mid = low + (high - low) / 2;
> >> +VuDevRegion *cur = &dev->regions[mid];
> >>
> >>   if (guest_addr >= cur->gpa && guest_addr < cur->gpa + cur->size) 
> >> {
> >>   return cur;
> >>   }
> >> +if (guest_addr >= cur->gpa + cur->size) {
> >> +low = mid + 1;
> >> +}
> >> +if (guest_addr < cur->gpa) {
> >> +high = mid - 1;
> >> +}
> >>   }
> >>   return NULL;
> >>   }
> >> @@ -273,9 +284,14 @@ vu_remove_all_mem_regs(VuDev *dev)
> >>   static void
> >>   _vu_add_mem_reg(VuDev *dev, VhostUserMemoryRegion *msg_region, int fd)
> >>   {
> >> +const uint64_t start_gpa = msg_region->guest_phys_addr;
> >> +const uint64_t end_gpa = start_gpa + msg_region->memory_size;
> >>   int prot = PROT_READ | PROT_WRITE;
> >>   VuDevRegion *r;
> >>   void *mmap_addr;
> >> +int low = 0;
> >> +int high = dev->nregions - 1;
> >> +unsigned int idx;
> >>
> >>   DPRINT("Adding region %d\n", dev->nregions);
> >>   DPRINT("guest_phys_addr: 0x%016"PRIx64"\n",
> >> @@ -295,6 +311,29 @@ _vu_add_mem_reg(VuDev *dev, VhostUserMemoryRegion 
> >> *msg_region, int fd)
> >>   prot = PROT_NONE;
> >>   }
> >>
> >> +/*
> >> + * We will add memory regions into the array sorted by GPA. Perform a
> >> + * binary search to locate the insertion point: it will be at the low
> >> + * index.
> >> + */
> >> +while (low <= high) {
> >> +unsigned int mid = low + (high - low)  / 2;
> >> +VuDevRegion *cur = &dev->regions[mid];
> >> +
> >> +/* Overlap of GPA addresses. */
> >
> > Looks like this check will only catch if the new region is fully
> > contained within an existing region. I think we need to check whether
> > either start or end region are in the range, i.e.:
>
> That check should cover all cases of overlaps, not just fully contained.
>
> See the QEMU implementation of range_overlaps_range() that contains a
> similar logic:
>
> return !(range2->upb < range1->lob || range1->upb < range2->lob);
>
> !(range2->upb < range1->lob || range1->upb < range2->lob);
> =  !(range2->upb < range1->lob) && !(range1->upb < range2->lob)
> =   range2->upb >= range1->lob && range1->upb >= range2->lob
> =   range1->lob <= range2->upb && range2->lob <= range1->upb
>
> In QEMU, upb is inclusive, if it were exclusive (like we have here):
>
> =   range1->lob < range2->upb && range2->lob < range1->upb
>
> Which is what we have here with:
>
> range1->lob = start_gpa
> range1->upb = end_gpa
> range2->lob = cur->gpa
> range2->upb = cur->gpa + cur->size
>
> Also if you are interested, see
>
> https://stackoverflow.com/questions/3269434/whats-the-most-efficient-way-to-test-if-two-ranges-overlap
>
> Thanks!

Got it, thanks for the full explanation. With that:

Reviewed-by: Raphael Norwitz 

>
> --
> Cheers,
>
> David / dhildenb
>

Re: [PATCH v1 01/15] libvhost-user: Fix msg_region->userspace_addr computation

2024-02-04 Thread Raphael Norwitz

On Sun, Feb 4, 2024 at 9:36 AM David Hildenbrand  wrote:
>
> On 04.02.24 02:35, Raphael Norwitz wrote:
> > As a heads up, I've left Nutanix and updated it in MAINTAINERS. Will
> > be updating it again shortly so tagging these with my new work email.
> >
>
> Thanks for the fast review! The mail server already complained to me :)
>
> Maybe consider adding yourself as reviewer for vhost as well? (which
> covers libvhost-user), I took your mail address from git history, not
> get_maintainers.pl.

I don't expect I'll have much time to review code outside of
vhost-user-blk/vhost-user-scsi, but happy to add an entry if it helps
folks tag me on relevant patches.

>
> > On Fri, Feb 2, 2024 at 4:54 PM David Hildenbrand  wrote:
> >>
> >> We barely had mmap_offset set in the past. With virtio-mem and
> >> dynamic-memslots that will change.
> >>
> >> In vu_add_mem_reg() and vu_set_mem_table_exec_postcopy(), we are
> >> performing pointer arithmetics, which is wrong. Let's simply
> >> use dev_region->mmap_addr instead of "void *mmap_addr".
> >>
> >> Fixes: ec94c8e621de ("Support adding individual regions in libvhost-user")
> >> Fixes: 9bb38019942c ("vhost+postcopy: Send address back to qemu")
> >> Cc: Raphael Norwitz 
> >> Signed-off-by: David Hildenbrand 
> >
> > Reviewed-by: Raphael Norwitz 
>
>
> --
> Cheers,
>
> David / dhildenb
>

[PULL 18/39] tcg/aarch64: Massage tcg_out_brcond()

2024-02-04 Thread Richard Henderson

From: Philippe Mathieu-Daudé 

In order to ease next commit review, modify tcg_out_brcond()
to switch over TCGCond. No logical change intended.

Signed-off-by: Philippe Mathieu-Daudé 
Message-Id: <20240119224737.48943-1-phi...@linaro.org>
Signed-off-by: Richard Henderson 
---
 tcg/aarch64/tcg-target.c.inc | 31 +++
 1 file changed, 23 insertions(+), 8 deletions(-)

diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc
index 70df250c04..a19158f4ea 100644
--- a/tcg/aarch64/tcg-target.c.inc
+++ b/tcg/aarch64/tcg-target.c.inc
@@ -1416,12 +1416,20 @@ static void tcg_out_brcond(TCGContext *s, TCGType ext, 
TCGCond c, TCGArg a,
TCGArg b, bool b_const, TCGLabel *l)
 {
 intptr_t offset;
-bool need_cmp;
+bool need_cmp = true;
 
-if (b_const && b == 0 && (c == TCG_COND_EQ || c == TCG_COND_NE)) {
-need_cmp = false;
-} else {
-need_cmp = true;
+switch (c) {
+case TCG_COND_EQ:
+case TCG_COND_NE:
+if (b_const && b == 0) {
+need_cmp = false;
+}
+break;
+default:
+break;
+}
+
+if (need_cmp) {
 tcg_out_cmp(s, ext, c, a, b, b_const);
 }
 
@@ -1435,10 +1443,17 @@ static void tcg_out_brcond(TCGContext *s, TCGType ext, 
TCGCond c, TCGArg a,
 
 if (need_cmp) {
 tcg_out_insn(s, 3202, B_C, c, offset);
-} else if (c == TCG_COND_EQ) {
-tcg_out_insn(s, 3201, CBZ, ext, a, offset);
 } else {
-tcg_out_insn(s, 3201, CBNZ, ext, a, offset);
+switch (c) {
+case TCG_COND_EQ:
+tcg_out_insn(s, 3201, CBZ, ext, a, offset);
+break;
+case TCG_COND_NE:
+tcg_out_insn(s, 3201, CBNZ, ext, a, offset);
+break;
+default:
+g_assert_not_reached();
+}
 }
 }
 
-- 
2.34.1

[PULL 19/39] tcg/aarch64: Generate TBZ, TBNZ

2024-02-04 Thread Richard Henderson

Test the sign bit for LT/GE vs 0, and TSTNE/EQ vs a power of 2.

Signed-off-by: Philippe Mathieu-Daudé 
Message-Id: <20240119224737.48943-2-phi...@linaro.org>
Signed-off-by: Richard Henderson 
---
 tcg/aarch64/tcg-target.c.inc | 74 ++--
 1 file changed, 62 insertions(+), 12 deletions(-)

diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc
index a19158f4ea..36fc46ae93 100644
--- a/tcg/aarch64/tcg-target.c.inc
+++ b/tcg/aarch64/tcg-target.c.inc
@@ -105,6 +105,18 @@ static bool reloc_pc19(tcg_insn_unit *src_rw, const 
tcg_insn_unit *target)
 return false;
 }
 
+static bool reloc_pc14(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
+{
+const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
+ptrdiff_t offset = target - src_rx;
+
+if (offset == sextract64(offset, 0, 14)) {
+*src_rw = deposit32(*src_rw, 5, 14, offset);
+return true;
+}
+return false;
+}
+
 static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
 intptr_t value, intptr_t addend)
 {
@@ -115,6 +127,8 @@ static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
 return reloc_pc26(code_ptr, (const tcg_insn_unit *)value);
 case R_AARCH64_CONDBR19:
 return reloc_pc19(code_ptr, (const tcg_insn_unit *)value);
+case R_AARCH64_TSTBR14:
+return reloc_pc14(code_ptr, (const tcg_insn_unit *)value);
 default:
 g_assert_not_reached();
 }
@@ -380,6 +394,10 @@ typedef enum {
 /* Conditional branch (immediate).  */
 I3202_B_C   = 0x54000000,
 
+/* Test and branch (immediate).  */
+I3205_TBZ   = 0x36000000,
+I3205_TBNZ  = 0x37000000,
+
 /* Unconditional branch (immediate).  */
 I3206_B = 0x14000000,
 I3206_BL= 0x94000000,
@@ -660,6 +678,14 @@ static void tcg_out_insn_3202(TCGContext *s, AArch64Insn 
insn,
 tcg_out32(s, insn | tcg_cond_to_aarch64[c] | (imm19 & 0x7ffff) << 5);
 }
 
+static void tcg_out_insn_3205(TCGContext *s, AArch64Insn insn,
+  TCGReg rt, int imm6, int imm14)
+{
+insn |= (imm6 & 0x20) << (31 - 5);
+insn |= (imm6 & 0x1f) << 19;
+tcg_out32(s, insn | (imm14 & 0x3fff) << 5 | rt);
+}
+
 static void tcg_out_insn_3206(TCGContext *s, AArch64Insn insn, int imm26)
 {
 tcg_out32(s, insn | (imm26 & 0x03ffffff));
@@ -1415,41 +1441,65 @@ static inline void tcg_out_goto_label(TCGContext *s, 
TCGLabel *l)
 static void tcg_out_brcond(TCGContext *s, TCGType ext, TCGCond c, TCGArg a,
TCGArg b, bool b_const, TCGLabel *l)
 {
-intptr_t offset;
+int tbit = -1;
 bool need_cmp = true;
 
 switch (c) {
 case TCG_COND_EQ:
 case TCG_COND_NE:
+/* cmp xN,0; b.ne L -> cbnz xN,L */
 if (b_const && b == 0) {
 need_cmp = false;
 }
 break;
+case TCG_COND_LT:
+case TCG_COND_GE:
+/* cmp xN,0; b.mi L -> tbnz xN,63,L */
+if (b_const && b == 0) {
+c = (c == TCG_COND_LT ? TCG_COND_TSTNE : TCG_COND_TSTEQ);
+tbit = ext ? 63 : 31;
+need_cmp = false;
+}
+break;
+case TCG_COND_TSTEQ:
+case TCG_COND_TSTNE:
+/* tst xN,1<<B; b.ne L -> tbnz xN,B,L */
+if (b_const && is_power_of_2(b)) {
+tbit = ctz64(b);
+need_cmp = false;
+}
+break;
 default:
 break;
 }
 
 if (need_cmp) {
 tcg_out_cmp(s, ext, c, a, b, b_const);
-}
-
-if (!l->has_value) {
 tcg_out_reloc(s, s->code_ptr, R_AARCH64_CONDBR19, l, 0);
-offset = tcg_in32(s) >> 5;
-} else {
-offset = tcg_pcrel_diff(s, l->u.value_ptr) >> 2;
-tcg_debug_assert(offset == sextract64(offset, 0, 19));
+tcg_out_insn(s, 3202, B_C, c, 0);
+return;
 }
 
-if (need_cmp) {
-tcg_out_insn(s, 3202, B_C, c, offset);
+if (tbit >= 0) {
+tcg_out_reloc(s, s->code_ptr, R_AARCH64_TSTBR14, l, 0);
+switch (c) {
+case TCG_COND_TSTEQ:
+tcg_out_insn(s, 3205, TBZ, a, tbit, 0);
+break;
+case TCG_COND_TSTNE:
+tcg_out_insn(s, 3205, TBNZ, a, tbit, 0);
+break;
+default:
+g_assert_not_reached();
+}
 } else {
+tcg_out_reloc(s, s->code_ptr, R_AARCH64_CONDBR19, l, 0);
 switch (c) {
 case TCG_COND_EQ:
-tcg_out_insn(s, 3201, CBZ, ext, a, offset);
+tcg_out_insn(s, 3201, CBZ, ext, a, 0);
 break;
 case TCG_COND_NE:
-tcg_out_insn(s, 3201, CBNZ, ext, a, offset);
+tcg_out_insn(s, 3201, CBNZ, ext, a, 0);
 break;
 default:
 g_assert_not_reached();
-- 
2.34.1

[PULL 38/39] tcg/s390x: Support TCG_COND_TST{EQ,NE}

2024-02-04 Thread Richard Henderson

Signed-off-by: Richard Henderson 
---
 tcg/s390x/tcg-target.h |   2 +-
 tcg/s390x/tcg-target.c.inc | 139 +
 2 files changed, 97 insertions(+), 44 deletions(-)

diff --git a/tcg/s390x/tcg-target.h b/tcg/s390x/tcg-target.h
index 53bed8c8d2..ae448c3a3a 100644
--- a/tcg/s390x/tcg-target.h
+++ b/tcg/s390x/tcg-target.h
@@ -138,7 +138,7 @@ extern uint64_t s390_facilities[3];
 
 #define TCG_TARGET_HAS_qemu_ldst_i128 1
 
-#define TCG_TARGET_HAS_tst 0
+#define TCG_TARGET_HAS_tst 1
 
 #define TCG_TARGET_HAS_v64 HAVE_FACILITY(VECTOR)
 #define TCG_TARGET_HAS_v128   HAVE_FACILITY(VECTOR)
diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc
index 7f97080f52..41f693ebbc 100644
--- a/tcg/s390x/tcg-target.c.inc
+++ b/tcg/s390x/tcg-target.c.inc
@@ -112,6 +112,9 @@ typedef enum S390Opcode {
 RI_OILH = 0xa50a,
 RI_OILL = 0xa50b,
 RI_TMLL = 0xa701,
+RI_TMLH = 0xa700,
+RI_TMHL = 0xa703,
+RI_TMHH = 0xa702,
 
 RIEb_CGRJ= 0xec64,
 RIEb_CLGRJ   = 0xec65,
@@ -404,10 +407,15 @@ static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind 
kind, int slot)
 #define S390_CC_NEVER   0
 #define S390_CC_ALWAYS  15
 
+#define S390_TM_EQ  8  /* CC == 0 */
+#define S390_TM_NE  7  /* CC in {1,2,3} */
+
 /* Condition codes that result from a COMPARE and COMPARE LOGICAL.  */
-static const uint8_t tcg_cond_to_s390_cond[] = {
+static const uint8_t tcg_cond_to_s390_cond[16] = {
 [TCG_COND_EQ]  = S390_CC_EQ,
 [TCG_COND_NE]  = S390_CC_NE,
+[TCG_COND_TSTEQ] = S390_CC_EQ,
+[TCG_COND_TSTNE] = S390_CC_NE,
 [TCG_COND_LT]  = S390_CC_LT,
 [TCG_COND_LE]  = S390_CC_LE,
 [TCG_COND_GT]  = S390_CC_GT,
@@ -421,9 +429,11 @@ static const uint8_t tcg_cond_to_s390_cond[] = {
 /* Condition codes that result from a LOAD AND TEST.  Here, we have no
unsigned instruction variation, however since the test is vs zero we
can re-map the outcomes appropriately.  */
-static const uint8_t tcg_cond_to_ltr_cond[] = {
+static const uint8_t tcg_cond_to_ltr_cond[16] = {
 [TCG_COND_EQ]  = S390_CC_EQ,
 [TCG_COND_NE]  = S390_CC_NE,
+[TCG_COND_TSTEQ] = S390_CC_ALWAYS,
+[TCG_COND_TSTNE] = S390_CC_NEVER,
 [TCG_COND_LT]  = S390_CC_LT,
 [TCG_COND_LE]  = S390_CC_LE,
 [TCG_COND_GT]  = S390_CC_GT,
@@ -542,10 +552,13 @@ static bool risbg_mask(uint64_t c)
 static bool tcg_target_const_match(int64_t val, int ct,
TCGType type, TCGCond cond, int vece)
 {
+uint64_t uval = val;
+
 if (ct & TCG_CT_CONST) {
 return true;
 }
 if (type == TCG_TYPE_I32) {
+uval = (uint32_t)val;
 val = (int32_t)val;
 }
 
@@ -567,6 +580,15 @@ static bool tcg_target_const_match(int64_t val, int ct,
 case TCG_COND_GTU:
 ct |= TCG_CT_CONST_U32;  /* CLGFI */
 break;
+case TCG_COND_TSTNE:
+case TCG_COND_TSTEQ:
+if (is_const_p16(uval) >= 0) {
+return true;  /* TMxx */
+}
+if (risbg_mask(uval)) {
+return true;  /* RISBG */
+}
+break;
 default:
 g_assert_not_reached();
 }
@@ -588,10 +610,6 @@ static bool tcg_target_const_match(int64_t val, int ct,
 if (ct & TCG_CT_CONST_INV) {
 val = ~val;
 }
-/*
- * Note that is_const_p16 is a subset of is_const_p32,
- * so we don't need both constraints.
- */
 if ((ct & TCG_CT_CONST_P32) && is_const_p32(val) >= 0) {
 return true;
 }
@@ -868,6 +886,9 @@ static const S390Opcode oi_insns[4] = {
 static const S390Opcode lif_insns[2] = {
 RIL_LLILF, RIL_LLIHF,
 };
+static const S390Opcode tm_insns[4] = {
+RI_TMLL, RI_TMLH, RI_TMHL, RI_TMHH
+};
 
 /* load a register with an immediate value */
 static void tcg_out_movi(TCGContext *s, TCGType type,
@@ -1228,6 +1249,36 @@ static int tgen_cmp2(TCGContext *s, TCGType type, 
TCGCond c, TCGReg r1,
 TCGCond inv_c = tcg_invert_cond(c);
 S390Opcode op;
 
+if (is_tst_cond(c)) {
+tcg_debug_assert(!need_carry);
+
+if (!c2const) {
+if (type == TCG_TYPE_I32) {
+tcg_out_insn(s, RRFa, NRK, TCG_REG_R0, r1, c2);
+} else {
+tcg_out_insn(s, RRFa, NGRK, TCG_REG_R0, r1, c2);
+}
+goto exit;
+}
+
+if (type == TCG_TYPE_I32) {
+c2 = (uint32_t)c2;
+}
+
+int i = is_const_p16(c2);
+if (i >= 0) {
+tcg_out_insn_RI(s, tm_insns[i], r1, c2 >> (i * 16));
+*inv_cc = c == TCG_COND_TSTEQ ? S390_TM_NE : S390_TM_EQ;
+return *inv_cc ^ 15;
+}
+
+if (risbg_mask(c2)) {
+tgen_andi_risbg(s, TCG_REG_R0, r1, c2);
+goto exit;
+}
+g_assert_not_reached();
+}
+
 if (c2const) {
 if (c2 == 0) {
 if (!(is_unsigned &&

[PULL 05/39] tcg/optimize: Do swap_commutative2 in do_constant_folding_cond2

2024-02-04 Thread Richard Henderson

Mirror the new do_constant_folding_cond1 by doing all
argument and condition adjustment within one helper.

Reviewed-by: Philippe Mathieu-Daudé 
Signed-off-by: Richard Henderson 
---
 tcg/optimize.c | 107 ++---
 1 file changed, 57 insertions(+), 50 deletions(-)

diff --git a/tcg/optimize.c b/tcg/optimize.c
index 9c04dba099..08a9280432 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -713,43 +713,6 @@ static int do_constant_folding_cond(TCGType type, TCGArg x,
 return -1;
 }
 
-/*
- * Return -1 if the condition can't be simplified,
- * and the result of the condition (0 or 1) if it can.
- */
-static int do_constant_folding_cond2(TCGArg *p1, TCGArg *p2, TCGCond c)
-{
-TCGArg al = p1[0], ah = p1[1];
-TCGArg bl = p2[0], bh = p2[1];
-
-if (arg_is_const(bl) && arg_is_const(bh)) {
-tcg_target_ulong blv = arg_info(bl)->val;
-tcg_target_ulong bhv = arg_info(bh)->val;
-uint64_t b = deposit64(blv, 32, 32, bhv);
-
-if (arg_is_const(al) && arg_is_const(ah)) {
-tcg_target_ulong alv = arg_info(al)->val;
-tcg_target_ulong ahv = arg_info(ah)->val;
-uint64_t a = deposit64(alv, 32, 32, ahv);
-return do_constant_folding_cond_64(a, b, c);
-}
-if (b == 0) {
-switch (c) {
-case TCG_COND_LTU:
-return 0;
-case TCG_COND_GEU:
-return 1;
-default:
-break;
-}
-}
-}
-if (args_are_copies(al, bl) && args_are_copies(ah, bh)) {
-return do_constant_folding_cond_eq(c);
-}
-return -1;
-}
-
 /**
  * swap_commutative:
  * @dest: TCGArg of the destination argument, or NO_DEST.
@@ -796,6 +759,10 @@ static bool swap_commutative2(TCGArg *p1, TCGArg *p2)
 return false;
 }
 
+/*
+ * Return -1 if the condition can't be simplified,
+ * and the result of the condition (0 or 1) if it can.
+ */
 static int do_constant_folding_cond1(OptContext *ctx, TCGArg dest,
  TCGArg *p1, TCGArg *p2, TCGArg *pcond)
 {
@@ -813,6 +780,51 @@ static int do_constant_folding_cond1(OptContext *ctx, 
TCGArg dest,
 return r;
 }
 
+static int do_constant_folding_cond2(OptContext *ctx, TCGArg *args)
+{
+TCGArg al, ah, bl, bh;
+TCGCond c;
+bool swap;
+
+swap = swap_commutative2(args, args + 2);
+c = args[4];
+if (swap) {
+args[4] = c = tcg_swap_cond(c);
+}
+
+al = args[0];
+ah = args[1];
+bl = args[2];
+bh = args[3];
+
+if (arg_is_const(bl) && arg_is_const(bh)) {
+tcg_target_ulong blv = arg_info(bl)->val;
+tcg_target_ulong bhv = arg_info(bh)->val;
+uint64_t b = deposit64(blv, 32, 32, bhv);
+
+if (arg_is_const(al) && arg_is_const(ah)) {
+tcg_target_ulong alv = arg_info(al)->val;
+tcg_target_ulong ahv = arg_info(ah)->val;
+uint64_t a = deposit64(alv, 32, 32, ahv);
+return do_constant_folding_cond_64(a, b, c);
+}
+if (b == 0) {
+switch (c) {
+case TCG_COND_LTU:
+return 0;
+case TCG_COND_GEU:
+return 1;
+default:
+break;
+}
+}
+}
+if (args_are_copies(al, bl) && args_are_copies(ah, bh)) {
+return do_constant_folding_cond_eq(c);
+}
+return -1;
+}
+
 static void init_arguments(OptContext *ctx, TCGOp *op, int nb_args)
 {
 for (int i = 0; i < nb_args; i++) {
@@ -1225,15 +1237,13 @@ static bool fold_brcond(OptContext *ctx, TCGOp *op)
 
 static bool fold_brcond2(OptContext *ctx, TCGOp *op)
 {
-TCGCond cond = op->args[4];
-TCGArg label = op->args[5];
+TCGCond cond;
+TCGArg label;
 int i, inv = 0;
 
-if (swap_commutative2(&op->args[0], &op->args[2])) {
-op->args[4] = cond = tcg_swap_cond(cond);
-}
-
-i = do_constant_folding_cond2(&op->args[0], &op->args[2], cond);
+i = do_constant_folding_cond2(ctx, &op->args[0]);
+cond = op->args[4];
+label = op->args[5];
 if (i >= 0) {
 goto do_brcond_const;
 }
@@ -1986,14 +1996,11 @@ static bool fold_negsetcond(OptContext *ctx, TCGOp *op)
 
 static bool fold_setcond2(OptContext *ctx, TCGOp *op)
 {
-TCGCond cond = op->args[5];
+TCGCond cond;
 int i, inv = 0;
 
-if (swap_commutative2(&op->args[1], &op->args[3])) {
-op->args[5] = cond = tcg_swap_cond(cond);
-}
-
-i = do_constant_folding_cond2(&op->args[1], &op->args[3], cond);
+i = do_constant_folding_cond2(ctx, &op->args[1]);
+cond = op->args[5];
 if (i >= 0) {
 goto do_setcond_const;
 }
-- 
2.34.1

[PULL 33/39] tcg/ppc: Tidy up tcg_target_const_match

2024-02-04 Thread Richard Henderson

Reviewed-by: Philippe Mathieu-Daudé 
Signed-off-by: Richard Henderson 
---
 tcg/ppc/tcg-target.c.inc | 27 ---
 1 file changed, 16 insertions(+), 11 deletions(-)

diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
index b9323baa86..26e0bc31d7 100644
--- a/tcg/ppc/tcg-target.c.inc
+++ b/tcg/ppc/tcg-target.c.inc
@@ -282,31 +282,36 @@ static bool reloc_pc34(tcg_insn_unit *src_rw, const 
tcg_insn_unit *target)
 }
 
 /* test if a constant matches the constraint */
-static bool tcg_target_const_match(int64_t val, int ct,
+static bool tcg_target_const_match(int64_t sval, int ct,
TCGType type, TCGCond cond, int vece)
 {
+uint64_t uval = sval;
+
 if (ct & TCG_CT_CONST) {
 return 1;
 }
 
-/* The only 32-bit constraint we use aside from
-   TCG_CT_CONST is TCG_CT_CONST_S16.  */
 if (type == TCG_TYPE_I32) {
-val = (int32_t)val;
+uval = (uint32_t)sval;
+sval = (int32_t)sval;
 }
 
-if ((ct & TCG_CT_CONST_S16) && val == (int16_t)val) {
+if ((ct & TCG_CT_CONST_S16) && sval == (int16_t)sval) {
 return 1;
-} else if ((ct & TCG_CT_CONST_S32) && val == (int32_t)val) {
+}
+if ((ct & TCG_CT_CONST_S32) && sval == (int32_t)sval) {
 return 1;
-} else if ((ct & TCG_CT_CONST_U32) && val == (uint32_t)val) {
+}
+if ((ct & TCG_CT_CONST_U32) && uval == (uint32_t)uval) {
 return 1;
-} else if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
+}
+if ((ct & TCG_CT_CONST_ZERO) && sval == 0) {
 return 1;
-} else if ((ct & TCG_CT_CONST_MONE) && val == -1) {
+}
+if ((ct & TCG_CT_CONST_MONE) && sval == -1) {
 return 1;
-} else if ((ct & TCG_CT_CONST_WSZ)
-   && val == (type == TCG_TYPE_I32 ? 32 : 64)) {
+}
+if ((ct & TCG_CT_CONST_WSZ) && sval == (type == TCG_TYPE_I32 ? 32 : 64)) {
 return 1;
 }
 return 0;
-- 
2.34.1

[PULL 21/39] tcg/arm: Split out tcg_out_cmp()

2024-02-04 Thread Richard Henderson

Reviewed-by: Philippe Mathieu-Daudé 
Signed-off-by: Richard Henderson 
Message-Id: <20231028194522.245170-12-richard.hender...@linaro.org>
[PMD: Split from bigger patch, part 1/2]
Signed-off-by: Philippe Mathieu-Daudé 
Message-Id: <20231108145244.72421-1-phi...@linaro.org>
---
 tcg/arm/tcg-target.c.inc | 32 +---
 1 file changed, 17 insertions(+), 15 deletions(-)

diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc
index 113094a5b2..4ea17845bb 100644
--- a/tcg/arm/tcg-target.c.inc
+++ b/tcg/arm/tcg-target.c.inc
@@ -1191,6 +1191,13 @@ static void tcg_out_mb(TCGContext *s, TCGArg a0)
 }
 }
 
+static TCGCond tcg_out_cmp(TCGContext *s, TCGCond cond, TCGReg a,
+   TCGArg b, int b_const)
+{
+tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0, a, b, b_const);
+return cond;
+}
+
 static TCGCond tcg_out_cmp2(TCGContext *s, const TCGArg *args,
 const int *const_args)
 {
@@ -1809,9 +1816,8 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
 /* Constraints mean that v2 is always in the same register as dest,
  * so we only need to do "if condition passed, move v1 to dest".
  */
-tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0,
-args[1], args[2], const_args[2]);
-tcg_out_dat_rIK(s, tcg_cond_to_arm_cond[args[5]], ARITH_MOV,
+c = tcg_out_cmp(s, args[5], args[1], args[2], const_args[2]);
+tcg_out_dat_rIK(s, tcg_cond_to_arm_cond[c], ARITH_MOV,
 ARITH_MVN, args[0], 0, args[3], const_args[3]);
 break;
 case INDEX_op_add_i32:
@@ -1961,25 +1967,21 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
 break;
 
 case INDEX_op_brcond_i32:
-tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0,
-   args[0], args[1], const_args[1]);
-tcg_out_goto_label(s, tcg_cond_to_arm_cond[args[2]],
-   arg_label(args[3]));
+c = tcg_out_cmp(s, args[2], args[0], args[1], const_args[1]);
+tcg_out_goto_label(s, tcg_cond_to_arm_cond[c], arg_label(args[3]));
 break;
 case INDEX_op_setcond_i32:
-tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0,
-args[1], args[2], const_args[2]);
-tcg_out_dat_imm(s, tcg_cond_to_arm_cond[args[3]],
+c = tcg_out_cmp(s, args[3], args[1], args[2], const_args[2]);
+tcg_out_dat_imm(s, tcg_cond_to_arm_cond[c],
 ARITH_MOV, args[0], 0, 1);
-tcg_out_dat_imm(s, tcg_cond_to_arm_cond[tcg_invert_cond(args[3])],
+tcg_out_dat_imm(s, tcg_cond_to_arm_cond[tcg_invert_cond(c)],
 ARITH_MOV, args[0], 0, 0);
 break;
 case INDEX_op_negsetcond_i32:
-tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0,
-args[1], args[2], const_args[2]);
-tcg_out_dat_imm(s, tcg_cond_to_arm_cond[args[3]],
+c = tcg_out_cmp(s, args[3], args[1], args[2], const_args[2]);
+tcg_out_dat_imm(s, tcg_cond_to_arm_cond[c],
 ARITH_MVN, args[0], 0, 0);
-tcg_out_dat_imm(s, tcg_cond_to_arm_cond[tcg_invert_cond(args[3])],
+tcg_out_dat_imm(s, tcg_cond_to_arm_cond[tcg_invert_cond(c)],
 ARITH_MOV, args[0], 0, 0);
 break;
 
-- 
2.34.1

[PULL 23/39] tcg/i386: Pass x86 condition codes to tcg_out_cmov

2024-02-04 Thread Richard Henderson

Hoist the tcg_cond_to_jcc index outside the function.

Reviewed-by: Philippe Mathieu-Daudé 
Signed-off-by: Richard Henderson 
---
 tcg/i386/tcg-target.c.inc | 16 
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
index accaaa2660..2d6100a8f4 100644
--- a/tcg/i386/tcg-target.c.inc
+++ b/tcg/i386/tcg-target.c.inc
@@ -1699,14 +1699,14 @@ static void tcg_out_setcond2(TCGContext *s, const 
TCGArg *args,
 }
 #endif
 
-static void tcg_out_cmov(TCGContext *s, TCGCond cond, int rexw,
+static void tcg_out_cmov(TCGContext *s, int jcc, int rexw,
  TCGReg dest, TCGReg v1)
 {
 if (have_cmov) {
-tcg_out_modrm(s, OPC_CMOVCC | tcg_cond_to_jcc[cond] | rexw, dest, v1);
+tcg_out_modrm(s, OPC_CMOVCC | jcc | rexw, dest, v1);
 } else {
 TCGLabel *over = gen_new_label();
-tcg_out_jxx(s, tcg_cond_to_jcc[tcg_invert_cond(cond)], over, 1);
+tcg_out_jxx(s, jcc ^ 1, over, 1);
 tcg_out_mov(s, TCG_TYPE_I32, dest, v1);
 tcg_out_label(s, over);
 }
@@ -1717,7 +1717,7 @@ static void tcg_out_movcond(TCGContext *s, int rexw, 
TCGCond cond,
 TCGReg v1)
 {
 tcg_out_cmp(s, c1, c2, const_c2, rexw);
-tcg_out_cmov(s, cond, rexw, dest, v1);
+tcg_out_cmov(s, tcg_cond_to_jcc[cond], rexw, dest, v1);
 }
 
 static void tcg_out_ctz(TCGContext *s, int rexw, TCGReg dest, TCGReg arg1,
@@ -1729,12 +1729,12 @@ static void tcg_out_ctz(TCGContext *s, int rexw, TCGReg 
dest, TCGReg arg1,
 tcg_debug_assert(arg2 == (rexw ? 64 : 32));
 } else {
 tcg_debug_assert(dest != arg2);
-tcg_out_cmov(s, TCG_COND_LTU, rexw, dest, arg2);
+tcg_out_cmov(s, JCC_JB, rexw, dest, arg2);
 }
 } else {
 tcg_debug_assert(dest != arg2);
 tcg_out_modrm(s, OPC_BSF + rexw, dest, arg1);
-tcg_out_cmov(s, TCG_COND_EQ, rexw, dest, arg2);
+tcg_out_cmov(s, JCC_JE, rexw, dest, arg2);
 }
 }
 
@@ -1747,7 +1747,7 @@ static void tcg_out_clz(TCGContext *s, int rexw, TCGReg 
dest, TCGReg arg1,
 tcg_debug_assert(arg2 == (rexw ? 64 : 32));
 } else {
 tcg_debug_assert(dest != arg2);
-tcg_out_cmov(s, TCG_COND_LTU, rexw, dest, arg2);
+tcg_out_cmov(s, JCC_JB, rexw, dest, arg2);
 }
 } else {
 tcg_debug_assert(!const_a2);
@@ -1760,7 +1760,7 @@ static void tcg_out_clz(TCGContext *s, int rexw, TCGReg 
dest, TCGReg arg1,
 
 /* Since we have destroyed the flags from BSR, we have to re-test.  */
 tcg_out_cmp(s, arg1, 0, 1, rexw);
-tcg_out_cmov(s, TCG_COND_EQ, rexw, dest, arg2);
+tcg_out_cmov(s, JCC_JE, rexw, dest, arg2);
 }
 }
 
-- 
2.34.1

[PULL 10/39] target/alpha: Use TCG_COND_TST{EQ,NE} for CMOVLB{C,S}

2024-02-04 Thread Richard Henderson

Reviewed-by: Philippe Mathieu-Daudé 
Signed-off-by: Richard Henderson 
---
 target/alpha/translate.c | 8 ++--
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/target/alpha/translate.c b/target/alpha/translate.c
index 566adc4cd6..220eda2137 100644
--- a/target/alpha/translate.c
+++ b/target/alpha/translate.c
@@ -1676,16 +1676,12 @@ static DisasJumpType translate_one(DisasContext *ctx, 
uint32_t insn)
 break;
 case 0x14:
 /* CMOVLBS */
-tmp = tcg_temp_new();
-tcg_gen_andi_i64(tmp, va, 1);
-tcg_gen_movcond_i64(TCG_COND_NE, vc, tmp, load_zero(ctx),
+tcg_gen_movcond_i64(TCG_COND_TSTNE, vc, va, tcg_constant_i64(1),
 vb, load_gpr(ctx, rc));
 break;
 case 0x16:
 /* CMOVLBC */
-tmp = tcg_temp_new();
-tcg_gen_andi_i64(tmp, va, 1);
-tcg_gen_movcond_i64(TCG_COND_EQ, vc, tmp, load_zero(ctx),
+tcg_gen_movcond_i64(TCG_COND_TSTEQ, vc, va, tcg_constant_i64(1),
 vb, load_gpr(ctx, rc));
 break;
 case 0x20:
-- 
2.34.1

[PULL 16/39] tcg: Add TCGCond argument to tcg_target_const_match

2024-02-04 Thread Richard Henderson

Fill the new argument from any condition within the opcode.
Not yet used within any backend.

Reviewed-by: Philippe Mathieu-Daudé 
Signed-off-by: Richard Henderson 
---
 tcg/tcg.c| 34 ++--
 tcg/aarch64/tcg-target.c.inc |  3 ++-
 tcg/arm/tcg-target.c.inc |  3 ++-
 tcg/i386/tcg-target.c.inc|  3 ++-
 tcg/loongarch64/tcg-target.c.inc |  3 ++-
 tcg/mips/tcg-target.c.inc|  3 ++-
 tcg/ppc/tcg-target.c.inc |  3 ++-
 tcg/riscv/tcg-target.c.inc   |  3 ++-
 tcg/s390x/tcg-target.c.inc   |  3 ++-
 tcg/sparc64/tcg-target.c.inc |  3 ++-
 tcg/tci/tcg-target.c.inc |  3 ++-
 11 files changed, 52 insertions(+), 12 deletions(-)

diff --git a/tcg/tcg.c b/tcg/tcg.c
index 60cb31c400..d6670237fb 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -173,7 +173,8 @@ static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg 
val,
 static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
  const TCGHelperInfo *info);
 static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot);
-static bool tcg_target_const_match(int64_t val, TCGType type, int ct, int vece);
+static bool tcg_target_const_match(int64_t val, int ct,
+   TCGType type, TCGCond cond, int vece);
 #ifdef TCG_TARGET_NEED_LDST_LABELS
 static int tcg_out_ldst_finalize(TCGContext *s);
 #endif
@@ -4786,6 +4787,7 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp 
*op)
 TCGTemp *ts;
 TCGArg new_args[TCG_MAX_OP_ARGS];
 int const_args[TCG_MAX_OP_ARGS];
+TCGCond op_cond;
 
 nb_oargs = def->nb_oargs;
 nb_iargs = def->nb_iargs;
@@ -4798,6 +4800,33 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp 
*op)
 i_allocated_regs = s->reserved_regs;
 o_allocated_regs = s->reserved_regs;
 
+switch (op->opc) {
+case INDEX_op_brcond_i32:
+case INDEX_op_brcond_i64:
+op_cond = op->args[2];
+break;
+case INDEX_op_setcond_i32:
+case INDEX_op_setcond_i64:
+case INDEX_op_negsetcond_i32:
+case INDEX_op_negsetcond_i64:
+case INDEX_op_cmp_vec:
+op_cond = op->args[3];
+break;
+case INDEX_op_brcond2_i32:
+op_cond = op->args[4];
+break;
+case INDEX_op_movcond_i32:
+case INDEX_op_movcond_i64:
+case INDEX_op_setcond2_i32:
+case INDEX_op_cmpsel_vec:
+op_cond = op->args[5];
+break;
+default:
+/* No condition within opcode. */
+op_cond = TCG_COND_ALWAYS;
+break;
+}
+
 /* satisfy input constraints */
 for (k = 0; k < nb_iargs; k++) {
 TCGRegSet i_preferred_regs, i_required_regs;
@@ -4811,7 +4840,8 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp 
*op)
 ts = arg_temp(arg);
 
 if (ts->val_type == TEMP_VAL_CONST
-&& tcg_target_const_match(ts->val, ts->type, arg_ct->ct, TCGOP_VECE(op))) {
+&& tcg_target_const_match(ts->val, arg_ct->ct, ts->type,
+  op_cond, TCGOP_VECE(op))) {
 /* constant is OK for instruction */
 const_args[i] = 1;
 new_args[i] = ts->val;
diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc
index a3efa1e67a..420e4a35ea 100644
--- a/tcg/aarch64/tcg-target.c.inc
+++ b/tcg/aarch64/tcg-target.c.inc
@@ -270,7 +270,8 @@ static bool is_shimm1632(uint32_t v32, int *cmode, int 
*imm8)
 }
 }
 
-static bool tcg_target_const_match(int64_t val, TCGType type, int ct, int vece)
+static bool tcg_target_const_match(int64_t val, int ct,
+   TCGType type, TCGCond cond, int vece)
 {
 if (ct & TCG_CT_CONST) {
 return 1;
diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc
index a9aa8aa91c..113094a5b2 100644
--- a/tcg/arm/tcg-target.c.inc
+++ b/tcg/arm/tcg-target.c.inc
@@ -501,7 +501,8 @@ static bool is_shimm1632(uint32_t v32, int *cmode, int 
*imm8)
  * mov operand2: values represented with x << (2 * y), x < 0x100
  * add, sub, eor...: ditto
  */
-static bool tcg_target_const_match(int64_t val, TCGType type, int ct, int vece)
+static bool tcg_target_const_match(int64_t val, int ct,
+   TCGType type, TCGCond cond, int vece)
 {
 if (ct & TCG_CT_CONST) {
 return 1;
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
index d268199fc1..accaaa2660 100644
--- a/tcg/i386/tcg-target.c.inc
+++ b/tcg/i386/tcg-target.c.inc
@@ -195,7 +195,8 @@ static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
 }
 
 /* test if a constant matches the constraint */
-static bool tcg_target_const_match(int64_t val, TCGType type, int ct, int vece)
+static bool tcg_target_const_match(int64_t val, int ct,
+   TCGType type, TCGCond cond, int vece)
 {
 if (ct & TCG_CT_CONST) {
 return 1;
diff --git a/tcg/loongarch64/tcg-target.c.inc

[PULL 06/39] tcg/optimize: Handle TCG_COND_TST{EQ,NE}

2024-02-04 Thread Richard Henderson

Fold constant comparisons.
Canonicalize "tst x,x" to equality vs zero.
Canonicalize "tst x,sign" to sign test vs zero.
Fold double-word comparisons with zero parts.
Fold setcond of "tst x,pow2" to a bit extract.

Reviewed-by: Philippe Mathieu-Daudé 
Signed-off-by: Richard Henderson 
---
 tcg/optimize.c | 240 -
 1 file changed, 218 insertions(+), 22 deletions(-)

diff --git a/tcg/optimize.c b/tcg/optimize.c
index 08a9280432..2ed6322f97 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -625,9 +625,15 @@ static bool do_constant_folding_cond_32(uint32_t x, 
uint32_t y, TCGCond c)
 return x <= y;
 case TCG_COND_GTU:
 return x > y;
-default:
-g_assert_not_reached();
+case TCG_COND_TSTEQ:
+return (x & y) == 0;
+case TCG_COND_TSTNE:
+return (x & y) != 0;
+case TCG_COND_ALWAYS:
+case TCG_COND_NEVER:
+break;
 }
+g_assert_not_reached();
 }
 
 static bool do_constant_folding_cond_64(uint64_t x, uint64_t y, TCGCond c)
@@ -653,12 +659,18 @@ static bool do_constant_folding_cond_64(uint64_t x, 
uint64_t y, TCGCond c)
 return x <= y;
 case TCG_COND_GTU:
 return x > y;
-default:
-g_assert_not_reached();
+case TCG_COND_TSTEQ:
+return (x & y) == 0;
+case TCG_COND_TSTNE:
+return (x & y) != 0;
+case TCG_COND_ALWAYS:
+case TCG_COND_NEVER:
+break;
 }
+g_assert_not_reached();
 }
 
-static bool do_constant_folding_cond_eq(TCGCond c)
+static int do_constant_folding_cond_eq(TCGCond c)
 {
 switch (c) {
 case TCG_COND_GT:
@@ -673,9 +685,14 @@ static bool do_constant_folding_cond_eq(TCGCond c)
 case TCG_COND_LEU:
 case TCG_COND_EQ:
 return 1;
-default:
-g_assert_not_reached();
+case TCG_COND_TSTEQ:
+case TCG_COND_TSTNE:
+return -1;
+case TCG_COND_ALWAYS:
+case TCG_COND_NEVER:
+break;
 }
+g_assert_not_reached();
 }
 
 /*
@@ -703,8 +720,10 @@ static int do_constant_folding_cond(TCGType type, TCGArg x,
 } else if (arg_is_const_val(y, 0)) {
 switch (c) {
 case TCG_COND_LTU:
+case TCG_COND_TSTNE:
 return 0;
 case TCG_COND_GEU:
+case TCG_COND_TSTEQ:
 return 1;
 default:
 return -1;
@@ -777,7 +796,30 @@ static int do_constant_folding_cond1(OptContext *ctx, 
TCGArg dest,
 }
 
 r = do_constant_folding_cond(ctx->type, *p1, *p2, cond);
-return r;
+if (r >= 0) {
+return r;
+}
+if (!is_tst_cond(cond)) {
+return -1;
+}
+
+/*
+ * TSTNE x,x -> NE x,0
+ * TSTNE x,-1 -> NE x,0
+ */
+if (args_are_copies(*p1, *p2) || arg_is_const_val(*p2, -1)) {
+*p2 = arg_new_constant(ctx, 0);
+*pcond = tcg_tst_eqne_cond(cond);
+return -1;
+}
+
+/* TSTNE x,sign -> LT x,0 */
+if (arg_is_const_val(*p2, (ctx->type == TCG_TYPE_I32
+   ? INT32_MIN : INT64_MIN))) {
+*p2 = arg_new_constant(ctx, 0);
+*pcond = tcg_tst_ltge_cond(cond);
+}
+return -1;
 }
 
 static int do_constant_folding_cond2(OptContext *ctx, TCGArg *args)
@@ -785,6 +827,7 @@ static int do_constant_folding_cond2(OptContext *ctx, 
TCGArg *args)
 TCGArg al, ah, bl, bh;
 TCGCond c;
 bool swap;
+int r;
 
 swap = swap_commutative2(args, args + 2);
 c = args[4];
@@ -806,21 +849,54 @@ static int do_constant_folding_cond2(OptContext *ctx, 
TCGArg *args)
 tcg_target_ulong alv = arg_info(al)->val;
 tcg_target_ulong ahv = arg_info(ah)->val;
 uint64_t a = deposit64(alv, 32, 32, ahv);
-return do_constant_folding_cond_64(a, b, c);
+
+r = do_constant_folding_cond_64(a, b, c);
+if (r >= 0) {
+return r;
+}
 }
+
 if (b == 0) {
 switch (c) {
 case TCG_COND_LTU:
+case TCG_COND_TSTNE:
 return 0;
 case TCG_COND_GEU:
+case TCG_COND_TSTEQ:
 return 1;
 default:
 break;
 }
 }
+
+/* TSTNE x,-1 -> NE x,0 */
+if (b == -1 && is_tst_cond(c)) {
+args[3] = args[2] = arg_new_constant(ctx, 0);
+args[4] = tcg_tst_eqne_cond(c);
+return -1;
+}
+
+/* TSTNE x,sign -> LT x,0 */
+if (b == INT64_MIN && is_tst_cond(c)) {
+/* bl must be 0, so copy that to bh */
+args[3] = bl;
+args[4] = tcg_tst_ltge_cond(c);
+return -1;
+}
 }
+
 if (args_are_copies(al, bl) && args_are_copies(ah, bh)) {
-return do_constant_folding_cond_eq(c);
+r = do_constant_folding_cond_eq(c);
+if (r >= 0) {
+return r;
+}
+
+/* TSTNE x,x -> NE x,0 */
+if (is_tst_cond(c)) {
+args[3] = args[2]

[PULL 04/39] tcg/optimize: Split out do_constant_folding_cond1

2024-02-04 Thread Richard Henderson

Handle modifications to the arguments and condition
in a single place.

Reviewed-by: Philippe Mathieu-Daudé 
Signed-off-by: Richard Henderson 
---
 tcg/optimize.c | 57 --
 1 file changed, 27 insertions(+), 30 deletions(-)

diff --git a/tcg/optimize.c b/tcg/optimize.c
index 73019b9996..9c04dba099 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -796,6 +796,23 @@ static bool swap_commutative2(TCGArg *p1, TCGArg *p2)
 return false;
 }
 
+static int do_constant_folding_cond1(OptContext *ctx, TCGArg dest,
+ TCGArg *p1, TCGArg *p2, TCGArg *pcond)
+{
+TCGCond cond;
+bool swap;
+int r;
+
+swap = swap_commutative(dest, p1, p2);
+cond = *pcond;
+if (swap) {
+*pcond = cond = tcg_swap_cond(cond);
+}
+
+r = do_constant_folding_cond(ctx->type, *p1, *p2, cond);
+return r;
+}
+
 static void init_arguments(OptContext *ctx, TCGOp *op, int nb_args)
 {
 for (int i = 0; i < nb_args; i++) {
@@ -1193,14 +1210,8 @@ static bool fold_andc(OptContext *ctx, TCGOp *op)
 
 static bool fold_brcond(OptContext *ctx, TCGOp *op)
 {
-TCGCond cond = op->args[2];
-int i;
-
-if (swap_commutative(NO_DEST, &op->args[0], &op->args[1])) {
-op->args[2] = cond = tcg_swap_cond(cond);
-}
-
-i = do_constant_folding_cond(ctx->type, op->args[0], op->args[1], cond);
+int i = do_constant_folding_cond1(ctx, NO_DEST, &op->args[0],
+  &op->args[1], &op->args[2]);
 if (i == 0) {
 tcg_op_remove(ctx->tcg, op);
 return true;
@@ -1695,21 +1706,18 @@ static bool fold_mov(OptContext *ctx, TCGOp *op)
 
 static bool fold_movcond(OptContext *ctx, TCGOp *op)
 {
-TCGCond cond = op->args[5];
 int i;
 
-if (swap_commutative(NO_DEST, &op->args[1], &op->args[2])) {
-op->args[5] = cond = tcg_swap_cond(cond);
-}
 /*
  * Canonicalize the "false" input reg to match the destination reg so
  * that the tcg backend can implement a "move if true" operation.
  */
 if (swap_commutative(op->args[0], &op->args[4], &op->args[3])) {
-op->args[5] = cond = tcg_invert_cond(cond);
+op->args[5] = tcg_invert_cond(op->args[5]);
 }
 
-i = do_constant_folding_cond(ctx->type, op->args[1], op->args[2], cond);
+i = do_constant_folding_cond1(ctx, NO_DEST, &op->args[1],
+  &op->args[2], &op->args[5]);
 if (i >= 0) {
 return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[4 - i]);
 }
@@ -1723,6 +1731,7 @@ static bool fold_movcond(OptContext *ctx, TCGOp *op)
 uint64_t tv = arg_info(op->args[3])->val;
 uint64_t fv = arg_info(op->args[4])->val;
 TCGOpcode opc, negopc = 0;
+TCGCond cond = op->args[5];
 
 switch (ctx->type) {
 case TCG_TYPE_I32:
@@ -1950,14 +1959,8 @@ static bool fold_remainder(OptContext *ctx, TCGOp *op)
 
 static bool fold_setcond(OptContext *ctx, TCGOp *op)
 {
-TCGCond cond = op->args[3];
-int i;
-
-if (swap_commutative(op->args[0], &op->args[1], &op->args[2])) {
-op->args[3] = cond = tcg_swap_cond(cond);
-}
-
-i = do_constant_folding_cond(ctx->type, op->args[1], op->args[2], cond);
+int i = do_constant_folding_cond1(ctx, op->args[0], &op->args[1],
+  &op->args[2], &op->args[3]);
 if (i >= 0) {
 return tcg_opt_gen_movi(ctx, op, op->args[0], i);
 }
@@ -1969,14 +1972,8 @@ static bool fold_setcond(OptContext *ctx, TCGOp *op)
 
 static bool fold_negsetcond(OptContext *ctx, TCGOp *op)
 {
-TCGCond cond = op->args[3];
-int i;
-
-if (swap_commutative(op->args[0], &op->args[1], &op->args[2])) {
-op->args[3] = cond = tcg_swap_cond(cond);
-}
-
-i = do_constant_folding_cond(ctx->type, op->args[1], op->args[2], cond);
+int i = do_constant_folding_cond1(ctx, op->args[0], &op->args[1],
+  &op->args[2], &op->args[3]);
 if (i >= 0) {
 return tcg_opt_gen_movi(ctx, op, op->args[0], -i);
 }
-- 
2.34.1

[PULL 32/39] tcg/ppc: Use cr0 in tcg_to_bc and tcg_to_isel

2024-02-04 Thread Richard Henderson

Using cr0 means we could choose to use rc=1 to compute the condition.
Adjust the tables and tcg_out_cmp that feeds them.

Reviewed-by: Philippe Mathieu-Daudé 
Signed-off-by: Richard Henderson 
---
 tcg/ppc/tcg-target.c.inc | 68 
 1 file changed, 34 insertions(+), 34 deletions(-)

diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
index 830d2fe73a..b9323baa86 100644
--- a/tcg/ppc/tcg-target.c.inc
+++ b/tcg/ppc/tcg-target.c.inc
@@ -671,30 +671,30 @@ enum {
 };
 
 static const uint32_t tcg_to_bc[] = {
-[TCG_COND_EQ]  = BC | BI(7, CR_EQ) | BO_COND_TRUE,
-[TCG_COND_NE]  = BC | BI(7, CR_EQ) | BO_COND_FALSE,
-[TCG_COND_LT]  = BC | BI(7, CR_LT) | BO_COND_TRUE,
-[TCG_COND_GE]  = BC | BI(7, CR_LT) | BO_COND_FALSE,
-[TCG_COND_LE]  = BC | BI(7, CR_GT) | BO_COND_FALSE,
-[TCG_COND_GT]  = BC | BI(7, CR_GT) | BO_COND_TRUE,
-[TCG_COND_LTU] = BC | BI(7, CR_LT) | BO_COND_TRUE,
-[TCG_COND_GEU] = BC | BI(7, CR_LT) | BO_COND_FALSE,
-[TCG_COND_LEU] = BC | BI(7, CR_GT) | BO_COND_FALSE,
-[TCG_COND_GTU] = BC | BI(7, CR_GT) | BO_COND_TRUE,
+[TCG_COND_EQ]  = BC | BI(0, CR_EQ) | BO_COND_TRUE,
+[TCG_COND_NE]  = BC | BI(0, CR_EQ) | BO_COND_FALSE,
+[TCG_COND_LT]  = BC | BI(0, CR_LT) | BO_COND_TRUE,
+[TCG_COND_GE]  = BC | BI(0, CR_LT) | BO_COND_FALSE,
+[TCG_COND_LE]  = BC | BI(0, CR_GT) | BO_COND_FALSE,
+[TCG_COND_GT]  = BC | BI(0, CR_GT) | BO_COND_TRUE,
+[TCG_COND_LTU] = BC | BI(0, CR_LT) | BO_COND_TRUE,
+[TCG_COND_GEU] = BC | BI(0, CR_LT) | BO_COND_FALSE,
+[TCG_COND_LEU] = BC | BI(0, CR_GT) | BO_COND_FALSE,
+[TCG_COND_GTU] = BC | BI(0, CR_GT) | BO_COND_TRUE,
 };
 
 /* The low bit here is set if the RA and RB fields must be inverted.  */
 static const uint32_t tcg_to_isel[] = {
-[TCG_COND_EQ]  = ISEL | BC_(7, CR_EQ),
-[TCG_COND_NE]  = ISEL | BC_(7, CR_EQ) | 1,
-[TCG_COND_LT]  = ISEL | BC_(7, CR_LT),
-[TCG_COND_GE]  = ISEL | BC_(7, CR_LT) | 1,
-[TCG_COND_LE]  = ISEL | BC_(7, CR_GT) | 1,
-[TCG_COND_GT]  = ISEL | BC_(7, CR_GT),
-[TCG_COND_LTU] = ISEL | BC_(7, CR_LT),
-[TCG_COND_GEU] = ISEL | BC_(7, CR_LT) | 1,
-[TCG_COND_LEU] = ISEL | BC_(7, CR_GT) | 1,
-[TCG_COND_GTU] = ISEL | BC_(7, CR_GT),
+[TCG_COND_EQ]  = ISEL | BC_(0, CR_EQ),
+[TCG_COND_NE]  = ISEL | BC_(0, CR_EQ) | 1,
+[TCG_COND_LT]  = ISEL | BC_(0, CR_LT),
+[TCG_COND_GE]  = ISEL | BC_(0, CR_LT) | 1,
+[TCG_COND_LE]  = ISEL | BC_(0, CR_GT) | 1,
+[TCG_COND_GT]  = ISEL | BC_(0, CR_GT),
+[TCG_COND_LTU] = ISEL | BC_(0, CR_LT),
+[TCG_COND_GEU] = ISEL | BC_(0, CR_LT) | 1,
+[TCG_COND_LEU] = ISEL | BC_(0, CR_GT) | 1,
+[TCG_COND_GTU] = ISEL | BC_(0, CR_GT),
 };
 
 static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
@@ -1827,7 +1827,7 @@ static void tcg_out_setcond(TCGContext *s, TCGType type, 
TCGCond cond,
 if (have_isa_3_10) {
 tcg_insn_unit bi, opc;
 
-tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 7, type);
+tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 0, type);
 
 /* Re-use tcg_to_bc for BI and BO_COND_{TRUE,FALSE}. */
 bi = tcg_to_bc[cond] & (0x1f << 16);
@@ -1880,7 +1880,7 @@ static void tcg_out_setcond(TCGContext *s, TCGType type, 
TCGCond cond,
 if (have_isel) {
 int isel, tab;
 
-tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 7, type);
+tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 0, type);
 
 isel = tcg_to_isel[cond];
 
@@ -1966,7 +1966,7 @@ static void tcg_out_brcond(TCGContext *s, TCGCond cond,
TCGArg arg1, TCGArg arg2, int const_arg2,
TCGLabel *l, TCGType type)
 {
-tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 7, type);
+tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 0, type);
 tcg_out_bc_lab(s, cond, l);
 }
 
@@ -1980,7 +1980,7 @@ static void tcg_out_movcond(TCGContext *s, TCGType type, 
TCGCond cond,
 return;
 }
 
-tcg_out_cmp(s, cond, c1, c2, const_c2, 7, type);
+tcg_out_cmp(s, cond, c1, c2, const_c2, 0, type);
 
 if (have_isel) {
 int isel = tcg_to_isel[cond];
@@ -2024,7 +2024,7 @@ static void tcg_out_cntxz(TCGContext *s, TCGType type, 
uint32_t opc,
 if (const_a2 && a2 == (type == TCG_TYPE_I32 ? 32 : 64)) {
 tcg_out32(s, opc | RA(a0) | RS(a1));
 } else {
-tcg_out_cmp(s, TCG_COND_EQ, a1, 0, 1, 7, type);
+tcg_out_cmp(s, TCG_COND_EQ, a1, 0, 1, 0, type);
 /* Note that the only other valid constant for a2 is 0.  */
 if (have_isel) {
 tcg_out32(s, opc | RA(TCG_REG_R0) | RS(a1));
@@ -2079,7 +2079,7 @@ static void tcg_out_cmp2(TCGContext *s, const TCGArg 
*args,
 do_equality:
 tcg_out_cmp(s, cond, al, bl, blconst, 6, TCG_TYPE_I32);
 tcg_out_cmp(s, cond, ah, bh, bhconst, 7, TCG_TYPE_I32);
-tcg_out32(s, op | BT(7, CR_EQ) | BA(6, CR_EQ) | BB(7, CR_EQ));
+tcg_out32(s, op | BT(0, CR_EQ) | BA(6, CR_EQ) |

[PULL 15/39] target/s390x: Improve general case of disas_jcc

2024-02-04 Thread Richard Henderson

Avoid code duplication by handling 7 of the 14 cases
by inverting the test for the other 7 cases.

Use TCG_COND_TSTNE for cc in {1,3}.
Use (cc - 1) <= 1 for cc in {1,2}.

Acked-by: Ilya Leoshkevich 
Reviewed-by: Philippe Mathieu-Daudé 
Signed-off-by: Richard Henderson 
---
 target/s390x/tcg/translate.c | 82 +---
 1 file changed, 30 insertions(+), 52 deletions(-)

diff --git a/target/s390x/tcg/translate.c b/target/s390x/tcg/translate.c
index 05fd29589c..3d6a9f44a6 100644
--- a/target/s390x/tcg/translate.c
+++ b/target/s390x/tcg/translate.c
@@ -885,67 +885,45 @@ static void disas_jcc(DisasContext *s, DisasCompare *c, 
uint32_t mask)
 case CC_OP_STATIC:
 c->is_64 = false;
 c->u.s32.a = cc_op;
-switch (mask) {
-case 0x8 | 0x4 | 0x2: /* cc != 3 */
-cond = TCG_COND_NE;
+
+/* Fold half of the cases using bit 3 to invert. */
+switch (mask & 8 ? mask ^ 0xf : mask) {
+case 0x1: /* cc == 3 */
+cond = TCG_COND_EQ;
 c->u.s32.b = tcg_constant_i32(3);
 break;
-case 0x8 | 0x4 | 0x1: /* cc != 2 */
-cond = TCG_COND_NE;
-c->u.s32.b = tcg_constant_i32(2);
-break;
-case 0x8 | 0x2 | 0x1: /* cc != 1 */
-cond = TCG_COND_NE;
-c->u.s32.b = tcg_constant_i32(1);
-break;
-case 0x8 | 0x2: /* cc == 0 || cc == 2 => (cc & 1) == 0 */
-cond = TCG_COND_EQ;
-c->u.s32.a = tcg_temp_new_i32();
-c->u.s32.b = tcg_constant_i32(0);
-tcg_gen_andi_i32(c->u.s32.a, cc_op, 1);
-break;
-case 0x8 | 0x4: /* cc < 2 */
-cond = TCG_COND_LTU;
-c->u.s32.b = tcg_constant_i32(2);
-break;
-case 0x8: /* cc == 0 */
-cond = TCG_COND_EQ;
-c->u.s32.b = tcg_constant_i32(0);
-break;
-case 0x4 | 0x2 | 0x1: /* cc != 0 */
-cond = TCG_COND_NE;
-c->u.s32.b = tcg_constant_i32(0);
-break;
-case 0x4 | 0x1: /* cc == 1 || cc == 3 => (cc & 1) != 0 */
-cond = TCG_COND_NE;
-c->u.s32.a = tcg_temp_new_i32();
-c->u.s32.b = tcg_constant_i32(0);
-tcg_gen_andi_i32(c->u.s32.a, cc_op, 1);
-break;
-case 0x4: /* cc == 1 */
-cond = TCG_COND_EQ;
-c->u.s32.b = tcg_constant_i32(1);
-break;
-case 0x2 | 0x1: /* cc > 1 */
-cond = TCG_COND_GTU;
-c->u.s32.b = tcg_constant_i32(1);
-break;
 case 0x2: /* cc == 2 */
 cond = TCG_COND_EQ;
 c->u.s32.b = tcg_constant_i32(2);
 break;
-case 0x1: /* cc == 3 */
+case 0x4: /* cc == 1 */
 cond = TCG_COND_EQ;
-c->u.s32.b = tcg_constant_i32(3);
+c->u.s32.b = tcg_constant_i32(1);
+break;
+case 0x2 | 0x1: /* cc == 2 || cc == 3 => cc > 1 */
+cond = TCG_COND_GTU;
+c->u.s32.b = tcg_constant_i32(1);
+break;
+case 0x4 | 0x1: /* cc == 1 || cc == 3 => (cc & 1) != 0 */
+cond = TCG_COND_TSTNE;
+c->u.s32.b = tcg_constant_i32(1);
+break;
+case 0x4 | 0x2: /* cc == 1 || cc == 2 => (cc - 1) <= 1 */
+cond = TCG_COND_LEU;
+c->u.s32.a = tcg_temp_new_i32();
+c->u.s32.b = tcg_constant_i32(1);
+tcg_gen_addi_i32(c->u.s32.a, cc_op, -1);
+break;
+case 0x4 | 0x2 | 0x1: /* cc != 0 */
+cond = TCG_COND_NE;
+c->u.s32.b = tcg_constant_i32(0);
 break;
 default:
-/* CC is masked by something else: (8 >> cc) & mask.  */
-cond = TCG_COND_NE;
-c->u.s32.a = tcg_temp_new_i32();
-c->u.s32.b = tcg_constant_i32(0);
-tcg_gen_shr_i32(c->u.s32.a, tcg_constant_i32(8), cc_op);
-tcg_gen_andi_i32(c->u.s32.a, c->u.s32.a, mask);
-break;
+/* case 0: never, handled above. */
+g_assert_not_reached();
+}
+if (mask & 8) {
+cond = tcg_invert_cond(cond);
 }
 break;
 
-- 
2.34.1

[PULL 26/39] tcg/i386: Improve TSTNE/TSTEQ vs powers of two

2024-02-04 Thread Richard Henderson

Use "test x,x" when the bit is one of the 4 sign bits.
Use "bt imm,x" otherwise.

Signed-off-by: Richard Henderson 
---
 tcg/i386/tcg-target-con-set.h |  6 ++--
 tcg/i386/tcg-target-con-str.h |  1 +
 tcg/i386/tcg-target.c.inc | 54 +++
 3 files changed, 53 insertions(+), 8 deletions(-)

diff --git a/tcg/i386/tcg-target-con-set.h b/tcg/i386/tcg-target-con-set.h
index 7d00a7dde8..e24241cfa2 100644
--- a/tcg/i386/tcg-target-con-set.h
+++ b/tcg/i386/tcg-target-con-set.h
@@ -20,7 +20,7 @@ C_O0_I2(L, L)
 C_O0_I2(qi, r)
 C_O0_I2(re, r)
 C_O0_I2(ri, r)
-C_O0_I2(r, re)
+C_O0_I2(r, reT)
 C_O0_I2(s, L)
 C_O0_I2(x, r)
 C_O0_I3(L, L, L)
@@ -34,7 +34,7 @@ C_O1_I1(r, r)
 C_O1_I1(x, r)
 C_O1_I1(x, x)
 C_O1_I2(q, 0, qi)
-C_O1_I2(q, r, re)
+C_O1_I2(q, r, reT)
 C_O1_I2(r, 0, ci)
 C_O1_I2(r, 0, r)
 C_O1_I2(r, 0, re)
@@ -50,7 +50,7 @@ C_N1_I2(r, r, r)
 C_N1_I2(r, r, rW)
 C_O1_I3(x, 0, x, x)
 C_O1_I3(x, x, x, x)
-C_O1_I4(r, r, re, r, 0)
+C_O1_I4(r, r, reT, r, 0)
 C_O1_I4(r, r, r, ri, ri)
 C_O2_I1(r, r, L)
 C_O2_I2(a, d, a, r)
diff --git a/tcg/i386/tcg-target-con-str.h b/tcg/i386/tcg-target-con-str.h
index 95a30e58cd..cc22db227b 100644
--- a/tcg/i386/tcg-target-con-str.h
+++ b/tcg/i386/tcg-target-con-str.h
@@ -28,5 +28,6 @@ REGS('s', ALL_BYTEL_REGS & ~SOFTMMU_RESERVE_REGS)/* 
qemu_st8_i32 data */
  */
 CONST('e', TCG_CT_CONST_S32)
 CONST('I', TCG_CT_CONST_I32)
+CONST('T', TCG_CT_CONST_TST)
 CONST('W', TCG_CT_CONST_WSZ)
 CONST('Z', TCG_CT_CONST_U32)
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
index f2414177bd..0b8c60d021 100644
--- a/tcg/i386/tcg-target.c.inc
+++ b/tcg/i386/tcg-target.c.inc
@@ -132,6 +132,7 @@ static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind 
kind, int slot)
 #define TCG_CT_CONST_U32 0x200
 #define TCG_CT_CONST_I32 0x400
 #define TCG_CT_CONST_WSZ 0x800
+#define TCG_CT_CONST_TST 0x1000
 
 /* Registers used with L constraint, which are the first argument
registers on x86_64, and two random call clobbered registers on
@@ -202,7 +203,8 @@ static bool tcg_target_const_match(int64_t val, int ct,
 return 1;
 }
 if (type == TCG_TYPE_I32) {
-if (ct & (TCG_CT_CONST_S32 | TCG_CT_CONST_U32 | TCG_CT_CONST_I32)) {
+if (ct & (TCG_CT_CONST_S32 | TCG_CT_CONST_U32 |
+  TCG_CT_CONST_I32 | TCG_CT_CONST_TST)) {
 return 1;
 }
 } else {
@@ -215,6 +217,17 @@ static bool tcg_target_const_match(int64_t val, int ct,
 if ((ct & TCG_CT_CONST_I32) && ~val == (int32_t)~val) {
 return 1;
 }
+/*
+ * This will be used in combination with TCG_CT_CONST_S32,
+ * so "normal" TESTQ is already matched.  Also accept:
+ *TESTQ -> TESTL   (uint32_t)
+ *TESTQ -> BT  (is_power_of_2)
+ */
+if ((ct & TCG_CT_CONST_TST)
+&& is_tst_cond(cond)
+&& (val == (uint32_t)val || is_power_of_2(val))) {
+return 1;
+}
 }
 if ((ct & TCG_CT_CONST_WSZ) && val == (type == TCG_TYPE_I32 ? 32 : 64)) {
 return 1;
@@ -396,6 +409,7 @@ static bool tcg_target_const_match(int64_t val, int ct,
 #define OPC_SHLX(0xf7 | P_EXT38 | P_DATA16)
 #define OPC_SHRX(0xf7 | P_EXT38 | P_SIMDF2)
 #define OPC_SHRD_Ib (0xac | P_EXT)
+#define OPC_TESTB  (0x84)
 #define OPC_TESTL  (0x85)
 #define OPC_TZCNT   (0xbc | P_EXT | P_SIMDF3)
 #define OPC_UD2 (0x0b | P_EXT)
@@ -442,6 +456,12 @@ static bool tcg_target_const_match(int64_t val, int ct,
 #define OPC_GRP3_Ev (0xf7)
 #define OPC_GRP5(0xff)
 #define OPC_GRP14   (0x73 | P_EXT | P_DATA16)
+#define OPC_GRPBT   (0xba | P_EXT)
+
+#define OPC_GRPBT_BT    4
+#define OPC_GRPBT_BTS   5
+#define OPC_GRPBT_BTR   6
+#define OPC_GRPBT_BTC   7
 
 /* Group 1 opcode extensions for 0x80-0x83.
These are also used as modifiers for OPC_ARITH.  */
@@ -1454,7 +1474,7 @@ static void tcg_out_jxx(TCGContext *s, int opc, TCGLabel 
*l, bool small)
 static int tcg_out_cmp(TCGContext *s, TCGCond cond, TCGArg arg1,
TCGArg arg2, int const_arg2, int rexw)
 {
-int jz;
+int jz, js;
 
 if (!is_tst_cond(cond)) {
 if (!const_arg2) {
@@ -1469,6 +1489,7 @@ static int tcg_out_cmp(TCGContext *s, TCGCond cond, 
TCGArg arg1,
 }
 
 jz = tcg_cond_to_jcc[cond];
+js = (cond == TCG_COND_TSTNE ? JCC_JS : JCC_JNS);
 
 if (!const_arg2) {
 tcg_out_modrm(s, OPC_TESTL + rexw, arg1, arg2);
@@ -1476,17 +1497,40 @@ static int tcg_out_cmp(TCGContext *s, TCGCond cond, 
TCGArg arg1,
 }
 
 if (arg2 <= 0xff && (TCG_TARGET_REG_BITS == 64 || arg1 < 4)) {
+if (arg2 == 0x80) {
+tcg_out_modrm(s, OPC_TESTB | P_REXB_R, arg1, arg1);
+return js;
+}
 tcg_out_modrm(s, OPC_GRP3_Eb | P_REXB_RM, EXT3_TESTi, arg1);
 tcg_out8(s, arg2);
 return jz;
 }
 
 if ((arg2 & ~0xff00) == 0 && arg1 < 4) {
+if (arg2 == 0x8000) {

[PULL 39/39] tcg/tci: Support TCG_COND_TST{EQ,NE}

2024-02-04 Thread Richard Henderson

Reviewed-by: Philippe Mathieu-Daudé 
Signed-off-by: Richard Henderson 
---
 tcg/tci/tcg-target.h |  2 +-
 tcg/tci.c| 14 ++
 2 files changed, 15 insertions(+), 1 deletion(-)

diff --git a/tcg/tci/tcg-target.h b/tcg/tci/tcg-target.h
index 609b2f4e4a..a076f401d2 100644
--- a/tcg/tci/tcg-target.h
+++ b/tcg/tci/tcg-target.h
@@ -117,7 +117,7 @@
 
 #define TCG_TARGET_HAS_qemu_ldst_i128   0
 
-#define TCG_TARGET_HAS_tst  0
+#define TCG_TARGET_HAS_tst  1
 
 /* Number of registers available. */
 #define TCG_TARGET_NB_REGS 16
diff --git a/tcg/tci.c b/tcg/tci.c
index 3cc851b7bd..39adcb7d82 100644
--- a/tcg/tci.c
+++ b/tcg/tci.c
@@ -228,6 +228,12 @@ static bool tci_compare32(uint32_t u0, uint32_t u1, 
TCGCond condition)
 case TCG_COND_GTU:
 result = (u0 > u1);
 break;
+case TCG_COND_TSTEQ:
+result = (u0 & u1) == 0;
+break;
+case TCG_COND_TSTNE:
+result = (u0 & u1) != 0;
+break;
 default:
 g_assert_not_reached();
 }
@@ -270,6 +276,12 @@ static bool tci_compare64(uint64_t u0, uint64_t u1, 
TCGCond condition)
 case TCG_COND_GTU:
 result = (u0 > u1);
 break;
+case TCG_COND_TSTEQ:
+result = (u0 & u1) == 0;
+break;
+case TCG_COND_TSTNE:
+result = (u0 & u1) != 0;
+break;
 default:
 g_assert_not_reached();
 }
@@ -1041,6 +1053,8 @@ static const char *str_c(TCGCond c)
 [TCG_COND_GEU] = "geu",
 [TCG_COND_LEU] = "leu",
 [TCG_COND_GTU] = "gtu",
+[TCG_COND_TSTEQ] = "tsteq",
+[TCG_COND_TSTNE] = "tstne",
 };
 
 assert((unsigned)c < ARRAY_SIZE(cond));
-- 
2.34.1

[PULL 30/39] tcg/sparc64: Support TCG_COND_TST{EQ,NE}

2024-02-04 Thread Richard Henderson

Reviewed-by: Philippe Mathieu-Daudé 
Signed-off-by: Richard Henderson 
---
 tcg/sparc64/tcg-target.h |  2 +-
 tcg/sparc64/tcg-target.c.inc | 16 ++--
 2 files changed, 15 insertions(+), 3 deletions(-)

diff --git a/tcg/sparc64/tcg-target.h b/tcg/sparc64/tcg-target.h
index ae2910c4ee..a18906a14e 100644
--- a/tcg/sparc64/tcg-target.h
+++ b/tcg/sparc64/tcg-target.h
@@ -149,7 +149,7 @@ extern bool use_vis3_instructions;
 
 #define TCG_TARGET_HAS_qemu_ldst_i128   0
 
-#define TCG_TARGET_HAS_tst  0
+#define TCG_TARGET_HAS_tst  1
 
 #define TCG_AREG0 TCG_REG_I0
 
diff --git a/tcg/sparc64/tcg-target.c.inc b/tcg/sparc64/tcg-target.c.inc
index 10fb8a1a0d..176c98740b 100644
--- a/tcg/sparc64/tcg-target.c.inc
+++ b/tcg/sparc64/tcg-target.c.inc
@@ -607,9 +607,11 @@ static void tcg_out_div32(TCGContext *s, TCGReg rd, TCGReg 
rs1,
uns ? ARITH_UDIV : ARITH_SDIV);
 }
 
-static const uint8_t tcg_cond_to_bcond[] = {
+static const uint8_t tcg_cond_to_bcond[16] = {
 [TCG_COND_EQ] = COND_E,
 [TCG_COND_NE] = COND_NE,
+[TCG_COND_TSTEQ] = COND_E,
+[TCG_COND_TSTNE] = COND_NE,
 [TCG_COND_LT] = COND_L,
 [TCG_COND_GE] = COND_GE,
 [TCG_COND_LE] = COND_LE,
@@ -649,7 +651,8 @@ static void tcg_out_bpcc(TCGContext *s, int scond, int 
flags, TCGLabel *l)
 static void tcg_out_cmp(TCGContext *s, TCGCond cond,
 TCGReg c1, int32_t c2, int c2const)
 {
-tcg_out_arithc(s, TCG_REG_G0, c1, c2, c2const, ARITH_SUBCC);
+tcg_out_arithc(s, TCG_REG_G0, c1, c2, c2const,
+   is_tst_cond(cond) ? ARITH_ANDCC : ARITH_SUBCC);
 }
 
 static void tcg_out_brcond_i32(TCGContext *s, TCGCond cond, TCGReg arg1,
@@ -744,6 +747,15 @@ static void tcg_out_setcond_i32(TCGContext *s, TCGCond 
cond, TCGReg ret,
 cond = (cond == TCG_COND_EQ ? TCG_COND_GEU : TCG_COND_LTU);
break;
 
+case TCG_COND_TSTEQ:
+case TCG_COND_TSTNE:
+/* Transform to inequality vs zero.  */
+tcg_out_arithc(s, TCG_REG_T1, c1, c2, c2const, ARITH_AND);
+c1 = TCG_REG_G0;
+c2 = TCG_REG_T1, c2const = 0;
+cond = (cond == TCG_COND_TSTEQ ? TCG_COND_GEU : TCG_COND_LTU);
+   break;
+
 case TCG_COND_GTU:
 case TCG_COND_LEU:
 /* If we don't need to load a constant into a register, we can
-- 
2.34.1

[PULL 27/39] tcg/i386: Use TEST r,r to test 8/16/32 bits

2024-02-04 Thread Richard Henderson

From: Paolo Bonzini 

Just like when testing against the sign bits, TEST r,r can be used when the
immediate is 0xff, 0xff00, 0xffff, 0xffffffff.

Signed-off-by: Paolo Bonzini 
Reviewed-by: Philippe Mathieu-Daudé 
Reviewed-by: Richard Henderson 
Signed-off-by: Richard Henderson 
---
 tcg/i386/tcg-target.c.inc | 17 +
 1 file changed, 17 insertions(+)

diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
index 0b8c60d021..c6ba498623 100644
--- a/tcg/i386/tcg-target.c.inc
+++ b/tcg/i386/tcg-target.c.inc
@@ -1501,6 +1501,10 @@ static int tcg_out_cmp(TCGContext *s, TCGCond cond, 
TCGArg arg1,
 tcg_out_modrm(s, OPC_TESTB | P_REXB_R, arg1, arg1);
 return js;
 }
+if (arg2 == 0xff) {
+tcg_out_modrm(s, OPC_TESTB | P_REXB_R, arg1, arg1);
+return jz;
+}
 tcg_out_modrm(s, OPC_GRP3_Eb | P_REXB_RM, EXT3_TESTi, arg1);
 tcg_out8(s, arg2);
 return jz;
@@ -1511,11 +1515,24 @@ static int tcg_out_cmp(TCGContext *s, TCGCond cond, 
TCGArg arg1,
 tcg_out_modrm(s, OPC_TESTB, arg1 + 4, arg1 + 4);
 return js;
 }
+if (arg2 == 0xff00) {
+tcg_out_modrm(s, OPC_TESTB, arg1 + 4, arg1 + 4);
+return jz;
+}
 tcg_out_modrm(s, OPC_GRP3_Eb, EXT3_TESTi, arg1 + 4);
 tcg_out8(s, arg2 >> 8);
 return jz;
 }
 
+if (arg2 == 0xffff) {
+tcg_out_modrm(s, OPC_TESTL | P_DATA16, arg1, arg1);
+return jz;
+}
+if (arg2 == 0xffffffffu) {
+tcg_out_modrm(s, OPC_TESTL, arg1, arg1);
+return jz;
+}
+
 if (is_power_of_2(rexw ? arg2 : (uint32_t)arg2)) {
 int jc = (cond == TCG_COND_TSTNE ? JCC_JB : JCC_JAE);
 int sh = ctz64(arg2);
-- 
2.34.1

[PULL 36/39] tcg/s390x: Split constraint A into J+U

2024-02-04 Thread Richard Henderson

Signed 33-bit == signed 32-bit + unsigned 32-bit.

Reviewed-by: Philippe Mathieu-Daudé 
Signed-off-by: Richard Henderson 
---
 tcg/s390x/tcg-target-con-set.h |  8 
 tcg/s390x/tcg-target-con-str.h |  2 +-
 tcg/s390x/tcg-target.c.inc | 36 +-
 3 files changed, 23 insertions(+), 23 deletions(-)

diff --git a/tcg/s390x/tcg-target-con-set.h b/tcg/s390x/tcg-target-con-set.h
index 9a42037499..665851d84a 100644
--- a/tcg/s390x/tcg-target-con-set.h
+++ b/tcg/s390x/tcg-target-con-set.h
@@ -15,7 +15,7 @@
 C_O0_I1(r)
 C_O0_I2(r, r)
 C_O0_I2(r, ri)
-C_O0_I2(r, rA)
+C_O0_I2(r, rJU)
 C_O0_I2(v, r)
 C_O0_I3(o, m, r)
 C_O1_I1(r, r)
@@ -27,7 +27,7 @@ C_O1_I2(r, 0, rI)
 C_O1_I2(r, 0, rJ)
 C_O1_I2(r, r, r)
 C_O1_I2(r, r, ri)
-C_O1_I2(r, r, rA)
+C_O1_I2(r, r, rJU)
 C_O1_I2(r, r, rI)
 C_O1_I2(r, r, rJ)
 C_O1_I2(r, r, rK)
@@ -39,10 +39,10 @@ C_O1_I2(v, v, r)
 C_O1_I2(v, v, v)
 C_O1_I3(v, v, v, v)
 C_O1_I4(r, r, ri, rI, r)
-C_O1_I4(r, r, rA, rI, r)
+C_O1_I4(r, r, rJU, rI, r)
 C_O2_I1(o, m, r)
 C_O2_I2(o, m, 0, r)
 C_O2_I2(o, m, r, r)
 C_O2_I3(o, m, 0, 1, r)
 C_N1_O1_I4(r, r, 0, 1, ri, r)
-C_N1_O1_I4(r, r, 0, 1, rA, r)
+C_N1_O1_I4(r, r, 0, 1, rJU, r)
diff --git a/tcg/s390x/tcg-target-con-str.h b/tcg/s390x/tcg-target-con-str.h
index 25675b449e..9d2cb775dc 100644
--- a/tcg/s390x/tcg-target-con-str.h
+++ b/tcg/s390x/tcg-target-con-str.h
@@ -16,10 +16,10 @@ REGS('o', 0x) /* odd numbered general regs */
  * Define constraint letters for constants:
  * CONST(letter, TCG_CT_CONST_* bit set)
  */
-CONST('A', TCG_CT_CONST_S33)
 CONST('I', TCG_CT_CONST_S16)
 CONST('J', TCG_CT_CONST_S32)
 CONST('K', TCG_CT_CONST_P32)
 CONST('N', TCG_CT_CONST_INV)
 CONST('R', TCG_CT_CONST_INVRISBG)
+CONST('U', TCG_CT_CONST_U32)
 CONST('Z', TCG_CT_CONST_ZERO)
diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc
index 54645d1f55..b2815ec648 100644
--- a/tcg/s390x/tcg-target.c.inc
+++ b/tcg/s390x/tcg-target.c.inc
@@ -30,7 +30,7 @@
 
 #define TCG_CT_CONST_S16(1 << 8)
 #define TCG_CT_CONST_S32(1 << 9)
-#define TCG_CT_CONST_S33(1 << 10)
+#define TCG_CT_CONST_U32(1 << 10)
 #define TCG_CT_CONST_ZERO   (1 << 11)
 #define TCG_CT_CONST_P32(1 << 12)
 #define TCG_CT_CONST_INV(1 << 13)
@@ -542,22 +542,23 @@ static bool tcg_target_const_match(int64_t val, int ct,
TCGType type, TCGCond cond, int vece)
 {
 if (ct & TCG_CT_CONST) {
-return 1;
+return true;
 }
-
 if (type == TCG_TYPE_I32) {
 val = (int32_t)val;
 }
 
-/* The following are mutually exclusive.  */
-if (ct & TCG_CT_CONST_S16) {
-return val == (int16_t)val;
-} else if (ct & TCG_CT_CONST_S32) {
-return val == (int32_t)val;
-} else if (ct & TCG_CT_CONST_S33) {
-return val >= -0xffffffffll && val <= 0xffffffffll;
-} else if (ct & TCG_CT_CONST_ZERO) {
-return val == 0;
+if ((ct & TCG_CT_CONST_S32) && val == (int32_t)val) {
+return true;
+}
+if ((ct & TCG_CT_CONST_U32) && val == (uint32_t)val) {
+return true;
+}
+if ((ct & TCG_CT_CONST_S16) && val == (int16_t)val) {
+return true;
+}
+if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
+return true;
 }
 
 if (ct & TCG_CT_CONST_INV) {
@@ -573,8 +574,7 @@ static bool tcg_target_const_match(int64_t val, int ct,
 if ((ct & TCG_CT_CONST_INVRISBG) && risbg_mask(~val)) {
 return true;
 }
-
-return 0;
+return false;
 }
 
 /* Emit instructions according to the given instruction format.  */
@@ -3137,7 +3137,7 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode 
op)
 return C_O1_I2(r, r, ri);
 case INDEX_op_setcond_i64:
 case INDEX_op_negsetcond_i64:
-return C_O1_I2(r, r, rA);
+return C_O1_I2(r, r, rJU);
 
 case INDEX_op_clz_i64:
 return C_O1_I2(r, r, rI);
@@ -3187,7 +3187,7 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode 
op)
 case INDEX_op_brcond_i32:
 return C_O0_I2(r, ri);
 case INDEX_op_brcond_i64:
-return C_O0_I2(r, rA);
+return C_O0_I2(r, rJU);
 
 case INDEX_op_bswap16_i32:
 case INDEX_op_bswap16_i64:
@@ -3240,7 +3240,7 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode 
op)
 case INDEX_op_movcond_i32:
 return C_O1_I4(r, r, ri, rI, r);
 case INDEX_op_movcond_i64:
-return C_O1_I4(r, r, rA, rI, r);
+return C_O1_I4(r, r, rJU, rI, r);
 
 case INDEX_op_div2_i32:
 case INDEX_op_div2_i64:
@@ -3259,7 +3259,7 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode 
op)
 
 case INDEX_op_add2_i64:
 case INDEX_op_sub2_i64:
-return C_N1_O1_I4(r, r, 0, 1, rA, r);
+return C_N1_O1_I4(r, r, 0, 1, rJU, r);
 
 case INDEX_op_st_vec:
 return C_O0_I2(v, r);
-- 
2.34.1

[PULL 24/39] tcg/i386: Move tcg_cond_to_jcc[] into tcg_out_cmp

2024-02-04 Thread Richard Henderson

Return the x86 condition codes to use after the compare.

Reviewed-by: Philippe Mathieu-Daudé 
Signed-off-by: Richard Henderson 
---
 tcg/i386/tcg-target.c.inc | 24 +---
 1 file changed, 13 insertions(+), 11 deletions(-)

diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
index 2d6100a8f4..02718a02d8 100644
--- a/tcg/i386/tcg-target.c.inc
+++ b/tcg/i386/tcg-target.c.inc
@@ -1449,8 +1449,8 @@ static void tcg_out_jxx(TCGContext *s, int opc, TCGLabel 
*l, bool small)
 }
 }
 
-static void tcg_out_cmp(TCGContext *s, TCGArg arg1, TCGArg arg2,
-int const_arg2, int rexw)
+static int tcg_out_cmp(TCGContext *s, TCGCond cond, TCGArg arg1,
+   TCGArg arg2, int const_arg2, int rexw)
 {
 if (const_arg2) {
 if (arg2 == 0) {
@@ -1462,14 +1462,15 @@ static void tcg_out_cmp(TCGContext *s, TCGArg arg1, 
TCGArg arg2,
 } else {
 tgen_arithr(s, ARITH_CMP + rexw, arg1, arg2);
 }
+return tcg_cond_to_jcc[cond];
 }
 
 static void tcg_out_brcond(TCGContext *s, int rexw, TCGCond cond,
TCGArg arg1, TCGArg arg2, int const_arg2,
TCGLabel *label, bool small)
 {
-tcg_out_cmp(s, arg1, arg2, const_arg2, rexw);
-tcg_out_jxx(s, tcg_cond_to_jcc[cond], label, small);
+int jcc = tcg_out_cmp(s, cond, arg1, arg2, const_arg2, rexw);
+tcg_out_jxx(s, jcc, label, small);
 }
 
 #if TCG_TARGET_REG_BITS == 32
@@ -1561,6 +1562,7 @@ static void tcg_out_setcond(TCGContext *s, int rexw, 
TCGCond cond,
 {
 bool inv = false;
 bool cleared;
+int jcc;
 
 switch (cond) {
 case TCG_COND_NE:
@@ -1597,7 +1599,7 @@ static void tcg_out_setcond(TCGContext *s, int rexw, 
TCGCond cond,
  * We can then use NEG or INC to produce the desired result.
  * This is always smaller than the SETCC expansion.
  */
-tcg_out_cmp(s, arg1, arg2, const_arg2, rexw);
+tcg_out_cmp(s, TCG_COND_LTU, arg1, arg2, const_arg2, rexw);
 
 /* X - X - C = -C = (C ? -1 : 0) */
 tgen_arithr(s, ARITH_SBB + (neg ? rexw : 0), dest, dest);
@@ -1644,8 +1646,8 @@ static void tcg_out_setcond(TCGContext *s, int rexw, 
TCGCond cond,
 cleared = true;
 }
 
-tcg_out_cmp(s, arg1, arg2, const_arg2, rexw);
-tcg_out_modrm(s, OPC_SETCC | tcg_cond_to_jcc[cond], 0, dest);
+jcc = tcg_out_cmp(s, cond, arg1, arg2, const_arg2, rexw);
+tcg_out_modrm(s, OPC_SETCC | jcc, 0, dest);
 
 if (!cleared) {
 tcg_out_ext8u(s, dest, dest);
@@ -1716,8 +1718,8 @@ static void tcg_out_movcond(TCGContext *s, int rexw, 
TCGCond cond,
 TCGReg dest, TCGReg c1, TCGArg c2, int const_c2,
 TCGReg v1)
 {
-tcg_out_cmp(s, c1, c2, const_c2, rexw);
-tcg_out_cmov(s, tcg_cond_to_jcc[cond], rexw, dest, v1);
+int jcc = tcg_out_cmp(s, cond, c1, c2, const_c2, rexw);
+tcg_out_cmov(s, jcc, rexw, dest, v1);
 }
 
 static void tcg_out_ctz(TCGContext *s, int rexw, TCGReg dest, TCGReg arg1,
@@ -1759,8 +1761,8 @@ static void tcg_out_clz(TCGContext *s, int rexw, TCGReg 
dest, TCGReg arg1,
 tgen_arithi(s, ARITH_XOR + rexw, dest, rexw ? 63 : 31, 0);
 
 /* Since we have destroyed the flags from BSR, we have to re-test.  */
-tcg_out_cmp(s, arg1, 0, 1, rexw);
-tcg_out_cmov(s, JCC_JE, rexw, dest, arg2);
+int jcc = tcg_out_cmp(s, TCG_COND_EQ, arg1, 0, 1, rexw);
+tcg_out_cmov(s, jcc, rexw, dest, arg2);
 }
 }
 
-- 
2.34.1

[PULL 13/39] target/sparc: Use TCG_COND_TSTEQ in gen_op_mulscc

2024-02-04 Thread Richard Henderson

Reviewed-by: Philippe Mathieu-Daudé 
Signed-off-by: Richard Henderson 
---
 target/sparc/translate.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/target/sparc/translate.c b/target/sparc/translate.c
index 7df6f83b13..d9304a5bc4 100644
--- a/target/sparc/translate.c
+++ b/target/sparc/translate.c
@@ -488,6 +488,7 @@ static void gen_op_subccc(TCGv dst, TCGv src1, TCGv src2)
 static void gen_op_mulscc(TCGv dst, TCGv src1, TCGv src2)
 {
 TCGv zero = tcg_constant_tl(0);
+TCGv one = tcg_constant_tl(1);
 TCGv t_src1 = tcg_temp_new();
 TCGv t_src2 = tcg_temp_new();
 TCGv t0 = tcg_temp_new();
@@ -499,8 +500,7 @@ static void gen_op_mulscc(TCGv dst, TCGv src1, TCGv src2)
  * if (!(env->y & 1))
  *   src2 = 0;
  */
-tcg_gen_andi_tl(t0, cpu_y, 0x1);
-tcg_gen_movcond_tl(TCG_COND_EQ, t_src2, t0, zero, zero, t_src2);
+tcg_gen_movcond_tl(TCG_COND_TSTEQ, t_src2, cpu_y, one, zero, t_src2);
 
 /*
  * b2 = src1 & 1;
-- 
2.34.1

[PULL 09/39] target/alpha: Use TCG_COND_TST{EQ,NE} for BLB{C,S}

2024-02-04 Thread Richard Henderson

Reviewed-by: Philippe Mathieu-Daudé 
Signed-off-by: Richard Henderson 
Message-Id: <20231028194522.245170-33-richard.hender...@linaro.org>
[PMD: Split from bigger patch, part 2/2]
Signed-off-by: Philippe Mathieu-Daudé 
Message-Id: <20231108205247.83234-2-phi...@linaro.org>
---
 target/alpha/translate.c | 20 ++--
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/target/alpha/translate.c b/target/alpha/translate.c
index e9cb623277..566adc4cd6 100644
--- a/target/alpha/translate.c
+++ b/target/alpha/translate.c
@@ -482,10 +482,10 @@ static DisasJumpType gen_bcond_internal(DisasContext 
*ctx, TCGCond cond,
 }
 
 static DisasJumpType gen_bcond(DisasContext *ctx, TCGCond cond, int ra,
-   int32_t disp, int mask)
+   int32_t disp)
 {
 return gen_bcond_internal(ctx, cond, load_gpr(ctx, ra),
-  mask, disp);
+  is_tst_cond(cond), disp);
 }
 
 /* Fold -0.0 for comparison with COND.  */
@@ -2820,35 +2820,35 @@ static DisasJumpType translate_one(DisasContext *ctx, 
uint32_t insn)
 break;
 case 0x38:
 /* BLBC */
-ret = gen_bcond(ctx, TCG_COND_EQ, ra, disp21, 1);
+ret = gen_bcond(ctx, TCG_COND_TSTEQ, ra, disp21);
 break;
 case 0x39:
 /* BEQ */
-ret = gen_bcond(ctx, TCG_COND_EQ, ra, disp21, 0);
+ret = gen_bcond(ctx, TCG_COND_EQ, ra, disp21);
 break;
 case 0x3A:
 /* BLT */
-ret = gen_bcond(ctx, TCG_COND_LT, ra, disp21, 0);
+ret = gen_bcond(ctx, TCG_COND_LT, ra, disp21);
 break;
 case 0x3B:
 /* BLE */
-ret = gen_bcond(ctx, TCG_COND_LE, ra, disp21, 0);
+ret = gen_bcond(ctx, TCG_COND_LE, ra, disp21);
 break;
 case 0x3C:
 /* BLBS */
-ret = gen_bcond(ctx, TCG_COND_NE, ra, disp21, 1);
+ret = gen_bcond(ctx, TCG_COND_TSTNE, ra, disp21);
 break;
 case 0x3D:
 /* BNE */
-ret = gen_bcond(ctx, TCG_COND_NE, ra, disp21, 0);
+ret = gen_bcond(ctx, TCG_COND_NE, ra, disp21);
 break;
 case 0x3E:
 /* BGE */
-ret = gen_bcond(ctx, TCG_COND_GE, ra, disp21, 0);
+ret = gen_bcond(ctx, TCG_COND_GE, ra, disp21);
 break;
 case 0x3F:
 /* BGT */
-ret = gen_bcond(ctx, TCG_COND_GT, ra, disp21, 0);
+ret = gen_bcond(ctx, TCG_COND_GT, ra, disp21);
 break;
 invalid_opc:
 ret = gen_invalid(ctx);
-- 
2.34.1

[PULL 29/39] tcg/sparc64: Pass TCGCond to tcg_out_cmp

2024-02-04 Thread Richard Henderson

Reviewed-by: Philippe Mathieu-Daudé 
Signed-off-by: Richard Henderson 
---
 tcg/sparc64/tcg-target.c.inc | 21 +++--
 1 file changed, 11 insertions(+), 10 deletions(-)

diff --git a/tcg/sparc64/tcg-target.c.inc b/tcg/sparc64/tcg-target.c.inc
index e16b25e309..10fb8a1a0d 100644
--- a/tcg/sparc64/tcg-target.c.inc
+++ b/tcg/sparc64/tcg-target.c.inc
@@ -646,7 +646,8 @@ static void tcg_out_bpcc(TCGContext *s, int scond, int 
flags, TCGLabel *l)
 tcg_out_bpcc0(s, scond, flags, off19);
 }
 
-static void tcg_out_cmp(TCGContext *s, TCGReg c1, int32_t c2, int c2const)
+static void tcg_out_cmp(TCGContext *s, TCGCond cond,
+TCGReg c1, int32_t c2, int c2const)
 {
 tcg_out_arithc(s, TCG_REG_G0, c1, c2, c2const, ARITH_SUBCC);
 }
@@ -654,7 +655,7 @@ static void tcg_out_cmp(TCGContext *s, TCGReg c1, int32_t 
c2, int c2const)
 static void tcg_out_brcond_i32(TCGContext *s, TCGCond cond, TCGReg arg1,
int32_t arg2, int const_arg2, TCGLabel *l)
 {
-tcg_out_cmp(s, arg1, arg2, const_arg2);
+tcg_out_cmp(s, cond, arg1, arg2, const_arg2);
 tcg_out_bpcc(s, tcg_cond_to_bcond[cond], BPCC_ICC | BPCC_PT, l);
 tcg_out_nop(s);
 }
@@ -671,7 +672,7 @@ static void tcg_out_movcond_i32(TCGContext *s, TCGCond 
cond, TCGReg ret,
 TCGReg c1, int32_t c2, int c2const,
 int32_t v1, int v1const)
 {
-tcg_out_cmp(s, c1, c2, c2const);
+tcg_out_cmp(s, cond, c1, c2, c2const);
 tcg_out_movcc(s, cond, MOVCC_ICC, ret, v1, v1const);
 }
 
@@ -691,7 +692,7 @@ static void tcg_out_brcond_i64(TCGContext *s, TCGCond cond, 
TCGReg arg1,
 tcg_out32(s, INSN_OP(0) | INSN_OP2(3) | BPR_PT | INSN_RS1(arg1)
   | INSN_COND(rcond) | off16);
 } else {
-tcg_out_cmp(s, arg1, arg2, const_arg2);
+tcg_out_cmp(s, cond, arg1, arg2, const_arg2);
 tcg_out_bpcc(s, tcg_cond_to_bcond[cond], BPCC_XCC | BPCC_PT, l);
 }
 tcg_out_nop(s);
@@ -715,7 +716,7 @@ static void tcg_out_movcond_i64(TCGContext *s, TCGCond 
cond, TCGReg ret,
 if (c2 == 0 && rcond && (!v1const || check_fit_i32(v1, 10))) {
 tcg_out_movr(s, rcond, ret, c1, v1, v1const);
 } else {
-tcg_out_cmp(s, c1, c2, c2const);
+tcg_out_cmp(s, cond, c1, c2, c2const);
 tcg_out_movcc(s, cond, MOVCC_XCC, ret, v1, v1const);
 }
 }
@@ -759,13 +760,13 @@ static void tcg_out_setcond_i32(TCGContext *s, TCGCond 
cond, TCGReg ret,
 /* FALLTHRU */
 
 default:
-tcg_out_cmp(s, c1, c2, c2const);
+tcg_out_cmp(s, cond, c1, c2, c2const);
 tcg_out_movi_s13(s, ret, 0);
 tcg_out_movcc(s, cond, MOVCC_ICC, ret, neg ? -1 : 1, 1);
 return;
 }
 
-tcg_out_cmp(s, c1, c2, c2const);
+tcg_out_cmp(s, cond, c1, c2, c2const);
 if (cond == TCG_COND_LTU) {
 if (neg) {
 /* 0 - 0 - C = -C = (C ? -1 : 0) */
@@ -799,7 +800,7 @@ static void tcg_out_setcond_i64(TCGContext *s, TCGCond 
cond, TCGReg ret,
 c2 = c1, c2const = 0, c1 = TCG_REG_G0;
 /* FALLTHRU */
 case TCG_COND_LTU:
-tcg_out_cmp(s, c1, c2, c2const);
+tcg_out_cmp(s, cond, c1, c2, c2const);
 tcg_out_arith(s, ret, TCG_REG_G0, TCG_REG_G0, ARITH_ADDXC);
 return;
 default:
@@ -814,7 +815,7 @@ static void tcg_out_setcond_i64(TCGContext *s, TCGCond 
cond, TCGReg ret,
 tcg_out_movi_s13(s, ret, 0);
 tcg_out_movr(s, rcond, ret, c1, neg ? -1 : 1, 1);
 } else {
-tcg_out_cmp(s, c1, c2, c2const);
+tcg_out_cmp(s, cond, c1, c2, c2const);
 tcg_out_movi_s13(s, ret, 0);
 tcg_out_movcc(s, cond, MOVCC_XCC, ret, neg ? -1 : 1, 1);
 }
@@ -1102,7 +1103,7 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, 
HostAddress *h,
 tcg_out_movi_s32(s, TCG_REG_T3, compare_mask);
 tcg_out_arith(s, TCG_REG_T3, addr_reg, TCG_REG_T3, ARITH_AND);
 }
-tcg_out_cmp(s, TCG_REG_T2, TCG_REG_T3, 0);
+tcg_out_cmp(s, TCG_COND_NE, TCG_REG_T2, TCG_REG_T3, 0);
 
 ldst = new_ldst_label(s);
 ldst->is_ld = is_ld;
-- 
2.34.1

[PULL 37/39] tcg/s390x: Add TCG_CT_CONST_CMP

2024-02-04 Thread Richard Henderson

Better constraint for tcg_out_cmp, based on the comparison.

Reviewed-by: Philippe Mathieu-Daudé 
Signed-off-by: Richard Henderson 
---
 tcg/s390x/tcg-target-con-set.h |  6 +--
 tcg/s390x/tcg-target-con-str.h |  1 +
 tcg/s390x/tcg-target.c.inc | 72 +-
 3 files changed, 58 insertions(+), 21 deletions(-)

diff --git a/tcg/s390x/tcg-target-con-set.h b/tcg/s390x/tcg-target-con-set.h
index 665851d84a..f75955eaa8 100644
--- a/tcg/s390x/tcg-target-con-set.h
+++ b/tcg/s390x/tcg-target-con-set.h
@@ -15,7 +15,7 @@
 C_O0_I1(r)
 C_O0_I2(r, r)
 C_O0_I2(r, ri)
-C_O0_I2(r, rJU)
+C_O0_I2(r, rC)
 C_O0_I2(v, r)
 C_O0_I3(o, m, r)
 C_O1_I1(r, r)
@@ -27,7 +27,7 @@ C_O1_I2(r, 0, rI)
 C_O1_I2(r, 0, rJ)
 C_O1_I2(r, r, r)
 C_O1_I2(r, r, ri)
-C_O1_I2(r, r, rJU)
+C_O1_I2(r, r, rC)
 C_O1_I2(r, r, rI)
 C_O1_I2(r, r, rJ)
 C_O1_I2(r, r, rK)
@@ -39,7 +39,7 @@ C_O1_I2(v, v, r)
 C_O1_I2(v, v, v)
 C_O1_I3(v, v, v, v)
 C_O1_I4(r, r, ri, rI, r)
-C_O1_I4(r, r, rJU, rI, r)
+C_O1_I4(r, r, rC, rI, r)
 C_O2_I1(o, m, r)
 C_O2_I2(o, m, 0, r)
 C_O2_I2(o, m, r, r)
diff --git a/tcg/s390x/tcg-target-con-str.h b/tcg/s390x/tcg-target-con-str.h
index 9d2cb775dc..745f6c0df5 100644
--- a/tcg/s390x/tcg-target-con-str.h
+++ b/tcg/s390x/tcg-target-con-str.h
@@ -16,6 +16,7 @@ REGS('o', 0x) /* odd numbered general regs */
  * Define constraint letters for constants:
  * CONST(letter, TCG_CT_CONST_* bit set)
  */
+CONST('C', TCG_CT_CONST_CMP)
 CONST('I', TCG_CT_CONST_S16)
 CONST('J', TCG_CT_CONST_S32)
 CONST('K', TCG_CT_CONST_P32)
diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc
index b2815ec648..7f97080f52 100644
--- a/tcg/s390x/tcg-target.c.inc
+++ b/tcg/s390x/tcg-target.c.inc
@@ -35,6 +35,7 @@
 #define TCG_CT_CONST_P32(1 << 12)
 #define TCG_CT_CONST_INV(1 << 13)
 #define TCG_CT_CONST_INVRISBG   (1 << 14)
+#define TCG_CT_CONST_CMP(1 << 15)
 
 #define ALL_GENERAL_REGS MAKE_64BIT_MASK(0, 16)
 #define ALL_VECTOR_REGS  MAKE_64BIT_MASK(32, 32)
@@ -548,6 +549,29 @@ static bool tcg_target_const_match(int64_t val, int ct,
 val = (int32_t)val;
 }
 
+if (ct & TCG_CT_CONST_CMP) {
+switch (cond) {
+case TCG_COND_EQ:
+case TCG_COND_NE:
+ct |= TCG_CT_CONST_S32 | TCG_CT_CONST_U32;  /* CGFI or CLGFI */
+break;
+case TCG_COND_LT:
+case TCG_COND_GE:
+case TCG_COND_LE:
+case TCG_COND_GT:
+ct |= TCG_CT_CONST_S32;  /* CGFI */
+break;
+case TCG_COND_LTU:
+case TCG_COND_GEU:
+case TCG_COND_LEU:
+case TCG_COND_GTU:
+ct |= TCG_CT_CONST_U32;  /* CLGFI */
+break;
+default:
+g_assert_not_reached();
+}
+}
+
 if ((ct & TCG_CT_CONST_S32) && val == (int32_t)val) {
 return true;
 }
@@ -1229,22 +1253,34 @@ static int tgen_cmp2(TCGContext *s, TCGType type, 
TCGCond c, TCGReg r1,
 goto exit;
 }
 
-/*
- * Constraints are for a signed 33-bit operand, which is a
- * convenient superset of this signed/unsigned test.
- */
-if (c2 == (is_unsigned ? (TCGArg)(uint32_t)c2 : (TCGArg)(int32_t)c2)) {
-op = (is_unsigned ? RIL_CLGFI : RIL_CGFI);
-tcg_out_insn_RIL(s, op, r1, c2);
-goto exit;
+/* Should match TCG_CT_CONST_CMP. */
+switch (c) {
+case TCG_COND_LT:
+case TCG_COND_GE:
+case TCG_COND_LE:
+case TCG_COND_GT:
+tcg_debug_assert(c2 == (int32_t)c2);
+op = RIL_CGFI;
+break;
+case TCG_COND_EQ:
+case TCG_COND_NE:
+if (c2 == (int32_t)c2) {
+op = RIL_CGFI;
+break;
+}
+/* fall through */
+case TCG_COND_LTU:
+case TCG_COND_GEU:
+case TCG_COND_LEU:
+case TCG_COND_GTU:
+tcg_debug_assert(c2 == (uint32_t)c2);
+op = RIL_CLGFI;
+break;
+default:
+g_assert_not_reached();
 }
-
-/* Load everything else into a register. */
-tcg_out_movi(s, TCG_TYPE_I64, TCG_TMP0, c2);
-c2 = TCG_TMP0;
-}
-
-if (type == TCG_TYPE_I32) {
+tcg_out_insn_RIL(s, op, r1, c2);
+} else if (type == TCG_TYPE_I32) {
 op = (is_unsigned ? RR_CLR : RR_CR);
 tcg_out_insn_RR(s, op, r1, c2);
 } else {
@@ -3137,7 +3173,7 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode 
op)
 return C_O1_I2(r, r, ri);
 case INDEX_op_setcond_i64:
 case INDEX_op_negsetcond_i64:
-return C_O1_I2(r, r, rJU);
+return C_O1_I2(r, r, rC);
 
 case INDEX_op_clz_i64:
 return C_O1_I2(r, r, rI);
@@ -3187,7 +3223,7 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode 
op)
 case INDEX_op_brcond_i32:
 return C_O0_I2(r, ri);
 case INDEX_op_brcond_i64:
-return C_O0_I2(r, rJU);
+

[PULL 31/39] tcg/ppc: Sink tcg_to_bc usage into tcg_out_bc

2024-02-04 Thread Richard Henderson

Rename the current tcg_out_bc function to tcg_out_bc_lab, and
create a new function that takes an integer displacement + link.

Reviewed-by: Philippe Mathieu-Daudé 
Signed-off-by: Richard Henderson 
---
 tcg/ppc/tcg-target.c.inc | 28 +---
 1 file changed, 17 insertions(+), 11 deletions(-)

diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
index 850ace98b2..830d2fe73a 100644
--- a/tcg/ppc/tcg-target.c.inc
+++ b/tcg/ppc/tcg-target.c.inc
@@ -1946,14 +1946,20 @@ static void tcg_out_setcond(TCGContext *s, TCGType 
type, TCGCond cond,
 }
 }
 
-static void tcg_out_bc(TCGContext *s, int bc, TCGLabel *l)
+static void tcg_out_bc(TCGContext *s, TCGCond cond, int bd)
 {
+tcg_out32(s, tcg_to_bc[cond] | bd);
+}
+
+static void tcg_out_bc_lab(TCGContext *s, TCGCond cond, TCGLabel *l)
+{
+int bd = 0;
 if (l->has_value) {
-bc |= reloc_pc14_val(tcg_splitwx_to_rx(s->code_ptr), l->u.value_ptr);
+bd = reloc_pc14_val(tcg_splitwx_to_rx(s->code_ptr), l->u.value_ptr);
 } else {
 tcg_out_reloc(s, s->code_ptr, R_PPC_REL14, l, 0);
 }
-tcg_out32(s, bc);
+tcg_out_bc(s, cond, bd);
 }
 
 static void tcg_out_brcond(TCGContext *s, TCGCond cond,
@@ -1961,7 +1967,7 @@ static void tcg_out_brcond(TCGContext *s, TCGCond cond,
TCGLabel *l, TCGType type)
 {
 tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 7, type);
-tcg_out_bc(s, tcg_to_bc[cond], l);
+tcg_out_bc_lab(s, cond, l);
 }
 
 static void tcg_out_movcond(TCGContext *s, TCGType type, TCGCond cond,
@@ -2003,7 +2009,7 @@ static void tcg_out_movcond(TCGContext *s, TCGType type, 
TCGCond cond,
 }
 }
 /* Branch forward over one insn */
-tcg_out32(s, tcg_to_bc[cond] | 8);
+tcg_out_bc(s, cond, 8);
 if (v2 == 0) {
 tcg_out_movi(s, type, dest, 0);
 } else {
@@ -2024,11 +2030,11 @@ static void tcg_out_cntxz(TCGContext *s, TCGType type, 
uint32_t opc,
 tcg_out32(s, opc | RA(TCG_REG_R0) | RS(a1));
 tcg_out32(s, tcg_to_isel[TCG_COND_EQ] | TAB(a0, a2, TCG_REG_R0));
 } else if (!const_a2 && a0 == a2) {
-tcg_out32(s, tcg_to_bc[TCG_COND_EQ] | 8);
+tcg_out_bc(s, TCG_COND_EQ, 8);
 tcg_out32(s, opc | RA(a0) | RS(a1));
 } else {
 tcg_out32(s, opc | RA(a0) | RS(a1));
-tcg_out32(s, tcg_to_bc[TCG_COND_NE] | 8);
+tcg_out_bc(s, TCG_COND_NE, 8);
 if (const_a2) {
 tcg_out_movi(s, type, a0, 0);
 } else {
@@ -2108,11 +2114,11 @@ static void tcg_out_setcond2(TCGContext *s, const 
TCGArg *args,
 tcg_out_rlw(s, RLWINM, args[0], TCG_REG_R0, 31, 31, 31);
 }
 
-static void tcg_out_brcond2 (TCGContext *s, const TCGArg *args,
- const int *const_args)
+static void tcg_out_brcond2(TCGContext *s, const TCGArg *args,
+const int *const_args)
 {
 tcg_out_cmp2(s, args, const_args);
-tcg_out_bc(s, BC | BI(7, CR_EQ) | BO_COND_TRUE, arg_label(args[5]));
+tcg_out_bc_lab(s, TCG_COND_EQ, arg_label(args[5]));
 }
 
 static void tcg_out_mb(TCGContext *s, TCGArg a0)
@@ -2446,7 +2452,7 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, 
HostAddress *h,
 
 /* Load a pointer into the current opcode w/conditional branch-link. */
 ldst->label_ptr[0] = s->code_ptr;
-tcg_out32(s, BC | BI(7, CR_EQ) | BO_COND_FALSE | LK);
+tcg_out_bc(s, TCG_COND_NE, LK);
 
 h->base = TCG_REG_TMP1;
 } else {
-- 
2.34.1

[PULL 12/39] target/m68k: Use TCG_COND_TST{EQ,NE} in gen_fcc_cond

2024-02-04 Thread Richard Henderson

Reviewed-by: Philippe Mathieu-Daudé 
Signed-off-by: Richard Henderson 
---
 target/m68k/translate.c | 74 ++---
 1 file changed, 33 insertions(+), 41 deletions(-)

diff --git a/target/m68k/translate.c b/target/m68k/translate.c
index f886190f88..d7d5ff4300 100644
--- a/target/m68k/translate.c
+++ b/target/m68k/translate.c
@@ -5129,46 +5129,44 @@ undef:
 static void gen_fcc_cond(DisasCompare *c, DisasContext *s, int cond)
 {
 TCGv fpsr;
+int imm = 0;
 
-c->v2 = tcg_constant_i32(0);
 /* TODO: Raise BSUN exception.  */
 fpsr = tcg_temp_new();
 gen_load_fcr(s, fpsr, M68K_FPSR);
+c->v1 = fpsr;
+
 switch (cond) {
 case 0:  /* False */
 case 16: /* Signaling False */
-c->v1 = c->v2;
 c->tcond = TCG_COND_NEVER;
 break;
 case 1:  /* EQual Z */
 case 17: /* Signaling EQual Z */
-c->v1 = tcg_temp_new();
-tcg_gen_andi_i32(c->v1, fpsr, FPSR_CC_Z);
-c->tcond = TCG_COND_NE;
+imm = FPSR_CC_Z;
+c->tcond = TCG_COND_TSTNE;
 break;
 case 2:  /* Ordered Greater Than !(A || Z || N) */
 case 18: /* Greater Than !(A || Z || N) */
-c->v1 = tcg_temp_new();
-tcg_gen_andi_i32(c->v1, fpsr,
- FPSR_CC_A | FPSR_CC_Z | FPSR_CC_N);
-c->tcond = TCG_COND_EQ;
+imm = FPSR_CC_A | FPSR_CC_Z | FPSR_CC_N;
+c->tcond = TCG_COND_TSTEQ;
 break;
 case 3:  /* Ordered Greater than or Equal Z || !(A || N) */
 case 19: /* Greater than or Equal Z || !(A || N) */
 c->v1 = tcg_temp_new();
 tcg_gen_andi_i32(c->v1, fpsr, FPSR_CC_A);
 tcg_gen_shli_i32(c->v1, c->v1, ctz32(FPSR_CC_N) - ctz32(FPSR_CC_A));
-tcg_gen_andi_i32(fpsr, fpsr, FPSR_CC_Z | FPSR_CC_N);
 tcg_gen_or_i32(c->v1, c->v1, fpsr);
 tcg_gen_xori_i32(c->v1, c->v1, FPSR_CC_N);
-c->tcond = TCG_COND_NE;
+imm = FPSR_CC_Z | FPSR_CC_N;
+c->tcond = TCG_COND_TSTNE;
 break;
 case 4:  /* Ordered Less Than !(!N || A || Z); */
 case 20: /* Less Than !(!N || A || Z); */
 c->v1 = tcg_temp_new();
 tcg_gen_xori_i32(c->v1, fpsr, FPSR_CC_N);
-tcg_gen_andi_i32(c->v1, c->v1, FPSR_CC_N | FPSR_CC_A | FPSR_CC_Z);
-c->tcond = TCG_COND_EQ;
+imm = FPSR_CC_N | FPSR_CC_A | FPSR_CC_Z;
+c->tcond = TCG_COND_TSTEQ;
 break;
 case 5:  /* Ordered Less than or Equal Z || (N && !A) */
 case 21: /* Less than or Equal Z || (N && !A) */
@@ -5176,49 +5174,45 @@ static void gen_fcc_cond(DisasCompare *c, DisasContext 
*s, int cond)
 tcg_gen_andi_i32(c->v1, fpsr, FPSR_CC_A);
 tcg_gen_shli_i32(c->v1, c->v1, ctz32(FPSR_CC_N) - ctz32(FPSR_CC_A));
 tcg_gen_andc_i32(c->v1, fpsr, c->v1);
-tcg_gen_andi_i32(c->v1, c->v1, FPSR_CC_Z | FPSR_CC_N);
-c->tcond = TCG_COND_NE;
+imm = FPSR_CC_Z | FPSR_CC_N;
+c->tcond = TCG_COND_TSTNE;
 break;
 case 6:  /* Ordered Greater or Less than !(A || Z) */
 case 22: /* Greater or Less than !(A || Z) */
-c->v1 = tcg_temp_new();
-tcg_gen_andi_i32(c->v1, fpsr, FPSR_CC_A | FPSR_CC_Z);
-c->tcond = TCG_COND_EQ;
+imm = FPSR_CC_A | FPSR_CC_Z;
+c->tcond = TCG_COND_TSTEQ;
 break;
 case 7:  /* Ordered !A */
 case 23: /* Greater, Less or Equal !A */
-c->v1 = tcg_temp_new();
-tcg_gen_andi_i32(c->v1, fpsr, FPSR_CC_A);
-c->tcond = TCG_COND_EQ;
+imm = FPSR_CC_A;
+c->tcond = TCG_COND_TSTEQ;
 break;
 case 8:  /* Unordered A */
 case 24: /* Not Greater, Less or Equal A */
-c->v1 = tcg_temp_new();
-tcg_gen_andi_i32(c->v1, fpsr, FPSR_CC_A);
-c->tcond = TCG_COND_NE;
+imm = FPSR_CC_A;
+c->tcond = TCG_COND_TSTNE;
 break;
 case 9:  /* Unordered or Equal A || Z */
 case 25: /* Not Greater or Less then A || Z */
-c->v1 = tcg_temp_new();
-tcg_gen_andi_i32(c->v1, fpsr, FPSR_CC_A | FPSR_CC_Z);
-c->tcond = TCG_COND_NE;
+imm = FPSR_CC_A | FPSR_CC_Z;
+c->tcond = TCG_COND_TSTNE;
 break;
 case 10: /* Unordered or Greater Than A || !(N || Z)) */
 case 26: /* Not Less or Equal A || !(N || Z)) */
 c->v1 = tcg_temp_new();
 tcg_gen_andi_i32(c->v1, fpsr, FPSR_CC_Z);
 tcg_gen_shli_i32(c->v1, c->v1, ctz32(FPSR_CC_N) - ctz32(FPSR_CC_Z));
-tcg_gen_andi_i32(fpsr, fpsr, FPSR_CC_A | FPSR_CC_N);
 tcg_gen_or_i32(c->v1, c->v1, fpsr);
 tcg_gen_xori_i32(c->v1, c->v1, FPSR_CC_N);
-c->tcond = TCG_COND_NE;
+imm = FPSR_CC_A | FPSR_CC_N;
+c->tcond = TCG_COND_TSTNE;
 break;
 case 11: /* Unordered or Greater or Equal A || Z || !N */
 case 27: /* Not Less Than A || Z || !N */
 c->v1 = tcg_temp_new();
-tcg_gen_andi_i32(c->v1, fpsr, FPSR_CC_A | FPSR_CC_Z | FPSR_CC_N);
-tcg_gen_xori_i32(c->v1,

[PULL 08/39] target/alpha: Pass immediate value to gen_bcond_internal()

2024-02-04 Thread Richard Henderson

Simplify gen_bcond() by passing an immediate value.

Reviewed-by: Philippe Mathieu-Daudé 
Signed-off-by: Richard Henderson 
Message-Id: <20231028194522.245170-33-richard.hender...@linaro.org>
[PMD: Split from bigger patch, part 1/2]
Signed-off-by: Philippe Mathieu-Daudé 
Message-Id: <20231108205247.83234-1-phi...@linaro.org>
---
 target/alpha/translate.c | 21 +++--
 1 file changed, 7 insertions(+), 14 deletions(-)

diff --git a/target/alpha/translate.c b/target/alpha/translate.c
index 4b464f8651..e9cb623277 100644
--- a/target/alpha/translate.c
+++ b/target/alpha/translate.c
@@ -453,13 +453,13 @@ static DisasJumpType gen_bdirect(DisasContext *ctx, int 
ra, int32_t disp)
 }
 
 static DisasJumpType gen_bcond_internal(DisasContext *ctx, TCGCond cond,
-TCGv cmp, int32_t disp)
+TCGv cmp, uint64_t imm, int32_t disp)
 {
 uint64_t dest = ctx->base.pc_next + (disp << 2);
 TCGLabel *lab_true = gen_new_label();
 
 if (use_goto_tb(ctx, dest)) {
-tcg_gen_brcondi_i64(cond, cmp, 0, lab_true);
+tcg_gen_brcondi_i64(cond, cmp, imm, lab_true);
 
 tcg_gen_goto_tb(0);
 tcg_gen_movi_i64(cpu_pc, ctx->base.pc_next);
@@ -472,11 +472,11 @@ static DisasJumpType gen_bcond_internal(DisasContext 
*ctx, TCGCond cond,
 
 return DISAS_NORETURN;
 } else {
-TCGv_i64 z = load_zero(ctx);
+TCGv_i64 i = tcg_constant_i64(imm);
 TCGv_i64 d = tcg_constant_i64(dest);
 TCGv_i64 p = tcg_constant_i64(ctx->base.pc_next);
 
-tcg_gen_movcond_i64(cond, cpu_pc, cmp, z, d, p);
+tcg_gen_movcond_i64(cond, cpu_pc, cmp, i, d, p);
 return DISAS_PC_UPDATED;
 }
 }
@@ -484,15 +484,8 @@ static DisasJumpType gen_bcond_internal(DisasContext *ctx, 
TCGCond cond,
 static DisasJumpType gen_bcond(DisasContext *ctx, TCGCond cond, int ra,
int32_t disp, int mask)
 {
-if (mask) {
-TCGv tmp = tcg_temp_new();
-DisasJumpType ret;
-
-tcg_gen_andi_i64(tmp, load_gpr(ctx, ra), 1);
-ret = gen_bcond_internal(ctx, cond, tmp, disp);
-return ret;
-}
-return gen_bcond_internal(ctx, cond, load_gpr(ctx, ra), disp);
+return gen_bcond_internal(ctx, cond, load_gpr(ctx, ra),
+  mask, disp);
 }
 
 /* Fold -0.0 for comparison with COND.  */
@@ -533,7 +526,7 @@ static DisasJumpType gen_fbcond(DisasContext *ctx, TCGCond 
cond, int ra,
 DisasJumpType ret;
 
 gen_fold_mzero(cond, cmp_tmp, load_fpr(ctx, ra));
-ret = gen_bcond_internal(ctx, cond, cmp_tmp, disp);
+ret = gen_bcond_internal(ctx, cond, cmp_tmp, 0, disp);
 return ret;
 }
 
-- 
2.34.1

[PULL 34/39] tcg/ppc: Add TCG_CT_CONST_CMP

2024-02-04 Thread Richard Henderson

Better constraint for tcg_out_cmp, based on the comparison.
We can't yet remove the fallback to load constants into a
scratch because of tcg_out_cmp2, but that path should not
be as frequent.

Reviewed-by: Philippe Mathieu-Daudé 
Signed-off-by: Richard Henderson 
---
 tcg/ppc/tcg-target-con-set.h |  5 ++--
 tcg/ppc/tcg-target-con-str.h |  1 +
 tcg/ppc/tcg-target.c.inc | 48 ++--
 3 files changed, 44 insertions(+), 10 deletions(-)

diff --git a/tcg/ppc/tcg-target-con-set.h b/tcg/ppc/tcg-target-con-set.h
index cb47b29452..9f99bde505 100644
--- a/tcg/ppc/tcg-target-con-set.h
+++ b/tcg/ppc/tcg-target-con-set.h
@@ -11,7 +11,7 @@
  */
 C_O0_I1(r)
 C_O0_I2(r, r)
-C_O0_I2(r, ri)
+C_O0_I2(r, rC)
 C_O0_I2(v, r)
 C_O0_I3(r, r, r)
 C_O0_I3(o, m, r)
@@ -26,13 +26,14 @@ C_O1_I2(r, rI, ri)
 C_O1_I2(r, rI, rT)
 C_O1_I2(r, r, r)
 C_O1_I2(r, r, ri)
+C_O1_I2(r, r, rC)
 C_O1_I2(r, r, rI)
 C_O1_I2(r, r, rT)
 C_O1_I2(r, r, rU)
 C_O1_I2(r, r, rZW)
 C_O1_I2(v, v, v)
 C_O1_I3(v, v, v, v)
-C_O1_I4(r, r, ri, rZ, rZ)
+C_O1_I4(r, r, rC, rZ, rZ)
 C_O1_I4(r, r, r, ri, ri)
 C_O2_I1(r, r, r)
 C_N1O1_I1(o, m, r)
diff --git a/tcg/ppc/tcg-target-con-str.h b/tcg/ppc/tcg-target-con-str.h
index 20846901de..16b687216e 100644
--- a/tcg/ppc/tcg-target-con-str.h
+++ b/tcg/ppc/tcg-target-con-str.h
@@ -16,6 +16,7 @@ REGS('v', ALL_VECTOR_REGS)
  * Define constraint letters for constants:
  * CONST(letter, TCG_CT_CONST_* bit set)
  */
+CONST('C', TCG_CT_CONST_CMP)
 CONST('I', TCG_CT_CONST_S16)
 CONST('M', TCG_CT_CONST_MONE)
 CONST('T', TCG_CT_CONST_S32)
diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
index 26e0bc31d7..535ef2cbe7 100644
--- a/tcg/ppc/tcg-target.c.inc
+++ b/tcg/ppc/tcg-target.c.inc
@@ -92,11 +92,13 @@
 #define SZR  (TCG_TARGET_REG_BITS / 8)
 
 #define TCG_CT_CONST_S16  0x100
+#define TCG_CT_CONST_U16  0x200
 #define TCG_CT_CONST_S32  0x400
 #define TCG_CT_CONST_U32  0x800
 #define TCG_CT_CONST_ZERO 0x1000
 #define TCG_CT_CONST_MONE 0x2000
 #define TCG_CT_CONST_WSZ  0x4000
+#define TCG_CT_CONST_CMP  0x8000
 
 #define ALL_GENERAL_REGS  0xffffffffu
 #define ALL_VECTOR_REGS   0xffffffff00000000ull
@@ -296,9 +298,35 @@ static bool tcg_target_const_match(int64_t sval, int ct,
 sval = (int32_t)sval;
 }
 
+if (ct & TCG_CT_CONST_CMP) {
+switch (cond) {
+case TCG_COND_EQ:
+case TCG_COND_NE:
+ct |= TCG_CT_CONST_S16 | TCG_CT_CONST_U16;
+break;
+case TCG_COND_LT:
+case TCG_COND_GE:
+case TCG_COND_LE:
+case TCG_COND_GT:
+ct |= TCG_CT_CONST_S16;
+break;
+case TCG_COND_LTU:
+case TCG_COND_GEU:
+case TCG_COND_LEU:
+case TCG_COND_GTU:
+ct |= TCG_CT_CONST_U16;
+break;
+default:
+g_assert_not_reached();
+}
+}
+
 if ((ct & TCG_CT_CONST_S16) && sval == (int16_t)sval) {
 return 1;
 }
+if ((ct & TCG_CT_CONST_U16) && uval == (uint16_t)uval) {
+return 1;
+}
 if ((ct & TCG_CT_CONST_S32) && sval == (int32_t)sval) {
 return 1;
 }
@@ -1682,7 +1710,10 @@ static void tcg_out_cmp(TCGContext *s, int cond, TCGArg 
arg1, TCGArg arg2,
 
 tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);
 
-/* Simplify the comparisons below wrt CMPI.  */
+/*
+ * Simplify the comparisons below wrt CMPI.
+ * All of the tests are 16-bit, so a 32-bit sign extend always works.
+ */
 if (type == TCG_TYPE_I32) {
 arg2 = (int32_t)arg2;
 }
@@ -3991,8 +4022,6 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode 
op)
 case INDEX_op_sar_i32:
 case INDEX_op_rotl_i32:
 case INDEX_op_rotr_i32:
-case INDEX_op_setcond_i32:
-case INDEX_op_negsetcond_i32:
 case INDEX_op_and_i64:
 case INDEX_op_andc_i64:
 case INDEX_op_shl_i64:
@@ -4000,8 +4029,6 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode 
op)
 case INDEX_op_sar_i64:
 case INDEX_op_rotl_i64:
 case INDEX_op_rotr_i64:
-case INDEX_op_setcond_i64:
-case INDEX_op_negsetcond_i64:
 return C_O1_I2(r, r, ri);
 
 case INDEX_op_mul_i32:
@@ -4045,11 +4072,16 @@ static TCGConstraintSetIndex 
tcg_target_op_def(TCGOpcode op)
 
 case INDEX_op_brcond_i32:
 case INDEX_op_brcond_i64:
-return C_O0_I2(r, ri);
-
+return C_O0_I2(r, rC);
+case INDEX_op_setcond_i32:
+case INDEX_op_setcond_i64:
+case INDEX_op_negsetcond_i32:
+case INDEX_op_negsetcond_i64:
+return C_O1_I2(r, r, rC);
 case INDEX_op_movcond_i32:
 case INDEX_op_movcond_i64:
-return C_O1_I4(r, r, ri, rZ, rZ);
+return C_O1_I4(r, r, rC, rZ, rZ);
+
 case INDEX_op_deposit_i32:
 case INDEX_op_deposit_i64:
 return C_O1_I2(r, 0, rZ);
-- 
2.34.1

[PULL 22/39] tcg/arm: Support TCG_COND_TST{EQ,NE}

2024-02-04 Thread Richard Henderson

Reviewed-by: Philippe Mathieu-Daudé 
Signed-off-by: Richard Henderson 
Message-Id: <20231028194522.245170-12-richard.hender...@linaro.org>
[PMD: Split from bigger patch, part 2/2]
Signed-off-by: Philippe Mathieu-Daudé 
Message-Id: <20231108145244.72421-2-phi...@linaro.org>
---
 tcg/arm/tcg-target.h |  2 +-
 tcg/arm/tcg-target.c.inc | 29 -
 2 files changed, 29 insertions(+), 2 deletions(-)

diff --git a/tcg/arm/tcg-target.h b/tcg/arm/tcg-target.h
index 7bf42045a7..a43875cb09 100644
--- a/tcg/arm/tcg-target.h
+++ b/tcg/arm/tcg-target.h
@@ -125,7 +125,7 @@ extern bool use_neon_instructions;
 
 #define TCG_TARGET_HAS_qemu_ldst_i128   0
 
-#define TCG_TARGET_HAS_tst  0
+#define TCG_TARGET_HAS_tst  1
 
 #define TCG_TARGET_HAS_v64  use_neon_instructions
 #define TCG_TARGET_HAS_v128 use_neon_instructions
diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc
index 4ea17845bb..ffd23ef789 100644
--- a/tcg/arm/tcg-target.c.inc
+++ b/tcg/arm/tcg-target.c.inc
@@ -1194,7 +1194,27 @@ static void tcg_out_mb(TCGContext *s, TCGArg a0)
 static TCGCond tcg_out_cmp(TCGContext *s, TCGCond cond, TCGReg a,
TCGArg b, int b_const)
 {
-tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0, a, b, b_const);
+if (!is_tst_cond(cond)) {
+tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0, a, b, b_const);
+return cond;
+}
+
+cond = tcg_tst_eqne_cond(cond);
+if (b_const) {
+int imm12 = encode_imm(b);
+
+/*
+ * The compare constraints allow rIN, but TST does not support N.
+ * Be prepared to load the constant into a scratch register.
+ */
+if (imm12 >= 0) {
+tcg_out_dat_imm(s, COND_AL, ARITH_TST, 0, a, imm12);
+return cond;
+}
+tcg_out_movi32(s, COND_AL, TCG_REG_TMP, b);
+b = TCG_REG_TMP;
+}
+tcg_out_dat_reg(s, COND_AL, ARITH_TST, 0, a, b, SHIFT_IMM_LSL(0));
 return cond;
 }
 
@@ -1225,6 +1245,13 @@ static TCGCond tcg_out_cmp2(TCGContext *s, const TCGArg 
*args,
 tcg_out_dat_rI(s, COND_EQ, ARITH_CMP, 0, al, bl, const_bl);
 return cond;
 
+case TCG_COND_TSTEQ:
+case TCG_COND_TSTNE:
+/* Similar, but with TST instead of CMP. */
+tcg_out_dat_rI(s, COND_AL, ARITH_TST, 0, ah, bh, const_bh);
+tcg_out_dat_rI(s, COND_EQ, ARITH_TST, 0, al, bl, const_bl);
+return tcg_tst_eqne_cond(cond);
+
 case TCG_COND_LT:
 case TCG_COND_GE:
 /* We perform a double-word subtraction and examine the result.
-- 
2.34.1

[PULL 14/39] target/s390x: Use TCG_COND_TSTNE for CC_OP_{TM,ICM}

2024-02-04 Thread Richard Henderson

These are all test-and-compare type instructions.

Reviewed-by: Philippe Mathieu-Daudé 
Signed-off-by: Richard Henderson 
---
 target/s390x/tcg/translate.c | 18 +++---
 1 file changed, 7 insertions(+), 11 deletions(-)

diff --git a/target/s390x/tcg/translate.c b/target/s390x/tcg/translate.c
index a5fd9cccaa..05fd29589c 100644
--- a/target/s390x/tcg/translate.c
+++ b/target/s390x/tcg/translate.c
@@ -754,10 +754,10 @@ static void disas_jcc(DisasContext *s, DisasCompare *c, 
uint32_t mask)
 case CC_OP_TM_64:
 switch (mask) {
 case 8:
-cond = TCG_COND_EQ;
+cond = TCG_COND_TSTEQ;
 break;
 case 4 | 2 | 1:
-cond = TCG_COND_NE;
+cond = TCG_COND_TSTNE;
 break;
 default:
 goto do_dynamic;
@@ -768,11 +768,11 @@ static void disas_jcc(DisasContext *s, DisasCompare *c, 
uint32_t mask)
 case CC_OP_ICM:
 switch (mask) {
 case 8:
-cond = TCG_COND_EQ;
+cond = TCG_COND_TSTEQ;
 break;
 case 4 | 2 | 1:
 case 4 | 2:
-cond = TCG_COND_NE;
+cond = TCG_COND_TSTNE;
 break;
 default:
 goto do_dynamic;
@@ -854,18 +854,14 @@ static void disas_jcc(DisasContext *s, DisasCompare *c, 
uint32_t mask)
 c->u.s64.a = cc_dst;
 c->u.s64.b = tcg_constant_i64(0);
 break;
+
 case CC_OP_LTGT_64:
 case CC_OP_LTUGTU_64:
-c->u.s64.a = cc_src;
-c->u.s64.b = cc_dst;
-break;
-
 case CC_OP_TM_32:
 case CC_OP_TM_64:
 case CC_OP_ICM:
-c->u.s64.a = tcg_temp_new_i64();
-c->u.s64.b = tcg_constant_i64(0);
-tcg_gen_and_i64(c->u.s64.a, cc_src, cc_dst);
+c->u.s64.a = cc_src;
+c->u.s64.b = cc_dst;
 break;
 
 case CC_OP_ADDU:
-- 
2.34.1

[PULL 28/39] tcg/sparc64: Hoist read of tcg_cond_to_rcond

2024-02-04 Thread Richard Henderson

Use a non-zero value here (an illegal encoding) as a better
condition than is_unsigned_cond for when MOVR/BPR is usable.

Reviewed-by: Philippe Mathieu-Daudé 
Signed-off-by: Richard Henderson 
---
 tcg/sparc64/tcg-target.c.inc | 25 ++---
 1 file changed, 14 insertions(+), 11 deletions(-)

diff --git a/tcg/sparc64/tcg-target.c.inc b/tcg/sparc64/tcg-target.c.inc
index ac86b92b75..e16b25e309 100644
--- a/tcg/sparc64/tcg-target.c.inc
+++ b/tcg/sparc64/tcg-target.c.inc
@@ -620,7 +620,7 @@ static const uint8_t tcg_cond_to_bcond[] = {
 [TCG_COND_GTU] = COND_GU,
 };
 
-static const uint8_t tcg_cond_to_rcond[] = {
+static const uint8_t tcg_cond_to_rcond[16] = {
 [TCG_COND_EQ] = RCOND_Z,
 [TCG_COND_NE] = RCOND_NZ,
 [TCG_COND_LT] = RCOND_LZ,
@@ -679,7 +679,8 @@ static void tcg_out_brcond_i64(TCGContext *s, TCGCond cond, 
TCGReg arg1,
int32_t arg2, int const_arg2, TCGLabel *l)
 {
 /* For 64-bit signed comparisons vs zero, we can avoid the compare.  */
-if (arg2 == 0 && !is_unsigned_cond(cond)) {
+int rcond = tcg_cond_to_rcond[cond];
+if (arg2 == 0 && rcond) {
 int off16 = 0;
 
 if (l->has_value) {
@@ -688,7 +689,7 @@ static void tcg_out_brcond_i64(TCGContext *s, TCGCond cond, 
TCGReg arg1,
 tcg_out_reloc(s, s->code_ptr, R_SPARC_WDISP16, l, 0);
 }
 tcg_out32(s, INSN_OP(0) | INSN_OP2(3) | BPR_PT | INSN_RS1(arg1)
-  | INSN_COND(tcg_cond_to_rcond[cond]) | off16);
+  | INSN_COND(rcond) | off16);
 } else {
 tcg_out_cmp(s, arg1, arg2, const_arg2);
 tcg_out_bpcc(s, tcg_cond_to_bcond[cond], BPCC_XCC | BPCC_PT, l);
@@ -696,11 +697,10 @@ static void tcg_out_brcond_i64(TCGContext *s, TCGCond 
cond, TCGReg arg1,
 tcg_out_nop(s);
 }
 
-static void tcg_out_movr(TCGContext *s, TCGCond cond, TCGReg ret, TCGReg c1,
+static void tcg_out_movr(TCGContext *s, int rcond, TCGReg ret, TCGReg c1,
  int32_t v1, int v1const)
 {
-tcg_out32(s, ARITH_MOVR | INSN_RD(ret) | INSN_RS1(c1)
-  | (tcg_cond_to_rcond[cond] << 10)
+tcg_out32(s, ARITH_MOVR | INSN_RD(ret) | INSN_RS1(c1) | (rcond << 10)
   | (v1const ? INSN_IMM10(v1) : INSN_RS2(v1)));
 }
 
@@ -711,9 +711,9 @@ static void tcg_out_movcond_i64(TCGContext *s, TCGCond 
cond, TCGReg ret,
 /* For 64-bit signed comparisons vs zero, we can avoid the compare.
Note that the immediate range is one bit smaller, so we must check
for that as well.  */
-if (c2 == 0 && !is_unsigned_cond(cond)
-&& (!v1const || check_fit_i32(v1, 10))) {
-tcg_out_movr(s, cond, ret, c1, v1, v1const);
+int rcond = tcg_cond_to_rcond[cond];
+if (c2 == 0 && rcond && (!v1const || check_fit_i32(v1, 10))) {
+tcg_out_movr(s, rcond, ret, c1, v1, v1const);
 } else {
 tcg_out_cmp(s, c1, c2, c2const);
 tcg_out_movcc(s, cond, MOVCC_XCC, ret, v1, v1const);
@@ -788,6 +788,8 @@ static void tcg_out_setcond_i32(TCGContext *s, TCGCond 
cond, TCGReg ret,
 static void tcg_out_setcond_i64(TCGContext *s, TCGCond cond, TCGReg ret,
 TCGReg c1, int32_t c2, int c2const, bool neg)
 {
+int rcond;
+
 if (use_vis3_instructions && !neg) {
 switch (cond) {
 case TCG_COND_NE:
@@ -807,9 +809,10 @@ static void tcg_out_setcond_i64(TCGContext *s, TCGCond 
cond, TCGReg ret,
 
 /* For 64-bit signed comparisons vs zero, we can avoid the compare
if the input does not overlap the output.  */
-if (c2 == 0 && !is_unsigned_cond(cond) && c1 != ret) {
+rcond = tcg_cond_to_rcond[cond];
+if (c2 == 0 && rcond && c1 != ret) {
 tcg_out_movi_s13(s, ret, 0);
-tcg_out_movr(s, cond, ret, c1, neg ? -1 : 1, 1);
+tcg_out_movr(s, rcond, ret, c1, neg ? -1 : 1, 1);
 } else {
 tcg_out_cmp(s, c1, c2, c2const);
 tcg_out_movi_s13(s, ret, 0);
-- 
2.34.1

[PULL 35/39] tcg/ppc: Support TCG_COND_TST{EQ,NE}

2024-02-04 Thread Richard Henderson

Reviewed-by: Philippe Mathieu-Daudé 
Signed-off-by: Richard Henderson 
---
 tcg/ppc/tcg-target.h |   2 +-
 tcg/ppc/tcg-target.c.inc | 122 ---
 2 files changed, 115 insertions(+), 9 deletions(-)

diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h
index 60ce49e672..04a7aba4d3 100644
--- a/tcg/ppc/tcg-target.h
+++ b/tcg/ppc/tcg-target.h
@@ -143,7 +143,7 @@ typedef enum {
 #define TCG_TARGET_HAS_qemu_ldst_i128   \
 (TCG_TARGET_REG_BITS == 64 && have_isa_2_07)
 
-#define TCG_TARGET_HAS_tst  0
+#define TCG_TARGET_HAS_tst  1
 
 /*
  * While technically Altivec could support V64, it has no 64-bit store
diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
index 535ef2cbe7..7f3829beeb 100644
--- a/tcg/ppc/tcg-target.c.inc
+++ b/tcg/ppc/tcg-target.c.inc
@@ -283,11 +283,15 @@ static bool reloc_pc34(tcg_insn_unit *src_rw, const 
tcg_insn_unit *target)
 return false;
 }
 
+static bool mask_operand(uint32_t c, int *mb, int *me);
+static bool mask64_operand(uint64_t c, int *mb, int *me);
+
 /* test if a constant matches the constraint */
 static bool tcg_target_const_match(int64_t sval, int ct,
TCGType type, TCGCond cond, int vece)
 {
 uint64_t uval = sval;
+int mb, me;
 
 if (ct & TCG_CT_CONST) {
 return 1;
@@ -316,6 +320,17 @@ static bool tcg_target_const_match(int64_t sval, int ct,
 case TCG_COND_GTU:
 ct |= TCG_CT_CONST_U16;
 break;
+case TCG_COND_TSTEQ:
+case TCG_COND_TSTNE:
+if ((uval & ~0xffff) == 0 || (uval & ~0xffff0000ull) == 0) {
+return 1;
+}
+if (TCG_TARGET_REG_BITS == 32 || type == TCG_TYPE_I32
+? mask_operand(uval, &mb, &me)
+: mask64_operand(uval << clz64(uval), &mb, &me)) {
+return 1;
+}
+return 0;
 default:
 g_assert_not_reached();
 }
@@ -703,9 +718,11 @@ enum {
 CR_SO
 };
 
-static const uint32_t tcg_to_bc[] = {
+static const uint32_t tcg_to_bc[16] = {
 [TCG_COND_EQ]  = BC | BI(0, CR_EQ) | BO_COND_TRUE,
 [TCG_COND_NE]  = BC | BI(0, CR_EQ) | BO_COND_FALSE,
+[TCG_COND_TSTEQ]  = BC | BI(0, CR_EQ) | BO_COND_TRUE,
+[TCG_COND_TSTNE]  = BC | BI(0, CR_EQ) | BO_COND_FALSE,
 [TCG_COND_LT]  = BC | BI(0, CR_LT) | BO_COND_TRUE,
 [TCG_COND_GE]  = BC | BI(0, CR_LT) | BO_COND_FALSE,
 [TCG_COND_LE]  = BC | BI(0, CR_GT) | BO_COND_FALSE,
@@ -717,9 +734,11 @@ static const uint32_t tcg_to_bc[] = {
 };
 
 /* The low bit here is set if the RA and RB fields must be inverted.  */
-static const uint32_t tcg_to_isel[] = {
+static const uint32_t tcg_to_isel[16] = {
 [TCG_COND_EQ]  = ISEL | BC_(0, CR_EQ),
 [TCG_COND_NE]  = ISEL | BC_(0, CR_EQ) | 1,
+[TCG_COND_TSTEQ] = ISEL | BC_(0, CR_EQ),
+[TCG_COND_TSTNE] = ISEL | BC_(0, CR_EQ) | 1,
 [TCG_COND_LT]  = ISEL | BC_(0, CR_LT),
 [TCG_COND_GE]  = ISEL | BC_(0, CR_LT) | 1,
 [TCG_COND_LE]  = ISEL | BC_(0, CR_GT) | 1,
@@ -872,19 +891,31 @@ static bool tcg_out_mov(TCGContext *s, TCGType type, 
TCGReg ret, TCGReg arg)
 return true;
 }
 
-static inline void tcg_out_rld(TCGContext *s, int op, TCGReg ra, TCGReg rs,
-   int sh, int mb)
+static void tcg_out_rld_rc(TCGContext *s, int op, TCGReg ra, TCGReg rs,
+   int sh, int mb, bool rc)
 {
 tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
 sh = SH(sh & 0x1f) | (((sh >> 5) & 1) << 1);
 mb = MB64((mb >> 5) | ((mb << 1) & 0x3f));
-tcg_out32(s, op | RA(ra) | RS(rs) | sh | mb);
+tcg_out32(s, op | RA(ra) | RS(rs) | sh | mb | rc);
 }
 
-static inline void tcg_out_rlw(TCGContext *s, int op, TCGReg ra, TCGReg rs,
-   int sh, int mb, int me)
+static void tcg_out_rld(TCGContext *s, int op, TCGReg ra, TCGReg rs,
+int sh, int mb)
 {
-tcg_out32(s, op | RA(ra) | RS(rs) | SH(sh) | MB(mb) | ME(me));
+tcg_out_rld_rc(s, op, ra, rs, sh, mb, false);
+}
+
+static void tcg_out_rlw_rc(TCGContext *s, int op, TCGReg ra, TCGReg rs,
+   int sh, int mb, int me, bool rc)
+{
+tcg_out32(s, op | RA(ra) | RS(rs) | SH(sh) | MB(mb) | ME(me) | rc);
+}
+
+static void tcg_out_rlw(TCGContext *s, int op, TCGReg ra, TCGReg rs,
+int sh, int mb, int me)
+{
+tcg_out_rlw_rc(s, op, ra, rs, sh, mb, me, false);
 }
 
 static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg dst, TCGReg src)
@@ -1702,6 +1733,50 @@ static inline bool tcg_out_sti(TCGContext *s, TCGType 
type, TCGArg val,
 return false;
 }
 
+/*
+ * Set dest non-zero if and only if (arg1 & arg2) is non-zero.
+ * If RC, then also set RC0.
+ */
+static void tcg_out_test(TCGContext *s, TCGReg dest, TCGReg arg1, TCGArg arg2,
+ bool const_arg2, TCGType type, bool rc)
+{
+int mb, me;
+
+if (!const_arg2) {
+tcg_out32(s,

[PULL 03/39] tcg/optimize: Split out arg_is_const_val

2024-02-04 Thread Richard Henderson

Reviewed-by: Philippe Mathieu-Daudé 
Signed-off-by: Richard Henderson 
---
 tcg/optimize.c | 38 +++---
 1 file changed, 23 insertions(+), 15 deletions(-)

diff --git a/tcg/optimize.c b/tcg/optimize.c
index f2d01654c5..73019b9996 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -124,11 +124,22 @@ static inline bool ts_is_const(TCGTemp *ts)
 return ts_info(ts)->is_const;
 }
 
+static inline bool ts_is_const_val(TCGTemp *ts, uint64_t val)
+{
+TempOptInfo *ti = ts_info(ts);
+return ti->is_const && ti->val == val;
+}
+
 static inline bool arg_is_const(TCGArg arg)
 {
 return ts_is_const(arg_temp(arg));
 }
 
+static inline bool arg_is_const_val(TCGArg arg, uint64_t val)
+{
+return ts_is_const_val(arg_temp(arg), val);
+}
+
 static inline bool ts_is_copy(TCGTemp *ts)
 {
 return ts_info(ts)->next_copy != ts;
@@ -689,7 +700,7 @@ static int do_constant_folding_cond(TCGType type, TCGArg x,
 }
 } else if (args_are_copies(x, y)) {
 return do_constant_folding_cond_eq(c);
-} else if (arg_is_const(y) && arg_info(y)->val == 0) {
+} else if (arg_is_const_val(y, 0)) {
 switch (c) {
 case TCG_COND_LTU:
 return 0;
@@ -954,7 +965,7 @@ static bool fold_to_not(OptContext *ctx, TCGOp *op, int idx)
 /* If the binary operation has first argument @i, fold to @i. */
 static bool fold_ix_to_i(OptContext *ctx, TCGOp *op, uint64_t i)
 {
-if (arg_is_const(op->args[1]) && arg_info(op->args[1])->val == i) {
+if (arg_is_const_val(op->args[1], i)) {
 return tcg_opt_gen_movi(ctx, op, op->args[0], i);
 }
 return false;
@@ -963,7 +974,7 @@ static bool fold_ix_to_i(OptContext *ctx, TCGOp *op, 
uint64_t i)
 /* If the binary operation has first argument @i, fold to NOT. */
 static bool fold_ix_to_not(OptContext *ctx, TCGOp *op, uint64_t i)
 {
-if (arg_is_const(op->args[1]) && arg_info(op->args[1])->val == i) {
+if (arg_is_const_val(op->args[1], i)) {
 return fold_to_not(ctx, op, 2);
 }
 return false;
@@ -972,7 +983,7 @@ static bool fold_ix_to_not(OptContext *ctx, TCGOp *op, 
uint64_t i)
 /* If the binary operation has second argument @i, fold to @i. */
 static bool fold_xi_to_i(OptContext *ctx, TCGOp *op, uint64_t i)
 {
-if (arg_is_const(op->args[2]) && arg_info(op->args[2])->val == i) {
+if (arg_is_const_val(op->args[2], i)) {
 return tcg_opt_gen_movi(ctx, op, op->args[0], i);
 }
 return false;
@@ -981,7 +992,7 @@ static bool fold_xi_to_i(OptContext *ctx, TCGOp *op, 
uint64_t i)
 /* If the binary operation has second argument @i, fold to identity. */
 static bool fold_xi_to_x(OptContext *ctx, TCGOp *op, uint64_t i)
 {
-if (arg_is_const(op->args[2]) && arg_info(op->args[2])->val == i) {
+if (arg_is_const_val(op->args[2], i)) {
 return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
 }
 return false;
@@ -990,7 +1001,7 @@ static bool fold_xi_to_x(OptContext *ctx, TCGOp *op, 
uint64_t i)
 /* If the binary operation has second argument @i, fold to NOT. */
 static bool fold_xi_to_not(OptContext *ctx, TCGOp *op, uint64_t i)
 {
-if (arg_is_const(op->args[2]) && arg_info(op->args[2])->val == i) {
+if (arg_is_const_val(op->args[2], i)) {
 return fold_to_not(ctx, op, 1);
 }
 return false;
@@ -1223,8 +1234,8 @@ static bool fold_brcond2(OptContext *ctx, TCGOp *op)
  * Simplify LT/GE comparisons vs zero to a single compare
  * vs the high word of the input.
  */
-if (arg_is_const(op->args[2]) && arg_info(op->args[2])->val == 0 &&
-arg_is_const(op->args[3]) && arg_info(op->args[3])->val == 0) {
+if (arg_is_const_val(op->args[2], 0) &&
+arg_is_const_val(op->args[3], 0)) {
 goto do_brcond_high;
 }
 break;
@@ -1448,9 +1459,7 @@ static bool fold_deposit(OptContext *ctx, TCGOp *op)
 }
 
 /* Inserting a value into zero at offset 0. */
-if (arg_is_const(op->args[1])
-&& arg_info(op->args[1])->val == 0
-&& op->args[3] == 0) {
+if (arg_is_const_val(op->args[1], 0) && op->args[3] == 0) {
 uint64_t mask = MAKE_64BIT_MASK(0, op->args[4]);
 
 op->opc = and_opc;
@@ -1461,8 +1470,7 @@ static bool fold_deposit(OptContext *ctx, TCGOp *op)
 }
 
 /* Inserting zero into a value. */
-if (arg_is_const(op->args[2])
-&& arg_info(op->args[2])->val == 0) {
+if (arg_is_const_val(op->args[2], 0)) {
 uint64_t mask = deposit64(-1, op->args[3], op->args[4], 0);
 
 op->opc = and_opc;
@@ -2000,8 +2008,8 @@ static bool fold_setcond2(OptContext *ctx, TCGOp *op)
  * Simplify LT/GE comparisons vs zero to a single compare
  * vs the high word of the input.
  */
-if (arg_is_const(op->args[3]) && arg_info(op->args[3])->val == 0 &&
-arg_is_const(op->args[4]) && arg_info(op->args[4])->val == 0) {
+if (arg_is_const_val(op->args[3], 0) &&
+

1 2 >

1 - 100 of 122 matches

Mail list logo