Paolo Bonzini <[email protected]> writes:
> From: Marc-André Lureau <[email protected]>
>
> Generate high-level native Rust declarations for the QAPI types.
>
> - char* is mapped to String, scalars to there corresponding Rust types
>
> - enums use #[repr(u32)] and can be transmuted to their C counterparts
>
> - has_foo/foo members are mapped to Option<T>
>
> - lists are represented as Vec<T>
>
> - structures map fields 1:1 to Rust
>
> - alternate are represented as Rust enum, each variant being a 1-element
> tuple
>
> - unions are represented in a similar way as in C: a struct S with a "u"
> member (since S may have extra 'base' fields). The discriminant
> isn't a member of S, since Rust enum already include it, but it can be
> recovered with "mystruct.u.into()"
>
> Anything that includes a recursive struct puts it in a Box. Lists are
> not considered recursive, because Vec breaks the recursion (it's possible
> to construct an object containing an empty Vec of its own type).
>
> Signed-off-by: Marc-André Lureau <[email protected]>
> Link:
> https://lore.kernel.org/r/[email protected]
> [Paolo: rewrite conversion of schema types to Rust types]
> Signed-off-by: Paolo Bonzini <[email protected]>
> ---
> meson.build | 4 +-
> scripts/qapi/backend.py | 25 +++
> scripts/qapi/common.py | 43 +++++
> scripts/qapi/rs.py | 61 +++++++
> scripts/qapi/rs_types.py | 373 +++++++++++++++++++++++++++++++++++++++
> scripts/qapi/schema.py | 59 +++++--
> 6 files changed, 546 insertions(+), 19 deletions(-)
> create mode 100644 scripts/qapi/rs.py
> create mode 100644 scripts/qapi/rs_types.py
>
> diff --git a/meson.build b/meson.build
> index db87358d62d..4228792f0f6 100644
> --- a/meson.build
> +++ b/meson.build
> @@ -3540,11 +3540,13 @@ qapi_gen_depends = [ meson.current_source_dir() /
> 'scripts/qapi/__init__.py',
> meson.current_source_dir() /
> 'scripts/qapi/introspect.py',
> meson.current_source_dir() / 'scripts/qapi/main.py',
> meson.current_source_dir() / 'scripts/qapi/parser.py',
> + meson.current_source_dir() / 'scripts/qapi/rs_types.py',
> meson.current_source_dir() / 'scripts/qapi/schema.py',
> meson.current_source_dir() / 'scripts/qapi/source.py',
> meson.current_source_dir() / 'scripts/qapi/types.py',
> meson.current_source_dir() / 'scripts/qapi/visit.py',
> - meson.current_source_dir() / 'scripts/qapi-gen.py'
> + meson.current_source_dir() / 'scripts/qapi-gen.py',
> + meson.current_source_dir() / 'scripts/qapi/rs.py',
> ]
>
> tracetool = [
> diff --git a/scripts/qapi/backend.py b/scripts/qapi/backend.py
> index 49ae6ecdd33..8023acce0d6 100644
> --- a/scripts/qapi/backend.py
> +++ b/scripts/qapi/backend.py
> @@ -7,6 +7,7 @@
> from .events import gen_events
> from .features import gen_features
> from .introspect import gen_introspect
> +from .rs_types import gen_rs_types
> from .schema import QAPISchema
> from .types import gen_types
> from .visit import gen_visit
> @@ -63,3 +64,27 @@ def generate(self,
> gen_commands(schema, output_dir, prefix, gen_tracing)
> gen_events(schema, output_dir, prefix)
> gen_introspect(schema, output_dir, prefix, unmask)
> +
> +
> +class QAPIRsBackend(QAPIBackend):
> + # pylint: disable=too-few-public-methods
> +
> + def generate(self,
> + schema: QAPISchema,
> + output_dir: str,
> + prefix: str,
> + unmask: bool,
> + builtins: bool,
> + gen_tracing: bool) -> None:
> + """
> + Generate Rust code for the given schema into the target directory.
> +
> + :param schema_file: The primary QAPI schema file.
> + :param output_dir: The output directory to store generated code.
> + :param prefix: Optional C-code prefix for symbol names.
> + :param unmask: Expose non-ABI names through introspection?
> + :param builtins: Generate code for built-in types?
> +
> + :raise QAPIError: On failures.
> + """
> + gen_rs_types(schema, output_dir, prefix)
> diff --git a/scripts/qapi/common.py b/scripts/qapi/common.py
> index c75396a01b5..e9261a3411e 100644
> --- a/scripts/qapi/common.py
> +++ b/scripts/qapi/common.py
> @@ -64,6 +64,13 @@ def camel_to_upper(value: str) -> str:
> return ret.upper()
>
>
> +def camel_to_lower(value: str) -> str:
> + """
> + Converts CamelCase to camel_case.
> + """
> + return camel_to_upper(value).lower()
> +
> +
> def c_enum_const(type_name: str,
> const_name: str,
> prefix: Optional[str] = None) -> str:
> @@ -129,6 +136,42 @@ def c_name(name: str, protect: bool = True) -> str:
> return name
>
>
> +def rs_name(name: str) -> str:
> + """
> + Map @name to a valid, possibly raw Rust identifier.
> + """
> + name = re.sub(r'[^A-Za-z0-9_]', '_', name)
> + if name[0].isnumeric():
.isdigit()? It's what c_name() uses...
> + name = '_' + name
In review of v1, I pointed to "The Rust Reference"
Identifiers starting with an underscore are typically used to
indicate an identifier that is intentionally unused, and will
silence the unused warning in rustc.
https://doc.rust-lang.org/reference/identifiers.html
You replied "In this case it doesn't really matter: public items (such
as QAPI enum entries, or struct fields) do not raise the unused warning
anyway."
What gives us confidence rs_name() will only be used where it doesn't
really matter?
> + # based from the list:
> + # https://doc.rust-lang.org/reference/keywords.html
> + if name in ('Self', 'abstract', 'as', 'async',
> + 'await', 'become', 'box', 'break',
> + 'const', 'continue', 'crate', 'do',
> + 'dyn', 'else', 'enum', 'extern',
> + 'false', 'final', 'fn', 'for',
> + 'if', 'impl', 'in', 'let',
> + 'loop', 'macro', 'match', 'mod',
> + 'move', 'mut', 'override', 'priv',
> + 'pub', 'ref', 'return', 'self',
> + 'static', 'struct', 'super', 'trait',
> + 'true', 'try', 'type', 'typeof',
> + 'union', 'unsafe', 'unsized', 'use',
> + 'virtual', 'where', 'while', 'yield'):
> + name = 'r#' + name
TIL...
> + # avoid some clashes with the standard library
> + if name in ('String',):
> + name = 'Qapi' + name
This hides the unwise use of 'String' in qapi/net.json from Rust. I'd
rather rename that one.
> +
> + return name
> +
> +
> +def to_camel_case(value: str) -> str:
> + return ''.join('_' + word if word[0].isdigit()
> + else word[:1].upper() + word[1:]
> + for word in filter(None, re.split("[-_]+", value)))
Please use r'...' for regular expressions always.
Why do you need filter()?
This maps 'foo-0123-bar' to 'Foo_0123Bar'. Intentional? I'd kind of
expect 'Foo0123Bar'.
> +
> +
> class Indentation:
> """
> Indentation level management.
> diff --git a/scripts/qapi/rs.py b/scripts/qapi/rs.py
> new file mode 100644
> index 00000000000..2cf0c0e07f1
> --- /dev/null
> +++ b/scripts/qapi/rs.py
> @@ -0,0 +1,61 @@
> +# This work is licensed under the terms of the GNU GPL, version 2.
> +# See the COPYING file in the top-level directory.
> +"""
> +QAPI Rust generator
> +"""
> +
> +import os
> +import re
> +import subprocess
> +import sys
> +
> +from .common import mcgen as mcgen_common
> +from .gen import QAPIGen
> +from .schema import QAPISchemaVisitor
> +
> +
> +def mcgen(s: str, **kwds: object) -> str:
> + s = mcgen_common(s, **kwds)
> + return re.sub(r'(?: *\n)+', '\n', s)
This eats trailing spaces and blank lines. The latter is a big hammer.
Without it, I see unwanted blank lines generated. With it, I see wanted
blank lines eaten. For instance:
// @generated by qapi-gen, DO NOT EDIT
//!
//! Schema-defined QAPI types
//!
//! Copyright (c) 2025 Red Hat, Inc.
//!
//! This work is licensed under the terms of the GNU LGPL, version 2.1 or
//! later. See the COPYING.LIB file in the top-level directory.
#![allow(unexpected_cfgs)]
#![allow(non_camel_case_types)]
#![allow(clippy::empty_structs_with_brackets)]
#![allow(clippy::large_enum_variant)]
#![allow(clippy::pub_underscore_fields)]
// Because QAPI structs can contain float, for simplicity we never
// derive Eq. Clippy however would complain for those structs
// that *could* be Eq too.
#![allow(clippy::derive_partial_eq_without_eq)]
use serde_derive::{Serialize, Deserialize};
use util::qobject::QObject;
becomes
// @generated by qapi-gen, DO NOT EDIT
//!
//! Schema-defined QAPI types
//!
//! Copyright (c) 2025 Red Hat, Inc.
//!
//! This work is licensed under the terms of the GNU LGPL, version 2.1 or
//! later. See the COPYING.LIB file in the top-level directory.
#![allow(unexpected_cfgs)]
#![allow(non_camel_case_types)]
#![allow(clippy::empty_structs_with_brackets)]
#![allow(clippy::large_enum_variant)]
#![allow(clippy::pub_underscore_fields)]
// Because QAPI structs can contain float, for simplicity we never
// derive Eq. Clippy however would complain for those structs
// that *could* be Eq too.
#![allow(clippy::derive_partial_eq_without_eq)]
use serde_derive::{Serialize, Deserialize};
use util::qobject::QObject;
This text is generated by QAPIGenRs._top() and
QAPISchemaGenRsTypeVisitor.visit_begin(). The blank lines are clearly
intentional there.
Hmm.
Possibly related: rustfmt below.
> +
> +
> +class QAPIGenRs(QAPIGen):
> + def __init__(self, fname: str, blurb: str, pydoc: str):
> + super().__init__(fname)
> + self._blurb = blurb
> + self._copyright = '\n//! '.join(re.findall(r'^Copyright .*', pydoc,
> + re.MULTILINE))
> +
> + def _top(self) -> str:
> + return mcgen('''
> +// @generated by qapi-gen, DO NOT EDIT
> +
> +//!
> +//! Schema-defined QAPI types
I think you want %(blurb) here.
> +//!
> +//! %(copyright)s
> +//!
> +//! This work is licensed under the terms of the GNU LGPL, version 2.1 or
> +//! later. See the COPYING.LIB file in the top-level directory.
> +
> +''',
> + tool=os.path.basename(sys.argv[0]),
> + blurb=self._blurb, copyright=self._copyright)
> +
> +
> +class QAPISchemaRsVisitor(QAPISchemaVisitor):
> +
> + def __init__(self, prefix: str, what: str,
> + blurb: str, pydoc: str):
> + super().__init__()
> + self._prefix = prefix
> + self._what = what
> + self._gen = QAPIGenRs(self._prefix + self._what + '.rs', blurb,
> pydoc)
Break the line before blurb, please.
> +
> + def write(self, output_dir: str) -> None:
> + self._gen.write(output_dir)
> +
> + try:
> + subprocess.check_call(['rustfmt', self._gen.fname],
> cwd=output_dir)
Break the line before cwd=, please.
> + except FileNotFoundError:
> + pass
This runs rustfmt to clean up the generated file. Silently does nothing
if we don't have rustfmt.
Should we make rustfmt a hard requirement? Please discuss this briefly
in the commit message.
> diff --git a/scripts/qapi/rs_types.py b/scripts/qapi/rs_types.py
> new file mode 100644
> index 00000000000..64702eb54ae
> --- /dev/null
> +++ b/scripts/qapi/rs_types.py
[Interesting part left for tomorrow...]
> diff --git a/scripts/qapi/schema.py b/scripts/qapi/schema.py
> index 15f5d97418f..a65b25141fa 100644
> --- a/scripts/qapi/schema.py
> +++ b/scripts/qapi/schema.py
> @@ -37,6 +37,7 @@
> docgen_ifcond,
> gen_endif,
> gen_if,
> + rs_name,
> rsgen_ifcond,
> )
> from .error import QAPIError, QAPISemError, QAPISourceError
> @@ -341,6 +342,11 @@ def c_param_type(self) -> str:
class QAPISchemaType(QAPISchemaDefinition, ABC):
# Return the C type for common use.
# For the types we commonly box, this is a pointer type.
@abstractmethod
def c_type(self) -> str:
pass
# Return the C type to be used in a parameter list.
def c_param_type(self) -> str:
return self.c_type()
# Return the C type to be used where we suppress boxing.
> def c_unboxed_type(self) -> str:
> return self.c_type()
>
> + # Return the Rust type for common use
Are the uncommon uses?
There are for C types, and that's why we have both .c_type(),
.c_param_type(), nad .c_unboxed_type().
> + @abstractmethod
> + def rs_type(self) -> str:
> + pass
> +
> @abstractmethod
> def json_type(self) -> str:
> pass
> @@ -382,11 +388,12 @@ def describe(self) -> str:
> class QAPISchemaBuiltinType(QAPISchemaType):
> meta = 'built-in'
>
> - def __init__(self, name: str, json_type: str, c_type: str):
> + def __init__(self, name: str, json_type: str, rs_type: str, c_type: str):
> super().__init__(name, None, None)
> assert json_type in ('string', 'number', 'int', 'boolean', 'null',
> 'value')
> self._json_type_name = json_type
> + self._rs_type_name = rs_type
> self._c_type_name = c_type
>
> def c_name(self) -> str:
> @@ -406,6 +413,9 @@ def json_type(self) -> str:
> def doc_type(self) -> str:
> return self.json_type()
>
> + def rs_type(self) -> str:
> + return self._rs_type_name
> +
> def visit(self, visitor: QAPISchemaVisitor) -> None:
> super().visit(visitor)
> visitor.visit_builtin_type(self.name, self.info, self.json_type())
> @@ -449,6 +459,9 @@ def is_implicit(self) -> bool:
> def c_type(self) -> str:
> return c_name(self.name)
>
> + def rs_type(self) -> str:
> + return rs_name(self.name)
> +
> def member_names(self) -> List[str]:
> return [m.name for m in self.members]
>
> @@ -498,6 +511,9 @@ def is_implicit(self) -> bool:
> def c_type(self) -> str:
> return c_name(self.name) + POINTER_SUFFIX
>
> + def rs_type(self) -> str:
> + return 'Vec<%s>' % self.element_type.rs_type()
This may be called only after .check(), because that's when
.element_type becomes valid. .ifcond() has the same precondition, and
states it explicitly with assert self._checked. Let's do the same here.
> +
> def json_type(self) -> str:
> return 'array'
>
> @@ -630,6 +646,9 @@ def c_type(self) -> str:
> def c_unboxed_type(self) -> str:
> return c_name(self.name)
>
> + def rs_type(self) -> str:
> + return rs_name(self.name)
> +
> def json_type(self) -> str:
> return 'object'
>
> @@ -711,6 +730,9 @@ def c_type(self) -> str:
> def json_type(self) -> str:
> return 'value'
>
> + def rs_type(self) -> str:
> + return rs_name(self.name)
> +
> def visit(self, visitor: QAPISchemaVisitor) -> None:
> super().visit(visitor)
> visitor.visit_alternate_type(
> @@ -1234,9 +1256,10 @@ def _def_include(self, expr: QAPIExpression) -> None:
> QAPISchemaInclude(self._make_module(include), expr.info))
>
> def _def_builtin_type(
> - self, name: str, json_type: str, c_type: str
> + self, name: str, json_type: str, rs_type: str, c_type: str
> ) -> None:
> - self._def_definition(QAPISchemaBuiltinType(name, json_type, c_type))
> + builtin = QAPISchemaBuiltinType(name, json_type, rs_type, c_type)
> + self._def_definition(builtin)
> # Instantiating only the arrays that are actually used would
> # be nice, but we can't as long as their generated code
> # (qapi-builtin-types.[ch]) may be shared by some other
> @@ -1255,21 +1278,21 @@ def is_predefined(self, name: str) -> bool:
> return False
>
> def _def_predefineds(self) -> None:
> - for t in [('str', 'string', 'char' + POINTER_SUFFIX),
> - ('number', 'number', 'double'),
> - ('int', 'int', 'int64_t'),
> - ('int8', 'int', 'int8_t'),
> - ('int16', 'int', 'int16_t'),
> - ('int32', 'int', 'int32_t'),
> - ('int64', 'int', 'int64_t'),
> - ('uint8', 'int', 'uint8_t'),
> - ('uint16', 'int', 'uint16_t'),
> - ('uint32', 'int', 'uint32_t'),
> - ('uint64', 'int', 'uint64_t'),
> - ('size', 'int', 'uint64_t'),
> - ('bool', 'boolean', 'bool'),
> - ('any', 'value', 'QObject' + POINTER_SUFFIX),
> - ('null', 'null', 'QNull' + POINTER_SUFFIX)]:
> + for t in [('str', 'string', 'String', 'char' + POINTER_SUFFIX),
> + ('number', 'number', 'f64', 'double'),
> + ('int', 'int', 'i64', 'int64_t'),
> + ('int8', 'int', 'i8', 'int8_t'),
> + ('int16', 'int', 'i16', 'int16_t'),
> + ('int32', 'int', 'i32', 'int32_t'),
> + ('int64', 'int', 'i64', 'int64_t'),
> + ('uint8', 'int', 'u8', 'uint8_t'),
> + ('uint16', 'int', 'u16', 'uint16_t'),
> + ('uint32', 'int', 'u32', 'uint32_t'),
> + ('uint64', 'int', 'u64', 'uint64_t'),
> + ('size', 'int', 'u64', 'uint64_t'),
> + ('bool', 'boolean', 'bool', 'bool'),
> + ('any', 'value', 'QObject', 'QObject' +
> POINTER_SUFFIX),
> + ('null', 'null', '()', 'QNull' +
> POINTER_SUFFIX)]:
> self._def_builtin_type(*t)
> self.the_empty_object_type = QAPISchemaObjectType(
> 'q_empty', None, None, None, None, None, [], None)