Paolo Bonzini <[email protected]> writes:

> From: Marc-AndrĂ© Lureau <[email protected]>
>
> Generate high-level native Rust declarations for the QAPI types.
>
> - char* is mapped to String, scalars to there corresponding Rust types
>
> - enums use #[repr(u32)] and can be transmuted to their C counterparts
>
> - has_foo/foo members are mapped to Option<T>
>
> - lists are represented as Vec<T>
>
> - structures map fields 1:1 to Rust
>
> - alternate are represented as Rust enum, each variant being a 1-element
>   tuple
>
> - unions are represented in a similar way as in C: a struct S with a "u"
>   member (since S may have extra 'base' fields). The discriminant
>   isn't a member of S, since Rust enum already include it, but it can be
>   recovered with "mystruct.u.into()"
>
> Anything that includes a recursive struct puts it in a Box.  Lists are
> not considered recursive, because Vec breaks the recursion (it's possible
> to construct an object containing an empty Vec of its own type).
>
> Signed-off-by: Marc-AndrĂ© Lureau <[email protected]>
> Link: 
> https://lore.kernel.org/r/[email protected]
> [Paolo: rewrite conversion of schema types to Rust types]
> Signed-off-by: Paolo Bonzini <[email protected]>
> ---
>  meson.build              |   4 +-
>  scripts/qapi/backend.py  |  25 +++
>  scripts/qapi/common.py   |  43 +++++
>  scripts/qapi/rs.py       |  61 +++++++
>  scripts/qapi/rs_types.py | 373 +++++++++++++++++++++++++++++++++++++++
>  scripts/qapi/schema.py   |  59 +++++--
>  6 files changed, 546 insertions(+), 19 deletions(-)
>  create mode 100644 scripts/qapi/rs.py
>  create mode 100644 scripts/qapi/rs_types.py
>
> diff --git a/meson.build b/meson.build
> index db87358d62d..4228792f0f6 100644
> --- a/meson.build
> +++ b/meson.build
> @@ -3540,11 +3540,13 @@ qapi_gen_depends = [ meson.current_source_dir() / 
> 'scripts/qapi/__init__.py',
>                       meson.current_source_dir() / 
> 'scripts/qapi/introspect.py',
>                       meson.current_source_dir() / 'scripts/qapi/main.py',
>                       meson.current_source_dir() / 'scripts/qapi/parser.py',
> +                     meson.current_source_dir() / 'scripts/qapi/rs_types.py',
>                       meson.current_source_dir() / 'scripts/qapi/schema.py',
>                       meson.current_source_dir() / 'scripts/qapi/source.py',
>                       meson.current_source_dir() / 'scripts/qapi/types.py',
>                       meson.current_source_dir() / 'scripts/qapi/visit.py',
> -                     meson.current_source_dir() / 'scripts/qapi-gen.py'
> +                     meson.current_source_dir() / 'scripts/qapi-gen.py',
> +                     meson.current_source_dir() / 'scripts/qapi/rs.py',
>  ]
>  
>  tracetool = [
> diff --git a/scripts/qapi/backend.py b/scripts/qapi/backend.py
> index 49ae6ecdd33..8023acce0d6 100644
> --- a/scripts/qapi/backend.py
> +++ b/scripts/qapi/backend.py
> @@ -7,6 +7,7 @@
>  from .events import gen_events
>  from .features import gen_features
>  from .introspect import gen_introspect
> +from .rs_types import gen_rs_types
>  from .schema import QAPISchema
>  from .types import gen_types
>  from .visit import gen_visit
> @@ -63,3 +64,27 @@ def generate(self,
>          gen_commands(schema, output_dir, prefix, gen_tracing)
>          gen_events(schema, output_dir, prefix)
>          gen_introspect(schema, output_dir, prefix, unmask)
> +
> +
> +class QAPIRsBackend(QAPIBackend):
> +    # pylint: disable=too-few-public-methods
> +
> +    def generate(self,
> +                 schema: QAPISchema,
> +                 output_dir: str,
> +                 prefix: str,
> +                 unmask: bool,
> +                 builtins: bool,
> +                 gen_tracing: bool) -> None:
> +        """
> +        Generate Rust code for the given schema into the target directory.
> +
> +        :param schema_file: The primary QAPI schema file.
> +        :param output_dir: The output directory to store generated code.
> +        :param prefix: Optional C-code prefix for symbol names.
> +        :param unmask: Expose non-ABI names through introspection?
> +        :param builtins: Generate code for built-in types?
> +
> +        :raise QAPIError: On failures.
> +        """
> +        gen_rs_types(schema, output_dir, prefix)
> diff --git a/scripts/qapi/common.py b/scripts/qapi/common.py
> index c75396a01b5..e9261a3411e 100644
> --- a/scripts/qapi/common.py
> +++ b/scripts/qapi/common.py
> @@ -64,6 +64,13 @@ def camel_to_upper(value: str) -> str:
>      return ret.upper()
>  
>  
> +def camel_to_lower(value: str) -> str:
> +    """
> +    Converts CamelCase to camel_case.
> +    """
> +    return camel_to_upper(value).lower()
> +
> +
>  def c_enum_const(type_name: str,
>                   const_name: str,
>                   prefix: Optional[str] = None) -> str:
> @@ -129,6 +136,42 @@ def c_name(name: str, protect: bool = True) -> str:
>      return name
>  
>  
> +def rs_name(name: str) -> str:
> +    """
> +    Map @name to a valid, possibly raw Rust identifier.
> +    """
> +    name = re.sub(r'[^A-Za-z0-9_]', '_', name)
> +    if name[0].isnumeric():

.isdigit()?  It's what c_name() uses...

> +        name = '_' + name

In review of v1, I pointed to "The Rust Reference"

       Identifiers starting with an underscore are typically used to
       indicate an identifier that is intentionally unused, and will
       silence the unused warning in rustc.

       https://doc.rust-lang.org/reference/identifiers.html

You replied "In this case it doesn't really matter: public items (such
as QAPI enum entries, or struct fields) do not raise the unused warning
anyway."

What gives us confidence rs_name() will only be used where it doesn't
really matter?

> +    # based from the list:
> +    # https://doc.rust-lang.org/reference/keywords.html
> +    if name in ('Self', 'abstract', 'as', 'async',
> +                'await', 'become', 'box', 'break',
> +                'const', 'continue', 'crate', 'do',
> +                'dyn', 'else', 'enum', 'extern',
> +                'false', 'final', 'fn', 'for',
> +                'if', 'impl', 'in', 'let',
> +                'loop', 'macro', 'match', 'mod',
> +                'move', 'mut', 'override', 'priv',
> +                'pub', 'ref', 'return', 'self',
> +                'static', 'struct', 'super', 'trait',
> +                'true', 'try', 'type', 'typeof',
> +                'union', 'unsafe', 'unsized', 'use',
> +                'virtual', 'where', 'while', 'yield'):
> +        name = 'r#' + name

TIL...

> +    # avoid some clashes with the standard library
> +    if name in ('String',):
> +        name = 'Qapi' + name

This hides the unwise use of 'String' in qapi/net.json from Rust.  I'd
rather rename that one.

> +
> +    return name
> +
> +
> +def to_camel_case(value: str) -> str:
> +    return ''.join('_' + word if word[0].isdigit()
> +                   else word[:1].upper() + word[1:]
> +                   for word in filter(None, re.split("[-_]+", value)))

Please use r'...' for regular expressions always.

Why do you need filter()?

This maps 'foo-0123-bar' to 'Foo_0123Bar'.  Intentional?  I'd kind of
expect 'Foo0123Bar'.

> +
> +
>  class Indentation:
>      """
>      Indentation level management.
> diff --git a/scripts/qapi/rs.py b/scripts/qapi/rs.py
> new file mode 100644
> index 00000000000..2cf0c0e07f1
> --- /dev/null
> +++ b/scripts/qapi/rs.py
> @@ -0,0 +1,61 @@
> +# This work is licensed under the terms of the GNU GPL, version 2.
> +# See the COPYING file in the top-level directory.
> +"""
> +QAPI Rust generator
> +"""
> +
> +import os
> +import re
> +import subprocess
> +import sys
> +
> +from .common import mcgen as mcgen_common
> +from .gen import QAPIGen
> +from .schema import QAPISchemaVisitor
> +
> +
> +def mcgen(s: str, **kwds: object) -> str:
> +    s = mcgen_common(s, **kwds)
> +    return re.sub(r'(?: *\n)+', '\n', s)

This eats trailing spaces and blank lines.  The latter is a big hammer.
Without it, I see unwanted blank lines generated.  With it, I see wanted
blank lines eaten.  For instance:

    // @generated by qapi-gen, DO NOT EDIT

    //!
    //! Schema-defined QAPI types
    //!
    //! Copyright (c) 2025 Red Hat, Inc.
    //!
    //! This work is licensed under the terms of the GNU LGPL, version 2.1 or
    //! later. See the COPYING.LIB file in the top-level directory.

    #![allow(unexpected_cfgs)]
    #![allow(non_camel_case_types)]
    #![allow(clippy::empty_structs_with_brackets)]
    #![allow(clippy::large_enum_variant)]
    #![allow(clippy::pub_underscore_fields)]

    // Because QAPI structs can contain float, for simplicity we never
    // derive Eq.  Clippy however would complain for those structs
    // that *could* be Eq too.
    #![allow(clippy::derive_partial_eq_without_eq)]

    use serde_derive::{Serialize, Deserialize};

    use util::qobject::QObject;

becomes

    // @generated by qapi-gen, DO NOT EDIT
    //!
    //! Schema-defined QAPI types
    //!
    //! Copyright (c) 2025 Red Hat, Inc.
    //!
    //! This work is licensed under the terms of the GNU LGPL, version 2.1 or
    //! later. See the COPYING.LIB file in the top-level directory.
    #![allow(unexpected_cfgs)]
    #![allow(non_camel_case_types)]
    #![allow(clippy::empty_structs_with_brackets)]
    #![allow(clippy::large_enum_variant)]
    #![allow(clippy::pub_underscore_fields)]
    // Because QAPI structs can contain float, for simplicity we never
    // derive Eq.  Clippy however would complain for those structs
    // that *could* be Eq too.
    #![allow(clippy::derive_partial_eq_without_eq)]
    use serde_derive::{Serialize, Deserialize};
    use util::qobject::QObject;

This text is generated by QAPIGenRs._top() and
QAPISchemaGenRsTypeVisitor.visit_begin().  The blank lines are clearly
intentional there.

Hmm.

Possibly related: rustfmt below.

> +
> +
> +class QAPIGenRs(QAPIGen):
> +    def __init__(self, fname: str, blurb: str, pydoc: str):
> +        super().__init__(fname)
> +        self._blurb = blurb
> +        self._copyright = '\n//! '.join(re.findall(r'^Copyright .*', pydoc,
> +                                                   re.MULTILINE))
> +
> +    def _top(self) -> str:
> +        return mcgen('''
> +// @generated by qapi-gen, DO NOT EDIT
> +
> +//!
> +//! Schema-defined QAPI types

I think you want %(blurb) here.

> +//!
> +//! %(copyright)s
> +//!
> +//! This work is licensed under the terms of the GNU LGPL, version 2.1 or
> +//! later. See the COPYING.LIB file in the top-level directory.
> +
> +''',
> +                     tool=os.path.basename(sys.argv[0]),
> +                     blurb=self._blurb, copyright=self._copyright)
> +
> +
> +class QAPISchemaRsVisitor(QAPISchemaVisitor):
> +
> +    def __init__(self, prefix: str, what: str,
> +                 blurb: str, pydoc: str):
> +        super().__init__()
> +        self._prefix = prefix
> +        self._what = what
> +        self._gen = QAPIGenRs(self._prefix + self._what + '.rs', blurb, 
> pydoc)

Break the line before blurb, please.

> +
> +    def write(self, output_dir: str) -> None:
> +        self._gen.write(output_dir)
> +
> +        try:
> +            subprocess.check_call(['rustfmt', self._gen.fname], 
> cwd=output_dir)

Break the line before cwd=, please.

> +        except FileNotFoundError:
> +            pass

This runs rustfmt to clean up the generated file.  Silently does nothing
if we don't have rustfmt.

Should we make rustfmt a hard requirement?  Please discuss this briefly
in the commit message.

> diff --git a/scripts/qapi/rs_types.py b/scripts/qapi/rs_types.py
> new file mode 100644
> index 00000000000..64702eb54ae
> --- /dev/null
> +++ b/scripts/qapi/rs_types.py

[Interesting part left for tomorrow...]

> diff --git a/scripts/qapi/schema.py b/scripts/qapi/schema.py
> index 15f5d97418f..a65b25141fa 100644
> --- a/scripts/qapi/schema.py
> +++ b/scripts/qapi/schema.py
> @@ -37,6 +37,7 @@
>      docgen_ifcond,
>      gen_endif,
>      gen_if,
> +    rs_name,
>      rsgen_ifcond,
>  )
>  from .error import QAPIError, QAPISemError, QAPISourceError
> @@ -341,6 +342,11 @@ def c_param_type(self) -> str:
   class QAPISchemaType(QAPISchemaDefinition, ABC):
       # Return the C type for common use.
       # For the types we commonly box, this is a pointer type.
       @abstractmethod
       def c_type(self) -> str:
           pass

       # Return the C type to be used in a parameter list.
       def c_param_type(self) -> str:
           return self.c_type()

       # Return the C type to be used where we suppress boxing.
>      def c_unboxed_type(self) -> str:
>          return self.c_type()
>  
> +    # Return the Rust type for common use

Are the uncommon uses?

There are for C types, and that's why we have both .c_type(),
.c_param_type(), nad .c_unboxed_type().

> +    @abstractmethod
> +    def rs_type(self) -> str:
> +        pass
> +
>      @abstractmethod
>      def json_type(self) -> str:
>          pass
> @@ -382,11 +388,12 @@ def describe(self) -> str:
>  class QAPISchemaBuiltinType(QAPISchemaType):
>      meta = 'built-in'
>  
> -    def __init__(self, name: str, json_type: str, c_type: str):
> +    def __init__(self, name: str, json_type: str, rs_type: str, c_type: str):
>          super().__init__(name, None, None)
>          assert json_type in ('string', 'number', 'int', 'boolean', 'null',
>                               'value')
>          self._json_type_name = json_type
> +        self._rs_type_name = rs_type
>          self._c_type_name = c_type
>  
>      def c_name(self) -> str:
> @@ -406,6 +413,9 @@ def json_type(self) -> str:
>      def doc_type(self) -> str:
>          return self.json_type()
>  
> +    def rs_type(self) -> str:
> +        return self._rs_type_name
> +
>      def visit(self, visitor: QAPISchemaVisitor) -> None:
>          super().visit(visitor)
>          visitor.visit_builtin_type(self.name, self.info, self.json_type())
> @@ -449,6 +459,9 @@ def is_implicit(self) -> bool:
>      def c_type(self) -> str:
>          return c_name(self.name)
>  
> +    def rs_type(self) -> str:
> +        return rs_name(self.name)
> +
>      def member_names(self) -> List[str]:
>          return [m.name for m in self.members]
>  
> @@ -498,6 +511,9 @@ def is_implicit(self) -> bool:
>      def c_type(self) -> str:
>          return c_name(self.name) + POINTER_SUFFIX
>  
> +    def rs_type(self) -> str:
> +        return 'Vec<%s>' % self.element_type.rs_type()

This may be called only after .check(), because that's when
.element_type becomes valid.  .ifcond() has the same precondition, and
states it explicitly with assert self._checked.  Let's do the same here.

> +
>      def json_type(self) -> str:
>          return 'array'
>  
> @@ -630,6 +646,9 @@ def c_type(self) -> str:
>      def c_unboxed_type(self) -> str:
>          return c_name(self.name)
>  
> +    def rs_type(self) -> str:
> +        return rs_name(self.name)
> +
>      def json_type(self) -> str:
>          return 'object'
>  
> @@ -711,6 +730,9 @@ def c_type(self) -> str:
>      def json_type(self) -> str:
>          return 'value'
>  
> +    def rs_type(self) -> str:
> +        return rs_name(self.name)
> +
>      def visit(self, visitor: QAPISchemaVisitor) -> None:
>          super().visit(visitor)
>          visitor.visit_alternate_type(
> @@ -1234,9 +1256,10 @@ def _def_include(self, expr: QAPIExpression) -> None:
>              QAPISchemaInclude(self._make_module(include), expr.info))
>  
>      def _def_builtin_type(
> -        self, name: str, json_type: str, c_type: str
> +        self, name: str, json_type: str, rs_type: str, c_type: str
>      ) -> None:
> -        self._def_definition(QAPISchemaBuiltinType(name, json_type, c_type))
> +        builtin = QAPISchemaBuiltinType(name, json_type, rs_type, c_type)
> +        self._def_definition(builtin)
>          # Instantiating only the arrays that are actually used would
>          # be nice, but we can't as long as their generated code
>          # (qapi-builtin-types.[ch]) may be shared by some other
> @@ -1255,21 +1278,21 @@ def is_predefined(self, name: str) -> bool:
>          return False
>  
>      def _def_predefineds(self) -> None:
> -        for t in [('str',    'string',  'char' + POINTER_SUFFIX),
> -                  ('number', 'number',  'double'),
> -                  ('int',    'int',     'int64_t'),
> -                  ('int8',   'int',     'int8_t'),
> -                  ('int16',  'int',     'int16_t'),
> -                  ('int32',  'int',     'int32_t'),
> -                  ('int64',  'int',     'int64_t'),
> -                  ('uint8',  'int',     'uint8_t'),
> -                  ('uint16', 'int',     'uint16_t'),
> -                  ('uint32', 'int',     'uint32_t'),
> -                  ('uint64', 'int',     'uint64_t'),
> -                  ('size',   'int',     'uint64_t'),
> -                  ('bool',   'boolean', 'bool'),
> -                  ('any',    'value',   'QObject' + POINTER_SUFFIX),
> -                  ('null',   'null',    'QNull' + POINTER_SUFFIX)]:
> +        for t in [('str',    'string',  'String',  'char' + POINTER_SUFFIX),
> +                  ('number', 'number',  'f64',     'double'),
> +                  ('int',    'int',     'i64',     'int64_t'),
> +                  ('int8',   'int',     'i8',      'int8_t'),
> +                  ('int16',  'int',     'i16',     'int16_t'),
> +                  ('int32',  'int',     'i32',     'int32_t'),
> +                  ('int64',  'int',     'i64',     'int64_t'),
> +                  ('uint8',  'int',     'u8',      'uint8_t'),
> +                  ('uint16', 'int',     'u16',     'uint16_t'),
> +                  ('uint32', 'int',     'u32',     'uint32_t'),
> +                  ('uint64', 'int',     'u64',     'uint64_t'),
> +                  ('size',   'int',     'u64',     'uint64_t'),
> +                  ('bool',   'boolean', 'bool',    'bool'),
> +                  ('any',    'value',   'QObject', 'QObject' + 
> POINTER_SUFFIX),
> +                  ('null',   'null',    '()',      'QNull' + 
> POINTER_SUFFIX)]:
>              self._def_builtin_type(*t)
>          self.the_empty_object_type = QAPISchemaObjectType(
>              'q_empty', None, None, None, None, None, [], None)


Reply via email to