Re: [RFC 2/6] scripts/simpletrace-rust: Support Event & Arguments in trace module

2024-05-28 Thread Zhao Liu
> > +/*
> > + * Refer to the description of ALLOWED_TYPES in
> > + * scripts/tracetool/__init__.py.
> 
> Please don't reference the Python implementation because this will not
> age well. It may bitrot if the Python code changes or if the Python
> implementation is deprecated then the source file will go away
> altogether. Make the Rust implementation self-contained. If there are
> common file format concerns shared by implementations, then move that
> information to a separate document in docs/interop/ (i.e. a simpletrace
> file format specification).

Thanks for your guidance, will do.

> > + */
> > +const ALLOWED_TYPES: [ 20] = [
> > +"int",
> > +"long",
> > +"short",
> > +"char",
> > +"bool",
> > +"unsigned",
> > +"signed",
> > +"int8_t",
> > +"uint8_t",
> > +"int16_t",
> > +"uint16_t",
> > +"int32_t",
> > +"uint32_t",
> > +"int64_t",
> > +"uint64_t",
> > +"void",
> > +"size_t",
> > +"ssize_t",
> > +"uintptr_t",
> > +"ptrdiff_t",
> > +];
> > +
> > +const STRING_TYPES: [ 4] =
> > +["const char*", "char*", "const char *", "char *"];
> > +
> > +/* TODO: Support 'vcpu' property. */
> 
> The vcpu property was removed in commit d9a6bad542cd ("docs: remove
> references to TCG tracing"). Is this comment outdated or are you
> planning to bring it back?

Thanks! I have no plan for this, I just follow _VALID_PROPS[] in
scripts/tracetool/__init__.py. As you commented above, I think I should
just ignore it. ;-)

> > +const VALID_PROPS: [ 1] = ["disable"];

[snip]

> > +pub fn build(arg_str: ) -> Result
> > +{
> > +let mut args = Arguments::new();
> > +for arg in arg_str.split(',').map(|s| s.trim()) {
> > +if arg.is_empty() {
> > +return Err(Error::EmptyArg);
> > +}
> > +
> > +if arg == "void" {
> > +continue;
> > +}
> > +
> > +let (arg_type, identifier) = if arg.contains('*') {
> > +/* FIXME: Implement rsplit_inclusive(). */
> > +let p = arg.rfind('*').unwrap();
> > +(
> > +/* Safe because arg contains "*" and p exists. */
> > +unsafe { arg.get_unchecked(..p + 1) },
> > +/* Safe because arg contains "*" and p exists. */
> > +unsafe { arg.get_unchecked(p + 1..) },
> > +)
> > +} else {
> > +arg.rsplit_once(' ').unwrap()
> > +};
> 
> Can you write this without unsafe? Maybe rsplit_once(' ') followed by a
> check for (_, '*identifier'). If the identifier starts with '*', then
> arg_type += ' *' and identifier = identifier[1:].

Clever idea! It should work, will try this way.

> > +
> > +validate_c_type(arg_type)?;
> > +args.props.push(ArgProperty::new(arg_type, identifier));
> > +}
> > +Ok(args)
> > +}
> > +}
> > +

[snip]

> > +pub fn build(line_str: , lineno: u32, filename: ) -> 
> > Result
> > +{
> > +static RE: Lazy = Lazy::new(|| {
> > +Regex::new(
> > +r#"(?x)
> > +((?P[\w\s]+)\s+)?
> > +(?P\w+)
> > +\((?P[^)]*)\)
> > +\s*
> > +(?:(?:(?P".+),)?\s*(?P".+))?
> 
> What is the purpose of fmt_trans?
> 
> > +\s*"#,
> > +)
> > +.unwrap()
> 
> I wonder if regular expressions help here. It's not easy to read this
> regex and there is a bunch of logic that takes apart the matches
> afterwards. It might even be clearer to use string methods to split
> fields.

Yes, regular matching is a burden here (it's a "lazy simplification" on
my part), and I'll think if it's possible to avoid regular matching with
string methods.

> Please add a comment showing the format that's being parsed:
> 
>  // [disable] ( [,  ] ...) ""
> 

OK.

> > +});
> > +
> > +let caps_res = RE.captures(line_str);
> > +if caps_res.is_none() {
> > +return Err(Error::UnknownEvent(line_str.to_owned()));
> > +}
> > +let caps = caps_res.unwrap();
> > +let name = caps.name("name").map_or("", |m| m.as_str());
> > +let props: Vec = if caps.name("props").is_some() {
> > +caps.name("props")
> > +.unwrap()
> > +.as_str()
> > +.split_whitespace()
> > +.map(|s| s.to_string())
> > +.collect()
> > +} else {
> > +Vec::new()
> > +};
> > +let fmt: String =
> > +caps.name("fmt").map_or("", |m| m.as_str()).to_string();
> > +let fmt_trans: String = caps
> > +.name("fmt_trans")
> > +.map_or("", |m| m.as_str())
> > +.to_string();
> > +
> > +if fmt.contains("%m") || fmt_trans.contains("%m") {
> > +return Err(Error::InvalidFormat(
> 

Re: [RFC 2/6] scripts/simpletrace-rust: Support Event & Arguments in trace module

2024-05-27 Thread Stefan Hajnoczi
On Mon, May 27, 2024 at 04:14:17PM +0800, Zhao Liu wrote:
> Refer to scripts/tracetool/__init__.py, add Event & Arguments
> abstractions in trace module.
> 
> Suggested-by: Paolo Bonzini 
> Signed-off-by: Zhao Liu 
> ---
>  scripts/simpletrace-rust/Cargo.lock   |  52 
>  scripts/simpletrace-rust/Cargo.toml   |   2 +
>  scripts/simpletrace-rust/src/trace.rs | 330 +-
>  3 files changed, 383 insertions(+), 1 deletion(-)
> 
> diff --git a/scripts/simpletrace-rust/Cargo.lock 
> b/scripts/simpletrace-rust/Cargo.lock
> index 4a0ff8092dcb..3d815014eb44 100644
> --- a/scripts/simpletrace-rust/Cargo.lock
> +++ b/scripts/simpletrace-rust/Cargo.lock
> @@ -2,6 +2,15 @@
>  # It is not intended for manual editing.
>  version = 3
>  
> +[[package]]
> +name = "aho-corasick"
> +version = "1.1.3"
> +source = "registry+https://github.com/rust-lang/crates.io-index;
> +checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916"
> +dependencies = [
> + "memchr",
> +]
> +
>  [[package]]
>  name = "anstream"
>  version = "0.6.14"
> @@ -90,6 +99,18 @@ version = "1.70.0"
>  source = "registry+https://github.com/rust-lang/crates.io-index;
>  checksum = "f8478577c03552c21db0e2724ffb8986a5ce7af88107e6be5d2ee6e158c12800"
>  
> +[[package]]
> +name = "memchr"
> +version = "2.7.2"
> +source = "registry+https://github.com/rust-lang/crates.io-index;
> +checksum = "6c8640c5d730cb13ebd907d8d04b52f55ac9a2eec55b440c8892f40d56c76c1d"
> +
> +[[package]]
> +name = "once_cell"
> +version = "1.19.0"
> +source = "registry+https://github.com/rust-lang/crates.io-index;
> +checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92"
> +
>  [[package]]
>  name = "proc-macro2"
>  version = "1.0.83"
> @@ -108,11 +129,42 @@ dependencies = [
>   "proc-macro2",
>  ]
>  
> +[[package]]
> +name = "regex"
> +version = "1.10.4"
> +source = "registry+https://github.com/rust-lang/crates.io-index;
> +checksum = "c117dbdfde9c8308975b6a18d71f3f385c89461f7b3fb054288ecf2a2058ba4c"
> +dependencies = [
> + "aho-corasick",
> + "memchr",
> + "regex-automata",
> + "regex-syntax",
> +]
> +
> +[[package]]
> +name = "regex-automata"
> +version = "0.4.6"
> +source = "registry+https://github.com/rust-lang/crates.io-index;
> +checksum = "86b83b8b9847f9bf95ef68afb0b8e6cdb80f498442f5179a29fad448fcc1eaea"
> +dependencies = [
> + "aho-corasick",
> + "memchr",
> + "regex-syntax",
> +]
> +
> +[[package]]
> +name = "regex-syntax"
> +version = "0.8.3"
> +source = "registry+https://github.com/rust-lang/crates.io-index;
> +checksum = "adad44e29e4c806119491a7f06f03de4d1af22c3a680dd47f1e6e179439d1f56"
> +
>  [[package]]
>  name = "simpletrace-rust"
>  version = "0.1.0"
>  dependencies = [
>   "clap",
> + "once_cell",
> + "regex",
>   "thiserror",
>  ]
>  
> diff --git a/scripts/simpletrace-rust/Cargo.toml 
> b/scripts/simpletrace-rust/Cargo.toml
> index b44ba1569dad..24a79d04e566 100644
> --- a/scripts/simpletrace-rust/Cargo.toml
> +++ b/scripts/simpletrace-rust/Cargo.toml
> @@ -8,4 +8,6 @@ license = "GPL-2.0-or-later"
>  
>  [dependencies]
>  clap = "4.5.4"
> +once_cell = "1.19.0"
> +regex = "1.10.4"
>  thiserror = "1.0.20"
> diff --git a/scripts/simpletrace-rust/src/trace.rs 
> b/scripts/simpletrace-rust/src/trace.rs
> index 3a4b06435b8b..f41d6e0b5bc3 100644
> --- a/scripts/simpletrace-rust/src/trace.rs
> +++ b/scripts/simpletrace-rust/src/trace.rs
> @@ -8,4 +8,332 @@
>   * SPDX-License-Identifier: GPL-2.0-or-later
>   */
>  
> -pub struct Event {}
> +#![allow(dead_code)]
> +
> +use std::fs::read_to_string;
> +
> +use once_cell::sync::Lazy;
> +use regex::Regex;
> +use thiserror::Error;
> +
> +#[derive(Error, Debug)]
> +pub enum Error
> +{
> +#[error("Empty argument (did you forget to use 'void'?)")]
> +EmptyArg,
> +#[error("Event '{0}' has more than maximum permitted argument count")]
> +InvalidArgCnt(String),
> +#[error("{0} does not end with a new line")]
> +InvalidEventFile(String),
> +#[error("Invalid format: {0}")]
> +InvalidFormat(String),
> +#[error(
> +"Argument type '{0}' is not allowed. \
> +Only standard C types and fixed size integer \
> +types should be used. struct, union, and \
> +other complex pointer types should be \
> +declared as 'void *'"
> +)]
> +InvalidType(String),
> +#[error("Error at {0}:{1}: {2}")]
> +ReadEventFail(String, usize, String),
> +#[error("Unknown event: {0}")]
> +UnknownEvent(String),
> +#[error("Unknown properties: {0}")]
> +UnknownProp(String),
> +}
> +
> +pub type Result = std::result::Result;
> +
> +/*
> + * Refer to the description of ALLOWED_TYPES in
> + * scripts/tracetool/__init__.py.

Please don't reference the Python implementation because this will not
age well. It may bitrot if the Python code changes or if the Python
implementation is deprecated then the source file will go away
altogether. Make the Rust implementation self-contained. If there are
common file format 

[RFC 2/6] scripts/simpletrace-rust: Support Event & Arguments in trace module

2024-05-27 Thread Zhao Liu
Refer to scripts/tracetool/__init__.py, add Event & Arguments
abstractions in trace module.

Suggested-by: Paolo Bonzini 
Signed-off-by: Zhao Liu 
---
 scripts/simpletrace-rust/Cargo.lock   |  52 
 scripts/simpletrace-rust/Cargo.toml   |   2 +
 scripts/simpletrace-rust/src/trace.rs | 330 +-
 3 files changed, 383 insertions(+), 1 deletion(-)

diff --git a/scripts/simpletrace-rust/Cargo.lock 
b/scripts/simpletrace-rust/Cargo.lock
index 4a0ff8092dcb..3d815014eb44 100644
--- a/scripts/simpletrace-rust/Cargo.lock
+++ b/scripts/simpletrace-rust/Cargo.lock
@@ -2,6 +2,15 @@
 # It is not intended for manual editing.
 version = 3
 
+[[package]]
+name = "aho-corasick"
+version = "1.1.3"
+source = "registry+https://github.com/rust-lang/crates.io-index;
+checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916"
+dependencies = [
+ "memchr",
+]
+
 [[package]]
 name = "anstream"
 version = "0.6.14"
@@ -90,6 +99,18 @@ version = "1.70.0"
 source = "registry+https://github.com/rust-lang/crates.io-index;
 checksum = "f8478577c03552c21db0e2724ffb8986a5ce7af88107e6be5d2ee6e158c12800"
 
+[[package]]
+name = "memchr"
+version = "2.7.2"
+source = "registry+https://github.com/rust-lang/crates.io-index;
+checksum = "6c8640c5d730cb13ebd907d8d04b52f55ac9a2eec55b440c8892f40d56c76c1d"
+
+[[package]]
+name = "once_cell"
+version = "1.19.0"
+source = "registry+https://github.com/rust-lang/crates.io-index;
+checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92"
+
 [[package]]
 name = "proc-macro2"
 version = "1.0.83"
@@ -108,11 +129,42 @@ dependencies = [
  "proc-macro2",
 ]
 
+[[package]]
+name = "regex"
+version = "1.10.4"
+source = "registry+https://github.com/rust-lang/crates.io-index;
+checksum = "c117dbdfde9c8308975b6a18d71f3f385c89461f7b3fb054288ecf2a2058ba4c"
+dependencies = [
+ "aho-corasick",
+ "memchr",
+ "regex-automata",
+ "regex-syntax",
+]
+
+[[package]]
+name = "regex-automata"
+version = "0.4.6"
+source = "registry+https://github.com/rust-lang/crates.io-index;
+checksum = "86b83b8b9847f9bf95ef68afb0b8e6cdb80f498442f5179a29fad448fcc1eaea"
+dependencies = [
+ "aho-corasick",
+ "memchr",
+ "regex-syntax",
+]
+
+[[package]]
+name = "regex-syntax"
+version = "0.8.3"
+source = "registry+https://github.com/rust-lang/crates.io-index;
+checksum = "adad44e29e4c806119491a7f06f03de4d1af22c3a680dd47f1e6e179439d1f56"
+
 [[package]]
 name = "simpletrace-rust"
 version = "0.1.0"
 dependencies = [
  "clap",
+ "once_cell",
+ "regex",
  "thiserror",
 ]
 
diff --git a/scripts/simpletrace-rust/Cargo.toml 
b/scripts/simpletrace-rust/Cargo.toml
index b44ba1569dad..24a79d04e566 100644
--- a/scripts/simpletrace-rust/Cargo.toml
+++ b/scripts/simpletrace-rust/Cargo.toml
@@ -8,4 +8,6 @@ license = "GPL-2.0-or-later"
 
 [dependencies]
 clap = "4.5.4"
+once_cell = "1.19.0"
+regex = "1.10.4"
 thiserror = "1.0.20"
diff --git a/scripts/simpletrace-rust/src/trace.rs 
b/scripts/simpletrace-rust/src/trace.rs
index 3a4b06435b8b..f41d6e0b5bc3 100644
--- a/scripts/simpletrace-rust/src/trace.rs
+++ b/scripts/simpletrace-rust/src/trace.rs
@@ -8,4 +8,332 @@
  * SPDX-License-Identifier: GPL-2.0-or-later
  */
 
-pub struct Event {}
+#![allow(dead_code)]
+
+use std::fs::read_to_string;
+
+use once_cell::sync::Lazy;
+use regex::Regex;
+use thiserror::Error;
+
+#[derive(Error, Debug)]
+pub enum Error
+{
+#[error("Empty argument (did you forget to use 'void'?)")]
+EmptyArg,
+#[error("Event '{0}' has more than maximum permitted argument count")]
+InvalidArgCnt(String),
+#[error("{0} does not end with a new line")]
+InvalidEventFile(String),
+#[error("Invalid format: {0}")]
+InvalidFormat(String),
+#[error(
+"Argument type '{0}' is not allowed. \
+Only standard C types and fixed size integer \
+types should be used. struct, union, and \
+other complex pointer types should be \
+declared as 'void *'"
+)]
+InvalidType(String),
+#[error("Error at {0}:{1}: {2}")]
+ReadEventFail(String, usize, String),
+#[error("Unknown event: {0}")]
+UnknownEvent(String),
+#[error("Unknown properties: {0}")]
+UnknownProp(String),
+}
+
+pub type Result = std::result::Result;
+
+/*
+ * Refer to the description of ALLOWED_TYPES in
+ * scripts/tracetool/__init__.py.
+ */
+const ALLOWED_TYPES: [ 20] = [
+"int",
+"long",
+"short",
+"char",
+"bool",
+"unsigned",
+"signed",
+"int8_t",
+"uint8_t",
+"int16_t",
+"uint16_t",
+"int32_t",
+"uint32_t",
+"int64_t",
+"uint64_t",
+"void",
+"size_t",
+"ssize_t",
+"uintptr_t",
+"ptrdiff_t",
+];
+
+const STRING_TYPES: [ 4] =
+["const char*", "char*", "const char *", "char *"];
+
+/* TODO: Support 'vcpu' property. */
+const VALID_PROPS: [ 1] = ["disable"];
+
+fn validate_c_type(arg_type: ) -> Result<()>
+{
+static RE_TYPE: Lazy = Lazy::new(|| Regex::new(r"\*").unwrap());
+let bits: Vec =
+