Re: [ovs-dev] [PATCH v1 5/7] utilities: upcall_monitor: Print port names.

Eelco Chaudron Thu, 30 Jan 2025 05:33:47 -0800


On 17 Jan 2025, at 15:25, Adrian Moreno wrote:


> Printing just the datapath on each upcall gives little information (most
> often, there will only be one well-known datapath). Instead, print both
> the input port name (plus the datapath).
>
> In order to do this, refactor decode_nla to always generate the dump
> that only gets printed if needed. That way it can be called earlier on.
>
> Signed-off-by: Adrian Moreno <[email protected]>

In general, this looks good to me. Some small comments below.

//Eelco

> ---
>  utilities/usdt-scripts/upcall_cost.py    |   4 +-
>  utilities/usdt-scripts/upcall_monitor.py | 110 +++++++++++++----------
>  utilities/usdt-scripts/usdt_lib.py       |  27 +++++-
>  3 files changed, 90 insertions(+), 51 deletions(-)
>
> diff --git a/utilities/usdt-scripts/upcall_cost.py 
> b/utilities/usdt-scripts/upcall_cost.py
> index 47a1e30a6..2037fe69a 100755
> --- a/utilities/usdt-scripts/upcall_cost.py
> +++ b/utilities/usdt-scripts/upcall_cost.py
> @@ -354,7 +354,7 @@ class DpUpcall(Event):
>                   pkt_frag_len):
>          super(DpUpcall, self).__init__(ts, pid, comm, cpu, 
> EventType.DP_UPCALL)
>          self.dpif_name = dpif_name
> -        self.dp_port = dp_map.get(dpif_name, port)
> +        self.dp_port = dp_map.get_port_num(dpif_name, port)

get_port_number()? See below.

>          if self.dp_port is None:
>              #
>              # As we only identify interfaces at startup, new interfaces could
> @@ -448,7 +448,7 @@ class RecvUpcall(Event):
>
>      def get_system_dp_port(dpif_name):
>          dp_map = dp_map.get_map()
> -        return dp_map.get(dpif_name, {}).get("ovs-system", None)
> +        return dp_map.get_port_num(dpif_name, {}).get("ovs-system", None)
>
>      def decode_nlm(msg, indent=4, dump=True):
>          bytes_left = len(msg)
> diff --git a/utilities/usdt-scripts/upcall_monitor.py 
> b/utilities/usdt-scripts/upcall_monitor.py
> index 333e23d51..8943fd205 100755
> --- a/utilities/usdt-scripts/upcall_monitor.py
> +++ b/utilities/usdt-scripts/upcall_monitor.py
> @@ -20,10 +20,10 @@
>  # packets sent by the kernel to ovs-vswitchd. By default, it will show all
>  # upcall events, which looks something like this:
>  #
> -# TIME               CPU  COMM      PID      DPIF_NAME          TYPE 
> PKT_LEN...
> -# 5952147.003848809  2    handler4  1381158  system@ovs-system  0    98     
> 132
> -# 5952147.003879643  2    handler4  1381158  system@ovs-system  0    70     
> 160
> -# 5952147.003914924  2    handler4  1381158  system@ovs-system  0    98     
> 152
> +# TIME               CPU  COMM      PID      PORT_NAME                TYPE ..
> +# 5952147.003848809  2    handler4  1381158  eth0 (system@ovs-system)  0
> +# 5952147.003879643  2    handler4  1381158  eth0 (system@ovs-system)  0
> +# 5952147.003914924  2    handler4  1381158  eth0 (system@ovs-system)  0
>  #
>  # Also, upcalls dropped by the kernel (e.g: because the netlink buffer is 
> full)
>  # are reported.
> @@ -71,7 +71,7 @@
>  #
>  #  $ ./upcall_monitor.py --packet-decode decode --flow-key-decode nlraw \
>  #      --packet-size 128 --flow-key-size 256
> -#  TIME               CPU  COMM             PID        DPIF_NAME          ...
> +#  TIME               CPU  COMM             PID        PORT_NAME          ...
>  #  5953013.333214231  2    handler4         1381158    system@ovs-system  ...
>  #    Flow key size 132 bytes, size captured 132 bytes.
>  #      nla_len 8, nla_type OVS_KEY_ATTR_RECIRC_ID[20], data: 00 00 00 00
> @@ -121,6 +121,8 @@ from os.path import exists
>  from scapy.all import hexdump, wrpcap
>  from scapy.layers.l2 import Ether
>

No need for a new line here.

> +from usdt_lib import DpPortMapping
> +
>  import argparse
>  import psutil
>  import re
> @@ -280,7 +282,7 @@ int kretprobe__ovs_dp_upcall(struct pt_regs *ctx)
>  #
>  # print_key()
>  #
> -def print_key(event):
> +def print_key(event, decode_dump):
>      if event.key_size < options.flow_key_size:
>          key_len = event.key_size
>      else:
> @@ -301,39 +303,46 @@ def print_key(event):
>                                             dump=True),
>                       flags=re.MULTILINE))
>
> -    if options.flow_key_decode == 'nlraw':
> -        nla = decode_nlm(bytes(event.key)[:key_len])
> -    else:
> -        nla = decode_nlm(bytes(event.key)[:key_len], dump=False)
> -
> -    if "OVS_KEY_ATTR_IN_PORT" in nla:
> -        port = struct.unpack("=I", nla["OVS_KEY_ATTR_IN_PORT"])[0]
> -    else:
> -        port = "Unknown"
> -
> -    return port
> +    if options.flow_key_decode == "nlraw":
> +        for line in decode_dump:
> +            print(line)
>
>
>  #
>  # print_event()
>  #
>  def print_event(ctx, data, size):
> -    event = b['events'].event(data)
> -    print("{:<18.9f} {:<4} {:<16} {:<10} {:<32} {:<4} {:<10} {:<12} {:<8}".
> -          format(event.ts / 1000000000,
> -                 event.cpu,
> -                 event.comm.decode("utf-8"),
> -                 event.pid,
> -                 event.dpif_name.decode("utf-8"),
> -                 event.upcall_type,
> -                 event.pkt_size,
> -                 event.key_size,
> -                 event.result))
> +    event = b["events"].event(data)
> +    dp = event.dpif_name.decode("utf-8")
> +
> +    nla, key_dump = decode_nlm(
> +        bytes(event.key)[: min(event.key_size, options.flow_key_size)]
> +    )
> +    if "OVS_KEY_ATTR_IN_PORT" in nla:
> +        port_no = struct.unpack("=I", nla["OVS_KEY_ATTR_IN_PORT"])[0]
> +        port = dp_map.get_port_name(dp.partition("@")[-1], port_no)

What if the port cannot be found? Should we re-populate the cache, or just
dump the internal port number?

> +    else:
> +        port = "Unknown"
> +

Adding it this way, by splitting up print_key(), seems like a hack, but I
guess it would be OK for now until we move decode_nlm() to the usdt library
as a proper object :)

> +    print(
> +        "{:<18.9f} {:<4} {:<16} {:<10} {:<40} {:<4} {:<10} {:<12} {:<8}".
> +        format(
> +            event.ts / 1000000000,
> +            event.cpu,
> +            event.comm.decode("utf-8"),
> +            event.pid,
> +            f"{port} ({dp})",

Not sure if we should start mixing f"" style with format(), or use f"" in
general? If we want f"" in general, we should probably replace all format()
calls.

> +            event.upcall_type,
> +            event.pkt_size,
> +            event.key_size,
> +            event.result,
> +        )
> +    )
>
>      #
>      # Decode packet only if there is data
>      #
> -    port = print_key(event)
> +    print_key(event, key_dump)
>
>      if event.pkt_size <= 0:
>          return
> @@ -342,7 +351,7 @@ def print_event(ctx, data, size):
>
>      if event.pkt_size < options.packet_size:
>          pkt_len = event.pkt_size
> -        pkt_data = bytes(event.pkt)[:event.pkt_size]
> +        pkt_data = bytes(event.pkt)[: event.pkt_size]

Any reason for the extra space? I guess for slicing we do not need a space if
there are no operators in the slice bounds.

>      else:
>          pkt_len = options.packet_size
>          pkt_data = bytes(event.pkt)
> @@ -369,23 +378,26 @@ def print_event(ctx, data, size):
>  #
>  # decode_nlm()
>  #
> -def decode_nlm(msg, indent=4, dump=True):
> +def decode_nlm(msg, indent=4):
>      bytes_left = len(msg)
>      result = {}
> +    dump = []
>
>      while bytes_left:
>          if bytes_left < 4:
> -            if dump:
> -                print("{}WARN: decode truncated; can't read header".format(
> -                    ' ' * indent))
> +            dump.append(
> +                "{}WARN: decode truncated; can't read header".format(
> +                    " " * indent
> +                )
> +            )
>              break
>
>          nla_len, nla_type = struct.unpack("=HH", msg[:4])
>
>          if nla_len < 4:
> -            if dump:
> -                print("{}WARN: decode truncated; nla_len < 4".format(
> -                    ' ' * indent))
> +            dump.append(
> +                "{}WARN: decode truncated; nla_len < 4".format(" " * indent)
> +            )
>              break
>
>          nla_data = msg[4:nla_len]
> @@ -397,16 +409,19 @@ def decode_nlm(msg, indent=4, dump=True):
>          else:
>              result[get_ovs_key_attr_str(nla_type)] = nla_data
>
> -        if dump:
> -            print("{}nla_len {}, nla_type {}[{}], data: {}{}".format(
> +        dump.append(
> +            "{}nla_len {}, nla_type {}[{}], data: {}{}".format(
>                  ' ' * indent, nla_len, get_ovs_key_attr_str(nla_type),
>                  nla_type,
> -                "".join("{:02x} ".format(b) for b in nla_data), trunc))
> +                "".join("{:02x} ".format(b) for b in nla_data), trunc)
> +        )
>
>          if trunc != "":
> -            if dump:
> -                print("{}WARN: decode truncated; nla_len > msg_len[{}] ".
> -                      format(' ' * indent, bytes_left))
> +            dump.append(
> +                "{}WARN: decode truncated; nla_len > msg_len[{}] ".format(
> +                    " " * indent, bytes_left
> +                )
> +            )
>              break
>
>          # update next offset, but make sure it's aligned correctly
> @@ -414,7 +429,7 @@ def decode_nlm(msg, indent=4, dump=True):
>          msg = msg[next_offset:]
>          bytes_left -= next_offset
>
> -    return result
> +    return result, dump
>
>
>  #
> @@ -499,6 +514,9 @@ def main():
>      #
>      global b
>      global options
> +    global dp_map
> +
> +    dp_map = DpPortMapping()
>
>      #
>      # Argument parsing
> @@ -607,8 +625,8 @@ def main():
>      #
>      # Print header
>      #
> -    print("{:<18} {:<4} {:<16} {:<10} {:<32} {:<4} {:<10} {:<12} 
> {:<8}".format(
> -        "TIME", "CPU", "COMM", "PID", "DPIF_NAME", "TYPE", "PKT_LEN",
> +    print("{:<18} {:<4} {:<16} {:<10} {:<40} {:<4} {:<10} {:<12} 
> {:<8}".format(
> +        "TIME", "CPU", "COMM", "PID", "PORT_NAME", "TYPE", "PKT_LEN",
>          "FLOW_KEY_LEN", "RESULT"))
>
>      #
> diff --git a/utilities/usdt-scripts/usdt_lib.py 
> b/utilities/usdt-scripts/usdt_lib.py
> index da3fab2bf..9ad50c540 100644
> --- a/utilities/usdt-scripts/usdt_lib.py
> +++ b/utilities/usdt-scripts/usdt_lib.py
> @@ -33,12 +33,33 @@ class DpPortMapping:
>          """Override the internal cache map."""
>          self.cache_map = cache_map
>
> -    def get(self, dp, port_no):
> -        """Get the portname from a port number."""
> +    def get_port_num(self, dp, port):
> +        """Get the port number from a port name."""
>          if self.cache_map is None:
>              self._get_mapping()
>
> -        return self.cache_map.get(dp, {}).get(port_no, None)
> +        return self.cache_map.get(dp, {}).get(port, None)
> +
> +    def get_port_name(self, dp, port_no):
> +        """Get the port name from a port number."""
> +        if self.cache_map is None:
> +            self._get_mapping()
> +
> +        if not self.cache_map.get(dp):
> +            return None
> +
> +        for name, num in self.cache_map[dp].items():
> +            if num == port_no:
> +                return name
> +
> +        return None
> +
> +    def get_port_number(self, dp, port):

Now we have both get_port_num() and get_port_number(). I think we should only 
keep the latter one.

> +        """Get the port number from a port name."""
> +        if self.cache_map is None:
> +            self._get_mapping()
> +
> +        return self.cache_map.get(dp, {}).get(port, None)
>
>      def _get_mapping(self):
>          """Get the datapath port mapping from the running OVS."""
> -- 
> 2.48.1
>
> _______________________________________________
> dev mailing list
> [email protected]
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev

_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Re: [ovs-dev] [PATCH v1 5/7] utilities: upcall_monitor: Print port names.

Reply via email to