On 17 Jan 2025, at 15:25, Adrian Moreno wrote:
> Printing just the datapath on each upcall gives little information (most > often, there will only be one well-known datapath). Instead, print both > the input port name (plus the datapath). > > In order to do this, refactor decode_nla to always generate the dump > that only gets printed if needed. That way it can be called earlier on. > > Signed-off-by: Adrian Moreno <[email protected]> In general, this looks good to me. Some small comments below. //Eelco > --- > utilities/usdt-scripts/upcall_cost.py | 4 +- > utilities/usdt-scripts/upcall_monitor.py | 110 +++++++++++++---------- > utilities/usdt-scripts/usdt_lib.py | 27 +++++- > 3 files changed, 90 insertions(+), 51 deletions(-) > > diff --git a/utilities/usdt-scripts/upcall_cost.py > b/utilities/usdt-scripts/upcall_cost.py > index 47a1e30a6..2037fe69a 100755 > --- a/utilities/usdt-scripts/upcall_cost.py > +++ b/utilities/usdt-scripts/upcall_cost.py > @@ -354,7 +354,7 @@ class DpUpcall(Event): > pkt_frag_len): > super(DpUpcall, self).__init__(ts, pid, comm, cpu, > EventType.DP_UPCALL) > self.dpif_name = dpif_name > - self.dp_port = dp_map.get(dpif_name, port) > + self.dp_port = dp_map.get_port_num(dpif_name, port) get_port_number()? See below. > if self.dp_port is None: > # > # As we only identify interfaces at startup, new interfaces could > @@ -448,7 +448,7 @@ class RecvUpcall(Event): > > def get_system_dp_port(dpif_name): > dp_map = dp_map.get_map() > - return dp_map.get(dpif_name, {}).get("ovs-system", None) > + return dp_map.get_port_num(dpif_name, {}).get("ovs-system", None) > > def decode_nlm(msg, indent=4, dump=True): > bytes_left = len(msg) > diff --git a/utilities/usdt-scripts/upcall_monitor.py > b/utilities/usdt-scripts/upcall_monitor.py > index 333e23d51..8943fd205 100755 > --- a/utilities/usdt-scripts/upcall_monitor.py > +++ b/utilities/usdt-scripts/upcall_monitor.py > @@ -20,10 +20,10 @@ > # packets sent by the kernel to ovs-vswitchd. By default, it will show all > # upcall events, which looks something like this: > # > -# TIME CPU COMM PID DPIF_NAME TYPE > PKT_LEN... > -# 5952147.003848809 2 handler4 1381158 system@ovs-system 0 98 > 132 > -# 5952147.003879643 2 handler4 1381158 system@ovs-system 0 70 > 160 > -# 5952147.003914924 2 handler4 1381158 system@ovs-system 0 98 > 152 > +# TIME CPU COMM PID PORT_NAME TYPE .. > +# 5952147.003848809 2 handler4 1381158 eth0 (system@ovs-system) 0 > +# 5952147.003879643 2 handler4 1381158 eth0 (system@ovs-system) 0 > +# 5952147.003914924 2 handler4 1381158 eth0 (system@ovs-system) 0 > # > # Also, upcalls dropped by the kernel (e.g: because the netlink buffer is > full) > # are reported. > @@ -71,7 +71,7 @@ > # > # $ ./upcall_monitor.py --packet-decode decode --flow-key-decode nlraw \ > # --packet-size 128 --flow-key-size 256 > -# TIME CPU COMM PID DPIF_NAME ... > +# TIME CPU COMM PID PORT_NAME ... > # 5953013.333214231 2 handler4 1381158 system@ovs-system ... > # Flow key size 132 bytes, size captured 132 bytes. > # nla_len 8, nla_type OVS_KEY_ATTR_RECIRC_ID[20], data: 00 00 00 00 > @@ -121,6 +121,8 @@ from os.path import exists > from scapy.all import hexdump, wrpcap > from scapy.layers.l2 import Ether > No need for a new line here. > +from usdt_lib import DpPortMapping > + > import argparse > import psutil > import re > @@ -280,7 +282,7 @@ int kretprobe__ovs_dp_upcall(struct pt_regs *ctx) > # > # print_key() > # > -def print_key(event): > +def print_key(event, decode_dump): > if event.key_size < options.flow_key_size: > key_len = event.key_size > else: > @@ -301,39 +303,46 @@ def print_key(event): > dump=True), > flags=re.MULTILINE)) > > - if options.flow_key_decode == 'nlraw': > - nla = decode_nlm(bytes(event.key)[:key_len]) > - else: > - nla = decode_nlm(bytes(event.key)[:key_len], dump=False) > - > - if "OVS_KEY_ATTR_IN_PORT" in nla: > - port = struct.unpack("=I", nla["OVS_KEY_ATTR_IN_PORT"])[0] > - else: > - port = "Unknown" > - > - return port > + if options.flow_key_decode == "nlraw": > + for line in decode_dump: > + print(line) > > > # > # print_event() > # > def print_event(ctx, data, size): > - event = b['events'].event(data) > - print("{:<18.9f} {:<4} {:<16} {:<10} {:<32} {:<4} {:<10} {:<12} {:<8}". > - format(event.ts / 1000000000, > - event.cpu, > - event.comm.decode("utf-8"), > - event.pid, > - event.dpif_name.decode("utf-8"), > - event.upcall_type, > - event.pkt_size, > - event.key_size, > - event.result)) > + event = b["events"].event(data) > + dp = event.dpif_name.decode("utf-8") > + > + nla, key_dump = decode_nlm( > + bytes(event.key)[: min(event.key_size, options.flow_key_size)] > + ) > + if "OVS_KEY_ATTR_IN_PORT" in nla: > + port_no = struct.unpack("=I", nla["OVS_KEY_ATTR_IN_PORT"])[0] > + port = dp_map.get_port_name(dp.partition("@")[-1], port_no) What if the port can not be found? Should we re-populate the cache? or just dump the internal port number? > + else: > + port = "Unknown" > + Adding this in this way seems like a hack, of splitting up print_ukey(), but I guess it would be ok for now until we move the decode_nlm to the usdt library as a proper object :) > + print( > + "{:<18.9f} {:<4} {:<16} {:<10} {:<40} {:<4} {:<10} {:<12} {:<8}". > + format( > + event.ts / 1000000000, > + event.cpu, > + event.comm.decode("utf-8"), > + event.pid, > + f"{port} ({dp})", Not sure if we should start mixing f”” style with format(), or f”” in general? If we want f”” in general, we should probably replace all format() functions. > + event.upcall_type, > + event.pkt_size, > + event.key_size, > + event.result, > + ) > + ) > > # > # Decode packet only if there is data > # > - port = print_key(event) > + print_key(event, key_dump) > > if event.pkt_size <= 0: > return > @@ -342,7 +351,7 @@ def print_event(ctx, data, size): > > if event.pkt_size < options.packet_size: > pkt_len = event.pkt_size > - pkt_data = bytes(event.pkt)[:event.pkt_size] > + pkt_data = bytes(event.pkt)[: event.pkt_size] Any reason for the extra space? I guess for slicing we do not need a space if there are no operations. > else: > pkt_len = options.packet_size > pkt_data = bytes(event.pkt) > @@ -369,23 +378,26 @@ def print_event(ctx, data, size): > # > # decode_nlm() > # > -def decode_nlm(msg, indent=4, dump=True): > +def decode_nlm(msg, indent=4): > bytes_left = len(msg) > result = {} > + dump = [] > > while bytes_left: > if bytes_left < 4: > - if dump: > - print("{}WARN: decode truncated; can't read header".format( > - ' ' * indent)) > + dump.append( > + "{}WARN: decode truncated; can't read header".format( > + " " * indent > + ) > + ) > break > > nla_len, nla_type = struct.unpack("=HH", msg[:4]) > > if nla_len < 4: > - if dump: > - print("{}WARN: decode truncated; nla_len < 4".format( > - ' ' * indent)) > + dump.append( > + "{}WARN: decode truncated; nla_len < 4".format(" " * indent) > + ) > break > > nla_data = msg[4:nla_len] > @@ -397,16 +409,19 @@ def decode_nlm(msg, indent=4, dump=True): > else: > result[get_ovs_key_attr_str(nla_type)] = nla_data > > - if dump: > - print("{}nla_len {}, nla_type {}[{}], data: {}{}".format( > + dump.append( > + "{}nla_len {}, nla_type {}[{}], data: {}{}".format( > ' ' * indent, nla_len, get_ovs_key_attr_str(nla_type), > nla_type, > - "".join("{:02x} ".format(b) for b in nla_data), trunc)) > + "".join("{:02x} ".format(b) for b in nla_data), trunc) > + ) > > if trunc != "": > - if dump: > - print("{}WARN: decode truncated; nla_len > msg_len[{}] ". > - format(' ' * indent, bytes_left)) > + dump.append( > + "{}WARN: decode truncated; nla_len > msg_len[{}] ".format( > + " " * indent, bytes_left > + ) > + ) > break > > # update next offset, but make sure it's aligned correctly > @@ -414,7 +429,7 @@ def decode_nlm(msg, indent=4, dump=True): > msg = msg[next_offset:] > bytes_left -= next_offset > > - return result > + return result, dump > > > # > @@ -499,6 +514,9 @@ def main(): > # > global b > global options > + global dp_map > + > + dp_map = DpPortMapping() > > # > # Argument parsing > @@ -607,8 +625,8 @@ def main(): > # > # Print header > # > - print("{:<18} {:<4} {:<16} {:<10} {:<32} {:<4} {:<10} {:<12} > {:<8}".format( > - "TIME", "CPU", "COMM", "PID", "DPIF_NAME", "TYPE", "PKT_LEN", > + print("{:<18} {:<4} {:<16} {:<10} {:<40} {:<4} {:<10} {:<12} > {:<8}".format( > + "TIME", "CPU", "COMM", "PID", "PORT_NAME", "TYPE", "PKT_LEN", > "FLOW_KEY_LEN", "RESULT")) > > # > diff --git a/utilities/usdt-scripts/usdt_lib.py > b/utilities/usdt-scripts/usdt_lib.py > index da3fab2bf..9ad50c540 100644 > --- a/utilities/usdt-scripts/usdt_lib.py > +++ b/utilities/usdt-scripts/usdt_lib.py > @@ -33,12 +33,33 @@ class DpPortMapping: > """Override the internal cache map.""" > self.cache_map = cache_map > > - def get(self, dp, port_no): > - """Get the portname from a port number.""" > + def get_port_num(self, dp, port): > + """Get the port number from a port name.""" > if self.cache_map is None: > self._get_mapping() > > - return self.cache_map.get(dp, {}).get(port_no, None) > + return self.cache_map.get(dp, {}).get(port, None) > + > + def get_port_name(self, dp, port_no): > + """Get the port name from a port number.""" > + if self.cache_map is None: > + self._get_mapping() > + > + if not self.cache_map.get(dp): > + return None > + > + for name, num in self.cache_map[dp].items(): > + if num == port_no: > + return name > + > + return None > + > + def get_port_number(self, dp, port): Now we have both get_port_num() and get_port_number(). I think we should only keep the latter one. > + """Get the port number from a port name.""" > + if self.cache_map is None: > + self._get_mapping() > + > + return self.cache_map.get(dp, {}).get(port, None) > > def _get_mapping(self): > """Get the datapath port mapping from the running OVS.""" > -- > 2.48.1 > > _______________________________________________ > dev mailing list > [email protected] > https://mail.openvswitch.org/mailman/listinfo/ovs-dev _______________________________________________ dev mailing list [email protected] https://mail.openvswitch.org/mailman/listinfo/ovs-dev
