On 3 Feb 2025, at 11:09, Adrián Moreno wrote:
> On Thu, Jan 30, 2025 at 02:33:24PM +0100, Eelco Chaudron wrote: >> >> >> On 17 Jan 2025, at 15:25, Adrian Moreno wrote: >> >>> Printing just the datapath on each upcall gives little information (most >>> often, there will only be one well-known datapath). Instead, print both >>> the input port name (plus the datapath). >>> >>> In order to do this, refactor decode_nla to always generate the dump >>> that only gets printed if needed. That way it can be called earlier on. >>> >>> Signed-off-by: Adrian Moreno <[email protected]> >> >> In general, this looks good to me. Some small comments below. >> >> //Eelco >> >>> --- >>> utilities/usdt-scripts/upcall_cost.py | 4 +- >>> utilities/usdt-scripts/upcall_monitor.py | 110 +++++++++++++---------- >>> utilities/usdt-scripts/usdt_lib.py | 27 +++++- >>> 3 files changed, 90 insertions(+), 51 deletions(-) >>> >>> diff --git a/utilities/usdt-scripts/upcall_cost.py >>> b/utilities/usdt-scripts/upcall_cost.py >>> index 47a1e30a6..2037fe69a 100755 >>> --- a/utilities/usdt-scripts/upcall_cost.py >>> +++ b/utilities/usdt-scripts/upcall_cost.py >>> @@ -354,7 +354,7 @@ class DpUpcall(Event): >>> pkt_frag_len): >>> super(DpUpcall, self).__init__(ts, pid, comm, cpu, >>> EventType.DP_UPCALL) >>> self.dpif_name = dpif_name >>> - self.dp_port = dp_map.get(dpif_name, port) >>> + self.dp_port = dp_map.get_port_num(dpif_name, port) >> >> get_port_number()? See below. >> >>> if self.dp_port is None: >>> # >>> # As we only identify interfaces at startup, new interfaces >>> could >>> @@ -448,7 +448,7 @@ class RecvUpcall(Event): >>> >>> def get_system_dp_port(dpif_name): >>> dp_map = dp_map.get_map() >>> - return dp_map.get(dpif_name, {}).get("ovs-system", None) >>> + return dp_map.get_port_num(dpif_name, {}).get("ovs-system", None) >>> >>> def decode_nlm(msg, indent=4, dump=True): >>> bytes_left = len(msg) >>> diff --git a/utilities/usdt-scripts/upcall_monitor.py >>> b/utilities/usdt-scripts/upcall_monitor.py >>> index 333e23d51..8943fd205 100755 >>> --- a/utilities/usdt-scripts/upcall_monitor.py >>> +++ b/utilities/usdt-scripts/upcall_monitor.py >>> @@ -20,10 +20,10 @@ >>> # packets sent by the kernel to ovs-vswitchd. By default, it will show all >>> # upcall events, which looks something like this: >>> # >>> -# TIME CPU COMM PID DPIF_NAME TYPE >>> PKT_LEN... >>> -# 5952147.003848809 2 handler4 1381158 system@ovs-system 0 98 >>> 132 >>> -# 5952147.003879643 2 handler4 1381158 system@ovs-system 0 70 >>> 160 >>> -# 5952147.003914924 2 handler4 1381158 system@ovs-system 0 98 >>> 152 >>> +# TIME CPU COMM PID PORT_NAME TYPE >>> .. >>> +# 5952147.003848809 2 handler4 1381158 eth0 (system@ovs-system) 0 >>> +# 5952147.003879643 2 handler4 1381158 eth0 (system@ovs-system) 0 >>> +# 5952147.003914924 2 handler4 1381158 eth0 (system@ovs-system) 0 >>> # >>> # Also, upcalls dropped by the kernel (e.g: because the netlink buffer is >>> full) >>> # are reported. >>> @@ -71,7 +71,7 @@ >>> # >>> # $ ./upcall_monitor.py --packet-decode decode --flow-key-decode nlraw \ >>> # --packet-size 128 --flow-key-size 256 >>> -# TIME CPU COMM PID DPIF_NAME >>> ... >>> +# TIME CPU COMM PID PORT_NAME >>> ... >>> # 5953013.333214231 2 handler4 1381158 system@ovs-system >>> ... >>> # Flow key size 132 bytes, size captured 132 bytes. >>> # nla_len 8, nla_type OVS_KEY_ATTR_RECIRC_ID[20], data: 00 00 00 00 >>> @@ -121,6 +121,8 @@ from os.path import exists >>> from scapy.all import hexdump, wrpcap >>> from scapy.layers.l2 import Ether >>> >> >> No need for a new line here. > > I was trying to logically split the external dependencies from internal > ones: > > https://peps.python.org/pep-0008/#imports > > We are not checking this style-related pep options, so we could ingore > them. I was not aware of this pep rule :) So let’s adhere to it. >>> +from usdt_lib import DpPortMapping >>> + >>> import argparse >>> import psutil >>> import re >>> @@ -280,7 +282,7 @@ int kretprobe__ovs_dp_upcall(struct pt_regs *ctx) >>> # >>> # print_key() >>> # >>> -def print_key(event): >>> +def print_key(event, decode_dump): >>> if event.key_size < options.flow_key_size: >>> key_len = event.key_size >>> else: >>> @@ -301,39 +303,46 @@ def print_key(event): >>> dump=True), >>> flags=re.MULTILINE)) >>> >>> - if options.flow_key_decode == 'nlraw': >>> - nla = decode_nlm(bytes(event.key)[:key_len]) >>> - else: >>> - nla = decode_nlm(bytes(event.key)[:key_len], dump=False) >>> - >>> - if "OVS_KEY_ATTR_IN_PORT" in nla: >>> - port = struct.unpack("=I", nla["OVS_KEY_ATTR_IN_PORT"])[0] >>> - else: >>> - port = "Unknown" >>> - >>> - return port >>> + if options.flow_key_decode == "nlraw": >>> + for line in decode_dump: >>> + print(line) >>> >>> >>> # >>> # print_event() >>> # >>> def print_event(ctx, data, size): >>> - event = b['events'].event(data) >>> - print("{:<18.9f} {:<4} {:<16} {:<10} {:<32} {:<4} {:<10} {:<12} {:<8}". >>> - format(event.ts / 1000000000, >>> - event.cpu, >>> - event.comm.decode("utf-8"), >>> - event.pid, >>> - event.dpif_name.decode("utf-8"), >>> - event.upcall_type, >>> - event.pkt_size, >>> - event.key_size, >>> - event.result)) >>> + event = b["events"].event(data) >>> + dp = event.dpif_name.decode("utf-8") >>> + >>> + nla, key_dump = decode_nlm( >>> + bytes(event.key)[: min(event.key_size, options.flow_key_size)] >>> + ) >>> + if "OVS_KEY_ATTR_IN_PORT" in nla: >>> + port_no = struct.unpack("=I", nla["OVS_KEY_ATTR_IN_PORT"])[0] >>> + port = dp_map.get_port_name(dp.partition("@")[-1], port_no) >> >> What if the port can not be found? Should we re-populate the cache? or just >> dump the internal port number? >> > > Maybe the automatic re-populating can be added inside the port cache > itself? And if it still fails, we can just leave the port number, that's > a good idea. Sounds like a good abstraction. Make sure it’s not overwriting a static map that was put in place. Should we also add some logic to avoid constant cache updates? >>> + else: >>> + port = "Unknown" >>> + >> >> Adding this in this way seems like a hack, of splitting up print_ukey(), but >> I guess it would be ok for now until we move the decode_nlm to the usdt >> library as a proper object :) >> > > I agree. > >>> + print( >>> + "{:<18.9f} {:<4} {:<16} {:<10} {:<40} {:<4} {:<10} {:<12} {:<8}". >>> + format( >>> + event.ts / 1000000000, >>> + event.cpu, >>> + event.comm.decode("utf-8"), >>> + event.pid, >>> + f"{port} ({dp})", >> >> Not sure if we should start mixing f”” style with format(), or f”” in >> general? If we want f”” in general, we should probably replace all format() >> functions. >> > > I prefer f"" style because I find it easier to read, but you're right it > makes the style inconsistent. I can use "str.format" and then suggest a > global change. Yes, we should do that. I like the f”” style too, and we should be safe with 3.6 as we need 3.7 minimal according to our NEWS section. >>> + event.upcall_type, >>> + event.pkt_size, >>> + event.key_size, >>> + event.result, >>> + ) >>> + ) >>> >>> # >>> # Decode packet only if there is data >>> # >>> - port = print_key(event) >>> + print_key(event, key_dump) >>> >>> if event.pkt_size <= 0: >>> return >>> @@ -342,7 +351,7 @@ def print_event(ctx, data, size): >>> >>> if event.pkt_size < options.packet_size: >>> pkt_len = event.pkt_size >>> - pkt_data = bytes(event.pkt)[:event.pkt_size] >>> + pkt_data = bytes(event.pkt)[: event.pkt_size] >> >> Any reason for the extra space? I guess for slicing we do not need a space >> if there are no operations. > > No need, you're right. > >> >>> else: >>> pkt_len = options.packet_size >>> pkt_data = bytes(event.pkt) >>> @@ -369,23 +378,26 @@ def print_event(ctx, data, size): >>> # >>> # decode_nlm() >>> # >>> -def decode_nlm(msg, indent=4, dump=True): >>> +def decode_nlm(msg, indent=4): >>> bytes_left = len(msg) >>> result = {} >>> + dump = [] >>> >>> while bytes_left: >>> if bytes_left < 4: >>> - if dump: >>> - print("{}WARN: decode truncated; can't read header".format( >>> - ' ' * indent)) >>> + dump.append( >>> + "{}WARN: decode truncated; can't read header".format( >>> + " " * indent >>> + ) >>> + ) >>> break >>> >>> nla_len, nla_type = struct.unpack("=HH", msg[:4]) >>> >>> if nla_len < 4: >>> - if dump: >>> - print("{}WARN: decode truncated; nla_len < 4".format( >>> - ' ' * indent)) >>> + dump.append( >>> + "{}WARN: decode truncated; nla_len < 4".format(" " * >>> indent) >>> + ) >>> break >>> >>> nla_data = msg[4:nla_len] >>> @@ -397,16 +409,19 @@ def decode_nlm(msg, indent=4, dump=True): >>> else: >>> result[get_ovs_key_attr_str(nla_type)] = nla_data >>> >>> - if dump: >>> - print("{}nla_len {}, nla_type {}[{}], data: {}{}".format( >>> + dump.append( >>> + "{}nla_len {}, nla_type {}[{}], data: {}{}".format( >>> ' ' * indent, nla_len, get_ovs_key_attr_str(nla_type), >>> nla_type, >>> - "".join("{:02x} ".format(b) for b in nla_data), trunc)) >>> + "".join("{:02x} ".format(b) for b in nla_data), trunc) >>> + ) >>> >>> if trunc != "": >>> - if dump: >>> - print("{}WARN: decode truncated; nla_len > msg_len[{}] ". >>> - format(' ' * indent, bytes_left)) >>> + dump.append( >>> + "{}WARN: decode truncated; nla_len > msg_len[{}] ".format( >>> + " " * indent, bytes_left >>> + ) >>> + ) >>> break >>> >>> # update next offset, but make sure it's aligned correctly >>> @@ -414,7 +429,7 @@ def decode_nlm(msg, indent=4, dump=True): >>> msg = msg[next_offset:] >>> bytes_left -= next_offset >>> >>> - return result >>> + return result, dump >>> >>> >>> # >>> @@ -499,6 +514,9 @@ def main(): >>> # >>> global b >>> global options >>> + global dp_map >>> + >>> + dp_map = DpPortMapping() >>> >>> # >>> # Argument parsing >>> @@ -607,8 +625,8 @@ def main(): >>> # >>> # Print header >>> # >>> - print("{:<18} {:<4} {:<16} {:<10} {:<32} {:<4} {:<10} {:<12} >>> {:<8}".format( >>> - "TIME", "CPU", "COMM", "PID", "DPIF_NAME", "TYPE", "PKT_LEN", >>> + print("{:<18} {:<4} {:<16} {:<10} {:<40} {:<4} {:<10} {:<12} >>> {:<8}".format( >>> + "TIME", "CPU", "COMM", "PID", "PORT_NAME", "TYPE", "PKT_LEN", >>> "FLOW_KEY_LEN", "RESULT")) >>> >>> # >>> diff --git a/utilities/usdt-scripts/usdt_lib.py >>> b/utilities/usdt-scripts/usdt_lib.py >>> index da3fab2bf..9ad50c540 100644 >>> --- a/utilities/usdt-scripts/usdt_lib.py >>> +++ b/utilities/usdt-scripts/usdt_lib.py >>> @@ -33,12 +33,33 @@ class DpPortMapping: >>> """Override the internal cache map.""" >>> self.cache_map = cache_map >>> >>> - def get(self, dp, port_no): >>> - """Get the portname from a port number.""" >>> + def get_port_num(self, dp, port): >>> + """Get the port number from a port name.""" >>> if self.cache_map is None: >>> self._get_mapping() >>> >>> - return self.cache_map.get(dp, {}).get(port_no, None) >>> + return self.cache_map.get(dp, {}).get(port, None) >>> + >>> + def get_port_name(self, dp, port_no): >>> + """Get the port name from a port number.""" >>> + if self.cache_map is None: >>> + self._get_mapping() >>> + >>> + if not self.cache_map.get(dp): >>> + return None >>> + >>> + for name, num in self.cache_map[dp].items(): >>> + if num == port_no: >>> + return name >>> + >>> + return None >>> + >>> + def get_port_number(self, dp, port): >> >> Now we have both get_port_num() and get_port_number(). I think we should >> only keep the latter one. >> > > Agree. > >>> + """Get the port number from a port name.""" >>> + if self.cache_map is None: >>> + self._get_mapping() >>> + >>> + return self.cache_map.get(dp, {}).get(port, None) >>> >>> def _get_mapping(self): >>> """Get the datapath port mapping from the running OVS.""" >>> -- >>> 2.48.1 >>> >>> _______________________________________________ >>> dev mailing list >>> [email protected] >>> https://mail.openvswitch.org/mailman/listinfo/ovs-dev >> _______________________________________________ dev mailing list [email protected] https://mail.openvswitch.org/mailman/listinfo/ovs-dev
