On 24 Apr 2024, at 21:53, Adrian Moreno wrote: > This simple program reads from psample and prints the packets to stdout. > It's useful for quickly collecting sampled packets.
See some comments below, did not review the actual sample application in detail. //Eelco > Signed-off-by: Adrian Moreno <amore...@redhat.com> > --- > Documentation/automake.mk | 1 + > Documentation/conf.py | 2 + > Documentation/ref/index.rst | 1 + > Documentation/ref/ovs-psample.8.rst | 47 ++++ > include/linux/automake.mk | 1 + > include/linux/psample.h | 64 ++++++ > rhel/openvswitch-fedora.spec.in | 2 + > rhel/openvswitch.spec.in | 1 + > utilities/automake.mk | 9 + > utilities/ovs-psample.c | 330 ++++++++++++++++++++++++++++ > 10 files changed, 458 insertions(+) > create mode 100644 Documentation/ref/ovs-psample.8.rst > create mode 100644 include/linux/psample.h > create mode 100644 utilities/ovs-psample.c > > diff --git a/Documentation/automake.mk b/Documentation/automake.mk > index 47d2e336a..c22facfd6 100644 > --- a/Documentation/automake.mk > +++ b/Documentation/automake.mk > @@ -165,6 +165,7 @@ RST_MANPAGES = \ > ovs-l3ping.8.rst \ > ovs-parse-backtrace.8.rst \ > ovs-pki.8.rst \ > + ovs-psample.8.rst \ > ovs-tcpdump.8.rst \ > ovs-tcpundump.1.rst \ > ovs-test.8.rst \ > diff --git a/Documentation/conf.py b/Documentation/conf.py > index 15785605a..75efed2fc 100644 > --- a/Documentation/conf.py > +++ b/Documentation/conf.py > @@ -134,6 +134,8 @@ _man_pages = [ > u'convert "tcpdump -xx" output to hex strings'), > ('ovs-test.8', > u'Check Linux drivers for performance, vlan and L3 tunneling problems'), > + ('ovs-psample.8', > + u'Print packets sampled by psample'), > ('ovs-vlan-test.8', > u'Check Linux drivers for problems with vlan traffic'), > ('ovsdb-server.7', > diff --git a/Documentation/ref/index.rst b/Documentation/ref/index.rst > index 03ada932f..d1076435a 100644 > --- a/Documentation/ref/index.rst > +++ b/Documentation/ref/index.rst > @@ -46,6 +46,7 @@ time: > ovs-pki.8 > ovs-sim.1 > ovs-parse-backtrace.8 > + ovs-psample.8 > ovs-tcpdump.8 > ovs-tcpundump.1 > ovs-test.8 > diff --git a/Documentation/ref/ovs-psample.8.rst > b/Documentation/ref/ovs-psample.8.rst > new file mode 100644 > index 000000000..c849c83d8 > --- /dev/null > +++ b/Documentation/ref/ovs-psample.8.rst > @@ -0,0 +1,47 @@ > +========== > +ovs-sample Interesting, here you call it all ovs-sample here, which is like ;) You could add options like —local-kernel --local-userspace (--ipfix, --sflow) and it will eventually work on each datapath. If you want to keep this a separate psample utility, I would not have ovs in the name, as it's rather general and not OVS specific. Maybe just psample/psample_mon, as we also have nlmon. > +========== > + > +Synopsis > +======== > + > +``ovs-sample`` > +[``--group=``<group> | ``-g`` <group>] > + > +``ovs-sample --help`` > + > +``ovs-sample --version`` > + > +Description > +=========== > + > +Open vSwitch per-flow sampling can be configured to emit the samples > +through the ``psample`` netlink multicast group. > + > +Such sampled traffic contains, apart from the packet, some metadata that > +gives further information about the packet sample. More specifically, OVS > +inserts the ``observation_domain_id`` and the ``observation_point_id`` that > +where provided in the sample action (see ``ovs-actions(7)``). > + > +the ``ovs-sample`` program provides a simple way of joining the psample > +multicast group and printing the sampled packets. > + > + > +Options > +======= > + > +.. option:: ``-g`` <group> or ``--group`` <group> > + > + Tells ``ovs-sample`` to filter out samples that don't belong to that group. > + > + Different ``Flow_Sample_Collector_Set`` entries can be configured with > + different ``group_id`` values (see ``ovs-vswitchd.conf.db(5)``). This > option > + helps focusing the output on the relevant samples. > + > +.. option:: -h, --help > + > + Prints a brief help message to the console. > + > +.. option:: -V, --version > + > + Prints version information to the console. > diff --git a/include/linux/automake.mk b/include/linux/automake.mk > index cdae5eedc..ac306b53c 100644 > --- a/include/linux/automake.mk > +++ b/include/linux/automake.mk > @@ -3,6 +3,7 @@ noinst_HEADERS += \ > include/linux/netfilter/nf_conntrack_sctp.h \ > include/linux/openvswitch.h \ > include/linux/pkt_cls.h \ > + include/linux/psample.h \ > include/linux/gen_stats.h \ > include/linux/tc_act/tc_mpls.h \ > include/linux/tc_act/tc_pedit.h \ > diff --git a/include/linux/psample.h b/include/linux/psample.h > new file mode 100644 > index 000000000..eb642f875 > --- /dev/null > +++ b/include/linux/psample.h > @@ -0,0 +1,64 @@ > +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ > +#ifndef __LINUX_PSAMPLE_H > +#define __LINUX_PSAMPLE_H > + > +enum { > + PSAMPLE_ATTR_IIFINDEX, > + PSAMPLE_ATTR_OIFINDEX, > + PSAMPLE_ATTR_ORIGSIZE, > + PSAMPLE_ATTR_SAMPLE_GROUP, > + PSAMPLE_ATTR_GROUP_SEQ, > + PSAMPLE_ATTR_SAMPLE_RATE, > + PSAMPLE_ATTR_DATA, > + PSAMPLE_ATTR_GROUP_REFCOUNT, > + PSAMPLE_ATTR_TUNNEL, > + > + PSAMPLE_ATTR_PAD, > + PSAMPLE_ATTR_OUT_TC, /* u16 */ > + PSAMPLE_ATTR_OUT_TC_OCC, /* u64, bytes */ > + PSAMPLE_ATTR_LATENCY, /* u64, nanoseconds */ > + PSAMPLE_ATTR_TIMESTAMP, /* u64, nanoseconds */ > + PSAMPLE_ATTR_PROTO, /* u16 */ > + PSAMPLE_ATTR_USER_COOKIE, > + > + __PSAMPLE_ATTR_MAX > +}; > + > +enum psample_command { > + PSAMPLE_CMD_SAMPLE, > + PSAMPLE_CMD_GET_GROUP, > + PSAMPLE_CMD_NEW_GROUP, > + PSAMPLE_CMD_DEL_GROUP, > + PSAMPLE_CMD_SAMPLE_FILTER_SET, > +}; > + > +enum psample_tunnel_key_attr { > + PSAMPLE_TUNNEL_KEY_ATTR_ID, /* be64 Tunnel ID */ > + PSAMPLE_TUNNEL_KEY_ATTR_IPV4_SRC, /* be32 src IP address. */ > + PSAMPLE_TUNNEL_KEY_ATTR_IPV4_DST, /* be32 dst IP address. */ > + PSAMPLE_TUNNEL_KEY_ATTR_TOS, /* u8 Tunnel IP ToS. */ > + PSAMPLE_TUNNEL_KEY_ATTR_TTL, /* u8 Tunnel IP TTL. */ > + PSAMPLE_TUNNEL_KEY_ATTR_DONT_FRAGMENT, /* No argument, set DF. */ > + PSAMPLE_TUNNEL_KEY_ATTR_CSUM, /* No argument. CSUM > packet. */ > + PSAMPLE_TUNNEL_KEY_ATTR_OAM, /* No argument. OAM frame. > */ > + PSAMPLE_TUNNEL_KEY_ATTR_GENEVE_OPTS, /* Array of Geneve options. > */ > + PSAMPLE_TUNNEL_KEY_ATTR_TP_SRC, /* be16 src Transport Port. > */ > + PSAMPLE_TUNNEL_KEY_ATTR_TP_DST, /* be16 dst Transport Port. > */ > + PSAMPLE_TUNNEL_KEY_ATTR_VXLAN_OPTS, /* Nested VXLAN opts* */ > + PSAMPLE_TUNNEL_KEY_ATTR_IPV6_SRC, /* struct in6_addr src IPv6 > address. */ > + PSAMPLE_TUNNEL_KEY_ATTR_IPV6_DST, /* struct in6_addr dst IPv6 > address. */ > + PSAMPLE_TUNNEL_KEY_ATTR_PAD, > + PSAMPLE_TUNNEL_KEY_ATTR_ERSPAN_OPTS, /* struct erspan_metadata */ > + PSAMPLE_TUNNEL_KEY_ATTR_IPV4_INFO_BRIDGE, /* No argument. > IPV4_INFO_BRIDGE mode.*/ > + __PSAMPLE_TUNNEL_KEY_ATTR_MAX > +}; > + > +/* Can be overridden at runtime by module option */ > +#define PSAMPLE_ATTR_MAX (__PSAMPLE_ATTR_MAX - 1) > + > +#define PSAMPLE_NL_MCGRP_CONFIG_NAME "config" > +#define PSAMPLE_NL_MCGRP_SAMPLE_NAME "packets" > +#define PSAMPLE_GENL_NAME "psample" > +#define PSAMPLE_GENL_VERSION 1 > + > +#endif > diff --git a/rhel/openvswitch-fedora.spec.in b/rhel/openvswitch-fedora.spec.in > index 94b6d7431..9ee66180c 100644 > --- a/rhel/openvswitch-fedora.spec.in > +++ b/rhel/openvswitch-fedora.spec.in > @@ -471,6 +471,7 @@ fi > %{_bindir}/ovs-dpctl > %{_bindir}/ovs-dpctl-top > %{_bindir}/ovs-ofctl > +%{_bindir}/ovs-psample > %{_bindir}/ovs-vsctl > %{_bindir}/ovsdb-client > %{_bindir}/ovsdb-tool > @@ -502,6 +503,7 @@ fi > %{_mandir}/man8/ovs-kmod-ctl.8* > %{_mandir}/man8/ovs-ofctl.8* > %{_mandir}/man8/ovs-pki.8* > +%{_mandir}/man8/ovs-psample.8* > %{_mandir}/man8/ovs-vsctl.8* > %{_mandir}/man8/ovs-vswitchd.8* > %{_mandir}/man8/ovs-parse-backtrace.8* > diff --git a/rhel/openvswitch.spec.in b/rhel/openvswitch.spec.in > index 9903dd10a..06d79b921 100644 > --- a/rhel/openvswitch.spec.in > +++ b/rhel/openvswitch.spec.in > @@ -195,6 +195,7 @@ exit 0 > /usr/bin/ovs-pki > /usr/bin/ovs-tcpdump > /usr/bin/ovs-tcpundump > +/usr/bin/ovs-sample > /usr/bin/ovs-vlan-test > /usr/bin/ovs-vsctl > /usr/bin/ovsdb-client > diff --git a/utilities/automake.mk b/utilities/automake.mk > index 146b8c37f..0d07ff868 100644 > --- a/utilities/automake.mk > +++ b/utilities/automake.mk > @@ -4,6 +4,7 @@ bin_PROGRAMS += \ > utilities/ovs-dpctl \ > utilities/ovs-ofctl \ > utilities/ovs-vsctl > + Guess this was added by accident. > bin_SCRIPTS += utilities/ovs-docker \ > utilities/ovs-pki \ > utilities/ovs-pcap \ > @@ -132,6 +133,14 @@ utilities_ovs_ofctl_LDADD = \ > ofproto/libofproto.la \ > lib/libopenvswitch.la > > +if LINUX > +bin_PROGRAMS += utilities/ovs-psample > +utilities_ovs_psample_SOURCES = utilities/ovs-psample.c > +utilities_ovs_psample_LDADD = \ > + ofproto/libofproto.la \ > + lib/libopenvswitch.la > +endif > + > utilities_ovs_vsctl_SOURCES = utilities/ovs-vsctl.c > utilities_ovs_vsctl_LDADD = lib/libopenvswitch.la > > diff --git a/utilities/ovs-psample.c b/utilities/ovs-psample.c > new file mode 100644 > index 000000000..9127d065c > --- /dev/null > +++ b/utilities/ovs-psample.c > @@ -0,0 +1,330 @@ > +/* > + * Copyright (c) 2024 Red Hat, Inc. > + * > + * Licensed under the Apache License, Version 2.0 (the "License"); > + * you may not use this file except in compliance with the License. > + * You may obtain a copy of the License at: > + * > + * http://www.apache.org/licenses/LICENSE-2.0 > + * > + * Unless required by applicable law or agreed to in writing, software > + * distributed under the License is distributed on an "AS IS" BASIS, > + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. > + * See the License for the specific language governing permissions and > + * limitations under the License. > + */ > + > +#include <config.h> > + > +#include <errno.h> > +#include <getopt.h> > +#include <stdbool.h> > +#include <stdio.h> > +#include <stdlib.h> > + > +#include <linux/psample.h> > + > +#include "command-line.h" > +#include "dp-packet.h" > +#include "util.h" > +#include "netlink.h" > +#include "netlink-socket.h" > +#include "openvswitch/ofp-actions.h" > +#include "openvswitch/ofp-print.h" > +#include "openvswitch/types.h" > +#include "openvswitch/uuid.h" > +#include "openvswitch/vlog.h" > + > +VLOG_DEFINE_THIS_MODULE(ovs_sample); > + > +/* -g, --group: Group id filter option */ > +static uint32_t group_id = 0; > +static bool has_filter; > + > +static int psample_family = 0; > + > +OVS_NO_RETURN static void usage(void) > +{ > + printf("%s: OpenvSwitch psample viewer\n" > +"usage: %s [OPTIONS]\n" > +"\nOptions:\n" > +" -h, --help display this help message\n" > +" -t, --group=GROUP only display events from GROUP group_id\n" > +" -V, --version display %s version information\n", > + program_name, program_name, program_name); > + exit(EXIT_SUCCESS); > +} > + > +struct sample; > +static inline void sample_clear(struct sample *sample); > +static int parse_psample(struct ofpbuf *, struct sample *sample); > +static void psample_set_filter(struct nl_sock *sock); > +static void parse_options(int argc, char *argv[]); > +static int connect_psample_socket(struct nl_sock **sock); > +static void run(struct nl_sock *sock); > + > +int > +main(int argc OVS_UNUSED, char *argv[] OVS_UNUSED) > +{ > + struct nl_sock *sock; > + int error; > + > + parse_options(argc, argv); > + > + error = connect_psample_socket(&sock); > + if (error) { > + return error; > + } > + > + run(sock); > +} > + > +static void parse_options(int argc, char *argv[]) > +{ > + static const struct option long_options[] = { > + {"group", required_argument, NULL, 'g'}, > + {"help", no_argument, NULL, 'h'}, > + {"version", no_argument, NULL, 'V'}, > + {NULL, 0, NULL, 0}, > + }; > + > + char *short_options_ = > + ovs_cmdl_long_options_to_short_options(long_options); > + char *short_options = xasprintf("+%s", short_options_); Why not move the definition to a separate line to avoid the odd line wrapping (and maybe free early); static const struct option long_options[] = { {"group", required_argument, NULL, 'g'}, {"help", no_argument, NULL, 'h'}, {"version", no_argument, NULL, 'V'}, {NULL, 0, NULL, 0}, }; char *tmp_short_options, *short_options; tmp_short_options = ovs_cmdl_long_options_to_short_options(long_options); short_options = xasprintf("+%s", tmp_short_options); free(tmp_short_options); > + for (;;) { > + int option; > + > + option = getopt_long(argc, argv, short_options, long_options, NULL); > + if (option == -1) { > + break; > + } New line? > + switch (option) { > + case 'g': > + { If we do this we would have the { right after the case statement and do it for all branches. For example: switch (action->type) { case TC_ACT_VLAN_POP: { nl_msg_put_flag(buf, OVS_ACTION_ATTR_POP_VLAN); } break; case TC_ACT_VLAN_PUSH: { struct ovs_action_push_vlan *push; push = nl_msg_put_unspec_zero(buf, OVS_ACTION_ATTR_PUSH_VLAN, sizeof *push); push->vlan_tpid = action->vlan.vlan_push_tpid; push->vlan_tci = htons(action->vlan.vlan_push_id | (action->vlan.vlan_push_prio << 13) | VLAN_CFI); } break; > + char *endptr; > + > + if (has_filter) { > + ovs_fatal(0, "-g or --group may be specified only once"); > + } > + > + group_id = strtol(optarg, &endptr, 10); > + if (endptr - optarg != strlen(optarg)) { > + ovs_fatal(0, "-g or --group expects a valid decimal" > + " 32-bit number"); > + } > + > + has_filter = true; > + } > + break; > + case 'h': > + usage(); > + break; > + > + case 'V': > + ovs_print_version(0, 0); In theory, we are not cleaning up the memory ;) > + exit(EXIT_SUCCESS); > + > + case '?': > + exit(EXIT_FAILURE); > + > + default: > + OVS_NOT_REACHED(); > + } > + } > + free(short_options_); > + free(short_options); > +} > + > +static int connect_psample_socket(struct nl_sock **sock) > +{ > + unsigned int psample_packet_mcgroup; > + int error; > + > + error = nl_lookup_genl_family(PSAMPLE_GENL_NAME , &psample_family); > + if (error) { > + VLOG_ERR("PSAMPLE_GENL_NAME not found: %i", error); Should we exit? > + } > + > + error = nl_lookup_genl_mcgroup(PSAMPLE_GENL_NAME, > + PSAMPLE_NL_MCGRP_SAMPLE_NAME, > + &psample_packet_mcgroup); > + if (error) { > + VLOG_ERR("psample packet multicast group not found: %i", error); > + return error; > + } > + > + error = nl_sock_create(NETLINK_GENERIC, sock); > + if (error) { > + VLOG_ERR("cannot create netlink socket: %i ", error); > + return error; > + } > + > + nl_sock_listen_all_nsid(*sock, true); > + > + psample_set_filter(*sock); > + > + error = nl_sock_join_mcgroup(*sock, psample_packet_mcgroup); > + if (error) { > + nl_sock_destroy(*sock); > + *sock = NULL; > + VLOG_ERR("cannot join psample multicast group: %i", error); > + return error; > + } > + return 0; > +} > + > +/* Internal representation of a sample. */ > +struct sample { > + struct dp_packet packet; > + uint32_t group_id; > + uint32_t obs_domain_id; > + uint32_t obs_point_id; > + bool has_cookie; > +}; > + > +static inline void > +sample_clear(struct sample *sample) { > + sample->group_id = 0; > + sample->obs_domain_id = 0; > + sample->obs_point_id = 0; > + sample->has_cookie = false; > + dp_packet_clear(&sample->packet); > +} > + > +static int > +parse_psample(struct ofpbuf *buf, struct sample *sample) { > + static const struct nl_policy psample_packet_policy[] = { > + [PSAMPLE_ATTR_SAMPLE_GROUP] = { .type = NL_A_U32 }, > + [PSAMPLE_ATTR_DATA] = { .type = NL_A_UNSPEC, > + .optional = true, }, > + [PSAMPLE_ATTR_USER_COOKIE] = { .type = NL_A_UNSPEC, > + .optional = true }, > + }; > + > + struct ofpbuf b = ofpbuf_const_initializer(buf->data, buf->size); > + struct nlmsghdr *nlmsg = ofpbuf_try_pull(&b, sizeof *nlmsg); > + struct genlmsghdr *genl = ofpbuf_try_pull(&b, sizeof *genl); > + struct nlattr *attr; > + const char *cookie; > + > + struct nlattr *a[ARRAY_SIZE(psample_packet_policy)]; > + if (!nlmsg || !genl > + || !nl_policy_parse(&b, 0, psample_packet_policy, a, > + ARRAY_SIZE(psample_packet_policy))) { > + return EINVAL; > + } > + > + attr = a[PSAMPLE_ATTR_DATA]; > + if (attr) { > + dp_packet_push(&sample->packet, nl_attr_get(attr), > + nl_attr_get_size(attr)); > + } > + > + sample->group_id = nl_attr_get_u32(a[PSAMPLE_ATTR_SAMPLE_GROUP]); > + > + attr = a[PSAMPLE_ATTR_USER_COOKIE]; > + if (attr && nl_attr_get_size(attr) == 8) { > + cookie = nl_attr_get(attr); > + sample->has_cookie = true; > + sample->obs_domain_id = (uint32_t) *(&cookie[0]); > + sample->obs_point_id = (uint32_t) *(&cookie[4]); > + } > + return 0; > +} > + > +static int _psample_set_filter(struct nl_sock *sock, uint32_t group, > + bool valid) > +{ > + uint64_t stub[512 / 8]; > + struct ofpbuf buf; > + int error; > + > + ofpbuf_use_stub(&buf, stub, sizeof stub); > + > + nl_msg_put_genlmsghdr(&buf, 0, psample_family, NLM_F_REQUEST, > + PSAMPLE_CMD_SAMPLE_FILTER_SET, 1); > + if (valid) { > + nl_msg_put_u32(&buf, PSAMPLE_ATTR_SAMPLE_GROUP, group); > + } > + > + error = nl_sock_send(sock, &buf, false); > + if (error) { > + return error; > + } > + > + ofpbuf_clear(&buf); > + error = nl_sock_recv(sock, &buf, NULL, false); > + if (!error) { > + struct nlmsghdr *h = ofpbuf_at(&buf, 0, NLMSG_HDRLEN); New line? > + if (h->nlmsg_type == NLMSG_ERROR) { > + const struct nlmsgerr *e; > + e = ofpbuf_at(&buf, NLMSG_HDRLEN, > + NLMSG_ALIGN(sizeof(struct nlmsgerr))); > + if (!e) Use {} even for single line. > + return EINVAL; > + if (e && e->error < 0) > + return -e->error; > + } > + } else if (error != EAGAIN) { > + return error; > + } > + return 0; > +} > + > +static void psample_set_filter(struct nl_sock *sock) > +{ > + int error; New line? > + if (has_filter) { > + error = _psample_set_filter(sock, group_id, true); > + if (error) { > + VLOG_WARN("Failed to install in-kernel filter (%s). " > + "Falling back to userspace filtering.", > + ovs_strerror(error)); > + } > + } > +} > + > +static void run(struct nl_sock *sock) > +{ > + static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(10, 10); > + int error; > + > + struct sample sample = {}; Move this up above the error definition. > + dp_packet_init(&sample.packet, 1500); > + > + for (;;) { > + uint64_t buf_stub[4096 / 8]; > + struct ofpbuf buf; > + > + sample_clear(&sample); > + > + ofpbuf_use_stub(&buf, buf_stub, sizeof buf_stub); > + error = nl_sock_recv(sock, &buf, NULL, true); > + > + if (error == ENOBUFS) { > + fprintf(stderr, "[missed events]\n"); > + continue; > + } else if (error == EAGAIN) { > + continue; > + } else if (error) { > + VLOG_ERR_RL(&rl, "error reading samples: %i", error); > + } > + > + error = parse_psample(&buf, &sample); > + if (error) > + VLOG_ERR_RL(&rl, "error parsing samples: %i", error); > + > + if (!has_filter || sample.group_id == group_id) { > + fprintf(stdout, "group_id=0x%"PRIx32" ", > + sample.group_id); > + if (sample.has_cookie) { > + fprintf(stdout, > + "obs_domain=0x%"PRIx32",obs_point=0x%"PRIx32" ", > + sample.obs_domain_id, sample.obs_point_id); > + } > + ofp_print_dp_packet(stdout, &sample.packet); > + } > + } > +} > + > -- > 2.44.0 _______________________________________________ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev