On 11/01/2019 01:34, Darrell Ball wrote:
> thanks; not a full review

Thanks, responses in-line.

> 
> On Wed, Jan 9, 2019 at 10:06 AM Tiago Lam <tiago....@intel.com
> <mailto:tiago....@intel.com>> wrote:
> 
>     Previous commits have added support to the dp_packet API to handle
>     multi-segmented packets, where data is not stored contiguously in
>     memory. However, in some cases, it is inevitable and data must be
>     provided contiguously. Examples of such cases are when performing csums
>     over the entire packet data, or when write()'ing to a file descriptor
>     (for a tap interface, for example). For such cases, the dp_packet API
>     has been extended to provide a way to transform a multi-segmented
>     DPBUF_DPDK packet into a DPBUF_MALLOC system packet (at the expense of a
>     copy of memory). If the packet's data is already stored in memory
>     contigously then there's no need to convert the packet.
> 
>     Thus, the main use cases that were assuming that a dp_packet's data is
>     always held contiguously in memory were changed to make use of the new
>     "linear functions" in the dp_packet API when there's a need to traverse
>     the entire's packet data. Per the example above, when the packet's data
>     needs to be write() to the tap's file descriptor, or when the conntrack
>     module needs to verify a packet's checksum, the data is now linearized.
> 
>     Additionally, the miniflow_extract() function has been modified to check
>     if the respective packet headers don't span across multiple mbufs. This
>     requirement is needed to guarantee that callers can assume headers are
>     always in contiguous memory.
> 
>     Signed-off-by: Tiago Lam <tiago....@intel.com
>     <mailto:tiago....@intel.com>>
>     ---
>      lib/bfd.c                     |   3 +-
>      lib/conntrack-private.h       |   4 +-
>      lib/conntrack.c               |  43 +++++--
>      lib/crc32c.c                  |  35 ++++--
>      lib/crc32c.h                  |   2 +
>      lib/dp-packet.c               |  18 +++
>      lib/dp-packet.h               | 267
>     ++++++++++++++++++++++++++++++------------
>      lib/dpif-netdev.c             |  13 +-
>      lib/dpif-netlink.c            |   5 +
>      lib/dpif.c                    |   9 ++
>      lib/flow.c                    |  97 ++++++++++++---
>      lib/flow.h                    |   2 +-
>      lib/mcast-snooping.c          |   2 +
>      lib/netdev-bsd.c              |   5 +
>      lib/netdev-dummy.c            |  13 +-
>      lib/netdev-linux.c            |  13 +-
>      lib/netdev-native-tnl.c       |  26 ++--
>      lib/odp-execute.c             |  12 +-
>      lib/ovs-lldp.c                |   3 +-
>      lib/packets.c                 |  85 ++++++++++++--
>      lib/packets.h                 |   7 ++
>      ofproto/ofproto-dpif-upcall.c |  20 +++-
>      ofproto/ofproto-dpif-xlate.c  |  32 ++++-
>      tests/test-rstp.c             |   8 +-
>      tests/test-stp.c              |   8 +-
>      25 files changed, 579 insertions(+), 153 deletions(-)
> 
>     diff --git a/lib/bfd.c b/lib/bfd.c
>     index cc8c685..12e076a 100644
>     --- a/lib/bfd.c
>     +++ b/lib/bfd.c
>     @@ -721,7 +721,8 @@ bfd_process_packet(struct bfd *bfd, const struct
>     flow *flow,
>          if (!msg) {
>              VLOG_INFO_RL(&rl, "%s: Received too-short BFD control
>     message (only "
>                           "%"PRIdPTR" bytes long, at least %d required).",
>     -                     bfd->name, (uint8_t *) dp_packet_tail(p) - l7,
>     +                     bfd->name, dp_packet_size(p) -
>     +                     (l7 - (uint8_t *) dp_packet_data(p)),
>                           BFD_PACKET_LEN);
>              goto out;
>          }
>     diff --git a/lib/conntrack-private.h b/lib/conntrack-private.h
>     index a344801..1be2df1 100644
>     --- a/lib/conntrack-private.h
>     +++ b/lib/conntrack-private.h
>     @@ -159,8 +159,8 @@ tcp_payload_length(struct dp_packet *pkt)
>      {
>          const char *tcp_payload = dp_packet_get_tcp_payload(pkt);
>          if (tcp_payload) {
>     -        return ((char *) dp_packet_tail(pkt) -
>     dp_packet_l2_pad_size(pkt)
>     -                - tcp_payload);
>     +        return dp_packet_l4_size(pkt) -
>     +               (tcp_payload - (char *) dp_packet_l4(pkt));
> 
> 
> why was this change needed ?

`dp_packet_get_tcp_payload()` already makes sure the payload is within
the a segment, returning NULL otherwise. So, while this change itself
isn't needed to make this code path correct, it helps with readability
in the future now with the adoption with multi-segment mbufs. Above, the
resulting addresses of `dp_packet_tail(pkt)` and `tcp_payload` may not
be in the same segment when using multi-segment mbufs.

>  
> 
>          } else {
>              return 0;
>          }
>     diff --git a/lib/conntrack.c b/lib/conntrack.c
>     index 6f6021a..0dd2dcc 100644
>     --- a/lib/conntrack.c
>     +++ b/lib/conntrack.c
>     @@ -636,12 +636,22 @@ reverse_pat_packet(struct dp_packet *pkt,
>     const struct conn *conn)
>      static void
>      reverse_nat_packet(struct dp_packet *pkt, const struct conn *conn)
>      {
>     -    char *tail = dp_packet_tail(pkt);
>     -    char pad = dp_packet_l2_pad_size(pkt);
>     +    char *tail;
>     +    char pad;
>          struct conn_key inner_key;
>          const char *inner_l4 = NULL;
>     -    uint16_t orig_l3_ofs = pkt->l3_ofs;
>     -    uint16_t orig_l4_ofs = pkt->l4_ofs;
>     +    uint16_t orig_l3_ofs;
>     +    uint16_t orig_l4_ofs;
>     +
>     +    /* We need the whole packet to parse the packet below */
>     +    if (!dp_packet_is_linear(pkt)) {
>     +        dp_packet_linearize(pkt);
>     +    }
>     +
>     +    tail = dp_packet_tail(pkt);
>     +    pad = dp_packet_l2_pad_size(pkt);
>     +    orig_l3_ofs = pkt->l3_ofs;
>     +    orig_l4_ofs = pkt->l4_ofs;
> 
> 
>          if (conn->key.dl_type == htons(ETH_TYPE_IP)) {
>              struct ip_header *nh = dp_packet_l3(pkt);
>     @@ -1323,6 +1333,7 @@ conntrack_execute(struct conntrack *ct, struct
>     dp_packet_batch *pkt_batch,
>                  write_ct_md(packet, zone, NULL, NULL, NULL);
>                  continue;
>              }
>     +
> 
> 
> pls don't make whitespace changes, especially for large patchsets

Sure. Will remove.

>  
> 
>              process_one(ct, packet, &ctx, zone, force, commit, now,
>     setmark,
>                          setlabel, nat_action_info, tp_src, tp_dst, helper);
>          }
>     @@ -1904,9 +1915,18 @@ static bool
>      conn_key_extract(struct conntrack *ct, struct dp_packet *pkt,
>     ovs_be16 dl_type,
>                       struct conn_lookup_ctx *ctx, uint16_t zone)
>      {
>     -    const struct eth_header *l2 = dp_packet_eth(pkt);
>     -    const struct ip_header *l3 = dp_packet_l3(pkt);
>     -    const char *l4 = dp_packet_l4(pkt);
>     +    const struct eth_header *l2;
>     +    const struct ip_header *l3;
>     +    const char *l4;
>     +
>     +    /* We need the whole packet to parse the packet below */
>     +    if (!dp_packet_is_linear(pkt)) {
>     +        dp_packet_linearize(pkt);
>     +    }
>     +
>     +    l2 = dp_packet_eth(pkt);
>     +    l3 = dp_packet_l3(pkt);
>     +    l4 = dp_packet_l4(pkt);
> 
> 
> iirc, the minimum segment size is 2k
> it seems a wrapper around csum_continue() is what is needed; is
> packet_csum_continue()  the right api ?

Likely. The analysis and implementation of moving conntrack.c to be
compliant with multi-segment mbufs hasn't been done for this patchset -
other than "linearize the packet once it gets to conntrack". This is
expected to have a performance penalty, if one uses the Userspace
connection tracker *and enables* multi-segment mbufs. This is also
mentioned in Documentation/topics/dpdk/jumbo-frames.rst.

>  
> 
> 
>          memset(ctx, 0, sizeof *ctx);
> 
>     @@ -3174,7 +3194,7 @@ handle_ftp_ctl(struct conntrack *ct, const
>     struct conn_lookup_ctx *ctx,
>                     const struct conn *conn_for_expectation,
>                     long long now, enum ftp_ctl_pkt ftp_ctl, bool nat)
>      {
>     -    struct ip_header *l3_hdr = dp_packet_l3(pkt);
>     +    struct ip_header *l3_hdr;
>          ovs_be32 v4_addr_rep = 0;
>          struct ct_addr v6_addr_rep;
>          size_t addr_offset_from_ftp_data_start = 0;
>     @@ -3183,6 +3203,13 @@ handle_ftp_ctl(struct conntrack *ct, const
>     struct conn_lookup_ctx *ctx,
>          bool do_seq_skew_adj = true;
>          enum ct_alg_mode mode = CT_FTP_MODE_ACTIVE;
> 
>     +    /* We need the whole packet to parse the packet below */
>     +    if (!dp_packet_is_linear(pkt)) {
>     +        dp_packet_linearize(pkt);
>     +    }
>     +
>     +    l3_hdr = dp_packet_l3(pkt);
>     +
>          if (detect_ftp_ctl_type(ctx, pkt) != ftp_ctl) {
>              return;
>          }
>     diff --git a/lib/crc32c.c b/lib/crc32c.c
>     index e8dd6ee..dd5bb9a 100644
>     --- a/lib/crc32c.c
>     +++ b/lib/crc32c.c
>     @@ -132,28 +132,39 @@ static const uint32_t crc32Table[256] = {
>          0xBE2DA0A5L, 0x4C4623A6L, 0x5F16D052L, 0xAD7D5351L
>      };
> 
>     -/*
>     - * Compute a CRC32c checksum as per the SCTP requirements in
>     RFC4960. This
>     - * includes beginning with a checksum of all ones, and returning
>     the negated
>     - * CRC. Unlike the RFC, we return the checksum in network byte-order.
>     - */
>     -ovs_be32
>     -crc32c(const uint8_t *data, size_t size)
>     +uint32_t
>     +crc32c_continue(uint32_t partial, const uint8_t *data, size_t size)
>      {
>     -    uint32_t crc = 0xffffffffL;
>     -
>          while (size--) {
>     -        crc = crc32Table[(crc ^ *data++) & 0xff] ^ (crc >> 8);
>     +        partial = crc32Table[(partial ^ *data++) & 0xff] ^ (partial
>     >> 8);
>          }
> 
>     +    return partial;
>     +}
>     +
>     +ovs_be32
>     +crc32c_finish(uint32_t partial)
>     +{
>          /* The result of this CRC calculation provides us a value in
>     the reverse
>           * byte-order as compared with our architecture. On big-endian
>     systems,
>           * this is opposite to our return type. So, to return a big-endian
>           * value, we must swap the byte-order. */
>      #if defined(WORDS_BIGENDIAN)
>     -    crc = uint32_byteswap(crc);
>     +    crc = uint32_byteswap(partial);
>      #endif
> 
>          /* Our value is in network byte-order. OVS_FORCE keeps sparse
>     happy. */
>     -    return (OVS_FORCE ovs_be32) ~crc;
>     +    return (OVS_FORCE ovs_be32) ~partial;
>     +}
>     +
>     +/*
>     + * Compute a CRC32c checksum as per the SCTP requirements in
>     RFC4960. This
>     + * includes beginning with a checksum of all ones, and returning
>     the negated
>     + * CRC. Unlike the RFC, we return the checksum in network byte-order.
>     + */
>     +ovs_be32
>     +crc32c(const uint8_t *data, size_t size)
>     +{
>     +    uint32_t crc = 0xffffffffL;
>     +    return crc32c_finish(crc32c_continue(crc, data, size));
>      }
>     diff --git a/lib/crc32c.h b/lib/crc32c.h
>     index 92c7d7f..17c8190 100644
>     --- a/lib/crc32c.h
>     +++ b/lib/crc32c.h
>     @@ -20,6 +20,8 @@
> 
>      #include "openvswitch/types.h"
> 
>     +uint32_t crc32c_continue(uint32_t partial, const uint8_t *data,
>     size_t size);
>     +ovs_be32 crc32c_finish(uint32_t partial);
>      ovs_be32 crc32c(const uint8_t *data, size_t);
> 
>      #endif /* crc32c.h */
>     diff --git a/lib/dp-packet.c b/lib/dp-packet.c
>     index 4e4b8fb..32cc881 100644
>     --- a/lib/dp-packet.c
>     +++ b/lib/dp-packet.c
>     @@ -103,6 +103,9 @@ void
>      dp_packet_init_dpdk(struct dp_packet *b)
>      {
>          b->source = DPBUF_DPDK;
>     +#ifdef DPDK_NETDEV
>     +    b->mstate = NULL;
>     +#endif
>      }
> 
>      /* Initializes 'b' as an empty dp_packet with an initial capacity
>     of 'size'
>     @@ -120,6 +123,21 @@ dp_packet_uninit(struct dp_packet *b)
>          if (b) {
>              if (b->source == DPBUF_MALLOC) {
>                  free(dp_packet_base(b));
>     +
>     +#ifdef DPDK_NETDEV
>     +            /* Packet has been "linearized" */
>     +            if (b->mstate) {
>     +                b->source = DPBUF_DPDK;
>     +                b->mbuf.buf_addr = b->mstate->addr;
>     +                b->mbuf.buf_len = b->mstate->len;
>     +                b->mbuf.data_off = b->mstate->off;
>     +
>     +                free(b->mstate);
>     +                b->mstate = NULL;
>     +
>     +                free_dpdk_buf((struct dp_packet *) b);
>     +            }
>     +#endif
>              } else if (b->source == DPBUF_DPDK) {
>      #ifdef DPDK_NETDEV
>                  /* If this dp_packet was allocated by DPDK it must have
>     been
>     diff --git a/lib/dp-packet.h b/lib/dp-packet.h
>     index a003546..970aaf2 100644
>     --- a/lib/dp-packet.h
>     +++ b/lib/dp-packet.h
>     @@ -27,7 +27,6 @@
> 
>      #include "netdev-dpdk.h"
>      #include "openvswitch/list.h"
>     -#include "packets.h"
>      #include "util.h"
>      #include "flow.h"
> 
>     @@ -46,6 +45,16 @@ enum OVS_PACKED_ENUM dp_packet_source {
> 
>      #define DP_PACKET_CONTEXT_SIZE 64
> 
>     +#ifdef DPDK_NETDEV
>     +/* Struct to save data for when a DPBUF_DPDK packet is converted to
>     + * DPBUF_MALLOC. */
>     +struct mbuf_state {
>     +    void *addr;
>     +    uint16_t len;
>     +    uint16_t off;
>     +};
>     +#endif
>     +
>      /* Buffer for holding packet data.  A dp_packet is automatically
>     reallocated
>       * as necessary if it grows too large for the available memory.
>       * By default the packet type is set to Ethernet (PT_ETH).
>     @@ -53,6 +62,7 @@ enum OVS_PACKED_ENUM dp_packet_source {
>      struct dp_packet {
>      #ifdef DPDK_NETDEV
>          struct rte_mbuf mbuf;       /* DPDK mbuf */
>     +    struct mbuf_state *mstate;  /* Used when packet has been
>     "linearized" */
>      #else
>          void *base_;                /* First byte of allocated space. */
>          uint16_t allocated_;        /* Number of bytes allocated. */
>     @@ -85,6 +95,9 @@ static inline void dp_packet_set_data(struct
>     dp_packet *, void *);
>      static inline void *dp_packet_base(const struct dp_packet *);
>      static inline void dp_packet_set_base(struct dp_packet *, void *);
> 
>     +static inline bool dp_packet_is_linear(const struct dp_packet *);
>     +static inline void dp_packet_linearize(struct dp_packet *);
>     +
>      static inline uint32_t dp_packet_size(const struct dp_packet *);
>      static inline void dp_packet_set_size(struct dp_packet *, uint32_t);
> 
>     @@ -101,6 +114,7 @@ static inline void *dp_packet_l2_5(const struct
>     dp_packet *);
>      static inline void dp_packet_set_l2_5(struct dp_packet *, void *);
>      static inline void *dp_packet_l3(const struct dp_packet *);
>      static inline void dp_packet_set_l3(struct dp_packet *, void *);
>     +static inline size_t dp_packet_l3_size(const struct dp_packet *);
>      static inline void *dp_packet_l4(const struct dp_packet *);
>      static inline void dp_packet_set_l4(struct dp_packet *, void *);
>      static inline size_t dp_packet_l4_size(const struct dp_packet *);
>     @@ -137,6 +151,11 @@ static inline void *dp_packet_at(const struct
>     dp_packet *, size_t offset,
>                                       size_t size);
>      static inline void *dp_packet_at_assert(const struct dp_packet *,
>                                              size_t offset, size_t size);
>     +
>     +static inline void
>     +dp_packet_copy_from_offset(const struct dp_packet *b, size_t offset,
>     +                           size_t size, void *buf);
>     +
>      #ifdef DPDK_NETDEV
>      static inline const struct rte_mbuf *
>      dp_packet_mbuf_from_offset(const struct dp_packet *b, size_t *offset);
>     @@ -175,21 +194,22 @@ void *dp_packet_steal_data(struct dp_packet *);
>      static inline bool dp_packet_equal(const struct dp_packet *,
>                                         const struct dp_packet *);
> 
>     +static inline ssize_t
>     +dp_packet_read_data(const struct dp_packet *b, size_t offset,
>     size_t size,
>     +                    void **ptr, void *buf);
>     +
>     +
> 
>      /* Frees memory that 'b' points to, as well as 'b' itself. */
>      static inline void
>      dp_packet_delete(struct dp_packet *b)
>      {
>          if (b) {
>     -        if (b->source == DPBUF_DPDK) {
>     -            /* If this dp_packet was allocated by DPDK it must have
>     been
>     -             * created as a dp_packet */
>     -            free_dpdk_buf((struct dp_packet*) b);
>     -            return;
>     -        }
>     -
>              dp_packet_uninit(b);
>     -        free(b);
>     +
>     +        if (b->source != DPBUF_DPDK) {
>     +            free(b);
>     +        }
>          }
>      }
> 
>     @@ -205,7 +225,9 @@ dp_packet_copy_common_members(struct dp_packet
>     *new_b,
>      }
> 
>      /* If 'b' contains at least 'offset + size' bytes of data, returns
>     a pointer to
>     - * byte 'offset'.  Otherwise, returns a null pointer. */
>     + * byte 'offset'.  Otherwise, returns a null pointer. For DPDK
>     packets, this
>     + * means the 'offset' + 'size' must fall within the same mbuf (not
>     necessarily
>     + * the first mbuf), otherwise null is returned */
>      static inline void *
>      dp_packet_at(const struct dp_packet *b, size_t offset, size_t size)
>      {
>     @@ -215,18 +237,15 @@ dp_packet_at(const struct dp_packet *b, size_t
>     offset, size_t size)
> 
>      #ifdef DPDK_NETDEV
>          if (b->source == DPBUF_DPDK) {
>     -        struct rte_mbuf *buf = CONST_CAST(struct rte_mbuf *, &b->mbuf);
>     +        const struct rte_mbuf *mbuf = dp_packet_mbuf_from_offset(b,
>     &offset);
> 
>     -        while (buf && offset > buf->data_len) {
>     -            offset -= buf->data_len;
>     -
>     -            buf = buf->next;
>     +        if (!mbuf || offset + size > mbuf->data_len) {
>     +            return NULL;
>              }
> 
>     -        return buf ? rte_pktmbuf_mtod_offset(buf, char *, offset) :
>     NULL;
>     +        return rte_pktmbuf_mtod_offset(mbuf, char *, offset);
>          }
>      #endif
>     -
>          return (char *) dp_packet_data(b) + offset;
>      }
> 
>     @@ -325,20 +344,24 @@ dp_packet_pull(struct dp_packet *b, size_t size)
>          return data;
>      }
> 
>     -#ifdef DPDK_NETDEV
>      /* Similar to dp_packet_try_pull() but doesn't actually pull any
>     data, only
>     - * checks if it could and returns true or false accordingly.
>     - *
>     - * Valid for dp_packets carrying mbufs only. */
>     + * checks if it could and returns 'true' or 'false', accordingly.
>     For DPDK
>     + * packets, 'true' is only returned in case the 'offset' + 'size'
>     falls within
>     + * the first mbuf, otherwise 'false' is returned */
>      static inline bool
>     -dp_packet_mbuf_may_pull(const struct dp_packet *b, size_t size) {
>     -    if (size > b->mbuf.data_len) {
>     +dp_packet_may_pull(const struct dp_packet *b, uint16_t offset,
>     size_t size)
>     +{
>     +    if (offset == UINT16_MAX) {
>     +        return false;
>     +    }
>     +#ifdef DPDK_NETDEV
>     +    /* Offset needs to be within the first mbuf */
>     +    if (offset + size > b->mbuf.data_len) {
>              return false;
>          }
>     -
>     -    return true;
>     -}
>      #endif
>     +    return (offset + size > dp_packet_size(b)) ? false : true;
>     +}
> 
>      /* If 'b' has at least 'size' bytes of data, removes that many
>     bytes from the
>       * head end of 'b' and returns the first byte removed.  Otherwise,
>     returns a
>     @@ -347,7 +370,7 @@ static inline void *
>      dp_packet_try_pull(struct dp_packet *b, size_t size)
>      {
>      #ifdef DPDK_NETDEV
>     -    if (!dp_packet_mbuf_may_pull(b, size)) {
>     +    if (!dp_packet_may_pull(b, 0, size)) {
>              return NULL;
>          }
>      #endif
>     @@ -356,6 +379,39 @@ dp_packet_try_pull(struct dp_packet *b, size_t
>     size)
>              ? dp_packet_pull(b, size) : NULL;
>      }
> 
>     +/* Reads 'size' bytes from 'offset' in 'b', linearly, to 'ptr', if
>     'buf' is
>     + * NULL. Otherwise, if a 'buf' is provided, it must have 'size'
>     bytes, and the
>     + * data will be copied there, iff it is found to be non-linear. */
>     +static inline ssize_t
>     +dp_packet_read_data(const struct dp_packet *b, size_t offset,
>     size_t size,
>     +                    void **ptr, void *buf) {
>     +    /* Zero copy */
>     +    if ((*ptr = dp_packet_at(b, offset, size)) != NULL) {
>     +        return 0;
>     +    }
>     +
>     +    /* Copy available linear data */
>     +    if (buf == NULL) {
>     +#ifdef DPDK_NETDEV
>     +        size_t mofs = offset;
>     +        const struct rte_mbuf *mbuf = dp_packet_mbuf_from_offset(b,
>     &mofs);
>     +        *ptr = dp_packet_at(b, offset, mbuf->data_len - mofs);
>     +
>     +        return size - (mbuf->data_len - mofs);
>     +#else
>     +        /* Non-DPDK dp_packets should always hit the above condition */
>     +        ovs_assert(1);
>     +#endif
>     +    }
>     +
>     +    /* Copy all data */
>     +
>     +    *ptr = buf;
>     +    dp_packet_copy_from_offset(b, offset, size, buf);
>     +
>     +    return 0;
>     +}
>     +
>      static inline bool
>      dp_packet_is_eth(const struct dp_packet *b)
>      {
>     @@ -398,12 +454,6 @@ dp_packet_set_l2_pad_size(struct dp_packet *b,
>     uint8_t pad_size)
>      static inline void *
>      dp_packet_l2_5(const struct dp_packet *b)
>      {
>     -#ifdef DPDK_NETDEV
>     -    if (!dp_packet_mbuf_may_pull(b, b->l2_5_ofs)) {
>     -        return NULL;
>     -    }
>     -#endif
>     -
>          return b->l2_5_ofs != UINT16_MAX
>                 ? (char *) dp_packet_data(b) + b->l2_5_ofs
>                 : NULL;
>     @@ -420,12 +470,6 @@ dp_packet_set_l2_5(struct dp_packet *b, void *l2_5)
>      static inline void *
>      dp_packet_l3(const struct dp_packet *b)
>      {
>     -#ifdef DPDK_NETDEV
>     -    if (!dp_packet_mbuf_may_pull(b, b->l3_ofs)) {
>     -        return NULL;
>     -    }
>     -#endif
>     -
>          return b->l3_ofs != UINT16_MAX
>                 ? (char *) dp_packet_data(b) + b->l3_ofs
>                 : NULL;
>     @@ -437,15 +481,29 @@ dp_packet_set_l3(struct dp_packet *b, void *l3)
>          b->l3_ofs = l3 ? (char *) l3 - (char *) dp_packet_data(b) :
>     UINT16_MAX;
>      }
> 
>     -static inline void *
>     -dp_packet_l4(const struct dp_packet *b)
>     +/* Returns the size of the l3 header. Caller must make sure both
>     l3_ofs and
>     + * l4_ofs are set*/
>     +static inline size_t
>     +dp_packet_l3h_size(const struct dp_packet *b)
>      {
>     -#ifdef DPDK_NETDEV
>     -    if (!dp_packet_mbuf_may_pull(b, b->l4_ofs)) {
>     -        return NULL;
>     +    return b->l4_ofs - b->l3_ofs;
>     +}
>     +
>     +static inline size_t
>     +dp_packet_l3_size(const struct dp_packet *b)
>     +{
>     +    if (!dp_packet_may_pull(b, b->l3_ofs, 0)) {
>     +        return 0;
>          }
>     -#endif
> 
>     +    size_t l3_size = dp_packet_size(b) - b->l3_ofs;
>     +
>     +    return l3_size - dp_packet_l2_pad_size(b);
>     +}
>     +
>     +static inline void *
>     +dp_packet_l4(const struct dp_packet *b)
>     +{
>          return b->l4_ofs != UINT16_MAX
>                 ? (char *) dp_packet_data(b) + b->l4_ofs
>                 : NULL;
>     @@ -460,31 +518,13 @@ dp_packet_set_l4(struct dp_packet *b, void *l4)
>      static inline size_t
>      dp_packet_l4_size(const struct dp_packet *b)
>      {
>     -#ifdef DPDK_NETDEV
>     -    if (b->source == DPBUF_DPDK) {
>     -        if (!dp_packet_mbuf_may_pull(b, b->l4_ofs)) {
>     -            return 0;
>     -        }
>     -
>     -        struct rte_mbuf *mbuf = CONST_CAST(struct rte_mbuf *,
>     &b->mbuf);
>     -        size_t l4_size = mbuf->data_len - b->l4_ofs;
>     -
>     -        mbuf = mbuf->next;
>     -        while (mbuf) {
>     -            l4_size += mbuf->data_len;
>     -
>     -            mbuf = mbuf->next;
>     -        }
>     +    if (!dp_packet_may_pull(b, b->l4_ofs, 0)) {
>     +        return 0;
>     +    }
> 
>     -        l4_size -= dp_packet_l2_pad_size(b);
>     +    size_t l4_size = dp_packet_size(b) - b->l4_ofs;
> 
>     -        return l4_size;
>     -    }
>     -#endif
>     -    return b->l4_ofs != UINT16_MAX
>     -        ? (const char *)dp_packet_tail(b) - (const char
>     *)dp_packet_l4(b)
>     -        - dp_packet_l2_pad_size(b)
>     -        : 0;
>     +    return l4_size - dp_packet_l2_pad_size(b);
>      }
> 
>      static inline const void *
>     @@ -497,7 +537,8 @@ dp_packet_get_tcp_payload(const struct dp_packet *b)
>              int tcp_len = TCP_OFFSET(tcp->tcp_ctl) * 4;
> 
>              if (OVS_LIKELY(tcp_len >= TCP_HEADER_LEN && tcp_len <=
>     l4_size)) {
>     -            return (const char *)tcp + tcp_len;
>     +            tcp = dp_packet_at(b, b->l4_ofs, tcp_len);
>     +            return (tcp == NULL) ? NULL : tcp + tcp_len;
>              }
>          }
>          return NULL;
>     @@ -507,21 +548,24 @@ static inline const void *
>      dp_packet_get_udp_payload(const struct dp_packet *b)
>      {
>          return OVS_LIKELY(dp_packet_l4_size(b) >= UDP_HEADER_LEN)
>     -        ? (const char *)dp_packet_l4(b) + UDP_HEADER_LEN : NULL;
>     +        ? (const char *) dp_packet_l4(b) + UDP_HEADER_LEN
>     +        : NULL;
>      }
> 
>      static inline const void *
>      dp_packet_get_sctp_payload(const struct dp_packet *b)
>      {
>          return OVS_LIKELY(dp_packet_l4_size(b) >= SCTP_HEADER_LEN)
>     -        ? (const char *)dp_packet_l4(b) + SCTP_HEADER_LEN : NULL;
>     +        ? (const char *) dp_packet_l4(b) + SCTP_HEADER_LEN
>     +        : NULL;
>      }
> 
>      static inline const void *
>      dp_packet_get_icmp_payload(const struct dp_packet *b)
>      {
>          return OVS_LIKELY(dp_packet_l4_size(b) >= ICMP_HEADER_LEN)
>     -        ? (const char *)dp_packet_l4(b) + ICMP_HEADER_LEN : NULL;
>     +        ? (const char *) dp_packet_l4(b) + ICMP_HEADER_LEN
>     +        : NULL;
>      }
> 
>      static inline const void *
>     @@ -769,6 +813,7 @@ dp_packet_mbuf_init(struct dp_packet *p)
>          p->mbuf.ol_flags = p->mbuf.tx_offload = p->mbuf.packet_type = 0;
>          p->mbuf.nb_segs = 1;
>          p->mbuf.next = NULL;
>     +    p->mstate = NULL;
>      }
> 
>      static inline bool
>     @@ -817,6 +862,66 @@ dp_packet_has_flow_mark(struct dp_packet *p,
>     uint32_t *mark)
>          return false;
>      }
> 
>     +static inline void
>     +dp_packet_copy_from_offset(const struct dp_packet *b, size_t offset,
>     +                           size_t size, void *buf) {
>     +    if (dp_packet_is_linear(b)) {
>     +        memcpy(buf, (char *)dp_packet_data(b) + offset, size);
>     +    } else {
>     +        const struct rte_mbuf *mbuf = dp_packet_mbuf_from_offset(b,
>     &offset);
>     +        rte_pktmbuf_read(mbuf, offset, size, buf);
>     +    }
>     +}
>     +
>     +static inline bool
>     +dp_packet_is_linear(const struct dp_packet *b)
>     +{
>     +    if (b->source == DPBUF_DPDK) {
>     +        return rte_pktmbuf_is_contiguous(&b->mbuf);
> 
> 
> check seems lightweight when  RTE_LIBRTE_MBUF_DEBUG is off (by default)
> 
> static inline int rte_pktmbuf_is_contiguous
> <https://doc.dpdk.org/api/rte__mbuf_8h.html#a511a116ae4822037d4f1fb561aa4ffcf>(const
> struct rte_mbuf <https://doc.dpdk.org/api/structrte__mbuf.html> *m)
> 2100 {
> 2101  __rte_mbuf_sanity_check
> <https://doc.dpdk.org/api/rte__mbuf_8h.html#a93da89e4fcf908c0615bcc71c2998413>(m,
> 1);
> 2102  return !!(m->nb_segs
> <https://doc.dpdk.org/api/structrte__mbuf.html#a44fb29a9283000c42a77b76e9ef0b070>
> == 1);
> 2103 }
> 
> 
> #ifdef RTE_LIBRTE_MBUF_DEBUG
> 867 
> 869 #define __rte_mbuf_sanity_check(m, is_h) rte_mbuf_sanity_check(m, is_h)
> 870 
> 871 #else /* RTE_LIBRTE_MBUF_DEBUG */
> 872 
> 874
> <https://doc.dpdk.org/api/rte__mbuf_8h.html#a93da89e4fcf908c0615bcc71c2998413>
>  #define
> __rte_mbuf_sanity_check(m, is_h) do { } while (0)
> 
> 
> 
> /* do some sanity checks on a mbuf: panic if it fails */ void
> rte_mbuf_sanity_check(const struct rte_mbuf *m, int is_header) { const
> struct rte_mbuf *m_seg; unsigned int nb_segs; if (m == NULL)
> rte_panic("mbuf is NULL\n"); /* generic checks */ if (m->pool == NULL)
> rte_panic("bad mbuf pool\n"); if (m->buf_iova == 0) rte_panic("bad IO
> addr\n"); if (m->buf_addr == NULL) rte_panic("bad virt addr\n");
> uint16_t cnt = rte_mbuf_refcnt_read(m); if ((cnt == 0) || (cnt ==
> UINT16_MAX)) rte_panic("bad ref cnt\n"); /* nothing to check for
> sub-segments */ if (is_header == 0) return; nb_segs = m->nb_segs; m_seg
> = m; while (m_seg && nb_segs != 0) { m_seg = m_seg->next; nb_segs--; }
> if (nb_segs != 0) rte_panic("bad nb_segs\n"); }
> 
>  
> 
>     +    }
>     +
>     +    return true;
>     +}
>     +
>     +/* Linearizes the data on packet 'b', by copying the data into
>     system's memory.
>     + * After this the packet is effectively a DPBUF_MALLOC packet. The
>     caller is
>     + * responsible * for ensuring 'b' needs linearization, by calling
>     + * dp_packet_is_linear().
>     + *
>     + * This is an expensive operation which should only be performed as
>     a last
>     + * resort, when multi-segments are under use but data must be accessed
>     + * linearly. */
>     +static inline void
>     +dp_packet_linearize(struct dp_packet *b)
>     +{
>     +    struct rte_mbuf *mbuf = CONST_CAST(struct rte_mbuf *, &b->mbuf);
>     +    struct dp_packet *pkt = CONST_CAST(struct dp_packet *, b);
>     +    uint32_t pkt_len = dp_packet_size(pkt);
>     +    struct mbuf_state *mstate = NULL;
>     +    void *dst = xmalloc(pkt_len);
>     +
>     +    /* Copy packet's data to system's memory */
>     +    if (!rte_pktmbuf_read(mbuf, 0, pkt_len, dst)) {
>     +        return;
>     +    }
>     +
>     +    /* Free all mbufs except for the first */
>     +    dp_packet_clear(pkt);
>     +
>     +    /* Save mbuf's buf_addr to restore later */
>     +    mstate = xmalloc(sizeof(*mstate));
>     +    mstate->addr = pkt->mbuf.buf_addr;
>     +    mstate->len = pkt->mbuf.buf_len;
>     +    mstate->off = pkt->mbuf.data_off;
>     +    pkt->mstate = mstate;
>     +
>     +    /* Tranform DPBUF_DPDK packet into a DPBUF_MALLOC packet */
>     +    pkt->source = DPBUF_MALLOC;
>     +    pkt->mbuf.buf_addr = dst;
>     +    pkt->mbuf.buf_len = pkt_len;
>     +    pkt->mbuf.data_off = 0;
>     +    dp_packet_set_size(pkt, pkt_len);
>     +}
>      #else /* DPDK_NETDEV */
>      static inline bool
>      dp_packet_equal(const struct dp_packet *a, const struct dp_packet *b)
>     @@ -945,6 +1050,24 @@ dp_packet_has_flow_mark(struct dp_packet *p
>     OVS_UNUSED,
>      {
>          return false;
>      }
>     +
>     +static inline void
>     +dp_packet_copy_from_offset(const struct dp_packet *b, size_t offset,
>     +                           size_t size, void *buf)
>     +{
>     +    memcpy(buf, (char *)dp_packet_data(b) + offset, size);
>     +}
>     +
>     +static inline bool
>     +dp_packet_is_linear(const struct dp_packet *b OVS_UNUSED)
>     +{
>     +    return true;
>     +}
>     +
>     +static inline void
>     +dp_packet_linearize(struct dp_packet *b OVS_UNUSED)
>     +{
>     +}
>      #endif /* DPDK_NETDEV */
> 
>      static inline void
>     diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
>     index 1564db9..1f1188d 100644
>     --- a/lib/dpif-netdev.c
>     +++ b/lib/dpif-netdev.c
>     @@ -5710,6 +5710,11 @@ dp_netdev_upcall(struct dp_netdev_pmd_thread
>     *pmd, struct dp_packet *packet_,
>                  .support = dp_netdev_support,
>              };
> 
>     +        /* Gather the whole data for printing the packet (if debug
>     enabled) */
>     +        if (!dp_packet_is_linear(packet_)) {
>     +            dp_packet_linearize(packet_);
>     +        }
>     +
>              ofpbuf_init(&key, 0);
>              odp_flow_key_from_flow(&odp_parms, &key);
>              packet_str = ofp_dp_packet_to_string(packet_);
>     @@ -5954,6 +5959,7 @@ dfc_processing(struct dp_netdev_pmd_thread *pmd,
>          bool smc_enable_db;
>          size_t map_cnt = 0;
>          bool batch_enable = true;
>     +    int error;
> 
>          atomic_read_relaxed(&pmd->dp->smc_enable_db, &smc_enable_db);
>          atomic_read_relaxed(&pmd->dp->emc_insert_min, &cur_min);
>     @@ -6001,7 +6007,12 @@ dfc_processing(struct dp_netdev_pmd_thread *pmd,
>                  }
>              }
> 
>     -        miniflow_extract(packet, &key->mf);
>     +        error = miniflow_extract(packet, &key->mf);
>     +        if (OVS_UNLIKELY(error)) {
>     +            dp_packet_delete(packet);
>     +            continue;
>     +        }
>     +
>              key->len = 0; /* Not computed yet. */
>              /* If EMC and SMC disabled skip hash computation */
>              if (smc_enable_db == true || cur_min != 0) {
>     diff --git a/lib/dpif-netlink.c b/lib/dpif-netlink.c
>     index 0347060..54f60db 100644
>     --- a/lib/dpif-netlink.c
>     +++ b/lib/dpif-netlink.c
>     @@ -1849,6 +1849,11 @@ dpif_netlink_operate__(struct dpif_netlink *dpif,
>                      }
>                      n_ops = i;
>                  } else {
>     +                /* We will need to pass the whole to encode the
>     message */
>     +                if (!dp_packet_is_linear(op->execute.packet)) {
>     +                    dp_packet_linearize(op->execute.packet);
>     +                }
>     +
>                      dpif_netlink_encode_execute(dpif->dp_ifindex,
>     &op->execute,
>                                                  &aux->request);
>                  }
>     diff --git a/lib/dpif.c b/lib/dpif.c
>     index e35f111..b04c4a0 100644
>     --- a/lib/dpif.c
>     +++ b/lib/dpif.c
>     @@ -1244,6 +1244,7 @@ dpif_execute_helper_cb(void *aux_, struct
>     dp_packet_batch *packets_,
>              execute.probe = false;
>              execute.mtu = 0;
>              aux->error = dpif_execute(aux->dpif, &execute);
>     +
>              log_execute_message(aux->dpif, &this_module, &execute,
>                                  true, aux->error);
> 
>     @@ -1407,6 +1408,7 @@ dpif_operate(struct dpif *dpif, struct dpif_op
>     **ops, size_t n_ops,
> 
>                      case DPIF_OP_EXECUTE:
>                          COVERAGE_INC(dpif_execute);
>     +
>                          log_execute_message(dpif, &this_module,
>     &op->execute,
>                                              false, error);
>                          break;
>     @@ -1834,6 +1836,13 @@ log_execute_message(const struct dpif *dpif,
>              uint64_t stub[1024 / 8];
>              struct ofpbuf md = OFPBUF_STUB_INITIALIZER(stub);
> 
>     +        /* We will need the whole data for logging */
>     +        struct dp_packet *p = CONST_CAST(struct dp_packet *,
>     +                                         execute->packet);
>     +        if (!dp_packet_is_linear(p)) {
>     +            dp_packet_linearize(p);
>     +        }
>     +
>              packet = ofp_packet_to_string(dp_packet_data(execute->packet),
>                                            dp_packet_size(execute->packet),
>                                            execute->packet->packet_type);
>     diff --git a/lib/flow.c b/lib/flow.c
>     index c60446f..9d3e788 100644
>     --- a/lib/flow.c
>     +++ b/lib/flow.c
>     @@ -694,7 +694,7 @@ ipv6_sanity_check(const struct
>     ovs_16aligned_ip6_hdr *nh, size_t size)
> 
>      /* Caller is responsible for initializing 'dst' with enough storage for
>       * FLOW_U64S * 8 bytes. */
>     -void
>     +int
>      miniflow_extract(struct dp_packet *packet, struct miniflow *dst)
>      {
>          const struct pkt_metadata *md = &packet->md;
>     @@ -816,6 +816,13 @@ miniflow_extract(struct dp_packet *packet,
>     struct miniflow *dst)
>              int ip_len;
>              uint16_t tot_len;
> 
>     +        /* Check if header is in first mbuf, otherwise return error */
>     +        if (!dp_packet_is_linear(packet)) {
>     +            if (!dp_packet_may_pull(packet, packet->l3_ofs, sizeof
>     *nh)) {
>     +                return -1;
>     +            }
>     +        }
>     +
>              if (OVS_UNLIKELY(!ipv4_sanity_check(nh, size, &ip_len,
>     &tot_len))) {
>                  goto out;
>              }
>     @@ -846,6 +853,12 @@ miniflow_extract(struct dp_packet *packet,
>     struct miniflow *dst)
>              ovs_be32 tc_flow;
>              uint16_t plen;
> 
>     +        if (!dp_packet_is_linear(packet)) {
>     +            if (!dp_packet_may_pull(packet, packet->l3_ofs, sizeof
>     *nh)) {
>     +                return -1;
>     +            }
>     +        }
>     +
>              if (OVS_UNLIKELY(!ipv6_sanity_check(nh, size))) {
>                  goto out;
>              }
>     @@ -889,6 +902,14 @@ miniflow_extract(struct dp_packet *packet,
>     struct miniflow *dst)
>              if (dl_type == htons(ETH_TYPE_ARP) ||
>                  dl_type == htons(ETH_TYPE_RARP)) {
>                  struct eth_addr arp_buf[2];
>     +
>     +            if (!dp_packet_is_linear(packet)) {
>     +                if (!dp_packet_may_pull(packet, packet->l3_ofs,
>     +                                        ARP_ETH_HEADER_LEN)) {
>     +                    return -1;
>     +                }
>     +            }
>     +
>                  const struct arp_eth_header *arp = (const struct
>     arp_eth_header *)
>                      data_try_pull(&data, &size, ARP_ETH_HEADER_LEN);
> 
>     @@ -936,6 +957,13 @@ miniflow_extract(struct dp_packet *packet,
>     struct miniflow *dst)
>                  if (OVS_LIKELY(size >= TCP_HEADER_LEN)) {
>                      const struct tcp_header *tcp = data;
> 
>     +                if (!dp_packet_is_linear(packet)) {
>     +                    if (!dp_packet_may_pull(packet, packet->l4_ofs,
>     +                                            TCP_HEADER_LEN)) {
>     +                        return -1;
>     +                    }
>     +                }
>     +
>                      miniflow_push_be32(mf, arp_tha.ea[2], 0);
>                      miniflow_push_be32(mf, tcp_flags,
>                                         TCP_FLAGS_BE32(tcp->tcp_ctl));
>     @@ -948,6 +976,13 @@ miniflow_extract(struct dp_packet *packet,
>     struct miniflow *dst)
>                  if (OVS_LIKELY(size >= UDP_HEADER_LEN)) {
>                      const struct udp_header *udp = data;
> 
>     +                if (!dp_packet_is_linear(packet)) {
>     +                    if (!dp_packet_may_pull(packet, packet->l4_ofs,
>     +                                            UDP_HEADER_LEN)) {
>     +                        return -1;
>     +                    }
>     +                }
>     +
>                      miniflow_push_be16(mf, tp_src, udp->udp_src);
>                      miniflow_push_be16(mf, tp_dst, udp->udp_dst);
>                      miniflow_push_be16(mf, ct_tp_src, ct_tp_src);
>     @@ -957,6 +992,13 @@ miniflow_extract(struct dp_packet *packet,
>     struct miniflow *dst)
>                  if (OVS_LIKELY(size >= SCTP_HEADER_LEN)) {
>                      const struct sctp_header *sctp = data;
> 
>     +                if (!dp_packet_is_linear(packet)) {
>     +                    if (!dp_packet_may_pull(packet, packet->l4_ofs,
>     +                                            SCTP_HEADER_LEN)) {
>     +                        return -1;
>     +                    }
>     +                }
>     +
>                      miniflow_push_be16(mf, tp_src, sctp->sctp_src);
>                      miniflow_push_be16(mf, tp_dst, sctp->sctp_dst);
>                      miniflow_push_be16(mf, ct_tp_src, ct_tp_src);
>     @@ -966,6 +1008,13 @@ miniflow_extract(struct dp_packet *packet,
>     struct miniflow *dst)
>                  if (OVS_LIKELY(size >= ICMP_HEADER_LEN)) {
>                      const struct icmp_header *icmp = data;
> 
>     +                if (!dp_packet_is_linear(packet)) {
>     +                    if (!dp_packet_may_pull(packet, packet->l4_ofs,
>     +                                            ICMP_HEADER_LEN)) {
>     +                        return -1;
>     +                    }
>     +                }
>     +
>                      miniflow_push_be16(mf, tp_src, htons(icmp->icmp_type));
>                      miniflow_push_be16(mf, tp_dst, htons(icmp->icmp_code));
>                      miniflow_push_be16(mf, ct_tp_src, ct_tp_src);
>     @@ -975,6 +1024,13 @@ miniflow_extract(struct dp_packet *packet,
>     struct miniflow *dst)
>                  if (OVS_LIKELY(size >= IGMP_HEADER_LEN)) {
>                      const struct igmp_header *igmp = data;
> 
>     +                if (!dp_packet_is_linear(packet)) {
>     +                    if (!dp_packet_may_pull(packet, packet->l4_ofs,
>     +                                            IGMP_HEADER_LEN)) {
>     +                        return -1;
>     +                    }
>     +                }
>     +
>                      miniflow_push_be16(mf, tp_src, htons(igmp->igmp_type));
>                      miniflow_push_be16(mf, tp_dst, htons(igmp->igmp_code));
>                      miniflow_push_be16(mf, ct_tp_src, ct_tp_src);
>     @@ -987,8 +1043,18 @@ miniflow_extract(struct dp_packet *packet,
>     struct miniflow *dst)
>                  if (OVS_LIKELY(size >= sizeof(struct icmp6_hdr))) {
>                      const struct in6_addr *nd_target;
>                      struct eth_addr arp_buf[2];
>     -                const struct icmp6_hdr *icmp = data_pull(&data, &size,
>     -                                                         sizeof *icmp);
>     +                const struct icmp6_hdr *icmp;
>     +
>     +                if (!dp_packet_is_linear(packet)) {
>     +                    if (!dp_packet_may_pull(packet, packet->l4_ofs,
>     +                                            sizeof *icmp)) {
>     +                        return -1;
>     +                    }
>     +                }
>     +
>     +                icmp = data_pull(&data, &size, sizeof *icmp);
>     +
>     +
>                      if (parse_icmpv6(&data, &size, icmp, &nd_target,
>     arp_buf)) {
>                          if (nd_target) {
>                              miniflow_push_words(mf, nd_target, nd_target,
>     @@ -1011,6 +1077,7 @@ miniflow_extract(struct dp_packet *packet,
>     struct miniflow *dst)
>          }
>       out:
>          dst->map = mf.map;
>     +    return 0;
>      }
> 
>      ovs_be16
>     @@ -3007,7 +3074,8 @@ static void
>      flow_compose_l4_csum(struct dp_packet *p, const struct flow *flow,
>                           uint32_t pseudo_hdr_csum)
>      {
>     -    size_t l4_len = (char *) dp_packet_tail(p) - (char *)
>     dp_packet_l4(p);
>     +    //size_t l4_len = (char *) dp_packet_tail(p) - (char *)
>     dp_packet_l4(p);
>     +    size_t l4_len = dp_packet_l4_size(p);
> 
>          if (!(flow->nw_frag & FLOW_NW_FRAG_ANY)
>              || !(flow->nw_frag & FLOW_NW_FRAG_LATER)) {
>     @@ -3015,30 +3083,31 @@ flow_compose_l4_csum(struct dp_packet *p,
>     const struct flow *flow,
>                  struct tcp_header *tcp = dp_packet_l4(p);
> 
>                  tcp->tcp_csum = 0;
>     -            tcp->tcp_csum = csum_finish(csum_continue(pseudo_hdr_csum,
>     -                                                      tcp, l4_len));
>     +            tcp->tcp_csum = csum_finish(
>     +                packet_csum_continue(p, pseudo_hdr_csum, p->l4_ofs,
>     l4_len));
>              } else if (flow->nw_proto == IPPROTO_UDP) {
>                  struct udp_header *udp = dp_packet_l4(p);
> 
>                  udp->udp_csum = 0;
>     -            udp->udp_csum = csum_finish(csum_continue(pseudo_hdr_csum,
>     -                                                      udp, l4_len));
>     +            udp->udp_csum = csum_finish(
>     +                packet_csum_continue(p, pseudo_hdr_csum, p->l4_ofs,
>     l4_len));
>              } else if (flow->nw_proto == IPPROTO_ICMP) {
>                  struct icmp_header *icmp = dp_packet_l4(p);
> 
>                  icmp->icmp_csum = 0;
>     -            icmp->icmp_csum = csum(icmp, l4_len);
>     +            icmp->icmp_csum = packet_csum(p, p->l4_ofs, l4_len);
>              } else if (flow->nw_proto == IPPROTO_IGMP) {
>                  struct igmp_header *igmp = dp_packet_l4(p);
> 
>                  igmp->igmp_csum = 0;
>     -            igmp->igmp_csum = csum(igmp, l4_len);
>     +            igmp->igmp_csum = packet_csum(p, p->l4_ofs, l4_len);
>              } else if (flow->nw_proto == IPPROTO_ICMPV6) {
>                  struct icmp6_hdr *icmp = dp_packet_l4(p);
> 
>                  icmp->icmp6_cksum = 0;
>                  icmp->icmp6_cksum = (OVS_FORCE uint16_t)
>     -                csum_finish(csum_continue(pseudo_hdr_csum, icmp,
>     l4_len));
>     +                csum_finish(packet_csum_continue(p,
>     pseudo_hdr_csum, p->l4_ofs,
>     +                            l4_len));
>              }
>          }
>      }
>     @@ -3064,12 +3133,12 @@ packet_expand(struct dp_packet *p, const
>     struct flow *flow, size_t size)
>              eth->eth_type = htons(dp_packet_size(p));
>          } else if (dl_type_is_ip_any(flow->dl_type)) {
>              uint32_t pseudo_hdr_csum;
>     -        size_t l4_len = (char *) dp_packet_tail(p) - (char *)
>     dp_packet_l4(p);
>     +        size_t l4_len = dp_packet_l4_size(p);
> 
>              if (flow->dl_type == htons(ETH_TYPE_IP)) {
>                  struct ip_header *ip = dp_packet_l3(p);
> 
>     -            ip->ip_tot_len = htons(p->l4_ofs - p->l3_ofs + l4_len);
>     +            ip->ip_tot_len = htons(dp_packet_l3_size(p));
>                  ip->ip_csum = 0;
>                  ip->ip_csum = csum(ip, sizeof *ip);
> 
>     @@ -3153,7 +3222,7 @@ flow_compose(struct dp_packet *p, const struct
>     flow *flow,
>              l4_len = flow_compose_l4(p, flow, l7, l7_len);
> 
>              ip = dp_packet_l3(p);
>     -        ip->ip_tot_len = htons(p->l4_ofs - p->l3_ofs + l4_len);
>     +        ip->ip_tot_len = htons(dp_packet_l3_size(p));
>              /* Checksum has already been zeroed by put_zeros call. */
>              ip->ip_csum = csum(ip, sizeof *ip);
> 
>     diff --git a/lib/flow.h b/lib/flow.h
>     index 5ebdb1f..75b0062 100644
>     --- a/lib/flow.h
>     +++ b/lib/flow.h
>     @@ -539,7 +539,7 @@ struct pkt_metadata;
>      /* The 'dst' must follow with buffer space for FLOW_U64S 64-bit units.
>       * 'dst->map' is ignored on input and set on output to indicate
>     which fields
>       * were extracted. */
>     -void miniflow_extract(struct dp_packet *packet, struct miniflow *dst);
>     +int miniflow_extract(struct dp_packet *packet, struct miniflow *dst);
>      void miniflow_map_init(struct miniflow *, const struct flow *);
>      void flow_wc_map(const struct flow *, struct flowmap *);
>      size_t miniflow_alloc(struct miniflow *dsts[], size_t n,
>     diff --git a/lib/mcast-snooping.c b/lib/mcast-snooping.c
>     index 6730301..875b7a1 100644
>     --- a/lib/mcast-snooping.c
>     +++ b/lib/mcast-snooping.c
>     @@ -455,6 +455,7 @@ mcast_snooping_add_report(struct mcast_snooping *ms,
>          if (!igmpv3) {
>              return 0;
>          }
>     +    offset = (char *) igmpv3 - (char *) dp_packet_data(p);
>          ngrp = ntohs(igmpv3->ngrp);
>          offset += IGMPV3_HEADER_LEN;
>          while (ngrp--) {
>     @@ -507,6 +508,7 @@ mcast_snooping_add_mld(struct mcast_snooping *ms,
>          if (!mld) {
>              return 0;
>          }
>     +    offset = (char *) mld - (char *) dp_packet_data(p);
>          ngrp = ntohs(mld->ngrp);
>          offset += MLD_HEADER_LEN;
>          addr = dp_packet_at(p, offset, sizeof(struct in6_addr));
>     diff --git a/lib/netdev-bsd.c b/lib/netdev-bsd.c
>     index 46698d5..278c8a9 100644
>     --- a/lib/netdev-bsd.c
>     +++ b/lib/netdev-bsd.c
>     @@ -700,6 +700,11 @@ netdev_bsd_send(struct netdev *netdev_, int qid
>     OVS_UNUSED,
>          }
> 
>          DP_PACKET_BATCH_FOR_EACH (i, packet, batch) {
>     +        /* We need the whole data to send the packet on the device */
>     +        if (!dp_packet_is_linear(packet)) {
>     +            dp_packet_linearize(packet);
>     +        }
>     +
>              const void *data = dp_packet_data(packet);
>              size_t size = dp_packet_size(packet);
> 
>     diff --git a/lib/netdev-dummy.c b/lib/netdev-dummy.c
>     index 72b4f7a..c56c86b 100644
>     --- a/lib/netdev-dummy.c
>     +++ b/lib/netdev-dummy.c
>     @@ -233,7 +233,13 @@ dummy_packet_stream_run(struct netdev_dummy
>     *dev, struct dummy_packet_stream *s)
> 
>              ASSIGN_CONTAINER(txbuf_node, ovs_list_front(&s->txq),
>     list_node);
>              txbuf = txbuf_node->pkt;
>     -        retval = stream_send(s->stream, dp_packet_data(txbuf),
>     dp_packet_size(txbuf));
>     +
>     +        if (!dp_packet_is_linear(txbuf)) {
>     +            dp_packet_linearize(txbuf);
>     +        }
>     +
>     +        retval = stream_send(s->stream, dp_packet_data(txbuf),
>     +                             dp_packet_size(txbuf));
> 
>              if (retval > 0) {
>                  dp_packet_pull(txbuf, retval);
>     @@ -1087,6 +1093,11 @@ netdev_dummy_send(struct netdev *netdev, int
>     qid OVS_UNUSED,
> 
>          struct dp_packet *packet;
>          DP_PACKET_BATCH_FOR_EACH(i, packet, batch) {
>     +        /* We need the whole data to send the packet on the device */
>     +        if (!dp_packet_is_linear(packet)) {
>     +            dp_packet_linearize(packet);
>     +        }
>     +
>              const void *buffer = dp_packet_data(packet);
>              size_t size = dp_packet_size(packet);
> 
>     diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c
>     index f86dcd0..fa79b2a 100644
>     --- a/lib/netdev-linux.c
>     +++ b/lib/netdev-linux.c
>     @@ -1379,6 +1379,11 @@ netdev_linux_sock_batch_send(int sock, int
>     ifindex,
> 
>          struct dp_packet *packet;
>          DP_PACKET_BATCH_FOR_EACH (i, packet, batch) {
>     +        /* We need the whole data to send the packet on the device */
>     +        if (!dp_packet_is_linear(packet)) {
>     +            dp_packet_linearize(packet);
>     +        }
>     +
>              iov[i].iov_base = dp_packet_data(packet);
>              iov[i].iov_len = dp_packet_size(packet);
>              mmsg[i].msg_hdr = (struct msghdr) { .msg_name = &sll,
>     @@ -1432,8 +1437,14 @@ netdev_linux_tap_batch_send(struct netdev
>     *netdev_,
>              ssize_t retval;
>              int error;
> 
>     +        /* We need the whole data to send the packet on the device */
>     +        if (!dp_packet_is_linear(packet)) {
>     +            dp_packet_linearize(packet);
>     +        }
>     +
>              do {
>     -            retval = write(netdev->tap_fd, dp_packet_data(packet),
>     size);
>     +            retval = write(netdev->tap_fd, dp_packet_data(packet),
>     +                           size);
>                  error = retval < 0 ? errno : 0;
>              } while (error == EINTR);
> 
>     diff --git a/lib/netdev-native-tnl.c b/lib/netdev-native-tnl.c
>     index 56baaa2..285b927 100644
>     --- a/lib/netdev-native-tnl.c
>     +++ b/lib/netdev-native-tnl.c
>     @@ -65,7 +65,7 @@ netdev_tnl_ip_extract_tnl_md(struct dp_packet
>     *packet, struct flow_tnl *tnl,
>          void *nh;
>          struct ip_header *ip;
>          struct ovs_16aligned_ip6_hdr *ip6;
>     -    void *l4;
>     +    char *l4;
>          int l3_size;
> 
>          nh = dp_packet_l3(packet);
>     @@ -79,15 +79,15 @@ netdev_tnl_ip_extract_tnl_md(struct dp_packet
>     *packet, struct flow_tnl *tnl,
> 
>          *hlen = sizeof(struct eth_header);
> 
>     -    l3_size = dp_packet_size(packet) -
>     -              ((char *)nh - (char *)dp_packet_data(packet));
>     +    l3_size = dp_packet_l3_size(packet);
> 
>          if (IP_VER(ip->ip_ihl_ver) == 4) {
> 
>              ovs_be32 ip_src, ip_dst;
> 
>              if (OVS_UNLIKELY(!dp_packet_ip_checksum_valid(packet))) {
>     -            if (csum(ip, IP_IHL(ip->ip_ihl_ver) * 4)) {
>     +            if (packet_csum(packet, packet->l3_ofs,
>     +                            IP_IHL(ip->ip_ihl_ver) * 4)) {
>                      VLOG_WARN_RL(&err_rl, "ip packet has invalid
>     checksum");
>                      return NULL;
>                  }
>     @@ -196,10 +196,8 @@ udp_extract_tnl_md(struct dp_packet *packet,
>     struct flow_tnl *tnl,
>                      csum = packet_csum_pseudoheader(dp_packet_l3(packet));
>                  }
> 
>     -            csum = csum_continue(csum, udp, dp_packet_size(packet) -
>     -                                 ((const unsigned char *)udp -
>     -                                  (const unsigned char
>     *)dp_packet_eth(packet)
>     -                                 ));
>     +            csum = packet_csum_continue(packet, csum, packet->l4_ofs,
>     +                                        dp_packet_l4_size(packet));
>                  if (csum_finish(csum)) {
>                      return NULL;
>                  }
>     @@ -236,7 +234,7 @@ netdev_tnl_push_udp_header(const struct netdev
>     *netdev OVS_UNUSED,
>                  csum =
>     packet_csum_pseudoheader(netdev_tnl_ip_hdr(dp_packet_data(packet)));
>              }
> 
>     -        csum = csum_continue(csum, udp, ip_tot_size);
>     +        csum = packet_csum_continue(packet, csum, packet->l4_ofs,
>     ip_tot_size);
>              udp->udp_csum = csum_finish(csum);
> 
>              if (!udp->udp_csum) {
>     @@ -373,9 +371,8 @@ parse_gre_header(struct dp_packet *packet,
>          if (greh->flags & htons(GRE_CSUM)) {
>              ovs_be16 pkt_csum;
> 
>     -        pkt_csum = csum(greh, dp_packet_size(packet) -
>     -                              ((const unsigned char *)greh -
>     -                               (const unsigned char
>     *)dp_packet_eth(packet)));
>     +        pkt_csum = packet_csum(packet, packet->l4_ofs,
>     +                               dp_packet_l4_size(packet));
>              if (pkt_csum) {
>                  return -EINVAL;
>              }
>     @@ -448,8 +445,9 @@ netdev_gre_push_header(const struct netdev *netdev,
>          greh = netdev_tnl_push_ip_header(packet, data->header,
>     data->header_len, &ip_tot_size);
> 
>          if (greh->flags & htons(GRE_CSUM)) {
>     -        ovs_be16 *csum_opt = (ovs_be16 *) (greh + 1);
>     -        *csum_opt = csum(greh, ip_tot_size);
>     +        greh = dp_packet_l4(packet);
>     +        ovs_be16 *csum_opt = (ovs_be16 *) greh;
>     +        *csum_opt = packet_csum(packet, packet->l4_ofs, ip_tot_size);
>          }
> 
>          if (greh->flags & htons(GRE_SEQ)) {
>     diff --git a/lib/odp-execute.c b/lib/odp-execute.c
>     index 3b6890e..2c33d6a 100644
>     --- a/lib/odp-execute.c
>     +++ b/lib/odp-execute.c
>     @@ -231,8 +231,16 @@ static void
>      odp_set_nd(struct dp_packet *packet, const struct ovs_key_nd *key,
>                 const struct ovs_key_nd *mask)
>      {
>     -    const struct ovs_nd_msg *ns = dp_packet_l4(packet);
>     -    const struct ovs_nd_lla_opt *lla_opt =
>     dp_packet_get_nd_payload(packet);
>     +    const struct ovs_nd_msg *ns;
>     +    const struct ovs_nd_lla_opt *lla_opt;
>     +
>     +    /* To orocess neighbor discovery options, we need the whole
>     packet */
>     +    if (!dp_packet_is_linear(packet)) {
>     +        dp_packet_linearize(packet);
>     +    }
>     +
>     +    ns = dp_packet_l4(packet);
>     +    lla_opt = dp_packet_get_nd_payload(packet);
> 
>          if (OVS_LIKELY(ns && lla_opt)) {
>              int bytes_remain = dp_packet_l4_size(packet) - sizeof(*ns);
>     diff --git a/lib/ovs-lldp.c b/lib/ovs-lldp.c
>     index 05c1dd4..39d677a 100644
>     --- a/lib/ovs-lldp.c
>     +++ b/lib/ovs-lldp.c
>     @@ -668,7 +668,8 @@ lldp_process_packet(struct lldp *lldp, const
>     struct dp_packet *p)
>      {
>          if (lldp) {
>              lldpd_recv(lldp->lldpd, lldpd_first_hardware(lldp->lldpd),
>     -                   (char *) dp_packet_data(p), dp_packet_size(p));
>     +                   (char *) dp_packet_data(p),
>     +                   dp_packet_size(p));
>          }
>      }
> 
>     diff --git a/lib/packets.c b/lib/packets.c
>     index 2d6f77b..d95c8cc 100644
>     --- a/lib/packets.c
>     +++ b/lib/packets.c
>     @@ -1007,12 +1007,20 @@ packet_rh_present(struct dp_packet *packet,
>     uint8_t *nexthdr)
>          const struct ovs_16aligned_ip6_hdr *nh;
>          size_t len;
>          size_t remaining;
>     -    uint8_t *data = dp_packet_l3(packet);
>     +    uint8_t *data;
> 
>     -    remaining = packet->l4_ofs - packet->l3_ofs;
>     +    remaining = dp_packet_l3h_size(packet);
>          if (remaining < sizeof *nh) {
>              return false;
>          }
>     +
>     +    /* We will need the whole data for processing the headers below */
>     +    if (!dp_packet_is_linear(packet)) {
>     +        dp_packet_linearize(packet);
> 
> 
> dp_packet_linearize() could check for dp_packet_is_linear() and if
> linear return early without doing anything

Sure, that would be another way to go. It has been mentioned before, and
this has become the final form because, 1. `dp_packet_linearize()`
changes the address space of the packet and would be easy to misuse it,
2. I feel this gives a little more context, instead of a single
`dp_packet_linearize()` on its own.

> 
>  
> 
>     +    }
>     +
>     +    data = dp_packet_l3(packet);
>     +
>          nh = ALIGNED_CAST(struct ovs_16aligned_ip6_hdr *, data);
>          data += sizeof *nh;
>          remaining -= sizeof *nh;
>     @@ -1254,12 +1262,12 @@ packet_set_sctp_port(struct dp_packet
>     *packet, ovs_be16 src, ovs_be16 dst)
> 
>          old_csum = get_16aligned_be32(&sh->sctp_csum);
>          put_16aligned_be32(&sh->sctp_csum, 0);
>     -    old_correct_csum = crc32c((void *)sh, tp_len);
>     +    old_correct_csum = packet_crc32c(packet, packet->l4_ofs, tp_len);
> 
>          sh->sctp_src = src;
>          sh->sctp_dst = dst;
> 
>     -    new_csum = crc32c((void *)sh, tp_len);
>     +    new_csum = packet_crc32c(packet, packet->l4_ofs, tp_len);
>          put_16aligned_be32(&sh->sctp_csum, old_csum ^ old_correct_csum
>     ^ new_csum);
>      }
> 
>     @@ -1293,6 +1301,11 @@ packet_set_nd(struct dp_packet *packet, const
>     struct in6_addr *target,
>              return;
>          }
> 
>     +    /* To process neighbor discovery options, we need the whole
>     packet */
>     +    if (!dp_packet_is_linear(packet)) {
>     +        dp_packet_linearize(packet);
>     +    }
>     +
>          ns = dp_packet_l4(packet);
>          opt = &ns->options[0];
>          bytes_remain -= sizeof(*ns);
>     @@ -1515,8 +1528,8 @@ compose_nd_ns(struct dp_packet *b, const
>     struct eth_addr eth_src,
> 
>          ns->icmph.icmp6_cksum = 0;
>          icmp_csum = packet_csum_pseudoheader6(dp_packet_l3(b));
>     -    ns->icmph.icmp6_cksum = csum_finish(
>     -        csum_continue(icmp_csum, ns, ND_MSG_LEN + ND_LLA_OPT_LEN));
>     +    ns->icmph.icmp6_cksum = csum_finish(packet_csum_continue(
>     +        b, icmp_csum, b->l4_ofs, ND_MSG_LEN + ND_LLA_OPT_LEN));
>      }
> 
>      /* Compose an IPv6 Neighbor Discovery Neighbor Advertisement
>     message. */
>     @@ -1546,8 +1559,8 @@ compose_nd_na(struct dp_packet *b,
> 
>          na->icmph.icmp6_cksum = 0;
>          icmp_csum = packet_csum_pseudoheader6(dp_packet_l3(b));
>     -    na->icmph.icmp6_cksum = csum_finish(csum_continue(
>     -        icmp_csum, na, ND_MSG_LEN + ND_LLA_OPT_LEN));
>     +    na->icmph.icmp6_cksum = csum_finish(packet_csum_continue(
>     +        b, icmp_csum, b->l4_ofs, ND_MSG_LEN + ND_LLA_OPT_LEN));
>      }
> 
>      /* Compose an IPv6 Neighbor Discovery Router Advertisement message with
>     @@ -1597,8 +1610,8 @@ compose_nd_ra(struct dp_packet *b,
> 
>          ra->icmph.icmp6_cksum = 0;
>          uint32_t icmp_csum = packet_csum_pseudoheader6(dp_packet_l3(b));
>     -    ra->icmph.icmp6_cksum = csum_finish(csum_continue(
>     -        icmp_csum, ra, RA_MSG_LEN + ND_LLA_OPT_LEN + mtu_opt_len));
>     +    ra->icmph.icmp6_cksum = csum_finish(packet_csum_continue(
>     +        b, icmp_csum, b->l4_ofs, RA_MSG_LEN + ND_LLA_OPT_LEN +
>     mtu_opt_len));
>      }
> 
>      /* Append an IPv6 Neighbor Discovery Prefix Information option to a
>     @@ -1627,8 +1640,8 @@ packet_put_ra_prefix_opt(struct dp_packet *b,
>          struct ovs_ra_msg *ra = dp_packet_l4(b);
>          ra->icmph.icmp6_cksum = 0;
>          uint32_t icmp_csum = packet_csum_pseudoheader6(dp_packet_l3(b));
>     -    ra->icmph.icmp6_cksum = csum_finish(csum_continue(
>     -        icmp_csum, ra, prev_l4_size + ND_PREFIX_OPT_LEN));
>     +    ra->icmph.icmp6_cksum = csum_finish(packet_csum_continue(
>     +        b, icmp_csum, b->l4_ofs, prev_l4_size + ND_PREFIX_OPT_LEN));
>      }
> 
>      uint32_t
>     @@ -1680,6 +1693,54 @@ packet_csum_upperlayer6(const struct
>     ovs_16aligned_ip6_hdr *ip6,
>      }
>      #endif
> 
> 
> It is a proposed 'general' API; a function header is needed
> same for other proposed apis

It is there in packets.h, or is that not what you're referring to?

Tiago.

> 
>  
> 
>     +uint32_t
>     +packet_csum_continue(const struct dp_packet *b, uint32_t partial,
>     +                     uint16_t offset, size_t n)
>     +{
>     +    char *ptr = NULL;
>     +    size_t rem = 0;
>     +    size_t size = 0;
>     +
>     +    while (n > 1) {
>     +        rem = dp_packet_read_data(b, offset, n, (void *)&ptr, NULL);
>     +
>     +        size = n - rem;
>     +        partial = csum_continue(partial, ptr, size);
>     +
>     +        offset += size;
>     +        n = rem;
>     +    }
>     +
>     +    return partial;
>     +}
>     +
>     +ovs_be16
>     +packet_csum(const struct dp_packet *b, uint16_t offset, size_t n)
>     +{
>     +    return csum_finish(packet_csum_continue(b, 0, offset, n));
>     +}
>     +
>     +ovs_be32
>     +packet_crc32c(const struct dp_packet *b, uint16_t offset, size_t n)
>     +{
>     +    char *ptr = NULL;
>     +    size_t rem = 0;
>     +    size_t size = 0;
>     +    uint32_t partial = 0xffffffffL;
>     +
>     +    while (n > 1) {
>     +        rem = dp_packet_read_data(b, offset, n, (void *)&ptr, NULL);
>     +
>     +        size = n - rem;
>     +        partial = crc32c_continue(partial, (uint8_t *) ptr, size);
>     +
>     +        offset += size;
>     +        n = rem;
>     +    }
>     +
>     +    return crc32c_finish(partial);
>     +}
>     +
>      void
>      IP_ECN_set_ce(struct dp_packet *pkt, bool is_ipv6)
>      {
>     diff --git a/lib/packets.h b/lib/packets.h
>     index 09a0ac3..9e7f5a1 100644
>     --- a/lib/packets.h
>     +++ b/lib/packets.h
>     @@ -1573,6 +1573,13 @@ void packet_put_ra_prefix_opt(struct dp_packet *,
>                                    ovs_be32 preferred_lifetime,
>                                    const ovs_be128 router_prefix);
>      uint32_t packet_csum_pseudoheader(const struct ip_header *);
>     +uint32_t
>     +packet_csum_continue(const struct dp_packet *b, uint32_t partial,
>     +                     uint16_t offset, size_t n);
>     +ovs_be16
>     +packet_csum(const struct dp_packet *b, uint16_t offset, size_t n);
>     +ovs_be32
>     +packet_crc32c(const struct dp_packet *b, uint16_t offset, size_t n);
>      void IP_ECN_set_ce(struct dp_packet *pkt, bool is_ipv6);
> 
>      #define DNS_HEADER_LEN 12
>     diff --git a/ofproto/ofproto-dpif-upcall.c
>     b/ofproto/ofproto-dpif-upcall.c
>     index dc30824..ed1c1ac 100644
>     --- a/ofproto/ofproto-dpif-upcall.c
>     +++ b/ofproto/ofproto-dpif-upcall.c
>     @@ -1417,12 +1417,18 @@ process_upcall(struct udpif *udpif, struct
>     upcall *upcall,
>          case SFLOW_UPCALL:
>              if (upcall->sflow) {
>                  struct dpif_sflow_actions sflow_actions;
>     +            struct dp_packet *p = CONST_CAST(struct dp_packet *,
>     packet);
> 
>                  memset(&sflow_actions, 0, sizeof sflow_actions);
> 
>                  actions_len = dpif_read_actions(udpif, upcall, flow,
>                                                  upcall->type,
>     &sflow_actions);
>     -            dpif_sflow_received(upcall->sflow, packet, flow,
>     +            /* Gather the whole data */
>     +            if (!dp_packet_is_linear(p)) {
>     +                dp_packet_linearize(p);
>     +            }
>     +
>     +            dpif_sflow_received(upcall->sflow, p, flow,
>                                      flow->in_port.odp_port,
>     &upcall->cookie,
>                                      actions_len > 0 ? &sflow_actions :
>     NULL);
>              }
>     @@ -1483,6 +1489,12 @@ process_upcall(struct udpif *udpif, struct
>     upcall *upcall,
> 
>                  const struct frozen_state *state = &recirc_node->state;
> 
>     +            /* Gather the whole data */
>     +            struct dp_packet *p = CONST_CAST(struct dp_packet *,
>     packet);
>     +            if (!dp_packet_is_linear(p)) {
>     +                dp_packet_linearize(p);
>     +            }
>     +
>                  struct ofproto_async_msg *am = xmalloc(sizeof *am);
>                  *am = (struct ofproto_async_msg) {
>                      .controller_id = cookie->controller.controller_id,
>     @@ -1490,9 +1502,9 @@ process_upcall(struct udpif *udpif, struct
>     upcall *upcall,
>                      .pin = {
>                          .up = {
>                              .base = {
>     -                            .packet = xmemdup(dp_packet_data(packet),
>     -                                              dp_packet_size(packet)),
>     -                            .packet_len = dp_packet_size(packet),
>     +                            .packet = xmemdup(dp_packet_data(p),
>     +                                              dp_packet_size(p)),
>     +                            .packet_len = dp_packet_size(p),
>                                  .reason = cookie->controller.reason,
>                                  .table_id = state->table_id,
>                                  .cookie = get_32aligned_be64(
>     diff --git a/ofproto/ofproto-dpif-xlate.c b/ofproto/ofproto-dpif-xlate.c
>     index 839fddd..b2c31b7 100644
>     --- a/ofproto/ofproto-dpif-xlate.c
>     +++ b/ofproto/ofproto-dpif-xlate.c
>     @@ -1737,7 +1737,8 @@ stp_process_packet(const struct xport *xport,
>     const struct dp_packet *packet)
>          }
> 
>          if (dp_packet_try_pull(&payload, ETH_HEADER_LEN +
>     LLC_HEADER_LEN)) {
>     -        stp_received_bpdu(sp, dp_packet_data(&payload),
>     dp_packet_size(&payload));
>     +        stp_received_bpdu(sp, dp_packet_data(&payload),
>     +                          dp_packet_size(&payload));
>          }
>      }
> 
>     @@ -1788,7 +1789,8 @@ rstp_process_packet(const struct xport *xport,
>     const struct dp_packet *packet)
>          }
> 
>          if (dp_packet_try_pull(&payload, ETH_HEADER_LEN +
>     LLC_HEADER_LEN)) {
>     -        rstp_port_received_bpdu(xport->rstp_port,
>     dp_packet_data(&payload),
>     +        rstp_port_received_bpdu(xport->rstp_port,
>     +                                dp_packet_data(&payload),
>                                      dp_packet_size(&payload));
>          }
>      }
>     @@ -2563,6 +2565,7 @@ update_mcast_snooping_table4__(const struct
>     xlate_ctx *ctx,
> 
>          offset = (char *) dp_packet_l4(packet) - (char *)
>     dp_packet_data(packet);
>          igmp = dp_packet_at(packet, offset, IGMP_HEADER_LEN);
>     +
>          if (!igmp || csum(igmp, dp_packet_l4_size(packet)) != 0) {
>              xlate_report_debug(ctx, OFT_DETAIL,
>                                 "multicast snooping received bad IGMP "
>     @@ -2975,6 +2978,13 @@ xlate_normal(struct xlate_ctx *ctx)
>              && is_ip_any(flow)) {
>              struct mcast_snooping *ms = ctx->xbridge->ms;
>              struct mcast_group *grp = NULL;
>     +        struct dp_packet *p = CONST_CAST(struct dp_packet *,
>     +                                         ctx->xin->packet);
>     +
>     +        /* We will need the whole data for processing the packet
>     below */
>     +        if (p && !dp_packet_is_linear(p)) {
>     +            dp_packet_linearize(p);
>     +        }
> 
>              if (is_igmp(flow, wc)) {
>                  /*
>     @@ -3279,7 +3289,8 @@ process_special(struct xlate_ctx *ctx, const
>     struct xport *xport)
>          const struct flow *flow = &ctx->xin->flow;
>          struct flow_wildcards *wc = ctx->wc;
>          const struct xbridge *xbridge = ctx->xbridge;
>     -    const struct dp_packet *packet = ctx->xin->packet;
>     +    struct dp_packet *packet = CONST_CAST(struct dp_packet *,
>     +                                          ctx->xin->packet);
>          enum slow_path_reason slow;
> 
>          if (!xport) {
>     @@ -3291,6 +3302,11 @@ process_special(struct xlate_ctx *ctx, const
>     struct xport *xport)
>              slow = SLOW_CFM;
>          } else if (xport->bfd && bfd_should_process_flow(xport->bfd,
>     flow, wc)) {
>              if (packet) {
>     +            /* Gather the whole data for further processing */
>     +            if (!dp_packet_is_linear(packet)) {
>     +                dp_packet_linearize(packet);
>     +            }
>     +
>                  bfd_process_packet(xport->bfd, flow, packet);
>                  /* If POLL received, immediately sends FINAL back. */
>                  if (bfd_should_send_packet(xport->bfd)) {
>     @@ -3307,6 +3323,11 @@ process_special(struct xlate_ctx *ctx, const
>     struct xport *xport)
>          } else if ((xbridge->stp || xbridge->rstp) &&
>                     stp_should_process_flow(flow, wc)) {
>              if (packet) {
>     +            /* Gather the whole data for further processing */
>     +            if (!dp_packet_is_linear(packet)) {
>     +                dp_packet_linearize(packet);
>     +            }
>     +
>                  xbridge->stp
>                      ? stp_process_packet(xport, packet)
>                      : rstp_process_packet(xport, packet);
>     @@ -3314,6 +3335,11 @@ process_special(struct xlate_ctx *ctx, const
>     struct xport *xport)
>              slow = SLOW_STP;
>          } else if (xport->lldp && lldp_should_process_flow(xport->lldp,
>     flow)) {
>              if (packet) {
>     +            /* Gather the whole data for further processing */
>     +            if (!dp_packet_is_linear(packet)) {
>     +                dp_packet_linearize(packet);
>     +            }
>     +
>                  lldp_process_packet(xport->lldp, packet);
>              }
>              slow = SLOW_LLDP;
>     diff --git a/tests/test-rstp.c b/tests/test-rstp.c
>     index 01aeaf8..48f1cfa 100644
>     --- a/tests/test-rstp.c
>     +++ b/tests/test-rstp.c
>     @@ -86,8 +86,12 @@ send_bpdu(struct dp_packet *pkt, void *port_,
>     void *b_)
>          assert(port_no < b->n_ports);
>          lan = b->ports[port_no];
>          if (lan) {
>     -        const void *data = dp_packet_l3(pkt);
>     -        size_t size = (char *) dp_packet_tail(pkt) - (char *) data;
>     +        if (!dp_packet_is_linear(pkt)) {
>     +            dp_packet_linearize(pkt);
>     +        }
>     +
>     +        const char *data = dp_packet_l3(pkt);
>     +        size_t size = dp_packet_size(pkt) - pkt->l3_ofs;
>              int i;
> 
>              for (i = 0; i < lan->n_conns; i++) {
>     diff --git a/tests/test-stp.c b/tests/test-stp.c
>     index c85c99d..fd5bfad 100644
>     --- a/tests/test-stp.c
>     +++ b/tests/test-stp.c
>     @@ -94,8 +94,12 @@ send_bpdu(struct dp_packet *pkt, int port_no,
>     void *b_)
>          assert(port_no < b->n_ports);
>          lan = b->ports[port_no];
>          if (lan) {
>     -        const void *data = dp_packet_l3(pkt);
>     -        size_t size = (char *) dp_packet_tail(pkt) - (char *) data;
>     +        if (!dp_packet_is_linear(pkt)) {
>     +            dp_packet_linearize(pkt);
>     +        }
>     +
>     +        const char *data = dp_packet_l3(pkt);
>     +        size_t size = dp_packet_size(pkt) - pkt->l3_ofs;
>              int i;
> 
>              for (i = 0; i < lan->n_conns; i++) {
>     -- 
>     2.7.4
> 
_______________________________________________
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to