The computation of timestamp is best done in the part of
pcapng library that is in secondary process.
The secondary process is already doing a bunch of system
calls which makes it not performance sensitive.
This does change the rte_pcapng_copy()
and rte_pcapng_write_stats() experimental API's.

Simplify the computation of nanoseconds from TSC to a two
step process which avoids numeric overflow issues. The previous
code was not thread safe as well.

Fixes: c882eb544842 ("pcapng: fix timestamp wrapping in output files")
Signed-off-by: Stephen Hemminger <step...@networkplumber.org>
Acked-by: Morten Brørup <m...@smartsharesystems.com>
---
 app/dumpcap/main.c      |  25 +++------
 app/test/test_pcapng.c  |   7 +--
 lib/graph/graph_pcap.c  |   2 +-
 lib/pcapng/rte_pcapng.c | 119 +++++++++++++++-------------------------
 lib/pcapng/rte_pcapng.h |  19 ++-----
 lib/pdump/rte_pdump.c   |   4 +-
 6 files changed, 62 insertions(+), 114 deletions(-)

diff --git a/app/dumpcap/main.c b/app/dumpcap/main.c
index d05dddac0071..fc28e2d7027a 100644
--- a/app/dumpcap/main.c
+++ b/app/dumpcap/main.c
@@ -66,13 +66,13 @@ static bool print_stats;
 
 /* capture limit options */
 static struct {
-       uint64_t  duration;     /* nanoseconds */
+       time_t  duration;       /* seconds */
        unsigned long packets;  /* number of packets in file */
        size_t size;            /* file size (bytes) */
 } stop;
 
 /* Running state */
-static uint64_t start_time, end_time;
+static time_t start_time;
 static uint64_t packets_received;
 static size_t file_size;
 
@@ -197,7 +197,7 @@ static void auto_stop(char *opt)
                if (*value == '\0' || *endp != '\0' || interval <= 0)
                        rte_exit(EXIT_FAILURE,
                                 "Invalid duration \"%s\"\n", value);
-               stop.duration = NSEC_PER_SEC * interval;
+               stop.duration = interval;
        } else if (strcmp(opt, "filesize") == 0) {
                stop.size = get_uint(value, "filesize", 0) * 1024;
        } else if (strcmp(opt, "packets") == 0) {
@@ -511,15 +511,6 @@ static void statistics_loop(void)
        }
 }
 
-/* Return the time since 1/1/1970 in nanoseconds */
-static uint64_t create_timestamp(void)
-{
-       struct timespec now;
-
-       clock_gettime(CLOCK_MONOTONIC, &now);
-       return rte_timespec_to_ns(&now);
-}
-
 static void
 cleanup_pdump_resources(void)
 {
@@ -589,9 +580,8 @@ report_packet_stats(dumpcap_out_t out)
                ifdrop = pdump_stats.nombuf + pdump_stats.ringfull;
 
                if (use_pcapng)
-                       rte_pcapng_write_stats(out.pcapng, intf->port, NULL,
-                                              start_time, end_time,
-                                              ifrecv, ifdrop);
+                       rte_pcapng_write_stats(out.pcapng, intf->port,
+                                              ifrecv, ifdrop, NULL);
 
                if (ifrecv == 0)
                        percent = 0;
@@ -983,7 +973,7 @@ int main(int argc, char **argv)
        mp = create_mempool();
        out = create_output();
 
-       start_time = create_timestamp();
+       start_time = time(NULL);
        enable_pdump(r, mp);
 
        if (!quiet) {
@@ -1005,11 +995,10 @@ int main(int argc, char **argv)
                        break;
 
                if (stop.duration != 0 &&
-                   create_timestamp() - start_time > stop.duration)
+                   time(NULL) - start_time > stop.duration)
                        break;
        }
 
-       end_time = create_timestamp();
        disable_primary_monitor();
 
        if (rte_eal_primary_proc_alive(NULL))
diff --git a/app/test/test_pcapng.c b/app/test/test_pcapng.c
index b8429a02f160..21131dfa0c5e 100644
--- a/app/test/test_pcapng.c
+++ b/app/test/test_pcapng.c
@@ -146,7 +146,7 @@ test_write_packets(void)
                struct rte_mbuf *mc;
 
                mc = rte_pcapng_copy(port_id, 0, orig, mp, pkt_len,
-                               rte_get_tsc_cycles(), 0, NULL);
+                                    RTE_PCAPNG_DIRECTION_UNKNOWN, NULL);
                if (mc == NULL) {
                        fprintf(stderr, "Cannot copy packet\n");
                        return -1;
@@ -174,8 +174,7 @@ test_write_stats(void)
 
        /* write a statistics block */
        len = rte_pcapng_write_stats(pcapng, port_id,
-                                    NULL, 0, 0,
-                                    NUM_PACKETS, 0);
+                                    UINT64_MAX, UINT64_MAX, NULL);
        if (len <= 0) {
                fprintf(stderr, "Write of statistics failed\n");
                return -1;
@@ -262,7 +261,7 @@ test_write_over_limit_iov_max(void)
                struct rte_mbuf *mc;
 
                mc = rte_pcapng_copy(port_id, 0, orig, mp, pkt_len,
-                               rte_get_tsc_cycles(), 0, NULL);
+                                    RTE_PCAPNG_DIRECTION_UNKNOWN, NULL);
                if (mc == NULL) {
                        fprintf(stderr, "Cannot copy packet\n");
                        return -1;
diff --git a/lib/graph/graph_pcap.c b/lib/graph/graph_pcap.c
index db722c375fa7..89525f1220ca 100644
--- a/lib/graph/graph_pcap.c
+++ b/lib/graph/graph_pcap.c
@@ -214,7 +214,7 @@ graph_pcap_dispatch(struct rte_graph *graph,
                mbuf = (struct rte_mbuf *)objs[i];
 
                mc = rte_pcapng_copy(mbuf->port, 0, mbuf, pkt_mp, mbuf->pkt_len,
-                                    rte_get_tsc_cycles(), 0, buffer);
+                                    0, buffer);
                if (mc == NULL)
                        break;
 
diff --git a/lib/pcapng/rte_pcapng.c b/lib/pcapng/rte_pcapng.c
index 3c91fc77644a..13fd2b97fb80 100644
--- a/lib/pcapng/rte_pcapng.c
+++ b/lib/pcapng/rte_pcapng.c
@@ -36,22 +36,14 @@
 /* Format of the capture file handle */
 struct rte_pcapng {
        int  outfd;             /* output file */
-
        unsigned int ports;     /* number of interfaces added */
+       uint64_t offset_ns;     /* ns since 1/1/1970 when initialized */
+       uint64_t tsc_base;      /* TSC when started */
 
        /* DPDK port id to interface index in file */
        uint32_t port_index[RTE_MAX_ETHPORTS];
 };
 
-/* For converting TSC cycles to PCAPNG ns format */
-static struct pcapng_time {
-       uint64_t ns;
-       uint64_t cycles;
-       uint64_t tsc_hz;
-       struct rte_reciprocal_u64 tsc_hz_inverse;
-} pcapng_time;
-
-
 #ifdef RTE_EXEC_ENV_WINDOWS
 /*
  * Windows does not have writev() call.
@@ -102,56 +94,21 @@ static ssize_t writev(int fd, const struct iovec *iov, int 
iovcnt)
 #define if_indextoname(ifindex, ifname) NULL
 #endif
 
-static inline void
-pcapng_init(void)
+/* Convert from TSC (CPU cycles) to nanoseconds */
+static uint64_t
+pcapng_timestamp(const rte_pcapng_t *self, uint64_t cycles)
 {
-       struct timespec ts;
+       uint64_t delta, rem, secs, ns;
+       const uint64_t hz = rte_get_tsc_hz();
 
-       pcapng_time.cycles = rte_get_tsc_cycles();
-       clock_gettime(CLOCK_REALTIME, &ts);
-       pcapng_time.cycles = (pcapng_time.cycles + rte_get_tsc_cycles()) / 2;
-       pcapng_time.ns = rte_timespec_to_ns(&ts);
-
-       pcapng_time.tsc_hz = rte_get_tsc_hz();
-       pcapng_time.tsc_hz_inverse = 
rte_reciprocal_value_u64(pcapng_time.tsc_hz);
-}
+       delta = cycles - self->tsc_base;
 
-/* PCAPNG timestamps are in nanoseconds */
-static uint64_t pcapng_tsc_to_ns(uint64_t cycles)
-{
-       uint64_t delta, secs;
-
-       if (!pcapng_time.tsc_hz)
-               pcapng_init();
-
-       /* In essence the calculation is:
-        *   delta = (cycles - pcapng_time.cycles) * NSEC_PRE_SEC / 
rte_get_tsc_hz()
-        * but this overflows within 4 to 8 seconds depending on TSC frequency.
-        * Instead, if delta >= pcapng_time.tsc_hz:
-        *   Increase pcapng_time.ns and pcapng_time.cycles by the number of
-        *   whole seconds in delta and reduce delta accordingly.
-        * delta will therefore always lie in the interval [0, 
pcapng_time.tsc_hz),
-        * which will not overflow when multiplied by NSEC_PER_SEC provided the
-        * TSC frequency < approx 18.4GHz.
-        *
-        * Currently all TSCs operate below 5GHz.
-        */
-       delta = cycles - pcapng_time.cycles;
-       if (unlikely(delta >= pcapng_time.tsc_hz)) {
-               if (likely(delta < pcapng_time.tsc_hz * 2)) {
-                       delta -= pcapng_time.tsc_hz;
-                       pcapng_time.cycles += pcapng_time.tsc_hz;
-                       pcapng_time.ns += NSEC_PER_SEC;
-               } else {
-                       secs = rte_reciprocal_divide_u64(delta, 
&pcapng_time.tsc_hz_inverse);
-                       delta -= secs * pcapng_time.tsc_hz;
-                       pcapng_time.cycles += secs * pcapng_time.tsc_hz;
-                       pcapng_time.ns += secs * NSEC_PER_SEC;
-               }
-       }
+       /* Avoid numeric wraparound by computing seconds first */
+       secs = delta / hz;
+       rem = delta % hz;
+       ns = (rem * NS_PER_S) / hz;
 
-       return pcapng_time.ns + rte_reciprocal_divide_u64(delta * NSEC_PER_SEC,
-                                                         
&pcapng_time.tsc_hz_inverse);
+       return secs * NS_PER_S + ns + self->offset_ns;
 }
 
 /* length of option including padding */
@@ -368,15 +325,15 @@ rte_pcapng_add_interface(rte_pcapng_t *self, uint16_t 
port,
  */
 ssize_t
 rte_pcapng_write_stats(rte_pcapng_t *self, uint16_t port_id,
-                      const char *comment,
-                      uint64_t start_time, uint64_t end_time,
-                      uint64_t ifrecv, uint64_t ifdrop)
+                      uint64_t ifrecv, uint64_t ifdrop,
+                      const char *comment)
 {
        struct pcapng_statistics *hdr;
        struct pcapng_option *opt;
+       uint64_t start_time = self->offset_ns;
+       uint64_t sample_time;
        uint32_t optlen, len;
        uint8_t *buf;
-       uint64_t ns;
 
        RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL);
 
@@ -386,10 +343,10 @@ rte_pcapng_write_stats(rte_pcapng_t *self, uint16_t 
port_id,
                optlen += pcapng_optlen(sizeof(ifrecv));
        if (ifdrop != UINT64_MAX)
                optlen += pcapng_optlen(sizeof(ifdrop));
+
        if (start_time != 0)
                optlen += pcapng_optlen(sizeof(start_time));
-       if (end_time != 0)
-               optlen += pcapng_optlen(sizeof(end_time));
+
        if (comment)
                optlen += pcapng_optlen(strlen(comment));
        if (optlen != 0)
@@ -409,9 +366,6 @@ rte_pcapng_write_stats(rte_pcapng_t *self, uint16_t port_id,
        if (start_time != 0)
                opt = pcapng_add_option(opt, PCAPNG_ISB_STARTTIME,
                                         &start_time, sizeof(start_time));
-       if (end_time != 0)
-               opt = pcapng_add_option(opt, PCAPNG_ISB_ENDTIME,
-                                        &end_time, sizeof(end_time));
        if (ifrecv != UINT64_MAX)
                opt = pcapng_add_option(opt, PCAPNG_ISB_IFRECV,
                                &ifrecv, sizeof(ifrecv));
@@ -425,9 +379,9 @@ rte_pcapng_write_stats(rte_pcapng_t *self, uint16_t port_id,
        hdr->block_length = len;
        hdr->interface_id = self->port_index[port_id];
 
-       ns = pcapng_tsc_to_ns(rte_get_tsc_cycles());
-       hdr->timestamp_hi = ns >> 32;
-       hdr->timestamp_lo = (uint32_t)ns;
+       sample_time = pcapng_timestamp(self, rte_get_tsc_cycles());
+       hdr->timestamp_hi = sample_time >> 32;
+       hdr->timestamp_lo = (uint32_t)sample_time;
 
        /* clone block_length after option */
        memcpy(opt, &len, sizeof(uint32_t));
@@ -520,23 +474,21 @@ struct rte_mbuf *
 rte_pcapng_copy(uint16_t port_id, uint32_t queue,
                const struct rte_mbuf *md,
                struct rte_mempool *mp,
-               uint32_t length, uint64_t cycles,
+               uint32_t length,
                enum rte_pcapng_direction direction,
                const char *comment)
 {
        struct pcapng_enhance_packet_block *epb;
        uint32_t orig_len, data_len, padding, flags;
        struct pcapng_option *opt;
+       uint64_t timestamp;
        uint16_t optlen;
        struct rte_mbuf *mc;
-       uint64_t ns;
        bool rss_hash;
 
 #ifdef RTE_LIBRTE_ETHDEV_DEBUG
        RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, NULL);
 #endif
-       ns = pcapng_tsc_to_ns(cycles);
-
        orig_len = rte_pktmbuf_pkt_len(md);
 
        /* Take snapshot of the data */
@@ -641,8 +593,10 @@ rte_pcapng_copy(uint16_t port_id, uint32_t queue,
        /* Interface index is filled in later during write */
        mc->port = port_id;
 
-       epb->timestamp_hi = ns >> 32;
-       epb->timestamp_lo = (uint32_t)ns;
+       /* Put timestamp in cycles here - adjust in packet write */
+       timestamp = rte_get_tsc_cycles();
+       epb->timestamp_hi = timestamp >> 32;
+       epb->timestamp_lo = (uint32_t)timestamp;
        epb->capture_length = data_len;
        epb->original_length = orig_len;
 
@@ -668,6 +622,7 @@ rte_pcapng_write_packets(rte_pcapng_t *self,
        for (i = 0; i < nb_pkts; i++) {
                struct rte_mbuf *m = pkts[i];
                struct pcapng_enhance_packet_block *epb;
+               uint64_t cycles, timestamp;
 
                /* sanity check that is really a pcapng mbuf */
                epb = rte_pktmbuf_mtod(m, struct pcapng_enhance_packet_block *);
@@ -684,6 +639,13 @@ rte_pcapng_write_packets(rte_pcapng_t *self,
                        return -1;
                }
 
+               /* adjust timestamp recorded in packet */
+               cycles = (uint64_t)epb->timestamp_hi << 32;
+               cycles += epb->timestamp_lo;
+               timestamp = pcapng_timestamp(self, cycles);
+               epb->timestamp_hi = timestamp >> 32;
+               epb->timestamp_lo = (uint32_t)timestamp;
+
                /*
                 * Handle case of highly fragmented and large burst size
                 * Note: this assumes that max segments per mbuf < IOV_MAX
@@ -725,6 +687,8 @@ rte_pcapng_fdopen(int fd,
 {
        unsigned int i;
        rte_pcapng_t *self;
+       struct timespec ts;
+       uint64_t cycles;
 
        self = malloc(sizeof(*self));
        if (!self) {
@@ -734,6 +698,13 @@ rte_pcapng_fdopen(int fd,
 
        self->outfd = fd;
        self->ports = 0;
+
+       /* record start time in ns since 1/1/1970 */
+       cycles = rte_get_tsc_cycles();
+       clock_gettime(CLOCK_REALTIME, &ts);
+       self->tsc_base = (cycles + rte_get_tsc_cycles()) / 2;
+       self->offset_ns = rte_timespec_to_ns(&ts);
+
        for (i = 0; i < RTE_MAX_ETHPORTS; i++)
                self->port_index[i] = UINT32_MAX;
 
diff --git a/lib/pcapng/rte_pcapng.h b/lib/pcapng/rte_pcapng.h
index d93cc9f73ad5..c40795c721de 100644
--- a/lib/pcapng/rte_pcapng.h
+++ b/lib/pcapng/rte_pcapng.h
@@ -121,8 +121,6 @@ enum rte_pcapng_direction {
  * @param length
  *   The upper limit on bytes to copy.  Passing UINT32_MAX
  *   means all data (after offset).
- * @param timestamp
- *   The timestamp in TSC cycles.
  * @param direction
  *   The direction of the packer: receive, transmit or unknown.
  * @param comment
@@ -136,7 +134,7 @@ __rte_experimental
 struct rte_mbuf *
 rte_pcapng_copy(uint16_t port_id, uint32_t queue,
                const struct rte_mbuf *m, struct rte_mempool *mp,
-               uint32_t length, uint64_t timestamp,
+               uint32_t length,
                enum rte_pcapng_direction direction, const char *comment);
 
 
@@ -188,29 +186,22 @@ rte_pcapng_write_packets(rte_pcapng_t *self,
  *  The handle to the packet capture file
  * @param port
  *  The Ethernet port to report stats on.
- * @param comment
- *   Optional comment to add to statistics.
- * @param start_time
- *  The time when packet capture was started in nanoseconds.
- *  Optional: can be zero if not known.
- * @param end_time
- *  The time when packet capture was stopped in nanoseconds.
- *  Optional: can be zero if not finished;
  * @param ifrecv
  *  The number of packets received by capture.
  *  Optional: use UINT64_MAX if not known.
  * @param ifdrop
  *  The number of packets missed by the capture process.
  *  Optional: use UINT64_MAX if not known.
+ * @param comment
+ *  Optional comment to add to statistics.
  * @return
  *  number of bytes written to file, -1 on failure to write file
  */
 __rte_experimental
 ssize_t
 rte_pcapng_write_stats(rte_pcapng_t *self, uint16_t port,
-                      const char *comment,
-                      uint64_t start_time, uint64_t end_time,
-                      uint64_t ifrecv, uint64_t ifdrop);
+                      uint64_t ifrecv, uint64_t ifdrop,
+                      const char *comment);
 
 #ifdef __cplusplus
 }
diff --git a/lib/pdump/rte_pdump.c b/lib/pdump/rte_pdump.c
index e94f49e21250..5a1ec14d7a18 100644
--- a/lib/pdump/rte_pdump.c
+++ b/lib/pdump/rte_pdump.c
@@ -90,7 +90,6 @@ pdump_copy(uint16_t port_id, uint16_t queue,
        int ring_enq;
        uint16_t d_pkts = 0;
        struct rte_mbuf *dup_bufs[nb_pkts];
-       uint64_t ts;
        struct rte_ring *ring;
        struct rte_mempool *mp;
        struct rte_mbuf *p;
@@ -99,7 +98,6 @@ pdump_copy(uint16_t port_id, uint16_t queue,
        if (cbs->filter)
                rte_bpf_exec_burst(cbs->filter, (void **)pkts, rcs, nb_pkts);
 
-       ts = rte_get_tsc_cycles();
        ring = cbs->ring;
        mp = cbs->mp;
        for (i = 0; i < nb_pkts; i++) {
@@ -122,7 +120,7 @@ pdump_copy(uint16_t port_id, uint16_t queue,
                if (cbs->ver == V2)
                        p = rte_pcapng_copy(port_id, queue,
                                            pkts[i], mp, cbs->snaplen,
-                                           ts, direction, NULL);
+                                           direction, NULL);
                else
                        p = rte_pktmbuf_copy(pkts[i], mp, 0, cbs->snaplen);
 
-- 
2.39.2

Reply via email to