Avoid doing expensive divide operations when converting
timestamps from cycles (TSC) to nanoseconds for pcapng.

Precompute a rte_reciprocal_u64 inverse of the TSC frequency
and a right-shift count chosen so that the shifted frequency
multiplied by NSEC_PER_SEC fits in uint64_t; the intermediate
product (delta >> shift) * NSEC_PER_SEC therefore cannot
overflow for deltas of up to one second. The per-packet
conversion then requires only a shift, a multiply, and a
reciprocal divide, with no hardware division.

For TSC frequencies below 18.4 GHz the shift value is zero,
but the code handles higher frequencies to be future-proof.

Signed-off-by: Stephen Hemminger <[email protected]>
---
 lib/pcapng/rte_pcapng.c | 97 +++++++++++++++++++++++++++++++----------
 1 file changed, 73 insertions(+), 24 deletions(-)

diff --git a/lib/pcapng/rte_pcapng.c b/lib/pcapng/rte_pcapng.c
index 2cc2ea2f2f..38fc518515 100644
--- a/lib/pcapng/rte_pcapng.c
+++ b/lib/pcapng/rte_pcapng.c
@@ -37,12 +37,23 @@
 /* upper bound for strings in pcapng option data */
 #define PCAPNG_STR_MAX UINT16_MAX
 
+/*
+ * Converter from TSC values to nanoseconds since Unix epoch.
+ * Uses reciprocal multiply to avoid runtime division.
+ */
+struct tsc_clock {
+       uint64_t tsc_base;          /* Reference TSC value sampled at initialization. */
+       uint64_t ns_base;           /* CLOCK_REALTIME at init, in ns since Unix epoch. */
+       struct rte_reciprocal_u64 tsc_hz_inv; /* Reciprocal of (TSC Hz >> shift). */
+       uint32_t shift;             /* Right shift applied to TSC deltas before scaling. */
+};
+
 /* Format of the capture file handle */
 struct rte_pcapng {
        int  outfd;             /* output file */
        unsigned int ports;     /* number of interfaces added */
-       uint64_t offset_ns;     /* ns since 1/1/1970 when initialized */
-       uint64_t tsc_base;      /* TSC when started */
+
+       struct tsc_clock clock;
 
        /* DPDK port id to interface index in file */
        uint32_t port_index[RTE_MAX_ETHPORTS];
@@ -98,21 +109,59 @@ static ssize_t writev(int fd, const struct iovec *iov, int iovcnt)
 #define if_indextoname(ifindex, ifname) NULL
 #endif
 
-/* Convert from TSC (CPU cycles) to nanoseconds */
-static uint64_t
-pcapng_timestamp(const rte_pcapng_t *self, uint64_t cycles)
+/*
+ * Initialize TSC-to-epoch-ns converter: precompute the reciprocal of the
+ * (pre-shifted) TSC frequency and capture TSC and CLOCK_REALTIME as the
+ * reference point. Returns 0 on success, -1 if the TSC frequency is 0.
+ */
+static int
+tsc_clock_init(struct tsc_clock *clk)
 {
-       uint64_t delta, rem, secs, ns;
-       const uint64_t hz = rte_get_tsc_hz();
+       struct timespec ts;
+       uint64_t cycles, tsc_hz, divisor;
+       uint32_t shift;
+
+       memset(clk, 0, sizeof(*clk));
+
+       /* A zero frequency cannot be inverted; report failure to the caller. */
+       tsc_hz = rte_get_tsc_hz();
+       if (tsc_hz == 0)
+               return -1;
+
+       /*
+        * Choose shift so (tsc_hz >> shift) * NSEC_PER_SEC fits in uint64_t.
+        * This bounds the scaled product only for deltas of up to one second.
+        */
+       shift = 0;
+       divisor = tsc_hz;
+       while (divisor > UINT64_MAX / NSEC_PER_SEC) {
+               divisor >>= 1;
+               shift++;
+       }
+
+       clk->shift = shift;
+       clk->tsc_hz_inv = rte_reciprocal_value_u64(divisor);
+
+       /* Bracket the wall-clock read with two TSC reads and use their midpoint. */
+       cycles = rte_get_tsc_cycles();
+       clock_gettime(CLOCK_REALTIME, &ts); /* NOTE(review): return value is not checked */
+       clk->tsc_base = (cycles + rte_get_tsc_cycles()) / 2;
+       clk->ns_base = (uint64_t)ts.tv_sec * NSEC_PER_SEC + ts.tv_nsec;
+
+       return 0;
+}
 
-       delta = cycles - self->tsc_base;
+/* Convert a TSC value to ns since Unix epoch; does not wrap on captures longer than a second. */
+static inline uint64_t
+tsc_to_ns_epoch(const struct tsc_clock *clk, uint64_t tsc)
+{
+       uint64_t delta, secs, ns, hz = rte_get_tsc_hz() >> clk->shift; /* presumably init divisor */
 
-       /* Avoid numeric wraparound by computing seconds first */
-       secs = delta / hz;
-       rem = delta % hz;
-       ns = (rem * NS_PER_S) / hz;
+       delta = (tsc - clk->tsc_base) >> clk->shift; /* same pre-shift as the divisor */
+       secs = rte_reciprocal_divide_u64(delta, &clk->tsc_hz_inv); /* whole seconds elapsed */
+       ns = rte_reciprocal_divide_u64((delta - secs * hz) * NSEC_PER_SEC, &clk->tsc_hz_inv);
 
-       return secs * NS_PER_S + ns + self->offset_ns;
+       return clk->ns_base + secs * NSEC_PER_SEC + ns; /* only the sub-second part was scaled */
 }
 
 /* length of option including padding */
@@ -346,7 +395,7 @@ rte_pcapng_write_stats(rte_pcapng_t *self, uint16_t port_id,
 {
        struct pcapng_statistics *hdr;
        struct pcapng_option *opt;
-       uint64_t start_time = self->offset_ns;
+       uint64_t start_time = self->clock.ns_base;
        uint64_t sample_time;
        uint32_t optlen, len;
        uint32_t *buf;
@@ -399,7 +448,7 @@ rte_pcapng_write_stats(rte_pcapng_t *self, uint16_t port_id,
        hdr->block_length = len;
        hdr->interface_id = self->port_index[port_id];
 
-       sample_time = pcapng_timestamp(self, rte_get_tsc_cycles());
+       sample_time = tsc_to_ns_epoch(&self->clock, rte_get_tsc_cycles());
        hdr->timestamp_hi = sample_time >> 32;
        hdr->timestamp_lo = (uint32_t)sample_time;
 
@@ -684,10 +733,13 @@ rte_pcapng_write_packets(rte_pcapng_t *self,
                        return -1;
                }
 
-               /* adjust timestamp recorded in packet */
+               /*
+                * When data is captured by pcapng_copy the current TSC is stored.
+                * Adjust the value recorded in file to PCAP epoch units.
+                */
                cycles = (uint64_t)epb->timestamp_hi << 32;
                cycles += epb->timestamp_lo;
-               timestamp = pcapng_timestamp(self, cycles);
+               timestamp = tsc_to_ns_epoch(&self->clock, cycles);
                epb->timestamp_hi = timestamp >> 32;
                epb->timestamp_lo = (uint32_t)timestamp;
 
@@ -733,8 +785,6 @@ rte_pcapng_fdopen(int fd,
 {
        unsigned int i;
        rte_pcapng_t *self;
-       struct timespec ts;
-       uint64_t cycles;
        int ret;
 
        if ((osname && strlen(osname) > PCAPNG_STR_MAX) ||
@@ -754,11 +804,10 @@ rte_pcapng_fdopen(int fd,
        self->outfd = fd;
        self->ports = 0;
 
-       /* record start time in ns since 1/1/1970 */
-       cycles = rte_get_tsc_cycles();
-       clock_gettime(CLOCK_REALTIME, &ts);
-       self->tsc_base = (cycles + rte_get_tsc_cycles()) / 2;
-       self->offset_ns = rte_timespec_to_ns(&ts);
+       if (tsc_clock_init(&self->clock) < 0) {
+               rte_errno = ENODEV;
+               goto fail;
+       }
 
        for (i = 0; i < RTE_MAX_ETHPORTS; i++)
                self->port_index[i] = UINT32_MAX;
-- 
2.51.0

Reply via email to