From a8e9b8ade110ff9eb9acc2bb28c6e3d1db136c29 Mon Sep 17 00:00:00 2001
From: Ramesh Gowda <ramesh.gowda@mavenir.com>
Date: Thu, 12 Oct 2023 15:26:27 +0530
Subject: [PATCH] GNSS fluctuation resulting in wrong update of NIC PHC time.

Context:
When the GNSS signal fluctuates, due to bad weather or a poorly placed GNSS antenna, the ts2phc process was observed to apply a 1-second offset to the PTP hardware clock (PHC) of the NIC.
This 1-second jump appears momentarily, as captured in the log below.

    Mar 10 15:17:40 sno-cluster-nrnl04-master1 ts2phc: [161756.611] nmea delay: 89943030 ns
    Mar 10 15:17:40 sno-cluster-nrnl04-master1 ts2phc: [161756.611] enp81s0f0 extts index 0 at 1678461497.000000000 corr 0 src 1678461498.844774711 diff -1000000000
    Mar 10 15:17:40 sno-cluster-nrnl04-master1 ts2phc: [161756.611] enp81s0f0 master offset -1000000000 s2 freq -100000000
    Mar 10 15:17:40 sno-cluster-nrnl04-master1 ts2phc: [161756.612] nmea delay: 89943030 ns
    Mar 10 15:17:40 sno-cluster-nrnl04-master1 ts2phc: [161756.612] enp138s0f0 extts index 0 at 1678461497.000000000 corr 0 src 1678461498.845574965 diff -1000000000
    Mar 10 15:17:40 sno-cluster-nrnl04-master1 ts2phc: [161756.612] enp138s0f0 master offset -1000000000 s2 freq -100000000

Problem:
The ts2phc process writes the wrong time into the PHC, and the phc2sys process periodically copies that time into the system clock.
This results in a sudden jump of the system clock, impacting other processes that are tightly bound to system time.

Solution:
With ts2phc in the SERVO_LOCKED state, the time difference between the PHC and GPS would be within +-1 ns.
Under the problem condition, however, the time difference will be +-1 sec.
To prevent a wrong update of the time, this patch defines a new configurable variable called the skip count (option max_phc_update_skip_cnt).
When the servo is in the SERVO_LOCKED state and the time difference is 500 ms or more, ts2phc will skip the PHC update for up to the configured skip count.
The default skip count is 120 (2 minutes).
If the time difference stays at 500 ms or more for more consecutive intervals than the skip count, ts2phc will update the PHC with the time difference value.
---
 config.c       |  1 +
 ts2phc.c       |  3 +++
 ts2phc_slave.c | 23 +++++++++++++++++++++++
 3 files changed, 27 insertions(+)

diff --git a/config.c b/config.c
index d237de9..2a9ee6b 100644
--- a/config.c
+++ b/config.c
@@ -331,6 +331,7 @@ struct config_item config_tab[] = {
 	GLOB_ITEM_INT("utc_offset", CURRENT_UTC_OFFSET, 0, INT_MAX),
 	GLOB_ITEM_INT("verbose", 0, 0, 1),
 	GLOB_ITEM_INT("write_phase_mode", 0, 0, 1),
+	PORT_ITEM_INT("max_phc_update_skip_cnt", 120, 0, 14400),
 };
 
 static struct unicast_master_table *current_uc_mtab;
diff --git a/ts2phc.c b/ts2phc.c
index 2342858..5687c9b 100644
--- a/ts2phc.c
+++ b/ts2phc.c
@@ -15,6 +15,8 @@
 #include "ts2phc_slave.h"
 #include "version.h"
 
+int max_phc_update_skip_count;
+
 struct interface {
 	STAILQ_ENTRY(interface) list;
 };
@@ -146,6 +148,7 @@ int main(int argc, char *argv[])
 	print_set_verbose(config_get_int(cfg, NULL, "verbose"));
 	print_set_syslog(config_get_int(cfg, NULL, "use_syslog"));
 	print_set_level(config_get_int(cfg, NULL, "logging_level"));
+	max_phc_update_skip_count = config_get_int(cfg, NULL, "max_phc_update_skip_cnt");
 
 	STAILQ_FOREACH(iface, &cfg->interfaces, list) {
 		if (1 == config_get_int(cfg, interface_name(iface), "ts2phc.master")) {
diff --git a/ts2phc_slave.c b/ts2phc_slave.c
index 749efe5..6af1aeb 100644
--- a/ts2phc_slave.c
+++ b/ts2phc_slave.c
@@ -29,6 +29,8 @@
 #define SAMPLE_WEIGHT		1.0
 #define SERVO_SYNC_INTERVAL	1.0
 
+extern int max_phc_update_skip_count;
+
 struct ts2phc_slave {
 	char *name;
 	STAILQ_ENTRY(ts2phc_slave) list;
@@ -42,6 +44,8 @@ struct ts2phc_slave {
 	clockid_t clk;
 	int no_adj;
 	int fd;
+	int max_offset_skip_count;
+	int current_offset_skip_count;
 };
 
 struct ts2phc_slave_array {
@@ -219,6 +223,10 @@ static struct ts2phc_slave *ts2phc_slave_create(struct config *cfg, const char *
 		goto no_ext_ts;
 	}
 
+	slave->max_offset_skip_count = max_phc_update_skip_count;
+	slave->current_offset_skip_count = 0;
+	pr_debug("PHC slave %s has skip cnt %d", device, slave->max_offset_skip_count);
+
 	return slave;
 no_ext_ts:
 no_pin_func:
@@ -278,6 +286,17 @@ static int ts2phc_slave_event(struct ts2phc_slave *slave,
 	adj = servo_sample(slave->servo, offset, extts_ts,
 			   SAMPLE_WEIGHT, &slave->state);
 
+	if ((slave->state == SERVO_LOCKED) || (slave->state == SERVO_LOCKED_STABLE)) {
+		if (llabs(offset) >= NS_PER_SEC / 2) {
+			if (slave->current_offset_skip_count < slave->max_offset_skip_count) {
+				pr_debug("Skip current PHC update %s offset %10" PRId64 " s%d freq %+7.0f",
+					slave->name, offset, slave->state, adj);
+				slave->current_offset_skip_count++;
+				return 0;
+			}
+		}
+	}
+
 	pr_debug("%s master offset %10" PRId64 " s%d freq %+7.0f",
 		 slave->name, offset, slave->state, adj);
 
@@ -290,6 +309,10 @@ static int ts2phc_slave_event(struct ts2phc_slave *slave,
 		break;
 	case SERVO_LOCKED:
 	case SERVO_LOCKED_STABLE:
+		if (llabs(offset) < 500) {
+			slave->current_offset_skip_count = 0;
+		}
+
 		clockadj_set_freq(slave->clk, -adj);
 		break;
 	}
-- 
2.20.1

