Implement the gettimexattrs64 and getcrosststampattrs callbacks in the ptp_vmclock driver to provide clock quality attributes through the new PTP_SYS_OFFSET_EXTENDED_ATTRS and PTP_SYS_OFFSET_PRECISE_ATTRS ioctls.
The ptp_vmclock device exposes: - error_bound: Derived from time_maxerror_nanosec, accumulated with counter frequency error (counter_period_maxerror_rate_frac_sec) over elapsed counter ticks - clock_status: Mapped from the device's clock_status field - timescale: Determined from time_type (TAI, monotonic, etc.); UTC is reported as TAI because tai_adjust() has already converted the timestamp by the time the attributes are filled in. The legacy ioctls return -EINVAL when clock_status is UNRELIABLE since they have no way to communicate clock state to userspace. The attrs ioctls have a status field for this purpose, so they treat UNRELIABLE as success and let userspace check the status field. To avoid a race where the hypervisor could update clock_status between the timestamp call and the UNRELIABLE check, the clock state is captured inside the seq_count loop for a consistent snapshot with the timestamp. Signed-off-by: Arthur Kiyanovski <[email protected]> --- drivers/ptp/ptp_vmclock.c | 209 +++++++++++++++++++++++++++++++++++--- 1 file changed, 193 insertions(+), 16 deletions(-) diff --git a/drivers/ptp/ptp_vmclock.c b/drivers/ptp/ptp_vmclock.c index 8b630eb..350625e 100644 --- a/drivers/ptp/ptp_vmclock.c +++ b/drivers/ptp/ptp_vmclock.c @@ -53,6 +53,17 @@ struct vmclock_state { char *name; }; +/** + * struct vmclock_crosststamp_ctx - context for get_device_system_crosststamp() + * @st: vmclock device state + * @attrs: optional output for PTP clock attributes, populated inside the + * seq_count loop for a consistent snapshot with the timestamp + */ +struct vmclock_crosststamp_ctx { + struct vmclock_state *st; + struct ptp_clock_attributes *attrs; +}; + #define VMCLOCK_MAX_WAIT ms_to_ktime(100) /* Require at least the flags field to be present. All else can be optional. 
*/ @@ -95,14 +106,123 @@ static bool tai_adjust(struct vmclock_abi *clk, uint64_t *sec) return false; } +static uint8_t vmclock_get_ptp_timescale(uint8_t vmclock_time_type) +{ + switch (vmclock_time_type) { + case VMCLOCK_TIME_UTC: + return PTP_TIMESCALE_UTC; + case VMCLOCK_TIME_TAI: + return PTP_TIMESCALE_TAI; + case VMCLOCK_TIME_MONOTONIC: + return PTP_TIMESCALE_MONOTONIC; + default: + return PTP_TIMESCALE_UNKNOWN; + } +} + +static uint8_t vmclock_get_ptp_status(uint8_t vmclock_status) +{ + switch (vmclock_status) { + case VMCLOCK_STATUS_UNKNOWN: + return PTP_CLOCK_STATUS_UNKNOWN; + case VMCLOCK_STATUS_INITIALIZING: + return PTP_CLOCK_STATUS_INITIALIZING; + case VMCLOCK_STATUS_SYNCHRONIZED: + return PTP_CLOCK_STATUS_SYNCED; + case VMCLOCK_STATUS_FREERUNNING: + return PTP_CLOCK_STATUS_FREE_RUNNING; + case VMCLOCK_STATUS_UNRELIABLE: + return PTP_CLOCK_STATUS_UNRELIABLE; + default: + return PTP_CLOCK_STATUS_UNKNOWN; + } +} + +static void vmclock_populate_ptp_attributes(struct vmclock_state *st, + struct ptp_clock_attributes *att, + uint64_t delta, + uint64_t cycle) +{ + uint64_t maxerror_ns = UINT_MAX; + + if (!att) + return; + + /* Only calculate if the base error is flagged as valid + * by the hypervisor. + */ + if (VMCLOCK_FIELD_PRESENT(st->clk, time_maxerror_nanosec) && + (le64_to_cpu(st->clk->flags) & VMCLOCK_FLAG_TIME_MAXERROR_VALID)) { + maxerror_ns = le64_to_cpu(st->clk->time_maxerror_nanosec); + + /* If frequency error is also valid, accumulate it + * over the delta. 
+ */ + if (VMCLOCK_FIELD_PRESENT(st->clk, counter_period_maxerror_rate_frac_sec) && + (le64_to_cpu(st->clk->flags) & VMCLOCK_FLAG_PERIOD_MAXERROR_VALID)) { + uint64_t maxerror_rate, err_hi, err_frac, growth_ns; + + if (st->clk->counter_period_shift >= 128) { + maxerror_ns = U64_MAX; + goto saturate; + } + + maxerror_rate = le64_to_cpu(st->clk->counter_period_maxerror_rate_frac_sec); + err_frac = mul_u64_u64_shr_add_u64(&err_hi, delta, + maxerror_rate, + st->clk->counter_period_shift, + 0); + + if (err_hi > U64_MAX / NSEC_PER_SEC) { + maxerror_ns = U64_MAX; + goto saturate; + } + + growth_ns = (err_hi * NSEC_PER_SEC) + + mul_u64_u64_shr(err_frac, NSEC_PER_SEC, 64); + + /* Guard against overflow */ + if (U64_MAX - growth_ns < maxerror_ns) + maxerror_ns = U64_MAX; + else + maxerror_ns += growth_ns; + } + } + +saturate: + /* PTP UAPI error_bound is 32-bit nanoseconds */ + att->error_bound = (maxerror_ns > UINT_MAX) ? + UINT_MAX : (uint32_t)maxerror_ns; + att->timescale = vmclock_get_ptp_timescale(st->clk->time_type); + /* tai_adjust() already converted UTC to TAI before we're called */ + if (st->clk->time_type == VMCLOCK_TIME_UTC) + att->timescale = PTP_TIMESCALE_TAI; + att->status = vmclock_get_ptp_status(st->clk->clock_status); + + att->counter_value = cycle; + switch (st->cs_id) { + case CSID_X86_TSC: + att->counter_id = PTP_COUNTER_X86_TSC; + break; + case CSID_ARM_ARCH_COUNTER: + att->counter_id = PTP_COUNTER_ARM_ARCH; + break; + default: + att->counter_id = PTP_COUNTER_UNKNOWN; + break; + } +} + static int vmclock_get_crosststamp(struct vmclock_state *st, struct ptp_system_timestamp *sts, struct system_counterval_t *system_counter, - struct timespec64 *tspec) + struct timespec64 *tspec, + struct ptp_clock_attributes *attrs) { ktime_t deadline = ktime_add(ktime_get(), VMCLOCK_MAX_WAIT); struct system_time_snapshot systime_snapshot; uint64_t cycle, delta, seq, frac_sec; + uint8_t clock_status = VMCLOCK_STATUS_UNKNOWN; #ifdef CONFIG_X86 /* @@ -122,9 +242,6 @@ static 
int vmclock_get_crosststamp(struct vmclock_state *st, */ virt_rmb(); - if (st->clk->clock_status == VMCLOCK_STATUS_UNRELIABLE) - return -EINVAL; - /* * When invoked for gettimex64(), fill in the pre/post system * times. The simple case is when system time is based on the @@ -163,6 +280,18 @@ static int vmclock_get_crosststamp(struct vmclock_state *st, if (!tai_adjust(st->clk, &tspec->tv_sec)) return -EINVAL; + /* + * Capture clock state inside the seq_count loop for a + * consistent snapshot with the timestamp. The attrs path + * reports it to userspace via the status field; the legacy + * path saves it for the UNRELIABLE check after the loop. + */ + if (attrs) + vmclock_populate_ptp_attributes(st, attrs, delta, + cycle); + else + clock_status = st->clk->clock_status; + /* * This pairs with a write barrier in the hypervisor * which populates this structure. @@ -186,6 +315,17 @@ static int vmclock_get_crosststamp(struct vmclock_state *st, sts->post_ts = sts->pre_ts; } + /* + * If attrs is set, attributes were already populated inside the + * seq_count loop. Return success even for UNRELIABLE — the attrs + * ioctl can report the status to userspace. 
+ */ + if (attrs) + return 0; + + if (clock_status == VMCLOCK_STATUS_UNRELIABLE) + return -EINVAL; + return 0; } @@ -198,7 +338,8 @@ static int vmclock_get_crosststamp(struct vmclock_state *st, static int vmclock_get_crosststamp_kvmclock(struct vmclock_state *st, struct ptp_system_timestamp *sts, struct system_counterval_t *system_counter, - struct timespec64 *tspec) + struct timespec64 *tspec, + struct ptp_clock_attributes *attrs) { struct pvclock_vcpu_time_info *pvti = this_cpu_pvti(); unsigned int pvti_ver; @@ -209,7 +350,8 @@ static int vmclock_get_crosststamp_kvmclock(struct vmclock_state *st, do { pvti_ver = pvclock_read_begin(pvti); - ret = vmclock_get_crosststamp(st, sts, system_counter, tspec); + ret = vmclock_get_crosststamp(st, sts, system_counter, tspec, + attrs); if (ret) break; @@ -238,17 +380,19 @@ static int ptp_vmclock_get_time_fn(ktime_t *device_time, struct system_counterval_t *system_counter, void *ctx) { - struct vmclock_state *st = ctx; + struct vmclock_crosststamp_ctx *vctx = ctx; + struct vmclock_state *st = vctx->st; struct timespec64 tspec; int ret; #ifdef SUPPORT_KVMCLOCK if (READ_ONCE(st->sys_cs_id) == CSID_X86_KVM_CLK) ret = vmclock_get_crosststamp_kvmclock(st, NULL, system_counter, - &tspec); + &tspec, vctx->attrs); else #endif - ret = vmclock_get_crosststamp(st, NULL, system_counter, &tspec); + ret = vmclock_get_crosststamp(st, NULL, system_counter, &tspec, + vctx->attrs); if (!ret) *device_time = timespec64_to_ktime(tspec); @@ -256,12 +400,11 @@ static int ptp_vmclock_get_time_fn(ktime_t *device_time, return ret; } -static int ptp_vmclock_getcrosststamp(struct ptp_clock_info *ptp, - struct system_device_crosststamp *xtstamp) +static int ptp_vmclock_do_getcrosststamp(struct vmclock_crosststamp_ctx *vctx, + struct system_device_crosststamp *xtstamp) { - struct vmclock_state *st = container_of(ptp, struct vmclock_state, - ptp_clock_info); - int ret = get_device_system_crosststamp(ptp_vmclock_get_time_fn, st, + struct vmclock_state *st = 
vctx->st; + int ret = get_device_system_crosststamp(ptp_vmclock_get_time_fn, vctx, NULL, xtstamp); #ifdef SUPPORT_KVMCLOCK /* @@ -278,13 +421,23 @@ static int ptp_vmclock_getcrosststamp(struct ptp_clock_info *ptp, systime_snapshot.cs_id == CSID_X86_KVM_CLK) { WRITE_ONCE(st->sys_cs_id, systime_snapshot.cs_id); ret = get_device_system_crosststamp(ptp_vmclock_get_time_fn, - st, NULL, xtstamp); + vctx, NULL, xtstamp); } } #endif return ret; } +static int ptp_vmclock_getcrosststamp(struct ptp_clock_info *ptp, + struct system_device_crosststamp *xtstamp) +{ + struct vmclock_state *st = container_of(ptp, struct vmclock_state, + ptp_clock_info); + struct vmclock_crosststamp_ctx vctx = { .st = st }; + + return ptp_vmclock_do_getcrosststamp(&vctx, xtstamp); +} + /* * PTP clock operations */ @@ -311,7 +464,29 @@ static int ptp_vmclock_gettimex(struct ptp_clock_info *ptp, struct timespec64 *t struct vmclock_state *st = container_of(ptp, struct vmclock_state, ptp_clock_info); - return vmclock_get_crosststamp(st, sts, NULL, ts); + return vmclock_get_crosststamp(st, sts, NULL, ts, NULL); +} + +static int ptp_vmclock_gettimexattrs(struct ptp_clock_info *ptp, + struct timespec64 *ts, + struct ptp_system_timestamp *sts, + struct ptp_clock_attributes *att) +{ + struct vmclock_state *st = container_of(ptp, struct vmclock_state, + ptp_clock_info); + + return vmclock_get_crosststamp(st, sts, NULL, ts, att); +} + +static int ptp_vmclock_getcrosststampattrs(struct ptp_clock_info *ptp, + struct system_device_crosststamp *xtstamp, + struct ptp_clock_attributes *att) +{ + struct vmclock_state *st = container_of(ptp, struct vmclock_state, + ptp_clock_info); + struct vmclock_crosststamp_ctx vctx = { .st = st, .attrs = att }; + + return ptp_vmclock_do_getcrosststamp(&vctx, xtstamp); } static int ptp_vmclock_enable(struct ptp_clock_info *ptp, @@ -329,9 +504,11 @@ static const struct ptp_clock_info ptp_vmclock_info = { .adjfine = ptp_vmclock_adjfine, .adjtime = ptp_vmclock_adjtime, .gettimex64 = 
ptp_vmclock_gettimex, + .gettimexattrs64 = ptp_vmclock_gettimexattrs, .settime64 = ptp_vmclock_settime, .enable = ptp_vmclock_enable, .getcrosststamp = ptp_vmclock_getcrosststamp, + .getcrosststampattrs = ptp_vmclock_getcrosststampattrs, }; static struct ptp_clock *vmclock_ptp_register(struct device *dev, -- 2.47.3
