The commit is pushed to "branch-rh8-4.18.0-193.6.3.vz8.4.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git after rh8-4.18.0-193.6.3.vz8.4.14 ------> commit af2c78f571e62ae91e6c0f8ef69a1f237892ea1f Author: Andrey Ryabinin <aryabi...@virtuozzo.com> Date: Thu Oct 29 14:17:10 2020 +0300
ve: add per-ve CLOCK_MONOTONIC time via __vdso_gettimeofday() Make it possible to read virtualized container's CLOCK_MONOTONIC time via __vdso_gettimeofday(). Record the container's start time in the per-ve vdso and subtract it from the host's time on clock read. https://jira.sw.ru/browse/PSBM-121668 Signed-off-by: Andrey Ryabinin <aryabi...@virtuozzo.com> Reviewed-by: Konstantin Khorenko <khore...@virtuozzo.com> khorenko@ notes: 1) effectively we store in vdso area the same ve->start_time value. If a CT has been previously running, say 5 ns, we store in ve->start_time the value (now - 5), so later monotonic_abs_to_ve() returns now - (ve->start_time) == now - (now - 5) == 5 2) introduced timespec_sub_ns() function has "inline" attribute - it is fine. The stock timespec_add_ns() has "__always_inline" attribute, but the function is static, so there will be different copies of the function anyway even if the function is used in other files. 3) timespec_sub_ns() is introduced for optimization: if we use timespec_add_ns(ns)+monotonic_time_to_ve(), there will be 2 cycles of __iter_div_u64_rem(). =============================================== The original vz7 commit message (f7188f105626): ve/vdso: virtualized monotonic gettime through vdso We already have infrastructure for virtualized vdso, however we use it only to change LINUX_VERSION_NAME in container. Simply store container's start time - ve->start_timespec in vdso variable - VDSO64_ve_start_timespec, and use it in __vdso_clock_gettime() to calculate container's monotonic time. 
Make uts_arch_setup_additional_pages()/uts_prep_vdso_pages_locked() to always setup new vdso, since previous policy to setup vdso only if uts_ns->name.release wouldn't work for virtualized __vdso_clock_gettime() https://jira.sw.ru/browse/PSBM-66451 Signed-off-by: Andrey Ryabinin <aryabi...@virtuozzo.com> Reviewed-by: Dmitry Safonov <dsafo...@virtuozzo.com> --- arch/x86/entry/vdso/vclock_gettime.c | 27 +++++++++++++++++++++++---- arch/x86/entry/vdso/vdso2c.c | 1 + arch/x86/include/asm/vdso.h | 1 + kernel/ve/ve.c | 14 ++++++++++++++ 4 files changed, 39 insertions(+), 4 deletions(-) diff --git a/arch/x86/entry/vdso/vclock_gettime.c b/arch/x86/entry/vdso/vclock_gettime.c index e48ca3afa091..be1de6c4cafa 100644 --- a/arch/x86/entry/vdso/vclock_gettime.c +++ b/arch/x86/entry/vdso/vclock_gettime.c @@ -24,6 +24,8 @@ #define gtod (&VVAR(vsyscall_gtod_data)) +u64 ve_start_time; + extern int __vdso_clock_gettime(clockid_t clock, struct timespec *ts); extern int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz); extern time_t __vdso_time(time_t *t); @@ -227,6 +229,21 @@ notrace static int __always_inline do_realtime(struct timespec *ts) return mode; } +static inline void timespec_sub_ns(struct timespec *ts, u64 ns) +{ + if ((s64)ns <= 0) { + ts->tv_sec += __iter_div_u64_rem(-ns, NSEC_PER_SEC, &ns); + ts->tv_nsec = ns; + } else { + ts->tv_sec -= __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns); + if (ns) { + ts->tv_sec--; + ns = NSEC_PER_SEC - ns; + } + ts->tv_nsec = ns; + } +} + notrace static int __always_inline do_monotonic(struct timespec *ts) { unsigned long seq; @@ -242,9 +259,7 @@ notrace static int __always_inline do_monotonic(struct timespec *ts) ns >>= gtod->shift; } while (unlikely(gtod_read_retry(gtod, seq))); - ts->tv_sec += __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns); - ts->tv_nsec = ns; - + timespec_sub_ns(ts, ve_start_time - ns); return mode; } @@ -260,12 +275,16 @@ notrace static void do_realtime_coarse(struct timespec *ts) notrace static void 
do_monotonic_coarse(struct timespec *ts) { + u64 ns; unsigned long seq; + do { seq = gtod_read_begin(gtod); ts->tv_sec = gtod->monotonic_time_coarse_sec; - ts->tv_nsec = gtod->monotonic_time_coarse_nsec; + ns = gtod->monotonic_time_coarse_nsec; } while (unlikely(gtod_read_retry(gtod, seq))); + + timespec_sub_ns(ts, ve_start_time - ns); } notrace int __vdso_clock_gettime(clockid_t clock, struct timespec *ts) diff --git a/arch/x86/entry/vdso/vdso2c.c b/arch/x86/entry/vdso/vdso2c.c index 7fab0bd96ac1..c76141e9ca16 100644 --- a/arch/x86/entry/vdso/vdso2c.c +++ b/arch/x86/entry/vdso/vdso2c.c @@ -110,6 +110,7 @@ struct vdso_sym required_syms[] = { {"__kernel_rt_sigreturn", true}, {"int80_landing_pad", true}, {"linux_version_code", true}, + {"ve_start_time", true}, }; __attribute__((format(printf, 1, 2))) __attribute__((noreturn)) diff --git a/arch/x86/include/asm/vdso.h b/arch/x86/include/asm/vdso.h index 92c7ac06828e..9c265f79a126 100644 --- a/arch/x86/include/asm/vdso.h +++ b/arch/x86/include/asm/vdso.h @@ -28,6 +28,7 @@ struct vdso_image { long sym___kernel_vsyscall; long sym_int80_landing_pad; long sym_linux_version_code; + long sym_ve_start_time; }; #ifdef CONFIG_X86_64 diff --git a/kernel/ve/ve.c b/kernel/ve/ve.c index 060f9ecc477e..ba9732d8fce1 100644 --- a/kernel/ve/ve.c +++ b/kernel/ve/ve.c @@ -374,6 +374,17 @@ static int ve_start_kthreadd(struct ve_struct *ve) return err; } +static void ve_set_vdso_time(struct ve_struct *ve, u64 time) +{ + u64 *vdso_start_time; + + vdso_start_time = ve->vdso_64->data + ve->vdso_64->sym_ve_start_time; + *vdso_start_time = time; + + vdso_start_time = ve->vdso_32->data + ve->vdso_32->sym_ve_start_time; + *vdso_start_time = time; +} + /* under ve->op_sem write-lock */ static int ve_start_container(struct ve_struct *ve) { @@ -408,6 +419,8 @@ static int ve_start_container(struct ve_struct *ve) if (ve->start_time == 0) { ve->start_time = tsk->start_time; ve->real_start_time = tsk->real_start_time; + + ve_set_vdso_time(ve, 
ve->start_time); } /* The value is wrong, but it is never compared to process * start times */ @@ -1030,6 +1043,7 @@ static ssize_t ve_ts_write(struct kernfs_open_file *of, char *buf, case VE_CF_CLOCK_MONOTONIC: now = ktime_get_ns(); target = &ve->start_time; + ve_set_vdso_time(ve, now - delta_ns); break; case VE_CF_CLOCK_BOOTBASED: now = ktime_get_boot_ns(); _______________________________________________ Devel mailing list Devel@openvz.org https://lists.openvz.org/mailman/listinfo/devel