Re: [patch 3/3] PTP: add kvm PTP driver

2017-01-20 Thread Radim Krcmar
2017-01-18 13:28-0200, Marcelo Tosatti:
> On Wed, Jan 18, 2017 at 04:20:33PM +0100, Radim Krcmar wrote:
>> 2017-01-18 12:53-0200, Marcelo Tosatti:
>> > GOn Wed, Jan 18, 2017 at 12:37:25PM -0200, Marcelo Tosatti wrote:
>> > > On Wed, Jan 18, 2017 at 01:46:58PM +0100, Paolo Bonzini wrote:
>> > > > 
>> > > > 
>> > > > On 18/01/2017 13:24, Marcelo Tosatti wrote:
>> > > > > On Wed, Jan 18, 2017 at 10:17:38AM -0200, Marcelo Tosatti wrote:
>> > > > >> On Tue, Jan 17, 2017 at 04:36:21PM +0100, Radim Krcmar wrote:
>> > > > >>> 2017-01-17 09:30-0200, Marcelo Tosatti:
>> > > >  On Tue, Jan 17, 2017 at 09:03:27AM +0100, Miroslav Lichvar wrote:
>> > > > > Users of the PTP_SYS_OFFSET ioctl assume that (ts[0]+ts[2])/2
>> > > > > corresponds to ts[1], (ts[2]+ts[4])/2 corresponds to ts[3], and 
>> > > > > so on.
>> > > > >
>> > > > > ts[1] ts[3]
>> > > > > Host time-+-+
>> > > > >   | |
>> > > > >   | |
>> > > > > Guest time   +-+-+..
>> > > > > ts[0]ts[2] ts[4]
>> > > > >>>
>> > > > >>> KVM PTP delay moves host ts[i] to be close to guest ts[i+1] and 
>> > > > >>> makes
>> > > > >>> the offset very consistent, so the graph would look like:
>> > > > >>>
>> > > > >>> ts[1] ts[3]
>> > > > >>> Host time-+-+
>> > > > >>>   | |
>> > > > >>>   | |
>> > > > >>> Guest time   +-+-+..
>> > > > >>> ts[0]ts[2] ts[4]
>> > > > >>>
>> > > > >>> which doesn't sound good if users assume that the host reading is 
>> > > > >>> in the
>> > > > >>> middle -- the guest time would be ahead of the host time.
>> > > > >>
>> > > > >> Testcase: run a guest and a loop sending SIGUSR1 to vcpu0 (emulating
>> > > > >> intense interrupts). Follows results:
>> > > > >>
>> > > > >> Without TSC delta calculation:
>> > > > >> =
>> > > > >>
>> > > > >> #* PHC0  0   3   377 2-99ns[ 
>> > > > >> +206ns] +/-  116ns
>> > > > >> #* PHC0  0   3   377 8   +202ns[ 
>> > > > >> +249ns] +/-  111ns
>> > > > >> #* PHC0  0   3   377 8   -213ns[ 
>> > > > >> +683ns] +/-   88ns
>> > > > >> #* PHC0  0   3   377 6+77ns[ 
>> > > > >> +319ns] +/-   56ns
>> > > > >> #* PHC0  0   3   377 4   
>> > > > >> -771ns[-1029ns] +/-   93ns
>> > > > >> #* PHC0  0   3   37710-49ns[  
>> > > > >> -58ns] +/-  121ns
>> > > > >> #* PHC0  0   3   377 9   +562ns[ 
>> > > > >> +703ns] +/-  107ns
>> > > > >> #* PHC0  0   3   377 6 -2ns[   
>> > > > >> -3ns] +/-   94ns
>> > > > >> #* PHC0  0   3   377 4   +451ns[ 
>> > > > >> +494ns] +/-  138ns
>> > > > >> #* PHC0  0   3   37711-67ns[  
>> > > > >> -74ns] +/-  113ns
>> > > > >> #* PHC0  0   3   377 8   +244ns[ 
>> > > > >> +264ns] +/-  119ns
>> > > > >> #* PHC0  0   3   377 7   -696ns[ 
>> > > > >> -890ns] +/-   89ns
>> > > > >> #* PHC0  0   3   377 4   +468ns[ 
>> > > > >> +560ns] +/-  110ns
>> > > > >> #* PHC0  0   3   37711   -310ns[ 
>> > > > >> -430ns] +/-   72ns
>> > > > >> #* PHC0  0   3   377 9   +189ns[ 
>> > > > >> +298ns] +/-   54ns
>> > > > >> #* PHC0  0   3   377 7   +594ns[ 
>> > > > >> +473ns] +/-   96ns
>> > > > >> #* PHC0  0   3   377 5   +151ns[ 
>> > > > >> +280ns] +/-   71ns
>> > > > >> #* PHC0  0   3   37710   -590ns[ 
>> > > > >> -696ns] +/-   94ns
>> > > > >> #* PHC0  0   3   377 8   +415ns[ 
>> > > > >> +526ns] +/-   74ns
>> > > > >> #* PHC0  0   3   377 6  
>> > > > >> +1381ns[+1469ns] +/-  101ns
>> > > > >> #* PHC0  0   3   377 4   
>> > > > >> +571ns[+1304ns] +/-   54ns
>> > > > >> #* PHC0  0   3   377 8 -5ns[  
>> > > > >> +71ns] +/-  139ns
>> > > > >> #* PHC0  0   3   377 7   -247ns[ 
>> > > > >> -502ns] +/-   69ns
>> > > > >> #* PHC0  0   3   377 5   -283ns[ 
>> > > > >> +879ns] +/-   73ns
>> > > > >> #* PHC0  0   3   377 3   +148ns[ 
>> > > > >> -109ns] +/-   61ns
>> > > > >>
>> > > > >> With TSC delta calculation:
>> > > > >> 
>> > > > >>
>> > > > >> #* PHC0  0   3   377 7   +379ns[ 
>> > > > >> +432ns] +/-   53ns
>> > > > >> #* PHC0  0  

Re: [patch 3/3] PTP: add kvm PTP driver

2017-01-20 Thread Radim Krcmar
2017-01-18 13:28-0200, Marcelo Tosatti:
> On Wed, Jan 18, 2017 at 04:20:33PM +0100, Radim Krcmar wrote:
>> 2017-01-18 12:53-0200, Marcelo Tosatti:
>> > GOn Wed, Jan 18, 2017 at 12:37:25PM -0200, Marcelo Tosatti wrote:
>> > > On Wed, Jan 18, 2017 at 01:46:58PM +0100, Paolo Bonzini wrote:
>> > > > 
>> > > > 
>> > > > On 18/01/2017 13:24, Marcelo Tosatti wrote:
>> > > > > On Wed, Jan 18, 2017 at 10:17:38AM -0200, Marcelo Tosatti wrote:
>> > > > >> On Tue, Jan 17, 2017 at 04:36:21PM +0100, Radim Krcmar wrote:
>> > > > >>> 2017-01-17 09:30-0200, Marcelo Tosatti:
>> > > >  On Tue, Jan 17, 2017 at 09:03:27AM +0100, Miroslav Lichvar wrote:
>> > > > > Users of the PTP_SYS_OFFSET ioctl assume that (ts[0]+ts[2])/2
>> > > > > corresponds to ts[1], (ts[2]+ts[4])/2 corresponds to ts[3], and 
>> > > > > so on.
>> > > > >
>> > > > > ts[1] ts[3]
>> > > > > Host time-+-+
>> > > > >   | |
>> > > > >   | |
>> > > > > Guest time   +-+-+..
>> > > > > ts[0]ts[2] ts[4]
>> > > > >>>
>> > > > >>> KVM PTP delay moves host ts[i] to be close to guest ts[i+1] and 
>> > > > >>> makes
>> > > > >>> the offset very consistent, so the graph would look like:
>> > > > >>>
>> > > > >>> ts[1] ts[3]
>> > > > >>> Host time-+-+
>> > > > >>>   | |
>> > > > >>>   | |
>> > > > >>> Guest time   +-+-+..
>> > > > >>> ts[0]ts[2] ts[4]
>> > > > >>>
>> > > > >>> which doesn't sound good if users assume that the host reading is 
>> > > > >>> in the
>> > > > >>> middle -- the guest time would be ahead of the host time.
>> > > > >>
>> > > > >> Testcase: run a guest and a loop sending SIGUSR1 to vcpu0 (emulating
>> > > > >> intense interrupts). Follows results:
>> > > > >>
>> > > > >> Without TSC delta calculation:
>> > > > >> =
>> > > > >>
>> > > > >> #* PHC0  0   3   377 2-99ns[ 
>> > > > >> +206ns] +/-  116ns
>> > > > >> #* PHC0  0   3   377 8   +202ns[ 
>> > > > >> +249ns] +/-  111ns
>> > > > >> #* PHC0  0   3   377 8   -213ns[ 
>> > > > >> +683ns] +/-   88ns
>> > > > >> #* PHC0  0   3   377 6+77ns[ 
>> > > > >> +319ns] +/-   56ns
>> > > > >> #* PHC0  0   3   377 4   
>> > > > >> -771ns[-1029ns] +/-   93ns
>> > > > >> #* PHC0  0   3   37710-49ns[  
>> > > > >> -58ns] +/-  121ns
>> > > > >> #* PHC0  0   3   377 9   +562ns[ 
>> > > > >> +703ns] +/-  107ns
>> > > > >> #* PHC0  0   3   377 6 -2ns[   
>> > > > >> -3ns] +/-   94ns
>> > > > >> #* PHC0  0   3   377 4   +451ns[ 
>> > > > >> +494ns] +/-  138ns
>> > > > >> #* PHC0  0   3   37711-67ns[  
>> > > > >> -74ns] +/-  113ns
>> > > > >> #* PHC0  0   3   377 8   +244ns[ 
>> > > > >> +264ns] +/-  119ns
>> > > > >> #* PHC0  0   3   377 7   -696ns[ 
>> > > > >> -890ns] +/-   89ns
>> > > > >> #* PHC0  0   3   377 4   +468ns[ 
>> > > > >> +560ns] +/-  110ns
>> > > > >> #* PHC0  0   3   37711   -310ns[ 
>> > > > >> -430ns] +/-   72ns
>> > > > >> #* PHC0  0   3   377 9   +189ns[ 
>> > > > >> +298ns] +/-   54ns
>> > > > >> #* PHC0  0   3   377 7   +594ns[ 
>> > > > >> +473ns] +/-   96ns
>> > > > >> #* PHC0  0   3   377 5   +151ns[ 
>> > > > >> +280ns] +/-   71ns
>> > > > >> #* PHC0  0   3   37710   -590ns[ 
>> > > > >> -696ns] +/-   94ns
>> > > > >> #* PHC0  0   3   377 8   +415ns[ 
>> > > > >> +526ns] +/-   74ns
>> > > > >> #* PHC0  0   3   377 6  
>> > > > >> +1381ns[+1469ns] +/-  101ns
>> > > > >> #* PHC0  0   3   377 4   
>> > > > >> +571ns[+1304ns] +/-   54ns
>> > > > >> #* PHC0  0   3   377 8 -5ns[  
>> > > > >> +71ns] +/-  139ns
>> > > > >> #* PHC0  0   3   377 7   -247ns[ 
>> > > > >> -502ns] +/-   69ns
>> > > > >> #* PHC0  0   3   377 5   -283ns[ 
>> > > > >> +879ns] +/-   73ns
>> > > > >> #* PHC0  0   3   377 3   +148ns[ 
>> > > > >> -109ns] +/-   61ns
>> > > > >>
>> > > > >> With TSC delta calculation:
>> > > > >> 
>> > > > >>
>> > > > >> #* PHC0  0   3   377 7   +379ns[ 
>> > > > >> +432ns] +/-   53ns
>> > > > >> #* PHC0  0  

Re: [patch 3/3] PTP: add kvm PTP driver

2017-01-20 Thread Marcelo Tosatti
On Wed, Jan 18, 2017 at 04:45:07PM +0100, Paolo Bonzini wrote:
> 
> 
> On 18/01/2017 15:50, Marcelo Tosatti wrote:
> >> Interesting idea!  For this to work, KVM needs to implement
> >> getcrosstimestamp and ptp_chardev.c can then add an alternative
> >> implementation of PTP_SYS_OFFSET, based on precise cross timestamps.
> >>
> >> Something like
> >>
> >> for (i = 0; i <= sysoff->n_samples; i++) {
> >>// ... call getcrosststamp ...
> >>sysns = ktime_to_ns(xtstamp.sys_realtime);
> >>if (i > 0) {
> >>devns = ktime_to_ns(xtstamp.device);
> >>devns -= (sysns - prev_sysns) / 2;
> >>devts = ns_to_timespec(devns);
> >>pct->sec = devts.tv_sec;
> >>pct->nsec = devts.tv_nsec;
> >>pct++;
> >>}
> >>systs = ns_to_timespec(sysns);
> >> pct->sec = ts.tv_sec;
> >> pct->nsec = ts.tv_nsec;
> >> pct++;
> >>prev_sysns = sysns;
> >> }
> >>
> >> Marcelo, can you give it a try?
> > 
> > Can convert fine, but problem is the simultaneous read
> > of host and guest clocks.
> 
> Could the TSC from the hypercall be applied to kvmclock to do this?  My
> understanding is that get_device_system_crosststamp (which is used in
> the sole in-tree implementation of getcrosststamp) already contains all
> the logic to do that.
> 
> Paolo

Yeah feed that TSC to pvclock_clocksource_read.

Cool will take a look at that function thanks.



Re: [patch 3/3] PTP: add kvm PTP driver

2017-01-20 Thread Marcelo Tosatti
On Wed, Jan 18, 2017 at 04:45:07PM +0100, Paolo Bonzini wrote:
> 
> 
> On 18/01/2017 15:50, Marcelo Tosatti wrote:
> >> Interesting idea!  For this to work, KVM needs to implement
> >> getcrosstimestamp and ptp_chardev.c can then add an alternative
> >> implementation of PTP_SYS_OFFSET, based on precise cross timestamps.
> >>
> >> Something like
> >>
> >> for (i = 0; i <= sysoff->n_samples; i++) {
> >>// ... call getcrosststamp ...
> >>sysns = ktime_to_ns(xtstamp.sys_realtime);
> >>if (i > 0) {
> >>devns = ktime_to_ns(xtstamp.device);
> >>devns -= (sysns - prev_sysns) / 2;
> >>devts = ns_to_timespec(devns);
> >>pct->sec = devts.tv_sec;
> >>pct->nsec = devts.tv_nsec;
> >>pct++;
> >>}
> >>systs = ns_to_timespec(sysns);
> >> pct->sec = ts.tv_sec;
> >> pct->nsec = ts.tv_nsec;
> >> pct++;
> >>prev_sysns = sysns;
> >> }
> >>
> >> Marcelo, can you give it a try?
> > 
> > Can convert fine, but problem is the simultaneous read
> > of host and guest clocks.
> 
> Could the TSC from the hypercall be applied to kvmclock to do this?  My
> understanding is that get_device_system_crosststamp (which is used in
> the sole in-tree implementation of getcrosststamp) already contains all
> the logic to do that.
> 
> Paolo

Yeah feed that TSC to pvclock_clocksource_read.

Cool will take a look at that function thanks.



Re: [patch 3/3] PTP: add kvm PTP driver

2017-01-20 Thread Marcelo Tosatti
On Wed, Jan 18, 2017 at 04:20:33PM +0100, Radim Krcmar wrote:
> 2017-01-18 12:53-0200, Marcelo Tosatti:
> > GOn Wed, Jan 18, 2017 at 12:37:25PM -0200, Marcelo Tosatti wrote:
> > > On Wed, Jan 18, 2017 at 01:46:58PM +0100, Paolo Bonzini wrote:
> > > > 
> > > > 
> > > > On 18/01/2017 13:24, Marcelo Tosatti wrote:
> > > > > On Wed, Jan 18, 2017 at 10:17:38AM -0200, Marcelo Tosatti wrote:
> > > > >> On Tue, Jan 17, 2017 at 04:36:21PM +0100, Radim Krcmar wrote:
> > > > >>> 2017-01-17 09:30-0200, Marcelo Tosatti:
> > > >  On Tue, Jan 17, 2017 at 09:03:27AM +0100, Miroslav Lichvar wrote:
> > > > > Users of the PTP_SYS_OFFSET ioctl assume that (ts[0]+ts[2])/2
> > > > > corresponds to ts[1], (ts[2]+ts[4])/2 corresponds to ts[3], and 
> > > > > so on.
> > > > >
> > > > > ts[1] ts[3]
> > > > > Host time-+-+
> > > > >   | |
> > > > >   | |
> > > > > Guest time   +-+-+..
> > > > > ts[0]ts[2] ts[4]
> > > > >>>
> > > > >>> KVM PTP delay moves host ts[i] to be close to guest ts[i+1] and 
> > > > >>> makes
> > > > >>> the offset very consistent, so the graph would look like:
> > > > >>>
> > > > >>> ts[1] ts[3]
> > > > >>> Host time-+-+
> > > > >>>   | |
> > > > >>>   | |
> > > > >>> Guest time   +-+-+..
> > > > >>> ts[0]ts[2] ts[4]
> > > > >>>
> > > > >>> which doesn't sound good if users assume that the host reading is 
> > > > >>> in the
> > > > >>> middle -- the guest time would be ahead of the host time.
> > > > >>
> > > > >> Testcase: run a guest and a loop sending SIGUSR1 to vcpu0 (emulating
> > > > >> intense interrupts). Follows results:
> > > > >>
> > > > >> Without TSC delta calculation:
> > > > >> =
> > > > >>
> > > > >> #* PHC0  0   3   377 2-99ns[ +206ns] 
> > > > >> +/-  116ns
> > > > >> #* PHC0  0   3   377 8   +202ns[ +249ns] 
> > > > >> +/-  111ns
> > > > >> #* PHC0  0   3   377 8   -213ns[ +683ns] 
> > > > >> +/-   88ns
> > > > >> #* PHC0  0   3   377 6+77ns[ +319ns] 
> > > > >> +/-   56ns
> > > > >> #* PHC0  0   3   377 4   -771ns[-1029ns] 
> > > > >> +/-   93ns
> > > > >> #* PHC0  0   3   37710-49ns[  -58ns] 
> > > > >> +/-  121ns
> > > > >> #* PHC0  0   3   377 9   +562ns[ +703ns] 
> > > > >> +/-  107ns
> > > > >> #* PHC0  0   3   377 6 -2ns[   -3ns] 
> > > > >> +/-   94ns
> > > > >> #* PHC0  0   3   377 4   +451ns[ +494ns] 
> > > > >> +/-  138ns
> > > > >> #* PHC0  0   3   37711-67ns[  -74ns] 
> > > > >> +/-  113ns
> > > > >> #* PHC0  0   3   377 8   +244ns[ +264ns] 
> > > > >> +/-  119ns
> > > > >> #* PHC0  0   3   377 7   -696ns[ -890ns] 
> > > > >> +/-   89ns
> > > > >> #* PHC0  0   3   377 4   +468ns[ +560ns] 
> > > > >> +/-  110ns
> > > > >> #* PHC0  0   3   37711   -310ns[ -430ns] 
> > > > >> +/-   72ns
> > > > >> #* PHC0  0   3   377 9   +189ns[ +298ns] 
> > > > >> +/-   54ns
> > > > >> #* PHC0  0   3   377 7   +594ns[ +473ns] 
> > > > >> +/-   96ns
> > > > >> #* PHC0  0   3   377 5   +151ns[ +280ns] 
> > > > >> +/-   71ns
> > > > >> #* PHC0  0   3   37710   -590ns[ -696ns] 
> > > > >> +/-   94ns
> > > > >> #* PHC0  0   3   377 8   +415ns[ +526ns] 
> > > > >> +/-   74ns
> > > > >> #* PHC0  0   3   377 6  +1381ns[+1469ns] 
> > > > >> +/-  101ns
> > > > >> #* PHC0  0   3   377 4   +571ns[+1304ns] 
> > > > >> +/-   54ns
> > > > >> #* PHC0  0   3   377 8 -5ns[  +71ns] 
> > > > >> +/-  139ns
> > > > >> #* PHC0  0   3   377 7   -247ns[ -502ns] 
> > > > >> +/-   69ns
> > > > >> #* PHC0  0   3   377 5   -283ns[ +879ns] 
> > > > >> +/-   73ns
> > > > >> #* PHC0  0   3   377 3   +148ns[ -109ns] 
> > > > >> +/-   61ns
> > > > >>
> > > > >> With TSC delta calculation:
> > > > >> 
> > > > >>
> > > > >> #* PHC0  0   3   377 7   +379ns[ +432ns] 
> > > > >> +/-   53ns
> > > > >> #* PHC0  0   3   377 9   +106ns[ +420ns] 
> > > > >> +/-   42ns
> > > > >> #* PHC0  0   3   377 7-58ns[ -136ns] 
> > 

Re: [patch 3/3] PTP: add kvm PTP driver

2017-01-20 Thread Marcelo Tosatti
On Wed, Jan 18, 2017 at 04:20:33PM +0100, Radim Krcmar wrote:
> 2017-01-18 12:53-0200, Marcelo Tosatti:
> > GOn Wed, Jan 18, 2017 at 12:37:25PM -0200, Marcelo Tosatti wrote:
> > > On Wed, Jan 18, 2017 at 01:46:58PM +0100, Paolo Bonzini wrote:
> > > > 
> > > > 
> > > > On 18/01/2017 13:24, Marcelo Tosatti wrote:
> > > > > On Wed, Jan 18, 2017 at 10:17:38AM -0200, Marcelo Tosatti wrote:
> > > > >> On Tue, Jan 17, 2017 at 04:36:21PM +0100, Radim Krcmar wrote:
> > > > >>> 2017-01-17 09:30-0200, Marcelo Tosatti:
> > > >  On Tue, Jan 17, 2017 at 09:03:27AM +0100, Miroslav Lichvar wrote:
> > > > > Users of the PTP_SYS_OFFSET ioctl assume that (ts[0]+ts[2])/2
> > > > > corresponds to ts[1], (ts[2]+ts[4])/2 corresponds to ts[3], and 
> > > > > so on.
> > > > >
> > > > > ts[1] ts[3]
> > > > > Host time-+-+
> > > > >   | |
> > > > >   | |
> > > > > Guest time   +-+-+..
> > > > > ts[0]ts[2] ts[4]
> > > > >>>
> > > > >>> KVM PTP delay moves host ts[i] to be close to guest ts[i+1] and 
> > > > >>> makes
> > > > >>> the offset very consistent, so the graph would look like:
> > > > >>>
> > > > >>> ts[1] ts[3]
> > > > >>> Host time-+-+
> > > > >>>   | |
> > > > >>>   | |
> > > > >>> Guest time   +-+-+..
> > > > >>> ts[0]ts[2] ts[4]
> > > > >>>
> > > > >>> which doesn't sound good if users assume that the host reading is 
> > > > >>> in the
> > > > >>> middle -- the guest time would be ahead of the host time.
> > > > >>
> > > > >> Testcase: run a guest and a loop sending SIGUSR1 to vcpu0 (emulating
> > > > >> intense interrupts). Follows results:
> > > > >>
> > > > >> Without TSC delta calculation:
> > > > >> =
> > > > >>
> > > > >> #* PHC0  0   3   377 2-99ns[ +206ns] 
> > > > >> +/-  116ns
> > > > >> #* PHC0  0   3   377 8   +202ns[ +249ns] 
> > > > >> +/-  111ns
> > > > >> #* PHC0  0   3   377 8   -213ns[ +683ns] 
> > > > >> +/-   88ns
> > > > >> #* PHC0  0   3   377 6+77ns[ +319ns] 
> > > > >> +/-   56ns
> > > > >> #* PHC0  0   3   377 4   -771ns[-1029ns] 
> > > > >> +/-   93ns
> > > > >> #* PHC0  0   3   37710-49ns[  -58ns] 
> > > > >> +/-  121ns
> > > > >> #* PHC0  0   3   377 9   +562ns[ +703ns] 
> > > > >> +/-  107ns
> > > > >> #* PHC0  0   3   377 6 -2ns[   -3ns] 
> > > > >> +/-   94ns
> > > > >> #* PHC0  0   3   377 4   +451ns[ +494ns] 
> > > > >> +/-  138ns
> > > > >> #* PHC0  0   3   37711-67ns[  -74ns] 
> > > > >> +/-  113ns
> > > > >> #* PHC0  0   3   377 8   +244ns[ +264ns] 
> > > > >> +/-  119ns
> > > > >> #* PHC0  0   3   377 7   -696ns[ -890ns] 
> > > > >> +/-   89ns
> > > > >> #* PHC0  0   3   377 4   +468ns[ +560ns] 
> > > > >> +/-  110ns
> > > > >> #* PHC0  0   3   37711   -310ns[ -430ns] 
> > > > >> +/-   72ns
> > > > >> #* PHC0  0   3   377 9   +189ns[ +298ns] 
> > > > >> +/-   54ns
> > > > >> #* PHC0  0   3   377 7   +594ns[ +473ns] 
> > > > >> +/-   96ns
> > > > >> #* PHC0  0   3   377 5   +151ns[ +280ns] 
> > > > >> +/-   71ns
> > > > >> #* PHC0  0   3   37710   -590ns[ -696ns] 
> > > > >> +/-   94ns
> > > > >> #* PHC0  0   3   377 8   +415ns[ +526ns] 
> > > > >> +/-   74ns
> > > > >> #* PHC0  0   3   377 6  +1381ns[+1469ns] 
> > > > >> +/-  101ns
> > > > >> #* PHC0  0   3   377 4   +571ns[+1304ns] 
> > > > >> +/-   54ns
> > > > >> #* PHC0  0   3   377 8 -5ns[  +71ns] 
> > > > >> +/-  139ns
> > > > >> #* PHC0  0   3   377 7   -247ns[ -502ns] 
> > > > >> +/-   69ns
> > > > >> #* PHC0  0   3   377 5   -283ns[ +879ns] 
> > > > >> +/-   73ns
> > > > >> #* PHC0  0   3   377 3   +148ns[ -109ns] 
> > > > >> +/-   61ns
> > > > >>
> > > > >> With TSC delta calculation:
> > > > >> 
> > > > >>
> > > > >> #* PHC0  0   3   377 7   +379ns[ +432ns] 
> > > > >> +/-   53ns
> > > > >> #* PHC0  0   3   377 9   +106ns[ +420ns] 
> > > > >> +/-   42ns
> > > > >> #* PHC0  0   3   377 7-58ns[ -136ns] 
> > 

Re: [patch 3/3] PTP: add kvm PTP driver

2017-01-18 Thread Radim Krcmar
2017-01-18 16:54+0100, Miroslav Lichvar:
> On Wed, Jan 18, 2017 at 12:24:09PM -0200, Marcelo Tosatti wrote:
>> On Wed, Jan 18, 2017 at 01:46:58PM +0100, Paolo Bonzini wrote:
>> > I'd rather use PTP_SYS_OFFSET_PRECISE instead, but unfortunately chrony
>> > does not support it---but I would still prefer you to support
>> > PTP_SYS_OFFSET_PRECISE as well.
>> 
>> Sure, I'll check if it makes sense to implement PTP_SYS_OFFSET_PRECISE for 
>> KVM case.
> 
> But is it really so precise that the application can safely assume
> there are no errors due to asymmetric delays, etc? I think
> PTP_SYS_OFFSET_PRECISE should be supported only if the accuracy of
> the offset measured between the HW and system clock is not worse than
> the precision of the system clock (typically few tens of nanoseconds).

KVM is actually the perfect user of PTP_SYS_OFFSET_PRECISE: the host and
guest system clocks use the same hardware clock.

(We want to copy the host data about the clock to the guest, and we just
 happen to use PTP for that.)

> It would be good to verify the accuracy of the offset when the host
> and guest clocks are synchronised to each other over PTP using two
> NICs with HW timestamping.

We can verify the accuracy just by reading the host time and tsc
computing guest time from that tsc -- they should be equal.

Well, we don't give host frequency to the guest, but I hope that the
guest can compute it accurately after polling the host few times.
(Still, a room for improvement.)

NICs with HW timestamping would be a bit more complicated to set up. :)


Re: [patch 3/3] PTP: add kvm PTP driver

2017-01-18 Thread Radim Krcmar
2017-01-18 16:54+0100, Miroslav Lichvar:
> On Wed, Jan 18, 2017 at 12:24:09PM -0200, Marcelo Tosatti wrote:
>> On Wed, Jan 18, 2017 at 01:46:58PM +0100, Paolo Bonzini wrote:
>> > I'd rather use PTP_SYS_OFFSET_PRECISE instead, but unfortunately chrony
>> > does not support it---but I would still prefer you to support
>> > PTP_SYS_OFFSET_PRECISE as well.
>> 
>> Sure, I'll check if it makes sense to implement PTP_SYS_OFFSET_PRECISE for 
>> KVM case.
> 
> But is it really so precise that the application can safely assume
> there are no errors due to asymmetric delays, etc? I think
> PTP_SYS_OFFSET_PRECISE should be supported only if the accuracy of
> the offset measured between the HW and system clock is not worse than
> the precision of the system clock (typically few tens of nanoseconds).

KVM is actually the perfect user of PTP_SYS_OFFSET_PRECISE: the host and
guest system clocks use the same hardware clock.

(We want to copy the host data about the clock to the guest, and we just
 happen to use PTP for that.)

> It would be good to verify the accuracy of the offset when the host
> and guest clocks are synchronised to each other over PTP using two
> NICs with HW timestamping.

We can verify the accuracy just by reading the host time and tsc
computing guest time from that tsc -- they should be equal.

Well, we don't give host frequency to the guest, but I hope that the
guest can compute it accurately after polling the host few times.
(Still, a room for improvement.)

NICs with HW timestamping would be a bit more complicated to set up. :)


Re: [patch 3/3] PTP: add kvm PTP driver

2017-01-18 Thread Paolo Bonzini


On 18/01/2017 16:54, Miroslav Lichvar wrote:
>> Sure, I'll check if it makes sense to implement PTP_SYS_OFFSET_PRECISE for 
>> KVM case.
> But is it really so precise that the application can safely assume
> there are no errors due to asymmetric delays, etc? I think
> PTP_SYS_OFFSET_PRECISE should be supported only if the accuracy of
> the offset measured between the HW and system clock is not worse than
> the precision of the system clock (typically few tens of nanoseconds).
> 
> It would be good to verify the accuracy of the offset when the host
> and guest clocks are synchronised to each other over PTP using two
> NICs with HW timestamping.

PTP_SYS_OFFSET_PRECISE works if the guest can compute its own timestamp
based on the same source as the device.  On bare metal you have:

- the source for system clock is the TSC (with clocksource_tsc)
- device provides a (time, ART) tuple

You can convert ART->TSC and then ask clocksource_tsc for a system time
based on the device-provided ART value.  Likewise for KVM:

- the source for system clock is the guest TSC (with kvmclock)
- host can provide a (time, guest TSC) tuple

The PTP driver can take the host-provided guest TSC, and ask kvmclock
for a system time based on that TSC value.  It's even simpler because
there's no ART->TSC conversion involved, and it will always be precise
and independent of any vmexit or interrupt delay.

Paolo


Re: [patch 3/3] PTP: add kvm PTP driver

2017-01-18 Thread Paolo Bonzini


On 18/01/2017 16:54, Miroslav Lichvar wrote:
>> Sure, I'll check if it makes sense to implement PTP_SYS_OFFSET_PRECISE for 
>> KVM case.
> But is it really so precise that the application can safely assume
> there are no errors due to asymmetric delays, etc? I think
> PTP_SYS_OFFSET_PRECISE should be supported only if the accuracy of
> the offset measured between the HW and system clock is not worse than
> the precision of the system clock (typically few tens of nanoseconds).
> 
> It would be good to verify the accuracy of the offset when the host
> and guest clocks are synchronised to each other over PTP using two
> NICs with HW timestamping.

PTP_SYS_OFFSET_PRECISE works if the guest can compute its own timestamp
based on the same source as the device.  On bare metal you have:

- the source for system clock is the TSC (with clocksource_tsc)
- device provides a (time, ART) tuple

You can convert ART->TSC and then ask clocksource_tsc for a system time
based on the device-provided ART value.  Likewise for KVM:

- the source for system clock is the guest TSC (with kvmclock)
- host can provide a (time, guest TSC) tuple

The PTP driver can take the host-provided guest TSC, and ask kvmclock
for a system time based on that TSC value.  It's even simpler because
there's no ART->TSC conversion involved, and it will always be precise
and independent of any vmexit or interrupt delay.

Paolo


Re: [patch 3/3] PTP: add kvm PTP driver

2017-01-18 Thread Radim Krcmar
2017-01-18 13:46+0100, Paolo Bonzini:
> 
> 
> On 18/01/2017 13:24, Marcelo Tosatti wrote:
> > On Wed, Jan 18, 2017 at 10:17:38AM -0200, Marcelo Tosatti wrote:
> >> On Tue, Jan 17, 2017 at 04:36:21PM +0100, Radim Krcmar wrote:
> >>> 2017-01-17 09:30-0200, Marcelo Tosatti:
>  On Tue, Jan 17, 2017 at 09:03:27AM +0100, Miroslav Lichvar wrote:
> > Users of the PTP_SYS_OFFSET ioctl assume that (ts[0]+ts[2])/2
> > corresponds to ts[1], (ts[2]+ts[4])/2 corresponds to ts[3], and so on.
> >
> > ts[1] ts[3]
> > Host time-+-+
> >   | |
> >   | |
> > Guest time   +-+-+..
> > ts[0]ts[2] ts[4]
> >>>
> >>> KVM PTP delay moves host ts[i] to be close to guest ts[i+1] and makes
> >>> the offset very consistent, so the graph would look like:
> >>>
> >>> ts[1] ts[3]
> >>> Host time-+-+
> >>>   | |
> >>>   | |
> >>> Guest time   +-+-+..
> >>> ts[0]ts[2] ts[4]
> >>>
> >>> which doesn't sound good if users assume that the host reading is in the
> >>> middle -- the guest time would be ahead of the host time.
> >>
> >> Testcase: run a guest and a loop sending SIGUSR1 to vcpu0 (emulating
> >> intense interrupts). Follows results:
> >>
> >> Without TSC delta calculation:
> >> =
> >>
> >> #* PHC0  0   3   377 2-99ns[ +206ns] +/-  
> >> 116ns
> >> #* PHC0  0   3   377 8   +202ns[ +249ns] +/-  
> >> 111ns
> >> #* PHC0  0   3   377 8   -213ns[ +683ns] +/-   
> >> 88ns
> >> #* PHC0  0   3   377 6+77ns[ +319ns] +/-   
> >> 56ns
> >> #* PHC0  0   3   377 4   -771ns[-1029ns] +/-   
> >> 93ns
> >> #* PHC0  0   3   37710-49ns[  -58ns] +/-  
> >> 121ns
> >> #* PHC0  0   3   377 9   +562ns[ +703ns] +/-  
> >> 107ns
> >> #* PHC0  0   3   377 6 -2ns[   -3ns] +/-   
> >> 94ns
> >> #* PHC0  0   3   377 4   +451ns[ +494ns] +/-  
> >> 138ns
> >> #* PHC0  0   3   37711-67ns[  -74ns] +/-  
> >> 113ns
> >> #* PHC0  0   3   377 8   +244ns[ +264ns] +/-  
> >> 119ns
> >> #* PHC0  0   3   377 7   -696ns[ -890ns] +/-   
> >> 89ns
> >> #* PHC0  0   3   377 4   +468ns[ +560ns] +/-  
> >> 110ns
> >> #* PHC0  0   3   37711   -310ns[ -430ns] +/-   
> >> 72ns
> >> #* PHC0  0   3   377 9   +189ns[ +298ns] +/-   
> >> 54ns
> >> #* PHC0  0   3   377 7   +594ns[ +473ns] +/-   
> >> 96ns
> >> #* PHC0  0   3   377 5   +151ns[ +280ns] +/-   
> >> 71ns
> >> #* PHC0  0   3   37710   -590ns[ -696ns] +/-   
> >> 94ns
> >> #* PHC0  0   3   377 8   +415ns[ +526ns] +/-   
> >> 74ns
> >> #* PHC0  0   3   377 6  +1381ns[+1469ns] +/-  
> >> 101ns
> >> #* PHC0  0   3   377 4   +571ns[+1304ns] +/-   
> >> 54ns
> >> #* PHC0  0   3   377 8 -5ns[  +71ns] +/-  
> >> 139ns
> >> #* PHC0  0   3   377 7   -247ns[ -502ns] +/-   
> >> 69ns
> >> #* PHC0  0   3   377 5   -283ns[ +879ns] +/-   
> >> 73ns
> >> #* PHC0  0   3   377 3   +148ns[ -109ns] +/-   
> >> 61ns
> >>
> >> With TSC delta calculation:
> >> 
> >>
> >> #* PHC0  0   3   377 7   +379ns[ +432ns] +/-   
> >> 53ns
> >> #* PHC0  0   3   377 9   +106ns[ +420ns] +/-   
> >> 42ns
> >> #* PHC0  0   3   377 7-58ns[ -136ns] +/-   
> >> 62ns
> >> #* PHC0  0   3   37712+93ns[  -38ns] +/-   
> >> 64ns
> >> #* PHC0  0   3   377 8+84ns[ +107ns] +/-   
> >> 69ns
> >> #* PHC0  0   3   377 3-76ns[ -103ns] +/-   
> >> 52ns
> >> #* PHC0  0   3   377 7+52ns[  +63ns] +/-   
> >> 50ns
> >> #* PHC0  0   3   37711+29ns[  +31ns] +/-   
> >> 70ns
> >> #* PHC0  0   3   377 7-47ns[  -56ns] +/-   
> >> 42ns
> >> #* PHC0  0   3   37710-35ns[  -42ns] +/-   
> >> 33ns
> >> #* PHC0  0   3   377 7-32ns[  -34ns] +/-   
> >> 42ns
> >> #* PHC0  0   3   37711   -172ns[ -173ns] 

Re: [patch 3/3] PTP: add kvm PTP driver

2017-01-18 Thread Radim Krcmar
2017-01-18 13:46+0100, Paolo Bonzini:
> 
> 
> On 18/01/2017 13:24, Marcelo Tosatti wrote:
> > On Wed, Jan 18, 2017 at 10:17:38AM -0200, Marcelo Tosatti wrote:
> >> On Tue, Jan 17, 2017 at 04:36:21PM +0100, Radim Krcmar wrote:
> >>> 2017-01-17 09:30-0200, Marcelo Tosatti:
>  On Tue, Jan 17, 2017 at 09:03:27AM +0100, Miroslav Lichvar wrote:
> > Users of the PTP_SYS_OFFSET ioctl assume that (ts[0]+ts[2])/2
> > corresponds to ts[1], (ts[2]+ts[4])/2 corresponds to ts[3], and so on.
> >
> > ts[1] ts[3]
> > Host time-+-+
> >   | |
> >   | |
> > Guest time   +-+-+..
> > ts[0]ts[2] ts[4]
> >>>
> >>> KVM PTP delay moves host ts[i] to be close to guest ts[i+1] and makes
> >>> the offset very consistent, so the graph would look like:
> >>>
> >>> ts[1] ts[3]
> >>> Host time-+-+
> >>>   | |
> >>>   | |
> >>> Guest time   +-+-+..
> >>> ts[0]ts[2] ts[4]
> >>>
> >>> which doesn't sound good if users assume that the host reading is in the
> >>> middle -- the guest time would be ahead of the host time.
> >>
> >> Testcase: run a guest and a loop sending SIGUSR1 to vcpu0 (emulating
> >> intense interrupts). Follows results:
> >>
> >> Without TSC delta calculation:
> >> =
> >>
> >> #* PHC0  0   3   377 2-99ns[ +206ns] +/-  
> >> 116ns
> >> #* PHC0  0   3   377 8   +202ns[ +249ns] +/-  
> >> 111ns
> >> #* PHC0  0   3   377 8   -213ns[ +683ns] +/-   
> >> 88ns
> >> #* PHC0  0   3   377 6+77ns[ +319ns] +/-   
> >> 56ns
> >> #* PHC0  0   3   377 4   -771ns[-1029ns] +/-   
> >> 93ns
> >> #* PHC0  0   3   37710-49ns[  -58ns] +/-  
> >> 121ns
> >> #* PHC0  0   3   377 9   +562ns[ +703ns] +/-  
> >> 107ns
> >> #* PHC0  0   3   377 6 -2ns[   -3ns] +/-   
> >> 94ns
> >> #* PHC0  0   3   377 4   +451ns[ +494ns] +/-  
> >> 138ns
> >> #* PHC0  0   3   37711-67ns[  -74ns] +/-  
> >> 113ns
> >> #* PHC0  0   3   377 8   +244ns[ +264ns] +/-  
> >> 119ns
> >> #* PHC0  0   3   377 7   -696ns[ -890ns] +/-   
> >> 89ns
> >> #* PHC0  0   3   377 4   +468ns[ +560ns] +/-  
> >> 110ns
> >> #* PHC0  0   3   37711   -310ns[ -430ns] +/-   
> >> 72ns
> >> #* PHC0  0   3   377 9   +189ns[ +298ns] +/-   
> >> 54ns
> >> #* PHC0  0   3   377 7   +594ns[ +473ns] +/-   
> >> 96ns
> >> #* PHC0  0   3   377 5   +151ns[ +280ns] +/-   
> >> 71ns
> >> #* PHC0  0   3   37710   -590ns[ -696ns] +/-   
> >> 94ns
> >> #* PHC0  0   3   377 8   +415ns[ +526ns] +/-   
> >> 74ns
> >> #* PHC0  0   3   377 6  +1381ns[+1469ns] +/-  
> >> 101ns
> >> #* PHC0  0   3   377 4   +571ns[+1304ns] +/-   
> >> 54ns
> >> #* PHC0  0   3   377 8 -5ns[  +71ns] +/-  
> >> 139ns
> >> #* PHC0  0   3   377 7   -247ns[ -502ns] +/-   
> >> 69ns
> >> #* PHC0  0   3   377 5   -283ns[ +879ns] +/-   
> >> 73ns
> >> #* PHC0  0   3   377 3   +148ns[ -109ns] +/-   
> >> 61ns
> >>
> >> With TSC delta calculation:
> >> 
> >>
> >> #* PHC0  0   3   377 7   +379ns[ +432ns] +/-   
> >> 53ns
> >> #* PHC0  0   3   377 9   +106ns[ +420ns] +/-   
> >> 42ns
> >> #* PHC0  0   3   377 7-58ns[ -136ns] +/-   
> >> 62ns
> >> #* PHC0  0   3   37712+93ns[  -38ns] +/-   
> >> 64ns
> >> #* PHC0  0   3   377 8+84ns[ +107ns] +/-   
> >> 69ns
> >> #* PHC0  0   3   377 3-76ns[ -103ns] +/-   
> >> 52ns
> >> #* PHC0  0   3   377 7+52ns[  +63ns] +/-   
> >> 50ns
> >> #* PHC0  0   3   37711+29ns[  +31ns] +/-   
> >> 70ns
> >> #* PHC0  0   3   377 7-47ns[  -56ns] +/-   
> >> 42ns
> >> #* PHC0  0   3   37710-35ns[  -42ns] +/-   
> >> 33ns
> >> #* PHC0  0   3   377 7-32ns[  -34ns] +/-   
> >> 42ns
> >> #* PHC0  0   3   37711   -172ns[ -173ns] 

Re: [patch 3/3] PTP: add kvm PTP driver

2017-01-18 Thread Miroslav Lichvar
On Wed, Jan 18, 2017 at 12:24:09PM -0200, Marcelo Tosatti wrote:
> On Wed, Jan 18, 2017 at 01:46:58PM +0100, Paolo Bonzini wrote:
> > I'd rather use PTP_SYS_OFFSET_PRECISE instead, but unfortunately chrony
> > does not support it---but I would still prefer you to support
> > PTP_SYS_OFFSET_PRECISE as well.
> 
> Sure, I'll check if it makes sense to implement PTP_SYS_OFFSET_PRECISE for 
> KVM case.

But is it really so precise that the application can safely assume
there are no errors due to asymmetric delays, etc? I think
PTP_SYS_OFFSET_PRECISE should be supported only if the accuracy of
the offset measured between the HW and system clock is not worse than
the precision of the system clock (typically few tens of nanoseconds).

It would be good to verify the accuracy of the offset when the host
and guest clocks are synchronised to each other over PTP using two
NICs with HW timestamping.

-- 
Miroslav Lichvar


Re: [patch 3/3] PTP: add kvm PTP driver

2017-01-18 Thread Miroslav Lichvar
On Wed, Jan 18, 2017 at 12:24:09PM -0200, Marcelo Tosatti wrote:
> On Wed, Jan 18, 2017 at 01:46:58PM +0100, Paolo Bonzini wrote:
> > I'd rather use PTP_SYS_OFFSET_PRECISE instead, but unfortunately chrony
> > does not support it---but I would still prefer you to support
> > PTP_SYS_OFFSET_PRECISE as well.
> 
> Sure, I'll check if it makes sense to implement PTP_SYS_OFFSET_PRECISE for 
> KVM case.

But is it really so precise that the application can safely assume
there are no errors due to asymmetric delays, etc? I think
PTP_SYS_OFFSET_PRECISE should be supported only if the accuracy of
the offset measured between the HW and system clock is not worse than
the precision of the system clock (typically few tens of nanoseconds).

It would be good to verify the accuracy of the offset when the host
and guest clocks are synchronised to each other over PTP using two
NICs with HW timestamping.

-- 
Miroslav Lichvar


Re: [patch 3/3] PTP: add kvm PTP driver

2017-01-18 Thread Paolo Bonzini


On 18/01/2017 15:50, Marcelo Tosatti wrote:
>> Interesting idea!  For this to work, KVM needs to implement
>> getcrosstimestamp and ptp_chardev.c can then add an alternative
>> implementation of PTP_SYS_OFFSET, based on precise cross timestamps.
>>
>> Something like
>>
>> for (i = 0; i <= sysoff->n_samples; i++) {
>>  // ... call getcrosststamp ...
>>  sysns = ktime_to_ns(xtstamp.sys_realtime);
>>  if (i > 0) {
>>  devns = ktime_to_ns(xtstamp.device);
>>  devns -= (sysns - prev_sysns) / 2;
>>  devts = ns_to_timespec(devns);
>>  pct->sec = devts.tv_sec;
>>  pct->nsec = devts.tv_nsec;
>>  pct++;
>>  }
>>  systs = ns_to_timespec(sysns);
>> pct->sec = ts.tv_sec;
>> pct->nsec = ts.tv_nsec;
>> pct++;
>>  prev_sysns = sysns;
>> }
>>
>> Marcelo, can you give it a try?
> 
> Can convert fine, but problem is the simultaneous read
> of host and guest clocks.

Could the TSC from the hypercall be applied to kvmclock to do this?  My
understanding is that get_device_system_crosststamp (which is used in
the sole in-tree implementation of getcrosststamp) already contains all
the logic to do that.

Paolo


Re: [patch 3/3] PTP: add kvm PTP driver

2017-01-18 Thread Paolo Bonzini


On 18/01/2017 15:50, Marcelo Tosatti wrote:
>> Interesting idea!  For this to work, KVM needs to implement
>> getcrosstimestamp and ptp_chardev.c can then add an alternative
>> implementation of PTP_SYS_OFFSET, based on precise cross timestamps.
>>
>> Something like
>>
>> for (i = 0; i <= sysoff->n_samples; i++) {
>>  // ... call getcrosststamp ...
>>  sysns = ktime_to_ns(xtstamp.sys_realtime);
>>  if (i > 0) {
>>  devns = ktime_to_ns(xtstamp.device);
>>  devns -= (sysns - prev_sysns) / 2;
>>  devts = ns_to_timespec(devns);
>>  pct->sec = devts.tv_sec;
>>  pct->nsec = devts.tv_nsec;
>>  pct++;
>>  }
>>  systs = ns_to_timespec(sysns);
>> pct->sec = ts.tv_sec;
>> pct->nsec = ts.tv_nsec;
>> pct++;
>>  prev_sysns = sysns;
>> }
>>
>> Marcelo, can you give it a try?
> 
> Can convert fine, but problem is the simultaneous read
> of host and guest clocks.

Could the TSC from the hypercall be applied to kvmclock to do this?  My
understanding is that get_device_system_crosststamp (which is used in
the sole in-tree implementation of getcrosststamp) already contains all
the logic to do that.

Paolo


Re: [patch 3/3] PTP: add kvm PTP driver

2017-01-18 Thread Radim Krcmar
2017-01-18 12:50-0200, Marcelo Tosatti:
> On Wed, Jan 18, 2017 at 03:02:23PM +0100, Paolo Bonzini wrote:
>> 
>> 
>> On 18/01/2017 14:36, Miroslav Lichvar wrote:
>> > On Wed, Jan 18, 2017 at 01:46:58PM +0100, Paolo Bonzini wrote:
>> >> On 18/01/2017 13:24, Marcelo Tosatti wrote:
>>  Testcase: run a guest and a loop sending SIGUSR1 to vcpu0 (emulating
>>  intense interrupts). Follows results:
>> > 
>>  Do you still want to drop it in favour of simplicity?
>> > 
>> >> It's just that it's not obvious why you get better results with biased
>> >> host timestamps.  What makes the biased host timestamp more precise?
>> >>
>> >> I'd rather use PTP_SYS_OFFSET_PRECISE instead, but unfortunately chrony
>> >> does not support it---but I would still prefer you to support
>> >> PTP_SYS_OFFSET_PRECISE as well.
>> > 
>> > Interesting. I wasn't aware that there is a new ioctl for measuring
>> > the HW-sys offset. Adding support to chrony shouldn't be difficult.
>> > 
>> > If I understand it correctly, PTP_SYS_OFFSET can be emulated on top of
>> > PTP_SYS_OFFSET_PRECISE simply by copying the sys_realtime and device
>> > fields to corresponding ts slots. The apparent delay will be zero, but
>> > that's ok if the conversion is really accurate.
>> 
>> Yes, for 1 sample only.  Otherwise you'd have the same issue as in
>> Marcelo's driver (the device aka guest timestamp from
>> PTP_SYS_OFFSET_PRECISE would not be halfway between the system aka host
>> timestamps), and your idea below could be applied.
>> 
>> > I'm not sure if trying to do that in the opposite direction is a good
>> > idea. An application using PTP_SYS_OFFSET_PRECISE may assume the
>> > conversion is accurate and not include any delay/dispersion in an
>> > estimate of the maximum error, which is needed in NTP for instance.
>> > 
>> > If we know the host timestamp ts[1] is not in the middle between the
>> > guests timestamps ts[0] and ts[2], but rather closer to ts[2], why not
>> > simply shift ts[1] by (ts[2]-ts[0])/2 ?
> 
> 
> 
>> 
>> Interesting idea!  For this to work, KVM needs to implement
>> getcrosstimestamp and ptp_chardev.c can then add an alternative
>> implementation of PTP_SYS_OFFSET, based on precise cross timestamps.
>> 
>> Something like
>> 
>> for (i = 0; i <= sysoff->n_samples; i++) {
>>  // ... call getcrosststamp ...
>>  sysns = ktime_to_ns(xtstamp.sys_realtime);
>>  if (i > 0) {
>>  devns = ktime_to_ns(xtstamp.device);
>>  devns -= (sysns - prev_sysns) / 2;
>>  devts = ns_to_timespec(devns);
>>  pct->sec = devts.tv_sec;
>>  pct->nsec = devts.tv_nsec;
>>  pct++;
>>  }
>>  systs = ns_to_timespec(sysns);
>> pct->sec = ts.tv_sec;
>> pct->nsec = ts.tv_nsec;
>> pct++;
>>  prev_sysns = sysns;
>> }

Nice.  PTP_SYS_OFFSET seems to be mandatory, so this hunk could be
upstreamable as a fallback for PTP devices that have getcrosststamp and
not gettime64.  KVM PTP would be the only driver using it so far.

>> Marcelo, can you give it a try?
> 
> Can convert fine, but problem is the simultaneous read
> of host and guest clocks.
> 
>> Thanks,
>> 
>> Paolo
> 
> It seems to me anything else other than using a single TSC read
> (for both host and guest clocks) is a poor PTP_SYS_OFFSET_PRECISE 
> implementation (because it would claim to be similar to ART, where
> the timestamps are simultaneous), but not be.

Yes, the guest should use the TSC returned by the hypercall to compute
the corresponding guest time, which will allow us to know the
host<->guest offset.

Is this impossible to do with the current API?

Thanks.


Re: [patch 3/3] PTP: add kvm PTP driver

2017-01-18 Thread Radim Krcmar
2017-01-18 12:50-0200, Marcelo Tosatti:
> On Wed, Jan 18, 2017 at 03:02:23PM +0100, Paolo Bonzini wrote:
>> 
>> 
>> On 18/01/2017 14:36, Miroslav Lichvar wrote:
>> > On Wed, Jan 18, 2017 at 01:46:58PM +0100, Paolo Bonzini wrote:
>> >> On 18/01/2017 13:24, Marcelo Tosatti wrote:
>>  Testcase: run a guest and a loop sending SIGUSR1 to vcpu0 (emulating
>>  intense interrupts). Follows results:
>> > 
>>  Do you still want to drop it in favour of simplicity?
>> > 
>> >> It's just that it's not obvious why you get better results with biased
>> >> host timestamps.  What makes the biased host timestamp more precise?
>> >>
>> >> I'd rather use PTP_SYS_OFFSET_PRECISE instead, but unfortunately chrony
>> >> does not support it---but I would still prefer you to support
>> >> PTP_SYS_OFFSET_PRECISE as well.
>> > 
>> > Interesting. I wasn't aware that there is a new ioctl for measuring
>> > the HW-sys offset. Adding support to chrony shouldn't be difficult.
>> > 
>> > If I understand it correctly, PTP_SYS_OFFSET can be emulated on top of
>> > PTP_SYS_OFFSET_PRECISE simply by copying the sys_realtime and device
>> > fields to corresponding ts slots. The apparent delay will be zero, but
>> > that's ok if the conversion is really accurate.
>> 
>> Yes, for 1 sample only.  Otherwise you'd have the same issue as in
>> Marcelo's driver (the device aka guest timestamp from
>> PTP_SYS_OFFSET_PRECISE would not be halfway between the system aka host
>> timestamps), and your idea below could be applied.
>> 
>> > I'm not sure if trying to do that in the opposite direction is a good
>> > idea. An application using PTP_SYS_OFFSET_PRECISE may assume the
>> > conversion is accurate and not include any delay/dispersion in an
>> > estimate of the maximum error, which is needed in NTP for instance.
>> > 
>> > If we know the host timestamp ts[1] is not in the middle between the
>> > guests timestamps ts[0] and ts[2], but rather closer to ts[2], why not
>> > simply shift ts[1] by (ts[2]-ts[0])/2 ?
> 
> 
> 
>> 
>> Interesting idea!  For this to work, KVM needs to implement
>> getcrosstimestamp and ptp_chardev.c can then add an alternative
>> implementation of PTP_SYS_OFFSET, based on precise cross timestamps.
>> 
>> Something like
>> 
>> for (i = 0; i <= sysoff->n_samples; i++) {
>>  // ... call getcrosststamp ...
>>  sysns = ktime_to_ns(xtstamp.sys_realtime);
>>  if (i > 0) {
>>  devns = ktime_to_ns(xtstamp.device);
>>  devns -= (sysns - prev_sysns) / 2;
>>  devts = ns_to_timespec(devns);
>>  pct->sec = devts.tv_sec;
>>  pct->nsec = devts.tv_nsec;
>>  pct++;
>>  }
>>  systs = ns_to_timespec(sysns);
>> pct->sec = ts.tv_sec;
>> pct->nsec = ts.tv_nsec;
>> pct++;
>>  prev_sysns = sysns;
>> }

Nice.  PTP_SYS_OFFSET seems to be mandatory, so this hunk could be
upstreamable as a fallback for PTP devices that have getcrosststamp and
not gettime64.  KVM PTP would be the only driver using it so far.

>> Marcelo, can you give it a try?
> 
> Can convert fine, but problem is the simultaneous read
> of host and guest clocks.
> 
>> Thanks,
>> 
>> Paolo
> 
> It seems to me anything else other than using a single TSC read
> (for both host and guest clocks) is a poor PTP_SYS_OFFSET_PRECISE 
> implementation (because it would claim to be similar to ART, where
> the timestamps are simultaneous), but not be.

Yes, the guest should use the TSC returned by the hypercall to compute
the corresponding guest time, which will allow us to know the
host<->guest offset.

Is this impossible to do with the current API?

Thanks.


Re: [patch 3/3] PTP: add kvm PTP driver

2017-01-18 Thread Radim Krcmar
2017-01-18 12:53-0200, Marcelo Tosatti:
> GOn Wed, Jan 18, 2017 at 12:37:25PM -0200, Marcelo Tosatti wrote:
> > On Wed, Jan 18, 2017 at 01:46:58PM +0100, Paolo Bonzini wrote:
> > > 
> > > 
> > > On 18/01/2017 13:24, Marcelo Tosatti wrote:
> > > > On Wed, Jan 18, 2017 at 10:17:38AM -0200, Marcelo Tosatti wrote:
> > > >> On Tue, Jan 17, 2017 at 04:36:21PM +0100, Radim Krcmar wrote:
> > > >>> 2017-01-17 09:30-0200, Marcelo Tosatti:
> > >  On Tue, Jan 17, 2017 at 09:03:27AM +0100, Miroslav Lichvar wrote:
> > > > Users of the PTP_SYS_OFFSET ioctl assume that (ts[0]+ts[2])/2
> > > > corresponds to ts[1], (ts[2]+ts[4])/2 corresponds to ts[3], and so 
> > > > on.
> > > >
> > > > ts[1] ts[3]
> > > > Host time-+-+
> > > >   | |
> > > >   | |
> > > > Guest time   +-+-+..
> > > > ts[0]ts[2] ts[4]
> > > >>>
> > > >>> KVM PTP delay moves host ts[i] to be close to guest ts[i+1] and makes
> > > >>> the offset very consistent, so the graph would look like:
> > > >>>
> > > >>> ts[1] ts[3]
> > > >>> Host time-+-+
> > > >>>   | |
> > > >>>   | |
> > > >>> Guest time   +-+-+..
> > > >>> ts[0]ts[2] ts[4]
> > > >>>
> > > >>> which doesn't sound good if users assume that the host reading is in 
> > > >>> the
> > > >>> middle -- the guest time would be ahead of the host time.
> > > >>
> > > >> Testcase: run a guest and a loop sending SIGUSR1 to vcpu0 (emulating
> > > >> intense interrupts). Follows results:
> > > >>
> > > >> Without TSC delta calculation:
> > > >> =
> > > >>
> > > >> #* PHC0  0   3   377 2-99ns[ +206ns] 
> > > >> +/-  116ns
> > > >> #* PHC0  0   3   377 8   +202ns[ +249ns] 
> > > >> +/-  111ns
> > > >> #* PHC0  0   3   377 8   -213ns[ +683ns] 
> > > >> +/-   88ns
> > > >> #* PHC0  0   3   377 6+77ns[ +319ns] 
> > > >> +/-   56ns
> > > >> #* PHC0  0   3   377 4   -771ns[-1029ns] 
> > > >> +/-   93ns
> > > >> #* PHC0  0   3   37710-49ns[  -58ns] 
> > > >> +/-  121ns
> > > >> #* PHC0  0   3   377 9   +562ns[ +703ns] 
> > > >> +/-  107ns
> > > >> #* PHC0  0   3   377 6 -2ns[   -3ns] 
> > > >> +/-   94ns
> > > >> #* PHC0  0   3   377 4   +451ns[ +494ns] 
> > > >> +/-  138ns
> > > >> #* PHC0  0   3   37711-67ns[  -74ns] 
> > > >> +/-  113ns
> > > >> #* PHC0  0   3   377 8   +244ns[ +264ns] 
> > > >> +/-  119ns
> > > >> #* PHC0  0   3   377 7   -696ns[ -890ns] 
> > > >> +/-   89ns
> > > >> #* PHC0  0   3   377 4   +468ns[ +560ns] 
> > > >> +/-  110ns
> > > >> #* PHC0  0   3   37711   -310ns[ -430ns] 
> > > >> +/-   72ns
> > > >> #* PHC0  0   3   377 9   +189ns[ +298ns] 
> > > >> +/-   54ns
> > > >> #* PHC0  0   3   377 7   +594ns[ +473ns] 
> > > >> +/-   96ns
> > > >> #* PHC0  0   3   377 5   +151ns[ +280ns] 
> > > >> +/-   71ns
> > > >> #* PHC0  0   3   37710   -590ns[ -696ns] 
> > > >> +/-   94ns
> > > >> #* PHC0  0   3   377 8   +415ns[ +526ns] 
> > > >> +/-   74ns
> > > >> #* PHC0  0   3   377 6  +1381ns[+1469ns] 
> > > >> +/-  101ns
> > > >> #* PHC0  0   3   377 4   +571ns[+1304ns] 
> > > >> +/-   54ns
> > > >> #* PHC0  0   3   377 8 -5ns[  +71ns] 
> > > >> +/-  139ns
> > > >> #* PHC0  0   3   377 7   -247ns[ -502ns] 
> > > >> +/-   69ns
> > > >> #* PHC0  0   3   377 5   -283ns[ +879ns] 
> > > >> +/-   73ns
> > > >> #* PHC0  0   3   377 3   +148ns[ -109ns] 
> > > >> +/-   61ns
> > > >>
> > > >> With TSC delta calculation:
> > > >> 
> > > >>
> > > >> #* PHC0  0   3   377 7   +379ns[ +432ns] 
> > > >> +/-   53ns
> > > >> #* PHC0  0   3   377 9   +106ns[ +420ns] 
> > > >> +/-   42ns
> > > >> #* PHC0  0   3   377 7-58ns[ -136ns] 
> > > >> +/-   62ns
> > > >> #* PHC0  0   3   37712+93ns[  -38ns] 
> > > >> +/-   64ns
> > > >> #* PHC0  0   3   377 8+84ns[ +107ns] 
> > > >> +/-   69ns
> > > >> #* PHC0  0   3   377 3  

Re: [patch 3/3] PTP: add kvm PTP driver

2017-01-18 Thread Radim Krcmar
2017-01-18 12:53-0200, Marcelo Tosatti:
> GOn Wed, Jan 18, 2017 at 12:37:25PM -0200, Marcelo Tosatti wrote:
> > On Wed, Jan 18, 2017 at 01:46:58PM +0100, Paolo Bonzini wrote:
> > > 
> > > 
> > > On 18/01/2017 13:24, Marcelo Tosatti wrote:
> > > > On Wed, Jan 18, 2017 at 10:17:38AM -0200, Marcelo Tosatti wrote:
> > > >> On Tue, Jan 17, 2017 at 04:36:21PM +0100, Radim Krcmar wrote:
> > > >>> 2017-01-17 09:30-0200, Marcelo Tosatti:
> > >  On Tue, Jan 17, 2017 at 09:03:27AM +0100, Miroslav Lichvar wrote:
> > > > Users of the PTP_SYS_OFFSET ioctl assume that (ts[0]+ts[2])/2
> > > > corresponds to ts[1], (ts[2]+ts[4])/2 corresponds to ts[3], and so 
> > > > on.
> > > >
> > > > ts[1] ts[3]
> > > > Host time-+-+
> > > >   | |
> > > >   | |
> > > > Guest time   +-+-+..
> > > > ts[0]ts[2] ts[4]
> > > >>>
> > > >>> KVM PTP delay moves host ts[i] to be close to guest ts[i+1] and makes
> > > >>> the offset very consistent, so the graph would look like:
> > > >>>
> > > >>> ts[1] ts[3]
> > > >>> Host time-+-+
> > > >>>   | |
> > > >>>   | |
> > > >>> Guest time   +-+-+..
> > > >>> ts[0]ts[2] ts[4]
> > > >>>
> > > >>> which doesn't sound good if users assume that the host reading is in 
> > > >>> the
> > > >>> middle -- the guest time would be ahead of the host time.
> > > >>
> > > >> Testcase: run a guest and a loop sending SIGUSR1 to vcpu0 (emulating
> > > >> intense interrupts). Follows results:
> > > >>
> > > >> Without TSC delta calculation:
> > > >> =
> > > >>
> > > >> #* PHC0  0   3   377 2-99ns[ +206ns] 
> > > >> +/-  116ns
> > > >> #* PHC0  0   3   377 8   +202ns[ +249ns] 
> > > >> +/-  111ns
> > > >> #* PHC0  0   3   377 8   -213ns[ +683ns] 
> > > >> +/-   88ns
> > > >> #* PHC0  0   3   377 6+77ns[ +319ns] 
> > > >> +/-   56ns
> > > >> #* PHC0  0   3   377 4   -771ns[-1029ns] 
> > > >> +/-   93ns
> > > >> #* PHC0  0   3   37710-49ns[  -58ns] 
> > > >> +/-  121ns
> > > >> #* PHC0  0   3   377 9   +562ns[ +703ns] 
> > > >> +/-  107ns
> > > >> #* PHC0  0   3   377 6 -2ns[   -3ns] 
> > > >> +/-   94ns
> > > >> #* PHC0  0   3   377 4   +451ns[ +494ns] 
> > > >> +/-  138ns
> > > >> #* PHC0  0   3   37711-67ns[  -74ns] 
> > > >> +/-  113ns
> > > >> #* PHC0  0   3   377 8   +244ns[ +264ns] 
> > > >> +/-  119ns
> > > >> #* PHC0  0   3   377 7   -696ns[ -890ns] 
> > > >> +/-   89ns
> > > >> #* PHC0  0   3   377 4   +468ns[ +560ns] 
> > > >> +/-  110ns
> > > >> #* PHC0  0   3   37711   -310ns[ -430ns] 
> > > >> +/-   72ns
> > > >> #* PHC0  0   3   377 9   +189ns[ +298ns] 
> > > >> +/-   54ns
> > > >> #* PHC0  0   3   377 7   +594ns[ +473ns] 
> > > >> +/-   96ns
> > > >> #* PHC0  0   3   377 5   +151ns[ +280ns] 
> > > >> +/-   71ns
> > > >> #* PHC0  0   3   37710   -590ns[ -696ns] 
> > > >> +/-   94ns
> > > >> #* PHC0  0   3   377 8   +415ns[ +526ns] 
> > > >> +/-   74ns
> > > >> #* PHC0  0   3   377 6  +1381ns[+1469ns] 
> > > >> +/-  101ns
> > > >> #* PHC0  0   3   377 4   +571ns[+1304ns] 
> > > >> +/-   54ns
> > > >> #* PHC0  0   3   377 8 -5ns[  +71ns] 
> > > >> +/-  139ns
> > > >> #* PHC0  0   3   377 7   -247ns[ -502ns] 
> > > >> +/-   69ns
> > > >> #* PHC0  0   3   377 5   -283ns[ +879ns] 
> > > >> +/-   73ns
> > > >> #* PHC0  0   3   377 3   +148ns[ -109ns] 
> > > >> +/-   61ns
> > > >>
> > > >> With TSC delta calculation:
> > > >> 
> > > >>
> > > >> #* PHC0  0   3   377 7   +379ns[ +432ns] 
> > > >> +/-   53ns
> > > >> #* PHC0  0   3   377 9   +106ns[ +420ns] 
> > > >> +/-   42ns
> > > >> #* PHC0  0   3   377 7-58ns[ -136ns] 
> > > >> +/-   62ns
> > > >> #* PHC0  0   3   37712+93ns[  -38ns] 
> > > >> +/-   64ns
> > > >> #* PHC0  0   3   377 8+84ns[ +107ns] 
> > > >> +/-   69ns
> > > >> #* PHC0  0   3   377 3  

Re: [patch 3/3] PTP: add kvm PTP driver

2017-01-18 Thread Marcelo Tosatti
On Wed, Jan 18, 2017 at 03:02:23PM +0100, Paolo Bonzini wrote:
> 
> 
> On 18/01/2017 14:36, Miroslav Lichvar wrote:
> > On Wed, Jan 18, 2017 at 01:46:58PM +0100, Paolo Bonzini wrote:
> >> On 18/01/2017 13:24, Marcelo Tosatti wrote:
>  Testcase: run a guest and a loop sending SIGUSR1 to vcpu0 (emulating
>  intense interrupts). Follows results:
> > 
>  Do you still want to drop it in favour of simplicity?
> > 
> >> It's just that it's not obvious why you get better results with biased
> >> host timestamps.  What makes the biased host timestamp more precise?
> >>
> >> I'd rather use PTP_SYS_OFFSET_PRECISE instead, but unfortunately chrony
> >> does not support it---but I would still prefer you to support
> >> PTP_SYS_OFFSET_PRECISE as well.
> > 
> > Interesting. I wasn't aware that there is a new ioctl for measuring
> > the HW-sys offset. Adding support to chrony shouldn't be difficult.
> > 
> > If I understand it correctly, PTP_SYS_OFFSET can be emulated on top of
> > PTP_SYS_OFFSET_PRECISE simply by copying the sys_realtime and device
> > fields to corresponding ts slots. The apparent delay will be zero, but
> > that's ok if the conversion is really accurate.
> 
> Yes, for 1 sample only.  Otherwise you'd have the same issue as in
> Marcelo's driver (the device aka guest timestamp from
> PTP_SYS_OFFSET_PRECISE would not be halfway between the system aka host
> timestamps), and your idea below could be applied.
> 
> > I'm not sure if trying to do that in the opposite direction is a good
> > idea. An application using PTP_SYS_OFFSET_PRECISE may assume the
> > conversion is accurate and not include any delay/dispersion in an
> > estimate of the maximum error, which is needed in NTP for instance.
> > 
> > If we know the host timestamp ts[1] is not in the middle between the
> > guests timestamps ts[0] and ts[2], but rather closer to ts[2], why not
> > simply shift ts[1] by (ts[2]-ts[0])/2 ?



> 
> Interesting idea!  For this to work, KVM needs to implement
> getcrosstimestamp and ptp_chardev.c can then add an alternative
> implementation of PTP_SYS_OFFSET, based on precise cross timestamps.
> 
> Something like
> 
> for (i = 0; i <= sysoff->n_samples; i++) {
>   // ... call getcrosststamp ...
>   sysns = ktime_to_ns(xtstamp.sys_realtime);
>   if (i > 0) {
>   devns = ktime_to_ns(xtstamp.device);
>   devns -= (sysns - prev_sysns) / 2;
>   devts = ns_to_timespec(devns);
>   pct->sec = devts.tv_sec;
>   pct->nsec = devts.tv_nsec;
>   pct++;
>   }
>   systs = ns_to_timespec(sysns);
> pct->sec = ts.tv_sec;
> pct->nsec = ts.tv_nsec;
> pct++;
>   prev_sysns = sysns;
> }
> 
> Marcelo, can you give it a try?

Can convert fine, but problem is the simultaneous read
of host and guest clocks.

> Thanks,
> 
> Paolo

It seems to me anything else other than using a single TSC read
(for both host and guest clocks) is a poor PTP_SYS_OFFSET_PRECISE 
implementation (because it would claim to be similar to ART, where
the timestamps are simultaneous), but not be.



Re: [patch 3/3] PTP: add kvm PTP driver

2017-01-18 Thread Marcelo Tosatti
On Wed, Jan 18, 2017 at 03:02:23PM +0100, Paolo Bonzini wrote:
> 
> 
> On 18/01/2017 14:36, Miroslav Lichvar wrote:
> > On Wed, Jan 18, 2017 at 01:46:58PM +0100, Paolo Bonzini wrote:
> >> On 18/01/2017 13:24, Marcelo Tosatti wrote:
>  Testcase: run a guest and a loop sending SIGUSR1 to vcpu0 (emulating
>  intense interrupts). Follows results:
> > 
>  Do you still want to drop it in favour of simplicity?
> > 
> >> It's just that it's not obvious why you get better results with biased
> >> host timestamps.  What makes the biased host timestamp more precise?
> >>
> >> I'd rather use PTP_SYS_OFFSET_PRECISE instead, but unfortunately chrony
> >> does not support it---but I would still prefer you to support
> >> PTP_SYS_OFFSET_PRECISE as well.
> > 
> > Interesting. I wasn't aware that there is a new ioctl for measuring
> > the HW-sys offset. Adding support to chrony shouldn't be difficult.
> > 
> > If I understand it correctly, PTP_SYS_OFFSET can be emulated on top of
> > PTP_SYS_OFFSET_PRECISE simply by copying the sys_realtime and device
> > fields to corresponding ts slots. The apparent delay will be zero, but
> > that's ok if the conversion is really accurate.
> 
> Yes, for 1 sample only.  Otherwise you'd have the same issue as in
> Marcelo's driver (the device aka guest timestamp from
> PTP_SYS_OFFSET_PRECISE would not be halfway between the system aka host
> timestamps), and your idea below could be applied.
> 
> > I'm not sure if trying to do that in the opposite direction is a good
> > idea. An application using PTP_SYS_OFFSET_PRECISE may assume the
> > conversion is accurate and not include any delay/dispersion in an
> > estimate of the maximum error, which is needed in NTP for instance.
> > 
> > If we know the host timestamp ts[1] is not in the middle between the
> > guests timestamps ts[0] and ts[2], but rather closer to ts[2], why not
> > simply shift ts[1] by (ts[2]-ts[0])/2 ?



> 
> Interesting idea!  For this to work, KVM needs to implement
> getcrosstimestamp and ptp_chardev.c can then add an alternative
> implementation of PTP_SYS_OFFSET, based on precise cross timestamps.
> 
> Something like
> 
> for (i = 0; i <= sysoff->n_samples; i++) {
>   // ... call getcrosststamp ...
>   sysns = ktime_to_ns(xtstamp.sys_realtime);
>   if (i > 0) {
>   devns = ktime_to_ns(xtstamp.device);
>   devns -= (sysns - prev_sysns) / 2;
>   devts = ns_to_timespec(devns);
>   pct->sec = devts.tv_sec;
>   pct->nsec = devts.tv_nsec;
>   pct++;
>   }
>   systs = ns_to_timespec(sysns);
> pct->sec = ts.tv_sec;
> pct->nsec = ts.tv_nsec;
> pct++;
>   prev_sysns = sysns;
> }
> 
> Marcelo, can you give it a try?

Can convert fine, but problem is the simultaneous read
of host and guest clocks.

> Thanks,
> 
> Paolo

It seems to me anything else other than using a single TSC read
(for both host and guest clocks) is a poor PTP_SYS_OFFSET_PRECISE 
implementation (because it would claim to be similar to ART, where
the timestamps are simultaneous), but not be.



Re: [patch 3/3] PTP: add kvm PTP driver

2017-01-18 Thread Marcelo Tosatti
GOn Wed, Jan 18, 2017 at 12:37:25PM -0200, Marcelo Tosatti wrote:
> On Wed, Jan 18, 2017 at 01:46:58PM +0100, Paolo Bonzini wrote:
> > 
> > 
> > On 18/01/2017 13:24, Marcelo Tosatti wrote:
> > > On Wed, Jan 18, 2017 at 10:17:38AM -0200, Marcelo Tosatti wrote:
> > >> On Tue, Jan 17, 2017 at 04:36:21PM +0100, Radim Krcmar wrote:
> > >>> 2017-01-17 09:30-0200, Marcelo Tosatti:
> >  On Tue, Jan 17, 2017 at 09:03:27AM +0100, Miroslav Lichvar wrote:
> > > Users of the PTP_SYS_OFFSET ioctl assume that (ts[0]+ts[2])/2
> > > corresponds to ts[1], (ts[2]+ts[4])/2 corresponds to ts[3], and so on.
> > >
> > > ts[1] ts[3]
> > > Host time-+-+
> > >   | |
> > >   | |
> > > Guest time   +-+-+..
> > > ts[0]ts[2] ts[4]
> > >>>
> > >>> KVM PTP delay moves host ts[i] to be close to guest ts[i+1] and makes
> > >>> the offset very consistent, so the graph would look like:
> > >>>
> > >>> ts[1] ts[3]
> > >>> Host time-+-+
> > >>>   | |
> > >>>   | |
> > >>> Guest time   +-+-+..
> > >>> ts[0]ts[2] ts[4]
> > >>>
> > >>> which doesn't sound good if users assume that the host reading is in the
> > >>> middle -- the guest time would be ahead of the host time.
> > >>
> > >> Testcase: run a guest and a loop sending SIGUSR1 to vcpu0 (emulating
> > >> intense interrupts). Follows results:
> > >>
> > >> Without TSC delta calculation:
> > >> =
> > >>
> > >> #* PHC0  0   3   377 2-99ns[ +206ns] +/- 
> > >>  116ns
> > >> #* PHC0  0   3   377 8   +202ns[ +249ns] +/- 
> > >>  111ns
> > >> #* PHC0  0   3   377 8   -213ns[ +683ns] +/- 
> > >>   88ns
> > >> #* PHC0  0   3   377 6+77ns[ +319ns] +/- 
> > >>   56ns
> > >> #* PHC0  0   3   377 4   -771ns[-1029ns] +/- 
> > >>   93ns
> > >> #* PHC0  0   3   37710-49ns[  -58ns] +/- 
> > >>  121ns
> > >> #* PHC0  0   3   377 9   +562ns[ +703ns] +/- 
> > >>  107ns
> > >> #* PHC0  0   3   377 6 -2ns[   -3ns] +/- 
> > >>   94ns
> > >> #* PHC0  0   3   377 4   +451ns[ +494ns] +/- 
> > >>  138ns
> > >> #* PHC0  0   3   37711-67ns[  -74ns] +/- 
> > >>  113ns
> > >> #* PHC0  0   3   377 8   +244ns[ +264ns] +/- 
> > >>  119ns
> > >> #* PHC0  0   3   377 7   -696ns[ -890ns] +/- 
> > >>   89ns
> > >> #* PHC0  0   3   377 4   +468ns[ +560ns] +/- 
> > >>  110ns
> > >> #* PHC0  0   3   37711   -310ns[ -430ns] +/- 
> > >>   72ns
> > >> #* PHC0  0   3   377 9   +189ns[ +298ns] +/- 
> > >>   54ns
> > >> #* PHC0  0   3   377 7   +594ns[ +473ns] +/- 
> > >>   96ns
> > >> #* PHC0  0   3   377 5   +151ns[ +280ns] +/- 
> > >>   71ns
> > >> #* PHC0  0   3   37710   -590ns[ -696ns] +/- 
> > >>   94ns
> > >> #* PHC0  0   3   377 8   +415ns[ +526ns] +/- 
> > >>   74ns
> > >> #* PHC0  0   3   377 6  +1381ns[+1469ns] +/- 
> > >>  101ns
> > >> #* PHC0  0   3   377 4   +571ns[+1304ns] +/- 
> > >>   54ns
> > >> #* PHC0  0   3   377 8 -5ns[  +71ns] +/- 
> > >>  139ns
> > >> #* PHC0  0   3   377 7   -247ns[ -502ns] +/- 
> > >>   69ns
> > >> #* PHC0  0   3   377 5   -283ns[ +879ns] +/- 
> > >>   73ns
> > >> #* PHC0  0   3   377 3   +148ns[ -109ns] +/- 
> > >>   61ns
> > >>
> > >> With TSC delta calculation:
> > >> 
> > >>
> > >> #* PHC0  0   3   377 7   +379ns[ +432ns] +/- 
> > >>   53ns
> > >> #* PHC0  0   3   377 9   +106ns[ +420ns] +/- 
> > >>   42ns
> > >> #* PHC0  0   3   377 7-58ns[ -136ns] +/- 
> > >>   62ns
> > >> #* PHC0  0   3   37712+93ns[  -38ns] +/- 
> > >>   64ns
> > >> #* PHC0  0   3   377 8+84ns[ +107ns] +/- 
> > >>   69ns
> > >> #* PHC0  0   3   377 3-76ns[ -103ns] +/- 
> > >>   52ns
> > >> #* PHC0  0   3   377 7+52ns[  +63ns] +/- 
> > >>   50ns
> > >> #* PHC0  0   3   37711+29ns[  +31ns] +/- 
> > >>   70ns
> > >> #* PHC0  0   

Re: [patch 3/3] PTP: add kvm PTP driver

2017-01-18 Thread Marcelo Tosatti
GOn Wed, Jan 18, 2017 at 12:37:25PM -0200, Marcelo Tosatti wrote:
> On Wed, Jan 18, 2017 at 01:46:58PM +0100, Paolo Bonzini wrote:
> > 
> > 
> > On 18/01/2017 13:24, Marcelo Tosatti wrote:
> > > On Wed, Jan 18, 2017 at 10:17:38AM -0200, Marcelo Tosatti wrote:
> > >> On Tue, Jan 17, 2017 at 04:36:21PM +0100, Radim Krcmar wrote:
> > >>> 2017-01-17 09:30-0200, Marcelo Tosatti:
> >  On Tue, Jan 17, 2017 at 09:03:27AM +0100, Miroslav Lichvar wrote:
> > > Users of the PTP_SYS_OFFSET ioctl assume that (ts[0]+ts[2])/2
> > > corresponds to ts[1], (ts[2]+ts[4])/2 corresponds to ts[3], and so on.
> > >
> > > ts[1] ts[3]
> > > Host time-+-+
> > >   | |
> > >   | |
> > > Guest time   +-+-+..
> > > ts[0]ts[2] ts[4]
> > >>>
> > >>> KVM PTP delay moves host ts[i] to be close to guest ts[i+1] and makes
> > >>> the offset very consistent, so the graph would look like:
> > >>>
> > >>> ts[1] ts[3]
> > >>> Host time-+-+
> > >>>   | |
> > >>>   | |
> > >>> Guest time   +-+-+..
> > >>> ts[0]ts[2] ts[4]
> > >>>
> > >>> which doesn't sound good if users assume that the host reading is in the
> > >>> middle -- the guest time would be ahead of the host time.
> > >>
> > >> Testcase: run a guest and a loop sending SIGUSR1 to vcpu0 (emulating
> > >> intense interrupts). Follows results:
> > >>
> > >> Without TSC delta calculation:
> > >> =
> > >>
> > >> #* PHC0  0   3   377 2-99ns[ +206ns] +/- 
> > >>  116ns
> > >> #* PHC0  0   3   377 8   +202ns[ +249ns] +/- 
> > >>  111ns
> > >> #* PHC0  0   3   377 8   -213ns[ +683ns] +/- 
> > >>   88ns
> > >> #* PHC0  0   3   377 6+77ns[ +319ns] +/- 
> > >>   56ns
> > >> #* PHC0  0   3   377 4   -771ns[-1029ns] +/- 
> > >>   93ns
> > >> #* PHC0  0   3   37710-49ns[  -58ns] +/- 
> > >>  121ns
> > >> #* PHC0  0   3   377 9   +562ns[ +703ns] +/- 
> > >>  107ns
> > >> #* PHC0  0   3   377 6 -2ns[   -3ns] +/- 
> > >>   94ns
> > >> #* PHC0  0   3   377 4   +451ns[ +494ns] +/- 
> > >>  138ns
> > >> #* PHC0  0   3   37711-67ns[  -74ns] +/- 
> > >>  113ns
> > >> #* PHC0  0   3   377 8   +244ns[ +264ns] +/- 
> > >>  119ns
> > >> #* PHC0  0   3   377 7   -696ns[ -890ns] +/- 
> > >>   89ns
> > >> #* PHC0  0   3   377 4   +468ns[ +560ns] +/- 
> > >>  110ns
> > >> #* PHC0  0   3   37711   -310ns[ -430ns] +/- 
> > >>   72ns
> > >> #* PHC0  0   3   377 9   +189ns[ +298ns] +/- 
> > >>   54ns
> > >> #* PHC0  0   3   377 7   +594ns[ +473ns] +/- 
> > >>   96ns
> > >> #* PHC0  0   3   377 5   +151ns[ +280ns] +/- 
> > >>   71ns
> > >> #* PHC0  0   3   37710   -590ns[ -696ns] +/- 
> > >>   94ns
> > >> #* PHC0  0   3   377 8   +415ns[ +526ns] +/- 
> > >>   74ns
> > >> #* PHC0  0   3   377 6  +1381ns[+1469ns] +/- 
> > >>  101ns
> > >> #* PHC0  0   3   377 4   +571ns[+1304ns] +/- 
> > >>   54ns
> > >> #* PHC0  0   3   377 8 -5ns[  +71ns] +/- 
> > >>  139ns
> > >> #* PHC0  0   3   377 7   -247ns[ -502ns] +/- 
> > >>   69ns
> > >> #* PHC0  0   3   377 5   -283ns[ +879ns] +/- 
> > >>   73ns
> > >> #* PHC0  0   3   377 3   +148ns[ -109ns] +/- 
> > >>   61ns
> > >>
> > >> With TSC delta calculation:
> > >> 
> > >>
> > >> #* PHC0  0   3   377 7   +379ns[ +432ns] +/- 
> > >>   53ns
> > >> #* PHC0  0   3   377 9   +106ns[ +420ns] +/- 
> > >>   42ns
> > >> #* PHC0  0   3   377 7-58ns[ -136ns] +/- 
> > >>   62ns
> > >> #* PHC0  0   3   37712+93ns[  -38ns] +/- 
> > >>   64ns
> > >> #* PHC0  0   3   377 8+84ns[ +107ns] +/- 
> > >>   69ns
> > >> #* PHC0  0   3   377 3-76ns[ -103ns] +/- 
> > >>   52ns
> > >> #* PHC0  0   3   377 7+52ns[  +63ns] +/- 
> > >>   50ns
> > >> #* PHC0  0   3   37711+29ns[  +31ns] +/- 
> > >>   70ns
> > >> #* PHC0  0   

Re: [patch 3/3] PTP: add kvm PTP driver

2017-01-18 Thread Marcelo Tosatti
On Wed, Jan 18, 2017 at 01:46:58PM +0100, Paolo Bonzini wrote:
> On 18/01/2017 13:24, Marcelo Tosatti wrote:
> > On Wed, Jan 18, 2017 at 10:17:38AM -0200, Marcelo Tosatti wrote:
> >> On Tue, Jan 17, 2017 at 04:36:21PM +0100, Radim Krcmar wrote:
> >>> 2017-01-17 09:30-0200, Marcelo Tosatti:
>  On Tue, Jan 17, 2017 at 09:03:27AM +0100, Miroslav Lichvar wrote:
> > Users of the PTP_SYS_OFFSET ioctl assume that (ts[0]+ts[2])/2
> > corresponds to ts[1], (ts[2]+ts[4])/2 corresponds to ts[3], and so on.
> >
> > ts[1] ts[3]
> > Host time-+-+
> >   | |
> >   | |
> > Guest time   +-+-+..
> > ts[0]ts[2] ts[4]
> >>>
> >>> KVM PTP delay moves host ts[i] to be close to guest ts[i+1] and makes
> >>> the offset very consistent, so the graph would look like:
> >>>
> >>> ts[1] ts[3]
> >>> Host time-+-+
> >>>   | |
> >>>   | |
> >>> Guest time   +-+-+..
> >>> ts[0]ts[2] ts[4]
> >>>
> >>> which doesn't sound good if users assume that the host reading is in the
> >>> middle -- the guest time would be ahead of the host time.
> >>
> >> Testcase: run a guest and a loop sending SIGUSR1 to vcpu0 (emulating
> >> intense interrupts). Follows results:
> >>
> >> Without TSC delta calculation:
> >> =
> >>
> >> #* PHC0  0   3   377 2-99ns[ +206ns] +/-  
> >> 116ns
> >> #* PHC0  0   3   377 8   +202ns[ +249ns] +/-  
> >> 111ns
> >> #* PHC0  0   3   377 8   -213ns[ +683ns] +/-   
> >> 88ns
> >> #* PHC0  0   3   377 6+77ns[ +319ns] +/-   
> >> 56ns
> >> #* PHC0  0   3   377 4   -771ns[-1029ns] +/-   
> >> 93ns
> >> #* PHC0  0   3   37710-49ns[  -58ns] +/-  
> >> 121ns
> >> #* PHC0  0   3   377 9   +562ns[ +703ns] +/-  
> >> 107ns
> >> #* PHC0  0   3   377 6 -2ns[   -3ns] +/-   
> >> 94ns
> >> #* PHC0  0   3   377 4   +451ns[ +494ns] +/-  
> >> 138ns
> >> #* PHC0  0   3   37711-67ns[  -74ns] +/-  
> >> 113ns
> >> #* PHC0  0   3   377 8   +244ns[ +264ns] +/-  
> >> 119ns
> >> #* PHC0  0   3   377 7   -696ns[ -890ns] +/-   
> >> 89ns
> >> #* PHC0  0   3   377 4   +468ns[ +560ns] +/-  
> >> 110ns
> >> #* PHC0  0   3   37711   -310ns[ -430ns] +/-   
> >> 72ns
> >> #* PHC0  0   3   377 9   +189ns[ +298ns] +/-   
> >> 54ns
> >> #* PHC0  0   3   377 7   +594ns[ +473ns] +/-   
> >> 96ns
> >> #* PHC0  0   3   377 5   +151ns[ +280ns] +/-   
> >> 71ns
> >> #* PHC0  0   3   37710   -590ns[ -696ns] +/-   
> >> 94ns
> >> #* PHC0  0   3   377 8   +415ns[ +526ns] +/-   
> >> 74ns
> >> #* PHC0  0   3   377 6  +1381ns[+1469ns] +/-  
> >> 101ns
> >> #* PHC0  0   3   377 4   +571ns[+1304ns] +/-   
> >> 54ns
> >> #* PHC0  0   3   377 8 -5ns[  +71ns] +/-  
> >> 139ns
> >> #* PHC0  0   3   377 7   -247ns[ -502ns] +/-   
> >> 69ns
> >> #* PHC0  0   3   377 5   -283ns[ +879ns] +/-   
> >> 73ns
> >> #* PHC0  0   3   377 3   +148ns[ -109ns] +/-   
> >> 61ns
> >>
> >> With TSC delta calculation:
> >> 
> >>
> >> #* PHC0  0   3   377 7   +379ns[ +432ns] +/-   
> >> 53ns
> >> #* PHC0  0   3   377 9   +106ns[ +420ns] +/-   
> >> 42ns
> >> #* PHC0  0   3   377 7-58ns[ -136ns] +/-   
> >> 62ns
> >> #* PHC0  0   3   37712+93ns[  -38ns] +/-   
> >> 64ns
> >> #* PHC0  0   3   377 8+84ns[ +107ns] +/-   
> >> 69ns
> >> #* PHC0  0   3   377 3-76ns[ -103ns] +/-   
> >> 52ns
> >> #* PHC0  0   3   377 7+52ns[  +63ns] +/-   
> >> 50ns
> >> #* PHC0  0   3   37711+29ns[  +31ns] +/-   
> >> 70ns
> >> #* PHC0  0   3   377 7-47ns[  -56ns] +/-   
> >> 42ns
> >> #* PHC0  0   3   37710-35ns[  -42ns] +/-   
> >> 33ns
> >> #* PHC0  0   3   377 7-32ns[  -34ns] +/-   
> >> 42ns
> >> #* PHC0  0   3   37711  

Re: [patch 3/3] PTP: add kvm PTP driver

2017-01-18 Thread Marcelo Tosatti
On Wed, Jan 18, 2017 at 01:46:58PM +0100, Paolo Bonzini wrote:
> On 18/01/2017 13:24, Marcelo Tosatti wrote:
> > On Wed, Jan 18, 2017 at 10:17:38AM -0200, Marcelo Tosatti wrote:
> >> On Tue, Jan 17, 2017 at 04:36:21PM +0100, Radim Krcmar wrote:
> >>> 2017-01-17 09:30-0200, Marcelo Tosatti:
>  On Tue, Jan 17, 2017 at 09:03:27AM +0100, Miroslav Lichvar wrote:
> > Users of the PTP_SYS_OFFSET ioctl assume that (ts[0]+ts[2])/2
> > corresponds to ts[1], (ts[2]+ts[4])/2 corresponds to ts[3], and so on.
> >
> > ts[1] ts[3]
> > Host time-+-+
> >   | |
> >   | |
> > Guest time   +-+-+..
> > ts[0]ts[2] ts[4]
> >>>
> >>> KVM PTP delay moves host ts[i] to be close to guest ts[i+1] and makes
> >>> the offset very consistent, so the graph would look like:
> >>>
> >>> ts[1] ts[3]
> >>> Host time-+-+
> >>>   | |
> >>>   | |
> >>> Guest time   +-+-+..
> >>> ts[0]ts[2] ts[4]
> >>>
> >>> which doesn't sound good if users assume that the host reading is in the
> >>> middle -- the guest time would be ahead of the host time.
> >>
> >> Testcase: run a guest and a loop sending SIGUSR1 to vcpu0 (emulating
> >> intense interrupts). Follows results:
> >>
> >> Without TSC delta calculation:
> >> =
> >>
> >> #* PHC0  0   3   377 2-99ns[ +206ns] +/-  
> >> 116ns
> >> #* PHC0  0   3   377 8   +202ns[ +249ns] +/-  
> >> 111ns
> >> #* PHC0  0   3   377 8   -213ns[ +683ns] +/-   
> >> 88ns
> >> #* PHC0  0   3   377 6+77ns[ +319ns] +/-   
> >> 56ns
> >> #* PHC0  0   3   377 4   -771ns[-1029ns] +/-   
> >> 93ns
> >> #* PHC0  0   3   37710-49ns[  -58ns] +/-  
> >> 121ns
> >> #* PHC0  0   3   377 9   +562ns[ +703ns] +/-  
> >> 107ns
> >> #* PHC0  0   3   377 6 -2ns[   -3ns] +/-   
> >> 94ns
> >> #* PHC0  0   3   377 4   +451ns[ +494ns] +/-  
> >> 138ns
> >> #* PHC0  0   3   37711-67ns[  -74ns] +/-  
> >> 113ns
> >> #* PHC0  0   3   377 8   +244ns[ +264ns] +/-  
> >> 119ns
> >> #* PHC0  0   3   377 7   -696ns[ -890ns] +/-   
> >> 89ns
> >> #* PHC0  0   3   377 4   +468ns[ +560ns] +/-  
> >> 110ns
> >> #* PHC0  0   3   37711   -310ns[ -430ns] +/-   
> >> 72ns
> >> #* PHC0  0   3   377 9   +189ns[ +298ns] +/-   
> >> 54ns
> >> #* PHC0  0   3   377 7   +594ns[ +473ns] +/-   
> >> 96ns
> >> #* PHC0  0   3   377 5   +151ns[ +280ns] +/-   
> >> 71ns
> >> #* PHC0  0   3   37710   -590ns[ -696ns] +/-   
> >> 94ns
> >> #* PHC0  0   3   377 8   +415ns[ +526ns] +/-   
> >> 74ns
> >> #* PHC0  0   3   377 6  +1381ns[+1469ns] +/-  
> >> 101ns
> >> #* PHC0  0   3   377 4   +571ns[+1304ns] +/-   
> >> 54ns
> >> #* PHC0  0   3   377 8 -5ns[  +71ns] +/-  
> >> 139ns
> >> #* PHC0  0   3   377 7   -247ns[ -502ns] +/-   
> >> 69ns
> >> #* PHC0  0   3   377 5   -283ns[ +879ns] +/-   
> >> 73ns
> >> #* PHC0  0   3   377 3   +148ns[ -109ns] +/-   
> >> 61ns
> >>
> >> With TSC delta calculation:
> >> 
> >>
> >> #* PHC0  0   3   377 7   +379ns[ +432ns] +/-   
> >> 53ns
> >> #* PHC0  0   3   377 9   +106ns[ +420ns] +/-   
> >> 42ns
> >> #* PHC0  0   3   377 7-58ns[ -136ns] +/-   
> >> 62ns
> >> #* PHC0  0   3   37712+93ns[  -38ns] +/-   
> >> 64ns
> >> #* PHC0  0   3   377 8+84ns[ +107ns] +/-   
> >> 69ns
> >> #* PHC0  0   3   377 3-76ns[ -103ns] +/-   
> >> 52ns
> >> #* PHC0  0   3   377 7+52ns[  +63ns] +/-   
> >> 50ns
> >> #* PHC0  0   3   37711+29ns[  +31ns] +/-   
> >> 70ns
> >> #* PHC0  0   3   377 7-47ns[  -56ns] +/-   
> >> 42ns
> >> #* PHC0  0   3   37710-35ns[  -42ns] +/-   
> >> 33ns
> >> #* PHC0  0   3   377 7-32ns[  -34ns] +/-   
> >> 42ns
> >> #* PHC0  0   3   37711  

Re: [patch 3/3] PTP: add kvm PTP driver

2017-01-18 Thread Marcelo Tosatti
On Wed, Jan 18, 2017 at 01:46:58PM +0100, Paolo Bonzini wrote:
> 
> 
> On 18/01/2017 13:24, Marcelo Tosatti wrote:
> > On Wed, Jan 18, 2017 at 10:17:38AM -0200, Marcelo Tosatti wrote:
> >> On Tue, Jan 17, 2017 at 04:36:21PM +0100, Radim Krcmar wrote:
> >>> 2017-01-17 09:30-0200, Marcelo Tosatti:
>  On Tue, Jan 17, 2017 at 09:03:27AM +0100, Miroslav Lichvar wrote:
> > Users of the PTP_SYS_OFFSET ioctl assume that (ts[0]+ts[2])/2
> > corresponds to ts[1], (ts[2]+ts[4])/2 corresponds to ts[3], and so on.
> >
> > ts[1] ts[3]
> > Host time-+-+
> >   | |
> >   | |
> > Guest time   +-+-+..
> > ts[0]ts[2] ts[4]
> >>>
> >>> KVM PTP delay moves host ts[i] to be close to guest ts[i+1] and makes
> >>> the offset very consistent, so the graph would look like:
> >>>
> >>> ts[1] ts[3]
> >>> Host time-+-+
> >>>   | |
> >>>   | |
> >>> Guest time   +-+-+..
> >>> ts[0]ts[2] ts[4]
> >>>
> >>> which doesn't sound good if users assume that the host reading is in the
> >>> middle -- the guest time would be ahead of the host time.
> >>
> >> Testcase: run a guest and a loop sending SIGUSR1 to vcpu0 (emulating
> >> intense interrupts). Follows results:
> >>
> >> Without TSC delta calculation:
> >> =
> >>
> >> #* PHC0  0   3   377 2-99ns[ +206ns] +/-  
> >> 116ns
> >> #* PHC0  0   3   377 8   +202ns[ +249ns] +/-  
> >> 111ns
> >> #* PHC0  0   3   377 8   -213ns[ +683ns] +/-   
> >> 88ns
> >> #* PHC0  0   3   377 6+77ns[ +319ns] +/-   
> >> 56ns
> >> #* PHC0  0   3   377 4   -771ns[-1029ns] +/-   
> >> 93ns
> >> #* PHC0  0   3   37710-49ns[  -58ns] +/-  
> >> 121ns
> >> #* PHC0  0   3   377 9   +562ns[ +703ns] +/-  
> >> 107ns
> >> #* PHC0  0   3   377 6 -2ns[   -3ns] +/-   
> >> 94ns
> >> #* PHC0  0   3   377 4   +451ns[ +494ns] +/-  
> >> 138ns
> >> #* PHC0  0   3   37711-67ns[  -74ns] +/-  
> >> 113ns
> >> #* PHC0  0   3   377 8   +244ns[ +264ns] +/-  
> >> 119ns
> >> #* PHC0  0   3   377 7   -696ns[ -890ns] +/-   
> >> 89ns
> >> #* PHC0  0   3   377 4   +468ns[ +560ns] +/-  
> >> 110ns
> >> #* PHC0  0   3   37711   -310ns[ -430ns] +/-   
> >> 72ns
> >> #* PHC0  0   3   377 9   +189ns[ +298ns] +/-   
> >> 54ns
> >> #* PHC0  0   3   377 7   +594ns[ +473ns] +/-   
> >> 96ns
> >> #* PHC0  0   3   377 5   +151ns[ +280ns] +/-   
> >> 71ns
> >> #* PHC0  0   3   37710   -590ns[ -696ns] +/-   
> >> 94ns
> >> #* PHC0  0   3   377 8   +415ns[ +526ns] +/-   
> >> 74ns
> >> #* PHC0  0   3   377 6  +1381ns[+1469ns] +/-  
> >> 101ns
> >> #* PHC0  0   3   377 4   +571ns[+1304ns] +/-   
> >> 54ns
> >> #* PHC0  0   3   377 8 -5ns[  +71ns] +/-  
> >> 139ns
> >> #* PHC0  0   3   377 7   -247ns[ -502ns] +/-   
> >> 69ns
> >> #* PHC0  0   3   377 5   -283ns[ +879ns] +/-   
> >> 73ns
> >> #* PHC0  0   3   377 3   +148ns[ -109ns] +/-   
> >> 61ns
> >>
> >> With TSC delta calculation:
> >> 
> >>
> >> #* PHC0  0   3   377 7   +379ns[ +432ns] +/-   
> >> 53ns
> >> #* PHC0  0   3   377 9   +106ns[ +420ns] +/-   
> >> 42ns
> >> #* PHC0  0   3   377 7-58ns[ -136ns] +/-   
> >> 62ns
> >> #* PHC0  0   3   37712+93ns[  -38ns] +/-   
> >> 64ns
> >> #* PHC0  0   3   377 8+84ns[ +107ns] +/-   
> >> 69ns
> >> #* PHC0  0   3   377 3-76ns[ -103ns] +/-   
> >> 52ns
> >> #* PHC0  0   3   377 7+52ns[  +63ns] +/-   
> >> 50ns
> >> #* PHC0  0   3   37711+29ns[  +31ns] +/-   
> >> 70ns
> >> #* PHC0  0   3   377 7-47ns[  -56ns] +/-   
> >> 42ns
> >> #* PHC0  0   3   37710-35ns[  -42ns] +/-   
> >> 33ns
> >> #* PHC0  0   3   377 7-32ns[  -34ns] +/-   
> >> 42ns
> >> #* PHC0  0   3   377  

Re: [patch 3/3] PTP: add kvm PTP driver

2017-01-18 Thread Marcelo Tosatti
On Wed, Jan 18, 2017 at 01:46:58PM +0100, Paolo Bonzini wrote:
> 
> 
> On 18/01/2017 13:24, Marcelo Tosatti wrote:
> > On Wed, Jan 18, 2017 at 10:17:38AM -0200, Marcelo Tosatti wrote:
> >> On Tue, Jan 17, 2017 at 04:36:21PM +0100, Radim Krcmar wrote:
> >>> 2017-01-17 09:30-0200, Marcelo Tosatti:
>  On Tue, Jan 17, 2017 at 09:03:27AM +0100, Miroslav Lichvar wrote:
> > Users of the PTP_SYS_OFFSET ioctl assume that (ts[0]+ts[2])/2
> > corresponds to ts[1], (ts[2]+ts[4])/2 corresponds to ts[3], and so on.
> >
> > ts[1] ts[3]
> > Host time-+-+
> >   | |
> >   | |
> > Guest time   +-+-+..
> > ts[0]ts[2] ts[4]
> >>>
> >>> KVM PTP delay moves host ts[i] to be close to guest ts[i+1] and makes
> >>> the offset very consistent, so the graph would look like:
> >>>
> >>> ts[1] ts[3]
> >>> Host time-+-+
> >>>   | |
> >>>   | |
> >>> Guest time   +-+-+..
> >>> ts[0]ts[2] ts[4]
> >>>
> >>> which doesn't sound good if users assume that the host reading is in the
> >>> middle -- the guest time would be ahead of the host time.
> >>
> >> Testcase: run a guest and a loop sending SIGUSR1 to vcpu0 (emulating
> >> intense interrupts). Follows results:
> >>
> >> Without TSC delta calculation:
> >> =
> >>
> >> #* PHC0  0   3   377 2-99ns[ +206ns] +/-  
> >> 116ns
> >> #* PHC0  0   3   377 8   +202ns[ +249ns] +/-  
> >> 111ns
> >> #* PHC0  0   3   377 8   -213ns[ +683ns] +/-   
> >> 88ns
> >> #* PHC0  0   3   377 6+77ns[ +319ns] +/-   
> >> 56ns
> >> #* PHC0  0   3   377 4   -771ns[-1029ns] +/-   
> >> 93ns
> >> #* PHC0  0   3   37710-49ns[  -58ns] +/-  
> >> 121ns
> >> #* PHC0  0   3   377 9   +562ns[ +703ns] +/-  
> >> 107ns
> >> #* PHC0  0   3   377 6 -2ns[   -3ns] +/-   
> >> 94ns
> >> #* PHC0  0   3   377 4   +451ns[ +494ns] +/-  
> >> 138ns
> >> #* PHC0  0   3   37711-67ns[  -74ns] +/-  
> >> 113ns
> >> #* PHC0  0   3   377 8   +244ns[ +264ns] +/-  
> >> 119ns
> >> #* PHC0  0   3   377 7   -696ns[ -890ns] +/-   
> >> 89ns
> >> #* PHC0  0   3   377 4   +468ns[ +560ns] +/-  
> >> 110ns
> >> #* PHC0  0   3   37711   -310ns[ -430ns] +/-   
> >> 72ns
> >> #* PHC0  0   3   377 9   +189ns[ +298ns] +/-   
> >> 54ns
> >> #* PHC0  0   3   377 7   +594ns[ +473ns] +/-   
> >> 96ns
> >> #* PHC0  0   3   377 5   +151ns[ +280ns] +/-   
> >> 71ns
> >> #* PHC0  0   3   37710   -590ns[ -696ns] +/-   
> >> 94ns
> >> #* PHC0  0   3   377 8   +415ns[ +526ns] +/-   
> >> 74ns
> >> #* PHC0  0   3   377 6  +1381ns[+1469ns] +/-  
> >> 101ns
> >> #* PHC0  0   3   377 4   +571ns[+1304ns] +/-   
> >> 54ns
> >> #* PHC0  0   3   377 8 -5ns[  +71ns] +/-  
> >> 139ns
> >> #* PHC0  0   3   377 7   -247ns[ -502ns] +/-   
> >> 69ns
> >> #* PHC0  0   3   377 5   -283ns[ +879ns] +/-   
> >> 73ns
> >> #* PHC0  0   3   377 3   +148ns[ -109ns] +/-   
> >> 61ns
> >>
> >> With TSC delta calculation:
> >> 
> >>
> >> #* PHC0  0   3   377 7   +379ns[ +432ns] +/-   
> >> 53ns
> >> #* PHC0  0   3   377 9   +106ns[ +420ns] +/-   
> >> 42ns
> >> #* PHC0  0   3   377 7-58ns[ -136ns] +/-   
> >> 62ns
> >> #* PHC0  0   3   37712+93ns[  -38ns] +/-   
> >> 64ns
> >> #* PHC0  0   3   377 8+84ns[ +107ns] +/-   
> >> 69ns
> >> #* PHC0  0   3   377 3-76ns[ -103ns] +/-   
> >> 52ns
> >> #* PHC0  0   3   377 7+52ns[  +63ns] +/-   
> >> 50ns
> >> #* PHC0  0   3   37711+29ns[  +31ns] +/-   
> >> 70ns
> >> #* PHC0  0   3   377 7-47ns[  -56ns] +/-   
> >> 42ns
> >> #* PHC0  0   3   37710-35ns[  -42ns] +/-   
> >> 33ns
> >> #* PHC0  0   3   377 7-32ns[  -34ns] +/-   
> >> 42ns
> >> #* PHC0  0   3   377  

Re: [patch 3/3] PTP: add kvm PTP driver

2017-01-18 Thread Paolo Bonzini


On 18/01/2017 14:36, Miroslav Lichvar wrote:
> On Wed, Jan 18, 2017 at 01:46:58PM +0100, Paolo Bonzini wrote:
>> On 18/01/2017 13:24, Marcelo Tosatti wrote:
 Testcase: run a guest and a loop sending SIGUSR1 to vcpu0 (emulating
 intense interrupts). Follows results:
> 
 Do you still want to drop it in favour of simplicity?
> 
>> It's just that it's not obvious why you get better results with biased
>> host timestamps.  What makes the biased host timestamp more precise?
>>
>> I'd rather use PTP_SYS_OFFSET_PRECISE instead, but unfortunately chrony
>> does not support it---but I would still prefer you to support
>> PTP_SYS_OFFSET_PRECISE as well.
> 
> Interesting. I wasn't aware that there is a new ioctl for measuring
> the HW-sys offset. Adding support to chrony shouldn't be difficult.
> 
> If I understand it correctly, PTP_SYS_OFFSET can be emulated on top of
> PTP_SYS_OFFSET_PRECISE simply by copying the sys_realtime and device
> fields to corresponding ts slots. The apparent delay will be zero, but
> that's ok if the conversion is really accurate.

Yes, for 1 sample only.  Otherwise you'd have the same issue as in
Marcelo's driver (the device aka guest timestamp from
PTP_SYS_OFFSET_PRECISE would not be halfway between the system aka host
timestamps), and your idea below could be applied.

> I'm not sure if trying to do that in the opposite direction is a good
> idea. An application using PTP_SYS_OFFSET_PRECISE may assume the
> conversion is accurate and not include any delay/dispersion in an
> estimate of the maximum error, which is needed in NTP for instance.
> 
> If we know the host timestamp ts[1] is not in the middle between the
> guests timestamps ts[0] and ts[2], but rather closer to ts[2], why not
> simply shift ts[1] by (ts[2]-ts[0])/2 ?

Interesting idea!  For this to work, KVM needs to implement
getcrosstimestamp and ptp_chardev.c can then add an alternative
implementation of PTP_SYS_OFFSET, based on precise cross timestamps.

Something like

for (i = 0; i <= sysoff->n_samples; i++) {
// ... call getcrosststamp ...
sysns = ktime_to_ns(xtstamp.sys_realtime);
if (i > 0) {
devns = ktime_to_ns(xtstamp.device);
devns -= (sysns - prev_sysns) / 2;
devts = ns_to_timespec(devns);
pct->sec = devts.tv_sec;
pct->nsec = devts.tv_nsec;
pct++;
}
systs = ns_to_timespec(sysns);
pct->sec = ts.tv_sec;
pct->nsec = ts.tv_nsec;
pct++;
prev_sysns = sysns;
}

Marcelo, can you give it a try?

Thanks,

Paolo


Re: [patch 3/3] PTP: add kvm PTP driver

2017-01-18 Thread Paolo Bonzini


On 18/01/2017 14:36, Miroslav Lichvar wrote:
> On Wed, Jan 18, 2017 at 01:46:58PM +0100, Paolo Bonzini wrote:
>> On 18/01/2017 13:24, Marcelo Tosatti wrote:
 Testcase: run a guest and a loop sending SIGUSR1 to vcpu0 (emulating
 intense interrupts). Follows results:
> 
 Do you still want to drop it in favour of simplicity?
> 
>> It's just that it's not obvious why you get better results with biased
>> host timestamps.  What makes the biased host timestamp more precise?
>>
>> I'd rather use PTP_SYS_OFFSET_PRECISE instead, but unfortunately chrony
>> does not support it---but I would still prefer you to support
>> PTP_SYS_OFFSET_PRECISE as well.
> 
> Interesting. I wasn't aware that there is a new ioctl for measuring
> the HW-sys offset. Adding support to chrony shouldn't be difficult.
> 
> If I understand it correctly, PTP_SYS_OFFSET can be emulated on top of
> PTP_SYS_OFFSET_PRECISE simply by copying the sys_realtime and device
> fields to corresponding ts slots. The apparent delay will be zero, but
> that's ok if the conversion is really accurate.

Yes, for 1 sample only.  Otherwise you'd have the same issue as in
Marcelo's driver (the device aka guest timestamp from
PTP_SYS_OFFSET_PRECISE would not be halfway between the system aka host
timestamps), and your idea below could be applied.

> I'm not sure if trying to do that in the opposite direction is a good
> idea. An application using PTP_SYS_OFFSET_PRECISE may assume the
> conversion is accurate and not include any delay/dispersion in an
> estimate of the maximum error, which is needed in NTP for instance.
> 
> If we know the host timestamp ts[1] is not in the middle between the
> guests timestamps ts[0] and ts[2], but rather closer to ts[2], why not
> simply shift ts[1] by (ts[2]-ts[0])/2 ?

Interesting idea!  For this to work, KVM needs to implement
getcrosstimestamp and ptp_chardev.c can then add an alternative
implementation of PTP_SYS_OFFSET, based on precise cross timestamps.

Something like

for (i = 0; i <= sysoff->n_samples; i++) {
// ... call getcrosststamp ...
sysns = ktime_to_ns(xtstamp.sys_realtime);
if (i > 0) {
devns = ktime_to_ns(xtstamp.device);
devns -= (sysns - prev_sysns) / 2;
devts = ns_to_timespec(devns);
pct->sec = devts.tv_sec;
pct->nsec = devts.tv_nsec;
pct++;
}
systs = ns_to_timespec(sysns);
pct->sec = ts.tv_sec;
pct->nsec = ts.tv_nsec;
pct++;
prev_sysns = sysns;
}

Marcelo, can you give it a try?

Thanks,

Paolo


Re: [patch 3/3] PTP: add kvm PTP driver

2017-01-18 Thread Miroslav Lichvar
On Wed, Jan 18, 2017 at 01:46:58PM +0100, Paolo Bonzini wrote:
> On 18/01/2017 13:24, Marcelo Tosatti wrote:
> >> Testcase: run a guest and a loop sending SIGUSR1 to vcpu0 (emulating
> >> intense interrupts). Follows results:

> >> Do you still want to drop it in favour of simplicity?

> It's just that it's not obvious why you get better results with biased
> host timestamps.  What makes the biased host timestamp more precise?
> 
> I'd rather use PTP_SYS_OFFSET_PRECISE instead, but unfortunately chrony
> does not support it---but I would still prefer you to support
> PTP_SYS_OFFSET_PRECISE as well.

Interesting. I wasn't aware that there is a new ioctl for measuring
the HW-sys offset. Adding support to chrony shouldn't be difficult.

If I understand it correctly, PTP_SYS_OFFSET can be emulated on top of
PTP_SYS_OFFSET_PRECISE simply by copying the sys_realtime and device
fields to corresponding ts slots. The apparent delay will be zero, but
that's ok if the conversion is really accurate.

I'm not sure if trying to do that in the opposite direction is a good
idea. An application using PTP_SYS_OFFSET_PRECISE may assume the
conversion is accurate and not include any delay/dispersion in an
estimate of the maximum error, which is needed in NTP for instance.

If we know the host timestamp ts[1] is not in the middle between the
guests timestamps ts[0] and ts[2], but rather closer to ts[2], why not
simply shift ts[1] by (ts[2]-ts[0])/2 ?

-- 
Miroslav Lichvar


Re: [patch 3/3] PTP: add kvm PTP driver

2017-01-18 Thread Miroslav Lichvar
On Wed, Jan 18, 2017 at 01:46:58PM +0100, Paolo Bonzini wrote:
> On 18/01/2017 13:24, Marcelo Tosatti wrote:
> >> Testcase: run a guest and a loop sending SIGUSR1 to vcpu0 (emulating
> >> intense interrupts). Follows results:

> >> Do you still want to drop it in favour of simplicity?

> It's just that it's not obvious why you get better results with biased
> host timestamps.  What makes the biased host timestamp more precise?
> 
> I'd rather use PTP_SYS_OFFSET_PRECISE instead, but unfortunately chrony
> does not support it---but I would still prefer you to support
> PTP_SYS_OFFSET_PRECISE as well.

Interesting. I wasn't aware that there is a new ioctl for measuring
the HW-sys offset. Adding support to chrony shouldn't be difficult.

If I understand it correctly, PTP_SYS_OFFSET can be emulated on top of
PTP_SYS_OFFSET_PRECISE simply by copying the sys_realtime and device
fields to corresponding ts slots. The apparent delay will be zero, but
that's ok if the conversion is really accurate.

I'm not sure if trying to do that in the opposite direction is a good
idea. An application using PTP_SYS_OFFSET_PRECISE may assume the
conversion is accurate and not include any delay/dispersion in an
estimate of the maximum error, which is needed in NTP for instance.

If we know the host timestamp ts[1] is not in the middle between the
guests timestamps ts[0] and ts[2], but rather closer to ts[2], why not
simply shift ts[1] by (ts[2]-ts[0])/2 ?

-- 
Miroslav Lichvar


Re: [patch 3/3] PTP: add kvm PTP driver

2017-01-18 Thread Paolo Bonzini


On 18/01/2017 13:24, Marcelo Tosatti wrote:
> On Wed, Jan 18, 2017 at 10:17:38AM -0200, Marcelo Tosatti wrote:
>> On Tue, Jan 17, 2017 at 04:36:21PM +0100, Radim Krcmar wrote:
>>> 2017-01-17 09:30-0200, Marcelo Tosatti:
 On Tue, Jan 17, 2017 at 09:03:27AM +0100, Miroslav Lichvar wrote:
> Users of the PTP_SYS_OFFSET ioctl assume that (ts[0]+ts[2])/2
> corresponds to ts[1], (ts[2]+ts[4])/2 corresponds to ts[3], and so on.
>
> ts[1] ts[3]
> Host time-+-+
>   | |
>   | |
> Guest time   +-+-+..
> ts[0]ts[2] ts[4]
>>>
>>> KVM PTP delay moves host ts[i] to be close to guest ts[i+1] and makes
>>> the offset very consistent, so the graph would look like:
>>>
>>> ts[1] ts[3]
>>> Host time-+-+
>>>   | |
>>>   | |
>>> Guest time   +-+-+..
>>> ts[0]ts[2] ts[4]
>>>
>>> which doesn't sound good if users assume that the host reading is in the
>>> middle -- the guest time would be ahead of the host time.
>>
>> Testcase: run a guest and a loop sending SIGUSR1 to vcpu0 (emulating
>> intense interrupts). Follows results:
>>
>> Without TSC delta calculation:
>> =
>>
>> #* PHC0  0   3   377 2-99ns[ +206ns] +/-  
>> 116ns
>> #* PHC0  0   3   377 8   +202ns[ +249ns] +/-  
>> 111ns
>> #* PHC0  0   3   377 8   -213ns[ +683ns] +/-   
>> 88ns
>> #* PHC0  0   3   377 6+77ns[ +319ns] +/-   
>> 56ns
>> #* PHC0  0   3   377 4   -771ns[-1029ns] +/-   
>> 93ns
>> #* PHC0  0   3   37710-49ns[  -58ns] +/-  
>> 121ns
>> #* PHC0  0   3   377 9   +562ns[ +703ns] +/-  
>> 107ns
>> #* PHC0  0   3   377 6 -2ns[   -3ns] +/-   
>> 94ns
>> #* PHC0  0   3   377 4   +451ns[ +494ns] +/-  
>> 138ns
>> #* PHC0  0   3   37711-67ns[  -74ns] +/-  
>> 113ns
>> #* PHC0  0   3   377 8   +244ns[ +264ns] +/-  
>> 119ns
>> #* PHC0  0   3   377 7   -696ns[ -890ns] +/-   
>> 89ns
>> #* PHC0  0   3   377 4   +468ns[ +560ns] +/-  
>> 110ns
>> #* PHC0  0   3   37711   -310ns[ -430ns] +/-   
>> 72ns
>> #* PHC0  0   3   377 9   +189ns[ +298ns] +/-   
>> 54ns
>> #* PHC0  0   3   377 7   +594ns[ +473ns] +/-   
>> 96ns
>> #* PHC0  0   3   377 5   +151ns[ +280ns] +/-   
>> 71ns
>> #* PHC0  0   3   37710   -590ns[ -696ns] +/-   
>> 94ns
>> #* PHC0  0   3   377 8   +415ns[ +526ns] +/-   
>> 74ns
>> #* PHC0  0   3   377 6  +1381ns[+1469ns] +/-  
>> 101ns
>> #* PHC0  0   3   377 4   +571ns[+1304ns] +/-   
>> 54ns
>> #* PHC0  0   3   377 8 -5ns[  +71ns] +/-  
>> 139ns
>> #* PHC0  0   3   377 7   -247ns[ -502ns] +/-   
>> 69ns
>> #* PHC0  0   3   377 5   -283ns[ +879ns] +/-   
>> 73ns
>> #* PHC0  0   3   377 3   +148ns[ -109ns] +/-   
>> 61ns
>>
>> With TSC delta calculation:
>> 
>>
>> #* PHC0  0   3   377 7   +379ns[ +432ns] +/-   
>> 53ns
>> #* PHC0  0   3   377 9   +106ns[ +420ns] +/-   
>> 42ns
>> #* PHC0  0   3   377 7-58ns[ -136ns] +/-   
>> 62ns
>> #* PHC0  0   3   37712+93ns[  -38ns] +/-   
>> 64ns
>> #* PHC0  0   3   377 8+84ns[ +107ns] +/-   
>> 69ns
>> #* PHC0  0   3   377 3-76ns[ -103ns] +/-   
>> 52ns
>> #* PHC0  0   3   377 7+52ns[  +63ns] +/-   
>> 50ns
>> #* PHC0  0   3   37711+29ns[  +31ns] +/-   
>> 70ns
>> #* PHC0  0   3   377 7-47ns[  -56ns] +/-   
>> 42ns
>> #* PHC0  0   3   37710-35ns[  -42ns] +/-   
>> 33ns
>> #* PHC0  0   3   377 7-32ns[  -34ns] +/-   
>> 42ns
>> #* PHC0  0   3   37711   -172ns[ -173ns] +/-  
>> 118ns
>> #* PHC0  0   3   377 6+65ns[  +76ns] +/-   
>> 23ns
>> #* PHC0  0   3   377 9+18ns[  +23ns] +/-   
>> 37ns
>> #* PHC0  0   3   377 6+41ns[  -60ns] +/-  

Re: [patch 3/3] PTP: add kvm PTP driver

2017-01-18 Thread Paolo Bonzini


On 18/01/2017 13:24, Marcelo Tosatti wrote:
> On Wed, Jan 18, 2017 at 10:17:38AM -0200, Marcelo Tosatti wrote:
>> On Tue, Jan 17, 2017 at 04:36:21PM +0100, Radim Krcmar wrote:
>>> 2017-01-17 09:30-0200, Marcelo Tosatti:
 On Tue, Jan 17, 2017 at 09:03:27AM +0100, Miroslav Lichvar wrote:
> Users of the PTP_SYS_OFFSET ioctl assume that (ts[0]+ts[2])/2
> corresponds to ts[1], (ts[2]+ts[4])/2 corresponds to ts[3], and so on.
>
> ts[1] ts[3]
> Host time-+-+
>   | |
>   | |
> Guest time   +-+-+..
> ts[0]ts[2] ts[4]
>>>
>>> KVM PTP delay moves host ts[i] to be close to guest ts[i+1] and makes
>>> the offset very consistent, so the graph would look like:
>>>
>>> ts[1] ts[3]
>>> Host time-+-+
>>>   | |
>>>   | |
>>> Guest time   +-+-+..
>>> ts[0]ts[2] ts[4]
>>>
>>> which doesn't sound good if users assume that the host reading is in the
>>> middle -- the guest time would be ahead of the host time.
>>
>> Testcase: run a guest and a loop sending SIGUSR1 to vcpu0 (emulating
>> intense interrupts). Follows results:
>>
>> Without TSC delta calculation:
>> =
>>
>> #* PHC0  0   3   377 2-99ns[ +206ns] +/-  
>> 116ns
>> #* PHC0  0   3   377 8   +202ns[ +249ns] +/-  
>> 111ns
>> #* PHC0  0   3   377 8   -213ns[ +683ns] +/-   
>> 88ns
>> #* PHC0  0   3   377 6+77ns[ +319ns] +/-   
>> 56ns
>> #* PHC0  0   3   377 4   -771ns[-1029ns] +/-   
>> 93ns
>> #* PHC0  0   3   37710-49ns[  -58ns] +/-  
>> 121ns
>> #* PHC0  0   3   377 9   +562ns[ +703ns] +/-  
>> 107ns
>> #* PHC0  0   3   377 6 -2ns[   -3ns] +/-   
>> 94ns
>> #* PHC0  0   3   377 4   +451ns[ +494ns] +/-  
>> 138ns
>> #* PHC0  0   3   37711-67ns[  -74ns] +/-  
>> 113ns
>> #* PHC0  0   3   377 8   +244ns[ +264ns] +/-  
>> 119ns
>> #* PHC0  0   3   377 7   -696ns[ -890ns] +/-   
>> 89ns
>> #* PHC0  0   3   377 4   +468ns[ +560ns] +/-  
>> 110ns
>> #* PHC0  0   3   37711   -310ns[ -430ns] +/-   
>> 72ns
>> #* PHC0  0   3   377 9   +189ns[ +298ns] +/-   
>> 54ns
>> #* PHC0  0   3   377 7   +594ns[ +473ns] +/-   
>> 96ns
>> #* PHC0  0   3   377 5   +151ns[ +280ns] +/-   
>> 71ns
>> #* PHC0  0   3   37710   -590ns[ -696ns] +/-   
>> 94ns
>> #* PHC0  0   3   377 8   +415ns[ +526ns] +/-   
>> 74ns
>> #* PHC0  0   3   377 6  +1381ns[+1469ns] +/-  
>> 101ns
>> #* PHC0  0   3   377 4   +571ns[+1304ns] +/-   
>> 54ns
>> #* PHC0  0   3   377 8 -5ns[  +71ns] +/-  
>> 139ns
>> #* PHC0  0   3   377 7   -247ns[ -502ns] +/-   
>> 69ns
>> #* PHC0  0   3   377 5   -283ns[ +879ns] +/-   
>> 73ns
>> #* PHC0  0   3   377 3   +148ns[ -109ns] +/-   
>> 61ns
>>
>> With TSC delta calculation:
>> 
>>
>> #* PHC0  0   3   377 7   +379ns[ +432ns] +/-   
>> 53ns
>> #* PHC0  0   3   377 9   +106ns[ +420ns] +/-   
>> 42ns
>> #* PHC0  0   3   377 7-58ns[ -136ns] +/-   
>> 62ns
>> #* PHC0  0   3   37712+93ns[  -38ns] +/-   
>> 64ns
>> #* PHC0  0   3   377 8+84ns[ +107ns] +/-   
>> 69ns
>> #* PHC0  0   3   377 3-76ns[ -103ns] +/-   
>> 52ns
>> #* PHC0  0   3   377 7+52ns[  +63ns] +/-   
>> 50ns
>> #* PHC0  0   3   37711+29ns[  +31ns] +/-   
>> 70ns
>> #* PHC0  0   3   377 7-47ns[  -56ns] +/-   
>> 42ns
>> #* PHC0  0   3   37710-35ns[  -42ns] +/-   
>> 33ns
>> #* PHC0  0   3   377 7-32ns[  -34ns] +/-   
>> 42ns
>> #* PHC0  0   3   37711   -172ns[ -173ns] +/-  
>> 118ns
>> #* PHC0  0   3   377 6+65ns[  +76ns] +/-   
>> 23ns
>> #* PHC0  0   3   377 9+18ns[  +23ns] +/-   
>> 37ns
>> #* PHC0  0   3   377 6+41ns[  -60ns] +/-  

Re: [patch 3/3] PTP: add kvm PTP driver

2017-01-18 Thread Marcelo Tosatti
On Wed, Jan 18, 2017 at 10:17:38AM -0200, Marcelo Tosatti wrote:
> On Tue, Jan 17, 2017 at 04:36:21PM +0100, Radim Krcmar wrote:
> > 2017-01-17 09:30-0200, Marcelo Tosatti:
> > > On Tue, Jan 17, 2017 at 09:03:27AM +0100, Miroslav Lichvar wrote:
> > >> On Mon, Jan 16, 2017 at 06:01:14PM -0200, Marcelo Tosatti wrote:
> > >> > On Mon, Jan 16, 2017 at 05:47:15PM -0200, Marcelo Tosatti wrote:
> > >> > > On Mon, Jan 16, 2017 at 05:36:55PM -0200, Marcelo Tosatti wrote:
> > >> > > > Sorry, unless i am misunderstanding how this works, it'll get the 
> > >> > > > guest clock
> > >> > > > 2us behind, which is something not wanted.
> > >> > > > 
> > >> > > > Miroslav, if ->gettime64 returns the host realtime at 2us in the 
> > >> > > > past, 
> > >> > > > this means Chrony will sync the guest clock to
> > >> > > > 
> > >> > > > host realtime - 2us
> > >> > > > 
> > >> > > > Is that correct?
> > >> 
> > >> Probably. It depends on the error of both host and guest timestamps.
> > >> If the error is the same on both sides, it will cancel out. An
> > >> occasional spike in the delay shouldn't be a problem as the reading
> > >> will be filtered out, but for best accuracy it's necessary that the
> > >> host's timestamp is taken in the middle between the guest's
> > >> timestamps.
> > > 
> > > The problem is that spikes can be far from occasional: it depends on 
> > > activity of
> > > the host CPU and interrupts. Whose delay can be "intermittent": as long
> > > as interrupts are being sent to the host CPU, for example, the delay
> > > will be high (which can last minutes).
> > > 
> > > The TSC reading in the guest KVM PTP driver corrects for that delay.
> > > 
> > >> Users of the PTP_SYS_OFFSET ioctl assume that (ts[0]+ts[2])/2
> > >> corresponds to ts[1], (ts[2]+ts[4])/2 corresponds to ts[3], and so on.
> > >> 
> > >> ts[1] ts[3]
> > >> Host time-+-+
> > >>   | |
> > >>   | |
> > >> Guest time   +-+-+..
> > >> ts[0]ts[2] ts[4]
> > 
> > KVM PTP delay moves host ts[i] to be close to guest ts[i+1] and makes
> > the offset very consistent, so the graph would look like:
> > 
> > ts[1] ts[3]
> > Host time-+-+
> >   | |
> >   | |
> > Guest time   +-+-+..
> > ts[0]ts[2] ts[4]
> > 
> > which doesn't sound good if users assume that the host reading is in the
> > middle -- the guest time would be ahead of the host time.
> > 
> > I'm wondering why is the PTP precision around 10ns, when the hypercall
> > takes around 2-3k cycles.  Have you measured the guest<->host offset by
> > getting the output of the hypercall, i.e.
> >   {host_sec @ tsc, host_nsec @ tsc, tsc}
> > and comparing it with guest time computed from the same tsc, i.e.
> >   {guest_sec @ tsc, guest_nsec @ tsc}
> > ?
> > 
> > Thanks.
> 
> Testcase: run a guest and a loop sending SIGUSR1 to vcpu0 (emulating
> intense interrupts). Follows results:
> 
> Without TSC delta calculation:
> =
> 
> #* PHC0  0   3   377 2-99ns[ +206ns] +/-  
> 116ns
> #* PHC0  0   3   377 8   +202ns[ +249ns] +/-  
> 111ns
> #* PHC0  0   3   377 8   -213ns[ +683ns] +/-   
> 88ns
> #* PHC0  0   3   377 6+77ns[ +319ns] +/-   
> 56ns
> #* PHC0  0   3   377 4   -771ns[-1029ns] +/-   
> 93ns
> #* PHC0  0   3   37710-49ns[  -58ns] +/-  
> 121ns
> #* PHC0  0   3   377 9   +562ns[ +703ns] +/-  
> 107ns
> #* PHC0  0   3   377 6 -2ns[   -3ns] +/-   
> 94ns
> #* PHC0  0   3   377 4   +451ns[ +494ns] +/-  
> 138ns
> #* PHC0  0   3   37711-67ns[  -74ns] +/-  
> 113ns
> #* PHC0  0   3   377 8   +244ns[ +264ns] +/-  
> 119ns
> #* PHC0  0   3   377 7   -696ns[ -890ns] +/-   
> 89ns
> #* PHC0  0   3   377 4   +468ns[ +560ns] +/-  
> 110ns
> #* PHC0  0   3   37711   -310ns[ -430ns] +/-   
> 72ns
> #* PHC0  0   3   377 9   +189ns[ +298ns] +/-   
> 54ns
> #* PHC0  0   3   377 7   +594ns[ +473ns] +/-   
> 96ns
> #* PHC0  0   3   377 5   +151ns[ +280ns] +/-   
> 71ns
> #* PHC0  0   3   37710   -590ns[ -696ns] +/-   
> 94ns
> #* PHC0  0   3   377 8   +415ns[ +526ns] +/-   
> 74ns
> #* PHC0  0   3   377 6  +1381ns[+1469ns] +/-  
> 101ns
> #* PHC0  0   3   377 4   +571ns[+1304ns] 

Re: [patch 3/3] PTP: add kvm PTP driver

2017-01-18 Thread Marcelo Tosatti
On Wed, Jan 18, 2017 at 10:17:38AM -0200, Marcelo Tosatti wrote:
> On Tue, Jan 17, 2017 at 04:36:21PM +0100, Radim Krcmar wrote:
> > 2017-01-17 09:30-0200, Marcelo Tosatti:
> > > On Tue, Jan 17, 2017 at 09:03:27AM +0100, Miroslav Lichvar wrote:
> > >> On Mon, Jan 16, 2017 at 06:01:14PM -0200, Marcelo Tosatti wrote:
> > >> > On Mon, Jan 16, 2017 at 05:47:15PM -0200, Marcelo Tosatti wrote:
> > >> > > On Mon, Jan 16, 2017 at 05:36:55PM -0200, Marcelo Tosatti wrote:
> > >> > > > Sorry, unless i am misunderstanding how this works, it'll get the 
> > >> > > > guest clock
> > >> > > > 2us behind, which is something not wanted.
> > >> > > > 
> > >> > > > Miroslav, if ->gettime64 returns the host realtime at 2us in the 
> > >> > > > past, 
> > >> > > > this means Chrony will sync the guest clock to
> > >> > > > 
> > >> > > > host realtime - 2us
> > >> > > > 
> > >> > > > Is that correct?
> > >> 
> > >> Probably. It depends on the error of both host and guest timestamps.
> > >> If the error is the same on both sides, it will cancel out. An
> > >> occasional spike in the delay shouldn't be a problem as the reading
> > >> will be filtered out, but for best accuracy it's necessary that the
> > >> host's timestamp is taken in the middle between the guest's
> > >> timestamps.
> > > 
> > > The problem is that spikes can be far from occasional: it depends on 
> > > activity of
> > > the host CPU and interrupts. Whose delay can be "intermittent": as long
> > > as interrupts are being sent to the host CPU, for example, the delay
> > > will be high (which can last minutes).
> > > 
> > > The TSC reading in the guest KVM PTP driver corrects for that delay.
> > > 
> > >> Users of the PTP_SYS_OFFSET ioctl assume that (ts[0]+ts[2])/2
> > >> corresponds to ts[1], (ts[2]+ts[4])/2 corresponds to ts[3], and so on.
> > >> 
> > >> ts[1] ts[3]
> > >> Host time-+-+
> > >>   | |
> > >>   | |
> > >> Guest time   +-+-+..
> > >> ts[0]ts[2] ts[4]
> > 
> > KVM PTP delay moves host ts[i] to be close to guest ts[i+1] and makes
> > the offset very consistent, so the graph would look like:
> > 
> > ts[1] ts[3]
> > Host time-+-+
> >   | |
> >   | |
> > Guest time   +-+-+..
> > ts[0]ts[2] ts[4]
> > 
> > which doesn't sound good if users assume that the host reading is in the
> > middle -- the guest time would be ahead of the host time.
> > 
> > I'm wondering why is the PTP precision around 10ns, when the hypercall
> > takes around 2-3k cycles.  Have you measured the guest<->host offset by
> > getting the output of the hypercall, i.e.
> >   {host_sec @ tsc, host_nsec @ tsc, tsc}
> > and comparing it with guest time computed from the same tsc, i.e.
> >   {guest_sec @ tsc, guest_nsec @ tsc}
> > ?
> > 
> > Thanks.
> 
> Testcase: run a guest and a loop sending SIGUSR1 to vcpu0 (emulating
> intense interrupts). Follows results:
> 
> Without TSC delta calculation:
> =
> 
> #* PHC0  0   3   377 2-99ns[ +206ns] +/-  
> 116ns
> #* PHC0  0   3   377 8   +202ns[ +249ns] +/-  
> 111ns
> #* PHC0  0   3   377 8   -213ns[ +683ns] +/-   
> 88ns
> #* PHC0  0   3   377 6+77ns[ +319ns] +/-   
> 56ns
> #* PHC0  0   3   377 4   -771ns[-1029ns] +/-   
> 93ns
> #* PHC0  0   3   37710-49ns[  -58ns] +/-  
> 121ns
> #* PHC0  0   3   377 9   +562ns[ +703ns] +/-  
> 107ns
> #* PHC0  0   3   377 6 -2ns[   -3ns] +/-   
> 94ns
> #* PHC0  0   3   377 4   +451ns[ +494ns] +/-  
> 138ns
> #* PHC0  0   3   37711-67ns[  -74ns] +/-  
> 113ns
> #* PHC0  0   3   377 8   +244ns[ +264ns] +/-  
> 119ns
> #* PHC0  0   3   377 7   -696ns[ -890ns] +/-   
> 89ns
> #* PHC0  0   3   377 4   +468ns[ +560ns] +/-  
> 110ns
> #* PHC0  0   3   37711   -310ns[ -430ns] +/-   
> 72ns
> #* PHC0  0   3   377 9   +189ns[ +298ns] +/-   
> 54ns
> #* PHC0  0   3   377 7   +594ns[ +473ns] +/-   
> 96ns
> #* PHC0  0   3   377 5   +151ns[ +280ns] +/-   
> 71ns
> #* PHC0  0   3   37710   -590ns[ -696ns] +/-   
> 94ns
> #* PHC0  0   3   377 8   +415ns[ +526ns] +/-   
> 74ns
> #* PHC0  0   3   377 6  +1381ns[+1469ns] +/-  
> 101ns
> #* PHC0  0   3   377 4   +571ns[+1304ns] 

Re: [patch 3/3] PTP: add kvm PTP driver

2017-01-18 Thread Marcelo Tosatti
On Tue, Jan 17, 2017 at 04:36:21PM +0100, Radim Krcmar wrote:
> 2017-01-17 09:30-0200, Marcelo Tosatti:
> > On Tue, Jan 17, 2017 at 09:03:27AM +0100, Miroslav Lichvar wrote:
> >> On Mon, Jan 16, 2017 at 06:01:14PM -0200, Marcelo Tosatti wrote:
> >> > On Mon, Jan 16, 2017 at 05:47:15PM -0200, Marcelo Tosatti wrote:
> >> > > On Mon, Jan 16, 2017 at 05:36:55PM -0200, Marcelo Tosatti wrote:
> >> > > > Sorry, unless i am misunderstanding how this works, it'll get the 
> >> > > > guest clock
> >> > > > 2us behind, which is something not wanted.
> >> > > > 
> >> > > > Miroslav, if ->gettime64 returns the host realtime at 2us in the 
> >> > > > past, 
> >> > > > this means Chrony will sync the guest clock to
> >> > > > 
> >> > > > host realtime - 2us
> >> > > > 
> >> > > > Is that correct?
> >> 
> >> Probably. It depends on the error of both host and guest timestamps.
> >> If the error is the same on both sides, it will cancel out. An
> >> occasional spike in the delay shouldn't be a problem as the reading
> >> will be filtered out, but for best accuracy it's necessary that the
> >> host's timestamp is taken in the middle between the guest's
> >> timestamps.
> > 
> > The problem is that spikes can be far from occasional: it depends on 
> > activity of
> > the host CPU and interrupts. Whose delay can be "intermittent": as long
> > as interrupts are being sent to the host CPU, for example, the delay
> > will be high (which can last minutes).
> > 
> > The TSC reading in the guest KVM PTP driver corrects for that delay.
> > 
> >> Users of the PTP_SYS_OFFSET ioctl assume that (ts[0]+ts[2])/2
> >> corresponds to ts[1], (ts[2]+ts[4])/2 corresponds to ts[3], and so on.
> >> 
> >> ts[1] ts[3]
> >> Host time-+-+
> >>   | |
> >>   | |
> >> Guest time   +-+-+..
> >> ts[0]ts[2] ts[4]
> 
> KVM PTP delay moves host ts[i] to be close to guest ts[i+1] and makes
> the offset very consistent, so the graph would look like:
> 
> ts[1] ts[3]
> Host time-+-+
>   | |
>   | |
> Guest time   +-+-+..
> ts[0]ts[2] ts[4]
> 
> which doesn't sound good if users assume that the host reading is in the
> middle -- the guest time would be ahead of the host time.
> 
> I'm wondering why is the PTP precision around 10ns, when the hypercall
> takes around 2-3k cycles.  Have you measured the guest<->host offset by
> getting the output of the hypercall, i.e.
>   {host_sec @ tsc, host_nsec @ tsc, tsc}
> and comparing it with guest time computed from the same tsc, i.e.
>   {guest_sec @ tsc, guest_nsec @ tsc}
> ?
> 
> Thanks.

Testcase: run a guest and a loop sending SIGUSR1 to vcpu0 (emulating
intense interrupts). Follows results:

Without TSC delta calculation:
=

#* PHC0  0   3   377 2-99ns[ +206ns] +/-  116ns
#* PHC0  0   3   377 8   +202ns[ +249ns] +/-  111ns
#* PHC0  0   3   377 8   -213ns[ +683ns] +/-   88ns
#* PHC0  0   3   377 6+77ns[ +319ns] +/-   56ns
#* PHC0  0   3   377 4   -771ns[-1029ns] +/-   93ns
#* PHC0  0   3   37710-49ns[  -58ns] +/-  121ns
#* PHC0  0   3   377 9   +562ns[ +703ns] +/-  107ns
#* PHC0  0   3   377 6 -2ns[   -3ns] +/-   94ns
#* PHC0  0   3   377 4   +451ns[ +494ns] +/-  138ns
#* PHC0  0   3   37711-67ns[  -74ns] +/-  113ns
#* PHC0  0   3   377 8   +244ns[ +264ns] +/-  119ns
#* PHC0  0   3   377 7   -696ns[ -890ns] +/-   89ns
#* PHC0  0   3   377 4   +468ns[ +560ns] +/-  110ns
#* PHC0  0   3   37711   -310ns[ -430ns] +/-   72ns
#* PHC0  0   3   377 9   +189ns[ +298ns] +/-   54ns
#* PHC0  0   3   377 7   +594ns[ +473ns] +/-   96ns
#* PHC0  0   3   377 5   +151ns[ +280ns] +/-   71ns
#* PHC0  0   3   37710   -590ns[ -696ns] +/-   94ns
#* PHC0  0   3   377 8   +415ns[ +526ns] +/-   74ns
#* PHC0  0   3   377 6  +1381ns[+1469ns] +/-  101ns
#* PHC0  0   3   377 4   +571ns[+1304ns] +/-   54ns
#* PHC0  0   3   377 8 -5ns[  +71ns] +/-  139ns
#* PHC0  0   3   377 7   -247ns[ -502ns] +/-   69ns
#* PHC0  0   3   377 5   -283ns[ +879ns] +/-   73ns
#* PHC0  0   3   377 3   

Re: [patch 3/3] PTP: add kvm PTP driver

2017-01-18 Thread Marcelo Tosatti
On Tue, Jan 17, 2017 at 04:36:21PM +0100, Radim Krcmar wrote:
> 2017-01-17 09:30-0200, Marcelo Tosatti:
> > On Tue, Jan 17, 2017 at 09:03:27AM +0100, Miroslav Lichvar wrote:
> >> On Mon, Jan 16, 2017 at 06:01:14PM -0200, Marcelo Tosatti wrote:
> >> > On Mon, Jan 16, 2017 at 05:47:15PM -0200, Marcelo Tosatti wrote:
> >> > > On Mon, Jan 16, 2017 at 05:36:55PM -0200, Marcelo Tosatti wrote:
> >> > > > Sorry, unless i am misunderstanding how this works, it'll get the 
> >> > > > guest clock
> >> > > > 2us behind, which is something not wanted.
> >> > > > 
> >> > > > Miroslav, if ->gettime64 returns the host realtime at 2us in the 
> >> > > > past, 
> >> > > > this means Chrony will sync the guest clock to
> >> > > > 
> >> > > > host realtime - 2us
> >> > > > 
> >> > > > Is that correct?
> >> 
> >> Probably. It depends on the error of both host and guest timestamps.
> >> If the error is the same on both sides, it will cancel out. An
> >> occasional spike in the delay shouldn't be a problem as the reading
> >> will be filtered out, but for best accuracy it's necessary that the
> >> host's timestamp is taken in the middle between the guest's
> >> timestamps.
> > 
> > The problem is that spikes can be far from occasional: it depends on 
> > activity of
> > the host CPU and interrupts. Whose delay can be "intermittent": as long
> > as interrupts are being sent to the host CPU, for example, the delay
> > will be high (which can last minutes).
> > 
> > The TSC reading in the guest KVM PTP driver corrects for that delay.
> > 
> >> Users of the PTP_SYS_OFFSET ioctl assume that (ts[0]+ts[2])/2
> >> corresponds to ts[1], (ts[2]+ts[4])/2 corresponds to ts[3], and so on.
> >> 
> >> ts[1] ts[3]
> >> Host time-+-+
> >>   | |
> >>   | |
> >> Guest time   +-+-+..
> >> ts[0]ts[2] ts[4]
> 
> KVM PTP delay moves host ts[i] to be close to guest ts[i+1] and makes
> the offset very consistent, so the graph would look like:
> 
> ts[1] ts[3]
> Host time-+-+
>   | |
>   | |
> Guest time   +-+-+..
> ts[0]ts[2] ts[4]
> 
> which doesn't sound good if users assume that the host reading is in the
> middle -- the guest time would be ahead of the host time.
> 
> I'm wondering why is the PTP precision around 10ns, when the hypercall
> takes around 2-3k cycles.  Have you measured the guest<->host offset by
> getting the output of the hypercall, i.e.
>   {host_sec @ tsc, host_nsec @ tsc, tsc}
> and comparing it with guest time computed from the same tsc, i.e.
>   {guest_sec @ tsc, guest_nsec @ tsc}
> ?
> 
> Thanks.

Testcase: run a guest and a loop sending SIGUSR1 to vcpu0 (emulating
intense interrupts). Follows results:

Without TSC delta calculation:
=

#* PHC0  0   3   377 2-99ns[ +206ns] +/-  116ns
#* PHC0  0   3   377 8   +202ns[ +249ns] +/-  111ns
#* PHC0  0   3   377 8   -213ns[ +683ns] +/-   88ns
#* PHC0  0   3   377 6+77ns[ +319ns] +/-   56ns
#* PHC0  0   3   377 4   -771ns[-1029ns] +/-   93ns
#* PHC0  0   3   37710-49ns[  -58ns] +/-  121ns
#* PHC0  0   3   377 9   +562ns[ +703ns] +/-  107ns
#* PHC0  0   3   377 6 -2ns[   -3ns] +/-   94ns
#* PHC0  0   3   377 4   +451ns[ +494ns] +/-  138ns
#* PHC0  0   3   37711-67ns[  -74ns] +/-  113ns
#* PHC0  0   3   377 8   +244ns[ +264ns] +/-  119ns
#* PHC0  0   3   377 7   -696ns[ -890ns] +/-   89ns
#* PHC0  0   3   377 4   +468ns[ +560ns] +/-  110ns
#* PHC0  0   3   37711   -310ns[ -430ns] +/-   72ns
#* PHC0  0   3   377 9   +189ns[ +298ns] +/-   54ns
#* PHC0  0   3   377 7   +594ns[ +473ns] +/-   96ns
#* PHC0  0   3   377 5   +151ns[ +280ns] +/-   71ns
#* PHC0  0   3   37710   -590ns[ -696ns] +/-   94ns
#* PHC0  0   3   377 8   +415ns[ +526ns] +/-   74ns
#* PHC0  0   3   377 6  +1381ns[+1469ns] +/-  101ns
#* PHC0  0   3   377 4   +571ns[+1304ns] +/-   54ns
#* PHC0  0   3   377 8 -5ns[  +71ns] +/-  139ns
#* PHC0  0   3   377 7   -247ns[ -502ns] +/-   69ns
#* PHC0  0   3   377 5   -283ns[ +879ns] +/-   73ns
#* PHC0  0   3   377 3   

Re: [patch 3/3] PTP: add kvm PTP driver

2017-01-17 Thread Radim Krcmar
2017-01-17 09:30-0200, Marcelo Tosatti:
> On Tue, Jan 17, 2017 at 09:03:27AM +0100, Miroslav Lichvar wrote:
>> On Mon, Jan 16, 2017 at 06:01:14PM -0200, Marcelo Tosatti wrote:
>> > On Mon, Jan 16, 2017 at 05:47:15PM -0200, Marcelo Tosatti wrote:
>> > > On Mon, Jan 16, 2017 at 05:36:55PM -0200, Marcelo Tosatti wrote:
>> > > > Sorry, unless i am misunderstanding how this works, it'll get the 
>> > > > guest clock
>> > > > 2us behind, which is something not wanted.
>> > > > 
>> > > > Miroslav, if ->gettime64 returns the host realtime at 2us in the past, 
>> > > > this means Chrony will sync the guest clock to
>> > > > 
>> > > > host realtime - 2us
>> > > > 
>> > > > Is that correct?
>> 
>> Probably. It depends on the error of both host and guest timestamps.
>> If the error is the same on both sides, it will cancel out. An
>> occasional spike in the delay shouldn't be a problem as the reading
>> will be filtered out, but for best accuracy it's necessary that the
>> host's timestamp is taken in the middle between the guest's
>> timestamps.
> 
> The problem is that spikes can be far from occasional: it depends on activity 
> of
> the host CPU and interrupts. Whose delay can be "intermittent": as long
> as interrupts are being sent to the host CPU, for example, the delay
> will be high (which can last minutes).
> 
> The TSC reading in the guest KVM PTP driver corrects for that delay.
> 
>> Users of the PTP_SYS_OFFSET ioctl assume that (ts[0]+ts[2])/2
>> corresponds to ts[1], (ts[2]+ts[4])/2 corresponds to ts[3], and so on.
>> 
>> ts[1] ts[3]
>> Host time-+-+
>>   | |
>>   | |
>> Guest time   +-+-+..
>> ts[0]ts[2] ts[4]

KVM PTP delay moves host ts[i] to be close to guest ts[i+1] and makes
the offset very consistent, so the graph would look like:

ts[1] ts[3]
Host time-+-+
  | |
  | |
Guest time   +-+-+..
ts[0]ts[2] ts[4]

which doesn't sound good if users assume that the host reading is in the
middle -- the guest time would be ahead of the host time.

I'm wondering why is the PTP precision around 10ns, when the hypercall
takes around 2-3k cycles.  Have you measured the guest<->host offset by
getting the output of the hypercall, i.e.
  {host_sec @ tsc, host_nsec @ tsc, tsc}
and comparing it with guest time computed from the same tsc, i.e.
  {guest_sec @ tsc, guest_nsec @ tsc}
?

Thanks.


Re: [patch 3/3] PTP: add kvm PTP driver

2017-01-17 Thread Radim Krcmar
2017-01-17 09:30-0200, Marcelo Tosatti:
> On Tue, Jan 17, 2017 at 09:03:27AM +0100, Miroslav Lichvar wrote:
>> On Mon, Jan 16, 2017 at 06:01:14PM -0200, Marcelo Tosatti wrote:
>> > On Mon, Jan 16, 2017 at 05:47:15PM -0200, Marcelo Tosatti wrote:
>> > > On Mon, Jan 16, 2017 at 05:36:55PM -0200, Marcelo Tosatti wrote:
>> > > > Sorry, unless i am misunderstanding how this works, it'll get the 
>> > > > guest clock
>> > > > 2us behind, which is something not wanted.
>> > > > 
>> > > > Miroslav, if ->gettime64 returns the host realtime at 2us in the past, 
>> > > > this means Chrony will sync the guest clock to
>> > > > 
>> > > > host realtime - 2us
>> > > > 
>> > > > Is that correct?
>> 
>> Probably. It depends on the error of both host and guest timestamps.
>> If the error is the same on both sides, it will cancel out. An
>> occasional spike in the delay shouldn't be a problem as the reading
>> will be filtered out, but for best accuracy it's necessary that the
>> host's timestamp is taken in the middle between the guest's
>> timestamps.
> 
> The problem is that spikes can be far from occasional: it depends on activity 
> of
> the host CPU and interrupts. Whose delay can be "intermittent": as long
> as interrupts are being sent to the host CPU, for example, the delay
> will be high (which can last minutes).
> 
> The TSC reading in the guest KVM PTP driver corrects for that delay.
> 
>> Users of the PTP_SYS_OFFSET ioctl assume that (ts[0]+ts[2])/2
>> corresponds to ts[1], (ts[2]+ts[4])/2 corresponds to ts[3], and so on.
>> 
>> ts[1] ts[3]
>> Host time-+-+
>>   | |
>>   | |
>> Guest time   +-+-+..
>> ts[0]ts[2] ts[4]

KVM PTP delay moves host ts[i] to be close to guest ts[i+1] and makes
the offset very consistent, so the graph would look like:

ts[1] ts[3]
Host time-+-+
  | |
  | |
Guest time   +-+-+..
ts[0]ts[2] ts[4]

which doesn't sound good if users assume that the host reading is in the
middle -- the guest time would be ahead of the host time.

I'm wondering why is the PTP precision around 10ns, when the hypercall
takes around 2-3k cycles.  Have you measured the guest<->host offset by
getting the output of the hypercall, i.e.
  {host_sec @ tsc, host_nsec @ tsc, tsc}
and comparing it with guest time computed from the same tsc, i.e.
  {guest_sec @ tsc, guest_nsec @ tsc}
?

Thanks.


Re: [patch 3/3] PTP: add kvm PTP driver

2017-01-17 Thread Marcelo Tosatti
On Tue, Jan 17, 2017 at 09:03:27AM +0100, Miroslav Lichvar wrote:
> On Mon, Jan 16, 2017 at 06:01:14PM -0200, Marcelo Tosatti wrote:
> > On Mon, Jan 16, 2017 at 05:47:15PM -0200, Marcelo Tosatti wrote:
> > > On Mon, Jan 16, 2017 at 05:36:55PM -0200, Marcelo Tosatti wrote:
> > > > Sorry, unless i am misunderstanding how this works, it'll get the guest 
> > > > clock
> > > > 2us behind, which is something not wanted.
> > > > 
> > > > Miroslav, if ->gettime64 returns the host realtime at 2us in the past, 
> > > > this means Chrony will sync the guest clock to
> > > > 
> > > > host realtime - 2us
> > > > 
> > > > Is that correct?
> 
> Probably. It depends on the error of both host and guest timestamps.
> If the error is the same on both sides, it will cancel out. An
> occasional spike in the delay shouldn't be a problem as the reading
> will be filtered out, but for best accuracy it's necessary that the
> host's timestamp is taken in the middle between the guest's
> timestamps.

The problem is that spikes can be far from occasional: it depends on activity of
the host CPU and interrupts. Whose delay can be "intermittent": as long
as interrupts are being sent to the host CPU, for example, the delay
will be high (which can last minutes).

The TSC reading in the guest KVM PTP driver corrects for that delay.

> Users of the PTP_SYS_OFFSET ioctl assume that (ts[0]+ts[2])/2
> corresponds to ts[1], (ts[2]+ts[4])/2 corresponds to ts[3], and so on.
> 
> ts[1] ts[3]
> Host time-+-+
>   | |
>   | |
> Guest time   +-+-+..
> ts[0]ts[2] ts[4]
> 
> -- 
> Miroslav Lichvar


Re: [patch 3/3] PTP: add kvm PTP driver

2017-01-17 Thread Marcelo Tosatti
On Tue, Jan 17, 2017 at 09:03:27AM +0100, Miroslav Lichvar wrote:
> On Mon, Jan 16, 2017 at 06:01:14PM -0200, Marcelo Tosatti wrote:
> > On Mon, Jan 16, 2017 at 05:47:15PM -0200, Marcelo Tosatti wrote:
> > > On Mon, Jan 16, 2017 at 05:36:55PM -0200, Marcelo Tosatti wrote:
> > > > Sorry, unless i am misunderstanding how this works, it'll get the guest 
> > > > clock
> > > > 2us behind, which is something not wanted.
> > > > 
> > > > Miroslav, if ->gettime64 returns the host realtime at 2us in the past, 
> > > > this means Chrony will sync the guest clock to
> > > > 
> > > > host realtime - 2us
> > > > 
> > > > Is that correct?
> 
> Probably. It depends on the error of both host and guest timestamps.
> If the error is the same on both sides, it will cancel out. An
> occasional spike in the delay shouldn't be a problem as the reading
> will be filtered out, but for best accuracy it's necessary that the
> host's timestamp is taken in the middle between the guest's
> timestamps.

The problem is that spikes can be far from occasional: it depends on activity of
the host CPU and interrupts. Whose delay can be "intermittent": as long
as interrupts are being sent to the host CPU, for example, the delay
will be high (which can last minutes).

The TSC reading in the guest KVM PTP driver corrects for that delay.

> Users of the PTP_SYS_OFFSET ioctl assume that (ts[0]+ts[2])/2
> corresponds to ts[1], (ts[2]+ts[4])/2 corresponds to ts[3], and so on.
> 
> ts[1] ts[3]
> Host time-+-+
>   | |
>   | |
> Guest time   +-+-+..
> ts[0]ts[2] ts[4]
> 
> -- 
> Miroslav Lichvar


Re: [patch 3/3] PTP: add kvm PTP driver

2017-01-17 Thread Miroslav Lichvar
On Mon, Jan 16, 2017 at 06:01:14PM -0200, Marcelo Tosatti wrote:
> On Mon, Jan 16, 2017 at 05:47:15PM -0200, Marcelo Tosatti wrote:
> > On Mon, Jan 16, 2017 at 05:36:55PM -0200, Marcelo Tosatti wrote:
> > > Sorry, unless i am misunderstanding how this works, it'll get the guest 
> > > clock
> > > 2us behind, which is something not wanted.
> > > 
> > > Miroslav, if ->gettime64 returns the host realtime at 2us in the past, 
> > > this means Chrony will sync the guest clock to
> > > 
> > > host realtime - 2us
> > > 
> > > Is that correct?

Probably. It depends on the error of both host and guest timestamps.
If the error is the same on both sides, it will cancel out. An
occasional spike in the delay shouldn't be a problem as the reading
will be filtered out, but for best accuracy it's necessary that the
host's timestamp is taken in the middle between the guest's
timestamps.

Users of the PTP_SYS_OFFSET ioctl assume that (ts[0]+ts[2])/2
corresponds to ts[1], (ts[2]+ts[4])/2 corresponds to ts[3], and so on.

ts[1] ts[3]
Host time-+-+
  | |
  | |
Guest time   +-+-+..
ts[0]ts[2] ts[4]

-- 
Miroslav Lichvar


Re: [patch 3/3] PTP: add kvm PTP driver

2017-01-17 Thread Miroslav Lichvar
On Mon, Jan 16, 2017 at 06:01:14PM -0200, Marcelo Tosatti wrote:
> On Mon, Jan 16, 2017 at 05:47:15PM -0200, Marcelo Tosatti wrote:
> > On Mon, Jan 16, 2017 at 05:36:55PM -0200, Marcelo Tosatti wrote:
> > > Sorry, unless i am misunderstanding how this works, it'll get the guest 
> > > clock
> > > 2us behind, which is something not wanted.
> > > 
> > > Miroslav, if ->gettime64 returns the host realtime at 2us in the past, 
> > > this means Chrony will sync the guest clock to
> > > 
> > > host realtime - 2us
> > > 
> > > Is that correct?

Probably. It depends on the error of both host and guest timestamps.
If the error is the same on both sides, it will cancel out. An
occasional spike in the delay shouldn't be a problem as the reading
will be filtered out, but for best accuracy it's necessary that the
host's timestamp is taken in the middle between the guest's
timestamps.

Users of the PTP_SYS_OFFSET ioctl assume that (ts[0]+ts[2])/2
corresponds to ts[1], (ts[2]+ts[4])/2 corresponds to ts[3], and so on.

ts[1] ts[3]
Host time-+-+
  | |
  | |
Guest time   +-+-+..
ts[0]ts[2] ts[4]

-- 
Miroslav Lichvar


Re: [patch 3/3] PTP: add kvm PTP driver

2017-01-16 Thread Marcelo Tosatti
On Mon, Jan 16, 2017 at 05:47:15PM -0200, Marcelo Tosatti wrote:
> On Mon, Jan 16, 2017 at 05:36:55PM -0200, Marcelo Tosatti wrote:
> > On Mon, Jan 16, 2017 at 07:01:48PM +0100, Radim Krcmar wrote:
> > > > Sorry the clock difference is 10ns now. So the guest clock is off by 
> > > > _10 ns_ 
> > > > of the host clock.
> > > 
> > > That is pretty good.
> > 
> > Yes.
> > 
> > > > You are suggesting to use getcrosststamp instead, to drop the (rdtsc() -
> > > > guest_tsc) part ?
> > > 
> > > Yes, it results in simpler code, doesn't create dependency on the
> > > dreaded kvmclock, and is the best we can currently do wrt. precision.
> 
> Even if the PHC sync algorithm manages to detect that the clock read is
> incorrect, consider the following:
> 
> Variability in the VM-entry code path, such as cache effects and interrupts 
> would cause
> certain readings to be longer then the average (assuming an average
> where cache is hot).
> 
> Using the TSC removes this variability, which can be large in case of
> non realtime guests, where you do:
> 
>   1. kvm_hypercall.
>   2. read host realtime clock.
>   3. schedule out qemu-kvm vcpu.
>   4. schedule in qemu-kvm vcpu.
> 
> So using the delta between read host realtime and 
> ->gettime64 increases precision and decreases variability.
> 
> > Sorry, unless i am misunderstanding how this works, it'll get the guest 
> > clock
> > 2us behind, which is something not wanted.
> > 
> > Miroslav, if ->gettime64 returns the host realtime at 2us in the past, 
> > this means Chrony will sync the guest clock to
> > 
> > host realtime - 2us
> > 
> > Is that correct?

Drop the offset correction and the following happens:

Clock offset seems to vary between negative hundreds of ns:

210 Number of sources = 1
MS Name/IP address Stratum Poll Reach LastRx Last sample
===
#* PHC0  0   3   37711   -131ns[ -309ns] +/-
3ns

And positive:

210 Number of sources = 1
MS Name/IP address Stratum Poll Reach LastRx Last sample
===
#* PHC0  0   3   377 4+79ns[ +155ns] +/-
3ns




Re: [patch 3/3] PTP: add kvm PTP driver

2017-01-16 Thread Marcelo Tosatti
On Mon, Jan 16, 2017 at 05:47:15PM -0200, Marcelo Tosatti wrote:
> On Mon, Jan 16, 2017 at 05:36:55PM -0200, Marcelo Tosatti wrote:
> > On Mon, Jan 16, 2017 at 07:01:48PM +0100, Radim Krcmar wrote:
> > > > Sorry the clock difference is 10ns now. So the guest clock is off by 
> > > > _10 ns_ 
> > > > of the host clock.
> > > 
> > > That is pretty good.
> > 
> > Yes.
> > 
> > > > You are suggesting to use getcrosststamp instead, to drop the (rdtsc() -
> > > > guest_tsc) part ?
> > > 
> > > Yes, it results in simpler code, doesn't create dependency on the
> > > dreaded kvmclock, and is the best we can currently do wrt. precision.
> 
> Even if the PHC sync algorithm manages to detect that the clock read is
> incorrect, consider the following:
> 
> Variability in the VM-entry code path, such as cache effects and interrupts 
> would cause
> certain readings to be longer then the average (assuming an average
> where cache is hot).
> 
> Using the TSC removes this variability, which can be large in case of
> non realtime guests, where you do:
> 
>   1. kvm_hypercall.
>   2. read host realtime clock.
>   3. schedule out qemu-kvm vcpu.
>   4. schedule in qemu-kvm vcpu.
> 
> So using the delta between read host realtime and 
> ->gettime64 increases precision and decreases variability.
> 
> > Sorry, unless i am misunderstanding how this works, it'll get the guest 
> > clock
> > 2us behind, which is something not wanted.
> > 
> > Miroslav, if ->gettime64 returns the host realtime at 2us in the past, 
> > this means Chrony will sync the guest clock to
> > 
> > host realtime - 2us
> > 
> > Is that correct?

Drop the offset correction and the following happens:

Clock offset seems to vary between negative hundreds of ns:

210 Number of sources = 1
MS Name/IP address Stratum Poll Reach LastRx Last sample
===
#* PHC0  0   3   37711   -131ns[ -309ns] +/-
3ns

And positive:

210 Number of sources = 1
MS Name/IP address Stratum Poll Reach LastRx Last sample
===
#* PHC0  0   3   377 4+79ns[ +155ns] +/-
3ns




Re: [patch 3/3] PTP: add kvm PTP driver

2017-01-16 Thread Marcelo Tosatti
On Mon, Jan 16, 2017 at 05:36:55PM -0200, Marcelo Tosatti wrote:
> On Mon, Jan 16, 2017 at 07:01:48PM +0100, Radim Krcmar wrote:
> > > Sorry the clock difference is 10ns now. So the guest clock is off by _10 
> > > ns_ 
> > > of the host clock.
> > 
> > That is pretty good.
> 
> Yes.
> 
> > > You are suggesting to use getcrosststamp instead, to drop the (rdtsc() -
> > > guest_tsc) part ?
> > 
> > Yes, it results in simpler code, doesn't create dependency on the
> > dreaded kvmclock, and is the best we can currently do wrt. precision.

Even if the PHC sync algorithm manages to detect that the clock read is
incorrect, consider the following:

Variability in the VM-entry code path, such as cache effects and interrupts 
would cause
certain readings to be longer then the average (assuming an average
where cache is hot).

Using the TSC removes this variability, which can be large in case of
non realtime guests, where you do:

1. kvm_hypercall.
2. read host realtime clock.
3. schedule out qemu-kvm vcpu.
4. schedule in qemu-kvm vcpu.

So using the delta between read host realtime and 
->gettime64 increases precision and decreases variability.

> Sorry, unless i am misunderstanding how this works, it'll get the guest clock
> 2us behind, which is something not wanted.
> 
> Miroslav, if ->gettime64 returns the host realtime at 2us in the past, 
> this means Chrony will sync the guest clock to
> 
> host realtime - 2us
> 
> Is that correct?
> 


Re: [patch 3/3] PTP: add kvm PTP driver

2017-01-16 Thread Marcelo Tosatti
On Mon, Jan 16, 2017 at 05:36:55PM -0200, Marcelo Tosatti wrote:
> On Mon, Jan 16, 2017 at 07:01:48PM +0100, Radim Krcmar wrote:
> > > Sorry the clock difference is 10ns now. So the guest clock is off by _10 
> > > ns_ 
> > > of the host clock.
> > 
> > That is pretty good.
> 
> Yes.
> 
> > > You are suggesting to use getcrosststamp instead, to drop the (rdtsc() -
> > > guest_tsc) part ?
> > 
> > Yes, it results in simpler code, doesn't create dependency on the
> > dreaded kvmclock, and is the best we can currently do wrt. precision.

Even if the PHC sync algorithm manages to detect that the clock read is
incorrect, consider the following:

Variability in the VM-entry code path, such as cache effects and interrupts 
would cause
certain readings to be longer then the average (assuming an average
where cache is hot).

Using the TSC removes this variability, which can be large in case of
non realtime guests, where you do:

1. kvm_hypercall.
2. read host realtime clock.
3. schedule out qemu-kvm vcpu.
4. schedule in qemu-kvm vcpu.

So using the delta between read host realtime and 
->gettime64 increases precision and decreases variability.

> Sorry, unless i am misunderstanding how this works, it'll get the guest clock
> 2us behind, which is something not wanted.
> 
> Miroslav, if ->gettime64 returns the host realtime at 2us in the past, 
> this means Chrony will sync the guest clock to
> 
> host realtime - 2us
> 
> Is that correct?
> 


Re: [patch 3/3] PTP: add kvm PTP driver

2017-01-16 Thread Marcelo Tosatti
On Mon, Jan 16, 2017 at 07:01:48PM +0100, Radim Krcmar wrote:
> > Sorry the clock difference is 10ns now. So the guest clock is off by _10 
> > ns_ 
> > of the host clock.
> 
> That is pretty good.

Yes.

> > You are suggesting to use getcrosststamp instead, to drop the (rdtsc() -
> > guest_tsc) part ?
> 
> Yes, it results in simpler code, doesn't create dependency on the
> dreaded kvmclock, and is the best we can currently do wrt. precision.

Sorry, unless i am misunderstanding how this works, it'll get the guest clock
2us behind, which is something not wanted.

Miroslav, if ->gettime64 returns the host realtime at 2us in the past, 
this means Chrony will sync the guest clock to

host realtime - 2us

Is that correct?



Re: [patch 3/3] PTP: add kvm PTP driver

2017-01-16 Thread Marcelo Tosatti
On Mon, Jan 16, 2017 at 07:01:48PM +0100, Radim Krcmar wrote:
> > Sorry the clock difference is 10ns now. So the guest clock is off by _10 
> > ns_ 
> > of the host clock.
> 
> That is pretty good.

Yes.

> > You are suggesting to use getcrosststamp instead, to drop the (rdtsc() -
> > guest_tsc) part ?
> 
> Yes, it results in simpler code, doesn't create dependency on the
> dreaded kvmclock, and is the best we can currently do wrt. precision.

Sorry, unless i am misunderstanding how this works, it'll get the guest clock
2us behind, which is something not wanted.

Miroslav, if ->gettime64 returns the host realtime at 2us in the past, 
this means Chrony will sync the guest clock to

host realtime - 2us

Is that correct?



Re: [patch 3/3] PTP: add kvm PTP driver

2017-01-16 Thread Radim Krcmar
2017-01-16 15:39-0200, Marcelo Tosatti:
> On Mon, Jan 16, 2017 at 06:27:58PM +0100, Radim Krcmar wrote:
>> 2017-01-16 15:08-0200, Marcelo Tosatti:
>> > On Mon, Jan 16, 2017 at 05:54:11PM +0100, Radim Krcmar wrote:
>> >> 2017-01-16 17:26+0100, Radim Krcmar:
>> >> > 2017-01-13 15:40-0200, Marcelo Tosatti:
>> >> >> On Fri, Jan 13, 2017 at 04:56:58PM +0100, Radim Krcmar wrote:
>> >> >> > 2017-01-13 10:01-0200, Marcelo Tosatti:
>> >> >>> > +   version = pvclock_read_begin(src);
>> >> >>> > +
>> >> >>> > +   ret = kvm_hypercall2(KVM_HC_CLOCK_OFFSET,
>> >> >>> > +clock_off_gpa,
>> >> >>> > +KVM_CLOCK_OFFSET_WALLCLOCK);
>> >> >>> > +   if (ret != 0) {
>> >> >>> > +   pr_err("clock offset hypercall ret %lu\n", ret);
>> >> >>> > +   spin_unlock(_ptp_lock);
>> >> >>> > +   preempt_enable_notrace();
>> >> >>> > +   return -EOPNOTSUPP;
>> >> >>> > +   }
>> >> >>> > +
>> >> >>> > +   tspec.tv_sec = clock_off.sec;
>> >> >>> > +   tspec.tv_nsec = clock_off.nsec;
>> >> >>> > +
>> >> >>> > +   delta = rdtsc_ordered() - clock_off.tsc;
>> >> >>> > +
>> >> >>> > +   offset = pvclock_scale_delta(delta, 
>> >> >>> > src->tsc_to_system_mul,
>> >> >>> > +src->tsc_shift);
>> >> >>> > +
>> >> >>> > +   } while (pvclock_read_retry(src, version));
>> >> >>> > +
>> >> >>> > +   preempt_enable_notrace();
>> >> >>> > +
>> >> >>> > +   tspec.tv_nsec = tspec.tv_nsec + offset;
>> >> >>> > +
>> >> >>> > +   spin_unlock(_ptp_lock);
>> >> >>> > +
>> >> >>> > +   if (tspec.tv_nsec >= NSEC_PER_SEC) {
>> >> >>> > +   u64 secs = tspec.tv_nsec;
>> >> >>> > +
>> >> >>> > +   tspec.tv_nsec = do_div(secs, NSEC_PER_SEC);
>> >> >>> > +   tspec.tv_sec += secs;
>> >> >>> > +   }
>> >> >>> > +
>> >> >>> > +   memcpy(ts, , sizeof(struct timespec64));
>> >> >>> 
>> >> >>> But the whole idea is of improving the time by reading tsc a bit later
>> >> >>> is just weird ... why is it better to provide
>> >> >>> 
>> >> >>>   tsc + x, time + tsc_delta_to_time(x)
>> >> >>> 
>> >> >>> than just
>> >> >>> 
>> >> >>>  tsc, time
>> >> >>> 
>> >> >>> ?
>> >> >> 
>> >> >> Because you want to calculate the value of the host realtime clock 
>> >> >> at the moment of ptp_kvm_gettime.
>> >> >> 
>> >> >> We do:
>> >> >> 
>> >> >>1. kvm_hypercall.
>> >> >>2. get {sec, nsec, guest_tsc}.
>> >> >>3. kvm_hypercall returns.
>> >> >>4. delay = rdtsc() - guest_tsc.
>> >> >> 
>> >> >> Where delay is the delta (measured with the TSC) between points 2 and 
>> >> >> 4.
>> >> > 
>> >> > I see now ... the PTP interface is just not good for our purposes.
>> >> 
>> >> There is getcrosststamp() callback in PTP, which seems to be exactly
>> >> what we want when pairing with TSC, so the pvclock delay fixup can be
>> >> dropped when using it.
>> > 
>> > What pvclock delay fixup you refer to? The "rdtsc() - clock_offset.tsc"
>> > part?
>> 
>> Yes.
>> 
>> >   You can't drop it, because if you do then your "host realtime
>> > clock read" will be behind by "rdtsc() - clock_offset.tsc" TSC cycles.
>> 
>> The TSC read will be some cycles old when the hypercall ends, but that
>> doesn't matter, because we will pass {sec, nsec, guest_tsc} to PTP and
>> PTP should plug them into kernel's realtime clock roughly like this:
>> 
>>   sec/nsec + (rdtsc() - guest_tsc) * tsc_freq
>> 
>> Adding delay to guest_tsc and sec/nsec cannot improve precision.
>> (And will likely degrade it as kvmclock's frequency is incorrect.)
>> 
>> > We want the highest precision as possible.
>> 
>> I agree, which is why we don't want to lose precision in the delay
>> guesswork because of gettime64().
> 
> Sorry the clock difference is 10ns now. So the guest clock is off by _10 ns_ 
> of the host clock.

That is pretty good.

> You are suggesting to use getcrosststamp instead, to drop the (rdtsc() -
> guest_tsc) part ?

Yes, it results in simpler code, doesn't create dependency on the
dreaded kvmclock, and is the best we can currently do wrt. precision.

Thanks.


Re: [patch 3/3] PTP: add kvm PTP driver

2017-01-16 Thread Radim Krcmar
2017-01-16 15:39-0200, Marcelo Tosatti:
> On Mon, Jan 16, 2017 at 06:27:58PM +0100, Radim Krcmar wrote:
>> 2017-01-16 15:08-0200, Marcelo Tosatti:
>> > On Mon, Jan 16, 2017 at 05:54:11PM +0100, Radim Krcmar wrote:
>> >> 2017-01-16 17:26+0100, Radim Krcmar:
>> >> > 2017-01-13 15:40-0200, Marcelo Tosatti:
>> >> >> On Fri, Jan 13, 2017 at 04:56:58PM +0100, Radim Krcmar wrote:
>> >> >> > 2017-01-13 10:01-0200, Marcelo Tosatti:
>> >> >>> > +   version = pvclock_read_begin(src);
>> >> >>> > +
>> >> >>> > +   ret = kvm_hypercall2(KVM_HC_CLOCK_OFFSET,
>> >> >>> > +clock_off_gpa,
>> >> >>> > +KVM_CLOCK_OFFSET_WALLCLOCK);
>> >> >>> > +   if (ret != 0) {
>> >> >>> > +   pr_err("clock offset hypercall ret %lu\n", ret);
>> >> >>> > +   spin_unlock(_ptp_lock);
>> >> >>> > +   preempt_enable_notrace();
>> >> >>> > +   return -EOPNOTSUPP;
>> >> >>> > +   }
>> >> >>> > +
>> >> >>> > +   tspec.tv_sec = clock_off.sec;
>> >> >>> > +   tspec.tv_nsec = clock_off.nsec;
>> >> >>> > +
>> >> >>> > +   delta = rdtsc_ordered() - clock_off.tsc;
>> >> >>> > +
>> >> >>> > +   offset = pvclock_scale_delta(delta, 
>> >> >>> > src->tsc_to_system_mul,
>> >> >>> > +src->tsc_shift);
>> >> >>> > +
>> >> >>> > +   } while (pvclock_read_retry(src, version));
>> >> >>> > +
>> >> >>> > +   preempt_enable_notrace();
>> >> >>> > +
>> >> >>> > +   tspec.tv_nsec = tspec.tv_nsec + offset;
>> >> >>> > +
>> >> >>> > +   spin_unlock(_ptp_lock);
>> >> >>> > +
>> >> >>> > +   if (tspec.tv_nsec >= NSEC_PER_SEC) {
>> >> >>> > +   u64 secs = tspec.tv_nsec;
>> >> >>> > +
>> >> >>> > +   tspec.tv_nsec = do_div(secs, NSEC_PER_SEC);
>> >> >>> > +   tspec.tv_sec += secs;
>> >> >>> > +   }
>> >> >>> > +
>> >> >>> > +   memcpy(ts, , sizeof(struct timespec64));
>> >> >>> 
>> >> >>> But the whole idea is of improving the time by reading tsc a bit later
>> >> >>> is just weird ... why is it better to provide
>> >> >>> 
>> >> >>>   tsc + x, time + tsc_delta_to_time(x)
>> >> >>> 
>> >> >>> than just
>> >> >>> 
>> >> >>>  tsc, time
>> >> >>> 
>> >> >>> ?
>> >> >> 
>> >> >> Because you want to calculate the value of the host realtime clock 
>> >> >> at the moment of ptp_kvm_gettime.
>> >> >> 
>> >> >> We do:
>> >> >> 
>> >> >>1. kvm_hypercall.
>> >> >>2. get {sec, nsec, guest_tsc}.
>> >> >>3. kvm_hypercall returns.
>> >> >>4. delay = rdtsc() - guest_tsc.
>> >> >> 
>> >> >> Where delay is the delta (measured with the TSC) between points 2 and 
>> >> >> 4.
>> >> > 
>> >> > I see now ... the PTP interface is just not good for our purposes.
>> >> 
>> >> There is getcrosststamp() callback in PTP, which seems to be exactly
>> >> what we want when pairing with TSC, so the pvclock delay fixup can be
>> >> dropped when using it.
>> > 
>> > What pvclock delay fixup you refer to? The "rdtsc() - clock_offset.tsc"
>> > part?
>> 
>> Yes.
>> 
>> >   You can't drop it, because if you do then your "host realtime
>> > clock read" will be behind by "rdtsc() - clock_offset.tsc" TSC cycles.
>> 
>> The TSC read will be some cycles old when the hypercall ends, but that
>> doesn't matter, because we will pass {sec, nsec, guest_tsc} to PTP and
>> PTP should plug them into kernel's realtime clock roughly like this:
>> 
>>   sec/nsec + (rdtsc() - guest_tsc) * tsc_freq
>> 
>> Adding delay to guest_tsc and sec/nsec cannot improve precision.
>> (And will likely degrade it as kvmclock's frequency is incorrect.)
>> 
>> > We want the highest precision as possible.
>> 
>> I agree, which is why we don't want to lose precision in the delay
>> guesswork because of gettime64().
> 
> Sorry the clock difference is 10ns now. So the guest clock is off by _10 ns_ 
> of the host clock.

That is pretty good.

> You are suggesting to use getcrosststamp instead, to drop the (rdtsc() -
> guest_tsc) part ?

Yes, it results in simpler code, doesn't create dependency on the
dreaded kvmclock, and is the best we can currently do wrt. precision.

Thanks.


Re: [patch 3/3] PTP: add kvm PTP driver

2017-01-16 Thread Marcelo Tosatti
On Mon, Jan 16, 2017 at 06:27:58PM +0100, Radim Krcmar wrote:
> 2017-01-16 15:08-0200, Marcelo Tosatti:
> > On Mon, Jan 16, 2017 at 05:54:11PM +0100, Radim Krcmar wrote:
> >> 2017-01-16 17:26+0100, Radim Krcmar:
> >> > 2017-01-13 15:40-0200, Marcelo Tosatti:
> >> >> On Fri, Jan 13, 2017 at 04:56:58PM +0100, Radim Krcmar wrote:
> >> >> > 2017-01-13 10:01-0200, Marcelo Tosatti:
> >> >>> > +version = pvclock_read_begin(src);
> >> >>> > +
> >> >>> > +ret = kvm_hypercall2(KVM_HC_CLOCK_OFFSET,
> >> >>> > + clock_off_gpa,
> >> >>> > + KVM_CLOCK_OFFSET_WALLCLOCK);
> >> >>> > +if (ret != 0) {
> >> >>> > +pr_err("clock offset hypercall ret %lu\n", ret);
> >> >>> > +spin_unlock(_ptp_lock);
> >> >>> > +preempt_enable_notrace();
> >> >>> > +return -EOPNOTSUPP;
> >> >>> > +}
> >> >>> > +
> >> >>> > +tspec.tv_sec = clock_off.sec;
> >> >>> > +tspec.tv_nsec = clock_off.nsec;
> >> >>> > +
> >> >>> > +delta = rdtsc_ordered() - clock_off.tsc;
> >> >>> > +
> >> >>> > +offset = pvclock_scale_delta(delta, 
> >> >>> > src->tsc_to_system_mul,
> >> >>> > + src->tsc_shift);
> >> >>> > +
> >> >>> > +} while (pvclock_read_retry(src, version));
> >> >>> > +
> >> >>> > +preempt_enable_notrace();
> >> >>> > +
> >> >>> > +tspec.tv_nsec = tspec.tv_nsec + offset;
> >> >>> > +
> >> >>> > +spin_unlock(_ptp_lock);
> >> >>> > +
> >> >>> > +if (tspec.tv_nsec >= NSEC_PER_SEC) {
> >> >>> > +u64 secs = tspec.tv_nsec;
> >> >>> > +
> >> >>> > +tspec.tv_nsec = do_div(secs, NSEC_PER_SEC);
> >> >>> > +tspec.tv_sec += secs;
> >> >>> > +}
> >> >>> > +
> >> >>> > +memcpy(ts, , sizeof(struct timespec64));
> >> >>> 
> >> >>> But the whole idea is of improving the time by reading tsc a bit later
> >> >>> is just weird ... why is it better to provide
> >> >>> 
> >> >>>   tsc + x, time + tsc_delta_to_time(x)
> >> >>> 
> >> >>> than just
> >> >>> 
> >> >>>  tsc, time
> >> >>> 
> >> >>> ?
> >> >> 
> >> >> Because you want to calculate the value of the host realtime clock 
> >> >> at the moment of ptp_kvm_gettime.
> >> >> 
> >> >> We do:
> >> >> 
> >> >> 1. kvm_hypercall.
> >> >> 2. get {sec, nsec, guest_tsc}.
> >> >> 3. kvm_hypercall returns.
> >> >> 4. delay = rdtsc() - guest_tsc.
> >> >> 
> >> >> Where delay is the delta (measured with the TSC) between points 2 and 4.
> >> > 
> >> > I see now ... the PTP interface is just not good for our purposes.
> >> 
> >> There is getcrosststamp() callback in PTP, which seems to be exactly
> >> what we want when pairing with TSC, so the pvclock delay fixup can be
> >> dropped when using it.
> > 
> > What pvclock delay fixup you refer to? The "rdtsc() - clock_offset.tsc"
> > part?
> 
> Yes.
> 
> >   You can't drop it, because if you do then your "host realtime
> > clock read" will be behind by "rdtsc() - clock_offset.tsc" TSC cycles.
> 
> The TSC read will be some cycles old when the hypercall ends, but that
> doesn't matter, because we will pass {sec, nsec, guest_tsc} to PTP and
> PTP should plug them into kernel's realtime clock roughly like this:
> 
>   sec/nsec + (rdtsc() - guest_tsc) * tsc_freq
> 
> Adding delay to guest_tsc and sec/nsec cannot improve precision.
> (And will likely degrade it as kvmclock's frequency is incorrect.)
> 
> > We want the highest precision as possible.
> 
> I agree, which is why we don't want to lose precision in the delay
> guesswork because of gettime64().

Sorry the clock difference is 10ns now. So the guest clock is off by _10 ns_ 
of the host clock.

You are suggesting to use getcrosststamp instead, to drop the (rdtsc() -
guest_tsc) part ?

Please be more verbose.



Re: [patch 3/3] PTP: add kvm PTP driver

2017-01-16 Thread Marcelo Tosatti
On Mon, Jan 16, 2017 at 06:27:58PM +0100, Radim Krcmar wrote:
> 2017-01-16 15:08-0200, Marcelo Tosatti:
> > On Mon, Jan 16, 2017 at 05:54:11PM +0100, Radim Krcmar wrote:
> >> 2017-01-16 17:26+0100, Radim Krcmar:
> >> > 2017-01-13 15:40-0200, Marcelo Tosatti:
> >> >> On Fri, Jan 13, 2017 at 04:56:58PM +0100, Radim Krcmar wrote:
> >> >> > 2017-01-13 10:01-0200, Marcelo Tosatti:
> >> >>> > +version = pvclock_read_begin(src);
> >> >>> > +
> >> >>> > +ret = kvm_hypercall2(KVM_HC_CLOCK_OFFSET,
> >> >>> > + clock_off_gpa,
> >> >>> > + KVM_CLOCK_OFFSET_WALLCLOCK);
> >> >>> > +if (ret != 0) {
> >> >>> > +pr_err("clock offset hypercall ret %lu\n", ret);
> >> >>> > +spin_unlock(_ptp_lock);
> >> >>> > +preempt_enable_notrace();
> >> >>> > +return -EOPNOTSUPP;
> >> >>> > +}
> >> >>> > +
> >> >>> > +tspec.tv_sec = clock_off.sec;
> >> >>> > +tspec.tv_nsec = clock_off.nsec;
> >> >>> > +
> >> >>> > +delta = rdtsc_ordered() - clock_off.tsc;
> >> >>> > +
> >> >>> > +offset = pvclock_scale_delta(delta, 
> >> >>> > src->tsc_to_system_mul,
> >> >>> > + src->tsc_shift);
> >> >>> > +
> >> >>> > +} while (pvclock_read_retry(src, version));
> >> >>> > +
> >> >>> > +preempt_enable_notrace();
> >> >>> > +
> >> >>> > +tspec.tv_nsec = tspec.tv_nsec + offset;
> >> >>> > +
> >> >>> > +spin_unlock(_ptp_lock);
> >> >>> > +
> >> >>> > +if (tspec.tv_nsec >= NSEC_PER_SEC) {
> >> >>> > +u64 secs = tspec.tv_nsec;
> >> >>> > +
> >> >>> > +tspec.tv_nsec = do_div(secs, NSEC_PER_SEC);
> >> >>> > +tspec.tv_sec += secs;
> >> >>> > +}
> >> >>> > +
> >> >>> > +memcpy(ts, , sizeof(struct timespec64));
> >> >>> 
> >> >>> But the whole idea is of improving the time by reading tsc a bit later
> >> >>> is just weird ... why is it better to provide
> >> >>> 
> >> >>>   tsc + x, time + tsc_delta_to_time(x)
> >> >>> 
> >> >>> than just
> >> >>> 
> >> >>>  tsc, time
> >> >>> 
> >> >>> ?
> >> >> 
> >> >> Because you want to calculate the value of the host realtime clock 
> >> >> at the moment of ptp_kvm_gettime.
> >> >> 
> >> >> We do:
> >> >> 
> >> >> 1. kvm_hypercall.
> >> >> 2. get {sec, nsec, guest_tsc}.
> >> >> 3. kvm_hypercall returns.
> >> >> 4. delay = rdtsc() - guest_tsc.
> >> >> 
> >> >> Where delay is the delta (measured with the TSC) between points 2 and 4.
> >> > 
> >> > I see now ... the PTP interface is just not good for our purposes.
> >> 
> >> There is getcrosststamp() callback in PTP, which seems to be exactly
> >> what we want when pairing with TSC, so the pvclock delay fixup can be
> >> dropped when using it.
> > 
> > What pvclock delay fixup you refer to? The "rdtsc() - clock_offset.tsc"
> > part?
> 
> Yes.
> 
> >   You can't drop it, because if you do then your "host realtime
> > clock read" will be behind by "rdtsc() - clock_offset.tsc" TSC cycles.
> 
> The TSC read will be some cycles old when the hypercall ends, but that
> doesn't matter, because we will pass {sec, nsec, guest_tsc} to PTP and
> PTP should plug them into kernel's realtime clock roughly like this:
> 
>   sec/nsec + (rdtsc() - guest_tsc) * tsc_freq
> 
> Adding delay to guest_tsc and sec/nsec cannot improve precision.
> (And will likely degrade it as kvmclock's frequency is incorrect.)
> 
> > We want the highest precision as possible.
> 
> I agree, which is why we don't want to lose precision in the delay
> guesswork because of gettime64().

Sorry the clock difference is 10ns now. So the guest clock is off by _10 ns_ 
of the host clock.

You are suggesting to use getcrosststamp instead, to drop the (rdtsc() -
guest_tsc) part ?

Please be more verbose.



Re: [patch 3/3] PTP: add kvm PTP driver

2017-01-16 Thread Radim Krcmar
2017-01-16 15:04-0200, Marcelo Tosatti:
> On Mon, Jan 16, 2017 at 05:26:53PM +0100, Radim Krcmar wrote:
>> 2017-01-13 15:40-0200, Marcelo Tosatti:
>> > On Fri, Jan 13, 2017 at 04:56:58PM +0100, Radim Krcmar wrote:
>> > > 2017-01-13 10:01-0200, Marcelo Tosatti:
>> >> > +   version = pvclock_read_begin(src);
>> >> > +
>> >> > +   ret = kvm_hypercall2(KVM_HC_CLOCK_OFFSET,
>> >> > +clock_off_gpa,
>> >> > +KVM_CLOCK_OFFSET_WALLCLOCK);
>> >> > +   if (ret != 0) {
>> >> > +   pr_err("clock offset hypercall ret %lu\n", ret);
>> >> > +   spin_unlock(_ptp_lock);
>> >> > +   preempt_enable_notrace();
>> >> > +   return -EOPNOTSUPP;
>> >> > +   }
>> >> > +
>> >> > +   tspec.tv_sec = clock_off.sec;
>> >> > +   tspec.tv_nsec = clock_off.nsec;
>> >> > +
>> >> > +   delta = rdtsc_ordered() - clock_off.tsc;
>> >> > +
>> >> > +   offset = pvclock_scale_delta(delta, 
>> >> > src->tsc_to_system_mul,
>> >> > +src->tsc_shift);
>> >> > +
>> >> > +   } while (pvclock_read_retry(src, version));
>> >> > +
>> >> > +   preempt_enable_notrace();
>> >> > +
>> >> > +   tspec.tv_nsec = tspec.tv_nsec + offset;
>> >> > +
>> >> > +   spin_unlock(_ptp_lock);
>> >> > +
>> >> > +   if (tspec.tv_nsec >= NSEC_PER_SEC) {
>> >> > +   u64 secs = tspec.tv_nsec;
>> >> > +
>> >> > +   tspec.tv_nsec = do_div(secs, NSEC_PER_SEC);
>> >> > +   tspec.tv_sec += secs;
>> >> > +   }
>> >> > +
>> >> > +   memcpy(ts, , sizeof(struct timespec64));
>> >> 
>> >> But the whole idea is of improving the time by reading tsc a bit later
>> >> is just weird ... why is it better to provide
>> >> 
>> >>   tsc + x, time + tsc_delta_to_time(x)
>> >> 
>> >> than just
>> >> 
>> >>  tsc, time
>> >> 
>> >> ?
>> > 
>> > Because you want to calculate the value of the host realtime clock 
>> > at the moment of ptp_kvm_gettime.
>> > 
>> > We do:
>> > 
>> >1. kvm_hypercall.
>> >2. get {sec, nsec, guest_tsc}.
>> >3. kvm_hypercall returns.
>> >4. delay = rdtsc() - guest_tsc.
>> > 
>> > Where delay is the delta (measured with the TSC) between points 2 and 4.
>> 
>> I see now ... the PTP interface is just not good for our purposes.
>> We don't return {sec, nsec, guest_tsc}, we just return {sec, nsec} at
>> some random time in the past.  And to make it a bit more accurate, you
>> add a best-effort delta before returning, which makes sense.
> 
> Not random time in the past. We return {sec, nsec} from the host
> realtime at the moment the user ran the hypercall. 

That is what we want, but {sec, nsec} is not anchored to any running
time, so it is unavoidably late when we return it.

> Since PTP is very accurate, that "a bit more" counts, yes.
> 
>> When we have to depend on pvclock, what are the advantages of not using
>> the existing pvclock API for wall clock?
>> (You mentioned some extensions.)
>> 
>>   struct pvclock_wall_clock {
>>  u32   version;
>>  u32   sec;
>>  u32   nsec;
>>   } __attribute__((__packed__));
>
>> It gives the wall clock when pvclock was 0, so you just add current
>> kvmclock and get the host wall clock.  
> 
> Well, no. For one, the TSC part of kvmclock: 
> 
>   kvmclock-read = system_timestamp + convert-to-1GHz(rdtsc() - 
> tsc_timestamp)
>  
> 
> Drifts relative to UTC. This part can be large.
> The guests NTP is responsible for fixing
> that drift of the guests realtime clock (talking about current setup, 
> without KVM PTP driver).
> 
> Now, we want very high precision (less than 1us) for this
> driver. Very small TSC drifts on a large delta defeat the purpose.

True.

>> Without a VM exit.
> 
> Huge performance is not an issue. Accuracy (how different from the host
> realtime clock our "approximation" of the host realtime clock) is.

Ah, ok.

>> And how often is ptp_kvm_gettime() usually called?
> 
> The PTP_SYS_OFFSET ioctl calls the following code in a loop:
> 
> struct ptp_sys_offset {
> unsigned int n_samples; /* Desired number of measurements. */
> unsigned int rsv[3];/* Reserved for future use. */
> /*
>  * Array of interleaved system/phc time stamps. The kernel
>  * will provide 2*n_samples + 1 time stamps, with the last
>  * one as a system time stamp.
>  */
> struct ptp_clock_time ts[2 * PTP_MAX_SAMPLES + 1];
> };
> 
> #define PTP_MAX_SAMPLES 25 /* Maximum allowed offset measurement
> samples. */
> 
> case PTP_SYS_OFFSET:
> sysoff = memdup_user((void __user *)arg,
> sizeof(*sysoff));
> if (IS_ERR(sysoff)) {
> err = PTR_ERR(sysoff);
> 

Re: [patch 3/3] PTP: add kvm PTP driver

2017-01-16 Thread Radim Krcmar
2017-01-16 15:04-0200, Marcelo Tosatti:
> On Mon, Jan 16, 2017 at 05:26:53PM +0100, Radim Krcmar wrote:
>> 2017-01-13 15:40-0200, Marcelo Tosatti:
>> > On Fri, Jan 13, 2017 at 04:56:58PM +0100, Radim Krcmar wrote:
>> > > 2017-01-13 10:01-0200, Marcelo Tosatti:
>> >> > +   version = pvclock_read_begin(src);
>> >> > +
>> >> > +   ret = kvm_hypercall2(KVM_HC_CLOCK_OFFSET,
>> >> > +clock_off_gpa,
>> >> > +KVM_CLOCK_OFFSET_WALLCLOCK);
>> >> > +   if (ret != 0) {
>> >> > +   pr_err("clock offset hypercall ret %lu\n", ret);
>> >> > +   spin_unlock(_ptp_lock);
>> >> > +   preempt_enable_notrace();
>> >> > +   return -EOPNOTSUPP;
>> >> > +   }
>> >> > +
>> >> > +   tspec.tv_sec = clock_off.sec;
>> >> > +   tspec.tv_nsec = clock_off.nsec;
>> >> > +
>> >> > +   delta = rdtsc_ordered() - clock_off.tsc;
>> >> > +
>> >> > +   offset = pvclock_scale_delta(delta, 
>> >> > src->tsc_to_system_mul,
>> >> > +src->tsc_shift);
>> >> > +
>> >> > +   } while (pvclock_read_retry(src, version));
>> >> > +
>> >> > +   preempt_enable_notrace();
>> >> > +
>> >> > +   tspec.tv_nsec = tspec.tv_nsec + offset;
>> >> > +
>> >> > +   spin_unlock(_ptp_lock);
>> >> > +
>> >> > +   if (tspec.tv_nsec >= NSEC_PER_SEC) {
>> >> > +   u64 secs = tspec.tv_nsec;
>> >> > +
>> >> > +   tspec.tv_nsec = do_div(secs, NSEC_PER_SEC);
>> >> > +   tspec.tv_sec += secs;
>> >> > +   }
>> >> > +
>> >> > +   memcpy(ts, , sizeof(struct timespec64));
>> >> 
>> >> But the whole idea is of improving the time by reading tsc a bit later
>> >> is just weird ... why is it better to provide
>> >> 
>> >>   tsc + x, time + tsc_delta_to_time(x)
>> >> 
>> >> than just
>> >> 
>> >>  tsc, time
>> >> 
>> >> ?
>> > 
>> > Because you want to calculate the value of the host realtime clock 
>> > at the moment of ptp_kvm_gettime.
>> > 
>> > We do:
>> > 
>> >1. kvm_hypercall.
>> >2. get {sec, nsec, guest_tsc}.
>> >3. kvm_hypercall returns.
>> >4. delay = rdtsc() - guest_tsc.
>> > 
>> > Where delay is the delta (measured with the TSC) between points 2 and 4.
>> 
>> I see now ... the PTP interface is just not good for our purposes.
>> We don't return {sec, nsec, guest_tsc}, we just return {sec, nsec} at
>> some random time in the past.  And to make it a bit more accurate, you
>> add a best-effort delta before returning, which makes sense.
> 
> Not random time in the past. We return {sec, nsec} from the host
> realtime at the moment the user ran the hypercall. 

That is what we want, but {sec, nsec} is not anchored to any running
time, so it is unavoidably late when we return it.

> Since PTP is very accurate, that "a bit more" counts, yes.
> 
>> When we have to depend on pvclock, what are the advantages of not using
>> the existing pvclock API for wall clock?
>> (You mentioned some extensions.)
>> 
>>   struct pvclock_wall_clock {
>>  u32   version;
>>  u32   sec;
>>  u32   nsec;
>>   } __attribute__((__packed__));
>
>> It gives the wall clock when pvclock was 0, so you just add current
>> kvmclock and get the host wall clock.  
> 
> Well, no. For one, the TSC part of kvmclock: 
> 
>   kvmclock-read = system_timestamp + convert-to-1GHz(rdtsc() - 
> tsc_timestamp)
>  
> 
> Drifts relative to UTC. This part can be large.
> The guests NTP is responsible for fixing
> that drift of the guests realtime clock (talking about current setup, 
> without KVM PTP driver).
> 
> Now, we want very high precision (less than 1us) for this
> driver. Very small TSC drifts on a large delta defeat the purpose.

True.

>> Without a VM exit.
> 
> Huge performance is not an issue. Accuracy (how different from the host
> realtime clock our "approximation" of the host realtime clock) is.

Ah, ok.

>> And how often is ptp_kvm_gettime() usually called?
> 
> The PTP_SYS_OFFSET ioctl calls the following code in a loop:
> 
> struct ptp_sys_offset {
> unsigned int n_samples; /* Desired number of measurements. */
> unsigned int rsv[3];/* Reserved for future use. */
> /*
>  * Array of interleaved system/phc time stamps. The kernel
>  * will provide 2*n_samples + 1 time stamps, with the last
>  * one as a system time stamp.
>  */
> struct ptp_clock_time ts[2 * PTP_MAX_SAMPLES + 1];
> };
> 
> #define PTP_MAX_SAMPLES 25 /* Maximum allowed offset measurement
> samples. */
> 
> case PTP_SYS_OFFSET:
> sysoff = memdup_user((void __user *)arg,
> sizeof(*sysoff));
> if (IS_ERR(sysoff)) {
> err = PTR_ERR(sysoff);
> 

Re: [patch 3/3] PTP: add kvm PTP driver

2017-01-16 Thread Radim Krcmar
2017-01-16 15:08-0200, Marcelo Tosatti:
> On Mon, Jan 16, 2017 at 05:54:11PM +0100, Radim Krcmar wrote:
>> 2017-01-16 17:26+0100, Radim Krcmar:
>> > 2017-01-13 15:40-0200, Marcelo Tosatti:
>> >> On Fri, Jan 13, 2017 at 04:56:58PM +0100, Radim Krcmar wrote:
>> >> > 2017-01-13 10:01-0200, Marcelo Tosatti:
>> >>> > +  version = pvclock_read_begin(src);
>> >>> > +
>> >>> > +  ret = kvm_hypercall2(KVM_HC_CLOCK_OFFSET,
>> >>> > +   clock_off_gpa,
>> >>> > +   KVM_CLOCK_OFFSET_WALLCLOCK);
>> >>> > +  if (ret != 0) {
>> >>> > +  pr_err("clock offset hypercall ret %lu\n", ret);
>> >>> > +  spin_unlock(_ptp_lock);
>> >>> > +  preempt_enable_notrace();
>> >>> > +  return -EOPNOTSUPP;
>> >>> > +  }
>> >>> > +
>> >>> > +  tspec.tv_sec = clock_off.sec;
>> >>> > +  tspec.tv_nsec = clock_off.nsec;
>> >>> > +
>> >>> > +  delta = rdtsc_ordered() - clock_off.tsc;
>> >>> > +
>> >>> > +  offset = pvclock_scale_delta(delta, 
>> >>> > src->tsc_to_system_mul,
>> >>> > +   src->tsc_shift);
>> >>> > +
>> >>> > +  } while (pvclock_read_retry(src, version));
>> >>> > +
>> >>> > +  preempt_enable_notrace();
>> >>> > +
>> >>> > +  tspec.tv_nsec = tspec.tv_nsec + offset;
>> >>> > +
>> >>> > +  spin_unlock(_ptp_lock);
>> >>> > +
>> >>> > +  if (tspec.tv_nsec >= NSEC_PER_SEC) {
>> >>> > +  u64 secs = tspec.tv_nsec;
>> >>> > +
>> >>> > +  tspec.tv_nsec = do_div(secs, NSEC_PER_SEC);
>> >>> > +  tspec.tv_sec += secs;
>> >>> > +  }
>> >>> > +
>> >>> > +  memcpy(ts, , sizeof(struct timespec64));
>> >>> 
>> >>> But the whole idea is of improving the time by reading tsc a bit later
>> >>> is just weird ... why is it better to provide
>> >>> 
>> >>>   tsc + x, time + tsc_delta_to_time(x)
>> >>> 
>> >>> than just
>> >>> 
>> >>>  tsc, time
>> >>> 
>> >>> ?
>> >> 
>> >> Because you want to calculate the value of the host realtime clock 
>> >> at the moment of ptp_kvm_gettime.
>> >> 
>> >> We do:
>> >> 
>> >>   1. kvm_hypercall.
>> >>   2. get {sec, nsec, guest_tsc}.
>> >>   3. kvm_hypercall returns.
>> >>   4. delay = rdtsc() - guest_tsc.
>> >> 
>> >> Where delay is the delta (measured with the TSC) between points 2 and 4.
>> > 
>> > I see now ... the PTP interface is just not good for our purposes.
>> 
>> There is getcrosststamp() callback in PTP, which seems to be exactly
>> what we want when pairing with TSC, so the pvclock delay fixup can be
>> dropped when using it.
> 
> What pvclock delay fixup you refer to? The "rdtsc() - clock_offset.tsc"
> part?

Yes.

>   You can't drop it, because if you do then your "host realtime
> clock read" will be behind by "rdtsc() - clock_offset.tsc" TSC cycles.

The TSC read will be some cycles old when the hypercall ends, but that
doesn't matter, because we will pass {sec, nsec, guest_tsc} to PTP and
PTP should plug them into kernel's realtime clock roughly like this:

  sec/nsec + (rdtsc() - guest_tsc) * tsc_freq

Adding delay to guest_tsc and sec/nsec cannot improve precision.
(And will likely degrade it as kvmclock's frequency is incorrect.)

> We want the highest precision as possible.

I agree, which is why we don't want to lose precision in the delay
guesswork because of gettime64().


Re: [patch 3/3] PTP: add kvm PTP driver

2017-01-16 Thread Radim Krcmar
2017-01-16 15:08-0200, Marcelo Tosatti:
> On Mon, Jan 16, 2017 at 05:54:11PM +0100, Radim Krcmar wrote:
>> 2017-01-16 17:26+0100, Radim Krcmar:
>> > 2017-01-13 15:40-0200, Marcelo Tosatti:
>> >> On Fri, Jan 13, 2017 at 04:56:58PM +0100, Radim Krcmar wrote:
>> >> > 2017-01-13 10:01-0200, Marcelo Tosatti:
>> >>> > +  version = pvclock_read_begin(src);
>> >>> > +
>> >>> > +  ret = kvm_hypercall2(KVM_HC_CLOCK_OFFSET,
>> >>> > +   clock_off_gpa,
>> >>> > +   KVM_CLOCK_OFFSET_WALLCLOCK);
>> >>> > +  if (ret != 0) {
>> >>> > +  pr_err("clock offset hypercall ret %lu\n", ret);
>> >>> > +  spin_unlock(_ptp_lock);
>> >>> > +  preempt_enable_notrace();
>> >>> > +  return -EOPNOTSUPP;
>> >>> > +  }
>> >>> > +
>> >>> > +  tspec.tv_sec = clock_off.sec;
>> >>> > +  tspec.tv_nsec = clock_off.nsec;
>> >>> > +
>> >>> > +  delta = rdtsc_ordered() - clock_off.tsc;
>> >>> > +
>> >>> > +  offset = pvclock_scale_delta(delta, 
>> >>> > src->tsc_to_system_mul,
>> >>> > +   src->tsc_shift);
>> >>> > +
>> >>> > +  } while (pvclock_read_retry(src, version));
>> >>> > +
>> >>> > +  preempt_enable_notrace();
>> >>> > +
>> >>> > +  tspec.tv_nsec = tspec.tv_nsec + offset;
>> >>> > +
>> >>> > +  spin_unlock(_ptp_lock);
>> >>> > +
>> >>> > +  if (tspec.tv_nsec >= NSEC_PER_SEC) {
>> >>> > +  u64 secs = tspec.tv_nsec;
>> >>> > +
>> >>> > +  tspec.tv_nsec = do_div(secs, NSEC_PER_SEC);
>> >>> > +  tspec.tv_sec += secs;
>> >>> > +  }
>> >>> > +
>> >>> > +  memcpy(ts, , sizeof(struct timespec64));
>> >>> 
>> >>> But the whole idea is of improving the time by reading tsc a bit later
>> >>> is just weird ... why is it better to provide
>> >>> 
>> >>>   tsc + x, time + tsc_delta_to_time(x)
>> >>> 
>> >>> than just
>> >>> 
>> >>>  tsc, time
>> >>> 
>> >>> ?
>> >> 
>> >> Because you want to calculate the value of the host realtime clock 
>> >> at the moment of ptp_kvm_gettime.
>> >> 
>> >> We do:
>> >> 
>> >>   1. kvm_hypercall.
>> >>   2. get {sec, nsec, guest_tsc}.
>> >>   3. kvm_hypercall returns.
>> >>   4. delay = rdtsc() - guest_tsc.
>> >> 
>> >> Where delay is the delta (measured with the TSC) between points 2 and 4.
>> > 
>> > I see now ... the PTP interface is just not good for our purposes.
>> 
>> There is getcrosststamp() callback in PTP, which seems to be exactly
>> what we want when pairing with TSC, so the pvclock delay fixup can be
>> dropped when using it.
> 
> What pvclock delay fixup you refer to? The "rdtsc() - clock_offset.tsc"
> part?

Yes.

>   You can't drop it, because if you do then your "host realtime
> clock read" will be behind by "rdtsc() - clock_offset.tsc" TSC cycles.

The TSC read will be some cycles old when the hypercall ends, but that
doesn't matter, because we will pass {sec, nsec, guest_tsc} to PTP and
PTP should plug them into kernel's realtime clock roughly like this:

  sec/nsec + (rdtsc() - guest_tsc) * tsc_freq

Adding delay to guest_tsc and sec/nsec cannot improve precision.
(And will likely degrade it as kvmclock's frequency is incorrect.)

> We want the highest precision as possible.

I agree, which is why we don't want to lose precision in the delay
guesswork because of gettime64().


Re: [patch 3/3] PTP: add kvm PTP driver

2017-01-16 Thread Marcelo Tosatti
On Mon, Jan 16, 2017 at 05:54:11PM +0100, Radim Krcmar wrote:
> 2017-01-16 17:26+0100, Radim Krcmar:
> > 2017-01-13 15:40-0200, Marcelo Tosatti:
> >> On Fri, Jan 13, 2017 at 04:56:58PM +0100, Radim Krcmar wrote:
> >> > 2017-01-13 10:01-0200, Marcelo Tosatti:
> >>> > +   version = pvclock_read_begin(src);
> >>> > +
> >>> > +   ret = kvm_hypercall2(KVM_HC_CLOCK_OFFSET,
> >>> > +clock_off_gpa,
> >>> > +KVM_CLOCK_OFFSET_WALLCLOCK);
> >>> > +   if (ret != 0) {
> >>> > +   pr_err("clock offset hypercall ret %lu\n", ret);
> >>> > +   spin_unlock(_ptp_lock);
> >>> > +   preempt_enable_notrace();
> >>> > +   return -EOPNOTSUPP;
> >>> > +   }
> >>> > +
> >>> > +   tspec.tv_sec = clock_off.sec;
> >>> > +   tspec.tv_nsec = clock_off.nsec;
> >>> > +
> >>> > +   delta = rdtsc_ordered() - clock_off.tsc;
> >>> > +
> >>> > +   offset = pvclock_scale_delta(delta, 
> >>> > src->tsc_to_system_mul,
> >>> > +src->tsc_shift);
> >>> > +
> >>> > +   } while (pvclock_read_retry(src, version));
> >>> > +
> >>> > +   preempt_enable_notrace();
> >>> > +
> >>> > +   tspec.tv_nsec = tspec.tv_nsec + offset;
> >>> > +
> >>> > +   spin_unlock(_ptp_lock);
> >>> > +
> >>> > +   if (tspec.tv_nsec >= NSEC_PER_SEC) {
> >>> > +   u64 secs = tspec.tv_nsec;
> >>> > +
> >>> > +   tspec.tv_nsec = do_div(secs, NSEC_PER_SEC);
> >>> > +   tspec.tv_sec += secs;
> >>> > +   }
> >>> > +
> >>> > +   memcpy(ts, , sizeof(struct timespec64));
> >>> 
> >>> But the whole idea is of improving the time by reading tsc a bit later
> >>> is just weird ... why is it better to provide
> >>> 
> >>>   tsc + x, time + tsc_delta_to_time(x)
> >>> 
> >>> than just
> >>> 
> >>>  tsc, time
> >>> 
> >>> ?
> >> 
> >> Because you want to calculate the value of the host realtime clock 
> >> at the moment of ptp_kvm_gettime.
> >> 
> >> We do:
> >> 
> >>1. kvm_hypercall.
> >>2. get {sec, nsec, guest_tsc}.
> >>3. kvm_hypercall returns.
> >>4. delay = rdtsc() - guest_tsc.
> >> 
> >> Where delay is the delta (measured with the TSC) between points 2 and 4.
> > 
> > I see now ... the PTP interface is just not good for our purposes.
> 
> There is getcrosststamp() callback in PTP, which seems to be exactly
> what we want when pairing with TSC, so the pvclock delay fixup can be
> dropped when using it.

What pvclock delay fixup you refer to? The "rdtsc() - clock_offset.tsc"
part? You can't drop it, because if you do then your "host realtime
clock read" will be behind by "rdtsc() - clock_offset.tsc" TSC cycles.
We want the highest precision as possible.



Re: [patch 3/3] PTP: add kvm PTP driver

2017-01-16 Thread Marcelo Tosatti
On Mon, Jan 16, 2017 at 05:54:11PM +0100, Radim Krcmar wrote:
> 2017-01-16 17:26+0100, Radim Krcmar:
> > 2017-01-13 15:40-0200, Marcelo Tosatti:
> >> On Fri, Jan 13, 2017 at 04:56:58PM +0100, Radim Krcmar wrote:
> >> > 2017-01-13 10:01-0200, Marcelo Tosatti:
> >>> > +   version = pvclock_read_begin(src);
> >>> > +
> >>> > +   ret = kvm_hypercall2(KVM_HC_CLOCK_OFFSET,
> >>> > +clock_off_gpa,
> >>> > +KVM_CLOCK_OFFSET_WALLCLOCK);
> >>> > +   if (ret != 0) {
> >>> > +   pr_err("clock offset hypercall ret %lu\n", ret);
> >>> > +   spin_unlock(_ptp_lock);
> >>> > +   preempt_enable_notrace();
> >>> > +   return -EOPNOTSUPP;
> >>> > +   }
> >>> > +
> >>> > +   tspec.tv_sec = clock_off.sec;
> >>> > +   tspec.tv_nsec = clock_off.nsec;
> >>> > +
> >>> > +   delta = rdtsc_ordered() - clock_off.tsc;
> >>> > +
> >>> > +   offset = pvclock_scale_delta(delta, 
> >>> > src->tsc_to_system_mul,
> >>> > +src->tsc_shift);
> >>> > +
> >>> > +   } while (pvclock_read_retry(src, version));
> >>> > +
> >>> > +   preempt_enable_notrace();
> >>> > +
> >>> > +   tspec.tv_nsec = tspec.tv_nsec + offset;
> >>> > +
> >>> > +   spin_unlock(_ptp_lock);
> >>> > +
> >>> > +   if (tspec.tv_nsec >= NSEC_PER_SEC) {
> >>> > +   u64 secs = tspec.tv_nsec;
> >>> > +
> >>> > +   tspec.tv_nsec = do_div(secs, NSEC_PER_SEC);
> >>> > +   tspec.tv_sec += secs;
> >>> > +   }
> >>> > +
> >>> > +   memcpy(ts, , sizeof(struct timespec64));
> >>> 
> >>> But the whole idea is of improving the time by reading tsc a bit later
> >>> is just weird ... why is it better to provide
> >>> 
> >>>   tsc + x, time + tsc_delta_to_time(x)
> >>> 
> >>> than just
> >>> 
> >>>  tsc, time
> >>> 
> >>> ?
> >> 
> >> Because you want to calculate the value of the host realtime clock 
> >> at the moment of ptp_kvm_gettime.
> >> 
> >> We do:
> >> 
> >>1. kvm_hypercall.
> >>2. get {sec, nsec, guest_tsc}.
> >>3. kvm_hypercall returns.
> >>4. delay = rdtsc() - guest_tsc.
> >> 
> >> Where delay is the delta (measured with the TSC) between points 2 and 4.
> > 
> > I see now ... the PTP interface is just not good for our purposes.
> 
> There is getcrosststamp() callback in PTP, which seems to be exactly
> what we want when pairing with TSC, so the pvclock delay fixup can be
> dropped when using it.

What pvclock delay fixup you refer to? The "rdtsc() - clock_offset.tsc"
part? You can't drop it, because if you do then your "host realtime
clock read" will be behind by "rdtsc() - clock_offset.tsc" TSC cycles.
We want the highest precision as possible.



Re: [patch 3/3] PTP: add kvm PTP driver

2017-01-16 Thread Marcelo Tosatti
On Mon, Jan 16, 2017 at 05:26:53PM +0100, Radim Krcmar wrote:
> 2017-01-13 15:40-0200, Marcelo Tosatti:
> > On Fri, Jan 13, 2017 at 04:56:58PM +0100, Radim Krcmar wrote:
> > > 2017-01-13 10:01-0200, Marcelo Tosatti:
> >> > +version = pvclock_read_begin(src);
> >> > +
> >> > +ret = kvm_hypercall2(KVM_HC_CLOCK_OFFSET,
> >> > + clock_off_gpa,
> >> > + KVM_CLOCK_OFFSET_WALLCLOCK);
> >> > +if (ret != 0) {
> >> > +pr_err("clock offset hypercall ret %lu\n", ret);
> >> > +spin_unlock(_ptp_lock);
> >> > +preempt_enable_notrace();
> >> > +return -EOPNOTSUPP;
> >> > +}
> >> > +
> >> > +tspec.tv_sec = clock_off.sec;
> >> > +tspec.tv_nsec = clock_off.nsec;
> >> > +
> >> > +delta = rdtsc_ordered() - clock_off.tsc;
> >> > +
> >> > +offset = pvclock_scale_delta(delta, 
> >> > src->tsc_to_system_mul,
> >> > + src->tsc_shift);
> >> > +
> >> > +} while (pvclock_read_retry(src, version));
> >> > +
> >> > +preempt_enable_notrace();
> >> > +
> >> > +tspec.tv_nsec = tspec.tv_nsec + offset;
> >> > +
> >> > +spin_unlock(_ptp_lock);
> >> > +
> >> > +if (tspec.tv_nsec >= NSEC_PER_SEC) {
> >> > +u64 secs = tspec.tv_nsec;
> >> > +
> >> > +tspec.tv_nsec = do_div(secs, NSEC_PER_SEC);
> >> > +tspec.tv_sec += secs;
> >> > +}
> >> > +
> >> > +memcpy(ts, , sizeof(struct timespec64));
> >> 
> >> But the whole idea is of improving the time by reading tsc a bit later
> >> is just weird ... why is it better to provide
> >> 
> >>   tsc + x, time + tsc_delta_to_time(x)
> >> 
> >> than just
> >> 
> >>  tsc, time
> >> 
> >> ?
> > 
> > Because you want to calculate the value of the host realtime clock 
> > at the moment of ptp_kvm_gettime.
> > 
> > We do:
> > 
> > 1. kvm_hypercall.
> > 2. get {sec, nsec, guest_tsc}.
> > 3. kvm_hypercall returns.
> > 4. delay = rdtsc() - guest_tsc.
> > 
> > Where delay is the delta (measured with the TSC) between points 2 and 4.
> 
> I see now ... the PTP interface is just not good for our purposes.
> We don't return {sec, nsec, guest_tsc}, we just return {sec, nsec} at
> some random time in the past.  And to make it a bit more accurate, you
> add a best-effort delta before returning, which makes sense.

Not random time in the past. We return {sec, nsec} from the host
realtime at the moment the user ran the hypercall. 

Since PTP is very accurate, that "a bit more" counts, yes.

> When we have to depend on pvclock, what are the advantages of not using
> the existing pvclock API for wall clock?
> (You mentioned some extensions.)
> 
>   struct pvclock_wall_clock {
>   u32   version;
>   u32   sec;
>   u32   nsec;
>   } __attribute__((__packed__));

> It gives the wall clock when pvclock was 0, so you just add current
> kvmclock and get the host wall clock.  

Well, no. For one, the TSC part of kvmclock: 

kvmclock-read = system_timestamp + convert-to-1GHz(rdtsc() - 
tsc_timestamp)
   

Drifts relative to UTC. This part can be large.
The guests NTP is responsible for fixing
that drift of the guests realtime clock (talking about current setup, 
without KVM PTP driver).

Now, we want very high precision (less than 1us) for this
driver. Very small TSC drifts on a large delta defeat the purpose.

> Without a VM exit.

Huge performance is not an issue. Accuracy (how different from the host
realtime clock our "approximation" of the host realtime clock) is.

> And how often is ptp_kvm_gettime() usually called?

The PTP_SYS_OFFSET ioctl calls the following code in a loop:

struct ptp_sys_offset {
unsigned int n_samples; /* Desired number of measurements. */
unsigned int rsv[3];/* Reserved for future use. */
/*
 * Array of interleaved system/phc time stamps. The kernel
 * will provide 2*n_samples + 1 time stamps, with the last
 * one as a system time stamp.
 */
struct ptp_clock_time ts[2 * PTP_MAX_SAMPLES + 1];
};

#define PTP_MAX_SAMPLES 25 /* Maximum allowed offset measurement
samples. */

case PTP_SYS_OFFSET:
sysoff = memdup_user((void __user *)arg,
sizeof(*sysoff));
if (IS_ERR(sysoff)) {
err = PTR_ERR(sysoff);
sysoff = NULL;
break;
}
if (sysoff->n_samples > PTP_MAX_SAMPLES) {
err = -EINVAL;
break;
}
pct = >ts[0];
for (i = 0; i < sysoff->n_samples; i++) {
   

Re: [patch 3/3] PTP: add kvm PTP driver

2017-01-16 Thread Marcelo Tosatti
On Mon, Jan 16, 2017 at 05:26:53PM +0100, Radim Krcmar wrote:
> 2017-01-13 15:40-0200, Marcelo Tosatti:
> > On Fri, Jan 13, 2017 at 04:56:58PM +0100, Radim Krcmar wrote:
> > > 2017-01-13 10:01-0200, Marcelo Tosatti:
> >> > +version = pvclock_read_begin(src);
> >> > +
> >> > +ret = kvm_hypercall2(KVM_HC_CLOCK_OFFSET,
> >> > + clock_off_gpa,
> >> > + KVM_CLOCK_OFFSET_WALLCLOCK);
> >> > +if (ret != 0) {
> >> > +pr_err("clock offset hypercall ret %lu\n", ret);
> >> > +spin_unlock(_ptp_lock);
> >> > +preempt_enable_notrace();
> >> > +return -EOPNOTSUPP;
> >> > +}
> >> > +
> >> > +tspec.tv_sec = clock_off.sec;
> >> > +tspec.tv_nsec = clock_off.nsec;
> >> > +
> >> > +delta = rdtsc_ordered() - clock_off.tsc;
> >> > +
> >> > +offset = pvclock_scale_delta(delta, 
> >> > src->tsc_to_system_mul,
> >> > + src->tsc_shift);
> >> > +
> >> > +} while (pvclock_read_retry(src, version));
> >> > +
> >> > +preempt_enable_notrace();
> >> > +
> >> > +tspec.tv_nsec = tspec.tv_nsec + offset;
> >> > +
> >> > +spin_unlock(_ptp_lock);
> >> > +
> >> > +if (tspec.tv_nsec >= NSEC_PER_SEC) {
> >> > +u64 secs = tspec.tv_nsec;
> >> > +
> >> > +tspec.tv_nsec = do_div(secs, NSEC_PER_SEC);
> >> > +tspec.tv_sec += secs;
> >> > +}
> >> > +
> >> > +memcpy(ts, , sizeof(struct timespec64));
> >> 
> >> But the whole idea is of improving the time by reading tsc a bit later
> >> is just weird ... why is it better to provide
> >> 
> >>   tsc + x, time + tsc_delta_to_time(x)
> >> 
> >> than just
> >> 
> >>  tsc, time
> >> 
> >> ?
> > 
> > Because you want to calculate the value of the host realtime clock 
> > at the moment of ptp_kvm_gettime.
> > 
> > We do:
> > 
> > 1. kvm_hypercall.
> > 2. get {sec, nsec, guest_tsc}.
> > 3. kvm_hypercall returns.
> > 4. delay = rdtsc() - guest_tsc.
> > 
> > Where delay is the delta (measured with the TSC) between points 2 and 4.
> 
> I see now ... the PTP interface is just not good for our purposes.
> We don't return {sec, nsec, guest_tsc}, we just return {sec, nsec} at
> some random time in the past.  And to make it a bit more accurate, you
> add a best-effort delta before returning, which makes sense.

Not random time in the past. We return {sec, nsec} from the host
realtime at the moment the user ran the hypercall. 

Since PTP is very accurate, that "a bit more" counts, yes.

> When we have to depend on pvclock, what are the advantages of not using
> the existing pvclock API for wall clock?
> (You mentioned some extensions.)
> 
>   struct pvclock_wall_clock {
>   u32   version;
>   u32   sec;
>   u32   nsec;
>   } __attribute__((__packed__));

> It gives the wall clock when pvclock was 0, so you just add current
> kvmclock and get the host wall clock.  

Well, no. For one, the TSC part of kvmclock: 

kvmclock-read = system_timestamp + convert-to-1GHz(rdtsc() - 
tsc_timestamp)
   

Drifts relative to UTC. This part can be large.
The guests NTP is responsible for fixing
that drift of the guests realtime clock (talking about current setup, 
without KVM PTP driver).

Now, we want very high precision (less than 1us) for this
driver. Very small TSC drifts on a large delta defeat the purpose.

> Without a VM exit.

Huge performance is not an issue. Accuracy (how different from the host
realtime clock our "approximation" of the host realtime clock) is.

> And how often is ptp_kvm_gettime() usually called?

The PTP_SYS_OFFSET ioctl calls the following code in a loop:

struct ptp_sys_offset {
unsigned int n_samples; /* Desired number of measurements. */
unsigned int rsv[3];/* Reserved for future use. */
/*
 * Array of interleaved system/phc time stamps. The kernel
 * will provide 2*n_samples + 1 time stamps, with the last
 * one as a system time stamp.
 */
struct ptp_clock_time ts[2 * PTP_MAX_SAMPLES + 1];
};

#define PTP_MAX_SAMPLES 25 /* Maximum allowed offset measurement
samples. */

case PTP_SYS_OFFSET:
sysoff = memdup_user((void __user *)arg,
sizeof(*sysoff));
if (IS_ERR(sysoff)) {
err = PTR_ERR(sysoff);
sysoff = NULL;
break;
}
if (sysoff->n_samples > PTP_MAX_SAMPLES) {
err = -EINVAL;
break;
}
pct = >ts[0];
for (i = 0; i < sysoff->n_samples; i++) {
   

Re: [patch 3/3] PTP: add kvm PTP driver

2017-01-16 Thread Radim Krcmar
2017-01-16 17:26+0100, Radim Krcmar:
> 2017-01-13 15:40-0200, Marcelo Tosatti:
>> On Fri, Jan 13, 2017 at 04:56:58PM +0100, Radim Krcmar wrote:
>> > 2017-01-13 10:01-0200, Marcelo Tosatti:
>>> > + version = pvclock_read_begin(src);
>>> > +
>>> > + ret = kvm_hypercall2(KVM_HC_CLOCK_OFFSET,
>>> > +  clock_off_gpa,
>>> > +  KVM_CLOCK_OFFSET_WALLCLOCK);
>>> > + if (ret != 0) {
>>> > + pr_err("clock offset hypercall ret %lu\n", ret);
>>> > + spin_unlock(_ptp_lock);
>>> > + preempt_enable_notrace();
>>> > + return -EOPNOTSUPP;
>>> > + }
>>> > +
>>> > + tspec.tv_sec = clock_off.sec;
>>> > + tspec.tv_nsec = clock_off.nsec;
>>> > +
>>> > + delta = rdtsc_ordered() - clock_off.tsc;
>>> > +
>>> > + offset = pvclock_scale_delta(delta, src->tsc_to_system_mul,
>>> > +  src->tsc_shift);
>>> > +
>>> > + } while (pvclock_read_retry(src, version));
>>> > +
>>> > + preempt_enable_notrace();
>>> > +
>>> > + tspec.tv_nsec = tspec.tv_nsec + offset;
>>> > +
>>> > + spin_unlock(_ptp_lock);
>>> > +
>>> > + if (tspec.tv_nsec >= NSEC_PER_SEC) {
>>> > + u64 secs = tspec.tv_nsec;
>>> > +
>>> > + tspec.tv_nsec = do_div(secs, NSEC_PER_SEC);
>>> > + tspec.tv_sec += secs;
>>> > + }
>>> > +
>>> > + memcpy(ts, , sizeof(struct timespec64));
>>> 
>>> But the whole idea is of improving the time by reading tsc a bit later
>>> is just weird ... why is it better to provide
>>> 
>>>   tsc + x, time + tsc_delta_to_time(x)
>>> 
>>> than just
>>> 
>>>  tsc, time
>>> 
>>> ?
>> 
>> Because you want to calculate the value of the host realtime clock 
>> at the moment of ptp_kvm_gettime.
>> 
>> We do:
>> 
>>  1. kvm_hypercall.
>>  2. get {sec, nsec, guest_tsc}.
>>  3. kvm_hypercall returns.
>>  4. delay = rdtsc() - guest_tsc.
>> 
>> Where delay is the delta (measured with the TSC) between points 2 and 4.
> 
> I see now ... the PTP interface is just not good for our purposes.

There is getcrosststamp() callback in PTP, which seems to be exactly
what we want when pairing with TSC, so the pvclock delay fixup can be
dropped when using it.


Re: [patch 3/3] PTP: add kvm PTP driver

2017-01-16 Thread Radim Krcmar
2017-01-16 17:26+0100, Radim Krcmar:
> 2017-01-13 15:40-0200, Marcelo Tosatti:
>> On Fri, Jan 13, 2017 at 04:56:58PM +0100, Radim Krcmar wrote:
>> > 2017-01-13 10:01-0200, Marcelo Tosatti:
>>> > + version = pvclock_read_begin(src);
>>> > +
>>> > + ret = kvm_hypercall2(KVM_HC_CLOCK_OFFSET,
>>> > +  clock_off_gpa,
>>> > +  KVM_CLOCK_OFFSET_WALLCLOCK);
>>> > + if (ret != 0) {
>>> > + pr_err("clock offset hypercall ret %lu\n", ret);
>>> > + spin_unlock(_ptp_lock);
>>> > + preempt_enable_notrace();
>>> > + return -EOPNOTSUPP;
>>> > + }
>>> > +
>>> > + tspec.tv_sec = clock_off.sec;
>>> > + tspec.tv_nsec = clock_off.nsec;
>>> > +
>>> > + delta = rdtsc_ordered() - clock_off.tsc;
>>> > +
>>> > + offset = pvclock_scale_delta(delta, src->tsc_to_system_mul,
>>> > +  src->tsc_shift);
>>> > +
>>> > + } while (pvclock_read_retry(src, version));
>>> > +
>>> > + preempt_enable_notrace();
>>> > +
>>> > + tspec.tv_nsec = tspec.tv_nsec + offset;
>>> > +
>>> > + spin_unlock(_ptp_lock);
>>> > +
>>> > + if (tspec.tv_nsec >= NSEC_PER_SEC) {
>>> > + u64 secs = tspec.tv_nsec;
>>> > +
>>> > + tspec.tv_nsec = do_div(secs, NSEC_PER_SEC);
>>> > + tspec.tv_sec += secs;
>>> > + }
>>> > +
>>> > + memcpy(ts, , sizeof(struct timespec64));
>>> 
>>> But the whole idea is of improving the time by reading tsc a bit later
>>> is just weird ... why is it better to provide
>>> 
>>>   tsc + x, time + tsc_delta_to_time(x)
>>> 
>>> than just
>>> 
>>>  tsc, time
>>> 
>>> ?
>> 
>> Because you want to calculate the value of the host realtime clock 
>> at the moment of ptp_kvm_gettime.
>> 
>> We do:
>> 
>>  1. kvm_hypercall.
>>  2. get {sec, nsec, guest_tsc}.
>>  3. kvm_hypercall returns.
>>  4. delay = rdtsc() - guest_tsc.
>> 
>> Where delay is the delta (measured with the TSC) between points 2 and 4.
> 
> I see now ... the PTP interface is just not good for our purposes.

There is getcrosststamp() callback in PTP, which seems to be exactly
what we want when pairing with TSC, so the pvclock delay fixup can be
dropped when using it.


Re: [patch 3/3] PTP: add kvm PTP driver

2017-01-16 Thread Radim Krcmar
2017-01-13 15:40-0200, Marcelo Tosatti:
> On Fri, Jan 13, 2017 at 04:56:58PM +0100, Radim Krcmar wrote:
> > 2017-01-13 10:01-0200, Marcelo Tosatti:
>> > +  version = pvclock_read_begin(src);
>> > +
>> > +  ret = kvm_hypercall2(KVM_HC_CLOCK_OFFSET,
>> > +   clock_off_gpa,
>> > +   KVM_CLOCK_OFFSET_WALLCLOCK);
>> > +  if (ret != 0) {
>> > +  pr_err("clock offset hypercall ret %lu\n", ret);
>> > +  spin_unlock(_ptp_lock);
>> > +  preempt_enable_notrace();
>> > +  return -EOPNOTSUPP;
>> > +  }
>> > +
>> > +  tspec.tv_sec = clock_off.sec;
>> > +  tspec.tv_nsec = clock_off.nsec;
>> > +
>> > +  delta = rdtsc_ordered() - clock_off.tsc;
>> > +
>> > +  offset = pvclock_scale_delta(delta, src->tsc_to_system_mul,
>> > +   src->tsc_shift);
>> > +
>> > +  } while (pvclock_read_retry(src, version));
>> > +
>> > +  preempt_enable_notrace();
>> > +
>> > +  tspec.tv_nsec = tspec.tv_nsec + offset;
>> > +
>> > +  spin_unlock(_ptp_lock);
>> > +
>> > +  if (tspec.tv_nsec >= NSEC_PER_SEC) {
>> > +  u64 secs = tspec.tv_nsec;
>> > +
>> > +  tspec.tv_nsec = do_div(secs, NSEC_PER_SEC);
>> > +  tspec.tv_sec += secs;
>> > +  }
>> > +
>> > +  memcpy(ts, , sizeof(struct timespec64));
>> 
>> But the whole idea is of improving the time by reading tsc a bit later
>> is just weird ... why is it better to provide
>> 
>>   tsc + x, time + tsc_delta_to_time(x)
>> 
>> than just
>> 
>>  tsc, time
>> 
>> ?
> 
> Because you want to calculate the value of the host realtime clock 
> at the moment of ptp_kvm_gettime.
> 
> We do:
> 
>   1. kvm_hypercall.
>   2. get {sec, nsec, guest_tsc}.
>   3. kvm_hypercall returns.
>   4. delay = rdtsc() - guest_tsc.
> 
> Where delay is the delta (measured with the TSC) between points 2 and 4.

I see now ... the PTP interface is just not good for our purposes.
We don't return {sec, nsec, guest_tsc}, we just return {sec, nsec} at
some random time in the past.  And to make it a bit more accurate, you
add a best-effort delta before returning, which makes sense.

When we have to depend on pvclock, what are the advantages of not using
the existing pvclock API for wall clock?
(You mentioned some extensions.)

  struct pvclock_wall_clock {
u32   version;
u32   sec;
u32   nsec;
  } __attribute__((__packed__));

It gives the wall clock when pvclock was 0, so you just add current
kvmclock and get the host wall clock.  Without a VM exit.

And how often is ptp_kvm_gettime() usually called?

Thanks.

>> Because we'll always be quering the time at tsc + y, where y >> x, and
>> we'd likely have other problems if shifting the time base by few
>> thousand cycles made a difference.
> 
> Radim, i didnt get your "tsc + x", "time + tsc_delta_to_time(x)"
> formulas above. Can you be more verbose please?

x is the delta, tsc_delta_to_time() is what pvclock_scale_delta() does.

I assumed that we set precise time with TSC, so the delta wouldn't
matter, because PTP would either get {sec, nsec, guest_tsc}, or the
same, but just shifted by delta, hence
{sec  + tsc_delta_to_time(x) / NSEC_PER_SEC,
 nsec + tsc_delta_to_time(x) % NSEC_PER_SEC,
 guest_tsc + x}.


Re: [patch 3/3] PTP: add kvm PTP driver

2017-01-16 Thread Radim Krcmar
2017-01-13 15:40-0200, Marcelo Tosatti:
> On Fri, Jan 13, 2017 at 04:56:58PM +0100, Radim Krcmar wrote:
> > 2017-01-13 10:01-0200, Marcelo Tosatti:
>> > +  version = pvclock_read_begin(src);
>> > +
>> > +  ret = kvm_hypercall2(KVM_HC_CLOCK_OFFSET,
>> > +   clock_off_gpa,
>> > +   KVM_CLOCK_OFFSET_WALLCLOCK);
>> > +  if (ret != 0) {
>> > +  pr_err("clock offset hypercall ret %lu\n", ret);
>> > +  spin_unlock(_ptp_lock);
>> > +  preempt_enable_notrace();
>> > +  return -EOPNOTSUPP;
>> > +  }
>> > +
>> > +  tspec.tv_sec = clock_off.sec;
>> > +  tspec.tv_nsec = clock_off.nsec;
>> > +
>> > +  delta = rdtsc_ordered() - clock_off.tsc;
>> > +
>> > +  offset = pvclock_scale_delta(delta, src->tsc_to_system_mul,
>> > +   src->tsc_shift);
>> > +
>> > +  } while (pvclock_read_retry(src, version));
>> > +
>> > +  preempt_enable_notrace();
>> > +
>> > +  tspec.tv_nsec = tspec.tv_nsec + offset;
>> > +
>> > +  spin_unlock(_ptp_lock);
>> > +
>> > +  if (tspec.tv_nsec >= NSEC_PER_SEC) {
>> > +  u64 secs = tspec.tv_nsec;
>> > +
>> > +  tspec.tv_nsec = do_div(secs, NSEC_PER_SEC);
>> > +  tspec.tv_sec += secs;
>> > +  }
>> > +
>> > +  memcpy(ts, , sizeof(struct timespec64));
>> 
>> But the whole idea is of improving the time by reading tsc a bit later
>> is just weird ... why is it better to provide
>> 
>>   tsc + x, time + tsc_delta_to_time(x)
>> 
>> than just
>> 
>>  tsc, time
>> 
>> ?
> 
> Because you want to calculate the value of the host realtime clock 
> at the moment of ptp_kvm_gettime.
> 
> We do:
> 
>   1. kvm_hypercall.
>   2. get {sec, nsec, guest_tsc}.
>   3. kvm_hypercall returns.
>   4. delay = rdtsc() - guest_tsc.
> 
> Where delay is the delta (measured with the TSC) between points 2 and 4.

I see now ... the PTP interface is just not good for our purposes.
We don't return {sec, nsec, guest_tsc}, we just return {sec, nsec} at
some random time in the past.  And to make it a bit more accurate, you
add a best-effort delta before returning, which makes sense.

When we have to depend on pvclock, what are the advantages of not using
the existing pvclock API for wall clock?
(You mentioned some extensions.)

  struct pvclock_wall_clock {
u32   version;
u32   sec;
u32   nsec;
  } __attribute__((__packed__));

It gives the wall clock when pvclock was 0, so you just add current
kvmclock and get the host wall clock.  Without a VM exit.

And how often is ptp_kvm_gettime() usually called?

Thanks.

>> Because we'll always be quering the time at tsc + y, where y >> x, and
>> we'd likely have other problems if shifting the time base by few
>> thousand cycles made a difference.
> 
> Radim, i didnt get your "tsc + x", "time + tsc_delta_to_time(x)"
> formulas above. Can you be more verbose please?

x is the delta, tsc_delta_to_time() is what pvclock_scale_delta() does.

I assumed that we set precise time with TSC, so the delta wouldn't
matter, because PTP would either get {sec, nsec, guest_tsc}, or the
same, but just shifted by delta, hence
{sec  + tsc_delta_to_time(x) / NSEC_PER_SEC,
 nsec + tsc_delta_to_time(x) % NSEC_PER_SEC,
 guest_tsc + x}.


Re: [patch 3/3] PTP: add kvm PTP driver

2017-01-16 Thread Radim Krcmar
2017-01-14 16:26+0100, Richard Cochran:
> On Fri, Jan 13, 2017 at 04:56:58PM +0100, Radim Krcmar wrote:
>> > +static int __init ptp_kvm_init(void)
>> > +{
>> > +  if (!kvm_para_available())
>> > +  return -ENODEV;
>> > +
>> > +  kvm_ptp_clock.caps = ptp_kvm_caps;
>> > +
>> > +  kvm_ptp_clock.ptp_clock = ptp_clock_register(_ptp_clock.caps, NULL);
>> 
>> It is a shame that the infrastructure uses polling when the guest could
>> be notified on every host real time change, but this should be good
>> enough.
> 
> This comment makes no sense at all.  What do you mean by "host real
> time change"?

Real time in the sense of wall clock, as perceived by the host, and the
change of that time.

Unlike other PTP drivers, host (source) and guest (destination) share
the same hardware clock, so they cannot shift or drift unless one of
them changes its "TSC to real time" conversion (the host is most likely
using NTP/PTP to keep its own real time).

I meant that the host could notify the guest when a change happens,
which would be more efficient.


Re: [patch 3/3] PTP: add kvm PTP driver

2017-01-16 Thread Radim Krcmar
2017-01-14 16:26+0100, Richard Cochran:
> On Fri, Jan 13, 2017 at 04:56:58PM +0100, Radim Krcmar wrote:
>> > +static int __init ptp_kvm_init(void)
>> > +{
>> > +  if (!kvm_para_available())
>> > +  return -ENODEV;
>> > +
>> > +  kvm_ptp_clock.caps = ptp_kvm_caps;
>> > +
>> > +  kvm_ptp_clock.ptp_clock = ptp_clock_register(_ptp_clock.caps, NULL);
>> 
>> It is a shame that the infrastructure uses polling when the guest could
>> be notified on every host real time change, but this should be good
>> enough.
> 
> This comment makes no sense at all.  What do you mean by "host real
> time change"?

Real time in the sense of wall clock, as perceived by the host, and the
change of that time.

Unlike other PTP drivers, host (source) and guest (destination) share
the same hardware clock, so they cannot shift or drift unless one of
them changes its "TSC to real time" conversion (the host is most likely
using NTP/PTP to keep its own real time).

I meant that the host could notify the guest when a change happens,
which would be more efficient.


Re: [patch 3/3] PTP: add kvm PTP driver

2017-01-14 Thread Richard Cochran
On Fri, Jan 13, 2017 at 04:56:58PM +0100, Radim Krcmar wrote:
> > +static int __init ptp_kvm_init(void)
> > +{
> > +   if (!kvm_para_available())
> > +   return -ENODEV;
> > +
> > +   kvm_ptp_clock.caps = ptp_kvm_caps;
> > +
> > +   kvm_ptp_clock.ptp_clock = ptp_clock_register(_ptp_clock.caps, NULL);
> 
> It is a shame that the infrastructure uses polling when the guest could
> be notified on every host real time change, but this should be good
> enough.

This comment makes no sense at all.  What do you mean by "host real
time change"?

Thanks,
Richard



Re: [patch 3/3] PTP: add kvm PTP driver

2017-01-14 Thread Richard Cochran
On Fri, Jan 13, 2017 at 04:56:58PM +0100, Radim Krcmar wrote:
> > +static int __init ptp_kvm_init(void)
> > +{
> > +   if (!kvm_para_available())
> > +   return -ENODEV;
> > +
> > +   kvm_ptp_clock.caps = ptp_kvm_caps;
> > +
> > +   kvm_ptp_clock.ptp_clock = ptp_clock_register(_ptp_clock.caps, NULL);
> 
> It is a shame that the infrastructure uses polling when the guest could
> be notified on every host real time change, but this should be good
> enough.

This comment makes no sense at all.  What do you mean by "host real
time change"?

Thanks,
Richard



[patch 3/3] PTP: add kvm PTP driver

2017-01-13 Thread Marcelo Tosatti
Add a driver with gettime method returning hosts realtime clock.
This allows Chrony to synchronize host and guest clocks with 
high precision (see results below).

chronyc> sources
MS Name/IP address Stratum Poll Reach LastRx Last sample
===
#* PHC0  0   3   377 6 +4ns[   +4ns] +/-3ns

To configure Chronyd to use PHC refclock, add the 
following line to its configuration file:

refclock PHC /dev/ptpX poll 3 dpoll -2 offset 0

Where /dev/ptpX is the kvmclock PTP clock.

Signed-off-by: Marcelo Tosatti 

---
 drivers/ptp/Kconfig   |   12 +++
 drivers/ptp/Makefile  |1 
 drivers/ptp/ptp_kvm.c |  179 ++
 3 files changed, 192 insertions(+)

v2: check for kvmclock (Radim)
initialize global variables before device registration (Radim)


Index: kvm-ptpdriver/drivers/ptp/Kconfig
===
--- kvm-ptpdriver.orig/drivers/ptp/Kconfig  2017-01-13 16:43:04.808231054 
-0200
+++ kvm-ptpdriver/drivers/ptp/Kconfig   2017-01-13 16:43:31.248266610 -0200
@@ -90,4 +90,16 @@
  To compile this driver as a module, choose M here: the module
  will be called ptp_pch.
 
+config PTP_1588_CLOCK_KVM
+   tristate "KVM virtual PTP clock"
+   depends on PTP_1588_CLOCK
+   depends on KVM_GUEST
+   default y
+   help
+ This driver adds support for using kvm infrastructure as a PTP
+ clock. This clock is only useful if you are using KVM guests.
+
+ To compile this driver as a module, choose M here: the module
+ will be called ptp_kvm.
+
 endmenu
Index: kvm-ptpdriver/drivers/ptp/ptp_kvm.c
===
--- /dev/null   1970-01-01 00:00:00.0 +
+++ kvm-ptpdriver/drivers/ptp/ptp_kvm.c 2017-01-13 16:43:31.248266610 -0200
@@ -0,0 +1,179 @@
+/*
+ * Virtual PTP 1588 clock for use with KVM guests
+ *
+ * Copyright (C) 2017 Red Hat Inc.
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ */
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include 
+
+struct kvm_ptp_clock {
+   struct ptp_clock *ptp_clock;
+   struct ptp_clock_info caps;
+};
+
+DEFINE_SPINLOCK(kvm_ptp_lock);
+
+static struct pvclock_vsyscall_time_info *hv_clock;
+
+/*
+ * PTP clock operations
+ */
+
+static int ptp_kvm_adjfreq(struct ptp_clock_info *ptp, s32 ppb)
+{
+   return -EOPNOTSUPP;
+}
+
+static int ptp_kvm_adjtime(struct ptp_clock_info *ptp, s64 delta)
+{
+   return -EOPNOTSUPP;
+}
+
+static struct kvm_clock_offset clock_off;
+static phys_addr_t clock_off_gpa;
+
+static int ptp_kvm_gettime(struct ptp_clock_info *ptp, struct timespec64 *ts)
+{
+   unsigned long ret;
+   struct timespec64 tspec;
+   u64 delta;
+   cycle_t offset;
+   unsigned version;
+   int cpu;
+   struct pvclock_vcpu_time_info *src;
+
+   preempt_disable_notrace();
+   cpu = smp_processor_id();
+   src = _clock[cpu].pvti;
+
+   spin_lock(_ptp_lock);
+
+   do {
+   /*
+* We are measuring the delay between
+* kvm_hypercall and rdtsc using TSC,
+* and converting that delta to
+* tsc_to_system_mul and tsc_shift
+* So any changes to tsc_to_system_mul
+* and tsc_shift in this region
+* invalidate the measurement.
+*/
+   version = pvclock_read_begin(src);
+
+   ret = kvm_hypercall2(KVM_HC_CLOCK_PAIRING,
+clock_off_gpa,
+KVM_CLOCK_PAIRING_WALLCLOCK);
+   if (ret != 0) {
+   pr_err("clock offset hypercall ret %lu\n", ret);
+   spin_unlock(_ptp_lock);
+   preempt_enable_notrace();
+   return -EOPNOTSUPP;
+   }
+
+   tspec.tv_sec = clock_off.sec;
+   tspec.tv_nsec = clock_off.nsec;
+
+   delta = rdtsc_ordered() - clock_off.tsc;
+
+   offset = pvclock_scale_delta(delta, src->tsc_to_system_mul,
+src->tsc_shift);
+
+   } while (pvclock_read_retry(src, version));
+
+   preempt_enable_notrace();
+
+   tspec.tv_nsec = 

[patch 3/3] PTP: add kvm PTP driver

2017-01-13 Thread Marcelo Tosatti
Add a driver with gettime method returning hosts realtime clock.
This allows Chrony to synchronize host and guest clocks with 
high precision (see results below).

chronyc> sources
MS Name/IP address Stratum Poll Reach LastRx Last sample
===
#* PHC0  0   3   377 6 +4ns[   +4ns] +/-3ns

To configure Chronyd to use PHC refclock, add the 
following line to its configuration file:

refclock PHC /dev/ptpX poll 3 dpoll -2 offset 0

Where /dev/ptpX is the kvmclock PTP clock.

Signed-off-by: Marcelo Tosatti 

---
 drivers/ptp/Kconfig   |   12 +++
 drivers/ptp/Makefile  |1 
 drivers/ptp/ptp_kvm.c |  179 ++
 3 files changed, 192 insertions(+)

v2: check for kvmclock (Radim)
initialize global variables before device registration (Radim)


Index: kvm-ptpdriver/drivers/ptp/Kconfig
===
--- kvm-ptpdriver.orig/drivers/ptp/Kconfig  2017-01-13 16:43:04.808231054 
-0200
+++ kvm-ptpdriver/drivers/ptp/Kconfig   2017-01-13 16:43:31.248266610 -0200
@@ -90,4 +90,16 @@
  To compile this driver as a module, choose M here: the module
  will be called ptp_pch.
 
+config PTP_1588_CLOCK_KVM
+   tristate "KVM virtual PTP clock"
+   depends on PTP_1588_CLOCK
+   depends on KVM_GUEST
+   default y
+   help
+ This driver adds support for using kvm infrastructure as a PTP
+ clock. This clock is only useful if you are using KVM guests.
+
+ To compile this driver as a module, choose M here: the module
+ will be called ptp_kvm.
+
 endmenu
Index: kvm-ptpdriver/drivers/ptp/ptp_kvm.c
===
--- /dev/null   1970-01-01 00:00:00.0 +
+++ kvm-ptpdriver/drivers/ptp/ptp_kvm.c 2017-01-13 16:43:31.248266610 -0200
@@ -0,0 +1,179 @@
+/*
+ * Virtual PTP 1588 clock for use with KVM guests
+ *
+ * Copyright (C) 2017 Red Hat Inc.
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ */
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include 
+
+struct kvm_ptp_clock {
+   struct ptp_clock *ptp_clock;
+   struct ptp_clock_info caps;
+};
+
+DEFINE_SPINLOCK(kvm_ptp_lock);
+
+static struct pvclock_vsyscall_time_info *hv_clock;
+
+/*
+ * PTP clock operations
+ */
+
+static int ptp_kvm_adjfreq(struct ptp_clock_info *ptp, s32 ppb)
+{
+   return -EOPNOTSUPP;
+}
+
+static int ptp_kvm_adjtime(struct ptp_clock_info *ptp, s64 delta)
+{
+   return -EOPNOTSUPP;
+}
+
+static struct kvm_clock_offset clock_off;
+static phys_addr_t clock_off_gpa;
+
+static int ptp_kvm_gettime(struct ptp_clock_info *ptp, struct timespec64 *ts)
+{
+   unsigned long ret;
+   struct timespec64 tspec;
+   u64 delta;
+   cycle_t offset;
+   unsigned version;
+   int cpu;
+   struct pvclock_vcpu_time_info *src;
+
+   preempt_disable_notrace();
+   cpu = smp_processor_id();
+   src = _clock[cpu].pvti;
+
+   spin_lock(_ptp_lock);
+
+   do {
+   /*
+* We are measuring the delay between
+* kvm_hypercall and rdtsc using TSC,
+* and converting that delta to
+* tsc_to_system_mul and tsc_shift
+* So any changes to tsc_to_system_mul
+* and tsc_shift in this region
+* invalidate the measurement.
+*/
+   version = pvclock_read_begin(src);
+
+   ret = kvm_hypercall2(KVM_HC_CLOCK_PAIRING,
+clock_off_gpa,
+KVM_CLOCK_PAIRING_WALLCLOCK);
+   if (ret != 0) {
+   pr_err("clock offset hypercall ret %lu\n", ret);
+   spin_unlock(_ptp_lock);
+   preempt_enable_notrace();
+   return -EOPNOTSUPP;
+   }
+
+   tspec.tv_sec = clock_off.sec;
+   tspec.tv_nsec = clock_off.nsec;
+
+   delta = rdtsc_ordered() - clock_off.tsc;
+
+   offset = pvclock_scale_delta(delta, src->tsc_to_system_mul,
+src->tsc_shift);
+
+   } while (pvclock_read_retry(src, version));
+
+   preempt_enable_notrace();
+
+   tspec.tv_nsec = tspec.tv_nsec + offset;

Re: [patch 3/3] PTP: add kvm PTP driver

2017-01-13 Thread Marcelo Tosatti
On Fri, Jan 13, 2017 at 04:56:58PM +0100, Radim Krcmar wrote:
> 2017-01-13 10:01-0200, Marcelo Tosatti:
> > Add a driver with gettime method returning hosts realtime clock.
> > This allows Chrony to synchronize host and guest clocks with 
> > high precision (see results below).
> > 
> > chronyc> sources
> > MS Name/IP address Stratum Poll Reach LastRx Last sample
> > ===
> > #* PHC0  0   3   377 6 +4ns[   +4ns] +/-
> > 3ns
> > 
> > To configure Chronyd to use PHC refclock, add the 
> > following line to its configuration file:
> > 
> > refclock PHC /dev/ptpX poll 3 dpoll -2 offset 0
> > 
> > Where /dev/ptpX is the kvmclock PTP clock.
> > 
> > 
> > ---
> >  drivers/ptp/Kconfig   |   12 +++
> >  drivers/ptp/Makefile  |1 
> >  drivers/ptp/ptp_kvm.c |  180 
> > ++
> >  3 files changed, 193 insertions(+)
> > 
> > Index: kvm-ptpdriver/drivers/ptp/Kconfig
> > ===
> > --- kvm-ptpdriver.orig/drivers/ptp/Kconfig  2017-01-13 09:17:31.724568567 
> > -0200
> > +++ kvm-ptpdriver/drivers/ptp/Kconfig   2017-01-13 09:55:33.344208894 
> > -0200
> > @@ -90,4 +90,16 @@
> >   To compile this driver as a module, choose M here: the module
> >   will be called ptp_pch.
> >  
> > +config PTP_1588_CLOCK_KVM
> > +   tristate "KVM virtual PTP clock"
> > +   depends on PTP_1588_CLOCK
> > +   depends on KVM_GUEST
> > +   default y
> > +   help
> > + This driver adds support for using kvm infrastructure as a PTP
> > + clock. This clock is only useful if you are using KVM guests.
> > +
> > + To compile this driver as a module, choose M here: the module
> > + will be called ptp_kvm.
> > +
> >  endmenu
> > Index: kvm-ptpdriver/drivers/ptp/ptp_kvm.c
> > ===
> > --- /dev/null   1970-01-01 00:00:00.0 +
> > +++ kvm-ptpdriver/drivers/ptp/ptp_kvm.c 2017-01-13 09:57:55.013440645 
> > -0200
> > @@ -0,0 +1,180 @@
> > +/*
> > + * Virtual PTP 1588 clock for use with KVM guests
> > + *
> > + * Copyright (C) 2017 Red Hat Inc.
> > + *
> > + *  This program is free software; you can redistribute it and/or modify
> > + *  it under the terms of the GNU General Public License as published by
> > + *  the Free Software Foundation; either version 2 of the License, or
> > + *  (at your option) any later version.
> > + *
> > + *  This program is distributed in the hope that it will be useful,
> > + *  but WITHOUT ANY WARRANTY; without even the implied warranty of
> > + *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> > + *  GNU General Public License for more details.
> > + *
> > + */
> > +#include 
> > +#include 
> > +#include 
> > +#include 
> > +#include 
> > +#include 
> > +#include 
> > +#include 
> > +#include 
> > +
> > +#include 
> > +
> > +struct kvm_ptp_clock {
> > +   struct ptp_clock *ptp_clock;
> > +   struct ptp_clock_info caps;
> > +};
> > +
> > +DEFINE_SPINLOCK(kvm_ptp_lock);
> > +
> > +static struct pvclock_vsyscall_time_info *hv_clock;
> > +
> > +/*
> > + * PTP clock operations
> > + */
> > +
> > +static int ptp_kvm_adjfreq(struct ptp_clock_info *ptp, s32 ppb)
> > +{
> > +   return -EOPNOTSUPP;
> > +}
> > +
> > +static int ptp_kvm_adjtime(struct ptp_clock_info *ptp, s64 delta)
> > +{
> > +   return -EOPNOTSUPP;
> > +}
> > +
> > +static struct kvm_clock_offset clock_off;
> > +static phys_addr_t clock_off_gpa;
> > +
> > +static int ptp_kvm_gettime(struct ptp_clock_info *ptp, struct timespec64 
> > *ts)
> > +{
> > +   unsigned long ret;
> > +   struct timespec64 tspec;
> > +   u64 delta;
> > +   cycle_t offset;
> > +   unsigned version;
> > +   int cpu;
> > +   struct pvclock_vcpu_time_info *src;
> > +
> > +   preempt_disable_notrace();
> > +   cpu = smp_processor_id();
> > +   src = _clock[cpu].pvti;
> > +
> > +   spin_lock(_ptp_lock);
> > +
> > +   do {
> > +   /*
> > +* We are measuring the delay between
> > +* kvm_hypercall and rdtsc using TSC,
> > +* and converting that delta to
> > +* tsc_to_system_mul and tsc_shift
> > +* So any changes to tsc_to_system_mul
> > +* and tsc_shift in this region
> > +* invalidate the measurement.
> > +*/
> 
> This assumes that the host uses kvmclock, but the guest can be using
> just TSC and even have kvmclock disabled.  This code should at least
> check that kvmclock enabled.

Fixed.

> (If we made kvmclock mandatory, then we could fix pvclock_wall_clock
>  interface, because it is already defined to provide real time based on
>  kvmclock ...)
> 
> > +   version = pvclock_read_begin(src);
> > +
> > +   ret = kvm_hypercall2(KVM_HC_CLOCK_OFFSET,
> > +clock_off_gpa,
> > +

Re: [patch 3/3] PTP: add kvm PTP driver

2017-01-13 Thread Marcelo Tosatti
On Fri, Jan 13, 2017 at 04:56:58PM +0100, Radim Krcmar wrote:
> 2017-01-13 10:01-0200, Marcelo Tosatti:
> > Add a driver with gettime method returning hosts realtime clock.
> > This allows Chrony to synchronize host and guest clocks with 
> > high precision (see results below).
> > 
> > chronyc> sources
> > MS Name/IP address Stratum Poll Reach LastRx Last sample
> > ===
> > #* PHC0  0   3   377 6 +4ns[   +4ns] +/-
> > 3ns
> > 
> > To configure Chronyd to use PHC refclock, add the 
> > following line to its configuration file:
> > 
> > refclock PHC /dev/ptpX poll 3 dpoll -2 offset 0
> > 
> > Where /dev/ptpX is the kvmclock PTP clock.
> > 
> > 
> > ---
> >  drivers/ptp/Kconfig   |   12 +++
> >  drivers/ptp/Makefile  |1 
> >  drivers/ptp/ptp_kvm.c |  180 
> > ++
> >  3 files changed, 193 insertions(+)
> > 
> > Index: kvm-ptpdriver/drivers/ptp/Kconfig
> > ===
> > --- kvm-ptpdriver.orig/drivers/ptp/Kconfig  2017-01-13 09:17:31.724568567 
> > -0200
> > +++ kvm-ptpdriver/drivers/ptp/Kconfig   2017-01-13 09:55:33.344208894 
> > -0200
> > @@ -90,4 +90,16 @@
> >   To compile this driver as a module, choose M here: the module
> >   will be called ptp_pch.
> >  
> > +config PTP_1588_CLOCK_KVM
> > +   tristate "KVM virtual PTP clock"
> > +   depends on PTP_1588_CLOCK
> > +   depends on KVM_GUEST
> > +   default y
> > +   help
> > + This driver adds support for using kvm infrastructure as a PTP
> > + clock. This clock is only useful if you are using KVM guests.
> > +
> > + To compile this driver as a module, choose M here: the module
> > + will be called ptp_kvm.
> > +
> >  endmenu
> > Index: kvm-ptpdriver/drivers/ptp/ptp_kvm.c
> > ===
> > --- /dev/null   1970-01-01 00:00:00.0 +
> > +++ kvm-ptpdriver/drivers/ptp/ptp_kvm.c 2017-01-13 09:57:55.013440645 
> > -0200
> > @@ -0,0 +1,180 @@
> > +/*
> > + * Virtual PTP 1588 clock for use with KVM guests
> > + *
> > + * Copyright (C) 2017 Red Hat Inc.
> > + *
> > + *  This program is free software; you can redistribute it and/or modify
> > + *  it under the terms of the GNU General Public License as published by
> > + *  the Free Software Foundation; either version 2 of the License, or
> > + *  (at your option) any later version.
> > + *
> > + *  This program is distributed in the hope that it will be useful,
> > + *  but WITHOUT ANY WARRANTY; without even the implied warranty of
> > + *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> > + *  GNU General Public License for more details.
> > + *
> > + */
> > +#include 
> > +#include 
> > +#include 
> > +#include 
> > +#include 
> > +#include 
> > +#include 
> > +#include 
> > +#include 
> > +
> > +#include 
> > +
> > +struct kvm_ptp_clock {
> > +   struct ptp_clock *ptp_clock;
> > +   struct ptp_clock_info caps;
> > +};
> > +
> > +DEFINE_SPINLOCK(kvm_ptp_lock);
> > +
> > +static struct pvclock_vsyscall_time_info *hv_clock;
> > +
> > +/*
> > + * PTP clock operations
> > + */
> > +
> > +static int ptp_kvm_adjfreq(struct ptp_clock_info *ptp, s32 ppb)
> > +{
> > +   return -EOPNOTSUPP;
> > +}
> > +
> > +static int ptp_kvm_adjtime(struct ptp_clock_info *ptp, s64 delta)
> > +{
> > +   return -EOPNOTSUPP;
> > +}
> > +
> > +static struct kvm_clock_offset clock_off;
> > +static phys_addr_t clock_off_gpa;
> > +
> > +static int ptp_kvm_gettime(struct ptp_clock_info *ptp, struct timespec64 
> > *ts)
> > +{
> > +   unsigned long ret;
> > +   struct timespec64 tspec;
> > +   u64 delta;
> > +   cycle_t offset;
> > +   unsigned version;
> > +   int cpu;
> > +   struct pvclock_vcpu_time_info *src;
> > +
> > +   preempt_disable_notrace();
> > +   cpu = smp_processor_id();
> > +   src = _clock[cpu].pvti;
> > +
> > +   spin_lock(_ptp_lock);
> > +
> > +   do {
> > +   /*
> > +* We are measuring the delay between
> > +* kvm_hypercall and rdtsc using TSC,
> > +* and converting that delta to
> > +* tsc_to_system_mul and tsc_shift
> > +* So any changes to tsc_to_system_mul
> > +* and tsc_shift in this region
> > +* invalidate the measurement.
> > +*/
> 
> This assumes that the host uses kvmclock, but the guest can be using
> just TSC and even have kvmclock disabled.  This code should at least
> check that kvmclock enabled.

Fixed.

> (If we made kvmclock mandatory, then we could fix pvclock_wall_clock
>  interface, because it is already defined to provide real time based on
>  kvmclock ...)
> 
> > +   version = pvclock_read_begin(src);
> > +
> > +   ret = kvm_hypercall2(KVM_HC_CLOCK_OFFSET,
> > +clock_off_gpa,
> > +

Re: [patch 3/3] PTP: add kvm PTP driver

2017-01-13 Thread Radim Krcmar
2017-01-13 10:01-0200, Marcelo Tosatti:
> Add a driver with gettime method returning hosts realtime clock.
> This allows Chrony to synchronize host and guest clocks with 
> high precision (see results below).
> 
> chronyc> sources
> MS Name/IP address Stratum Poll Reach LastRx Last sample
> ===
> #* PHC0  0   3   377 6 +4ns[   +4ns] +/-
> 3ns
> 
> To configure Chronyd to use PHC refclock, add the 
> following line to its configuration file:
> 
> refclock PHC /dev/ptpX poll 3 dpoll -2 offset 0
> 
> Where /dev/ptpX is the kvmclock PTP clock.
> 
> 
> ---
>  drivers/ptp/Kconfig   |   12 +++
>  drivers/ptp/Makefile  |1 
>  drivers/ptp/ptp_kvm.c |  180 
> ++
>  3 files changed, 193 insertions(+)
> 
> Index: kvm-ptpdriver/drivers/ptp/Kconfig
> ===
> --- kvm-ptpdriver.orig/drivers/ptp/Kconfig2017-01-13 09:17:31.724568567 
> -0200
> +++ kvm-ptpdriver/drivers/ptp/Kconfig 2017-01-13 09:55:33.344208894 -0200
> @@ -90,4 +90,16 @@
> To compile this driver as a module, choose M here: the module
> will be called ptp_pch.
>  
> +config PTP_1588_CLOCK_KVM
> + tristate "KVM virtual PTP clock"
> + depends on PTP_1588_CLOCK
> + depends on KVM_GUEST
> + default y
> + help
> +   This driver adds support for using kvm infrastructure as a PTP
> +   clock. This clock is only useful if you are using KVM guests.
> +
> +   To compile this driver as a module, choose M here: the module
> +   will be called ptp_kvm.
> +
>  endmenu
> Index: kvm-ptpdriver/drivers/ptp/ptp_kvm.c
> ===
> --- /dev/null 1970-01-01 00:00:00.0 +
> +++ kvm-ptpdriver/drivers/ptp/ptp_kvm.c   2017-01-13 09:57:55.013440645 
> -0200
> @@ -0,0 +1,180 @@
> +/*
> + * Virtual PTP 1588 clock for use with KVM guests
> + *
> + * Copyright (C) 2017 Red Hat Inc.
> + *
> + *  This program is free software; you can redistribute it and/or modify
> + *  it under the terms of the GNU General Public License as published by
> + *  the Free Software Foundation; either version 2 of the License, or
> + *  (at your option) any later version.
> + *
> + *  This program is distributed in the hope that it will be useful,
> + *  but WITHOUT ANY WARRANTY; without even the implied warranty of
> + *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + *  GNU General Public License for more details.
> + *
> + */
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +
> +#include 
> +
> +struct kvm_ptp_clock {
> + struct ptp_clock *ptp_clock;
> + struct ptp_clock_info caps;
> +};
> +
> +DEFINE_SPINLOCK(kvm_ptp_lock);
> +
> +static struct pvclock_vsyscall_time_info *hv_clock;
> +
> +/*
> + * PTP clock operations
> + */
> +
> +static int ptp_kvm_adjfreq(struct ptp_clock_info *ptp, s32 ppb)
> +{
> + return -EOPNOTSUPP;
> +}
> +
> +static int ptp_kvm_adjtime(struct ptp_clock_info *ptp, s64 delta)
> +{
> + return -EOPNOTSUPP;
> +}
> +
> +static struct kvm_clock_offset clock_off;
> +static phys_addr_t clock_off_gpa;
> +
> +static int ptp_kvm_gettime(struct ptp_clock_info *ptp, struct timespec64 *ts)
> +{
> + unsigned long ret;
> + struct timespec64 tspec;
> + u64 delta;
> + cycle_t offset;
> + unsigned version;
> + int cpu;
> + struct pvclock_vcpu_time_info *src;
> +
> + preempt_disable_notrace();
> + cpu = smp_processor_id();
> + src = _clock[cpu].pvti;
> +
> + spin_lock(_ptp_lock);
> +
> + do {
> + /*
> +  * We are measuring the delay between
> +  * kvm_hypercall and rdtsc using TSC,
> +  * and converting that delta to
> +  * tsc_to_system_mul and tsc_shift
> +  * So any changes to tsc_to_system_mul
> +  * and tsc_shift in this region
> +  * invalidate the measurement.
> +  */

This assumes that the host uses kvmclock, but the guest can be using
just TSC and even have kvmclock disabled.  This code should at least
check that kvmclock enabled.

(If we made kvmclock mandatory, then we could fix pvclock_wall_clock
 interface, because it is already defined to provide real time based on
 kvmclock ...)

> + version = pvclock_read_begin(src);
> +
> + ret = kvm_hypercall2(KVM_HC_CLOCK_OFFSET,
> +  clock_off_gpa,
> +  KVM_CLOCK_OFFSET_WALLCLOCK);
> + if (ret != 0) {
> + pr_err("clock offset hypercall ret %lu\n", ret);
> + spin_unlock(_ptp_lock);
> + preempt_enable_notrace();
> + return -EOPNOTSUPP;
> + }
> +
> + 

Re: [patch 3/3] PTP: add kvm PTP driver

2017-01-13 Thread Radim Krcmar
2017-01-13 10:01-0200, Marcelo Tosatti:
> Add a driver with gettime method returning hosts realtime clock.
> This allows Chrony to synchronize host and guest clocks with 
> high precision (see results below).
> 
> chronyc> sources
> MS Name/IP address Stratum Poll Reach LastRx Last sample
> ===
> #* PHC0  0   3   377 6 +4ns[   +4ns] +/-
> 3ns
> 
> To configure Chronyd to use PHC refclock, add the 
> following line to its configuration file:
> 
> refclock PHC /dev/ptpX poll 3 dpoll -2 offset 0
> 
> Where /dev/ptpX is the kvmclock PTP clock.
> 
> 
> ---
>  drivers/ptp/Kconfig   |   12 +++
>  drivers/ptp/Makefile  |1 
>  drivers/ptp/ptp_kvm.c |  180 
> ++
>  3 files changed, 193 insertions(+)
> 
> Index: kvm-ptpdriver/drivers/ptp/Kconfig
> ===
> --- kvm-ptpdriver.orig/drivers/ptp/Kconfig2017-01-13 09:17:31.724568567 
> -0200
> +++ kvm-ptpdriver/drivers/ptp/Kconfig 2017-01-13 09:55:33.344208894 -0200
> @@ -90,4 +90,16 @@
> To compile this driver as a module, choose M here: the module
> will be called ptp_pch.
>  
> +config PTP_1588_CLOCK_KVM
> + tristate "KVM virtual PTP clock"
> + depends on PTP_1588_CLOCK
> + depends on KVM_GUEST
> + default y
> + help
> +   This driver adds support for using kvm infrastructure as a PTP
> +   clock. This clock is only useful if you are using KVM guests.
> +
> +   To compile this driver as a module, choose M here: the module
> +   will be called ptp_kvm.
> +
>  endmenu
> Index: kvm-ptpdriver/drivers/ptp/ptp_kvm.c
> ===
> --- /dev/null 1970-01-01 00:00:00.0 +
> +++ kvm-ptpdriver/drivers/ptp/ptp_kvm.c   2017-01-13 09:57:55.013440645 
> -0200
> @@ -0,0 +1,180 @@
> +/*
> + * Virtual PTP 1588 clock for use with KVM guests
> + *
> + * Copyright (C) 2017 Red Hat Inc.
> + *
> + *  This program is free software; you can redistribute it and/or modify
> + *  it under the terms of the GNU General Public License as published by
> + *  the Free Software Foundation; either version 2 of the License, or
> + *  (at your option) any later version.
> + *
> + *  This program is distributed in the hope that it will be useful,
> + *  but WITHOUT ANY WARRANTY; without even the implied warranty of
> + *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + *  GNU General Public License for more details.
> + *
> + */
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +
> +#include 
> +
> +struct kvm_ptp_clock {
> + struct ptp_clock *ptp_clock;
> + struct ptp_clock_info caps;
> +};
> +
> +DEFINE_SPINLOCK(kvm_ptp_lock);
> +
> +static struct pvclock_vsyscall_time_info *hv_clock;
> +
> +/*
> + * PTP clock operations
> + */
> +
> +static int ptp_kvm_adjfreq(struct ptp_clock_info *ptp, s32 ppb)
> +{
> + return -EOPNOTSUPP;
> +}
> +
> +static int ptp_kvm_adjtime(struct ptp_clock_info *ptp, s64 delta)
> +{
> + return -EOPNOTSUPP;
> +}
> +
> +static struct kvm_clock_offset clock_off;
> +static phys_addr_t clock_off_gpa;
> +
> +static int ptp_kvm_gettime(struct ptp_clock_info *ptp, struct timespec64 *ts)
> +{
> + unsigned long ret;
> + struct timespec64 tspec;
> + u64 delta;
> + cycle_t offset;
> + unsigned version;
> + int cpu;
> + struct pvclock_vcpu_time_info *src;
> +
> + preempt_disable_notrace();
> + cpu = smp_processor_id();
> + src = _clock[cpu].pvti;
> +
> + spin_lock(_ptp_lock);
> +
> + do {
> + /*
> +  * We are measuring the delay between
> +  * kvm_hypercall and rdtsc using TSC,
> +  * and converting that delta to
> +  * tsc_to_system_mul and tsc_shift
> +  * So any changes to tsc_to_system_mul
> +  * and tsc_shift in this region
> +  * invalidate the measurement.
> +  */

This assumes that the host uses kvmclock, but the guest can be using
just TSC and even have kvmclock disabled.  This code should at least
check that kvmclock enabled.

(If we made kvmclock mandatory, then we could fix pvclock_wall_clock
 interface, because it is already defined to provide real time based on
 kvmclock ...)

> + version = pvclock_read_begin(src);
> +
> + ret = kvm_hypercall2(KVM_HC_CLOCK_OFFSET,
> +  clock_off_gpa,
> +  KVM_CLOCK_OFFSET_WALLCLOCK);
> + if (ret != 0) {
> + pr_err("clock offset hypercall ret %lu\n", ret);
> + spin_unlock(_ptp_lock);
> + preempt_enable_notrace();
> + return -EOPNOTSUPP;
> + }
> +
> + 

[patch 3/3] PTP: add kvm PTP driver

2017-01-13 Thread Marcelo Tosatti
Add a driver with gettime method returning hosts realtime clock.
This allows Chrony to synchronize host and guest clocks with 
high precision (see results below).

chronyc> sources
MS Name/IP address Stratum Poll Reach LastRx Last sample
===
#* PHC0  0   3   377 6 +4ns[   +4ns] +/-3ns

To configure Chronyd to use PHC refclock, add the 
following line to its configuration file:

refclock PHC /dev/ptpX poll 3 dpoll -2 offset 0

Where /dev/ptpX is the kvmclock PTP clock.


---
 drivers/ptp/Kconfig   |   12 +++
 drivers/ptp/Makefile  |1 
 drivers/ptp/ptp_kvm.c |  180 ++
 3 files changed, 193 insertions(+)

Index: kvm-ptpdriver/drivers/ptp/Kconfig
===
--- kvm-ptpdriver.orig/drivers/ptp/Kconfig  2017-01-13 09:17:31.724568567 
-0200
+++ kvm-ptpdriver/drivers/ptp/Kconfig   2017-01-13 09:55:33.344208894 -0200
@@ -90,4 +90,16 @@
  To compile this driver as a module, choose M here: the module
  will be called ptp_pch.
 
+config PTP_1588_CLOCK_KVM
+   tristate "KVM virtual PTP clock"
+   depends on PTP_1588_CLOCK
+   depends on KVM_GUEST
+   default y
+   help
+ This driver adds support for using kvm infrastructure as a PTP
+ clock. This clock is only useful if you are using KVM guests.
+
+ To compile this driver as a module, choose M here: the module
+ will be called ptp_kvm.
+
 endmenu
Index: kvm-ptpdriver/drivers/ptp/ptp_kvm.c
===
--- /dev/null   1970-01-01 00:00:00.0 +
+++ kvm-ptpdriver/drivers/ptp/ptp_kvm.c 2017-01-13 09:57:55.013440645 -0200
@@ -0,0 +1,180 @@
+/*
+ * Virtual PTP 1588 clock for use with KVM guests
+ *
+ * Copyright (C) 2017 Red Hat Inc.
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ */
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include 
+
+struct kvm_ptp_clock {
+   struct ptp_clock *ptp_clock;
+   struct ptp_clock_info caps;
+};
+
+DEFINE_SPINLOCK(kvm_ptp_lock);
+
+static struct pvclock_vsyscall_time_info *hv_clock;
+
+/*
+ * PTP clock operations
+ */
+
+static int ptp_kvm_adjfreq(struct ptp_clock_info *ptp, s32 ppb)
+{
+   return -EOPNOTSUPP;
+}
+
+static int ptp_kvm_adjtime(struct ptp_clock_info *ptp, s64 delta)
+{
+   return -EOPNOTSUPP;
+}
+
+static struct kvm_clock_offset clock_off;
+static phys_addr_t clock_off_gpa;
+
+static int ptp_kvm_gettime(struct ptp_clock_info *ptp, struct timespec64 *ts)
+{
+   unsigned long ret;
+   struct timespec64 tspec;
+   u64 delta;
+   cycle_t offset;
+   unsigned version;
+   int cpu;
+   struct pvclock_vcpu_time_info *src;
+
+   preempt_disable_notrace();
+   cpu = smp_processor_id();
+   src = _clock[cpu].pvti;
+
+   spin_lock(_ptp_lock);
+
+   do {
+   /*
+* We are measuring the delay between
+* kvm_hypercall and rdtsc using TSC,
+* and converting that delta to
+* tsc_to_system_mul and tsc_shift
+* So any changes to tsc_to_system_mul
+* and tsc_shift in this region
+* invalidate the measurement.
+*/
+   version = pvclock_read_begin(src);
+
+   ret = kvm_hypercall2(KVM_HC_CLOCK_OFFSET,
+clock_off_gpa,
+KVM_CLOCK_OFFSET_WALLCLOCK);
+   if (ret != 0) {
+   pr_err("clock offset hypercall ret %lu\n", ret);
+   spin_unlock(_ptp_lock);
+   preempt_enable_notrace();
+   return -EOPNOTSUPP;
+   }
+
+   tspec.tv_sec = clock_off.sec;
+   tspec.tv_nsec = clock_off.nsec;
+
+   delta = rdtsc_ordered() - clock_off.tsc;
+
+   offset = pvclock_scale_delta(delta, src->tsc_to_system_mul,
+src->tsc_shift);
+
+   } while (pvclock_read_retry(src, version));
+
+   preempt_enable_notrace();
+
+   tspec.tv_nsec = tspec.tv_nsec + offset;
+
+   spin_unlock(_ptp_lock);
+
+   if (tspec.tv_nsec >= NSEC_PER_SEC) {
+   u64 secs = tspec.tv_nsec;
+
+

[patch 3/3] PTP: add kvm PTP driver

2017-01-13 Thread Marcelo Tosatti
Add a driver with gettime method returning hosts realtime clock.
This allows Chrony to synchronize host and guest clocks with 
high precision (see results below).

chronyc> sources
MS Name/IP address Stratum Poll Reach LastRx Last sample
===
#* PHC0  0   3   377 6 +4ns[   +4ns] +/-3ns

To configure Chronyd to use PHC refclock, add the 
following line to its configuration file:

refclock PHC /dev/ptpX poll 3 dpoll -2 offset 0

Where /dev/ptpX is the kvmclock PTP clock.


---
 drivers/ptp/Kconfig   |   12 +++
 drivers/ptp/Makefile  |1 
 drivers/ptp/ptp_kvm.c |  180 ++
 3 files changed, 193 insertions(+)

Index: kvm-ptpdriver/drivers/ptp/Kconfig
===
--- kvm-ptpdriver.orig/drivers/ptp/Kconfig  2017-01-13 09:17:31.724568567 
-0200
+++ kvm-ptpdriver/drivers/ptp/Kconfig   2017-01-13 09:55:33.344208894 -0200
@@ -90,4 +90,16 @@
  To compile this driver as a module, choose M here: the module
  will be called ptp_pch.
 
+config PTP_1588_CLOCK_KVM
+   tristate "KVM virtual PTP clock"
+   depends on PTP_1588_CLOCK
+   depends on KVM_GUEST
+   default y
+   help
+ This driver adds support for using kvm infrastructure as a PTP
+ clock. This clock is only useful if you are using KVM guests.
+
+ To compile this driver as a module, choose M here: the module
+ will be called ptp_kvm.
+
 endmenu
Index: kvm-ptpdriver/drivers/ptp/ptp_kvm.c
===
--- /dev/null   1970-01-01 00:00:00.0 +
+++ kvm-ptpdriver/drivers/ptp/ptp_kvm.c 2017-01-13 09:57:55.013440645 -0200
@@ -0,0 +1,180 @@
+/*
+ * Virtual PTP 1588 clock for use with KVM guests
+ *
+ * Copyright (C) 2017 Red Hat Inc.
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ */
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include 
+
+struct kvm_ptp_clock {
+   struct ptp_clock *ptp_clock;
+   struct ptp_clock_info caps;
+};
+
+DEFINE_SPINLOCK(kvm_ptp_lock);
+
+static struct pvclock_vsyscall_time_info *hv_clock;
+
+/*
+ * PTP clock operations
+ */
+
+static int ptp_kvm_adjfreq(struct ptp_clock_info *ptp, s32 ppb)
+{
+   return -EOPNOTSUPP;
+}
+
+static int ptp_kvm_adjtime(struct ptp_clock_info *ptp, s64 delta)
+{
+   return -EOPNOTSUPP;
+}
+
+static struct kvm_clock_offset clock_off;
+static phys_addr_t clock_off_gpa;
+
+static int ptp_kvm_gettime(struct ptp_clock_info *ptp, struct timespec64 *ts)
+{
+   unsigned long ret;
+   struct timespec64 tspec;
+   u64 delta;
+   cycle_t offset;
+   unsigned version;
+   int cpu;
+   struct pvclock_vcpu_time_info *src;
+
+   preempt_disable_notrace();
+   cpu = smp_processor_id();
+   src = _clock[cpu].pvti;
+
+   spin_lock(_ptp_lock);
+
+   do {
+   /*
+* We are measuring the delay between
+* kvm_hypercall and rdtsc using TSC,
+* and converting that delta to
+* tsc_to_system_mul and tsc_shift
+* So any changes to tsc_to_system_mul
+* and tsc_shift in this region
+* invalidate the measurement.
+*/
+   version = pvclock_read_begin(src);
+
+   ret = kvm_hypercall2(KVM_HC_CLOCK_OFFSET,
+clock_off_gpa,
+KVM_CLOCK_OFFSET_WALLCLOCK);
+   if (ret != 0) {
+   pr_err("clock offset hypercall ret %lu\n", ret);
+   spin_unlock(_ptp_lock);
+   preempt_enable_notrace();
+   return -EOPNOTSUPP;
+   }
+
+   tspec.tv_sec = clock_off.sec;
+   tspec.tv_nsec = clock_off.nsec;
+
+   delta = rdtsc_ordered() - clock_off.tsc;
+
+   offset = pvclock_scale_delta(delta, src->tsc_to_system_mul,
+src->tsc_shift);
+
+   } while (pvclock_read_retry(src, version));
+
+   preempt_enable_notrace();
+
+   tspec.tv_nsec = tspec.tv_nsec + offset;
+
+   spin_unlock(_ptp_lock);
+
+   if (tspec.tv_nsec >= NSEC_PER_SEC) {
+   u64 secs = tspec.tv_nsec;
+
+