On Mon, May 29, 2017 at 11:59:35AM +0200, Dietmar Maurer wrote: > comment inline > > > > On Tue, May 23, 2017 at 09:08:44AM +0200, Dietmar Maurer wrote: > > > And implement retry algorythm after failure: > > > > > > $next_sync = $state->{last_try} + 5*60*$fail_count; > > > > > > and limit to 3 failures. > > > > > > Signed-off-by: Dietmar Maurer <diet...@proxmox.com> > > > --- > > > PVE/API2/Replication.pm | 2 +- > > > PVE/CLI/pvesr.pm | 28 ++++++++++++++++++++-------- > > > PVE/Replication.pm | 35 +++++++++++++++++++++-------------- > > > 3 files changed, 42 insertions(+), 23 deletions(-) > > > > > > diff --git a/PVE/API2/Replication.pm b/PVE/API2/Replication.pm > > > index c400a93c..977d3ec1 100644 > > > --- a/PVE/API2/Replication.pm > > > +++ b/PVE/API2/Replication.pm > > > @@ -82,7 +82,7 @@ __PACKAGE__->register_method ({ > > > my $vmid = $d->{guest}; > > > next if !$rpcenv->check($authuser, "/vms/$vmid", [ 'VM.Audit' ]); > > > $d->{id} = $id; > > > - foreach my $k (qw(last_sync fail_count error duration)) { > > > + foreach my $k (qw(last_sync last_try fail_count error duration)) { > > > $d->{$k} = $state->{$k} if defined($state->{$k}); > > > } > > > if ($state->{pid} && $state->{ptime}) { > > > diff --git a/PVE/CLI/pvesr.pm b/PVE/CLI/pvesr.pm > > > index 38116f7b..115bc2c1 100644 > > > --- a/PVE/CLI/pvesr.pm > > > +++ b/PVE/CLI/pvesr.pm > > > @@ -97,14 +97,14 @@ my $print_job_list = sub { > > > > > > my $format = "%-20s %10s %-20s %10s %5s %8s\n"; > > > > > > - printf($format, "JobID", "GuestID", "Target", "Interval", "Rate", > > > "Enabled"); > > > + printf($format, "JobID", "GuestID", "Target", "Schedule", "Rate", > > > "Enabled"); > > > > > > foreach my $job (sort { $a->{guest} <=> $b->{guest} } @$list) { > > > my $plugin = PVE::ReplicationConfig->lookup($job->{type}); > > > my $tid = $plugin->get_unique_target_id($job); > > > > > > printf($format, $job->{id}, $job->{guest}, $tid, > > > - defined($job->{interval}) ? $job->{interval} : '-', > > > + defined($job->{schedule}) ? $job->{schedule} : '*/15', > > > defined($job->{rate}) ? $job->{rate} : '-', > > > $job->{disable} ? 'no' : 'yes' > > > ); > > > @@ -114,21 +114,33 @@ my $print_job_list = sub { > > > my $print_job_status = sub { > > > my ($list) = @_; > > > > > > - my $format = "%-20s %10s %-20s %20s %10s %10s %s\n"; > > > + my $format = "%-20s %10s %-20s %20s %20s %10s %10s %s\n"; > > > > > > - printf($format, "JobID", "GuestID", "Target", "LastSync", "Duration", > > > "FailCount", "State"); > > > + printf($format, "JobID", "GuestID", "Target", "LastSync", "NextSync", > > > "Duration", "FailCount", "State"); > > > > > > foreach my $job (sort { $a->{guest} <=> $b->{guest} } @$list) { > > > my $plugin = PVE::ReplicationConfig->lookup($job->{type}); > > > my $tid = $plugin->get_unique_target_id($job); > > > > > > - my $timestr = $job->{last_sync} ? > > > - strftime("%Y-%m-%d_%H:%M:%S", localtime($job->{last_sync})) : '-'; > > > + my $timestr = '-'; > > > + if ($job->{last_sync}) { > > > + strftime("%Y-%m-%d_%H:%M:%S", localtime($job->{last_sync})); > > > + } > > > + > > > + my $nextstr = '-'; > > > + if (my $next = $job->{next_sync}) { > > > + my $now = time(); > > > + if ($next > $now) { > > > + $nextstr = strftime("%Y-%m-%d_%H:%M:%S", > > > localtime($job->{next_sync})); > > > + } else { > > > + $nextstr = 'now' > > > + } > > > + } > > > > > > my $state = $job->{pid} ? "SYNCING" : $job->{error} // 'OK'; > > > > > > printf($format, $job->{id}, $job->{guest}, $tid, > > > - $timestr, $job->{duration} // '-', > > > + $timestr, $nextstr, $job->{duration} // '-', > > > $job->{fail_count}, $state); > > > } > > > }; > > > @@ -136,7 +148,7 @@ my $print_job_status = sub { > > > our $cmddef = { > > > status => [ 'PVE::API2::Replication', 'status', [], { node => > > > $nodename > > > }, $print_job_status ], > > > > > > - jobs => [ 'PVE::API2::ReplicationConfig', 'index' , [], {}, > > > $print_job_list ], > > > + list => [ 'PVE::API2::ReplicationConfig', 'index' , [], {}, > > > $print_job_list ], > > > read => [ 'PVE::API2::ReplicationConfig', 'read' , ['id'], {}, > > > sub { my $res = shift; print to_json($res, { pretty => 1, canonical > > > => 1}); }], > > > update => [ 'PVE::API2::ReplicationConfig', 'update' , ['id'], {} ], > > > diff --git a/PVE/Replication.pm b/PVE/Replication.pm > > > index ff4bbeb4..d878b44a 100644 > > > --- a/PVE/Replication.pm > > > +++ b/PVE/Replication.pm > > > @@ -9,6 +9,7 @@ use Time::HiRes qw(gettimeofday tv_interval); > > > use PVE::INotify; > > > use PVE::ProcFSTools; > > > use PVE::Tools; > > > +use PVE::CalendarEvent; > > > use PVE::Cluster; > > > use PVE::QemuConfig; > > > use PVE::QemuServer; > > > @@ -46,7 +47,8 @@ my $get_job_state = sub { > > > $state = {} if !$state; > > > > > > $state->{last_iteration} //= 0; > > > - $state->{last_sync} //= 0; > > > + $state->{last_try} //= 0; # last sync start time > > > + $state->{last_sync} //= 0; # last successful sync start time > > > $state->{fail_count} //= 0; > > > > > > return $state; > > > @@ -93,10 +95,23 @@ sub job_status { > > > > > > next if $jobcfg->{disable}; > > > > > > - $jobcfg->{state} = $get_job_state->($stateobj, $jobcfg); > > > + my $state = $get_job_state->($stateobj, $jobcfg); > > > + $jobcfg->{state} = $state; > > > $jobcfg->{id} = $jobid; > > > $jobcfg->{vmtype} = $vms->{ids}->{$vmid}->{type}; > > > > > > + my $next_sync = 0; > > > + if (my $fail_count = $state->{fail_count}) { > > > + if ($fail_count < 3) { > > > + $next_sync = $state->{last_try} + 5*60*$fail_count; > > > > both the 3 and the 5*60 constants seem awfully arbitrary.. maybe we > > could make them configurable? e.g., for a sync job that syncs once or > > twice a day, a window of 0+5+10+15=30 minutes of not being able to sync > > should probably not put that job into an error state? > > yes, but I guess we we can add such things later. >
sounds good to me _______________________________________________ pve-devel mailing list pve-devel@pve.proxmox.com https://pve.proxmox.com/cgi-bin/mailman/listinfo/pve-devel