On Tue, May 23, 2017 at 09:08:44AM +0200, Dietmar Maurer wrote:
> And implement retry algorythm after failure:
> 
>   $next_sync = $state->{last_try} + 5*60*$fail_count;
> 
> and limit to 3 failures.
> 
> Signed-off-by: Dietmar Maurer <diet...@proxmox.com>
> ---
>  PVE/API2/Replication.pm |  2 +-
>  PVE/CLI/pvesr.pm        | 28 ++++++++++++++++++++--------
>  PVE/Replication.pm      | 35 +++++++++++++++++++++--------------
>  3 files changed, 42 insertions(+), 23 deletions(-)
> 
> diff --git a/PVE/API2/Replication.pm b/PVE/API2/Replication.pm
> index c400a93c..977d3ec1 100644
> --- a/PVE/API2/Replication.pm
> +++ b/PVE/API2/Replication.pm
> @@ -82,7 +82,7 @@ __PACKAGE__->register_method ({
>           my $vmid = $d->{guest};
>           next if !$rpcenv->check($authuser, "/vms/$vmid", [ 'VM.Audit' ]);
>           $d->{id} = $id;
> -         foreach my $k (qw(last_sync fail_count error duration)) {
> +         foreach my $k (qw(last_sync last_try fail_count error duration)) {
>               $d->{$k} = $state->{$k} if defined($state->{$k});
>           }
>           if ($state->{pid} && $state->{ptime}) {
> diff --git a/PVE/CLI/pvesr.pm b/PVE/CLI/pvesr.pm
> index 38116f7b..115bc2c1 100644
> --- a/PVE/CLI/pvesr.pm
> +++ b/PVE/CLI/pvesr.pm
> @@ -97,14 +97,14 @@ my $print_job_list = sub {
>  
>      my $format = "%-20s %10s %-20s %10s %5s %8s\n";
>  
> -    printf($format, "JobID", "GuestID", "Target", "Interval", "Rate", 
> "Enabled");
> +    printf($format, "JobID", "GuestID", "Target", "Schedule", "Rate", 
> "Enabled");
>  
>      foreach my $job (sort { $a->{guest} <=> $b->{guest} } @$list) {
>       my $plugin = PVE::ReplicationConfig->lookup($job->{type});
>       my $tid = $plugin->get_unique_target_id($job);
>  
>       printf($format, $job->{id}, $job->{guest}, $tid,
> -            defined($job->{interval}) ? $job->{interval} : '-',
> +            defined($job->{schedule}) ? $job->{schedule} : '*/15',
>              defined($job->{rate}) ? $job->{rate} : '-',
>              $job->{disable} ? 'no' : 'yes'
>           );
> @@ -114,21 +114,33 @@ my $print_job_list = sub {
>  my $print_job_status = sub {
>      my ($list) = @_;
>  
> -    my $format = "%-20s %10s %-20s %20s %10s %10s %s\n";
> +    my $format = "%-20s %10s %-20s %20s %20s %10s %10s %s\n";
>  
> -    printf($format, "JobID", "GuestID", "Target", "LastSync", "Duration", 
> "FailCount", "State");
> +    printf($format, "JobID", "GuestID", "Target", "LastSync", "NextSync", 
> "Duration", "FailCount", "State");
>  
>      foreach my $job (sort { $a->{guest} <=> $b->{guest} } @$list) {
>       my $plugin = PVE::ReplicationConfig->lookup($job->{type});
>       my $tid = $plugin->get_unique_target_id($job);
>  
> -     my $timestr = $job->{last_sync} ?
> -         strftime("%Y-%m-%d_%H:%M:%S", localtime($job->{last_sync})) : '-';
> +     my $timestr = '-';
> +     if ($job->{last_sync}) {
> +         strftime("%Y-%m-%d_%H:%M:%S", localtime($job->{last_sync}));

This is not assigned to $timestr anymore?

> +     }
> +
> +     my $nextstr = '-';
> +     if (my $next = $job->{next_sync}) {
> +         my $now = time();
> +         if ($next > $now) {
> +             $nextstr = strftime("%Y-%m-%d_%H:%M:%S", 
> localtime($job->{next_sync}));
> +         } else {
> +             $nextstr = 'now'
> +         }
> +     }
>  
>       my $state = $job->{pid} ? "SYNCING" : $job->{error} // 'OK';
>  
>       printf($format, $job->{id}, $job->{guest}, $tid,
> -            $timestr, $job->{duration} // '-',
> +            $timestr, $nextstr, $job->{duration} // '-',
>              $job->{fail_count}, $state);
>      }
>  };
> @@ -136,7 +148,7 @@ my $print_job_status = sub {
>  our $cmddef = {
>      status => [ 'PVE::API2::Replication', 'status', [], { node => $nodename 
> }, $print_job_status ],
>  
> -    jobs => [ 'PVE::API2::ReplicationConfig', 'index' , [], {}, 
> $print_job_list ],
> +    list => [ 'PVE::API2::ReplicationConfig', 'index' , [], {}, 
> $print_job_list ],
>      read => [ 'PVE::API2::ReplicationConfig', 'read' , ['id'], {},
>            sub { my $res = shift; print to_json($res, { pretty => 1, 
> canonical => 1}); }],
>      update => [ 'PVE::API2::ReplicationConfig', 'update' , ['id'], {} ],
> diff --git a/PVE/Replication.pm b/PVE/Replication.pm
> index ff4bbeb4..d878b44a 100644
> --- a/PVE/Replication.pm
> +++ b/PVE/Replication.pm
> @@ -9,6 +9,7 @@ use Time::HiRes qw(gettimeofday tv_interval);
>  use PVE::INotify;
>  use PVE::ProcFSTools;
>  use PVE::Tools;
> +use PVE::CalendarEvent;
>  use PVE::Cluster;
>  use PVE::QemuConfig;
>  use PVE::QemuServer;
> @@ -46,7 +47,8 @@ my $get_job_state = sub {
>      $state = {} if !$state;
>  
>      $state->{last_iteration} //= 0;
> -    $state->{last_sync} //= 0;
> +    $state->{last_try} //= 0; # last sync start time
> +    $state->{last_sync} //= 0; # last successful sync start time
>      $state->{fail_count} //= 0;
>  
>      return $state;
> @@ -93,10 +95,23 @@ sub job_status {
>  
>       next if $jobcfg->{disable};
>  
> -     $jobcfg->{state} = $get_job_state->($stateobj, $jobcfg);
> +     my $state = $get_job_state->($stateobj, $jobcfg);
> +     $jobcfg->{state} = $state;
>       $jobcfg->{id} = $jobid;
>       $jobcfg->{vmtype} = $vms->{ids}->{$vmid}->{type};
>  
> +     my $next_sync = 0;
> +     if (my $fail_count = $state->{fail_count}) {
> +         if ($fail_count < 3) {
> +             $next_sync = $state->{last_try} + 5*60*$fail_count;
> +         }
> +     } else {
> +         my $schedule =  $jobcfg->{schedule} || '*/15';
> +         my $calspec = PVE::CalendarEvent::parse_calendar_event($schedule);
> +         $next_sync = PVE::CalendarEvent::compute_next_event($calspec, 
> $state->{last_try}) // 0;
> +     }
> +     $jobcfg->{next_sync} = $next_sync;
> +
>       $jobs->{$jobid} = $jobcfg;
>      }
>  
> @@ -110,15 +125,6 @@ my $get_next_job = sub {
>  
>      my $jobs = job_status($stateobj);
>  
> -    # compute next_sync here to make it easy to sort jobs
> -    my $next_sync_hash = {};
> -    foreach my $jobid (keys %$jobs) {
> -     my $jobcfg = $jobs->{$jobid};
> -     my $interval = $jobcfg->{interval} || 15;
> -     my $last_sync = $jobcfg->{state}->{last_sync};
> -     $next_sync_hash->{$jobid} = $last_sync + $interval * 60;
> -    }
> -
>      my $sort_func = sub {
>       my $joba = $jobs->{$a};
>       my $jobb = $jobs->{$b};
> @@ -126,7 +132,7 @@ my $get_next_job = sub {
>       my $sb =  $jobb->{state};
>       my $res = $sa->{last_iteration} cmp $sb->{last_iteration};
>       return $res if $res != 0;
> -     $res = $next_sync_hash->{$a} <=> $next_sync_hash->{$b};
> +     $res = $joba->{next_sync} <=> $jobb->{next_sync};
>       return $res if $res != 0;
>       return  $joba->{guest} <=> $jobb->{guest};
>      };
> @@ -134,7 +140,7 @@ my $get_next_job = sub {
>      foreach my $jobid (sort $sort_func keys %$jobs) {
>       my $jobcfg = $jobs->{$jobid};
>       next if $jobcfg->{state}->{last_iteration} >= $now;
> -     if ($now >= $next_sync_hash->{$jobid}) {
> +     if ($jobcfg->{next_sync} && ($now >= $jobcfg->{next_sync})) {
>           $next_jobid = $jobid;
>           last;
>       }
> @@ -174,7 +180,7 @@ my $run_replication = sub {
>  
>      $state->{pid} = $$;
>      $state->{ptime} = PVE::ProcFSTools::read_proc_starttime($state->{pid});
> -
> +    $state->{last_try} = $start_time;
>      $update_job_state->($stateobj, $jobcfg,  $state);
>  
>      eval { replicate($jobcfg, $start_time); };
> @@ -188,6 +194,7 @@ my $run_replication = sub {
>       $state->{fail_count}++;
>       $state->{error} = "$err";
>       $update_job_state->($stateobj, $jobcfg,  $state);
> +     warn $err;
>     } else {
>       $state->{last_sync} = $start_time;
>       $state->{fail_count} = 0;
> -- 
> 2.11.0
> 
> _______________________________________________
> pve-devel mailing list
> pve-devel@pve.proxmox.com
> https://pve.proxmox.com/cgi-bin/mailman/listinfo/pve-devel

_______________________________________________
pve-devel mailing list
pve-devel@pve.proxmox.com
https://pve.proxmox.com/cgi-bin/mailman/listinfo/pve-devel

Reply via email to