Thanks Timo.

I was writing out corrupted dovecot-uidlist files, fixing that solved the issue.

Attached is a script that fixes message names.  It's not fully QA'ed yet, and 
uses some of our site-specific stuff to figure out who owns the maildir and to 
boot the user while making changes, but might be useful as a starting point.  
Note that it handles compressed and uncompressed messages.

#!/usr/bin/perl

use strict;
use warnings;

package MaildirSizeFix;

=head1 SYNOPSIS

MaildirSizeFix - library and utility for checking and fixing message size in 
 maildir filenames

=head2 Library

  MaildirSizeFix::fixmaildir(<maildir>, [<filehandle>]);

Find all mail folders in <maildir> (including '.', the INBOX folder) and call
 fixfolder() on each. Print log messages to <filehandle> if provided.

  MaildirSizeFix::fixfolder(<folder>, [<filehandle>]);

Find all mail message files in the cur and new subdirectories of <folder>,
 compare the filename size and actual message size and if necessary
 rename the file.  Also updates file names in dovecot-uidlist.
 Print log messages to <filehandle> if provided.
 Locks the maildir and boots the user if changes need to be made.

=head2 Command Line

  MaildirSizeFix.pm 
   [--maildir=<maildir>] [--folder=<folder>] [--help] 

--maildir call fixmaildir on <maildir>

--folder call fixfolder on <folder>

Where maildir, folder and are full directory paths.

=head2 Note

For both command line and library usage, the environment variable 
 MAILDIRLOCK_BIN _must_ be set to the location of
 the maildirlock binary from dovecot source.  LD_LIBRARY_PATH will most
 likely also have to be set to include libdovecot.

=cut


use IO::Zlib qw(:gzip_external 0);
use IO::Socket::INET;
use File::Basename;
use Getopt::Long;
use Pod::Usage;

sub fixmaildir($;$)
{
  my ($dir, $outh) = @_;
  die "Cannot access maildir [$dir]" unless ($dir && -d $dir);
  my @folders = sort (grep { $_ !~ /\/..$/ } glob("$dir/.*"));
  for my $f (@folders) # cur and new in every folder
  {
    _pout($outh, "FOLDER: [$f]\n");
    fixfolder($f, $outh);
  }
}

sub fixfolder($;$)
{
  my ($folder, $outh) = @_;
  # find files to rename
  # lock maildir 
  # boot user
  # rename files
  # update dovecot-uidlist
  # unlock maildir
  unless ($folder && -d $folder)
  { 
    _pout($outh, "DIRBAD: [$folder]\n");
    return;
  }
  my %renames = _check_folder($folder, $outh);
  if (%renames)
  {
    my $lock;
    unless ($lock = _lock_maildir($folder))
    {
      _pout($outh, "LOCKBAD: [$folder]\n");
      return;
    }
    _reap_owner($folder, $outh);
    while (my ($old, $new) = each %renames)
    {
      if (rename("$old", "$new"))
      {
        _pout($outh, "FIXED: [$old] => [$new]\n");
      }
      else
      {
        _pout($outh, "RENAMEBAD: [$old] => [$new]: $!\n");
        delete $renames{$old};
      }
    }
    _fix_uidlist($folder, %renames);
    _unlock_maildir($lock);
  }
}

sub _check_folder($;$)
{
  my ($folder, $outh) = @_;
  my %renames = ();
  for my $d (map {"$folder/$_"} qw(cur new))
  {
    opendir (my $dh, $d) || next; # TODO log error?
    while (my $f = readdir($dh)) # every message file
    {
      my $fullpath = "$d/$f";
      next unless -f $fullpath;
      my ($filen_size) = $f =~ /S=(\d*)/;
      next unless $filen_size;
    
      my $quick_size = _uncompressed_size_quick($fullpath);
      if (!defined $quick_size)
      {
        _pout($outh, "\tBAD1: [$fullpath]\n");
        next;
      }
      if ($filen_size != $quick_size)
      {
        my ($actual_size, $actual_wsize) = _uncompressed_size($fullpath);
        if (!defined $actual_size)
        {
          _pout($outh, "\tBAD2: [$fullpath]\n");
          next;
        }
        my $newname = $f;
        $newname =~ s/,S=(\d*)|,W=(\d*)//g; #remove old sizes if present
        $newname =~ s/:2/,S=$actual_size,W=$actual_wsize:2/;
        $renames{"$d/$f"} = "$d/$newname";
      }
    }
  }
  return %renames;
}

sub _lock_maildir($)
{
  my ($folder) = @_;
  my $lockbin = $ENV{MAILDIRLOCK_BIN};
  open my $output,  "-|", $lockbin, ($folder, "30") or return 0;
  my $pid = <$output>;
  close $output;
  return $pid; 
}

sub _unlock_maildir($)
{
  my ($lock) = @_;
  return unless $lock;
  kill 15, $lock;
  return;
}

sub _reap_owner($;$)
{
  my ($folder, $outh) = @_;
  my ($owner) = $folder =~ qr(/\d\d\d/\d\d\d/(.*?)/);
  return unless $owner;
  my $sock = IO::Socket::INET->new(PeerAddr=>'imap:1313', Proto=>'tcp');
  return unless $sock;
  scalar(<$sock>); # read banner
  print $sock "GLOBAL KILL $owner\r\n";
  $sock->autoflush();
  close($sock);
}

sub _fix_uidlist($%)
{
  my ($folder, %renames) = @_;
  my $fn = "$folder/dovecot-uidlist";
  return unless %renames;
  open (my $fh, '+<', $fn) or return;
  my $list = do { local $/; <$fh> }; # slurp whole file
  while (my ($old, $new) = each %renames)
  {
    $old = basename($old);
    $new = basename($new);
    $list =~ s/$old/$new/;
  }
  truncate ($fh, 0);
  seek($fh, 0,0);
  print $fh $list;
  close ($fh);
}

sub _pout($@)
{
  my $h = shift;
  print $h @_ if $h;
}


# get the message size, either from the last 4 bytes if compressed, or file size
sub _uncompressed_size_quick($)
{
  my ($fn) = @_;
  my $gzid = chr(0x1f) . chr(0x8b);
  my ($flag, $buf);

  open(my $fh, '<', $fn) or return undef;
  return undef unless (sysread($fh, $flag, 2) == 2);
  unless ($flag eq $gzid) # not a compressed file, return the size-on-disk
  {
    return sysseek($fh, 0, 2);
  }
  # gziped file, size is in last 4 bytes
  return undef unless (sysseek($fh, -4, 2));
  return undef unless (sysread($fh, $buf, 4));
  return unpack('V', $buf);
}
  

# get the S= and W= size by reading the whole file.
sub _uncompressed_size($)
{
  my $fn = $_[0];
  my $fh = IO::Zlib->new($fn, "rb") || IO::File->new("< $fn");
  return undef unless $fh;
  my $sz = 0; #uncompressed size
  my $wsz = 0; #uncompressed size with /n converted to /r/n
  my $read;
  my $chunk = 4096; # TODO tune
  my $buf;
  my $cusp = 0;
  while ($read = read($fh, $buf, $chunk))
  {
    $sz += $read;
    $wsz += $read;
    $wsz += () = $buf =~ /(?<!\r)\n/sg; #count \ns not preceded by an \r
    if ($cusp)
    {
      # last chunk ended with \r and this chunk starts with \n, so we counted
      # an /n we shouldn't have above
      $wsz -=1 if ($buf =~ /^\n/s);
    }
    $cusp = $buf =~ /\r$/s;
  }
  return ($sz, $wsz);
}

unless(caller())
{
  my ($maildir, $folder, $mail, $lockbin);
  GetOptions(
    "maildir=s" => \$maildir,
    "folder=s" => \$folder,
    "help" => sub {pod2usage(-verbose =>1 )},
    "man" => sub {pod2usage(-verbose =>2 )}) 
      || pod2usage(-verbose => 1);

  $lockbin = $ENV{MAILDIRLOCK_BIN};
  pod2usage(-verbose=>1) unless ($lockbin && ($maildir || $folder || $mail));

  die "Could not execute maildirlock [$lockbin]" unless -x $lockbin;
  system("$lockbin >/dev/null 2>&1 ");
  die "Could not execute maildirlock [$lockbin], " . 
    "maybe you need to set LD_LIBRARY_PATH" unless ($? >> 8) == 1;
  if ($maildir)
  {
    print "Fixing maildir: [$maildir]\n";
    fixmaildir($maildir, \*STDOUT);
  }
  if ($folder)
  {
    print "Fixing folder: [$folder]\n";
    fixfolder($folder, \*STDOUT);
  }
}

1;

On 2013-01-22, at 7:01 AM, Timo Sirainen <t...@iki.fi> wrote:

> On 21.1.2013, at 21.54, Richard Platel <rpla...@tucows.com> wrote:
> 
>> As stated in my previous message, we have some old compressed maildir 
>> messages with incorrect sizes in the filename.  These messages cause dovecot 
>> 2.x problems.
>> 
>> I'm trying to write a script to crawl all our messages, check the actual 
>> message size and if necessary, fix the filename.  However, when I do this, 
>> dovecot gives the message a new UID on next login.  If I change the filename 
>> in dovecot-uidlist, dovecot still gives a new UID on next login.  If I 
>> change dovecot-uidlist and delete the index, then the UID is preserved.
> 
> I don't really understand why deleting dovecot.index* would make a difference 
> here, except maybe as a workaround in case the user has that mailbox 
> selected, because the filenames could be cached in memory.
> 
> http://dovecot.org/tools/maildir-size-fix.pl
> http://dovecot.org/tools/maildir-size-check.sh
> 
> Those scripts kind of do what you want, except not fully, so it would be nice 
> to get one fully finished one :) The best way for the script to would would 
> be to:
> 
> * scan through a maildir, figure out what needs to be renamed to what, but 
> don't actually do it
> * lock the maildir with dovecot-uidlist.lock (src/util/maildirlock comes with 
> dovecot)
> * doveadm kick user's imap/pop3 sessions, and even better if it was possible 
> to kill -9 any pending processes
> * rename the files and update dovecot-uidlist
> * delete dovecot-uidlist.lock
> 
> This separately for each folder.
> 

Reply via email to