Script-ul atasat mai jos este dedicat tuturor celor care folosesc
sisteme UTF-8-impaired, si isi doresc sa citeasca in mesajele de pe RLUG
litere cu diacritice in loc de carcalaci.  Bug reports / comentarii /
injuraturi / etc. pe personala.

Note:
-----

(1) Din motive bine intemeiate, script-ul are nevoie de MIME-tools
    >= 6.2; versiunile 5.x pe care le instaleaza in mod normal
    diferitele package managers sub Linux nu sunt bune;

        
http://search.cpan.org/CPAN/authors/id/E/ER/ERYQ/MIME-tools-6.200_02.tar.gz

(2) Semnaturile PGP nu pot supravietui re-incodarii, motiv pentru care
    script-ul le taie;

(3) Incantatie pentru Procmail:


        :0
        * ^List-Id:.*\.lists\.lug\.ro>
        {
          :0 fw
          * ^Subject:.*=\?UTF-?8\?
          | utf8-recode.pl iso-8859-2

          :0 Efw
          * HB ?? ^(Content-Type:.*|[   ]+)charset="?utf-?8"?\>
          | utf8-recode.pl iso-8859-2
        }


    ([  ] de mai sus contine un spatiu si un tab).

    Salutari,

    Liviu Daia

-- 
Dr. Liviu Daia                                  http://www.imar.ro/~daia
#! /usr/bin/perl
#
# Copyright (c) 2005 Liviu Daia <[EMAIL PROTECTED]>
# All rights reserved.
#
# $Revision: 1.5 $
# $Id: utf8-recode.pl,v 1.5 2006/01/05 06:33:31 daia Exp $
# $Source: /usr/share/CVS/Scripts/scripts/utf8-recode.pl,v $
#

use File::Basename;
use MIME::Parser 6.108;
use MIME::Words qw(:all);
use IO::File;
use Locale::Recode;

# $^W = 1;
# use strict;
# use Carp ();
# local $SIG{__WARN__} = \&Carp::cluck;



my ($chr_to, $recode, $tmp, $parser, $parsedir, $f, $e);
my ($idx, $h, @chunks, $data, $charset, $io);



sub recode_recursive ($);

sub recode_recursive ($)
{
  my $e = shift;

  # embedded messages can have subjects
  $e->head->modify (0);
  $idx = 0;
  for ($e->head->get ('Subject'))
  {
    $h = '';
    @chunks = decode_mimewords $_;

    for (@chunks)
    {
      ($data, $charset) = @$_;
      if (defined $charset and $charset =~ m/^utf-?8$/io)
      {
        $recode->recode ($data)
          or die $recode->getError;
        $charset = $chr_to;
      }
      $h .= $charset ? (encode_mimeword $data, 'B', $charset) : $data;
    }

    $e->head->replace ('Subject', $h, $idx++);
  }

  # Lines and Content-Length are probably wrong
  $e->head->delete ('Lines');
  $e->head->delete ('Content-Length');

  if ($e->is_multipart)
  {
    for ($e->parts)
    {
      recode_recursive ($_);
    }
  }
  elsif (lc $e->effective_type eq 'text/plain' and
# elsif ($e->effective_type =~ m!^text/!io and
         defined $e->head->mime_attr ('content-type.charset') and
         $e->head->mime_attr ('content-type.charset') =~ /^utf-?8$/io)
  {
    # avoids messing with internal pathnames
    # avoids reading unbound data in memory
    $tmp->seek (0, 0);
    $tmp->truncate (0);
    $io = $e->bodyhandle->open ('r');
    $tmp->print ($_)
      while (defined ($_ = $io->getline));
    $io->close;

    $tmp->flush;
    $tmp->seek (0, 0);
    $io = $e->bodyhandle->open ('w');
    while (defined ($_ = $tmp->getline))
    {
      $recode->recode ($_)
        or die $recode->getError;
      $io->print ($_);
    }
    $io->close;

    $e->head->mime_attr ('Content-Type.charset' => $chr_to);
  }
}



sub chop_signatures ($);

sub chop_signatures ($)
{
  my $e = shift;

  if ($e->is_multipart)
  {
    $e->parts ([grep { lc $_->effective_type ne 'application/pgp-signature' } 
$e->parts]);
    $e->make_singlepart;
    chop_signatures ($_)
      for ($e->parts);
  }
}



#
# main
#
die ("usage: " . (basename $0) . " [ -l | <encoding> ]\n")
  unless (@ARGV == 1 and ($ARGV[0] eq '-l' or $chr_to = 
Locale::Recode->resolveAlias ($ARGV[0])));

if ($ARGV[0] eq '-l')
{
  print "$_\n"
    for (sort @{Locale::Recode->getCharsets});
  exit;
}

$parser = MIME::Parser->new ();
$parser->extract_encoded_messages (1);
$parser->extract_nested_messages ('NEST');
$parser->extract_uuencode (1);
$parser->ignore_errors (1);
$parser->output_to_core (0);
$parser->tmp_recycling (1);
$parser->tmp_to_core (0);
$parser->use_inner_files (0);

if    (defined $ENV{'TMPDIR'}) { $parsedir = $ENV{'TMPDIR'}; }
elsif (defined $ENV{'TMP'})    { $parsedir = $ENV{'TMP'}; }
else                           { $parsedir = '/tmp'; }

$f = MIME::Parser::FlatFiler->new ($parsedir);
$f->ignore_filename (1);
$parser->filer ($f);

# XXX can't tell the parser to keep From_
my $from = <STDIN>;
$e = eval { $parser->parse (\*STDIN); };

print STDERR $_
  for ($parser->results->msgs);
die qq{can't parse message: [EMAIL PROTECTED]
  unless defined $e;

$recode = Locale::Recode->new (from => 'UTF-8', to => $chr_to);
die $recode->getError
  if $recode->getError;

$tmp = IO::File->new_tmpfile
  or die qq{can't create temp file: $!};

# PGP signatures are useless by now
chop_signatures $e;
# $e->dump_skeleton (\*STDERR);

eval { recode_recursive ($e); };
die qq{can't recode message: [EMAIL PROTECTED]
  if ($@);
$e->sync_headers (Length => 'DELETE');

print $from;
$e->print;

undef $tmp;
$parser->filer->purge;

_______________________________________________
RLUG mailing list
RLUG@lists.lug.ro
http://lists.lug.ro/mailman/listinfo/rlug

Raspunde prin e-mail lui