https://bugs.kde.org/show_bug.cgi?id=44880
--- Comment #18 from Christian Schmitz <list schweb com ar> 2011-08-09 14:45:15 ---

I wrote this script; it still needs to be refined for the case where an "<img"
tag is split across 2 different lines. This script removes any type of HTML
tag, keeping links to images as text; links to URLs are kept as text.
I hope it is useful to the community.

#!/usr/bin/perl
#
# kmail-html-strip: converts HTML e-mails to plain text (meant for use in KMail).
#
# Reads the message on STDIN and writes it to STDOUT with the HTML tags
# listed in @TAG_RULES removed.  An <img> tag is replaced by the URL found
# in its src attribute, so image links survive as text.  Opening <a ...> tags
# are not in the table and are therefore left untouched.
#
# Usage:  perl kmail-html-strip < mail.html > mail.txt
#         (use "perl -x" while the script is still embedded in the saved mail)
#
# Set the environment variable KMAIL_HTML_STRIP_DEBUG to a true value to get
# a per-line trace on STDERR.
#
# A tag can be split over several input lines; $block carries that state
# from one line to the next:
#   $block == 0   nothing is carried over from the previous line
#   $block == 1   a tag opened on an earlier line is still open and its URL
#                 attribute (e.g. <img src=http:xxxx >) has not been seen yet
#   $block == 2   a tag opened on an earlier line is still open and only its
#                 end is being searched for
#
use strict;
use warnings;

our $filtrado  = 0;     # number of tags removed so far (0 = mail untouched)
our $block     = 0;     # carry-over state, see the table above
our $block_fin = '';    # terminator that closes the carried-over tag
our $block_url = '';    # URL attribute marker searched for while $block == 1
our $debug     = $ENV{KMAIL_HTML_STRIP_DEBUG} ? 1 : 0;

# [ tag prefix, terminator ] pairs, applied in this order.  Matching is by
# prefix, so "<b" also removes "<body ...>" or "<blockquote>".  <script>,
# <style> and comments come first: their content is not markup and must be
# removed as a whole before a generic rule can match something inside it.
my @TAG_RULES = (
    [ '<script', '</script>' ],
    [ '<style',  '</style>'  ],
    [ '<!--',    '-->'       ],
    [ '<html',   '>' ],
    [ '</html',  '>' ],
    [ '<body',   '>' ],
    [ '</body',  '>' ],
    [ '<table',  '>' ],
    [ '</table', '>' ],
    [ '<tr',     '>' ],
    [ '</tr',    '>' ],
    [ '<td',     '>' ],
    [ '</td',    '>' ],
    [ '<hr',     '>' ],
    [ '<pre',    '>' ],
    [ '</hr',    '>' ],
    [ '<b',      '>' ],
    [ '</b',     '>' ],
    [ '<p',      '>' ],
    [ '</p',     '>' ],
    [ '</a',     '>' ],
    [ '<span',   '>' ],
    [ '</span',  '>' ],
    [ '<font',   '>' ],
    [ '</font',  '>' ],
);

# Runs the filter only when executed as a script, so the subs below can be
# loaded and called on their own.
main() unless caller;

#######################################################################################
#
# Filter loop: one input line in, one (possibly empty) output line out.
#
sub main {
    while ( my $reng = <STDIN> ) {
        chomp $reng;
        _debug("\nIN :$reng\n");
        $reng = process_line($reng);
        _debug("b=$block : $reng\n");
        print "$reng\n";
    }

    # Marker telling the reader that the mail was altered by this filter.
    if ( $filtrado != 0 ) {
        print "filtrado por kmail-html-strip\n";
    }
    return;
}

################
#
# Strips one chomped input line and returns the resulting text.
# Reads and updates the carry-over state ($block, $block_url, $block_fin).
#
sub process_line {
    my ($reng) = @_;

    # Every <br>, <BR/>, <br /> ... becomes a line break.
    $reng =~ s{<br\s*/?>}{\n}gi;

    # First finish whatever tag the previous line left open.  Only one of the
    # two continuation handlers may run per line: strip_url can itself switch
    # to state 2, and that state belongs to the NEXT line.
    if ( $block == 1 ) {
        $reng = strip_url( $reng, $block_url, $block_fin );
    }
    elsif ( $block == 2 ) {
        $reng = strip_fin( $reng, $block_fin );
    }

    # Once nothing is pending, the (rest of the) line is ordinary text.
    if ( $block == 0 ) {
        for my $rule (@TAG_RULES) {
            my ( $tag, $str_fin ) = @{$rule};
            while ( _find( $reng, $tag ) >= 0 ) {
                $reng = strip( $tag, $reng, $str_fin );
            }
        }
        while ( _find( $reng, '<img' ) >= 0 ) {
            $reng = strip_special( $reng, '<img', 'src=', '>' );
        }
    }
    return $reng;
}

################
#
# Removes the first occurrence of $tag in $reng, up to and including the
# terminator $str_fin, and returns the remaining text.
# If the terminator is not on this line, everything from the tag onwards is
# dropped and state 2 is entered so the following lines are consumed up to
# the terminator.  Returns $reng unchanged when the tag does not occur.
#
sub strip {
    my ( $tag, $reng, $str_fin ) = @_;

    my $inicio = _find( $reng, $tag );
    return $reng if $inicio < 0;

    # The terminator can only start after the tag name itself.
    my $fin = _find( $reng, $str_fin, $inicio + length($tag) );
    $filtrado++;

    if ( $fin < 0 ) {
        $block_fin = $str_fin;
        $block     = 2;
        return substr( $reng, 0, $inicio );
    }
    return substr( $reng, 0, $inicio ) . substr( $reng, $fin + length($str_fin) );
}

################
#
# Continuation for a tag opened on an earlier line (state 2).
# Drops everything up to and including $str_fin and returns the rest of the
# line; returns '' and stays in state 2 when the terminator is not here.
#
sub strip_fin {
    my ( $reng, $str_fin ) = @_;

    my $fin = _find( $reng, $str_fin );
    if ( $fin < 0 ) {
        $block = 2;
        return '';
    }
    $block = 0;
    return substr( $reng, $fin + length($str_fin) );
}

################
#
# Continuation for a URL-carrying tag opened on an earlier line (state 1).
# Returns the URL (if it is on this line) followed by whatever comes after
# the end of the tag, and updates the carry-over state:
#   tag end found            -> state 0
#   URL found, no tag end    -> state 2
#   neither found            -> state 1 is kept, '' is returned
#
sub strip_url {
    my ( $reng, $str_url, $str_fin ) = @_;

    my ( $text, $state ) = _strip_keeping_url( $reng, 0, 0, $str_url, $str_fin );
    $block = $state;
    if ($state) {
        $block_url = $str_url;
        $block_fin = $str_fin;
    }
    _debug("strip_url: state=$state text=$text\n");
    return $text;
}

#########################################
#
# Replaces the first occurrence of a URL-carrying tag (e.g. "<img") by the
# value of its $str_url attribute (e.g. "src=") and returns the new text.
# If the tag is not closed on this line the carry-over state is set so that
# strip_url / strip_fin finish the job on the following lines.
# Returns $reng unchanged when the tag does not occur.
#
sub strip_special {
    my ( $reng, $tag, $str_url, $str_fin ) = @_;
    _debug("special strip\n");

    my $tag_ini = _find( $reng, $tag );
    return $reng if $tag_ini < 0;

    my ( $text, $state ) =
      _strip_keeping_url( $reng, $tag_ini, $tag_ini + length($tag), $str_url, $str_fin );

    # A completed tag must not reset $block: strip() may already have found
    # an unterminated tag further along on this same line.
    if ($state) {
        $block     = $state;
        $block_url = $str_url;
        $block_fin = $str_fin;
    }
    $filtrado++;
    _debug("tag_ini=$tag_ini state=$state r_len=" . length($reng) . "\n");
    return $text;
}

#########################################
#
# Private helpers
#

# Shared worker of strip_special and strip_url.
# Cuts $reng from offset $cut_from up to the end of the tag, keeping only
# the URL attribute value.  The URL marker and the terminator are searched
# from offset $scan_from.  Returns ($text, $state) where $state is
#   0  the tag was closed on this line
#   1  the tag is still open and no URL has been seen
#   2  the tag is still open but its URL has been extracted
sub _strip_keeping_url {
    my ( $reng, $cut_from, $scan_from, $str_url, $str_fin ) = @_;

    my $u_len   = length($str_url);
    my $tag_end = _find( $reng, $str_fin, $scan_from );
    my $limit   = $tag_end < 0 ? length($reng) : $tag_end;

    # A marker located after the end of the tag belongs to the text that
    # follows, not to this tag.
    my $url_at = _find( $reng, $str_url, $scan_from );
    $url_at = -1 if $url_at + $u_len > $limit;

    my $url = '';
    if ( $url_at >= 0 ) {
        my $val_at = $url_at + $u_len;
        $url = substr( $reng, $val_at, $limit - $val_at );
        if ( $url =~ s/\A(["'])// ) {

            # Quoted value: it ends at the matching quote.
            my $quote = $1;
            $url =~ s/\Q$quote\E.*\z//s;
        }
        else {

            # Bare value: it ends at the first blank, quote or '>'.
            $url =~ s/[\s"'>].*\z//s;
        }
    }

    my $head = substr( $reng, 0, $cut_from );
    if ( $tag_end >= 0 ) {
        return ( $head . $url . substr( $reng, $tag_end + length($str_fin) ), 0 );
    }
    return ( $head . $url, $url_at >= 0 ? 2 : 1 );
}

# Case-insensitive index(): offset of $needle in $haystack at or after
# $from (default 0), or -1.  HTML tag and attribute names are not case
# sensitive, so "<BODY" has to match the rule "<body".
sub _find {
    my ( $haystack, $needle, $from ) = @_;
    $from = 0 if !defined $from;
    return index( lc $haystack, lc $needle, $from );
}

# Writes a trace message to STDERR when debugging is switched on, so the
# filtered mail on STDOUT stays clean.
sub _debug {
    my (@msg) = @_;
    print {*STDERR} @msg if $debug;
    return;
}

__END__

--
Configure bugmail: https://bugs.kde.org/userprefs.cgi?tab=email
------- You are receiving this mail because: -------
You are the assignee for the bug.
_______________________________________________
Kdepim-bugs mailing list
Kdepim-bugs@kde.org
https://mail.kde.org/mailman/listinfo/kdepim-bugs