On 3/9/2010 8:34 AM, Daniel Lemke wrote:

Is there a reason why the "latest" (2007-01-17) version of ImageInfo plugin is not part of the SA source?


I have not researched this, but I would guess a bug was never opened for this purpose or it was just a CLA/Licensing issue. Overall, the changes appear straightforward, with the major addition being image_name_regex. I've reviewed the code and created a diff patch.

Dallas, could you open a bug to update this?

After reviewing the rules and the code, the attached patch and the additional rules below are the differences for consideration; you could add them as patches.

# double dot gifs (from Kevin)
body CG_DOUBLEDOT_GIF eval:image_name_regex('/^\w{2,9}\.\.gif$/')
describe        CG_DOUBLEDOT_GIF        Double dotted image name
score           CG_DOUBLEDOT_GIF        1.4

# emails containing pictures from digital cameras (from Kevin)
# although I think most of these images will be >256kb (unless they
# have been post-processed) so SA probably isn't scanning them.

# Nikon/Sony: DSC00001.JPG
body            CG_SONY_JPG     eval:image_name_regex('/^DSC\d{5}\.JPG$/')
describe        CG_SONY_JPG     Looks like a Sony/Nikon digital camera shot
score           CG_SONY_JPG     -0.1

# Fuji: DSCF0001.JPG
body            CG_FUJI_JPG     eval:image_name_regex('/^DSCF\d{4}\.JPG$/')
describe        CG_FUJI_JPG     Looks like a Fuji digital camera shot
score           CG_FUJI_JPG     -0.1

# Canon: BODY0001.JPG
body            CG_CANON_JPG    eval:image_name_regex('/^\w{4}\d{4}\.JPG$/')
describe        CG_CANON_JPG    Canon's semi random image names
score           CG_CANON_JPG    -0.1

regards,
KAM
--- /usr/src/Mail-SpamAssassin-3.3.0/blib/lib/Mail/SpamAssassin/Plugin/ImageInfo.pm        Thu Jan 21 07:15:15 2010
+++ ImageInfo.pm        Wed Jan 17 15:00:01 2007
@@ -16,6 +15,22 @@
 # </@LICENSE>
 #
 # -------------------------------------------------------
+# ImageInfo Plugin for SpamAssassin
+# Version: 0.7
+# Created: 2006-08-02
+# Modified: 2007-01-17
+#
+# Changes: 
+#   0.7 - added image_name_regex to allow pattern matching on the image name
+#       - added support for image/pjpeg content types (progressive jpeg)
+#       - updated imageinfo.cf with a few sample rules for using image_name_regex()
+#   0.6 - fixed dems_ bug in image_size_range_
+#   0.5 - added image_named and image_to_text_ratio
+#   0.4 - added image_size_exact and image_size_range
+#   0.3 - added jpeg support
+#   0.2 - optimized by theo
+#   0.1 - added gif/png support
+#
 #
 # Usage:
 #  image_count()
@@ -27,7 +56,7 @@
 #        max: optional, if specified, message must not 
 #             contain more than this number of images
 #
-#  examples
+#  image_count() examples
 # 
 #     body ONE_IMAGE  eval:image_count('all',1,1) 
 #     body ONE_OR_MORE_IMAGES  eval:image_count('all',1)
@@ -44,13 +73,21 @@
 #        max: optional, if specified, message must not
 #             contain more than this much pixel area
 #
-#  examples
+#   pixel_coverage() examples
+#
+#     body LARGE_IMAGE_AREA  eval:pixel_coverage('all',150000)  # catches any images that are 150k pixel/sq or higher
+#     body SMALL_GIF_AREA  eval:pixel_coverage('gif',1,40000)   # catches only gifs that are 1 to 40k pixel/sq
+#
+#  image_name_regex()
+# 
+#     body RULENAME  eval:image_name_regex(<regex>) 
+#        regex: full quoted regexp, see examples below
+#
+#  image_name_regex() examples
+#
+#     body CG_DOUBLEDOT_GIF  eval:image_name_regex('/^\w{2,9}\.\.gif$/i') # catches double dot gifs  abcd..gif
 #
-#     body LARGE_IMAGE_AREA  eval:pixel_coverage('all',150000)
-#     body SMALL_GIF_AREA  eval:pixel_coverage('gif',1,40000)
 #
-#  See the ruleset for ways to meta image_count() 
-#  and pixel_coverage() together.  
 #
 # -------------------------------------------------------
 
@@ -81,6 +120,7 @@
   $self->register_eval_rule ("image_size_exact");
   $self->register_eval_rule ("image_size_range");
   $self->register_eval_rule ("image_named");
+  $self->register_eval_rule ("image_name_regex");
   $self->register_eval_rule ("image_to_text_ratio");
 
   return $self;
@@ -201,13 +241,12 @@
     $pms->{'imageinfo'}->{"count_$type"} = 0;
   }
 
-  foreach my $p ($pms->{msg}->find_parts(qr@^image/(?:gif|png|jpe?g)$@, 1)) {
+  foreach my $p ($pms->{msg}->find_parts(qr@^image/(?:gif|png|jpeg)$@, 1)) {
     # make sure its base64 encoded
     my $cte = lc $p->get_header('content-transfer-encoding') || '';
     next if ($cte !~ /^base64$/);
 
     my ($type) = $p->{'type'} =~ m@/(\w+)$@;
-    $type='jpeg' if $type eq 'jpg';
     if ($type && exists $get_details{$type}) {
        $get_details{$type}->($pms,$p);
        $pms->{'imageinfo'}->{"count_$type"} ++;
@@ -246,6 +285,34 @@
 
 # -----------------------------------------
 
+# Eval rule: returns 1 if any image name matches $re (a full quoted regexp
+# such as '/^DSC\d{5}\.JPG$/'), 0 if none does, nothing if $re is undefined.
+sub image_name_regex {
+  my ($self,$pms,$body,$re) = @_;
+  return unless (defined $re);
+
+  # make sure we have image data read in.
+  $self->_get_images($pms) if (!exists $pms->{'imageinfo'});
+  return 0 unless (exists $pms->{'imageinfo'}->{"names_all"});
+
+  my $hit = 0;
+  foreach my $name (keys %{$pms->{'imageinfo'}->{"names_all"}}) {
+    dbg("imageinfo: checking image named $name against regex $re");
+    # Match the lexical $name inside the eval; never splice the name into the
+    # eval'd source: it is attacker-chosen and a '}' would break out of q{}.
+    my $eval = 'if ($name =~  ' . $re . ') {  $hit = 1; } ';
+    eval $eval;
+    dbg("imageinfo: error in regex $re - $@") if $@;
+    if ($hit) {
+      dbg("imageinfo: image_name_regex hit on $name");
+      return 1;
+    }
+  }
+  return 0;
+}
+
+# -----------------------------------------
+
 sub image_count {
   my ($self,$pms,$body,$type,$min,$max) = @_;
   
@@ -325,7 +392,8 @@
     $self->_get_images($pms);
   }
 
-  return unless (exists $pms->{'imageinfo'}->{"dems_$type"});
+  my $name = 'dems_'.$type;
+  return unless (exists $pms->{'imageinfo'}->{$name});
 
   foreach my $dem ( keys %{$pms->{'imageinfo'}->{"dems_$type"}}) {
     my ($h,$w) = split(/x/,$dem);

Reply via email to