On 3/9/2010 8:34 AM, Daniel Lemke wrote:
Is there a reason why the "latest" (2007-01-17) version of ImageInfo
plugin is not part of the SA source?
I have not researched this but I would guess a bug was never opened for
this purpose or it was just a CLA/Licensing issue. Overall, the changes
appear straightforward with the major addition being the
image_name_regex. I've reviewed the code and created a diff patch.
Dallas, could you open a bug to update this?
From reviewing the rules and the code, the differences for consideration
are the attached patch and the additional rules below, which you could
add as patches.
# double dot gifs (from Kevin)
body CG_DOUBLEDOT_GIF eval:image_name_regex('/^\w{2,9}\.\.gif$/')
describe CG_DOUBLEDOT_GIF Double dotted image name
score CG_DOUBLEDOT_GIF 1.4
# emails containing pictures from digital cameras (from Kevin)
# although I think most of these images will be >256kb (unless they
# have been post-processed) so SA probably isn't scanning them.
# Nikon/Sony: DSC00001.JPG
body CG_SONY_JPG eval:image_name_regex('/^DSC\d{5}\.JPG$/')
describe CG_SONY_JPG Looks like a Sony/Nikon digital camera shot
score CG_SONY_JPG -0.1
# Fuji: DSCF0001.JPG
body CG_FUJI_JPG eval:image_name_regex('/^DSCF\d{4}\.JPG$/')
describe CG_FUJI_JPG Looks like a Fuji digital camera shot
score CG_FUJI_JPG -0.1
# Canon: BODY0001.JPG
body CG_CANON_JPG eval:image_name_regex('/^\w{4}\d{4}\.JPG$/')
describe CG_CANON_JPG Canon's semi random image names
score CG_CANON_JPG -0.1
regards,
KAM
--- /usr/src/Mail-SpamAssassin-3.3.0/blib/lib/Mail/SpamAssassin/Plugin/ImageInfo.pm Thu Jan 21 07:15:15 2010
+++ ImageInfo.pm Wed Jan 17 15:00:01 2007
@@ -16,6 +15,22 @@
# </@LICENSE>
#
# -------------------------------------------------------
+# ImageInfo Plugin for SpamAssassin
+# Version: 0.7
+# Created: 2006-08-02
+# Modified: 2007-01-17
+#
+# Changes:
+# 0.7 - added image_name_regex to allow pattern matching on the image name
+# - added support for image/pjpeg content types (progressive jpeg)
+# - updated imageinfo.cf with a few sample rules for using image_name_regex()
+# 0.6 - fixed dems_ bug in image_size_range_
+# 0.5 - added image_named and image_to_text_ratio
+# 0.4 - added image_size_exact and image_size_range
+# 0.3 - added jpeg support
+# 0.2 - optimized by theo
+# 0.1 - added gif/png support
+#
#
# Usage:
# image_count()
@@ -27,7 +56,7 @@
# max: optional, if specified, message must not
# contain more than this number of images
#
-# examples
+# image_count() examples
#
# body ONE_IMAGE eval:image_count('all',1,1)
# body ONE_OR_MORE_IMAGES eval:image_count('all',1)
@@ -44,13 +73,21 @@
# max: optional, if specified, message must not
# contain more than this much pixel area
#
-# examples
+# pixel_coverage() examples
+#
+# body LARGE_IMAGE_AREA eval:pixel_coverage('all',150000) # catches any images that are 150k pixel/sq or higher
+# body SMALL_GIF_AREA eval:pixel_coverage('gif',1,40000) # catches only gifs that are 1 to 40k pixel/sq
+#
+# image_name_regex()
+#
+# body RULENAME eval:image_name_regex(<regex>)
+# regex: full quoted regexp, see examples below
+#
+# image_name_regex() examples
+#
+# body CG_DOUBLEDOT_GIF eval:image_name_regex('/^\w{2,9}\.\.gif$/i') # catches double dot gifs abcd..gif
#
-# body LARGE_IMAGE_AREA eval:pixel_coverage('all',150000)
-# body SMALL_GIF_AREA eval:pixel_coverage('gif',1,40000)
#
-# See the ruleset for ways to meta image_count()
-# and pixel_coverage() together.
#
# -------------------------------------------------------
@@ -81,6 +120,7 @@
$self->register_eval_rule ("image_size_exact");
$self->register_eval_rule ("image_size_range");
$self->register_eval_rule ("image_named");
+ $self->register_eval_rule ("image_name_regex");
$self->register_eval_rule ("image_to_text_ratio");
return $self;
@@ -201,13 +241,12 @@
$pms->{'imageinfo'}->{"count_$type"} = 0;
}
- foreach my $p ($pms->{msg}->find_parts(qr@^image/(?:gif|png|jpe?g)$@, 1)) {
+ foreach my $p ($pms->{msg}->find_parts(qr@^image/(?:gif|png|jpeg)$@, 1)) {
# make sure its base64 encoded
my $cte = lc $p->get_header('content-transfer-encoding') || '';
next if ($cte !~ /^base64$/);
my ($type) = $p->{'type'} =~ m@/(\w+)$@;
- $type='jpeg' if $type eq 'jpg';
if ($type && exists $get_details{$type}) {
$get_details{$type}->($pms,$p);
$pms->{'imageinfo'}->{"count_$type"} ++;
@@ -246,6 +285,34 @@
# -----------------------------------------
+# Eval rule: true if any image name in the message matches $re, a quoted
+# regexp literal taken from the rule, e.g. '/^\w{2,9}\.\.gif$/i'.
+sub image_name_regex {
+  my ($self,$pms,$body,$re) = @_;
+  return unless (defined $re);
+
+  # make sure we have image data read in.
+  $self->_get_images($pms) if (!exists $pms->{'imageinfo'});
+  return 0 unless (exists $pms->{'imageinfo'}->{"names_all"});
+
+  my $hit = 0;
+  foreach my $name (keys %{$pms->{'imageinfo'}->{"names_all"}}) {
+    dbg("imageinfo: checking image named $name against regex $re");
+    # Only $re (rule config) is spliced into the evaluated code; the image
+    # name comes from the scanned message, so it is read via the lexical $name.
+    my $eval = 'if ($name =~ ' . $re . ') { $hit = 1; } ';
+    eval $eval;
+    dbg("imageinfo: error in regex $re - $@") if $@;
+    if ($hit) {
+      dbg("imageinfo: image_name_regex hit on $name");
+      return 1;
+    }
+  }
+  return 0;
+}
+
+# -----------------------------------------
+
sub image_count {
my ($self,$pms,$body,$type,$min,$max) = @_;
@@ -325,7 +392,8 @@
$self->_get_images($pms);
}
- return unless (exists $pms->{'imageinfo'}->{"dems_$type"});
+ my $name = 'dems_'.$type;
+ return unless (exists $pms->{'imageinfo'}->{$name});
foreach my $dem ( keys %{$pms->{'imageinfo'}->{"dems_$type"}}) {
my ($h,$w) = split(/x/,$dem);