> 
> I'm running Spamassassin on OpenSuse 10.2 and have just installed
> FuzzyOCR.
> 
> It appears to be working in that it scans/detects words in the supplied
> test files.
> 
> I noticed "spamassassin --lint" gives:
> 
> [25313] warn: FuzzyOcr: Cannot find executable for pamthreshold
> [25313] warn: FuzzyOcr: Cannot find executable for tesseract
> 
> Which seems fair enough as I don't have them.
> 
> Is it just a spurious warning though or do I need to be concerned?
> 
> Also as a general question other than adding words to the wordlist as
> and when, are there any "Must Know" tips n tricks for FuzzyOCR?
> 
> cheers,

Hi,

Take a look here (http://www200.pair.com/mecham/spam/image_spam2.html) and use 
the patches for netpbm < 10.34


Or do the following (works for me):

1) Download latest stable version:
# svn checkout https://netpbm.svn.sourceforge.net/svnroot/netpbm/stable netpbm

2) Apply this patch:
diff -Naur netpbm-10.35.21/Makefile.config.in 
netpbm-10.35.21-patched/Makefile.config.in
--- netpbm-10.35.21/Makefile.config.in  2007-01-14 16:18:25.000000000 +0200
+++ netpbm-10.35.21-patched/Makefile.config.in  2007-01-14 16:33:59.304432096 
+0200
@@ -108,7 +108,7 @@
 #OSF1:
 #INSTALL = $(SRCDIR)/buildtools/installosf
 #Red Hat Linux:
-#INSTALL = install
+INSTALL = install

 # STRIPFLAG is the option you pass to the above install program to make it
 # strip unnecessary information out of binaries.
@@ -280,9 +280,9 @@
 # compiler/linker).  Build-time linking fails without it.  I don't
 # know why -- history seems to be repeating itself.  2005.02.23.

-CFLAGS_SHLIB =
+# CFLAGS_SHLIB =
 # Solaris or SunOS with gcc, and NetBSD:
-#CFLAGS_SHLIB = -fpic
+CFLAGS_SHLIB = -fPIC
 #CFLAGS_SHLIB = -fPIC
 # Sun compiler:
 #CFLAGS_SHLIB = -Kpic
@@ -350,7 +350,7 @@
 # The TIFF library.  See above.  If you want to build the tiff
 # converters, you must have the tiff library already installed.

-TIFFLIB = NONE
+TIFFLIB = libtiff.so
 TIFFHDR_DIR =

 #TIFFLIB = libtiff.so
@@ -382,7 +382,7 @@
 # JPEG stuff statically linked in, in which case you won't need
 # JPEGLIB in order to build the Tiff converters.

-JPEGLIB = NONE
+JPEGLIB = libjpeg.so
 JPEGHDR_DIR =
 #JPEGLIB = libjpeg.so
 #JPEGHDR_DIR = /usr/include/jpeg
@@ -413,7 +413,7 @@
 # case, PNGLIB and PNGHDR_DIR are irrelevant, but PNGVER is still meaningful,
 # because the make file runs 'libpng$(PNGVER)-config'.

-PNGLIB = NONE
+PNGLIB = libpng.so
 PNGHDR_DIR =
 PNGVER =
 #PNGLIB = libpng$(PNGVER).so
@@ -432,7 +432,7 @@
 #
 # If you have 'libpng-config' (see above), these are irrelevant.

-ZLIB = NONE
+ZLIB = libz.so
 ZHDR_DIR =
 #ZLIB = libz.so

diff -Naur netpbm-10.35.21/converter/other/fiasco/codec/dfiasco.c 
netpbm-10.35.21-patched/converter/other/fiasco/codec/dfiasco.c
--- netpbm-10.35.21/converter/other/fiasco/codec/dfiasco.c      2007-01-14 
16:18:03.000000000 +0200
+++ netpbm-10.35.21-patched/converter/other/fiasco/codec/dfiasco.c      
2007-01-14 16:37:35.780522728 +0200
@@ -15,7 +15,7 @@
  */

 #include <string.h>
-
+#include <stdlib.h>
 #include "config.h"

 #include "types.h"
diff -Naur netpbm-10.35.21/converter/other/fiasco/config.h 
netpbm-10.35.21-patched/converter/other/fiasco/config.h
--- netpbm-10.35.21/converter/other/fiasco/config.h     2007-01-14 
16:18:03.000000000 +0200
+++ netpbm-10.35.21-patched/converter/other/fiasco/config.h     2007-01-14 
16:36:00.265043288 +0200
@@ -25,6 +25,12 @@
    byte first (like Motorola and SPARC, unlike Intel and VAX).  */
 /* #undef WORDS_BIGENDIAN */

+/* since we don't have autoconf... */
+#include <endian.h>
+#if __BYTE_ORDER == __BIG_ENDIAN
+#define WORDS_BIGENDIAN 1
+#endif
+
 /* Define if the X Window System is missing or not being used.  */
 #define X_DISPLAY_MISSING 1

diff -Naur netpbm-10.35.21/converter/other/fiasco/input/basis.c 
netpbm-10.35.21-patched/converter/other/fiasco/input/basis.c
--- netpbm-10.35.21/converter/other/fiasco/input/basis.c        2007-01-14 
16:18:00.000000000 +0200
+++ netpbm-10.35.21-patched/converter/other/fiasco/input/basis.c        
2007-01-14 16:38:10.711212456 +0200
@@ -13,7 +13,7 @@
  *  $Revision: 5.3 $
  *  $State: Exp $
  */
-
+#include <string.h>
 #include "config.h"

 #include "types.h"
diff -Naur netpbm-10.35.21/converter/pbm/icontopbm.c 
netpbm-10.35.21-patched/converter/pbm/icontopbm.c
--- netpbm-10.35.21/converter/pbm/icontopbm.c   2007-01-14 16:18:22.000000000 
+0200
+++ netpbm-10.35.21-patched/converter/pbm/icontopbm.c   2007-01-14 
16:43:50.478559968 +0200
@@ -13,6 +13,7 @@
 #include <string.h>

 #include "nstring.h"
+#include <limits.h>
 #include "pbm.h"

 /* size in bytes of a bitmap */
diff -Naur netpbm-10.35.21/converter/ppm/ppmtowinicon.c 
netpbm-10.35.21-patched/converter/ppm/ppmtowinicon.c
--- netpbm-10.35.21/converter/ppm/ppmtowinicon.c        2007-01-14 
16:18:20.000000000 +0200
+++ netpbm-10.35.21-patched/converter/ppm/ppmtowinicon.c        2007-01-14 
16:46:54.505583608 +0200
@@ -12,7 +12,7 @@

 #include <math.h>
 #include <string.h>
-
+#include <stdlib.h>
 #include "winico.h"
 #include "ppm.h"
 #include "mallocvar.h"
diff -Naur netpbm-10.35.21/editor/pnmrotate.c 
netpbm-10.35.21-patched/editor/pnmrotate.c
--- netpbm-10.35.21/editor/pnmrotate.c  2007-01-14 16:17:42.000000000 +0200
+++ netpbm-10.35.21-patched/editor/pnmrotate.c  2007-01-14 16:48:03.604079040 
+0200
@@ -16,6 +16,7 @@
 #include <assert.h>

 #include "pnm.h"
+#include <limits.h>
 #include "shhopt.h"
 #include "mallocvar.h"

diff -Naur netpbm-10.35.21/editor/pnmshear.c 
netpbm-10.35.21-patched/editor/pnmshear.c
--- netpbm-10.35.21/editor/pnmshear.c   2007-01-14 16:17:42.000000000 +0200
+++ netpbm-10.35.21-patched/editor/pnmshear.c   2007-01-14 16:48:41.694288448 
+0200
@@ -14,7 +14,7 @@

 #include <math.h>
 #include <string.h>
-
+#include <limits.h>
 #include "pnm.h"
 #include "shhopt.h"

diff -Naur netpbm-10.35.21/lib/libpm.c netpbm-10.35.21-patched/lib/libpm.c
--- netpbm-10.35.21/lib/libpm.c 2007-01-14 16:17:51.000000000 +0200
+++ netpbm-10.35.21-patched/lib/libpm.c 2007-01-14 16:49:14.410314856 +0200
@@ -36,6 +36,7 @@
     /* This makes the the x64() functions available on AIX */

 #include <stdio.h>
+#include <limits.h>
 #include <stdarg.h>
 #include <string.h>
 #include <errno.h>
diff -Naur netpbm-10.35.21/lib/pm.h netpbm-10.35.21-patched/lib/pm.h
--- netpbm-10.35.21/lib/pm.h    2007-01-14 16:17:51.000000000 +0200
+++ netpbm-10.35.21-patched/lib/pm.h    2007-01-14 16:38:55.133459248 +0200
@@ -21,7 +21,7 @@
 #include <errno.h>
 #include <setjmp.h>
 #include <sys/stat.h>
-
+#include <string.h>
 #ifdef VMS
 #include <perror.h>
 #endif
diff -Naur netpbm-10.35.21/urt/rle.h netpbm-10.35.21-patched/urt/rle.h
--- netpbm-10.35.21/urt/rle.h   2007-01-14 16:18:25.000000000 +0200
+++ netpbm-10.35.21-patched/urt/rle.h   2007-01-14 16:39:20.813555280 +0200
@@ -33,7 +33,7 @@
 #include "rle_config.h"     /* Configuration parameters. */

 #include <stdio.h>      /* Declare FILE. */
-
+#include <string.h>
 #ifdef c_plusplus
 #define USE_PROTOTYPES
 #endif

3)
# cp Makefile.config.in Makefile.config

4) 
# make
.
.
  >build_complete

5)
After doing a 'make', do

# make package pkgdir=/tmp/binaries

to copy all the Netpbm files you need to install into the
directory /tmp/binaries.

6)
Run buildtools/installnetpbm.pl to install from there (/tmp/binaries) to your
system via an interactive dialog.  Or do it manually using simple copy commands,
following the instructions in the file DIR/README.

I chose to install to /usr (as the default rpm installation does); 
you may of course choose a better location for the binaries and man pages:
------------------------------------------------------------------------------------
# buildtools/installnetpbm.pl
Welcome to the Netpbm install dialogue.  We will now proceed
to interactively install Netpbm on this system.

You must have already built Netpbm and then packaged it for
installation by running 'make package'.  See the INSTALL file.

Where is the install package you created with 'make package'?
package directory (/tmp/netpbm) ==> /tmp/binaries

Enter the default prefix for installation locations.  I will use
this in generating defaults for the following prompts to save you
typing.  If you plan to spread Netpbm across your system,
enter '/'.

install prefix (/usr) ==> /usr

program directory (/usr/bin) ==> /usr/bin

Installing programs...
Done.

Where do you want the shared library installed?

shared library directory (/usr/lib) ==> /usr/lib

Installing shared libraries...
done.

In order for the Netpbm shared library to be found when you invoke
A Netpbm program, you must either set an environment variable to
tell where to look for it, or you must put its location in the shared
library location cache.  Do you want to run Ldconfig now to put the
Netpbm shared library in the cache?  This works only if you have
installed the library in a standard location.

Y(es) or N(o) (Y) ==> Y
Ldconfig completed successfully.


Where do you want the static link library installed?

static library directory (/usr/lib) ==> /usr/lib

Installing link libraries.
done.

Where do you want the data files installed?

data file directory (/usr/lib) ==> /usr/lib

Installing data files...
done.

Where do you want the library interface header files installed?

header directory (/usr/include) ==> /usr/include

Installing interface header files...
done.

Where do you want the man pages installed?

man page directory (/usr/man) ==> /usr/man

Installing man pages...
done.

You don't have a /etc/manweb.conf, which is the configuration
file for the 'manweb' program, which is a quick way to get to Netpbm
documentation.  Would you like to create one now?
create /etc/manweb.conf (Y) ==> Y
Your manweb.conf file says top level documentation is in /usr/man/web,
but you installed netpbm.url in /usr/local/netpbm/man/web.
Do you want to create a symlink in /usr/man/web now?
create symlink (Y/N) (Y) ==> Y

Installation is complete (except where previous error messages have
indicated otherwise).

7) Now let's check that FuzzyOcr can use our new binaries:
In FuzzyOcr.cf temporarily set:
focr_autodisable_score 50

Now run the following and see FuzzyOcr in action:
# su -c "spamassassin --debug FuzzyOcr < animated-gif.eml > /dev/null" vscan

Check for these lines:
2007-01-17 17:28:30 [5288] Option logfile = /var/spool/amavis/FuzzyOcr.log
2007-01-17 17:28:30 [5288] Option global_wordlist = 
/etc/mail/spamassassin/FuzzyOcr.words
2007-01-17 17:28:30 [5288] Found scan: $gocr -i -d 2 -l 100 $pfile
2007-01-17 17:28:30 [5288] Found scan: $ocrad -s5 $pfile
2007-01-17 17:28:30 [5288] Found scan: $ocrad -s5 -i $pfile
2007-01-17 17:28:30 [5288] Score{wrongctype} = 0
2007-01-17 17:28:30 [5288] Score{autodisable} = 50
2007-01-17 17:28:30 [5288] Option counts_required = 2
2007-01-17 17:28:30 [5288] Using gifsicle => /usr/bin/gifsicle
2007-01-17 17:28:30 [5288] Using giffix => /usr/bin/giffix
2007-01-17 17:28:30 [5288] Using giftext => /usr/bin/giftext
2007-01-17 17:28:30 [5288] Using gifinter => /usr/bin/gifinter
2007-01-17 17:28:30 [5288] Using giftopnm => /usr/bin/giftopnm
2007-01-17 17:28:30 [5288] Using jpegtopnm => /usr/bin/jpegtopnm
2007-01-17 17:28:30 [5288] Using pngtopnm => /usr/bin/pngtopnm
2007-01-17 17:28:30 [5288] Using bmptopnm => /usr/bin/bmptopnm
2007-01-17 17:28:30 [5288] Using tifftopnm => /usr/bin/tifftopnm
2007-01-17 17:28:30 [5288] Using ppmhist => /usr/bin/ppmhist
2007-01-17 17:28:30 [5288] Using pamfile => /usr/bin/pamfile
2007-01-17 17:28:30 [5288] Using gocr => /usr/bin/gocr
2007-01-17 17:28:30 [5288] Using ocrad => /usr/bin/ocrad
2007-01-17 17:28:30 [5288] Loaded <67> words from 
"/etc/mail/spamassassin/FuzzyOcr.words"


And these:
.
[30046] dbg: FuzzyOcr: Found word "industries" in line
[30046] dbg: FuzzyOcr: 
"leadingbrandsintheaudiouisualcomputerandhometheaterindustriesinthe"
[30046] dbg: FuzzyOcr: with fuzz of 0.0000 scanned with scanset $ocrad -s5 
$pfile
[30046] dbg: FuzzyOcr: Found word "international" in line
[30046] dbg: FuzzyOcr: 
"unitedstatescanadaandlatinamericauniquelyrecogizednationallyand"
[30046] dbg: FuzzyOcr: with fuzz of 0.2308 scanned with scanset $ocrad -s5 
$pfile
[30046] dbg: FuzzyOcr: Message is spam, score = 66.000
[30046] dbg: FuzzyOcr: Remove DIR: /tmp/.spamassassin300462uqKDXtmp
[30046] dbg: FuzzyOcr: FuzzyOcr ending successfully...


Looks OK to me :)

Reply via email to