> Tesseract is an open source OCR program. It can already
> produce searchable PDF and will soon support streaming.
> It would be fun to support something like this:
>
> scanimage --batch | tesseract - - pdf > searchable.pdf
>
> To make this work nicely, scanimage would need to
> print the name of each file to stdout after it is written.
>
> Thoughts?
Hi,
We had a different requirement for batch processing and added a
--batch-script option to our SANE build. It may be useful for you as well;
the patch is attached.
Regards,
Simon
diff -Naur sane-backends-1.0.21.orig/doc/scanimage.man sane-backends-1.0.21/doc/scanimage.man
--- sane-backends-1.0.21.orig/doc/scanimage.man 2010-04-05 15:18:05.000000000 +0200
+++ sane-backends-1.0.21/doc/scanimage.man 2010-08-23 11:27:48.000000000 +0200
@@ -22,6 +22,8 @@
.RB [ \-\-batch\-increment
.IR increment ]
.RB [ \-\-batch\-double ]
+.RB [ \-\-batch\-script
+.IR script ]
.RB [ \-\-accept\-md5\-only ]
.RB [ \-p | \-\-progress ]
.RB [ \-n | \-\-dont\-scan ]
@@ -189,6 +191,11 @@
.B \-\-batch\-prompt
will ask for pressing RETURN before scanning a page. This can be used for
scanning multiple pages without an automatic document feeder.
+.B \-\-batch\-script
+.I script
+will run the script with the temporary file name as argument 1 and the page
+number as argument 2 after scanning a page but before renaming it to its
+final output file name.
.PP
The
.B \-\-accept\-md5\-only
diff -Naur sane-backends-1.0.21.orig/frontend/scanimage.c sane-backends-1.0.21/frontend/scanimage.c
--- sane-backends-1.0.21.orig/frontend/scanimage.c 2010-04-05 15:18:05.000000000 +0200
+++ sane-backends-1.0.21/frontend/scanimage.c 2010-08-23 11:26:42.000000000 +0200
@@ -2,7 +2,7 @@
Uses the SANE library.
Copyright (C) 1996, 1997, 1998 Andreas Beck and David Mosberger
- Copyright (C) 1999 - 2009 by the SANE Project -- See AUTHORS and ChangeLog
+ Copyright (C) 1999 - 2010 by the SANE Project -- See AUTHORS and ChangeLog
for details.
For questions and comments contact the sane-devel mailinglist (see
@@ -75,7 +75,8 @@
#define OPTION_BATCH_START_AT 1004
#define OPTION_BATCH_DOUBLE 1005
#define OPTION_BATCH_INCREMENT 1006
-#define OPTION_BATCH_PROMPT 1007
+#define OPTION_BATCH_PROMPT 1007
+#define OPTION_BATCH_SCRIPT 1008
#define BATCH_COUNT_UNLIMITED -1
@@ -95,6 +96,7 @@
{"batch-double", no_argument, NULL, OPTION_BATCH_DOUBLE},
{"batch-increment", required_argument, NULL, OPTION_BATCH_INCREMENT},
{"batch-prompt", no_argument, NULL, OPTION_BATCH_PROMPT},
+ {"batch-script", required_argument, NULL, OPTION_BATCH_SCRIPT},
{"format", required_argument, NULL, OPTION_FORMAT},
{"accept-md5-only", no_argument, NULL, OPTION_MD5},
{"icc-profile", required_argument, NULL, 'i'},
@@ -1670,6 +1672,7 @@
const char *devname = 0;
const char *defdevname = 0;
const char *format = 0;
+ const char *script = 0;
char readbuf[2];
char *readbuf2;
int batch = 0;
@@ -1677,6 +1680,7 @@
int batch_count = BATCH_COUNT_UNLIMITED;
int batch_start_at = 1;
int batch_increment = 1;
+ int batch_script = 0;
SANE_Status status;
char *full_optstring;
SANE_Int version_code;
@@ -1754,6 +1758,10 @@
batch_count = atoi (optarg);
batch = 1;
break;
+ case OPTION_BATCH_SCRIPT:
+ batch_script = 1;
+ script = optarg;
+ break;
case OPTION_FORMAT:
if (strcmp (optarg, "tiff") == 0)
output_format = OUTPUT_TIFF;
@@ -1924,6 +1932,8 @@
--batch-double increment page number by two, same as\n\
--batch-increment=2\n\
--batch-prompt ask for pressing a key before scanning a page\n\
+ --batch-script=SCRIPT run script after scanning a page but before renaming\n\
+ it to its output file name\n\
--accept-md5-only only accept authorization requests using md5\n");
printf ("\
-p, --progress print progress messages\n\
@@ -2217,6 +2227,7 @@
{
char path[PATH_MAX];
char part_path[PATH_MAX];
+ char cmd[PATH_MAX * 2];
if (batch) /* format is NULL unless batch mode */
{
sprintf (path, format, n); /* love --(C++) */
@@ -2295,6 +2306,28 @@
}
else
{
+ /* run script before renaming scanned file */
+ if (batch_script && NULL != script)
+ {
+ sprintf (cmd, "%s '%s' %d", script, part_path, n);
+ switch (system (cmd))
+ {
+ case -1:
+ fprintf (stderr, "cannot run script `%s'\n", cmd);
+ sane_cancel (device);
+ return SANE_STATUS_ACCESS_DENIED;
+ break;
+ case 0:
+ /* the only case considered success */
+ break;
+ default:
+ fprintf (stderr,
+ "non-zero exit status running `%s'\n", cmd);
+ sane_cancel (device);
+ return SANE_STATUS_ACCESS_DENIED;
+ break;
+ }
+ }
/* let the fully scanned file show up */
if (rename (part_path, path))
{
--
sane-devel mailing list: sane-devel@lists.alioth.debian.org
http://lists.alioth.debian.org/cgi-bin/mailman/listinfo/sane-devel
Unsubscribe: Send mail with subject "unsubscribe your_password"
to sane-devel-requ...@lists.alioth.debian.org