> Tesseract is an open source OCR program. It can already
> produce searchable PDF and will soon support streaming.
> It would be fun to support something like this:
>
>    scanimage --batch | tesseract - - pdf > searchable.pdf
>
> To make this work nicely, scanimage would need to
> print the name of each file to stdout after it is written.
>
> Thoughts?

Hi,

We had a different requirement for batch processing and added a
--batch-script option to our SANE build. Maybe it could be useful for you;
the patch is attached.

Regards,
Simon
diff -Naur sane-backends-1.0.21.orig/doc/scanimage.man sane-backends-1.0.21/doc/scanimage.man
--- sane-backends-1.0.21.orig/doc/scanimage.man	2010-04-05 15:18:05.000000000 +0200
+++ sane-backends-1.0.21/doc/scanimage.man	2010-08-23 11:27:48.000000000 +0200
@@ -22,6 +22,8 @@
 .RB [ \-\-batch\-increment
 .IR increment ]
 .RB [ \-\-batch\-double ]
+.RB [ \-\-batch\-script
+.IR script ]
 .RB [ \-\-accept\-md5\-only ]
 .RB [ \-p | \-\-progress ]
 .RB [ \-n | \-\-dont\-scan ]
@@ -189,6 +191,11 @@
 .B \-\-batch\-prompt
 will ask for pressing RETURN before scanning a page. This can be used for
 scanning multiple pages without an automatic document feeder.
+.B \-\-batch\-script
+.I script
+will run the script with the temporary file name as argument 1 and the page
+number as argument 2 after scanning a page but before renaming it to its
+final output file name.
 .PP
 The
 .B \-\-accept\-md5\-only
diff -Naur sane-backends-1.0.21.orig/frontend/scanimage.c sane-backends-1.0.21/frontend/scanimage.c
--- sane-backends-1.0.21.orig/frontend/scanimage.c	2010-04-05 15:18:05.000000000 +0200
+++ sane-backends-1.0.21/frontend/scanimage.c	2010-08-23 11:26:42.000000000 +0200
@@ -2,7 +2,7 @@
    Uses the SANE library.
    Copyright (C) 1996, 1997, 1998 Andreas Beck and David Mosberger
    
-   Copyright (C) 1999 - 2009 by the SANE Project -- See AUTHORS and ChangeLog
+   Copyright (C) 1999 - 2010 by the SANE Project -- See AUTHORS and ChangeLog
    for details.
 
    For questions and comments contact the sane-devel mailinglist (see
@@ -75,7 +75,8 @@
 #define OPTION_BATCH_START_AT	1004
 #define OPTION_BATCH_DOUBLE	1005
 #define OPTION_BATCH_INCREMENT	1006
-#define OPTION_BATCH_PROMPT    1007
+#define OPTION_BATCH_PROMPT	1007
+#define OPTION_BATCH_SCRIPT	1008
 
 #define BATCH_COUNT_UNLIMITED -1
 
@@ -95,6 +96,7 @@
   {"batch-double", no_argument, NULL, OPTION_BATCH_DOUBLE},
   {"batch-increment", required_argument, NULL, OPTION_BATCH_INCREMENT},
   {"batch-prompt", no_argument, NULL, OPTION_BATCH_PROMPT},
+  {"batch-script", required_argument, NULL, OPTION_BATCH_SCRIPT},
   {"format", required_argument, NULL, OPTION_FORMAT},
   {"accept-md5-only", no_argument, NULL, OPTION_MD5},
   {"icc-profile", required_argument, NULL, 'i'},
@@ -1670,6 +1672,7 @@
   const char *devname = 0;
   const char *defdevname = 0;
   const char *format = 0;
+  const char *script = 0;
   char readbuf[2];
   char *readbuf2;
   int batch = 0;
@@ -1677,6 +1680,7 @@
   int batch_count = BATCH_COUNT_UNLIMITED;
   int batch_start_at = 1;
   int batch_increment = 1;
+  int batch_script = 0;
   SANE_Status status;
   char *full_optstring;
   SANE_Int version_code;
@@ -1754,6 +1758,10 @@
 	  batch_count = atoi (optarg);
 	  batch = 1;
 	  break;
+	case OPTION_BATCH_SCRIPT:
+	  batch_script = 1;
+	  script = optarg;
+	  break;
 	case OPTION_FORMAT:
 	  if (strcmp (optarg, "tiff") == 0)
 	    output_format = OUTPUT_TIFF;
@@ -1924,6 +1932,8 @@
     --batch-double         increment page number by two, same as\n\
                            --batch-increment=2\n\
     --batch-prompt         ask for pressing a key before scanning a page\n\
+    --batch-script=SCRIPT  run script after scanning a page but before renaming\n\
+                           it to its output file name\n\
     --accept-md5-only      only accept authorization requests using md5\n");
       printf ("\
 -p, --progress             print progress messages\n\
@@ -2217,6 +2227,7 @@
 	{
 	  char path[PATH_MAX];
 	  char part_path[PATH_MAX];
+	  char cmd[PATH_MAX * 2];
 	  if (batch)		/* format is NULL unless batch mode */
 	    {
 	      sprintf (path, format, n);	/* love --(C++) */
@@ -2295,6 +2306,28 @@
 		    }
 		  else
 		    {
+		      /* run script before renaming scanned file */
+		      if (batch_script && NULL != script)
+			{
+			  sprintf (cmd, "%s '%s' %d", script, part_path, n);
+			  switch (system (cmd))
+			    {
+			    case -1:
+			      fprintf (stderr, "cannot run script `%s'\n", cmd);
+			      sane_cancel (device);
+			      return SANE_STATUS_ACCESS_DENIED;
+			      break;
+			    case 0:
+			      /* the only case considered success */
+			      break;
+			    default:
+			      fprintf (stderr,
+				    "non-zero exit status running `%s'\n", cmd);
+			      sane_cancel (device);
+			      return SANE_STATUS_ACCESS_DENIED;
+			      break;
+			    }
+			}
 		      /* let the fully scanned file show up */
 		      if (rename (part_path, path))
 			{
-- 
sane-devel mailing list: sane-devel@lists.alioth.debian.org
http://lists.alioth.debian.org/cgi-bin/mailman/listinfo/sane-devel
Unsubscribe: Send mail with subject "unsubscribe your_password"
             to sane-devel-requ...@lists.alioth.debian.org

Reply via email to