RFC822ParserTest.java

Hong-Thai Nguyen Tue, 21 Oct 2014 06:58:33 -0700

Hi Chris,

Yes, I made a mistake in this commit: I missed a renamed file and broke the
build. The next commit corrects it:
Revision: 1633331
Author: thaichat04
Date: mardi 21 octobre 2014 11:47:54
Message:
TIKA-1422 - Fixing build & minor refactory of naming test class
----
Modified :
/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/mail/RFC822ParserTest.java
Added :
/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/ocr/TesseractOCRParserTest.java
Deleted :
/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/ocr/TesseractOCRTest.java


Please 'pull' the latest again, then tell me if it is OK.

Sorry

On Tue, Oct 21, 2014 at 3:49 PM, Mattmann, Chris A (3980) <
chris.a.mattm...@jpl.nasa.gov> wrote:

> Hi Hong-Thai,
>
> These commits look strange to me - it looks like it subtracts the
> whole files (and the unit test removed the test file, renamed it,
> and then added what largely looks like the same file, back?)
>
> Any idea what's up?
>
> Cheers,
> Chris
>
> ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
> Chris Mattmann, Ph.D.
> Chief Architect
> Instrument Software and Science Data Systems Section (398)
> NASA Jet Propulsion Laboratory Pasadena, CA 91109 USA
> Office: 168-519, Mailstop: 168-527
> Email: chris.a.mattm...@nasa.gov
> WWW:  http://sunset.usc.edu/~mattmann/
> ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
> Adjunct Associate Professor, Computer Science Department
> University of Southern California, Los Angeles, CA 90089 USA
> ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>
>
>
>
>
>
> -----Original Message-----
> From: "thaicha...@apache.org" <thaicha...@apache.org>
> Reply-To: "dev@tika.apache.org" <dev@tika.apache.org>
> Date: Tuesday, October 21, 2014 at 2:32 AM
> To: "comm...@tika.apache.org" <comm...@tika.apache.org>
> Subject: svn commit: r1633325 - in /tika/trunk/tika-parsers/src:
> main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java
> test/java/org/apache/tika/parser/mail/RFC822ParserTest.java
>
> >Author: thaichat04
> >Date: Tue Oct 21 09:32:06 2014
> >New Revision: 1633325
> >
> >URL: http://svn.apache.org/r1633325
> >Log:
> >TIKA-1422 - Apply fix of [~olegt] in Windows
> >
> >Modified:
> >
> >tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/ocr/Tesseract
> >OCRParser.java
> >
> >tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/mail/RFC822Pa
> >rserTest.java
> >
> >Modified:
> >tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/ocr/Tesseract
> >OCRParser.java
> >URL:
> >
> http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apa
> >che/tika/parser/ocr/TesseractOCRParser.java?rev=1633325&r1=1633324&r2=1633
> >325&view=diff
> >==========================================================================
> >====
> >---
> >tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/ocr/Tesseract
> >OCRParser.java (original)
> >+++
> >tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/ocr/Tesseract
> >OCRParser.java Tue Oct 21 09:32:06 2014
> >@@ -26,11 +26,11 @@ import java.io.IOException;
> > import java.io.InputStream;
> > import java.io.InputStreamReader;
> > import java.io.Reader;
> >+import java.util.ArrayList;
> > import java.util.HashSet;
> >+import java.util.List;
> > import java.util.Map;
> > import java.util.Set;
> >-import java.util.List;
> >-import java.util.ArrayList;
> > import java.util.concurrent.Callable;
> > import java.util.concurrent.ExecutionException;
> > import java.util.concurrent.FutureTask;
> >@@ -45,20 +45,23 @@ import org.apache.tika.io.TemporaryResou
> > import org.apache.tika.io.TikaInputStream;
> > import org.apache.tika.metadata.Metadata;
> > import org.apache.tika.mime.MediaType;
> >-import org.apache.tika.parser.Parser;
> > import org.apache.tika.parser.AbstractParser;
> > import org.apache.tika.parser.ParseContext;
> >+import org.apache.tika.parser.Parser;
> > import org.apache.tika.parser.external.ExternalParser;
> >+import org.apache.tika.parser.image.ImageParser;
> >+import org.apache.tika.parser.image.PSDParser;
> >+import org.apache.tika.parser.image.TiffParser;
> >+import org.apache.tika.parser.jpeg.JpegParser;
> > import org.apache.tika.sax.XHTMLContentHandler;
> > import org.xml.sax.ContentHandler;
> > import org.xml.sax.SAXException;
> >
> > /**
> >- * TesseractOCRParser powered by tesseract-ocr engine.
> >- * To enable this parser, create a {@link TesseractOCRConfig}
> >- * object and pass it through a ParseContext.
> >- * Tesseract-ocr must be installed and on system path or
> >- * the path to its root folder must be provided:
> >+ * TesseractOCRParser powered by tesseract-ocr engine. To enable this
> >parser,
> >+ * create a {@link TesseractOCRConfig} object and pass it through a
> >+ * ParseContext. Tesseract-ocr must be installed and on system path or
> >the path
> >+ * to its root folder must be provided:
> >  * <p>
> >  * TesseractOCRConfig config = new TesseractOCRConfig();<br>
> >  * //Needed if tesseract is not on system path<br>
> >@@ -69,226 +72,231 @@ import org.xml.sax.SAXException;
> >  *
> >  */
> > public class TesseractOCRParser extends AbstractParser {
> >-
> >-      private static final long serialVersionUID = 1L;
> >-
> >-      private static final Set<MediaType> SUPPORTED_TYPES = getTypes();
> >-
> >-      private static Set<MediaType> getTypes() {
> >-              HashSet<MediaType> supportedTypes = new
> HashSet<MediaType>();
> >-
> >-              supportedTypes.add(MediaType.image("png"));
> >-              supportedTypes.add(MediaType.image("jpeg"));
> >-              supportedTypes.add(MediaType.image("tiff"));
> >-              supportedTypes.add(MediaType.image("x-ms-bmp"));
> >-              supportedTypes.add(MediaType.image("gif"));
> >-
> >-              return supportedTypes;
> >-      }
> >-
> >-      @Override
> >-      public Set<MediaType> getSupportedTypes(ParseContext arg0) {
> >-              return SUPPORTED_TYPES;
> >-      }
> >-
> >-    private void setEnv(TesseractOCRConfig config, ProcessBuilder pb) {
> >-        if(!config.getTesseractPath().isEmpty()){
> >-            Map<String, String> env = pb.environment();
> >-            env.put("TESSDATA_PREFIX", config.getTesseractPath());
> >-        }
> >+
> >+  private static final long serialVersionUID = 1L;
> >+
> >+  private static final Set<MediaType> SUPPORTED_TYPES = getTypes();
> >+
> >+  private static Set<MediaType> getTypes() {
> >+    HashSet<MediaType> supportedTypes = new HashSet<MediaType>();
> >+
> >+    supportedTypes.add(MediaType.image("png"));
> >+    supportedTypes.add(MediaType.image("jpeg"));
> >+    supportedTypes.add(MediaType.image("tiff"));
> >+    supportedTypes.add(MediaType.image("x-ms-bmp"));
> >+    supportedTypes.add(MediaType.image("gif"));
> >+
> >+    return supportedTypes;
> >+  }
> >+
> >+  @Override
> >+  public Set<MediaType> getSupportedTypes(ParseContext arg0) {
> >+    return SUPPORTED_TYPES;
> >+  }
> >+
> >+  private void setEnv(TesseractOCRConfig config, ProcessBuilder pb) {
> >+    if (!config.getTesseractPath().isEmpty()) {
> >+      Map<String, String> env = pb.environment();
> >+      env.put("TESSDATA_PREFIX", config.getTesseractPath());
> >     }
> >-
> >-      public void parse(Image image, ContentHandler handler, Metadata
> >metadata, ParseContext context)
> >-            throws IOException, SAXException, TikaException {
> >-
> >-              TemporaryResources tmp = new TemporaryResources();
> >-              FileOutputStream fos = null;
> >-              TikaInputStream tis = null;
> >-              try{
> >-                      int w = image.getWidth(null);
> >-              int h = image.getHeight(null);
> >-              BufferedImage bImage = new BufferedImage(w, h,
> >BufferedImage.TYPE_INT_RGB);
> >-              Graphics2D g2 = bImage.createGraphics();
> >-              g2.drawImage(image, 0, 0, null);
> >-              g2.dispose();
> >-              File file = tmp.createTemporaryFile();
> >-                      fos = new FileOutputStream(file);
> >-                      ImageIO.write(bImage, "png", fos);
> >-                      bImage = null;
> >-                      tis = TikaInputStream.get(file);
> >-                      parse(tis, handler, metadata, context);
> >-
> >-              }finally{
> >-                      tmp.dispose();
> >-                      if(tis != null)
> >-                              tis.close();
> >-                      if(fos != null)
> >-                              fos.close();
> >-              }
> >-
> >-
> >-      }
> >-
> >-      @Override
> >-    public void parse(
> >-            InputStream stream, ContentHandler handler,
> >-            Metadata metadata, ParseContext context)
> >-            throws IOException, SAXException, TikaException {
> >-
> >-      TesseractOCRConfig config = context.get(TesseractOCRConfig.class);
> >-      if(config == null) config = new TesseractOCRConfig();
> >-
> >-        String[] checkCmd = {config.getTesseractPath() + "tesseract"};
> >-        // If Tesseract is not on the path, do not try to run OCR.
> >-        if (!ExternalParser.check(checkCmd)) return;
> >-
> >-      XHTMLContentHandler xhtml = new XHTMLContentHandler(handler,
> >metadata);
> >+  }
> >
> >-        TemporaryResources tmp = new TemporaryResources();
> >-        File output = null;
> >-        try {
> >-              TikaInputStream  tikaStream = TikaInputStream.get(stream,
> tmp);
> >-              File input = tikaStream.getFile();
> >-              long size = tikaStream.getLength();
> >-
> >-              if(size >= config.getMinFileSizeToOcr() && size <=
> >config.getMaxFileSizeToOcr()){
> >-
> >-              output = tmp.createTemporaryFile();
> >-              doOCR(input, output, config);
> >-
> >-                //Tesseract appends .txt to output file name
> >-                output = new File(output.getAbsolutePath() + ".txt");
> >-
> >-                if(output.exists())
> >-                      extractOutput(new FileInputStream(output), xhtml);
> >+  public void parse(Image image, ContentHandler handler, Metadata
> >metadata, ParseContext context) throws IOException,
> >+      SAXException, TikaException {
> >
> >-              }
> >-
> >-        } finally {
> >-              tmp.dispose();
> >-              if(output != null)
> >-                      output.delete();
> >-
> >-        }
> >+    TemporaryResources tmp = new TemporaryResources();
> >+    FileOutputStream fos = null;
> >+    TikaInputStream tis = null;
> >+    try {
> >+      int w = image.getWidth(null);
> >+      int h = image.getHeight(null);
> >+      BufferedImage bImage = new BufferedImage(w, h,
> >BufferedImage.TYPE_INT_RGB);
> >+      Graphics2D g2 = bImage.createGraphics();
> >+      g2.drawImage(image, 0, 0, null);
> >+      g2.dispose();
> >+      File file = tmp.createTemporaryFile();
> >+      fos = new FileOutputStream(file);
> >+      ImageIO.write(bImage, "png", fos);
> >+      bImage = null;
> >+      tis = TikaInputStream.get(file);
> >+      parse(tis, handler, metadata, context);
> >+
> >+    } finally {
> >+      tmp.dispose();
> >+      if (tis != null)
> >+        tis.close();
> >+      if (fos != null)
> >+        fos.close();
> >     }
> >
> >-      /**
> >-       * Run external tesseract-ocr process.
> >-       * @param input File to be ocred
> >-     * @param output File to collect ocr result
> >-     * @param config Configuration of tesseract-ocr engine
> >-     * @throws TikaException if the extraction timed out
> >-     * @throws IOException if an input error occurred
> >-       */
> >-    private void doOCR(File input, File output, TesseractOCRConfig
> >config)
> >-            throws IOException, TikaException {
> >-        String[] cmd = {config.getTesseractPath() + "tesseract",
> >-                                      input.getPath(),
> >-                                              output.getPath() ,
> >-                                              "-l",
> >-                                              config.getLanguage() ,
> >-                                              "-psm",
> >-                                              config.getPageSegMode() };
> >-
> >-        ProcessBuilder pb = new ProcessBuilder(cmd);
> >-        setEnv(config, pb);
> >-        final Process process = pb.start();
> >-
> >-        process.getOutputStream().close();
> >-        InputStream out = process.getInputStream();
> >-        InputStream err = process.getErrorStream();
> >-
> >-        logStream("OCR MSG", out, input);
> >-        logStream("OCR ERROR", err, input);
> >-
> >-        FutureTask<Integer> waitTask = new FutureTask<Integer>(new
> >Callable<Integer>() {
> >-              public Integer call() throws Exception {
> >-                  return process.waitFor();
> >-              }
> >-        });
> >-
> >-        Thread waitThread = new Thread(waitTask);
> >-        waitThread.start();
> >-
> >-        try {
> >-              waitTask.get(config.getTimeout(), TimeUnit.SECONDS);
> >-
> >-        } catch (InterruptedException e) {
> >-              waitThread.interrupt();
> >-              process.destroy();
> >-              Thread.currentThread().interrupt();
> >-              throw new TikaException("TesseractOCRParser interrupted",
> e);
> >-
> >-        } catch (ExecutionException e) {
> >-                      //should not be thrown
> >-
> >-              } catch (TimeoutException e) {
> >-                      waitThread.interrupt();
> >-                      process.destroy();
> >-                      throw new TikaException("TesseractOCRParser
> timeout", e);
> >-              }
> >-
> >-
> >+  }
> >+
> >+  @Override
> >+  public void parse(InputStream stream, ContentHandler handler, Metadata
> >metadata, ParseContext context)
> >+      throws IOException, SAXException, TikaException {
> >+
> >+    TesseractOCRConfig config = context.get(TesseractOCRConfig.class);
> >+    if (config == null)
> >+      config = new TesseractOCRConfig();
> >+
> >+    String[] checkCmd = { config.getTesseractPath() + "tesseract" };
> >+    // If Tesseract is not on the path, do not try to run OCR.
> >+    if (!ExternalParser.check(checkCmd))
> >+      return;
> >+
> >+    XHTMLContentHandler xhtml = new XHTMLContentHandler(handler,
> >metadata);
> >+
> >+    TemporaryResources tmp = new TemporaryResources();
> >+    File output = null;
> >+    try {
> >+      TikaInputStream tikaStream = TikaInputStream.get(stream, tmp);
> >+      File input = tikaStream.getFile();
> >+      long size = tikaStream.getLength();
> >+
> >+      if (size >= config.getMinFileSizeToOcr() && size <=
> >config.getMaxFileSizeToOcr()) {
> >+
> >+        output = tmp.createTemporaryFile();
> >+        doOCR(input, output, config);
> >+
> >+        // Tesseract appends .txt to output file name
> >+        output = new File(output.getAbsolutePath() + ".txt");
> >+
> >+        if (output.exists())
> >+          extractOutput(new FileInputStream(output), xhtml);
> >+
> >+      }
> >+
> >+    } finally {
> >+      tmp.dispose();
> >+      if (output != null)
> >+        output.delete();
> >+
> >     }
> >-
> >+  }
> >
> >-    /**
> >-     * Reads the contents of the given stream and write it to the
> >-     * given XHTML content handler.
> >-     * The stream is closed once fully processed.
> >-     *
> >-     * @param stream Stream where is the result of ocr
> >-     * @param xhtml XHTML content handler
> >-     * @throws SAXException if the XHTML SAX events could not be handled
> >-     * @throws IOException if an input error occurred
> >-     */
> >-    private void extractOutput(InputStream stream, XHTMLContentHandler
> >xhtml)
> >-      throws SAXException, IOException {
> >-
> >-        Reader reader = new InputStreamReader(stream, "UTF-8");
> >-        xhtml.startDocument();
> >-        xhtml.startElement("div");
> >-        try {
> >-            char[] buffer = new char[1024];
> >-            for (int n = reader.read(buffer); n != -1; n =
> >reader.read(buffer)) {
> >-                if (n > 0) xhtml.characters(buffer, 0, n);
> >-            }
> >-        } finally {
> >-            reader.close();
> >-        }
> >-        xhtml.endElement("div");
> >-        xhtml.endDocument();
> >+  /**
> >+   * Run external tesseract-ocr process.
> >+   *
> >+   * @param input
> >+   *          File to be ocred
> >+   * @param output
> >+   *          File to collect ocr result
> >+   * @param config
> >+   *          Configuration of tesseract-ocr engine
> >+   * @throws TikaException
> >+   *           if the extraction timed out
> >+   * @throws IOException
> >+   *           if an input error occurred
> >+   */
> >+  private void doOCR(File input, File output, TesseractOCRConfig config)
> >throws IOException, TikaException {
> >+    String[] cmd = { config.getTesseractPath() + "tesseract",
> >input.getPath(), output.getPath(), "-l",
> >+        config.getLanguage(), "-psm", config.getPageSegMode() };
> >+
> >+    ProcessBuilder pb = new ProcessBuilder(cmd);
> >+    setEnv(config, pb);
> >+    final Process process = pb.start();
> >+
> >+    process.getOutputStream().close();
> >+    InputStream out = process.getInputStream();
> >+    InputStream err = process.getErrorStream();
> >+
> >+    logStream("OCR MSG", out, input);
> >+    logStream("OCR ERROR", err, input);
> >+
> >+    FutureTask<Integer> waitTask = new FutureTask<Integer>(new
> >Callable<Integer>() {
> >+      public Integer call() throws Exception {
> >+        return process.waitFor();
> >+      }
> >+    });
> >+
> >+    Thread waitThread = new Thread(waitTask);
> >+    waitThread.start();
> >+
> >+    try {
> >+      waitTask.get(config.getTimeout(), TimeUnit.SECONDS);
> >+
> >+    } catch (InterruptedException e) {
> >+      waitThread.interrupt();
> >+      process.destroy();
> >+      Thread.currentThread().interrupt();
> >+      throw new TikaException("TesseractOCRParser interrupted", e);
> >+
> >+    } catch (ExecutionException e) {
> >+      // should not be thrown
> >+
> >+    } catch (TimeoutException e) {
> >+      waitThread.interrupt();
> >+      process.destroy();
> >+      throw new TikaException("TesseractOCRParser timeout", e);
> >     }
> >
> >-    /**
> >-     * Starts a thread that reads the contents of the standard output
> >-     * or error stream of the given process to not block the process.
> >-     * The stream is closed once fully processed.
> >-     */
> >-    private void logStream(final String logType, final InputStream
> >stream, final File file) {
> >-        new Thread() {
> >-            public void run() {
> >-              Reader reader = new InputStreamReader(stream);
> >-                StringBuilder out = new StringBuilder();
> >-                char[] buffer = new char[1024];
> >-                try {
> >-                                      for (int n = reader.read(buffer);
> n != -1; n = reader.read(buffer))
> >-                                              out.append(buffer, 0, n);
> >-                              } catch (IOException e) {
> >-
> >-                              } finally {
> >-                    IOUtils.closeQuietly(stream);
> >-                }
> >-
> >-
> >-                              String msg = out.toString();
> >-                              //log or discard message?
> >-
> >-            }
> >-        }.start();
> >+  }
> >+
> >+  /**
> >+   * Reads the contents of the given stream and write it to the given
> >XHTML
> >+   * content handler. The stream is closed once fully processed.
> >+   *
> >+   * @param stream
> >+   *          Stream where is the result of ocr
> >+   * @param xhtml
> >+   *          XHTML content handler
> >+   * @throws SAXException
> >+   *           if the XHTML SAX events could not be handled
> >+   * @throws IOException
> >+   *           if an input error occurred
> >+   */
> >+  private void extractOutput(InputStream stream, XHTMLContentHandler
> >xhtml) throws SAXException, IOException {
> >+
> >+    Reader reader = new InputStreamReader(stream, "UTF-8");
> >+    xhtml.startDocument();
> >+    xhtml.startElement("div");
> >+    try {
> >+      char[] buffer = new char[1024];
> >+      for (int n = reader.read(buffer); n != -1; n =
> >reader.read(buffer)) {
> >+        if (n > 0)
> >+          xhtml.characters(buffer, 0, n);
> >+      }
> >+    } finally {
> >+      reader.close();
> >     }
> >+    xhtml.endElement("div");
> >+    xhtml.endDocument();
> >+  }
> >+
> >+  /**
> >+   * Starts a thread that reads the contents of the standard output or
> >error
> >+   * stream of the given process to not block the process. The stream is
> >closed
> >+   * once fully processed.
> >+   */
> >+  private void logStream(final String logType, final InputStream stream,
> >final File file) {
> >+    new Thread() {
> >+      public void run() {
> >+        Reader reader = new InputStreamReader(stream);
> >+        StringBuilder out = new StringBuilder();
> >+        char[] buffer = new char[1024];
> >+        try {
> >+          for (int n = reader.read(buffer); n != -1; n =
> >reader.read(buffer))
> >+            out.append(buffer, 0, n);
> >+        } catch (IOException e) {
> >
> >-
> >-}
> >+        } finally {
> >+          IOUtils.closeQuietly(stream);
> >+        }
> >
> >+        String msg = out.toString();
> >+        // log or discard message?
> >
> >+      }
> >+    }.start();
> >+  }
> >+
> >+  private List<Parser> getImageParsers() {
> >+    List<Parser> parsers = new ArrayList<Parser>();
> >+    parsers.add(new ImageParser());
> >+    parsers.add(new PSDParser());
> >+    parsers.add(new TiffParser());
> >+    parsers.add(new JpegParser());
> >+    return parsers;
> >+  }
> >+
> >+}
> >
> >Modified:
> >tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/mail/RFC822Pa
> >rserTest.java
> >URL:
> >
> http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apa
> >che/tika/parser/mail/RFC822ParserTest.java?rev=1633325&r1=1633324&r2=16333
> >25&view=diff
> >==========================================================================
> >====
> >---
> >tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/mail/RFC822Pa
> >rserTest.java (original)
> >+++
> >tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/mail/RFC822Pa
> >rserTest.java Tue Oct 21 09:32:06 2014
> >@@ -36,6 +36,8 @@ import org.apache.tika.metadata.Metadata
> > import org.apache.tika.metadata.TikaCoreProperties;
> > import org.apache.tika.parser.ParseContext;
> > import org.apache.tika.parser.Parser;
> >+import org.apache.tika.parser.ocr.TesseractOCRConfig;
> >+import org.apache.tika.parser.ocr.TesseractOCRParserTest;
> > import org.apache.tika.sax.BodyContentHandler;
> > import org.apache.tika.sax.XHTMLContentHandler;
> > import org.junit.Test;
> >@@ -83,13 +85,19 @@ public class RFC822ParserTest {
> >         try {
> >             parser.parse(stream, handler, metadata, new ParseContext());
> >             verify(handler).startDocument();
> >-            //4 body-part divs -- two outer bodies and two inner bodies
> >-            verify(handler,
> >times(4)).startElement(eq(XHTMLContentHandler.XHTML), eq("div"),
> >eq("div"), any(Attributes.class));
> >-            verify(handler,
> >times(4)).endElement(XHTMLContentHandler.XHTML, "div", "div");
> >-            //5 paragraph elements, 4 for body-parts and 1 for
> >encompassing message
> >-            verify(handler,
> >times(5)).startElement(eq(XHTMLContentHandler.XHTML), eq("p"), eq("p"),
> >any(Attributes.class));
> >-            verify(handler,
> >times(5)).endElement(XHTMLContentHandler.XHTML, "p", "p");
> >+            int bodyExpectedTimes = 4, multipackExpectedTimes = 5;;
> >+            int invokingTimes = bodyExpectedTimes;
> >+            TesseractOCRConfig config = new TesseractOCRConfig();
> >+            if (TesseractOCRParserTest.canRun(config)) {
> >+              invokingTimes = multipackExpectedTimes;
> >+            }
> >+
> >+            verify(handler,
> >times(invokingTimes)).startElement(eq(XHTMLContentHandler.XHTML),
> >eq("div"), eq("div"), any(Attributes.class));
> >+            verify(handler,
> >times(invokingTimes)).endElement(XHTMLContentHandler.XHTML, "div", "div");
> >+            verify(handler,
> >times(multipackExpectedTimes)).startElement(eq(XHTMLContentHandler.XHTML),
> > eq("p"), eq("p"), any(Attributes.class));
> >+            verify(handler,
> >times(multipackExpectedTimes)).endElement(XHTMLContentHandler.XHTML, "p",
> >"p");
> >             verify(handler).endDocument();
> >+
> >         } catch (Exception e) {
> >             fail("Exception thrown: " + e.getMessage());
> >         }
> >
> >
>
>


-- 
--------------
Hong-Thai

Re: svn commit: r1633325 - in /tika/trunk/tika-parsers/src: main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java test/java/org/apache/tika/parser/mail/RFC822ParserTest.java

Reply via email to