[ 
https://issues.apache.org/jira/browse/PDFBOX-2860?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=14624345#comment-14624345
 ] 

simon steiner commented on PDFBOX-2860:
---------------------------------------

{code}
import org.apache.pdfbox.pdmodel.PDDocument;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

/**
 * Ad-hoc benchmark that loads every PDF found under a folder with both
 * {@code PDDocument.load} and {@code PDDocument.loadNonSeq} and prints the
 * files for which the non-sequential load took more than twice as long as
 * the sequential one.
 *
 * <p>Usage: {@code java TestPDFBoxParser [folder]}. When no folder is given,
 * {@code /path/topdfs} is scanned.
 */
public class TestPDFBoxParser {
    /** Folder scanned when no command-line argument is supplied. */
    private static final String DEFAULT_FOLDER = "/path/topdfs";

    /** File-name suffix (matched case-insensitively) that marks a PDF. */
    private static final String PDF_SUFFIX = ".pdf";

    /** Number of times each file is loaded per parser. */
    private static final int ITERATIONS = 100;

    /** Files whose sequential total is not above this many milliseconds are not reported. */
    private static final long MIN_SEQ_MILLIS = 50;

    /** A file is reported when the non-sequential total exceeds the sequential total times this factor. */
    private static final long SLOWDOWN_FACTOR = 2;

    private static final long NANOS_PER_MILLI = 1000000L;

    /**
     * Recursively collects the absolute paths of all files under
     * {@code folder} whose name ends in ".pdf" (any case).
     *
     * @param folder directory to walk
     * @param out    list that receives the absolute path of every PDF found
     */
    private static void listFilesForFolder(File folder, List<String> out) {
        // listFiles() returns null when the path is not a directory or cannot
        // be read; report it and carry on instead of failing with an NPE.
        File[] entries = folder.listFiles();
        if (entries == null) {
            System.err.println("Cannot list directory: " + folder);
            return;
        }
        for (File fileEntry : entries) {
            if (fileEntry.isDirectory()) {
                listFilesForFolder(fileEntry, out);
            } else if (isPdf(fileEntry.getName())) {
                out.add(fileEntry.getAbsolutePath());
            }
        }
    }

    /** Returns true when {@code name} ends with ".pdf", ignoring case. */
    private static boolean isPdf(String name) {
        int suffixLength = PDF_SUFFIX.length();
        // A negative offset (name shorter than the suffix) makes regionMatches return false.
        return name.regionMatches(true, name.length() - suffixLength, PDF_SUFFIX, 0, suffixLength);
    }

    /**
     * Times both loaders on every PDF below the folder and prints
     * {@code <path> <seqMillis> <nonSeqMillis>} for each file where the
     * non-sequential load is markedly slower.
     *
     * @param args optional; {@code args[0]} overrides the folder to scan
     * @throws IOException declared for compatibility with existing callers
     */
    public static void main(String[] args) throws IOException {
        String root = args.length > 0 ? args[0] : DEFAULT_FOLDER;
        List<String> files = new ArrayList<String>();
        listFilesForFolder(new File(root), files);

        for (String s : files) {
            try {
                long a = load(s);
                long b = loadNonSeq(s);
                if (a > MIN_SEQ_MILLIS && b > (a * SLOWDOWN_FACTOR)) {
                    System.out.println(s + " " + a + " " + b);
                }
            } catch (Exception e) {
                // Best effort: one unreadable file must not stop the run, but
                // say so on stderr so it is not silently missing from the results.
                System.err.println("Skipping " + s + ": " + e);
            }
        }
    }

    /**
     * Loads and closes the file {@link #ITERATIONS} times with
     * {@code PDDocument.load}.
     *
     * @param s path of the PDF to load
     * @return total elapsed time in milliseconds
     * @throws IOException if the file cannot be opened or loading fails
     */
    private static long load(String s) throws IOException {
        // nanoTime is monotonic, so the interval is unaffected by wall-clock changes.
        long start = System.nanoTime();
        for (int i = 0; i < ITERATIONS; i++) {
            FileInputStream in = new FileInputStream(s);
            try {
                PDDocument.load(in).close();
            } finally {
                // Release the file handle even when loading throws.
                in.close();
            }
        }
        return (System.nanoTime() - start) / NANOS_PER_MILLI;
    }

    /**
     * Loads and closes the file {@link #ITERATIONS} times with
     * {@code PDDocument.loadNonSeq}.
     *
     * @param s path of the PDF to load
     * @return total elapsed time in milliseconds
     * @throws IOException if the file cannot be opened or loading fails
     */
    private static long loadNonSeq(String s) throws IOException {
        long start = System.nanoTime();
        for (int i = 0; i < ITERATIONS; i++) {
            FileInputStream in = new FileInputStream(s);
            try {
                PDDocument.loadNonSeq(in).close();
            } finally {
                // Release the file handle even when loading throws.
                in.close();
            }
        }
        return (System.nanoTime() - start) / NANOS_PER_MILLI;
    }
}

{code}

> NonSeq parser slower than Seq parser
> ------------------------------------
>
>                 Key: PDFBOX-2860
>                 URL: https://issues.apache.org/jira/browse/PDFBOX-2860
>             Project: PDFBox
>          Issue Type: Bug
>          Components: Parsing
>    Affects Versions: 2.0.0
>            Reporter: simon steiner
>
> PDF from PDFBOX-797
>         for (int i=0; i<1000; i++) {
>             PDDocument.load(new FileInputStream(
>                     "4218.pdf")).close();
>         }
> Nonseq:
> real  0m23.691s
> Seq:
> real  0m9.705s



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)

---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscr...@pdfbox.apache.org
For additional commands, e-mail: dev-h...@pdfbox.apache.org

Reply via email to