I solved it with Tika 3.0 and this tika-config.xml:
<?xml version="1.0" encoding="UTF-8"?>
<!-- Tika configuration: server settings, service loading, and parser selection/parameters. -->
<properties>
  <!-- Server section: timeouts (in milliseconds), listening port, file limit, and JVM args for the forked process. -->
  <server>
    <taskTimeoutMillis>120000</taskTimeoutMillis>
    <minimumTimeoutMillis>10</minimumTimeoutMillis>
    <port>9998</port>
    <maxFiles>20000</maxFiles>
    <forkedJvmArgs>
      <arg>-Xmx512m</arg>
    </forkedJvmArgs>
  </server>

  <!--
    Both attributes live on ONE service-loader element. They were previously split across two
    sibling elements; the config loader is expected to read only a single service-loader
    element, in which case the loadErrorHandler setting on the second element was ignored.
  -->
  <service-loader dynamic="true" loadErrorHandler="WARN"/>

  <parsers>
    <!-- Default parser set, minus the Tesseract OCR parser and the legacy Office parser. -->
    <parser class="org.apache.tika.parser.DefaultParser">
      <parser-exclude class="org.apache.tika.parser.ocr.TesseractOCRParser"/>
      <parser-exclude class="org.apache.tika.parser.microsoft.OfficeParser"/>
      <!-- NOTE(review): confirm that DefaultParser recognises these params; they may belong on the individual parsers instead. -->
      <params>
        <param name="byteArrayMaxOverride" type="int">30000000</param>
        <param name="suppressExceptions" type="bool">true</param>
        <param name="ignoreTikaErrors" type="bool">true</param>
      </params>
    </parser>

    <!-- OOXML parser configured explicitly, with shape-based content switched off. -->
    <parser class="org.apache.tika.parser.microsoft.ooxml.OOXMLParser">
      <params>
        <param name="includeShapeBasedContent" type="bool">false</param>
        <param name="suppressExceptions" type="bool">true</param>
      </params>
    </parser>

    <!-- PDF parser configured explicitly. -->
    <parser class="org.apache.tika.parser.pdf.PDFParser">
      <params>
        <param name="pdfbox.enableAutoSpace" type="bool">true</param>
        <param name="suppressExceptions" type="bool">true</param>
      </params>
    </parser>
  </parsers>
</properties>
Thanks a lot
Mario