Thank you, Erick. I followed your advice and took a look at the Apache Tika configuration. I have modified my /update/extract request handler:
<requestHandler name="/update/extract"
                startup="lazy"
                class="solr.extraction.ExtractingRequestHandler">
  <lst name="defaults">
    <str name="fmap.Last-Modified">last_modified</str>
    <str name="uprefix">ignored_</str>
    <!-- capture link hrefs but ignore div attributes -->
    <str name="captureAttr">true</str>
    <str name="fmap.a">links</str>
    <str name="fmap.div">ignored_</str>
  </lst>
  <!-- NOTE(review): the file referenced here is shown below to be a
       DataImportHandler <dataConfig>, not a Tika configuration file.
       tika.config is presumably expected to point at a Tika config;
       a DIH config is normally wired through a DataImportHandler
       request handler instead. Confirm which handler should load it. -->
  <str name="tika.config">D:\solr\solr-5.3.1\server\solr\tika-data-config.xml</str>
</requestHandler>

and config tika :

<dataConfig>
  <dataSource type="BinFileDataSource" />
  <document>
    <!-- Outer entity: walks baseDir recursively and emits one row per file.
         The fileName pattern escapes the dot, groups the extension
         alternatives and anchors at the end of the name, so only
         *.doc, *.docx and *.pdf files are selected. -->
    <entity name="files"
            processor="FileListEntityProcessor"
            dataSource="null"
            rootEntity="false"
            baseDir="D:\Lucene\document"
            fileName=".*\.(doc|docx|pdf)$"
            onError="skip"
            recursive="true">
      <field column="fileAbsolutePath" name="lux_uri" />
      <field column="fileSize" name="size" />
      <field column="fileLastModified" name="lastModified" />
      <!-- Inner entity: parses each listed file with Tika; fields marked
           meta="true" are read from the document metadata. -->
      <entity name="documentImport"
              processor="TikaEntityProcessor"
              url="${files.fileAbsolutePath}"
              format="text">
        <field column="file" name="fileName" meta="true"/>
        <field column="Author" name="author" meta="true"/>
        <field column="name" name="name" meta="true"/>
        <field column="title" name="title" meta="true"/>
        <field column="text" name="text"/>
        <field column="custom:Testmeta" name="Testmeta" meta="true"/>
        <field column="LastModifiedBy" name="LastModifiedBy" meta="true"/>
      </entity>
    </entity>
  </document>
</dataConfig>

and schema.xml:

<field name="Testmeta" type="text" indexed="true" stored="true" />

but the problem is the same: the title of indexed files is wrong for msword