Hey, I am working with Tika and Solr. At the moment I can index the document's metadata, but not its content.
to the details: part of my config: <requestHandler name="/update/extract" class="org.apache.solr.handler.extraction.ExtractingRequestHandler" startup="lazy"> <lst name="defaults"> <str name="ext.map.Last-Modified">last_modified</str> <bool name="ext.ignore.und.fl">true</bool> <str name="fmap.content">text</str> <str name="lowernames">true</str> <str name="uprefix">ignored_</str> <str name="captureAttr">true</str> <str name="fmap.a">links</str> <str name="fmap.div">ignored_</str> </lst> </requestHandler> Part of my Schema: <field name="id" type="string" indexed="true" stored="true" required="true"/> <field name="sku" type="textTight" indexed="true" stored="true" omitNorms="true"/> <field name="name" type="textgen" indexed="true" stored="true"/> <field name="alphaNameSort" type="alphaOnlySort" indexed="true" stored="false"/> <field name="manu" type="textgen" indexed="true" stored="true" omitNorms="true"/> <field name="cat" type="text_ws" indexed="true" stored="true" multiValued="true" omitNorms="true"/> <field name="features" type="text" indexed="true" stored="true" multiValued="true"/> <field name="includes" type="text" indexed="true" stored="true" termVectors="true" termPositions="true" termOffsets="true"/> <field name="weight" type="float" indexed="true" stored="true"/> <field name="price" type="float" indexed="true" stored="true"/> <field name="popularity" type="int" indexed="true" stored="true"/> <field name="inStock" type="boolean" indexed="true" stored="true"/> <field name="title" type="text" indexed="true" stored="true" multiValued="true"/> <field name="subject" type="text" indexed="true" stored="true"/> <field name="description" type="text" indexed="true" stored="true"/> <field name="comments" type="text" indexed="true" stored="true"/> <field name="author" type="textgen" indexed="true" stored="true"/> <field name="keywords" type="textgen" indexed="true" stored="true"/> <field name="category" type="textgen" indexed="true" stored="true"/> <field 
name="content_type" type="string" indexed="true" stored="true" multiValued="true"/> <field name="last_modified" type="date" indexed="true" stored="true"/> <field name="links" type="string" indexed="true" stored="true" multiValued="true"/> <field name="text" type="text" indexed="true" stored="false" multiValued="true"/> Curl command: curl " http://192.168.105.66:8983/solr/update/extract?literal.id=1234&uprefix=attr_commit=true" -F "myfile=@Word-Text.doc" Result in Solr: <doc> - <arr name="attr_commit=trueapplication_name"> <str>TX_WORD 10.1.210.500</str> </arr> - <arr name="attr_commit=truestream_content_type"> <str>application/octet-stream</str> </arr> - <arr name="attr_commit=truestream_name"> <str>Word-Text.doc</str> </arr> - <arr name="attr_commit=truestream_size"> <str>43592</str> </arr> - <arr name="attr_commit=truestream_source_info"> <str>myfile</str> </arr> - <arr name="content_type"> <str>application/msword</str> </arr> <str name="id">1234</str> </doc> _________________________________________________________ _________________________________________________________ _________________________________________________________ But I need the content too. What am I doing wrong? Thanks for the help, King