Hi, here is some more information about it. I use Solr to output only the file names (file IDs). Below I enclose the fields from my schema.xml; at present I have only about 40 MB of indexed data.
<!-- Unique document key: the only value this setup needs to return, so it is stored. -->
<field name="id" type="string" indexed="true" stored="true" required="true"/>

<!-- Product-style fields: searchable (indexed) but not stored. -->
<field name="sku" type="textTight" indexed="true" stored="false" omitNorms="true"/>
<field name="name" type="textgen" indexed="true" stored="false"/>
<field name="manu" type="textgen" indexed="true" stored="false" omitNorms="true"/>
<field name="cat" type="text_ws" indexed="true" stored="false" multiValued="true" omitNorms="true"/>
<field name="features" type="text" indexed="true" stored="false" multiValued="true"/>
<field name="includes"
       type="text"
       indexed="true"
       stored="false"
       termVectors="true"
       termPositions="true"
       termOffsets="true"/>
<field name="weight" type="float" indexed="true" stored="false"/>
<field name="price" type="float" indexed="true" stored="false"/>
<field name="popularity" type="int" indexed="true" stored="false"/>
<field name="inStock" type="boolean" indexed="true" stored="false"/>

<!-- Spatial examples: the three "store" fields below demonstrate alternative
     ways one might choose to implement spatial search. It is highly unlikely
     that a schema would ever define all of them at once. -->
<field name="store" type="location" indexed="true" stored="false"/>
<field name="store_lat_lon" type="latLon" indexed="true" stored="false"/>
<field name="store_hash" type="geohash" indexed="true" stored="false"/>

<!-- Common metadata fields, named to match the SolrCell metadata produced when
     parsing rich documents such as Word or PDF. Some of them are multiValued
     only because Tika may currently return multiple values for them. -->
<field name="title" type="text" indexed="true" stored="true" multiValued="true"/>
<field name="subject" type="text" indexed="true" stored="false"/>
<field name="description" type="text" indexed="true" stored="false"/>
<field name="comments" type="text" indexed="true" stored="false"/>
<field name="author" type="textgen" indexed="true" stored="false"/>
<field name="keywords" type="textgen" indexed="true" stored="false"/>
<field name="category" type="textgen" indexed="true" stored="false"/>
<field name="content_type" type="string" indexed="true" stored="false" multiValued="true"/>
<field name="last_modified" type="date" indexed="true" stored="false"/>
<field name="links" type="string" indexed="true" stored="false" multiValued="true"/>

<!-- "content" field added here (satya). -->
<field name="content" type="spell" indexed="true" stored="false" multiValued="true"/>

<!-- Catch-all field containing all other searchable text fields
     (implemented via copyField further on in this schema). -->
<field name="text" type="text" indexed="true" stored="false" multiValued="true" termVectors="true"/>

<!-- Catch-all text field that indexes tokens both normally and in reverse,
     for efficient leading-wildcard queries. Changed here (satya). -->
<field name="text_rev" type="text_rev" indexed="true" stored="false" multiValued="true"/>

<!-- Non-tokenized version of the manufacturer, to make it easier to sort or
     group results by manufacturer. Copied from "manu" via copyField.
     Changed here (satya). -->
<field name="manu_exact" type="string" indexed="true" stored="false"/>
<field name="spell" type="spell" indexed="true" stored="false" multiValued="true"/>

<!-- Changed here. -->
<field name="payloads" type="payloads" indexed="true" stored="false"/>
<field name="timestamp"
       type="date"
       indexed="true"
       stored="false"
       default="NOW"
       multiValued="false"/>

Regards, satya