Hi,
Here is some more information about it. I use Solr to output only the file
names (file IDs). Below I enclose the fields from my schema.xml; at present I
have only about 40MB of indexed data.


   <field name="id" type="string" indexed="true" stored="true"
required="true" />
   <field name="sku" type="textTight" indexed="true" stored="false"
omitNorms="true"/>
   <field name="name" type="textgen" indexed="true" stored="false"/>

   <field name="manu" type="textgen" indexed="true" stored="false"
omitNorms="true"/>
   <field name="cat" type="text_ws" indexed="true" stored="false"
multiValued="true" omitNorms="true" />
   <field name="features" type="text" indexed="true" stored="false"
multiValued="true"/>
   <field name="includes" type="text" indexed="true" stored="false"
termVectors="true" termPositions="true" termOffsets="true" />

   <field name="weight" type="float" indexed="true" stored="false"/>
   <field name="price"  type="float" indexed="true" stored="false"/>
   <field name="popularity" type="int" indexed="true" stored="false" />
   <field name="inStock" type="boolean" indexed="true" stored="false" />

   <!--
   The following store examples are used to demonstrate the various ways one
might _CHOOSE_ to
    implement spatial.  It is highly unlikely that you would ever have ALL
of these fields defined.
    -->
   <field name="store" type="location" indexed="true" stored="false"/>
   <field name="store_lat_lon" type="latLon" indexed="true" stored="false"/>
   <field name="store_hash" type="geohash" indexed="true" stored="false"/>


   <!-- Common metadata fields, named specifically to match up with
     SolrCell metadata when parsing rich documents such as Word, PDF.
     Some fields are multiValued only because Tika currently may return
     multiple values for them.
   -->
   <field name="title" type="text" indexed="true" stored="true"
multiValued="true"/>
   <field name="subject" type="text" indexed="true" stored="false"/>
   <field name="description" type="text" indexed="true" stored="false"/>
   <field name="comments" type="text" indexed="true" stored="false"/>
   <field name="author" type="textgen" indexed="true" stored="false"/>
   <field name="keywords" type="textgen" indexed="true" stored="false"/>
   <field name="category" type="textgen" indexed="true" stored="false"/>
   <field name="content_type" type="string" indexed="true" stored="false"
multiValued="true"/>
   <field name="last_modified" type="date" indexed="true" stored="false"/>
   <field name="links" type="string" indexed="true" stored="false"
multiValued="true"/>
<!-- satya: added the "content" field here -->
   <field name="content" type="spell" indexed="true" stored="false"
multiValued="true"/>


   <!-- catchall field, containing all other searchable text fields
        (implemented via copyField further on in this schema) -->
   <field name="text" type="text" indexed="true" stored="false"
multiValued="true" termVectors="true"/>

   <!-- catchall text field that indexes tokens both normally and in reverse
for efficient
        leading wildcard queries.  here satya-->
   <field name="text_rev" type="text_rev" indexed="true" stored="false"
multiValued="true"/>

   <!-- non-tokenized version of manufacturer to make it easier to sort or
group
        results by manufacturer.  copied from "manu" via copyField here
satya-->
   <field name="manu_exact" type="string" indexed="true" stored="false"/>
   <field name="spell" type="spell" indexed="true" stored="false"
multiValued="true"/>
<!-- changed here -->
   <field name="payloads" type="payloads" indexed="true" stored="false"/>

     <field name="timestamp" type="date" indexed="true" stored="false"
default="NOW" multiValued="false"/>



Regards,
satya

Reply via email to