Thanks to Hoss & Yonik for your support. Attached is the BinaryField implementation, in case anyone else is interested.
-Ben

package org.apache.solr.schema;

import org.apache.solr.search.function.ValueSource;
import org.apache.solr.search.function.IntFieldSource;
import org.apache.solr.core.SolrException;
import org.apache.solr.request.XMLWriter;
import org.apache.solr.request.TextResponseWriter;
import org.apache.lucene.search.SortField;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Fieldable;
import org.apache.commons.codec.binary.Base64;

import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.util.Map;
import java.util.logging.Logger;

/**
 * Field type that accepts base64 encoded strings and stores the decoded
 * bytes as a Lucene binary field value. When a stored value is written
 * back out it is re-encoded as base64 text.
 *
 * <pre>
 * Usage:
 * 1. Add new field type in schema.xml
 *    &lt;types&gt;
 *      ...
 *      &lt;fieldtype name="binary" class="solr.BinaryField"/&gt;
 *    &lt;/types&gt;
 *
 * 2. Then add fields of binary type in schema.xml
 *    &lt;fields&gt;
 *      ...
 *      &lt;field name="body" type="binary" indexed="false" stored="true"/&gt;
 *    &lt;/fields&gt;
 * </pre>
 *
 * @see org.apache.lucene.document.Field#Field(String, byte[], org.apache.lucene.document.Field.Store)
 * @author $Author: bincani $
 * @version $Revision: 287 $
 */
public class BinaryField extends FieldType {

  /**
   * Charset used when converting between base64 text and bytes. Base64
   * output consists of ASCII characters only, so pinning the charset keeps
   * the conversion independent of the platform default encoding.
   */
  private static final String BASE64_CHARSET = "US-ASCII";

  /** Logger for this field type (kept for compatibility; not used on the response path). */
  final Logger log = Logger.getLogger(BinaryField.class.getName());

  /**
   * Returns the SortField instance that should be used to sort fields
   * of this type. The returned SortField uses {@code SortField.DOC}.
   *
   * @param field   the schema field to sort on
   * @param reverse whether the sort order is reversed
   */
  public SortField getSortField(SchemaField field, boolean reverse) {
    return new SortField(field.name, SortField.DOC, reverse);
  }

  /**
   * Returns an {@link IntFieldSource} over the named field.
   *
   * NOTE(review): an int value source over binary data looks like a
   * placeholder rather than a meaningful implementation -- confirm whether
   * function queries on this field type should be rejected instead.
   */
  public ValueSource getValueSource(SchemaField field) {
    return new IntFieldSource(field.name);
  }

  /**
   * Creates the Lucene field for a base64 encoded external value, using the
   * byte[] constructor of {@link Field} so the decoded bytes are stored.
   *
   * @param field       schema definition of the field being created
   * @param externalVal base64 encoded value
   * @param boost       boost to apply to the created field
   * @return the binary field, or {@code null} if decoding produced {@code null}
   * @throws SolrException (code 500) if the value cannot be decoded
   */
  public Field createField(SchemaField field, String externalVal, float boost) {
    byte[] val;
    try {
      val = toInternalBinary(externalVal);
    } catch (Exception e) {
      throw new SolrException(500, "Error while creating field '" + field + "' from value '" + externalVal + "'", e, false);
    }
    if (val == null) {
      return null;
    }

    Field f = new Field(field.getName(), val, field.stored() ? Field.Store.YES : Field.Store.NO);
    f.setOmitNorms(field.omitNorms());
    f.setBoost(boost);
    return f;
  }

  /**
   * Convert an external value (from XML update command or from query string)
   * into the internal binary format by base64 decoding it.
   *
   * @param val base64 encoded text; must not be {@code null}
   * @return the decoded bytes
   * @see #toExternal(Fieldable)
   */
  public byte[] toInternalBinary(String val) {
    try {
      return new Base64().decode(val.getBytes(BASE64_CHARSET));
    } catch (UnsupportedEncodingException e) {
      // US-ASCII is a charset every Java platform is required to support.
      throw new IllegalStateException(BASE64_CHARSET + " charset is not available", e);
    }
  }

  /**
   * Always returns {@code null}: this type builds its field from bytes in
   * {@link #createField} and has no String internal form.
   *
   * NOTE(review): callers that expect a non-null internal value (e.g. query
   * parsing on an indexed field) would need handling here -- confirm that
   * fields of this type are never indexed.
   */
  public String toInternal(String val) {
    return null;
  }

  /** Writes the field to an XML response as a string holding its external form. */
  public void write(XMLWriter xmlWriter, String name, Field f) throws IOException {
    xmlWriter.writeStr(name, toExternal((Fieldable) f));
  }

  /** Writes the field to a text response as a string holding its external form. */
  public void write(TextResponseWriter writer, String name, Fieldable f) throws IOException {
    writer.writeStr(name, toExternal(f), true);
  }

  /**
   * Convert the stored-field format to an external (string) value: the
   * complete base64 encoding of a binary field's bytes, or the plain string
   * value when the field is not binary. The result can be passed back to
   * {@link #toInternalBinary} to recover the stored bytes.
   *
   * @param f the stored field
   * @return the external representation of the field value
   * @see #toInternalBinary
   */
  public String toExternal(Fieldable f) {
    if (!f.isBinary()) {
      return f.stringValue();
    }
    try {
      return new String(new Base64().encode(f.binaryValue()), BASE64_CHARSET);
    } catch (UnsupportedEncodingException e) {
      // US-ASCII is a charset every Java platform is required to support.
      throw new IllegalStateException(BASE64_CHARSET + " charset is not available", e);
    }
  }

  /**
   * @deprecated replaced by {@link #toExternal(Fieldable)}
   */
  @Deprecated
  public String toExternal(Field f) {
    return this.toExternal((Fieldable) f);
  }
}

> -----Original Message-----
> From: Chris Hostetter [mailto:[EMAIL PROTECTED]
> Sent: Friday, 7 July 2006 6:33 AM
> To: solr-user@lucene.apache.org
> Subject: RE: base64 support & containers
>
>
> : No - no advanced use of XML has been implemented.
> : One of the fields in the add request would contain the
> original binary
> : document encoded in base64, then this would preferably be decoded to
> : binary and placed into a lucene binary field, which would need to be
> : defined in Solr.
>
> Ah! ...
I think I'm understanding now: your goal is to be
> able to send binary data to Solr in some way as a field value
> when adding/updating a doc -- preferably by base64 encoding
> it -- and then get the data back in the same way when
> fetching the doc as a result of a query, but instead of just
> storing the base64 encoded data, you'd like Solr to utilize
> the "binary" storage mechanism that Lucene supports ....
> presumably because it should take up less space than storing
> the base64 encoded value.
>
> does that capture your goal fairly?
>
> there's no way to do this with Solr out of the box ... but i
> think it should be possible to write your own subclass of
> FieldType which does the
> base64 decoding/encoding in the createField and write
> methods. (no existing subclasses override createField, they
> leverage it by implementing toInternal, but that assumes you
> want to use the String constructor of Field -- it doesn't
> mean you can't override it and use the byte[] constructor instead)
>
> once you have your new FieldType, you can use it in your
> schema just like any other built in field type class...
>
> <fieldtype name="string" class="solr.StrField" />
> <fieldtype name="binary" class="your.package.BinaryField" />
>
>
> ...that *should* work, but by all means if you run into snags
> feel free to send followup questions to the solr-dev list.
>
>
>
> -Hoss
>
>