Thanks to Hoss & Yonik for your support. Attached is the BinaryField
implementation, in case anyone else is interested.
-Ben
package org.apache.solr.schema;
import org.apache.solr.search.function.ValueSource;
import org.apache.solr.search.function.IntFieldSource;
import org.apache.solr.core.SolrException;
import org.apache.solr.request.XMLWriter;
import org.apache.solr.request.TextResponseWriter;
import org.apache.lucene.search.SortField;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Fieldable;
import org.apache.commons.codec.binary.Base64;
import java.io.IOException;
import java.util.Map;
import java.util.logging.Logger;
/**
* Store base64 encoded strings as binary values.
*
* <xmp>
* Usage:
* 1. Add new field type in schema.xml
* <types>
* ...
* <fieldtype name="binary" class="solr.BinaryField"/>
* </types>
*
* 2. Then add fields of binary type in schema.xml
* <fields>
* ...
* <field name="body" type="binary" indexed="false" stored="true"/>
* </fields>
* </xmp>
*
* @see org.apache.lucene.document.Field#Field(String, byte[], org.apache.lucene.document.Field.Store)
* @author $Author: bincani $
* @version $Revision: 287 $
*/
public class BinaryField extends FieldType {
final Logger log =
Logger.getLogger(BinaryField.class.getName());
/**
* Returns the SortField instance that should be used to sort
fields
* of this type.
*/
public SortField getSortField(SchemaField field, boolean
reverse) {
return new SortField(field.name,SortField.DOC, reverse);
}
public ValueSource getValueSource(SchemaField field) {
return new IntFieldSource(field.name);
}
public Field createField(SchemaField field, String externalVal,
float boost) {
byte [] val;
try {
val = toInternalBinary(externalVal);
}
catch (Exception e) {
throw new SolrException(500, "Error while
creating field '" + field + "' from value '" + externalVal + "'", e,
false);
}
if (val==null) return null;
Field f = new Field(
field.getName(),
val,
field.stored() ? Field.Store.YES :
Field.Store.NO
);
f.setOmitNorms(field.omitNorms());
f.setBoost(boost);
return f;
}
/**
* Convert an external value (from XML update command or from
query string)
* into the internal format.
* @see #toExternal
*/
public byte[] toInternalBinary(String val) {
return new Base64().decode(val.getBytes());
}
public String toInternal(String val) {
return null;
}
public void write(XMLWriter xmlWriter, String name, Field f)
throws IOException {
xmlWriter.writeStr(name, toExternal(f));
}
public void write(TextResponseWriter writer, String name,
Fieldable f) throws IOException {
writer.writeStr(name, toExternal(f), true);
}
/**
* Convert the stored-field format to an external (string, human
readable)
* value
* @see #toInternal
*/
public String toExternal(Fieldable f) {
String extFormat = null;
log.info("f.isBinary()=" + f.isBinary());
if (f.isBinary()) {
extFormat = new String(new
Base64().encode(f.binaryValue()));
}
else {
extFormat = f.stringValue();
}
log.info("extFormat.length()=" + extFormat.length());
// show only a segment
if (extFormat.length() > 10)
extFormat = extFormat.substring(0, 10);
extFormat = "*** BinaryField " + extFormat + "
BinaryField ***";
return extFormat;
}
/**
* @deprecated replaced by [EMAIL PROTECTED] #toExternal(Fieldable f)}
*/
@Deprecated public String toExternal(Field f) {
return this.toExternal((Fieldable)f);
}
}
> -----Original Message-----
> From: Chris Hostetter [mailto:[EMAIL PROTECTED]
> Sent: Friday, 7 July 2006 6:33 AM
> To: [email protected]
> Subject: RE: base64 support & containers
>
>
> : No - no advanced use of XML has been implemented.
> : One of the fields in the add request would contain the
> original binary
> : document encoded in base64, then this would preferably be decoded to
> : binary and placed into a lucene binary field, which would need to be
> : defined in Solr.
>
> Ah! ... I think I'm understanding now: your goal is to be
> able to send binary data to Solr in some way as a field value
> when adding/updating a doc -- preferably by base64 encoding
> it -- and then get the data back in the same way when
> fetching the doc as a result of a query, but instead of just
> storing the base64 encoded data, you'd like Solr to utilize
> the "binary" storage mechanism that Lucene supports ....
> presumably because it should take up less space than storing
> the base64 encoded value.
>
> does that capture your goal fairly?
>
> there's no way to do this with Solr out of the box ... but i
> think it should be possible to write your own subclass of
> FieldType which does the
> base64 decoding/encoding in the createField and write
> methods. (no existing subclasses override createField, they
> leverage it by implementing toInternal, but that assumes you
> want to use the String constructor of Field -- it doesn't
> mean you can't override it and use the byte[] constructor instead)
>
> once you have your new FieldType, you can use it in your
> schema just like any other built in field type class...
>
> <fieldtype name="string" class="solr.StrField" />
> <fieldtype name="binary" class="your.package.BinaryField" />
>
>
> ...that *should* work, but by all means if you run into snags
> feel free to send followup questions to the solr-dev list.
>
>
>
> -Hoss
>
>