Author: vgritsenko Date: Tue Jan 30 17:05:23 2007 New Revision: 501664 URL: http://svn.apache.org/viewvc?view=rev&rev=501664 Log: <action dev="VG" type="fix" fixes-bug="19203" due-to="Natalia Shilenkova"> Fix indexing of multi-byte numeric types (long, double, etc.). Due to the change in the BTree, existing databases have to be re-built using the DatabaseRebuild command line utility. </action>
Added: xml/xindice/trunk/java/src/org/apache/xindice/tools/DatabaseRebuild.java (with props) xml/xindice/trunk/java/tests/src/org/apache/xindice/core/indexer/ xml/xindice/trunk/java/tests/src/org/apache/xindice/core/indexer/ValueIndexerTest.java (with props) Modified: xml/xindice/trunk/java/src/org/apache/xindice/core/data/Value.java xml/xindice/trunk/java/src/org/apache/xindice/core/filer/BTree.java xml/xindice/trunk/java/src/org/apache/xindice/core/filer/BTreeFiler.java xml/xindice/trunk/java/src/org/apache/xindice/core/indexer/ValueIndexer.java xml/xindice/trunk/status.xml Modified: xml/xindice/trunk/java/src/org/apache/xindice/core/data/Value.java URL: http://svn.apache.org/viewvc/xml/xindice/trunk/java/src/org/apache/xindice/core/data/Value.java?view=diff&rev=501664&r1=501663&r2=501664 ============================================================================== --- xml/xindice/trunk/java/src/org/apache/xindice/core/data/Value.java (original) +++ xml/xindice/trunk/java/src/org/apache/xindice/core/data/Value.java Tue Jan 30 17:05:23 2007 @@ -187,11 +187,10 @@ byte b1 = data[pos + i]; byte b2 = ddata[dpos + i]; - if (b1 == b2) - continue; - else { - short s1 = (short) (b1 >>> 0); - short s2 = (short) (b2 >>> 0); + if (b1 != b2) { + // get unsigned value + int s1 = ((int) b1) & 0xFF; + int s2 = ((int) b2) & 0xFF; return s1 > s2 ? 
(i + 1) : -(i + 1); } } Modified: xml/xindice/trunk/java/src/org/apache/xindice/core/filer/BTree.java URL: http://svn.apache.org/viewvc/xml/xindice/trunk/java/src/org/apache/xindice/core/filer/BTree.java?view=diff&rev=501664&r1=501663&r2=501664 ============================================================================== --- xml/xindice/trunk/java/src/org/apache/xindice/core/filer/BTree.java (original) +++ xml/xindice/trunk/java/src/org/apache/xindice/core/filer/BTree.java Tue Jan 30 17:05:23 2007 @@ -903,7 +903,7 @@ leftIdx = -(leftIdx + 1); } n = Math.min(leftIdx + 1, ptrs.length); - for (int i = 0; i <= n; i++) { + for (int i = 0; i < n; i++) { if (query.testValue(values[i])) { callback.indexInfo(values[i], ptrs[i]); } Modified: xml/xindice/trunk/java/src/org/apache/xindice/core/filer/BTreeFiler.java URL: http://svn.apache.org/viewvc/xml/xindice/trunk/java/src/org/apache/xindice/core/filer/BTreeFiler.java?view=diff&rev=501664&r1=501663&r2=501664 ============================================================================== --- xml/xindice/trunk/java/src/org/apache/xindice/core/filer/BTreeFiler.java (original) +++ xml/xindice/trunk/java/src/org/apache/xindice/core/filer/BTreeFiler.java Tue Jan 30 17:05:23 2007 @@ -61,8 +61,8 @@ * * @version $Revision$, $Date$ */ -public final class BTreeFiler extends BTree - implements Filer { +public class BTreeFiler extends BTree + implements Filer { private static final Log log = LogFactory.getLog(BTreeFiler.class); Modified: xml/xindice/trunk/java/src/org/apache/xindice/core/indexer/ValueIndexer.java URL: http://svn.apache.org/viewvc/xml/xindice/trunk/java/src/org/apache/xindice/core/indexer/ValueIndexer.java?view=diff&rev=501664&r1=501663&r2=501664 ============================================================================== --- xml/xindice/trunk/java/src/org/apache/xindice/core/indexer/ValueIndexer.java (original) +++ xml/xindice/trunk/java/src/org/apache/xindice/core/indexer/ValueIndexer.java Tue Jan 30 17:05:23 2007 @@ 
-177,6 +177,17 @@ return pattern; } + /** + * Creates new Value object that depends on string parameter and a type of indexer. + * The idea here is that any value of any type should be transformed to a byte array + * in such a way that two Values of the same type are comparable. Value objects are + * compared by comparing their data arrays byte-by-byte, starting from byte with + * index 0. Also, data arrays will behave as array of <b>unsigned</b> bytes with + * values ranging from 0 to 255. + * + * @param value + * @return new Value object that represents specific value of this indexer type + */ public Value getTypedValue(String value) { if (type != STRING && type != TRIMMED) { value = value.trim(); @@ -189,6 +200,13 @@ try { switch (type) { case INTEGER: + /* + Generally, two integers can be compared byte-by-byte, returning correct results, + except negative numbers which will be always bigger than positive numbers. + To solve this, change the value to be unsigned. Number range changes from + Long.MIN_VALUE / Long.MAX_VALUE to 0 / Long.MAX_VALUE - Long.MIN_VALUE. + This is done by flipping the first bit of the byte with index 0. + */ long l = Long.parseLong(value); b[0] = (byte) ((l >>> 56) & 0xFF); b[1] = (byte) ((l >>> 48) & 0xFF); @@ -197,29 +215,61 @@ b[4] = (byte) ((l >>> 24) & 0xFF); b[5] = (byte) ((l >>> 16) & 0xFF); b[6] = (byte) ((l >>> 8) & 0xFF); - b[7] = (byte) ((l >>> 0) & 0xFF); + b[7] = (byte) (l & 0xFF); + b[0] = (byte) (b[0] ^ 0x80); break; + case FLOAT: + /* + Float/Double number are stored according to IEEE standard 754. In short, + float numbers have the folloing format: 1 bit to indicate the sign of + the number, 8 bits for exponent, 23 bits for mantissa, and double + numbers have the following format: 1 bit to indicate the sign of + the number, 11 bits for exponent, 52 bits for mantissa. 
Both float and + double are normalized, so they can be compared byte-by-byte, except + that comparing two negative numbers or two number with different signs + will return incorrect results. + This is solved by changing number sign (is is stored in the first bit) + and flipping all the bits for negative numbers. + */ double d = Double.parseDouble(value); - int i1 = (int) Math.round(d); - int i2 = (int) Math.round((d - i1) * 1000000000); - b[0] = (byte) ((i1 >>> 24) & 0xFF); - b[1] = (byte) ((i1 >>> 16) & 0xFF); - b[2] = (byte) ((i1 >>> 8) & 0xFF); - b[3] = (byte) ((i1 >>> 0) & 0xFF); - b[4] = (byte) ((i2 >>> 24) & 0xFF); - b[5] = (byte) ((i2 >>> 16) & 0xFF); - b[6] = (byte) ((i2 >>> 8) & 0xFF); - b[7] = (byte) ((i2 >>> 0) & 0xFF); + long bits = Double.doubleToLongBits(d); + + b[0] = (byte) ((bits >>> 56) & 0xFF); + if ((b[0] & 0xFF) != 0) { + // negative numbers + b[0] ^= 0xFF; + b[1] = (byte) ((bits >>> 48) & 0xFF ^ 0xFF); + b[2] = (byte) ((bits >>> 40) & 0xFF ^ 0xFF); + b[3] = (byte) ((bits >>> 32) & 0xFF ^ 0xFF); + b[4] = (byte) ((bits >>> 24) & 0xFF ^ 0xFF); + b[5] = (byte) ((bits >>> 16) & 0xFF ^ 0xFF); + b[6] = (byte) ((bits >>> 8) & 0xFF ^ 0xFF); + b[7] = (byte) (bits & 0xFF ^ 0xFF); + } else { + b[0] ^= 0x80; + b[1] = (byte) ((bits >>> 48) & 0xFF); + b[2] = (byte) ((bits >>> 40) & 0xFF); + b[3] = (byte) ((bits >>> 32) & 0xFF); + b[4] = (byte) ((bits >>> 24) & 0xFF); + b[5] = (byte) ((bits >>> 16) & 0xFF); + b[6] = (byte) ((bits >>> 8) & 0xFF); + b[7] = (byte) (bits & 0xFF); + } + break; + case BYTE: b[0] = Byte.parseByte(value); + b[0] = (byte) (b[0] ^ 0x80); break; + case CHAR: char c = value.charAt(0); b[0] = (byte) ((c >>> 8) & 0xFF); b[1] = (byte) ((c >>> 0) & 0xFF); break; + case BOOLEAN: if ("[true][yes][1][y][on]".indexOf("[" + value.toLowerCase() + "]") != -1) { b[0] = 1; @@ -229,12 +279,15 @@ return EmptyValue; } break; + default: if (log.isWarnEnabled()) { log.warn("invalid type : " + type); } } + return new Value(b); + } catch (Exception e) { 
return EmptyValue; } @@ -260,14 +313,14 @@ // Write the pos b[l + 1] = (byte) ((pos >>> 24) & 0xFF); b[l + 2] = (byte) ((pos >>> 16) & 0xFF); - b[l + 3] = (byte) ((pos >>> 8) & 0xFF); - b[l + 4] = (byte) ((pos >>> 0) & 0xFF); + b[l + 3] = (byte) ((pos >>> 8) & 0xFF); + b[l + 4] = (byte) ((pos >>> 0) & 0xFF); // Write the len b[l + 5] = (byte) ((len >>> 24) & 0xFF); b[l + 6] = (byte) ((len >>> 16) & 0xFF); - b[l + 7] = (byte) ((len >>> 8) & 0xFF); - b[l + 8] = (byte) ((len >>> 0) & 0xFF); + b[l + 7] = (byte) ((len >>> 8) & 0xFF); + b[l + 8] = (byte) ((len >>> 0) & 0xFF); // Write the elemID b[l + 9] = (byte) ((elemID >>> 8) & 0xFF); Added: xml/xindice/trunk/java/src/org/apache/xindice/tools/DatabaseRebuild.java URL: http://svn.apache.org/viewvc/xml/xindice/trunk/java/src/org/apache/xindice/tools/DatabaseRebuild.java?view=auto&rev=501664 ============================================================================== --- xml/xindice/trunk/java/src/org/apache/xindice/tools/DatabaseRebuild.java (added) +++ xml/xindice/trunk/java/src/org/apache/xindice/tools/DatabaseRebuild.java Tue Jan 30 17:05:23 2007 @@ -0,0 +1,225 @@ +/* + * Copyright 2006 The Apache Software Foundation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + * $Id$ + */ + +package org.apache.xindice.tools; + +import org.apache.xindice.xml.dom.DOMParser; +import org.apache.xindice.core.filer.BTreeFiler; +import org.apache.xindice.core.filer.BTreeCallback; +import org.apache.xindice.core.data.Value; +import org.apache.xindice.core.data.Key; +import org.apache.xindice.core.Database; +import org.apache.xindice.core.Collection; +import org.apache.xindice.core.DBException; +import org.apache.xindice.core.indexer.Indexer; +import org.apache.xindice.util.Configuration; +import org.apache.xindice.util.XindiceException; + +import java.io.File; +import java.io.IOException; + +/** + * Command line utility to re-build all btree filers of the database. + * + * @version $Revision$, $Date$ + */ +public class DatabaseRebuild { + + private static final int CMD_COPY = 1; + private static final int CMD_INDEX = 2; + + private static Database db; + private static String path; + private static boolean removeBackupFiles; + private static int command; + + + public static void main(String[] args) throws Exception { + if (args.length < 2 || !("copy".equals(args[0]) || "index".equals(args[0])) || + args[1] == null || args[1].length() == 0) { + usage(); + return; + } + + command = "copy".equals(args[0]) ? 
CMD_COPY : CMD_INDEX; + String dbLocation = args[1]; + if (args.length > 2 && args[2].equals("--remove")) { + removeBackupFiles = true; + } + + String config = "<root-collection dbroot='" + dbLocation + "/' name='" + dbLocation + "'/>"; + db = new Database(); + + try { + db.setConfig(new Configuration(DOMParser.toDocument(config))); + + path = db.getCollectionRoot().getPath(); + processChildCollections("/"); + } finally { + db.close(); + } + } + + private static void usage() { + System.out.println("Commands:"); + System.out.println("copy <db location> [--remove]"); + System.out.println("index <db location>"); + } + + private static void processChildCollections(String colRoot) { + System.out.println("Getting child collections for " + colRoot); + try { + // Get a Collection reference + Collection col = db.getCollection(colRoot); + if (col == null) { + System.out.println("Error fetching collection '" + colRoot + "'"); + return; + } + + processCollection(col, colRoot); + + String[] colNames = col.listCollections(); + for (int i = 0; i < colNames.length; i++) { + processChildCollections(colRoot + colNames[i] + "/"); + } + } catch (Exception e) { + System.out.println("Got an excefption when processing collection: " + colRoot); + e.printStackTrace(); + } + } + + private static void processCollection(Collection col, String location) throws XindiceException, IOException { + switch (command) { + case CMD_INDEX: + rebuildIndex(col); + break; + + case CMD_COPY: + rebuildCollection(col, location, col.getName()); + break; + } + } + + private static void rebuildCollection(Collection col, String location, String name) throws XindiceException, IOException { + if (!(col.getFiler() instanceof BTreeFiler)) { + System.out.println("Filer for collection " + location + " is not BTreeFiler. 
Skipping..."); + return; + } + + // close collection and its filer + col.close(); + + System.out.println("Processing collection " + location); + + File root = new File(path + location); + + // backup + String fileName = path + location + "/" + name; + // FIXME What if copy fails. It's probably a better idea to first make a copy, + // and rename after that? + new File(fileName + ".tbl").renameTo(new File(fileName + ".old.tbl")); + + // prepare + BTreeCopy filer = new BTreeCopy(); + filer.setLocation(root, name + ".old"); + filer.setConfig(col.getFiler().getConfig()); + if (!filer.exists()) { + System.out.println("Filer for " + fileName + ".old.tbl does not exists"); + return; + } + + BTreeFiler newFiler = new BTreeFiler(); + newFiler.setLocation(root, name); + newFiler.setConfig(col.getFiler().getConfig()); + if (newFiler.exists()) { + System.out.println("Filer for " + fileName + ".tbl already exists"); + return; + } + + // copy + newFiler.create(); + try { + filer.open(); + newFiler.open(); + + filer.copy(newFiler); + if (removeBackupFiles) { + filer.deleteFile(); + } + } finally { + try { + filer.close(); + } catch (Exception e) { + e.printStackTrace(); + } + try { + newFiler.close(); + } catch (Exception e) { + e.printStackTrace(); + } + } + } + + private static void rebuildIndex(Collection col) throws DBException { + if (col.getFiler() != null) { + String[] list = col.listIndexers(); + for (int i = 0; i < list.length; i++) { + Indexer idx = col.getIndexer(list[i]); + Configuration idxConf = idx.getConfig(); + System.out.println("Rebuilding index " + list[i] + " for collection " + col.getName()); + col.dropIndexer(idx); + col.createIndexer(idxConf); + } + } + } + + private static class BTreeCopy extends BTreeFiler { + private Value getValue(long pointer) throws IOException { + return super.readValue(pointer); + } + + private void copy(BTreeFiler newFiler) throws XindiceException, IOException { + query(null, new CopyCallback(this, newFiler)); + } + + private 
boolean deleteFile() { + return getFile().delete(); + } + } + + private static class CopyCallback implements BTreeCallback { + private BTreeCopy filer; + private BTreeFiler newFiler; + + public CopyCallback(BTreeCopy filer, BTreeFiler newFiler) { + this.filer = filer; + this.newFiler = newFiler; + } + + public boolean indexInfo(Value value, long pointer) { + try { + Value v = filer.getValue(pointer); + newFiler.writeRecord(new Key(value), v); + } catch (Exception e) { + e.printStackTrace(); + } + + return true; + } + } +} Propchange: xml/xindice/trunk/java/src/org/apache/xindice/tools/DatabaseRebuild.java ------------------------------------------------------------------------------ svn:eol-style = native Propchange: xml/xindice/trunk/java/src/org/apache/xindice/tools/DatabaseRebuild.java ------------------------------------------------------------------------------ svn:keywords = Id Revision Author Date Added: xml/xindice/trunk/java/tests/src/org/apache/xindice/core/indexer/ValueIndexerTest.java URL: http://svn.apache.org/viewvc/xml/xindice/trunk/java/tests/src/org/apache/xindice/core/indexer/ValueIndexerTest.java?view=auto&rev=501664 ============================================================================== --- xml/xindice/trunk/java/tests/src/org/apache/xindice/core/indexer/ValueIndexerTest.java (added) +++ xml/xindice/trunk/java/tests/src/org/apache/xindice/core/indexer/ValueIndexerTest.java Tue Jan 30 17:05:23 2007 @@ -0,0 +1,171 @@ +/* + * Copyright 2006 The Apache Software Foundation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + * + * $Id$ + */ + +package org.apache.xindice.core.indexer; + +import junit.framework.TestCase; +import org.apache.xindice.core.Database; +import org.apache.xindice.core.Collection; +import org.apache.xindice.core.DatabaseTest; +import org.apache.xindice.core.DBException; +import org.apache.xindice.util.Configuration; +import org.apache.xindice.xml.dom.DOMParser; +import org.w3c.dom.Document; + +/** + * Tests indexed queries + * + * @version $Revision$, $Date$ + */ +public class ValueIndexerTest extends TestCase { + + private Database db; + private Collection collection; + + + public ValueIndexerTest(String name) { + super(name); + } + + public void setUp() throws Exception { + String name = getClass().getName(); + db = new Database(); + db.setConfig(new Configuration(DOMParser.toDocument(DatabaseTest.DATABASE))); + collection = db.createCollection(name, new Configuration( + DOMParser.toDocument( + "<collection compressed=\"true\" name=\"" + name + "\" inline-metadata=\"true\">" + + "<filer class=\"org.apache.xindice.core.filer.BTreeFiler\" />" + + "</collection>"), false + )); + } + + public void tearDown() throws Exception { + db.dropCollection(collection); + db.close(); + } + + private Indexer createIndex(String name, String pattern, String type) throws Exception { + String config = "<index name='" + name + "' " + + "class='org.apache.xindice.core.indexer.ValueIndexer' " + + "pattern='" + pattern + "' type='" + type + "'/>"; + Indexer ind = collection.createIndexer(new Configuration(DOMParser.toDocument(config))); + Thread.sleep(100); + return ind; + } + + private IndexMatch[] query(Indexer ind, String pattern, String value, int op) throws DBException { + IndexPattern indPattern = new IndexPattern(collection.getSymbols(), pattern, null); + IndexQuery query = new IndexQuery(indPattern, op, value); + return ind.queryMatches(query); + + } + + public void 
testLongIntIndex() throws Exception { + Indexer ind = createIndex("IntIndex", "[EMAIL PROTECTED]", "long"); + + Document document = DOMParser.toDocument("<test value='1050687000291'/>"); + collection.insertDocument("key1", document); + + document = DOMParser.toDocument("<test value='1049903940000'/>"); + collection.insertDocument("key2", document); + + document = DOMParser.toDocument("<test value='-1050687000291'/>"); + collection.insertDocument("key3", document); + + IndexMatch[] match = query(ind, "[EMAIL PROTECTED]", "1049903940000", IndexQuery.GT); + + assertEquals(1, match.length); + } + + public void testDoubleFloatIndex() throws Exception { + Indexer ind = createIndex("FloatIndex", "[EMAIL PROTECTED]", "float"); + + Document document = DOMParser.toDocument("<test value='71.4894'/>"); + collection.insertDocument("key1", document); + + document = DOMParser.toDocument("<test value='211.499539'/>"); + collection.insertDocument("key2", document); + + document = DOMParser.toDocument("<test value='-211.499539'/>"); + collection.insertDocument("key3", document); + + document = DOMParser.toDocument("<test value='-391.958379'/>"); + collection.insertDocument("key4", document); + + document = DOMParser.toDocument("<test value='0.00499539'/>"); + collection.insertDocument("key5", document); + + document = DOMParser.toDocument("<test value='0'/>"); + collection.insertDocument("key6", document); + + document = DOMParser.toDocument("<test value='" + Double.NEGATIVE_INFINITY + "'/>"); + collection.insertDocument("key7", document); + + document = DOMParser.toDocument("<test value='" + Double.POSITIVE_INFINITY + "'/>"); + collection.insertDocument("key8", document); + + IndexMatch[] match = query(ind, "[EMAIL PROTECTED]", "71.48940001", IndexQuery.LT); + assertEquals(6, match.length); + + match = query(ind, "[EMAIL PROTECTED]", "-211.499539", IndexQuery.LT); + assertEquals(2, match.length); + } + + public void testByteIndex() throws Exception { + Indexer ind = 
createIndex("ByteIndex", "[EMAIL PROTECTED]", "byte"); + + Document document = DOMParser.toDocument("<test value='20'/>"); + collection.insertDocument("key1", document); + + document = DOMParser.toDocument("<test value='-112'/>"); + collection.insertDocument("key2", document); + + IndexMatch[] match = query(ind, "[EMAIL PROTECTED]", "-112", IndexQuery.GT); + + assertEquals(1, match.length); + } + + public void testCharIndex() throws Exception { + Indexer ind = createIndex("CharIndex", "[EMAIL PROTECTED]", "char"); + + Document document = DOMParser.toDocument("<test value='z'/>"); + collection.insertDocument("key1", document); + + document = DOMParser.toDocument("<test value='\u00FF'/>"); + collection.insertDocument("key2", document); + + IndexMatch[] match = query(ind, "[EMAIL PROTECTED]", "z", IndexQuery.EQ); + + assertEquals(1, match.length); + } + + public void testBooleanIndex() throws Exception { + Indexer ind = createIndex("BooleanIndex", "[EMAIL PROTECTED]", "boolean"); + + Document document = DOMParser.toDocument("<test value='false'/>"); + collection.insertDocument("key1", document); + + document = DOMParser.toDocument("<test value='true'/>"); + collection.insertDocument("key2", document); + + IndexMatch[] match = query(ind, "[EMAIL PROTECTED]", "false", IndexQuery.EQ); + + assertEquals(1, match.length); + } + +} Propchange: xml/xindice/trunk/java/tests/src/org/apache/xindice/core/indexer/ValueIndexerTest.java ------------------------------------------------------------------------------ svn:eol-style = native Propchange: xml/xindice/trunk/java/tests/src/org/apache/xindice/core/indexer/ValueIndexerTest.java ------------------------------------------------------------------------------ svn:keywords = Id Revision Author Date Modified: xml/xindice/trunk/status.xml URL: http://svn.apache.org/viewvc/xml/xindice/trunk/status.xml?view=diff&rev=501664&r1=501663&r2=501664 ============================================================================== --- 
xml/xindice/trunk/status.xml (original) +++ xml/xindice/trunk/status.xml Tue Jan 30 17:05:23 2007 @@ -74,7 +74,12 @@ </todo> <changes> - <release version="1.1b5-dev" date="Oct 27 2006"> + <release version="1.1b5-dev" date="Jan 30 2007"> + <action dev="VG" type="fix" fixes-bug="19203" due-to="Natalia Shilenkova"> + Fix indexing of multi-byte numeric types (long, double, etc.). Due to + the change in the BTree, existing databases have to be re-built using + the DatabaseRebuild command line utility. + </action> <action dev="VG" type="update" fixes-bug="41002" due-to="Natalia Shilenkova"> Ignore SAXNotSupportedException in DOMParser.setFeature if feature is being set to false. (Needed for Oracle 10g application server).