Author: mrglavas Date: Wed Jul 4 19:56:10 2012 New Revision: 1357381 URL: http://svn.apache.org/viewvc?rev=1357381&view=rev Log: Enhancements to the hashing algorithms used in internal data structures within Xerces to make them more resistant to collisions. To improve the distribution a new hash function is randomly selected each time the number of items in a bucket exceeds a threshold.
Added: xerces/java/trunk/src/org/apache/xerces/util/PrimeNumberSequenceGenerator.java (with props) Modified: xerces/java/trunk/src/org/apache/xerces/impl/dtd/DTDGrammar.java xerces/java/trunk/src/org/apache/xerces/util/SoftReferenceSymbolTable.java xerces/java/trunk/src/org/apache/xerces/util/SymbolHash.java xerces/java/trunk/src/org/apache/xerces/util/SymbolTable.java xerces/java/trunk/src/org/apache/xerces/util/XMLAttributesImpl.java Modified: xerces/java/trunk/src/org/apache/xerces/impl/dtd/DTDGrammar.java URL: http://svn.apache.org/viewvc/xerces/java/trunk/src/org/apache/xerces/impl/dtd/DTDGrammar.java?rev=1357381&r1=1357380&r2=1357381&view=diff ============================================================================== --- xerces/java/trunk/src/org/apache/xerces/impl/dtd/DTDGrammar.java (original) +++ xerces/java/trunk/src/org/apache/xerces/impl/dtd/DTDGrammar.java Wed Jul 4 19:56:10 2012 @@ -19,6 +19,7 @@ package org.apache.xerces.impl.dtd; import java.util.ArrayList; import java.util.Hashtable; +import java.util.Random; import org.apache.xerces.impl.dtd.models.CMAny; import org.apache.xerces.impl.dtd.models.CMBinOp; @@ -2639,6 +2640,26 @@ public class DTDGrammar * @author Andy Clark, IBM */ protected static final class QNameHashtable { + + private static final class PrimeNumberSequenceGenerator { + + private static int [] PRIMES = { + 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, + 61, 67, 71, 73, 79, 83, 89, 97, 101, 103, 107, 109, 113, 127, 131, 137, + 139, 149, 151, 157, 163, 167, 173, 179, 181, 191, 193, 197, 199, 211, 223, 227, + 229, 233, 239, 241, 251, 257, 263, 269, 271, 277, 281, 283, 293, 307, 311, 313, + 317, 331, 337, 347, 349, 353, 359, 367, 373, 379, 383, 389, 397, 401, 409, 419, + 421, 431, 433, 439, 443, 449, 457, 461, 463, 467, 479, 487, 491, 499, 503, 509, + 521, 523, 541, 547, 557, 563, 569, 571, 577, 587, 593, 599, 601, 607, 613, 617, + 619, 631, 641, 643, 647, 653, 659, 661, 673, 677, 683, 691, 701, 709, 719, 727}; + + static void generateSequence(int[] arrayToFill) { + Random r = new Random(); + for (int i = 0; i < arrayToFill.length; ++i) { + arrayToFill[i] = PRIMES[r.nextInt(PRIMES.length)]; + } + } + } // // Constants @@ -2651,11 +2672,29 @@ public class DTDGrammar // that we get a better distribution for hashing. -Ac /** Hashtable size (101). */ private static final int HASHTABLE_SIZE = 101; + + /** Maximum hash collisions per bucket for a table with load factor == 1. */ + private static final int MAX_HASH_COLLISIONS = 40; + + private static final int MULTIPLIERS_SIZE = 1 << 5; + private static final int MULTIPLIERS_MASK = MULTIPLIERS_SIZE - 1; // // Data // private Object[][] fHashTable = new Object[HASHTABLE_SIZE][]; + + /** actual table size **/ + private int fTableSize = HASHTABLE_SIZE; + + /** The total number of entries in the hash table. */ + private int fCount = 0; + + /** + * Array of randomly selected hash function multipliers or <code>null</code> + * if the default String.hashCode() function should be used. + */ + private int[] fHashMultipliers; // // Public methods @@ -2663,7 +2702,7 @@ public class DTDGrammar /** Associates the given value with the specified key tuple. */ public void put(String key, int value) { - int hash = (key.hashCode() & 0x7FFFFFFF) % HASHTABLE_SIZE; + int hash = (hash(key) & 0x7FFFFFFF) % fTableSize; Object[] bucket = fHashTable[hash]; if (bucket == null) { @@ -2672,6 +2711,11 @@ public class DTDGrammar bucket[1] = key; bucket[2] = new int[]{value}; fHashTable[hash] = bucket; + if (++fCount > fTableSize) { + // Rehash the table if the number of entries + // would exceed the number of buckets. + rehash(); + } } else { int count = ((int[])bucket[0])[0]; int offset = 1 + 2*count; @@ -2692,10 +2736,20 @@ public class DTDGrammar } j += 2; } - if (! found) { + if (!found) { bucket[offset++] = key; bucket[offset]= new int[]{value}; ((int[])bucket[0])[0] = ++count; + if (++fCount > fTableSize) { + // Rehash the table if the number of entries + // would exceed the number of buckets. + rehash(); + } + else if (count > MAX_HASH_COLLISIONS) { + // Select a new hash function and rehash the table if + // MAX_HASH_COLLISIONS is exceeded. + rebalance(); + } } } @@ -2706,7 +2760,7 @@ public class DTDGrammar /** Returns the value associated with the specified key tuple. */ public int get(String key) { - int hash = (key.hashCode() & 0x7FFFFFFF) % HASHTABLE_SIZE; + int hash = (hash(key) & 0x7FFFFFFF) % fTableSize; Object[] bucket = fHashTable[hash]; if (bucket == null) { @@ -2724,6 +2778,93 @@ public class DTDGrammar return -1; } // get(int,String,String) + + public int hash(String symbol) { + if (fHashMultipliers == null) { + return symbol.hashCode(); + } + return hash0(symbol); + } // hash(String):int + + private int hash0(String symbol) { + int code = 0; + final int length = symbol.length(); + final int[] multipliers = fHashMultipliers; + for (int i = 0; i < length; ++i) { + code = code * multipliers[i & MULTIPLIERS_MASK] + symbol.charAt(i); + } + return code; + } // hash0(String):int + + private void rehash() { + rehashCommon(fHashTable.length * 2 + 1); + } // rehash() + + private void rebalance() { + if (fHashMultipliers == null) { + fHashMultipliers = new int[MULTIPLIERS_SIZE]; + } + PrimeNumberSequenceGenerator.generateSequence(fHashMultipliers); + rehashCommon(fHashTable.length); + } // rebalance() + + private void rehashCommon(final int newCapacity) { + + final int oldCapacity = fHashTable.length; + final Object[][] oldTable = fHashTable; + + final Object[][] newTable = new Object[newCapacity][]; + + fHashTable = newTable; + fTableSize = fHashTable.length; + + for (int i = 0; i < oldCapacity; ++i) { + final Object[] oldBucket = oldTable[i]; + if (oldBucket != null) { + final int oldCount = ((int[]) oldBucket[0])[0]; + boolean oldBucketReused = false; + int k = 1; + for (int j = 0; j < oldCount; ++j) { + final String key = (String) oldBucket[k]; + final Object value = oldBucket[k+1]; + + final int hash = (hash(key) & 0x7FFFFFFF) % fTableSize; + Object[] bucket = fHashTable[hash]; + + if (bucket == null) { + if (oldBucketReused) { + bucket = new Object[1 + 2*INITIAL_BUCKET_SIZE]; + bucket[0] = new int[]{1}; + } + else { + bucket = oldBucket; + ((int[])bucket[0])[0] = 1; + oldBucketReused = true; + } + bucket[1] = key; + bucket[2] = value; + fHashTable[hash] = bucket; + } + else { + int count = ((int[])bucket[0])[0]; + int offset = 1 + 2*count; + if (offset == bucket.length) { + int newSize = count + INITIAL_BUCKET_SIZE; + Object[] newBucket = new Object[1 + 2*newSize]; + System.arraycopy(bucket, 0, newBucket, 0, offset); + bucket = newBucket; + fHashTable[hash] = bucket; + } + bucket[offset++] = key; + bucket[offset]= value; + ((int[])bucket[0])[0] = ++count; + } + k += 2; + } + } + } + + } // rehashCommon(int) } // class QNameHashtable Added: xerces/java/trunk/src/org/apache/xerces/util/PrimeNumberSequenceGenerator.java URL: http://svn.apache.org/viewvc/xerces/java/trunk/src/org/apache/xerces/util/PrimeNumberSequenceGenerator.java?rev=1357381&view=auto ============================================================================== --- xerces/java/trunk/src/org/apache/xerces/util/PrimeNumberSequenceGenerator.java (added) +++ xerces/java/trunk/src/org/apache/xerces/util/PrimeNumberSequenceGenerator.java Wed Jul 4 19:56:10 2012 @@ -0,0 +1,43 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.xerces.util; + +import java.util.Random; + +/** + * @version $Id$ + */ +final class PrimeNumberSequenceGenerator { + + private static int [] PRIMES = { + 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, + 61, 67, 71, 73, 79, 83, 89, 97, 101, 103, 107, 109, 113, 127, 131, 137, + 139, 149, 151, 157, 163, 167, 173, 179, 181, 191, 193, 197, 199, 211, 223, 227, + 229, 233, 239, 241, 251, 257, 263, 269, 271, 277, 281, 283, 293, 307, 311, 313, + 317, 331, 337, 347, 349, 353, 359, 367, 373, 379, 383, 389, 397, 401, 409, 419, + 421, 431, 433, 439, 443, 449, 457, 461, 463, 467, 479, 487, 491, 499, 503, 509, + 521, 523, 541, 547, 557, 563, 569, 571, 577, 587, 593, 599, 601, 607, 613, 617, + 619, 631, 641, 643, 647, 653, 659, 661, 673, 677, 683, 691, 701, 709, 719, 727}; + + static void generateSequence(int[] arrayToFill) { + Random r = new Random(); + for (int i = 0; i < arrayToFill.length; ++i) { + arrayToFill[i] = PRIMES[r.nextInt(PRIMES.length)]; + } + } +} Propchange: xerces/java/trunk/src/org/apache/xerces/util/PrimeNumberSequenceGenerator.java ------------------------------------------------------------------------------ svn:eol-style = native Propchange: xerces/java/trunk/src/org/apache/xerces/util/PrimeNumberSequenceGenerator.java ------------------------------------------------------------------------------ svn:keywords = Author Date Id Revision Modified: xerces/java/trunk/src/org/apache/xerces/util/SoftReferenceSymbolTable.java URL: http://svn.apache.org/viewvc/xerces/java/trunk/src/org/apache/xerces/util/SoftReferenceSymbolTable.java?rev=1357381&r1=1357380&r2=1357381&view=diff ============================================================================== --- xerces/java/trunk/src/org/apache/xerces/util/SoftReferenceSymbolTable.java (original) +++ xerces/java/trunk/src/org/apache/xerces/util/SoftReferenceSymbolTable.java Wed Jul 4 19:56:10 2012 @@ -127,6 +127,7 @@ public class SoftReferenceSymbolTable ex public String addSymbol(String symbol) { clean(); // search for identical symbol + int collisionCount = 0; int bucket = hash(symbol) % fTableSize; for (SREntry entry = fBuckets[bucket]; entry != null; entry = entry.next) { SREntryData data = (SREntryData)entry.get(); @@ -136,13 +137,20 @@ public class SoftReferenceSymbolTable ex if (data.symbol.equals(symbol)) { return data.symbol; } + ++collisionCount; } if (fCount >= fThreshold) { // Rehash the table if the threshold is exceeded rehash(); bucket = hash(symbol) % fTableSize; - } + } + else if (collisionCount >= fCollisionThreshold) { + // Select a new hash function and rehash the table if + // the collision threshold is exceeded. + rebalance(); + bucket = hash(symbol) % fTableSize; + } // add new entry symbol = symbol.intern(); @@ -165,6 +173,7 @@ public class SoftReferenceSymbolTable ex public String addSymbol(char[] buffer, int offset, int length) { clean(); // search for identical symbol + int collisionCount = 0; int bucket = hash(buffer, offset, length) % fTableSize; OUTER: for (SREntry entry = fBuckets[bucket]; entry != null; entry = entry.next) { SREntryData data = (SREntryData)entry.get(); @@ -174,18 +183,26 @@ public class SoftReferenceSymbolTable ex if (length == data.characters.length) { for (int i = 0; i < length; i++) { if (buffer[offset + i] != data.characters[i]) { + ++collisionCount; continue OUTER; } } return data.symbol; } + ++collisionCount; } if (fCount >= fThreshold) { // Rehash the table if the threshold is exceeded rehash(); bucket = hash(buffer, offset, length) % fTableSize; - } + } + else if (collisionCount >= fCollisionThreshold) { + // Select a new hash function and rehash the table if + // the collision threshold is exceeded. + rebalance(); + bucket = hash(buffer, offset, length) % fTableSize; + } // add new entry String symbol = new String(buffer, offset, length).intern(); @@ -218,6 +235,20 @@ public class SoftReferenceSymbolTable ex rehashCommon(((int) (fCount / fLoadFactor)) * 2 + 1); } + /** + * Randomly selects a new hash function and reorganizes this SymbolTable + * in order to more evenly distribute its entries across the table. This + * method is called automatically when the number keys in one of the + * SymbolTable's buckets exceeds the given collision threshold. + */ + protected void rebalance() { + if (fHashMultipliers == null) { + fHashMultipliers = new int[MULTIPLIERS_SIZE]; + } + PrimeNumberSequenceGenerator.generateSequence(fHashMultipliers); + rehashCommon(fBuckets.length); + } + private void rehashCommon(final int newCapacity) { final int oldCapacity = fBuckets.length; @@ -235,7 +266,7 @@ public class SoftReferenceSymbolTable ex SREntryData data = (SREntryData)e.get(); if (data != null) { - int index = hash(data.characters, 0, data.characters.length) % newCapacity; + int index = hash(data.symbol) % newCapacity; if (newTable[index] != null) { newTable[index].prev = e; } Modified: xerces/java/trunk/src/org/apache/xerces/util/SymbolHash.java URL: http://svn.apache.org/viewvc/xerces/java/trunk/src/org/apache/xerces/util/SymbolHash.java?rev=1357381&r1=1357380&r2=1357381&view=diff ============================================================================== --- xerces/java/trunk/src/org/apache/xerces/util/SymbolHash.java (original) +++ xerces/java/trunk/src/org/apache/xerces/util/SymbolHash.java Wed Jul 4 19:56:10 2012 @@ -31,19 +31,34 @@ public class SymbolHash { // // Constants // - + /** Default table size. */ - protected int fTableSize = 101; + protected static final int TABLE_SIZE = 101; + + /** Maximum hash collisions per bucket. */ + protected static final int MAX_HASH_COLLISIONS = 40; + + protected static final int MULTIPLIERS_SIZE = 1 << 5; + protected static final int MULTIPLIERS_MASK = MULTIPLIERS_SIZE - 1; // // Data // + + /** Actual table size **/ + protected int fTableSize; /** Buckets. */ protected Entry[] fBuckets; /** Number of elements. */ protected int fNum = 0; + + /** + * Array of randomly selected hash function multipliers or <code>null</code> + * if the default String.hashCode() function should be used. + */ + protected int[] fHashMultipliers; // // Constructors @@ -51,7 +66,7 @@ public class SymbolHash { /** Constructs a key table with the default size. */ public SymbolHash() { - fBuckets = new Entry[fTableSize]; + this(TABLE_SIZE); } /** @@ -77,26 +92,37 @@ public class SymbolHash { * @param value */ public void put(Object key, Object value) { + + // search for identical key + int collisionCount = 0; final int hash = hash(key); int bucket = hash % fTableSize; - Entry entry = search(key, bucket); - - // replace old value - if (entry != null) { - entry.value = value; - } - // create new entry - else { - if (fNum >= fTableSize) { - // Rehash the table if the number of entries - // would exceed the number of buckets. - rehash(); - bucket = hash % fTableSize; + for (Entry entry = fBuckets[bucket]; entry != null; entry = entry.next) { + if (key.equals(entry.key)) { + // replace old value + entry.value = value; + return; } - entry = new Entry(key, value, fBuckets[bucket]); - fBuckets[bucket] = entry; - fNum++; + ++collisionCount; + } + + if (fNum >= fTableSize) { + // Rehash the table if the number of entries + // would exceed the number of buckets. + rehash(); + bucket = hash % fTableSize; + } + else if (collisionCount >= MAX_HASH_COLLISIONS && key instanceof String) { + // Select a new hash function and rehash the table if + // MAX_HASH_COLLISIONS is exceeded. + rebalance(); + bucket = hash(key) % fTableSize; } + + // create new entry + Entry entry = new Entry(key, value, fBuckets[bucket]); + fBuckets[bucket] = entry; + ++fNum; } /** @@ -161,6 +187,7 @@ public class SymbolHash { public SymbolHash makeClone() { SymbolHash newTable = new SymbolHash(fTableSize); newTable.fNum = fNum; + newTable.fHashMultipliers = fHashMultipliers != null ? (int[]) fHashMultipliers.clone() : null; for (int i = 0; i < fTableSize; i++) { if (fBuckets[i] != null) { newTable.fBuckets[i] = fBuckets[i].makeClone(); @@ -178,6 +205,7 @@ public class SymbolHash { fBuckets[i] = null; } fNum = 0; + fHashMultipliers = null; } // clear(): void protected Entry search(Object key, int bucket) { @@ -195,8 +223,21 @@ public class SymbolHash { * @param key The key to hash. */ protected int hash(Object key) { - return key.hashCode() & 0x7FFFFFFF; - } + if (fHashMultipliers == null || !(key instanceof String)) { + return key.hashCode() & 0x7FFFFFFF; + } + return hash0((String) key); + } // hash(Object):int + + private int hash0(String symbol) { + int code = 0; + final int length = symbol.length(); + final int[] multipliers = fHashMultipliers; + for (int i = 0; i < length; ++i) { + code = code * multipliers[i & MULTIPLIERS_MASK] + symbol.charAt(i); + } + return code & 0x7FFFFFFF; + } // hash0(String):int /** * Increases the capacity of and internally reorganizes this @@ -205,11 +246,28 @@ public class SymbolHash { * number of keys in the SymbolHash exceeds its number of buckets. */ protected void rehash() { - + rehashCommon((fBuckets.length << 1) + 1); + } + + /** + * Randomly selects a new hash function and reorganizes this SymbolHash + * in order to more evenly distribute its entries across the table. This + * method is called automatically when the number keys in one of the + * SymbolHash's buckets exceeds MAX_HASH_COLLISIONS. + */ + protected void rebalance() { + if (fHashMultipliers == null) { + fHashMultipliers = new int[MULTIPLIERS_SIZE]; + } + PrimeNumberSequenceGenerator.generateSequence(fHashMultipliers); + rehashCommon(fBuckets.length); + } + + private void rehashCommon(final int newCapacity) { + final int oldCapacity = fBuckets.length; final Entry[] oldTable = fBuckets; - final int newCapacity = (oldCapacity << 1) + 1; final Entry[] newTable = new Entry[newCapacity]; fBuckets = newTable; Modified: xerces/java/trunk/src/org/apache/xerces/util/SymbolTable.java URL: http://svn.apache.org/viewvc/xerces/java/trunk/src/org/apache/xerces/util/SymbolTable.java?rev=1357381&r1=1357380&r2=1357381&view=diff ============================================================================== --- xerces/java/trunk/src/org/apache/xerces/util/SymbolTable.java (original) +++ xerces/java/trunk/src/org/apache/xerces/util/SymbolTable.java Wed Jul 4 19:56:10 2012 @@ -84,6 +84,12 @@ public class SymbolTable { /** Default table size. */ protected static final int TABLE_SIZE = 101; + + /** Maximum hash collisions per bucket for a table with load factor == 1. */ + protected static final int MAX_HASH_COLLISIONS = 40; + + protected static final int MULTIPLIERS_SIZE = 1 << 5; + protected static final int MULTIPLIERS_MASK = MULTIPLIERS_SIZE - 1; // // Data @@ -104,6 +110,18 @@ public class SymbolTable { /** The load factor for the SymbolTable. */ protected float fLoadFactor; + + /** + * A new hash function is selected and the table is rehashed when + * the number of keys in the bucket exceeds this threshold. + */ + protected final int fCollisionThreshold; + + /** + * Array of randomly selected hash function multipliers or <code>null</code> + * if the default String.hashCode() function should be used. + */ + protected int[] fHashMultipliers; // // Constructors @@ -136,6 +154,7 @@ public class SymbolTable { fTableSize = initialCapacity; fBuckets = new Entry[fTableSize]; fThreshold = (int)(fTableSize * loadFactor); + fCollisionThreshold = (int)(MAX_HASH_COLLISIONS * loadFactor); fCount = 0; } @@ -174,18 +193,26 @@ public class SymbolTable { public String addSymbol(String symbol) { // search for identical symbol + int collisionCount = 0; int bucket = hash(symbol) % fTableSize; for (Entry entry = fBuckets[bucket]; entry != null; entry = entry.next) { if (entry.symbol.equals(symbol)) { return entry.symbol; } + ++collisionCount; } if (fCount >= fThreshold) { // Rehash the table if the threshold is exceeded rehash(); bucket = hash(symbol) % fTableSize; - } + } + else if (collisionCount >= fCollisionThreshold) { + // Select a new hash function and rehash the table if + // the collision threshold is exceeded. + rebalance(); + bucket = hash(symbol) % fTableSize; + } // create new entry Entry entry = new Entry(symbol, fBuckets[bucket]); @@ -208,23 +235,32 @@ public class SymbolTable { public String addSymbol(char[] buffer, int offset, int length) { // search for identical symbol + int collisionCount = 0; int bucket = hash(buffer, offset, length) % fTableSize; OUTER: for (Entry entry = fBuckets[bucket]; entry != null; entry = entry.next) { if (length == entry.characters.length) { for (int i = 0; i < length; i++) { if (buffer[offset + i] != entry.characters[i]) { + ++collisionCount; continue OUTER; } } return entry.symbol; } + ++collisionCount; } if (fCount >= fThreshold) { // Rehash the table if the threshold is exceeded rehash(); bucket = hash(buffer, offset, length) % fTableSize; - } + } + else if (collisionCount >= fCollisionThreshold) { + // Select a new hash function and rehash the table if + // the collision threshold is exceeded. + rebalance(); + bucket = hash(buffer, offset, length) % fTableSize; + } // add new entry Entry entry = new Entry(buffer, offset, length, fBuckets[bucket]); @@ -243,8 +279,21 @@ public class SymbolTable { * @param symbol The symbol to hash. */ public int hash(String symbol) { - return symbol.hashCode() & 0x7FFFFFFF; + if (fHashMultipliers == null) { + return symbol.hashCode() & 0x7FFFFFFF; + } + return hash0(symbol); } // hash(String):int + + private int hash0(String symbol) { + int code = 0; + final int length = symbol.length(); + final int[] multipliers = fHashMultipliers; + for (int i = 0; i < length; ++i) { + code = code * multipliers[i & MULTIPLIERS_MASK] + symbol.charAt(i); + } + return code & 0x7FFFFFFF; + } // hash0(String):int /** * Returns a hashcode value for the specified symbol information. @@ -258,14 +307,25 @@ public class SymbolTable { * @param length The length of the symbol. */ public int hash(char[] buffer, int offset, int length) { + if (fHashMultipliers == null) { + int code = 0; + for (int i = 0; i < length; ++i) { + code = code * 31 + buffer[offset + i]; + } + return code & 0x7FFFFFFF; + } + return hash0(buffer, offset, length); + } // hash(char[],int,int):int + + private int hash0(char[] buffer, int offset, int length) { int code = 0; + final int[] multipliers = fHashMultipliers; for (int i = 0; i < length; ++i) { - code = code * 31 + buffer[offset + i]; + code = code * multipliers[i & MULTIPLIERS_MASK] + buffer[offset + i]; } return code & 0x7FFFFFFF; - - } // hash(char[],int,int):int + } // hash0(char[],int,int):int /** * Increases the capacity of and internally reorganizes this @@ -275,11 +335,28 @@ public class SymbolTable { * and load factor. */ protected void rehash() { - + rehashCommon(fBuckets.length * 2 + 1); + } + + /** + * Randomly selects a new hash function and reorganizes this SymbolTable + * in order to more evenly distribute its entries across the table. This + * method is called automatically when the number keys in one of the + * SymbolTable's buckets exceeds the given collision threshold. + */ + protected void rebalance() { + if (fHashMultipliers == null) { + fHashMultipliers = new int[MULTIPLIERS_SIZE]; + } + PrimeNumberSequenceGenerator.generateSequence(fHashMultipliers); + rehashCommon(fBuckets.length); + } + + private void rehashCommon(final int newCapacity) { + int oldCapacity = fBuckets.length; Entry[] oldTable = fBuckets; - int newCapacity = oldCapacity * 2 + 1; Entry[] newTable = new Entry[newCapacity]; fThreshold = (int)(newCapacity * fLoadFactor); @@ -291,7 +368,7 @@ public class SymbolTable { Entry e = old; old = old.next; - int index = hash(e.characters, 0, e.characters.length) % newCapacity; + int index = hash(e.symbol) % newCapacity; e.next = newTable[index]; newTable[index] = e; } Modified: xerces/java/trunk/src/org/apache/xerces/util/XMLAttributesImpl.java URL: http://svn.apache.org/viewvc/xerces/java/trunk/src/org/apache/xerces/util/XMLAttributesImpl.java?rev=1357381&r1=1357380&r2=1357381&view=diff ============================================================================== --- xerces/java/trunk/src/org/apache/xerces/util/XMLAttributesImpl.java (original) +++ xerces/java/trunk/src/org/apache/xerces/util/XMLAttributesImpl.java Wed Jul 4 19:56:10 2012 @@ -50,6 +50,12 @@ public class XMLAttributesImpl /** Default table size. */ protected static final int TABLE_SIZE = 101; + /** Maximum hash collisions per bucket. */ + protected static final int MAX_HASH_COLLISIONS = 40; + + protected static final int MULTIPLIERS_SIZE = 1 << 5; + protected static final int MULTIPLIERS_MASK = MULTIPLIERS_SIZE - 1; + /** * Threshold at which an instance is treated * as a large attribute list. @@ -103,6 +109,12 @@ public class XMLAttributesImpl * Indicates whether the table view contains consistent data. */ protected boolean fIsTableViewConsistent; + + /** + * Array of randomly selected hash function multipliers or <code>null</code> + * if the default String.hashCode() function should be used. + */ + protected int[] fHashMultipliers; // // Constructors @@ -203,7 +215,8 @@ public class XMLAttributesImpl * the user of this class adds attributes, removes them, and * then adds more. */ - if (!fIsTableViewConsistent || fLength == SIZE_LIMIT) { + if (!fIsTableViewConsistent || fLength == SIZE_LIMIT || + (fLength > SIZE_LIMIT && fLength > fTableViewBuckets)) { prepareAndPopulateTableView(); fIsTableViewConsistent = true; } @@ -232,12 +245,14 @@ public class XMLAttributesImpl // We need to check if any of the attributes has the same rawname. else { // Search the table. + int collisionCount = 0; Attribute found = fAttributeTableView[bucket]; while (found != null) { if (found.name.rawname == name.rawname) { break; } found = found.next; + ++collisionCount; } // This attribute is unique. if (found == null) { @@ -250,10 +265,19 @@ public class XMLAttributesImpl } fAttributes = attributes; } - - // Update table view - fAttributes[index].next = fAttributeTableView[bucket]; - fAttributeTableView[bucket] = fAttributes[index]; + // Select a new hash function and rehash the table view + // if the collision threshold is exceeded. + if (collisionCount >= MAX_HASH_COLLISIONS) { + // The current attribute will be processed in the rehash. + // Need to set its name first. + fAttributes[index].name.setValues(name); + rebalanceTableView(fLength); + } + else { + // Update table view + fAttributes[index].next = fAttributeTableView[bucket]; + fAttributeTableView[bucket] = fAttributes[index]; + } } // Duplicate. We still need to find the index. else { @@ -829,60 +853,84 @@ public class XMLAttributesImpl */ public QName checkDuplicatesNS() { // If the list is small check for duplicates using pairwise comparison. - if (fLength <= SIZE_LIMIT) { - for (int i = 0; i < fLength - 1; ++i) { - Attribute att1 = fAttributes[i]; - for (int j = i + 1; j < fLength; ++j) { - Attribute att2 = fAttributes[j]; + final int length = fLength; + if (length <= SIZE_LIMIT) { + final Attribute[] attributes = fAttributes; + for (int i = 0; i < length - 1; ++i) { + Attribute att1 = attributes[i]; + for (int j = i + 1; j < length; ++j) { + Attribute att2 = attributes[j]; if (att1.name.localpart == att2.name.localpart && att1.name.uri == att2.name.uri) { - return att2.name; + return att2.name; } } } + return null; } // If the list is large check duplicates using a hash table. else { - // We don't want this table view to be read if someone calls - // addAttribute so we invalidate it up front. - fIsTableViewConsistent = false; - - prepareTableView(); - - Attribute attr; - int bucket; - - for (int i = fLength - 1; i >= 0; --i) { - attr = fAttributes[i]; - bucket = getTableViewBucket(attr.name.localpart, attr.name.uri); + return checkManyDuplicatesNS(); + } + } + + private QName checkManyDuplicatesNS() { + // We don't want this table view to be read if someone calls + // addAttribute so we invalidate it up front. + fIsTableViewConsistent = false; + + prepareTableView(); + + Attribute attr; + int bucket; + + final int length = fLength; + final Attribute[] attributes = fAttributes; + final Attribute[] attributeTableView = fAttributeTableView; + final int[] attributeTableViewChainState = fAttributeTableViewChainState; + int largeCount = fLargeCount; + + for (int i = 0; i < length; ++i) { + attr = attributes[i]; + bucket = getTableViewBucket(attr.name.localpart, attr.name.uri); + + // The chain is stale. + // This must be a unique attribute. + if (attributeTableViewChainState[bucket] != largeCount) { + attributeTableViewChainState[bucket] = largeCount; + attr.next = null; + attributeTableView[bucket] = attr; + } + // This chain is active. + // We need to check if any of the attributes has the same name. + else { + // Search the table. + int collisionCount = 0; + Attribute found = attributeTableView[bucket]; + while (found != null) { + if (found.name.localpart == attr.name.localpart && + found.name.uri == attr.name.uri) { + return attr.name; + } + found = found.next; + ++collisionCount; + } - // The chain is stale. - // This must be a unique attribute. - if (fAttributeTableViewChainState[bucket] != fLargeCount) { - fAttributeTableViewChainState[bucket] = fLargeCount; - attr.next = null; - fAttributeTableView[bucket] = attr; - } - // This chain is active. - // We need to check if any of the attributes has the same name. + // Select a new hash function and rehash the table view + // if the collision threshold is exceeded. + if (collisionCount >= MAX_HASH_COLLISIONS) { + // The current attribute will be processed in the rehash. + rebalanceTableViewNS(i+1); + largeCount = fLargeCount; + } else { - // Search the table. - Attribute found = fAttributeTableView[bucket]; - while (found != null) { - if (found.name.localpart == attr.name.localpart && - found.name.uri == attr.name.uri) { - return attr.name; - } - found = found.next; - } - // Update table view - attr.next = fAttributeTableView[bucket]; - fAttributeTableView[bucket] = attr; + attr.next = attributeTableView[bucket]; + attributeTableView[bucket] = attr; } } - } - return null; + } + return null; } /** @@ -933,7 +981,7 @@ public class XMLAttributesImpl * would be hashed */ protected int getTableViewBucket(String qname) { - return (qname.hashCode() & 0x7FFFFFFF) % fTableViewBuckets; + return (hash(qname) & 0x7FFFFFFF) % fTableViewBuckets; } /** @@ -947,13 +995,36 @@ public class XMLAttributesImpl */ protected int getTableViewBucket(String localpart, String uri) { if (uri == null) { - return (localpart.hashCode() & 0x7FFFFFFF) % fTableViewBuckets; + return (hash(localpart) & 0x7FFFFFFF) % fTableViewBuckets; } else { - return ((localpart.hashCode() + uri.hashCode()) - & 0x7FFFFFFF) % fTableViewBuckets; + return (hash(localpart, uri) & 0x7FFFFFFF) % fTableViewBuckets; } } + + private int hash(String localpart) { + if (fHashMultipliers == null) { + return localpart.hashCode(); + } + return hash0(localpart); + } // hash(String):int + + private int hash(String localpart, String uri) { + if (fHashMultipliers == null) { + return localpart.hashCode() + uri.hashCode() * 31; + } + return hash0(localpart) + hash0(uri) * fHashMultipliers[MULTIPLIERS_SIZE]; + } // hash(String,String):int + + private int hash0(String symbol) { + int code = 0; + final int length = symbol.length(); + final int[] multipliers = fHashMultipliers; + for (int i = 0; i < length; ++i) { + code = code * multipliers[i & MULTIPLIERS_MASK] + symbol.charAt(i); + } + return code; + } // hash0(String):int /** * Purges all elements from the table view. @@ -971,9 +1042,31 @@ public class XMLAttributesImpl } /** + * Increases the capacity of the table view. + */ + private void growTableView() { + final int length = fLength; + int tableViewBuckets = fTableViewBuckets; + do { + tableViewBuckets = (tableViewBuckets << 1) + 1; + if (tableViewBuckets < 0) { + tableViewBuckets = Integer.MAX_VALUE; + break; + } + } + while (length > tableViewBuckets); + fTableViewBuckets = tableViewBuckets; + fAttributeTableView = null; + fLargeCount = 1; + } + + /** * Prepares the table view of the attributes list for use. */ protected void prepareTableView() { + if (fLength > fTableViewBuckets) { + growTableView(); + } if (fAttributeTableView == null) { fAttributeTableView = new Attribute[fTableViewBuckets]; fAttributeTableViewChainState = new int[fTableViewBuckets]; @@ -989,11 +1082,15 @@ public class XMLAttributesImpl * previously read. */ protected void prepareAndPopulateTableView() { + prepareAndPopulateTableView(fLength); + } + + private void prepareAndPopulateTableView(final int count) { prepareTableView(); - // Need to populate the hash table with the attributes we've scanned so far. + // Need to populate the hash table with the attributes we've processed so far. Attribute attr; int bucket; - for (int i = 0; i < fLength; ++i) { + for (int i = 0; i < count; ++i) { attr = fAttributes[i]; bucket = getTableViewBucket(attr.name.rawname); if (fAttributeTableViewChainState[bucket] != fLargeCount) { @@ -1008,6 +1105,55 @@ public class XMLAttributesImpl } } } + + private void prepareAndPopulateTableViewNS(final int count) { + prepareTableView(); + // Need to populate the hash table with the attributes we've processed so far. + Attribute attr; + int bucket; + for (int i = 0; i < count; ++i) { + attr = fAttributes[i]; + bucket = getTableViewBucket(attr.name.localpart, attr.name.uri); + if (fAttributeTableViewChainState[bucket] != fLargeCount) { + fAttributeTableViewChainState[bucket] = fLargeCount; + attr.next = null; + fAttributeTableView[bucket] = attr; + } + else { + // Update table view + attr.next = fAttributeTableView[bucket]; + fAttributeTableView[bucket] = attr; + } + } + } + + /** + * Randomly selects a new hash function and reorganizes the table view + * in order to more evenly distribute its entries. This method is called + * automatically when the number of attributes in one bucket exceeds + * MAX_HASH_COLLISIONS. + */ + private void rebalanceTableView(final int count) { + if (fHashMultipliers == null) { + fHashMultipliers = new int[MULTIPLIERS_SIZE + 1]; + } + PrimeNumberSequenceGenerator.generateSequence(fHashMultipliers); + prepareAndPopulateTableView(count); + } + + /** + * Randomly selects a new hash function and reorganizes the table view + * in order to more evenly distribute its entries. This method is called + * automatically when the number of attributes in one bucket exceeds + * MAX_HASH_COLLISIONS. + */ + private void rebalanceTableViewNS(final int count) { + if (fHashMultipliers == null) { + fHashMultipliers = new int[MULTIPLIERS_SIZE + 1]; + } + PrimeNumberSequenceGenerator.generateSequence(fHashMultipliers); + prepareAndPopulateTableViewNS(count); + } // // Classes --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@xerces.apache.org For additional commands, e-mail: commits-h...@xerces.apache.org