Extend NumberTools to support int/long/float/double to string
--------------------------------------------------------------
Key: LUCENE-530
URL: http://issues.apache.org/jira/browse/LUCENE-530
Project: Lucene - Java
Type: Improvement
Components: Analysis
Versions: 1.9
Reporter: Andy Hind
Priority: Minor
Extend NumberTools to support int/long/float/double to string
So you can search using range queries on int/long/float/double, if you want.
Here is the basis for how NumberTools could be extended to support
int/long/double/float.
As I only write these values to the index and fix tokenisation in searches, I was
not so fussed about the reverse transformations from Strings back to numbers.
/**
 * Encodes numeric primitives as fixed-width, lower-case hexadecimal strings
 * whose ordering under plain string comparison matches the numeric ordering
 * of the encoded values. This makes the strings usable as index terms for
 * range queries.
 *
 * <p>{@code int} and {@code float} values produce 8 characters; {@code long}
 * and {@code double} values produce 16 characters. Only encoding is provided;
 * there are no reverse transformations.
 *
 * <p>The class is stateless: all members are static and all constants are
 * final.
 */
public final class NumericEncoder
{
    /*
     * Constants for integer encoding
     */
    static final int INTEGER_SIGN_MASK = 0x80000000;

    /*
     * Constants for long encoding
     */
    static final long LONG_SIGN_MASK = 0x8000000000000000L;

    /*
     * Constants for float encoding (IEEE 754 single precision bit layout)
     */
    static final int FLOAT_SIGN_MASK = 0x80000000;
    static final int FLOAT_EXPONENT_MASK = 0x7F800000;
    static final int FLOAT_MANTISSA_MASK = 0x007FFFFF;

    /*
     * Constants for double encoding (IEEE 754 double precision bit layout)
     */
    static final long DOUBLE_SIGN_MASK = 0x8000000000000000L;
    static final long DOUBLE_EXPONENT_MASK = 0x7FF0000000000000L;
    static final long DOUBLE_MANTISSA_MASK = 0x000FFFFFFFFFFFFFL;

    /** Hex digits in ascending character order, so digit order equals value order. */
    private static final char[] DIGITS = { '0', '1', '2', '3', '4', '5', '6',
            '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' };

    /** Selects the low four bits (one hex digit). */
    private static final int MASK = (1 << 4) - 1;

    /** Not instantiable: static utility methods only. */
    private NumericEncoder()
    {
    }

    /**
     * Encode an integer into a string that orders correctly using string
     * comparison. Integer.MIN_VALUE encodes as 00000000 and MAX_VALUE as
     * ffffffff.
     *
     * @param intToEncode the value to encode
     * @return an 8 character lower-case hex string
     */
    public static String encode(int intToEncode)
    {
        // Flipping the sign bit maps the signed range onto the unsigned range
        // in the same order.
        return encodeToHex(intToEncode ^ INTEGER_SIGN_MASK);
    }

    /**
     * Encode a long into a string that orders correctly using string
     * comparison. Long.MIN_VALUE encodes as 0000000000000000 and MAX_VALUE as
     * ffffffffffffffff.
     *
     * @param longToEncode the value to encode
     * @return a 16 character lower-case hex string
     */
    public static String encode(long longToEncode)
    {
        return encodeToHex(longToEncode ^ LONG_SIGN_MASK);
    }

    /**
     * Encode a float into a string that orders correctly according to string
     * comparison.
     *
     * <p>The resulting order is: negative infinity, negative finite values,
     * -0.0, 0.0, positive finite values, positive infinity, NaN. The bits are
     * obtained with {@link Float#floatToIntBits(float)}, which collapses every
     * NaN to a single canonical value, so all NaNs share one encoding
     * (ffc00000) and no "negative NaN" encoding is ever produced. Note that
     * -0.0 and 0.0 receive different, adjacent encodings.
     *
     * @param floatToEncode the value to encode
     * @return an 8 character lower-case hex string
     */
    public static String encode(float floatToEncode)
    {
        int bits = Float.floatToIntBits(floatToEncode);
        if ((bits & FLOAT_SIGN_MASK) != 0)
        {
            // Negative: a larger magnitude must sort lower, so invert the
            // exponent and mantissa; the sign bit becomes 0 so that all
            // negatives sort below all non-negatives.
            bits ^= FLOAT_SIGN_MASK | FLOAT_EXPONENT_MASK | FLOAT_MANTISSA_MASK;
        }
        else
        {
            // Non-negative: magnitude order is already correct, only set the
            // sign bit so these sort above the negatives.
            bits ^= FLOAT_SIGN_MASK;
        }
        return encodeToHex(bits);
    }

    /**
     * Encode a double into a string that orders correctly according to string
     * comparison.
     *
     * <p>The resulting order is: negative infinity, negative finite values,
     * -0.0, 0.0, positive finite values, positive infinity, NaN. The bits are
     * obtained with {@link Double#doubleToLongBits(double)}, which collapses
     * every NaN to a single canonical value, so all NaNs share one encoding
     * (fff8000000000000) and no "negative NaN" encoding is ever produced.
     * Note that -0.0 and 0.0 receive different, adjacent encodings.
     *
     * @param doubleToEncode the value to encode
     * @return a 16 character lower-case hex string
     */
    public static String encode(double doubleToEncode)
    {
        long bits = Double.doubleToLongBits(doubleToEncode);
        if ((bits & DOUBLE_SIGN_MASK) != 0)
        {
            // Negative: invert exponent and mantissa, clear the sign bit.
            bits ^= DOUBLE_SIGN_MASK | DOUBLE_EXPONENT_MASK | DOUBLE_MANTISSA_MASK;
        }
        else
        {
            // Non-negative: only set the sign bit.
            bits ^= DOUBLE_SIGN_MASK;
        }
        return encodeToHex(bits);
    }

    /**
     * Format the 32 bits of {@code i} as exactly 8 lower-case hex digits,
     * treating the value as unsigned and padding with leading zeros.
     */
    private static String encodeToHex(int i)
    {
        char[] buf = new char[8];
        // Fill from the least significant digit backwards.
        for (int pos = buf.length - 1; pos >= 0; pos--)
        {
            buf[pos] = DIGITS[i & MASK];
            i >>>= 4;
        }
        return new String(buf);
    }

    /**
     * Format the 64 bits of {@code l} as exactly 16 lower-case hex digits,
     * treating the value as unsigned and padding with leading zeros.
     */
    private static String encodeToHex(long l)
    {
        char[] buf = new char[16];
        for (int pos = buf.length - 1; pos >= 0; pos--)
        {
            buf[pos] = DIGITS[(int) (l & MASK)];
            l >>>= 4;
        }
        return new String(buf);
    }
}
/**
 * Tests that the strings produced by NumericEncoder sort, under plain string
 * comparison, in the same order as the numbers they encode.
 *
 * <p>The two exhaustive tests are deliberately disabled through the
 * {@code x} name prefix because each walks roughly 2^32 values; remove the
 * prefix to run one of them by hand.
 */
public class NumericEncodingTest extends TestCase
{
    /** True once the exhaustive float walk has seen a non-NaN value. */
    private boolean hasPreviousFloat;

    /** Last non-NaN value seen by the exhaustive float walk. */
    private float previousFloat;

    /** Encoding of {@link #previousFloat}. */
    private String previousEncoding;

    public NumericEncodingTest()
    {
        super();
    }

    public NumericEncodingTest(String arg0)
    {
        super(arg0);
    }

    /**
     * Do an exhaustive test for integers: every encoding must be greater
     * than or equal to the encoding of the preceding integer.
     */
    public void xtestAllIntegerEncodings()
    {
        String lastString = null;
        // The counter is a long so that the loop can terminate after
        // Integer.MAX_VALUE instead of wrapping around.
        for (long i = Integer.MIN_VALUE; i <= Integer.MAX_VALUE; i++)
        {
            String nextString = NumericEncoder.encode((int) i);
            if (lastString != null)
            {
                assertFalse(lastString.compareTo(nextString) > 0);
            }
            lastString = nextString;
        }
    }

    /**
     * Do an exhaustive test for float: walk every non-NaN bit pattern in
     * ascending numeric order and check that the encodings never decrease.
     */
    public void xtestAllFloatEncodings()
    {
        hasPreviousFloat = false;
        previousEncoding = null;

        // Negative half: the largest magnitude is the most negative value,
        // so walk the 31 magnitude bits (exponent + mantissa) downwards.
        for (int magnitude = 0x7FFFFFFF; magnitude >= 0; magnitude--)
        {
            checkNextFloat(Float.intBitsToFloat(0x80000000 | magnitude));
        }

        // Non-negative half: walk the magnitude bits upwards. The counter is
        // a long so that the loop can terminate after 0x7FFFFFFF.
        for (long magnitude = 0; magnitude <= 0x7FFFFFFFL; magnitude++)
        {
            checkNextFloat(Float.intBitsToFloat((int) magnitude));
        }
    }

    /**
     * Check one step of the exhaustive float walk: {@code value} and its
     * encoding must both be no smaller than the previous non-NaN value and
     * its encoding. NaN patterns are skipped entirely and are not remembered
     * as the previous value, so they can never cause a spurious failure on
     * the value that follows them.
     */
    private void checkNextFloat(float value)
    {
        if (Float.isNaN(value))
        {
            return;
        }
        String encoding = NumericEncoder.encode(value);
        if (hasPreviousFloat)
        {
            // Sanity check on the walk itself: values must arrive in order.
            // Failure messages are only built when a check actually fails.
            if (Float.compare(previousFloat, value) > 0)
            {
                fail(previousFloat + " > " + value);
            }
            if (previousEncoding.compareTo(encoding) > 0)
            {
                fail(previousEncoding + " > " + encoding);
            }
        }
        hasPreviousFloat = true;
        previousFloat = value;
        previousEncoding = encoding;
    }

    /*
     * Sample test for int
     */
    public void testIntegerEncoding()
    {
        assertEquals("00000000", NumericEncoder.encode(Integer.MIN_VALUE));
        assertEquals("00000001", NumericEncoder.encode(Integer.MIN_VALUE + 1));
        assertEquals("7fffffff", NumericEncoder.encode(-1));
        assertEquals("80000000", NumericEncoder.encode(0));
        assertEquals("80000001", NumericEncoder.encode(1));
        assertEquals("fffffffe", NumericEncoder.encode(Integer.MAX_VALUE - 1));
        assertEquals("ffffffff", NumericEncoder.encode(Integer.MAX_VALUE));
    }

    /*
     * Sample test for long
     */
    public void testLongEncoding()
    {
        assertEquals("0000000000000000", NumericEncoder.encode(Long.MIN_VALUE));
        assertEquals("0000000000000001", NumericEncoder.encode(Long.MIN_VALUE + 1));
        assertEquals("7fffffffffffffff", NumericEncoder.encode(-1L));
        assertEquals("8000000000000000", NumericEncoder.encode(0L));
        assertEquals("8000000000000001", NumericEncoder.encode(1L));
        assertEquals("fffffffffffffffe", NumericEncoder.encode(Long.MAX_VALUE - 1));
        assertEquals("ffffffffffffffff", NumericEncoder.encode(Long.MAX_VALUE));
    }

    /*
     * Sample test for float
     */
    public void testFloatEncoding()
    {
        assertEquals("007fffff", NumericEncoder.encode(Float.NEGATIVE_INFINITY));
        assertEquals("00800000", NumericEncoder.encode(-Float.MAX_VALUE));
        assertEquals("7ffffffe", NumericEncoder.encode(-Float.MIN_VALUE));
        assertEquals("7fffffff", NumericEncoder.encode(-0f));
        assertEquals("80000000", NumericEncoder.encode(0f));
        assertEquals("80000001", NumericEncoder.encode(Float.MIN_VALUE));
        assertEquals("ff7fffff", NumericEncoder.encode(Float.MAX_VALUE));
        assertEquals("ff800000", NumericEncoder.encode(Float.POSITIVE_INFINITY));
        assertEquals("ffc00000", NumericEncoder.encode(Float.NaN));
    }

    /*
     * Sample test for double
     */
    public void testDoubleEncoding()
    {
        assertEquals("000fffffffffffff", NumericEncoder.encode(Double.NEGATIVE_INFINITY));
        assertEquals("0010000000000000", NumericEncoder.encode(-Double.MAX_VALUE));
        assertEquals("7ffffffffffffffe", NumericEncoder.encode(-Double.MIN_VALUE));
        assertEquals("7fffffffffffffff", NumericEncoder.encode(-0d));
        assertEquals("8000000000000000", NumericEncoder.encode(0d));
        assertEquals("8000000000000001", NumericEncoder.encode(Double.MIN_VALUE));
        assertEquals("ffefffffffffffff", NumericEncoder.encode(Double.MAX_VALUE));
        assertEquals("fff0000000000000", NumericEncoder.encode(Double.POSITIVE_INFINITY));
        assertEquals("fff8000000000000", NumericEncoder.encode(Double.NaN));
    }
}
--
This message is automatically generated by JIRA.
-
If you think it was sent incorrectly contact one of the administrators:
http://issues.apache.org/jira/secure/Administrators.jspa
-
For more information on JIRA, see:
http://www.atlassian.com/software/jira
---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]