Hi, I have a small patch for Java to optimize the heapspace footprint
of (for example):
struct OptIntPair {
1: i32 a
2: i32 b
}
Each instance will have an __isset_bit_vector which points to a BitSet,
which points to a long[], adding around 50 bytes of overhead to this object.
The patch changes this by storing a raw byte and doing direct bitfield
operations, like:
byte __isset_bitfield;
public void unsetB() {
__isset_bitfield = EncodingUtils.clearBit(__isset_bitfield, __B_ISSET_ID);
}
A little nasty, but a big space win: on my machine, this brings down the total
size of an OptIntPair from 85 bytes to 25 bytes. A BitSet gets used as a
fallback
when more than 64 __isset entries are needed.
Is this something that could be considered? The patch is attached. Other people
have
mentioned this shortcoming before, see slide 22 of:
http://www.slideshare.net/aszegedi/everything-i-ever-learned-about-jvm-performance-tuning-twitter
Thanks for such useful software,
Brian Bloniarz
Index: lib/java/src/org/apache/thrift/EncodingUtils.java
===================================================================
--- lib/java/src/org/apache/thrift/EncodingUtils.java (revision 1211584)
+++ lib/java/src/org/apache/thrift/EncodingUtils.java (working copy)
@@ -82,4 +82,67 @@
| ((buf[offset + 2] & 0xff) << 8) | ((buf[offset + 3] & 0xff));
}
+ /**
+ * Bitfield utilities.
+ * Returns true if the bit at position is set in v.
+ */
+ public static final boolean testBit(byte v, int position) {
+ return testBit((int)v, position);
+ }
+
+ public static final boolean testBit(short v, int position) {
+ return testBit((int)v, position);
+ }
+
+ public static final boolean testBit(int v, int position) {
+ return (v & (1 << position)) != 0;
+ }
+
+ public static final boolean testBit(long v, int position) {
+ return (v & (1L << position)) != 0L;
+ }
+
+ /**
+ * Returns v, with the bit at position set to zero.
+ */
+ public static final byte clearBit(byte v, int position) {
+ return (byte)clearBit((int)v, position);
+ }
+
+ public static final short clearBit(short v, int position) {
+ return (short)clearBit((int)v, position);
+ }
+
+ public static final int clearBit(int v, int position) {
+ return v & ~(1 << position);
+ }
+
+ public static final long clearBit(long v, int position) {
+ return v & ~(1L << position);
+ }
+
+ /**
+ * Returns v, with the bit at position set to 1 or 0 depending on value.
+ */
+ public static final byte setBit(byte v, int position, boolean value) {
+ return (byte)setBit((int)v, position, value);
+ }
+
+ public static final short setBit(short v, int position, boolean value) {
+ return (short)setBit((int)v, position, value);
+ }
+
+ public static final int setBit(int v, int position, boolean value) {
+ if(value)
+ return v | (1 << position);
+ else
+ return clearBit(v, position);
+ }
+
+ public static final long setBit(long v, int position, boolean value) {
+ if(value)
+ return v | (1L << position);
+ else
+ return clearBit(v, position);
+ }
}
Index: compiler/cpp/src/generate/t_java_generator.cc
===================================================================
--- compiler/cpp/src/generate/t_java_generator.cc (revision 1211584)
+++ compiler/cpp/src/generate/t_java_generator.cc (working copy)
@@ -240,7 +240,8 @@
void generate_deep_copy_container(std::ofstream& out, std::string
source_name_p1, std::string source_name_p2, std::string result_name, t_type*
type);
void generate_deep_copy_non_container(std::ofstream& out, std::string
source_name, std::string dest_name, t_type* type);
- bool has_bit_vector(t_struct* tstruct);
+ enum isset_type { ISSET_NONE, ISSET_PRIMITIVE, ISSET_BITSET };
+ isset_type needs_isset(t_struct* tstruct, std::string *outPrimitiveType =
NULL);
/**
* Helper rendering functions
@@ -352,6 +353,7 @@
"import org.apache.thrift.scheme.StandardScheme;\n\n" +
"import org.apache.thrift.scheme.TupleScheme;\n" +
"import org.apache.thrift.protocol.TTupleProtocol;\n" +
+ "import org.apache.thrift.EncodingUtils;\n" +
"import java.util.List;\n" +
"import java.util.ArrayList;\n" +
"import java.util.Map;\n" +
@@ -1296,9 +1298,18 @@
}
}
- if (i > 0) {
+ std::string primitiveType;
+ switch(needs_isset(tstruct, &primitiveType)) {
+ case ISSET_NONE:
+ break;
+ case ISSET_PRIMITIVE:
+ indent(out) << "private " << primitiveType << " __isset_bitfield = 0;"
<< endl;
+ break;
+ case ISSET_BITSET:
indent(out) << "private BitSet __isset_bit_vector = new BitSet(" << i <<
");" << endl;
+ break;
}
+
if (optionals > 0) {
std::string output_string = "private _Fields optionals[] = {";
for (m_iter = members.begin(); m_iter != members.end(); ++m_iter) {
@@ -1370,9 +1381,16 @@
indent(out) << "public " << tstruct->get_name() << "(" <<
tstruct->get_name() << " other) {" << endl;
indent_up();
- if (has_bit_vector(tstruct)) {
+ switch(needs_isset(tstruct)) {
+ case ISSET_NONE:
+ break;
+ case ISSET_PRIMITIVE:
+ indent(out) << "__isset_bitfield = other.__isset_bitfield;" << endl;
+ break;
+ case ISSET_BITSET:
indent(out) << "__isset_bit_vector.clear();" << endl;
indent(out) << "__isset_bit_vector.or(other.__isset_bit_vector);" << endl;
+ break;
}
for (m_iter = members.begin(); m_iter != members.end(); ++m_iter) {
@@ -1788,6 +1806,7 @@
*/
void t_java_generator::generate_java_bean_boilerplate(ofstream& out,
t_struct* tstruct) {
+ isset_type issetType = needs_isset(tstruct);
const vector<t_field*>& fields = tstruct->get_members();
vector<t_field*>::const_iterator f_iter;
for (f_iter = fields.begin(); f_iter != fields.end(); ++f_iter) {
@@ -1928,6 +1947,8 @@
indent_up();
if (type_can_be_null(type)) {
indent(out) << "this." << field_name << " = null;" << endl;
+ } else if(issetType == ISSET_PRIMITIVE) {
+ indent(out) << "__isset_bitfield =
EncodingUtils.clearBit(__isset_bitfield, " << isset_field_id(field) << ");" <<
endl;
} else {
indent(out) << "__isset_bit_vector.clear(" << isset_field_id(field) <<
");" << endl;
}
@@ -1940,6 +1961,8 @@
indent_up();
if (type_can_be_null(type)) {
indent(out) << "return this." << field_name << " != null;" << endl;
+ } else if(issetType == ISSET_PRIMITIVE) {
+ indent(out) << "return EncodingUtils.testBit(__isset_bitfield, " <<
isset_field_id(field) << ");" << endl;
} else {
indent(out) << "return __isset_bit_vector.get(" << isset_field_id(field)
<< ");" << endl;
}
@@ -1952,6 +1975,8 @@
indent(out) << "if (!value) {" << endl;
indent(out) << " this." << field_name << " = null;" << endl;
indent(out) << "}" << endl;
+ } else if(issetType == ISSET_PRIMITIVE) {
+ indent(out) << "__isset_bitfield =
EncodingUtils.setBit(__isset_bitfield, " << isset_field_id(field) << ",
value);" << endl;
} else {
indent(out) << "__isset_bit_vector.set(" << isset_field_id(field) << ",
value);" << endl;
}
@@ -3759,16 +3784,33 @@
indent(out) << "}" << endl;
}
-bool t_java_generator::has_bit_vector(t_struct* tstruct) {
+t_java_generator::isset_type t_java_generator::needs_isset(t_struct* tstruct,
std::string *outPrimitiveType) {
const vector<t_field*>& members = tstruct->get_members();
vector<t_field*>::const_iterator m_iter;
+ int count = 0;
for (m_iter = members.begin(); m_iter != members.end(); ++m_iter) {
if (!type_can_be_null(get_true_type((*m_iter)->get_type()))) {
- return true;
+ count++;
}
}
- return false;
+ if(count == 0) {
+ return ISSET_NONE;
+ } else if(count <= 64) {
+ if(outPrimitiveType != NULL) {
+ if(count <= 8)
+ *outPrimitiveType = "byte";
+ else if(count <= 16)
+ *outPrimitiveType = "short";
+ else if(count <= 32)
+ *outPrimitiveType = "int";
+ else if(count <= 64)
+ *outPrimitiveType = "long";
+ }
+ return ISSET_PRIMITIVE;
+ } else {
+ return ISSET_BITSET;
+ }
}
void t_java_generator::generate_java_struct_clear(std::ofstream& out,
t_struct* tstruct) {
@@ -3838,9 +3880,19 @@
void t_java_generator::generate_java_struct_read_object(ofstream& out,
t_struct* tstruct) {
indent(out) << "private void readObject(java.io.ObjectInputStream in) throws
java.io.IOException, ClassNotFoundException {" << endl;
indent(out) << " try {" << endl;
- if (!tstruct->is_union() && has_bit_vector(tstruct)) {
- indent(out) << " // it doesn't seem like you should have to do this,
but java serialization is wacky, and doesn't call the default constructor." <<
endl;
- indent(out) << " __isset_bit_vector = new BitSet(1);" << endl;
+ if (!tstruct->is_union()) {
+ switch(needs_isset(tstruct)) {
+ case ISSET_NONE:
+ break;
+ case ISSET_PRIMITIVE:
+ indent(out) << " // it doesn't seem like you should have to do
this, but java serialization is wacky, and doesn't call the default
constructor." << endl;
+ indent(out) << " __isset_bitfield = 0;" << endl;
+ break;
+ case ISSET_BITSET:
+ indent(out) << " // it doesn't seem like you should have to do
this, but java serialization is wacky, and doesn't call the default
constructor." << endl;
+ indent(out) << " __isset_bit_vector = new BitSet(1);" << endl;
+ break;
+ }
}
indent(out) << " read(new org.apache.thrift.protocol.TCompactProtocol(new
org.apache.thrift.transport.TIOStreamTransport(in)));" << endl;
indent(out) << " } catch (org.apache.thrift.TException te) {" << endl;