Hi, I have a small patch for Java to optimize the heapspace footprint
of (for example):

struct OptIntPair {
    1: i32 a
    2: i32 b
}

Each instance will have an __isset_bit_vector which points to a BitSet,
which points to a long[], adding around 50 bytes of overhead to this object.

The patch changes this by storing a raw byte and doing direct bitfield 
operations, like:
  byte __isset_bitfield;

  public void unsetB() {
    __isset_bitfield = EncodingUtils.clearBit(__isset_bitfield, __B_ISSET_ID);
  }
A little nasty, but a big space win: on my machine, this brings down the total
size of an OptIntPair from 85 bytes to 25 bytes. A BitSet gets used as a 
fallback
when more than 64 __isset entries are needed.

Is this something that could be considered? The patch is attached. Other people 
have
mentioned this shortcoming before, see slide 22 of:
http://www.slideshare.net/aszegedi/everything-i-ever-learned-about-jvm-performance-tuning-twitter

Thanks for such useful software,
Brian Bloniarz
Index: lib/java/src/org/apache/thrift/EncodingUtils.java
===================================================================
--- lib/java/src/org/apache/thrift/EncodingUtils.java   (revision 1211584)
+++ lib/java/src/org/apache/thrift/EncodingUtils.java   (working copy)
@@ -82,4 +82,67 @@
         | ((buf[offset + 2] & 0xff) << 8) | ((buf[offset + 3] & 0xff));
   }
 
+  /**
+   * Bitfield utilities.
+   * Returns true if the bit at position is set in v.
+   */
+  public static final boolean testBit(byte v, int position) {
+    return testBit((int)v, position);
+  }
+
+  public static final boolean testBit(short v, int position) {
+    return testBit((int)v, position);
+  }
+
+  public static final boolean testBit(int v, int position) {
+    return (v & (1 << position)) != 0;
+  }
+
+  public static final boolean testBit(long v, int position) {
+    return (v & (1L << position)) != 0L;
+  }
+
+  /**
+   * Returns v, with the bit at position set to zero.
+   */
+  public static final byte clearBit(byte v, int position) {
+    return (byte)clearBit((int)v, position);
+  }
+
+  public static final short clearBit(short v, int position) {
+    return (short)clearBit((int)v, position);
+  }
+
+  public static final int clearBit(int v, int position) {
+    return v & ~(1 << position);
+  }
+
+  public static final long clearBit(long v, int position) {
+    return v & ~(1L << position);
+  }
+
+  /**
+   * Returns v, with the bit at position set to 1 or 0 depending on value.
+   */
+  public static final byte setBit(byte v, int position, boolean value) {
+    return (byte)setBit((int)v, position, value);
+  }
+
+  public static final short setBit(short v, int position, boolean value) {
+    return (short)setBit((int)v, position, value);
+  }
+
+  public static final int setBit(int v, int position, boolean value) {
+    if(value)
+      return v | (1 << position);
+    else
+      return clearBit(v, position);
+  }
+
+  public static final long setBit(long v, int position, boolean value) {
+    if(value)
+      return v | (1L << position);
+    else
+      return clearBit(v, position);
+  }
 }
Index: compiler/cpp/src/generate/t_java_generator.cc
===================================================================
--- compiler/cpp/src/generate/t_java_generator.cc       (revision 1211584)
+++ compiler/cpp/src/generate/t_java_generator.cc       (working copy)
@@ -240,7 +240,8 @@
   void generate_deep_copy_container(std::ofstream& out, std::string 
source_name_p1, std::string source_name_p2, std::string result_name, t_type* 
type);
   void generate_deep_copy_non_container(std::ofstream& out, std::string 
source_name, std::string dest_name, t_type* type);
 
-  bool has_bit_vector(t_struct* tstruct);
+  enum isset_type { ISSET_NONE, ISSET_PRIMITIVE, ISSET_BITSET };
+  isset_type needs_isset(t_struct* tstruct, std::string *outPrimitiveType = 
NULL);
 
   /**
    * Helper rendering functions
@@ -352,6 +353,7 @@
     "import org.apache.thrift.scheme.StandardScheme;\n\n" +
     "import org.apache.thrift.scheme.TupleScheme;\n" +
     "import org.apache.thrift.protocol.TTupleProtocol;\n" +
+    "import org.apache.thrift.EncodingUtils;\n" +
     "import java.util.List;\n" +
     "import java.util.ArrayList;\n" +
     "import java.util.Map;\n" +
@@ -1296,9 +1298,18 @@
       }
     }
 
-    if (i > 0) {
+    std::string primitiveType;
+    switch(needs_isset(tstruct, &primitiveType)) {
+    case ISSET_NONE:
+      break;
+    case ISSET_PRIMITIVE:
+      indent(out) << "private " << primitiveType << " __isset_bitfield = 0;" 
<< endl;
+      break;
+    case ISSET_BITSET:
       indent(out) << "private BitSet __isset_bit_vector = new BitSet(" << i << 
");" << endl;
+      break;
     }
+
     if (optionals > 0) {
       std::string output_string = "private _Fields optionals[] = {";
       for (m_iter = members.begin(); m_iter != members.end(); ++m_iter) {
@@ -1370,9 +1381,16 @@
   indent(out) << "public " << tstruct->get_name() << "(" << 
tstruct->get_name() << " other) {" << endl;
   indent_up();
 
-  if (has_bit_vector(tstruct)) {
+  switch(needs_isset(tstruct)) {
+  case ISSET_NONE:
+    break;
+  case ISSET_PRIMITIVE:
+    indent(out) << "__isset_bitfield = other.__isset_bitfield;" << endl;
+    break;
+  case ISSET_BITSET:
     indent(out) << "__isset_bit_vector.clear();" << endl;
     indent(out) << "__isset_bit_vector.or(other.__isset_bit_vector);" << endl; 
   
+    break;
   }
 
   for (m_iter = members.begin(); m_iter != members.end(); ++m_iter) {
@@ -1788,6 +1806,7 @@
  */
 void t_java_generator::generate_java_bean_boilerplate(ofstream& out,
                                                       t_struct* tstruct) {
+  isset_type issetType = needs_isset(tstruct);
   const vector<t_field*>& fields = tstruct->get_members();
   vector<t_field*>::const_iterator f_iter;
   for (f_iter = fields.begin(); f_iter != fields.end(); ++f_iter) {
@@ -1928,6 +1947,8 @@
     indent_up();
     if (type_can_be_null(type)) {
       indent(out) << "this." << field_name << " = null;" << endl;
+    } else if(issetType == ISSET_PRIMITIVE) {
+      indent(out) << "__isset_bitfield = 
EncodingUtils.clearBit(__isset_bitfield, " << isset_field_id(field) << ");" << 
endl;
     } else {
       indent(out) << "__isset_bit_vector.clear(" << isset_field_id(field) << 
");" << endl;
     }
@@ -1940,6 +1961,8 @@
     indent_up();
     if (type_can_be_null(type)) {
       indent(out) << "return this." << field_name << " != null;" << endl;
+    } else if(issetType == ISSET_PRIMITIVE) {
+      indent(out) << "return EncodingUtils.testBit(__isset_bitfield, " << 
isset_field_id(field) << ");" << endl;
     } else {
       indent(out) << "return __isset_bit_vector.get(" << isset_field_id(field) 
<< ");" << endl;
     }
@@ -1952,6 +1975,8 @@
       indent(out) << "if (!value) {" << endl;
       indent(out) << "  this." << field_name << " = null;" << endl;
       indent(out) << "}" << endl;
+    } else if(issetType == ISSET_PRIMITIVE) {
+      indent(out) << "__isset_bitfield = 
EncodingUtils.setBit(__isset_bitfield, " << isset_field_id(field) << ", 
value);" << endl;
     } else {
       indent(out) << "__isset_bit_vector.set(" << isset_field_id(field) << ", 
value);" << endl;
     }
@@ -3759,16 +3784,33 @@
   indent(out) << "}" << endl;
 }
 
-bool t_java_generator::has_bit_vector(t_struct* tstruct) {
+t_java_generator::isset_type t_java_generator::needs_isset(t_struct* tstruct, 
std::string *outPrimitiveType) {
   const vector<t_field*>& members = tstruct->get_members();
   vector<t_field*>::const_iterator m_iter;
 
+  int count = 0;
   for (m_iter = members.begin(); m_iter != members.end(); ++m_iter) {
     if (!type_can_be_null(get_true_type((*m_iter)->get_type()))) {
-      return true;
+      count++;
     }
   }
-  return false;
+  if(count == 0) {
+    return ISSET_NONE;
+  } else if(count <= 64) {
+    if(outPrimitiveType != NULL) {
+      if(count <= 8)
+        *outPrimitiveType = "byte";
+      else if(count <= 16)
+        *outPrimitiveType = "short";
+      else if(count <= 32)
+        *outPrimitiveType = "int";
+      else if(count <= 64)
+        *outPrimitiveType = "long";
+    }
+    return ISSET_PRIMITIVE;
+  } else {
+    return ISSET_BITSET;
+  }
 }
 
 void t_java_generator::generate_java_struct_clear(std::ofstream& out, 
t_struct* tstruct) {
@@ -3838,9 +3880,19 @@
 void t_java_generator::generate_java_struct_read_object(ofstream& out, 
t_struct* tstruct) {
   indent(out) << "private void readObject(java.io.ObjectInputStream in) throws 
java.io.IOException, ClassNotFoundException {" << endl;
   indent(out) << "  try {" << endl;
-  if (!tstruct->is_union() && has_bit_vector(tstruct)) {
-    indent(out) << "    // it doesn't seem like you should have to do this, 
but java serialization is wacky, and doesn't call the default constructor." << 
endl;
-    indent(out) << "    __isset_bit_vector = new BitSet(1);" << endl;
+  if (!tstruct->is_union()) {
+    switch(needs_isset(tstruct)) {
+      case ISSET_NONE:
+        break;
+      case ISSET_PRIMITIVE:
+        indent(out) << "    // it doesn't seem like you should have to do 
this, but java serialization is wacky, and doesn't call the default 
constructor." << endl;
+        indent(out) << "    __isset_bitfield = 0;" << endl;
+        break;
+      case ISSET_BITSET:
+        indent(out) << "    // it doesn't seem like you should have to do 
this, but java serialization is wacky, and doesn't call the default 
constructor." << endl;
+        indent(out) << "    __isset_bit_vector = new BitSet(1);" << endl;
+        break;
+    }
   }
   indent(out) << "    read(new org.apache.thrift.protocol.TCompactProtocol(new 
org.apache.thrift.transport.TIOStreamTransport(in)));" << endl;
   indent(out) << "  } catch (org.apache.thrift.TException te) {" << endl;

Reply via email to