java/lang/String.intern() should always return the same reference for
equal strings. Therefore we must keep track of them in the VM.  We
maintain a global hash map of string literals. Whenever a new string
object is created by the VM, vm_string_intern() should be called on it.

Currently the call only needs to be made in
vm_object_alloc_string_from_utf8() and vm_object_alloc_string_from_c().

The implementation of VMString.intern() is overridden during the preload
process.

Signed-off-by: Tomek Grabiec <tgrab...@gmail.com>
---
 Makefile                       |    1 +
 include/vm/string.h            |    9 +++
 regression/jvm/StringTest.java |    8 +++
 test/arch-x86/Makefile         |    2 +
 test/vm/preload-stub.c         |    1 +
 vm/jato.c                      |    9 +++-
 vm/object.c                    |    8 ++-
 vm/preload.c                   |   46 +++++++++++++++
 vm/string.c                    |  121 ++++++++++++++++++++++++++++++++++++++++
 9 files changed, 201 insertions(+), 4 deletions(-)
 create mode 100644 include/vm/string.h
 create mode 100644 vm/string.c

diff --git a/Makefile b/Makefile
index 45c736d..65fa1ad 100644
--- a/Makefile
+++ b/Makefile
@@ -119,6 +119,7 @@ VM_OBJS = \
        vm/stack.o              \
        vm/stack-trace.o        \
        vm/static.o             \
+       vm/string.o             \
        vm/thread.o             \
        vm/trace.o              \
        vm/types.o              \
diff --git a/include/vm/string.h b/include/vm/string.h
new file mode 100644
index 0000000..4d3a521
--- /dev/null
+++ b/include/vm/string.h
@@ -0,0 +1,9 @@
+#ifndef JATO_STRING_H
+#define JATO_STRING_H
+
+struct vm_object;
+
+void init_literals_hash_map(void);
+struct vm_object *vm_string_intern(struct vm_object *string);
+
+#endif /* JATO_STRING_H */
diff --git a/regression/jvm/StringTest.java b/regression/jvm/StringTest.java
index 0d08a06..3de7b9e 100644
--- a/regression/jvm/StringTest.java
+++ b/regression/jvm/StringTest.java
@@ -24,8 +24,16 @@ public class StringTest extends TestCase {
         assertObjectEquals("123abcd", a + b);
     }
 
+    public static String test_literal = "Test";
+
+    public static void testStringIntern() {
+        String s1 = new String("Test");
+        assertEquals(s1.intern(), test_literal);
+    }
+
     public static void main(String args[]) {
         testUnicode();
         testStringConcatenation();
+        testStringIntern();
     }
 }
diff --git a/test/arch-x86/Makefile b/test/arch-x86/Makefile
index 36049b8..053dab7 100644
--- a/test/arch-x86/Makefile
+++ b/test/arch-x86/Makefile
@@ -56,6 +56,7 @@ TOPLEVEL_OBJS := \
        lib/guard-page.o \
        lib/radix-tree.o \
        lib/string.o \
+       lib/hash-map.o \
        vm/bytecode.o \
        vm/bytecodes.o \
        vm/call.o \
@@ -68,6 +69,7 @@ TOPLEVEL_OBJS := \
        vm/object.o \
        vm/stack.o \
        vm/static.o \
+       vm/string.o \
        vm/thread.o \
        vm/trace.o \
        vm/types.o \
diff --git a/test/vm/preload-stub.c b/test/vm/preload-stub.c
index 4e82da8..e7d3046 100644
--- a/test/vm/preload-stub.c
+++ b/test/vm/preload-stub.c
@@ -52,6 +52,7 @@ struct vm_class *vm_java_lang_Thread;
 struct vm_class *vm_java_lang_ThreadGroup;
 struct vm_class *vm_java_lang_VMThread;
 struct vm_class *vm_java_lang_IllegalMonitorStateException;
+struct vm_class *vm_java_lang_OutOfMemoryError;
 
 struct vm_method *vm_java_lang_Throwable_initCause;
 struct vm_method *vm_java_lang_Throwable_getCause;
diff --git a/vm/jato.c b/vm/jato.c
index f6744d7..946a8ea 100644
--- a/vm/jato.c
+++ b/vm/jato.c
@@ -71,6 +71,7 @@
 #include "vm/signal.h"
 #include "vm/stack-trace.h"
 #include "vm/static.h"
+#include "vm/string.h"
 #include "vm/system.h"
 #include "vm/thread.h"
 #include "vm/vm.h"
@@ -788,6 +789,11 @@ native_vmclass_isinterface(struct vm_object *clazz)
        return vm_class_is_interface(class);
 }
 
+static struct vm_object *native_vmstring_intern(struct vm_object *str)
+{
+       return vm_string_intern(str);
+}
+
 static struct vm_native natives[] = {
        DEFINE_NATIVE("gnu/classpath/VMStackWalker", "getClassContext", 
&native_vmstackwalker_getclasscontext),
        DEFINE_NATIVE("gnu/classpath/VMSystemProperties", "preInit", 
&native_vmsystemproperties_preinit),
@@ -823,6 +829,7 @@ static struct vm_native natives[] = {
        DEFINE_NATIVE("java/lang/VMRuntime", "mapLibraryName", 
&native_vmruntime_maplibraryname),
        DEFINE_NATIVE("java/lang/VMRuntime", "nativeLoad", 
&native_vmruntime_native_load),
        DEFINE_NATIVE("java/lang/VMRuntime", "runFinalizationForExit", 
&native_vmruntime_run_finalization_for_exit),
+       DEFINE_NATIVE("java/lang/VMString", "intern", &native_vmstring_intern),
        DEFINE_NATIVE("java/lang/VMSystem", "arraycopy", 
&native_vmsystem_arraycopy),
        DEFINE_NATIVE("java/lang/VMSystem", "identityHashCode", 
&native_vmsystem_identityhashcode),
        DEFINE_NATIVE("java/lang/VMSystem", "nanoTime", 
&native_vmsystem_nano_time),
@@ -1234,7 +1241,7 @@ main(int argc, char *argv[])
 #endif
 
        arch_init();
-
+       init_literals_hash_map();
        init_system_properties();
 
        parse_options(argc, argv);
diff --git a/vm/object.c b/vm/object.c
index 205dc25..c8ed0fe 100644
--- a/vm/object.c
+++ b/vm/object.c
@@ -15,10 +15,12 @@
 #include "vm/preload.h"
 #include "vm/object.h"
 #include "vm/stdlib.h"
-#include "lib/string.h"
+#include "vm/string.h"
 #include "vm/types.h"
 #include "vm/utf8.h"
 
+#include "lib/string.h"
+
 static pthread_mutexattr_t obj_mutexattr;
 
 int init_vm_objects(void)
@@ -297,7 +299,7 @@ vm_object_alloc_string_from_utf8(const uint8_t bytes[], 
unsigned int length)
        field_set_int(string, vm_java_lang_String_count, array->array_length);
        field_set_object(string, vm_java_lang_String_value, array);
 
-       return string;
+       return vm_string_intern(string);
 }
 
 struct vm_object *
@@ -330,7 +332,7 @@ vm_object_alloc_string_from_c(const char *bytes)
        field_set_int(string, vm_java_lang_String_count, array->array_length);
        field_set_object(string, vm_java_lang_String_value, array);
 
-       return string;
+       return vm_string_intern(string);
 }
 
 typedef void (*exception_init_fn)(struct vm_object *, struct vm_object *);
diff --git a/vm/preload.c b/vm/preload.c
index f83979c..4d477db 100644
--- a/vm/preload.c
+++ b/vm/preload.c
@@ -29,9 +29,12 @@
 
 #include "vm/die.h"
 #include "vm/classloader.h"
+#include "vm/natives.h"
 #include "vm/preload.h"
 #include "vm/class.h"
 
+#include "jit/cu-mapping.h"
+
 struct preload_entry {
        const char *name;
        struct vm_class **class;
@@ -85,6 +88,7 @@ struct vm_class *vm_java_lang_Float;
 struct vm_class *vm_java_lang_Integer;
 struct vm_class *vm_java_lang_Long;
 struct vm_class *vm_java_lang_Short;
+struct vm_class *vm_java_lang_VMString;
 struct vm_class *vm_boolean_class;
 struct vm_class *vm_char_class;
 struct vm_class *vm_float_class;
@@ -143,6 +147,7 @@ static const struct preload_entry preload_entries[] = {
        { "java/lang/Double", &vm_java_lang_Double },
        { "java/lang/Long", &vm_java_lang_Long },
        { "java/lang/ClassLoader", &vm_java_lang_ClassLoader},
+       { "java/lang/VMString", &vm_java_lang_VMString},
 };
 
 static const struct preload_entry primitive_preload_entries[] = {
@@ -244,6 +249,7 @@ struct vm_method *vm_java_lang_Integer_init;
 struct vm_method *vm_java_lang_Long_init;
 struct vm_method *vm_java_lang_Short_init;
 struct vm_method *vm_java_lang_ClassLoader_loadClass;
+struct vm_method *vm_java_lang_VMString_intern;
 
 static const struct method_preload_entry method_preload_entries[] = {
        {
@@ -390,6 +396,20 @@ static const struct method_preload_entry 
method_preload_entries[] = {
                "(Ljava/lang/String;)Ljava/lang/Class;",
                &vm_java_lang_ClassLoader_loadClass,
        },
+       {
+               &vm_java_lang_VMString,
+               "intern",
+               "(Ljava/lang/String;)Ljava/lang/String;",
+               &vm_java_lang_VMString_intern,
+       },
+};
+
+/*
+ * Methods put in this table will be forcibly marked as native which
+ * will allow the VM to provide its own implementation for them.
+ */
+static struct vm_method **native_override_entries[] = {
+       &vm_java_lang_VMString_intern,
 };
 
 int preload_vm_classes(void)
@@ -452,5 +472,31 @@ int preload_vm_classes(void)
                *me->method = method;
        }
 
+       for (unsigned int i = 0; i < ARRAY_SIZE(native_override_entries); ++i) {
+               struct cafebabe_method_info *m_info;
+               struct compilation_unit *cu;
+               struct vm_method *vmm;
+
+               vmm = *native_override_entries[i];
+               vmm->is_vm_native = true;
+
+               cu = vmm->compilation_unit;
+
+               cu->native_ptr = vm_lookup_native(vmm->class->name, vmm->name);
+               if (!cu->native_ptr)
+                       error("no VM native for overriden method: %s.%s%s",
+                             vmm->class->name, vmm->name, vmm->type);
+
+               cu->is_compiled = true;
+
+               if (add_cu_mapping((unsigned long)cu->native_ptr, cu)) {
+                       NOT_IMPLEMENTED;
+                       return -1;
+               }
+
+               m_info = (struct cafebabe_method_info *)vmm->method;
+               m_info->access_flags |= CAFEBABE_METHOD_ACC_NATIVE;
+       }
+
        return 0;
 }
diff --git a/vm/string.c b/vm/string.c
new file mode 100644
index 0000000..e328420
--- /dev/null
+++ b/vm/string.c
@@ -0,0 +1,121 @@
+/*
+ * Copyright (c) 2009 Tomasz Grabiec
+ *
+ * This file is released under the GPL version 2 with the following
+ * clarification and special exception:
+ *
+ *     Linking this library statically or dynamically with other modules is
+ *     making a combined work based on this library. Thus, the terms and
+ *     conditions of the GNU General Public License cover the whole
+ *     combination.
+ *
+ *     As a special exception, the copyright holders of this library give you
+ *     permission to link this library with independent modules to produce an
+ *     executable, regardless of the license terms of these independent
+ *     modules, and to copy and distribute the resulting executable under terms
+ *     of your choice, provided that you also meet, for each linked independent
+ *     module, the terms and conditions of the license of that module. An
+ *     independent module is a module which is not derived from or based on
+ *     this library. If you modify this library, you may extend this exception
+ *     to your version of the library, but you are not obligated to do so. If
+ *     you do not wish to do so, delete this exception statement from your
+ *     version.
+ *
+ * Please refer to the file LICENSE for details.
+ */
+
+#include "vm/die.h"
+#include "vm/object.h"
+#include "vm/preload.h"
+#include "vm/string.h"
+
+#include "jit/exception.h"
+
+#include "lib/hash-map.h"
+
+#include <pthread.h>
+#include <memory.h>
+
+static struct hash_map *literals;
+pthread_rwlock_t literals_rwlock = PTHREAD_RWLOCK_INITIALIZER;
+
+static int string_obj_comparator(const void *key1, const void *key2)
+{
+       struct vm_object *array1, *array2;
+       jint offset1, offset2;
+       jint count1, count2;
+
+       count1 = field_get_int(key1, vm_java_lang_String_count);
+       count2 = field_get_int(key2, vm_java_lang_String_count);
+
+       if (count1 != count2)
+               return -1;
+
+       offset1 = field_get_int(key1, vm_java_lang_String_offset);
+       offset2 = field_get_int(key2, vm_java_lang_String_offset);
+
+       array1 = field_get_object(key1, vm_java_lang_String_value);
+       array2 = field_get_object(key2, vm_java_lang_String_value);
+
+       int fsize = get_vmtype_size(J_CHAR);
+
+       return memcmp(array1->fields + offset1 * fsize,
+                     array2->fields + offset2 * fsize,
+                     count1 * fsize);
+}
+
+static unsigned long string_obj_hash(const void *key, unsigned long size)
+{
+       struct vm_object *array;
+       unsigned long hash;
+       jint offset;
+       jint count;
+
+       offset = field_get_int(key, vm_java_lang_String_offset);
+       count = field_get_int(key, vm_java_lang_String_count);
+       array = field_get_object(key, vm_java_lang_String_value);
+
+       hash = 0;
+
+       for (jint i = 0; i < count; i++)
+               hash += 31 * hash + array_get_field_char(array, i + offset);
+
+       return hash % size;
+}
+
+void init_literals_hash_map(void)
+{
+       literals = alloc_hash_map(1000, string_obj_hash, string_obj_comparator);
+       if (!literals)
+               error("failed to initialize literals hash map");
+}
+
+struct vm_object *vm_string_intern(struct vm_object *string)
+{
+       struct vm_object *intern;
+
+       pthread_rwlock_rdlock(&literals_rwlock);
+
+       if (hash_map_get(literals, string, (void **) &intern) == 0) {
+               pthread_rwlock_unlock(&literals_rwlock);
+               return intern;
+       }
+
+       pthread_rwlock_unlock(&literals_rwlock);
+       pthread_rwlock_wrlock(&literals_rwlock);
+
+       /*
+        * XXX: we should notify GC that we store a reference to
+        * string here (both as a key and a value). It should be
+        * marked as a weak reference.
+        */
+       intern = string;
+       if (hash_map_put(literals, string, intern)) {
+               signal_new_exception(vm_java_lang_OutOfMemoryError, NULL);
+               intern = NULL;
+       }
+
+       pthread_rwlock_unlock(&literals_rwlock);
+
+       return intern;
+}
-- 
1.6.0.6


------------------------------------------------------------------------------
Let Crystal Reports handle the reporting - Free Crystal Reports 2008 30-Day 
trial. Simplify your report design, integration and deployment - and focus on 
what you do best, core application coding. Discover what's new with 
Crystal Reports now.  http://p.sf.net/sfu/bobj-july
_______________________________________________
Jatovm-devel mailing list
Jatovm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/jatovm-devel

Reply via email to