+1 for benchmarking on multiprocessor machine (4+ processors?). Looks like
it is better to use highly multithreaded benchmark to see the worst impact
on performance.
Aleksey.
On 11/24/06, Robin Garner <[EMAIL PROTECTED]> wrote:
Salikh Zakirov wrote:
> Hi,
>
> As a result of numerous class unloading discussions,
> I've hacked vtable marking proposals into GC_CC directly, and measured
> their impact on the performance. I've attached the two patches
> corresponding to "vtable marks" and "indirect marks".
>
> Benchmark: dacapo-2006-10 hsqldb
> Machine: IBM Thinkpad T41p, Pentium M 1700 MHz (1 core), 1 Gb
> Windows XP SP2, MSVC 7.0, release build
> Benchmark arguments:
>
> java -verbose:gc -jar c:/work/dacapo/dacapo-2006-10.jar -s default -n
> 3 hsqldb
>
> Benchmarks results:
>
> no vtable marks: ===== DaCapo hsqldb PASSED in 6168 msec =====
> vtable marks: ===== DaCapo hsqldb PASSED in 6218 msec =====
> (0.8% slowdown)
> indirect marks: ===== DaCapo hsqldb PASSED in 6409 msec
=====
> (3.9% slowdown)
>
> Garbage collection times:
> (garbage collection times were collected for the whole dacapo run,
> including warmup benchmark runs).
>
> no vtable marks:
> COMPACT avg 614.375 +/- 117.537 = 4915.000 / 8, min 50.000, max
911.000
> COPY avg 255.000 +/- 39.325 = 2040.000 / 8, min 90.000, max
490.000
> FORCED avg 189.333 +/- 7.589 = 2840.000 / 15, min 140.000, max
240.000
>
> vtable marks:
> COMPACT avg 615.500 +/- 119.544 = 4924.000 / 8, min 40.000, max
931.000
> COPY avg 260.000 +/- 27.839 = 2340.000 / 9, min 160.000, max
460.000
> FORCED avg 186.667 +/- 7.411 = 2800.000 / 15, min 140.000, max
240.000
>
> indirect marks:
> COMPACT avg 619.375 +/- 123.104 = 4955.000 / 8, min 30.000, max
941.000
> COPY avg 265.000 +/- 38.868 = 2120.000 / 8, min 110.000, max
500.000
> FORCED avg 194.000 +/- 8.095 = 2910.000 / 15, min 150.000, max
250.000
>
> Summary: as was predicted, adding an unconditional write to object scanning
> does not have much impact on the garbage collection time. However,
> overall impact is visible on benchmark level.
>
> Regarding the false sharing when writing vtable marks,
> the benchmarking should be run on a multiprocessor machine and with a
> parallel GC.
Actually I think the results show that the vtable marks are in the
noise. hsqldb is a highly multithreaded benchmark, and so prone to
timing discrepancies. What was the variability of the results? A
single-threaded benchmark like bloat, antlr or pmd might give less
variation.
The other interesting point is the side data structure, something like
MARK_BYTES = size_of_vtable_space >> log_min_vtable_align;
byte[MARK_BYTES] mark_bytes;
mark_bytes[((int)vtable) >> log_min_vtable_align] = 1;
of course this is most space efficient if you coarsely align vtables,
and constrain them to a particular area of the heap.
cheers
> ------------------------------------------------------------------------
>
> diff --git vm/gc_cc/src/collect_copy.cpp vm/gc_cc/src/collect_copy.cpp
> index a3b6a96..a4663fc 100644
> --- vm/gc_cc/src/collect_copy.cpp
> +++ vm/gc_cc/src/collect_copy.cpp
> @@ -168,6 +168,7 @@ static bool gc_copy_process_reference(Sl
> // move the object?
> #define pos ((unsigned char*) obj)
> Partial_Reveal_VTable *vtable = ah_to_vtable(vt);
> + vtable->mark = 1;
> GC_VTable_Info *gcvt = vtable->get_gcvt();
>
> if (pos >= heap.compaction_region_start() && pos <
heap.compaction_region_end()) {
> diff --git vm/gc_cc/src/collect_forced.cpp
vm/gc_cc/src/collect_forced.cpp
> index 072f21e..92bf167 100644
> --- vm/gc_cc/src/collect_forced.cpp
> +++ vm/gc_cc/src/collect_forced.cpp
> @@ -64,6 +64,7 @@ static void forced_process_reference(Par
> obj->obj_info() = (info & ~MARK_BITS) | heap_mark_phase;
>
> Partial_Reveal_VTable *vtable = obj->vtable();
> + vtable->mark = 1;
> GC_VTable_Info *gcvt = vtable->get_gcvt();
>
> if (gcvt->is_array()) { // is array
> diff --git vm/gc_cc/src/collect_slide_compact.cpp
vm/gc_cc/src/collect_slide_compact.cpp
> index e5b4f54..985b94e 100644
> --- vm/gc_cc/src/collect_slide_compact.cpp
> +++ vm/gc_cc/src/collect_slide_compact.cpp
> @@ -454,6 +454,7 @@ static void slide_process_object(Partial
> assert(obj->vt() & ~RESCAN_BIT); // has vt
>
> Partial_Reveal_VTable *vtable = ah_to_vtable(vt & ~RESCAN_BIT);
> + vtable->mark = 1;
> GC_VTable_Info *gcvt = vtable->get_gcvt();
>
> // process slots
> diff --git vm/gc_cc/src/gc_types.h vm/gc_cc/src/gc_types.h
> index 1ac4236..849aaf0 100644
> --- vm/gc_cc/src/gc_types.h
> +++ vm/gc_cc/src/gc_types.h
> @@ -152,6 +152,9 @@ typedef struct Partial_Reveal_VTable {
> private:
> GC_VTable_Info *gcvt;
> public:
> + /// class reachability mark,
> + /// used for class unloading
> + size_t mark;
>
> void set_gcvt(struct GC_VTable_Info *new_gcvt) { gcvt = new_gcvt; }
> struct GC_VTable_Info *get_gcvt() { return gcvt; }
> diff --git vm/vmcore/include/vtable.h vm/vmcore/include/vtable.h
> index a1fc8b4..eb08687 100644
> --- vm/vmcore/include/vtable.h
> +++ vm/vmcore/include/vtable.h
> @@ -53,6 +53,7 @@ typedef struct Intfc_Table {
>
> typedef struct VTable {
> Byte _gc_private_information[GC_BYTES_IN_VTABLE];
> + size_t mark;
> Class* clss;
>
> // See the masks in vm_for_gc.h.
>
>
> ------------------------------------------------------------------------
>
> diff --git vm/gc_cc/src/collect_copy.cpp vm/gc_cc/src/collect_copy.cpp
> index a3b6a96..c2caac2 100644
> --- vm/gc_cc/src/collect_copy.cpp
> +++ vm/gc_cc/src/collect_copy.cpp
> @@ -168,6 +168,7 @@ static bool gc_copy_process_reference(Sl
> // move the object?
> #define pos ((unsigned char*) obj)
> Partial_Reveal_VTable *vtable = ah_to_vtable(vt);
> + *vtable->mark = 1;
> GC_VTable_Info *gcvt = vtable->get_gcvt();
>
> if (pos >= heap.compaction_region_start() && pos <
heap.compaction_region_end()) {
> diff --git vm/gc_cc/src/collect_forced.cpp
vm/gc_cc/src/collect_forced.cpp
> index 072f21e..7e4de43 100644
> --- vm/gc_cc/src/collect_forced.cpp
> +++ vm/gc_cc/src/collect_forced.cpp
> @@ -64,6 +64,7 @@ static void forced_process_reference(Par
> obj->obj_info() = (info & ~MARK_BITS) | heap_mark_phase;
>
> Partial_Reveal_VTable *vtable = obj->vtable();
> + *vtable->mark = 1;
> GC_VTable_Info *gcvt = vtable->get_gcvt();
>
> if (gcvt->is_array()) { // is array
> diff --git vm/gc_cc/src/collect_slide_compact.cpp
vm/gc_cc/src/collect_slide_compact.cpp
> index e5b4f54..4a3ee9c 100644
> --- vm/gc_cc/src/collect_slide_compact.cpp
> +++ vm/gc_cc/src/collect_slide_compact.cpp
> @@ -454,6 +454,7 @@ static void slide_process_object(Partial
> assert(obj->vt() & ~RESCAN_BIT); // has vt
>
> Partial_Reveal_VTable *vtable = ah_to_vtable(vt & ~RESCAN_BIT);
> + *vtable->mark = 1;
> GC_VTable_Info *gcvt = vtable->get_gcvt();
>
> // process slots
> diff --git vm/gc_cc/src/gc_types.h vm/gc_cc/src/gc_types.h
> index 1ac4236..da9a48c 100644
> --- vm/gc_cc/src/gc_types.h
> +++ vm/gc_cc/src/gc_types.h
> @@ -152,6 +152,9 @@ typedef struct Partial_Reveal_VTable {
> private:
> GC_VTable_Info *gcvt;
> public:
> + /// pointer to the class reachability mark,
> + /// used for class unloading
> + size_t *mark;
>
> void set_gcvt(struct GC_VTable_Info *new_gcvt) { gcvt = new_gcvt; }
> struct GC_VTable_Info *get_gcvt() { return gcvt; }
> diff --git vm/vmcore/include/Class.h vm/vmcore/include/Class.h
> index 7194edb..a6c198c 100644
> --- vm/vmcore/include/Class.h
> +++ vm/vmcore/include/Class.h
> @@ -772,6 +772,8 @@ enum AccessAndPropertiesFlags {
> * calling the verifier, preparing, resolving and initializing the
class.*/
>
> struct Class {
> + /// mark used for the class unloading
> + size_t mark;
> private:
> typedef struct {
> union {
--
Robin Garner
Dept. of Computer Science
Australian National University
http://cs.anu.edu.au/people/Robin.Garner/