Aleksey Ignatenko wrote:
+1 for benchmarking on a multiprocessor machine (>4 processors?). Looks like
it is better to use highly multithreaded benchmark to see the worst impact
on performance.

Agreed, but care should be taken to ensure results are statistically significant. I'm not a statistician, but remembering back to my Physics class I would do 5 or more runs, use the mean score over those runs, and count differences that fall within the standard deviation of the mean as a 'no result'.

cheers

Aleksey.

On 11/24/06, Robin Garner <[EMAIL PROTECTED]> wrote:

Salikh Zakirov wrote:
> Hi,
>
> As a result of numerous class unloading discussions,
> I've hacked vtable marking proposals into GC_CC directly, and measured
> their impact on the performance. I've attached the two patches
> corresponding to "vtable marks" and "indirect marks".
>
> Benchmark: dacapo-2006-10 hsqldb
> Machine: IBM Thinkpad T41p, Pentium M 1700 MHz (1 core), 1 Gb
> Windows XP SP2, MSVC 7.0, release build
> Benchmark arguments:
>
>   java -verbose:gc -jar c:/work/dacapo/dacapo-2006-10.jar -s default -n
> 3 hsqldb
>
> Benchmarks results:
>
> no vtable marks:      ===== DaCapo hsqldb PASSED in 6168 msec =====
> vtable marks:         ===== DaCapo hsqldb PASSED in 6218 msec =====
> (0.8% slowdown)
> indirect marks:               ===== DaCapo hsqldb PASSED in 6409 msec
=====
> (3.9% slowdown)
>
> Garbage collection times:
> (garbage collection times were collected for the whole dacapo run,
> including warmup benchmark runs).
>
> no vtable marks:
> COMPACT avg  614.375 +/- 117.537 =  4915.000 / 8, min   50.000, max
911.000
> COPY    avg  255.000 +/- 39.325 =  2040.000 / 8, min   90.000, max
490.000
> FORCED  avg  189.333 +/- 7.589 =  2840.000 / 15, min  140.000, max
240.000
>
> vtable marks:
> COMPACT avg  615.500 +/- 119.544 =  4924.000 / 8, min   40.000, max
931.000
> COPY    avg  260.000 +/- 27.839 =  2340.000 / 9, min  160.000, max
460.000
> FORCED  avg  186.667 +/- 7.411 =  2800.000 / 15, min  140.000, max
240.000
>
> indirect marks:
> COMPACT avg  619.375 +/- 123.104 =  4955.000 / 8, min   30.000, max
941.000
> COPY    avg  265.000 +/- 38.868 =  2120.000 / 8, min  110.000, max
500.000
> FORCED  avg  194.000 +/- 8.095 =  2910.000 / 15, min  150.000, max
250.000
>
> Summary: as was predicted, adding an unconditional write to object scanning
> does not have much impact on the garbage collection time. However,
> overall impact is visible on benchmark level.
>
> Regarding the false sharing when writing vtable marks,
> the benchmarking should be run on a multiprocessor machine and with a
> parallel GC.

Actually I think the results show that the vtable marks are in the
noise.  hsqldb is a highly multithreaded benchmark, and so prone to
timing discrepancies.  What was the variability of the results ?  A
single-threaded benchmark like bloat, antlr or pmd might give less
variation.

The other interesting point is the side data structure, something like

MARK_BYTES=size_of_vtable_space >> log_min_vtable_align;
byte[MARK_BYTES] mark_bytes;

mark_bytes[((int)vtable)>>(log_min_vtable_align)] = 1;

of course this is most space efficient if you coarsely align vtables,
and constrain them to a particular area of the heap.

cheers

> ------------------------------------------------------------------------
>
> diff --git vm/gc_cc/src/collect_copy.cpp vm/gc_cc/src/collect_copy.cpp
> index a3b6a96..a4663fc 100644
> --- vm/gc_cc/src/collect_copy.cpp
> +++ vm/gc_cc/src/collect_copy.cpp
> @@ -168,6 +168,7 @@ static bool gc_copy_process_reference(Sl
>      // move the object?
>  #define pos ((unsigned char*) obj)
>      Partial_Reveal_VTable *vtable = ah_to_vtable(vt);
> +    vtable->mark = 1;
>      GC_VTable_Info *gcvt = vtable->get_gcvt();
>
>      if (pos >= heap.compaction_region_start() && pos <
heap.compaction_region_end()) {
> diff --git vm/gc_cc/src/collect_forced.cpp
vm/gc_cc/src/collect_forced.cpp
> index 072f21e..92bf167 100644
> --- vm/gc_cc/src/collect_forced.cpp
> +++ vm/gc_cc/src/collect_forced.cpp
> @@ -64,6 +64,7 @@ static void forced_process_reference(Par
>      obj->obj_info() = (info & ~MARK_BITS) | heap_mark_phase;
>
>      Partial_Reveal_VTable *vtable = obj->vtable();
> +    vtable->mark = 1;
>      GC_VTable_Info *gcvt = vtable->get_gcvt();
>
>      if (gcvt->is_array()) { // is array
> diff --git vm/gc_cc/src/collect_slide_compact.cpp
vm/gc_cc/src/collect_slide_compact.cpp
> index e5b4f54..985b94e 100644
> --- vm/gc_cc/src/collect_slide_compact.cpp
> +++ vm/gc_cc/src/collect_slide_compact.cpp
> @@ -454,6 +454,7 @@ static void slide_process_object(Partial
>      assert(obj->vt() & ~RESCAN_BIT); // has vt
>
>      Partial_Reveal_VTable *vtable = ah_to_vtable(vt & ~RESCAN_BIT);
> +    vtable->mark = 1;
>      GC_VTable_Info *gcvt = vtable->get_gcvt();
>
>      // process slots
> diff --git vm/gc_cc/src/gc_types.h vm/gc_cc/src/gc_types.h
> index 1ac4236..849aaf0 100644
> --- vm/gc_cc/src/gc_types.h
> +++ vm/gc_cc/src/gc_types.h
> @@ -152,6 +152,9 @@ typedef struct Partial_Reveal_VTable {
>  private:
>      GC_VTable_Info *gcvt;
>  public:
> +    /// class reachability mark (stored inline in the vtable),
> +    /// used for class unloading
> +    size_t mark;
>
> void set_gcvt(struct GC_VTable_Info *new_gcvt) { gcvt = new_gcvt; }
>      struct GC_VTable_Info *get_gcvt() { return gcvt; }
> diff --git vm/vmcore/include/vtable.h vm/vmcore/include/vtable.h
> index a1fc8b4..eb08687 100644
> --- vm/vmcore/include/vtable.h
> +++ vm/vmcore/include/vtable.h
> @@ -53,6 +53,7 @@ typedef struct Intfc_Table {
>
>  typedef struct VTable {
>      Byte _gc_private_information[GC_BYTES_IN_VTABLE];
> +    size_t mark;
>      Class* clss;
>
>      // See the masks in vm_for_gc.h.
>
>
> ------------------------------------------------------------------------
>
> diff --git vm/gc_cc/src/collect_copy.cpp vm/gc_cc/src/collect_copy.cpp
> index a3b6a96..c2caac2 100644
> --- vm/gc_cc/src/collect_copy.cpp
> +++ vm/gc_cc/src/collect_copy.cpp
> @@ -168,6 +168,7 @@ static bool gc_copy_process_reference(Sl
>      // move the object?
>  #define pos ((unsigned char*) obj)
>      Partial_Reveal_VTable *vtable = ah_to_vtable(vt);
> +    *vtable->mark = 1;
>      GC_VTable_Info *gcvt = vtable->get_gcvt();
>
>      if (pos >= heap.compaction_region_start() && pos <
heap.compaction_region_end()) {
> diff --git vm/gc_cc/src/collect_forced.cpp
vm/gc_cc/src/collect_forced.cpp
> index 072f21e..7e4de43 100644
> --- vm/gc_cc/src/collect_forced.cpp
> +++ vm/gc_cc/src/collect_forced.cpp
> @@ -64,6 +64,7 @@ static void forced_process_reference(Par
>      obj->obj_info() = (info & ~MARK_BITS) | heap_mark_phase;
>
>      Partial_Reveal_VTable *vtable = obj->vtable();
> +    *vtable->mark = 1;
>      GC_VTable_Info *gcvt = vtable->get_gcvt();
>
>      if (gcvt->is_array()) { // is array
> diff --git vm/gc_cc/src/collect_slide_compact.cpp
vm/gc_cc/src/collect_slide_compact.cpp
> index e5b4f54..4a3ee9c 100644
> --- vm/gc_cc/src/collect_slide_compact.cpp
> +++ vm/gc_cc/src/collect_slide_compact.cpp
> @@ -454,6 +454,7 @@ static void slide_process_object(Partial
>      assert(obj->vt() & ~RESCAN_BIT); // has vt
>
>      Partial_Reveal_VTable *vtable = ah_to_vtable(vt & ~RESCAN_BIT);
> +    *vtable->mark = 1;
>      GC_VTable_Info *gcvt = vtable->get_gcvt();
>
>      // process slots
> diff --git vm/gc_cc/src/gc_types.h vm/gc_cc/src/gc_types.h
> index 1ac4236..da9a48c 100644
> --- vm/gc_cc/src/gc_types.h
> +++ vm/gc_cc/src/gc_types.h
> @@ -152,6 +152,9 @@ typedef struct Partial_Reveal_VTable {
>  private:
>      GC_VTable_Info *gcvt;
>  public:
> +    /// pointer to the class reachability mark,
> +    /// used for class unloading
> +    size_t *mark;
>
> void set_gcvt(struct GC_VTable_Info *new_gcvt) { gcvt = new_gcvt; }
>      struct GC_VTable_Info *get_gcvt() { return gcvt; }
> diff --git vm/vmcore/include/Class.h vm/vmcore/include/Class.h
> index 7194edb..a6c198c 100644
> --- vm/vmcore/include/Class.h
> +++ vm/vmcore/include/Class.h
> @@ -772,6 +772,8 @@ enum AccessAndPropertiesFlags {
>   * calling the verifier, preparing, resolving and initializing the
class.*/
>
>  struct Class {
> +    /// mark used for the class unloading
> +    size_t mark;
>  private:
>      typedef struct {
>          union {


--
Robin Garner
Dept. of Computer Science
Australian National University
http://cs.anu.edu.au/people/Robin.Garner/




--
Robin Garner
Dept. of Computer Science
Australian National University
http://cs.anu.edu.au/people/Robin.Garner/

Reply via email to