On Mon, Mar 1, 2010 at 01:16, Warren Harris <warrensomeb...@gmail.com> wrote: > I would like to determine what percentage of my application's cpu time is > spent in the garbage collector (for tuning purposes, but also just to > monitor the overhead). Is there any way to obtain this information short of > using gprof? Additional information provided by Gc.stat would be ideal, or > perhaps a Gc.alarm that was called at the beginning of the gc cycle, but > neither of these seem to exist.
Here's what I use to measure GC overhead in my programs. There's a small modification to the runtime, so as to track the time spent in caml_minor_collection, and a helper ml module. It tracks and prints the time spent between calls to the start() and stop() function of the helper module, as well the number of collections, number of bytes allocated, etc. It is rather coarse-grained of course. I use it to profile the different parts of a compiler: parsing, typing, optimizations, code generation, etc. -- Olivier
diff -up ./byterun/sys.h.orig ./byterun/sys.h --- ./byterun/sys.h.orig 2007-02-25 13:38:36.000000000 +0100 +++ ./byterun/sys.h 2007-10-03 15:42:18.000000000 +0200 @@ -27,4 +27,6 @@ CAMLextern value caml_sys_exit (value); extern char * caml_exe_name; +double caml_tick (void); + #endif /* CAML_SYS_H */ diff -up ./byterun/minor_gc.c.orig ./byterun/minor_gc.c --- ./byterun/minor_gc.c.orig 2005-09-22 16:21:50.000000000 +0200 +++ ./byterun/minor_gc.c 2007-10-03 15:42:18.000000000 +0200 @@ -27,6 +27,9 @@ #include "roots.h" #include "signals.h" +#include "alloc.h" +#include "sys.h" + asize_t caml_minor_heap_size; CAMLexport char *caml_young_start = NULL, *caml_young_end = NULL; CAMLexport char *caml_young_ptr = NULL, *caml_young_limit = NULL; @@ -35,6 +38,20 @@ CAMLexport value **caml_ref_table_ptr = static asize_t ref_table_size, ref_table_reserve; int caml_in_minor_collection = 0; +static int caml_gc_timing; +double caml_gc_time = 0.; + +CAMLprim value caml_get_gc_time (value unit) +{ + return caml_copy_double (caml_gc_time); +} + +CAMLprim value caml_set_gc_timing (value v) +{ + caml_gc_timing = Bool_val (v); + return Val_unit; +} + void caml_set_minor_heap_size (asize_t size) { char *new_heap; @@ -217,8 +234,13 @@ void caml_empty_minor_heap (void) */ CAMLexport void caml_minor_collection (void) { + double t_start, t_end; + intnat prev_alloc_words = caml_allocated_words; + if (caml_gc_timing) + t_start = caml_tick (); + caml_empty_minor_heap (); caml_stat_promoted_words += caml_allocated_words - prev_alloc_words; @@ -229,6 +251,12 @@ CAMLexport void caml_minor_collection (v caml_final_do_calls (); caml_empty_minor_heap (); + + if (caml_gc_timing) + { + t_end = caml_tick (); + caml_gc_time += t_end - t_start; + } } CAMLexport value caml_check_urgent_gc (value extra_root) diff -up ./byterun/sys.c.orig ./byterun/sys.c --- ./byterun/sys.c.orig 2007-03-01 14:37:39.000000000 +0100 +++ ./byterun/sys.c 2007-10-03 15:42:18.000000000 +0200 @@ -344,3 +344,54 @@ CAMLprim value caml_sys_read_directory(v caml_ext_table_free(&tbl, 1); CAMLreturn(result); } + + +#include <math.h> +static long dumb_gettime (clockid_t i, struct timespec *ts) +{ + double t = clock () / (double) CLOCKS_PER_SEC; + ts->tv_sec = t; + ts->tv_nsec = (t - floor(t)) * 1e9; + return 0; +} + +static long (* gettime)(clockid_t, struct timespec *); + +#include <dlfcn.h> +static void init_gettime () +{ + void *h; + h = dlopen ("librt.so", RTLD_LAZY); + if (! h) + goto fail; + dlerror (); + *(void **)(&gettime) = dlsym (h, "clock_gettime"); + if (dlerror () != NULL) + { + dlclose (h); + goto fail; + } + return; + + fail: + gettime = dumb_gettime; +} + + +double caml_tick () +{ + struct timespec tp; + + if (! gettime) + init_gettime(); + + if (gettime (CLOCK_PROCESS_CPUTIME_ID, &tp)) + abort(); + + return tp.tv_sec + (1.e-9) * tp.tv_nsec; +} + +CAMLprim value caml_get_tick (value unit) +{ + return caml_copy_double (caml_tick ()); +}
let do_timing = try ignore (Sys.getenv "TIMING") ; true with Not_found -> false external set_gc_timing : bool -> unit = "caml_set_gc_timing" "noalloc" external get_gc_time : unit -> float = "caml_get_gc_time" external get_tick : unit -> float = "caml_get_tick" let bpw = float_of_int (Sys.word_size / 8) let gc_alloc_mem () = let s = Gc.quick_stat () in let mi = s.Gc.minor_words and pr = s.Gc.promoted_words and ma = s.Gc.major_words in (mi -. pr +. ma) *. bpw, s.Gc.heap_words, s.Gc.minor_collections, s.Gc.major_collections, s.Gc.compactions type chrono = { mutable tick : float ; mutable gc_tick : float ; mutable gc_alloc : float ; mutable gc_heap : int ; mutable gc_collec_mi : int ; mutable gc_collec_ma : int ; mutable gc_collec_co : int ; } let make_chrono () = { tick = 0. ; gc_tick = 0. ; gc_alloc = 0. ; gc_heap = 0 ; gc_collec_mi = 0 ; gc_collec_ma = 0 ; gc_collec_co = 0 ; } let chrono_start c = if do_timing then begin c.tick <- get_tick () ; c.gc_tick <- get_gc_time () ; let mem, _, mi, ma, co = gc_alloc_mem () in c.gc_alloc <- mem ; c.gc_collec_mi <- mi ; c.gc_collec_ma <- ma ; c.gc_collec_co <- co end let chrono_stop c = if do_timing then begin c.tick <- get_tick () -. c.tick ; c.gc_tick <- get_gc_time () -. c.gc_tick ; let n_mem, h, n_mi, n_ma, n_co = gc_alloc_mem () in c.gc_alloc <- n_mem -. c.gc_alloc ; c.gc_heap <- h ; c.gc_collec_mi <- n_mi - c.gc_collec_mi ; c.gc_collec_ma <- n_ma - c.gc_collec_ma ; c.gc_collec_co <- n_co - c.gc_collec_co ; end let chrono_accum c1 c2 = c1.tick <- c1.tick +. c2.tick ; c1.gc_tick <- c1.gc_tick +. c2.gc_tick ; c1.gc_alloc <- c1.gc_alloc +. c2.gc_alloc ; c1.gc_heap <- max c1.gc_heap c2.gc_heap ; c1.gc_collec_mi <- c1.gc_collec_mi + c2.gc_collec_mi ; c1.gc_collec_ma <- c1.gc_collec_ma + c2.gc_collec_ma ; c1.gc_collec_co <- c1.gc_collec_co + c2.gc_collec_co let string_of_time t = if t = 0. then "0ms" else if t < 1e-3 then "<1ms" else if t < 1. then Printf.sprintf "%.3fs" t else let m = floor (t /. 60.) in if m < 1. then Printf.sprintf "%.2fs" t else let s = t -. m *. 60. in Printf.sprintf "%.0fm %.2fs" m s let string_of_mem b = let b = int_of_float b in let g, b = let v = b / (1 lsl 30) in if v > 0 then string_of_int v ^ "G ", b - v * (1 lsl 30) else "", b in let m, b = let v = b / (1 lsl 20) in if v > 0 then g ^ string_of_int v ^ "M ", b - v * (1 lsl 20) else g, b in let k, b = let v = b / (1 lsl 10) in if v > 0 then m ^ string_of_int v ^ "K ", b - v * (1 lsl 10) else m, b in let b = if b > 0 then k ^ string_of_int b else k in b let string_of_memw w = string_of_mem (float_of_int w *. bpw) let string_of_coll c = let mi = c.gc_collec_mi and ma = c.gc_collec_ma and co = c.gc_collec_co in let mi = if mi > 0 then string_of_int mi ^ "m " else "" in let ma = if ma > 0 then string_of_int ma ^ "M " else "" in let co = if co > 0 then string_of_int co ^ "C" else "" in mi ^ ma ^ co let pp_chrono msg c = let tp = c.tick -. c.gc_tick and tg = c.gc_tick and mem = c.gc_alloc in Printf.sprintf "%-15s: %8s (GC %-9s, alloc %-20s, heap %-15s, collec %s)" msg (string_of_time tp) (string_of_time tg) (string_of_mem mem) (string_of_memw c.gc_heap) (string_of_coll c) let pp_time msg c = if do_timing then prerr_endline (pp_chrono msg c) let main = make_chrono () let start () = chrono_start main let stop msg = chrono_stop main ; pp_time msg main let _ = set_gc_timing do_timing ; if do_timing then at_exit (fun () -> let s = Gc.quick_stat () in prerr_endline ("top_heap_words = " ^ string_of_memw s.Gc.top_heap_words))
type chrono val make_chrono : unit -> chrono val chrono_start : chrono -> unit val chrono_stop : chrono -> unit val chrono_accum : chrono -> chrono -> unit val pp_chrono : string -> chrono -> string val start : unit -> unit val stop : string -> unit
_______________________________________________ Caml-list mailing list. Subscription management: http://yquem.inria.fr/cgi-bin/mailman/listinfo/caml-list Archives: http://caml.inria.fr Beginner's list: http://groups.yahoo.com/group/ocaml_beginners Bug reports: http://caml.inria.fr/bin/caml-bugs