Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 37 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,3 +23,40 @@ command-line tools in bin/, but the recommended interface is the
memtrace viewer, which lives at:

https://github.com/janestreet/memtrace_viewer

## Installation
These instructions are for using statmemprof with OCaml 5.3.0+trunk

``` shell
# Setup a new Blank switch
opam switch create 5.3.0 --no-install
eval $(opam env --switch=5.3.0 --set-switch)

# Install dune
opam install dune
```

Now we can get memory traces for programs, here is a multicore fibonacci program:

``` shell
$ opam instal domainslib

$ dune build examples

# Run tracing on single domain
$ MEMTRACE=fib_par.ctf _build/default/examples/fib_par.exe 1 45

# On three domains
$ MEMTRACE=fib_par_2.ctf _build/default/examples/fib_par.exe 3 45
```

these CTF files are viewable in `memtrace_viewer`.

Install memtrace_viewer in another switch (5.1 will work since we only need to read and write trace files)

``` shell
opam switch create 5.1.1 --no-install
opam install memtrace_viewer
memtrace-viewer ./fib_par_2.ctf
```

6 changes: 3 additions & 3 deletions docs/internal.md
Original file line number Diff line number Diff line change
Expand Up @@ -118,16 +118,16 @@ arbitrary program is hard. There are three standard approaches,
available as options in `perf record`:

- `--call-graph=dwarf` uses the DWARF debugging information

- `--call-graph=fp` follows a chain of frame pointers

- `--call-graph=lbr` uses the Last Branch Record hardware support

However, all of these have disadvantages:

- DWARF exists to support debuggers, and so is designed for
flexibility rather than speed.

This flexibility is necessary to handle the hard cases of C stack
frames: for instance, a C program can define a variable-length
array of ints on the stack, and store its length (in ints, not
Expand Down
2 changes: 1 addition & 1 deletion dune
Original file line number Diff line number Diff line change
@@ -1 +1 @@
(dirs bin src test trace_ocamlopt)
(dirs bin src test trace_ocamlopt examples)
3 changes: 2 additions & 1 deletion dune-project
Original file line number Diff line number Diff line change
Expand Up @@ -14,4 +14,5 @@
(synopsis "Streaming client for Memprof")
(description "Generates compact traces of a program's memory use.")
(depends
(ocaml (>= 4.11.0))))
domainslib
(ocaml (or (>= 5.3.0) (and (>= 4.11.0) (< 5.0))))))
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could this consider the ocaml-variants 5.2.0+statmemprof?

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I was treating that variant as an alpha release until 5.3.0 was available. I don't see much appeal in supporting that now that 5.3 is available.

3 changes: 3 additions & 0 deletions examples/dune
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
(executables
(libraries memtrace domainslib)
(names fib_par))
28 changes: 28 additions & 0 deletions examples/fib_par.ml
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
(* fib_par.ml *)
let num_domains = try int_of_string Sys.argv.(1) with _ -> 1
let n = try int_of_string Sys.argv.(2) with _ -> 1

(* Sequential Fibonacci *)
let rec fib n =
if n < 2 then 1 else fib (n - 1) + fib (n - 2)

module T = Domainslib.Task

let rec fib_par pool n =
let _ = Buffer.create 10000 in
if n > 20 then begin
let a = T.async pool (fun _ -> fib_par pool (n-1)) in
let b = T.async pool (fun _ -> fib_par pool (n-2)) in
T.await pool a + T.await pool b
end else
(* Call sequential Fibonacci if the available work is small *)
fib n

let main () =
Memtrace.trace_if_requested ~context:"fib" ();
let pool = T.setup_pool ~num_domains:(num_domains - 1) () in
let res = T.run pool (fun _ -> fib_par pool n) in
T.teardown_pool pool;
Printf.printf "fib(%d) = %d\n" n res

let _ = main ()
3 changes: 2 additions & 1 deletion memtrace.opam
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,8 @@ homepage: "https://github.com/janestreet/memtrace"
bug-reports: "https://github.com/janestreet/memtrace/issues"
depends: [
"dune" {>= "2.3"}
"ocaml" {>= "4.11.0"}
"domainslib"
"ocaml" {>= "5.3.0" | >= "4.11.0" & < "5.0"}
]
build: [
["dune" "subst"] {pinned}
Expand Down
121 changes: 57 additions & 64 deletions src/memprof_tracer.ml
Original file line number Diff line number Diff line change
@@ -1,53 +1,42 @@
type t =
{ mutable locked : bool;
mutable locked_ext : bool;
mutable failed : bool;
mutable stopped : bool;
{ failed : bool Atomic.t;
stopped : bool Atomic.t;
mutex : Mutex.t;
report_exn : exn -> unit;
trace : Trace.Writer.t;
ext_sampler : Geometric_sampler.t; }

let curr_active_tracer : t option ref = ref None
let curr_active_tracer : t option Atomic.t = Atomic.make None

let active_tracer () = !curr_active_tracer
let active_tracer () = Atomic.get curr_active_tracer

let bytes_before_ext_sample = ref max_int
let bytes_before_ext_sample = Atomic.make max_int

let draw_sampler_bytes t =
Geometric_sampler.draw t.ext_sampler * (Sys.word_size / 8)

let[@inline never] rec lock_tracer s =
if s.locked then
if s.locked_ext then false
else (Thread.yield (); lock_tracer s)
else if s.failed then
let[@inline never] lock_tracer s =
if Atomic.get s.failed then
false
else
(s.locked <- true; true)

let[@inline never] rec lock_tracer_ext s =
if s.locked then
(Thread.yield (); lock_tracer_ext s)
else if s.failed then
false
else
(s.locked <- true; s.locked_ext <- true; true)
(* During external allocations or closing, a thread may try to obtain
a lock it already holds. In that case, Mutex.lock will throw
an error and we can ignore it. *)
else begin
try
Mutex.lock s.mutex;
true
with
| Sys_error _ -> false
end

let[@inline never] unlock_tracer s =
assert (s.locked && not s.locked_ext && not s.failed);
s.locked <- false

let[@inline never] unlock_tracer_ext s =
assert (s.locked && s.locked_ext && not s.failed);
s.locked_ext <- false;
s.locked <- false
assert (not (Atomic.get s.failed));
Mutex.unlock s.mutex

let[@inline never] mark_failed s e =
assert (s.locked && not s.failed);
s.failed <- true;
s.locked <- false;
s.locked_ext <- false;
s.report_exn e
if (Atomic.compare_and_set s.failed false true) then
s.report_exn e;
Mutex.unlock s.mutex

let default_report_exn e =
match e with
Expand All @@ -63,7 +52,8 @@ let default_report_exn e =

let start ?(report_exn=default_report_exn) ~sampling_rate trace =
let ext_sampler = Geometric_sampler.make ~sampling_rate () in
let s = { trace; locked = false; locked_ext = false; stopped = false; failed = false;
let mutex = Mutex.create () in
let s = { trace; mutex; stopped = Atomic.make false; failed = Atomic.make false;
report_exn; ext_sampler } in
let tracker : (_,_) Gc.Memprof.tracker = {
alloc_minor = (fun info ->
Expand All @@ -75,8 +65,11 @@ let start ?(report_exn=default_report_exn) ~sampling_rate trace =
~callstack:info.callstack
with
| r -> unlock_tracer s; Some r
| exception e -> mark_failed s e; None
end else None);
| exception e ->
mark_failed s e;
None
end
else None);
alloc_major = (fun info ->
if lock_tracer s then begin
match Trace.Writer.put_alloc_with_raw_backtrace trace (Trace.Timestamp.now ())
Expand Down Expand Up @@ -104,68 +97,68 @@ let start ?(report_exn=default_report_exn) ~sampling_rate trace =
match Trace.Writer.put_collect trace (Trace.Timestamp.now ()) id with
| () -> unlock_tracer s
| exception e -> mark_failed s e) } in
curr_active_tracer := Some s;
bytes_before_ext_sample := draw_sampler_bytes s;
Gc.Memprof.start
~sampling_rate
~callstack_size:max_int
tracker;
Atomic.set curr_active_tracer (Some s);
Atomic.set bytes_before_ext_sample (draw_sampler_bytes s);
let _profile = Gc.Memprof.start ~sampling_rate ~callstack_size:max_int tracker in
s

let stop s =
if not s.stopped then begin
s.stopped <- true;
(* Call stop to stop sampling on the current profile.
Promotion and deallocation callbacks from a profile may run
after stop is called, however we ignore these callbacks when
stopping.
*)
if (Atomic.compare_and_set s.stopped false true) then begin
Gc.Memprof.stop ();
if lock_tracer s then begin
try Trace.Writer.close s.trace with e -> mark_failed s e
try Trace.Writer.close s.trace with e ->
(Atomic.set s.failed true; s.report_exn e);
Mutex.unlock s.mutex
end;
curr_active_tracer := None
Atomic.set curr_active_tracer None
end

let[@inline never] ext_alloc_slowpath ~bytes =
match !curr_active_tracer with
| None -> bytes_before_ext_sample := max_int; None
match Atomic.get curr_active_tracer with
| None -> Atomic.set bytes_before_ext_sample max_int; None
| Some s ->
if lock_tracer_ext s then begin
if lock_tracer s then begin
match
let bytes_per_word = Sys.word_size / 8 in
(* round up to an integer number of words *)
let size_words = (bytes + bytes_per_word - 1) / bytes_per_word in
let samples = ref 0 in
while !bytes_before_ext_sample <= 0 do
bytes_before_ext_sample :=
!bytes_before_ext_sample + draw_sampler_bytes s;
incr samples
let samples = Atomic.make 0 in
while Atomic.get bytes_before_ext_sample <= 0 do
ignore (Atomic.fetch_and_add bytes_before_ext_sample (draw_sampler_bytes s));
Atomic.incr samples
done;
assert (!samples > 0);
assert (Atomic.get samples > 0);
let callstack = Printexc.get_callstack max_int in
Some (Trace.Writer.put_alloc_with_raw_backtrace s.trace
(Trace.Timestamp.now ())
~length:size_words
~nsamples:!samples
~nsamples:(Atomic.get samples)
Comment on lines +130 to +140
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Seems that samples can in fact be a ref as it was originally, because it's only used locally.

~source:External
~callstack)
with
| r -> unlock_tracer_ext s; r
| r -> unlock_tracer s; r
| exception e -> mark_failed s e; None
end else None


type ext_token = Trace.Obj_id.t

let ext_alloc ~bytes =
let n = !bytes_before_ext_sample - bytes in
bytes_before_ext_sample := n;
let n = Atomic.fetch_and_add bytes_before_ext_sample (- bytes) in
if n <= 0 then ext_alloc_slowpath ~bytes else None

let ext_free id =
match !curr_active_tracer with
match Atomic.get curr_active_tracer with
| None -> ()
| Some s ->
if lock_tracer_ext s then begin
if lock_tracer s then begin
match
Trace.Writer.put_collect s.trace (Trace.Timestamp.now ()) id
with
| () -> unlock_tracer_ext s; ()
| () -> unlock_tracer s; ()
| exception e -> mark_failed s e; ()
end
2 changes: 1 addition & 1 deletion src/memtrace.ml
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ let trace_if_requested ?context ?sampling_rate () =
let sampling_rate =
match Sys.getenv_opt "MEMTRACE_RATE" with
| Some rate -> check_rate (float_of_string_opt rate)
| None | Some "" ->
| None ->
match sampling_rate with
| Some _ -> check_rate sampling_rate
| None -> default_sampling_rate
Expand Down
13 changes: 9 additions & 4 deletions src/trace.ml
Original file line number Diff line number Diff line change
Expand Up @@ -348,10 +348,15 @@ let make_writer dest ?getpid (info : Info.t) =

module IntTbl = Hashtbl.MakeSeeded (struct
type t = int
let seeded_hash _seed (id : t) =

let hash _seed (id : t) =
let h = id * 189696287 in
h lxor (h lsr 23)
let hash = seeded_hash

(* Required for OCaml >= 5.0.0, but causes errors for older compilers
because it is an unused value declaration. *)
let [@warning "-32"] seeded_hash = hash

let equal (a : t) (b : t) = a = b
end)

Expand Down Expand Up @@ -754,7 +759,7 @@ module Writer = struct
(* Unfortunately, efficient access to the backtrace is not possible
with the current Printexc API, even though internally it's an int
array. For now, wave the Obj.magic wand. There's a PR to fix this:
https://github.com/ocaml/ocaml/pull/9663 *)
https://github.com/ocaml/ocaml/pull/9663 *) (* TODO Fix this since 4.12*)
let location_code_array_of_raw_backtrace (b : Printexc.raw_backtrace) =
(Obj.magic b : Location_code.t array)

Expand All @@ -768,7 +773,7 @@ module Writer = struct
let slot = convert_raw_backtrace_slot slot in
match Slot.location slot with
| None -> tail
| Some { filename; line_number; start_char; end_char } ->
| Some { filename; line_number; start_char; end_char; _} ->
let defname = match Slot.name slot with Some n -> n | _ -> "??" in
{ filename; line=line_number; start_char; end_char; defname }::tail in
get_locations (get_raw_backtrace_slot callstack i) |> List.rev
Expand Down