diff --git a/tools/profile_analyzer/profile_analyzer.py b/tools/profile_analyzer/profile_analyzer.py
index 00b107946689f33838503bc54c1803983f06997b..a78f84e5e6496b3aa450f5e36b30f0363da2057d 100755
--- a/tools/profile_analyzer/profile_analyzer.py
+++ b/tools/profile_analyzer/profile_analyzer.py
@@ -39,8 +39,8 @@ Usage:
 
 import collections
 import itertools
+import math
 import re
-import sys
 
 # Create a regex to parse output of the C core basic profiler,
 # as defined in src/core/profiling/basic_timers.c.
@@ -64,19 +64,68 @@ class ImportantMark(object):
   def entry(self):
     return self._entry
 
-  def append_post_entry(self, entry):
-    if self._n > 0:
-      self._post_stack.append(entry)
+  @property
+  def max_depth(self):
+    return self._n
+
+  def append_post_entry(self, post_entry):
+    if self._n > 0 and post_entry.thread == self._entry.thread:
+      self._post_stack.append(post_entry)
       self._n -= 1
 
   def get_deltas(self):
     pre_and_post_stacks = itertools.chain(self._pre_stack, self._post_stack)
     return collections.OrderedDict((stack_entry,
-                                   (self._entry.time - stack_entry.time))
+                                   abs(self._entry.time - stack_entry.time))
                                    for stack_entry in pre_and_post_stacks)
 
-def entries():
-  for line in sys.stdin:
+
+def print_block_statistics(block_times):
+  print '{:<12s}  {:>12s} {:>12s} {:>12s} {:>12s}'.format(
+          'Block tag', '50th p.', '90th p.', '95th p.', '99th p.')
+  for tag, tag_times in sorted(block_times.iteritems()):
+    times = sorted(tag_times)
+    print '{:<12d}: {:>12.3f} {:>12.3f} {:>12.3f} {:>12.3f}'.format(
+            tag, percentile(times, 50), percentile(times, 90),
+            percentile(times, 95), percentile(times, 99))
+  print
+
+def print_grouped_imark_statistics(group_key, imarks_group):
+  values = collections.OrderedDict()
+  for imark in imarks_group:
+    deltas = imark.get_deltas()
+    for relative_entry, time_delta_us in deltas.iteritems():
+      key = '{tag} {type} ({file}:{line})'.format(**relative_entry._asdict())
+      l = values.setdefault(key, list())
+      l.append(time_delta_us)
+
+  print group_key
+  print '{:<50s}  {:>12s} {:>12s} {:>12s} {:>12s}'.format(
+        'Relative mark', '50th p.', '90th p.', '95th p.', '99th p.')
+  for key, time_values in values.iteritems():
+    time_values = sorted(time_values)
+    print '{:<50s}: {:>12.3f} {:>12.3f} {:>12.3f} {:>12.3f}'.format(
+          key, percentile(time_values, 50), percentile(time_values, 90),
+          percentile(time_values, 95), percentile(time_values, 99))
+  print
+
+def percentile(vals, percent):
+  """ Calculates the interpolated percentile given a sorted sequence and a
+  percent (in the usual 0-100 range)."""
+  assert vals, "Empty input sequence."
+  percent /= 100.0
+  k = (len(vals)-1) * percent
+  f = math.floor(k)
+  c = math.ceil(k)
+  if f == c:
+      return vals[int(k)]
+  # else, interpolate
+  d0 = vals[int(f)] * (c-k)
+  d1 = vals[int(c)] * (k-f)
+  return d0 + d1
+
+def entries(f):
+  for line in f:
     m = _RE_LINE.match(line)
     if not m: continue
     yield Entry(time=float(m.group(1)),
@@ -87,53 +136,53 @@ def entries():
                 file=m.group(6),
                 line=m.group(7))
 
-threads = collections.defaultdict(lambda: collections.defaultdict(list))
-times = collections.defaultdict(list)
-
-# Indexed by the mark's tag. Items in the value list correspond to the mark in
-# different stack situations.
-important_marks = collections.defaultdict(list)
-
-for entry in entries():
-  thread = threads[entry.thread]
-  if entry.type == '{':
-    thread[entry.tag].append(entry)
-  if entry.type == '!':
-    # Save a snapshot of the current stack inside a new ImportantMark instance.
-    # Get all entries with type '{' from "thread".
-    stack = [e for entries_for_tag in thread.values()
-               for e in entries_for_tag if e.type == '{']
-    important_marks[entry.tag].append(ImportantMark(entry, stack))
-  elif entry.type == '}':
-    last = thread[entry.tag].pop()
-    times[entry.tag].append(entry.time - last.time)
-    # Update accounting for important marks.
-    for imarks_for_tag in important_marks.itervalues():
-      for imark in imarks_for_tag:
-        imark.append_post_entry(entry)
-
-def percentile(vals, pct):
-  return sorted(vals)[int(len(vals) * pct / 100.0)]
-
-print 'tag 50%/90%/95%/99% us'
-for tag in sorted(times.keys()):
-  vals = times[tag]
-  print '%d %.2f/%.2f/%.2f/%.2f' % (tag,
-                                    percentile(vals, 50),
-                                    percentile(vals, 90),
-                                    percentile(vals, 95),
-                                    percentile(vals, 99))
-
-print
-print 'Important marks:'
-print '================'
-for tag, imark_for_tag in important_marks.iteritems():
-  for imark in imarks_for_tag:
-    deltas = imark.get_deltas()
-    print '{tag} @ {file}:{line}'.format(**imark.entry._asdict())
-    for entry, time_delta_us in deltas.iteritems():
-      format_dict = entry._asdict()
-      format_dict['time_delta_us']  = time_delta_us
-      print '{tag} {type} ({file}:{line}): {time_delta_us:12.3f} us'.format(
-          **format_dict)
-    print
+def main(f):
+  percentiles = (50, 90, 95, 99)
+  threads = collections.defaultdict(lambda: collections.defaultdict(list))
+  times = collections.defaultdict(list)
+  important_marks = collections.defaultdict(list)
+  stack_depth = collections.defaultdict(int)
+  for entry in entries(f):
+    thread = threads[entry.thread]
+    if entry.type == '{':
+      thread[entry.tag].append(entry)
+      stack_depth[entry.thread] += 1
+    if entry.type == '!':
+      # Save a snapshot of the current stack inside a new ImportantMark instance.
+      # Get all entries _for any tag in the thread_.
+      stack = [e for entries_for_tag in thread.itervalues()
+                 for e in entries_for_tag]
+      imark_group_key = '{tag}/{thread}@{file}:{line}'.format(**entry._asdict())
+      important_marks[imark_group_key].append(ImportantMark(entry, stack))
+    elif entry.type == '}':
+      last = thread[entry.tag].pop()
+      times[entry.tag].append(entry.time - last.time)
+      # only access the last "depth" imarks for the tag.
+      depth = stack_depth[entry.thread]
+      for imarks_group in important_marks.itervalues():
+        for imark in imarks_group[-depth:]:
+          # if at a '}' deeper than where the current "imark" was found, ignore.
+          if depth > imark.max_depth: continue
+          imark.append_post_entry(entry)
+      stack_depth[entry.thread] -= 1
+
+  print
+  print 'Block marks:'
+  print '============'
+  print_block_statistics(times)
+
+  print
+  print 'Important marks:'
+  print '================'
+  for group_key, imarks_group in important_marks.iteritems():
+    print_grouped_imark_statistics(group_key, imarks_group)
+
+
+if __name__ == '__main__':
+  # If invoked without arguments, read off sys.stdin. If one argument is given,
+  # take it as a file name and open it for reading.
+  import sys
+  f = sys.stdin
+  if len(sys.argv) == 2:
+    f = open(sys.argv[1], 'r')
+  main(f)