diff --git a/tools/run_tests/run_microbenchmark.py b/tools/run_tests/run_microbenchmark.py
index a9a563c1bec174b54e91b88fad7e655341b39e18..1cafffb52dd2edae7d8f72dc90764b639ced7a13 100755
--- a/tools/run_tests/run_microbenchmark.py
+++ b/tools/run_tests/run_microbenchmark.py
@@ -91,7 +91,9 @@ def collect_latency(bm_name, args):
                                        '--benchmark_list_tests']).splitlines():
     link(line, '%s.txt' % fnize(line))
     benchmarks.append(
-        jobset.JobSpec(['bins/basicprof/%s' % bm_name, '--benchmark_filter=^%s$' % line],
+        jobset.JobSpec(['bins/basicprof/%s' % bm_name,
+                        '--benchmark_filter=^%s$' % line,
+                        '--benchmark_min_time=0.05'],
                        environ={'LATENCY_TRACE': '%s.trace' % fnize(line)}))
     profile_analysis.append(
         jobset.JobSpec([sys.executable,
@@ -103,7 +105,7 @@ def collect_latency(bm_name, args):
     # consume upwards of five gigabytes of ram in some cases, and so analysing
     # hundreds of them at once is impractical -- but we want at least some
     # concurrency or the work takes too long
-    if len(benchmarks) >= min(4, multiprocessing.cpu_count()):
+    if len(benchmarks) >= min(16, multiprocessing.cpu_count()):
       # run up to half the cpu count: each benchmark can use up to two cores
       # (one for the microbenchmark, one for the data flush)
       jobset.run(benchmarks, maxjobs=max(1, multiprocessing.cpu_count()/2),
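
The hunks above make two behavioural changes to collect_latency: each basicprof invocation now also passes --benchmark_min_time=0.05, which shortens every run relative to Google Benchmark's default minimum time, and the queued benchmark/trace-analysis jobs are flushed in batches of up to 16 (still bounded by the machine's core count) instead of 4. The sketch below is a minimal, standalone illustration of the resulting loop only; it substitutes subprocess calls for the jobset machinery and a hypothetical sanitize() helper for fnize(), and omits the LATENCY_TRACE environment and profile-analysis steps.

import multiprocessing
import subprocess

def sanitize(name):
    # illustrative stand-in for fnize(): make a benchmark name filename-safe
    return ''.join(c if c.isalnum() else '_' for c in name)

def run_latency_benchmarks(bm_name):
    # simplified sketch of collect_latency after this change (not the real code)
    listing = subprocess.check_output(
        ['bins/basicprof/%s' % bm_name, '--benchmark_list_tests'])
    pending = []
    for line in listing.decode().splitlines():
        # one anchored filter per benchmark, with the shorter minimum run time
        pending.append(['bins/basicprof/%s' % bm_name,
                        '--benchmark_filter=^%s$' % line,
                        '--benchmark_min_time=0.05'])
        # trace analysis is memory hungry, so work is flushed in batches of
        # up to 16 pending benchmarks (or fewer on small machines)
        if len(pending) >= min(16, multiprocessing.cpu_count()):
            for cmd in pending:
                subprocess.check_call(cmd)
            pending = []
    # run whatever is left over after the loop
    for cmd in pending:
        subprocess.check_call(cmd)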