diff --git a/doc/environment_variables.md b/doc/environment_variables.md
index 513936d6600fbc1562520b9a1c4bf7956f6c1e92..47efb3a1d874db1864d44082d834ec67c8380e43 100644
--- a/doc/environment_variables.md
+++ b/doc/environment_variables.md
@@ -55,6 +55,7 @@ some configuration as environment variables that can be set.
   - queue_timeout
   - server_channel - lightweight trace of significant server channel events
   - secure_endpoint - traces bytes flowing through encrypted channels
+  - timer - traces timers (alarms) in gRPC internals
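+  - timer_check - more detailed trace of the timer check path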
   - transport_security - traces metadata about secure channel establishment
   - tcp - traces bytes in and out of a channel
 
diff --git a/include/grpc/impl/codegen/atm_gcc_atomic.h b/include/grpc/impl/codegen/atm_gcc_atomic.h
index a486258c77f4fedd95614b5d5df704d56775ce9c..d52975ce976ab259ed865570f7a5eaae9370ed29 100644
--- a/include/grpc/impl/codegen/atm_gcc_atomic.h
+++ b/include/grpc/impl/codegen/atm_gcc_atomic.h
@@ -39,6 +39,7 @@
 #include <grpc/impl/codegen/port_platform.h>
 
 typedef intptr_t gpr_atm;
+#define GPR_ATM_MAX INTPTR_MAX
 
 #ifdef GPR_LOW_LEVEL_COUNTERS
 extern gpr_atm gpr_counter_atm_cas;
diff --git a/include/grpc/impl/codegen/atm_gcc_sync.h b/include/grpc/impl/codegen/atm_gcc_sync.h
index 946545a671d875504eeedd0eb412446a4eba9047..b537e48f0f3ba9aada1c38d3823d2844b1d4b539 100644
--- a/include/grpc/impl/codegen/atm_gcc_sync.h
+++ b/include/grpc/impl/codegen/atm_gcc_sync.h
@@ -39,6 +39,7 @@
 #include <grpc/impl/codegen/port_platform.h>
 
 typedef intptr_t gpr_atm;
+#define GPR_ATM_MAX INTPTR_MAX
 
 #define GPR_ATM_COMPILE_BARRIER_() __asm__ __volatile__("" : : : "memory")
 
diff --git a/include/grpc/impl/codegen/atm_windows.h b/include/grpc/impl/codegen/atm_windows.h
index 0ab70b95c4a44fc0a541e5238b387d9833083fc8..b8f63da7587905ed9502b40c48ba91fa6625e445 100644
--- a/include/grpc/impl/codegen/atm_windows.h
+++ b/include/grpc/impl/codegen/atm_windows.h
@@ -38,6 +38,7 @@
 #include <grpc/impl/codegen/port_platform.h>
 
 typedef intptr_t gpr_atm;
+#define GPR_ATM_MAX INTPTR_MAX
 
 #define gpr_atm_full_barrier MemoryBarrier
 
diff --git a/include/grpc/impl/codegen/port_platform.h b/include/grpc/impl/codegen/port_platform.h
index 086394648fb9d48f3e73045184283464b82d2669..d525083cd04d67cc410738756ad855f352bf7d9d 100644
--- a/include/grpc/impl/codegen/port_platform.h
+++ b/include/grpc/impl/codegen/port_platform.h
@@ -375,8 +375,10 @@ typedef unsigned __int64 uint64_t;
 #ifndef GRPC_MUST_USE_RESULT
 #if defined(__GNUC__) && !defined(__MINGW32__)
 #define GRPC_MUST_USE_RESULT __attribute__((warn_unused_result))
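+/* GPR_ALIGN_STRUCT(n): request n-byte alignment for a struct definition;
+   expands to nothing on compilers without __attribute__((aligned)) */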
+#define GPR_ALIGN_STRUCT(n) __attribute__((aligned(n)))
 #else
 #define GRPC_MUST_USE_RESULT
+#define GPR_ALIGN_STRUCT(n)
 #endif
 #endif
 
diff --git a/include/grpc/support/tls.h b/include/grpc/support/tls.h
index a45e1f0a4d557502966ba0b8dba94528ce0370ef..5365449f0dab192b4467be32916b34882f380757 100644
--- a/include/grpc/support/tls.h
+++ b/include/grpc/support/tls.h
@@ -58,7 +58,7 @@
      gpr_tls_set(&foo, new_value);
 
    Accessing a thread local:
-     current_value = gpr_tls_get(&foo, value);
+     current_value = gpr_tls_get(&foo);
 
    ALL functions here may be implemented as macros. */
 
diff --git a/src/core/lib/iomgr/ev_epoll_linux.c b/src/core/lib/iomgr/ev_epoll_linux.c
index f6372c0f3f6425fc29407d891c73c2756dc12bed..7014b98349525c4fd3c55ff999f98c3a30aa322c 100644
--- a/src/core/lib/iomgr/ev_epoll_linux.c
+++ b/src/core/lib/iomgr/ev_epoll_linux.c
@@ -56,6 +56,7 @@
 
 #include "src/core/lib/iomgr/ev_posix.h"
 #include "src/core/lib/iomgr/iomgr_internal.h"
+#include "src/core/lib/iomgr/timer.h"
 #include "src/core/lib/iomgr/wakeup_fd_posix.h"
 #include "src/core/lib/iomgr/workqueue.h"
 #include "src/core/lib/profiling/timers.h"
@@ -1467,8 +1468,9 @@ static int poll_deadline_to_millis_timeout(gpr_timespec deadline,
     return 0;
   }
   timeout = gpr_time_sub(deadline, now);
-  return gpr_time_to_millis(gpr_time_add(
+  int millis = gpr_time_to_millis(gpr_time_add(
       timeout, gpr_time_from_nanos(GPR_NS_PER_MS - 1, GPR_TIMESPAN)));
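+  /* clamp to at least 1ms so a deadline that rounds down to a zero timeout
+     cannot degenerate into a busy poll loop */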
+  return millis >= 1 ? millis : 1;
 }
 
 static void fd_become_readable(grpc_exec_ctx *exec_ctx, grpc_fd *fd,
@@ -1650,6 +1652,7 @@ static void pollset_work_and_unlock(grpc_exec_ctx *exec_ctx,
     for (int i = 0; i < ep_rv; ++i) {
       void *data_ptr = ep_ev[i].data.ptr;
       if (data_ptr == &global_wakeup_fd) {
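+        /* the kick may signal a new earliest timer deadline; drop this
+           thread's cached minimum so the next timer check re-reads it */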
+        grpc_timer_consume_kick();
         append_error(error, grpc_wakeup_fd_consume_wakeup(&global_wakeup_fd),
                      err_desc);
       } else if (data_ptr == &pi->workqueue_wakeup_fd) {
diff --git a/src/core/lib/iomgr/ev_poll_posix.c b/src/core/lib/iomgr/ev_poll_posix.c
index ca6e85561152a0b22ce4d8a407c7bbb7f3cd5f29..d90f22336225399096e8a1a4efffcf968c198db7 100644
--- a/src/core/lib/iomgr/ev_poll_posix.c
+++ b/src/core/lib/iomgr/ev_poll_posix.c
@@ -52,6 +52,7 @@
 #include <grpc/support/useful.h>
 
 #include "src/core/lib/iomgr/iomgr_internal.h"
+#include "src/core/lib/iomgr/timer.h"
 #include "src/core/lib/iomgr/wakeup_fd_cv.h"
 #include "src/core/lib/iomgr/wakeup_fd_posix.h"
 #include "src/core/lib/profiling/timers.h"
@@ -1006,6 +1007,7 @@ static grpc_error *pollset_work(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset,
         }
       } else {
         if (pfds[0].revents & POLLIN_CHECK) {
+          grpc_timer_consume_kick();
           work_combine_error(&error,
                              grpc_wakeup_fd_consume_wakeup(&global_wakeup_fd));
         }
diff --git a/src/core/lib/iomgr/timer.h b/src/core/lib/iomgr/timer.h
index d84a278b183d2c2dc95af3faa7ab053e84c661ca..e0338f93c7d6f7a41a88ff4c823e68821ee59e58 100644
--- a/src/core/lib/iomgr/timer.h
+++ b/src/core/lib/iomgr/timer.h
@@ -101,6 +101,9 @@ bool grpc_timer_check(grpc_exec_ctx *exec_ctx, gpr_timespec now,
 void grpc_timer_list_init(gpr_timespec now);
 void grpc_timer_list_shutdown(grpc_exec_ctx *exec_ctx);
 
+/* Consume a kick issued by grpc_kick_poller */
+void grpc_timer_consume_kick(void);
+
 /* the following must be implemented by each iomgr implementation */
 
 void grpc_kick_poller(void);
diff --git a/src/core/lib/iomgr/timer_generic.c b/src/core/lib/iomgr/timer_generic.c
index e53c8019299e0f8031edbb443909294747a9680b..d8e60684312bda411aed4c2e5fa73336109ab2f8 100644
--- a/src/core/lib/iomgr/timer_generic.c
+++ b/src/core/lib/iomgr/timer_generic.c
@@ -37,9 +37,13 @@
 
 #include "src/core/lib/iomgr/timer.h"
 
+#include <grpc/support/alloc.h>
 #include <grpc/support/log.h>
+#include <grpc/support/string_util.h>
 #include <grpc/support/sync.h>
+#include <grpc/support/tls.h>
 #include <grpc/support/useful.h>
+#include "src/core/lib/debug/trace.h"
 #include "src/core/lib/iomgr/time_averaged_stats.h"
 #include "src/core/lib/iomgr/timer_heap.h"
 #include "src/core/lib/support/spinlock.h"
@@ -52,12 +56,15 @@
 #define MIN_QUEUE_WINDOW_DURATION 0.01
 #define MAX_QUEUE_WINDOW_DURATION 1
 
+int grpc_timer_trace = 0;
+int grpc_timer_check_trace = 0;
+
 typedef struct {
   gpr_mu mu;
   grpc_time_averaged_stats stats;
   /* All and only timers with deadlines <= this will be in the heap. */
-  gpr_timespec queue_deadline_cap;
-  gpr_timespec min_deadline;
+  gpr_atm queue_deadline_cap;
+  gpr_atm min_deadline;
   /* Index in the g_shard_queue */
   uint32_t shard_queue_index;
   /* This holds all timers with deadlines < queue_deadline_cap. Timers in this
@@ -67,38 +74,92 @@ typedef struct {
   grpc_timer list;
 } shard_type;
 
-/* Protects g_shard_queue */
-static gpr_mu g_mu;
-/* Allow only one run_some_expired_timers at once */
-static gpr_spinlock g_checker_mu = GPR_SPINLOCK_STATIC_INITIALIZER;
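+/* state shared by all shards, kept in a single cache-line-aligned struct so
+   that updating it does not cause false sharing with neighboring data */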
+struct shared_mutables {
+  gpr_atm min_timer;
+  /* Allow only one run_some_expired_timers at once */
+  gpr_spinlock checker_mu;
+  bool initialized;
+  /* Protects g_shard_queue */
+  gpr_mu mu;
+} GPR_ALIGN_STRUCT(GPR_CACHELINE_SIZE);
+
+static struct shared_mutables g_shared_mutables = {
+    .checker_mu = GPR_SPINLOCK_STATIC_INITIALIZER, .initialized = false,
+};
 static gpr_clock_type g_clock_type;
 static shard_type g_shards[NUM_SHARDS];
-/* Protected by g_mu */
+/* Protected by g_shared_mutables.mu */
 static shard_type *g_shard_queue[NUM_SHARDS];
-static bool g_initialized = false;
+static gpr_timespec g_start_time;
+
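+/* the last value of g_shared_mutables.min_timer seen by this thread, cached
+   so that timer checks can usually avoid touching the shared cache line */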
+GPR_TLS_DECL(g_last_seen_min_timer);
+
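+/* a + b, saturating at GPR_ATM_MAX instead of overflowing (b non-negative) */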
+static gpr_atm saturating_add(gpr_atm a, gpr_atm b) {
+  if (a > GPR_ATM_MAX - b) {
+    return GPR_ATM_MAX;
+  }
+  return a + b;
+}
+
+static int run_some_expired_timers(grpc_exec_ctx *exec_ctx, gpr_atm now,
+                                   gpr_atm *next, grpc_error *error);
+
+static gpr_timespec dbl_to_ts(double d) {
+  gpr_timespec ts;
+  ts.tv_sec = (int64_t)d;
+  ts.tv_nsec = (int32_t)(1e9 * (d - (double)ts.tv_sec));
+  ts.clock_type = GPR_TIMESPAN;
+  return ts;
+}
+
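+/* convert a timespec into milliseconds since g_start_time, rounding up;
+   used for deadlines so that a timer is never considered due early */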
+static gpr_atm timespec_to_atm_round_up(gpr_timespec ts) {
+  ts = gpr_time_sub(ts, g_start_time);
+  double x = GPR_MS_PER_SEC * (double)ts.tv_sec +
+             (double)ts.tv_nsec / GPR_NS_PER_MS +
+             (double)(GPR_NS_PER_SEC - 1) / (double)GPR_NS_PER_SEC;
+  if (x < 0) return 0;
+  if (x > GPR_ATM_MAX) return GPR_ATM_MAX;
+  return (gpr_atm)x;
+}
+
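+/* as above, but rounding down; used for 'now' so that, combined with the
+   round-up above, timers never fire before their requested deadline */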
+static gpr_atm timespec_to_atm_round_down(gpr_timespec ts) {
+  ts = gpr_time_sub(ts, g_start_time);
+  double x =
+      GPR_MS_PER_SEC * (double)ts.tv_sec + (double)ts.tv_nsec / GPR_NS_PER_MS;
+  if (x < 0) return 0;
+  if (x > GPR_ATM_MAX) return GPR_ATM_MAX;
+  return (gpr_atm)x;
+}
 
-static int run_some_expired_timers(grpc_exec_ctx *exec_ctx, gpr_timespec now,
-                                   gpr_timespec *next, grpc_error *error);
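+/* inverse conversion: a millisecond offset back into an absolute timespec */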
+static gpr_timespec atm_to_timespec(gpr_atm x) {
+  return gpr_time_add(g_start_time, dbl_to_ts((double)x / 1000.0));
+}
 
-static gpr_timespec compute_min_deadline(shard_type *shard) {
+static gpr_atm compute_min_deadline(shard_type *shard) {
   return grpc_timer_heap_is_empty(&shard->heap)
-             ? shard->queue_deadline_cap
+             ? saturating_add(shard->queue_deadline_cap, 1)
              : grpc_timer_heap_top(&shard->heap)->deadline;
 }
 
 void grpc_timer_list_init(gpr_timespec now) {
   uint32_t i;
 
-  g_initialized = true;
-  gpr_mu_init(&g_mu);
+  g_shared_mutables.initialized = true;
+  gpr_mu_init(&g_shared_mutables.mu);
   g_clock_type = now.clock_type;
+  g_start_time = now;
+  g_shared_mutables.min_timer = timespec_to_atm_round_down(now);
+  gpr_tls_init(&g_last_seen_min_timer);
+  gpr_tls_set(&g_last_seen_min_timer, 0);
+  grpc_register_tracer("timer", &grpc_timer_trace);
+  grpc_register_tracer("timer_check", &grpc_timer_check_trace);
 
   for (i = 0; i < NUM_SHARDS; i++) {
     shard_type *shard = &g_shards[i];
     gpr_mu_init(&shard->mu);
     grpc_time_averaged_stats_init(&shard->stats, 1.0 / ADD_DEADLINE_SCALE, 0.1,
                                   0.5);
-    shard->queue_deadline_cap = now;
+    shard->queue_deadline_cap = g_shared_mutables.min_timer;
     shard->shard_queue_index = i;
     grpc_timer_heap_init(&shard->heap);
     shard->list.next = shard->list.prev = &shard->list;
@@ -110,29 +171,23 @@ void grpc_timer_list_init(gpr_timespec now) {
 void grpc_timer_list_shutdown(grpc_exec_ctx *exec_ctx) {
   int i;
   run_some_expired_timers(
-      exec_ctx, gpr_inf_future(g_clock_type), NULL,
+      exec_ctx, GPR_ATM_MAX, NULL,
       GRPC_ERROR_CREATE_FROM_STATIC_STRING("Timer list shutdown"));
   for (i = 0; i < NUM_SHARDS; i++) {
     shard_type *shard = &g_shards[i];
     gpr_mu_destroy(&shard->mu);
     grpc_timer_heap_destroy(&shard->heap);
   }
-  gpr_mu_destroy(&g_mu);
-  g_initialized = false;
+  gpr_mu_destroy(&g_shared_mutables.mu);
+  gpr_tls_destroy(&g_last_seen_min_timer);
+  g_shared_mutables.initialized = false;
 }
 
 static double ts_to_dbl(gpr_timespec ts) {
   return (double)ts.tv_sec + 1e-9 * ts.tv_nsec;
 }
 
-static gpr_timespec dbl_to_ts(double d) {
-  gpr_timespec ts;
-  ts.tv_sec = (int64_t)d;
-  ts.tv_nsec = (int32_t)(1e9 * (d - (double)ts.tv_sec));
-  ts.clock_type = GPR_TIMESPAN;
-  return ts;
-}
-
+/* link 'timer' at the tail of the list headed by 'head' */
 static void list_join(grpc_timer *head, grpc_timer *timer) {
   timer->next = head;
   timer->prev = head->prev;
@@ -158,15 +213,13 @@ static void swap_adjacent_shards_in_queue(uint32_t first_shard_queue_index) {
 
 static void note_deadline_change(shard_type *shard) {
   while (shard->shard_queue_index > 0 &&
-         gpr_time_cmp(
-             shard->min_deadline,
-             g_shard_queue[shard->shard_queue_index - 1]->min_deadline) < 0) {
+         shard->min_deadline <
+             g_shard_queue[shard->shard_queue_index - 1]->min_deadline) {
     swap_adjacent_shards_in_queue(shard->shard_queue_index - 1);
   }
   while (shard->shard_queue_index < NUM_SHARDS - 1 &&
-         gpr_time_cmp(
-             shard->min_deadline,
-             g_shard_queue[shard->shard_queue_index + 1]->min_deadline) > 0) {
+         shard->min_deadline >
+             g_shard_queue[shard->shard_queue_index + 1]->min_deadline) {
     swap_adjacent_shards_in_queue(shard->shard_queue_index);
   }
 }
@@ -179,9 +232,17 @@ void grpc_timer_init(grpc_exec_ctx *exec_ctx, grpc_timer *timer,
   GPR_ASSERT(deadline.clock_type == g_clock_type);
   GPR_ASSERT(now.clock_type == g_clock_type);
   timer->closure = closure;
-  timer->deadline = deadline;
+  timer->deadline = timespec_to_atm_round_up(deadline);
+
+  if (grpc_timer_trace) {
+    gpr_log(GPR_DEBUG, "TIMER %p: SET %" PRId64 ".%09d [%" PRIdPTR
+                       "] now %" PRId64 ".%09d [%" PRIdPTR "] call %p[%p]",
+            timer, deadline.tv_sec, deadline.tv_nsec, timer->deadline,
+            now.tv_sec, now.tv_nsec, timespec_to_atm_round_down(now), closure,
+            closure->cb);
+  }
 
-  if (!g_initialized) {
+  if (!g_shared_mutables.initialized) {
     timer->pending = false;
     grpc_closure_sched(exec_ctx, timer->closure,
                        GRPC_ERROR_CREATE_FROM_STATIC_STRING(
@@ -201,12 +262,18 @@ void grpc_timer_init(grpc_exec_ctx *exec_ctx, grpc_timer *timer,
 
   grpc_time_averaged_stats_add_sample(&shard->stats,
                                       ts_to_dbl(gpr_time_sub(deadline, now)));
-  if (gpr_time_cmp(deadline, shard->queue_deadline_cap) < 0) {
+  if (timer->deadline < shard->queue_deadline_cap) {
     is_first_timer = grpc_timer_heap_add(&shard->heap, timer);
   } else {
     timer->heap_index = INVALID_HEAP_INDEX;
     list_join(&shard->list, timer);
   }
+  if (grpc_timer_trace) {
+    gpr_log(GPR_DEBUG, "  .. add to shard %d with queue_deadline_cap=%" PRIdPTR
+                       " => is_first_timer=%s",
+            (int)(shard - g_shards), shard->queue_deadline_cap,
+            is_first_timer ? "true" : "false");
+  }
   gpr_mu_unlock(&shard->mu);
 
   /* Deadline may have decreased, we need to adjust the master queue.  Note
@@ -221,28 +288,41 @@ void grpc_timer_init(grpc_exec_ctx *exec_ctx, grpc_timer *timer,
      In that case, the timer will simply have to wait for the next
      grpc_timer_check. */
   if (is_first_timer) {
-    gpr_mu_lock(&g_mu);
-    if (gpr_time_cmp(deadline, shard->min_deadline) < 0) {
-      gpr_timespec old_min_deadline = g_shard_queue[0]->min_deadline;
-      shard->min_deadline = deadline;
+    gpr_mu_lock(&g_shared_mutables.mu);
+    if (grpc_timer_trace) {
+      gpr_log(GPR_DEBUG, "  .. old shard min_deadline=%" PRIdPTR,
+              shard->min_deadline);
+    }
+    if (timer->deadline < shard->min_deadline) {
+      gpr_atm old_min_deadline = g_shard_queue[0]->min_deadline;
+      shard->min_deadline = timer->deadline;
       note_deadline_change(shard);
-      if (shard->shard_queue_index == 0 &&
-          gpr_time_cmp(deadline, old_min_deadline) < 0) {
+      if (shard->shard_queue_index == 0 && timer->deadline < old_min_deadline) {
+        gpr_atm_no_barrier_store(&g_shared_mutables.min_timer, timer->deadline);
         grpc_kick_poller();
       }
     }
-    gpr_mu_unlock(&g_mu);
+    gpr_mu_unlock(&g_shared_mutables.mu);
   }
 }
 
+void grpc_timer_consume_kick(void) {
+  /* force re-evaluation of the last seen minimum timer deadline */
+  gpr_tls_set(&g_last_seen_min_timer, 0);
+}
+
 void grpc_timer_cancel(grpc_exec_ctx *exec_ctx, grpc_timer *timer) {
-  if (!g_initialized) {
+  if (!g_shared_mutables.initialized) {
     /* must have already been cancelled, also the shard mutex is invalid */
     return;
   }
 
   shard_type *shard = &g_shards[GPR_HASH_POINTER(timer, NUM_SHARDS)];
   gpr_mu_lock(&shard->mu);
+  if (grpc_timer_trace) {
+    gpr_log(GPR_DEBUG, "TIMER %p: CANCEL pending=%s", timer,
+            timer->pending ? "true" : "false");
+  }
   if (timer->pending) {
     grpc_closure_sched(exec_ctx, timer->closure, GRPC_ERROR_CANCELLED);
     timer->pending = false;
@@ -260,7 +340,7 @@ void grpc_timer_cancel(grpc_exec_ctx *exec_ctx, grpc_timer *timer) {
    for timers that fall at or under it.  Returns true if the queue is no
    longer empty.
    REQUIRES: shard->mu locked */
-static int refill_queue(shard_type *shard, gpr_timespec now) {
+static int refill_queue(shard_type *shard, gpr_atm now) {
   /* Compute the new queue window width and bound by the limits: */
   double computed_deadline_delta =
       grpc_time_averaged_stats_update_average(&shard->stats) *
@@ -271,12 +351,22 @@ static int refill_queue(shard_type *shard, gpr_timespec now) {
   grpc_timer *timer, *next;
 
   /* Compute the new cap and put all timers under it into the queue: */
-  shard->queue_deadline_cap = gpr_time_add(
-      gpr_time_max(now, shard->queue_deadline_cap), dbl_to_ts(deadline_delta));
+  shard->queue_deadline_cap =
+      saturating_add(GPR_MAX(now, shard->queue_deadline_cap),
+                     (gpr_atm)(deadline_delta * 1000.0));
+
+  if (grpc_timer_check_trace) {
+    gpr_log(GPR_DEBUG, "  .. shard[%d]->queue_deadline_cap --> %" PRIdPTR,
+            (int)(shard - g_shards), shard->queue_deadline_cap);
+  }
   for (timer = shard->list.next; timer != &shard->list; timer = next) {
     next = timer->next;
 
-    if (gpr_time_cmp(timer->deadline, shard->queue_deadline_cap) < 0) {
+    if (timer->deadline < shard->queue_deadline_cap) {
+      if (grpc_timer_check_trace) {
+        gpr_log(GPR_DEBUG, "  .. add timer with deadline %" PRIdPTR " to heap",
+                timer->deadline);
+      }
       list_remove(timer);
       grpc_timer_heap_add(&shard->heap, timer);
     }
@@ -287,15 +377,29 @@ static int refill_queue(shard_type *shard, gpr_timespec now) {
 /* This pops the next non-cancelled timer with deadline <= now from the
    queue, or returns NULL if there isn't one.
    REQUIRES: shard->mu locked */
-static grpc_timer *pop_one(shard_type *shard, gpr_timespec now) {
+static grpc_timer *pop_one(shard_type *shard, gpr_atm now) {
   grpc_timer *timer;
   for (;;) {
+    if (grpc_timer_check_trace) {
+      gpr_log(GPR_DEBUG, "  .. shard[%d]: heap_empty=%s",
+              (int)(shard - g_shards),
+              grpc_timer_heap_is_empty(&shard->heap) ? "true" : "false");
+    }
     if (grpc_timer_heap_is_empty(&shard->heap)) {
-      if (gpr_time_cmp(now, shard->queue_deadline_cap) < 0) return NULL;
+      if (now < shard->queue_deadline_cap) return NULL;
       if (!refill_queue(shard, now)) return NULL;
     }
     timer = grpc_timer_heap_top(&shard->heap);
-    if (gpr_time_cmp(timer->deadline, now) > 0) return NULL;
+    if (grpc_timer_check_trace) {
+      gpr_log(GPR_DEBUG,
+              "  .. check top timer deadline=%" PRIdPTR " now=%" PRIdPTR,
+              timer->deadline, now);
+    }
+    if (timer->deadline > now) return NULL;
+    if (grpc_timer_trace) {
+      gpr_log(GPR_DEBUG, "TIMER %p: FIRE %" PRIdPTR "ms late", timer,
+              now - timer->deadline);
+    }
     timer->pending = false;
     grpc_timer_heap_pop(&shard->heap);
     return timer;
@@ -304,7 +408,7 @@ static grpc_timer *pop_one(shard_type *shard, gpr_timespec now) {
 
 /* REQUIRES: shard->mu unlocked */
 static size_t pop_timers(grpc_exec_ctx *exec_ctx, shard_type *shard,
-                         gpr_timespec now, gpr_timespec *new_min_deadline,
+                         gpr_atm now, gpr_atm *new_min_deadline,
                          grpc_error *error) {
   size_t n = 0;
   grpc_timer *timer;
@@ -318,17 +422,29 @@ static size_t pop_timers(grpc_exec_ctx *exec_ctx, shard_type *shard,
   return n;
 }
 
-static int run_some_expired_timers(grpc_exec_ctx *exec_ctx, gpr_timespec now,
-                                   gpr_timespec *next, grpc_error *error) {
+static int run_some_expired_timers(grpc_exec_ctx *exec_ctx, gpr_atm now,
+                                   gpr_atm *next, grpc_error *error) {
   size_t n = 0;
 
-  /* TODO(ctiller): verify that there are any timers (atomically) here */
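+  /* check the cached global minimum: if nothing can be due yet, report it as
+     the next wake-up time and return without taking any locks */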
+  gpr_atm min_timer = gpr_atm_no_barrier_load(&g_shared_mutables.min_timer);
+  gpr_tls_set(&g_last_seen_min_timer, min_timer);
+  if (now < min_timer) {
+    if (next != NULL) *next = GPR_MIN(*next, min_timer);
+    return 0;
+  }
 
-  if (gpr_spinlock_trylock(&g_checker_mu)) {
-    gpr_mu_lock(&g_mu);
+  if (gpr_spinlock_trylock(&g_shared_mutables.checker_mu)) {
+    gpr_mu_lock(&g_shared_mutables.mu);
 
-    while (gpr_time_cmp(g_shard_queue[0]->min_deadline, now) < 0) {
-      gpr_timespec new_min_deadline;
+    if (grpc_timer_check_trace) {
+      gpr_log(GPR_DEBUG, "  .. shard[%d]->min_deadline = %" PRIdPTR,
+              (int)(g_shard_queue[0] - g_shards),
+              g_shard_queue[0]->min_deadline);
+    }
+
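+    /* timers with deadline <= now are due; the == case is excluded when now
+       is GPR_ATM_MAX (shutdown), since a min_deadline of GPR_ATM_MAX marks an
+       empty shard and matching it would loop forever */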
+    while (g_shard_queue[0]->min_deadline < now ||
+           (now != GPR_ATM_MAX && g_shard_queue[0]->min_deadline == now)) {
+      gpr_atm new_min_deadline;
 
       /* For efficiency, we pop as many available timers as we can from the
          shard.  This may violate perfect timer deadline ordering, but that
@@ -336,6 +452,14 @@ static int run_some_expired_timers(grpc_exec_ctx *exec_ctx, gpr_timespec now,
       n +=
           pop_timers(exec_ctx, g_shard_queue[0], now, &new_min_deadline, error);
 
+      if (grpc_timer_check_trace) {
+        gpr_log(GPR_DEBUG, "  .. popped --> %" PRIdPTR
+                           ", shard[%d]->min_deadline %" PRIdPTR
+                           " --> %" PRIdPTR ", now=%" PRIdPTR,
+                n, (int)(g_shard_queue[0] - g_shards),
+                g_shard_queue[0]->min_deadline, new_min_deadline, now);
+      }
+
       /* An grpc_timer_init() on the shard could intervene here, adding a new
          timer that is earlier than new_min_deadline.  However,
          grpc_timer_init() will block on the master_lock before it can call
@@ -346,23 +470,24 @@ static int run_some_expired_timers(grpc_exec_ctx *exec_ctx, gpr_timespec now,
     }
 
     if (next) {
-      *next = gpr_time_min(*next, g_shard_queue[0]->min_deadline);
+      *next = GPR_MIN(*next, g_shard_queue[0]->min_deadline);
     }
 
-    gpr_mu_unlock(&g_mu);
-    gpr_spinlock_unlock(&g_checker_mu);
+    gpr_atm_no_barrier_store(&g_shared_mutables.min_timer,
+                             g_shard_queue[0]->min_deadline);
+    gpr_mu_unlock(&g_shared_mutables.mu);
+    gpr_spinlock_unlock(&g_shared_mutables.checker_mu);
   } else if (next != NULL) {
     /* TODO(ctiller): this forces calling code to do an short poll, and
        then retry the timer check (because this time through the timer list was
        contended).
 
-       We could reduce the cost here dramatically by keeping a count of how many
-       currently active pollers got through the uncontended case above
+       We could reduce the cost here dramatically by keeping a count of how
+       many currently active pollers got through the uncontended case above
        successfully, and waking up other pollers IFF that count drops to zero.
 
        Once that count is in place, this entire else branch could disappear. */
-    *next = gpr_time_min(
-        *next, gpr_time_add(now, gpr_time_from_millis(1, GPR_TIMESPAN)));
+    *next = GPR_MIN(*next, now + 1);
   }
 
   GRPC_ERROR_UNREF(error);
@@ -372,12 +497,71 @@ static int run_some_expired_timers(grpc_exec_ctx *exec_ctx, gpr_timespec now,
 
 bool grpc_timer_check(grpc_exec_ctx *exec_ctx, gpr_timespec now,
                       gpr_timespec *next) {
+  // prelude
   GPR_ASSERT(now.clock_type == g_clock_type);
-  return run_some_expired_timers(
-      exec_ctx, now, next,
+  gpr_atm now_atm = timespec_to_atm_round_down(now);
+
+  /* fetch from a thread-local first: this avoids contention on a globally
+     mutable cacheline in the common case */
+  gpr_atm min_timer = gpr_tls_get(&g_last_seen_min_timer);
+  if (now_atm < min_timer) {
+    if (next != NULL) {
+      *next =
+          atm_to_timespec(GPR_MIN(timespec_to_atm_round_up(*next), min_timer));
+    }
+    if (grpc_timer_check_trace) {
+      gpr_log(GPR_DEBUG,
+              "TIMER CHECK SKIP: now_atm=%" PRIdPTR " min_timer=%" PRIdPTR,
+              now_atm, min_timer);
+    }
+    return 0;
+  }
+
+  grpc_error *shutdown_error =
       gpr_time_cmp(now, gpr_inf_future(now.clock_type)) != 0
           ? GRPC_ERROR_NONE
-          : GRPC_ERROR_CREATE_FROM_STATIC_STRING("Shutting down timer system"));
+          : GRPC_ERROR_CREATE_FROM_STATIC_STRING("Shutting down timer system");
+
+  // tracing
+  if (grpc_timer_check_trace) {
+    char *next_str;
+    if (next == NULL) {
+      next_str = gpr_strdup("NULL");
+    } else {
+      gpr_asprintf(&next_str, "%" PRId64 ".%09d [%" PRIdPTR "]", next->tv_sec,
+                   next->tv_nsec, timespec_to_atm_round_down(*next));
+    }
+    gpr_log(GPR_DEBUG, "TIMER CHECK BEGIN: now=%" PRId64 ".%09d [%" PRIdPTR
+                       "] next=%s tls_min=%" PRIdPTR " glob_min=%" PRIdPTR,
+            now.tv_sec, now.tv_nsec, now_atm, next_str,
+            gpr_tls_get(&g_last_seen_min_timer),
+            gpr_atm_no_barrier_load(&g_shared_mutables.min_timer));
+    gpr_free(next_str);
+  }
+  // actual code
+  int r;
+  gpr_atm next_atm;
+  if (next == NULL) {
+    r = run_some_expired_timers(exec_ctx, now_atm, NULL, shutdown_error);
+  } else {
+    next_atm = timespec_to_atm_round_down(*next);
+    r = run_some_expired_timers(exec_ctx, now_atm, &next_atm, shutdown_error);
+    *next = atm_to_timespec(next_atm);
+  }
+  // tracing
+  if (grpc_timer_check_trace) {
+    char *next_str;
+    if (next == NULL) {
+      next_str = gpr_strdup("NULL");
+    } else {
+      gpr_asprintf(&next_str, "%" PRId64 ".%09d [%" PRIdPTR "]", next->tv_sec,
+                   next->tv_nsec, next_atm);
+    }
+    gpr_log(GPR_DEBUG, "TIMER CHECK END: %d timers triggered; next=%s", r,
+            next_str);
+    gpr_free(next_str);
+  }
+  return r > 0;
 }
 
 #endif /* GRPC_TIMER_USE_GENERIC */
diff --git a/src/core/lib/iomgr/timer_generic.h b/src/core/lib/iomgr/timer_generic.h
index 1608dce9fb1058bf0dadc414538c8a724568e016..c79a431aa034089a21424773df6add9cfbd58163 100644
--- a/src/core/lib/iomgr/timer_generic.h
+++ b/src/core/lib/iomgr/timer_generic.h
@@ -38,7 +38,7 @@
 #include "src/core/lib/iomgr/exec_ctx.h"
 
 struct grpc_timer {
-  gpr_timespec deadline;
+  gpr_atm deadline; /* deadline in ms, as managed by timer_generic.c */
   uint32_t heap_index; /* INVALID_HEAP_INDEX if not in heap */
   bool pending;
   struct grpc_timer *next;
diff --git a/src/core/lib/iomgr/timer_heap.c b/src/core/lib/iomgr/timer_heap.c
index f736d335e6c4aa68948374d0102dc202de0ba675..03ccfe023a55707e0211b095de593da7645c6513 100644
--- a/src/core/lib/iomgr/timer_heap.c
+++ b/src/core/lib/iomgr/timer_heap.c
@@ -50,7 +50,7 @@
 static void adjust_upwards(grpc_timer **first, uint32_t i, grpc_timer *t) {
   while (i > 0) {
     uint32_t parent = (uint32_t)(((int)i - 1) / 2);
-    if (gpr_time_cmp(first[parent]->deadline, t->deadline) <= 0) break;
+    if (first[parent]->deadline <= t->deadline) break;
     first[i] = first[parent];
     first[i]->heap_index = i;
     i = parent;
@@ -68,12 +68,12 @@ static void adjust_downwards(grpc_timer **first, uint32_t i, uint32_t length,
     uint32_t left_child = 1u + 2u * i;
     if (left_child >= length) break;
     uint32_t right_child = left_child + 1;
-    uint32_t next_i = right_child < length &&
-                              gpr_time_cmp(first[left_child]->deadline,
-                                           first[right_child]->deadline) > 0
-                          ? right_child
-                          : left_child;
-    if (gpr_time_cmp(t->deadline, first[next_i]->deadline) <= 0) break;
+    uint32_t next_i =
+        right_child < length &&
+                first[left_child]->deadline > first[right_child]->deadline
+            ? right_child
+            : left_child;
+    if (t->deadline <= first[next_i]->deadline) break;
     first[i] = first[next_i];
     first[i]->heap_index = i;
     i = next_i;
@@ -97,7 +97,7 @@ static void maybe_shrink(grpc_timer_heap *heap) {
 static void note_changed_priority(grpc_timer_heap *heap, grpc_timer *timer) {
   uint32_t i = timer->heap_index;
   uint32_t parent = (uint32_t)(((int)i - 1) / 2);
-  if (gpr_time_cmp(heap->timers[parent]->deadline, timer->deadline) > 0) {
+  if (heap->timers[parent]->deadline > timer->deadline) {
     adjust_upwards(heap->timers, i, timer);
   } else {
     adjust_downwards(heap->timers, i, heap->timer_count, timer);
diff --git a/test/core/end2end/tests/max_connection_age.c b/test/core/end2end/tests/max_connection_age.c
index 1de54e082522b1ba6026920d2804509466f2584a..59bfdbabb9b88f4289372e80a509963a699ae400 100644
--- a/test/core/end2end/tests/max_connection_age.c
+++ b/test/core/end2end/tests/max_connection_age.c
@@ -57,7 +57,7 @@
    should be shorter than CALL_DEADLINE_S - CQ_MAX_CONNECTION_AGE_WAIT_TIME_S */
 #define CQ_MAX_CONNECTION_AGE_GRACE_WAIT_TIME_S 2
 /* The grace period for the test to observe the channel shutdown process */
-#define IMMEDIATE_SHUTDOWN_GRACE_TIME_MS 300
+#define IMMEDIATE_SHUTDOWN_GRACE_TIME_MS 3000
 
 static void *tag(intptr_t t) { return (void *)t; }
 
diff --git a/test/core/end2end/tests/max_connection_idle.c b/test/core/end2end/tests/max_connection_idle.c
index 9dc1ee47664e038bf27ca9deeadff927f504655f..c0984e4d14e1c332a21ac1c56b2d1dfdc4412fa0 100644
--- a/test/core/end2end/tests/max_connection_idle.c
+++ b/test/core/end2end/tests/max_connection_idle.c
@@ -89,8 +89,8 @@ static void test_max_connection_idle(grpc_end2end_test_config config) {
   /* wait for the channel to reach its maximum idle time */
   grpc_channel_watch_connectivity_state(
       f.client, GRPC_CHANNEL_READY,
-      grpc_timeout_milliseconds_to_deadline(MAX_CONNECTION_IDLE_MS + 500), f.cq,
-      tag(99));
+      grpc_timeout_milliseconds_to_deadline(MAX_CONNECTION_IDLE_MS + 3000),
+      f.cq, tag(99));
   CQ_EXPECT_COMPLETION(cqv, tag(99), 1);
   cq_verify(cqv);
   state = grpc_channel_check_connectivity_state(f.client, 0);
diff --git a/test/core/iomgr/timer_heap_test.c b/test/core/iomgr/timer_heap_test.c
index 410d972313b37d27b96f08aa1a76ef1fde3cabb7..153304fa7b41483916aac0eb6871a0324f2dc57c 100644
--- a/test/core/iomgr/timer_heap_test.c
+++ b/test/core/iomgr/timer_heap_test.c
@@ -47,13 +47,7 @@
 
 #include "test/core/util/test_config.h"
 
-static gpr_timespec random_deadline(void) {
-  gpr_timespec ts;
-  ts.tv_sec = rand();
-  ts.tv_nsec = rand();
-  ts.clock_type = GPR_CLOCK_REALTIME;
-  return ts;
-}
+static gpr_atm random_deadline(void) { return rand(); }
 
 static grpc_timer *create_test_elements(size_t num_elements) {
   grpc_timer *elems = gpr_malloc(num_elements * sizeof(grpc_timer));
@@ -78,12 +72,10 @@ static void check_valid(grpc_timer_heap *pq) {
     size_t left_child = 1u + 2u * i;
     size_t right_child = left_child + 1u;
     if (left_child < pq->timer_count) {
-      GPR_ASSERT(gpr_time_cmp(pq->timers[i]->deadline,
-                              pq->timers[left_child]->deadline) <= 0);
+      GPR_ASSERT(pq->timers[i]->deadline <= pq->timers[left_child]->deadline);
     }
     if (right_child < pq->timer_count) {
-      GPR_ASSERT(gpr_time_cmp(pq->timers[i]->deadline,
-                              pq->timers[right_child]->deadline) <= 0);
+      GPR_ASSERT(pq->timers[i]->deadline <= pq->timers[right_child]->deadline);
     }
   }
 }
@@ -227,20 +219,19 @@ static void test2(void) {
     }
 
     if (num_inserted) {
-      gpr_timespec *min_deadline = NULL;
+      gpr_atm *min_deadline = NULL;
       for (size_t i = 0; i < elems_size; i++) {
         if (elems[i].inserted) {
           if (min_deadline == NULL) {
             min_deadline = &elems[i].elem.deadline;
           } else {
-            if (gpr_time_cmp(elems[i].elem.deadline, *min_deadline) < 0) {
+            if (elems[i].elem.deadline < *min_deadline) {
               min_deadline = &elems[i].elem.deadline;
             }
           }
         }
       }
-      GPR_ASSERT(
-          0 == gpr_time_cmp(grpc_timer_heap_top(&pq)->deadline, *min_deadline));
+      GPR_ASSERT(grpc_timer_heap_top(&pq)->deadline == *min_deadline);
     }
   }
 
diff --git a/test/core/iomgr/timer_list_test.c b/test/core/iomgr/timer_list_test.c
index 85ad5277cc058a10e67c10104e327409eaf8cf02..46e41dd4490bc320b39d42fb3af32e3180e9d415 100644
--- a/test/core/iomgr/timer_list_test.c
+++ b/test/core/iomgr/timer_list_test.c
@@ -45,6 +45,9 @@
 
 #define MAX_CB 30
 
+extern int grpc_timer_trace;
+extern int grpc_timer_check_trace;
+
 static int cb_called[MAX_CB][2];
 
 static void cb(grpc_exec_ctx *exec_ctx, void *arg, grpc_error *error) {
@@ -57,7 +60,11 @@ static void add_test(void) {
   grpc_timer timers[20];
   grpc_exec_ctx exec_ctx = GRPC_EXEC_CTX_INIT;
 
+  gpr_log(GPR_INFO, "add_test");
+
   grpc_timer_list_init(start);
+  grpc_timer_trace = 1;
+  grpc_timer_check_trace = 1;
   memset(cb_called, 0, sizeof(cb_called));
 
   /* 10 ms timers.  will expire in the current epoch */
@@ -120,9 +127,7 @@ static void add_test(void) {
 }
 
 static gpr_timespec tfm(int m) {
-  gpr_timespec t = gpr_time_from_millis(m, GPR_TIMESPAN);
-  t.clock_type = GPR_CLOCK_REALTIME;
-  return t;
+  return gpr_time_from_millis(m, GPR_CLOCK_REALTIME);
 }
 
 /* Cleaning up a list with pending timers. */
@@ -130,7 +135,11 @@ void destruction_test(void) {
   grpc_timer timers[5];
   grpc_exec_ctx exec_ctx = GRPC_EXEC_CTX_INIT;
 
+  gpr_log(GPR_INFO, "destruction_test");
+
   grpc_timer_list_init(gpr_time_0(GPR_CLOCK_REALTIME));
+  grpc_timer_trace = 1;
+  grpc_timer_check_trace = 1;
   memset(cb_called, 0, sizeof(cb_called));
 
   grpc_timer_init(
@@ -170,6 +179,7 @@ void destruction_test(void) {
 
 int main(int argc, char **argv) {
   grpc_test_init(argc, argv);
+  gpr_set_log_verbosity(GPR_LOG_SEVERITY_DEBUG);
   add_test();
   destruction_test();
   return 0;