diff --git a/include/grpc/support/atm_gcc_atomic.h b/include/grpc/support/atm_gcc_atomic.h
index 65d3d0c60f82a9aa10e7f85288bf3599d92dad1f..a2c83860289ae314cc5bf1fd42a42bc44a842937 100644
--- a/include/grpc/support/atm_gcc_atomic.h
+++ b/include/grpc/support/atm_gcc_atomic.h
@@ -46,6 +46,8 @@ typedef gpr_intptr gpr_atm;
 #define gpr_atm_no_barrier_load(p) (__atomic_load_n((p), __ATOMIC_RELAXED))
 #define gpr_atm_rel_store(p, value) \
   (__atomic_store_n((p), (gpr_intptr)(value), __ATOMIC_RELEASE))
+#define gpr_atm_no_barrier_store(p, value) \
+  (__atomic_store_n((p), (gpr_intptr)(value), __ATOMIC_RELAXED))
 
 #define gpr_atm_no_barrier_fetch_add(p, delta) \
   (__atomic_fetch_add((p), (gpr_intptr)(delta), __ATOMIC_RELAXED))
diff --git a/include/grpc/support/atm_gcc_sync.h b/include/grpc/support/atm_gcc_sync.h
index 4955e4436f4485df2cd0f600f0aa20556a05df41..38b5a9eec2571149324a29cb70bdece372b54a8c 100644
--- a/include/grpc/support/atm_gcc_sync.h
+++ b/include/grpc/support/atm_gcc_sync.h
@@ -68,6 +68,11 @@ static __inline void gpr_atm_rel_store(gpr_atm *p, gpr_atm value) {
   *p = value;
 }
 
+static __inline void gpr_atm_no_barrier_store(gpr_atm *p, gpr_atm value) {
+  GPR_ATM_COMPILE_BARRIER_();
+  *p = value;
+}
+
 #undef GPR_ATM_LS_BARRIER_
 #undef GPR_ATM_COMPILE_BARRIER_
 
diff --git a/include/grpc/support/atm_win32.h b/include/grpc/support/atm_win32.h
index da99021c24b0f2c7d3557adb397770dfbc477c65..694528a9ba99bca224624e19aa5dd4ab16944d2e 100644
--- a/include/grpc/support/atm_win32.h
+++ b/include/grpc/support/atm_win32.h
@@ -57,6 +57,11 @@ static __inline void gpr_atm_rel_store(gpr_atm *p, gpr_atm value) {
   *p = value;
 }
 
+static __inline void gpr_atm_no_barrier_store(gpr_atm *p, gpr_atm value) {
+  /* TODO(ctiller): Can we implement something better here? */
+  gpr_atm_rel_store(p, value);
+}
+
 static __inline int gpr_atm_no_barrier_cas(gpr_atm *p, gpr_atm o, gpr_atm n) {
 /* InterlockedCompareExchangePointerNoFence() not available on vista or
    windows7 */
diff --git a/src/core/support/stack_lockfree.c b/src/core/support/stack_lockfree.c
index f24e272207fa0ba1ed831dbbf279bd1f6a16c6b5..bc741f8c7030c59dcfaf16e292a749e1b500e201 100644
--- a/src/core/support/stack_lockfree.c
+++ b/src/core/support/stack_lockfree.c
@@ -95,6 +95,8 @@ gpr_stack_lockfree *gpr_stack_lockfree_create(int entries) {
   memset(&stack->pushed, 0, sizeof(stack->pushed));
 #endif
 
+  GPR_ASSERT(sizeof(stack->entries->atm) == sizeof(stack->entries->contents));
+
   /* Point the head at reserved dummy entry */
   stack->head.contents.index = INVALID_ENTRY_INDEX;
   return stack;
@@ -108,11 +110,15 @@ void gpr_stack_lockfree_destroy(gpr_stack_lockfree *stack) {
 int gpr_stack_lockfree_push(gpr_stack_lockfree *stack, int entry) {
   lockfree_node head;
   lockfree_node newhead;
+  lockfree_node curent;
+  lockfree_node newent;
 
   /* First fill in the entry's index and aba ctr for new head */
   newhead.contents.index = (gpr_uint16)entry;
   /* Also post-increment the aba_ctr */
-  newhead.contents.aba_ctr = stack->entries[entry].contents.aba_ctr++;
+  curent.atm = gpr_atm_no_barrier_load(&stack->entries[entry].atm);
+  newhead.contents.aba_ctr = ++curent.contents.aba_ctr;
+  gpr_atm_no_barrier_store(&stack->entries[entry].atm, curent.atm);
 
 #ifndef NDEBUG
   /* Check for double push */
@@ -131,7 +137,9 @@ int gpr_stack_lockfree_push(gpr_stack_lockfree *stack, int entry) {
     /* Atomically get the existing head value for use */
     head.atm = gpr_atm_no_barrier_load(&(stack->head.atm));
     /* Point to it */
-    stack->entries[entry].contents.index = head.contents.index;
+    newent.atm = gpr_atm_no_barrier_load(&stack->entries[entry].atm);
+    newent.contents.index = head.contents.index;
+    gpr_atm_no_barrier_store(&stack->entries[entry].atm, newent.atm);
   } while (!gpr_atm_rel_cas(&(stack->head.atm), head.atm, newhead.atm));
   /* Use rel_cas above to make sure that entry index is set properly */
   return head.contents.index == INVALID_ENTRY_INDEX;