diff --git a/grpc.def b/grpc.def
index 01628638ef56d23a596471c5568780071a79bd3c..ff856e7f192887854d2f0e9183e301ecca75caab 100644
--- a/grpc.def
+++ b/grpc.def
@@ -164,7 +164,9 @@ EXPORTS
     grpc_slice_buffer_move_into
     grpc_slice_buffer_trim_end
     grpc_slice_buffer_move_first
+    grpc_slice_buffer_move_first_into_buffer
     grpc_slice_buffer_take_first
+    grpc_slice_buffer_undo_take_first
     gpr_malloc
     gpr_free
     gpr_realloc
diff --git a/include/grpc/impl/codegen/slice.h b/include/grpc/impl/codegen/slice.h
index 00781bb76bdb8cc936607d2fabb838778eea2ca5..035260445e9bce7e35eb7b65875bf6361c53f6d7 100644
--- a/include/grpc/impl/codegen/slice.h
+++ b/include/grpc/impl/codegen/slice.h
@@ -93,11 +93,16 @@ typedef struct grpc_slice {
 /* Represents an expandable array of slices, to be interpreted as a
    single item. */
 typedef struct {
-  /* slices in the array */
+  /* This is for internal use only. External users (i.e any code outside grpc
+   * core) MUST NOT use this field */
+  grpc_slice *base_slices;
+
+  /* slices in the array (Points to the first valid grpc_slice in the array) */
   grpc_slice *slices;
   /* the number of slices in the array */
   size_t count;
-  /* the number of slices allocated in the array */
+  /* the number of slices allocated in the array. External users (i.e any code
+   * outside grpc core) MUST NOT use this field */
   size_t capacity;
   /* the combined length of all slices in the array */
   size_t length;
diff --git a/include/grpc/slice_buffer.h b/include/grpc/slice_buffer.h
index f1de653af41f447cde52e3e6cfddb2e5383016ca..2ed896645b43a959ede7f72b95ba7c19e78b0f46 100644
--- a/include/grpc/slice_buffer.h
+++ b/include/grpc/slice_buffer.h
@@ -77,8 +77,15 @@ GPRAPI void grpc_slice_buffer_trim_end(grpc_slice_buffer *src, size_t n,
 /* move the first n bytes of src into dst */
 GPRAPI void grpc_slice_buffer_move_first(grpc_slice_buffer *src, size_t n,
                                          grpc_slice_buffer *dst);
+/* move the first n bytes of src into dst (copying them) */
+GPRAPI void grpc_slice_buffer_move_first_into_buffer(grpc_exec_ctx *exec_ctx,
+                                                     grpc_slice_buffer *src,
+                                                     size_t n, void *dst);
 /* take the first slice in the slice buffer */
 GPRAPI grpc_slice grpc_slice_buffer_take_first(grpc_slice_buffer *src);
+/* undo the above with (a possibly different) \a slice */
+GPRAPI void grpc_slice_buffer_undo_take_first(grpc_slice_buffer *src,
+                                              grpc_slice slice);
 
 #ifdef __cplusplus
 }
diff --git a/src/core/lib/slice/slice_buffer.c b/src/core/lib/slice/slice_buffer.c
index 08eaf4963a6e406482bf2e05b7e7de9eec230fed..9176dc8a4208a1c70ac3bc138cf966f278332afd 100644
--- a/src/core/lib/slice/slice_buffer.c
+++ b/src/core/lib/slice/slice_buffer.c
@@ -46,15 +46,27 @@
 #define GROW(x) (3 * (x) / 2)
 
 static void maybe_embiggen(grpc_slice_buffer *sb) {
-  if (sb->count == sb->capacity) {
+  if (sb->base_slices != sb->slices) {
+    memmove(sb->base_slices, sb->slices, sb->count * sizeof(grpc_slice));
+    sb->slices = sb->base_slices;
+  }
+
+  /* How far away from sb->base_slices is sb->slices pointer */
+  size_t slice_offset = (size_t)(sb->slices - sb->base_slices);
+  size_t slice_count = sb->count + slice_offset;
+
+  if (slice_count == sb->capacity) {
     sb->capacity = GROW(sb->capacity);
-    GPR_ASSERT(sb->capacity > sb->count);
-    if (sb->slices == sb->inlined) {
-      sb->slices = gpr_malloc(sb->capacity * sizeof(grpc_slice));
-      memcpy(sb->slices, sb->inlined, sb->count * sizeof(grpc_slice));
+    GPR_ASSERT(sb->capacity > slice_count);
+    if (sb->base_slices == sb->inlined) {
+      sb->base_slices = gpr_malloc(sb->capacity * sizeof(grpc_slice));
+      memcpy(sb->base_slices, sb->inlined, slice_count * sizeof(grpc_slice));
     } else {
-      sb->slices = gpr_realloc(sb->slices, sb->capacity * sizeof(grpc_slice));
+      sb->base_slices =
+          gpr_realloc(sb->base_slices, sb->capacity * sizeof(grpc_slice));
     }
+
+    sb->slices = sb->base_slices + slice_offset;
   }
 }
 
@@ -62,14 +74,14 @@ void grpc_slice_buffer_init(grpc_slice_buffer *sb) {
   sb->count = 0;
   sb->length = 0;
   sb->capacity = GRPC_SLICE_BUFFER_INLINE_ELEMENTS;
-  sb->slices = sb->inlined;
+  sb->base_slices = sb->slices = sb->inlined;
 }
 
 void grpc_slice_buffer_destroy_internal(grpc_exec_ctx *exec_ctx,
                                         grpc_slice_buffer *sb) {
   grpc_slice_buffer_reset_and_unref_internal(exec_ctx, sb);
-  if (sb->slices != sb->inlined) {
-    gpr_free(sb->slices);
+  if (sb->base_slices != sb->inlined) {
+    gpr_free(sb->base_slices);
   }
 }
 
@@ -166,7 +178,6 @@ void grpc_slice_buffer_pop(grpc_slice_buffer *sb) {
 void grpc_slice_buffer_reset_and_unref_internal(grpc_exec_ctx *exec_ctx,
                                                 grpc_slice_buffer *sb) {
   size_t i;
-
   for (i = 0; i < sb->count; i++) {
     grpc_slice_unref_internal(exec_ctx, sb->slices[i]);
   }
@@ -182,32 +193,45 @@ void grpc_slice_buffer_reset_and_unref(grpc_slice_buffer *sb) {
 }
 
 void grpc_slice_buffer_swap(grpc_slice_buffer *a, grpc_slice_buffer *b) {
-  GPR_SWAP(size_t, a->count, b->count);
-  GPR_SWAP(size_t, a->capacity, b->capacity);
-  GPR_SWAP(size_t, a->length, b->length);
+  size_t a_offset = (size_t)(a->slices - a->base_slices);
+  size_t b_offset = (size_t)(b->slices - b->base_slices);
 
-  if (a->slices == a->inlined) {
-    if (b->slices == b->inlined) {
+  size_t a_count = a->count + a_offset;
+  size_t b_count = b->count + b_offset;
+
+  if (a->base_slices == a->inlined) {
+    if (b->base_slices == b->inlined) {
       /* swap contents of inlined buffer */
       grpc_slice temp[GRPC_SLICE_BUFFER_INLINE_ELEMENTS];
-      memcpy(temp, a->slices, b->count * sizeof(grpc_slice));
-      memcpy(a->slices, b->slices, a->count * sizeof(grpc_slice));
-      memcpy(b->slices, temp, b->count * sizeof(grpc_slice));
+      memcpy(temp, a->base_slices, a_count * sizeof(grpc_slice));
+      memcpy(a->base_slices, b->base_slices, b_count * sizeof(grpc_slice));
+      memcpy(b->base_slices, temp, a_count * sizeof(grpc_slice));
     } else {
       /* a is inlined, b is not - copy a inlined into b, fix pointers */
-      a->slices = b->slices;
-      b->slices = b->inlined;
-      memcpy(b->slices, a->inlined, b->count * sizeof(grpc_slice));
+      a->base_slices = b->base_slices;
+      b->base_slices = b->inlined;
+      memcpy(b->base_slices, a->inlined, a_count * sizeof(grpc_slice));
     }
-  } else if (b->slices == b->inlined) {
+  } else if (b->base_slices == b->inlined) {
     /* b is inlined, a is not - copy b inlined int a, fix pointers */
-    b->slices = a->slices;
-    a->slices = a->inlined;
-    memcpy(a->slices, b->inlined, a->count * sizeof(grpc_slice));
+    b->base_slices = a->base_slices;
+    a->base_slices = a->inlined;
+    memcpy(a->base_slices, b->inlined, b_count * sizeof(grpc_slice));
   } else {
     /* no inlining: easy swap */
-    GPR_SWAP(grpc_slice *, a->slices, b->slices);
+    GPR_SWAP(grpc_slice *, a->base_slices, b->base_slices);
   }
+
+  /* Update the slices pointers (cannot do a GPR_SWAP on slices fields here).
+   * Also note that since the base_slices pointers are already swapped we need
+   * use 'b_offset' for 'a->base_slices' and vice versa */
+  a->slices = a->base_slices + b_offset;
+  b->slices = b->base_slices + a_offset;
+
+  /* base_slices and slices fields are correctly set. Swap all other fields */
+  GPR_SWAP(size_t, a->count, b->count);
+  GPR_SWAP(size_t, a->capacity, b->capacity);
+  GPR_SWAP(size_t, a->length, b->length);
 }
 
 void grpc_slice_buffer_move_into(grpc_slice_buffer *src,
@@ -229,7 +253,6 @@ void grpc_slice_buffer_move_into(grpc_slice_buffer *src,
 
 void grpc_slice_buffer_move_first(grpc_slice_buffer *src, size_t n,
                                   grpc_slice_buffer *dst) {
-  size_t src_idx;
   size_t output_len = dst->length + n;
   size_t new_input_len = src->length - n;
   GPR_ASSERT(src->length >= n);
@@ -237,34 +260,55 @@ void grpc_slice_buffer_move_first(grpc_slice_buffer *src, size_t n,
     grpc_slice_buffer_move_into(src, dst);
     return;
   }
-  src_idx = 0;
-  while (src_idx < src->capacity) {
-    grpc_slice slice = src->slices[src_idx];
+
+  while (src->count > 0) {
+    grpc_slice slice = grpc_slice_buffer_take_first(src);
     size_t slice_len = GRPC_SLICE_LENGTH(slice);
     if (n > slice_len) {
       grpc_slice_buffer_add(dst, slice);
       n -= slice_len;
-      src_idx++;
     } else if (n == slice_len) {
       grpc_slice_buffer_add(dst, slice);
-      src_idx++;
       break;
     } else { /* n < slice_len */
-      src->slices[src_idx] = grpc_slice_split_tail(&slice, n);
+      grpc_slice_buffer_undo_take_first(src, grpc_slice_split_tail(&slice, n));
       GPR_ASSERT(GRPC_SLICE_LENGTH(slice) == n);
-      GPR_ASSERT(GRPC_SLICE_LENGTH(src->slices[src_idx]) == slice_len - n);
       grpc_slice_buffer_add(dst, slice);
       break;
     }
   }
   GPR_ASSERT(dst->length == output_len);
-  memmove(src->slices, src->slices + src_idx,
-          sizeof(grpc_slice) * (src->count - src_idx));
-  src->count -= src_idx;
-  src->length = new_input_len;
+  GPR_ASSERT(src->length == new_input_len);
   GPR_ASSERT(src->count > 0);
 }
 
+void grpc_slice_buffer_move_first_into_buffer(grpc_exec_ctx *exec_ctx,
+                                              grpc_slice_buffer *src, size_t n,
+                                              void *dst) {
+  char *dstp = dst;
+  GPR_ASSERT(src->length >= n);
+
+  while (n > 0) {
+    grpc_slice slice = grpc_slice_buffer_take_first(src);
+    size_t slice_len = GRPC_SLICE_LENGTH(slice);
+    if (slice_len > n) {
+      memcpy(dstp, GRPC_SLICE_START_PTR(slice), n);
+      grpc_slice_buffer_undo_take_first(
+          src, grpc_slice_sub_no_ref(slice, n, slice_len));
+      n = 0;
+    } else if (slice_len == n) {
+      memcpy(dstp, GRPC_SLICE_START_PTR(slice), n);
+      grpc_slice_unref_internal(exec_ctx, slice);
+      n = 0;
+    } else {
+      memcpy(dstp, GRPC_SLICE_START_PTR(slice), slice_len);
+      dstp += slice_len;
+      n -= slice_len;
+      grpc_slice_unref_internal(exec_ctx, slice);
+    }
+  }
+}
+
 void grpc_slice_buffer_trim_end(grpc_slice_buffer *sb, size_t n,
                                 grpc_slice_buffer *garbage) {
   GPR_ASSERT(n <= sb->length);
@@ -293,8 +337,17 @@ grpc_slice grpc_slice_buffer_take_first(grpc_slice_buffer *sb) {
   grpc_slice slice;
   GPR_ASSERT(sb->count > 0);
   slice = sb->slices[0];
-  memmove(&sb->slices[0], &sb->slices[1], (sb->count - 1) * sizeof(grpc_slice));
+  sb->slices++;
   sb->count--;
   sb->length -= GRPC_SLICE_LENGTH(slice);
+
   return slice;
 }
+
+void grpc_slice_buffer_undo_take_first(grpc_slice_buffer *sb,
+                                       grpc_slice slice) {
+  sb->slices--;
+  sb->slices[0] = slice;
+  sb->count++;
+  sb->length += GRPC_SLICE_LENGTH(slice);
+}
diff --git a/src/ruby/ext/grpc/rb_grpc_imports.generated.c b/src/ruby/ext/grpc/rb_grpc_imports.generated.c
index 6c36df9113e51828a2f99ade70134b6e8852fb63..b2d536e7029c787ad8f55a1d8b492e1de3ac57d9 100644
--- a/src/ruby/ext/grpc/rb_grpc_imports.generated.c
+++ b/src/ruby/ext/grpc/rb_grpc_imports.generated.c
@@ -202,7 +202,9 @@ grpc_slice_buffer_swap_type grpc_slice_buffer_swap_import;
 grpc_slice_buffer_move_into_type grpc_slice_buffer_move_into_import;
 grpc_slice_buffer_trim_end_type grpc_slice_buffer_trim_end_import;
 grpc_slice_buffer_move_first_type grpc_slice_buffer_move_first_import;
+grpc_slice_buffer_move_first_into_buffer_type grpc_slice_buffer_move_first_into_buffer_import;
 grpc_slice_buffer_take_first_type grpc_slice_buffer_take_first_import;
+grpc_slice_buffer_undo_take_first_type grpc_slice_buffer_undo_take_first_import;
 gpr_malloc_type gpr_malloc_import;
 gpr_free_type gpr_free_import;
 gpr_realloc_type gpr_realloc_import;
@@ -478,7 +480,9 @@ void grpc_rb_load_imports(HMODULE library) {
   grpc_slice_buffer_move_into_import = (grpc_slice_buffer_move_into_type) GetProcAddress(library, "grpc_slice_buffer_move_into");
   grpc_slice_buffer_trim_end_import = (grpc_slice_buffer_trim_end_type) GetProcAddress(library, "grpc_slice_buffer_trim_end");
   grpc_slice_buffer_move_first_import = (grpc_slice_buffer_move_first_type) GetProcAddress(library, "grpc_slice_buffer_move_first");
+  grpc_slice_buffer_move_first_into_buffer_import = (grpc_slice_buffer_move_first_into_buffer_type) GetProcAddress(library, "grpc_slice_buffer_move_first_into_buffer");
   grpc_slice_buffer_take_first_import = (grpc_slice_buffer_take_first_type) GetProcAddress(library, "grpc_slice_buffer_take_first");
+  grpc_slice_buffer_undo_take_first_import = (grpc_slice_buffer_undo_take_first_type) GetProcAddress(library, "grpc_slice_buffer_undo_take_first");
   gpr_malloc_import = (gpr_malloc_type) GetProcAddress(library, "gpr_malloc");
   gpr_free_import = (gpr_free_type) GetProcAddress(library, "gpr_free");
   gpr_realloc_import = (gpr_realloc_type) GetProcAddress(library, "gpr_realloc");
diff --git a/src/ruby/ext/grpc/rb_grpc_imports.generated.h b/src/ruby/ext/grpc/rb_grpc_imports.generated.h
index 5745686adf368681bc02f2aa39f80c81778f8b03..4f496b7bdb3f357bf0d581f5bad373c5a74eb57d 100644
--- a/src/ruby/ext/grpc/rb_grpc_imports.generated.h
+++ b/src/ruby/ext/grpc/rb_grpc_imports.generated.h
@@ -557,9 +557,15 @@ extern grpc_slice_buffer_trim_end_type grpc_slice_buffer_trim_end_import;
 typedef void(*grpc_slice_buffer_move_first_type)(grpc_slice_buffer *src, size_t n, grpc_slice_buffer *dst);
 extern grpc_slice_buffer_move_first_type grpc_slice_buffer_move_first_import;
 #define grpc_slice_buffer_move_first grpc_slice_buffer_move_first_import
+typedef void(*grpc_slice_buffer_move_first_into_buffer_type)(grpc_exec_ctx *exec_ctx, grpc_slice_buffer *src, size_t n, void *dst);
+extern grpc_slice_buffer_move_first_into_buffer_type grpc_slice_buffer_move_first_into_buffer_import;
+#define grpc_slice_buffer_move_first_into_buffer grpc_slice_buffer_move_first_into_buffer_import
 typedef grpc_slice(*grpc_slice_buffer_take_first_type)(grpc_slice_buffer *src);
 extern grpc_slice_buffer_take_first_type grpc_slice_buffer_take_first_import;
 #define grpc_slice_buffer_take_first grpc_slice_buffer_take_first_import
+typedef void(*grpc_slice_buffer_undo_take_first_type)(grpc_slice_buffer *src, grpc_slice slice);
+extern grpc_slice_buffer_undo_take_first_type grpc_slice_buffer_undo_take_first_import;
+#define grpc_slice_buffer_undo_take_first grpc_slice_buffer_undo_take_first_import
 typedef void *(*gpr_malloc_type)(size_t size);
 extern gpr_malloc_type gpr_malloc_import;
 #define gpr_malloc gpr_malloc_import