From cbafdd1084384e9233aa846b7f3e627fdc281910 Mon Sep 17 00:00:00 2001
From: Craig Tiller <craig.tiller@gmail.com>
Date: Tue, 29 Nov 2016 13:54:00 -0800
Subject: [PATCH] Fix races on tcp server shutdown on Windows

- we were treating an int as atomic, which is dubious at best
- it was possible to shutdown while an accept was being handled, and
process that shutdown accept before the real accept finished, leading to
a use-after-free up the stack
---
 src/core/lib/iomgr/tcp_server_windows.c | 77 ++++++++++++-------------
 1 file changed, 37 insertions(+), 40 deletions(-)

diff --git a/src/core/lib/iomgr/tcp_server_windows.c b/src/core/lib/iomgr/tcp_server_windows.c
index ae54c70d2d..cd031a6c02 100644
--- a/src/core/lib/iomgr/tcp_server_windows.c
+++ b/src/core/lib/iomgr/tcp_server_windows.c
@@ -73,6 +73,7 @@ struct grpc_tcp_listener {
   /* The cached AcceptEx for that port. */
   LPFN_ACCEPTEX AcceptEx;
   int shutting_down;
+  int outstanding_calls;
   /* closure for socket notification of accept being ready */
   grpc_closure on_accept;
   /* linked list */
@@ -140,10 +141,8 @@ grpc_error *grpc_tcp_server_create(grpc_exec_ctx *exec_ctx,
   return GRPC_ERROR_NONE;
 }
 
-static void finish_shutdown(grpc_exec_ctx *exec_ctx, grpc_tcp_server *s) {
-  if (s->shutdown_complete != NULL) {
-    grpc_exec_ctx_sched(exec_ctx, s->shutdown_complete, GRPC_ERROR_NONE, NULL);
-  }
+static void destroy_server(grpc_exec_ctx *exec_ctx, void *arg, grpc_error *error) {
+  grpc_tcp_server *s = arg;
 
   /* Now that the accepts have been aborted, we can destroy the sockets.
      The IOCP won't get notified on these, so we can flag them as already
@@ -159,6 +158,14 @@ static void finish_shutdown(grpc_exec_ctx *exec_ctx, grpc_tcp_server *s) {
   gpr_free(s);
 }
 
+static void finish_shutdown_locked(grpc_exec_ctx *exec_ctx, grpc_tcp_server *s) {
+  if (s->shutdown_complete != NULL) {
+    grpc_exec_ctx_sched(exec_ctx, s->shutdown_complete, GRPC_ERROR_NONE, NULL);
+  }
+
+  grpc_exec_ctx_sched(exec_ctx, grpc_closure_create(destroy_server, s), GRPC_ERROR_NONE, NULL);
+}
+
 grpc_tcp_server *grpc_tcp_server_ref(grpc_tcp_server *s) {
   gpr_ref_non_zero(&s->refs);
   return s;
@@ -180,17 +187,14 @@ static void tcp_server_destroy(grpc_exec_ctx *exec_ctx, grpc_tcp_server *s) {
   /* First, shutdown all fd's. This will queue abortion calls for all
      of the pending accepts due to the normal operation mechanism. */
   if (s->active_ports == 0) {
-    immediately_done = 1;
-  }
-  for (sp = s->head; sp; sp = sp->next) {
-    sp->shutting_down = 1;
-    grpc_winsocket_shutdown(sp->socket);
+    finish_shutdown_locked(exec_ctx, s);
+  } else {
+    for (sp = s->head; sp; sp = sp->next) {
+      sp->shutting_down = 1;
+      grpc_winsocket_shutdown(sp->socket);
+    }
   }
   gpr_mu_unlock(&s->mu);
-
-  if (immediately_done) {
-    finish_shutdown(exec_ctx, s);
-  }
 }
 
 void grpc_tcp_server_unref(grpc_exec_ctx *exec_ctx, grpc_tcp_server *s) {
@@ -251,24 +255,19 @@ failure:
   return error;
 }
 
-static void decrement_active_ports_and_notify(grpc_exec_ctx *exec_ctx,
+static void decrement_active_ports_and_notify_locked(grpc_exec_ctx *exec_ctx,
                                               grpc_tcp_listener *sp) {
   int notify = 0;
   sp->shutting_down = 0;
-  gpr_mu_lock(&sp->server->mu);
   GPR_ASSERT(sp->server->active_ports > 0);
   if (0 == --sp->server->active_ports) {
-    notify = 1;
-  }
-  gpr_mu_unlock(&sp->server->mu);
-  if (notify) {
-    finish_shutdown(exec_ctx, sp->server);
+    finish_shutdown_locked(exec_ctx, sp->server);
   }
 }
 
 /* In order to do an async accept, we need to create a socket first which
    will be the one assigned to the new incoming connection. */
-static grpc_error *start_accept(grpc_exec_ctx *exec_ctx,
+static grpc_error *start_accept_locked(grpc_exec_ctx *exec_ctx,
                                 grpc_tcp_listener *port) {
   SOCKET sock = INVALID_SOCKET;
   BOOL success;
@@ -276,6 +275,10 @@ static grpc_error *start_accept(grpc_exec_ctx *exec_ctx,
   DWORD bytes_received = 0;
   grpc_error *error = GRPC_ERROR_NONE;
 
+  if (port->shutting_down) {
+    return GRPC_ERROR_NONE;
+  }
+
   sock = WSASocket(AF_INET6, SOCK_STREAM, IPPROTO_TCP, NULL, 0,
                    WSA_FLAG_OVERLAPPED);
   if (sock == INVALID_SOCKET) {
@@ -305,20 +308,11 @@ static grpc_error *start_accept(grpc_exec_ctx *exec_ctx,
      immediately process an accept that happened in the meantime. */
   port->new_socket = sock;
   grpc_socket_notify_on_read(exec_ctx, port->socket, &port->on_accept);
+  port->outstanding_calls++;
   return error;
 
 failure:
   GPR_ASSERT(error != GRPC_ERROR_NONE);
-  if (port->shutting_down) {
-    /* We are abandoning the listener port, take that into account to prevent
-       occasional hangs on shutdown. The hang happens when sp->shutting_down
-       change is not seen by on_accept and we proceed to trying new accept,
-       but we fail there because the listening port has been closed in the
-       meantime. */
-    decrement_active_ports_and_notify(exec_ctx, port);
-    GRPC_ERROR_UNREF(error);
-    return GRPC_ERROR_NONE;
-  }
   if (sock != INVALID_SOCKET) closesocket(sock);
   return error;
 }
@@ -338,6 +332,8 @@ static void on_accept(grpc_exec_ctx *exec_ctx, void *arg, grpc_error *error) {
   BOOL wsa_success;
   int err;
 
+  gpr_mu_lock(&sp->server->mu);
+
   peer_name.len = sizeof(struct sockaddr_storage);
 
   /* The general mechanism for shutting down is to queue abortion calls. While
@@ -347,6 +343,7 @@ static void on_accept(grpc_exec_ctx *exec_ctx, void *arg, grpc_error *error) {
     const char *msg = grpc_error_string(error);
     gpr_log(GPR_INFO, "Skipping on_accept due to error: %s", msg);
     grpc_error_free_string(msg);
+    gpr_mu_unlock(&sp->server->mu);
     return;
   }
 
@@ -356,17 +353,12 @@ static void on_accept(grpc_exec_ctx *exec_ctx, void *arg, grpc_error *error) {
   wsa_success = WSAGetOverlappedResult(sock, &info->overlapped,
                                        &transfered_bytes, FALSE, &flags);
   if (!wsa_success) {
-    if (sp->shutting_down) {
-      /* During the shutdown case, we ARE expecting an error. So that's well,
-         and we can wake up the shutdown thread. */
-      decrement_active_ports_and_notify(exec_ctx, sp);
-      return;
-    } else {
+    if (!sp->shutting_down) {
       char *utf8_message = gpr_format_message(WSAGetLastError());
       gpr_log(GPR_ERROR, "on_accept error: %s", utf8_message);
       gpr_free(utf8_message);
-      closesocket(sock);
     }
+    closesocket(sock);
   } else {
     if (!sp->shutting_down) {
       peer_name_string = NULL;
@@ -408,7 +400,11 @@ static void on_accept(grpc_exec_ctx *exec_ctx, void *arg, grpc_error *error) {
      the former socked we created has now either been destroy or assigned
      to the new connection. We need to create a new one for the next
      connection. */
-  GPR_ASSERT(GRPC_LOG_IF_ERROR("start_accept", start_accept(exec_ctx, sp)));
+  GPR_ASSERT(GRPC_LOG_IF_ERROR("start_accept", start_accept_locked(exec_ctx, sp)));
+  if (0 == --sp->outstanding_calls) {
+    decrement_active_ports_and_notify_locked(exec_ctx, sp);
+  }
+  gpr_mu_unlock(&sp->server->mu);
 }
 
 static grpc_error *add_socket_to_server(grpc_tcp_server *s, SOCKET sock,
@@ -456,6 +452,7 @@ static grpc_error *add_socket_to_server(grpc_tcp_server *s, SOCKET sock,
   sp->server = s;
   sp->socket = grpc_winsocket_create(sock, "listener");
   sp->shutting_down = 0;
+  sp->outstanding_calls = 0;
   sp->AcceptEx = AcceptEx;
   sp->new_socket = INVALID_SOCKET;
   sp->port = port;
@@ -553,7 +550,7 @@ void grpc_tcp_server_start(grpc_exec_ctx *exec_ctx, grpc_tcp_server *s,
   s->on_accept_cb = on_accept_cb;
   s->on_accept_cb_arg = on_accept_cb_arg;
   for (sp = s->head; sp; sp = sp->next) {
-    GPR_ASSERT(GRPC_LOG_IF_ERROR("start_accept", start_accept(exec_ctx, sp)));
+    GPR_ASSERT(GRPC_LOG_IF_ERROR("start_accept", start_accept_locked(exec_ctx, sp)));
     s->active_ports++;
   }
   gpr_mu_unlock(&s->mu);
-- 
GitLab