From a99157f4dd74583f1fb879792270553443bea34b Mon Sep 17 00:00:00 2001
From: Craig Tiller <craig.tiller@gmail.com>
Date: Tue, 7 Apr 2015 14:32:15 -0700
Subject: [PATCH] Fix server shutdown

A previous fix to make close() occur later can cause socket reuse by servers to fail as previous sockets are left asynchronously open.

This change:
- adds a callback to TCP server shutdown to signal that the server is completely shutdown
- wait for that callback before destroying listeners in the server (and before destroying the server)
- handles fallout
---
 src/core/iomgr/tcp_server.h                   |  4 +-
 src/core/iomgr/tcp_server_posix.c             | 58 ++++++++++++++++---
 src/core/security/server_secure_chttp2.c      |  4 +-
 src/core/surface/completion_queue.c           |  7 +++
 src/core/surface/completion_queue.h           |  2 +
 src/core/surface/server.c                     | 44 ++++++++++++--
 src/core/surface/server.h                     |  2 +
 src/core/surface/server_chttp2.c              |  4 +-
 test/core/end2end/tests/cancel_after_invoke.c |  5 +-
 test/core/end2end/tests/cancel_test_helpers.h |  5 +-
 test/core/iomgr/tcp_server_posix_test.c       | 10 ++--
 11 files changed, 117 insertions(+), 28 deletions(-)

diff --git a/src/core/iomgr/tcp_server.h b/src/core/iomgr/tcp_server.h
index 68ee85c5a7..1e58901a7a 100644
--- a/src/core/iomgr/tcp_server.h
+++ b/src/core/iomgr/tcp_server.h
@@ -71,6 +71,8 @@ int grpc_tcp_server_add_port(grpc_tcp_server *s, const void *addr,
    up when grpc_tcp_server_destroy is called. */
 int grpc_tcp_server_get_fd(grpc_tcp_server *s, unsigned index);
 
-void grpc_tcp_server_destroy(grpc_tcp_server *server);
+void grpc_tcp_server_destroy(grpc_tcp_server *server, 
+                             void (*shutdown_done)(void *shutdown_done_arg), 
+                             void *shutdown_done_arg);
 
 #endif  /* GRPC_INTERNAL_CORE_IOMGR_TCP_SERVER_H */
diff --git a/src/core/iomgr/tcp_server_posix.c b/src/core/iomgr/tcp_server_posix.c
index 90b7eb451d..482166e2eb 100644
--- a/src/core/iomgr/tcp_server_posix.c
+++ b/src/core/iomgr/tcp_server_posix.c
@@ -102,12 +102,18 @@ struct grpc_tcp_server {
   gpr_cv cv;
 
   /* active port count: how many ports are actually still listening */
-  int active_ports;
+  size_t active_ports;
+  /* destroyed port count: how many ports are completely destroyed */
+  size_t destroyed_ports;
 
   /* all listening ports */
   server_port *ports;
   size_t nports;
   size_t port_capacity;
+
+  /* shutdown callback */
+  void (*shutdown_complete)(void *);
+  void *shutdown_complete_arg;
 };
 
 grpc_tcp_server *grpc_tcp_server_create(void) {
@@ -115,6 +121,7 @@ grpc_tcp_server *grpc_tcp_server_create(void) {
   gpr_mu_init(&s->mu);
   gpr_cv_init(&s->cv);
   s->active_ports = 0;
+  s->destroyed_ports = 0;
   s->cb = NULL;
   s->cb_arg = NULL;
   s->ports = gpr_malloc(sizeof(server_port) * INIT_PORT_CAP);
@@ -123,29 +130,62 @@ grpc_tcp_server *grpc_tcp_server_create(void) {
   return s;
 }
 
-void grpc_tcp_server_destroy(grpc_tcp_server *s) {
+static void finish_shutdown(grpc_tcp_server *s) {
+  s->shutdown_complete(s->shutdown_complete_arg);
+
+  gpr_mu_destroy(&s->mu);
+  gpr_cv_destroy(&s->cv);
+
+  gpr_free(s->ports);
+  gpr_free(s);
+}
+
+static void destroyed_port(void *server, int success) {
+  grpc_tcp_server *s = server;
+  gpr_mu_lock(&s->mu);
+  s->destroyed_ports++;
+  if (s->destroyed_ports == s->nports) {
+    gpr_mu_unlock(&s->mu);
+    finish_shutdown(s);
+  } else {
+    gpr_mu_unlock(&s->mu);
+  }
+}
+
+static void dont_care_about_shutdown_completion(void *ignored) {}
+
+void grpc_tcp_server_destroy(grpc_tcp_server *s, 
+                             void (*shutdown_complete)(void *shutdown_complete_arg),
+                             void *shutdown_complete_arg) {
   size_t i;
   gpr_mu_lock(&s->mu);
+
+  s->shutdown_complete = shutdown_complete ? shutdown_complete : dont_care_about_shutdown_completion;
+  s->shutdown_complete_arg = shutdown_complete_arg;
+
   /* shutdown all fd's */
   for (i = 0; i < s->nports; i++) {
     grpc_fd_shutdown(s->ports[i].emfd);
   }
   /* wait while that happens */
+  /* TODO(ctiller): make this asynchronous also */
   while (s->active_ports) {
     gpr_cv_wait(&s->cv, &s->mu, gpr_inf_future);
   }
   gpr_mu_unlock(&s->mu);
 
   /* delete ALL the things */
-  for (i = 0; i < s->nports; i++) {
-    server_port *sp = &s->ports[i];
-    if (sp->addr.sockaddr.sa_family == AF_UNIX) {
-      unlink_if_unix_domain_socket(&sp->addr.un);
+  if (s->nports) {
+    for (i = 0; i < s->nports; i++) {
+      server_port *sp = &s->ports[i];
+      if (sp->addr.sockaddr.sa_family == AF_UNIX) {
+        unlink_if_unix_domain_socket(&sp->addr.un);
+      }
+      grpc_fd_orphan(sp->emfd, destroyed_port, s);
     }
-    grpc_fd_orphan(sp->emfd, NULL, NULL);
+  } else {
+    finish_shutdown(s);
   }
-  gpr_free(s->ports);
-  gpr_free(s);
 }
 
 /* get max listen queue size on linux */
diff --git a/src/core/security/server_secure_chttp2.c b/src/core/security/server_secure_chttp2.c
index c155b80b7e..d5b3a82b87 100644
--- a/src/core/security/server_secure_chttp2.c
+++ b/src/core/security/server_secure_chttp2.c
@@ -120,7 +120,7 @@ static void destroy(grpc_server *server, void *statep) {
   grpc_server_secure_state *state = statep;
   gpr_mu_lock(&state->mu);
   state->is_shutdown = 1;
-  grpc_tcp_server_destroy(state->tcp);
+  grpc_tcp_server_destroy(state->tcp, grpc_server_listener_destroy_done, server);
   gpr_mu_unlock(&state->mu);
   state_unref(state);
 }
@@ -213,7 +213,7 @@ error:
     grpc_resolved_addresses_destroy(resolved);
   }
   if (tcp) {
-    grpc_tcp_server_destroy(tcp);
+    grpc_tcp_server_destroy(tcp, NULL, NULL);
   }
   if (state) {
     gpr_free(state);
diff --git a/src/core/surface/completion_queue.c b/src/core/surface/completion_queue.c
index 6a1d83ce5d..b08bd93693 100644
--- a/src/core/surface/completion_queue.c
+++ b/src/core/surface/completion_queue.c
@@ -432,3 +432,10 @@ void grpc_cq_dump_pending_ops(grpc_completion_queue *cc) {
 grpc_pollset *grpc_cq_pollset(grpc_completion_queue *cc) {
   return &cc->pollset;
 }
+
+void grpc_cq_hack_spin_pollset(grpc_completion_queue *cc) {
+  gpr_mu_lock(GRPC_POLLSET_MU(&cc->pollset));
+  grpc_pollset_kick(&cc->pollset);
+  grpc_pollset_work(&cc->pollset, gpr_time_add(gpr_now(), gpr_time_from_millis(100)));
+  gpr_mu_unlock(GRPC_POLLSET_MU(&cc->pollset));
+}
diff --git a/src/core/surface/completion_queue.h b/src/core/surface/completion_queue.h
index 3054264cad..3e9e2d186e 100644
--- a/src/core/surface/completion_queue.h
+++ b/src/core/surface/completion_queue.h
@@ -114,4 +114,6 @@ void grpc_cq_dump_pending_ops(grpc_completion_queue *cc);
 
 grpc_pollset *grpc_cq_pollset(grpc_completion_queue *cc);
 
+void grpc_cq_hack_spin_pollset(grpc_completion_queue *cc);
+
 #endif  /* GRPC_INTERNAL_CORE_SURFACE_COMPLETION_QUEUE_H */
diff --git a/src/core/surface/server.c b/src/core/surface/server.c
index 424734c54c..2e013ea742 100644
--- a/src/core/surface/server.c
+++ b/src/core/surface/server.c
@@ -137,6 +137,7 @@ struct grpc_server {
   size_t cq_count;
 
   gpr_mu mu;
+  gpr_cv cv;
 
   registered_method *registered_methods;
   requested_call_array requested_calls;
@@ -149,6 +150,7 @@ struct grpc_server {
   channel_data root_channel_data;
 
   listener *listeners;
+  int listeners_destroyed;
   gpr_refcount internal_refcount;
 };
 
@@ -263,6 +265,7 @@ static void server_unref(grpc_server *server) {
   if (gpr_unref(&server->internal_refcount)) {
     grpc_channel_args_destroy(server->channel_args);
     gpr_mu_destroy(&server->mu);
+    gpr_cv_destroy(&server->cv);
     gpr_free(server->channel_filters);
     requested_call_array_destroy(&server->requested_calls);
     while ((rm = server->registered_methods) != NULL) {
@@ -620,6 +623,7 @@ grpc_server *grpc_server_create_from_filters(grpc_completion_queue *cq,
   if (cq) addcq(server, cq);
 
   gpr_mu_init(&server->mu);
+  gpr_cv_init(&server->cv);
 
   server->unregistered_cq = cq;
   /* decremented by grpc_server_destroy */
@@ -781,6 +785,15 @@ grpc_transport_setup_result grpc_server_setup_transport(
   return result;
 }
 
+static int num_listeners(grpc_server *server) {
+  listener *l;
+  int n = 0;
+  for (l = server->listeners; l; l = l->next) {
+    n++;
+  }
+  return n;
+}
+
 static void shutdown_internal(grpc_server *server, gpr_uint8 have_shutdown_tag,
                               void *shutdown_tag) {
   listener *l;
@@ -878,11 +891,6 @@ static void shutdown_internal(grpc_server *server, gpr_uint8 have_shutdown_tag,
   for (l = server->listeners; l; l = l->next) {
     l->destroy(server, l->arg);
   }
-  while (server->listeners) {
-    l = server->listeners;
-    server->listeners = l->next;
-    gpr_free(l);
-  }
 }
 
 void grpc_server_shutdown(grpc_server *server) {
@@ -893,8 +901,18 @@ void grpc_server_shutdown_and_notify(grpc_server *server, void *tag) {
   shutdown_internal(server, 1, tag);
 }
 
+void grpc_server_listener_destroy_done(void *s) {
+  grpc_server *server = s;
+  gpr_mu_lock(&server->mu);
+  server->listeners_destroyed++;
+  gpr_cv_signal(&server->cv);
+  gpr_mu_unlock(&server->mu);
+}
+
 void grpc_server_destroy(grpc_server *server) {
   channel_data *c;
+  listener *l;
+  size_t i;
   gpr_mu_lock(&server->mu);
   if (!server->shutdown) {
     gpr_mu_unlock(&server->mu);
@@ -902,6 +920,22 @@ void grpc_server_destroy(grpc_server *server) {
     gpr_mu_lock(&server->mu);
   }
 
+  while (server->listeners_destroyed != num_listeners(server)) {
+    for (i = 0; i < server->cq_count; i++) {
+      gpr_mu_unlock(&server->mu);
+      grpc_cq_hack_spin_pollset(server->cqs[i]);
+      gpr_mu_lock(&server->mu);
+    }
+
+    gpr_cv_wait(&server->cv, &server->mu, gpr_time_add(gpr_now(), gpr_time_from_millis(100)));
+  }
+
+  while (server->listeners) {
+    l = server->listeners;
+    server->listeners = l->next;
+    gpr_free(l);
+  }
+
   for (c = server->root_channel_data.next; c != &server->root_channel_data;
        c = c->next) {
     shutdown_channel(c);
diff --git a/src/core/surface/server.h b/src/core/surface/server.h
index e33f69b8c7..548a16c6c9 100644
--- a/src/core/surface/server.h
+++ b/src/core/surface/server.h
@@ -51,6 +51,8 @@ void grpc_server_add_listener(grpc_server *server, void *listener,
                                             grpc_pollset **pollsets, size_t npollsets),
                               void (*destroy)(grpc_server *server, void *arg));
 
+void grpc_server_listener_destroy_done(void *server);
+
 /* Setup a transport - creates a channel stack, binds the transport to the
    server */
 grpc_transport_setup_result grpc_server_setup_transport(
diff --git a/src/core/surface/server_chttp2.c b/src/core/surface/server_chttp2.c
index 27434b39e2..9a23125752 100644
--- a/src/core/surface/server_chttp2.c
+++ b/src/core/surface/server_chttp2.c
@@ -75,7 +75,7 @@ static void start(grpc_server *server, void *tcpp, grpc_pollset **pollsets, size
    callbacks) */
 static void destroy(grpc_server *server, void *tcpp) {
   grpc_tcp_server *tcp = tcpp;
-  grpc_tcp_server_destroy(tcp);
+  grpc_tcp_server_destroy(tcp, grpc_server_listener_destroy_done, server);
 }
 
 int grpc_server_add_http2_port(grpc_server *server, const char *addr) {
@@ -131,7 +131,7 @@ error:
     grpc_resolved_addresses_destroy(resolved);
   }
   if (tcp) {
-    grpc_tcp_server_destroy(tcp);
+    grpc_tcp_server_destroy(tcp, NULL, NULL);
   }
   return 0;
 }
diff --git a/test/core/end2end/tests/cancel_after_invoke.c b/test/core/end2end/tests/cancel_after_invoke.c
index e15fa7fcd9..326321f4e2 100644
--- a/test/core/end2end/tests/cancel_after_invoke.c
+++ b/test/core/end2end/tests/cancel_after_invoke.c
@@ -51,10 +51,11 @@ static void *tag(gpr_intptr t) { return (void *)t; }
 
 static grpc_end2end_test_fixture begin_test(grpc_end2end_test_config config,
                                             const char *test_name,
+                                            cancellation_mode mode,
                                             grpc_channel_args *client_args,
                                             grpc_channel_args *server_args) {
   grpc_end2end_test_fixture f;
-  gpr_log(GPR_INFO, "%s/%s", test_name, config.name);
+  gpr_log(GPR_INFO, "%s/%s/%s", test_name, config.name, mode.name);
   f = config.create_fixture(client_args, server_args);
   config.init_client(&f, client_args);
   config.init_server(&f, server_args);
@@ -109,7 +110,7 @@ static void test_cancel_after_invoke(grpc_end2end_test_config config,
   grpc_op ops[6];
   grpc_op *op;
   grpc_call *c;
-  grpc_end2end_test_fixture f = begin_test(config, __FUNCTION__, NULL, NULL);
+  grpc_end2end_test_fixture f = begin_test(config, __FUNCTION__, mode, NULL, NULL);
   gpr_timespec deadline = five_seconds_time();
   cq_verifier *v_client = cq_verifier_create(f.client_cq);
   grpc_metadata_array initial_metadata_recv;
diff --git a/test/core/end2end/tests/cancel_test_helpers.h b/test/core/end2end/tests/cancel_test_helpers.h
index f2581dc32f..3d92b64ae4 100644
--- a/test/core/end2end/tests/cancel_test_helpers.h
+++ b/test/core/end2end/tests/cancel_test_helpers.h
@@ -35,6 +35,7 @@
 #define GRPC_TEST_CORE_END2END_TESTS_CANCEL_TEST_HELPERS_H
 
 typedef struct {
+  const char *name;
   grpc_call_error (*initiate_cancel)(grpc_call *call);
   grpc_status_code expect_status;
   const char *expect_details;
@@ -45,7 +46,7 @@ static grpc_call_error wait_for_deadline(grpc_call *call) {
 }
 
 static const cancellation_mode cancellation_modes[] = {
-    {grpc_call_cancel, GRPC_STATUS_CANCELLED, ""},
-    {wait_for_deadline, GRPC_STATUS_DEADLINE_EXCEEDED, "Deadline Exceeded"}, };
+    {"cancel", grpc_call_cancel, GRPC_STATUS_CANCELLED, ""},
+    {"deadline", wait_for_deadline, GRPC_STATUS_DEADLINE_EXCEEDED, "Deadline Exceeded"}, };
 
 #endif  /* GRPC_TEST_CORE_END2END_TESTS_CANCEL_TEST_HELPERS_H */
diff --git a/test/core/iomgr/tcp_server_posix_test.c b/test/core/iomgr/tcp_server_posix_test.c
index 2689c3f38e..6b80ee1ee8 100644
--- a/test/core/iomgr/tcp_server_posix_test.c
+++ b/test/core/iomgr/tcp_server_posix_test.c
@@ -60,14 +60,14 @@ static void on_connect(void *arg, grpc_endpoint *tcp) {
 
 static void test_no_op(void) {
   grpc_tcp_server *s = grpc_tcp_server_create();
-  grpc_tcp_server_destroy(s);
+  grpc_tcp_server_destroy(s, NULL, NULL);
 }
 
 static void test_no_op_with_start(void) {
   grpc_tcp_server *s = grpc_tcp_server_create();
   LOG_TEST();
   grpc_tcp_server_start(s, NULL, 0, on_connect, NULL);
-  grpc_tcp_server_destroy(s);
+  grpc_tcp_server_destroy(s, NULL, NULL);
 }
 
 static void test_no_op_with_port(void) {
@@ -80,7 +80,7 @@ static void test_no_op_with_port(void) {
   GPR_ASSERT(
       grpc_tcp_server_add_port(s, (struct sockaddr *)&addr, sizeof(addr)));
 
-  grpc_tcp_server_destroy(s);
+  grpc_tcp_server_destroy(s, NULL, NULL);
 }
 
 static void test_no_op_with_port_and_start(void) {
@@ -95,7 +95,7 @@ static void test_no_op_with_port_and_start(void) {
 
   grpc_tcp_server_start(s, NULL, 0, on_connect, NULL);
 
-  grpc_tcp_server_destroy(s);
+  grpc_tcp_server_destroy(s, NULL, NULL);
 }
 
 static void test_connect(int n) {
@@ -144,7 +144,7 @@ static void test_connect(int n) {
 
   gpr_mu_unlock(&mu);
 
-  grpc_tcp_server_destroy(s);
+  grpc_tcp_server_destroy(s, NULL, NULL);
 }
 
 int main(int argc, char **argv) {
-- 
GitLab